##// END OF EJS Templates
py3: slice over bytes to prevent getting the ascii values...
Pulkit Goyal -
r38095:86e0a4be default
parent child Browse files
Show More
@@ -1,2950 +1,2950 b''
1 1 # patch.py - patch file parsing routines
2 2 #
3 3 # Copyright 2006 Brendan Cully <brendan@kublai.com>
4 4 # Copyright 2007 Chris Mason <chris.mason@oracle.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import, print_function
10 10
11 11 import collections
12 12 import contextlib
13 13 import copy
14 14 import email
15 15 import errno
16 16 import hashlib
17 17 import os
18 18 import posixpath
19 19 import re
20 20 import shutil
21 21 import tempfile
22 22 import zlib
23 23
24 24 from .i18n import _
25 25 from .node import (
26 26 hex,
27 27 short,
28 28 )
29 29 from . import (
30 30 copies,
31 31 diffhelper,
32 32 encoding,
33 33 error,
34 34 mail,
35 35 mdiff,
36 36 pathutil,
37 37 pycompat,
38 38 scmutil,
39 39 similar,
40 40 util,
41 41 vfs as vfsmod,
42 42 )
43 43 from .utils import (
44 44 dateutil,
45 45 procutil,
46 46 stringutil,
47 47 )
48 48
# Re-export of the error module's patch exception under this module's name.
PatchError = error.PatchError

# File-like in-memory buffer type used to hand out patch chunks.
stringio = util.stringio

# Header line of a git-style diff; groups are the source and destination paths.
gitre = re.compile(br'diff --git a/(.*) b/(.*)')
# Tokenizer yielding alternating runs of tabs and non-tab characters.
tabsplitter = re.compile(br'(\t+|[^\t]+)')
# Tokenizer yielding runs of tabs, runs of spaces, words (ASCII alphanumerics,
# underscore and bytes 0x80-0xff) or any other single character.
wordsplitter = re.compile(br'(\t+| +|[a-zA-Z0-9_\x80-\xff]+|'
                          b'[^ \ta-zA-Z0-9_\x80-\xff])')
57 57
58 58 # public functions
59 59
def split(stream):
    '''return an iterator of individual patches from a stream

    The stream is read line by line until its format can be recognized:

    - a line starting with '# HG changeset patch' selects hg export splitting,
    - a line starting with 'From ' selects mbox splitting (each message is
      split again recursively),
    - a 'content-type' header hands the whole stream to the email parser,
    - a '--- ' line seen after plain headers selects header-based splitting.

    If none of these is seen, everything read is yielded as a single patch.
    Every yielded item is a stringio positioned at its start.
    '''
    def isheader(line, inheader):
        '''tell whether line looks like an RFC 822 style "Name: value" header

        inheader says whether the previous line was part of a header, in
        which case an indented line is a continuation.
        '''
        if inheader and line.startswith((' ', '\t')):
            # continuation
            return True
        if line.startswith((' ', '-', '+')):
            # diff line - don't check for header pattern in there
            return False
        l = line.split(': ', 1)
        return len(l) == 2 and ' ' not in l[0]

    def chunk(lines):
        '''wrap a list of lines into a file-like object'''
        return stringio(''.join(lines))

    def hgsplit(stream, cur):
        '''split on '# HG changeset patch' lines found outside a header'''
        inheader = True

        for line in stream:
            if not line.strip():
                # a blank line terminates the header block
                inheader = False
            if not inheader and line.startswith('# HG changeset patch'):
                yield chunk(cur)
                cur = []
                inheader = True

            cur.append(line)

        if cur:
            yield chunk(cur)

    def mboxsplit(stream, cur):
        '''split on mbox 'From ' separators, re-splitting each message'''
        for line in stream:
            if line.startswith('From '):
                # cur[0] is the 'From ' separator itself, drop it
                for c in split(chunk(cur[1:])):
                    yield c
                cur = []

            cur.append(line)

        if cur:
            for c in split(chunk(cur[1:])):
                yield c

    def mimesplit(stream, cur):
        '''let the email parser split the stream into its text parts'''
        # email.Generator only exists on Python 2.  The lowercase module
        # exists on both; on Python 3 the bytes flavour is required because
        # the chunks handed out are byte buffers.
        try:
            from email.generator import BytesGenerator as generator
        except ImportError:
            from email.generator import Generator as generator

        def msgfp(m):
            fp = stringio()
            g = generator(fp, mangle_from_=False)
            g.flatten(m)
            fp.seek(0)
            return fp

        for line in stream:
            cur.append(line)
        c = chunk(cur)

        m = pycompat.emailparser().parse(c)
        if not m.is_multipart():
            yield msgfp(m)
        else:
            ok_types = ('text/plain', 'text/x-diff', 'text/x-patch')
            for part in m.walk():
                # get_content_type() returns a native string; normalize it
                # the same way _extract() does before comparing.
                ct = pycompat.bytestr(part.get_content_type())
                if ct not in ok_types:
                    continue
                yield msgfp(part)

    def headersplit(stream, cur):
        '''split whenever a new block of header lines starts'''
        inheader = False

        for line in stream:
            if not inheader and isheader(line, inheader):
                yield chunk(cur)
                cur = []
                inheader = True
            if inheader and not isheader(line, inheader):
                inheader = False

            cur.append(line)

        if cur:
            yield chunk(cur)

    def remainder(cur):
        '''yield everything read so far as one patch'''
        yield chunk(cur)

    class fiter(object):
        '''adapt an object that only has readline() into an iterator'''
        def __init__(self, fp):
            self.fp = fp

        def __iter__(self):
            return self

        def next(self):
            l = self.fp.readline()
            if not l:
                raise StopIteration
            return l

        __next__ = next

    inheader = False
    cur = []

    mimeheaders = ['content-type']

    if not util.safehasattr(stream, 'next'):
        # http responses, for example, have readline but not next
        stream = fiter(stream)

    for line in stream:
        cur.append(line)
        if line.startswith('# HG changeset patch'):
            return hgsplit(stream, cur)
        elif line.startswith('From '):
            return mboxsplit(stream, cur)
        elif isheader(line, inheader):
            inheader = True
            if line.split(':', 1)[0].lower() in mimeheaders:
                # let email parser handle this
                return mimesplit(stream, cur)
        elif line.startswith('--- ') and inheader:
            # No evil headers seen by diff start, split by hand
            return headersplit(stream, cur)
        # Not enough info, keep reading

    # if we are here, we have a very plain patch
    return remainder(cur)
188 188
## Some facility for extensible patch parsing:
# list of pairs ("header to match", "data key")
patchheadermap = [
    ('Date', 'date'),
    ('Branch', 'branch'),
    ('Node ID', 'nodeid'),
]
195 195
@contextlib.contextmanager
def extract(ui, fileobj):
    '''extract patch from data read from fileobj.

    patch can be a normal patch or contained in an email message.

    This is a context manager yielding a dictionary. Standard keys are:
      - filename,
      - message,
      - user,
      - date,
      - branch,
      - nodeid,
      - p1,
      - p2.
    Any item can be missing from the dictionary. If filename is missing,
    fileobj did not contain a patch.

    filename names a temporary file holding the patch data. It is removed
    when the context exits, so it must only be used inside the "with" block
    and must not be unlinked by the caller.'''

    fd, tmpname = tempfile.mkstemp(prefix='hg-patch-')
    tmpfp = os.fdopen(fd, r'wb')
    try:
        yield _extract(ui, fileobj, tmpname, tmpfp)
    finally:
        # _extract() normally closes tmpfp already; closing again is harmless
        # and covers the case where it raised before getting there.
        tmpfp.close()
        os.unlink(tmpname)
221 221
def _extract(ui, fileobj, tmpname, tmpfp):
    '''parse fileobj as a mail or plain patch and collect its metadata

    Patch payloads are written to tmpfp, which is closed before returning.
    The returned dictionary always has 'user' and 'message'; 'p1', 'p2' and
    the keys of patchheadermap appear when the matching "# ..." header lines
    are found; 'filename' is set to tmpname when at least one diff was seen.
    '''
    # attempt to detect the start of a patch
    # (this heuristic is borrowed from quilt)
    diffre = re.compile(br'^(?:Index:[ \t]|diff[ \t]-|RCS file: |'
                        br'retrieving revision [0-9]+(\.[0-9]+)*$|'
                        br'---[ \t].*?^\+\+\+[ \t]|'
                        br'\*\*\*[ \t].*?^---[ \t])',
                        re.MULTILINE | re.DOTALL)

    data = {}
    parents = []

    msg = pycompat.emailparser().parse(fileobj)

    subject = msg[r'Subject'] and mail.headdecode(msg[r'Subject'])
    data['user'] = msg[r'From'] and mail.headdecode(msg[r'From'])
    if not (subject or data['user']):
        # Not an email, restore parsed headers if any
        headerlines = (': '.join(map(encoding.strtolocal, item))
                       for item in msg.items())
        subject = '\n'.join(headerlines) + '\n'

    # should try to parse msg['Date']

    if subject:
        if subject.startswith('[PATCH'):
            closing = subject.find(']')
            if closing >= 0:
                subject = subject[closing + 1:].lstrip()
        # unfold continuation lines
        subject = re.sub(br'\n[ \t]+', ' ', subject)
        ui.debug('Subject: %s\n' % subject)
    if data['user']:
        ui.debug('From: %s\n' % data['user'])

    diffs_seen = 0
    message = ''
    ok_types = ('text/plain', 'text/x-diff', 'text/x-patch')
    for part in msg.walk():
        content_type = pycompat.bytestr(part.get_content_type())
        ui.debug('Content-Type: %s\n' % content_type)
        if content_type not in ok_types:
            continue
        payload = part.get_payload(decode=True)
        found = diffre.search(payload)
        if not found:
            # text before the first diff extends the commit message
            if not diffs_seen and message and content_type == 'text/plain':
                message += '\n' + payload
            continue

        hgpatch = False
        hgpatchheader = False
        ignoretext = False

        patchstart = found.start(0)
        ui.debug('found patch at byte %d\n' % patchstart)
        diffs_seen += 1
        msgbuf = stringio()
        for line in payload[:patchstart].splitlines():
            if line.startswith('# HG changeset patch') and not hgpatch:
                ui.debug('patch generated by hg export\n')
                hgpatch = True
                hgpatchheader = True
                # drop earlier commit message content
                msgbuf.seek(0)
                msgbuf.truncate()
                subject = None
            elif hgpatchheader:
                if line.startswith('# User '):
                    data['user'] = line[7:]
                    ui.debug('From: %s\n' % data['user'])
                elif line.startswith("# Parent "):
                    parents.append(line[9:].lstrip())
                elif line.startswith("# "):
                    for header, key in patchheadermap:
                        prefix = '# %s ' % header
                        if line.startswith(prefix):
                            data[key] = line[len(prefix):]
                else:
                    # first non-"#" line ends the hg export header
                    hgpatchheader = False
            elif line == '---':
                # everything after a "---" line is not commit message
                ignoretext = True
            if not hgpatchheader and not ignoretext:
                msgbuf.write(line)
                msgbuf.write('\n')
        message = msgbuf.getvalue()
        if tmpfp:
            tmpfp.write(payload)
            if not payload.endswith('\n'):
                tmpfp.write('\n')

    if subject and not message.startswith(subject):
        message = '%s\n%s' % (subject, message)
    data['message'] = message
    tmpfp.close()

    # at most two parents are recorded, in the order they were listed
    for key, node in zip(('p1', 'p2'), parents):
        data[key] = node

    if diffs_seen:
        data['filename'] = tmpname

    return data
321 321
class patchmeta(object):
    """Patched file metadata

    'op' is the performed operation within ADD, DELETE, RENAME, MODIFY
    or COPY. 'path' is patched file path. 'oldpath' is set to the
    origin file when 'op' is either COPY or RENAME, None otherwise. If
    file mode is changed, 'mode' is a tuple (islink, isexec) where
    'islink' is truthy if the file is a symlink and 'isexec' is truthy if
    the file is executable. Otherwise, 'mode' is None. 'binary' tells
    whether a git binary patch was seen for the file.
    """
    def __init__(self, path):
        self.path = path
        self.oldpath = None
        self.mode = None
        self.op = 'MODIFY'
        self.binary = False

    def setmode(self, mode):
        """Record the symlink and owner-executable bits of a numeric mode."""
        self.mode = (mode & 0o20000, mode & 0o100)

    def copy(self):
        """Return a new patchmeta carrying the same field values."""
        clone = patchmeta(self.path)
        clone.oldpath = self.oldpath
        clone.mode = self.mode
        clone.op = self.op
        clone.binary = self.binary
        return clone

    def _ispatchinga(self, afile):
        # the "a" side is /dev/null only when the file is being added
        if afile != '/dev/null':
            return afile == 'a/' + (self.oldpath or self.path)
        return self.op == 'ADD'

    def _ispatchingb(self, bfile):
        # the "b" side is /dev/null only when the file is being deleted
        if bfile != '/dev/null':
            return bfile == 'b/' + self.path
        return self.op == 'DELETE'

    def ispatching(self, afile, bfile):
        """Tell whether the afile/bfile pair of a diff refers to this file."""
        if not self._ispatchinga(afile):
            return False
        return self._ispatchingb(bfile)

    def __repr__(self):
        return "<patchmeta %s %r>" % (self.op, self.path)
367 367
def readgitpatch(lr):
    """extract git-style metadata about patches from <patchname>

    lr is iterated line by line. One patchmeta is created per
    'diff --git a/... b/...' line and filled from the extended header lines
    that follow it, up to the '--- ' line starting the diff body. The
    patchmeta objects are returned as a list, in input order.
    """
    found = []
    current = None
    # Filter patch for git information
    for rawline in lr:
        line = rawline.rstrip(' \r\n')
        if line.startswith('diff --git a/'):
            match = gitre.match(line)
            if match:
                if current is not None:
                    found.append(current)
                current = patchmeta(match.group(2))
            continue
        if current is None:
            # not inside a git header, nothing to record
            continue
        if line.startswith('--- '):
            # diff body starts: the extended header is complete
            found.append(current)
            current = None
            continue

        if line.startswith('rename from '):
            current.op = 'RENAME'
            current.oldpath = line[len('rename from '):]
        elif line.startswith('rename to '):
            current.path = line[len('rename to '):]
        elif line.startswith('copy from '):
            current.op = 'COPY'
            current.oldpath = line[len('copy from '):]
        elif line.startswith('copy to '):
            current.path = line[len('copy to '):]
        elif line.startswith('deleted file'):
            current.op = 'DELETE'
        elif line.startswith('new file mode '):
            current.op = 'ADD'
            # the mode is the trailing six octal digits
            current.setmode(int(line[-6:], 8))
        elif line.startswith('new mode '):
            current.setmode(int(line[-6:], 8))
        elif line.startswith('GIT binary patch'):
            current.binary = True

    if current is not None:
        found.append(current)

    return found
411 411
class linereader(object):
    """File wrapper that lets already-read lines be pushed back.

    Pushed lines are returned, in the order they were pushed, before any
    further line is read from the wrapped file object.
    """
    def __init__(self, fp):
        self.fp = fp
        self.buf = []

    def push(self, line):
        """Queue line for a later readline(); None is ignored."""
        if line is None:
            return
        self.buf.append(line)

    def readline(self):
        """Return the oldest pushed line, or else the next line of fp."""
        if not self.buf:
            return self.fp.readline()
        return self.buf.pop(0)

    def __iter__(self):
        # iteration stops at the first empty read
        return iter(self.readline, '')
431 431
class abstractbackend(object):
    """Interface of the objects patched files are read from and written to.

    Concrete backends override the methods below; apart from writerej(),
    which does nothing by default, they all raise NotImplementedError.
    """
    def __init__(self, ui):
        self.ui = ui

    def getfile(self, fname):
        """Return target file data and flags as a (data, (islink,
        isexec)) tuple. Data is None if file is missing/deleted.
        """
        raise NotImplementedError

    def setfile(self, fname, data, mode, copysource):
        """Write data to target file fname and set its mode. mode is a
        (islink, isexec) tuple. If data is None, the file content should
        be left unchanged. If the file is modified after being copied,
        copysource is set to the original file name.
        """
        raise NotImplementedError

    def unlink(self, fname):
        """Unlink target file."""
        raise NotImplementedError

    def writerej(self, fname, failed, total, lines):
        """Write rejected lines for fname. failed is the number of hunks
        which failed to apply and total the total number of hunks for this
        file.
        """

    def exists(self, fname):
        """Tell whether target file fname exists."""
        raise NotImplementedError

    def close(self):
        """Finish patching; concrete backends define what is returned."""
        raise NotImplementedError
465 465
class fsbackend(abstractbackend):
    """Backend reading and writing files below basedir through a vfs."""
    def __init__(self, ui, basedir):
        super(fsbackend, self).__init__(ui)
        self.opener = vfsmod.vfs(basedir)

    def getfile(self, fname):
        """Return (data, (islink, isexec)) for fname.

        For a symlink, data is the link target. (None, None) is returned
        when the file does not exist.
        """
        if self.opener.islink(fname):
            return (self.opener.readlink(fname), (True, False))

        isexec = False
        try:
            isexec = self.opener.lstat(fname).st_mode & 0o100 != 0
        except OSError as e:
            # a missing file is reported by the read below
            if e.errno != errno.ENOENT:
                raise
        try:
            return (self.opener.read(fname), (False, isexec))
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
        return None, None

    def setfile(self, fname, data, mode, copysource):
        """Write data to fname and apply the (islink, isexec) mode.

        When data is None only the flags are updated. copysource is not
        used by this backend.
        """
        islink, isexec = mode
        if data is None:
            self.opener.setflags(fname, islink, isexec)
            return
        if islink:
            self.opener.symlink(data, fname)
        else:
            self.opener.write(fname, data)
            if isexec:
                self.opener.setflags(fname, False, True)

    def unlink(self, fname):
        """Remove fname; a missing file is not an error."""
        self.opener.unlinkpath(fname, ignoremissing=True)

    def writerej(self, fname, failed, total, lines):
        """Warn about failed hunks and save lines to '<fname>.rej'."""
        fname = fname + ".rej"
        self.ui.warn(
            _("%d out of %d hunks FAILED -- saving rejects to file %s\n") %
            (failed, total, fname))
        fp = self.opener(fname, 'w')
        try:
            fp.writelines(lines)
        finally:
            # do not leak the handle if writing fails
            fp.close()

    def exists(self, fname):
        """Tell whether fname exists, without following symlinks."""
        return self.opener.lexists(fname)
514 514
class workingbackend(fsbackend):
    """Filesystem backend rooted at repo.root that tracks what it touched.

    Written, removed and copied files are remembered so that close() can
    update the dirstate accordingly.
    """
    def __init__(self, ui, repo, similarity):
        super(workingbackend, self).__init__(ui, repo.root)
        self.repo = repo
        self.similarity = similarity
        self.removed = set()
        self.changed = set()
        self.copied = []

    def _checkknown(self, fname):
        """Raise PatchError if fname exists on disk but is untracked."""
        untracked = self.repo.dirstate[fname] == '?'
        if untracked and self.exists(fname):
            raise PatchError(_('cannot patch %s: file is not tracked') % fname)

    def setfile(self, fname, data, mode, copysource):
        self._checkknown(fname)
        super(workingbackend, self).setfile(fname, data, mode, copysource)
        if copysource is not None:
            self.copied.append((copysource, fname))
        self.changed.add(fname)

    def unlink(self, fname):
        self._checkknown(fname)
        super(workingbackend, self).unlink(fname)
        self.removed.add(fname)
        self.changed.add(fname)

    def close(self):
        """Record copies and removals, mark touched files, and return the
        sorted list of every file changed through this backend."""
        wctx = self.repo[None]
        touched = set(self.changed)
        for source, dest in self.copied:
            scmutil.dirstatecopy(self.ui, self.repo, wctx, source, dest)
        if self.removed:
            wctx.forget(sorted(self.removed))
            # A file that was deleted and no longer belongs to the
            # dirstate was probably marked added then deleted, and
            # should not be considered by marktouched().
            touched.difference_update(
                [f for f in self.removed if f not in self.repo.dirstate])
        if touched:
            scmutil.marktouched(self.repo, touched, self.similarity)
        return sorted(self.changed)
558 558
class filestore(object):
    """Store of file contents kept in memory up to maxsize bytes.

    Contents that would push the in-memory total above maxsize are written
    to a temporary directory instead. maxsize defaults to 4 MiB; a negative
    value keeps everything in memory.
    """
    def __init__(self, maxsize=None):
        self.opener = None
        self.files = {}
        self.created = 0
        self.maxsize = maxsize
        if self.maxsize is None:
            self.maxsize = 4*(2**20)
        self.size = 0
        self.data = {}

    def setfile(self, fname, data, mode, copied=None):
        """Store data, mode and copy source under fname.

        A previously stored version of fname is replaced.
        """
        # Drop any earlier in-memory version first: it must neither count
        # twice towards the size limit nor shadow a version that ends up on
        # disk (getfile() looks at the in-memory entries first).
        previous = self.data.pop(fname, None)
        if previous is not None:
            self.size -= len(previous[0])

        if self.maxsize < 0 or (len(data) + self.size) <= self.maxsize:
            self.data[fname] = (data, mode, copied)
            self.size += len(data)
            # an older on-disk version is superseded
            self.files.pop(fname, None)
        else:
            if self.opener is None:
                root = tempfile.mkdtemp(prefix='hg-patch-')
                self.opener = vfsmod.vfs(root)
            # Avoid filename issues with these simple names
            fn = '%d' % self.created
            self.opener.write(fn, data)
            self.created += 1
            self.files[fname] = (fn, mode, copied)

    def getfile(self, fname):
        """Return (data, mode, copied) for fname, or (None, None, None) if
        nothing was stored under that name."""
        if fname in self.data:
            return self.data[fname]
        if not self.opener or fname not in self.files:
            return None, None, None
        fn, mode, copied = self.files[fname]
        return self.opener.read(fn), mode, copied

    def close(self):
        """Remove the temporary directory, if one was created."""
        if self.opener:
            shutil.rmtree(self.opener.base)
595 595
class repobackend(abstractbackend):
    """Backend reading files from a changectx and writing into a store.

    Nothing is written to disk by this class itself: new contents go to
    the store, while changed, removed and copied names are only recorded.
    """
    def __init__(self, ui, repo, ctx, store):
        super(repobackend, self).__init__(ui)
        self.repo = repo
        self.ctx = ctx
        self.store = store
        self.changed = set()
        self.removed = set()
        self.copied = {}

    def _checkknown(self, fname):
        """Raise PatchError unless fname is present in the context."""
        if fname in self.ctx:
            return
        raise PatchError(_('cannot patch %s: file is not tracked') % fname)

    def getfile(self, fname):
        """Return (data, (islink, isexec)) from the context, or
        (None, None) when the lookup fails."""
        try:
            fctx = self.ctx[fname]
        except error.LookupError:
            return None, None
        flags = fctx.flags()
        mode = ('l' in flags, 'x' in flags)
        return fctx.data(), mode

    def setfile(self, fname, data, mode, copysource):
        """Store the new version of fname; data=None keeps the content
        found in the context."""
        if copysource:
            self._checkknown(copysource)
        if data is None:
            data = self.ctx[fname].data()
        self.store.setfile(fname, data, mode, copysource)
        self.changed.add(fname)
        if copysource:
            self.copied[fname] = copysource

    def unlink(self, fname):
        self._checkknown(fname)
        self.removed.add(fname)

    def exists(self, fname):
        return fname in self.ctx

    def close(self):
        """Return the set of all changed or removed file names."""
        return self.changed.union(self.removed)
637 637
# Hunk range descriptors. Raw bytes literals are used, like for gitre above,
# so that the backslash escapes reach the regex engine untouched and the
# patterns match the byte lines they are applied to.
# @@ -start,len +start,len @@ or @@ -start +start @@ if len is 1
unidesc = re.compile(br'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
# "*** start,len ***" or "--- start,len ---" of a context diff
contextdesc = re.compile(br'(?:---|\*\*\*) (\d+)(?:,(\d+))? (?:---|\*\*\*)')
# accepted values for the eolmode argument of patchfile
eolmodes = ['strict', 'crlf', 'lf', 'auto']
642 642
class patchfile(object):
    """State of one file being patched: its lines, offsets and rejects.

    The current content is loaded from backend (or from store when the
    file is a copy/rename target), hunks are applied in memory with
    apply(), and close() writes the result and the rejects back through
    the backend.
    """
    def __init__(self, ui, gp, backend, store, eolmode='strict'):
        self.fname = gp.path
        self.eolmode = eolmode
        self.eol = None
        self.backend = backend
        self.ui = ui
        self.lines = []
        self.exists = False
        self.missing = True
        self.mode = gp.mode
        self.copysource = gp.oldpath
        self.create = gp.op in ('ADD', 'COPY', 'RENAME')
        self.remove = gp.op == 'DELETE'
        if self.copysource is None:
            data, mode = backend.getfile(self.fname)
        else:
            # copies and renames start from the stored source content
            data, mode = store.getfile(self.copysource)[:2]
        if data is not None:
            self.exists = self.copysource is None or backend.exists(self.fname)
            self.missing = False
            if data:
                self.lines = mdiff.splitnewlines(data)
            if self.mode is None:
                self.mode = mode
            if self.lines:
                # Normalize line endings
                if self.lines[0].endswith('\r\n'):
                    self.eol = '\r\n'
                elif self.lines[0].endswith('\n'):
                    self.eol = '\n'
                if eolmode != 'strict':
                    nlines = []
                    for l in self.lines:
                        if l.endswith('\r\n'):
                            l = l[:-2] + '\n'
                        nlines.append(l)
                    self.lines = nlines
        else:
            if self.create:
                self.missing = False
            if self.mode is None:
                self.mode = (False, False)
        if self.missing:
            self.ui.warn(_("unable to find '%s' for patching\n") % self.fname)
            self.ui.warn(_("(use '--prefix' to apply patch relative to the "
                           "current directory)\n"))

        self.hash = {}
        self.dirty = 0
        self.offset = 0
        self.skew = 0
        self.rej = []
        self.fileprinted = False
        self.printfile(False)
        self.hunks = 0

    def writelines(self, fname, lines, mode):
        """Join lines and hand them to the backend, converting the line
        endings first when eolmode asks for it."""
        if self.eolmode == 'auto':
            eol = self.eol
        elif self.eolmode == 'crlf':
            eol = '\r\n'
        else:
            eol = '\n'

        if self.eolmode != 'strict' and eol and eol != '\n':
            rawlines = []
            for l in lines:
                # endswith() rather than l[-1]: indexing a bytes object
                # yields an integer on Python 3 and would never match
                if l and l.endswith('\n'):
                    l = l[:-1] + eol
                rawlines.append(l)
            lines = rawlines

        self.backend.setfile(fname, ''.join(lines), mode, self.copysource)

    def printfile(self, warn):
        """Print the "patching file" line once, as a warning if warn is
        set and as a verbose note otherwise."""
        if self.fileprinted:
            return
        if warn or self.ui.verbose:
            self.fileprinted = True
        s = _("patching file %s\n") % self.fname
        if warn:
            self.ui.warn(s)
        else:
            self.ui.note(s)


    def findlines(self, l, linenum):
        # looks through the hash and finds candidate lines.  The
        # result is a list of line numbers sorted based on distance
        # from linenum

        cand = self.hash.get(l, [])
        if len(cand) > 1:
            # resort our list of potentials forward then back.
            cand.sort(key=lambda x: abs(x - linenum))
        return cand

    def write_rej(self):
        # our rejects are a little different from patch(1).  This always
        # creates rejects in the same form as the original patch.  A file
        # header is inserted so that you can run the reject through patch again
        # without having to type the filename.
        if not self.rej:
            return
        base = os.path.basename(self.fname)
        lines = ["--- %s\n+++ %s\n" % (base, base)]
        for x in self.rej:
            for l in x.hunk:
                lines.append(l)
                if l[-1:] != '\n':
                    lines.append("\n\\ No newline at end of file\n")
        self.backend.writerej(self.fname, len(self.rej), self.hunks, lines)

    def apply(self, h):
        """Apply hunk h to the in-memory lines.

        Return 0 when the hunk applied at its expected position, the fuzz
        amount when it applied at an offset or with fuzz, and -1 when it
        was rejected. PatchError is raised for an incomplete hunk.
        """
        if not h.complete():
            raise PatchError(_("bad hunk #%d %s (%d %d %d %d)") %
                            (h.number, h.desc, len(h.a), h.lena, len(h.b),
                            h.lenb))

        self.hunks += 1

        if self.missing:
            self.rej.append(h)
            return -1

        if self.exists and self.create:
            if self.copysource:
                self.ui.warn(_("cannot create %s: destination already "
                               "exists\n") % self.fname)
            else:
                self.ui.warn(_("file %s already exists\n") % self.fname)
            self.rej.append(h)
            return -1

        if isinstance(h, binhunk):
            if self.remove:
                self.backend.unlink(self.fname)
            else:
                # binary hunks replace the whole content
                l = h.new(self.lines)
                self.lines[:] = l
                self.offset += len(l)
                self.dirty = True
            return 0

        horig = h
        if (self.eolmode in ('crlf', 'lf')
            or self.eolmode == 'auto' and self.eol):
            # If new eols are going to be normalized, then normalize
            # hunk data before patching. Otherwise, preserve input
            # line-endings.
            h = h.getnormalized()

        # fast case first, no offsets, no fuzz
        old, oldstart, new, newstart = h.fuzzit(0, False)
        oldstart += self.offset
        orig_start = oldstart
        # if there's skew we want to emit the "(offset %d lines)" even
        # when the hunk cleanly applies at start + skew, so skip the
        # fast case code
        if self.skew == 0 and diffhelper.testhunk(old, self.lines, oldstart):
            if self.remove:
                self.backend.unlink(self.fname)
            else:
                self.lines[oldstart:oldstart + len(old)] = new
                self.offset += len(new) - len(old)
                self.dirty = True
            return 0

        # ok, we couldn't match the hunk. Lets look for offsets and fuzz it
        self.hash = {}
        for x, s in enumerate(self.lines):
            self.hash.setdefault(s, []).append(x)

        for fuzzlen in xrange(self.ui.configint("patch", "fuzz") + 1):
            for toponly in [True, False]:
                old, oldstart, new, newstart = h.fuzzit(fuzzlen, toponly)
                oldstart = oldstart + self.offset + self.skew
                oldstart = min(oldstart, len(self.lines))
                if old:
                    cand = self.findlines(old[0][1:], oldstart)
                else:
                    # Only adding lines with no or fuzzed context, just
                    # take the skew in account
                    cand = [oldstart]

                for l in cand:
                    if not old or diffhelper.testhunk(old, self.lines, l):
                        self.lines[l : l + len(old)] = new
                        self.offset += len(new) - len(old)
                        self.skew = l - orig_start
                        self.dirty = True
                        offset = l - orig_start - fuzzlen
                        if fuzzlen:
                            msg = _("Hunk #%d succeeded at %d "
                                    "with fuzz %d "
                                    "(offset %d lines).\n")
                            self.printfile(True)
                            self.ui.warn(msg %
                                (h.number, l + 1, fuzzlen, offset))
                        else:
                            msg = _("Hunk #%d succeeded at %d "
                                    "(offset %d lines).\n")
                            self.ui.note(msg % (h.number, l + 1, offset))
                        return fuzzlen
        self.printfile(True)
        self.ui.warn(_("Hunk #%d FAILED at %d\n") % (h.number, orig_start))
        # reject the hunk as it came in, not its eol-normalized form
        self.rej.append(horig)
        return -1

    def close(self):
        """Write the patched content and rejects; return the number of
        rejected hunks."""
        if self.dirty:
            self.writelines(self.fname, self.lines, self.mode)
        self.write_rej()
        return len(self.rej)
858 858
class header(object):
    """patch header

    Wraps the list of header lines preceding the hunks of one file in a
    diff, together with the hunks that belong to it.
    """
    diffgit_re = re.compile('diff --git a/(.*) b/(.*)$')
    diff_re = re.compile('diff -r .* (.*)$')
    allhunks_re = re.compile('(?:index|deleted file) ')
    pretty_re = re.compile('(?:new file|deleted file) ')
    special_re = re.compile('(?:index|deleted|copy|rename) ')
    newfile_re = re.compile('(?:new file)')

    def __init__(self, header):
        self.header = header
        self.hunks = []

    def binary(self):
        """True if any header line starts with 'index '."""
        for line in self.header:
            if line.startswith('index '):
                return True
        return False

    def pretty(self, fp):
        """Write a short, human-oriented summary of the header to fp."""
        for line in self.header:
            if line.startswith('index '):
                fp.write(_('this modifies a binary file (all or nothing)\n'))
                return
            if self.pretty_re.match(line):
                fp.write(line)
                if self.binary():
                    fp.write(_('this is a binary file\n'))
                return
            if line.startswith('---'):
                changed = sum([max(hunk.added, hunk.removed)
                               for hunk in self.hunks])
                fp.write(_('%d hunks, %d lines changed\n') %
                         (len(self.hunks), changed))
                return
            fp.write(line)

    def write(self, fp):
        """Write the header lines verbatim to fp."""
        fp.write(''.join(self.header))

    def allhunks(self):
        """True if a header line starts with 'index ' or 'deleted file '."""
        for line in self.header:
            if self.allhunks_re.match(line):
                return True
        return False

    def files(self):
        """Return the file names found on the first header line.

        For a git header this is a list of one name, or two when source
        and destination differ; otherwise the groups of diff_re.
        """
        firstline = self.header[0]
        gitmatch = self.diffgit_re.match(firstline)
        if not gitmatch:
            return self.diff_re.match(firstline).groups()
        fromfile, tofile = gitmatch.groups()
        if fromfile == tofile:
            return [fromfile]
        return [fromfile, tofile]

    def filename(self):
        """Return the last name reported by files()."""
        return self.files()[-1]

    def __repr__(self):
        names = [repr(name) for name in self.files()]
        return '<header %s>' % ' '.join(names)

    def isnewfile(self):
        """True if a header line starts with 'new file'."""
        for line in self.header:
            if self.newfile_re.match(line):
                return True
        return False

    def special(self):
        # Special files are shown only at the header level and not at the hunk
        # level for example a file that has been deleted is a special file.
        # The user cannot change the content of the operation, in the case of
        # the deleted file he has to take the deletion or not take it, he
        # cannot take some of it.
        # Newly added files are special if they are empty, they are not special
        # if they have some content as we want to be able to change it
        if self.isnewfile() and len(self.header) == 2:
            return True
        for line in self.header:
            if self.special_re.match(line):
                return True
        return False
930 930
931 931 class recordhunk(object):
932 932 """patch hunk
933 933
934 934 XXX shouldn't we merge this with the other hunk class?
935 935 """
936 936
937 937 def __init__(self, header, fromline, toline, proc, before, hunk, after,
938 938 maxcontext=None):
939 939 def trimcontext(lines, reverse=False):
940 940 if maxcontext is not None:
941 941 delta = len(lines) - maxcontext
942 942 if delta > 0:
943 943 if reverse:
944 944 return delta, lines[delta:]
945 945 else:
946 946 return delta, lines[:maxcontext]
947 947 return 0, lines
948 948
949 949 self.header = header
950 950 trimedbefore, self.before = trimcontext(before, True)
951 951 self.fromline = fromline + trimedbefore
952 952 self.toline = toline + trimedbefore
953 953 _trimedafter, self.after = trimcontext(after, False)
954 954 self.proc = proc
955 955 self.hunk = hunk
956 956 self.added, self.removed = self.countchanges(self.hunk)
957 957
958 958 def __eq__(self, v):
959 959 if not isinstance(v, recordhunk):
960 960 return False
961 961
962 962 return ((v.hunk == self.hunk) and
963 963 (v.proc == self.proc) and
964 964 (self.fromline == v.fromline) and
965 965 (self.header.files() == v.header.files()))
966 966
967 967 def __hash__(self):
968 968 return hash((tuple(self.hunk),
969 969 tuple(self.header.files()),
970 970 self.fromline,
971 971 self.proc))
972 972
973 973 def countchanges(self, hunk):
974 974 """hunk -> (n+,n-)"""
975 975 add = len([h for h in hunk if h.startswith('+')])
976 976 rem = len([h for h in hunk if h.startswith('-')])
977 977 return add, rem
978 978
979 979 def reversehunk(self):
980 980 """return another recordhunk which is the reverse of the hunk
981 981
982 982 If this hunk is diff(A, B), the returned hunk is diff(B, A). To do
983 983 that, swap fromline/toline and +/- signs while keep other things
984 984 unchanged.
985 985 """
986 986 m = {'+': '-', '-': '+', '\\': '\\'}
987 987 hunk = ['%s%s' % (m[l[0:1]], l[1:]) for l in self.hunk]
988 988 return recordhunk(self.header, self.toline, self.fromline, self.proc,
989 989 self.before, hunk, self.after)
990 990
def write(self, fp):
    """Write the hunk to ``fp``: an '@@ -from,len +to,len @@' range line
    (followed by ``proc`` when set), then the leading context, the
    changed lines and the trailing context."""
    ctxlen = len(self.before) + len(self.after)
    # a trailing no-newline marker is not counted in the range lengths
    if self.after and self.after[-1] == '\\ No newline at end of file\n':
        ctxlen -= 1
    fromlen = ctxlen + self.removed
    tolen = ctxlen + self.added
    suffix = self.proc and (' ' + self.proc)
    rangeline = '@@ -%d,%d +%d,%d @@%s\n' % (
        self.fromline, fromlen, self.toline, tolen, suffix)
    fp.write(rangeline)
    body = self.before + self.hunk + self.after
    fp.write(''.join(body))
1001 1001
1002 1002 pretty = write
1003 1003
def filename(self):
    """Return the file name reported by this hunk's header."""
    hdr = self.header
    return hdr.filename()
1006 1006
def __repr__(self):
    """Debug representation: file name and starting line of the hunk."""
    name = self.filename()
    return '<hunk %r@%d>' % (name, self.fromline)
1009 1009
def getmessages():
    """Return the translated prompt texts used when filtering patches.

    The result maps 'multiple', 'single' and 'help' to dictionaries keyed
    by operation name ('apply', 'discard' or 'record').
    """
    # prompts used when more than one change is being examined
    multiple = {
        'apply': _("apply change %d/%d to '%s'?"),
        'discard': _("discard change %d/%d to '%s'?"),
        'record': _("record change %d/%d to '%s'?"),
    }
    # prompts used when there is exactly one change
    single = {
        'apply': _("apply this change to '%s'?"),
        'discard': _("discard this change to '%s'?"),
        'record': _("record this change to '%s'?"),
    }
    # choice strings: '$$'-separated answers, '&' marks the response key
    helptext = {
        'apply': _('[Ynesfdaq?]'
                   '$$ &Yes, apply this change'
                   '$$ &No, skip this change'
                   '$$ &Edit this change manually'
                   '$$ &Skip remaining changes to this file'
                   '$$ Apply remaining changes to this &file'
                   '$$ &Done, skip remaining changes and files'
                   '$$ Apply &all changes to all remaining files'
                   '$$ &Quit, applying no changes'
                   '$$ &? (display help)'),
        'discard': _('[Ynesfdaq?]'
                     '$$ &Yes, discard this change'
                     '$$ &No, skip this change'
                     '$$ &Edit this change manually'
                     '$$ &Skip remaining changes to this file'
                     '$$ Discard remaining changes to this &file'
                     '$$ &Done, skip remaining changes and files'
                     '$$ Discard &all changes to all remaining files'
                     '$$ &Quit, discarding no changes'
                     '$$ &? (display help)'),
        'record': _('[Ynesfdaq?]'
                    '$$ &Yes, record this change'
                    '$$ &No, skip this change'
                    '$$ &Edit this change manually'
                    '$$ &Skip remaining changes to this file'
                    '$$ Record remaining changes to this &file'
                    '$$ &Done, skip remaining changes and files'
                    '$$ Record &all changes to all remaining files'
                    '$$ &Quit, recording no changes'
                    '$$ &? (display help)'),
    }
    return {
        'multiple': multiple,
        'single': single,
        'help': helptext,
    }
1055 1055
def filterpatch(ui, headers, operation=None):
    """Interactively filter patch chunks into applied-only chunks

    ``headers`` is a list of header objects, each carrying its hunks in
    ``.hunks``.  ``operation`` selects the wording of the prompts
    ('apply', 'discard' or 'record'; None means 'record').

    Returns a pair ``(chunks, {})`` where ``chunks`` is a flat list made
    of each selected header followed by the hunks kept for it.  A header
    with no kept hunk is only included when its ``special()`` is true.

    Raises error.Abort when the user quits.
    """
    messages = getmessages()

    if operation is None:
        operation = 'record'

    def prompt(skipfile, skipall, query, chunk):
        """prompt query, and process base inputs

        - y/n for the rest of file
        - y/n for the rest
        - ? (help)
        - q (quit)

        Return True/False and possibly updated skipfile and skipall.
        The fourth returned value is the list of headers parsed from a
        manually edited chunk, or None when the chunk was not edited.
        """
        newpatches = None
        if skipall is not None:
            return skipall, skipfile, skipall, newpatches
        if skipfile is not None:
            return skipfile, skipfile, skipall, newpatches
        while True:
            resps = messages['help'][operation]
            r = ui.promptchoice("%s %s" % (query, resps))
            ui.write("\n")
            if r == 8: # ?
                for c, t in ui.extractchoices(resps)[1]:
                    ui.write('%s - %s\n' % (c, encoding.lower(t)))
                continue
            elif r == 0: # yes
                ret = True
            elif r == 1: # no
                ret = False
            elif r == 2: # Edit patch
                if chunk is None:
                    ui.write(_('cannot edit patch for whole file'))
                    ui.write("\n")
                    continue
                if chunk.header.binary():
                    ui.write(_('cannot edit patch for binary file'))
                    ui.write("\n")
                    continue
                # Patch comment based on the Git one (based on comment at end of
                # https://mercurial-scm.org/wiki/RecordExtension)
                phelp = '---' + _("""
To remove '-' lines, make them ' ' lines (context).
To remove '+' lines, delete them.
Lines starting with # will be removed from the patch.

If the patch applies cleanly, the edited hunk will immediately be
added to the record list. If it does not apply cleanly, a rejects
file will be generated: you can use that when you try again. If
all lines of the hunk are removed, then the edit is aborted and
the hunk is left unchanged.
""")
                (patchfd, patchfn) = tempfile.mkstemp(prefix="hg-editor-",
                                                      suffix=".diff")
                ncpatchfp = None
                try:
                    # Write the initial patch
                    f = util.nativeeolwriter(os.fdopen(patchfd, r'wb'))
                    try:
                        chunk.header.write(f)
                        chunk.write(f)
                        f.write('\n'.join(['# ' + i
                                           for i in phelp.splitlines()]))
                    finally:
                        # always release the handle so that the unlink
                        # below does not run against an open file
                        f.close()
                    # Start the editor and wait for it to complete
                    editor = ui.geteditor()
                    ret = ui.system("%s \"%s\"" % (editor, patchfn),
                                    environ={'HGUSER': ui.username()},
                                    blockedtag='filterpatch')
                    if ret != 0:
                        ui.warn(_("editor exited with exit code %d\n") % ret)
                        continue
                    # Remove comment lines
                    patchfp = open(patchfn, r'rb')
                    try:
                        ncpatchfp = stringio()
                        for line in util.iterfile(patchfp):
                            line = util.fromnativeeol(line)
                            if not line.startswith('#'):
                                ncpatchfp.write(line)
                    finally:
                        patchfp.close()
                    ncpatchfp.seek(0)
                    newpatches = parsepatch(ncpatchfp)
                finally:
                    os.unlink(patchfn)
                    del ncpatchfp
                # Signal that the chunk shouldn't be applied as-is, but
                # provide the new patch to be used instead.
                ret = False
            elif r == 3: # Skip
                ret = skipfile = False
            elif r == 4: # file (Record remaining)
                ret = skipfile = True
            elif r == 5: # done, skip remaining
                ret = skipall = False
            elif r == 6: # all
                ret = skipall = True
            elif r == 7: # quit
                raise error.Abort(_('user quit'))
            return ret, skipfile, skipall, newpatches

    seen = set()
    applied = {}        # 'filename' -> [] of chunks
    skipfile, skipall = None, None
    pos, total = 1, sum(len(h.hunks) for h in headers)
    for h in headers:
        pos += len(h.hunks)
        skipfile = None
        # accumulated line shift caused by the hunks of this file that
        # were skipped; applied to the target line of later hunks
        fixoffset = 0
        hdr = ''.join(h.header)
        if hdr in seen:
            continue
        seen.add(hdr)
        if skipall is None:
            h.pretty(ui)
        msg = (_('examine changes to %s?') %
               _(' and ').join("'%s'" % f for f in h.files()))
        r, skipfile, skipall, np = prompt(skipfile, skipall, msg, None)
        if not r:
            continue
        applied[h.filename()] = [h]
        if h.allhunks():
            applied[h.filename()] += h.hunks
            continue
        for i, chunk in enumerate(h.hunks):
            if skipfile is None and skipall is None:
                chunk.pretty(ui)
            if total == 1:
                msg = messages['single'][operation] % chunk.filename()
            else:
                idx = pos - len(h.hunks) + i
                msg = messages['multiple'][operation] % (idx, total,
                                                         chunk.filename())
            r, skipfile, skipall, newpatches = prompt(skipfile,
                    skipall, msg, chunk)
            if r:
                if fixoffset:
                    # do not modify the hunk owned by the header
                    chunk = copy.copy(chunk)
                    chunk.toline += fixoffset
                applied[chunk.filename()].append(chunk)
            elif newpatches is not None:
                for newpatch in newpatches:
                    for newhunk in newpatch.hunks:
                        if fixoffset:
                            newhunk.toline += fixoffset
                        applied[newhunk.filename()].append(newhunk)
            else:
                fixoffset += chunk.removed - chunk.added

    # flatten the per-file lists, dropping files for which only the header
    # was kept unless that header is special
    selected = []
    for chunks in applied.itervalues():
        if chunks[0].special() or len(chunks) > 1:
            selected.extend(chunks)
    return (selected, {})
class hunk(object):
    """One hunk of a unified or context diff.

    Attributes set by the constructor and the readers:

    - number: hunk number, used in error messages
    - desc: the range line describing the hunk
    - hunk: list of hunk lines, the first one being ``desc``
    - a, b: lists filled with the old and new side of the hunk
    - starta, lena, startb, lenb: ranges parsed from the description
    """
    def __init__(self, desc, num, lr, context):
        """Create hunk number ``num`` described by ``desc``.

        When ``lr`` is not None the hunk body is read from it right away,
        as a context diff hunk if ``context`` is true and as a unified
        diff hunk otherwise.
        """
        self.number = num
        self.desc = desc
        self.hunk = [desc]
        self.a = []
        self.b = []
        self.starta = self.lena = None
        self.startb = self.lenb = None
        if lr is not None:
            if context:
                self.read_context_hunk(lr)
            else:
                self.read_unified_hunk(lr)

    def getnormalized(self):
        """Return a copy with line endings normalized to LF."""

        def normalize(lines):
            nlines = []
            for line in lines:
                if line.endswith('\r\n'):
                    line = line[:-2] + '\n'
                nlines.append(line)
            return nlines

        # Dummy object, it is rebuilt manually
        nh = hunk(self.desc, self.number, None, None)
        nh.number = self.number
        nh.desc = self.desc
        # note: the list of hunk lines is shared, not copied or normalized
        nh.hunk = self.hunk
        nh.a = normalize(self.a)
        nh.b = normalize(self.b)
        nh.starta = self.starta
        nh.startb = self.startb
        nh.lena = self.lena
        nh.lenb = self.lenb
        return nh

    def read_unified_hunk(self, lr):
        """Parse ``self.desc`` as a unified range line and read the hunk
        body from ``lr``.

        Raises PatchError if the description does not match or if the
        body cannot be parsed.
        """
        m = unidesc.match(self.desc)
        if not m:
            raise PatchError(_("bad hunk #%d") % self.number)
        self.starta, self.lena, self.startb, self.lenb = m.groups()
        # a missing length defaults to 1
        if self.lena is None:
            self.lena = 1
        else:
            self.lena = int(self.lena)
        if self.lenb is None:
            self.lenb = 1
        else:
            self.lenb = int(self.lenb)
        self.starta = int(self.starta)
        self.startb = int(self.startb)
        try:
            diffhelper.addlines(lr, self.hunk, self.lena, self.lenb,
                                self.a, self.b)
        except error.ParseError as e:
            raise PatchError(_("bad hunk #%d: %s") % (self.number, e))
        # if we hit eof before finishing out the hunk, the last line will
        # be zero length.  Lets try to fix it up.
        while len(self.hunk[-1]) == 0:
            del self.hunk[-1]
            del self.a[-1]
            del self.b[-1]
            self.lena -= 1
            self.lenb -= 1
        self._fixnewline(lr)

    def read_context_hunk(self, lr):
        """Read a context diff hunk from ``lr`` and convert it to the
        unified representation (``self.hunk``, ``self.a``, ``self.b`` and
        a rebuilt '@@' description).

        Raises PatchError on a malformed range line or body line.
        """
        self.desc = lr.readline()
        m = contextdesc.match(self.desc)
        if not m:
            raise PatchError(_("bad hunk #%d") % self.number)
        self.starta, aend = m.groups()
        self.starta = int(self.starta)
        if aend is None:
            aend = self.starta
        self.lena = int(aend) - self.starta
        if self.starta:
            self.lena += 1
        for x in xrange(self.lena):
            l = lr.readline()
            if l.startswith('---'):
                # lines addition, old block is empty
                lr.push(l)
                break
            # context diff lines carry a two character prefix
            s = l[2:]
            if l.startswith(('- ', '! ')):
                u = '-' + s
            elif l.startswith('  '):
                u = ' ' + s
            else:
                raise PatchError(_("bad hunk #%d old text line %d") %
                                 (self.number, x))
            self.a.append(u)
            self.hunk.append(u)

        l = lr.readline()
        if l.startswith(r'\ '):
            # no-newline marker: drop the last character of the last line
            s = self.a[-1][:-1]
            self.a[-1] = s
            self.hunk[-1] = s
            l = lr.readline()
        m = contextdesc.match(l)
        if not m:
            raise PatchError(_("bad hunk #%d") % self.number)
        self.startb, bend = m.groups()
        self.startb = int(self.startb)
        if bend is None:
            bend = self.startb
        self.lenb = int(bend) - self.startb
        if self.startb:
            self.lenb += 1
        # position in self.hunk where the next new-side line is merged
        hunki = 1
        for x in xrange(self.lenb):
            l = lr.readline()
            if l.startswith(r'\ '):
                # XXX: the only way to hit this is with an invalid line range.
                # The no-eol marker is not counted in the line range, but I
                # guess there are diff(1) out there which behave differently.
                s = self.b[-1][:-1]
                self.b[-1] = s
                self.hunk[hunki - 1] = s
                continue
            if not l:
                # line deletions, new block is empty and we hit EOF
                lr.push(l)
                break
            s = l[2:]
            if l.startswith(('+ ', '! ')):
                u = '+' + s
            elif l.startswith('  '):
                u = ' ' + s
            elif len(self.b) == 0:
                # line deletions, new block is empty
                lr.push(l)
                break
            else:
                raise PatchError(_("bad hunk #%d old text line %d") %
                                 (self.number, x))
            self.b.append(s)
            # merge the line into self.hunk: skip over removed lines,
            # stop on an identical line, insert otherwise
            while True:
                if hunki >= len(self.hunk):
                    h = ""
                else:
                    h = self.hunk[hunki]
                hunki += 1
                if h == u:
                    break
                elif h.startswith('-'):
                    continue
                else:
                    self.hunk.insert(hunki - 1, u)
                    break

        if not self.a:
            # this happens when lines were only added to the hunk
            for x in self.hunk:
                if x.startswith(('-', ' ')):
                    self.a.append(x)
        if not self.b:
            # this happens when lines were only deleted from the hunk
            for x in self.hunk:
                if x.startswith(('+', ' ')):
                    self.b.append(x[1:])
        # @@ -start,len +start,len @@
        self.desc = "@@ -%d,%d +%d,%d @@\n" % (self.starta, self.lena,
                                               self.startb, self.lenb)
        self.hunk[0] = self.desc
        self._fixnewline(lr)

    def _fixnewline(self, lr):
        """Consume a following no-newline marker line from ``lr`` if there
        is one; any other line is pushed back."""
        l = lr.readline()
        if l.startswith(r'\ '):
            diffhelper.fixnewline(self.hunk, self.a, self.b)
        else:
            lr.push(l)

    def complete(self):
        """Tell whether both sides hold as many lines as announced."""
        return len(self.a) == self.lena and len(self.b) == self.lenb

    def _fuzzit(self, old, new, fuzz, toponly):
        """Return (old, new, top) with up to ``fuzz`` context lines trimmed.

        ``top`` is the number of lines removed from the top.  Lines are
        removed from the bottom as well unless ``toponly`` is true.
        """
        # this removes context lines from the top and bottom of list 'l'.  It
        # checks the hunk to make sure only context lines are removed, and then
        # returns a new shortened list of lines.
        fuzz = min(fuzz, len(old))
        if fuzz:
            top = 0
            bot = 0
            hlen = len(self.hunk)
            for x in xrange(hlen - 1):
                # the hunk starts with the @@ line, so use x+1
                if self.hunk[x + 1].startswith(' '):
                    top += 1
                else:
                    break
            if not toponly:
                for x in xrange(hlen - 1):
                    if self.hunk[hlen - bot - 1].startswith(' '):
                        bot += 1
                    else:
                        break

            bot = min(fuzz, bot)
            top = min(fuzz, top)
            return old[top:len(old) - bot], new[top:len(new) - bot], top
        return old, new, 0

    def fuzzit(self, fuzz, toponly):
        """Return (old, oldstart, new, newstart) after trimming context
        with ``_fuzzit``; the start values are shifted by the number of
        lines trimmed from the top."""
        old, new, top = self._fuzzit(self.a, self.b, fuzz, toponly)
        oldstart = self.starta + top
        newstart = self.startb + top
        # zero length hunk ranges already have their start decremented
        if self.lena and oldstart > 0:
            oldstart -= 1
        if self.lenb and newstart > 0:
            newstart -= 1
        return old, oldstart, new, newstart
1426 1426
class binhunk(object):
    """A binary patch file.

    The hunk is read from the line reader at construction time: ``text``
    holds the decoded payload, ``delta`` tells whether that payload is a
    delta rather than the literal content, and ``hunk`` collects the raw
    lines that were consumed.
    """
    def __init__(self, lr, fname):
        self.text = None
        self.delta = False
        self.hunk = ['GIT binary patch\n']
        self._fname = fname
        self._read(lr)

    def complete(self):
        """True once a payload has been decoded."""
        return self.text is not None

    def new(self, lines):
        """Return the new content as a one element list; for a delta the
        payload is applied on top of the joined ``lines``."""
        if not self.delta:
            return [self.text]
        return [applybindelta(self.text, ''.join(lines))]

    def _read(self, lr):
        """Read and decode the binary hunk from ``lr``.

        Raises PatchError when no 'literal'/'delta' line is found before
        an empty line, when a data line cannot be decoded, or when the
        decompressed size differs from the announced one.
        """
        def nextline():
            # record every raw line in self.hunk, hand back the stripped one
            raw = lr.readline()
            self.hunk.append(raw)
            return raw.rstrip('\r\n')

        size = 0
        while True:
            line = nextline()
            if not line:
                raise PatchError(_('could not extract "%s" binary data')
                                 % self._fname)
            if line.startswith('literal '):
                size = int(line[8:].rstrip())
                break
            if line.startswith('delta '):
                size = int(line[6:].rstrip())
                self.delta = True
                break

        pieces = []
        line = nextline()
        while len(line) > 1:
            # the first character encodes the number of decoded bytes:
            # 'A'..'Z' -> 1..26, otherwise relative to 'a' starting at 27
            marker = line[0:1]
            if 'A' <= marker <= 'Z':
                nbytes = ord(marker) - ord('A') + 1
            else:
                nbytes = ord(marker) - ord('a') + 27
            try:
                pieces.append(util.b85decode(line[1:])[:nbytes])
            except ValueError as e:
                raise PatchError(_('could not decode "%s" binary patch: %s')
                                 % (self._fname, stringutil.forcebytestr(e)))
            line = nextline()

        text = zlib.decompress(''.join(pieces))
        if len(text) != size:
            raise PatchError(_('"%s" length is %d bytes, should be %d')
                             % (self._fname, len(text), size))
        self.text = text
1482 1482
def parsefilename(str):
    """Extract the file name from a '--- name' style header line.

    The first four characters and the trailing line ending are dropped.
    The name ends at the first tab if there is one, else at the first
    space, else it runs to the end of the line.
    """
    # --- filename \t|space stuff
    name = str[4:].rstrip('\r\n')
    for sep in ('\t', ' '):
        cut = name.find(sep)
        if cut >= 0:
            return name[:cut]
    return name
1492 1492
def reversehunks(hunks):
    '''reverse the signs in the hunks given as argument

    This function operates on hunks coming out of patch.filterpatch, that is
    a list of the form: [header1, hunk1, hunk2, header2...]. Items without a
    ``reversehunk`` attribute are passed through unchanged. Example usage:

    >>> rawpatch = b"""diff --git a/folder1/g b/folder1/g
    ... --- a/folder1/g
    ... +++ b/folder1/g
    ... @@ -1,7 +1,7 @@
    ... +firstline
    ...  c
    ...  1
    ...  2
    ... + 3
    ... -4
    ...  5
    ...  d
    ... +lastline"""
    >>> hunks = parsepatch([rawpatch])
    >>> hunkscomingfromfilterpatch = []
    >>> for h in hunks:
    ...     hunkscomingfromfilterpatch.append(h)
    ...     hunkscomingfromfilterpatch.extend(h.hunks)

    >>> reversedhunks = reversehunks(hunkscomingfromfilterpatch)
    >>> from . import util
    >>> fp = util.stringio()
    >>> for c in reversedhunks:
    ...      c.write(fp)
    >>> fp.seek(0) or None
    >>> reversedpatch = fp.read()
    >>> print(pycompat.sysstr(reversedpatch))
    diff --git a/folder1/g b/folder1/g
    --- a/folder1/g
    +++ b/folder1/g
    @@ -1,4 +1,3 @@
    -firstline
     c
     1
     2
    @@ -2,6 +1,6 @@
     c
     1
     2
    - 3
    +4
     5
     d
    @@ -6,3 +5,2 @@
     5
     d
    -lastline

    '''

    return [c.reversehunk() if util.safehasattr(c, 'reversehunk') else c
            for c in hunks]
1555 1555
def parsepatch(originalchunks, maxcontext=None):
    """patch -> [] of headers -> [] of hunks

    ``originalchunks`` is an iterable of strings which are joined to form
    the patch text.  If maxcontext is not None, trim context lines if
    necessary.

    Raises PatchError when the events produced by scanpatch follow each
    other in an order the parser has no transition for.

    >>> rawpatch = b'''diff --git a/folder1/g b/folder1/g
    ... --- a/folder1/g
    ... +++ b/folder1/g
    ... @@ -1,8 +1,10 @@
    ...  1
    ...  2
    ... -3
    ...  4
    ...  5
    ...  6
    ... +6.1
    ... +6.2
    ...  7
    ...  8
    ... +9'''
    >>> out = util.stringio()
    >>> headers = parsepatch([rawpatch], maxcontext=1)
    >>> for header in headers:
    ...     header.write(out)
    ...     for hunk in header.hunks:
    ...         hunk.write(out)
    >>> print(pycompat.sysstr(out.getvalue()))
    diff --git a/folder1/g b/folder1/g
    --- a/folder1/g
    +++ b/folder1/g
    @@ -2,3 +2,2 @@
     2
    -3
     4
    @@ -6,2 +5,4 @@
     6
    +6.1
    +6.2
     7
    @@ -8,1 +9,2 @@
     8
    +9
    """
    class parser(object):
        """patch parsing state machine"""
        def __init__(self):
            self.fromline = 0
            self.toline = 0
            self.proc = ''
            self.header = None
            self.context = []
            self.before = []
            self.hunk = []
            self.headers = []

        def addrange(self, limits):
            # limits are the groups of the '@@' range line
            fromstart, fromend, tostart, toend, proc = limits
            self.fromline = int(fromstart)
            self.toline = int(tostart)
            self.proc = proc

        def addcontext(self, context):
            # context following changed lines closes the pending hunk
            if self.hunk:
                h = recordhunk(self.header, self.fromline, self.toline,
                        self.proc, self.before, self.hunk, context, maxcontext)
                self.header.hunks.append(h)
                self.fromline += len(self.before) + h.removed
                self.toline += len(self.before) + h.added
                self.before = []
                self.hunk = []
            self.context = context

        def addhunk(self, hunk):
            # the context seen so far becomes the leading context
            if self.context:
                self.before = self.context
                self.context = []
            self.hunk = hunk

        def newfile(self, hdr):
            # flush the pending hunk of the previous file, if any
            self.addcontext([])
            h = header(hdr)
            self.headers.append(h)
            self.header = h

        def addother(self, line):
            pass # 'other' lines are ignored

        def finished(self):
            self.addcontext([])
            return self.headers

        # current state -> {next event: handler}
        transitions = {
            'file': {'context': addcontext,
                     'file': newfile,
                     'hunk': addhunk,
                     'range': addrange},
            'context': {'file': newfile,
                        'hunk': addhunk,
                        'range': addrange,
                        'other': addother},
            'hunk': {'context': addcontext,
                     'file': newfile,
                     'range': addrange},
            'range': {'context': addcontext,
                      'hunk': addhunk},
            'other': {'other': addother},
            }

    p = parser()
    fp = stringio()
    fp.write(''.join(originalchunks))
    fp.seek(0)

    state = 'context'
    for newstate, data in scanpatch(fp):
        # only the table lookup is guarded: a KeyError raised while a
        # handler runs must not be reported as an unhandled transition
        try:
            handler = p.transitions[state][newstate]
        except KeyError:
            raise PatchError('unhandled transition: %s -> %s' %
                                   (state, newstate))
        handler(p, data)
        state = newstate
    del fp
    return p.finished()
1679 1679
def pathtransform(path, strip, prefix):
    '''turn a path from a patch into a path suitable for the repository

    strip is the number of leading directory components to remove and
    prefix, if not empty, is expected to be normalized with a / at the end.

    Returns (stripped components, path in repository).

    >>> pathtransform(b'a/b/c', 0, b'')
    ('', 'a/b/c')
    >>> pathtransform(b' a/b/c ', 0, b'')
    ('', ' a/b/c')
    >>> pathtransform(b' a/b/c ', 2, b'')
    ('a/b/', 'c')
    >>> pathtransform(b'a/b/c', 0, b'd/e/')
    ('', 'd/e/a/b/c')
    >>> pathtransform(b' a//b/c ', 2, b'd/e/')
    ('a//b/', 'd/e/c')
    >>> pathtransform(b'a/b/c', 3, b'')
    Traceback (most recent call last):
    PatchError: unable to strip away 1 of 3 dirs from a/b/c
    '''
    if strip == 0:
        return '', prefix + path.rstrip()

    last = len(path) - 1
    cut = 0
    for remaining in range(strip, 0, -1):
        cut = path.find('/', cut)
        if cut == -1:
            raise PatchError(_("unable to strip away %d of %d dirs from %s") %
                             (remaining, strip, path))
        cut += 1
        # consume '//' in the path
        while cut < last and path[cut:cut + 1] == '/':
            cut += 1
    stripped = path[:cut].lstrip()
    kept = path[cut:].rstrip()
    return stripped, prefix + kept
1717 1717
def makepatchmeta(backend, afile_orig, bfile_orig, hunk, strip, prefix):
    """Build a patchmeta naming the file a plain (non-git) patch targets.

    afile_orig and bfile_orig are the source and destination names found
    in the patch, hunk is the first hunk for that file, and strip/prefix
    are handed to pathtransform.  The returned object has its op set to
    'ADD' or 'DELETE' when the patch creates or removes the file.

    Raises PatchError when both names are /dev/null.
    """
    nulla = afile_orig == "/dev/null"
    nullb = bfile_orig == "/dev/null"
    create = nulla and hunk.starta == 0 and hunk.lena == 0
    remove = nullb and hunk.startb == 0 and hunk.lenb == 0
    abase, afile = pathtransform(afile_orig, strip, prefix)
    gooda = not nulla and backend.exists(afile)
    bbase, bfile = pathtransform(bfile_orig, strip, prefix)
    # do not query the backend twice for the same path
    goodb = gooda if afile == bfile else (not nullb and backend.exists(bfile))
    missing = not (goodb or gooda or create)

    # some diff programs apparently produce patches where the afile is
    # not /dev/null, but afile starts with bfile
    if missing:
        samedir = (afile[:afile.rfind('/') + 1] ==
                   bfile[:bfile.rfind('/') + 1])
        if (samedir and afile.startswith(bfile)
            and hunk.starta == 0 and hunk.lena == 0):
            create = True
            missing = False

    # If afile is "a/b/foo" and bfile is "a/b/foo.orig" we assume the
    # diff is between a file and its backup. In this case, the original
    # file should be patched (see original mpatch code).
    isbackup = (abase == bbase and bfile.startswith(afile))
    preferred = afile if isbackup else bfile

    fname = None
    if not missing and gooda:
        fname = preferred if goodb else afile

    if not fname:
        if not nullb:
            fname = preferred
        elif not nulla:
            fname = afile
        else:
            raise PatchError(_("undefined source and destination files"))

    gp = patchmeta(fname)
    if create:
        gp.op = 'ADD'
    elif remove:
        gp.op = 'DELETE'
    return gp
1772 1772
def scanpatch(fp):
    """like patch.iterhunks, but yield different events

    - ('file',    [header_lines + fromfile + tofile])
    - ('context', [context_lines])
    - ('hunk',    [hunk_lines])
    - ('range',   (-start,len, +start,len, proc))

    Lines matching none of the above are yielded as ('other', line).
    """
    lines_re = re.compile(br'@@ -(\d+),(\d+) \+(\d+),(\d+) @@\s*(.*)')
    lr = linereader(fp)

    def scanwhile(first, p):
        """scan lr while predicate holds"""
        collected = [first]
        while True:
            candidate = lr.readline()
            if candidate == '':
                break
            if not p(candidate):
                # not ours: hand it back for the main loop
                lr.push(candidate)
                break
            collected.append(candidate)
        return collected

    def notheader(line):
        # a file header runs until the next '---' or 'diff' line
        s = line.split(None, 1)
        return not s or s[0] not in ('---', 'diff')

    def iscontext(line):
        return line.startswith((' ', '\\'))

    def ischange(line):
        return line.startswith(('-', '+', '\\'))

    for line in iter(lr.readline, ''):
        if line.startswith(('diff --git a/', 'diff -r ')):
            header = scanwhile(line, notheader)
            fromfile = lr.readline()
            if fromfile.startswith('---'):
                tofile = lr.readline()
                header.extend([fromfile, tofile])
            else:
                lr.push(fromfile)
            yield 'file', header
        elif line.startswith(' '):
            yield 'context', scanwhile(line, iscontext)
        elif line.startswith(('-', '+')):
            yield 'hunk', scanwhile(line, ischange)
        else:
            m = lines_re.match(line)
            if m:
                yield 'range', m.groups()
            else:
                yield 'other', line
1820 1820
def scangitpatch(lr, firstline):
    """
    Git patches can emit:
    - rename a to b
    - change b
    - copy a to c
    - change c

    We cannot apply this sequence as-is, the renamed 'a' could not be
    found for it would have been renamed already. And we cannot copy
    from 'b' instead because 'b' would have been changed already. So
    we scan the git patch for copy and rename commands so we can
    perform the copies ahead of time.

    Returns what readgitpatch finds when reading on from firstline; the
    scanned file object is then moved back to where the scan started.
    """
    start = 0
    try:
        start = lr.fp.tell()
    except IOError:
        # tell() failed: scan an in-memory copy of the remaining input
        source = stringio(lr.fp.read())
    else:
        source = lr.fp
    scanner = linereader(source)
    scanner.push(firstline)
    found = readgitpatch(scanner)
    source.seek(start)
    return found
1846 1846
def iterhunks(fp):
    """Read a patch and yield the following events:
    - ("file", (afile, bfile, firsthunk, gitpatch)): select a new target
    file. gitpatch is a copy of the matching git metadata record or None;
    firsthunk is None for git records that have no hunk of their own.
    - ("hunk", hunk): a new hunk is ready to be applied, follows a
    "file" event.
    - ("git", gitchanges): current diff is in git format, gitchanges
    is the list of copies of the gitpatch records whose op is COPY or
    RENAME. Unique event.
    """
    afile = ""
    bfile = ""
    state = None
    hunknum = 0
    # newfile: file names were just parsed on this line
    # emitfile: a 'file' event is still owed before the next 'hunk' event
    emitfile = newfile = False
    # None until the first 'diff --git' line triggers the metadata scan
    gitpatches = None

    # our states
    BFILE = 1
    # None: format not known yet, True: context diff, False: unified diff
    context = None
    lr = linereader(fp)

    for x in iter(lr.readline, ''):
        if state == BFILE and (
            (not context and x.startswith('@'))
            or (context is not False and x.startswith('***************'))
            or x.startswith('GIT binary patch')):
            # start of a hunk for the current file
            gp = None
            if (gitpatches and
                gitpatches[-1].ispatching(afile, bfile)):
                gp = gitpatches.pop()
            if x.startswith('GIT binary patch'):
                # NOTE(review): gp is dereferenced unconditionally here,
                # so this presumably relies on git metadata having been
                # found for the file -- confirm
                h = binhunk(lr, gp.path)
            else:
                if context is None and x.startswith('***************'):
                    context = True
                h = hunk(x, hunknum + 1, lr, context)
            hunknum += 1
            if emitfile:
                emitfile = False
                yield 'file', (afile, bfile, h, gp and gp.copy() or None)
            yield 'hunk', h
        elif x.startswith('diff --git a/'):
            m = gitre.match(x.rstrip(' \r\n'))
            if not m:
                continue
            if gitpatches is None:
                # scan whole input for git metadata
                gitpatches = scangitpatch(lr, x)
                yield 'git', [g.copy() for g in gitpatches
                              if g.op in ('COPY', 'RENAME')]
                # reversed so that pop() returns the records in order
                gitpatches.reverse()
            afile = 'a/' + m.group(1)
            bfile = 'b/' + m.group(2)
            # emit the records preceding the one for this file; they are
            # reported without a hunk
            while gitpatches and not gitpatches[-1].ispatching(afile, bfile):
                gp = gitpatches.pop()
                yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, gp.copy())
            if not gitpatches:
                raise PatchError(_('failed to synchronize metadata for "%s"')
                                 % afile[2:])
            gp = gitpatches[-1]
            newfile = True
        elif x.startswith('---'):
            # check for a unified diff
            l2 = lr.readline()
            if not l2.startswith('+++'):
                lr.push(l2)
                continue
            newfile = True
            context = False
            afile = parsefilename(x)
            bfile = parsefilename(l2)
        elif x.startswith('***'):
            # check for a context diff
            l2 = lr.readline()
            if not l2.startswith('---'):
                lr.push(l2)
                continue
            # peek at the third line: it is pushed back in every case
            l3 = lr.readline()
            lr.push(l3)
            if not l3.startswith("***************"):
                lr.push(l2)
                continue
            newfile = True
            context = True
            afile = parsefilename(x)
            bfile = parsefilename(l2)

        if newfile:
            # file names are known: wait for hunks and restart numbering
            newfile = False
            emitfile = True
            state = BFILE
            hunknum = 0

    # report the git records that were not consumed while reading
    while gitpatches:
        gp = gitpatches.pop()
        yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, gp.copy())
1942 1942
def applybindelta(binchunk, data):
    """Apply a binary delta hunk

    The algorithm used is the algorithm from git's patch-delta.c

    ``binchunk`` is the raw delta: two variable-length size headers
    followed by a stream of copy/insert opcodes. ``data`` is the source
    content that copy opcodes read from. Returns the reconstructed
    content as a byte string.

    Raises PatchError when the reserved opcode 0 is encountered.
    """
    def deltahead(chunk):
        # Length of the leading variable-length integer: every byte of it
        # except the last one has its high bit set. bytearray() yields
        # ints on both Python 2 and Python 3.
        i = 0
        for c in bytearray(chunk):
            i += 1
            if not (c & 0x80):
                return i
        return i

    # Skip the two size headers; their values are not checked here.
    binchunk = binchunk[deltahead(binchunk):]
    binchunk = binchunk[deltahead(binchunk):]

    # Index through a bytearray so single-byte reads are ints regardless
    # of the Python version; slices for literal inserts still come from
    # the original byte string.
    codes = bytearray(binchunk)
    offsetbits = ((0x01, 0), (0x02, 8), (0x04, 16), (0x08, 24))
    sizebits = ((0x10, 0), (0x20, 8), (0x40, 16))
    pieces = []
    i = 0
    end = len(codes)
    while i < end:
        cmd = codes[i]
        i += 1
        if cmd & 0x80:
            # Copy opcode: the low seven bits say which offset/size bytes
            # follow, least significant byte first.
            offset = 0
            size = 0
            for bit, shift in offsetbits:
                if cmd & bit:
                    offset |= codes[i] << shift
                    i += 1
            for bit, shift in sizebits:
                if cmd & bit:
                    size |= codes[i] << shift
                    i += 1
            if size == 0:
                # an encoded size of zero stands for 0x10000
                size = 0x10000
            pieces.append(data[offset:offset + size])
        elif cmd != 0:
            # Insert opcode: the next ``cmd`` bytes are literal data.
            pieces.append(binchunk[i:i + cmd])
            i += cmd
        else:
            raise PatchError(_('unexpected delta opcode 0'))
    return b''.join(pieces)
1998 1998
def applydiff(ui, fp, backend, store, strip=1, prefix='', eolmode='strict'):
    """Read a patch from fp and try to apply it with the builtin patcher.

    The return value is 0 when the patch applied cleanly, 1 when it
    applied with fuzz, and -1 when at least one hunk was rejected.

    With 'eolmode' set to 'strict' both the patch content and the patched
    file are handled in binary mode. With any other value, line endings
    are ignored while patching and normalized according to 'eolmode'
    afterwards.
    """
    # patchfile is the patcher used for every file touched by the patch
    patcher = patchfile
    return _applydiff(ui, fp, patcher, backend, store,
                      strip=strip, prefix=prefix, eolmode=eolmode)
2011 2011
2012 2012 def _canonprefix(repo, prefix):
2013 2013 if prefix:
2014 2014 prefix = pathutil.canonpath(repo.root, repo.getcwd(), prefix)
2015 2015 if prefix != '':
2016 2016 prefix += '/'
2017 2017 return prefix
2018 2018
def _applydiff(ui, fp, patcher, backend, store, strip=1, prefix='',
               eolmode='strict'):
    """Drive iterhunks() over fp and apply every event to backend.

    'patcher' is called as patcher(ui, gp, backend, store, eolmode=...)
    for each file that has hunks; the object it returns must provide
    apply(hunk) and close(). 'store' holds copy/rename sources captured
    before patching starts.

    Returns -1 if anything was rejected, 1 if a hunk applied with fuzz,
    and 0 otherwise. Raises PatchError for a missing copy/rename source
    or an already existing destination, and error.Abort on an unknown
    parser state.
    """
    prefix = _canonprefix(backend.repo, prefix)
    def pstrip(p):
        return pathtransform(p, strip - 1, prefix)[1]

    rejects = 0
    err = 0
    current_file = None

    for state, values in iterhunks(fp):
        if state == 'hunk':
            if not current_file:
                # the file could not be opened for patching; skip its hunks
                continue
            ret = current_file.apply(values)
            if ret > 0:
                err = 1
        elif state == 'file':
            # a new file starts: flush the previous one first
            if current_file:
                rejects += current_file.close()
                current_file = None
            afile, bfile, first_hunk, gp = values
            if gp:
                gp.path = pstrip(gp.path)
                if gp.oldpath:
                    gp.oldpath = pstrip(gp.oldpath)
            else:
                gp = makepatchmeta(backend, afile, bfile, first_hunk, strip,
                                   prefix)
            if gp.op == 'RENAME':
                backend.unlink(gp.oldpath)
            if not first_hunk:
                # metadata-only change: there is no content to patch
                if gp.op == 'DELETE':
                    backend.unlink(gp.path)
                    continue
                data, mode = None, None
                if gp.op in ('RENAME', 'COPY'):
                    data, mode = store.getfile(gp.oldpath)[:2]
                    if data is None:
                        # This means that the old path does not exist
                        raise PatchError(_("source file '%s' does not exist")
                                         % gp.oldpath)
                if gp.mode:
                    mode = gp.mode
                    if gp.op == 'ADD':
                        # Added files without content have no hunk and
                        # must be created
                        data = ''
                if data or mode:
                    if (gp.op in ('ADD', 'RENAME', 'COPY')
                        and backend.exists(gp.path)):
                        raise PatchError(_("cannot create %s: destination "
                                           "already exists") % gp.path)
                    backend.setfile(gp.path, data, mode, gp.oldpath)
                continue
            try:
                current_file = patcher(ui, gp, backend, store,
                                       eolmode=eolmode)
            except PatchError as inst:
                # forcebytestr() keeps the message a byte string so it can
                # be concatenated and written on Python 2 and Python 3
                ui.warn(stringutil.forcebytestr(inst) + '\n')
                current_file = None
                rejects += 1
                continue
        elif state == 'git':
            # save copy/rename sources before anything gets modified
            for gp in values:
                path = pstrip(gp.oldpath)
                data, mode = backend.getfile(path)
                # A missing source is deliberately ignored here: it will
                # trigger a getfile() error in a place more appropriate
                # for error handling, without interrupting the patching
                # process.
                if data is not None:
                    store.setfile(path, data, mode)
        else:
            raise error.Abort(_('unsupported parser state: %s') % state)

    if current_file:
        rejects += current_file.close()

    if rejects:
        return -1
    return err
2103 2103
def _externalpatch(ui, repo, patcher, patchname, strip, files,
                   similarity):
    """use <patcher> to apply <patchname> to the working directory.
    returns whether patch was applied with fuzz factor.

    The external program is run through the shell with its output read
    line by line. Every file it reports patching is added to 'files';
    fuzz, reject and FAILED lines are forwarded as warnings, preceded
    once by the name of the file they belong to. Raises PatchError if
    the command exits with a non-zero status.
    """

    fuzz = False
    # Name of the file currently reported by the patcher. Until one has
    # been seen there is nothing to print, so diagnostics arriving before
    # any 'patching file' line do not touch an unset name.
    pf = None
    printed_file = True
    args = []
    cwd = repo.root
    if cwd:
        args.append('-d %s' % procutil.shellquote(cwd))
    cmd = ('%s %s -p%d < %s'
           % (patcher, ' '.join(args), strip, procutil.shellquote(patchname)))
    fp = procutil.popen(cmd, 'rb')
    try:
        for line in util.iterfile(fp):
            line = line.rstrip()
            ui.note(line + '\n')
            if line.startswith('patching file '):
                pf = util.parsepatchoutput(line)
                printed_file = False
                files.add(pf)
            elif 'with fuzz' in line:
                fuzz = True
                if not printed_file:
                    ui.warn(pf + '\n')
                    printed_file = True
                ui.warn(line + '\n')
            elif 'saving rejects to file' in line:
                ui.warn(line + '\n')
            elif 'FAILED' in line:
                if not printed_file:
                    ui.warn(pf + '\n')
                    printed_file = True
                ui.warn(line + '\n')
    finally:
        # record touched files even if reading the output failed
        if files:
            scmutil.marktouched(repo, files, similarity)
    code = fp.close()
    if code:
        raise PatchError(_("patch command failed: %s") %
                         procutil.explainexit(code))
    return fuzz
2146 2146
def patchbackend(ui, backend, patchobj, strip, prefix, files=None,
                 eolmode='strict'):
    """Apply patchobj through backend with the builtin patcher.

    patchobj is either something open() accepts as a file name or an
    already opened file-like object. The names returned by
    backend.close() are added to 'files'. Returns True when the patch
    applied with fuzz.

    Raises error.Abort for an unknown eolmode and PatchError when the
    patch did not apply.
    """
    if files is None:
        files = set()
    if eolmode is None:
        eolmode = ui.config('patch', 'eol')
    normalized = eolmode.lower()
    if normalized not in eolmodes:
        # report the value as the user spelled it
        raise error.Abort(_('unsupported line endings type: %s') % eolmode)
    eolmode = normalized

    store = filestore()
    try:
        fp = open(patchobj, 'rb')
    except TypeError:
        # not a path: assume the caller handed us an open file object
        fp = patchobj
    try:
        ret = applydiff(ui, fp, backend, store, strip=strip, prefix=prefix,
                        eolmode=eolmode)
    finally:
        # only close what was opened here
        if fp != patchobj:
            fp.close()
        files.update(backend.close())
        store.close()
    if ret < 0:
        raise PatchError(_('patch failed to apply'))
    return ret > 0
2173 2173
def internalpatch(ui, repo, patchobj, strip, prefix='', files=None,
                  eolmode='strict', similarity=0):
    """Apply <patchobj> to the working directory with the builtin patcher.

    Returns whether the patch was applied with fuzz factor."""
    wdbackend = workingbackend(ui, repo, similarity)
    fuzz = patchbackend(ui, wdbackend, patchobj, strip, prefix, files,
                        eolmode)
    return fuzz
2180 2180
def patchrepo(ui, repo, ctx, store, patchobj, strip, prefix, files=None,
              eolmode='strict'):
    """Apply <patchobj> through a repobackend built from ctx and store.

    Returns whether the patch was applied with fuzz factor."""
    rbackend = repobackend(ui, repo, ctx, store)
    fuzz = patchbackend(ui, rbackend, patchobj, strip, prefix, files,
                        eolmode)
    return fuzz
2185 2185
def patch(ui, repo, patchname, strip=1, prefix='', files=None, eolmode='strict',
          similarity=0):
    """Apply <patchname> to the working directory.

    The external program configured as ui.patch is used when set,
    otherwise the builtin patcher is.

    'eolmode' specifies how end of lines should be handled. It can be:
    - 'strict': inputs are read in binary mode, EOLs are preserved
    - 'crlf': EOLs are ignored when patching and reset to CRLF
    - 'lf': EOLs are ignored when patching and reset to LF
    - None: get it from user settings, default to 'strict'
    'eolmode' is ignored when using an external patcher program.

    Returns whether patch was applied with fuzz factor.
    """
    patcher = ui.config('ui', 'patch')
    if files is None:
        files = set()
    if not patcher:
        return internalpatch(ui, repo, patchname, strip, prefix, files,
                             eolmode, similarity)
    return _externalpatch(ui, repo, patcher, patchname, strip,
                          files, similarity)
2207 2207
def changedfiles(ui, repo, patchpath, strip=1, prefix=''):
    """Return the set of paths the patch at patchpath would touch.

    Nothing is applied: the patch is only parsed. For renames both the
    new and the old path are reported. Raises error.Abort on an unknown
    parser state.
    """
    backend = fsbackend(ui, repo.root)
    prefix = _canonprefix(repo, prefix)

    def strippath(p):
        return pathtransform(p, strip - 1, prefix)[1]

    changed = set()
    with open(patchpath, 'rb') as fp:
        for state, values in iterhunks(fp):
            if state != 'file':
                # hunks and git metadata carry no additional path
                if state not in ('hunk', 'git'):
                    raise error.Abort(_('unsupported parser state: %s')
                                      % state)
                continue
            afile, bfile, first_hunk, gp = values
            if gp:
                gp.path = strippath(gp.path)
                if gp.oldpath:
                    gp.oldpath = strippath(gp.oldpath)
            else:
                gp = makepatchmeta(backend, afile, bfile, first_hunk, strip,
                                   prefix)
            changed.add(gp.path)
            if gp.op == 'RENAME':
                changed.add(gp.oldpath)
    return changed
2230 2230
class GitDiffRequired(Exception):
    """Raised while diffing when the git patch format has to be used."""
2233 2233
def diffallopts(ui, opts=None, untrusted=False, section='diff'):
    '''return diffopts with every optional feature group enabled

    This is difffeatureopts() with git, whitespace and formatchanging
    all switched on.'''
    return difffeatureopts(
        ui,
        opts=opts,
        untrusted=untrusted,
        section=section,
        git=True,
        whitespace=True,
        formatchanging=True,
    )

# alias of diffallopts
diffopts = diffallopts
2240 2240
def difffeatureopts(ui, opts=None, untrusted=False, section='diff', git=False,
                    whitespace=False, formatchanging=False):
    '''return diffopts with only opted-in features parsed

    Features:
    - git: git-style diffs
    - whitespace: whitespace options like ignoreblanklines and ignorews
    - formatchanging: options that will likely break or cause correctness issues
      with most diff parsers
    '''
    def get(key, name=None, getter=ui.configbool, forceplain=None):
        # Command line values win over the configuration, but only when
        # they look like they were actually set. Unset flags are either
        # None or some other falsey default (e.g. --unified defaults to
        # an empty string), so "set" means any truthy value or an
        # explicit True/False.
        if opts:
            v = opts.get(key)
            if v or isinstance(v, bool):
                return v
        if forceplain is not None and ui.plain():
            return forceplain
        return getter(section, name or key, untrusted=untrusted)

    # core options, expected to be understood by every diff parser
    buildopts = {}
    buildopts['nodates'] = get('nodates')
    buildopts['showfunc'] = get('show_function', 'showfunc')
    buildopts['context'] = get('unified', getter=ui.config)
    buildopts['worddiff'] = ui.configbool('experimental', 'worddiff')
    buildopts['xdiff'] = ui.configbool('experimental', 'xdiff')

    if git:
        buildopts['git'] = get('git')

        # this option lives in the experimental section, so
        # ui.configbool has to be called directly
        buildopts['showsimilarity'] = ui.configbool(
            'experimental', 'extendedheader.similarity')

        # the ui object is inspected instead of going through get()
        # because the value may be an int
        hconf = ui.config('experimental', 'extendedheader.index')
        if hconf is not None:
            hlen = None
            try:
                # the hash config could be an integer (for length of
                # hash) or a word (e.g. short, full, none)
                hlen = int(hconf)
                if not 0 <= hlen <= 40:
                    msg = _("invalid length for extendedheader.index: '%d'\n")
                    ui.warn(msg % hlen)
            except ValueError:
                # not a number: interpret it as a keyword
                if hconf in ('short', ''):
                    hlen = 12
                elif hconf == 'full':
                    hlen = 40
                elif hconf != 'none':
                    msg = _("invalid value for extendedheader.index: '%s'\n")
                    ui.warn(msg % hconf)
            finally:
                buildopts['index'] = hlen

    if whitespace:
        buildopts['ignorews'] = get('ignore_all_space', 'ignorews')
        buildopts['ignorewsamount'] = get('ignore_space_change',
                                          'ignorewsamount')
        buildopts['ignoreblanklines'] = get('ignore_blank_lines',
                                            'ignoreblanklines')
        buildopts['ignorewseol'] = get('ignore_space_at_eol', 'ignorewseol')
    if formatchanging:
        buildopts['text'] = opts and opts.get('text')
        binary = None if opts is None else opts.get('binary')
        if binary is not None:
            buildopts['nobinary'] = not binary
        else:
            buildopts['nobinary'] = get('nobinary', forceplain=False)
        buildopts['noprefix'] = get('noprefix', forceplain=False)

    return mdiff.diffopts(**pycompat.strkwargs(buildopts))
2323 2323
def diff(repo, node1=None, node2=None, match=None, changes=None,
         opts=None, losedatafn=None, prefix='', relroot='', copy=None,
         hunksfilterfn=None):
    '''yields diff of changes to files between two nodes, or node and
    working directory.

    if node1 is None, use first dirstate parent instead.
    if node2 is None, compare node1 with working directory.

    losedatafn(**kwarg) is a callable run when opts.upgrade=True and
    every time some change cannot be represented with the current
    patch format. Return False to upgrade to git patch format, True to
    accept the loss or raise an exception to abort the diff. It is
    called with the name of current file being diffed as 'fn'. If set
    to None, patches will always be upgraded to git format when
    necessary.

    prefix is a filename prefix that is prepended to all filenames on
    display (used for subrepos).

    relroot, if not empty, must be normalized with a trailing /. Any match
    patterns that fall outside it will be ignored.

    copy, if not empty, should contain mappings {dst@y: src@x} of copy
    information.

    hunksfilterfn, if not None, should be a function taking a filectx and
    hunks generator that may yield filtered hunks.

    For each file, the header lines are yielded as one string (when there
    is text, or more than one header line), followed by the hunk text
    when it is not empty.
    '''
    for fctx1, fctx2, hdr, hunks in diffhunks(
            repo, node1=node1, node2=node2,
            match=match, changes=changes, opts=opts,
            losedatafn=losedatafn, prefix=prefix, relroot=relroot, copy=copy,
    ):
        if hunksfilterfn is not None:
            # If the file has been removed, fctx2 is None; but this should
            # not occur here since we catch removed files early in
            # logcmdutil.getlinerangerevs() for 'hg log -L'.
            assert fctx2 is not None, \
                'fctx2 unexpectly None in diff hunks filtering'
            hunks = hunksfilterfn(fctx2, hunks)
        # flatten all hunk lines in a single pass; no intermediate lists
        # are concatenated
        text = ''.join(line
                       for hrange, hlines in hunks
                       for line in hlines)
        if hdr and (text or len(hdr) > 1):
            yield '\n'.join(hdr) + '\n'
        if text:
            yield text
2370 2370
def diffhunks(repo, node1=None, node2=None, match=None, changes=None,
              opts=None, losedatafn=None, prefix='', relroot='', copy=None):
    """Compute the diff between two nodes (or a node and the working
    directory) and return whatever trydiff() produces for it, or an
    empty list when no file is modified, added or removed.

    See diff() for the meaning of parameters.
    """

    if opts is None:
        opts = mdiff.defaultopts

    if not (node1 or node2):
        node1 = repo.dirstate.p1()

    def lrugetfilectx():
        # small cache of filelogs keyed by file name, evicting the least
        # recently used entry
        filelogs = {}
        recency = collections.deque()
        def getfilectx(f, ctx):
            fctx = ctx.filectx(f, filelog=filelogs.get(f))
            if f in filelogs:
                recency.remove(f)
            else:
                if len(filelogs) > 20:
                    del filelogs[recency.popleft()]
                filelogs[f] = fctx.filelog()
            recency.append(f)
            return fctx
        return getfilectx
    getfilectx = lrugetfilectx()

    ctx1 = repo[node1]
    ctx2 = repo[node2]

    relfiltered = False
    if relroot != '' and match.always():
        # as a special case, create a new matcher with just the relroot
        match = scmutil.match(ctx2, [relroot], default='path')
        relfiltered = True

    if not changes:
        changes = repo.status(ctx1, ctx2, match=match)
    modified, added, removed = changes[:3]

    if not (modified or added or removed):
        return []

    hexfunc = hex if repo.ui.debugflag else short
    revs = [hexfunc(n) for n in (ctx1.node(), ctx2.node()) if n]

    if copy is None:
        copy = {}
        if opts.git or opts.upgrade:
            copy = copies.pathcopies(ctx1, ctx2, match=match)

    if relroot is not None:
        if not relfiltered:
            # XXX this would ideally be done in the matcher, but that is
            # generally meant to 'or' patterns, not 'and' them. In this case we
            # need to 'and' all the patterns from the matcher with relroot.
            def filterrel(l):
                return [f for f in l if f.startswith(relroot)]
            modified = filterrel(modified)
            added = filterrel(added)
            removed = filterrel(removed)
            relfiltered = True
        # filter out copies where either side isn't inside the relative root
        copy = {dst: src for dst, src in copy.iteritems()
                if dst.startswith(relroot) and src.startswith(relroot)}

    modifiedset = set(modified)
    addedset = set(added)
    removedset = set(removed)
    for f in modified:
        if f not in ctx1:
            # Fix up added, since merged-in additions appear as
            # modifications during merges
            modifiedset.remove(f)
            addedset.add(f)
    for f in removed:
        if f not in ctx1:
            # Merged-in additions that are then removed are reported as
            # removed. They are not in ctx1, so we don't want to show them
            # in the diff.
            removedset.remove(f)
    modified = sorted(modifiedset)
    added = sorted(addedset)
    removed = sorted(removedset)
    # iterate over a snapshot since entries are deleted along the way
    for dst, src in list(copy.items()):
        if src not in ctx1:
            # Files merged in during a merge and then copied/renamed are
            # reported as copies. We want to show them in the diff as
            # additions.
            del copy[dst]

    touched = list(modifiedset | addedset | removedset)
    prefetchmatch = scmutil.matchfiles(repo, touched)
    scmutil.prefetchfiles(repo, [ctx1.rev(), ctx2.rev()], prefetchmatch)

    def difffn(diffopts, losedata):
        return trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
                       copy, getfilectx, diffopts, losedata, prefix, relroot)

    if not opts.upgrade or opts.git:
        return difffn(opts, None)

    def losedata(fn):
        if not losedatafn or not losedatafn(fn=fn):
            raise GitDiffRequired
    try:
        # Buffer the whole output until we are sure it can be generated
        return list(difffn(opts.copy(git=False), losedata))
    except GitDiffRequired:
        return difffn(opts.copy(git=True), None)
2487 2487
def diffsinglehunk(hunklines):
    """yield (token, label) pairs for the lines of a single hunk

    Every line must start with '-' or '+'. Runs of tabs are labelled
    'diff.tab', trailing whitespace 'diff.trailingwhitespace', and the
    line ending gets an empty label.
    """
    for line in hunklines:
        # peel off the newline first, then any trailing whitespace
        nonewline = line.rstrip('\n')
        content = nonewline.rstrip()
        if line.startswith('-'):
            label = 'diff.deleted'
        elif line.startswith('+'):
            label = 'diff.inserted'
        else:
            raise error.ProgrammingError('unexpected hunk line: %s' % line)
        for piece in tabsplitter.findall(content):
            yield (piece, 'diff.tab' if piece.startswith('\t') else label)

        trailing = nonewline[len(content):]
        if trailing:
            yield (trailing, 'diff.trailingwhitespace')
        ending = line[len(nonewline):]
        if ending:
            yield (ending, '')
2511 2511
def diffsinglehunkinline(hunklines):
    """yield tokens for a list of lines in a single hunk, with inline colors

    The deleted and inserted sides are split into words and diffed
    against each other, so that each token can be labelled as changed or
    unchanged. When one of the sides is empty this falls back to
    diffsinglehunk(). Raises error.ProgrammingError for a line that
    starts with neither '-' nor '+'.
    """
    # prepare deleted, and inserted content
    deleted = []
    inserted = []
    for line in hunklines:
        # startswith() rather than line[0]: indexing a byte string gives
        # an int on Python 3, and an empty line has no first item
        if line.startswith('-'):
            deleted.append(line[1:])
        elif line.startswith('+'):
            inserted.append(line[1:])
        else:
            raise error.ProgrammingError('unexpected hunk line: %s' % line)
    a = ''.join(deleted)
    b = ''.join(inserted)
    # fast path: if either side is empty, use diffsinglehunk
    if not a or not b:
        for t in diffsinglehunk(hunklines):
            yield t
        return
    # re-split the content into words
    al = wordsplitter.findall(a)
    bl = wordsplitter.findall(b)
    # re-arrange the words to lines since the diff algorithm is line-based
    aln = [s if s == '\n' else s + '\n' for s in al]
    bln = [s if s == '\n' else s + '\n' for s in bl]
    an = ''.join(aln)
    bn = ''.join(bln)
    # run the diff algorithm, prepare atokens and btokens
    atokens = []
    btokens = []
    blocks = mdiff.allblocks(an, bn, lines1=aln, lines2=bln)
    for (a1, a2, b1, b2), btype in blocks:
        changed = btype == '!'
        for token in mdiff.splitnewlines(''.join(al[a1:a2])):
            atokens.append((changed, token))
        for token in mdiff.splitnewlines(''.join(bl[b1:b2])):
            btokens.append((changed, token))

    # yield deleted tokens, then inserted ones
    for prefix, label, tokens in [('-', 'diff.deleted', atokens),
                                  ('+', 'diff.inserted', btokens)]:
        nextisnewline = True
        for changed, token in tokens:
            if nextisnewline:
                # every output line starts with its '-' or '+' marker
                yield (prefix, label)
                nextisnewline = False
            # special handling line end
            isendofline = token.endswith('\n')
            if isendofline:
                chomp = token[:-1] # chomp
                token = chomp.rstrip() # detect spaces at the end
                endspaces = chomp[len(token):]
            # scan tabs
            for maybetab in tabsplitter.findall(token):
                if maybetab.startswith('\t'):
                    currentlabel = 'diff.tab'
                elif changed:
                    currentlabel = label + '.changed'
                else:
                    currentlabel = label + '.unchanged'
                yield (maybetab, currentlabel)
            if isendofline:
                if endspaces:
                    yield (endspaces, 'diff.trailingwhitespace')
                yield ('\n', '')
                nextisnewline = True
2577 2577
def difflabel(func, *args, **kw):
    '''yields 2-tuples of (output, label) based on the output of func()

    func(*args, **kw) must yield chunks of diff text. Header and hunk
    marker lines are labelled by their prefix, while adjacent '-'/'+'
    lines are buffered and labelled together by diffsinglehunk(), or by
    diffsinglehunkinline() when the 'opts' keyword argument has worddiff
    set.'''
    if kw.get(r'opts') and kw[r'opts'].worddiff:
        dodiffhunk = diffsinglehunkinline
    else:
        dodiffhunk = diffsinglehunk
    headprefixes = [('diff', 'diff.diffline'),
                    ('copy', 'diff.extended'),
                    ('rename', 'diff.extended'),
                    ('old', 'diff.extended'),
                    ('new', 'diff.extended'),
                    ('deleted', 'diff.extended'),
                    ('index', 'diff.extended'),
                    ('similarity', 'diff.extended'),
                    ('---', 'diff.file_a'),
                    ('+++', 'diff.file_b')]
    # - and + are handled by the hunk labelling function
    textprefixes = [('@', 'diff.hunk')]
    head = False

    # buffers a hunk, i.e. adjacent "-", "+" lines without other changes.
    hunkbuffer = []
    def consumehunkbuffer():
        if hunkbuffer:
            for token in dodiffhunk(hunkbuffer):
                yield token
            hunkbuffer[:] = []

    for chunk in func(*args, **kw):
        lines = chunk.split('\n')
        lastindex = len(lines) - 1
        for i, line in enumerate(lines):
            # a '@' line ends the header; any line that is not part of a
            # hunk body starts one
            if head:
                if line.startswith('@'):
                    head = False
            elif line and not line.startswith((' ', '+', '-', '@', '\\')):
                head = True

            if not head and line and line.startswith(('+', '-')):
                # buffered: keep the line (with its newline, unless it is
                # the last piece of the chunk) until the hunk is complete
                if i < lastindex:
                    hunkbuffer.append(line + "\n")
                else:
                    hunkbuffer.append(line)
                continue

            # unbuffered: flush any pending hunk, then label this line
            for token in consumehunkbuffer():
                yield token
            prefixes = headprefixes if head else textprefixes
            stripline = line.rstrip()
            for prefix, label in prefixes:
                if stripline.startswith(prefix):
                    yield (stripline, label)
                    if line != stripline:
                        yield (line[len(stripline):],
                               'diff.trailingwhitespace')
                    break
            else:
                yield (line, '')
            if i < lastindex:
                yield ('\n', '')
        for token in consumehunkbuffer():
            yield token
2648 2648
def diffui(*args, **kw):
    '''like diff(), but yields 2-tuples of (output, label) for ui.write()

    All arguments are forwarded to diff() through difflabel().'''
    labelled = difflabel(diff, *args, **kw)
    return labelled
2652 2652
2653 2653 def _filepairs(modified, added, removed, copy, opts):
2654 2654 '''generates tuples (f1, f2, copyop), where f1 is the name of the file
2655 2655 before and f2 is the the name after. For added files, f1 will be None,
2656 2656 and for removed files, f2 will be None. copyop may be set to None, 'copy'
2657 2657 or 'rename' (the latter two only if opts.git is set).'''
2658 2658 gone = set()
2659 2659
2660 2660 copyto = dict([(v, k) for k, v in copy.items()])
2661 2661
2662 2662 addedset, removedset = set(added), set(removed)
2663 2663
2664 2664 for f in sorted(modified + added + removed):
2665 2665 copyop = None
2666 2666 f1, f2 = f, f
2667 2667 if f in addedset:
2668 2668 f1 = None
2669 2669 if f in copy:
2670 2670 if opts.git:
2671 2671 f1 = copy[f]
2672 2672 if f1 in removedset and f1 not in gone:
2673 2673 copyop = 'rename'
2674 2674 gone.add(f1)
2675 2675 else:
2676 2676 copyop = 'copy'
2677 2677 elif f in removedset:
2678 2678 f2 = None
2679 2679 if opts.git:
2680 2680 # have we already reported a copy above?
2681 2681 if (f in copyto and copyto[f] in addedset
2682 2682 and copy[copyto[f]] == f):
2683 2683 continue
2684 2684 yield f1, f2, copyop
2685 2685
2686 2686 def trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
2687 2687 copy, getfilectx, opts, losedatafn, prefix, relroot):
2688 2688 '''given input data, generate a diff and yield it in blocks
2689 2689
2690 2690 If generating a diff would lose data like flags or binary data and
2691 2691 losedatafn is not None, it will be called.
2692 2692
2693 2693 relroot is removed and prefix is added to every path in the diff output.
2694 2694
2695 2695 If relroot is not empty, this function expects every path in modified,
2696 2696 added, removed and copy to start with it.'''
2697 2697
2698 2698 def gitindex(text):
2699 2699 if not text:
2700 2700 text = ""
2701 2701 l = len(text)
2702 2702 s = hashlib.sha1('blob %d\0' % l)
2703 2703 s.update(text)
2704 2704 return hex(s.digest())
2705 2705
2706 2706 if opts.noprefix:
2707 2707 aprefix = bprefix = ''
2708 2708 else:
2709 2709 aprefix = 'a/'
2710 2710 bprefix = 'b/'
2711 2711
2712 2712 def diffline(f, revs):
2713 2713 revinfo = ' '.join(["-r %s" % rev for rev in revs])
2714 2714 return 'diff %s %s' % (revinfo, f)
2715 2715
2716 2716 def isempty(fctx):
2717 2717 return fctx is None or fctx.size() == 0
2718 2718
2719 2719 date1 = dateutil.datestr(ctx1.date())
2720 2720 date2 = dateutil.datestr(ctx2.date())
2721 2721
2722 2722 gitmode = {'l': '120000', 'x': '100755', '': '100644'}
2723 2723
2724 2724 if relroot != '' and (repo.ui.configbool('devel', 'all-warnings')
2725 2725 or repo.ui.configbool('devel', 'check-relroot')):
2726 2726 for f in modified + added + removed + list(copy) + list(copy.values()):
2727 2727 if f is not None and not f.startswith(relroot):
2728 2728 raise AssertionError(
2729 2729 "file %s doesn't start with relroot %s" % (f, relroot))
2730 2730
2731 2731 for f1, f2, copyop in _filepairs(modified, added, removed, copy, opts):
2732 2732 content1 = None
2733 2733 content2 = None
2734 2734 fctx1 = None
2735 2735 fctx2 = None
2736 2736 flag1 = None
2737 2737 flag2 = None
2738 2738 if f1:
2739 2739 fctx1 = getfilectx(f1, ctx1)
2740 2740 if opts.git or losedatafn:
2741 2741 flag1 = ctx1.flags(f1)
2742 2742 if f2:
2743 2743 fctx2 = getfilectx(f2, ctx2)
2744 2744 if opts.git or losedatafn:
2745 2745 flag2 = ctx2.flags(f2)
2746 2746 # if binary is True, output "summary" or "base85", but not "text diff"
2747 2747 if opts.text:
2748 2748 binary = False
2749 2749 else:
2750 2750 binary = any(f.isbinary() for f in [fctx1, fctx2] if f is not None)
2751 2751
2752 2752 if losedatafn and not opts.git:
2753 2753 if (binary or
2754 2754 # copy/rename
2755 2755 f2 in copy or
2756 2756 # empty file creation
2757 2757 (not f1 and isempty(fctx2)) or
2758 2758 # empty file deletion
2759 2759 (isempty(fctx1) and not f2) or
2760 2760 # create with flags
2761 2761 (not f1 and flag2) or
2762 2762 # change flags
2763 2763 (f1 and f2 and flag1 != flag2)):
2764 2764 losedatafn(f2 or f1)
2765 2765
2766 2766 path1 = f1 or f2
2767 2767 path2 = f2 or f1
2768 2768 path1 = posixpath.join(prefix, path1[len(relroot):])
2769 2769 path2 = posixpath.join(prefix, path2[len(relroot):])
2770 2770 header = []
2771 2771 if opts.git:
2772 2772 header.append('diff --git %s%s %s%s' %
2773 2773 (aprefix, path1, bprefix, path2))
2774 2774 if not f1: # added
2775 2775 header.append('new file mode %s' % gitmode[flag2])
2776 2776 elif not f2: # removed
2777 2777 header.append('deleted file mode %s' % gitmode[flag1])
2778 2778 else: # modified/copied/renamed
2779 2779 mode1, mode2 = gitmode[flag1], gitmode[flag2]
2780 2780 if mode1 != mode2:
2781 2781 header.append('old mode %s' % mode1)
2782 2782 header.append('new mode %s' % mode2)
2783 2783 if copyop is not None:
2784 2784 if opts.showsimilarity:
2785 2785 sim = similar.score(ctx1[path1], ctx2[path2]) * 100
2786 2786 header.append('similarity index %d%%' % sim)
2787 2787 header.append('%s from %s' % (copyop, path1))
2788 2788 header.append('%s to %s' % (copyop, path2))
2789 2789 elif revs and not repo.ui.quiet:
2790 2790 header.append(diffline(path1, revs))
2791 2791
2792 2792 # fctx.is | diffopts | what to | is fctx.data()
2793 2793 # binary() | text nobinary git index | output? | outputted?
2794 2794 # ------------------------------------|----------------------------
2795 2795 # yes | no no no * | summary | no
2796 2796 # yes | no no yes * | base85 | yes
2797 2797 # yes | no yes no * | summary | no
2798 2798 # yes | no yes yes 0 | summary | no
2799 2799 # yes | no yes yes >0 | summary | semi [1]
2800 2800 # yes | yes * * * | text diff | yes
2801 2801 # no | * * * * | text diff | yes
2802 2802 # [1]: hash(fctx.data()) is outputted. so fctx.data() cannot be faked
2803 2803 if binary and (not opts.git or (opts.git and opts.nobinary and not
2804 2804 opts.index)):
2805 2805 # fast path: no binary content will be displayed, content1 and
2806 2806 # content2 are only used for equivalent test. cmp() could have a
2807 2807 # fast path.
2808 2808 if fctx1 is not None:
2809 2809 content1 = b'\0'
2810 2810 if fctx2 is not None:
2811 2811 if fctx1 is not None and not fctx1.cmp(fctx2):
2812 2812 content2 = b'\0' # not different
2813 2813 else:
2814 2814 content2 = b'\0\0'
2815 2815 else:
2816 2816 # normal path: load contents
2817 2817 if fctx1 is not None:
2818 2818 content1 = fctx1.data()
2819 2819 if fctx2 is not None:
2820 2820 content2 = fctx2.data()
2821 2821
2822 2822 if binary and opts.git and not opts.nobinary:
2823 2823 text = mdiff.b85diff(content1, content2)
2824 2824 if text:
2825 2825 header.append('index %s..%s' %
2826 2826 (gitindex(content1), gitindex(content2)))
2827 2827 hunks = (None, [text]),
2828 2828 else:
2829 2829 if opts.git and opts.index > 0:
2830 2830 flag = flag1
2831 2831 if flag is None:
2832 2832 flag = flag2
2833 2833 header.append('index %s..%s %s' %
2834 2834 (gitindex(content1)[0:opts.index],
2835 2835 gitindex(content2)[0:opts.index],
2836 2836 gitmode[flag]))
2837 2837
2838 2838 uheaders, hunks = mdiff.unidiff(content1, date1,
2839 2839 content2, date2,
2840 2840 path1, path2,
2841 2841 binary=binary, opts=opts)
2842 2842 header.extend(uheaders)
2843 2843 yield fctx1, fctx2, header, hunks
2844 2844
def diffstatsum(stats):
    '''fold per-file diffstat entries into overall totals

    stats is an iterable of (filename, adds, removes, isbinary) tuples.

    Returns a 5-tuple (maxfile, maxtotal, addtotal, removetotal, binary):
    the largest encoding.colwidth() of any filename, the largest
    adds + removes of a single entry, the sum of all adds, the sum of all
    removes, and whether any entry was flagged as binary.
    '''
    widest = 0
    busiest = 0
    inserted = 0
    deleted = 0
    sawbinary = False
    for name, added, removed, isbin in stats:
        widest = max(widest, encoding.colwidth(name))
        busiest = max(busiest, added + removed)
        inserted += added
        deleted += removed
        # once a binary entry has been seen the flag stays set
        if not sawbinary:
            sawbinary = isbin

    return widest, busiest, inserted, deleted, sawbinary
2855 2855
def diffstatdata(lines):
    '''extract per-file change counts from the lines of a diff

    lines is an iterable of diff lines.

    Returns a list of (filename, adds, removes, isbinary) tuples in the
    order the file sections were seen. A section is only recorded once a
    filename has been parsed from a "diff --git a/..." or "diff -r ..."
    line.
    '''
    # The backslash is doubled (instead of relying on the invalid '\s'
    # string escape) so the literal stays unprefixed, like the startswith()
    # arguments below, while compiling to exactly the same pattern.
    diffre = re.compile('^diff .*-r [a-z0-9]+\\s(.*)$')

    results = []
    filename, adds, removes, isbinary = None, 0, 0, False

    def addresult():
        # flush the counters of the section that just ended, if it had a name
        if filename:
            results.append((filename, adds, removes, isbinary))

    # inheader is used to track if a line is in the
    # header portion of the diff. This helps properly account
    # for lines that start with '--' or '++'
    inheader = False

    for line in lines:
        if line.startswith('diff'):
            addresult()
            # starting a new file diff
            # set numbers to 0 and reset inheader
            inheader = True
            adds, removes, isbinary = 0, 0, False
            if line.startswith('diff --git a/'):
                filename = gitre.search(line).group(2)
            elif line.startswith('diff -r'):
                # format: "diff -r ... -r ... filename"
                filename = diffre.search(line).group(1)
        elif line.startswith('@@'):
            # first hunk marker: everything that follows is hunk content
            inheader = False
        elif line.startswith('+') and not inheader:
            adds += 1
        elif line.startswith('-') and not inheader:
            removes += 1
        elif line.startswith(('GIT binary patch', 'Binary file')):
            isbinary = True
    # the last section has no following "diff" line to flush it
    addresult()
    return results
2894 2894
def diffstat(lines, width=80):
    '''render a diffstat summary of the diff given as lines

    Each file reported by diffstatdata() becomes one row holding its
    name, a change count ('Bin' for binary files) and a +/- graph; if
    there is at least one file, a totals line is appended. width is the
    target line width used to size the graph. The rows are returned
    joined into a single string.
    '''
    stats = diffstatdata(lines)
    maxname, maxtotal, totaladds, totalremoves, hasbinary = diffstatsum(stats)

    # the count column has to fit the largest count and, if used, 'Bin'
    countwidth = len(str(maxtotal))
    if hasbinary:
        countwidth = max(countwidth, 3)
    # whatever is left goes to the graph, but never fewer than 10 columns
    graphwidth = max(width - countwidth - maxname - 6, 10)

    def scale(count):
        if maxtotal > graphwidth:
            # If diffstat runs out of room it doesn't print anything,
            # which isn't very useful, so always print at least one + or -
            # if there were at least some changes.
            return max(count * graphwidth // maxtotal, int(bool(count)))
        return count

    rows = []
    for filename, adds, removes, isbinary in stats:
        count = 'Bin' if isbinary else '%d' % (adds + removes)
        padding = ' ' * (maxname - encoding.colwidth(filename))
        rows.append(' %s%s | %*s %s%s\n' %
                    (filename, padding, countwidth, count,
                     '+' * scale(adds), '-' * scale(removes)))

    if stats:
        rows.append(_(' %d files changed, %d insertions(+), '
                      '%d deletions(-)\n')
                    % (len(stats), totaladds, totalremoves))

    return ''.join(rows)
2932 2932
def diffstatui(*args, **kw):
    '''labelled variant of diffstat() meant for ui.write()

    Takes the same arguments as diffstat() and yields (output, label)
    2-tuples. For lines ending in '+' or '-', the runs of '+' and '-' in
    the trailing graph are labelled 'diffstat.inserted' and
    'diffstat.deleted'; everything else carries an empty label.
    '''
    graphparts = (
        (br'\++', 'diffstat.inserted'),
        (br'-+', 'diffstat.deleted'),
    )

    for line in diffstat(*args, **kw).splitlines():
        if not line or line[-1] not in '+-':
            # no graph on this line: pass it through unlabelled
            yield (line, '')
        else:
            # the graph is whatever follows the last space
            name, graph = line.rsplit(' ', 1)
            yield (name + ' ', '')
            for pattern, label in graphparts:
                found = re.search(pattern, graph)
                if found:
                    yield (found.group(0), label)
        # splitlines() dropped the line ending; emit it again
        yield ('\n', '')
General Comments 0
You need to be logged in to leave comments. Login now