##// END OF EJS Templates
py3: convert content-type to bytes...
Gregory Szorc -
r36052:95791b27 default
parent child Browse files
Show More
@@ -1,2904 +1,2904 b''
1 1 # patch.py - patch file parsing routines
2 2 #
3 3 # Copyright 2006 Brendan Cully <brendan@kublai.com>
4 4 # Copyright 2007 Chris Mason <chris.mason@oracle.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import, print_function
10 10
11 11 import collections
12 12 import copy
13 13 import difflib
14 14 import email
15 15 import email.parser as emailparser
16 16 import errno
17 17 import hashlib
18 18 import os
19 19 import posixpath
20 20 import re
21 21 import shutil
22 22 import tempfile
23 23 import zlib
24 24
25 25 from .i18n import _
26 26 from .node import (
27 27 hex,
28 28 short,
29 29 )
30 30 from . import (
31 31 copies,
32 32 encoding,
33 33 error,
34 34 mail,
35 35 mdiff,
36 36 pathutil,
37 37 policy,
38 38 pycompat,
39 39 scmutil,
40 40 similar,
41 41 util,
42 42 vfs as vfsmod,
43 43 )
44 44
# C/py implementation of low-level diff helpers, loaded via the policy layer.
diffhelpers = policy.importmod(r'diffhelpers')
stringio = util.stringio

# "diff --git a/<from> b/<to>" header line of a git-style diff.
gitre = re.compile(br'diff --git a/(.*) b/(.*)')
# Splits a byte string into alternating runs of tabs and non-tabs.
tabsplitter = re.compile(br'(\t+|[^\t]+)')
# Matches any byte that is not alphanumeric, underscore, or >= 0x80.
_nonwordre = re.compile(br'([^a-zA-Z0-9_\x80-\xff])')

# Re-exported for convenience; raised throughout this module.
PatchError = error.PatchError
53 53
54 54 # public functions
55 55
def split(stream):
    '''return an iterator of individual patches from a stream'''
    def isheader(line, inheader):
        # Heuristic for an RFC 2822-style "Key: value" header line; a
        # line starting with whitespace continues the previous header.
        if inheader and line[0] in (' ', '\t'):
            # continuation
            return True
        if line[0] in (' ', '-', '+'):
            # diff line - don't check for header pattern in there
            return False
        l = line.split(': ', 1)
        return len(l) == 2 and ' ' not in l[0]

    def chunk(lines):
        # Wrap accumulated lines in a file-like object.
        return stringio(''.join(lines))

    def hgsplit(stream, cur):
        # Split on "# HG changeset patch" markers (hg export output).
        inheader = True

        for line in stream:
            if not line.strip():
                inheader = False
            if not inheader and line.startswith('# HG changeset patch'):
                yield chunk(cur)
                cur = []
                inheader = True

            cur.append(line)

        if cur:
            yield chunk(cur)

    def mboxsplit(stream, cur):
        # Split an mbox on "From " separators, recursing into split()
        # for each message body (the separator line itself is dropped).
        for line in stream:
            if line.startswith('From '):
                for c in split(chunk(cur[1:])):
                    yield c
                cur = []

            cur.append(line)

        if cur:
            for c in split(chunk(cur[1:])):
                yield c

    def mimesplit(stream, cur):
        # Delegate splitting to the email parser; yield each patch-like
        # MIME part as a flattened message.
        def msgfp(m):
            fp = stringio()
            g = email.Generator.Generator(fp, mangle_from_=False)
            g.flatten(m)
            fp.seek(0)
            return fp

        for line in stream:
            cur.append(line)
        c = chunk(cur)

        m = emailparser.Parser().parse(c)
        if not m.is_multipart():
            yield msgfp(m)
        else:
            ok_types = ('text/plain', 'text/x-diff', 'text/x-patch')
            for part in m.walk():
                ct = part.get_content_type()
                if ct not in ok_types:
                    continue
                yield msgfp(part)

    def headersplit(stream, cur):
        # Split by hand on runs of header-looking lines.
        inheader = False

        for line in stream:
            if not inheader and isheader(line, inheader):
                yield chunk(cur)
                cur = []
                inheader = True
            if inheader and not isheader(line, inheader):
                inheader = False

            cur.append(line)

        if cur:
            yield chunk(cur)

    def remainder(cur):
        # Fallback: the whole input is one plain patch.
        yield chunk(cur)

    class fiter(object):
        # Adapt a readline-only object (e.g. an http response) to the
        # iterator protocol.
        def __init__(self, fp):
            self.fp = fp

        def __iter__(self):
            return self

        def next(self):
            l = self.fp.readline()
            if not l:
                raise StopIteration
            return l

        __next__ = next

    inheader = False
    cur = []

    mimeheaders = ['content-type']

    if not util.safehasattr(stream, 'next'):
        # http responses, for example, have readline but not next
        stream = fiter(stream)

    # Sniff the input until one of the specialized splitters applies.
    for line in stream:
        cur.append(line)
        if line.startswith('# HG changeset patch'):
            return hgsplit(stream, cur)
        elif line.startswith('From '):
            return mboxsplit(stream, cur)
        elif isheader(line, inheader):
            inheader = True
            if line.split(':', 1)[0].lower() in mimeheaders:
                # let email parser handle this
                return mimesplit(stream, cur)
        elif line.startswith('--- ') and inheader:
            # No evil headers seen by diff start, split by hand
            return headersplit(stream, cur)
        # Not enough info, keep reading

    # if we are here, we have a very plain patch
    return remainder(cur)
184 184
## Some facility for extensible patch parsing:
# list of pairs ("header to match", "data key")
# Each "# <header> <value>" line of an hg patch header is matched against
# these prefixes and the value stored under the paired key (see extract()).
patchheadermap = [('Date', 'date'),
                  ('Branch', 'branch'),
                  ('Node ID', 'nodeid'),
                  ]
191 191
def extract(ui, fileobj):
    '''extract patch from data read from fileobj.

    patch can be a normal patch or contained in an email message.

    return a dictionary. Standard keys are:
    - filename,
    - message,
    - user,
    - date,
    - branch,
    - node,
    - p1,
    - p2.
    Any item can be missing from the dictionary. If filename is missing,
    fileobj did not contain a patch. Caller must unlink filename when done.'''

    # attempt to detect the start of a patch
    # (this heuristic is borrowed from quilt)
    diffre = re.compile(br'^(?:Index:[ \t]|diff[ \t]-|RCS file: |'
                        br'retrieving revision [0-9]+(\.[0-9]+)*$|'
                        br'---[ \t].*?^\+\+\+[ \t]|'
                        br'\*\*\*[ \t].*?^---[ \t])',
                        re.MULTILINE | re.DOTALL)

    data = {}
    # The extracted diff body is written to a temp file whose name is
    # returned to the caller (who must unlink it).
    fd, tmpname = tempfile.mkstemp(prefix='hg-patch-')
    tmpfp = os.fdopen(fd, pycompat.sysstr('w'))
    try:
        msg = emailparser.Parser().parse(fileobj)

        subject = msg['Subject'] and mail.headdecode(msg['Subject'])
        data['user'] = msg['From'] and mail.headdecode(msg['From'])
        if not subject and not data['user']:
            # Not an email, restore parsed headers if any
            subject = '\n'.join(': '.join(h) for h in msg.items()) + '\n'

        # should try to parse msg['Date']
        parents = []

        if subject:
            # Strip a leading "[PATCH ...]" tag and unfold wrapped lines.
            if subject.startswith('[PATCH'):
                pend = subject.find(']')
                if pend >= 0:
                    subject = subject[pend + 1:].lstrip()
            subject = re.sub(br'\n[ \t]+', ' ', subject)
            ui.debug('Subject: %s\n' % subject)
        if data['user']:
            ui.debug('From: %s\n' % data['user'])
        diffs_seen = 0
        ok_types = ('text/plain', 'text/x-diff', 'text/x-patch')
        message = ''
        for part in msg.walk():
            # get_content_type() returns a native str; normalize to bytes
            # so comparisons against ok_types work on py3.
            content_type = pycompat.bytestr(part.get_content_type())
            ui.debug('Content-Type: %s\n' % content_type)
            if content_type not in ok_types:
                continue
            payload = part.get_payload(decode=True)
            m = diffre.search(payload)
            if m:
                hgpatch = False
                hgpatchheader = False
                ignoretext = False

                ui.debug('found patch at byte %d\n' % m.start(0))
                diffs_seen += 1
                cfp = stringio()
                # Everything before the diff start is commit message,
                # unless it is an hg patch header carrying metadata.
                for line in payload[:m.start(0)].splitlines():
                    if line.startswith('# HG changeset patch') and not hgpatch:
                        ui.debug('patch generated by hg export\n')
                        hgpatch = True
                        hgpatchheader = True
                        # drop earlier commit message content
                        cfp.seek(0)
                        cfp.truncate()
                        subject = None
                    elif hgpatchheader:
                        if line.startswith('# User '):
                            data['user'] = line[7:]
                            ui.debug('From: %s\n' % data['user'])
                        elif line.startswith("# Parent "):
                            parents.append(line[9:].lstrip())
                        elif line.startswith("# "):
                            for header, key in patchheadermap:
                                prefix = '# %s ' % header
                                if line.startswith(prefix):
                                    data[key] = line[len(prefix):]
                        else:
                            hgpatchheader = False
                    elif line == '---':
                        # "---" separates the message from patchbomb cruft.
                        ignoretext = True
                    if not hgpatchheader and not ignoretext:
                        cfp.write(line)
                        cfp.write('\n')
                message = cfp.getvalue()
                if tmpfp:
                    tmpfp.write(payload)
                    if not payload.endswith('\n'):
                        tmpfp.write('\n')
            elif not diffs_seen and message and content_type == 'text/plain':
                message += '\n' + payload
    except: # re-raises
        tmpfp.close()
        os.unlink(tmpname)
        raise

    if subject and not message.startswith(subject):
        message = '%s\n%s' % (subject, message)
    data['message'] = message
    tmpfp.close()
    if parents:
        data['p1'] = parents.pop(0)
    if parents:
        data['p2'] = parents.pop(0)

    if diffs_seen:
        data['filename'] = tmpname
    else:
        # No diff found: the temp file is useless, clean it up.
        os.unlink(tmpname)
    return data
312 312
class patchmeta(object):
    """Metadata describing a single patched file.

    'op' is one of ADD, DELETE, RENAME, MODIFY or COPY. 'path' is the
    patched file path. 'oldpath' names the copy/rename source when 'op'
    is COPY or RENAME, and is None otherwise. When the file mode is
    changed, 'mode' is an (islink, isexec) pair; otherwise it is None.
    'binary' is True for "GIT binary patch" entries.
    """
    def __init__(self, path):
        self.path = path
        self.oldpath = None
        self.mode = None
        self.op = 'MODIFY'
        self.binary = False

    def setmode(self, mode):
        # Keep only the two mode bits Mercurial cares about: the symlink
        # bit and the owner-execute bit.
        self.mode = (mode & 0o20000, mode & 0o100)

    def copy(self):
        clone = patchmeta(self.path)
        clone.oldpath = self.oldpath
        clone.mode = self.mode
        clone.op = self.op
        clone.binary = self.binary
        return clone

    def _ispatchinga(self, afile):
        # The "a/" side of the diff must name the source (or /dev/null
        # for an added file).
        if afile == '/dev/null':
            return self.op == 'ADD'
        source = self.oldpath or self.path
        return afile == 'a/' + source

    def _ispatchingb(self, bfile):
        # The "b/" side must name the destination (or /dev/null for a
        # deleted file).
        if bfile == '/dev/null':
            return self.op == 'DELETE'
        return bfile == 'b/' + self.path

    def ispatching(self, afile, bfile):
        return self._ispatchinga(afile) and self._ispatchingb(bfile)

    def __repr__(self):
        return "<patchmeta %s %r>" % (self.op, self.path)
358 358
def readgitpatch(lr):
    """extract git-style metadata about patches from <patchname>"""

    # Filter patch for git information
    gp = None
    gitpatches = []
    for line in lr:
        line = line.rstrip(' \r\n')
        if line.startswith('diff --git a/'):
            m = gitre.match(line)
            if m:
                # New file section: flush the previous patchmeta, if any.
                if gp:
                    gitpatches.append(gp)
                dst = m.group(2)
                gp = patchmeta(dst)
        elif gp:
            if line.startswith('--- '):
                # Diff body starts; metadata for this file is complete.
                gitpatches.append(gp)
                gp = None
                continue
            if line.startswith('rename from '):
                gp.op = 'RENAME'
                gp.oldpath = line[12:]
            elif line.startswith('rename to '):
                gp.path = line[10:]
            elif line.startswith('copy from '):
                gp.op = 'COPY'
                gp.oldpath = line[10:]
            elif line.startswith('copy to '):
                gp.path = line[8:]
            elif line.startswith('deleted file'):
                gp.op = 'DELETE'
            elif line.startswith('new file mode '):
                gp.op = 'ADD'
                # Last 6 chars are the octal mode, e.g. "100644".
                gp.setmode(int(line[-6:], 8))
            elif line.startswith('new mode '):
                gp.setmode(int(line[-6:], 8))
            elif line.startswith('GIT binary patch'):
                gp.binary = True
    if gp:
        gitpatches.append(gp)

    return gitpatches
402 402
class linereader(object):
    """Wrap a file object so lines can be pushed back and reread.

    Pushed-back lines are buffered and handed out in FIFO order before
    any further reads from the underlying file.
    """
    def __init__(self, fp):
        self.fp = fp
        self.buf = []

    def push(self, line):
        # Pushing None is a convenience no-op for callers.
        if line is None:
            return
        self.buf.append(line)

    def readline(self):
        if not self.buf:
            return self.fp.readline()
        return self.buf.pop(0)

    def __iter__(self):
        # Iterate until readline() returns the empty-string EOF marker.
        return iter(self.readline, '')
422 422
class abstractbackend(object):
    """Interface for objects patches are applied against (working
    directory, repository context, in-memory store)."""
    def __init__(self, ui):
        self.ui = ui

    def getfile(self, fname):
        """Return target file data and flags as a (data, (islink,
        isexec)) tuple. Data is None if file is missing/deleted.
        """
        raise NotImplementedError

    def setfile(self, fname, data, mode, copysource):
        """Write data to target file fname and set its mode. mode is a
        (islink, isexec) tuple. If data is None, the file content should
        be left unchanged. If the file is modified after being copied,
        copysource is set to the original file name.
        """
        raise NotImplementedError

    def unlink(self, fname):
        """Unlink target file."""
        raise NotImplementedError

    def writerej(self, fname, failed, total, lines):
        """Write rejected lines for fname. failed is the number of hunks
        which failed to apply and total the total number of hunks for
        this file.
        """
        # Intentionally a no-op here; subclasses that persist rejects
        # (e.g. fsbackend) override this.

    def exists(self, fname):
        # Report whether fname exists in the target.
        raise NotImplementedError

    def close(self):
        # Finalize pending work; semantics are subclass-specific.
        raise NotImplementedError
456 456
class fsbackend(abstractbackend):
    """Backend reading and writing real files through a vfs rooted at
    basedir."""
    def __init__(self, ui, basedir):
        super(fsbackend, self).__init__(ui)
        self.opener = vfsmod.vfs(basedir)

    def getfile(self, fname):
        if self.opener.islink(fname):
            # For a symlink, "data" is the link target.
            return (self.opener.readlink(fname), (True, False))

        isexec = False
        try:
            isexec = self.opener.lstat(fname).st_mode & 0o100 != 0
        except OSError as e:
            # Missing file: fall through, the read below reports it.
            if e.errno != errno.ENOENT:
                raise
        try:
            return (self.opener.read(fname), (False, isexec))
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
        return None, None

    def setfile(self, fname, data, mode, copysource):
        islink, isexec = mode
        if data is None:
            # Content unchanged: only update the flags.
            self.opener.setflags(fname, islink, isexec)
            return
        if islink:
            self.opener.symlink(data, fname)
        else:
            self.opener.write(fname, data)
            if isexec:
                self.opener.setflags(fname, False, True)

    def unlink(self, fname):
        self.opener.unlinkpath(fname, ignoremissing=True)

    def writerej(self, fname, failed, total, lines):
        # Rejected hunks are saved next to the target as <fname>.rej.
        fname = fname + ".rej"
        self.ui.warn(
            _("%d out of %d hunks FAILED -- saving rejects to file %s\n") %
            (failed, total, fname))
        fp = self.opener(fname, 'w')
        fp.writelines(lines)
        fp.close()

    def exists(self, fname):
        return self.opener.lexists(fname)
505 505
class workingbackend(fsbackend):
    """fsbackend that also keeps the repository dirstate in sync with
    the files it creates, modifies and removes."""
    def __init__(self, ui, repo, similarity):
        super(workingbackend, self).__init__(ui, repo.root)
        self.repo = repo
        self.similarity = similarity
        self.removed = set()
        self.changed = set()
        self.copied = []

    def _checkknown(self, fname):
        # Refuse to patch files present on disk but unknown to the
        # dirstate ('?' state).
        if self.repo.dirstate[fname] == '?' and self.exists(fname):
            raise PatchError(_('cannot patch %s: file is not tracked') % fname)

    def setfile(self, fname, data, mode, copysource):
        self._checkknown(fname)
        super(workingbackend, self).setfile(fname, data, mode, copysource)
        if copysource is not None:
            self.copied.append((copysource, fname))
        self.changed.add(fname)

    def unlink(self, fname):
        self._checkknown(fname)
        super(workingbackend, self).unlink(fname)
        self.removed.add(fname)
        self.changed.add(fname)

    def close(self):
        # Flush accumulated state into the dirstate: record copies,
        # forget removals, mark the rest as touched.
        wctx = self.repo[None]
        changed = set(self.changed)
        for src, dst in self.copied:
            scmutil.dirstatecopy(self.ui, self.repo, wctx, src, dst)
        if self.removed:
            wctx.forget(sorted(self.removed))
            for f in self.removed:
                if f not in self.repo.dirstate:
                    # File was deleted and no longer belongs to the
                    # dirstate, it was probably marked added then
                    # deleted, and should not be considered by
                    # marktouched().
                    changed.discard(f)
        if changed:
            scmutil.marktouched(self.repo, changed, self.similarity)
        return sorted(self.changed)
549 549
class filestore(object):
    """Keyed store of file (data, mode, copysource) entries, spilling to
    disk once an in-memory size budget is exceeded.

    A maxsize of None selects the default budget; a negative maxsize
    keeps everything in memory.
    """
    def __init__(self, maxsize=None):
        self.opener = None
        self.files = {}
        self.created = 0
        self.maxsize = 4 * (2 ** 20) if maxsize is None else maxsize
        self.size = 0
        self.data = {}

    def setfile(self, fname, data, mode, copied=None):
        fits = self.maxsize < 0 or (len(data) + self.size) <= self.maxsize
        if fits:
            self.data[fname] = (data, mode, copied)
            self.size += len(data)
            return
        # Budget exhausted: spill to a lazily-created temp directory.
        if self.opener is None:
            root = tempfile.mkdtemp(prefix='hg-patch-')
            self.opener = vfsmod.vfs(root)
        # Simple numeric names sidestep any filename issues.
        fn = str(self.created)
        self.opener.write(fn, data)
        self.created += 1
        self.files[fname] = (fn, mode, copied)

    def getfile(self, fname):
        try:
            return self.data[fname]
        except KeyError:
            pass
        if self.opener and fname in self.files:
            fn, mode, copied = self.files[fname]
            return self.opener.read(fn), mode, copied
        return None, None, None

    def close(self):
        if self.opener:
            shutil.rmtree(self.opener.base)
586 586
class repobackend(abstractbackend):
    """Backend applying patches against a changectx, staging results in
    a filestore instead of touching the working directory."""
    def __init__(self, ui, repo, ctx, store):
        super(repobackend, self).__init__(ui)
        self.repo = repo
        self.ctx = ctx
        self.store = store
        self.changed = set()
        self.removed = set()
        self.copied = {}

    def _checkknown(self, fname):
        if fname not in self.ctx:
            raise PatchError(_('cannot patch %s: file is not tracked') % fname)

    def getfile(self, fname):
        try:
            fctx = self.ctx[fname]
        except error.LookupError:
            return None, None
        flags = fctx.flags()
        return fctx.data(), ('l' in flags, 'x' in flags)

    def setfile(self, fname, data, mode, copysource):
        if copysource:
            self._checkknown(copysource)
        if data is None:
            # Flag-only change: keep the current content.
            data = self.ctx[fname].data()
        self.store.setfile(fname, data, mode, copysource)
        self.changed.add(fname)
        if copysource:
            self.copied[fname] = copysource

    def unlink(self, fname):
        self._checkknown(fname)
        self.removed.add(fname)

    def exists(self, fname):
        return fname in self.ctx

    def close(self):
        # Report every touched file so the caller can build the commit.
        return self.changed | self.removed
628 628
# @@ -start,len +start,len @@ or @@ -start +start @@ if len is 1
unidesc = re.compile('@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
# Range line of a context diff: "--- start,len ----" / "*** start,len ***".
contextdesc = re.compile('(?:---|\*\*\*) (\d+)(?:,(\d+))? (?:---|\*\*\*)')
# Valid values for the eolmode argument accepted by patchfile and friends.
eolmodes = ['strict', 'crlf', 'lf', 'auto']
633 633
class patchfile(object):
    """State and logic for applying hunks to a single file.

    Lines are read from (and written back through) 'backend'; 'store'
    supplies copy/rename sources. 'eolmode' selects end-of-line
    normalization ('strict', 'crlf', 'lf' or 'auto').
    """
    def __init__(self, ui, gp, backend, store, eolmode='strict'):
        self.fname = gp.path
        self.eolmode = eolmode
        self.eol = None
        self.backend = backend
        self.ui = ui
        self.lines = []
        self.exists = False
        self.missing = True
        self.mode = gp.mode
        self.copysource = gp.oldpath
        self.create = gp.op in ('ADD', 'COPY', 'RENAME')
        self.remove = gp.op == 'DELETE'
        # Copies/renames read their content from the store, everything
        # else from the backend.
        if self.copysource is None:
            data, mode = backend.getfile(self.fname)
        else:
            data, mode = store.getfile(self.copysource)[:2]
        if data is not None:
            self.exists = self.copysource is None or backend.exists(self.fname)
            self.missing = False
            if data:
                self.lines = mdiff.splitnewlines(data)
            if self.mode is None:
                self.mode = mode
            if self.lines:
                # Normalize line endings
                # The first line's ending is remembered as the file's
                # native eol (used by eolmode='auto').
                if self.lines[0].endswith('\r\n'):
                    self.eol = '\r\n'
                elif self.lines[0].endswith('\n'):
                    self.eol = '\n'
                if eolmode != 'strict':
                    nlines = []
                    for l in self.lines:
                        if l.endswith('\r\n'):
                            l = l[:-2] + '\n'
                        nlines.append(l)
                    self.lines = nlines
        else:
            if self.create:
                self.missing = False
            if self.mode is None:
                self.mode = (False, False)
        if self.missing:
            self.ui.warn(_("unable to find '%s' for patching\n") % self.fname)
            self.ui.warn(_("(use '--prefix' to apply patch relative to the "
                           "current directory)\n"))

        # hash: line content -> list of line numbers, built lazily by
        # apply() for fuzzy matching.
        self.hash = {}
        self.dirty = 0
        self.offset = 0
        self.skew = 0
        self.rej = []
        self.fileprinted = False
        self.printfile(False)
        self.hunks = 0

    def writelines(self, fname, lines, mode):
        # Map the configured eolmode to the eol actually written out.
        if self.eolmode == 'auto':
            eol = self.eol
        elif self.eolmode == 'crlf':
            eol = '\r\n'
        else:
            eol = '\n'

        if self.eolmode != 'strict' and eol and eol != '\n':
            rawlines = []
            for l in lines:
                if l and l[-1] == '\n':
                    l = l[:-1] + eol
                rawlines.append(l)
            lines = rawlines

        self.backend.setfile(fname, ''.join(lines), mode, self.copysource)

    def printfile(self, warn):
        # Print "patching file X" at most once, loudly if warn is set.
        if self.fileprinted:
            return
        if warn or self.ui.verbose:
            self.fileprinted = True
        s = _("patching file %s\n") % self.fname
        if warn:
            self.ui.warn(s)
        else:
            self.ui.note(s)


    def findlines(self, l, linenum):
        # looks through the hash and finds candidate lines. The
        # result is a list of line numbers sorted based on distance
        # from linenum

        cand = self.hash.get(l, [])
        if len(cand) > 1:
            # resort our list of potentials forward then back.
            cand.sort(key=lambda x: abs(x - linenum))
        return cand

    def write_rej(self):
        # our rejects are a little different from patch(1). This always
        # creates rejects in the same form as the original patch. A file
        # header is inserted so that you can run the reject through patch again
        # without having to type the filename.
        if not self.rej:
            return
        base = os.path.basename(self.fname)
        lines = ["--- %s\n+++ %s\n" % (base, base)]
        for x in self.rej:
            for l in x.hunk:
                lines.append(l)
                if l[-1:] != '\n':
                    lines.append("\n\ No newline at end of file\n")
        self.backend.writerej(self.fname, len(self.rej), self.hunks, lines)

    def apply(self, h):
        """Apply a single hunk; return -1 on rejection, 0 on a clean
        apply, or the fuzz level used for a fuzzy apply."""
        if not h.complete():
            raise PatchError(_("bad hunk #%d %s (%d %d %d %d)") %
                             (h.number, h.desc, len(h.a), h.lena, len(h.b),
                              h.lenb))

        self.hunks += 1

        if self.missing:
            self.rej.append(h)
            return -1

        if self.exists and self.create:
            if self.copysource:
                self.ui.warn(_("cannot create %s: destination already "
                               "exists\n") % self.fname)
            else:
                self.ui.warn(_("file %s already exists\n") % self.fname)
            self.rej.append(h)
            return -1

        if isinstance(h, binhunk):
            # Binary hunks replace the whole file (or delete it).
            if self.remove:
                self.backend.unlink(self.fname)
            else:
                l = h.new(self.lines)
                self.lines[:] = l
                self.offset += len(l)
                self.dirty = True
            return 0

        horig = h
        if (self.eolmode in ('crlf', 'lf')
            or self.eolmode == 'auto' and self.eol):
            # If new eols are going to be normalized, then normalize
            # hunk data before patching. Otherwise, preserve input
            # line-endings.
            h = h.getnormalized()

        # fast case first, no offsets, no fuzz
        old, oldstart, new, newstart = h.fuzzit(0, False)
        oldstart += self.offset
        orig_start = oldstart
        # if there's skew we want to emit the "(offset %d lines)" even
        # when the hunk cleanly applies at start + skew, so skip the
        # fast case code
        if (self.skew == 0 and
            diffhelpers.testhunk(old, self.lines, oldstart) == 0):
            if self.remove:
                self.backend.unlink(self.fname)
            else:
                self.lines[oldstart:oldstart + len(old)] = new
                self.offset += len(new) - len(old)
                self.dirty = True
            return 0

        # ok, we couldn't match the hunk. Lets look for offsets and fuzz it
        self.hash = {}
        for x, s in enumerate(self.lines):
            self.hash.setdefault(s, []).append(x)

        for fuzzlen in xrange(self.ui.configint("patch", "fuzz") + 1):
            for toponly in [True, False]:
                old, oldstart, new, newstart = h.fuzzit(fuzzlen, toponly)
                oldstart = oldstart + self.offset + self.skew
                oldstart = min(oldstart, len(self.lines))
                if old:
                    cand = self.findlines(old[0][1:], oldstart)
                else:
                    # Only adding lines with no or fuzzed context, just
                    # take the skew in account
                    cand = [oldstart]

                for l in cand:
                    if not old or diffhelpers.testhunk(old, self.lines, l) == 0:
                        self.lines[l : l + len(old)] = new
                        self.offset += len(new) - len(old)
                        self.skew = l - orig_start
                        self.dirty = True
                        offset = l - orig_start - fuzzlen
                        if fuzzlen:
                            msg = _("Hunk #%d succeeded at %d "
                                    "with fuzz %d "
                                    "(offset %d lines).\n")
                            self.printfile(True)
                            self.ui.warn(msg %
                                (h.number, l + 1, fuzzlen, offset))
                        else:
                            msg = _("Hunk #%d succeeded at %d "
                                    "(offset %d lines).\n")
                            self.ui.note(msg % (h.number, l + 1, offset))
                        return fuzzlen
        self.printfile(True)
        self.ui.warn(_("Hunk #%d FAILED at %d\n") % (h.number, orig_start))
        self.rej.append(horig)
        return -1

    def close(self):
        # Flush modified lines, persist rejects, report reject count.
        if self.dirty:
            self.writelines(self.fname, self.lines, self.mode)
        self.write_rej()
        return len(self.rej)
850 850
class header(object):
    """The header lines of a single-file patch, plus its hunks."""
    diffgit_re = re.compile('diff --git a/(.*) b/(.*)$')
    diff_re = re.compile('diff -r .* (.*)$')
    allhunks_re = re.compile('(?:index|deleted file) ')
    pretty_re = re.compile('(?:new file|deleted file) ')
    special_re = re.compile('(?:index|deleted|copy|rename) ')
    newfile_re = re.compile('(?:new file)')

    def __init__(self, header):
        self.header = header
        self.hunks = []

    def binary(self):
        # An "index " line marks a binary (all-or-nothing) change.
        for line in self.header:
            if line.startswith('index '):
                return True
        return False

    def pretty(self, fp):
        for h in self.header:
            if h.startswith('index '):
                fp.write(_('this modifies a binary file (all or nothing)\n'))
                break
            if self.pretty_re.match(h):
                fp.write(h)
                if self.binary():
                    fp.write(_('this is a binary file\n'))
                break
            if h.startswith('---'):
                fp.write(_('%d hunks, %d lines changed\n') %
                         (len(self.hunks),
                          sum([max(h.added, h.removed) for h in self.hunks])))
                break
            fp.write(h)

    def write(self, fp):
        fp.write(''.join(self.header))

    def allhunks(self):
        # True when hunks cannot be cherry-picked individually.
        for line in self.header:
            if self.allhunks_re.match(line):
                return True
        return False

    def files(self):
        firstline = self.header[0]
        m = self.diffgit_re.match(firstline)
        if m is None:
            return self.diff_re.match(firstline).groups()
        fromfile, tofile = m.groups()
        if fromfile == tofile:
            return [fromfile]
        return [fromfile, tofile]

    def filename(self):
        return self.files()[-1]

    def __repr__(self):
        return '<header %s>' % (' '.join(map(repr, self.files())))

    def isnewfile(self):
        for line in self.header:
            if self.newfile_re.match(line):
                return True
        return False

    def special(self):
        # Special files are shown only at the header level and not at the hunk
        # level for example a file that has been deleted is a special file.
        # The user cannot change the content of the operation, in the case of
        # the deleted file he has to take the deletion or not take it, he
        # cannot take some of it.
        # Newly added files are special if they are empty, they are not special
        # if they have some content as we want to be able to change it
        nocontent = len(self.header) == 2
        emptynewfile = self.isnewfile() and nocontent
        if emptynewfile:
            return True
        return any(self.special_re.match(h) for h in self.header)
922 922
923 923 class recordhunk(object):
924 924 """patch hunk
925 925
926 926 XXX shouldn't we merge this with the other hunk class?
927 927 """
928 928
929 929 def __init__(self, header, fromline, toline, proc, before, hunk, after,
930 930 maxcontext=None):
931 931 def trimcontext(lines, reverse=False):
932 932 if maxcontext is not None:
933 933 delta = len(lines) - maxcontext
934 934 if delta > 0:
935 935 if reverse:
936 936 return delta, lines[delta:]
937 937 else:
938 938 return delta, lines[:maxcontext]
939 939 return 0, lines
940 940
941 941 self.header = header
942 942 trimedbefore, self.before = trimcontext(before, True)
943 943 self.fromline = fromline + trimedbefore
944 944 self.toline = toline + trimedbefore
945 945 _trimedafter, self.after = trimcontext(after, False)
946 946 self.proc = proc
947 947 self.hunk = hunk
948 948 self.added, self.removed = self.countchanges(self.hunk)
949 949
950 950 def __eq__(self, v):
951 951 if not isinstance(v, recordhunk):
952 952 return False
953 953
954 954 return ((v.hunk == self.hunk) and
955 955 (v.proc == self.proc) and
956 956 (self.fromline == v.fromline) and
957 957 (self.header.files() == v.header.files()))
958 958
959 959 def __hash__(self):
960 960 return hash((tuple(self.hunk),
961 961 tuple(self.header.files()),
962 962 self.fromline,
963 963 self.proc))
964 964
965 965 def countchanges(self, hunk):
966 966 """hunk -> (n+,n-)"""
967 967 add = len([h for h in hunk if h.startswith('+')])
968 968 rem = len([h for h in hunk if h.startswith('-')])
969 969 return add, rem
970 970
971 971 def reversehunk(self):
972 972 """return another recordhunk which is the reverse of the hunk
973 973
974 974 If this hunk is diff(A, B), the returned hunk is diff(B, A). To do
975 975 that, swap fromline/toline and +/- signs while keep other things
976 976 unchanged.
977 977 """
978 978 m = {'+': '-', '-': '+', '\\': '\\'}
979 979 hunk = ['%s%s' % (m[l[0:1]], l[1:]) for l in self.hunk]
980 980 return recordhunk(self.header, self.toline, self.fromline, self.proc,
981 981 self.before, hunk, self.after)
982 982
983 983 def write(self, fp):
984 984 delta = len(self.before) + len(self.after)
985 985 if self.after and self.after[-1] == '\\ No newline at end of file\n':
986 986 delta -= 1
987 987 fromlen = delta + self.removed
988 988 tolen = delta + self.added
989 989 fp.write('@@ -%d,%d +%d,%d @@%s\n' %
990 990 (self.fromline, fromlen, self.toline, tolen,
991 991 self.proc and (' ' + self.proc)))
992 992 fp.write(''.join(self.before + self.hunk + self.after))
993 993
994 994 pretty = write
995 995
    def filename(self):
        """Name of the file this hunk patches (delegates to the header)."""
        return self.header.filename()
998 998
    def __repr__(self):
        # e.g. <hunk 'foo/bar.py'@42>
        return '<hunk %r@%d>' % (self.filename(), self.fromline)
1001 1001
def getmessages():
    """Return the prompt and help strings used by filterpatch().

    The table is keyed first by situation ('multiple'/'single' change,
    or 'help') and then by operation ('apply', 'discard', 'record').
    The '$$'-separated help strings follow ui.promptchoice()'s choice
    format. Built in a function (presumably so _() runs at call time,
    after locale setup — confirm against i18n init order).
    """
    return {
        'multiple': {
            'apply': _("apply change %d/%d to '%s'?"),
            'discard': _("discard change %d/%d to '%s'?"),
            'record': _("record change %d/%d to '%s'?"),
        },
        'single': {
            'apply': _("apply this change to '%s'?"),
            'discard': _("discard this change to '%s'?"),
            'record': _("record this change to '%s'?"),
        },
        'help': {
            'apply': _('[Ynesfdaq?]'
                       '$$ &Yes, apply this change'
                       '$$ &No, skip this change'
                       '$$ &Edit this change manually'
                       '$$ &Skip remaining changes to this file'
                       '$$ Apply remaining changes to this &file'
                       '$$ &Done, skip remaining changes and files'
                       '$$ Apply &all changes to all remaining files'
                       '$$ &Quit, applying no changes'
                       '$$ &? (display help)'),
            'discard': _('[Ynesfdaq?]'
                         '$$ &Yes, discard this change'
                         '$$ &No, skip this change'
                         '$$ &Edit this change manually'
                         '$$ &Skip remaining changes to this file'
                         '$$ Discard remaining changes to this &file'
                         '$$ &Done, skip remaining changes and files'
                         '$$ Discard &all changes to all remaining files'
                         '$$ &Quit, discarding no changes'
                         '$$ &? (display help)'),
            'record': _('[Ynesfdaq?]'
                        '$$ &Yes, record this change'
                        '$$ &No, skip this change'
                        '$$ &Edit this change manually'
                        '$$ &Skip remaining changes to this file'
                        '$$ Record remaining changes to this &file'
                        '$$ &Done, skip remaining changes and files'
                        '$$ Record &all changes to all remaining files'
                        '$$ &Quit, recording no changes'
                        '$$ &? (display help)'),
        }
    }
1047 1047
def filterpatch(ui, headers, operation=None):
    """Interactively filter patch chunks into applied-only chunks

    Returns a tuple (kept_chunks, {}): the list interleaves headers and
    the hunks the user accepted; headers whose hunks were all refused
    are dropped unless the header itself is special().
    """
    messages = getmessages()

    if operation is None:
        operation = 'record'

    def prompt(skipfile, skipall, query, chunk):
        """prompt query, and process base inputs

        - y/n for the rest of file
        - y/n for the rest
        - ? (help)
        - q (quit)

        Return True/False and possibly updated skipfile and skipall.

        skipfile/skipall are tri-state: None means "keep asking", a
        boolean is the remembered blanket answer.
        """
        newpatches = None
        if skipall is not None:
            return skipall, skipfile, skipall, newpatches
        if skipfile is not None:
            return skipfile, skipfile, skipall, newpatches
        while True:
            resps = messages['help'][operation]
            r = ui.promptchoice("%s %s" % (query, resps))
            ui.write("\n")
            if r == 8: # ?
                for c, t in ui.extractchoices(resps)[1]:
                    ui.write('%s - %s\n' % (c, encoding.lower(t)))
                continue
            elif r == 0: # yes
                ret = True
            elif r == 1: # no
                ret = False
            elif r == 2: # Edit patch
                if chunk is None:
                    ui.write(_('cannot edit patch for whole file'))
                    ui.write("\n")
                    continue
                if chunk.header.binary():
                    ui.write(_('cannot edit patch for binary file'))
                    ui.write("\n")
                    continue
                # Patch comment based on the Git one (based on comment at end of
                # https://mercurial-scm.org/wiki/RecordExtension)
                phelp = '---' + _("""
To remove '-' lines, make them ' ' lines (context).
To remove '+' lines, delete them.
Lines starting with # will be removed from the patch.

If the patch applies cleanly, the edited hunk will immediately be
added to the record list. If it does not apply cleanly, a rejects
file will be generated: you can use that when you try again. If
all lines of the hunk are removed, then the edit is aborted and
the hunk is left unchanged.
""")
                (patchfd, patchfn) = tempfile.mkstemp(prefix="hg-editor-",
                                                      suffix=".diff", text=True)
                ncpatchfp = None
                try:
                    # Write the initial patch
                    f = os.fdopen(patchfd, pycompat.sysstr("w"))
                    chunk.header.write(f)
                    chunk.write(f)
                    # append the help text as '#' comments, stripped below
                    f.write('\n'.join(['# ' + i for i in phelp.splitlines()]))
                    f.close()
                    # Start the editor and wait for it to complete
                    editor = ui.geteditor()
                    ret = ui.system("%s \"%s\"" % (editor, patchfn),
                                    environ={'HGUSER': ui.username()},
                                    blockedtag='filterpatch')
                    if ret != 0:
                        ui.warn(_("editor exited with exit code %d\n") % ret)
                        continue
                    # Remove comment lines
                    patchfp = open(patchfn)
                    ncpatchfp = stringio()
                    for line in util.iterfile(patchfp):
                        if not line.startswith('#'):
                            ncpatchfp.write(line)
                    patchfp.close()
                    ncpatchfp.seek(0)
                    newpatches = parsepatch(ncpatchfp)
                finally:
                    os.unlink(patchfn)
                    del ncpatchfp
                # Signal that the chunk shouldn't be applied as-is, but
                # provide the new patch to be used instead.
                ret = False
            elif r == 3: # Skip
                ret = skipfile = False
            elif r == 4: # file (Record remaining)
                ret = skipfile = True
            elif r == 5: # done, skip remaining
                ret = skipall = False
            elif r == 6: # all
                ret = skipall = True
            elif r == 7: # quit
                raise error.Abort(_('user quit'))
            return ret, skipfile, skipall, newpatches

    # headers already shown to the user, deduped by raw header text
    seen = set()
    applied = {}        # 'filename' -> [] of chunks
    skipfile, skipall = None, None
    pos, total = 1, sum(len(h.hunks) for h in headers)
    for h in headers:
        pos += len(h.hunks)
        skipfile = None
        # cumulative line offset caused by hunks refused earlier in
        # this file; applied later hunks must be shifted by it
        fixoffset = 0
        hdr = ''.join(h.header)
        if hdr in seen:
            continue
        seen.add(hdr)
        if skipall is None:
            h.pretty(ui)
        msg = (_('examine changes to %s?') %
               _(' and ').join("'%s'" % f for f in h.files()))
        r, skipfile, skipall, np = prompt(skipfile, skipall, msg, None)
        if not r:
            continue
        applied[h.filename()] = [h]
        if h.allhunks():
            applied[h.filename()] += h.hunks
            continue
        for i, chunk in enumerate(h.hunks):
            if skipfile is None and skipall is None:
                chunk.pretty(ui)
            if total == 1:
                msg = messages['single'][operation] % chunk.filename()
            else:
                idx = pos - len(h.hunks) + i
                msg = messages['multiple'][operation] % (idx, total,
                                                         chunk.filename())
            r, skipfile, skipall, newpatches = prompt(skipfile,
                    skipall, msg, chunk)
            if r:
                if fixoffset:
                    # copy before mutating: the original hunk may be shared
                    chunk = copy.copy(chunk)
                    chunk.toline += fixoffset
                applied[chunk.filename()].append(chunk)
            elif newpatches is not None:
                for newpatch in newpatches:
                    for newhunk in newpatch.hunks:
                        if fixoffset:
                            newhunk.toline += fixoffset
                        applied[newhunk.filename()].append(newhunk)
            else:
                fixoffset += chunk.removed - chunk.added
    # keep files with at least one accepted hunk, or a special() header
    return (sum([h for h in applied.itervalues()
               if h[0].special() or len(h) > 1], []), {})
class hunk(object):
    """One hunk parsed from a unified or context diff.

    self.a holds the old-side lines (with '-'/' ' prefixes), self.b the
    new-side lines (prefix stripped), and self.hunk the raw lines
    including the descriptor, suitable for re-emission.
    """
    def __init__(self, desc, num, lr, context):
        # desc: the hunk descriptor line; num: 1-based hunk number used
        # in error messages; lr: linereader to consume the body from,
        # or None to build an empty shell (see getnormalized);
        # context: True selects context-diff parsing, else unified
        self.number = num
        self.desc = desc
        self.hunk = [desc]
        self.a = []
        self.b = []
        self.starta = self.lena = None
        self.startb = self.lenb = None
        if lr is not None:
            if context:
                self.read_context_hunk(lr)
            else:
                self.read_unified_hunk(lr)

    def getnormalized(self):
        """Return a copy with line endings normalized to LF."""

        def normalize(lines):
            nlines = []
            for line in lines:
                if line.endswith('\r\n'):
                    line = line[:-2] + '\n'
                nlines.append(line)
            return nlines

        # Dummy object, it is rebuilt manually
        nh = hunk(self.desc, self.number, None, None)
        nh.number = self.number
        nh.desc = self.desc
        nh.hunk = self.hunk
        nh.a = normalize(self.a)
        nh.b = normalize(self.b)
        nh.starta = self.starta
        nh.startb = self.startb
        nh.lena = self.lena
        nh.lenb = self.lenb
        return nh

    def read_unified_hunk(self, lr):
        """Parse a unified-diff hunk body from lr.

        Ranges come from the '@@' descriptor; an omitted length
        defaults to 1.
        """
        m = unidesc.match(self.desc)
        if not m:
            raise PatchError(_("bad hunk #%d") % self.number)
        self.starta, self.lena, self.startb, self.lenb = m.groups()
        if self.lena is None:
            self.lena = 1
        else:
            self.lena = int(self.lena)
        if self.lenb is None:
            self.lenb = 1
        else:
            self.lenb = int(self.lenb)
        self.starta = int(self.starta)
        self.startb = int(self.startb)
        diffhelpers.addlines(lr, self.hunk, self.lena, self.lenb, self.a,
                             self.b)
        # if we hit eof before finishing out the hunk, the last line will
        # be zero length. Lets try to fix it up.
        while len(self.hunk[-1]) == 0:
            del self.hunk[-1]
            del self.a[-1]
            del self.b[-1]
            self.lena -= 1
            self.lenb -= 1
        self._fixnewline(lr)

    def read_context_hunk(self, lr):
        """Parse a context-diff hunk (an old '*** a,b ****' block
        followed by a new '--- c,d ----' block) and rebuild it in
        unified form in self.hunk."""
        self.desc = lr.readline()
        m = contextdesc.match(self.desc)
        if not m:
            raise PatchError(_("bad hunk #%d") % self.number)
        self.starta, aend = m.groups()
        self.starta = int(self.starta)
        if aend is None:
            aend = self.starta
        self.lena = int(aend) - self.starta
        if self.starta:
            self.lena += 1
        # old block: collect '-' (removed/changed) and ' ' (context)
        for x in xrange(self.lena):
            l = lr.readline()
            if l.startswith('---'):
                # lines addition, old block is empty
                lr.push(l)
                break
            s = l[2:]
            if l.startswith('- ') or l.startswith('! '):
                u = '-' + s
            elif l.startswith('  '):
                u = ' ' + s
            else:
                raise PatchError(_("bad hunk #%d old text line %d") %
                                 (self.number, x))
            self.a.append(u)
            self.hunk.append(u)

        l = lr.readline()
        if l.startswith('\ '):
            # no-eol marker: strip the newline from the previous line
            s = self.a[-1][:-1]
            self.a[-1] = s
            self.hunk[-1] = s
            l = lr.readline()
        m = contextdesc.match(l)
        if not m:
            raise PatchError(_("bad hunk #%d") % self.number)
        self.startb, bend = m.groups()
        self.startb = int(self.startb)
        if bend is None:
            bend = self.startb
        self.lenb = int(bend) - self.startb
        if self.startb:
            self.lenb += 1
        # merge the new block into self.hunk, interleaving '+' lines
        # after the matching context/removal lines; hunki walks self.hunk
        hunki = 1
        for x in xrange(self.lenb):
            l = lr.readline()
            if l.startswith('\ '):
                # XXX: the only way to hit this is with an invalid line range.
                # The no-eol marker is not counted in the line range, but I
                # guess there are diff(1) out there which behave differently.
                s = self.b[-1][:-1]
                self.b[-1] = s
                self.hunk[hunki - 1] = s
                continue
            if not l:
                # line deletions, new block is empty and we hit EOF
                lr.push(l)
                break
            s = l[2:]
            if l.startswith('+ ') or l.startswith('! '):
                u = '+' + s
            elif l.startswith('  '):
                u = ' ' + s
            elif len(self.b) == 0:
                # line deletions, new block is empty
                lr.push(l)
                break
            else:
                # XXX: message says 'old text' but this is the new block
                raise PatchError(_("bad hunk #%d old text line %d") %
                                 (self.number, x))
            self.b.append(s)
            while True:
                if hunki >= len(self.hunk):
                    h = ""
                else:
                    h = self.hunk[hunki]
                hunki += 1
                if h == u:
                    break
                elif h.startswith('-'):
                    continue
                else:
                    self.hunk.insert(hunki - 1, u)
                    break

        if not self.a:
            # this happens when lines were only added to the hunk
            for x in self.hunk:
                if x.startswith('-') or x.startswith(' '):
                    self.a.append(x)
        if not self.b:
            # this happens when lines were only deleted from the hunk
            for x in self.hunk:
                if x.startswith('+') or x.startswith(' '):
                    self.b.append(x[1:])
        # @@ -start,len +start,len @@
        self.desc = "@@ -%d,%d +%d,%d @@\n" % (self.starta, self.lena,
                                               self.startb, self.lenb)
        self.hunk[0] = self.desc
        self._fixnewline(lr)

    def _fixnewline(self, lr):
        """Consume a trailing no-eol marker line, fixing the hunk up,
        or push the line back if it is not one."""
        l = lr.readline()
        if l.startswith('\ '):
            diffhelpers.fix_newline(self.hunk, self.a, self.b)
        else:
            lr.push(l)

    def complete(self):
        """True when both sides hold as many lines as the ranges claim."""
        return len(self.a) == self.lena and len(self.b) == self.lenb

    def _fuzzit(self, old, new, fuzz, toponly):
        # this removes context lines from the top and bottom of list 'l'. It
        # checks the hunk to make sure only context lines are removed, and then
        # returns a new shortened list of lines.
        fuzz = min(fuzz, len(old))
        if fuzz:
            top = 0
            bot = 0
            hlen = len(self.hunk)
            for x in xrange(hlen - 1):
                # the hunk starts with the @@ line, so use x+1
                if self.hunk[x + 1][0] == ' ':
                    top += 1
                else:
                    break
            if not toponly:
                for x in xrange(hlen - 1):
                    if self.hunk[hlen - bot - 1][0] == ' ':
                        bot += 1
                    else:
                        break

            bot = min(fuzz, bot)
            top = min(fuzz, top)
            return old[top:len(old) - bot], new[top:len(new) - bot], top
        return old, new, 0

    def fuzzit(self, fuzz, toponly):
        """Return (old, oldstart, new, newstart) with up to 'fuzz'
        context lines trimmed from the hunk ends, and the starts
        adjusted accordingly."""
        old, new, top = self._fuzzit(self.a, self.b, fuzz, toponly)
        oldstart = self.starta + top
        newstart = self.startb + top
        # zero length hunk ranges already have their start decremented
        if self.lena and oldstart > 0:
            oldstart -= 1
        if self.lenb and newstart > 0:
            newstart -= 1
        return old, oldstart, new, newstart
1414 1414
class binhunk(object):
    'A binary patch file.'
    def __init__(self, lr, fname):
        # text: the decoded payload (literal content or delta), set by
        # _read(); delta: True when the payload is a git binary delta
        self.text = None
        self.delta = False
        self.hunk = ['GIT binary patch\n']
        self._fname = fname
        self._read(lr)

    def complete(self):
        # _read() either sets self.text or raises PatchError
        return self.text is not None

    def new(self, lines):
        """Return the new file content as a one-element list; for a
        delta hunk, 'lines' is the old content the delta applies to."""
        if self.delta:
            return [applybindelta(self.text, ''.join(lines))]
        return [self.text]

    def _read(self, lr):
        """Parse a 'literal N' or 'delta N' base85 block from lr."""
        def getline(lr, hunk):
            # read one line, keeping the raw copy in self.hunk
            l = lr.readline()
            hunk.append(l)
            return l.rstrip('\r\n')

        size = 0
        while True:
            line = getline(lr, self.hunk)
            if not line:
                raise PatchError(_('could not extract "%s" binary data')
                                 % self._fname)
            if line.startswith('literal '):
                size = int(line[8:].rstrip())
                break
            if line.startswith('delta '):
                size = int(line[6:].rstrip())
                self.delta = True
                break
        dec = []
        line = getline(lr, self.hunk)
        while len(line) > 1:
            # first char encodes the decoded length of the line:
            # 'A'-'Z' -> 1-26, everything else (git uses 'a'-'z') -> 27-52
            l = line[0]
            if l <= 'Z' and l >= 'A':
                l = ord(l) - ord('A') + 1
            else:
                l = ord(l) - ord('a') + 27
            try:
                dec.append(util.b85decode(line[1:])[:l])
            except ValueError as e:
                raise PatchError(_('could not decode "%s" binary patch: %s')
                                 % (self._fname, str(e)))
            line = getline(lr, self.hunk)
        text = zlib.decompress(''.join(dec))
        if len(text) != size:
            raise PatchError(_('"%s" length is %d bytes, should be %d')
                             % (self._fname, len(text), size))
        self.text = text
1470 1470
def parsefilename(str):
    """Extract the file name from a '--- ' / '+++ ' diff header line.

    The four-character prefix and the trailing line ending are dropped;
    anything after the first tab (or, failing that, the first space) is
    discarded as timestamp/annotation text.
    """
    # --- filename \t|space stuff
    name = str[4:].rstrip('\r\n')
    for sep in ('\t', ' '):
        cut = name.find(sep)
        if cut >= 0:
            return name[:cut]
    return name
1480 1480
def reversehunks(hunks):
    '''reverse the signs in the hunks given as argument

    This function operates on hunks coming out of patch.filterpatch, that is
    a list of the form: [header1, hunk1, hunk2, header2...]. Example usage:

    >>> rawpatch = b"""diff --git a/folder1/g b/folder1/g
    ... --- a/folder1/g
    ... +++ b/folder1/g
    ... @@ -1,7 +1,7 @@
    ... +firstline
    ...  c
    ...  1
    ...  2
    ... + 3
    ... -4
    ...  5
    ...  d
    ... +lastline"""
    >>> hunks = parsepatch([rawpatch])
    >>> hunkscomingfromfilterpatch = []
    >>> for h in hunks:
    ...     hunkscomingfromfilterpatch.append(h)
    ...     hunkscomingfromfilterpatch.extend(h.hunks)

    >>> reversedhunks = reversehunks(hunkscomingfromfilterpatch)
    >>> from . import util
    >>> fp = util.stringio()
    >>> for c in reversedhunks:
    ...     c.write(fp)
    >>> fp.seek(0) or None
    >>> reversedpatch = fp.read()
    >>> print(pycompat.sysstr(reversedpatch))
    diff --git a/folder1/g b/folder1/g
    --- a/folder1/g
    +++ b/folder1/g
    @@ -1,4 +1,3 @@
    -firstline
     c
     1
     2
    @@ -2,6 +1,6 @@
     c
     1
     2
    - 3
    +4
     5
     d
    @@ -6,3 +5,2 @@
     5
     d
    -lastline

    '''

    # headers pass through untouched; anything exposing a reversehunk()
    # method (i.e. recordhunk) is replaced by its reverse
    return [c.reversehunk() if util.safehasattr(c, 'reversehunk') else c
            for c in hunks]
1543 1543
def parsepatch(originalchunks, maxcontext=None):
    """patch -> [] of headers -> [] of hunks

    If maxcontext is not None, trim context lines if necessary.

    >>> rawpatch = b'''diff --git a/folder1/g b/folder1/g
    ... --- a/folder1/g
    ... +++ b/folder1/g
    ... @@ -1,8 +1,10 @@
    ...  1
    ...  2
    ... -3
    ...  4
    ...  5
    ...  6
    ... +6.1
    ... +6.2
    ...  7
    ...  8
    ... +9'''
    >>> out = util.stringio()
    >>> headers = parsepatch([rawpatch], maxcontext=1)
    >>> for header in headers:
    ...     header.write(out)
    ...     for hunk in header.hunks:
    ...         hunk.write(out)
    >>> print(pycompat.sysstr(out.getvalue()))
    diff --git a/folder1/g b/folder1/g
    --- a/folder1/g
    +++ b/folder1/g
    @@ -2,3 +2,2 @@
     2
    -3
     4
    @@ -6,2 +5,4 @@
     6
    +6.1
    +6.2
     7
    @@ -8,1 +9,2 @@
     8
    +9
    """
    class parser(object):
        """patch parsing state machine"""
        def __init__(self):
            self.fromline = 0
            self.toline = 0
            self.proc = ''
            self.header = None
            self.context = []
            self.before = []
            self.hunk = []
            self.headers = []

        def addrange(self, limits):
            # record the start lines from an @@ range descriptor
            fromstart, fromend, tostart, toend, proc = limits
            self.fromline = int(fromstart)
            self.toline = int(tostart)
            self.proc = proc

        def addcontext(self, context):
            # flush the pending hunk (if any) with 'context' as its
            # trailing context, then keep 'context' as leading context
            # for the next hunk
            if self.hunk:
                h = recordhunk(self.header, self.fromline, self.toline,
                        self.proc, self.before, self.hunk, context, maxcontext)
                self.header.hunks.append(h)
                self.fromline += len(self.before) + h.removed
                self.toline += len(self.before) + h.added
                self.before = []
                self.hunk = []
            self.context = context

        def addhunk(self, hunk):
            # context seen just before the hunk becomes its leading context
            if self.context:
                self.before = self.context
                self.context = []
            self.hunk = hunk

        def newfile(self, hdr):
            self.addcontext([])
            h = header(hdr)
            self.headers.append(h)
            self.header = h

        def addother(self, line):
            pass # 'other' lines are ignored

        def finished(self):
            self.addcontext([])
            return self.headers

        # legal (state -> event) transitions; the values are unbound
        # functions invoked as transitions[state][newstate](p, data)
        transitions = {
            'file': {'context': addcontext,
                     'file': newfile,
                     'hunk': addhunk,
                     'range': addrange},
            'context': {'file': newfile,
                        'hunk': addhunk,
                        'range': addrange,
                        'other': addother},
            'hunk': {'context': addcontext,
                     'file': newfile,
                     'range': addrange},
            'range': {'context': addcontext,
                      'hunk': addhunk},
            'other': {'other': addother},
        }

    p = parser()
    fp = stringio()
    fp.write(''.join(originalchunks))
    fp.seek(0)

    state = 'context'
    for newstate, data in scanpatch(fp):
        try:
            p.transitions[state][newstate](p, data)
        except KeyError:
            raise PatchError('unhandled transition: %s -> %s' %
                             (state, newstate))
        state = newstate
    del fp
    return p.finished()
1667 1667
def pathtransform(path, strip, prefix):
    '''turn a path from a patch into a path suitable for the repository

    prefix, if not empty, is expected to be normalized with a / at the end.

    Returns (stripped components, path in repository).

    >>> pathtransform(b'a/b/c', 0, b'')
    ('', 'a/b/c')
    >>> pathtransform(b'   a/b/c   ', 0, b'')
    ('', '   a/b/c')
    >>> pathtransform(b'   a/b/c   ', 2, b'')
    ('a/b/', 'c')
    >>> pathtransform(b'a/b/c', 0, b'd/e/')
    ('', 'd/e/a/b/c')
    >>> pathtransform(b'   a//b/c   ', 2, b'd/e/')
    ('a//b/', 'd/e/c')
    >>> pathtransform(b'a/b/c', 3, b'')
    Traceback (most recent call last):
    PatchError: unable to strip away 1 of 3 dirs from a/b/c
    '''
    if strip == 0:
        return '', prefix + path.rstrip()
    end = len(path) - 1
    pos = 0
    remaining = strip
    while remaining > 0:
        pos = path.find('/', pos)
        if pos == -1:
            raise PatchError(_("unable to strip away %d of %d dirs from %s") %
                             (remaining, strip, path))
        pos += 1
        # swallow runs of '/' so 'a//b' strips like 'a/b'
        while pos < end and path[pos:pos + 1] == '/':
            pos += 1
        remaining -= 1
    return path[:pos].lstrip(), prefix + path[pos:].rstrip()
1705 1705
def makepatchmeta(backend, afile_orig, bfile_orig, hunk, strip, prefix):
    """Build a patchmeta for a plain (non-git) patch hunk.

    From the raw '---'/'+++' names afile_orig/bfile_orig (transformed
    through pathtransform with strip/prefix), decide which file the
    hunk targets and whether the patch creates ('ADD') or deletes
    ('DELETE') it. Raises PatchError when neither a source nor a
    destination file can be determined.
    """
    nulla = afile_orig == "/dev/null"
    nullb = bfile_orig == "/dev/null"
    # a null source/target combined with a 0,0 range means add/remove
    create = nulla and hunk.starta == 0 and hunk.lena == 0
    remove = nullb and hunk.startb == 0 and hunk.lenb == 0
    abase, afile = pathtransform(afile_orig, strip, prefix)
    gooda = not nulla and backend.exists(afile)
    bbase, bfile = pathtransform(bfile_orig, strip, prefix)
    if afile == bfile:
        goodb = gooda
    else:
        goodb = not nullb and backend.exists(bfile)
    missing = not goodb and not gooda and not create

    # some diff programs apparently produce patches where the afile is
    # not /dev/null, but afile starts with bfile
    abasedir = afile[:afile.rfind('/') + 1]
    bbasedir = bfile[:bfile.rfind('/') + 1]
    if (missing and abasedir == bbasedir and afile.startswith(bfile)
        and hunk.starta == 0 and hunk.lena == 0):
        create = True
        missing = False

    # If afile is "a/b/foo" and bfile is "a/b/foo.orig" we assume the
    # diff is between a file and its backup. In this case, the original
    # file should be patched (see original mpatch code).
    isbackup = (abase == bbase and bfile.startswith(afile))
    fname = None
    if not missing:
        if gooda and goodb:
            if isbackup:
                fname = afile
            else:
                fname = bfile
        elif gooda:
            fname = afile

    if not fname:
        # fall back on the patch's own claims about what exists
        if not nullb:
            if isbackup:
                fname = afile
            else:
                fname = bfile
        elif not nulla:
            fname = afile
        else:
            raise PatchError(_("undefined source and destination files"))

    gp = patchmeta(fname)
    if create:
        gp.op = 'ADD'
    elif remove:
        gp.op = 'DELETE'
    return gp
1760 1760
def scanpatch(fp):
    """like patch.iterhunks, but yield different events

    - ('file', [header_lines + fromfile + tofile])
    - ('context', [context_lines])
    - ('hunk', [hunk_lines])
    - ('range', (-start,len, +start,len, proc))
    """
    lines_re = re.compile(br'@@ -(\d+),(\d+) \+(\d+),(\d+) @@\s*(.*)')
    lr = linereader(fp)

    def scanwhile(first, p):
        """scan lr while predicate holds"""
        lines = [first]
        for line in iter(lr.readline, ''):
            if p(line):
                lines.append(line)
            else:
                # not part of this run: push back for the caller
                lr.push(line)
                break
        return lines

    for line in iter(lr.readline, ''):
        if line.startswith('diff --git a/') or line.startswith('diff -r '):
            def notheader(line):
                s = line.split(None, 1)
                return not s or s[0] not in ('---', 'diff')
            header = scanwhile(line, notheader)
            fromfile = lr.readline()
            if fromfile.startswith('---'):
                tofile = lr.readline()
                header += [fromfile, tofile]
            else:
                lr.push(fromfile)
            yield 'file', header
        elif line[0:1] == ' ':
            # context run; '\' also matches the no-eol marker line
            yield 'context', scanwhile(line, lambda l: l[0] in ' \\')
        elif line[0] in '-+':
            yield 'hunk', scanwhile(line, lambda l: l[0] in '-+\\')
        else:
            m = lines_re.match(line)
            if m:
                yield 'range', m.groups()
            else:
                yield 'other', line
1806 1806
def scangitpatch(lr, firstline):
    """Pre-scan a git patch for copy/rename metadata.

    Git patches can emit:
    - rename a to b
    - change b
    - copy a to c
    - change c

    Such a sequence cannot be applied as it is read: by the time the
    renamed 'a' is needed it no longer exists, and copying from 'b'
    instead would pick up b's changes. So the whole patch is scanned
    up front for copy and rename commands, allowing the copies to be
    performed ahead of time. The stream position is restored before
    returning.
    """
    pos = 0
    try:
        # seekable stream: remember where we are and scan in place
        pos = lr.fp.tell()
        fp = lr.fp
    except IOError:
        # not seekable: slurp the remainder into an in-memory buffer
        fp = stringio(lr.fp.read())
    scanner = linereader(fp)
    scanner.push(firstline)
    gitpatches = readgitpatch(scanner)
    fp.seek(pos)
    return gitpatches
1832 1832
def iterhunks(fp):
    """Read a patch and yield the following events:
    - ("file", afile, bfile, firsthunk): select a new target file.
    - ("hunk", hunk): a new hunk is ready to be applied, follows a
    "file" event.
    - ("git", gitchanges): current diff is in git format, gitchanges
    maps filenames to gitpatch records. Unique event.
    """
    afile = ""
    bfile = ""
    state = None
    hunknum = 0
    emitfile = newfile = False
    gitpatches = None

    # our states
    BFILE = 1
    # context is tri-state: None = unknown yet, True = context diff,
    # False = unified diff
    context = None
    lr = linereader(fp)

    for x in iter(lr.readline, ''):
        if state == BFILE and (
            (not context and x[0] == '@')
            or (context is not False and x.startswith('***************'))
            or x.startswith('GIT binary patch')):
            gp = None
            if (gitpatches and
                gitpatches[-1].ispatching(afile, bfile)):
                gp = gitpatches.pop()
            if x.startswith('GIT binary patch'):
                h = binhunk(lr, gp.path)
            else:
                if context is None and x.startswith('***************'):
                    context = True
                h = hunk(x, hunknum + 1, lr, context)
            hunknum += 1
            if emitfile:
                # first hunk of this file: announce the file first
                emitfile = False
                yield 'file', (afile, bfile, h, gp and gp.copy() or None)
            yield 'hunk', h
        elif x.startswith('diff --git a/'):
            m = gitre.match(x.rstrip(' \r\n'))
            if not m:
                continue
            if gitpatches is None:
                # scan whole input for git metadata
                gitpatches = scangitpatch(lr, x)
                yield 'git', [g.copy() for g in gitpatches
                              if g.op in ('COPY', 'RENAME')]
                gitpatches.reverse()
            afile = 'a/' + m.group(1)
            bfile = 'b/' + m.group(2)
            # flush hunkless metadata-only entries preceding this file
            while gitpatches and not gitpatches[-1].ispatching(afile, bfile):
                gp = gitpatches.pop()
                yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, gp.copy())
            if not gitpatches:
                raise PatchError(_('failed to synchronize metadata for "%s"')
                                 % afile[2:])
            gp = gitpatches[-1]
            newfile = True
        elif x.startswith('---'):
            # check for a unified diff
            l2 = lr.readline()
            if not l2.startswith('+++'):
                lr.push(l2)
                continue
            newfile = True
            context = False
            afile = parsefilename(x)
            bfile = parsefilename(l2)
        elif x.startswith('***'):
            # check for a context diff
            l2 = lr.readline()
            if not l2.startswith('---'):
                lr.push(l2)
                continue
            l3 = lr.readline()
            lr.push(l3)
            if not l3.startswith("***************"):
                lr.push(l2)
                continue
            newfile = True
            context = True
            afile = parsefilename(x)
            bfile = parsefilename(l2)

        if newfile:
            newfile = False
            emitfile = True
            state = BFILE
            hunknum = 0

    # emit any remaining hunkless git entries (e.g. pure renames)
    while gitpatches:
        gp = gitpatches.pop()
        yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, gp.copy())
1928 1928
def applybindelta(binchunk, data):
    """Apply a binary delta hunk
    The algorithm used is the algorithm from git's patch-delta.c

    binchunk starts with two base-128 varints (source and target
    sizes, skipped here), followed by copy commands (high bit set:
    copy offset/size from 'data') and literal inserts (command byte =
    literal length).
    """
    def _varintlen(buf):
        # number of bytes used by the little-endian base-128 varint at
        # the start of buf: a clear high bit marks the last byte
        n = 0
        for ch in buf:
            n += 1
            if not (ord(ch) & 0x80):
                break
        return n

    binchunk = binchunk[_varintlen(binchunk):]   # skip source size
    binchunk = binchunk[_varintlen(binchunk):]   # skip target size
    pieces = []
    pos = 0
    end = len(binchunk)
    while pos < end:
        cmd = ord(binchunk[pos])
        pos += 1
        if cmd & 0x80:
            # copy from source: flag bits select which offset/size
            # bytes are present (little-endian)
            offset = 0
            size = 0
            shift = 0
            for flag in (0x01, 0x02, 0x04, 0x08):
                if cmd & flag:
                    offset |= ord(binchunk[pos]) << shift
                    pos += 1
                shift += 8
            shift = 0
            for flag in (0x10, 0x20, 0x40):
                if cmd & flag:
                    size |= ord(binchunk[pos]) << shift
                    pos += 1
                shift += 8
            if size == 0:
                # all size bits clear encodes the maximum copy length
                size = 0x10000
            pieces.append(data[offset:offset + size])
        elif cmd != 0:
            # literal insert of 'cmd' bytes taken from the delta itself
            pieces.append(binchunk[pos:pos + cmd])
            pos += cmd
        else:
            raise PatchError(_('unexpected delta opcode 0'))
    return "" + ''.join(pieces)
1984 1984
def applydiff(ui, fp, backend, store, strip=1, prefix='', eolmode='strict'):
    """Read a patch from fp and attempt to apply it via 'backend'.

    Returns 0 for a clean patch, -1 if any rejects were found and 1 if
    there was any fuzz.

    With eolmode 'strict' the patch content and patched file are read in
    binary mode; any other mode ignores line endings while patching and
    then normalizes them according to 'eolmode'.
    """
    return _applydiff(ui, fp, patchfile, backend, store, strip=strip,
                      prefix=prefix, eolmode=eolmode)
1997 1997
1998 1998 def _canonprefix(repo, prefix):
1999 1999 if prefix:
2000 2000 prefix = pathutil.canonpath(repo.root, repo.getcwd(), prefix)
2001 2001 if prefix != '':
2002 2002 prefix += '/'
2003 2003 return prefix
2004 2004
def _applydiff(ui, fp, patcher, backend, store, strip=1, prefix='',
               eolmode='strict'):
    """Apply the hunks read from fp through 'backend'.

    'patcher' is the class used to patch a single file (normally
    patchfile); 'store' caches contents of rename/copy sources.
    Returns -1 if any file was rejected, 1 if any hunk applied with
    fuzz, and 0 on a clean apply.
    """
    prefix = _canonprefix(backend.repo, prefix)
    def pstrip(p):
        # strip leading path components, then prepend the canonical prefix
        return pathtransform(p, strip - 1, prefix)[1]

    rejects = 0
    err = 0
    current_file = None

    for state, values in iterhunks(fp):
        if state == 'hunk':
            if not current_file:
                # hunks for a file we failed to open are dropped; the
                # failure was already counted as a reject below
                continue
            ret = current_file.apply(values)
            if ret > 0:
                err = 1
        elif state == 'file':
            if current_file:
                rejects += current_file.close()
                current_file = None
            afile, bfile, first_hunk, gp = values
            if gp:
                gp.path = pstrip(gp.path)
                if gp.oldpath:
                    gp.oldpath = pstrip(gp.oldpath)
            else:
                # non-git patch: synthesize metadata from the file names
                gp = makepatchmeta(backend, afile, bfile, first_hunk, strip,
                                   prefix)
            if gp.op == 'RENAME':
                backend.unlink(gp.oldpath)
            if not first_hunk:
                # metadata-only change (delete, rename/copy without edits,
                # mode change, empty add): nothing to patch, handle directly
                if gp.op == 'DELETE':
                    backend.unlink(gp.path)
                    continue
                data, mode = None, None
                if gp.op in ('RENAME', 'COPY'):
                    data, mode = store.getfile(gp.oldpath)[:2]
                    if data is None:
                        # This means that the old path does not exist
                        raise PatchError(_("source file '%s' does not exist")
                                         % gp.oldpath)
                if gp.mode:
                    mode = gp.mode
                    if gp.op == 'ADD':
                        # Added files without content have no hunk and
                        # must be created
                        data = ''
                if data or mode:
                    if (gp.op in ('ADD', 'RENAME', 'COPY')
                        and backend.exists(gp.path)):
                        raise PatchError(_("cannot create %s: destination "
                                           "already exists") % gp.path)
                    backend.setfile(gp.path, data, mode, gp.oldpath)
                continue
            try:
                current_file = patcher(ui, gp, backend, store,
                                       eolmode=eolmode)
            except PatchError as inst:
                ui.warn(str(inst) + '\n')
                current_file = None
                rejects += 1
                continue
        elif state == 'git':
            # stash rename/copy sources away before they get modified
            for gp in values:
                path = pstrip(gp.oldpath)
                data, mode = backend.getfile(path)
                if data is None:
                    # The error ignored here will trigger a getfile()
                    # error in a place more appropriate for error
                    # handling, and will not interrupt the patching
                    # process.
                    pass
                else:
                    store.setfile(path, data, mode)
        else:
            raise error.Abort(_('unsupported parser state: %s') % state)

    if current_file:
        rejects += current_file.close()

    if rejects:
        return -1
    return err
2089 2089
def _externalpatch(ui, repo, patcher, patchname, strip, files,
                   similarity):
    """use <patcher> to apply <patchname> to the working directory.
    returns whether patch was applied with fuzz factor."""

    fuzz = False
    args = []
    cwd = repo.root
    if cwd:
        args.append('-d %s' % util.shellquote(cwd))
    fp = util.popen('%s %s -p%d < %s' % (patcher, ' '.join(args), strip,
                                         util.shellquote(patchname)))
    # 'pf' tracks the file currently being patched as reported by the tool;
    # 'printed_file' avoids repeating its name on warnings. Both are
    # initialized here so a fuzz/FAILED line arriving before any
    # "patching file" line cannot raise UnboundLocalError.
    pf = None
    printed_file = False
    try:
        for line in util.iterfile(fp):
            line = line.rstrip()
            ui.note(line + '\n')
            if line.startswith('patching file '):
                pf = util.parsepatchoutput(line)
                printed_file = False
                files.add(pf)
            elif line.find('with fuzz') >= 0:
                fuzz = True
                if not printed_file and pf is not None:
                    ui.warn(pf + '\n')
                    printed_file = True
                ui.warn(line + '\n')
            elif line.find('saving rejects to file') >= 0:
                ui.warn(line + '\n')
            elif line.find('FAILED') >= 0:
                if not printed_file and pf is not None:
                    ui.warn(pf + '\n')
                    printed_file = True
                ui.warn(line + '\n')
    finally:
        if files:
            scmutil.marktouched(repo, files, similarity)
        code = fp.close()
    if code:
        raise PatchError(_("patch command failed: %s") %
                         util.explainexit(code)[0])
    return fuzz
2131 2131
def patchbackend(ui, backend, patchobj, strip, prefix, files=None,
                 eolmode='strict'):
    """Apply 'patchobj' through the given backend.

    'patchobj' may be a file path or an already-open file object; 'files'
    collects the names of all touched files. Raises PatchError when the
    patch fails to apply; otherwise returns True if it applied with fuzz.
    """
    if files is None:
        files = set()
    if eolmode is None:
        eolmode = ui.config('patch', 'eol')
    if eolmode.lower() not in eolmodes:
        raise error.Abort(_('unsupported line endings type: %s') % eolmode)
    eolmode = eolmode.lower()

    store = filestore()
    fp = patchobj
    try:
        fp = open(patchobj, 'rb')
    except TypeError:
        # not a path: keep using patchobj as a file object
        pass
    try:
        ret = applydiff(ui, fp, backend, store, strip=strip, prefix=prefix,
                        eolmode=eolmode)
    finally:
        if fp != patchobj:
            fp.close()
        files.update(backend.close())
        store.close()
    if ret < 0:
        raise PatchError(_('patch failed to apply'))
    return ret > 0
2158 2158
def internalpatch(ui, repo, patchobj, strip, prefix='', files=None,
                  eolmode='strict', similarity=0):
    """Apply <patchobj> to the working directory using the builtin patcher.

    Returns whether the patch was applied with fuzz factor.
    """
    return patchbackend(ui, workingbackend(ui, repo, similarity), patchobj,
                        strip, prefix, files, eolmode)
2165 2165
def patchrepo(ui, repo, ctx, store, patchobj, strip, prefix, files=None,
              eolmode='strict'):
    """Apply 'patchobj' on top of 'ctx', recording results into 'store'.

    Thin wrapper around patchbackend() using a repobackend.
    """
    return patchbackend(ui, repobackend(ui, repo, ctx, store), patchobj,
                        strip, prefix, files, eolmode)
2170 2170
def patch(ui, repo, patchname, strip=1, prefix='', files=None, eolmode='strict',
          similarity=0):
    """Apply <patchname> to the working directory.

    'eolmode' controls end-of-line handling:
    - 'strict': inputs are read in binary mode, EOLs are preserved
    - 'crlf': EOLs are ignored when patching and reset to CRLF
    - 'lf': EOLs are ignored when patching and reset to LF
    - None: get it from user settings, default to 'strict'
    'eolmode' is ignored when an external patcher program is configured.

    Returns whether the patch was applied with fuzz factor.
    """
    if files is None:
        files = set()
    patcher = ui.config('ui', 'patch')
    if patcher:
        return _externalpatch(ui, repo, patcher, patchname, strip,
                              files, similarity)
    return internalpatch(ui, repo, patchname, strip, prefix, files, eolmode,
                         similarity)
2192 2192
def changedfiles(ui, repo, patchpath, strip=1, prefix=''):
    '''return the set of file paths touched by the patch at patchpath'''
    backend = fsbackend(ui, repo.root)
    prefix = _canonprefix(repo, prefix)
    changed = set()
    with open(patchpath, 'rb') as fp:
        for state, values in iterhunks(fp):
            if state in ('hunk', 'git'):
                # hunk contents and git metadata carry no new file names
                continue
            if state != 'file':
                raise error.Abort(_('unsupported parser state: %s') % state)
            afile, bfile, first_hunk, gp = values
            if gp:
                gp.path = pathtransform(gp.path, strip - 1, prefix)[1]
                if gp.oldpath:
                    gp.oldpath = pathtransform(gp.oldpath, strip - 1,
                                               prefix)[1]
            else:
                gp = makepatchmeta(backend, afile, bfile, first_hunk, strip,
                                   prefix)
            changed.add(gp.path)
            if gp.op == 'RENAME':
                changed.add(gp.oldpath)
    return changed
2215 2215
class GitDiffRequired(Exception):
    """Raised when a diff cannot be expressed without git-style diffs.

    diffhunks() catches this to retry diff generation with the git
    format enabled (see its losedata handling).
    """
    pass
2218 2218
def diffallopts(ui, opts=None, untrusted=False, section='diff'):
    '''return diffopts with all features supported and parsed

    Equivalent to difffeatureopts() with git, whitespace and
    formatchanging all enabled.
    '''
    return difffeatureopts(ui, opts=opts, untrusted=untrusted,
                           section=section, git=True, whitespace=True,
                           formatchanging=True)

# historical alias kept for existing callers
diffopts = diffallopts
2225 2225
def difffeatureopts(ui, opts=None, untrusted=False, section='diff', git=False,
                    whitespace=False, formatchanging=False):
    '''return diffopts with only opted-in features parsed

    Features:
    - git: git-style diffs
    - whitespace: whitespace options like ignoreblanklines and ignorews
    - formatchanging: options that will likely break or cause correctness issues
      with most diff parsers
    '''
    def get(key, name=None, getter=ui.configbool, forceplain=None):
        # resolve one option: command line value first, then (under
        # HGPLAIN) the forced default, then the config file entry
        if opts:
            v = opts.get(key)
            # diffopts flags are either None-default (which is passed
            # through unchanged, so we can identify unset values), or
            # some other falsey default (eg --unified, which defaults
            # to an empty string). We only want to override the config
            # entries from hgrc with command line values if they
            # appear to have been set, which is any truthy value,
            # True, or False.
            if v or isinstance(v, bool):
                return v
        if forceplain is not None and ui.plain():
            return forceplain
        return getter(section, name or key, untrusted=untrusted)

    # core options, expected to be understood by every diff parser
    buildopts = {
        'nodates': get('nodates'),
        'showfunc': get('show_function', 'showfunc'),
        'context': get('unified', getter=ui.config),
    }
    buildopts['worddiff'] = ui.configbool('experimental', 'worddiff')

    if git:
        buildopts['git'] = get('git')

        # since this is in the experimental section, we need to call
        # ui.configbool directly
        buildopts['showsimilarity'] = ui.configbool('experimental',
                                                    'extendedheader.similarity')

        # need to inspect the ui object instead of using get() since we want to
        # test for an int
        hconf = ui.config('experimental', 'extendedheader.index')
        if hconf is not None:
            hlen = None
            try:
                # the hash config could be an integer (for length of hash) or a
                # word (e.g. short, full, none)
                hlen = int(hconf)
                if hlen < 0 or hlen > 40:
                    msg = _("invalid length for extendedheader.index: '%d'\n")
                    ui.warn(msg % hlen)
            except ValueError:
                # default value
                if hconf == 'short' or hconf == '':
                    hlen = 12
                elif hconf == 'full':
                    hlen = 40
                elif hconf != 'none':
                    msg = _("invalid value for extendedheader.index: '%s'\n")
                    ui.warn(msg % hconf)
            finally:
                # on bad input hlen stays None, which disables the index
                buildopts['index'] = hlen

    if whitespace:
        buildopts['ignorews'] = get('ignore_all_space', 'ignorews')
        buildopts['ignorewsamount'] = get('ignore_space_change',
                                          'ignorewsamount')
        buildopts['ignoreblanklines'] = get('ignore_blank_lines',
                                            'ignoreblanklines')
        buildopts['ignorewseol'] = get('ignore_space_at_eol', 'ignorewseol')
    if formatchanging:
        buildopts['text'] = opts and opts.get('text')
        binary = None if opts is None else opts.get('binary')
        buildopts['nobinary'] = (not binary if binary is not None
                                 else get('nobinary', forceplain=False))
        buildopts['noprefix'] = get('noprefix', forceplain=False)

    return mdiff.diffopts(**pycompat.strkwargs(buildopts))
2307 2307
def diff(repo, node1=None, node2=None, match=None, changes=None,
         opts=None, losedatafn=None, prefix='', relroot='', copy=None,
         hunksfilterfn=None):
    '''yields diff of changes to files between two nodes, or node and
    working directory.

    if node1 is None, use first dirstate parent instead.
    if node2 is None, compare node1 with working directory.

    losedatafn(**kwarg) is a callable run when opts.upgrade=True and
    every time some change cannot be represented with the current
    patch format. Return False to upgrade to git patch format, True to
    accept the loss or raise an exception to abort the diff. It is
    called with the name of current file being diffed as 'fn'. If set
    to None, patches will always be upgraded to git format when
    necessary.

    prefix is a filename prefix that is prepended to all filenames on
    display (used for subrepos).

    relroot, if not empty, must be normalized with a trailing /. Any match
    patterns that fall outside it will be ignored.

    copy, if not empty, should contain mappings {dst@y: src@x} of copy
    information.

    hunksfilterfn, if not None, should be a function taking a filectx and
    hunks generator that may yield filtered hunks.
    '''
    for fctx1, fctx2, hdr, hunks in diffhunks(
            repo, node1=node1, node2=node2,
            match=match, changes=changes, opts=opts,
            losedatafn=losedatafn, prefix=prefix, relroot=relroot, copy=copy,
    ):
        if hunksfilterfn is not None:
            # If the file has been removed, fctx2 is None; but this should
            # not occur here since we catch removed files early in
            # logcmdutil.getlinerangerevs() for 'hg log -L'.
            assert fctx2 is not None, \
                'fctx2 unexpectly None in diff hunks filtering'
            hunks = hunksfilterfn(fctx2, hunks)
        # flatten all hunk lines for this file into one text block
        text = ''.join(sum((list(hlines) for hrange, hlines in hunks), []))
        # a header alone is emitted only if it says more than "diff ..."
        if hdr and (text or len(hdr) > 1):
            yield '\n'.join(hdr) + '\n'
        if text:
            yield text
2354 2354
def diffhunks(repo, node1=None, node2=None, match=None, changes=None,
              opts=None, losedatafn=None, prefix='', relroot='', copy=None):
    """Yield diff of changes to files in the form of (`header`, `hunks`) tuples
    where `header` is a list of diff headers and `hunks` is an iterable of
    (`hunkrange`, `hunklines`) tuples.

    See diff() for the meaning of parameters.
    """

    if opts is None:
        opts = mdiff.defaultopts

    if not node1 and not node2:
        node1 = repo.dirstate.p1()

    def lrugetfilectx():
        # keep a small LRU cache of filelogs so repeated lookups of the
        # same file do not reopen its revlog
        cache = {}
        order = collections.deque()
        def getfilectx(f, ctx):
            fctx = ctx.filectx(f, filelog=cache.get(f))
            if f not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[f] = fctx.filelog()
            else:
                order.remove(f)
            order.append(f)
            return fctx
        return getfilectx
    getfilectx = lrugetfilectx()

    ctx1 = repo[node1]
    ctx2 = repo[node2]

    relfiltered = False
    if relroot != '' and match.always():
        # as a special case, create a new matcher with just the relroot
        pats = [relroot]
        match = scmutil.match(ctx2, pats, default='path')
        relfiltered = True

    if not changes:
        changes = repo.status(ctx1, ctx2, match=match)
    modified, added, removed = changes[:3]

    if not modified and not added and not removed:
        return []

    if repo.ui.debugflag:
        hexfunc = hex
    else:
        hexfunc = short
    revs = [hexfunc(node) for node in [ctx1.node(), ctx2.node()] if node]

    if copy is None:
        copy = {}
        if opts.git or opts.upgrade:
            copy = copies.pathcopies(ctx1, ctx2, match=match)

    if relroot is not None:
        if not relfiltered:
            # XXX this would ideally be done in the matcher, but that is
            # generally meant to 'or' patterns, not 'and' them. In this case we
            # need to 'and' all the patterns from the matcher with relroot.
            def filterrel(l):
                return [f for f in l if f.startswith(relroot)]
            modified = filterrel(modified)
            added = filterrel(added)
            removed = filterrel(removed)
            relfiltered = True
        # filter out copies where either side isn't inside the relative root
        copy = dict(((dst, src) for (dst, src) in copy.iteritems()
                     if dst.startswith(relroot)
                     and src.startswith(relroot)))

    modifiedset = set(modified)
    addedset = set(added)
    removedset = set(removed)
    for f in modified:
        if f not in ctx1:
            # Fix up added, since merged-in additions appear as
            # modifications during merges
            modifiedset.remove(f)
            addedset.add(f)
    for f in removed:
        if f not in ctx1:
            # Merged-in additions that are then removed are reported as removed.
            # They are not in ctx1, so We don't want to show them in the diff.
            removedset.remove(f)
    modified = sorted(modifiedset)
    added = sorted(addedset)
    removed = sorted(removedset)
    for dst, src in list(copy.items()):
        if src not in ctx1:
            # Files merged in during a merge and then copied/renamed are
            # reported as copies. We want to show them in the diff as additions.
            del copy[dst]

    def difffn(opts, losedata):
        return trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
                       copy, getfilectx, opts, losedata, prefix, relroot)
    if opts.upgrade and not opts.git:
        try:
            def losedata(fn):
                if not losedatafn or not losedatafn(fn=fn):
                    raise GitDiffRequired
            # Buffer the whole output until we are sure it can be generated
            return list(difffn(opts.copy(git=False), losedata))
        except GitDiffRequired:
            return difffn(opts.copy(git=True), None)
    else:
        return difffn(opts, None)
2467 2467
def difflabel(func, *args, **kw):
    '''yields 2-tuples of (output, label) based on the output of func()

    'func' is a diff generator such as diff(); its chunks are split into
    lines and each line (or sub-line token) is paired with a color label
    like 'diff.inserted' for ui.write().
    '''
    # inline (word-level) coloring needs matched -/+ line pairs
    inlinecolor = False
    if kw.get(r'opts'):
        inlinecolor = kw[r'opts'].worddiff
    headprefixes = [('diff', 'diff.diffline'),
                    ('copy', 'diff.extended'),
                    ('rename', 'diff.extended'),
                    ('old', 'diff.extended'),
                    ('new', 'diff.extended'),
                    ('deleted', 'diff.extended'),
                    ('index', 'diff.extended'),
                    ('similarity', 'diff.extended'),
                    ('---', 'diff.file_a'),
                    ('+++', 'diff.file_b')]
    textprefixes = [('@', 'diff.hunk'),
                    ('-', 'diff.deleted'),
                    ('+', 'diff.inserted')]
    # 'head' tracks whether we are inside a file header (before the first
    # hunk), where different prefix->label tables apply
    head = False
    for chunk in func(*args, **kw):
        lines = chunk.split('\n')
        matches = {}
        if inlinecolor:
            matches = _findmatches(lines)
        for i, line in enumerate(lines):
            if i != 0:
                yield ('\n', '')
            if head:
                if line.startswith('@'):
                    head = False
            else:
                if line and line[0] not in ' +-@\\':
                    head = True
            stripline = line
            diffline = False
            if not head and line and line[0] in '+-':
                # highlight tabs and trailing whitespace, but only in
                # changed lines
                stripline = line.rstrip()
                diffline = True

            prefixes = textprefixes
            if head:
                prefixes = headprefixes
            for prefix, label in prefixes:
                if stripline.startswith(prefix):
                    if diffline:
                        if i in matches:
                            # word-level highlighting against the paired line
                            for t, l in _inlinediff(lines[i].rstrip(),
                                                    lines[matches[i]].rstrip(),
                                                    label):
                                yield (t, l)
                        else:
                            for token in tabsplitter.findall(stripline):
                                if '\t' == token[0]:
                                    yield (token, 'diff.tab')
                                else:
                                    yield (token, label)
                    else:
                        yield (stripline, label)
                    break
            else:
                # no prefix matched: emit the line unlabeled
                yield (line, '')
            if line != stripline:
                yield (line[len(stripline):], 'diff.trailingwhitespace')
2533 2533
2534 2534 def _findmatches(slist):
2535 2535 '''Look for insertion matches to deletion and returns a dict of
2536 2536 correspondences.
2537 2537 '''
2538 2538 lastmatch = 0
2539 2539 matches = {}
2540 2540 for i, line in enumerate(slist):
2541 2541 if line == '':
2542 2542 continue
2543 2543 if line[0] == '-':
2544 2544 lastmatch = max(lastmatch, i)
2545 2545 newgroup = False
2546 2546 for j, newline in enumerate(slist[lastmatch + 1:]):
2547 2547 if newline == '':
2548 2548 continue
2549 2549 if newline[0] == '-' and newgroup: # too far, no match
2550 2550 break
2551 2551 if newline[0] == '+': # potential match
2552 2552 newgroup = True
2553 2553 sim = difflib.SequenceMatcher(None, line, newline).ratio()
2554 2554 if sim > 0.7:
2555 2555 lastmatch = lastmatch + 1 + j
2556 2556 matches[i] = lastmatch
2557 2557 matches[lastmatch] = i
2558 2558 break
2559 2559 return matches
2560 2560
def _inlinediff(s1, s2, operation):
    '''Perform string diff to highlight specific changes.

    's1' and 's2' are a matched '+'/'-' line pair (see _findmatches);
    'operation' is 'diff.deleted' or 'diff.inserted' and selects which
    side is being rendered. Returns a list of (token, label) pairs where
    changed tokens carry the '<operation>.highlight' label.
    '''
    # fragments belonging only to the other side are skipped below
    operation_skip = '+?' if operation == 'diff.deleted' else '-?'
    if operation == 'diff.deleted':
        s2, s1 = s1, s2

    buff = []
    # we never want to highlight the leading +-
    if operation == 'diff.deleted' and s2.startswith('-'):
        label = operation
        token = '-'
        s2 = s2[1:]
        s1 = s1[1:]
    elif operation == 'diff.inserted' and s1.startswith('+'):
        label = operation
        token = '+'
        s2 = s2[1:]
        s1 = s1[1:]
    else:
        raise error.ProgrammingError("Case not expected, operation = %s" %
                                     operation)

    # ndiff marks each fragment with '- ', '+ ', '  ' or '? '
    s = difflib.ndiff(_nonwordre.split(s2), _nonwordre.split(s1))
    for part in s:
        if part[0] in operation_skip or len(part) == 2:
            continue
        l = operation + '.highlight'
        if part[0] in ' ':
            l = operation
        if part[2:] == '\t':
            l = 'diff.tab'
        if l == label: # contiguous token with same label
            token += part[2:]
            continue
        else:
            # label changed: flush the accumulated token
            buff.append((token, label))
            label = l
            token = part[2:]
    buff.append((token, label))

    return buff
2602 2602
def diffui(*args, **kw):
    '''like diff(), but yields 2-tuples of (output, label) for ui.write()

    Accepts exactly the same arguments as diff().
    '''
    return difflabel(diff, *args, **kw)
2606 2606
2607 2607 def _filepairs(modified, added, removed, copy, opts):
2608 2608 '''generates tuples (f1, f2, copyop), where f1 is the name of the file
2609 2609 before and f2 is the the name after. For added files, f1 will be None,
2610 2610 and for removed files, f2 will be None. copyop may be set to None, 'copy'
2611 2611 or 'rename' (the latter two only if opts.git is set).'''
2612 2612 gone = set()
2613 2613
2614 2614 copyto = dict([(v, k) for k, v in copy.items()])
2615 2615
2616 2616 addedset, removedset = set(added), set(removed)
2617 2617
2618 2618 for f in sorted(modified + added + removed):
2619 2619 copyop = None
2620 2620 f1, f2 = f, f
2621 2621 if f in addedset:
2622 2622 f1 = None
2623 2623 if f in copy:
2624 2624 if opts.git:
2625 2625 f1 = copy[f]
2626 2626 if f1 in removedset and f1 not in gone:
2627 2627 copyop = 'rename'
2628 2628 gone.add(f1)
2629 2629 else:
2630 2630 copyop = 'copy'
2631 2631 elif f in removedset:
2632 2632 f2 = None
2633 2633 if opts.git:
2634 2634 # have we already reported a copy above?
2635 2635 if (f in copyto and copyto[f] in addedset
2636 2636 and copy[copyto[f]] == f):
2637 2637 continue
2638 2638 yield f1, f2, copyop
2639 2639
def trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
            copy, getfilectx, opts, losedatafn, prefix, relroot):
    '''given input data, generate a diff and yield it in blocks

    If generating a diff would lose data like flags or binary data and
    losedatafn is not None, it will be called.

    relroot is removed and prefix is added to every path in the diff output.

    If relroot is not empty, this function expects every path in modified,
    added, removed and copy to start with it.'''

    def gitindex(text):
        # git-style blob hash of the given contents
        if not text:
            text = ""
        l = len(text)
        s = hashlib.sha1('blob %d\0' % l)
        s.update(text)
        return hex(s.digest())

    if opts.noprefix:
        aprefix = bprefix = ''
    else:
        aprefix = 'a/'
        bprefix = 'b/'

    def diffline(f, revs):
        # plain (non-git) "diff -r REV [-r REV] file" header line
        revinfo = ' '.join(["-r %s" % rev for rev in revs])
        return 'diff %s %s' % (revinfo, f)

    def isempty(fctx):
        return fctx is None or fctx.size() == 0

    date1 = util.datestr(ctx1.date())
    date2 = util.datestr(ctx2.date())

    gitmode = {'l': '120000', 'x': '100755', '': '100644'}

    # developer sanity check: all paths must live under relroot
    if relroot != '' and (repo.ui.configbool('devel', 'all-warnings')
                          or repo.ui.configbool('devel', 'check-relroot')):
        for f in modified + added + removed + list(copy) + list(copy.values()):
            if f is not None and not f.startswith(relroot):
                raise AssertionError(
                    "file %s doesn't start with relroot %s" % (f, relroot))

    for f1, f2, copyop in _filepairs(modified, added, removed, copy, opts):
        content1 = None
        content2 = None
        fctx1 = None
        fctx2 = None
        flag1 = None
        flag2 = None
        if f1:
            fctx1 = getfilectx(f1, ctx1)
            if opts.git or losedatafn:
                flag1 = ctx1.flags(f1)
        if f2:
            fctx2 = getfilectx(f2, ctx2)
            if opts.git or losedatafn:
                flag2 = ctx2.flags(f2)
        # if binary is True, output "summary" or "base85", but not "text diff"
        if opts.text:
            binary = False
        else:
            binary = any(f.isbinary() for f in [fctx1, fctx2] if f is not None)

        if losedatafn and not opts.git:
            # give the caller a chance to veto a lossy non-git diff
            if (binary or
                # copy/rename
                f2 in copy or
                # empty file creation
                (not f1 and isempty(fctx2)) or
                # empty file deletion
                (isempty(fctx1) and not f2) or
                # create with flags
                (not f1 and flag2) or
                # change flags
                (f1 and f2 and flag1 != flag2)):
                losedatafn(f2 or f1)

        path1 = f1 or f2
        path2 = f2 or f1
        path1 = posixpath.join(prefix, path1[len(relroot):])
        path2 = posixpath.join(prefix, path2[len(relroot):])
        header = []
        if opts.git:
            header.append('diff --git %s%s %s%s' %
                          (aprefix, path1, bprefix, path2))
            if not f1: # added
                header.append('new file mode %s' % gitmode[flag2])
            elif not f2: # removed
                header.append('deleted file mode %s' % gitmode[flag1])
            else:  # modified/copied/renamed
                mode1, mode2 = gitmode[flag1], gitmode[flag2]
                if mode1 != mode2:
                    header.append('old mode %s' % mode1)
                    header.append('new mode %s' % mode2)
                if copyop is not None:
                    if opts.showsimilarity:
                        sim = similar.score(ctx1[path1], ctx2[path2]) * 100
                        header.append('similarity index %d%%' % sim)
                    header.append('%s from %s' % (copyop, path1))
                    header.append('%s to %s' % (copyop, path2))
        elif revs and not repo.ui.quiet:
            header.append(diffline(path1, revs))

        #  fctx.is  | diffopts                | what to   | is fctx.data()
        #  binary() | text nobinary git index | output?   | outputted?
        # ------------------------------------|----------------------------
        #  yes      | no   no       no  *     | summary   | no
        #  yes      | no   no       yes *     | base85    | yes
        #  yes      | no   yes      no  *     | summary   | no
        #  yes      | no   yes      yes 0     | summary   | no
        #  yes      | no   yes      yes >0    | summary   | semi [1]
        #  yes      | yes  *        *   *     | text diff | yes
        #  no       | *    *        *   *     | text diff | yes
        # [1]: hash(fctx.data()) is outputted. so fctx.data() cannot be faked
        if binary and (not opts.git or (opts.git and opts.nobinary and not
                                        opts.index)):
            # fast path: no binary content will be displayed, content1 and
            # content2 are only used for equivalent test. cmp() could have a
            # fast path.
            if fctx1 is not None:
                content1 = b'\0'
            if fctx2 is not None:
                if fctx1 is not None and not fctx1.cmp(fctx2):
                    content2 = b'\0' # not different
                else:
                    content2 = b'\0\0'
        else:
            # normal path: load contents
            if fctx1 is not None:
                content1 = fctx1.data()
            if fctx2 is not None:
                content2 = fctx2.data()

        if binary and opts.git and not opts.nobinary:
            text = mdiff.b85diff(content1, content2)
            if text:
                header.append('index %s..%s' %
                              (gitindex(content1), gitindex(content2)))
            # trailing comma: a one-element tuple of (hunkrange, hunklines)
            hunks = (None, [text]),
        else:
            if opts.git and opts.index > 0:
                flag = flag1
                if flag is None:
                    flag = flag2
                header.append('index %s..%s %s' %
                              (gitindex(content1)[0:opts.index],
                               gitindex(content2)[0:opts.index],
                               gitmode[flag]))

            uheaders, hunks = mdiff.unidiff(content1, date1,
                                            content2, date2,
                                            path1, path2,
                                            binary=binary, opts=opts)
            header.extend(uheaders)
        yield fctx1, fctx2, header, hunks
2798 2798
def diffstatsum(stats):
    '''Reduce an iterable of (filename, adds, removes, isbinary) tuples
    into file-wide aggregates.

    Returns (maxfile, maxtotal, addtotal, removetotal, binary) where
    maxfile is the widest filename in display columns, maxtotal the
    largest per-file change count, addtotal/removetotal the summed
    additions and removals, and binary whether any file was binary.
    '''
    maxfile = maxtotal = addtotal = removetotal = 0
    binary = False
    for filename, added, removed, isbinary in stats:
        namewidth = encoding.colwidth(filename)
        if namewidth > maxfile:
            maxfile = namewidth
        changed = added + removed
        if changed > maxtotal:
            maxtotal = changed
        addtotal += added
        removetotal += removed
        binary = binary or isbinary

    return maxfile, maxtotal, addtotal, removetotal, binary
2809 2809
def diffstatdata(lines):
    '''Parse diff text into per-file statistics.

    lines is an iterable of diff lines (git "diff --git" or plain
    "diff -r" style).  Returns a list of (filename, adds, removes,
    isbinary) tuples, one per file, in the order encountered.
    '''
    # raw string: '\s' is a regex escape, not a string escape; in a
    # plain literal it raises a DeprecationWarning on Python 3.6+ (and
    # is an error in later versions)
    diffre = re.compile(r'^diff .*-r [a-z0-9]+\s(.*)$')

    results = []
    filename, adds, removes, isbinary = None, 0, 0, False

    def addresult():
        # flush the stats of the file currently being scanned, if any
        if filename:
            results.append((filename, adds, removes, isbinary))

    # inheader is used to track if a line is in the
    # header portion of the diff. This helps properly account
    # for lines that start with '--' or '++'
    inheader = False

    for line in lines:
        if line.startswith('diff'):
            addresult()
            # starting a new file diff
            # set numbers to 0 and reset inheader
            inheader = True
            adds, removes, isbinary = 0, 0, False
            if line.startswith('diff --git a/'):
                filename = gitre.search(line).group(2)
            elif line.startswith('diff -r'):
                # format: "diff -r ... -r ... filename"
                filename = diffre.search(line).group(1)
        elif line.startswith('@@'):
            inheader = False
        elif line.startswith('+') and not inheader:
            adds += 1
        elif line.startswith('-') and not inheader:
            removes += 1
        elif (line.startswith('GIT binary patch') or
              line.startswith('Binary file')):
            isbinary = True
    addresult()
    return results
2848 2848
def diffstat(lines, width=80):
    '''Render a diffstat summary table for the given diff lines.

    Each changed file gets one row showing its name, the change count
    (or "Bin" for a binary file) and a +/- histogram scaled to fit in
    *width* columns; a totals line is appended when any files changed.
    '''
    stats = diffstatdata(lines)
    maxname, maxtotal, totaladds, totalremoves, hasbinary = diffstatsum(stats)

    # the count column must fit the widest number, and at least "Bin"
    countwidth = len(str(maxtotal))
    if hasbinary:
        countwidth = max(countwidth, 3)
    # never squeeze the histogram below 10 columns
    graphwidth = max(width - countwidth - maxname - 6, 10)

    def scale(i):
        if maxtotal <= graphwidth:
            return i
        # If diffstat runs out of room it doesn't print anything,
        # which isn't very useful, so always print at least one + or -
        # if there were at least some changes.
        return max(i * graphwidth // maxtotal, int(bool(i)))

    rows = []
    for filename, adds, removes, isbinary in stats:
        count = 'Bin' if isbinary else '%d' % (adds + removes)
        padding = ' ' * (maxname - encoding.colwidth(filename))
        rows.append(' %s%s | %*s %s%s\n' %
                    (filename, padding, countwidth, count,
                     '+' * scale(adds), '-' * scale(removes)))

    if stats:
        rows.append(_(' %d files changed, %d insertions(+), '
                      '%d deletions(-)\n')
                    % (len(stats), totaladds, totalremoves))

    return ''.join(rows)
2886 2886
def diffstatui(*args, **kw):
    '''like diffstat(), but yields 2-tuples of (output, label) for
    ui.write()
    '''
    for line in diffstat(*args, **kw).splitlines():
        # histogram rows end in '+' or '-'; everything else is plain text
        if not line or line[-1] not in '+-':
            yield (line, '')
            continue
        name, graph = line.rsplit(' ', 1)
        yield (name + ' ', '')
        plusrun = re.search(br'\++', graph)
        if plusrun:
            yield (plusrun.group(0), 'diffstat.inserted')
        minusrun = re.search(br'-+', graph)
        if minusrun:
            yield (minusrun.group(0), 'diffstat.deleted')
    yield ('\n', '')
General Comments 0
You need to be logged in to leave comments. Login now