##// END OF EJS Templates
patch: fix hunk newlines when parsing hunks, not in iterhunks()
Patrick Mezard -
r13699:d3c0e003 default
parent child Browse files
Show More
@@ -1,1624 +1,1620 b''
1 1 # patch.py - patch file parsing routines
2 2 #
3 3 # Copyright 2006 Brendan Cully <brendan@kublai.com>
4 4 # Copyright 2007 Chris Mason <chris.mason@oracle.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 import cStringIO, email.Parser, os, errno, re
10 10 import tempfile, zlib
11 11
12 12 from i18n import _
13 13 from node import hex, nullid, short
14 14 import base85, mdiff, util, diffhelpers, copies, encoding
15 15
# header line of a git-style diff; captures the 'a/' and 'b/' paths
gitre = re.compile('diff --git a/(.*) b/(.*)')

class PatchError(Exception):
    """Error raised by the patch parsing and patch application code."""
20 20
21 21 # helper functions
22 22
def copyfile(src, dst, basedir):
    """Copy src to dst, both canonicalized against basedir.

    Missing parent directories of dst are created first.

    Raises util.Abort if dst already exists (including as a dangling
    symlink, since lexists() is used) or if the destination directory
    cannot be created.
    """
    abssrc, absdst = [util.canonpath(basedir, basedir, x) for x in [src, dst]]
    if os.path.lexists(absdst):
        raise util.Abort(_("cannot create %s: destination already exists") %
                         dst)

    dstdir = os.path.dirname(absdst)
    if dstdir and not os.path.isdir(dstdir):
        try:
            os.makedirs(dstdir)
        except (IOError, OSError):
            # os.makedirs() reports failures as OSError; catching only
            # IOError let the raw error escape instead of the Abort below
            raise util.Abort(
                _("cannot create %s: unable to create destination directory")
                % dst)

    util.copyfile(abssrc, absdst)
39 39
40 40 # public functions
41 41
def split(stream):
    '''return an iterator of individual patches from a stream

    The first lines of stream are inspected to pick a splitting
    strategy ("hg export" output, mbox, MIME message, plain
    "Name: value" headers, or a bare patch). Every yielded item is a
    file-like object (a cStringIO) holding one patch.
    '''
    def isheader(line, inheader):
        # True if line looks like a "Name: value" header, or like the
        # continuation of one when we are already inside a header
        if inheader and line[0] in (' ', '\t'):
            # continuation
            return True
        if line[0] in (' ', '-', '+'):
            # diff line - don't check for header pattern in there
            return False
        l = line.split(': ', 1)
        return len(l) == 2 and ' ' not in l[0]

    def chunk(lines):
        # wrap accumulated lines into a readable file-like object
        return cStringIO.StringIO(''.join(lines))

    def hgsplit(stream, cur):
        # a new patch starts at each '# HG changeset patch' line found
        # once a blank line has ended the previous header
        inheader = True

        for line in stream:
            if not line.strip():
                inheader = False
            if not inheader and line.startswith('# HG changeset patch'):
                yield chunk(cur)
                cur = []
                inheader = True

            cur.append(line)

        if cur:
            yield chunk(cur)

    def mboxsplit(stream, cur):
        # messages are delimited by 'From ' lines; the delimiter itself
        # (cur[0]) is dropped and each message is split again recursively
        for line in stream:
            if line.startswith('From '):
                for c in split(chunk(cur[1:])):
                    yield c
                cur = []

            cur.append(line)

        if cur:
            for c in split(chunk(cur[1:])):
                yield c

    def mimesplit(stream, cur):
        def msgfp(m):
            # flatten an email message (or part) back into a file object
            fp = cStringIO.StringIO()
            g = email.Generator.Generator(fp, mangle_from_=False)
            g.flatten(m)
            fp.seek(0)
            return fp

        # the email parser needs the whole message, read everything left
        for line in stream:
            cur.append(line)
        c = chunk(cur)

        m = email.Parser.Parser().parse(c)
        if not m.is_multipart():
            yield msgfp(m)
        else:
            ok_types = ('text/plain', 'text/x-diff', 'text/x-patch')
            for part in m.walk():
                ct = part.get_content_type()
                if ct not in ok_types:
                    continue
                yield msgfp(part)

    def headersplit(stream, cur):
        # start a new chunk every time a header block begins
        inheader = False

        for line in stream:
            if not inheader and isheader(line, inheader):
                yield chunk(cur)
                cur = []
                inheader = True
            if inheader and not isheader(line, inheader):
                inheader = False

            cur.append(line)

        if cur:
            yield chunk(cur)

    def remainder(cur):
        # everything read so far is a single patch
        yield chunk(cur)

    class fiter(object):
        # iterator adapter for objects which only provide readline()
        def __init__(self, fp):
            self.fp = fp

        def __iter__(self):
            return self

        def next(self):
            l = self.fp.readline()
            if not l:
                raise StopIteration
            return l

    inheader = False
    cur = []

    mimeheaders = ['content-type']

    if not hasattr(stream, 'next'):
        # http responses, for example, have readline but not next
        stream = fiter(stream)

    # cur collects the lines consumed while deciding; it is handed to the
    # selected splitter together with the partially read stream
    for line in stream:
        cur.append(line)
        if line.startswith('# HG changeset patch'):
            return hgsplit(stream, cur)
        elif line.startswith('From '):
            return mboxsplit(stream, cur)
        elif isheader(line, inheader):
            inheader = True
            if line.split(':', 1)[0].lower() in mimeheaders:
                # let email parser handle this
                return mimesplit(stream, cur)
        elif line.startswith('--- ') and inheader:
            # No evil headers seen by diff start, split by hand
            return headersplit(stream, cur)
        # Not enough info, keep reading

    # if we are here, we have a very plain patch
    return remainder(cur)
168 168
def extract(ui, fileobj):
    '''extract patch from data read from fileobj.

    patch can be a normal patch or contained in an email message.

    return tuple (filename, message, user, date, branch, node, p1, p2).
    Any item in the returned tuple can be None. If filename is None,
    fileobj did not contain a patch. Caller must unlink filename when done.

    The patch payload is written to a temporary file created with
    tempfile.mkstemp(); that file is removed here when no diff is found
    or when an exception is raised.'''

    # attempt to detect the start of a patch
    # (this heuristic is borrowed from quilt)
    diffre = re.compile(r'^(?:Index:[ \t]|diff[ \t]|RCS file: |'
                        r'retrieving revision [0-9]+(\.[0-9]+)*$|'
                        r'---[ \t].*?^\+\+\+[ \t]|'
                        r'\*\*\*[ \t].*?^---[ \t])', re.MULTILINE|re.DOTALL)

    fd, tmpname = tempfile.mkstemp(prefix='hg-patch-')
    tmpfp = os.fdopen(fd, 'w')
    try:
        msg = email.Parser.Parser().parse(fileobj)

        subject = msg['Subject']
        user = msg['From']
        if not subject and not user:
            # Not an email, restore parsed headers if any
            subject = '\n'.join(': '.join(h) for h in msg.items()) + '\n'

        gitsendmail = 'git-send-email' in msg.get('X-Mailer', '')
        # should try to parse msg['Date']
        date = None
        nodeid = None
        branch = None
        parents = []

        if subject:
            # strip a leading "[PATCH ...]" tag and unfold the subject
            if subject.startswith('[PATCH'):
                pend = subject.find(']')
                if pend >= 0:
                    subject = subject[pend + 1:].lstrip()
            subject = subject.replace('\n\t', ' ')
            ui.debug('Subject: %s\n' % subject)
        if user:
            ui.debug('From: %s\n' % user)
        diffs_seen = 0
        ok_types = ('text/plain', 'text/x-diff', 'text/x-patch')
        message = ''
        for part in msg.walk():
            content_type = part.get_content_type()
            ui.debug('Content-Type: %s\n' % content_type)
            if content_type not in ok_types:
                continue
            payload = part.get_payload(decode=True)
            m = diffre.search(payload)
            if m:
                hgpatch = False
                hgpatchheader = False
                ignoretext = False

                ui.debug('found patch at byte %d\n' % m.start(0))
                diffs_seen += 1
                cfp = cStringIO.StringIO()
                # everything before the diff is commit message and/or
                # "hg export" header material
                for line in payload[:m.start(0)].splitlines():
                    if line.startswith('# HG changeset patch') and not hgpatch:
                        ui.debug('patch generated by hg export\n')
                        hgpatch = True
                        hgpatchheader = True
                        # drop earlier commit message content
                        cfp.seek(0)
                        cfp.truncate()
                        subject = None
                    elif hgpatchheader:
                        if line.startswith('# User '):
                            user = line[7:]
                            ui.debug('From: %s\n' % user)
                        elif line.startswith("# Date "):
                            date = line[7:]
                        elif line.startswith("# Branch "):
                            branch = line[9:]
                        elif line.startswith("# Node ID "):
                            nodeid = line[10:]
                        elif line.startswith("# Parent "):
                            # NOTE(review): slices 10 characters although
                            # the tested prefix is 9 long - presumably the
                            # exported header has two spaces; confirm
                            parents.append(line[10:])
                        elif not line.startswith("# "):
                            hgpatchheader = False
                    elif line == '---' and gitsendmail:
                        # text after '---' in git-send-email mails is not
                        # part of the commit message
                        ignoretext = True
                    if not hgpatchheader and not ignoretext:
                        cfp.write(line)
                        cfp.write('\n')
                message = cfp.getvalue()
                if tmpfp:
                    tmpfp.write(payload)
                    if not payload.endswith('\n'):
                        tmpfp.write('\n')
            elif not diffs_seen and message and content_type == 'text/plain':
                message += '\n' + payload
    except:
        # cleanup only: the exception is re-raised unchanged
        tmpfp.close()
        os.unlink(tmpname)
        raise

    if subject and not message.startswith(subject):
        message = '%s\n%s' % (subject, message)
    tmpfp.close()
    if not diffs_seen:
        os.unlink(tmpname)
        return None, message, user, date, branch, None, None, None
    # first and second recorded parents, None when absent
    p1 = parents and parents.pop(0) or None
    p2 = parents and parents.pop(0) or None
    return tmpname, message, user, date, branch, nodeid, p1, p2
279 279
class patchmeta(object):
    """Metadata describing one file touched by a patch

    'op' names the operation: ADD, DELETE, RENAME, MODIFY or COPY.
    'path' is the path of the patched file and 'oldpath' the origin
    file for COPY and RENAME (None for the other operations). 'mode'
    stays None unless setmode() is called; it is then a pair
    (islink, isexec) whose items are truthy when the file is a
    symlink, respectively executable. 'binary' starts out False.
    """
    def __init__(self, path):
        self.op = 'MODIFY'
        self.path = path
        self.oldpath = None
        self.mode = None
        self.binary = False

    def setmode(self, mode):
        # 0x2000 is octal 20000 (the 'islink' bit tested here) and
        # 0x40 is octal 100 (the 'isexec' bit tested here)
        self.mode = (mode & 0x2000, mode & 0x40)

    def __repr__(self):
        details = (self.op, self.path)
        return "<patchmeta %s %r>" % details
304 304
def readgitpatch(lr):
    """Collect git extended-header metadata from the lines of lr

    Returns a list of patchmeta objects, one for every 'diff --git'
    header matched by gitre, in order of appearance.
    """
    gitpatches = []
    current = None
    for rawline in lr:
        line = rawline.rstrip(' \r\n')
        if line.startswith('diff --git'):
            found = gitre.match(line)
            if found:
                # a new file section begins, flush the pending record
                if current:
                    gitpatches.append(current)
                current = patchmeta(found.group(2))
            continue
        if not current:
            continue
        if line.startswith('--- '):
            # start of the diff body: the extended header is complete
            gitpatches.append(current)
            current = None
        elif line.startswith('rename from '):
            current.op = 'RENAME'
            current.oldpath = line[12:]
        elif line.startswith('rename to '):
            current.path = line[10:]
        elif line.startswith('copy from '):
            current.op = 'COPY'
            current.oldpath = line[10:]
        elif line.startswith('copy to '):
            current.path = line[8:]
        elif line.startswith('deleted file'):
            current.op = 'DELETE'
        elif line.startswith('new file mode '):
            current.op = 'ADD'
            current.setmode(int(line[-6:], 8))
        elif line.startswith('new mode '):
            current.setmode(int(line[-6:], 8))
        elif line.startswith('GIT binary patch'):
            current.binary = True

    # the last section may end without a '--- ' line
    if current:
        gitpatches.append(current)
    return gitpatches
348 348
class linereader(object):
    """Line reader which lets lines be pushed back for a later read

    'eol' records the first line ending seen on lines read from fp
    ('\\r\\n' or '\\n'). In text mode, CRLF endings of lines read from
    fp are converted to LF; pushed back lines are returned untouched.
    """
    def __init__(self, fp, textmode=False):
        self.fp = fp
        self.textmode = textmode
        self.eol = None
        self.buf = []

    def push(self, line):
        # None is silently ignored
        if line is None:
            return
        self.buf.append(line)

    def readline(self):
        # pushed back lines are served first, oldest first
        if self.buf:
            return self.buf.pop(0)
        line = self.fp.readline()
        crlf = line.endswith('\r\n')
        if not self.eol:
            if crlf:
                self.eol = '\r\n'
            elif line.endswith('\n'):
                self.eol = '\n'
        if crlf and self.textmode:
            line = line[:-2] + '\n'
        return line

    def __iter__(self):
        # stop at the first empty read
        line = self.readline()
        while line:
            yield line
            line = self.readline()
382 382
# unified hunk header: "@@ -start,len +start,len @@", or
# "@@ -start +start @@" when len is 1
unidesc = re.compile(r'@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@')
# context hunk range line: "*** start,end ***" or "--- start,end ---"
contextdesc = re.compile(r'(---|\*\*\*) (\d+)(,(\d+))? (---|\*\*\*)')
eolmodes = ['strict', 'crlf', 'lf', 'auto']
387 387
class patchfile(object):
    """In-memory state of one file being patched

    The file content is loaded as a list of lines, hunks are applied to
    that list with apply(), and hunks which could not be applied are
    collected in 'rej' so that write_rej() can save them.
    """
    def __init__(self, ui, fname, opener, missing=False, eolmode='strict'):
        self.fname = fname
        self.eolmode = eolmode
        # line ending detected while reading the file, if any
        self.eol = None
        self.opener = opener
        self.ui = ui
        self.lines = []
        self.exists = False
        self.missing = missing
        if not missing:
            try:
                self.lines = self.readlines(fname)
                self.exists = True
            except IOError:
                # unreadable or absent file: start from empty content
                pass
        else:
            self.ui.warn(_("unable to find '%s' for patching\n") % self.fname)

        # line content -> list of indexes in self.lines, see hashlines()
        self.hash = {}
        self.dirty = 0
        # accumulated line count change caused by applied hunks
        self.offset = 0
        # distance between the expected and actual position of the last
        # hunk applied through the search path of apply()
        self.skew = 0
        # rejected hunks
        self.rej = []
        self.fileprinted = False
        self.printfile(False)
        # number of hunks passed to apply()
        self.hunks = 0

    def readlines(self, fname):
        # a symlink is represented by a single "line": its target
        if os.path.islink(fname):
            return [os.readlink(fname)]
        fp = self.opener(fname, 'r')
        try:
            # CRLF is converted to LF on read unless eolmode is 'strict'
            lr = linereader(fp, self.eolmode != 'strict')
            lines = list(lr)
            self.eol = lr.eol
            return lines
        finally:
            fp.close()

    def writelines(self, fname, lines):
        # Ensure supplied data ends in fname, being a regular file or
        # a symlink. cmdutil.updatedir will -too magically- take care
        # of setting it to the proper type afterwards.
        st_mode = None
        islink = os.path.islink(fname)
        if islink:
            # buffer the content, it becomes the symlink target below
            fp = cStringIO.StringIO()
        else:
            try:
                # remember permission bits to restore them after writing
                st_mode = os.lstat(fname).st_mode & 0777
            except OSError, e:
                if e.errno != errno.ENOENT:
                    raise
            fp = self.opener(fname, 'w')
        try:
            if self.eolmode == 'auto':
                eol = self.eol
            elif self.eolmode == 'crlf':
                eol = '\r\n'
            else:
                eol = '\n'

            if self.eolmode != 'strict' and eol and eol != '\n':
                # rewrite LF terminated lines with the selected ending
                for l in lines:
                    if l and l[-1] == '\n':
                        l = l[:-1] + eol
                    fp.write(l)
            else:
                fp.writelines(lines)
            if islink:
                self.opener.symlink(fp.getvalue(), fname)
            if st_mode is not None:
                os.chmod(fname, st_mode)
        finally:
            fp.close()

    def unlink(self, fname):
        os.unlink(fname)

    def printfile(self, warn):
        # report the patched file name, at most once as a warning or in
        # verbose mode
        if self.fileprinted:
            return
        if warn or self.ui.verbose:
            self.fileprinted = True
        s = _("patching file %s\n") % self.fname
        if warn:
            self.ui.warn(s)
        else:
            self.ui.note(s)


    def findlines(self, l, linenum):
        # looks through the hash and finds candidate lines. The
        # result is a list of line numbers sorted based on distance
        # from linenum

        cand = self.hash.get(l, [])
        if len(cand) > 1:
            # resort our list of potentials forward then back.
            cand.sort(key=lambda x: abs(x - linenum))
        return cand

    def hashlines(self):
        # rebuild the content -> line indexes map from self.lines
        self.hash = {}
        for x, s in enumerate(self.lines):
            self.hash.setdefault(s, []).append(x)

    def makerejlines(self, fname):
        # generate the content of the reject file: a file header followed
        # by the lines of every rejected hunk
        base = os.path.basename(fname)
        yield "--- %s\n+++ %s\n" % (base, base)
        for x in self.rej:
            for l in x.hunk:
                yield l
                if l[-1] != '\n':
                    yield "\n\ No newline at end of file\n"

    def write_rej(self):
        # our rejects are a little different from patch(1). This always
        # creates rejects in the same form as the original patch. A file
        # header is inserted so that you can run the reject through patch again
        # without having to type the filename.

        if not self.rej:
            return

        fname = self.fname + ".rej"
        self.ui.warn(
            _("%d out of %d hunks FAILED -- saving rejects to file %s\n") %
            (len(self.rej), self.hunks, fname))

        fp = self.opener(fname, 'w')
        fp.writelines(self.makerejlines(self.fname))
        fp.close()

    def apply(self, h):
        # Apply hunk h to self.lines. Returns -1 when the hunk is
        # rejected, 0 when it applies at its expected position (or is a
        # binary hunk), and the fuzz level used (possibly 0) when it is
        # applied after searching for a matching position. Raises
        # PatchError if the hunk is incomplete.
        if not h.complete():
            raise PatchError(_("bad hunk #%d %s (%d %d %d %d)") %
                            (h.number, h.desc, len(h.a), h.lena, len(h.b),
                            h.lenb))

        self.hunks += 1

        if self.missing:
            self.rej.append(h)
            return -1

        if self.exists and h.createfile():
            self.ui.warn(_("file %s already exists\n") % self.fname)
            self.rej.append(h)
            return -1

        if isinstance(h, binhunk):
            # binary hunks replace the whole content or remove the file
            if h.rmfile():
                self.unlink(self.fname)
            else:
                self.lines[:] = h.new()
                self.offset += len(h.new())
                self.dirty = 1
            return 0

        # keep the untouched hunk around, it is the one saved on reject
        horig = h
        if (self.eolmode in ('crlf', 'lf')
            or self.eolmode == 'auto' and self.eol):
            # If new eols are going to be normalized, then normalize
            # hunk data before patching. Otherwise, preserve input
            # line-endings.
            h = h.getnormalized()

        # fast case first, no offsets, no fuzz
        old = h.old()
        # patch starts counting at 1 unless we are adding the file
        if h.starta == 0:
            start = 0
        else:
            start = h.starta + self.offset - 1
        orig_start = start
        # if there's skew we want to emit the "(offset %d lines)" even
        # when the hunk cleanly applies at start + skew, so skip the
        # fast case code
        if self.skew == 0 and diffhelpers.testhunk(old, self.lines, start) == 0:
            if h.rmfile():
                self.unlink(self.fname)
            else:
                self.lines[start : start + h.lena] = h.new()
                self.offset += h.lenb - h.lena
                self.dirty = 1
            return 0

        # ok, we couldn't match the hunk. Lets look for offsets and fuzz it
        self.hashlines()
        if h.hunk[-1][0] != ' ':
            # if the hunk tried to put something at the bottom of the file
            # override the start line and use eof here
            search_start = len(self.lines)
        else:
            search_start = orig_start + self.skew

        # try increasing fuzz levels, trimming the top context only
        # before trimming both ends
        for fuzzlen in xrange(3):
            for toponly in [True, False]:
                old = h.old(fuzzlen, toponly)

                # candidate positions are the lines equal to the first
                # old line (without its one character prefix)
                cand = self.findlines(old[0][1:], search_start)
                for l in cand:
                    if diffhelpers.testhunk(old, self.lines, l) == 0:
                        newlines = h.new(fuzzlen, toponly)
                        self.lines[l : l + len(old)] = newlines
                        self.offset += len(newlines) - len(old)
                        self.skew = l - orig_start
                        self.dirty = 1
                        offset = l - orig_start - fuzzlen
                        if fuzzlen:
                            msg = _("Hunk #%d succeeded at %d "
                                    "with fuzz %d "
                                    "(offset %d lines).\n")
                            self.printfile(True)
                            self.ui.warn(msg %
                                (h.number, l + 1, fuzzlen, offset))
                        else:
                            msg = _("Hunk #%d succeeded at %d "
                                    "(offset %d lines).\n")
                            self.ui.note(msg % (h.number, l + 1, offset))
                        return fuzzlen
        self.printfile(True)
        self.ui.warn(_("Hunk #%d FAILED at %d\n") % (h.number, orig_start))
        self.rej.append(horig)
        return -1
615 615
class hunk(object):
    """One hunk of a unified or context diff

    'hunk' holds the hunk header followed by the hunk lines in unified
    form, 'a' the old side lines and 'b' the new side lines.
    'starta'/'lena' and 'startb'/'lenb' are the ranges announced by the
    hunk header. The hunk is parsed from lr at construction time unless
    lr is None.
    """
    def __init__(self, desc, num, lr, context, create=False, remove=False):
        self.number = num
        self.desc = desc
        self.hunk = [desc]
        self.a = []
        self.b = []
        self.starta = self.lena = None
        self.startb = self.lenb = None
        if lr is not None:
            if context:
                self.read_context_hunk(lr)
            else:
                self.read_unified_hunk(lr)
        self.create = create
        # a hunk cannot both create and remove its target
        self.remove = remove and not create

    def getnormalized(self):
        """Return a copy with line endings normalized to LF."""

        def normalize(lines):
            nlines = []
            for line in lines:
                if line.endswith('\r\n'):
                    line = line[:-2] + '\n'
                nlines.append(line)
            return nlines

        # Dummy object, it is rebuilt manually
        nh = hunk(self.desc, self.number, None, None, False, False)
        nh.number = self.number
        nh.desc = self.desc
        # note: the 'hunk' list is shared with the original, not copied
        nh.hunk = self.hunk
        nh.a = normalize(self.a)
        nh.b = normalize(self.b)
        nh.starta = self.starta
        nh.startb = self.startb
        nh.lena = self.lena
        nh.lenb = self.lenb
        nh.create = self.create
        nh.remove = self.remove
        return nh

    def read_unified_hunk(self, lr):
        # parse the "@@ -start,len +start,len @@" header held in
        # self.desc, then read the hunk lines from lr
        m = unidesc.match(self.desc)
        if not m:
            raise PatchError(_("bad hunk #%d") % self.number)
        self.starta, foo, self.lena, self.startb, foo2, self.lenb = m.groups()
        # an omitted length means 1
        if self.lena is None:
            self.lena = 1
        else:
            self.lena = int(self.lena)
        if self.lenb is None:
            self.lenb = 1
        else:
            self.lenb = int(self.lenb)
        self.starta = int(self.starta)
        self.startb = int(self.startb)
        diffhelpers.addlines(lr, self.hunk, self.lena, self.lenb, self.a, self.b)
        # if we hit eof before finishing out the hunk, the last line will
        # be zero length. Lets try to fix it up.
        while len(self.hunk[-1]) == 0:
            del self.hunk[-1]
            del self.a[-1]
            del self.b[-1]
            self.lena -= 1
            self.lenb -= 1
        self._fixnewline(lr)

    def read_context_hunk(self, lr):
        # parse a context diff hunk from lr and convert it to the unified
        # representation used by the rest of the class
        self.desc = lr.readline()
        m = contextdesc.match(self.desc)
        if not m:
            raise PatchError(_("bad hunk #%d") % self.number)
        foo, self.starta, foo2, aend, foo3 = m.groups()
        self.starta = int(self.starta)
        if aend is None:
            aend = self.starta
        # the range is "start,end", turn it into a length
        self.lena = int(aend) - self.starta
        if self.starta:
            self.lena += 1
        for x in xrange(self.lena):
            l = lr.readline()
            if l.startswith('---'):
                # lines addition, old block is empty
                lr.push(l)
                break
            # drop the two character line prefix
            s = l[2:]
            if l.startswith('- ') or l.startswith('! '):
                u = '-' + s
            # NOTE(review): two characters are sliced off above, so this
            # prefix presumably has two spaces in the real file and was
            # collapsed by the rendering - confirm
            elif l.startswith(' '):
                u = ' ' + s
            else:
                raise PatchError(_("bad hunk #%d old text line %d") %
                                 (self.number, x))
            self.a.append(u)
            self.hunk.append(u)

        l = lr.readline()
        if l.startswith('\ '):
            # no-eol marker: strip the last character of the last old line
            s = self.a[-1][:-1]
            self.a[-1] = s
            self.hunk[-1] = s
            l = lr.readline()
        m = contextdesc.match(l)
        if not m:
            raise PatchError(_("bad hunk #%d") % self.number)
        foo, self.startb, foo2, bend, foo3 = m.groups()
        self.startb = int(self.startb)
        if bend is None:
            bend = self.startb
        self.lenb = int(bend) - self.startb
        if self.startb:
            self.lenb += 1
        # index in self.hunk where the next new side line is merged
        hunki = 1
        for x in xrange(self.lenb):
            l = lr.readline()
            if l.startswith('\ '):
                # XXX: the only way to hit this is with an invalid line range.
                # The no-eol marker is not counted in the line range, but I
                # guess there are diff(1) out there which behave differently.
                s = self.b[-1][:-1]
                self.b[-1] = s
                self.hunk[hunki - 1] = s
                continue
            if not l:
                # line deletions, new block is empty and we hit EOF
                lr.push(l)
                break
            s = l[2:]
            if l.startswith('+ ') or l.startswith('! '):
                u = '+' + s
            # NOTE(review): same one-space literal as in the old block
            # loop above - confirm against the real file
            elif l.startswith(' '):
                u = ' ' + s
            elif len(self.b) == 0:
                # line deletions, new block is empty
                lr.push(l)
                break
            else:
                raise PatchError(_("bad hunk #%d old text line %d") %
                                 (self.number, x))
            self.b.append(s)
            # merge the new line into self.hunk: skip over removed lines
            # and stop on the matching context line, or insert it
            while True:
                if hunki >= len(self.hunk):
                    h = ""
                else:
                    h = self.hunk[hunki]
                hunki += 1
                if h == u:
                    break
                elif h.startswith('-'):
                    continue
                else:
                    self.hunk.insert(hunki - 1, u)
                    break

        if not self.a:
            # this happens when lines were only added to the hunk
            for x in self.hunk:
                if x.startswith('-') or x.startswith(' '):
                    self.a.append(x)
        if not self.b:
            # this happens when lines were only deleted from the hunk
            for x in self.hunk:
                if x.startswith('+') or x.startswith(' '):
                    self.b.append(x[1:])
        # @@ -start,len +start,len @@
        self.desc = "@@ -%d,%d +%d,%d @@\n" % (self.starta, self.lena,
                                             self.startb, self.lenb)
        self.hunk[0] = self.desc
        self._fixnewline(lr)

    def _fixnewline(self, lr):
        # consume a no-eol marker line following the hunk, if any, and
        # let diffhelpers.fix_newline() adjust the hunk data; any other
        # line is pushed back untouched
        l = lr.readline()
        if l.startswith('\ '):
            diffhelpers.fix_newline(self.hunk, self.a, self.b)
        else:
            lr.push(l)

    def complete(self):
        # True if as many lines were read as the header announced
        return len(self.a) == self.lena and len(self.b) == self.lenb

    def createfile(self):
        return self.starta == 0 and self.lena == 0 and self.create

    def rmfile(self):
        return self.startb == 0 and self.lenb == 0 and self.remove

    def fuzzit(self, l, fuzz, toponly):
        # this removes context lines from the top and bottom of list 'l'. It
        # checks the hunk to make sure only context lines are removed, and then
        # returns a new shortened list of lines.
        fuzz = min(fuzz, len(l)-1)
        if fuzz:
            top = 0
            bot = 0
            hlen = len(self.hunk)
            for x in xrange(hlen - 1):
                # the hunk starts with the @@ line, so use x+1
                if self.hunk[x + 1][0] == ' ':
                    top += 1
                else:
                    break
            if not toponly:
                for x in xrange(hlen - 1):
                    if self.hunk[hlen - bot - 1][0] == ' ':
                        bot += 1
                    else:
                        break

            # top and bot now count context in the hunk
            # adjust them if either one is short
            context = max(top, bot, 3)
            if bot < context:
                bot = max(0, fuzz - (context - bot))
            else:
                bot = min(fuzz, bot)
            if top < context:
                top = max(0, fuzz - (context - top))
            else:
                top = min(fuzz, top)

            return l[top:len(l)-bot]
        return l

    def old(self, fuzz=0, toponly=False):
        # old side lines, trimmed according to fuzz
        return self.fuzzit(self.a, fuzz, toponly)

    def new(self, fuzz=0, toponly=False):
        # new side lines, trimmed according to fuzz
        return self.fuzzit(self.b, fuzz, toponly)
class binhunk:
    'A binary patch file. Only understands literals so far.'
    def __init__(self, gitpatch):
        # gitpatch: metadata record whose 'op' attribute drives
        # createfile() and rmfile()
        self.gitpatch = gitpatch
        # decoded content, set by extract()
        self.text = None
        # raw lines of the hunk as read from the patch
        self.hunk = ['GIT binary patch\n']

    def createfile(self):
        return self.gitpatch.op in ('ADD', 'RENAME', 'COPY')

    def rmfile(self):
        return self.gitpatch.op == 'DELETE'

    def complete(self):
        # the hunk is usable once extract() has decoded its content
        return self.text is not None

    def new(self):
        # the whole content is handled as a single "line"
        return [self.text]

    def extract(self, lr):
        """Read and decode a 'literal' binary hunk from lr.

        Every line read is recorded in self.hunk and the decoded data is
        stored in self.text. Raises PatchError if no 'literal ' line is
        found before the end of input, or if the decoded size differs
        from the announced one.
        """
        line = lr.readline()
        self.hunk.append(line)
        while line and not line.startswith('literal '):
            line = lr.readline()
            self.hunk.append(line)
        if not line:
            raise PatchError(_('could not extract binary patch'))
        size = int(line[8:].rstrip())
        dec = []
        line = lr.readline()
        self.hunk.append(line)
        # data lines run until a line of at most one character
        while len(line) > 1:
            # the first character encodes the decoded length of the line:
            # 'A'-'Z' stand for 1-26, other characters count from 'a' as 27
            l = line[0]
            if l <= 'Z' and l >= 'A':
                l = ord(l) - ord('A') + 1
            else:
                l = ord(l) - ord('a') + 27
            dec.append(base85.b85decode(line[1:-1])[:l])
            line = lr.readline()
            self.hunk.append(line)
        text = zlib.decompress(''.join(dec))
        if len(text) != size:
            # both values must be passed as one tuple: formatting with
            # len(text) alone fails with TypeError before PatchError
            # can be raised
            raise PatchError(_('binary patch is %d bytes, not %d') %
                             (len(text), size))
        self.text = text
886 892
def parsefilename(str):
    """Return the file name found in a '--- ' or '+++ ' diff line

    The first four characters and the line ending are dropped, then
    the name is cut at the first tab or, when there is no tab, at the
    first space.
    """
    # --- filename \t|space stuff
    name = str[4:].rstrip('\r\n')
    for sep in ('\t', ' '):
        cut = name.find(sep)
        if cut >= 0:
            return name[:cut]
    return name
896 902
def pathstrip(path, strip):
    """Split path after its first 'strip' directory components

    Returns (stripped prefix, remaining path). With strip == 0 the
    prefix is '' and the path is only right-stripped. Runs of '/' are
    consumed together with the component they follow. Raises
    PatchError when path has fewer than 'strip' directories.
    """
    if strip == 0:
        return '', path.rstrip()
    last = len(path) - 1
    pos = 0
    remaining = strip
    while remaining > 0:
        slash = path.find('/', pos)
        if slash == -1:
            raise PatchError(_("unable to strip away %d of %d dirs from %s") %
                             (remaining, strip, path))
        pos = slash + 1
        # consume '//' in the path
        while pos < last and path[pos] == '/':
            pos += 1
        remaining -= 1
    return path[:pos].lstrip(), path[pos:].rstrip()
914 920
def selectfile(afile_orig, bfile_orig, hunk, strip):
    # Choose which file a hunk should be applied to, given the old and
    # new names found in the diff header. Returns (fname, missing)
    # where 'missing' is True if neither file exists and the hunk does
    # not create its target. hunk.create may be switched on as a side
    # effect. Raises PatchError when both names are /dev/null.
    nulla = afile_orig == "/dev/null"
    nullb = bfile_orig == "/dev/null"
    abase, afile = pathstrip(afile_orig, strip)
    gooda = not nulla and os.path.lexists(afile)
    bbase, bfile = pathstrip(bfile_orig, strip)
    if afile == bfile:
        # same path, no need to test for existence twice
        goodb = gooda
    else:
        goodb = not nullb and os.path.lexists(bfile)
    createfunc = hunk.createfile
    missing = not goodb and not gooda and not createfunc()

    # some diff programs apparently produce patches where the afile is
    # not /dev/null, but afile starts with bfile
    abasedir = afile[:afile.rfind('/') + 1]
    bbasedir = bfile[:bfile.rfind('/') + 1]
    if missing and abasedir == bbasedir and afile.startswith(bfile):
        # this isn't very pretty
        # tentatively flag the hunk as creating its file and keep the
        # flag only if createfile() agrees
        hunk.create = True
        if createfunc():
            missing = False
        else:
            hunk.create = False

    # If afile is "a/b/foo" and bfile is "a/b/foo.orig" we assume the
    # diff is between a file and its backup. In this case, the original
    # file should be patched (see original mpatch code).
    isbackup = (abase == bbase and bfile.startswith(afile))
    fname = None
    if not missing:
        if gooda and goodb:
            fname = isbackup and afile or bfile
        elif gooda:
            fname = afile

    if not fname:
        # no existing file was selected, fall back to the names
        # themselves
        if not nullb:
            fname = isbackup and afile or bfile
        elif not nulla:
            fname = afile
        else:
            raise PatchError(_("undefined source and destination files"))

    return fname, missing
960 966
def scangitpatch(lr, firstline):
    """
    Git patches can emit:
    - rename a to b
    - change b
    - copy a to c
    - change c

    We cannot apply this sequence as-is, the renamed 'a' could not be
    found for it would have been renamed already. And we cannot copy
    from 'b' instead because 'b' would have been changed already. So
    we scan the git patch for copy and rename commands so we can
    perform the copies ahead of time.

    firstline is the already consumed line to start scanning from.
    Returns the list built by readgitpatch(); the scanned file object
    is rewound to its starting position afterwards.
    """
    pos = 0
    try:
        pos = lr.fp.tell()
        fp = lr.fp
    except IOError:
        # tell() failed: read the remaining data into a memory buffer
        # and scan that instead (pos stays 0)
        # NOTE(review): lr.fp itself is consumed on this path and lr is
        # not pointed at the buffer here - confirm callers cope with it
        fp = cStringIO.StringIO(lr.fp.read())
    # scan with a separate reader so that the state of lr is untouched
    gitlr = linereader(fp, lr.textmode)
    gitlr.push(firstline)
    gitpatches = readgitpatch(gitlr)
    fp.seek(pos)
    return gitpatches
986 992
def iterhunks(ui, fp):
    """Read a patch and yield the following events:
    - ("file", afile, bfile, firsthunk): select a new target file.
    - ("hunk", hunk): a new hunk is ready to be applied, follows a
    "file" event.
    - ("git", gitchanges): current diff is in git format, gitchanges
    maps filenames to gitpatch records. Unique event.

    The patch is read line by line through a linereader wrapped around
    'fp'. Unified, context and git style headers are recognized; lines
    matching none of them are skipped.
    """
    # gitpatch records indexed by gp.path, filled on the first git header
    changed = {}
    afile = ""
    bfile = ""
    state = None
    hunknum = 0
    # True between a file header and the first hunk found for that file
    emitfile = False
    git = False

    # our states
    BFILE = 1
    # None: diff flavour still unknown, True: context diff,
    # False: unified diff
    context = None
    lr = linereader(fp)

    while True:
        newfile = newgitfile = False
        x = lr.readline()
        if not x:
            break
        if (state == BFILE and ((not context and x[0] == '@') or
            ((context is not False) and x.startswith('***************')))):
            # start of a text hunk for the current file
            if context is None and x.startswith('***************'):
                context = True
            gpatch = changed.get(bfile)
            create = afile == '/dev/null' or gpatch and gpatch.op == 'ADD'
            remove = bfile == '/dev/null' or gpatch and gpatch.op == 'DELETE'
            # hunk() is handed lr; presumably it reads the hunk body from it
            h = hunk(x, hunknum + 1, lr, context, create, remove)
            hunknum += 1
            if emitfile:
                emitfile = False
                yield 'file', (afile, bfile, h)
            yield 'hunk', h
        elif state == BFILE and x.startswith('GIT binary patch'):
            h = binhunk(changed[bfile])
            hunknum += 1
            if emitfile:
                emitfile = False
                yield 'file', ('a/' + afile, 'b/' + bfile, h)
            # the binary payload is extracted after the "file" event and
            # before the "hunk" event is emitted
            h.extract(lr)
            yield 'hunk', h
        elif x.startswith('diff --git'):
            # check for git diff, scanning the whole patch file if needed
            m = gitre.match(x)
            if m:
                afile, bfile = m.group(1, 2)
                if not git:
                    # first git header: scan once and emit the "git" event
                    git = True
                    gitpatches = scangitpatch(lr, x)
                    yield 'git', gitpatches
                    for gp in gitpatches:
                        changed[gp.path] = gp
                # else error?
                # copy/rename + modify should modify target, not source
                gp = changed.get(bfile)
                if gp and (gp.op in ('COPY', 'DELETE', 'RENAME', 'ADD')
                           or gp.mode):
                    afile = bfile
                newgitfile = True
        elif x.startswith('---'):
            # check for a unified diff
            l2 = lr.readline()
            if not l2.startswith('+++'):
                # not a file header after all, put the line back
                lr.push(l2)
                continue
            newfile = True
            context = False
            afile = parsefilename(x)
            bfile = parsefilename(l2)
        elif x.startswith('***'):
            # check for a context diff
            l2 = lr.readline()
            if not l2.startswith('---'):
                lr.push(l2)
                continue
            # peek at the third line: it is always pushed back
            l3 = lr.readline()
            lr.push(l3)
            if not l3.startswith("***************"):
                lr.push(l2)
                continue
            newfile = True
            context = True
            afile = parsefilename(x)
            bfile = parsefilename(l2)

        if newgitfile or newfile:
            # a new file header was seen: reset the per-file hunk state
            emitfile = True
            state = BFILE
            hunknum = 0
1092 1088
def applydiff(ui, fp, changed, strip=1, eolmode='strict'):
    """Read a patch from fp and try to apply it.

    'changed' is a dict which gets filled in with every filename touched
    by the patch. The result is 0 when the patch applied cleanly, -1
    when there were rejects and 1 when some fuzz was needed.

    With 'eolmode' set to 'strict', patch content and patched files are
    read in binary mode. With any other value line endings are ignored
    while patching and normalized according to 'eolmode' afterwards.

    Callers probably want to call 'cmdutil.updatedir' afterwards to
    apply the categories of changes this function does not handle.
    """
    # delegate to the generic driver with the default file patcher and
    # copy function
    return _applydiff(ui, fp, patchfile, copyfile, changed, strip, eolmode)
1109 1105
1110 1106 def _applydiff(ui, fp, patcher, copyfn, changed, strip=1, eolmode='strict'):
1111 1107 rejects = 0
1112 1108 err = 0
1113 1109 current_file = None
1114 1110 cwd = os.getcwd()
1115 1111 opener = util.opener(cwd)
1116 1112
1117 1113 def closefile():
1118 1114 if not current_file:
1119 1115 return 0
1120 1116 if current_file.dirty:
1121 1117 current_file.writelines(current_file.fname, current_file.lines)
1122 1118 current_file.write_rej()
1123 1119 return len(current_file.rej)
1124 1120
1125 1121 for state, values in iterhunks(ui, fp):
1126 1122 if state == 'hunk':
1127 1123 if not current_file:
1128 1124 continue
1129 1125 ret = current_file.apply(values)
1130 1126 if ret >= 0:
1131 1127 changed.setdefault(current_file.fname, None)
1132 1128 if ret > 0:
1133 1129 err = 1
1134 1130 elif state == 'file':
1135 1131 rejects += closefile()
1136 1132 afile, bfile, first_hunk = values
1137 1133 try:
1138 1134 current_file, missing = selectfile(afile, bfile,
1139 1135 first_hunk, strip)
1140 1136 current_file = patcher(ui, current_file, opener,
1141 1137 missing=missing, eolmode=eolmode)
1142 1138 except PatchError, err:
1143 1139 ui.warn(str(err) + '\n')
1144 1140 current_file = None
1145 1141 rejects += 1
1146 1142 continue
1147 1143 elif state == 'git':
1148 1144 for gp in values:
1149 1145 gp.path = pathstrip(gp.path, strip - 1)[1]
1150 1146 if gp.oldpath:
1151 1147 gp.oldpath = pathstrip(gp.oldpath, strip - 1)[1]
1152 1148 # Binary patches really overwrite target files, copying them
1153 1149 # will just make it fails with "target file exists"
1154 1150 if gp.op in ('COPY', 'RENAME') and not gp.binary:
1155 1151 copyfn(gp.oldpath, gp.path, cwd)
1156 1152 changed[gp.path] = gp
1157 1153 else:
1158 1154 raise util.Abort(_('unsupported parser state: %s') % state)
1159 1155
1160 1156 rejects += closefile()
1161 1157
1162 1158 if rejects:
1163 1159 return -1
1164 1160 return err
1165 1161
def externalpatch(patcher, patchname, ui, strip, cwd, files):
    """Apply <patchname> to the working directory by running <patcher>.

    The names reported by the external program are recorded in 'files'.
    Returns True if the patch applied with fuzz, and raises PatchError
    if the command exits with a failure status."""

    diropt = ''
    if cwd:
        diropt = '-d %s' % util.shellquote(cwd)
    cmd = '%s %s -p%d < %s' % (patcher, diropt, strip,
                               util.shellquote(patchname))
    fp = util.popen(cmd)

    fuzz = False
    for line in fp:
        line = line.rstrip()
        ui.note(line + '\n')
        if line.startswith('patching file '):
            pf = util.parse_patch_output(line)
            printed_file = False
            files.setdefault(pf, None)
            continue

        # decide whether the line is worth a warning, and whether the
        # name of the current file has to be shown first
        if 'with fuzz' in line:
            fuzz = True
            showfile = True
        elif 'saving rejects to file' in line:
            showfile = False
        elif 'FAILED' in line:
            showfile = True
        else:
            continue
        if showfile and not printed_file:
            ui.warn(pf + '\n')
            printed_file = True
        ui.warn(line + '\n')

    code = fp.close()
    if code:
        raise PatchError(_("patch command failed: %s") %
                         util.explain_exit(code)[0])
    return fuzz
1202 1198
def internalpatch(patchobj, ui, strip, cwd, files=None, eolmode='strict'):
    """Apply <patchobj> to the working directory with the builtin patcher.

    <patchobj> is first tried as a file name; if open() rejects it with
    a TypeError it is used directly as the patch stream. When 'eolmode'
    is None the 'patch.eol' setting is used, defaulting to 'strict'.

    Returns True if the patch applied with fuzz. Raises util.Abort on an
    unknown 'eolmode' and PatchError if the patch failed to apply."""

    if files is None:
        files = {}
    if eolmode is None:
        eolmode = ui.config('patch', 'eol', 'strict')
    normalized = eolmode.lower()
    if normalized not in eolmodes:
        raise util.Abort(_('unsupported line endings type: %s') % eolmode)
    eolmode = normalized

    try:
        fp = open(patchobj, 'rb')
    except TypeError:
        fp = patchobj
    saveddir = None
    if cwd:
        saveddir = os.getcwd()
        os.chdir(cwd)
    try:
        ret = applydiff(ui, fp, files, strip=strip, eolmode=eolmode)
    finally:
        # go back to where we were and close only what we opened
        if cwd:
            os.chdir(saveddir)
        if fp != patchobj:
            fp.close()
    if ret < 0:
        raise PatchError(_('patch failed to apply'))
    return ret > 0
1232 1228
1233 1229 def patch(patchname, ui, strip=1, cwd=None, files=None, eolmode='strict'):
1234 1230 """Apply <patchname> to the working directory.
1235 1231
1236 1232 'eolmode' specifies how end of lines should be handled. It can be:
1237 1233 - 'strict': inputs are read in binary mode, EOLs are preserved
1238 1234 - 'crlf': EOLs are ignored when patching and reset to CRLF
1239 1235 - 'lf': EOLs are ignored when patching and reset to LF
1240 1236 - None: get it from user settings, default to 'strict'
1241 1237 'eolmode' is ignored when using an external patcher program.
1242 1238
1243 1239 Returns whether patch was applied with fuzz factor.
1244 1240 """
1245 1241 patcher = ui.config('ui', 'patch')
1246 1242 if files is None:
1247 1243 files = {}
1248 1244 try:
1249 1245 if patcher:
1250 1246 return externalpatch(patcher, patchname, ui, strip, cwd, files)
1251 1247 return internalpatch(patchname, ui, strip, cwd, files, eolmode)
1252 1248 except PatchError, err:
1253 1249 raise util.Abort(str(err))
1254 1250
def b85diff(to, tn):
    '''return the git binary patch turning <to> into <tn>

    The result is an "index" line followed by a "GIT binary patch"
    literal section holding the zlib-compressed, base85-encoded new
    content. An empty string is returned when both contents hash to the
    same git blob id.'''
    def blobid(data):
        # git blob hash of data, the null id standing for "no content"
        if not data:
            return hex(nullid)
        digest = util.sha1('blob %d\0' % len(data))
        digest.update(data)
        return digest.hexdigest()

    def encodeline(raw):
        # a line starts with its length: 'A'-'Z' for 1 to 26 bytes,
        # lowercase letters from 'a' on for 27 and more
        size = len(raw)
        if size <= 26:
            marker = chr(ord('A') + size - 1)
        else:
            marker = chr(size - 26 + ord('a') - 1)
        return '%c%s\n' % (marker, base85.b85encode(raw, True))

    def pieces(data, csize=52):
        for start in range(0, len(data), csize):
            yield data[start:start + csize]

    oldid = blobid(to)
    newid = blobid(tn)
    if oldid == newid:
        return ""

    # TODO: deltas
    out = ['index %s..%s\nGIT binary patch\nliteral %s\n' %
           (oldid, newid, len(tn))]
    out.extend([encodeline(p) for p in pieces(zlib.compress(tn))])
    out.append('\n')
    return ''.join(out)
1292 1288
class GitDiffRequired(Exception):
    """Signal that a change cannot be shown without the git diff format.

    diff() raises it from its data loss callback and catches it to
    generate the diff again with git=True.
    """
1295 1291
def diffopts(ui, opts=None, untrusted=False):
    '''build a mdiff.diffopts object from command options and settings

    Every option is taken from 'opts' when it holds a true value for it
    and from the [diff] configuration section otherwise.'''
    def get(key, name=None, getter=ui.configbool):
        # an explicit true option wins over the configuration
        explicit = opts and opts.get(key)
        if explicit:
            return explicit
        return getter('diff', name or key, None, untrusted=untrusted)

    settings = {
        'text': opts and opts.get('text'),
        'git': get('git'),
        'nodates': get('nodates'),
        'showfunc': get('show_function', 'showfunc'),
        'ignorews': get('ignore_all_space', 'ignorews'),
        'ignorewsamount': get('ignore_space_change', 'ignorewsamount'),
        'ignoreblanklines': get('ignore_blank_lines', 'ignoreblanklines'),
        'context': get('unified', getter=ui.config),
    }
    return mdiff.diffopts(**settings)
1309 1305
def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None,
         losedatafn=None, prefix=''):
    '''yields diff of changes to files between two nodes, or node and
    working directory.

    if node1 is None, use first dirstate parent instead.
    if node2 is None, compare node1 with working directory.

    losedatafn(**kwarg) is a callable run when opts.upgrade=True and
    every time some change cannot be represented with the current
    patch format. Return False to upgrade to git patch format, True to
    accept the loss or raise an exception to abort the diff. It is
    called with the name of current file being diffed as 'fn'. If set
    to None, patches will always be upgraded to git format when
    necessary.

    prefix is a filename prefix that is prepended to all filenames on
    display (used for subrepos).
    '''

    if opts is None:
        opts = mdiff.defaultopts

    if not node1 and not node2:
        node1 = repo.dirstate.parents()[0]

    # small LRU cache of filelogs, keyed by file name
    cache = {}
    order = []
    def getfilectx(f, ctx):
        fctx = ctx.filectx(f, filelog=cache.get(f))
        if f in cache:
            order.remove(f)
        else:
            if len(cache) > 20:
                del cache[order.pop(0)]
            cache[f] = fctx.filelog()
        order.append(f)
        return fctx

    ctx1 = repo[node1]
    ctx2 = repo[node2]

    if not changes:
        changes = repo.status(ctx1, ctx2, match=match)
    modified, added, removed = changes[:3]

    if not (modified or added or removed):
        return []

    revs = None
    if not repo.ui.quiet:
        if repo.ui.debugflag:
            hexfunc = hex
        else:
            hexfunc = short
        revs = [hexfunc(node) for node in [node1, node2] if node]

    copy = {}
    if opts.git or opts.upgrade:
        copy = copies.copies(repo, ctx1, ctx2, repo[nullid])[0]

    def rundiff(useopts, losedata):
        return trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
                       copy, getfilectx, useopts, losedata, prefix)

    if not opts.upgrade or opts.git:
        return rundiff(opts, None)

    def losedata(fn):
        if not losedatafn or not losedatafn(fn=fn):
            raise GitDiffRequired()

    try:
        # Buffer the whole output until we are sure it can be generated
        return list(rundiff(opts.copy(git=False), losedata))
    except GitDiffRequired:
        return rundiff(opts.copy(git=True), None)
1384 1380
def difflabel(func, *args, **kw):
    '''yields 2-tuples of (output, label) based on the output of func()

    Every chunk produced by func(*args, **kw) is split into lines and
    each line is labelled after the first prefix it starts with. Line
    breaks and unrecognized lines get an empty label. Trailing
    whitespace of changed lines is yielded apart, under its own label.'''
    # order matters: '---' and '+++' must be tried before '-' and '+'
    labels = [('diff', 'diff.diffline'),
              ('copy', 'diff.extended'),
              ('rename', 'diff.extended'),
              ('old', 'diff.extended'),
              ('new', 'diff.extended'),
              ('deleted', 'diff.extended'),
              ('---', 'diff.file_a'),
              ('+++', 'diff.file_b'),
              ('@@', 'diff.hunk'),
              ('-', 'diff.deleted'),
              ('+', 'diff.inserted')]

    def findlabel(text):
        # label of the first matching prefix, None if there is none
        for start, name in labels:
            if text.startswith(start):
                return name
        return None

    for chunk in func(*args, **kw):
        first = True
        for line in chunk.split('\n'):
            if not first:
                yield ('\n', '')
            first = False
            body = line
            if line and line[0] in '+-':
                # highlight trailing whitespace, but only in changed lines
                body = line.rstrip()
            label = findlabel(body)
            if label is None:
                yield (line, '')
            else:
                yield (body, label)
            if body != line:
                yield (line[len(body):], 'diff.trailingwhitespace')
1416 1412
def diffui(*args, **kw):
    '''same as diff(), with the output turned into 2-tuples of
    (output, label) suitable for ui.write()'''
    labelled = difflabel(diff, *args, **kw)
    return labelled
1420 1416
1421 1417
1422 1418 def _addmodehdr(header, omode, nmode):
1423 1419 if omode != nmode:
1424 1420 header.append('old mode %s\n' % omode)
1425 1421 header.append('new mode %s\n' % nmode)
1426 1422
def trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
            copy, getfilectx, opts, losedatafn, prefix):
    '''yield the diff between ctx1 and ctx2, one file after the other

    For every file in modified, added and removed, taken in sorted
    order, the git extended header (if any) is yielded as one string,
    followed by the diff text when there is one.

    copy maps copy destinations to their sources. getfilectx(f, ctx) is
    used to fetch file contents. losedatafn, when set, is called with a
    file name each time a change cannot be represented without the git
    format; it is only called when opts.git is false.'''

    def join(f):
        return os.path.join(prefix, f)

    date1 = util.datestr(ctx1.date())
    man1 = ctx1.manifest()

    # copy sources already reported as renamed
    gone = set()
    # git file modes indexed by flags: symlink, executable, regular
    gitmode = {'l': '120000', 'x': '100755', '': '100644'}

    # reverse of copy: maps sources to destinations
    copyto = dict([(v, k) for k, v in copy.items()])

    if opts.git:
        revs = None

    for f in sorted(modified + added + removed):
        to = None
        tn = None
        # True, False or 'binary'
        dodiff = True
        header = []
        if f in man1:
            to = getfilectx(f, ctx1).data()
        if f not in removed:
            tn = getfilectx(f, ctx2).data()
        a, b = f, f
        if opts.git or losedatafn:
            if f in added:
                mode = gitmode[ctx2.flags(f)]
                if f in copy or f in copyto:
                    if opts.git:
                        if f in copy:
                            a = copy[f]
                        else:
                            a = copyto[f]
                        omode = gitmode[man1.flags(a)]
                        _addmodehdr(header, omode, mode)
                        # only the first destination of a removed source
                        # is a rename, the following ones are copies
                        if a in removed and a not in gone:
                            op = 'rename'
                            gone.add(a)
                        else:
                            op = 'copy'
                        header.append('%s from %s\n' % (op, join(a)))
                        header.append('%s to %s\n' % (op, join(f)))
                        # diff against the copy source
                        to = getfilectx(a, ctx1).data()
                    else:
                        losedatafn(f)
                else:
                    if opts.git:
                        header.append('new file mode %s\n' % mode)
                    elif ctx2.flags(f):
                        losedatafn(f)
                # In theory, if tn was copied or renamed we should check
                # if the source is binary too but the copy record already
                # forces git mode.
                if util.binary(tn):
                    if opts.git:
                        dodiff = 'binary'
                    else:
                        losedatafn(f)
                if not opts.git and not tn:
                    # regular diffs cannot represent new empty file
                    losedatafn(f)
            elif f in removed:
                if opts.git:
                    # have we already reported a copy above?
                    if ((f in copy and copy[f] in added
                         and copyto[copy[f]] == f) or
                        (f in copyto and copyto[f] in added
                         and copy[copyto[f]] == f)):
                        dodiff = False
                    else:
                        header.append('deleted file mode %s\n' %
                                      gitmode[man1.flags(f)])
                elif not to or util.binary(to):
                    # regular diffs cannot represent empty file deletion
                    losedatafn(f)
            else:
                oflag = man1.flags(f)
                nflag = ctx2.flags(f)
                binary = util.binary(to) or util.binary(tn)
                if opts.git:
                    _addmodehdr(header, gitmode[oflag], gitmode[nflag])
                    if binary:
                        dodiff = 'binary'
                elif binary or nflag != oflag:
                    losedatafn(f)
            if opts.git:
                # the "diff" line comes first in the header
                header.insert(0, mdiff.diffline(revs, join(a), join(b), opts))

        if dodiff:
            if dodiff == 'binary':
                text = b85diff(to, tn)
            else:
                text = mdiff.unidiff(to, date1,
                                    # ctx2 date may be dynamic
                                    tn, util.datestr(ctx2.date()),
                                    join(a), join(b), revs, opts=opts)
            # a header made of the sole "diff" line is dropped when there
            # is no text to go with it
            if header and (text or len(header) > 1):
                yield ''.join(header)
            if text:
                yield text
1530 1526
def diffstatdata(lines):
    '''yield a (filename, adds, removes, isbinary) tuple for each file
    found in the diff given as an iterable of lines

    A file starts at a line beginning with "diff". The name is taken
    from "diff --git" and "diff -r" headers; any other "diff" line
    resets the counters but keeps the previous name. A file with no
    added and no removed line is reported as binary.'''
    # raw string: '\s' is not a valid escape in a plain string literal
    diffre = re.compile(r'^diff .*-r [a-z0-9]+\s(.*)$')

    def result(filename, adds, removes):
        isbinary = adds == 0 and removes == 0
        return (filename, adds, removes, isbinary)

    filename, adds, removes = None, 0, 0
    for line in lines:
        if line.startswith('diff'):
            if filename:
                yield result(filename, adds, removes)
            # set numbers to 0 anyway when starting new file
            adds, removes = 0, 0
            if line.startswith('diff --git'):
                filename = gitre.search(line).group(1)
            elif line.startswith('diff -r'):
                # format: "diff -r ... -r ... filename"
                filename = diffre.search(line).group(1)
        elif line.startswith('+') and not line.startswith('+++'):
            adds += 1
        elif line.startswith('-') and not line.startswith('---'):
            removes += 1
    if filename:
        # flush the last file
        yield result(filename, adds, removes)
1554 1550
def diffstat(lines, width=80, git=False):
    '''return the diffstat of the diff given as lines, as one string

    There is one row per file, made of its name, its number of changed
    lines and a +/- graph scaled to fit in width columns, then a summary
    line. With git set, files detected as binary show 'Bin' instead of
    a count.'''
    stats = list(diffstatdata(lines))

    # every entry along with the display width of its file name
    sized = [(filename, adds, removes, isbinary, encoding.colwidth(filename))
             for filename, adds, removes, isbinary in stats]

    totaladds = sum([s[1] for s in sized])
    totalremoves = sum([s[2] for s in sized])
    maxname = max([0] + [s[4] for s in sized])
    maxtotal = max([0] + [s[1] + s[2] for s in sized])
    hasbinary = bool([s for s in sized if s[3]])

    countwidth = len(str(maxtotal))
    if hasbinary:
        # leave room for 'Bin'
        countwidth = max(countwidth, 3)
    graphwidth = max(width - countwidth - maxname - 6, 10)

    def scale(i):
        if maxtotal <= graphwidth:
            return i
        # If diffstat runs out of room it doesn't print anything,
        # which isn't very useful, so always print at least one + or -
        # if there were at least some changes.
        return max(i * graphwidth // maxtotal, int(bool(i)))

    output = []
    for filename, adds, removes, isbinary, namewidth in sized:
        count = adds + removes
        if git and isbinary:
            count = 'Bin'
        padding = ' ' * (maxname - namewidth)
        graph = '+' * scale(adds) + '-' * scale(removes)
        output.append(' %s%s | %*s %s%s\n' %
                      (filename, padding, countwidth, count, graph, ''))

    if stats:
        output.append(_(' %d files changed, %d insertions(+), %d deletions(-)\n')
                      % (len(stats), totaladds, totalremoves))

    return ''.join(output)
1606 1602
def diffstatui(*args, **kw):
    '''like diffstat(), but yields 2-tuples of (output, label) for
    ui.write()

    Lines ending with a graph are split into the text before the graph,
    the run of '+' and the run of '-', each with its own label.
    '''
    graphlabels = [(r'\++', 'diffstat.inserted'),
                   (r'-+', 'diffstat.deleted')]

    for line in diffstat(*args, **kw).splitlines():
        if not line or line[-1] not in '+-':
            yield (line, '')
        else:
            name, graph = line.rsplit(' ', 1)
            yield (name + ' ', '')
            for pattern, label in graphlabels:
                m = re.search(pattern, graph)
                if m:
                    yield (m.group(0), label)
        yield ('\n', '')
General Comments 0
You need to be logged in to leave comments. Login now