##// END OF EJS Templates
patch: implement a new worddiff algorithm...
Jun Wu -
r37750:35632d39 default
parent child Browse files
Show More
@@ -90,14 +90,16 b' except ImportError:'
90 'branches.inactive': 'none',
90 'branches.inactive': 'none',
91 'diff.changed': 'white',
91 'diff.changed': 'white',
92 'diff.deleted': 'red',
92 'diff.deleted': 'red',
93 'diff.deleted.highlight': 'red bold underline',
93 'diff.deleted.changed': 'red',
94 'diff.deleted.unchanged': 'red dim',
94 'diff.diffline': 'bold',
95 'diff.diffline': 'bold',
95 'diff.extended': 'cyan bold',
96 'diff.extended': 'cyan bold',
96 'diff.file_a': 'red bold',
97 'diff.file_a': 'red bold',
97 'diff.file_b': 'green bold',
98 'diff.file_b': 'green bold',
98 'diff.hunk': 'magenta',
99 'diff.hunk': 'magenta',
99 'diff.inserted': 'green',
100 'diff.inserted': 'green',
100 'diff.inserted.highlight': 'green bold underline',
101 'diff.inserted.changed': 'green',
102 'diff.inserted.unchanged': 'green dim',
101 'diff.tab': '',
103 'diff.tab': '',
102 'diff.trailingwhitespace': 'bold red_background',
104 'diff.trailingwhitespace': 'bold red_background',
103 'changeset.public': '',
105 'changeset.public': '',
@@ -50,7 +50,8 b' stringio = util.stringio'
50
50
51 gitre = re.compile(br'diff --git a/(.*) b/(.*)')
51 gitre = re.compile(br'diff --git a/(.*) b/(.*)')
52 tabsplitter = re.compile(br'(\t+|[^\t]+)')
52 tabsplitter = re.compile(br'(\t+|[^\t]+)')
53 _nonwordre = re.compile(br'([^a-zA-Z0-9_\x80-\xff])')
53 wordsplitter = re.compile(br'(\t+| +|[a-zA-Z0-9_\x80-\xff]+|'
54 '[^ \ta-zA-Z0-9_\x80-\xff])')
54
55
55 PatchError = error.PatchError
56 PatchError = error.PatchError
56
57
@@ -2504,8 +2505,78 b' def diffsinglehunk(hunklines):'
2504 if chompline != line:
2505 if chompline != line:
2505 yield (line[len(chompline):], '')
2506 yield (line[len(chompline):], '')
2506
2507
def diffsinglehunkinline(hunklines):
    """yield tokens for a list of lines in a single hunk, with inline colors

    ``hunklines`` is a list of hunk lines, each starting with '-' (deleted)
    or '+' (inserted). The generator yields ``(text, label)`` 2-tuples: all
    deleted lines first, then all inserted ones. Within a line, words are
    labelled ``<side>.changed`` or ``<side>.unchanged`` according to a
    word-level diff of both sides, tabs are labelled ``diff.tab``, and
    whitespace at the end of a line is labelled ``diff.trailingwhitespace``.

    When one of the two sides has no content the work is delegated to
    diffsinglehunk(), since there is nothing to compare against.

    Raises error.ProgrammingError for a line that starts with neither '-'
    nor '+'.
    """
    # prepare deleted, and inserted content (collected in lists and joined
    # once, rather than growing a string line by line)
    deleted = []
    inserted = []
    for line in hunklines:
        # Take a one-element slice instead of indexing: indexing a bytes
        # object yields an int on Python 3, which never equals '-' or '+',
        # and indexing an empty line raises IndexError instead of the
        # intended ProgrammingError.
        marker = line[0:1]
        if marker == '-':
            deleted.append(line[1:])
        elif marker == '+':
            inserted.append(line[1:])
        else:
            raise error.ProgrammingError('unexpected hunk line: %s' % line)
    a = ''.join(deleted)
    b = ''.join(inserted)
    # fast path: if either side is empty, use diffsinglehunk
    if not a or not b:
        for t in diffsinglehunk(hunklines):
            yield t
        return
    # re-split the content into words
    al = wordsplitter.findall(a)
    bl = wordsplitter.findall(b)
    # re-arrange the words to lines since the diff algorithm is line-based:
    # every word becomes its own "line" (a lone newline is kept as is)
    aln = [s if s == '\n' else s + '\n' for s in al]
    bln = [s if s == '\n' else s + '\n' for s in bl]
    an = ''.join(aln)
    bn = ''.join(bln)
    # run the diff algorithm, prepare atokens and btokens
    atokens = []
    btokens = []
    blocks = mdiff.allblocks(an, bn, lines1=aln, lines2=bln)
    for (a1, a2, b1, b2), btype in blocks:
        # only blocks of type '!' are treated as changed
        changed = btype == '!'
        # glue the words of the block back together (al/bl, i.e. without
        # the artificial newlines) and cut the result into real lines
        for token in mdiff.splitnewlines(''.join(al[a1:a2])):
            atokens.append((changed, token))
        for token in mdiff.splitnewlines(''.join(bl[b1:b2])):
            btokens.append((changed, token))

    # yield deleted tokens, then inserted ones
    for prefix, label, tokens in [('-', 'diff.deleted', atokens),
                                  ('+', 'diff.inserted', btokens)]:
        changedlabel = label + '.changed'
        unchangedlabel = label + '.unchanged'
        # True whenever the next token starts a new output line and so must
        # be preceded by the '-' / '+' prefix
        nextisnewline = True
        for changed, token in tokens:
            if nextisnewline:
                yield (prefix, label)
                nextisnewline = False
            # special handling line end
            isendofline = token.endswith('\n')
            if isendofline:
                chomp = token[:-1]  # chomp
                token = chomp.rstrip()  # detect spaces at the end
                endspaces = chomp[len(token):]
            # scan tabs
            for maybetab in tabsplitter.findall(token):
                # slice for the same bytes-indexing reason as above
                if maybetab[0:1] == '\t':
                    currentlabel = 'diff.tab'
                elif changed:
                    currentlabel = changedlabel
                else:
                    currentlabel = unchangedlabel
                yield (maybetab, currentlabel)
            if isendofline:
                if endspaces:
                    yield (endspaces, 'diff.trailingwhitespace')
                yield ('\n', '')
                nextisnewline = True
2573
2507 def difflabel(func, *args, **kw):
2574 def difflabel(func, *args, **kw):
2508 '''yields 2-tuples of (output, label) based on the output of func()'''
2575 '''yields 2-tuples of (output, label) based on the output of func()'''
2576 if kw.get(r'opts') and kw[r'opts'].worddiff:
2577 dodiffhunk = diffsinglehunkinline
2578 else:
2579 dodiffhunk = diffsinglehunk
2509 headprefixes = [('diff', 'diff.diffline'),
2580 headprefixes = [('diff', 'diff.diffline'),
2510 ('copy', 'diff.extended'),
2581 ('copy', 'diff.extended'),
2511 ('rename', 'diff.extended'),
2582 ('rename', 'diff.extended'),
@@ -2525,7 +2596,7 b' def difflabel(func, *args, **kw):'
2525 hunkbuffer = []
2596 hunkbuffer = []
2526 def consumehunkbuffer():
2597 def consumehunkbuffer():
2527 if hunkbuffer:
2598 if hunkbuffer:
2528 for token in diffsinglehunk(hunkbuffer):
2599 for token in dodiffhunk(hunkbuffer):
2529 yield token
2600 yield token
2530 hunkbuffer[:] = []
2601 hunkbuffer[:] = []
2531
2602
@@ -337,41 +337,39 b' test inline color diff'
337 [diff.deleted|-(to see if it works)]
337 [diff.deleted|-(to see if it works)]
338 [diff.inserted|+three of those lines have]
338 [diff.inserted|+three of those lines have]
339 [diff.inserted|+collapsed onto one]
339 [diff.inserted|+collapsed onto one]
340 #if false
341 $ hg diff --config experimental.worddiff=True --color=debug
340 $ hg diff --config experimental.worddiff=True --color=debug
342 [diff.diffline|diff --git a/file1 b/file1]
341 [diff.diffline|diff --git a/file1 b/file1]
343 [diff.file_a|--- a/file1]
342 [diff.file_a|--- a/file1]
344 [diff.file_b|+++ b/file1]
343 [diff.file_b|+++ b/file1]
345 [diff.hunk|@@ -1,16 +1,17 @@]
344 [diff.hunk|@@ -1,16 +1,17 @@]
346 [diff.deleted|-this is the ][diff.deleted.highlight|first][diff.deleted| line]
345 [diff.deleted|-][diff.deleted.changed|this][diff.deleted.unchanged| is the first ][diff.deleted.changed|line]
347 [diff.deleted|-this is the second line]
346 [diff.deleted|-][diff.deleted.unchanged|this is the second line]
348 [diff.deleted|-][diff.deleted.highlight| ][diff.deleted|third line starts with space]
347 [diff.deleted|-][diff.deleted.changed| ][diff.deleted.unchanged|third line starts with space]
349 [diff.deleted|-][diff.deleted.highlight|+][diff.deleted| starts with a ][diff.deleted.highlight|plus][diff.deleted| sign]
348 [diff.deleted|-][diff.deleted.changed|+][diff.deleted.unchanged| starts with a ][diff.deleted.changed|plus][diff.deleted.unchanged| sign]
350 [diff.deleted|-][diff.tab| ][diff.deleted|this one with ][diff.deleted.highlight|one][diff.deleted| tab]
349 [diff.deleted|-][diff.tab| ][diff.deleted.unchanged|this one with ][diff.deleted.changed|one][diff.deleted.unchanged| tab]
351 [diff.deleted|-][diff.tab| ][diff.deleted|now with full ][diff.deleted.highlight|two][diff.deleted| tabs]
350 [diff.deleted|-][diff.tab| ][diff.deleted.unchanged|now with full ][diff.deleted.changed|two][diff.deleted.unchanged| tabs]
352 [diff.deleted|-][diff.tab| ][diff.deleted|now tabs][diff.tab| ][diff.deleted|everywhere, much fun]
351 [diff.deleted|-][diff.tab| ][diff.deleted.unchanged|now ][diff.deleted.unchanged|tabs][diff.tab| ][diff.deleted.unchanged|everywhere, much fun]
353 [diff.inserted|+that is the first paragraph]
352 [diff.inserted|+][diff.inserted.changed|that][diff.inserted.unchanged| is the first ][diff.inserted.changed|paragraph]
354 [diff.inserted|+][diff.inserted.highlight| ][diff.inserted|this is the ][diff.inserted.highlight|second][diff.inserted| line]
353 [diff.inserted|+][diff.inserted.changed| ][diff.inserted.unchanged|this is the second line]
355 [diff.inserted|+third line starts with space]
354 [diff.inserted|+][diff.inserted.unchanged|third line starts with space]
356 [diff.inserted|+][diff.inserted.highlight|-][diff.inserted| starts with a ][diff.inserted.highlight|minus][diff.inserted| sign]
355 [diff.inserted|+][diff.inserted.changed|-][diff.inserted.unchanged| starts with a ][diff.inserted.changed|minus][diff.inserted.unchanged| sign]
357 [diff.inserted|+][diff.tab| ][diff.inserted|this one with ][diff.inserted.highlight|two][diff.inserted| tab]
356 [diff.inserted|+][diff.tab| ][diff.inserted.unchanged|this one with ][diff.inserted.changed|two][diff.inserted.unchanged| tab]
358 [diff.inserted|+][diff.tab| ][diff.inserted|now with full ][diff.inserted.highlight|three][diff.inserted| tabs]
357 [diff.inserted|+][diff.tab| ][diff.inserted.unchanged|now with full ][diff.inserted.changed|three][diff.inserted.unchanged| tabs]
359 [diff.inserted|+][diff.tab| ][diff.inserted|now][diff.inserted.highlight| there are][diff.inserted| tabs][diff.tab| ][diff.inserted|everywhere, much fun]
358 [diff.inserted|+][diff.tab| ][diff.inserted.unchanged|now ][diff.inserted.changed|there are ][diff.inserted.unchanged|tabs][diff.tab| ][diff.inserted.unchanged|everywhere, much fun]
360
359
361 this line won't change
360 this line won't change
362
361
363 two lines are going to
362 two lines are going to
364 [diff.deleted|-be changed into ][diff.deleted.highlight|three][diff.deleted|!]
363 [diff.deleted|-][diff.deleted.unchanged|be changed into ][diff.deleted.changed|three][diff.deleted.unchanged|!]
365 [diff.inserted|+(entirely magically,]
364 [diff.inserted|+][diff.inserted.changed|(entirely magically,]
366 [diff.inserted|+ assuming this works)]
365 [diff.inserted|+][diff.inserted.changed| assuming this works)]
367 [diff.inserted|+be changed into ][diff.inserted.highlight|four][diff.inserted|!]
366 [diff.inserted|+][diff.inserted.unchanged|be changed into ][diff.inserted.changed|four][diff.inserted.unchanged|!]
368
367
369 [diff.deleted|-three of those lines ][diff.deleted.highlight|will]
368 [diff.deleted|-][diff.deleted.unchanged|three of those lines ][diff.deleted.changed|will]
370 [diff.deleted|-][diff.deleted.highlight|collapse][diff.deleted| onto one]
369 [diff.deleted|-][diff.deleted.changed|collapse][diff.deleted.unchanged| onto one]
371 [diff.deleted|-(to see if it works)]
370 [diff.deleted|-][diff.deleted.changed|(to see if it works)]
372 [diff.inserted|+three of those lines ][diff.inserted.highlight|have]
371 [diff.inserted|+][diff.inserted.unchanged|three of those lines ][diff.inserted.changed|have]
373 [diff.inserted|+][diff.inserted.highlight|collapsed][diff.inserted| onto one]
372 [diff.inserted|+][diff.inserted.changed|collapsed][diff.inserted.unchanged| onto one]
374 #endif
375
373
376 multibyte character shouldn't be broken up in word diff:
374 multibyte character shouldn't be broken up in word diff:
377
375
@@ -386,12 +384,10 b" multibyte character shouldn't be broken "
386 > EOF
384 > EOF
387 $ hg ci -m 'slightly change utf8 char' utf8
385 $ hg ci -m 'slightly change utf8 char' utf8
388
386
389 #if false
390 $ hg diff --config experimental.worddiff=True --color=debug -c.
387 $ hg diff --config experimental.worddiff=True --color=debug -c.
391 [diff.diffline|diff --git a/utf8 b/utf8]
388 [diff.diffline|diff --git a/utf8 b/utf8]
392 [diff.file_a|--- a/utf8]
389 [diff.file_a|--- a/utf8]
393 [diff.file_b|+++ b/utf8]
390 [diff.file_b|+++ b/utf8]
394 [diff.hunk|@@ -1,1 +1,1 @@]
391 [diff.hunk|@@ -1,1 +1,1 @@]
395 [diff.deleted|-blah ][diff.deleted.highlight|\xe3\x82\xa2][diff.deleted| blah] (esc)
392 [diff.deleted|-][diff.deleted.unchanged|blah ][diff.deleted.changed|\xe3\x82\xa2][diff.deleted.unchanged| blah] (esc)
396 [diff.inserted|+blah ][diff.inserted.highlight|\xe3\x82\xa4][diff.inserted| blah] (esc)
393 [diff.inserted|+][diff.inserted.unchanged|blah ][diff.inserted.changed|\xe3\x82\xa4][diff.inserted.unchanged| blah] (esc)
397 #endif
General Comments 0
You need to be logged in to leave comments. Login now