##// END OF EJS Templates
patch: implement a new worddiff algorithm...
Jun Wu -
r37750:35632d39 default
parent child Browse files
Show More
@@ -90,14 +90,16 b' except ImportError:'
90 90 'branches.inactive': 'none',
91 91 'diff.changed': 'white',
92 92 'diff.deleted': 'red',
93 'diff.deleted.highlight': 'red bold underline',
93 'diff.deleted.changed': 'red',
94 'diff.deleted.unchanged': 'red dim',
94 95 'diff.diffline': 'bold',
95 96 'diff.extended': 'cyan bold',
96 97 'diff.file_a': 'red bold',
97 98 'diff.file_b': 'green bold',
98 99 'diff.hunk': 'magenta',
99 100 'diff.inserted': 'green',
100 'diff.inserted.highlight': 'green bold underline',
101 'diff.inserted.changed': 'green',
102 'diff.inserted.unchanged': 'green dim',
101 103 'diff.tab': '',
102 104 'diff.trailingwhitespace': 'bold red_background',
103 105 'changeset.public': '',
@@ -50,7 +50,8 b' stringio = util.stringio'
50 50
51 51 gitre = re.compile(br'diff --git a/(.*) b/(.*)')
52 52 tabsplitter = re.compile(br'(\t+|[^\t]+)')
53 _nonwordre = re.compile(br'([^a-zA-Z0-9_\x80-\xff])')
# Split content into word-diff tokens. Each token is exactly one of:
#   - a run of tabs
#   - a run of spaces
#   - a run of "word" bytes (ASCII alphanumerics, '_', and any byte >= 0x80,
#     so the bytes of a multibyte character stay together in one token)
#   - any other single byte (punctuation, '\n', ...)
# Both halves of the pattern are raw bytes literals: bytes and non-bytes
# literals cannot be implicitly concatenated on Python 3, and keeping the
# escapes raw leaves their interpretation to the regex engine, like the
# other patterns defined next to this one.
wordsplitter = re.compile(br'(\t+| +|[a-zA-Z0-9_\x80-\xff]+|'
                          br'[^ \ta-zA-Z0-9_\x80-\xff])')
54 55
55 56 PatchError = error.PatchError
56 57
@@ -2504,8 +2505,78 b' def diffsinglehunk(hunklines):'
2504 2505 if chompline != line:
2505 2506 yield (line[len(chompline):], '')
2506 2507
def diffsinglehunkinline(hunklines):
    """yield tokens for a list of lines in a single hunk, with inline colors

    ``hunklines`` is a list of hunk lines, each starting with ``-`` (deleted)
    or ``+`` (inserted). The function yields ``(text, label)`` 2-tuples.

    The deleted side is emitted first, then the inserted side. Every output
    line starts with its ``-``/``+`` prefix labelled ``diff.deleted`` or
    ``diff.inserted``. The rest of the line is labelled ``<label>.changed``
    or ``<label>.unchanged`` depending on whether the word-level diff found
    a difference. Runs of tabs are labelled ``diff.tab``, whitespace at the
    end of a line is labelled ``diff.trailingwhitespace`` and the newline
    itself is yielded with an empty label.

    If either side is empty there is nothing to compare word by word, so
    the work is delegated to ``diffsinglehunk``.

    Raises ``error.ProgrammingError`` for a line that starts with neither
    ``-`` nor ``+`` (this includes an empty line).
    """
    # prepare deleted, and inserted content
    deleted = []
    inserted = []
    for line in hunklines:
        # Take a one-element slice rather than indexing: indexing a bytes
        # object on Python 3 yields an int, which never equals the marker,
        # and indexing an empty line would raise IndexError instead of the
        # intended ProgrammingError.
        marker = line[:1]
        if marker == '-':
            deleted.append(line[1:])
        elif marker == '+':
            inserted.append(line[1:])
        else:
            raise error.ProgrammingError('unexpected hunk line: %s' % line)
    # join once instead of growing a string with += inside the loop
    a = ''.join(deleted)
    b = ''.join(inserted)
    # fast path: if either side is empty, use diffsinglehunk
    if not a or not b:
        for t in diffsinglehunk(hunklines):
            yield t
        return
    # re-split the content into words
    al = wordsplitter.findall(a)
    bl = wordsplitter.findall(b)
    # re-arrange the words to lines since the diff algorithm is line-based:
    # every word gets its own line (a bare newline token already is one)
    aln = [s if s == '\n' else s + '\n' for s in al]
    bln = [s if s == '\n' else s + '\n' for s in bl]
    an = ''.join(aln)
    bn = ''.join(bln)
    # run the diff algorithm, prepare atokens and btokens
    atokens = []
    btokens = []
    blocks = mdiff.allblocks(an, bn, lines1=aln, lines2=bln)
    for (a1, a2, b1, b2), btype in blocks:
        # block indices refer to the word lists; '!' marks a changed block
        changed = btype == '!'
        # glue the words of the block back together and cut them at the
        # original line boundaries, so each token is at most one real line
        for token in mdiff.splitnewlines(''.join(al[a1:a2])):
            atokens.append((changed, token))
        for token in mdiff.splitnewlines(''.join(bl[b1:b2])):
            btokens.append((changed, token))

    # yield deleted tokens, then inserted ones
    for prefix, label, tokens in [('-', 'diff.deleted', atokens),
                                  ('+', 'diff.inserted', btokens)]:
        nextisnewline = True
        for changed, token in tokens:
            if nextisnewline:
                # first token of an output line: emit the -/+ prefix
                yield (prefix, label)
                nextisnewline = False
            # special handling line end
            isendofline = token.endswith('\n')
            if isendofline:
                chomp = token[:-1]  # chomp
                token = chomp.rstrip()  # detect spaces at the end
                endspaces = chomp[len(token):]
            # scan tabs
            for maybetab in tabsplitter.findall(token):
                # one-element slice for the same bytes-safety reason as above
                if maybetab[:1] == '\t':
                    currentlabel = 'diff.tab'
                elif changed:
                    currentlabel = label + '.changed'
                else:
                    currentlabel = label + '.unchanged'
                yield (maybetab, currentlabel)
            if isendofline:
                if endspaces:
                    yield (endspaces, 'diff.trailingwhitespace')
                yield ('\n', '')
                nextisnewline = True
2573
2507 2574 def difflabel(func, *args, **kw):
2508 2575 '''yields 2-tuples of (output, label) based on the output of func()'''
2576 if kw.get(r'opts') and kw[r'opts'].worddiff:
2577 dodiffhunk = diffsinglehunkinline
2578 else:
2579 dodiffhunk = diffsinglehunk
2509 2580 headprefixes = [('diff', 'diff.diffline'),
2510 2581 ('copy', 'diff.extended'),
2511 2582 ('rename', 'diff.extended'),
@@ -2525,7 +2596,7 b' def difflabel(func, *args, **kw):'
2525 2596 hunkbuffer = []
2526 2597 def consumehunkbuffer():
2527 2598 if hunkbuffer:
2528 for token in diffsinglehunk(hunkbuffer):
2599 for token in dodiffhunk(hunkbuffer):
2529 2600 yield token
2530 2601 hunkbuffer[:] = []
2531 2602
@@ -337,41 +337,39 b' test inline color diff'
337 337 [diff.deleted|-(to see if it works)]
338 338 [diff.inserted|+three of those lines have]
339 339 [diff.inserted|+collapsed onto one]
340 #if false
341 340 $ hg diff --config experimental.worddiff=True --color=debug
342 341 [diff.diffline|diff --git a/file1 b/file1]
343 342 [diff.file_a|--- a/file1]
344 343 [diff.file_b|+++ b/file1]
345 344 [diff.hunk|@@ -1,16 +1,17 @@]
346 [diff.deleted|-this is the ][diff.deleted.highlight|first][diff.deleted| line]
347 [diff.deleted|-this is the second line]
348 [diff.deleted|-][diff.deleted.highlight| ][diff.deleted|third line starts with space]
349 [diff.deleted|-][diff.deleted.highlight|+][diff.deleted| starts with a ][diff.deleted.highlight|plus][diff.deleted| sign]
350 [diff.deleted|-][diff.tab| ][diff.deleted|this one with ][diff.deleted.highlight|one][diff.deleted| tab]
351 [diff.deleted|-][diff.tab| ][diff.deleted|now with full ][diff.deleted.highlight|two][diff.deleted| tabs]
352 [diff.deleted|-][diff.tab| ][diff.deleted|now tabs][diff.tab| ][diff.deleted|everywhere, much fun]
353 [diff.inserted|+that is the first paragraph]
354 [diff.inserted|+][diff.inserted.highlight| ][diff.inserted|this is the ][diff.inserted.highlight|second][diff.inserted| line]
355 [diff.inserted|+third line starts with space]
356 [diff.inserted|+][diff.inserted.highlight|-][diff.inserted| starts with a ][diff.inserted.highlight|minus][diff.inserted| sign]
357 [diff.inserted|+][diff.tab| ][diff.inserted|this one with ][diff.inserted.highlight|two][diff.inserted| tab]
358 [diff.inserted|+][diff.tab| ][diff.inserted|now with full ][diff.inserted.highlight|three][diff.inserted| tabs]
359 [diff.inserted|+][diff.tab| ][diff.inserted|now][diff.inserted.highlight| there are][diff.inserted| tabs][diff.tab| ][diff.inserted|everywhere, much fun]
345 [diff.deleted|-][diff.deleted.changed|this][diff.deleted.unchanged| is the first ][diff.deleted.changed|line]
346 [diff.deleted|-][diff.deleted.unchanged|this is the second line]
347 [diff.deleted|-][diff.deleted.changed| ][diff.deleted.unchanged|third line starts with space]
348 [diff.deleted|-][diff.deleted.changed|+][diff.deleted.unchanged| starts with a ][diff.deleted.changed|plus][diff.deleted.unchanged| sign]
349 [diff.deleted|-][diff.tab| ][diff.deleted.unchanged|this one with ][diff.deleted.changed|one][diff.deleted.unchanged| tab]
350 [diff.deleted|-][diff.tab| ][diff.deleted.unchanged|now with full ][diff.deleted.changed|two][diff.deleted.unchanged| tabs]
351 [diff.deleted|-][diff.tab| ][diff.deleted.unchanged|now ][diff.deleted.unchanged|tabs][diff.tab| ][diff.deleted.unchanged|everywhere, much fun]
352 [diff.inserted|+][diff.inserted.changed|that][diff.inserted.unchanged| is the first ][diff.inserted.changed|paragraph]
353 [diff.inserted|+][diff.inserted.changed| ][diff.inserted.unchanged|this is the second line]
354 [diff.inserted|+][diff.inserted.unchanged|third line starts with space]
355 [diff.inserted|+][diff.inserted.changed|-][diff.inserted.unchanged| starts with a ][diff.inserted.changed|minus][diff.inserted.unchanged| sign]
356 [diff.inserted|+][diff.tab| ][diff.inserted.unchanged|this one with ][diff.inserted.changed|two][diff.inserted.unchanged| tab]
357 [diff.inserted|+][diff.tab| ][diff.inserted.unchanged|now with full ][diff.inserted.changed|three][diff.inserted.unchanged| tabs]
358 [diff.inserted|+][diff.tab| ][diff.inserted.unchanged|now ][diff.inserted.changed|there are ][diff.inserted.unchanged|tabs][diff.tab| ][diff.inserted.unchanged|everywhere, much fun]
360 359
361 360 this line won't change
362 361
363 362 two lines are going to
364 [diff.deleted|-be changed into ][diff.deleted.highlight|three][diff.deleted|!]
365 [diff.inserted|+(entirely magically,]
366 [diff.inserted|+ assuming this works)]
367 [diff.inserted|+be changed into ][diff.inserted.highlight|four][diff.inserted|!]
363 [diff.deleted|-][diff.deleted.unchanged|be changed into ][diff.deleted.changed|three][diff.deleted.unchanged|!]
364 [diff.inserted|+][diff.inserted.changed|(entirely magically,]
365 [diff.inserted|+][diff.inserted.changed| assuming this works)]
366 [diff.inserted|+][diff.inserted.unchanged|be changed into ][diff.inserted.changed|four][diff.inserted.unchanged|!]
368 367
369 [diff.deleted|-three of those lines ][diff.deleted.highlight|will]
370 [diff.deleted|-][diff.deleted.highlight|collapse][diff.deleted| onto one]
371 [diff.deleted|-(to see if it works)]
372 [diff.inserted|+three of those lines ][diff.inserted.highlight|have]
373 [diff.inserted|+][diff.inserted.highlight|collapsed][diff.inserted| onto one]
374 #endif
368 [diff.deleted|-][diff.deleted.unchanged|three of those lines ][diff.deleted.changed|will]
369 [diff.deleted|-][diff.deleted.changed|collapse][diff.deleted.unchanged| onto one]
370 [diff.deleted|-][diff.deleted.changed|(to see if it works)]
371 [diff.inserted|+][diff.inserted.unchanged|three of those lines ][diff.inserted.changed|have]
372 [diff.inserted|+][diff.inserted.changed|collapsed][diff.inserted.unchanged| onto one]
375 373
376 374 multibyte character shouldn't be broken up in word diff:
377 375
@@ -386,12 +384,10 b" multibyte character shouldn't be broken "
386 384 > EOF
387 385 $ hg ci -m 'slightly change utf8 char' utf8
388 386
389 #if false
390 387 $ hg diff --config experimental.worddiff=True --color=debug -c.
391 388 [diff.diffline|diff --git a/utf8 b/utf8]
392 389 [diff.file_a|--- a/utf8]
393 390 [diff.file_b|+++ b/utf8]
394 391 [diff.hunk|@@ -1,1 +1,1 @@]
395 [diff.deleted|-blah ][diff.deleted.highlight|\xe3\x82\xa2][diff.deleted| blah] (esc)
396 [diff.inserted|+blah ][diff.inserted.highlight|\xe3\x82\xa4][diff.inserted| blah] (esc)
397 #endif
392 [diff.deleted|-][diff.deleted.unchanged|blah ][diff.deleted.changed|\xe3\x82\xa2][diff.deleted.unchanged| blah] (esc)
393 [diff.inserted|+][diff.inserted.unchanged|blah ][diff.inserted.changed|\xe3\x82\xa4][diff.inserted.unchanged| blah] (esc)
General Comments 0
You need to be logged in to leave comments. Login now