##// END OF EJS Templates
diff: drop internal inconsistent use of '...' as context line numbers...
Mads Kiilerich -
r8310:32757d5e default
parent child Browse files
Show More
@@ -1,681 +1,678 b''
1 1 # -*- coding: utf-8 -*-
2 2 # This program is free software: you can redistribute it and/or modify
3 3 # it under the terms of the GNU General Public License as published by
4 4 # the Free Software Foundation, either version 3 of the License, or
5 5 # (at your option) any later version.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 """
15 15 kallithea.lib.diffs
16 16 ~~~~~~~~~~~~~~~~~~~
17 17
18 18 Set of diffing helpers, previously part of vcs
19 19
20 20
21 21 This file was forked by the Kallithea project in July 2014.
22 22 Original author and date, and relevant copyright and licensing information is below:
23 23 :created_on: Dec 4, 2011
24 24 :author: marcink
25 25 :copyright: (c) 2013 RhodeCode GmbH, and others.
26 26 :license: GPLv3, see LICENSE.md for more details.
27 27 """
28 28 import difflib
29 29 import logging
30 30 import re
31 31
32 32 from tg.i18n import ugettext as _
33 33
34 34 from kallithea.lib import helpers as h
35 35 from kallithea.lib.utils2 import safe_str
36 36 from kallithea.lib.vcs.backends.base import EmptyChangeset
37 37 from kallithea.lib.vcs.exceptions import VCSError
38 38 from kallithea.lib.vcs.nodes import FileNode, SubModuleNode
39 39
40 40
41 41 log = logging.getLogger(__name__)
42 42
43 43
44 44 def _safe_id(idstring):
45 45 r"""Make a string safe for including in an id attribute.
46 46
47 47 The HTML spec says that id attributes 'must begin with
48 48 a letter ([A-Za-z]) and may be followed by any number
49 49 of letters, digits ([0-9]), hyphens ("-"), underscores
50 50 ("_"), colons (":"), and periods (".")'. These regexps
51 51 are slightly over-zealous, in that they remove colons
52 52 and periods unnecessarily.
53 53
54 54 Whitespace is transformed into underscores, and then
55 55 anything which is not a hyphen or a character that
56 56 matches \w (alphanumerics and underscore) is removed.
57 57
58 58 """
59 59 # Transform all whitespace to underscore
60 60 idstring = re.sub(r'\s', "_", idstring)
61 61 # Remove everything that is not a hyphen or a member of \w
62 62 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
63 63 return idstring
64 64
65 65
def as_html(table_class='code-difftable', line_class='line',
            old_lineno_class='lineno old', new_lineno_class='lineno new',
            no_lineno_class='lineno',
            code_class='code', enable_comments=False, parsed_lines=None):
    """
    Return given diff as html table with customized css classes

    :param parsed_lines: list of per-file dicts, each with a 'filename' and
        a list of 'chunks'; every chunk is a list of line dicts with the keys
        'old_lineno', 'new_lineno', 'action' and 'line'. ``None`` is handled
        like an empty list.
    :param enable_comments: when false, code cells get the extra
        'no-comment' css class
    :returns: the html as a string, or None if no file had any chunk
    """
    def _link_to_if(condition, label, url):
        """
        Generates a link if condition is met or just the label if not.
        """
        if condition:
            return '''<a href="%(url)s" data-pseudo-content="%(label)s"></a>''' % {
                'url': url,
                'label': label
            }
        else:
            return label

    comments = '' if enable_comments else 'no-comment'

    _html_empty = True
    _html = []
    _html.append('''<table class="%(table_class)s">\n''' % {
        'table_class': table_class
    })

    for file_info in parsed_lines or ():
        # the id prefix only depends on the file - compute it once per file
        # instead of twice per rendered row
        file_id = _safe_id(file_info['filename'])
        for chunk in file_info['chunks']:
            _html_empty = False
            for change in chunk:
                old_lineno = change['old_lineno']
                new_lineno = change['new_lineno']
                # a row with neither line number gets a single line number
                # cell spanning both columns
                no_lineno = not old_lineno and not new_lineno

                anchor_old = "%(filename)s_o%(oldline_no)s" % {
                    'filename': file_id,
                    'oldline_no': old_lineno
                }
                anchor_new = "%(filename)s_n%(newline_no)s" % {
                    'filename': file_id,
                    'newline_no': new_lineno
                }
                # only cells that show a line number get an id to link to
                anchor_old_id = 'id="%s"' % anchor_old if old_lineno else ''
                anchor_new_id = 'id="%s"' % anchor_new if new_lineno else ''

                _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                    'lc': line_class,
                    'action': change['action']
                })
                ###########################################################
                # OLD LINE NUMBER
                ###########################################################
                _html.append('''\t<td %(a_id)s class="%(olc)s" %(colspan)s>''' % {
                    'a_id': anchor_old_id,
                    'olc': no_lineno_class if no_lineno else old_lineno_class,
                    'colspan': 'colspan="2"' if no_lineno else ''
                })
                _html.append('''%(link)s''' % {
                    'link': _link_to_if(not no_lineno, old_lineno,
                                        '#%s' % anchor_old)
                })
                _html.append('''</td>\n''')
                ###########################################################
                # NEW LINE NUMBER
                ###########################################################
                if not no_lineno:
                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                        'a_id': anchor_new_id,
                        'nlc': new_lineno_class
                    })
                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(True, new_lineno,
                                            '#%s' % anchor_new)
                    })
                    _html.append('''</td>\n''')
                ###########################################################
                # CODE
                ###########################################################
                _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                    'cc': code_class,
                    'inc': comments
                })
                # NOTE(review): change['line'] is inserted as-is - it must
                # already be HTML escaped by whoever built parsed_lines
                _html.append('''\n\t\t<div class="add-bubble"><div>&nbsp;</div></div><pre>%(code)s</pre>\n''' % {
                    'code': change['line']
                })

                _html.append('''\t</td>''')
                _html.append('''\n</tr>\n''')
    _html.append('''</table>''')
    if _html_empty:
        return None
    return ''.join(_html)
167 164
168 165
def wrap_to_table(html):
    """Given a string with html, return it wrapped in a one-row table that
    uses the same css classes ('code-difftable', 'line', 'lineno', 'code')
    as the diff table built by as_html.

    The string is inserted into the <pre> element as-is, without escaping.
    """
    return '''\
<table class="code-difftable">
<tr class="line no-comment">
<td class="lineno new"></td>
<td class="code no-comment"><pre>%s</pre></td>
</tr>
</table>''' % html
179 176
180 177
def wrapped_diff(filenode_old, filenode_new, diff_limit=None,
                 ignore_whitespace=True, line_context=3,
                 enable_comments=False):
    """
    Build the html table with the diff between two file nodes.

    A message is shown instead of the diff when one of the nodes is binary,
    when ``diff_limit`` is -1, or when ``diff_limit`` is set and either node
    has a size that is not below it. ``diff_limit=None`` means no limit.

    Returns a tuple of: old changeset raw_id, new changeset raw_id, old path,
    html, stats as returned by ``DiffProcessor.stat()`` (or ``(0, 0)`` when
    no diff was computed), and the operation of the first parsed file (or
    None).
    """
    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

    op = None
    stats = (0, 0)
    a_path = filenode_old.path  # default, might be overridden by actual rename in diff

    if filenode_old.is_binary or filenode_new.is_binary:
        html_diff = wrap_to_table(_('Binary file'))

    elif diff_limit == -1 or (
            diff_limit is not None and
            not (filenode_old.size < diff_limit and filenode_new.size < diff_limit)):
        html_diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                                    'diff menu to display this diff'))

    else:
        processor = DiffProcessor(get_gitdiff(filenode_old, filenode_new,
                                              ignore_whitespace=ignore_whitespace,
                                              context=line_context))
        parsed = processor.parsed
        if parsed:  # there should be exactly one element, for the specified file
            first = parsed[0]
            op = first['operation']
            a_path = first['old_filename']

        html_diff = as_html(parsed_lines=parsed, enable_comments=enable_comments)
        stats = processor.stat()

    if not html_diff:
        # nothing was rendered - explain why
        submodule = next((node for node in (filenode_new, filenode_old)
                          if isinstance(node, SubModuleNode)), None)
        if submodule is not None:
            html_diff = wrap_to_table(h.escape('Submodule %r' % submodule))
        else:
            html_diff = wrap_to_table(_('No changes detected'))

    return (filenode_old.changeset.raw_id, filenode_new.changeset.raw_id,
            a_path, html_diff, stats, op)
229 226
230 227
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    An empty bytes string is returned if either node is a ``SubModuleNode``.
    Raises ``VCSError`` if a node is not a ``FileNode``.
    """
    if any(isinstance(node, SubModuleNode) for node in (filenode_new, filenode_old)):
        return b''

    for node in (filenode_old, filenode_new):
        if isinstance(node, FileNode):
            continue
        raise VCSError("Given object should be FileNode object, not %s"
                       % node.__class__)

    repo = filenode_new.changeset.repository
    # changesets without a raw_id attribute are diffed as the empty changeset
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)

    # a falsy context (None, 0) falls back to the default of 3 lines
    return get_diff(repo, old_raw_id, new_raw_id, filenode_new.path,
                    ignore_whitespace, context or 3)
253 250
254 251
def get_diff(scm_instance, rev1, rev2, path=None, ignore_whitespace=False, context=3):
    """
    Ask ``scm_instance`` for the diff between ``rev1`` and ``rev2``.

    The arguments are passed on to ``scm_instance.get_diff``. If that raises
    ``MemoryError``, an error message is flashed and an empty bytes string
    is returned instead.
    """
    diff_options = {
        'path': path,
        'ignore_whitespace': ignore_whitespace,
        'context': context,
    }
    try:
        return scm_instance.get_diff(rev1, rev2, **diff_options)
    except MemoryError:
        h.flash('MemoryError: Diff is too big', category='error')
        return b''
265 262
266 263
# Operation codes, used as keys of the per-file stats['ops'] dict built by
# DiffProcessor. Each key maps to a human readable description of what
# happened to the file.
NEW_FILENODE = 1
DEL_FILENODE = 2
MOD_FILENODE = 3
RENAMED_FILENODE = 4
COPIED_FILENODE = 5
CHMOD_FILENODE = 6
BIN_FILENODE = 7
274 271
275 272
class DiffProcessor(object):
    """
    Give it a unified or git diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.

    After construction:

    - ``parsed`` is the list of per-file dicts
    - ``limited_diff`` is true if files were skipped because of ``diff_limit``
    - ``adds`` / ``removes`` are the total added / removed line counts of the
      files that were parsed, also available from ``stat()``
    """
    _diff_git_re = re.compile(b'^diff --git', re.MULTILINE)

    def __init__(self, diff, vcs='hg', diff_limit=None, inline_diff=True):
        """
        :param diff: the diff, as bytes
        :param vcs: type of version control hg or git
        :param diff_limit: define the size of diff that is considered "big"
            based on that parameter cut off will be triggered, set to None
            to show full diff
        :param inline_diff: if true, single line replacements get inline
            <del>/<ins> markup
        :raises Exception: if diff is not bytes
        """
        if not isinstance(diff, bytes):
            raise Exception('Diff must be bytes - got %s' % type(diff))

        self._diff = memoryview(diff)
        self.adds = 0
        self.removes = 0
        self.diff_limit = diff_limit
        self.limited_diff = False
        self.vcs = vcs
        self.parsed = self._parse_gitdiff(inline_diff=inline_diff)

    def _parse_gitdiff(self, inline_diff):
        """Parse self._diff and return a list of dicts with meta info and chunks for each file.
        Might set limited_diff, and adds up the line counts in adds and removes.
        Optionally, do an extra pass and add extra markup of one-liner changes.
        """
        _files = []  # list of dicts with meta info and chunks

        # every file section starts with a 'diff --git' line and ends where
        # the next one starts (or at the end of the diff)
        starts = [m.start() for m in self._diff_git_re.finditer(self._diff)]
        starts.append(len(self._diff))

        for start, end in zip(starts, starts[1:]):
            if self.diff_limit and end > self.diff_limit:
                self.limited_diff = True
                continue

            head, diff_lines = _get_header(self.vcs, self._diff[start:end])

            op = None
            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'ops': {},
            }

            if head['deleted_file_mode']:
                op = 'removed'
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            elif head['new_file_mode']:
                op = 'added'
                stats['binary'] = True
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
            else:  # modify operation, can be cp, rename, chmod
                # CHMOD
                if head['new_mode'] and head['old_mode']:
                    op = 'modified'
                    stats['binary'] = True
                    stats['ops'][CHMOD_FILENODE] = ('modified file chmod %s => %s'
                                                    % (head['old_mode'], head['new_mode']))
                # RENAME
                if (head['rename_from'] and head['rename_to']
                        and head['rename_from'] != head['rename_to']):
                    op = 'renamed'
                    stats['binary'] = True
                    stats['ops'][RENAMED_FILENODE] = ('file renamed from %s to %s'
                                                      % (head['rename_from'], head['rename_to']))
                # COPY
                if head.get('copy_from') and head.get('copy_to'):
                    op = 'modified'
                    stats['binary'] = True
                    stats['ops'][COPIED_FILENODE] = ('file copied from %s to %s'
                                                     % (head['copy_from'], head['copy_to']))
            # FALL BACK: detect missed old style add or remove
            if op is None:
                if not head['a_file'] and head['b_file']:
                    op = 'added'
                    stats['binary'] = True
                    stats['ops'][NEW_FILENODE] = 'new file'

                elif head['a_file'] and not head['b_file']:
                    op = 'removed'
                    stats['binary'] = True
                    stats['ops'][DEL_FILENODE] = 'deleted file'

            # it's not ADD not DELETE
            if op is None:
                op = 'modified'
                stats['binary'] = True
                stats['ops'][MOD_FILENODE] = 'modified file'

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
                chunks, added, deleted = _parse_lines(diff_lines)
                stats['binary'] = False
                stats['added'] = added
                stats['deleted'] = deleted
                # keep the totals reported by stat() up to date - they used
                # to stay at 0 forever
                self.adds += added
                self.removes += deleted
                # explicit mark that it's a modified file
                if op == 'modified':
                    stats['ops'][MOD_FILENODE] = 'modified file'
            else:  # Git binary patch (or empty diff)
                # Git binary patch
                if head['bin_patch']:
                    stats['ops'][BIN_FILENODE] = 'binary diff not shown'
                chunks = []

            if op == 'removed' and chunks:
                # a way of seeing deleted content could perhaps be nice - but
                # not with the current UI
                chunks = []

            # The first chunk lists the operations (except plain
            # modification) as context lines. The messages contain file
            # names from the diff and are rendered as HTML like any other
            # line, so they get the same escaping as the diff body lines.
            chunks.insert(0, [{
                'old_lineno': '',
                'new_lineno': '',
                'action': 'context',
                'line': _escaper(msg),
            } for _op, msg in stats['ops'].items()
                if _op != MOD_FILENODE])

            _files.append({
                'old_filename': head['a_path'],
                'filename': head['b_path'],
                'old_revision': head['a_blob_id'],
                'new_revision': head['b_blob_id'],
                'chunks': chunks,
                'operation': op,
                'stats': stats,
            })

        if not inline_diff:
            return _files

        # highlight inline changes when one del is followed by one add
        for diff_data in _files:
            for chunk in diff_data['chunks']:
                lineiter = iter(chunk)
                # running out of lines anywhere below ends the scan of this
                # chunk through StopIteration
                try:
                    peekline = next(lineiter)
                    while True:
                        # find a first del line
                        while peekline['action'] != 'del':
                            peekline = next(lineiter)
                        delline = peekline
                        peekline = next(lineiter)
                        # if not followed by add, eat all following del lines
                        if peekline['action'] != 'add':
                            while peekline['action'] == 'del':
                                peekline = next(lineiter)
                            continue
                        # found an add - make sure it is the only one
                        addline = peekline
                        try:
                            peekline = next(lineiter)
                        except StopIteration:
                            # add was last line - ok
                            _highlight_inline_diff(delline, addline)
                            raise
                        if peekline['action'] != 'add':
                            # there was only one add line - ok
                            _highlight_inline_diff(delline, addline)
                except StopIteration:
                    pass

        return _files

    def stat(self):
        """
        Returns tuple of added, and removed lines for this instance, summed
        over all the files that were parsed
        """
        return self.adds, self.removes
454 451
455 452
_escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)|(\t\n|\t$)')


def _escaper(string):
    """
    Do HTML escaping/markup

    The input is converted with safe_str. '&', '<' and '>' are replaced with
    entities, and tabs, carriage returns and a trailing space get markup
    elements.
    """
    # One replacement per capture group of _escape_re, in group order.
    # NOTE(review): the single tab alternative comes before the trailing tab
    # alternative in the pattern, so the last entry looks unreachable - confirm.
    replacements = (
        '&amp;',
        '&lt;',
        '&gt;',
        '<u>\t</u>',
        '<u class="cr"></u>',
        ' <i></i>',
        '<u>\t</u><i></i>',
    )

    def substitute(m):
        # The groups are alternatives that never match the empty string, so
        # exactly one of them matched and lastindex is its number.
        matched_group = m.lastindex
        assert matched_group is not None
        return replacements[matched_group - 1]

    return _escape_re.sub(substitute, safe_str(string))
483 480
484 481
# Header of one file section in a diff produced by Git: the 'diff --git' line
# followed by the optional extended header lines. 'similarity index' and
# 'rename from' / 'rename to' are only matched together, as one optional
# group.
_git_header_re = re.compile(br"""
    ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
    (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
       ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
    (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
       ^rename[ ]from[ ](?P<rename_from>.+)\n
       ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
    (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
    (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
    (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
        \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
    (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
    (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
    (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
""", re.VERBOSE | re.MULTILINE)


# Header of one file section in a git style diff produced by Mercurial.
# Unlike the Git pattern, 'similarity index' is optional on its own, and
# 'copy from' / 'copy to' lines are recognized.
_hg_header_re = re.compile(br"""
    ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
    (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
       ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
    (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
    (?:^rename[ ]from[ ](?P<rename_from>.+)\n
       ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
    (?:^copy[ ]from[ ](?P<copy_from>.+)\n
       ^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?
    (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
    (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
    (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
        \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
    (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
    (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
    (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
""", re.VERBOSE | re.MULTILINE)


# Matches (without consuming anything) unless the text starts with '@',
# 'literal ' or 'delta '. _get_header uses it to reject a header that is
# followed by anything else.
_header_next_check = re.compile(br'''(?!@)(?!literal )(?!delta )''')
522 519
523 520
def _get_header(vcs, diff_chunk):
    """
    Split the diff of a single file into its header and its body.

    Returns a tuple with:

    1. A dict with meta info - one entry for each named group of the header
       pattern, with None for the parts that were not present:

        a_path, b_path, similarity_index, rename_from, rename_to,
        old_mode, new_mode, new_file_mode, deleted_file_mode,
        a_blob_id, b_blob_id, b_mode, a_file, b_file

    2. An iterator yielding the remaining lines with simple HTML markup.

    Raises Exception if ``vcs`` is neither 'git' nor 'hg', if the header is
    not recognized, or if it is followed by something unexpected.
    """
    if vcs == 'git':
        header_re = _git_header_re
    elif vcs == 'hg':
        header_re = _hg_header_re
    else:
        header_re = None

    match = header_re.match(diff_chunk) if header_re is not None else None
    if match is None:
        raise Exception('diff not recognized as valid %s diff' % vcs)

    meta_info = {}
    for key, value in match.groupdict().items():
        meta_info[key] = None if value is None else safe_str(value)

    header_end = match.end()
    rest = diff_chunk[header_end:]
    if rest and _header_next_check.match(rest):
        raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, safe_str(bytes(diff_chunk[:header_end])), safe_str(bytes(rest[:1000]))))

    # don't split on \r as str.splitlines do
    raw_lines = (m.group(0) for m in re.finditer(br'.*\n|.+$', rest))
    return meta_info, map(_escaper, raw_lines)
549 546
550 547
551 548 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
552 549 _newline_marker = re.compile(r'^\\ No newline at end of file')
553 550
554 551
555 552 def _parse_lines(diff_lines):
556 553 """
557 554 Given an iterator of diff body lines, parse them and return a dict per
558 555 line and added/removed totals.
559 556 """
560 557 added = deleted = 0
561 558 old_line = old_end = new_line = new_end = None
562 559
563 560 chunks = []
564 561 try:
565 562 line = next(diff_lines)
566 563
567 564 while True:
568 565 lines = []
569 566 chunks.append(lines)
570 567
571 568 match = _chunk_re.match(line)
572 569
573 570 if not match:
574 571 raise Exception('error parsing diff @@ line %r' % line)
575 572
576 573 gr = match.groups()
577 574 (old_line, old_end,
578 575 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
579 576 old_line -= 1
580 577 new_line -= 1
581 578
582 579 context = len(gr) == 5
583 580 old_end += old_line
584 581 new_end += new_line
585 582
586 583 if context:
587 584 # skip context only if it's first line
588 585 if int(gr[0]) > 1:
589 586 lines.append({
590 'old_lineno': '...',
591 'new_lineno': '...',
587 'old_lineno': '',
588 'new_lineno': '',
592 589 'action': 'context',
593 590 'line': line,
594 591 })
595 592
596 593 line = next(diff_lines)
597 594
598 595 while old_line < old_end or new_line < new_end:
599 596 if not line:
600 597 raise Exception('error parsing diff - empty line at -%s+%s' % (old_line, new_line))
601 598
602 599 affects_old = affects_new = False
603 600
604 601 command = line[0]
605 602 if command == '+':
606 603 affects_new = True
607 604 action = 'add'
608 605 added += 1
609 606 elif command == '-':
610 607 affects_old = True
611 608 action = 'del'
612 609 deleted += 1
613 610 elif command == ' ':
614 611 affects_old = affects_new = True
615 612 action = 'unmod'
616 613 else:
617 614 raise Exception('error parsing diff - unknown command in line %r at -%s+%s' % (line, old_line, new_line))
618 615
619 616 if not _newline_marker.match(line):
620 617 old_line += affects_old
621 618 new_line += affects_new
622 619 lines.append({
623 620 'old_lineno': affects_old and old_line or '',
624 621 'new_lineno': affects_new and new_line or '',
625 622 'action': action,
626 623 'line': line[1:],
627 624 })
628 625
629 626 line = next(diff_lines)
630 627
631 628 if _newline_marker.match(line):
632 629 # we need to append to lines, since this is not
633 630 # counted in the line specs of diff
634 631 lines.append({
635 'old_lineno': '...',
636 'new_lineno': '...',
632 'old_lineno': '',
633 'new_lineno': '',
637 634 'action': 'context',
638 635 'line': line,
639 636 })
640 637 line = next(diff_lines)
641 638 if old_line > old_end:
642 639 raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
643 640 if new_line > new_end:
644 641 raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
645 642 except StopIteration:
646 643 pass
647 644 if old_line != old_end or new_line != new_end:
648 645 raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))
649 646
650 647 return chunks, added, deleted
651 648
652 649 # Used for inline highlighter word split, must match the substitutions in _escaper
653 650 _token_re = re.compile(r'()(&amp;|&lt;|&gt;|<u>\t</u>|<u class="cr"></u>| <i></i>|\W+?)')
654 651
655 652
656 653 def _highlight_inline_diff(old, new):
657 654 """
658 655 Highlight simple add/remove in two lines given as info dicts. They are
659 656 modified in place and given markup with <del>/<ins>.
660 657 """
661 658 assert old['action'] == 'del'
662 659 assert new['action'] == 'add'
663 660
664 661 oldwords = _token_re.split(old['line'])
665 662 newwords = _token_re.split(new['line'])
666 663 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
667 664
668 665 oldfragments, newfragments = [], []
669 666 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
670 667 oldfrag = ''.join(oldwords[i1:i2])
671 668 newfrag = ''.join(newwords[j1:j2])
672 669 if tag != 'equal':
673 670 if oldfrag:
674 671 oldfrag = '<del>%s</del>' % oldfrag
675 672 if newfrag:
676 673 newfrag = '<ins>%s</ins>' % newfrag
677 674 oldfragments.append(oldfrag)
678 675 newfragments.append(newfrag)
679 676
680 677 old['line'] = "".join(oldfragments)
681 678 new['line'] = "".join(newfragments)
@@ -1,314 +1,314 b''
1 1 from kallithea.lib.diffs import BIN_FILENODE, CHMOD_FILENODE, COPIED_FILENODE, DEL_FILENODE, MOD_FILENODE, NEW_FILENODE, RENAMED_FILENODE, DiffProcessor
2 2 from kallithea.tests import base
3 3 from kallithea.tests.fixture import Fixture
4 4
5 5
6 6 fixture = Fixture()
7 7
8 8
# Expected parse result for each diff fixture file: a list with one
# (filename, operation, stats) tuple per file in the diff, as compared with
# DiffProcessor.parsed in TestDiffLib.test_diff.
DIFF_FIXTURES = {
    'hg_diff_add_single_binary_file.diff': [
        ('US Warszawa.jpg', 'added',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {NEW_FILENODE: 'new file 100755',
                  BIN_FILENODE: 'binary diff not shown'}}),
    ],
    'hg_diff_mod_single_binary_file.diff': [
        ('US Warszawa.jpg', 'modified',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {MOD_FILENODE: 'modified file',
                  BIN_FILENODE: 'binary diff not shown'}}),
    ],

    'hg_diff_mod_single_file_and_rename_and_chmod.diff': [
        ('README', 'renamed',
         {'added': 3, 'deleted': 0, 'binary': False,
          'ops': {RENAMED_FILENODE: 'file renamed from README.rst to README',
                  CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
    ],
    'hg_diff_mod_file_and_rename.diff': [
        ('README.rst', 'renamed',
         {'added': 3, 'deleted': 0, 'binary': False,
          'ops': {RENAMED_FILENODE: 'file renamed from README to README.rst'}}),
    ],
    'hg_diff_del_single_binary_file.diff': [
        ('US Warszawa.jpg', 'removed',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {DEL_FILENODE: 'deleted file',
                  BIN_FILENODE: 'binary diff not shown'}}),
    ],
    'hg_diff_chmod_and_mod_single_binary_file.diff': [
        ('gravatar.png', 'modified',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
                  BIN_FILENODE: 'binary diff not shown'}}),
    ],
    'hg_diff_chmod.diff': [
        ('file', 'modified',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
    ],
    'hg_diff_rename_file.diff': [
        ('file_renamed', 'renamed',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {RENAMED_FILENODE: 'file renamed from file to file_renamed'}}),
    ],
    'hg_diff_rename_and_chmod_file.diff': [
        ('README', 'renamed',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
                  RENAMED_FILENODE: 'file renamed from README.rst to README'}}),
    ],
    'hg_diff_binary_and_normal.diff': [
        ('img/baseline-10px.png', 'added',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {NEW_FILENODE: 'new file 100644',
                  BIN_FILENODE: 'binary diff not shown'}}),
        ('img/baseline-20px.png', 'removed',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {DEL_FILENODE: 'deleted file',
                  BIN_FILENODE: 'binary diff not shown'}}),
        ('index.html', 'modified',
         {'added': 3, 'deleted': 2, 'binary': False,
          'ops': {MOD_FILENODE: 'modified file'}}),
        ('js/global.js', 'removed',
         {'added': 0, 'deleted': 75, 'binary': False,
          'ops': {DEL_FILENODE: 'deleted file'}}),
        ('js/jquery/hashgrid.js', 'added',
         {'added': 340, 'deleted': 0, 'binary': False,
          'ops': {NEW_FILENODE: 'new file 100755'}}),
        ('less/docs.less', 'modified',
         {'added': 34, 'deleted': 0, 'binary': False,
          'ops': {MOD_FILENODE: 'modified file'}}),
        ('less/scaffolding.less', 'modified',
         {'added': 1, 'deleted': 3, 'binary': False,
          'ops': {MOD_FILENODE: 'modified file'}}),
        ('readme.markdown', 'modified',
         {'added': 1, 'deleted': 10, 'binary': False,
          'ops': {MOD_FILENODE: 'modified file'}}),
    ],
    'git_diff_chmod.diff': [
        ('work-horus.xls', 'modified',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}})
    ],
    'git_diff_rename_file.diff': [
        ('file.xls', 'renamed',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {RENAMED_FILENODE: 'file renamed from work-horus.xls to file.xls'}}),
        ('files/var/www/favicon.ico/DEFAULT', 'renamed',
         {'added': 0, 'deleted': 0, 'binary': True,
          # named constants instead of the bare values 4 and 6
          'ops': {RENAMED_FILENODE: 'file renamed from files/var/www/favicon.ico to files/var/www/favicon.ico/DEFAULT',
                  CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}})
    ],
    'git_diff_mod_single_binary_file.diff': [
        ('US Warszawa.jpg', 'modified',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {MOD_FILENODE: 'modified file',
                  BIN_FILENODE: 'binary diff not shown'}})
    ],
    'git_diff_binary_and_normal.diff': [
        ('img/baseline-10px.png', 'added',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {NEW_FILENODE: 'new file 100644',
                  BIN_FILENODE: 'binary diff not shown'}}),
        ('img/baseline-20px.png', 'removed',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {DEL_FILENODE: 'deleted file',
                  BIN_FILENODE: 'binary diff not shown'}}),
        ('index.html', 'modified',
         {'added': 3, 'deleted': 2, 'binary': False,
          'ops': {MOD_FILENODE: 'modified file'}}),
        ('js/global.js', 'removed',
         {'added': 0, 'deleted': 75, 'binary': False,
          'ops': {DEL_FILENODE: 'deleted file'}}),
        ('js/jquery/hashgrid.js', 'added',
         {'added': 340, 'deleted': 0, 'binary': False,
          'ops': {NEW_FILENODE: 'new file 100755'}}),
        ('less/docs.less', 'modified',
         {'added': 34, 'deleted': 0, 'binary': False,
          'ops': {MOD_FILENODE: 'modified file'}}),
        ('less/scaffolding.less', 'modified',
         {'added': 1, 'deleted': 3, 'binary': False,
          'ops': {MOD_FILENODE: 'modified file'}}),
        ('readme.markdown', 'modified',
         {'added': 1, 'deleted': 10, 'binary': False,
          'ops': {MOD_FILENODE: 'modified file'}}),
    ],
    'diff_with_diff_data.diff': [
        ('vcs/backends/base.py', 'modified',
         {'added': 18, 'deleted': 2, 'binary': False,
          'ops': {MOD_FILENODE: 'modified file'}}),
        ('vcs/backends/git/repository.py', 'modified',
         {'added': 46, 'deleted': 15, 'binary': False,
          'ops': {MOD_FILENODE: 'modified file'}}),
        ('vcs/backends/hg.py', 'modified',
         {'added': 22, 'deleted': 3, 'binary': False,
          'ops': {MOD_FILENODE: 'modified file'}}),
        ('vcs/tests/test_git.py', 'modified',
         {'added': 5, 'deleted': 5, 'binary': False,
          'ops': {MOD_FILENODE: 'modified file'}}),
        ('vcs/tests/test_repository.py', 'modified',
         {'added': 174, 'deleted': 2, 'binary': False,
          'ops': {MOD_FILENODE: 'modified file'}}),
    ],
    'git_diff_modify_binary_file.diff': [
        ('file.name', 'modified',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {MOD_FILENODE: 'modified file',
                  BIN_FILENODE: 'binary diff not shown'}})
    ],
    'hg_diff_copy_file.diff': [
        ('file2', 'modified',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {COPIED_FILENODE: 'file copied from file1 to file2'}}),
    ],
    'hg_diff_copy_and_modify_file.diff': [
        ('file3', 'modified',
         {'added': 1, 'deleted': 0, 'binary': False,
          'ops': {COPIED_FILENODE: 'file copied from file2 to file3',
                  MOD_FILENODE: 'modified file'}}),
    ],
    'hg_diff_copy_and_chmod_file.diff': [
        ('file4', 'modified',
         {'added': 0, 'deleted': 0, 'binary': True,
          'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
                  CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
    ],
    'hg_diff_copy_chmod_and_edit_file.diff': [
        ('file5', 'modified',
         {'added': 2, 'deleted': 1, 'binary': False,
          'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
                  CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
                  MOD_FILENODE: 'modified file'}}),
    ],
    'hg_diff_rename_space_cr.diff': [
        ('oh yes', 'renamed',
         {'added': 3, 'deleted': 2, 'binary': False,
          'ops': {RENAMED_FILENODE: 'file renamed from oh no to oh yes'}}),
    ],
}
272 272
273 273
class TestDiffLib(base.TestController):
    """Tests of DiffProcessor, driven by diff fixture files."""

    @base.parametrize('diff_fixture', DIFF_FIXTURES)
    def test_diff(self, diff_fixture):
        """The files, operations and stats found in a fixture are as listed in DIFF_FIXTURES."""
        raw_diff = fixture.load_resource(diff_fixture, strip=False)
        # fixtures are parsed as hg diffs unless their name says git
        vcs = 'hg'
        if diff_fixture.startswith('git_'):
            vcs = 'git'
        diff_processor = DiffProcessor(raw_diff, vcs=vcs)
        data = [(x['filename'], x['operation'], x['stats']) for x in diff_processor.parsed]
        expected_data = DIFF_FIXTURES[diff_fixture]
        assert expected_data == data

    def test_diff_markup(self):
        """Whitespace and inline changes in markuptest.diff get the expected markup."""
        raw_diff = fixture.load_resource('markuptest.diff', strip=False)
        diff_processor = DiffProcessor(raw_diff)
        chunks = diff_processor.parsed[0]['chunks']
        # the first chunk holds operation messages - there are none for a
        # plain modification
        assert not chunks[0]
        rendered = ['\n']
        for d in chunks[1]:
            rendered.append('%(action)-7s %(new_lineno)3s %(old_lineno)3s %(line)r\n' % d)
        s = ''.join(rendered)
        # the hunk header has no line numbers - both columns are empty
        assert s == r'''
context         '@@ -51,6 +51,13 @@\n'
unmod    51  51 '<u>\t</u>begin();\n'
unmod    52  52 '<u>\t</u>\n'
add      53     '<u>\t</u>int foo;<u class="cr"></u>\n'
add      54     '<u>\t</u>int bar; <u class="cr"></u>\n'
add      55     '<u>\t</u>int baz;<u>\t</u><u class="cr"></u>\n'
add      56     '<u>\t</u>int space; <i></i>'
add      57     '<u>\t</u>int tab;<u>\t</u>\n'
add      58     '<u>\t</u>\n'
unmod    59  53 ' <i></i>'
del          54 '<u>\t</u>#define MAX_STEPS (48)\n'
add      60     '<u>\t</u><u class="cr"></u>\n'
add      61     '<u>\t</u>#define MAX_STEPS (64)<u class="cr"></u>\n'
unmod    62  55 '\n'
del          56 '<u>\t</u>#define MIN_STEPS (<del>48</del>)\n'
add      63     '<u>\t</u>#define MIN_STEPS (<ins>42</ins>)\n'
'''
General Comments 0
You need to be logged in to leave comments. Login now