diffs: we should use removed instead of deleted.
marcink
r3102:2cd36dd3 default
@@ -1,762 +1,762 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2018 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 import logging
22 22 import difflib
23 23 from itertools import groupby
24 24
25 25 from pygments import lex
26 26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
27 27 from pygments.lexers.special import TextLexer, Token
28 28 from pygments.lexers import get_lexer_by_name
29 29
30 30 from rhodecode.lib.helpers import (
31 31 get_lexer_for_filenode, html_escape, get_custom_lexer)
32 32 from rhodecode.lib.utils2 import AttributeDict, StrictAttributeDict, safe_unicode
33 33 from rhodecode.lib.vcs.nodes import FileNode
34 34 from rhodecode.lib.vcs.exceptions import VCSError, NodeDoesNotExistError
35 35 from rhodecode.lib.diff_match_patch import diff_match_patch
36 36 from rhodecode.lib.diffs import LimitedDiffContainer, DEL_FILENODE, BIN_FILENODE
37 37
38 38
39 39 plain_text_lexer = get_lexer_by_name(
40 40 'text', stripall=False, stripnl=False, ensurenl=False)
41 41
42 42
43 43 log = logging.getLogger(__name__)
44 44
45 45
46 46 def filenode_as_lines_tokens(filenode, lexer=None):
47 47 org_lexer = lexer
48 48 lexer = lexer or get_lexer_for_filenode(filenode)
49 49 log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
50 50 lexer, filenode, org_lexer)
51 51 tokens = tokenize_string(filenode.content, lexer)
52 52 lines = split_token_stream(tokens)
53 53 rv = list(lines)
54 54 return rv
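# Illustrative sketch (assumes `filenode` is a FileNode): the return value has
# one entry per source line, each a list of (pygments_css_class, text) tuples
# that can be passed straight to render_tokenstream(), e.g.:
#
#   >>> lines = filenode_as_lines_tokens(filenode)
#   >>> first_line_html = render_tokenstream(lines[0])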
55 55
56 56
57 57 def tokenize_string(content, lexer):
58 58 """
59 59 Use pygments to tokenize some content based on a lexer,
60 60 ensuring all original newlines and whitespace are preserved
61 61 """
62 62
63 63 lexer.stripall = False
64 64 lexer.stripnl = False
65 65 lexer.ensurenl = False
66 66
67 67 if isinstance(lexer, TextLexer):
68 68 lexed = [(Token.Text, content)]
69 69 else:
70 70 lexed = lex(content, lexer)
71 71
72 72 for token_type, token_text in lexed:
73 73 yield pygment_token_class(token_type), token_text
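# Illustrative sketch: tokenizing a short string with the module-level
# plain_text_lexer yields (css_class, text) pairs with all whitespace intact;
# TextLexer input is passed through as a single Token.Text chunk, roughly:
#
#   >>> list(tokenize_string(u'def foo():\n    pass\n', plain_text_lexer))
#   [('', u'def foo():\n    pass\n')]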
74 74
75 75
76 76 def split_token_stream(tokens):
77 77 """
78 78 Take a stream of (TokenType, text) tuples and split it on newlines into per-line lists, e.g.:
79 79 
80 80 split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
81 81 [[(TEXT, 'some')],
82 82 [(TEXT, 'text'), (TEXT, 'more')], [(TEXT, '')]]
83 83 """
84 84
85 85 buffer = []
86 86 for token_class, token_text in tokens:
87 87 parts = token_text.split('\n')
88 88 for part in parts[:-1]:
89 89 buffer.append((token_class, part))
90 90 yield buffer
91 91 buffer = []
92 92
93 93 buffer.append((token_class, parts[-1]))
94 94
95 95 if buffer:
96 96 yield buffer
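# Illustrative sketch: a token whose text spans a newline is split across the
# per-line buffers, roughly:
#
#   >>> list(split_token_stream([('k', 'if x'), ('', ':\npass\n')]))
#   [[('k', 'if x'), ('', ':')], [('', 'pass')], [('', '')]]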
97 97
98 98
99 99 def filenode_as_annotated_lines_tokens(filenode):
100 100 """
101 101 Take a file node and return a list of (annotation, lines) groups; if no
102 102 annotation is found for a line, its annotation is None.
103 103
104 104 eg:
105 105
106 106 [
107 107 (annotation1, [
108 108 (1, line1_tokens_list),
109 109 (2, line2_tokens_list),
110 110 ]),
111 111 (annotation2, [
112 112 (3, line1_tokens_list),
113 113 ]),
114 114 (None, [
115 115 (4, line1_tokens_list),
116 116 ]),
117 117 (annotation1, [
118 118 (5, line1_tokens_list),
119 119 (6, line2_tokens_list),
120 120 ])
121 121 ]
122 122 """
123 123
124 124 commit_cache = {} # cache commit_getter lookups
125 125
126 126 def _get_annotation(commit_id, commit_getter):
127 127 if commit_id not in commit_cache:
128 128 commit_cache[commit_id] = commit_getter()
129 129 return commit_cache[commit_id]
130 130
131 131 annotation_lookup = {
132 132 line_no: _get_annotation(commit_id, commit_getter)
133 133 for line_no, commit_id, commit_getter, line_content
134 134 in filenode.annotate
135 135 }
136 136
137 137 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
138 138 for line_no, tokens
139 139 in enumerate(filenode_as_lines_tokens(filenode), 1))
140 140
141 141 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
142 142
143 143 for annotation, group in grouped_annotations_lines:
144 144 yield (
145 145 annotation, [(line_no, tokens)
146 146 for (_, line_no, tokens) in group]
147 147 )
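# Illustrative sketch (assumes `filenode` is a FileNode and annotations are
# commit objects exposing raw_id): a consumer renders one gutter entry per
# group, e.g.:
#
#   >>> for annotation, lines in filenode_as_annotated_lines_tokens(filenode):
#   ...     commit_id = annotation.raw_id if annotation else None
#   ...     for line_no, tokens in lines:
#   ...         line_html = render_tokenstream(tokens)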
148 148
149 149
150 150 def render_tokenstream(tokenstream):
151 151 result = []
152 152 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
153 153
154 154 if token_class:
155 155 result.append(u'<span class="%s">' % token_class)
156 156 else:
157 157 result.append(u'<span>')
158 158
159 159 for op_tag, token_text in token_ops_texts:
160 160
161 161 if op_tag:
162 162 result.append(u'<%s>' % op_tag)
163 163
164 164 escaped_text = html_escape(token_text)
165 165
166 166 # TODO: dan: investigate showing hidden characters like space/nl/tab
167 167 # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
168 168 # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
169 169 # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
170 170
171 171 result.append(escaped_text)
172 172
173 173 if op_tag:
174 174 result.append(u'</%s>' % op_tag)
175 175
176 176 result.append(u'</span>')
177 177
178 178 html = ''.join(result)
179 179 return html
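# Illustrative sketch: ops in the rolled-up stream become nested <ins>/<del>
# tags inside one <span> per token class, roughly:
#
#   >>> render_tokenstream([('k', '', 'de'), ('k', 'ins', 'f')])
#   u'<span class="k">de<ins>f</ins></span>'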
180 180
181 181
182 182 def rollup_tokenstream(tokenstream):
183 183 """
184 184 Group a token stream of the format:
185 185
186 186 ('class', 'op', 'text')
187 187 or
188 188 ('class', 'text')
189 189
190 190 into
191 191
192 192 [('class1',
193 193 [('op1', 'text'),
194 194 ('op2', 'text')]),
195 195 ('class2',
196 196 [('op3', 'text')])]
197 197
198 198 This is used to emit the minimal set of tags necessary when
199 199 rendering to HTML, e.g. for a token stream producing:
200 200
201 201 <span class="A"><ins>he</ins>llo</span>
202 202 vs
203 203 <span class="A"><ins>he</ins></span><span class="A">llo</span>
204 204
205 205 If a 2 tuple is passed in, the output op will be an empty string.
206 206
207 207 eg:
208 208
209 209 >>> rollup_tokenstream([('classA', '', 'h'),
210 210 ('classA', 'del', 'ell'),
211 211 ('classA', '', 'o'),
212 212 ('classB', '', ' '),
213 213 ('classA', '', 'the'),
214 214 ('classA', '', 're'),
215 215 ])
216 216
217 217 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
218 218 ('classB', [('', ' ')]),
219 219 ('classA', [('', 'there')])]
220 220
221 221 """
222 222 if tokenstream and len(tokenstream[0]) == 2:
223 223 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
224 224
225 225 result = []
226 226 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
227 227 ops = []
228 228 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
229 229 text_buffer = []
230 230 for t_class, t_op, t_text in token_text_list:
231 231 text_buffer.append(t_text)
232 232 ops.append((token_op, ''.join(text_buffer)))
233 233 result.append((token_class, ops))
234 234 return result
235 235
236 236
237 237 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
238 238 """
239 239 Converts a list of (token_class, token_text) tuples to a list of
240 240 (token_class, token_op, token_text) tuples where token_op is one of
241 241 ('ins', 'del', '')
242 242
243 243 :param old_tokens: list of (token_class, token_text) tuples of old line
244 244 :param new_tokens: list of (token_class, token_text) tuples of new line
245 245 :param use_diff_match_patch: boolean; if True, use Google's diff-match-patch
246 246 library, which has options to 'smooth' out the character-by-character
247 247 differences, producing nicer ins/del blocks
248 248 """
249 249
250 250 old_tokens_result = []
251 251 new_tokens_result = []
252 252
253 253 similarity = difflib.SequenceMatcher(None,
254 254 ''.join(token_text for token_class, token_text in old_tokens),
255 255 ''.join(token_text for token_class, token_text in new_tokens)
256 256 ).ratio()
257 257
258 258 if similarity < 0.6: # return, the blocks are too different
259 259 for token_class, token_text in old_tokens:
260 260 old_tokens_result.append((token_class, '', token_text))
261 261 for token_class, token_text in new_tokens:
262 262 new_tokens_result.append((token_class, '', token_text))
263 263 return old_tokens_result, new_tokens_result, similarity
264 264
265 265 token_sequence_matcher = difflib.SequenceMatcher(None,
266 266 [x[1] for x in old_tokens],
267 267 [x[1] for x in new_tokens])
268 268
269 269 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
270 270 # check the differences by token block types first to give a
271 271 # nicer "block"-level replacement vs character diffs
272 272
273 273 if tag == 'equal':
274 274 for token_class, token_text in old_tokens[o1:o2]:
275 275 old_tokens_result.append((token_class, '', token_text))
276 276 for token_class, token_text in new_tokens[n1:n2]:
277 277 new_tokens_result.append((token_class, '', token_text))
278 278 elif tag == 'delete':
279 279 for token_class, token_text in old_tokens[o1:o2]:
280 280 old_tokens_result.append((token_class, 'del', token_text))
281 281 elif tag == 'insert':
282 282 for token_class, token_text in new_tokens[n1:n2]:
283 283 new_tokens_result.append((token_class, 'ins', token_text))
284 284 elif tag == 'replace':
285 285 # if token blocks of the same type must be replaced, do a diff on the
286 286 # characters in the token blocks to show the individual changes
287 287
288 288 old_char_tokens = []
289 289 new_char_tokens = []
290 290 for token_class, token_text in old_tokens[o1:o2]:
291 291 for char in token_text:
292 292 old_char_tokens.append((token_class, char))
293 293
294 294 for token_class, token_text in new_tokens[n1:n2]:
295 295 for char in token_text:
296 296 new_char_tokens.append((token_class, char))
297 297
298 298 old_string = ''.join([token_text for
299 299 token_class, token_text in old_char_tokens])
300 300 new_string = ''.join([token_text for
301 301 token_class, token_text in new_char_tokens])
302 302
303 303 char_sequence = difflib.SequenceMatcher(
304 304 None, old_string, new_string)
305 305 copcodes = char_sequence.get_opcodes()
306 306 obuffer, nbuffer = [], []
307 307
308 308 if use_diff_match_patch:
309 309 dmp = diff_match_patch()
310 310 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
311 311 reps = dmp.diff_main(old_string, new_string)
312 312 dmp.diff_cleanupEfficiency(reps)
313 313
314 314 a, b = 0, 0
315 315 for op, rep in reps:
316 316 l = len(rep)
317 317 if op == 0:
318 318 for i, c in enumerate(rep):
319 319 obuffer.append((old_char_tokens[a+i][0], '', c))
320 320 nbuffer.append((new_char_tokens[b+i][0], '', c))
321 321 a += l
322 322 b += l
323 323 elif op == -1:
324 324 for i, c in enumerate(rep):
325 325 obuffer.append((old_char_tokens[a+i][0], 'del', c))
326 326 a += l
327 327 elif op == 1:
328 328 for i, c in enumerate(rep):
329 329 nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
330 330 b += l
331 331 else:
332 332 for ctag, co1, co2, cn1, cn2 in copcodes:
333 333 if ctag == 'equal':
334 334 for token_class, token_text in old_char_tokens[co1:co2]:
335 335 obuffer.append((token_class, '', token_text))
336 336 for token_class, token_text in new_char_tokens[cn1:cn2]:
337 337 nbuffer.append((token_class, '', token_text))
338 338 elif ctag == 'delete':
339 339 for token_class, token_text in old_char_tokens[co1:co2]:
340 340 obuffer.append((token_class, 'del', token_text))
341 341 elif ctag == 'insert':
342 342 for token_class, token_text in new_char_tokens[cn1:cn2]:
343 343 nbuffer.append((token_class, 'ins', token_text))
344 344 elif ctag == 'replace':
345 345 for token_class, token_text in old_char_tokens[co1:co2]:
346 346 obuffer.append((token_class, 'del', token_text))
347 347 for token_class, token_text in new_char_tokens[cn1:cn2]:
348 348 nbuffer.append((token_class, 'ins', token_text))
349 349
350 350 old_tokens_result.extend(obuffer)
351 351 new_tokens_result.extend(nbuffer)
352 352
353 353 return old_tokens_result, new_tokens_result, similarity
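# Illustrative sketch: similar lines come back as character-level ops, which
# rollup_tokenstream()/render_tokenstream() later merge into blocks, roughly:
#
#   >>> old, new, sim = tokens_diff(
#   ...     [('', 'abc')], [('', 'abd')], use_diff_match_patch=False)
#   >>> old
#   [('', '', 'a'), ('', '', 'b'), ('', 'del', 'c')]
#   >>> new
#   [('', '', 'a'), ('', '', 'b'), ('', 'ins', 'd')]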
354 354
355 355
356 356 def diffset_node_getter(commit):
357 357 def get_node(fname):
358 358 try:
359 359 return commit.get_node(fname)
360 360 except NodeDoesNotExistError:
361 361 return None
362 362
363 363 return get_node
364 364
365 365
366 366 class DiffSet(object):
367 367 """
368 368 An object for parsing the diff result from diffs.DiffProcessor and
369 369 adding highlighting, side by side/unified renderings and line diffs
370 370 """
371 371
372 372 HL_REAL = 'REAL' # highlights using original file, slow
373 373 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
374 374 # in the case of multiline code
375 375 HL_NONE = 'NONE' # no highlighting, fastest
376 376
377 377 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
378 378 source_repo_name=None,
379 379 source_node_getter=lambda filename: None,
380 380 target_node_getter=lambda filename: None,
381 381 source_nodes=None, target_nodes=None,
382 382 # files over this size will use fast highlighting
383 383 max_file_size_limit=150 * 1024,
384 384 ):
385 385
386 386 self.highlight_mode = highlight_mode
387 387 self.highlighted_filenodes = {}
388 388 self.source_node_getter = source_node_getter
389 389 self.target_node_getter = target_node_getter
390 390 self.source_nodes = source_nodes or {}
391 391 self.target_nodes = target_nodes or {}
392 392 self.repo_name = repo_name
393 393 self.source_repo_name = source_repo_name or repo_name
394 394 self.max_file_size_limit = max_file_size_limit
395 395
396 396 def render_patchset(self, patchset, source_ref=None, target_ref=None):
397 397 diffset = AttributeDict(dict(
398 398 lines_added=0,
399 399 lines_deleted=0,
400 400 changed_files=0,
401 401 files=[],
402 402 file_stats={},
403 403 limited_diff=isinstance(patchset, LimitedDiffContainer),
404 404 repo_name=self.repo_name,
405 405 source_repo_name=self.source_repo_name,
406 406 source_ref=source_ref,
407 407 target_ref=target_ref,
408 408 ))
409 409 for patch in patchset:
410 410 diffset.file_stats[patch['filename']] = patch['stats']
411 411 filediff = self.render_patch(patch)
412 412 filediff.diffset = StrictAttributeDict(dict(
413 413 source_ref=diffset.source_ref,
414 414 target_ref=diffset.target_ref,
415 415 repo_name=diffset.repo_name,
416 416 source_repo_name=diffset.source_repo_name,
417 417 ))
418 418 diffset.files.append(filediff)
419 419 diffset.changed_files += 1
420 420 if not patch['stats']['binary']:
421 421 diffset.lines_added += patch['stats']['added']
422 422 diffset.lines_deleted += patch['stats']['deleted']
423 423
424 424 return diffset
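# Illustrative sketch of a call site (variable names below are hypothetical;
# the patchset is expected to come from diffs.DiffProcessor.prepare()):
#
#   >>> diffset = DiffSet(
#   ...     repo_name=repo_name,
#   ...     source_node_getter=diffset_node_getter(source_commit),
#   ...     target_node_getter=diffset_node_getter(target_commit),
#   ... ).render_patchset(
#   ...     diff_processor.prepare(), source_commit.raw_id, target_commit.raw_id)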
425 425
426 426 _lexer_cache = {}
427 427
428 428 def _get_lexer_for_filename(self, filename, filenode=None):
429 429 # cached because we might need to call it twice for source/target
430 430 if filename not in self._lexer_cache:
431 431 if filenode:
432 432 lexer = filenode.lexer
433 433 extension = filenode.extension
434 434 else:
435 435 lexer = FileNode.get_lexer(filename=filename)
436 436 extension = filename.split('.')[-1]
437 437
438 438 lexer = get_custom_lexer(extension) or lexer
439 439 self._lexer_cache[filename] = lexer
440 440 return self._lexer_cache[filename]
441 441
442 442 def render_patch(self, patch):
443 443 log.debug('rendering diff for %r', patch['filename'])
444 444
445 445 source_filename = patch['original_filename']
446 446 target_filename = patch['filename']
447 447
448 448 source_lexer = plain_text_lexer
449 449 target_lexer = plain_text_lexer
450 450
451 451 if not patch['stats']['binary']:
452 452 node_hl_mode = self.HL_NONE if patch['chunks'] == [] else None
453 453 hl_mode = node_hl_mode or self.highlight_mode
454 454
455 455 if hl_mode == self.HL_REAL:
456 456 if (source_filename and patch['operation'] in ('D', 'M')
457 457 and source_filename not in self.source_nodes):
458 458 self.source_nodes[source_filename] = (
459 459 self.source_node_getter(source_filename))
460 460
461 461 if (target_filename and patch['operation'] in ('A', 'M')
462 462 and target_filename not in self.target_nodes):
463 463 self.target_nodes[target_filename] = (
464 464 self.target_node_getter(target_filename))
465 465
466 466 elif hl_mode == self.HL_FAST:
467 467 source_lexer = self._get_lexer_for_filename(source_filename)
468 468 target_lexer = self._get_lexer_for_filename(target_filename)
469 469
470 470 source_file = self.source_nodes.get(source_filename, source_filename)
471 471 target_file = self.target_nodes.get(target_filename, target_filename)
472 472
473 473 source_filenode, target_filenode = None, None
474 474
475 475 # TODO: dan: FileNode.lexer works on the content of the file - which
476 476 # can be slow - issue #4289 explains a lexer clean up - which once
477 477 # done can allow caching a lexer for a filenode to avoid the file lookup
478 478 if isinstance(source_file, FileNode):
479 479 source_filenode = source_file
480 480 #source_lexer = source_file.lexer
481 481 source_lexer = self._get_lexer_for_filename(source_filename)
482 482 source_file.lexer = source_lexer
483 483
484 484 if isinstance(target_file, FileNode):
485 485 target_filenode = target_file
486 486 #target_lexer = target_file.lexer
487 487 target_lexer = self._get_lexer_for_filename(target_filename)
488 488 target_file.lexer = target_lexer
489 489
490 490 source_file_path, target_file_path = None, None
491 491
492 492 if source_filename != '/dev/null':
493 493 source_file_path = source_filename
494 494 if target_filename != '/dev/null':
495 495 target_file_path = target_filename
496 496
497 497 source_file_type = source_lexer.name
498 498 target_file_type = target_lexer.name
499 499
500 500 filediff = AttributeDict({
501 501 'source_file_path': source_file_path,
502 502 'target_file_path': target_file_path,
503 503 'source_filenode': source_filenode,
504 504 'target_filenode': target_filenode,
505 505 'source_file_type': target_file_type,
506 506 'target_file_type': source_file_type,
507 507 'patch': {'filename': patch['filename'], 'stats': patch['stats']},
508 508 'operation': patch['operation'],
509 509 'source_mode': patch['stats']['old_mode'],
510 510 'target_mode': patch['stats']['new_mode'],
511 511 'limited_diff': isinstance(patch, LimitedDiffContainer),
512 512 'hunks': [],
513 513 'hunk_ops': None,
514 514 'diffset': self,
515 515 })
516 516 file_chunks = patch['chunks'][1:]
517 517 for hunk in file_chunks:
518 518 hunkbit = self.parse_hunk(hunk, source_file, target_file)
519 519 hunkbit.source_file_path = source_file_path
520 520 hunkbit.target_file_path = target_file_path
521 521 filediff.hunks.append(hunkbit)
522 522
523 523 # Simulate a hunk for OPS-type lines, which don't really contain any diff;
524 524 # this allows commenting on them
525 525 if not file_chunks:
526 526 actions = []
527 527 for op_id, op_text in filediff.patch['stats']['ops'].items():
528 528 if op_id == DEL_FILENODE:
529 actions.append(u'file was deleted')
529 actions.append(u'file was removed')
530 530 elif op_id == BIN_FILENODE:
531 531 actions.append(u'binary diff hidden')
532 532 else:
533 533 actions.append(safe_unicode(op_text))
534 534 action_line = u'NO CONTENT: ' + \
535 535 (u', '.join(actions) or u'UNDEFINED_ACTION')
536 536
537 537 hunk_ops = {'source_length': 0, 'source_start': 0,
538 538 'lines': [
539 539 {'new_lineno': 0, 'old_lineno': 1,
540 540 'action': 'unmod-no-hl', 'line': action_line}
541 541 ],
542 542 'section_header': u'', 'target_start': 1, 'target_length': 1}
543 543
544 544 hunkbit = self.parse_hunk(hunk_ops, source_file, target_file)
545 545 hunkbit.source_file_path = source_file_path
546 546 hunkbit.target_file_path = target_file_path
547 547 filediff.hunk_ops = hunkbit
548 548 return filediff
549 549
550 550 def parse_hunk(self, hunk, source_file, target_file):
551 551 result = AttributeDict(dict(
552 552 source_start=hunk['source_start'],
553 553 source_length=hunk['source_length'],
554 554 target_start=hunk['target_start'],
555 555 target_length=hunk['target_length'],
556 556 section_header=hunk['section_header'],
557 557 lines=[],
558 558 ))
559 559 before, after = [], []
560 560
561 561 for line in hunk['lines']:
562 562 if line['action'] in ['unmod', 'unmod-no-hl']:
563 563 no_hl = line['action'] == 'unmod-no-hl'
564 564 result.lines.extend(
565 565 self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
566 566 after.append(line)
567 567 before.append(line)
568 568 elif line['action'] == 'add':
569 569 after.append(line)
570 570 elif line['action'] == 'del':
571 571 before.append(line)
572 572 elif line['action'] == 'old-no-nl':
573 573 before.append(line)
574 574 elif line['action'] == 'new-no-nl':
575 575 after.append(line)
576 576
577 577 all_actions = [x['action'] for x in after] + [x['action'] for x in before]
578 578 no_hl = set(all_actions) == {'unmod-no-hl'}
579 579 result.lines.extend(
580 580 self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
581 581 # NOTE(marcink): we must keep list() call here so we can cache the result...
582 582 result.unified = list(self.as_unified(result.lines))
583 583 result.sideside = result.lines
584 584
585 585 return result
586 586
587 587 def parse_lines(self, before_lines, after_lines, source_file, target_file,
588 588 no_hl=False):
589 589 # TODO: dan: investigate doing the diff comparison and fast highlighting
590 590 # on the entire before and after buffered block lines rather than by
591 591 # line, this means we can get better 'fast' highlighting if the context
592 592 # allows it - eg.
593 593 # line 4: """
594 594 # line 5: this gets highlighted as a string
595 595 # line 6: """
596 596
597 597 lines = []
598 598
599 599 before_newline = AttributeDict()
600 600 after_newline = AttributeDict()
601 601 if before_lines and before_lines[-1]['action'] == 'old-no-nl':
602 602 before_newline_line = before_lines.pop(-1)
603 603 before_newline.content = '\n {}'.format(
604 604 render_tokenstream(
605 605 [(x[0], '', x[1])
606 606 for x in [('nonl', before_newline_line['line'])]]))
607 607
608 608 if after_lines and after_lines[-1]['action'] == 'new-no-nl':
609 609 after_newline_line = after_lines.pop(-1)
610 610 after_newline.content = '\n {}'.format(
611 611 render_tokenstream(
612 612 [(x[0], '', x[1])
613 613 for x in [('nonl', after_newline_line['line'])]]))
614 614
615 615 while before_lines or after_lines:
616 616 before, after = None, None
617 617 before_tokens, after_tokens = None, None
618 618
619 619 if before_lines:
620 620 before = before_lines.pop(0)
621 621 if after_lines:
622 622 after = after_lines.pop(0)
623 623
624 624 original = AttributeDict()
625 625 modified = AttributeDict()
626 626
627 627 if before:
628 628 if before['action'] == 'old-no-nl':
629 629 before_tokens = [('nonl', before['line'])]
630 630 else:
631 631 before_tokens = self.get_line_tokens(
632 632 line_text=before['line'], line_number=before['old_lineno'],
633 633 input_file=source_file, no_hl=no_hl)
634 634 original.lineno = before['old_lineno']
635 635 original.content = before['line']
636 636 original.action = self.action_to_op(before['action'])
637 637
638 638 original.get_comment_args = (
639 639 source_file, 'o', before['old_lineno'])
640 640
641 641 if after:
642 642 if after['action'] == 'new-no-nl':
643 643 after_tokens = [('nonl', after['line'])]
644 644 else:
645 645 after_tokens = self.get_line_tokens(
646 646 line_text=after['line'], line_number=after['new_lineno'],
647 647 input_file=target_file, no_hl=no_hl)
648 648 modified.lineno = after['new_lineno']
649 649 modified.content = after['line']
650 650 modified.action = self.action_to_op(after['action'])
651 651
652 652 modified.get_comment_args = (target_file, 'n', after['new_lineno'])
653 653
654 654 # diff the lines
655 655 if before_tokens and after_tokens:
656 656 o_tokens, m_tokens, similarity = tokens_diff(
657 657 before_tokens, after_tokens)
658 658 original.content = render_tokenstream(o_tokens)
659 659 modified.content = render_tokenstream(m_tokens)
660 660 elif before_tokens:
661 661 original.content = render_tokenstream(
662 662 [(x[0], '', x[1]) for x in before_tokens])
663 663 elif after_tokens:
664 664 modified.content = render_tokenstream(
665 665 [(x[0], '', x[1]) for x in after_tokens])
666 666
667 667 if not before_lines and before_newline:
668 668 original.content += before_newline.content
669 669 before_newline = None
670 670 if not after_lines and after_newline:
671 671 modified.content += after_newline.content
672 672 after_newline = None
673 673
674 674 lines.append(AttributeDict({
675 675 'original': original,
676 676 'modified': modified,
677 677 }))
678 678
679 679 return lines
680 680
681 681 def get_line_tokens(self, line_text, line_number, input_file=None, no_hl=False):
682 682 filenode = None
683 683 filename = None
684 684
685 685 if isinstance(input_file, basestring):
686 686 filename = input_file
687 687 elif isinstance(input_file, FileNode):
688 688 filenode = input_file
689 689 filename = input_file.unicode_path
690 690
691 691 hl_mode = self.HL_NONE if no_hl else self.highlight_mode
692 692 if hl_mode == self.HL_REAL and filenode:
693 693 lexer = self._get_lexer_for_filename(filename)
694 694 file_size_allowed = input_file.size < self.max_file_size_limit
695 695 if line_number and file_size_allowed:
696 696 return self.get_tokenized_filenode_line(
697 697 input_file, line_number, lexer)
698 698
699 699 if hl_mode in (self.HL_REAL, self.HL_FAST) and filename:
700 700 lexer = self._get_lexer_for_filename(filename)
701 701 return list(tokenize_string(line_text, lexer))
702 702
703 703 return list(tokenize_string(line_text, plain_text_lexer))
704 704
705 705 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
706 706
707 707 if filenode not in self.highlighted_filenodes:
708 708 tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
709 709 self.highlighted_filenodes[filenode] = tokenized_lines
710 710 return self.highlighted_filenodes[filenode][line_number - 1]
711 711
712 712 def action_to_op(self, action):
713 713 return {
714 714 'add': '+',
715 715 'del': '-',
716 716 'unmod': ' ',
717 717 'unmod-no-hl': ' ',
718 718 'old-no-nl': ' ',
719 719 'new-no-nl': ' ',
720 720 }.get(action, action)
721 721
722 722 def as_unified(self, lines):
723 723 """
724 724 Return a generator that yields the lines of a diff in unified order
725 725 """
726 726 def generator():
727 727 buf = []
728 728 for line in lines:
729 729
730 730 if buf and not line.original or line.original.action == ' ':
731 731 for b in buf:
732 732 yield b
733 733 buf = []
734 734
735 735 if line.original:
736 736 if line.original.action == ' ':
737 737 yield (line.original.lineno, line.modified.lineno,
738 738 line.original.action, line.original.content,
739 739 line.original.get_comment_args)
740 740 continue
741 741
742 742 if line.original.action == '-':
743 743 yield (line.original.lineno, None,
744 744 line.original.action, line.original.content,
745 745 line.original.get_comment_args)
746 746
747 747 if line.modified.action == '+':
748 748 buf.append((
749 749 None, line.modified.lineno,
750 750 line.modified.action, line.modified.content,
751 751 line.modified.get_comment_args))
752 752 continue
753 753
754 754 if line.modified:
755 755 yield (None, line.modified.lineno,
756 756 line.modified.action, line.modified.content,
757 757 line.modified.get_comment_args)
758 758
759 759 for b in buf:
760 760 yield b
761 761
762 762 return generator()
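# Illustrative sketch: each item yielded by as_unified() is a 5-tuple of
# (old_lineno, new_lineno, action, rendered_content, get_comment_args), with
# the line number on the "other" side set to None for pure adds/removals, e.g.
# (assuming `filediff` came from render_patch() and has at least one hunk):
#
#   >>> hunk = filediff.hunks[0]
#   >>> old_no, new_no, op, content, comment_args = hunk.unified[0]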