diffs: don't use recursive diffset attachment in diffs. This makes this structure much harder to...
marcink
r2682:44d560de default
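
The core change in this revision: each rendered filediff no longer keeps a back-reference to the full diffset (which itself holds every filediff in diffset.files, making the structure recursive); it now gets a small StrictAttributeDict carrying only the ref and repo-name fields the renderers need. A minimal sketch of the before/after shape, using simplified stand-ins for rhodecode.lib.utils2's AttributeDict/StrictAttributeDict (the stand-in implementations here are assumptions for illustration, not the real classes):

class AttributeDict(dict):
    # stand-in: dict with attribute access, missing keys return None
    def __getattr__(self, name):
        return self.get(name)

    def __setattr__(self, name, value):
        self[name] = value


class StrictAttributeDict(dict):
    # stand-in: like AttributeDict, but missing keys raise AttributeError
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)


diffset = AttributeDict(dict(
    files=[], repo_name='repo', source_repo_name='repo',
    source_ref='deadbeefcafe', target_ref='cafedeadbeef'))
filediff = AttributeDict(dict(hunks=[]))

# before: a full back-reference; every filediff could reach every other
# filediff (and all their hunks) via filediff.diffset.files
# filediff.diffset = diffset

# after: only the flat fields the templates actually need are copied over,
# so no filediff -> diffset -> files -> filediff cycle is created
filediff.diffset = StrictAttributeDict(dict(
    source_ref=diffset.source_ref,
    target_ref=diffset.target_ref,
    repo_name=diffset.repo_name,
    source_repo_name=diffset.source_repo_name,
))
diffset.files.append(filediff)
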
@@ -1,743 +1,748 @@
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2018 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 import logging
22 22 import difflib
23 23 from itertools import groupby
24 24
25 25 from pygments import lex
26 26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
27 27 from pygments.lexers.special import TextLexer, Token
28 28
29 29 from rhodecode.lib.helpers import (
30 30 get_lexer_for_filenode, html_escape, get_custom_lexer)
31 from rhodecode.lib.utils2 import AttributeDict
31 from rhodecode.lib.utils2 import AttributeDict, StrictAttributeDict
32 32 from rhodecode.lib.vcs.nodes import FileNode
33 33 from rhodecode.lib.diff_match_patch import diff_match_patch
34 34 from rhodecode.lib.diffs import LimitedDiffContainer
35 35 from pygments.lexers import get_lexer_by_name
36 36
37 37 plain_text_lexer = get_lexer_by_name(
38 38 'text', stripall=False, stripnl=False, ensurenl=False)
39 39
40 40
41 41 log = logging.getLogger(__name__)
42 42
43 43
44 44 def filenode_as_lines_tokens(filenode, lexer=None):
45 45 org_lexer = lexer
46 46 lexer = lexer or get_lexer_for_filenode(filenode)
47 47 log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
48 48 lexer, filenode, org_lexer)
49 49 tokens = tokenize_string(filenode.content, lexer)
50 50 lines = split_token_stream(tokens)
51 51 rv = list(lines)
52 52 return rv
53 53
54 54
55 55 def tokenize_string(content, lexer):
56 56 """
57 57 Use pygments to tokenize some content based on a lexer
58 58 ensuring all original new lines and whitespace is preserved
59 59 """
60 60
61 61 lexer.stripall = False
62 62 lexer.stripnl = False
63 63 lexer.ensurenl = False
64 64
65 65 if isinstance(lexer, TextLexer):
66 66 lexed = [(Token.Text, content)]
67 67 else:
68 68 lexed = lex(content, lexer)
69 69
70 70 for token_type, token_text in lexed:
71 71 yield pygment_token_class(token_type), token_text
72 72
73 73
74 74 def split_token_stream(tokens):
75 75 """
76 76 Take a list of (TokenType, text) tuples and split them by a string
77 77
78 78 split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
79 79 [(TEXT, 'some'), (TEXT, 'text'),
80 80 (TEXT, 'more'), (TEXT, 'text')]
81 81 """
82 82
83 83 buffer = []
84 84 for token_class, token_text in tokens:
85 85 parts = token_text.split('\n')
86 86 for part in parts[:-1]:
87 87 buffer.append((token_class, part))
88 88 yield buffer
89 89 buffer = []
90 90
91 91 buffer.append((token_class, parts[-1]))
92 92
93 93 if buffer:
94 94 yield buffer
95 95
96 96
97 97 def filenode_as_annotated_lines_tokens(filenode):
98 98 """
99 99 Take a file node and return a list of annotations => lines, if no annotation
100 100 is found, it will be None.
101 101
102 102 eg:
103 103
104 104 [
105 105 (annotation1, [
106 106 (1, line1_tokens_list),
107 107 (2, line2_tokens_list),
108 108 ]),
109 109 (annotation2, [
110 110 (3, line1_tokens_list),
111 111 ]),
112 112 (None, [
113 113 (4, line1_tokens_list),
114 114 ]),
115 115 (annotation1, [
116 116 (5, line1_tokens_list),
117 117 (6, line2_tokens_list),
118 118 ])
119 119 ]
120 120 """
121 121
122 122 commit_cache = {} # cache commit_getter lookups
123 123
124 124 def _get_annotation(commit_id, commit_getter):
125 125 if commit_id not in commit_cache:
126 126 commit_cache[commit_id] = commit_getter()
127 127 return commit_cache[commit_id]
128 128
129 129 annotation_lookup = {
130 130 line_no: _get_annotation(commit_id, commit_getter)
131 131 for line_no, commit_id, commit_getter, line_content
132 132 in filenode.annotate
133 133 }
134 134
135 135 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
136 136 for line_no, tokens
137 137 in enumerate(filenode_as_lines_tokens(filenode), 1))
138 138
139 139 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
140 140
141 141 for annotation, group in grouped_annotations_lines:
142 142 yield (
143 143 annotation, [(line_no, tokens)
144 144 for (_, line_no, tokens) in group]
145 145 )
146 146
147 147
148 148 def render_tokenstream(tokenstream):
149 149 result = []
150 150 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
151 151
152 152 if token_class:
153 153 result.append(u'<span class="%s">' % token_class)
154 154 else:
155 155 result.append(u'<span>')
156 156
157 157 for op_tag, token_text in token_ops_texts:
158 158
159 159 if op_tag:
160 160 result.append(u'<%s>' % op_tag)
161 161
162 162 escaped_text = html_escape(token_text)
163 163
164 164 # TODO: dan: investigate showing hidden characters like space/nl/tab
165 165 # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
166 166 # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
167 167 # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
168 168
169 169 result.append(escaped_text)
170 170
171 171 if op_tag:
172 172 result.append(u'</%s>' % op_tag)
173 173
174 174 result.append(u'</span>')
175 175
176 176 html = ''.join(result)
177 177 return html
178 178
179 179
180 180 def rollup_tokenstream(tokenstream):
181 181 """
182 182 Group a token stream of the format:
183 183
184 184 ('class', 'op', 'text')
185 185 or
186 186 ('class', 'text')
187 187
188 188 into
189 189
190 190 [('class1',
191 191 [('op1', 'text'),
192 192 ('op2', 'text')]),
193 193 ('class2',
194 194 [('op3', 'text')])]
195 195
196 196 This is used to get the minimal tags necessary when
197 197 rendering to html eg for a token stream ie.
198 198
199 199 <span class="A"><ins>he</ins>llo</span>
200 200 vs
201 201 <span class="A"><ins>he</ins></span><span class="A">llo</span>
202 202
203 203 If a 2 tuple is passed in, the output op will be an empty string.
204 204
205 205 eg:
206 206
207 207 >>> rollup_tokenstream([('classA', '', 'h'),
208 208 ('classA', 'del', 'ell'),
209 209 ('classA', '', 'o'),
210 210 ('classB', '', ' '),
211 211 ('classA', '', 'the'),
212 212 ('classA', '', 're'),
213 213 ])
214 214
215 215 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')],
216 216 ('classB', [('', ' ')],
217 217 ('classA', [('', 'there')]]
218 218
219 219 """
220 220 if tokenstream and len(tokenstream[0]) == 2:
221 221 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
222 222
223 223 result = []
224 224 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
225 225 ops = []
226 226 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
227 227 text_buffer = []
228 228 for t_class, t_op, t_text in token_text_list:
229 229 text_buffer.append(t_text)
230 230 ops.append((token_op, ''.join(text_buffer)))
231 231 result.append((token_class, ops))
232 232 return result
233 233
234 234
235 235 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
236 236 """
237 237 Converts a list of (token_class, token_text) tuples to a list of
238 238 (token_class, token_op, token_text) tuples where token_op is one of
239 239 ('ins', 'del', '')
240 240
241 241 :param old_tokens: list of (token_class, token_text) tuples of old line
242 242 :param new_tokens: list of (token_class, token_text) tuples of new line
243 243 :param use_diff_match_patch: boolean, will use google's diff match patch
244 244 library which has options to 'smooth' out the character by character
245 245 differences making nicer ins/del blocks
246 246 """
247 247
248 248 old_tokens_result = []
249 249 new_tokens_result = []
250 250
251 251 similarity = difflib.SequenceMatcher(None,
252 252 ''.join(token_text for token_class, token_text in old_tokens),
253 253 ''.join(token_text for token_class, token_text in new_tokens)
254 254 ).ratio()
255 255
256 256 if similarity < 0.6: # return, the blocks are too different
257 257 for token_class, token_text in old_tokens:
258 258 old_tokens_result.append((token_class, '', token_text))
259 259 for token_class, token_text in new_tokens:
260 260 new_tokens_result.append((token_class, '', token_text))
261 261 return old_tokens_result, new_tokens_result, similarity
262 262
263 263 token_sequence_matcher = difflib.SequenceMatcher(None,
264 264 [x[1] for x in old_tokens],
265 265 [x[1] for x in new_tokens])
266 266
267 267 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
268 268 # check the differences by token block types first to give a more
269 269 # nicer "block" level replacement vs character diffs
270 270
271 271 if tag == 'equal':
272 272 for token_class, token_text in old_tokens[o1:o2]:
273 273 old_tokens_result.append((token_class, '', token_text))
274 274 for token_class, token_text in new_tokens[n1:n2]:
275 275 new_tokens_result.append((token_class, '', token_text))
276 276 elif tag == 'delete':
277 277 for token_class, token_text in old_tokens[o1:o2]:
278 278 old_tokens_result.append((token_class, 'del', token_text))
279 279 elif tag == 'insert':
280 280 for token_class, token_text in new_tokens[n1:n2]:
281 281 new_tokens_result.append((token_class, 'ins', token_text))
282 282 elif tag == 'replace':
283 283 # if same type token blocks must be replaced, do a diff on the
284 284 # characters in the token blocks to show individual changes
285 285
286 286 old_char_tokens = []
287 287 new_char_tokens = []
288 288 for token_class, token_text in old_tokens[o1:o2]:
289 289 for char in token_text:
290 290 old_char_tokens.append((token_class, char))
291 291
292 292 for token_class, token_text in new_tokens[n1:n2]:
293 293 for char in token_text:
294 294 new_char_tokens.append((token_class, char))
295 295
296 296 old_string = ''.join([token_text for
297 297 token_class, token_text in old_char_tokens])
298 298 new_string = ''.join([token_text for
299 299 token_class, token_text in new_char_tokens])
300 300
301 301 char_sequence = difflib.SequenceMatcher(
302 302 None, old_string, new_string)
303 303 copcodes = char_sequence.get_opcodes()
304 304 obuffer, nbuffer = [], []
305 305
306 306 if use_diff_match_patch:
307 307 dmp = diff_match_patch()
308 308 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
309 309 reps = dmp.diff_main(old_string, new_string)
310 310 dmp.diff_cleanupEfficiency(reps)
311 311
312 312 a, b = 0, 0
313 313 for op, rep in reps:
314 314 l = len(rep)
315 315 if op == 0:
316 316 for i, c in enumerate(rep):
317 317 obuffer.append((old_char_tokens[a+i][0], '', c))
318 318 nbuffer.append((new_char_tokens[b+i][0], '', c))
319 319 a += l
320 320 b += l
321 321 elif op == -1:
322 322 for i, c in enumerate(rep):
323 323 obuffer.append((old_char_tokens[a+i][0], 'del', c))
324 324 a += l
325 325 elif op == 1:
326 326 for i, c in enumerate(rep):
327 327 nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
328 328 b += l
329 329 else:
330 330 for ctag, co1, co2, cn1, cn2 in copcodes:
331 331 if ctag == 'equal':
332 332 for token_class, token_text in old_char_tokens[co1:co2]:
333 333 obuffer.append((token_class, '', token_text))
334 334 for token_class, token_text in new_char_tokens[cn1:cn2]:
335 335 nbuffer.append((token_class, '', token_text))
336 336 elif ctag == 'delete':
337 337 for token_class, token_text in old_char_tokens[co1:co2]:
338 338 obuffer.append((token_class, 'del', token_text))
339 339 elif ctag == 'insert':
340 340 for token_class, token_text in new_char_tokens[cn1:cn2]:
341 341 nbuffer.append((token_class, 'ins', token_text))
342 342 elif ctag == 'replace':
343 343 for token_class, token_text in old_char_tokens[co1:co2]:
344 344 obuffer.append((token_class, 'del', token_text))
345 345 for token_class, token_text in new_char_tokens[cn1:cn2]:
346 346 nbuffer.append((token_class, 'ins', token_text))
347 347
348 348 old_tokens_result.extend(obuffer)
349 349 new_tokens_result.extend(nbuffer)
350 350
351 351 return old_tokens_result, new_tokens_result, similarity
352 352
353 353
354 354 class DiffSet(object):
355 355 """
356 356 An object for parsing the diff result from diffs.DiffProcessor and
357 357 adding highlighting, side by side/unified renderings and line diffs
358 358 """
359 359
360 360 HL_REAL = 'REAL' # highlights using original file, slow
361 361 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
362 362 # in the case of multiline code
363 363 HL_NONE = 'NONE' # no highlighting, fastest
364 364
365 365 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
366 366 source_repo_name=None,
367 367 source_node_getter=lambda filename: None,
368 368 target_node_getter=lambda filename: None,
369 369 source_nodes=None, target_nodes=None,
370 370 max_file_size_limit=150 * 1024, # files over this size will
371 371 # use fast highlighting
372 372 comments=None,
373 373 ):
374 374
375 375 self.highlight_mode = highlight_mode
376 376 self.highlighted_filenodes = {}
377 377 self.source_node_getter = source_node_getter
378 378 self.target_node_getter = target_node_getter
379 379 self.source_nodes = source_nodes or {}
380 380 self.target_nodes = target_nodes or {}
381 381 self.repo_name = repo_name
382 382 self.source_repo_name = source_repo_name or repo_name
383 383 self.comments = comments or {}
384 384 self.comments_store = self.comments.copy()
385 385 self.max_file_size_limit = max_file_size_limit
386 386
387 387 def render_patchset(self, patchset, source_ref=None, target_ref=None):
388 388 diffset = AttributeDict(dict(
389 389 lines_added=0,
390 390 lines_deleted=0,
391 391 changed_files=0,
392 392 files=[],
393 393 file_stats={},
394 394 limited_diff=isinstance(patchset, LimitedDiffContainer),
395 395 repo_name=self.repo_name,
396 396 source_repo_name=self.source_repo_name,
397 397 source_ref=source_ref,
398 398 target_ref=target_ref,
399 399 ))
400 400 for patch in patchset:
401 401 diffset.file_stats[patch['filename']] = patch['stats']
402 402 filediff = self.render_patch(patch)
403 filediff.diffset = diffset
403 filediff.diffset = StrictAttributeDict(dict(
404 source_ref=diffset.source_ref,
405 target_ref=diffset.target_ref,
406 repo_name=diffset.repo_name,
407 source_repo_name=diffset.source_repo_name,
408 ))
404 409 diffset.files.append(filediff)
405 410 diffset.changed_files += 1
406 411 if not patch['stats']['binary']:
407 412 diffset.lines_added += patch['stats']['added']
408 413 diffset.lines_deleted += patch['stats']['deleted']
409 414
410 415 return diffset
411 416
412 417 _lexer_cache = {}
413 418
414 419 def _get_lexer_for_filename(self, filename, filenode=None):
415 420 # cached because we might need to call it twice for source/target
416 421 if filename not in self._lexer_cache:
417 422 if filenode:
418 423 lexer = filenode.lexer
419 424 extension = filenode.extension
420 425 else:
421 426 lexer = FileNode.get_lexer(filename=filename)
422 427 extension = filename.split('.')[-1]
423 428
424 429 lexer = get_custom_lexer(extension) or lexer
425 430 self._lexer_cache[filename] = lexer
426 431 return self._lexer_cache[filename]
427 432
428 433 def render_patch(self, patch):
429 434 log.debug('rendering diff for %r' % patch['filename'])
430 435
431 436 source_filename = patch['original_filename']
432 437 target_filename = patch['filename']
433 438
434 439 source_lexer = plain_text_lexer
435 440 target_lexer = plain_text_lexer
436 441
437 442 if not patch['stats']['binary']:
438 443 if self.highlight_mode == self.HL_REAL:
439 444 if (source_filename and patch['operation'] in ('D', 'M')
440 445 and source_filename not in self.source_nodes):
441 446 self.source_nodes[source_filename] = (
442 447 self.source_node_getter(source_filename))
443 448
444 449 if (target_filename and patch['operation'] in ('A', 'M')
445 450 and target_filename not in self.target_nodes):
446 451 self.target_nodes[target_filename] = (
447 452 self.target_node_getter(target_filename))
448 453
449 454 elif self.highlight_mode == self.HL_FAST:
450 455 source_lexer = self._get_lexer_for_filename(source_filename)
451 456 target_lexer = self._get_lexer_for_filename(target_filename)
452 457
453 458 source_file = self.source_nodes.get(source_filename, source_filename)
454 459 target_file = self.target_nodes.get(target_filename, target_filename)
455 460
456 461 source_filenode, target_filenode = None, None
457 462
458 463 # TODO: dan: FileNode.lexer works on the content of the file - which
459 464 # can be slow - issue #4289 explains a lexer clean up - which once
460 465 # done can allow caching a lexer for a filenode to avoid the file lookup
461 466 if isinstance(source_file, FileNode):
462 467 source_filenode = source_file
463 468 #source_lexer = source_file.lexer
464 469 source_lexer = self._get_lexer_for_filename(source_filename)
465 470 source_file.lexer = source_lexer
466 471
467 472 if isinstance(target_file, FileNode):
468 473 target_filenode = target_file
469 474 #target_lexer = target_file.lexer
470 475 target_lexer = self._get_lexer_for_filename(target_filename)
471 476 target_file.lexer = target_lexer
472 477
473 478 source_file_path, target_file_path = None, None
474 479
475 480 if source_filename != '/dev/null':
476 481 source_file_path = source_filename
477 482 if target_filename != '/dev/null':
478 483 target_file_path = target_filename
479 484
480 485 source_file_type = source_lexer.name
481 486 target_file_type = target_lexer.name
482 487
483 488 filediff = AttributeDict({
484 489 'source_file_path': source_file_path,
485 490 'target_file_path': target_file_path,
486 491 'source_filenode': source_filenode,
487 492 'target_filenode': target_filenode,
488 493 'source_file_type': target_file_type,
489 494 'target_file_type': source_file_type,
490 495 'patch': {'filename': patch['filename'], 'stats': patch['stats']},
491 496 'operation': patch['operation'],
492 497 'source_mode': patch['stats']['old_mode'],
493 498 'target_mode': patch['stats']['new_mode'],
494 499 'limited_diff': isinstance(patch, LimitedDiffContainer),
495 500 'hunks': [],
496 501 'diffset': self,
497 502 })
498 503
499 504 for hunk in patch['chunks'][1:]:
500 505 hunkbit = self.parse_hunk(hunk, source_file, target_file)
501 506 hunkbit.source_file_path = source_file_path
502 507 hunkbit.target_file_path = target_file_path
503 508 filediff.hunks.append(hunkbit)
504 509
505 510 left_comments = {}
506 511 if source_file_path in self.comments_store:
507 512 for lineno, comments in self.comments_store[source_file_path].items():
508 513 left_comments[lineno] = comments
509 514
510 515 if target_file_path in self.comments_store:
511 516 for lineno, comments in self.comments_store[target_file_path].items():
512 517 left_comments[lineno] = comments
513 518 # left comments are one that we couldn't place in diff lines.
514 519 # could be outdated, or the diff changed and this line is no
515 520 # longer available
516 521 filediff.left_comments = left_comments
517 522
518 523 return filediff
519 524
520 525 def parse_hunk(self, hunk, source_file, target_file):
521 526 result = AttributeDict(dict(
522 527 source_start=hunk['source_start'],
523 528 source_length=hunk['source_length'],
524 529 target_start=hunk['target_start'],
525 530 target_length=hunk['target_length'],
526 531 section_header=hunk['section_header'],
527 532 lines=[],
528 533 ))
529 534 before, after = [], []
530 535
531 536 for line in hunk['lines']:
532 537
533 538 if line['action'] == 'unmod':
534 539 result.lines.extend(
535 540 self.parse_lines(before, after, source_file, target_file))
536 541 after.append(line)
537 542 before.append(line)
538 543 elif line['action'] == 'add':
539 544 after.append(line)
540 545 elif line['action'] == 'del':
541 546 before.append(line)
542 547 elif line['action'] == 'old-no-nl':
543 548 before.append(line)
544 549 elif line['action'] == 'new-no-nl':
545 550 after.append(line)
546 551
547 552 result.lines.extend(
548 553 self.parse_lines(before, after, source_file, target_file))
549 554 result.unified = self.as_unified(result.lines)
550 555 result.sideside = result.lines
551 556
552 557 return result
553 558
554 559 def parse_lines(self, before_lines, after_lines, source_file, target_file):
555 560 # TODO: dan: investigate doing the diff comparison and fast highlighting
556 561 # on the entire before and after buffered block lines rather than by
557 562 # line, this means we can get better 'fast' highlighting if the context
558 563 # allows it - eg.
559 564 # line 4: """
560 565 # line 5: this gets highlighted as a string
561 566 # line 6: """
562 567
563 568 lines = []
564 569
565 570 before_newline = AttributeDict()
566 571 after_newline = AttributeDict()
567 572 if before_lines and before_lines[-1]['action'] == 'old-no-nl':
568 573 before_newline_line = before_lines.pop(-1)
569 574 before_newline.content = '\n {}'.format(
570 575 render_tokenstream(
571 576 [(x[0], '', x[1])
572 577 for x in [('nonl', before_newline_line['line'])]]))
573 578
574 579 if after_lines and after_lines[-1]['action'] == 'new-no-nl':
575 580 after_newline_line = after_lines.pop(-1)
576 581 after_newline.content = '\n {}'.format(
577 582 render_tokenstream(
578 583 [(x[0], '', x[1])
579 584 for x in [('nonl', after_newline_line['line'])]]))
580 585
581 586 while before_lines or after_lines:
582 587 before, after = None, None
583 588 before_tokens, after_tokens = None, None
584 589
585 590 if before_lines:
586 591 before = before_lines.pop(0)
587 592 if after_lines:
588 593 after = after_lines.pop(0)
589 594
590 595 original = AttributeDict()
591 596 modified = AttributeDict()
592 597
593 598 if before:
594 599 if before['action'] == 'old-no-nl':
595 600 before_tokens = [('nonl', before['line'])]
596 601 else:
597 602 before_tokens = self.get_line_tokens(
598 603 line_text=before['line'],
599 604 line_number=before['old_lineno'],
600 605 file=source_file)
601 606 original.lineno = before['old_lineno']
602 607 original.content = before['line']
603 608 original.action = self.action_to_op(before['action'])
604 609 original.comments = self.get_comments_for('old',
605 610 source_file, before['old_lineno'])
606 611
607 612 if after:
608 613 if after['action'] == 'new-no-nl':
609 614 after_tokens = [('nonl', after['line'])]
610 615 else:
611 616 after_tokens = self.get_line_tokens(
612 617 line_text=after['line'], line_number=after['new_lineno'],
613 618 file=target_file)
614 619 modified.lineno = after['new_lineno']
615 620 modified.content = after['line']
616 621 modified.action = self.action_to_op(after['action'])
617 622 modified.comments = self.get_comments_for('new',
618 623 target_file, after['new_lineno'])
619 624
620 625 # diff the lines
621 626 if before_tokens and after_tokens:
622 627 o_tokens, m_tokens, similarity = tokens_diff(
623 628 before_tokens, after_tokens)
624 629 original.content = render_tokenstream(o_tokens)
625 630 modified.content = render_tokenstream(m_tokens)
626 631 elif before_tokens:
627 632 original.content = render_tokenstream(
628 633 [(x[0], '', x[1]) for x in before_tokens])
629 634 elif after_tokens:
630 635 modified.content = render_tokenstream(
631 636 [(x[0], '', x[1]) for x in after_tokens])
632 637
633 638 if not before_lines and before_newline:
634 639 original.content += before_newline.content
635 640 before_newline = None
636 641 if not after_lines and after_newline:
637 642 modified.content += after_newline.content
638 643 after_newline = None
639 644
640 645 lines.append(AttributeDict({
641 646 'original': original,
642 647 'modified': modified,
643 648 }))
644 649
645 650 return lines
646 651
647 652 def get_comments_for(self, version, filename, line_number):
648 653 if hasattr(filename, 'unicode_path'):
649 654 filename = filename.unicode_path
650 655
651 656 if not isinstance(filename, basestring):
652 657 return None
653 658
654 659 line_key = {
655 660 'old': 'o',
656 661 'new': 'n',
657 662 }[version] + str(line_number)
658 663
659 664 if filename in self.comments_store:
660 665 file_comments = self.comments_store[filename]
661 666 if line_key in file_comments:
662 667 return file_comments.pop(line_key)
663 668
664 669 def get_line_tokens(self, line_text, line_number, file=None):
665 670 filenode = None
666 671 filename = None
667 672
668 673 if isinstance(file, basestring):
669 674 filename = file
670 675 elif isinstance(file, FileNode):
671 676 filenode = file
672 677 filename = file.unicode_path
673 678
674 679 if self.highlight_mode == self.HL_REAL and filenode:
675 680 lexer = self._get_lexer_for_filename(filename)
676 681 file_size_allowed = file.size < self.max_file_size_limit
677 682 if line_number and file_size_allowed:
678 683 return self.get_tokenized_filenode_line(
679 684 file, line_number, lexer)
680 685
681 686 if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename:
682 687 lexer = self._get_lexer_for_filename(filename)
683 688 return list(tokenize_string(line_text, lexer))
684 689
685 690 return list(tokenize_string(line_text, plain_text_lexer))
686 691
687 692 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
688 693
689 694 if filenode not in self.highlighted_filenodes:
690 695 tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
691 696 self.highlighted_filenodes[filenode] = tokenized_lines
692 697 return self.highlighted_filenodes[filenode][line_number - 1]
693 698
694 699 def action_to_op(self, action):
695 700 return {
696 701 'add': '+',
697 702 'del': '-',
698 703 'unmod': ' ',
699 704 'old-no-nl': ' ',
700 705 'new-no-nl': ' ',
701 706 }.get(action, action)
702 707
703 708 def as_unified(self, lines):
704 709 """
705 710 Return a generator that yields the lines of a diff in unified order
706 711 """
707 712 def generator():
708 713 buf = []
709 714 for line in lines:
710 715
711 716 if buf and not line.original or line.original.action == ' ':
712 717 for b in buf:
713 718 yield b
714 719 buf = []
715 720
716 721 if line.original:
717 722 if line.original.action == ' ':
718 723 yield (line.original.lineno, line.modified.lineno,
719 724 line.original.action, line.original.content,
720 725 line.original.comments)
721 726 continue
722 727
723 728 if line.original.action == '-':
724 729 yield (line.original.lineno, None,
725 730 line.original.action, line.original.content,
726 731 line.original.comments)
727 732
728 733 if line.modified.action == '+':
729 734 buf.append((
730 735 None, line.modified.lineno,
731 736 line.modified.action, line.modified.content,
732 737 line.modified.comments))
733 738 continue
734 739
735 740 if line.modified:
736 741 yield (None, line.modified.lineno,
737 742 line.modified.action, line.modified.content,
738 743 line.modified.comments)
739 744
740 745 for b in buf:
741 746 yield b
742 747
743 748 return generator()
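
For reference, a hedged usage sketch of the intra-line highlighting pipeline this file implements: a changed pair of lines is lexed into (token_class, token_text) pairs, diffed into (token_class, op, token_text) triples where op is '', 'del' or 'ins', and finally rendered to HTML spans. The import path below is an assumption (the file's module path is not shown in this diff); the function names and signatures are the ones defined above.

# assumption: this module is importable as rhodecode.lib.codeblocks
from rhodecode.lib.codeblocks import (
    plain_text_lexer, tokenize_string, tokens_diff, render_tokenstream)

old_line = 'total = compute(a, b)'
new_line = 'total = compute(a, b, c)'

# 1. lex each line into (token_class, token_text) pairs
old_tokens = list(tokenize_string(old_line, plain_text_lexer))
new_tokens = list(tokenize_string(new_line, plain_text_lexer))

# 2. diff the two token streams into (token_class, op, token_text) triples
old_diffed, new_diffed, similarity = tokens_diff(old_tokens, new_tokens)

# 3. render each side; ops become <del>/<ins> tags nested inside the
#    pygments-class <span> wrappers
print(render_tokenstream(old_diffed))  # e.g. <span>total = compute(a, b)</span>
print(render_tokenstream(new_diffed))  # e.g. <span>total = compute(a, b<ins>, c</ins>)</span>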