diffs: fixed problem with potential diff display.
milka - r4592:8ef51af3 stable
@@ -1,794 +1,798 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2020 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 import logging
22 22 import difflib
23 23 from itertools import groupby
24 24
25 25 from pygments import lex
26 26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
27 27 from pygments.lexers.special import TextLexer, Token
28 28 from pygments.lexers import get_lexer_by_name
29 29 from pyramid import compat
30 30
31 31 from rhodecode.lib.helpers import (
32 32 get_lexer_for_filenode, html_escape, get_custom_lexer)
33 33 from rhodecode.lib.utils2 import AttributeDict, StrictAttributeDict, safe_unicode
34 34 from rhodecode.lib.vcs.nodes import FileNode
35 35 from rhodecode.lib.vcs.exceptions import VCSError, NodeDoesNotExistError
36 36 from rhodecode.lib.diff_match_patch import diff_match_patch
37 37 from rhodecode.lib.diffs import LimitedDiffContainer, DEL_FILENODE, BIN_FILENODE
38 38
39 39
40 40 plain_text_lexer = get_lexer_by_name(
41 41 'text', stripall=False, stripnl=False, ensurenl=False)
42 42
43 43
44 44 log = logging.getLogger(__name__)
45 45
46 46
47 47 def filenode_as_lines_tokens(filenode, lexer=None):
48 48 org_lexer = lexer
49 49 lexer = lexer or get_lexer_for_filenode(filenode)
50 50 log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
51 51 lexer, filenode, org_lexer)
52 52 content = filenode.content
53 53 tokens = tokenize_string(content, lexer)
54 54 lines = split_token_stream(tokens, content)
55 55 rv = list(lines)
56 56 return rv
57 57
58 58
59 59 def tokenize_string(content, lexer):
60 60 """
61 61 Use pygments to tokenize some content based on a lexer
62 62 ensuring all original new lines and whitespace is preserved
63 63 """
64 64
65 65 lexer.stripall = False
66 66 lexer.stripnl = False
67 67 lexer.ensurenl = False
68 68
69 69 if isinstance(lexer, TextLexer):
70 70 lexed = [(Token.Text, content)]
71 71 else:
72 72 lexed = lex(content, lexer)
73 73
74 74 for token_type, token_text in lexed:
75 75 yield pygment_token_class(token_type), token_text
76 76
77 77
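# Minimal standalone illustration of the tokenization above (a sketch, not part
# of the diff): lex a snippet with a whitespace-preserving pygments lexer and map
# every token type to its CSS class name, exactly as tokenize_string() does.
# Printed classes vary with the installed pygments version.
from pygments import lex
from pygments.formatters.html import _get_ttype_class
from pygments.lexers import get_lexer_by_name

py_lexer = get_lexer_by_name('python', stripall=False, stripnl=False, ensurenl=False)
for token_type, token_text in lex(u"x = 1\n", py_lexer):
    print('%s %r' % (_get_ttype_class(token_type), token_text))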
78 78 def split_token_stream(tokens, content):
79 79 """
80 80 Take a list of (TokenType, text) tuples and split them by a string
81 81
82 82 split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
83 83 [(TEXT, 'some')], [(TEXT, 'text'), (TEXT, 'more')],
84 84 [(TEXT, '')]
85 85 """
86 86
87 87 token_buffer = []
88 88 for token_class, token_text in tokens:
89 89 parts = token_text.split('\n')
90 90 for part in parts[:-1]:
91 91 token_buffer.append((token_class, part))
92 92 yield token_buffer
93 93 token_buffer = []
94 94
95 95 token_buffer.append((token_class, parts[-1]))
96 96
97 97 if token_buffer:
98 98 yield token_buffer
99 99 elif content:
100 100 # this is a special case, we have the content, but tokenization didn't produce
101 101 # any results. This can happen if known file extensions like .css have some bogus
102 102 # unicode content without any newline characters
103 103 yield [(pygment_token_class(Token.Text), content)]
104 104
105 105
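# Standalone sketch of the splitting technique used by split_token_stream()
# above: split each token's text on '\n' and flush the line buffer at every
# newline. Plain 't' strings stand in for pygments token classes.
def split_by_newline(tokens):
    buf = []
    for cls, text in tokens:
        parts = text.split('\n')
        for part in parts[:-1]:
            buf.append((cls, part))
            yield buf
            buf = []
        buf.append((cls, parts[-1]))
    if buf:
        yield buf

print(list(split_by_newline([('t', 'some\ntext'), ('t', ' more\n')])))
# -> [[('t', 'some')], [('t', 'text'), ('t', ' more')], [('t', '')]]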
106 106 def filenode_as_annotated_lines_tokens(filenode):
107 107 """
108 108 Take a file node and return a list of annotations => lines, if no annotation
109 109 is found, it will be None.
110 110
111 111 eg:
112 112
113 113 [
114 114 (annotation1, [
115 115 (1, line1_tokens_list),
116 116 (2, line2_tokens_list),
117 117 ]),
118 118 (annotation2, [
119 119 (3, line1_tokens_list),
120 120 ]),
121 121 (None, [
122 122 (4, line1_tokens_list),
123 123 ]),
124 124 (annotation1, [
125 125 (5, line1_tokens_list),
126 126 (6, line2_tokens_list),
127 127 ])
128 128 ]
129 129 """
130 130
131 131 commit_cache = {} # cache commit_getter lookups
132 132
133 133 def _get_annotation(commit_id, commit_getter):
134 134 if commit_id not in commit_cache:
135 135 commit_cache[commit_id] = commit_getter()
136 136 return commit_cache[commit_id]
137 137
138 138 annotation_lookup = {
139 139 line_no: _get_annotation(commit_id, commit_getter)
140 140 for line_no, commit_id, commit_getter, line_content
141 141 in filenode.annotate
142 142 }
143 143
144 144 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
145 145 for line_no, tokens
146 146 in enumerate(filenode_as_lines_tokens(filenode), 1))
147 147
148 148 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
149 149
150 150 for annotation, group in grouped_annotations_lines:
151 151 yield (
152 152 annotation, [(line_no, tokens)
153 153 for (_, line_no, tokens) in group]
154 154 )
155 155
156 156
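# Standalone sketch of the grouping step above: consecutive
# (annotation, line_no, tokens) rows are collapsed with itertools.groupby so
# each annotation block carries its numbered lines. Plain strings stand in for
# commit objects and token lists.
from itertools import groupby

rows = [('c1', 1, ['a']), ('c1', 2, ['b']), (None, 3, ['c']), ('c1', 4, ['d'])]
grouped = [(anno, [(no, toks) for _, no, toks in grp])
           for anno, grp in groupby(rows, lambda x: x[0])]
print(grouped)
# -> [('c1', [(1, ['a']), (2, ['b'])]), (None, [(3, ['c'])]), ('c1', [(4, ['d'])])]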
157 157 def render_tokenstream(tokenstream):
158 158 result = []
159 159 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
160 160
161 161 if token_class:
162 162 result.append(u'<span class="%s">' % token_class)
163 163 else:
164 164 result.append(u'<span>')
165 165
166 166 for op_tag, token_text in token_ops_texts:
167 167
168 168 if op_tag:
169 169 result.append(u'<%s>' % op_tag)
170 170
171 171 # NOTE(marcink): in some cases of mixed encodings we might run into
172 172 # trouble in html_escape; in that case we force token_text to unicode,
173 173 # which ensures "correct" data even at the cost of how it renders
174 174 try:
175 175 escaped_text = html_escape(token_text)
176 176 except TypeError:
177 177 escaped_text = html_escape(safe_unicode(token_text))
178 178
179 179 # TODO: dan: investigate showing hidden characters like space/nl/tab
180 180 # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
181 181 # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
182 182 # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
183 183
184 184 result.append(escaped_text)
185 185
186 186 if op_tag:
187 187 result.append(u'</%s>' % op_tag)
188 188
189 189 result.append(u'</span>')
190 190
191 191 html = ''.join(result)
192 192 return html
193 193
194 194
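# Condensed sketch of the HTML rendering above: each (class, [(op, text), ...])
# group from rollup_tokenstream() becomes one <span>, with ins/del wrappers
# emitted only where an op is set. A tiny stand-in escape is used here instead
# of rhodecode's html_escape helper.
def esc(text):
    return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

def render(rolled_up):
    out = []
    for cls, ops in rolled_up:
        out.append('<span class="%s">' % cls if cls else '<span>')
        for op, text in ops:
            out.append('<%s>%s</%s>' % (op, esc(text), op) if op else esc(text))
        out.append('</span>')
    return ''.join(out)

print(render([('A', [('ins', 'he'), ('', 'llo')])]))
# -> <span class="A"><ins>he</ins>llo</span>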
195 195 def rollup_tokenstream(tokenstream):
196 196 """
197 197 Group a token stream of the format:
198 198
199 199 ('class', 'op', 'text')
200 200 or
201 201 ('class', 'text')
202 202
203 203 into
204 204
205 205 [('class1',
206 206 [('op1', 'text'),
207 207 ('op2', 'text')]),
208 208 ('class2',
209 209 [('op3', 'text')])]
210 210
211 211 This is used to produce the minimal set of tags necessary when
212 212 rendering a token stream to html, eg.
213 213
214 214 <span class="A"><ins>he</ins>llo</span>
215 215 vs
216 216 <span class="A"><ins>he</ins></span><span class="A">llo</span>
217 217
218 218 If a 2 tuple is passed in, the output op will be an empty string.
219 219
220 220 eg:
221 221
222 222 >>> rollup_tokenstream([('classA', '', 'h'),
223 223 ('classA', 'del', 'ell'),
224 224 ('classA', '', 'o'),
225 225 ('classB', '', ' '),
226 226 ('classA', '', 'the'),
227 227 ('classA', '', 're'),
228 228 ])
229 229
230 230 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
231 231 ('classB', [('', ' ')]),
232 232 ('classA', [('', 'there')])]
233 233
234 234 """
235 235 if tokenstream and len(tokenstream[0]) == 2:
236 236 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
237 237
238 238 result = []
239 239 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
240 240 ops = []
241 241 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
242 242 text_buffer = []
243 243 for t_class, t_op, t_text in token_text_list:
244 244 text_buffer.append(t_text)
245 245 ops.append((token_op, ''.join(text_buffer)))
246 246 result.append((token_class, ops))
247 247 return result
248 248
249 249
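# Standalone sketch of the two-level rollup described above: group by token
# class first, then by op within each class, joining the text pieces. This is
# the docstring example, reproduced with plain itertools.groupby.
from itertools import groupby

stream = [('classA', '', 'h'), ('classA', 'del', 'ell'), ('classA', '', 'o'),
          ('classB', '', ' '), ('classA', '', 'the'), ('classA', '', 're')]
rolled = []
for cls, by_class in groupby(stream, lambda t: t[0]):
    ops = [(op, ''.join(t[2] for t in chunk))
           for op, chunk in groupby(by_class, lambda t: t[1])]
    rolled.append((cls, ops))
print(rolled)
# -> [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
#     ('classB', [('', ' ')]),
#     ('classA', [('', 'there')])]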
250 250 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
251 251 """
252 252 Converts a list of (token_class, token_text) tuples to a list of
253 253 (token_class, token_op, token_text) tuples where token_op is one of
254 254 ('ins', 'del', '')
255 255
256 256 :param old_tokens: list of (token_class, token_text) tuples of old line
257 257 :param new_tokens: list of (token_class, token_text) tuples of new line
258 258 :param use_diff_match_patch: boolean, will use google's diff match patch
259 259 library which has options to 'smooth' out the character by character
260 260 differences making nicer ins/del blocks
261 261 """
262 262
263 263 old_tokens_result = []
264 264 new_tokens_result = []
265 265
266 266 similarity = difflib.SequenceMatcher(None,
267 267 ''.join(token_text for token_class, token_text in old_tokens),
268 268 ''.join(token_text for token_class, token_text in new_tokens)
269 269 ).ratio()
270 270
271 271 if similarity < 0.6: # return, the blocks are too different
272 272 for token_class, token_text in old_tokens:
273 273 old_tokens_result.append((token_class, '', token_text))
274 274 for token_class, token_text in new_tokens:
275 275 new_tokens_result.append((token_class, '', token_text))
276 276 return old_tokens_result, new_tokens_result, similarity
277 277
278 278 token_sequence_matcher = difflib.SequenceMatcher(None,
279 279 [x[1] for x in old_tokens],
280 280 [x[1] for x in new_tokens])
281 281
282 282 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
283 283 # check the differences by token block types first to give a more
284 284 # nicer "block" level replacement vs character diffs
285 285
286 286 if tag == 'equal':
287 287 for token_class, token_text in old_tokens[o1:o2]:
288 288 old_tokens_result.append((token_class, '', token_text))
289 289 for token_class, token_text in new_tokens[n1:n2]:
290 290 new_tokens_result.append((token_class, '', token_text))
291 291 elif tag == 'delete':
292 292 for token_class, token_text in old_tokens[o1:o2]:
293 293 old_tokens_result.append((token_class, 'del', token_text))
294 294 elif tag == 'insert':
295 295 for token_class, token_text in new_tokens[n1:n2]:
296 296 new_tokens_result.append((token_class, 'ins', token_text))
297 297 elif tag == 'replace':
298 298 # if same type token blocks must be replaced, do a diff on the
299 299 # characters in the token blocks to show individual changes
300 300
301 301 old_char_tokens = []
302 302 new_char_tokens = []
303 303 for token_class, token_text in old_tokens[o1:o2]:
304 304 for char in token_text:
305 305 old_char_tokens.append((token_class, char))
306 306
307 307 for token_class, token_text in new_tokens[n1:n2]:
308 308 for char in token_text:
309 309 new_char_tokens.append((token_class, char))
310 310
311 311 old_string = ''.join([token_text for
312 312 token_class, token_text in old_char_tokens])
313 313 new_string = ''.join([token_text for
314 314 token_class, token_text in new_char_tokens])
315 315
316 316 char_sequence = difflib.SequenceMatcher(
317 317 None, old_string, new_string)
318 318 copcodes = char_sequence.get_opcodes()
319 319 obuffer, nbuffer = [], []
320 320
321 321 if use_diff_match_patch:
322 322 dmp = diff_match_patch()
323 323 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
324 324 reps = dmp.diff_main(old_string, new_string)
325 325 dmp.diff_cleanupEfficiency(reps)
326 326
327 327 a, b = 0, 0
328 328 for op, rep in reps:
329 329 l = len(rep)
330 330 if op == 0:
331 331 for i, c in enumerate(rep):
332 332 obuffer.append((old_char_tokens[a+i][0], '', c))
333 333 nbuffer.append((new_char_tokens[b+i][0], '', c))
334 334 a += l
335 335 b += l
336 336 elif op == -1:
337 337 for i, c in enumerate(rep):
338 338 obuffer.append((old_char_tokens[a+i][0], 'del', c))
339 339 a += l
340 340 elif op == 1:
341 341 for i, c in enumerate(rep):
342 342 nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
343 343 b += l
344 344 else:
345 345 for ctag, co1, co2, cn1, cn2 in copcodes:
346 346 if ctag == 'equal':
347 347 for token_class, token_text in old_char_tokens[co1:co2]:
348 348 obuffer.append((token_class, '', token_text))
349 349 for token_class, token_text in new_char_tokens[cn1:cn2]:
350 350 nbuffer.append((token_class, '', token_text))
351 351 elif ctag == 'delete':
352 352 for token_class, token_text in old_char_tokens[co1:co2]:
353 353 obuffer.append((token_class, 'del', token_text))
354 354 elif ctag == 'insert':
355 355 for token_class, token_text in new_char_tokens[cn1:cn2]:
356 356 nbuffer.append((token_class, 'ins', token_text))
357 357 elif ctag == 'replace':
358 358 for token_class, token_text in old_char_tokens[co1:co2]:
359 359 obuffer.append((token_class, 'del', token_text))
360 360 for token_class, token_text in new_char_tokens[cn1:cn2]:
361 361 nbuffer.append((token_class, 'ins', token_text))
362 362
363 363 old_tokens_result.extend(obuffer)
364 364 new_tokens_result.extend(nbuffer)
365 365
366 366 return old_tokens_result, new_tokens_result, similarity
367 367
368 368
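# Minimal sketch of the character-level step above: difflib.SequenceMatcher
# opcodes over the two joined strings become ('', 'del', 'ins') ops; the
# diff_match_patch branch is only an optional smoothing of the same idea.
import difflib

old, new = 'color = 1', 'colour = 2'
sm = difflib.SequenceMatcher(None, old, new)
old_ops, new_ops = [], []
for tag, o1, o2, n1, n2 in sm.get_opcodes():
    if tag == 'equal':
        old_ops.append(('', old[o1:o2]))
        new_ops.append(('', new[n1:n2]))
    if tag in ('delete', 'replace'):
        old_ops.append(('del', old[o1:o2]))
    if tag in ('insert', 'replace'):
        new_ops.append(('ins', new[n1:n2]))
print(old_ops)  # [('', 'colo'), ('', 'r = '), ('del', '1')]
print(new_ops)  # [('', 'colo'), ('ins', 'u'), ('', 'r = '), ('ins', '2')]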
369 369 def diffset_node_getter(commit):
370 370 def get_node(fname):
371 371 try:
372 372 return commit.get_node(fname)
373 373 except NodeDoesNotExistError:
374 374 return None
375 375
376 376 return get_node
377 377
378 378
379 379 class DiffSet(object):
380 380 """
381 381 An object for parsing the diff result from diffs.DiffProcessor and
382 382 adding highlighting, side by side/unified renderings and line diffs
383 383 """
384 384
385 385 HL_REAL = 'REAL' # highlights using original file, slow
386 386 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
387 387 # in the case of multiline code
388 388 HL_NONE = 'NONE' # no highlighting, fastest
389 389
390 390 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
391 391 source_repo_name=None,
392 392 source_node_getter=lambda filename: None,
393 393 target_repo_name=None,
394 394 target_node_getter=lambda filename: None,
395 395 source_nodes=None, target_nodes=None,
396 396 # files over this size will use fast highlighting
397 397 max_file_size_limit=150 * 1024,
398 398 ):
399 399
400 400 self.highlight_mode = highlight_mode
401 self.highlighted_filenodes = {}
401 self.highlighted_filenodes = {
402 'before': {},
403 'after': {}
404 }
402 405 self.source_node_getter = source_node_getter
403 406 self.target_node_getter = target_node_getter
404 407 self.source_nodes = source_nodes or {}
405 408 self.target_nodes = target_nodes or {}
406 409 self.repo_name = repo_name
407 410 self.target_repo_name = target_repo_name or repo_name
408 411 self.source_repo_name = source_repo_name or repo_name
409 412 self.max_file_size_limit = max_file_size_limit
410 413
411 414 def render_patchset(self, patchset, source_ref=None, target_ref=None):
412 415 diffset = AttributeDict(dict(
413 416 lines_added=0,
414 417 lines_deleted=0,
415 418 changed_files=0,
416 419 files=[],
417 420 file_stats={},
418 421 limited_diff=isinstance(patchset, LimitedDiffContainer),
419 422 repo_name=self.repo_name,
420 423 target_repo_name=self.target_repo_name,
421 424 source_repo_name=self.source_repo_name,
422 425 source_ref=source_ref,
423 426 target_ref=target_ref,
424 427 ))
425 428 for patch in patchset:
426 429 diffset.file_stats[patch['filename']] = patch['stats']
427 430 filediff = self.render_patch(patch)
428 431 filediff.diffset = StrictAttributeDict(dict(
429 432 source_ref=diffset.source_ref,
430 433 target_ref=diffset.target_ref,
431 434 repo_name=diffset.repo_name,
432 435 source_repo_name=diffset.source_repo_name,
433 436 target_repo_name=diffset.target_repo_name,
434 437 ))
435 438 diffset.files.append(filediff)
436 439 diffset.changed_files += 1
437 440 if not patch['stats']['binary']:
438 441 diffset.lines_added += patch['stats']['added']
439 442 diffset.lines_deleted += patch['stats']['deleted']
440 443
441 444 return diffset
442 445
443 446 _lexer_cache = {}
444 447
445 448 def _get_lexer_for_filename(self, filename, filenode=None):
446 449 # cached because we might need to call it twice for source/target
447 450 if filename not in self._lexer_cache:
448 451 if filenode:
449 452 lexer = filenode.lexer
450 453 extension = filenode.extension
451 454 else:
452 455 lexer = FileNode.get_lexer(filename=filename)
453 456 extension = filename.split('.')[-1]
454 457
455 458 lexer = get_custom_lexer(extension) or lexer
456 459 self._lexer_cache[filename] = lexer
457 460 return self._lexer_cache[filename]
458 461
459 462 def render_patch(self, patch):
460 463 log.debug('rendering diff for %r', patch['filename'])
461 464
462 465 source_filename = patch['original_filename']
463 466 target_filename = patch['filename']
464 467
465 468 source_lexer = plain_text_lexer
466 469 target_lexer = plain_text_lexer
467 470
468 471 if not patch['stats']['binary']:
469 472 node_hl_mode = self.HL_NONE if patch['chunks'] == [] else None
470 473 hl_mode = node_hl_mode or self.highlight_mode
471 474
472 475 if hl_mode == self.HL_REAL:
473 476 if (source_filename and patch['operation'] in ('D', 'M')
474 477 and source_filename not in self.source_nodes):
475 478 self.source_nodes[source_filename] = (
476 479 self.source_node_getter(source_filename))
477 480
478 481 if (target_filename and patch['operation'] in ('A', 'M')
479 482 and target_filename not in self.target_nodes):
480 483 self.target_nodes[target_filename] = (
481 484 self.target_node_getter(target_filename))
482 485
483 486 elif hl_mode == self.HL_FAST:
484 487 source_lexer = self._get_lexer_for_filename(source_filename)
485 488 target_lexer = self._get_lexer_for_filename(target_filename)
486 489
487 490 source_file = self.source_nodes.get(source_filename, source_filename)
488 491 target_file = self.target_nodes.get(target_filename, target_filename)
489 492 raw_id_uid = ''
490 493 if self.source_nodes.get(source_filename):
491 494 raw_id_uid = self.source_nodes[source_filename].commit.raw_id
492 495
493 496 if not raw_id_uid and self.target_nodes.get(target_filename):
494 497 # in case this is a new file we only have it in target
495 498 raw_id_uid = self.target_nodes[target_filename].commit.raw_id
496 499
497 500 source_filenode, target_filenode = None, None
498 501
499 502 # TODO: dan: FileNode.lexer works on the content of the file - which
500 503 # can be slow - issue #4289 explains a lexer clean up - which once
501 504 # done can allow caching a lexer for a filenode to avoid the file lookup
502 505 if isinstance(source_file, FileNode):
503 506 source_filenode = source_file
504 507 #source_lexer = source_file.lexer
505 508 source_lexer = self._get_lexer_for_filename(source_filename)
506 509 source_file.lexer = source_lexer
507 510
508 511 if isinstance(target_file, FileNode):
509 512 target_filenode = target_file
510 513 #target_lexer = target_file.lexer
511 514 target_lexer = self._get_lexer_for_filename(target_filename)
512 515 target_file.lexer = target_lexer
513 516
514 517 source_file_path, target_file_path = None, None
515 518
516 519 if source_filename != '/dev/null':
517 520 source_file_path = source_filename
518 521 if target_filename != '/dev/null':
519 522 target_file_path = target_filename
520 523
521 524 source_file_type = source_lexer.name
522 525 target_file_type = target_lexer.name
523 526
524 527 filediff = AttributeDict({
525 528 'source_file_path': source_file_path,
526 529 'target_file_path': target_file_path,
527 530 'source_filenode': source_filenode,
528 531 'target_filenode': target_filenode,
529 532 'source_file_type': target_file_type,
530 533 'target_file_type': source_file_type,
531 534 'patch': {'filename': patch['filename'], 'stats': patch['stats']},
532 535 'operation': patch['operation'],
533 536 'source_mode': patch['stats']['old_mode'],
534 537 'target_mode': patch['stats']['new_mode'],
535 538 'limited_diff': patch['is_limited_diff'],
536 539 'hunks': [],
537 540 'hunk_ops': None,
538 541 'diffset': self,
539 542 'raw_id': raw_id_uid,
540 543 })
541 544
542 545 file_chunks = patch['chunks'][1:]
543 546 for i, hunk in enumerate(file_chunks, 1):
544 547 hunkbit = self.parse_hunk(hunk, source_file, target_file)
545 548 hunkbit.source_file_path = source_file_path
546 549 hunkbit.target_file_path = target_file_path
547 550 hunkbit.index = i
548 551 filediff.hunks.append(hunkbit)
549 552
550 553 # Simulate hunk on OPS type line which doesn't really contain any diff
551 554 # this allows commenting on those
552 555 if not file_chunks:
553 556 actions = []
554 557 for op_id, op_text in filediff.patch['stats']['ops'].items():
555 558 if op_id == DEL_FILENODE:
556 559 actions.append(u'file was removed')
557 560 elif op_id == BIN_FILENODE:
558 561 actions.append(u'binary diff hidden')
559 562 else:
560 563 actions.append(safe_unicode(op_text))
561 564 action_line = u'NO CONTENT: ' + \
562 565 u', '.join(actions) or u'UNDEFINED_ACTION'
563 566
564 567 hunk_ops = {'source_length': 0, 'source_start': 0,
565 568 'lines': [
566 569 {'new_lineno': 0, 'old_lineno': 1,
567 570 'action': 'unmod-no-hl', 'line': action_line}
568 571 ],
569 572 'section_header': u'', 'target_start': 1, 'target_length': 1}
570 573
571 574 hunkbit = self.parse_hunk(hunk_ops, source_file, target_file)
572 575 hunkbit.source_file_path = source_file_path
573 576 hunkbit.target_file_path = target_file_path
574 577 filediff.hunk_ops = hunkbit
575 578 return filediff
576 579
577 580 def parse_hunk(self, hunk, source_file, target_file):
578 581 result = AttributeDict(dict(
579 582 source_start=hunk['source_start'],
580 583 source_length=hunk['source_length'],
581 584 target_start=hunk['target_start'],
582 585 target_length=hunk['target_length'],
583 586 section_header=hunk['section_header'],
584 587 lines=[],
585 588 ))
586 589 before, after = [], []
587 590
588 591 for line in hunk['lines']:
589 592 if line['action'] in ['unmod', 'unmod-no-hl']:
590 593 no_hl = line['action'] == 'unmod-no-hl'
591 594 result.lines.extend(
592 595 self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
593 596 after.append(line)
594 597 before.append(line)
595 598 elif line['action'] == 'add':
596 599 after.append(line)
597 600 elif line['action'] == 'del':
598 601 before.append(line)
599 602 elif line['action'] == 'old-no-nl':
600 603 before.append(line)
601 604 elif line['action'] == 'new-no-nl':
602 605 after.append(line)
603 606
604 607 all_actions = [x['action'] for x in after] + [x['action'] for x in before]
605 608 no_hl = {x for x in all_actions} == {'unmod-no-hl'}
606 609 result.lines.extend(
607 610 self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
608 611 # NOTE(marcink): we must keep list() call here so we can cache the result...
609 612 result.unified = list(self.as_unified(result.lines))
610 613 result.sideside = result.lines
611 614
612 615 return result
613 616
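# Simplified sketch of the buffering done in parse_hunk() above: deleted lines
# collect in a 'before' buffer, added lines in an 'after' buffer, and every
# unmodified line flushes the pair (in the real method each flushed pair goes
# through parse_lines() for tokenizing and diffing).
hunk_lines = [
    {'action': 'unmod', 'line': 'keep'},
    {'action': 'del', 'line': 'old text'},
    {'action': 'add', 'line': 'new text'},
    {'action': 'unmod', 'line': 'keep too'},
]

before, after, flushed = [], [], []
for line in hunk_lines:
    if line['action'] in ('unmod', 'unmod-no-hl'):
        flushed.append((before, after))     # diff this pair of blocks together
        before, after = [line], [line]      # context shows up on both sides
    elif line['action'] in ('del', 'old-no-nl'):
        before.append(line)
    elif line['action'] in ('add', 'new-no-nl'):
        after.append(line)
flushed.append((before, after))

for before_blk, after_blk in flushed:
    print('%s | %s' % ([l['line'] for l in before_blk],
                       [l['line'] for l in after_blk]))
# [] | []
# ['keep', 'old text'] | ['keep', 'new text']
# ['keep too'] | ['keep too']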
614 617 def parse_lines(self, before_lines, after_lines, source_file, target_file,
615 618 no_hl=False):
616 619 # TODO: dan: investigate doing the diff comparison and fast highlighting
617 620 # on the entire before and after buffered block lines rather than by
618 621 # line, this means we can get better 'fast' highlighting if the context
619 622 # allows it - eg.
620 623 # line 4: """
621 624 # line 5: this gets highlighted as a string
622 625 # line 6: """
623 626
624 627 lines = []
625 628
626 629 before_newline = AttributeDict()
627 630 after_newline = AttributeDict()
628 631 if before_lines and before_lines[-1]['action'] == 'old-no-nl':
629 632 before_newline_line = before_lines.pop(-1)
630 633 before_newline.content = '\n {}'.format(
631 634 render_tokenstream(
632 635 [(x[0], '', x[1])
633 636 for x in [('nonl', before_newline_line['line'])]]))
634 637
635 638 if after_lines and after_lines[-1]['action'] == 'new-no-nl':
636 639 after_newline_line = after_lines.pop(-1)
637 640 after_newline.content = '\n {}'.format(
638 641 render_tokenstream(
639 642 [(x[0], '', x[1])
640 643 for x in [('nonl', after_newline_line['line'])]]))
641 644
642 645 while before_lines or after_lines:
643 646 before, after = None, None
644 647 before_tokens, after_tokens = None, None
645 648
646 649 if before_lines:
647 650 before = before_lines.pop(0)
648 651 if after_lines:
649 652 after = after_lines.pop(0)
650 653
651 654 original = AttributeDict()
652 655 modified = AttributeDict()
653 656
654 657 if before:
655 658 if before['action'] == 'old-no-nl':
656 659 before_tokens = [('nonl', before['line'])]
657 660 else:
658 661 before_tokens = self.get_line_tokens(
659 662 line_text=before['line'], line_number=before['old_lineno'],
660 input_file=source_file, no_hl=no_hl)
663 input_file=source_file, no_hl=no_hl, source='before')
661 664 original.lineno = before['old_lineno']
662 665 original.content = before['line']
663 666 original.action = self.action_to_op(before['action'])
664 667
665 668 original.get_comment_args = (
666 669 source_file, 'o', before['old_lineno'])
667 670
668 671 if after:
669 672 if after['action'] == 'new-no-nl':
670 673 after_tokens = [('nonl', after['line'])]
671 674 else:
672 675 after_tokens = self.get_line_tokens(
673 676 line_text=after['line'], line_number=after['new_lineno'],
674 input_file=target_file, no_hl=no_hl)
677 input_file=target_file, no_hl=no_hl, source='after')
675 678 modified.lineno = after['new_lineno']
676 679 modified.content = after['line']
677 680 modified.action = self.action_to_op(after['action'])
678 681
679 682 modified.get_comment_args = (target_file, 'n', after['new_lineno'])
680 683
681 684 # diff the lines
682 685 if before_tokens and after_tokens:
683 686 o_tokens, m_tokens, similarity = tokens_diff(
684 687 before_tokens, after_tokens)
685 688 original.content = render_tokenstream(o_tokens)
686 689 modified.content = render_tokenstream(m_tokens)
687 690 elif before_tokens:
688 691 original.content = render_tokenstream(
689 692 [(x[0], '', x[1]) for x in before_tokens])
690 693 elif after_tokens:
691 694 modified.content = render_tokenstream(
692 695 [(x[0], '', x[1]) for x in after_tokens])
693 696
694 697 if not before_lines and before_newline:
695 698 original.content += before_newline.content
696 699 before_newline = None
697 700 if not after_lines and after_newline:
698 701 modified.content += after_newline.content
699 702 after_newline = None
700 703
701 704 lines.append(AttributeDict({
702 705 'original': original,
703 706 'modified': modified,
704 707 }))
705 708
706 709 return lines
707 710
708 def get_line_tokens(self, line_text, line_number, input_file=None, no_hl=False):
711 def get_line_tokens(self, line_text, line_number, input_file=None, no_hl=False, source=''):
709 712 filenode = None
710 713 filename = None
711 714
712 715 if isinstance(input_file, compat.string_types):
713 716 filename = input_file
714 717 elif isinstance(input_file, FileNode):
715 718 filenode = input_file
716 719 filename = input_file.unicode_path
717 720
718 721 hl_mode = self.HL_NONE if no_hl else self.highlight_mode
719 722 if hl_mode == self.HL_REAL and filenode:
720 723 lexer = self._get_lexer_for_filename(filename)
721 724 file_size_allowed = input_file.size < self.max_file_size_limit
722 725 if line_number and file_size_allowed:
723 return self.get_tokenized_filenode_line(
724 input_file, line_number, lexer)
726 return self.get_tokenized_filenode_line(input_file, line_number, lexer, source)
725 727
726 728 if hl_mode in (self.HL_REAL, self.HL_FAST) and filename:
727 729 lexer = self._get_lexer_for_filename(filename)
728 730 return list(tokenize_string(line_text, lexer))
729 731
730 732 return list(tokenize_string(line_text, plain_text_lexer))
731 733
732 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
734 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None, source=''):
733 735
734 if filenode not in self.highlighted_filenodes:
735 tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
736 self.highlighted_filenodes[filenode] = tokenized_lines
736 def tokenize(_filenode):
737 self.highlighted_filenodes[source][filenode] = filenode_as_lines_tokens(filenode, lexer)
738
739 if filenode not in self.highlighted_filenodes[source]:
740 tokenize(filenode)
737 741
738 742 try:
739 return self.highlighted_filenodes[filenode][line_number - 1]
743 return self.highlighted_filenodes[source][filenode][line_number - 1]
740 744 except Exception:
741 745 log.exception('diff rendering error')
742 return [('', u'rhodecode diff rendering error')]
746 return [('', u'L{}: rhodecode diff rendering error'.format(line_number))]
743 747
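# Sketch of the per-side cache this change introduces: tokenized lines are now
# stored under 'before'/'after' keys so the source and target versions of a
# file no longer share a cache slot. Plain strings and lambdas stand in for
# FileNode objects and the real tokenizer.
highlighted = {'before': {}, 'after': {}}

def cached_line(cache, side, node, line_number, tokenize):
    if node not in cache[side]:
        cache[side][node] = tokenize(node)
    return cache[side][node][line_number - 1]

tokens_v1 = lambda node: [[('', 'old line 1')], [('', 'old line 2')]]
tokens_v2 = lambda node: [[('', 'new line 1')]]
print(cached_line(highlighted, 'before', 'setup.py', 1, tokens_v1))
print(cached_line(highlighted, 'after', 'setup.py', 1, tokens_v2))
# -> [('', 'old line 1')] then [('', 'new line 1')]; the two sides never collide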
744 748 def action_to_op(self, action):
745 749 return {
746 750 'add': '+',
747 751 'del': '-',
748 752 'unmod': ' ',
749 753 'unmod-no-hl': ' ',
750 754 'old-no-nl': ' ',
751 755 'new-no-nl': ' ',
752 756 }.get(action, action)
753 757
754 758 def as_unified(self, lines):
755 759 """
756 760 Return a generator that yields the lines of a diff in unified order
757 761 """
758 762 def generator():
759 763 buf = []
760 764 for line in lines:
761 765
762 766 if buf and not line.original or line.original.action == ' ':
763 767 for b in buf:
764 768 yield b
765 769 buf = []
766 770
767 771 if line.original:
768 772 if line.original.action == ' ':
769 773 yield (line.original.lineno, line.modified.lineno,
770 774 line.original.action, line.original.content,
771 775 line.original.get_comment_args)
772 776 continue
773 777
774 778 if line.original.action == '-':
775 779 yield (line.original.lineno, None,
776 780 line.original.action, line.original.content,
777 781 line.original.get_comment_args)
778 782
779 783 if line.modified.action == '+':
780 784 buf.append((
781 785 None, line.modified.lineno,
782 786 line.modified.action, line.modified.content,
783 787 line.modified.get_comment_args))
784 788 continue
785 789
786 790 if line.modified:
787 791 yield (None, line.modified.lineno,
788 792 line.modified.action, line.modified.content,
789 793 line.modified.get_comment_args)
790 794
791 795 for b in buf:
792 796 yield b
793 797
794 798 return generator()
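# Condensed sketch of the ordering rule in as_unified() above: deletions are
# yielded immediately, additions paired with a deletion are buffered, and the
# buffer is flushed at the next context line or at the end of the hunk.
# Simple (action, text) tuples stand in for the real line objects.
def unified_order(rows):
    buf = []
    for old, new in rows:  # each side is an (action, text) tuple or None
        if old and old[0] == ' ':
            for b in buf:           # flush buffered additions before context
                yield b
            buf = []
            yield old
            continue
        if old and old[0] == '-':
            yield old               # deletions come out immediately
            if new and new[0] == '+':
                buf.append(new)     # paired addition waits until the block ends
            continue
        if new and new[0] == '+':
            yield new               # pure additions need no reordering
    for b in buf:
        yield b

rows = [((' ', 'ctx'), (' ', 'ctx')),
        (('-', 'old'), ('+', 'new')),
        ((' ', 'ctx2'), (' ', 'ctx2'))]
print(list(unified_order(rows)))
# -> [(' ', 'ctx'), ('-', 'old'), ('+', 'new'), (' ', 'ctx2')]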