diffs: use custom lexer extraction in diffs so it behaves consistently with...
marcink
r1591:9abd8b35 default
@@ -1,703 +1,707 @@
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2017 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 import logging
22 22 import difflib
23 23 from itertools import groupby
24 24
25 25 from pygments import lex
26 26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
27 27 from rhodecode.lib.helpers import (
28 get_lexer_for_filenode, html_escape)
28 get_lexer_for_filenode, html_escape, get_custom_lexer)
29 29 from rhodecode.lib.utils2 import AttributeDict
30 30 from rhodecode.lib.vcs.nodes import FileNode
31 31 from rhodecode.lib.diff_match_patch import diff_match_patch
32 32 from rhodecode.lib.diffs import LimitedDiffContainer
33 33 from pygments.lexers import get_lexer_by_name
34 34
35 35 plain_text_lexer = get_lexer_by_name(
36 36 'text', stripall=False, stripnl=False, ensurenl=False)
37 37
38 38
39 39 log = logging.getLogger()
40 40
41 41
42 42 def filenode_as_lines_tokens(filenode, lexer=None):
43 43 org_lexer = lexer
44 44 lexer = lexer or get_lexer_for_filenode(filenode)
45 45 log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
46 46 lexer, filenode, org_lexer)
47 47 tokens = tokenize_string(filenode.content, lexer)
48 48 lines = split_token_stream(tokens, split_string='\n')
49 49 rv = list(lines)
50 50 return rv
51 51
52 52
53 53 def tokenize_string(content, lexer):
54 54 """
55 55 Use pygments to tokenize some content based on a lexer
56 56 ensuring all original new lines and whitespace is preserved
57 57 """
58 58
59 59 lexer.stripall = False
60 60 lexer.stripnl = False
61 61 lexer.ensurenl = False
62 62 for token_type, token_text in lex(content, lexer):
63 63 yield pygment_token_class(token_type), token_text
64 64
65 65
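
A minimal usage sketch for tokenize_string, assuming only a standard Pygments lexer; the exact CSS-style token classes it yields depend on the lexer and Pygments version, so the printed values below are illustrative:

```python
from pygments.lexers import get_lexer_by_name

lexer = get_lexer_by_name('python')
for token_class, token_text in tokenize_string(u'x = 1\n', lexer):
    # yields pairs such as ('n', u'x') or ('o', u'='); whitespace and the
    # trailing newline are preserved because stripping is disabled above
    print(token_class, repr(token_text))
```
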
66 66 def split_token_stream(tokens, split_string=u'\n'):
67 67 """
68 68 Take a list of (TokenType, text) tuples and split them by a string
69 69
70 70 >>> split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
71 71 [(TEXT, 'some'), (TEXT, 'text'),
72 72 (TEXT, 'more'), (TEXT, 'text')]
73 73 """
74 74
75 75 buffer = []
76 76 for token_class, token_text in tokens:
77 77 parts = token_text.split(split_string)
78 78 for part in parts[:-1]:
79 79 buffer.append((token_class, part))
80 80 yield buffer
81 81 buffer = []
82 82
83 83 buffer.append((token_class, parts[-1]))
84 84
85 85 if buffer:
86 86 yield buffer
87 87
88 88
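
Concretely, the split regroups the flat token stream into one token list per line, and a trailing newline leaves a final empty-text token. A small sketch with plain (class, text) tuples:

```python
stream = [('', u'some\ntext'), ('', u'more\n')]
lines = list(split_token_stream(stream))
# -> [[('', u'some')],
#     [('', u'text'), ('', u'more')],
#     [('', u'')]]
```
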
89 89 def filenode_as_annotated_lines_tokens(filenode):
90 90 """
91 91 Take a file node and return a list of annotations => lines, if no annotation
92 92 is found, it will be None.
93 93
94 94 eg:
95 95
96 96 [
97 97 (annotation1, [
98 98 (1, line1_tokens_list),
99 99 (2, line2_tokens_list),
100 100 ]),
101 101 (annotation2, [
102 102 (3, line1_tokens_list),
103 103 ]),
104 104 (None, [
105 105 (4, line1_tokens_list),
106 106 ]),
107 107 (annotation1, [
108 108 (5, line1_tokens_list),
109 109 (6, line2_tokens_list),
110 110 ])
111 111 ]
112 112 """
113 113
114 114 commit_cache = {} # cache commit_getter lookups
115 115
116 116 def _get_annotation(commit_id, commit_getter):
117 117 if commit_id not in commit_cache:
118 118 commit_cache[commit_id] = commit_getter()
119 119 return commit_cache[commit_id]
120 120
121 121 annotation_lookup = {
122 122 line_no: _get_annotation(commit_id, commit_getter)
123 123 for line_no, commit_id, commit_getter, line_content
124 124 in filenode.annotate
125 125 }
126 126
127 127 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
128 128 for line_no, tokens
129 129 in enumerate(filenode_as_lines_tokens(filenode), 1))
130 130
131 131 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
132 132
133 133 for annotation, group in grouped_annotations_lines:
134 134 yield (
135 135 annotation, [(line_no, tokens)
136 136 for (_, line_no, tokens) in group]
137 137 )
138 138
139 139
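
A hedged sketch of consuming the generator above for an annotation view; filenode stands in for a vcs FileNode, and the short_id attribute access on the annotation commit is assumed for display purposes only:

```python
for annotation, lines in filenode_as_annotated_lines_tokens(filenode):
    # consecutive lines sharing a commit are grouped under one annotation;
    # annotation is None for lines without one
    label = annotation.short_id if annotation else 'unknown'
    print(label, [line_no for line_no, _tokens in lines])
```
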
140 140 def render_tokenstream(tokenstream):
141 141 result = []
142 142 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
143 143
144 144 if token_class:
145 145 result.append(u'<span class="%s">' % token_class)
146 146 else:
147 147 result.append(u'<span>')
148 148
149 149 for op_tag, token_text in token_ops_texts:
150 150
151 151 if op_tag:
152 152 result.append(u'<%s>' % op_tag)
153 153
154 154 escaped_text = html_escape(token_text)
155 155
156 156 # TODO: dan: investigate showing hidden characters like space/nl/tab
157 157 # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
158 158 # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
159 159 # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
160 160
161 161 result.append(escaped_text)
162 162
163 163 if op_tag:
164 164 result.append(u'</%s>' % op_tag)
165 165
166 166 result.append(u'</span>')
167 167
168 168 html = ''.join(result)
169 169 return html
170 170
171 171
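
For reference, a small sketch of the HTML this produces: each class group becomes one span, and each non-empty op is wrapped in its own tag inside it:

```python
html = render_tokenstream([
    ('k', '', u'def'),
    ('', '', u' '),
    ('nf', 'ins', u'foo'),
])
# -> '<span class="k">def</span><span> </span><span class="nf"><ins>foo</ins></span>'
```
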
172 172 def rollup_tokenstream(tokenstream):
173 173 """
174 174 Group a token stream of the format:
175 175
176 176 ('class', 'op', 'text')
177 177 or
178 178 ('class', 'text')
179 179
180 180 into
181 181
182 182 [('class1',
183 183 [('op1', 'text'),
184 184 ('op2', 'text')]),
185 185 ('class2',
186 186 [('op3', 'text')])]
187 187
188 188 This is used to get the minimal tags necessary when
189 189 rendering to html eg for a token stream ie.
190 190
191 191 <span class="A"><ins>he</ins>llo</span>
192 192 vs
193 193 <span class="A"><ins>he</ins></span><span class="A">llo</span>
194 194
195 195 If a 2 tuple is passed in, the output op will be an empty string.
196 196
197 197 eg:
198 198
199 199 >>> rollup_tokenstream([('classA', '', 'h'),
200 200 ('classA', 'del', 'ell'),
201 201 ('classA', '', 'o'),
202 202 ('classB', '', ' '),
203 203 ('classA', '', 'the'),
204 204 ('classA', '', 're'),
205 205 ])
206 206
207 207 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')],
208 208 ('classB', [('', ' ')],
209 209 ('classA', [('', 'there')]]
210 210
211 211 """
212 212 if tokenstream and len(tokenstream[0]) == 2:
213 213 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
214 214
215 215 result = []
216 216 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
217 217 ops = []
218 218 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
219 219 text_buffer = []
220 220 for t_class, t_op, t_text in token_text_list:
221 221 text_buffer.append(t_text)
222 222 ops.append((token_op, ''.join(text_buffer)))
223 223 result.append((token_class, ops))
224 224 return result
225 225
226 226
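
A concrete sketch of the rollup described in the docstring; consecutive tokens that share both class and op are merged into a single text run:

```python
rolled = rollup_tokenstream([
    ('classA', '', 'h'),
    ('classA', 'del', 'ell'),
    ('classA', '', 'o'),
    ('classB', '', ' '),
    ('classA', '', 'the'),
    ('classA', '', 're'),
])
# -> [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
#     ('classB', [('', ' ')]),
#     ('classA', [('', 'there')])]
```
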
227 227 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
228 228 """
229 229 Converts a list of (token_class, token_text) tuples to a list of
230 230 (token_class, token_op, token_text) tuples where token_op is one of
231 231 ('ins', 'del', '')
232 232
233 233 :param old_tokens: list of (token_class, token_text) tuples of old line
234 234 :param new_tokens: list of (token_class, token_text) tuples of new line
235 235 :param use_diff_match_patch: boolean, will use google's diff match patch
236 236 library which has options to 'smooth' out the character by character
237 237 differences making nicer ins/del blocks
238 238 """
239 239
240 240 old_tokens_result = []
241 241 new_tokens_result = []
242 242
243 243 similarity = difflib.SequenceMatcher(None,
244 244 ''.join(token_text for token_class, token_text in old_tokens),
245 245 ''.join(token_text for token_class, token_text in new_tokens)
246 246 ).ratio()
247 247
248 248 if similarity < 0.6: # return, the blocks are too different
249 249 for token_class, token_text in old_tokens:
250 250 old_tokens_result.append((token_class, '', token_text))
251 251 for token_class, token_text in new_tokens:
252 252 new_tokens_result.append((token_class, '', token_text))
253 253 return old_tokens_result, new_tokens_result, similarity
254 254
255 255 token_sequence_matcher = difflib.SequenceMatcher(None,
256 256 [x[1] for x in old_tokens],
257 257 [x[1] for x in new_tokens])
258 258
259 259 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
260 260 # check the differences by token block types first to give a more
261 261 # nicer "block" level replacement vs character diffs
262 262
263 263 if tag == 'equal':
264 264 for token_class, token_text in old_tokens[o1:o2]:
265 265 old_tokens_result.append((token_class, '', token_text))
266 266 for token_class, token_text in new_tokens[n1:n2]:
267 267 new_tokens_result.append((token_class, '', token_text))
268 268 elif tag == 'delete':
269 269 for token_class, token_text in old_tokens[o1:o2]:
270 270 old_tokens_result.append((token_class, 'del', token_text))
271 271 elif tag == 'insert':
272 272 for token_class, token_text in new_tokens[n1:n2]:
273 273 new_tokens_result.append((token_class, 'ins', token_text))
274 274 elif tag == 'replace':
275 275 # if same type token blocks must be replaced, do a diff on the
276 276 # characters in the token blocks to show individual changes
277 277
278 278 old_char_tokens = []
279 279 new_char_tokens = []
280 280 for token_class, token_text in old_tokens[o1:o2]:
281 281 for char in token_text:
282 282 old_char_tokens.append((token_class, char))
283 283
284 284 for token_class, token_text in new_tokens[n1:n2]:
285 285 for char in token_text:
286 286 new_char_tokens.append((token_class, char))
287 287
288 288 old_string = ''.join([token_text for
289 289 token_class, token_text in old_char_tokens])
290 290 new_string = ''.join([token_text for
291 291 token_class, token_text in new_char_tokens])
292 292
293 293 char_sequence = difflib.SequenceMatcher(
294 294 None, old_string, new_string)
295 295 copcodes = char_sequence.get_opcodes()
296 296 obuffer, nbuffer = [], []
297 297
298 298 if use_diff_match_patch:
299 299 dmp = diff_match_patch()
300 300 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
301 301 reps = dmp.diff_main(old_string, new_string)
302 302 dmp.diff_cleanupEfficiency(reps)
303 303
304 304 a, b = 0, 0
305 305 for op, rep in reps:
306 306 l = len(rep)
307 307 if op == 0:
308 308 for i, c in enumerate(rep):
309 309 obuffer.append((old_char_tokens[a+i][0], '', c))
310 310 nbuffer.append((new_char_tokens[b+i][0], '', c))
311 311 a += l
312 312 b += l
313 313 elif op == -1:
314 314 for i, c in enumerate(rep):
315 315 obuffer.append((old_char_tokens[a+i][0], 'del', c))
316 316 a += l
317 317 elif op == 1:
318 318 for i, c in enumerate(rep):
319 319 nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
320 320 b += l
321 321 else:
322 322 for ctag, co1, co2, cn1, cn2 in copcodes:
323 323 if ctag == 'equal':
324 324 for token_class, token_text in old_char_tokens[co1:co2]:
325 325 obuffer.append((token_class, '', token_text))
326 326 for token_class, token_text in new_char_tokens[cn1:cn2]:
327 327 nbuffer.append((token_class, '', token_text))
328 328 elif ctag == 'delete':
329 329 for token_class, token_text in old_char_tokens[co1:co2]:
330 330 obuffer.append((token_class, 'del', token_text))
331 331 elif ctag == 'insert':
332 332 for token_class, token_text in new_char_tokens[cn1:cn2]:
333 333 nbuffer.append((token_class, 'ins', token_text))
334 334 elif ctag == 'replace':
335 335 for token_class, token_text in old_char_tokens[co1:co2]:
336 336 obuffer.append((token_class, 'del', token_text))
337 337 for token_class, token_text in new_char_tokens[cn1:cn2]:
338 338 nbuffer.append((token_class, 'ins', token_text))
339 339
340 340 old_tokens_result.extend(obuffer)
341 341 new_tokens_result.extend(nbuffer)
342 342
343 343 return old_tokens_result, new_tokens_result, similarity
344 344
345 345
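
A usage sketch for tokens_diff; the exact character grouping depends on difflib (and on diff_match_patch when enabled), so only the shape of the result is described:

```python
old = [('', u'hello world')]
new = [('', u'hello there')]
old_out, new_out, similarity = tokens_diff(old, new, use_diff_match_patch=False)
# old_out mixes ('', '', char) and ('', 'del', char) entries,
# new_out mixes ('', '', char) and ('', 'ins', char) entries,
# and similarity is the difflib ratio used for the 0.6 cut-off above
```
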
346 346 class DiffSet(object):
347 347 """
348 348 An object for parsing the diff result from diffs.DiffProcessor and
349 349 adding highlighting, side by side/unified renderings and line diffs
350 350 """
351 351
352 352 HL_REAL = 'REAL' # highlights using original file, slow
353 353 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
354 354 # in the case of multiline code
355 355 HL_NONE = 'NONE' # no highlighting, fastest
356 356
357 357 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
358 358 source_repo_name=None,
359 359 source_node_getter=lambda filename: None,
360 360 target_node_getter=lambda filename: None,
361 361 source_nodes=None, target_nodes=None,
362 362 max_file_size_limit=150 * 1024, # files over this size will
363 363 # use fast highlighting
364 364 comments=None,
365 365 ):
366 366
367 367 self.highlight_mode = highlight_mode
368 368 self.highlighted_filenodes = {}
369 369 self.source_node_getter = source_node_getter
370 370 self.target_node_getter = target_node_getter
371 371 self.source_nodes = source_nodes or {}
372 372 self.target_nodes = target_nodes or {}
373 373 self.repo_name = repo_name
374 374 self.source_repo_name = source_repo_name or repo_name
375 375 self.comments = comments or {}
376 376 self.comments_store = self.comments.copy()
377 377 self.max_file_size_limit = max_file_size_limit
378 378
379 379 def render_patchset(self, patchset, source_ref=None, target_ref=None):
380 380 diffset = AttributeDict(dict(
381 381 lines_added=0,
382 382 lines_deleted=0,
383 383 changed_files=0,
384 384 files=[],
385 385 file_stats={},
386 386 limited_diff=isinstance(patchset, LimitedDiffContainer),
387 387 repo_name=self.repo_name,
388 388 source_repo_name=self.source_repo_name,
389 389 source_ref=source_ref,
390 390 target_ref=target_ref,
391 391 ))
392 392 for patch in patchset:
393 393 diffset.file_stats[patch['filename']] = patch['stats']
394 394 filediff = self.render_patch(patch)
395 395 filediff.diffset = diffset
396 396 diffset.files.append(filediff)
397 397 diffset.changed_files += 1
398 398 if not patch['stats']['binary']:
399 399 diffset.lines_added += patch['stats']['added']
400 400 diffset.lines_deleted += patch['stats']['deleted']
401 401
402 402 return diffset
403 403
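
A hedged wiring sketch for the class; patchset is assumed to be the parsed output of diffs.DiffProcessor, and the repository name is illustrative:

```python
diffset = DiffSet(highlight_mode=DiffSet.HL_FAST, repo_name='my-repo')
rendered = diffset.render_patchset(patchset)
# rendered is an AttributeDict with files, file_stats, lines_added,
# lines_deleted and changed_files filled in from the per-patch stats
print(rendered.changed_files, rendered.lines_added, rendered.lines_deleted)
```
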
404 404 _lexer_cache = {}
405 405 def _get_lexer_for_filename(self, filename, filenode=None):
406 406 # cached because we might need to call it twice for source/target
407 407 if filename not in self._lexer_cache:
408 408 if filenode:
409 409 lexer = filenode.lexer
410 extension = filenode.extension
410 411 else:
411 412 lexer = FileNode.get_lexer(filename=filename)
413 extension = filename.split('.')[-1]
414
415 lexer = get_custom_lexer(extension) or lexer
412 416 self._lexer_cache[filename] = lexer
413 417 return self._lexer_cache[filename]
414 418
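
The method above carries the actual change of this commit: a lexer configured for the file's extension via get_custom_lexer now overrides the guessed one, so diff highlighting matches the regular file view. A sketch under the assumption that such an extension mapping exists; the extension used here is only an example:

```python
# assuming e.g. *.tt2 has been mapped to a custom lexer in the settings
lexer = diffset._get_lexer_for_filename('templates/page.tt2')
# without a custom mapping this falls back to FileNode.get_lexer(), i.e. the
# same Pygments guess as before; results are cached per filename
```
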
415 419 def render_patch(self, patch):
416 420 log.debug('rendering diff for %r' % patch['filename'])
417 421
418 422 source_filename = patch['original_filename']
419 423 target_filename = patch['filename']
420 424
421 425 source_lexer = plain_text_lexer
422 426 target_lexer = plain_text_lexer
423 427
424 428 if not patch['stats']['binary']:
425 429 if self.highlight_mode == self.HL_REAL:
426 430 if (source_filename and patch['operation'] in ('D', 'M')
427 431 and source_filename not in self.source_nodes):
428 432 self.source_nodes[source_filename] = (
429 433 self.source_node_getter(source_filename))
430 434
431 435 if (target_filename and patch['operation'] in ('A', 'M')
432 436 and target_filename not in self.target_nodes):
433 437 self.target_nodes[target_filename] = (
434 438 self.target_node_getter(target_filename))
435 439
436 440 elif self.highlight_mode == self.HL_FAST:
437 441 source_lexer = self._get_lexer_for_filename(source_filename)
438 442 target_lexer = self._get_lexer_for_filename(target_filename)
439 443
440 444 source_file = self.source_nodes.get(source_filename, source_filename)
441 445 target_file = self.target_nodes.get(target_filename, target_filename)
442 446
443 447 source_filenode, target_filenode = None, None
444 448
445 449 # TODO: dan: FileNode.lexer works on the content of the file - which
446 450 # can be slow - issue #4289 explains a lexer clean up - which once
447 451 # done can allow caching a lexer for a filenode to avoid the file lookup
448 452 if isinstance(source_file, FileNode):
449 453 source_filenode = source_file
450 454 #source_lexer = source_file.lexer
451 455 source_lexer = self._get_lexer_for_filename(source_filename)
452 456 source_file.lexer = source_lexer
453 457
454 458 if isinstance(target_file, FileNode):
455 459 target_filenode = target_file
456 460 #target_lexer = target_file.lexer
457 461 target_lexer = self._get_lexer_for_filename(target_filename)
458 462 target_file.lexer = target_lexer
459 463
460 464 source_file_path, target_file_path = None, None
461 465
462 466 if source_filename != '/dev/null':
463 467 source_file_path = source_filename
464 468 if target_filename != '/dev/null':
465 469 target_file_path = target_filename
466 470
467 471 source_file_type = source_lexer.name
468 472 target_file_type = target_lexer.name
469 473
470 474 op_hunks = patch['chunks'][0]
471 475 hunks = patch['chunks'][1:]
472 476
473 477 filediff = AttributeDict({
474 478 'source_file_path': source_file_path,
475 479 'target_file_path': target_file_path,
476 480 'source_filenode': source_filenode,
477 481 'target_filenode': target_filenode,
478 482 'hunks': [],
479 483 'source_file_type': target_file_type,
480 484 'target_file_type': source_file_type,
481 485 'patch': patch,
482 486 'source_mode': patch['stats']['old_mode'],
483 487 'target_mode': patch['stats']['new_mode'],
484 488 'limited_diff': isinstance(patch, LimitedDiffContainer),
485 489 'diffset': self,
486 490 })
487 491
488 492 for hunk in hunks:
489 493 hunkbit = self.parse_hunk(hunk, source_file, target_file)
490 494 hunkbit.filediff = filediff
491 495 filediff.hunks.append(hunkbit)
492 496
493 497 left_comments = {}
494 498
495 499 if source_file_path in self.comments_store:
496 500 for lineno, comments in self.comments_store[source_file_path].items():
497 501 left_comments[lineno] = comments
498 502
499 503 if target_file_path in self.comments_store:
500 504 for lineno, comments in self.comments_store[target_file_path].items():
501 505 left_comments[lineno] = comments
502 506
503 507 filediff.left_comments = left_comments
504 508 return filediff
505 509
506 510 def parse_hunk(self, hunk, source_file, target_file):
507 511 result = AttributeDict(dict(
508 512 source_start=hunk['source_start'],
509 513 source_length=hunk['source_length'],
510 514 target_start=hunk['target_start'],
511 515 target_length=hunk['target_length'],
512 516 section_header=hunk['section_header'],
513 517 lines=[],
514 518 ))
515 519 before, after = [], []
516 520
517 521 for line in hunk['lines']:
518 522 if line['action'] == 'unmod':
519 523 result.lines.extend(
520 524 self.parse_lines(before, after, source_file, target_file))
521 525 after.append(line)
522 526 before.append(line)
523 527 elif line['action'] == 'add':
524 528 after.append(line)
525 529 elif line['action'] == 'del':
526 530 before.append(line)
527 531 elif line['action'] == 'old-no-nl':
528 532 before.append(line)
529 533 elif line['action'] == 'new-no-nl':
530 534 after.append(line)
531 535
532 536 result.lines.extend(
533 537 self.parse_lines(before, after, source_file, target_file))
534 538 result.unified = self.as_unified(result.lines)
535 539 result.sideside = result.lines
536 540
537 541 return result
538 542
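
A sketch of the hunk structure parse_hunk expects, with illustrative field values and diffset being a DiffSet as above; buffered del/add runs are flushed as aligned pairs whenever an unmodified line (or the end of the hunk) is reached:

```python
hunk = {
    'source_start': 1, 'source_length': 2,
    'target_start': 1, 'target_length': 2,
    'section_header': '',
    'lines': [
        {'action': 'del', 'line': u'old text', 'old_lineno': 1, 'new_lineno': ''},
        {'action': 'add', 'line': u'new text', 'old_lineno': '', 'new_lineno': 1},
        {'action': 'unmod', 'line': u'same', 'old_lineno': 2, 'new_lineno': 2},
    ],
}
hunkbit = diffset.parse_hunk(hunk, source_file=None, target_file=None)
# hunkbit.sideside pairs original/modified lines, hunkbit.unified re-orders them
```
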
539 543 def parse_lines(self, before_lines, after_lines, source_file, target_file):
540 544 # TODO: dan: investigate doing the diff comparison and fast highlighting
541 545 # on the entire before and after buffered block lines rather than by
542 546 # line, this means we can get better 'fast' highlighting if the context
543 547 # allows it - eg.
544 548 # line 4: """
545 549 # line 5: this gets highlighted as a string
546 550 # line 6: """
547 551
548 552 lines = []
549 553 while before_lines or after_lines:
550 554 before, after = None, None
551 555 before_tokens, after_tokens = None, None
552 556
553 557 if before_lines:
554 558 before = before_lines.pop(0)
555 559 if after_lines:
556 560 after = after_lines.pop(0)
557 561
558 562 original = AttributeDict()
559 563 modified = AttributeDict()
560 564
561 565 if before:
562 566 if before['action'] == 'old-no-nl':
563 567 before_tokens = [('nonl', before['line'])]
564 568 else:
565 569 before_tokens = self.get_line_tokens(
566 570 line_text=before['line'], line_number=before['old_lineno'],
567 571 file=source_file)
568 572 original.lineno = before['old_lineno']
569 573 original.content = before['line']
570 574 original.action = self.action_to_op(before['action'])
571 575 original.comments = self.get_comments_for('old',
572 576 source_file, before['old_lineno'])
573 577
574 578 if after:
575 579 if after['action'] == 'new-no-nl':
576 580 after_tokens = [('nonl', after['line'])]
577 581 else:
578 582 after_tokens = self.get_line_tokens(
579 583 line_text=after['line'], line_number=after['new_lineno'],
580 584 file=target_file)
581 585 modified.lineno = after['new_lineno']
582 586 modified.content = after['line']
583 587 modified.action = self.action_to_op(after['action'])
584 588 modified.comments = self.get_comments_for('new',
585 589 target_file, after['new_lineno'])
586 590
587 591 # diff the lines
588 592 if before_tokens and after_tokens:
589 593 o_tokens, m_tokens, similarity = tokens_diff(
590 594 before_tokens, after_tokens)
591 595 original.content = render_tokenstream(o_tokens)
592 596 modified.content = render_tokenstream(m_tokens)
593 597 elif before_tokens:
594 598 original.content = render_tokenstream(
595 599 [(x[0], '', x[1]) for x in before_tokens])
596 600 elif after_tokens:
597 601 modified.content = render_tokenstream(
598 602 [(x[0], '', x[1]) for x in after_tokens])
599 603
600 604 lines.append(AttributeDict({
601 605 'original': original,
602 606 'modified': modified,
603 607 }))
604 608
605 609 return lines
606 610
607 611 def get_comments_for(self, version, file, line_number):
608 612 if hasattr(file, 'unicode_path'):
609 613 file = file.unicode_path
610 614
611 615 if not isinstance(file, basestring):
612 616 return None
613 617
614 618 line_key = {
615 619 'old': 'o',
616 620 'new': 'n',
617 621 }[version] + str(line_number)
618 622
619 623 if file in self.comments_store:
620 624 file_comments = self.comments_store[file]
621 625 if line_key in file_comments:
622 626 return file_comments.pop(line_key)
623 627
624 628 def get_line_tokens(self, line_text, line_number, file=None):
625 629 filenode = None
626 630 filename = None
627 631
628 632 if isinstance(file, basestring):
629 633 filename = file
630 634 elif isinstance(file, FileNode):
631 635 filenode = file
632 636 filename = file.unicode_path
633 637
634 638 if self.highlight_mode == self.HL_REAL and filenode:
635 639 lexer = self._get_lexer_for_filename(filename)
636 640 file_size_allowed = file.size < self.max_file_size_limit
637 641 if line_number and file_size_allowed:
638 642 return self.get_tokenized_filenode_line(
639 643 file, line_number, lexer)
640 644
641 645 if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename:
642 646 lexer = self._get_lexer_for_filename(filename)
643 647 return list(tokenize_string(line_text, lexer))
644 648
645 649 return list(tokenize_string(line_text, plain_text_lexer))
646 650
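
How the highlight mode plays out per line, as a sketch: HL_REAL tokenizes the whole file node once (cached in highlighted_filenodes) and picks the requested line, HL_FAST tokenizes just the line text with the filename's lexer, and anything else falls back to the plain text lexer:

```python
tokens = diffset.get_line_tokens(
    line_text=u'def foo():', line_number=10, file=None)
# with file=None neither the HL_REAL nor the HL_FAST branch applies,
# so this exercises only the plain_text_lexer fallback
```
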
647 651 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
648 652
649 653 if filenode not in self.highlighted_filenodes:
650 654 tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
651 655 self.highlighted_filenodes[filenode] = tokenized_lines
652 656 return self.highlighted_filenodes[filenode][line_number - 1]
653 657
654 658 def action_to_op(self, action):
655 659 return {
656 660 'add': '+',
657 661 'del': '-',
658 662 'unmod': ' ',
659 663 'old-no-nl': ' ',
660 664 'new-no-nl': ' ',
661 665 }.get(action, action)
662 666
663 667 def as_unified(self, lines):
664 668 """
665 669 Return a generator that yields the lines of a diff in unified order
666 670 """
667 671 def generator():
668 672 buf = []
669 673 for line in lines:
670 674
671 675 if buf and not line.original or line.original.action == ' ':
672 676 for b in buf:
673 677 yield b
674 678 buf = []
675 679
676 680 if line.original:
677 681 if line.original.action == ' ':
678 682 yield (line.original.lineno, line.modified.lineno,
679 683 line.original.action, line.original.content,
680 684 line.original.comments)
681 685 continue
682 686
683 687 if line.original.action == '-':
684 688 yield (line.original.lineno, None,
685 689 line.original.action, line.original.content,
686 690 line.original.comments)
687 691
688 692 if line.modified.action == '+':
689 693 buf.append((
690 694 None, line.modified.lineno,
691 695 line.modified.action, line.modified.content,
692 696 line.modified.comments))
693 697 continue
694 698
695 699 if line.modified:
696 700 yield (None, line.modified.lineno,
697 701 line.modified.action, line.modified.content,
698 702 line.modified.comments)
699 703
700 704 for b in buf:
701 705 yield b
702 706
703 707 return generator()
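
Finally, a sketch of consuming the unified ordering, reusing the hunkbit from the parse_hunk sketch above; each yielded tuple carries the old and new line numbers, the op, the rendered content and any comments, with added lines buffered until the matching removals have been emitted:

```python
for old_no, new_no, action, content, comments in hunkbit.unified:
    # e.g. (12, None, '-', ...), then (None, 12, '+', ...), then (13, 13, ' ', ...)
    print(old_no, new_no, action)
```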