diffs: optimize how lexer is fetched for rich highlight mode...
marcink
r1356:1e4a47eb default
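
The change below routes the rich (HL_REAL) highlight mode through the same per-filename lexer cache that the fast mode already used, instead of letting every FileNode re-derive a lexer from its content (see the TODO about issue #4289 in the diff). A minimal sketch of that caching pattern, using plain Pygments in place of RhodeCode's get_lexer_safe helper, so the names here are illustrative only:

    from pygments.lexers import get_lexer_for_filename
    from pygments.lexers.special import TextLexer
    from pygments.util import ClassNotFound

    # Per-filename lexer cache: source and target lookups for the same file
    # reuse one lexer instead of running detection again (the pattern the
    # diff applies in DiffSet._get_lexer_for_filename).
    _lexer_cache = {}

    def get_cached_lexer(filename):
        if filename not in _lexer_cache:
            try:
                lexer = get_lexer_for_filename(filename, stripnl=False)
            except ClassNotFound:
                lexer = TextLexer(stripnl=False)  # plain-text fallback
            _lexer_cache[filename] = lexer
        return _lexer_cache[filename]

Keying the cache on the filename rather than on file content is what makes the optimization cheap: lexer lookup by extension is fast, and the result is reused for every hunk and line of that file.
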
@@ -1,687 +1,701 @@
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2017 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 import logging
22 22 import difflib
23 23 from itertools import groupby
24 24
25 25 from pygments import lex
26 26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
27 27 from rhodecode.lib.helpers import (
28 28 get_lexer_for_filenode, get_lexer_safe, html_escape)
29 29 from rhodecode.lib.utils2 import AttributeDict
30 30 from rhodecode.lib.vcs.nodes import FileNode
31 31 from rhodecode.lib.diff_match_patch import diff_match_patch
32 32 from rhodecode.lib.diffs import LimitedDiffContainer
33 33 from pygments.lexers import get_lexer_by_name
34 34
35 35 plain_text_lexer = get_lexer_by_name(
36 36 'text', stripall=False, stripnl=False, ensurenl=False)
37 37
38 38
39 39 log = logging.getLogger()
40 40
41 41
42 42 def filenode_as_lines_tokens(filenode, lexer=None):
43 org_lexer = lexer
43 44 lexer = lexer or get_lexer_for_filenode(filenode)
44 log.debug('Generating file node pygment tokens for %s, %s', lexer, filenode)
45 log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
46 lexer, filenode, org_lexer)
45 47 tokens = tokenize_string(filenode.content, lexer)
46 48 lines = split_token_stream(tokens, split_string='\n')
47 49 rv = list(lines)
48 50 return rv
49 51
50 52
51 53 def tokenize_string(content, lexer):
52 54 """
53 55 Use pygments to tokenize some content based on a lexer
54 56 ensuring all original new lines and whitespace is preserved
55 57 """
56 58
57 59 lexer.stripall = False
58 60 lexer.stripnl = False
59 61 lexer.ensurenl = False
60 62 for token_type, token_text in lex(content, lexer):
61 63 yield pygment_token_class(token_type), token_text
62 64
63 65
64 66 def split_token_stream(tokens, split_string=u'\n'):
65 67 """
66 68 Take a list of (TokenType, text) tuples and split them by a string
67 69
68 70 >>> split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
69 71 [(TEXT, 'some'), (TEXT, 'text'),
70 72 (TEXT, 'more'), (TEXT, 'text')]
71 73 """
72 74
73 75 buffer = []
74 76 for token_class, token_text in tokens:
75 77 parts = token_text.split(split_string)
76 78 for part in parts[:-1]:
77 79 buffer.append((token_class, part))
78 80 yield buffer
79 81 buffer = []
80 82
81 83 buffer.append((token_class, parts[-1]))
82 84
83 85 if buffer:
84 86 yield buffer
85 87
86 88
87 89 def filenode_as_annotated_lines_tokens(filenode):
88 90 """
89 91 Take a file node and return a list of annotations => lines, if no annotation
90 92 is found, it will be None.
91 93
92 94 eg:
93 95
94 96 [
95 97 (annotation1, [
96 98 (1, line1_tokens_list),
97 99 (2, line2_tokens_list),
98 100 ]),
99 101 (annotation2, [
100 102 (3, line1_tokens_list),
101 103 ]),
102 104 (None, [
103 105 (4, line1_tokens_list),
104 106 ]),
105 107 (annotation1, [
106 108 (5, line1_tokens_list),
107 109 (6, line2_tokens_list),
108 110 ])
109 111 ]
110 112 """
111 113
112 114 commit_cache = {} # cache commit_getter lookups
113 115
114 116 def _get_annotation(commit_id, commit_getter):
115 117 if commit_id not in commit_cache:
116 118 commit_cache[commit_id] = commit_getter()
117 119 return commit_cache[commit_id]
118 120
119 121 annotation_lookup = {
120 122 line_no: _get_annotation(commit_id, commit_getter)
121 123 for line_no, commit_id, commit_getter, line_content
122 124 in filenode.annotate
123 125 }
124 126
125 127 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
126 128 for line_no, tokens
127 129 in enumerate(filenode_as_lines_tokens(filenode), 1))
128 130
129 131 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
130 132
131 133 for annotation, group in grouped_annotations_lines:
132 134 yield (
133 135 annotation, [(line_no, tokens)
134 136 for (_, line_no, tokens) in group]
135 137 )
136 138
137 139
138 140 def render_tokenstream(tokenstream):
139 141 result = []
140 142 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
141 143
142 144 if token_class:
143 145 result.append(u'<span class="%s">' % token_class)
144 146 else:
145 147 result.append(u'<span>')
146 148
147 149 for op_tag, token_text in token_ops_texts:
148 150
149 151 if op_tag:
150 152 result.append(u'<%s>' % op_tag)
151 153
152 154 escaped_text = html_escape(token_text)
153 155
154 156 # TODO: dan: investigate showing hidden characters like space/nl/tab
155 157 # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
156 158 # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
157 159 # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
158 160
159 161 result.append(escaped_text)
160 162
161 163 if op_tag:
162 164 result.append(u'</%s>' % op_tag)
163 165
164 166 result.append(u'</span>')
165 167
166 168 html = ''.join(result)
167 169 return html
168 170
169 171
170 172 def rollup_tokenstream(tokenstream):
171 173 """
172 174 Group a token stream of the format:
173 175
174 176 ('class', 'op', 'text')
175 177 or
176 178 ('class', 'text')
177 179
178 180 into
179 181
180 182 [('class1',
181 183 [('op1', 'text'),
182 184 ('op2', 'text')]),
183 185 ('class2',
184 186 [('op3', 'text')])]
185 187
186 188 This is used to get the minimal tags necessary when
187 189 rendering to html eg for a token stream ie.
188 190
189 191 <span class="A"><ins>he</ins>llo</span>
190 192 vs
191 193 <span class="A"><ins>he</ins></span><span class="A">llo</span>
192 194
193 195 If a 2 tuple is passed in, the output op will be an empty string.
194 196
195 197 eg:
196 198
197 199 >>> rollup_tokenstream([('classA', '', 'h'),
198 200 ('classA', 'del', 'ell'),
199 201 ('classA', '', 'o'),
200 202 ('classB', '', ' '),
201 203 ('classA', '', 'the'),
202 204 ('classA', '', 're'),
203 205 ])
204 206
205 207 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')],
206 208 ('classB', [('', ' ')],
207 209 ('classA', [('', 'there')]]
208 210
209 211 """
210 212 if tokenstream and len(tokenstream[0]) == 2:
211 213 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
212 214
213 215 result = []
214 216 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
215 217 ops = []
216 218 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
217 219 text_buffer = []
218 220 for t_class, t_op, t_text in token_text_list:
219 221 text_buffer.append(t_text)
220 222 ops.append((token_op, ''.join(text_buffer)))
221 223 result.append((token_class, ops))
222 224 return result
223 225
224 226
225 227 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
226 228 """
227 229 Converts a list of (token_class, token_text) tuples to a list of
228 230 (token_class, token_op, token_text) tuples where token_op is one of
229 231 ('ins', 'del', '')
230 232
231 233 :param old_tokens: list of (token_class, token_text) tuples of old line
232 234 :param new_tokens: list of (token_class, token_text) tuples of new line
233 235 :param use_diff_match_patch: boolean, will use google's diff match patch
234 236 library which has options to 'smooth' out the character by character
235 237 differences making nicer ins/del blocks
236 238 """
237 239
238 240 old_tokens_result = []
239 241 new_tokens_result = []
240 242
241 243 similarity = difflib.SequenceMatcher(None,
242 244 ''.join(token_text for token_class, token_text in old_tokens),
243 245 ''.join(token_text for token_class, token_text in new_tokens)
244 246 ).ratio()
245 247
246 248 if similarity < 0.6: # return, the blocks are too different
247 249 for token_class, token_text in old_tokens:
248 250 old_tokens_result.append((token_class, '', token_text))
249 251 for token_class, token_text in new_tokens:
250 252 new_tokens_result.append((token_class, '', token_text))
251 253 return old_tokens_result, new_tokens_result, similarity
252 254
253 255 token_sequence_matcher = difflib.SequenceMatcher(None,
254 256 [x[1] for x in old_tokens],
255 257 [x[1] for x in new_tokens])
256 258
257 259 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
258 260 # check the differences by token block types first to give a more
259 261 # nicer "block" level replacement vs character diffs
260 262
261 263 if tag == 'equal':
262 264 for token_class, token_text in old_tokens[o1:o2]:
263 265 old_tokens_result.append((token_class, '', token_text))
264 266 for token_class, token_text in new_tokens[n1:n2]:
265 267 new_tokens_result.append((token_class, '', token_text))
266 268 elif tag == 'delete':
267 269 for token_class, token_text in old_tokens[o1:o2]:
268 270 old_tokens_result.append((token_class, 'del', token_text))
269 271 elif tag == 'insert':
270 272 for token_class, token_text in new_tokens[n1:n2]:
271 273 new_tokens_result.append((token_class, 'ins', token_text))
272 274 elif tag == 'replace':
273 275 # if same type token blocks must be replaced, do a diff on the
274 276 # characters in the token blocks to show individual changes
275 277
276 278 old_char_tokens = []
277 279 new_char_tokens = []
278 280 for token_class, token_text in old_tokens[o1:o2]:
279 281 for char in token_text:
280 282 old_char_tokens.append((token_class, char))
281 283
282 284 for token_class, token_text in new_tokens[n1:n2]:
283 285 for char in token_text:
284 286 new_char_tokens.append((token_class, char))
285 287
286 288 old_string = ''.join([token_text for
287 289 token_class, token_text in old_char_tokens])
288 290 new_string = ''.join([token_text for
289 291 token_class, token_text in new_char_tokens])
290 292
291 293 char_sequence = difflib.SequenceMatcher(
292 294 None, old_string, new_string)
293 295 copcodes = char_sequence.get_opcodes()
294 296 obuffer, nbuffer = [], []
295 297
296 298 if use_diff_match_patch:
297 299 dmp = diff_match_patch()
298 300 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
299 301 reps = dmp.diff_main(old_string, new_string)
300 302 dmp.diff_cleanupEfficiency(reps)
301 303
302 304 a, b = 0, 0
303 305 for op, rep in reps:
304 306 l = len(rep)
305 307 if op == 0:
306 308 for i, c in enumerate(rep):
307 309 obuffer.append((old_char_tokens[a+i][0], '', c))
308 310 nbuffer.append((new_char_tokens[b+i][0], '', c))
309 311 a += l
310 312 b += l
311 313 elif op == -1:
312 314 for i, c in enumerate(rep):
313 315 obuffer.append((old_char_tokens[a+i][0], 'del', c))
314 316 a += l
315 317 elif op == 1:
316 318 for i, c in enumerate(rep):
317 319 nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
318 320 b += l
319 321 else:
320 322 for ctag, co1, co2, cn1, cn2 in copcodes:
321 323 if ctag == 'equal':
322 324 for token_class, token_text in old_char_tokens[co1:co2]:
323 325 obuffer.append((token_class, '', token_text))
324 326 for token_class, token_text in new_char_tokens[cn1:cn2]:
325 327 nbuffer.append((token_class, '', token_text))
326 328 elif ctag == 'delete':
327 329 for token_class, token_text in old_char_tokens[co1:co2]:
328 330 obuffer.append((token_class, 'del', token_text))
329 331 elif ctag == 'insert':
330 332 for token_class, token_text in new_char_tokens[cn1:cn2]:
331 333 nbuffer.append((token_class, 'ins', token_text))
332 334 elif ctag == 'replace':
333 335 for token_class, token_text in old_char_tokens[co1:co2]:
334 336 obuffer.append((token_class, 'del', token_text))
335 337 for token_class, token_text in new_char_tokens[cn1:cn2]:
336 338 nbuffer.append((token_class, 'ins', token_text))
337 339
338 340 old_tokens_result.extend(obuffer)
339 341 new_tokens_result.extend(nbuffer)
340 342
341 343 return old_tokens_result, new_tokens_result, similarity
342 344
343 345
344 346 class DiffSet(object):
345 347 """
346 348 An object for parsing the diff result from diffs.DiffProcessor and
347 349 adding highlighting, side by side/unified renderings and line diffs
348 350 """
349 351
350 352 HL_REAL = 'REAL' # highlights using original file, slow
351 353 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
352 354 # in the case of multiline code
353 355 HL_NONE = 'NONE' # no highlighting, fastest
354 356
355 357 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
356 358 source_repo_name=None,
357 359 source_node_getter=lambda filename: None,
358 360 target_node_getter=lambda filename: None,
359 361 source_nodes=None, target_nodes=None,
360 362 max_file_size_limit=150 * 1024, # files over this size will
361 363 # use fast highlighting
362 364 comments=None,
363 365 ):
364 366
365 367 self.highlight_mode = highlight_mode
366 368 self.highlighted_filenodes = {}
367 369 self.source_node_getter = source_node_getter
368 370 self.target_node_getter = target_node_getter
369 371 self.source_nodes = source_nodes or {}
370 372 self.target_nodes = target_nodes or {}
371 373 self.repo_name = repo_name
372 374 self.source_repo_name = source_repo_name or repo_name
373 375 self.comments = comments or {}
374 376 self.comments_store = self.comments.copy()
375 377 self.max_file_size_limit = max_file_size_limit
376 378
377 379 def render_patchset(self, patchset, source_ref=None, target_ref=None):
378 380 diffset = AttributeDict(dict(
379 381 lines_added=0,
380 382 lines_deleted=0,
381 383 changed_files=0,
382 384 files=[],
383 385 limited_diff=isinstance(patchset, LimitedDiffContainer),
384 386 repo_name=self.repo_name,
385 387 source_repo_name=self.source_repo_name,
386 388 source_ref=source_ref,
387 389 target_ref=target_ref,
388 390 ))
389 391 for patch in patchset:
390 392 filediff = self.render_patch(patch)
391 393 filediff.diffset = diffset
392 394 diffset.files.append(filediff)
393 395 diffset.changed_files += 1
394 396 if not patch['stats']['binary']:
395 397 diffset.lines_added += patch['stats']['added']
396 398 diffset.lines_deleted += patch['stats']['deleted']
397 399
398 400 return diffset
399 401
400 402 _lexer_cache = {}
401 def _get_lexer_for_filename(self, filename):
403 def _get_lexer_for_filename(self, filename, filenode=None):
402 404 # cached because we might need to call it twice for source/target
403 405 if filename not in self._lexer_cache:
404 self._lexer_cache[filename] = get_lexer_safe(filepath=filename)
406 if filenode:
407 lexer = filenode.lexer
408 else:
409 lexer = get_lexer_safe(filepath=filename)
410 self._lexer_cache[filename] = lexer
405 411 return self._lexer_cache[filename]
406 412
407 413 def render_patch(self, patch):
408 414 log.debug('rendering diff for %r' % patch['filename'])
409 415
410 416 source_filename = patch['original_filename']
411 417 target_filename = patch['filename']
412 418
413 419 source_lexer = plain_text_lexer
414 420 target_lexer = plain_text_lexer
415 421
416 422 if not patch['stats']['binary']:
417 423 if self.highlight_mode == self.HL_REAL:
418 424 if (source_filename and patch['operation'] in ('D', 'M')
419 425 and source_filename not in self.source_nodes):
420 426 self.source_nodes[source_filename] = (
421 427 self.source_node_getter(source_filename))
422 428
423 429 if (target_filename and patch['operation'] in ('A', 'M')
424 430 and target_filename not in self.target_nodes):
425 431 self.target_nodes[target_filename] = (
426 432 self.target_node_getter(target_filename))
427 433
428 434 elif self.highlight_mode == self.HL_FAST:
429 435 source_lexer = self._get_lexer_for_filename(source_filename)
430 436 target_lexer = self._get_lexer_for_filename(target_filename)
431 437
432 438 source_file = self.source_nodes.get(source_filename, source_filename)
433 439 target_file = self.target_nodes.get(target_filename, target_filename)
434 440
435 441 source_filenode, target_filenode = None, None
436 442
437 443 # TODO: dan: FileNode.lexer works on the content of the file - which
438 444 # can be slow - issue #4289 explains a lexer clean up - which once
439 445 # done can allow caching a lexer for a filenode to avoid the file lookup
440 446 if isinstance(source_file, FileNode):
441 447 source_filenode = source_file
442 source_lexer = source_file.lexer
448 #source_lexer = source_file.lexer
449 source_lexer = self._get_lexer_for_filename(source_filename)
450 source_file.lexer = source_lexer
451
443 452 if isinstance(target_file, FileNode):
444 453 target_filenode = target_file
445 target_lexer = target_file.lexer
454 #target_lexer = target_file.lexer
455 target_lexer = self._get_lexer_for_filename(target_filename)
456 target_file.lexer = target_lexer
446 457
447 458 source_file_path, target_file_path = None, None
448 459
449 460 if source_filename != '/dev/null':
450 461 source_file_path = source_filename
451 462 if target_filename != '/dev/null':
452 463 target_file_path = target_filename
453 464
454 465 source_file_type = source_lexer.name
455 466 target_file_type = target_lexer.name
456 467
457 468 op_hunks = patch['chunks'][0]
458 469 hunks = patch['chunks'][1:]
459 470
460 471 filediff = AttributeDict({
461 472 'source_file_path': source_file_path,
462 473 'target_file_path': target_file_path,
463 474 'source_filenode': source_filenode,
464 475 'target_filenode': target_filenode,
465 476 'hunks': [],
466 477 'source_file_type': target_file_type,
467 478 'target_file_type': source_file_type,
468 479 'patch': patch,
469 480 'source_mode': patch['stats']['old_mode'],
470 481 'target_mode': patch['stats']['new_mode'],
471 482 'limited_diff': isinstance(patch, LimitedDiffContainer),
472 483 'diffset': self,
473 484 })
474 485
475 486 for hunk in hunks:
476 487 hunkbit = self.parse_hunk(hunk, source_file, target_file)
477 488 hunkbit.filediff = filediff
478 489 filediff.hunks.append(hunkbit)
479 490
480 491 left_comments = {}
481 492
482 493 if source_file_path in self.comments_store:
483 494 for lineno, comments in self.comments_store[source_file_path].items():
484 495 left_comments[lineno] = comments
485 496
486 497 if target_file_path in self.comments_store:
487 498 for lineno, comments in self.comments_store[target_file_path].items():
488 499 left_comments[lineno] = comments
489 500
490 501 filediff.left_comments = left_comments
491 502 return filediff
492 503
493 504 def parse_hunk(self, hunk, source_file, target_file):
494 505 result = AttributeDict(dict(
495 506 source_start=hunk['source_start'],
496 507 source_length=hunk['source_length'],
497 508 target_start=hunk['target_start'],
498 509 target_length=hunk['target_length'],
499 510 section_header=hunk['section_header'],
500 511 lines=[],
501 512 ))
502 513 before, after = [], []
503 514
504 515 for line in hunk['lines']:
505 516 if line['action'] == 'unmod':
506 517 result.lines.extend(
507 518 self.parse_lines(before, after, source_file, target_file))
508 519 after.append(line)
509 520 before.append(line)
510 521 elif line['action'] == 'add':
511 522 after.append(line)
512 523 elif line['action'] == 'del':
513 524 before.append(line)
514 525 elif line['action'] == 'old-no-nl':
515 526 before.append(line)
516 527 elif line['action'] == 'new-no-nl':
517 528 after.append(line)
518 529
519 530 result.lines.extend(
520 531 self.parse_lines(before, after, source_file, target_file))
521 532 result.unified = self.as_unified(result.lines)
522 533 result.sideside = result.lines
523 534
524 535 return result
525 536
526 537 def parse_lines(self, before_lines, after_lines, source_file, target_file):
527 538 # TODO: dan: investigate doing the diff comparison and fast highlighting
528 539 # on the entire before and after buffered block lines rather than by
529 540 # line, this means we can get better 'fast' highlighting if the context
530 541 # allows it - eg.
531 542 # line 4: """
532 543 # line 5: this gets highlighted as a string
533 544 # line 6: """
534 545
535 546 lines = []
536 547 while before_lines or after_lines:
537 548 before, after = None, None
538 549 before_tokens, after_tokens = None, None
539 550
540 551 if before_lines:
541 552 before = before_lines.pop(0)
542 553 if after_lines:
543 554 after = after_lines.pop(0)
544 555
545 556 original = AttributeDict()
546 557 modified = AttributeDict()
547 558
548 559 if before:
549 560 if before['action'] == 'old-no-nl':
550 561 before_tokens = [('nonl', before['line'])]
551 562 else:
552 563 before_tokens = self.get_line_tokens(
553 564 line_text=before['line'], line_number=before['old_lineno'],
554 565 file=source_file)
555 566 original.lineno = before['old_lineno']
556 567 original.content = before['line']
557 568 original.action = self.action_to_op(before['action'])
558 569 original.comments = self.get_comments_for('old',
559 570 source_file, before['old_lineno'])
560 571
561 572 if after:
562 573 if after['action'] == 'new-no-nl':
563 574 after_tokens = [('nonl', after['line'])]
564 575 else:
565 576 after_tokens = self.get_line_tokens(
566 577 line_text=after['line'], line_number=after['new_lineno'],
567 578 file=target_file)
568 579 modified.lineno = after['new_lineno']
569 580 modified.content = after['line']
570 581 modified.action = self.action_to_op(after['action'])
571 582 modified.comments = self.get_comments_for('new',
572 583 target_file, after['new_lineno'])
573 584
574 585 # diff the lines
575 586 if before_tokens and after_tokens:
576 587 o_tokens, m_tokens, similarity = tokens_diff(
577 588 before_tokens, after_tokens)
578 589 original.content = render_tokenstream(o_tokens)
579 590 modified.content = render_tokenstream(m_tokens)
580 591 elif before_tokens:
581 592 original.content = render_tokenstream(
582 593 [(x[0], '', x[1]) for x in before_tokens])
583 594 elif after_tokens:
584 595 modified.content = render_tokenstream(
585 596 [(x[0], '', x[1]) for x in after_tokens])
586 597
587 598 lines.append(AttributeDict({
588 599 'original': original,
589 600 'modified': modified,
590 601 }))
591 602
592 603 return lines
593 604
594 605 def get_comments_for(self, version, file, line_number):
595 606 if hasattr(file, 'unicode_path'):
596 607 file = file.unicode_path
597 608
598 609 if not isinstance(file, basestring):
599 610 return None
600 611
601 612 line_key = {
602 613 'old': 'o',
603 614 'new': 'n',
604 615 }[version] + str(line_number)
605 616
606 617 if file in self.comments_store:
607 618 file_comments = self.comments_store[file]
608 619 if line_key in file_comments:
609 620 return file_comments.pop(line_key)
610 621
611 622 def get_line_tokens(self, line_text, line_number, file=None):
612 623 filenode = None
613 624 filename = None
614 625
615 626 if isinstance(file, basestring):
616 627 filename = file
617 628 elif isinstance(file, FileNode):
618 629 filenode = file
619 630 filename = file.unicode_path
620 631
621 632 if self.highlight_mode == self.HL_REAL and filenode:
622 if line_number and file.size < self.max_file_size_limit:
623 return self.get_tokenized_filenode_line(file, line_number)
633 lexer = self._get_lexer_for_filename(filename)
634 file_size_allowed = file.size < self.max_file_size_limit
635 if line_number and file_size_allowed:
636 return self.get_tokenized_filenode_line(
637 file, line_number, lexer)
624 638
625 639 if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename:
626 640 lexer = self._get_lexer_for_filename(filename)
627 641 return list(tokenize_string(line_text, lexer))
628 642
629 643 return list(tokenize_string(line_text, plain_text_lexer))
630 644
631 def get_tokenized_filenode_line(self, filenode, line_number):
645 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
632 646
633 647 if filenode not in self.highlighted_filenodes:
634 tokenized_lines = filenode_as_lines_tokens(filenode, filenode.lexer)
648 tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
635 649 self.highlighted_filenodes[filenode] = tokenized_lines
636 650 return self.highlighted_filenodes[filenode][line_number - 1]
637 651
638 652 def action_to_op(self, action):
639 653 return {
640 654 'add': '+',
641 655 'del': '-',
642 656 'unmod': ' ',
643 657 'old-no-nl': ' ',
644 658 'new-no-nl': ' ',
645 659 }.get(action, action)
646 660
647 661 def as_unified(self, lines):
648 662 """
649 663 Return a generator that yields the lines of a diff in unified order
650 664 """
651 665 def generator():
652 666 buf = []
653 667 for line in lines:
654 668
655 669 if buf and not line.original or line.original.action == ' ':
656 670 for b in buf:
657 671 yield b
658 672 buf = []
659 673
660 674 if line.original:
661 675 if line.original.action == ' ':
662 676 yield (line.original.lineno, line.modified.lineno,
663 677 line.original.action, line.original.content,
664 678 line.original.comments)
665 679 continue
666 680
667 681 if line.original.action == '-':
668 682 yield (line.original.lineno, None,
669 683 line.original.action, line.original.content,
670 684 line.original.comments)
671 685
672 686 if line.modified.action == '+':
673 687 buf.append((
674 688 None, line.modified.lineno,
675 689 line.modified.action, line.modified.content,
676 690 line.modified.comments))
677 691 continue
678 692
679 693 if line.modified:
680 694 yield (None, line.modified.lineno,
681 695 line.modified.action, line.modified.content,
682 696 line.modified.comments)
683 697
684 698 for b in buf:
685 699 yield b
686 700
687 701 return generator()
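
For context, the highlight pipeline that the cached lexer feeds (tokenize_string followed by split_token_stream above) can be reproduced standalone with plain Pygments. This is only a sketch that mirrors the shape of the per-line token lists the renderer consumes; the helper name lines_of_tokens is hypothetical:

    from pygments import lex
    from pygments.formatters.html import _get_ttype_class
    from pygments.lexers import PythonLexer

    def lines_of_tokens(content, lexer):
        # Lex the whole buffer once, then regroup the (css_class, text)
        # pairs per source line, the same way split_token_stream does.
        buffer, lines = [], []
        for token_type, token_text in lex(content, lexer):
            parts = token_text.split('\n')
            for part in parts[:-1]:
                buffer.append((_get_ttype_class(token_type), part))
                lines.append(buffer)
                buffer = []
            buffer.append((_get_ttype_class(token_type), parts[-1]))
        if buffer:
            lines.append(buffer)
        return lines

    lexer = PythonLexer(stripall=False, stripnl=False, ensurenl=False)
    for no, line in enumerate(lines_of_tokens("x = 1\nprint(x)\n", lexer), 1):
        print(no, line)  # one list of (css_class, text) pairs per source line

Tokenizing the full file once and indexing into the per-line result is what HL_REAL mode pays for; with the lexer now fetched from the filename-keyed cache, the remaining expensive step is the single lex() pass per file node.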