diffs: in case of text lexers don't do any HL because of pygments newline...
marcink
r2546:db577a02 stable
@@ -1,735 +1,743 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2018 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 import logging
22 22 import difflib
23 23 from itertools import groupby
24 24
25 25 from pygments import lex
26 26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
27 from pygments.lexers.special import TextLexer, Token
28
27 29 from rhodecode.lib.helpers import (
28 30 get_lexer_for_filenode, html_escape, get_custom_lexer)
29 31 from rhodecode.lib.utils2 import AttributeDict
30 32 from rhodecode.lib.vcs.nodes import FileNode
31 33 from rhodecode.lib.diff_match_patch import diff_match_patch
32 34 from rhodecode.lib.diffs import LimitedDiffContainer
33 35 from pygments.lexers import get_lexer_by_name
34 36
35 37 plain_text_lexer = get_lexer_by_name(
36 38 'text', stripall=False, stripnl=False, ensurenl=False)
37 39
38 40
39 41 log = logging.getLogger()
40 42
41 43
42 44 def filenode_as_lines_tokens(filenode, lexer=None):
43 45 org_lexer = lexer
44 46 lexer = lexer or get_lexer_for_filenode(filenode)
45 47 log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
46 48 lexer, filenode, org_lexer)
47 49 tokens = tokenize_string(filenode.content, lexer)
48 lines = split_token_stream(tokens, split_string='\n')
50 lines = split_token_stream(tokens)
49 51 rv = list(lines)
50 52 return rv
51 53
52 54
53 55 def tokenize_string(content, lexer):
54 56 """
55 57 Use pygments to tokenize some content based on a lexer
56 58 ensuring all original newlines and whitespace are preserved
57 59 """
58 60
59 61 lexer.stripall = False
60 62 lexer.stripnl = False
61 63 lexer.ensurenl = False
62 for token_type, token_text in lex(content, lexer):
64
65 if isinstance(lexer, TextLexer):
66 lexed = [(Token.Text, content)]
67 else:
68 lexed = lex(content, lexer)
69
70 for token_type, token_text in lexed:
63 71 yield pygment_token_class(token_type), token_text
64 72
65 73
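The TextLexer special case above is the heart of this commit: pygments' lex() normalizes the token stream and, for plain text, can append a trailing newline that the diff renderer would display as a phantom empty line. A minimal standalone sketch of the behaviour being worked around (the exact pygments newline handling is version-dependent; this reading is an assumption based on the commit message):

    from pygments import lex
    from pygments.lexers.special import TextLexer

    content = u'no trailing newline'
    lexed = list(lex(content, TextLexer(stripnl=False, ensurenl=False)))
    # lexed may still end in u'no trailing newline\n'; the code above
    # therefore skips lex() for TextLexer and yields (Token.Text, content)
    # so the text stays byte-for-byte intact.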
66 def split_token_stream(tokens, split_string=u'\n'):
74 def split_token_stream(tokens):
67 75 """
68 76 Take a list of (TokenType, text) tuples and split them by newlines
69 77
70 >>> split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
78 split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
71 79 [[(TEXT, 'some')], [(TEXT, 'text'), (TEXT, 'more')],
72 80 [(TEXT, '')]]
73 81 """
74 82
75 83 buffer = []
76 84 for token_class, token_text in tokens:
77 parts = token_text.split(split_string)
85 parts = token_text.split('\n')
78 86 for part in parts[:-1]:
79 87 buffer.append((token_class, part))
80 88 yield buffer
81 89 buffer = []
82 90
83 91 buffer.append((token_class, parts[-1]))
84 92
85 93 if buffer:
86 94 yield buffer
87 95
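For illustration, a small hand-checked usage of split_token_stream; the token classes here are the plain strings produced by pygment_token_class:

    tokens = [('', u'def f():\n    pass')]
    lines = list(split_token_stream(tokens))
    # -> [[('', u'def f():')], [('', u'    pass')]]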
88 96
89 97 def filenode_as_annotated_lines_tokens(filenode):
90 98 """
91 99 Take a file node and return a list of (annotation, lines) pairs; if no
92 100 annotation is found for a line, the annotation will be None.
93 101
94 102 eg:
95 103
96 104 [
97 105 (annotation1, [
98 106 (1, line1_tokens_list),
99 107 (2, line2_tokens_list),
100 108 ]),
101 109 (annotation2, [
102 110 (3, line1_tokens_list),
103 111 ]),
104 112 (None, [
105 113 (4, line1_tokens_list),
106 114 ]),
107 115 (annotation1, [
108 116 (5, line1_tokens_list),
109 117 (6, line2_tokens_list),
110 118 ])
111 119 ]
112 120 """
113 121
114 122 commit_cache = {} # cache commit_getter lookups
115 123
116 124 def _get_annotation(commit_id, commit_getter):
117 125 if commit_id not in commit_cache:
118 126 commit_cache[commit_id] = commit_getter()
119 127 return commit_cache[commit_id]
120 128
121 129 annotation_lookup = {
122 130 line_no: _get_annotation(commit_id, commit_getter)
123 131 for line_no, commit_id, commit_getter, line_content
124 132 in filenode.annotate
125 133 }
126 134
127 135 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
128 136 for line_no, tokens
129 137 in enumerate(filenode_as_lines_tokens(filenode), 1))
130 138
131 139 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
132 140
133 141 for annotation, group in grouped_annotations_lines:
134 142 yield (
135 143 annotation, [(line_no, tokens)
136 144 for (_, line_no, tokens) in group]
137 145 )
138 146
139 147
140 148 def render_tokenstream(tokenstream):
141 149 result = []
142 150 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
143 151
144 152 if token_class:
145 153 result.append(u'<span class="%s">' % token_class)
146 154 else:
147 155 result.append(u'<span>')
148 156
149 157 for op_tag, token_text in token_ops_texts:
150 158
151 159 if op_tag:
152 160 result.append(u'<%s>' % op_tag)
153 161
154 162 escaped_text = html_escape(token_text)
155 163
156 164 # TODO: dan: investigate showing hidden characters like space/nl/tab
157 165 # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
158 166 # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
159 167 # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
160 168
161 169 result.append(escaped_text)
162 170
163 171 if op_tag:
164 172 result.append(u'</%s>' % op_tag)
165 173
166 174 result.append(u'</span>')
167 175
168 176 html = ''.join(result)
169 177 return html
170 178
171 179
172 180 def rollup_tokenstream(tokenstream):
173 181 """
174 182 Group a token stream of the format:
175 183
176 184 ('class', 'op', 'text')
177 185 or
178 186 ('class', 'text')
179 187
180 188 into
181 189
182 190 [('class1',
183 191 [('op1', 'text'),
184 192 ('op2', 'text')]),
185 193 ('class2',
186 194 [('op3', 'text')])]
187 195
188 196 This is used to produce the minimal set of tags necessary when
189 197 rendering to html, e.g. for a token stream:
190 198
191 199 <span class="A"><ins>he</ins>llo</span>
192 200 vs
193 201 <span class="A"><ins>he</ins></span><span class="A">llo</span>
194 202
195 203 If a 2 tuple is passed in, the output op will be an empty string.
196 204
197 205 eg:
198 206
199 207 >>> rollup_tokenstream([('classA', '', 'h'),
200 208 ('classA', 'del', 'ell'),
201 209 ('classA', '', 'o'),
202 210 ('classB', '', ' '),
203 211 ('classA', '', 'the'),
204 212 ('classA', '', 're'),
205 213 ])
206 214
207 215 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
208 216 ('classB', [('', ' ')]),
209 217 ('classA', [('', 'there')])]
210 218
211 219 """
212 220 if tokenstream and len(tokenstream[0]) == 2:
213 221 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
214 222
215 223 result = []
216 224 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
217 225 ops = []
218 226 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
219 227 text_buffer = []
220 228 for t_class, t_op, t_text in token_text_list:
221 229 text_buffer.append(t_text)
222 230 ops.append((token_op, ''.join(text_buffer)))
223 231 result.append((token_class, ops))
224 232 return result
225 233
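A hand-worked sketch of rollup_tokenstream on invented input, showing consecutive tokens with the same class and op collapsing into single spans:

    rolled = rollup_tokenstream([
        ('k', 'del', u'de'), ('k', 'del', u'f'), ('k', '', u' f')])
    # -> [('k', [('del', u'def'), ('', u' f')])]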
226 234
227 235 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
228 236 """
229 237 Converts a list of (token_class, token_text) tuples to a list of
230 238 (token_class, token_op, token_text) tuples where token_op is one of
231 239 ('ins', 'del', '')
232 240
233 241 :param old_tokens: list of (token_class, token_text) tuples of old line
234 242 :param new_tokens: list of (token_class, token_text) tuples of new line
235 243 :param use_diff_match_patch: boolean, will use google's diff match patch
236 244 library which has options to 'smooth' out the character by character
237 245 differences making nicer ins/del blocks
238 246 """
239 247
240 248 old_tokens_result = []
241 249 new_tokens_result = []
242 250
243 251 similarity = difflib.SequenceMatcher(None,
244 252 ''.join(token_text for token_class, token_text in old_tokens),
245 253 ''.join(token_text for token_class, token_text in new_tokens)
246 254 ).ratio()
247 255
248 256 if similarity < 0.6: # return, the blocks are too different
249 257 for token_class, token_text in old_tokens:
250 258 old_tokens_result.append((token_class, '', token_text))
251 259 for token_class, token_text in new_tokens:
252 260 new_tokens_result.append((token_class, '', token_text))
253 261 return old_tokens_result, new_tokens_result, similarity
254 262
255 263 token_sequence_matcher = difflib.SequenceMatcher(None,
256 264 [x[1] for x in old_tokens],
257 265 [x[1] for x in new_tokens])
258 266
259 267 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
260 268 # check the differences by token block types first to give a
261 269 # nicer "block" level replacement vs character diffs
262 270
263 271 if tag == 'equal':
264 272 for token_class, token_text in old_tokens[o1:o2]:
265 273 old_tokens_result.append((token_class, '', token_text))
266 274 for token_class, token_text in new_tokens[n1:n2]:
267 275 new_tokens_result.append((token_class, '', token_text))
268 276 elif tag == 'delete':
269 277 for token_class, token_text in old_tokens[o1:o2]:
270 278 old_tokens_result.append((token_class, 'del', token_text))
271 279 elif tag == 'insert':
272 280 for token_class, token_text in new_tokens[n1:n2]:
273 281 new_tokens_result.append((token_class, 'ins', token_text))
274 282 elif tag == 'replace':
275 283 # if same type token blocks must be replaced, do a diff on the
276 284 # characters in the token blocks to show individual changes
277 285
278 286 old_char_tokens = []
279 287 new_char_tokens = []
280 288 for token_class, token_text in old_tokens[o1:o2]:
281 289 for char in token_text:
282 290 old_char_tokens.append((token_class, char))
283 291
284 292 for token_class, token_text in new_tokens[n1:n2]:
285 293 for char in token_text:
286 294 new_char_tokens.append((token_class, char))
287 295
288 296 old_string = ''.join([token_text for
289 297 token_class, token_text in old_char_tokens])
290 298 new_string = ''.join([token_text for
291 299 token_class, token_text in new_char_tokens])
292 300
293 301 char_sequence = difflib.SequenceMatcher(
294 302 None, old_string, new_string)
295 303 copcodes = char_sequence.get_opcodes()
296 304 obuffer, nbuffer = [], []
297 305
298 306 if use_diff_match_patch:
299 307 dmp = diff_match_patch()
300 308 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
301 309 reps = dmp.diff_main(old_string, new_string)
302 310 dmp.diff_cleanupEfficiency(reps)
303 311
304 312 a, b = 0, 0
305 313 for op, rep in reps:
306 314 l = len(rep)
307 315 if op == 0:
308 316 for i, c in enumerate(rep):
309 317 obuffer.append((old_char_tokens[a+i][0], '', c))
310 318 nbuffer.append((new_char_tokens[b+i][0], '', c))
311 319 a += l
312 320 b += l
313 321 elif op == -1:
314 322 for i, c in enumerate(rep):
315 323 obuffer.append((old_char_tokens[a+i][0], 'del', c))
316 324 a += l
317 325 elif op == 1:
318 326 for i, c in enumerate(rep):
319 327 nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
320 328 b += l
321 329 else:
322 330 for ctag, co1, co2, cn1, cn2 in copcodes:
323 331 if ctag == 'equal':
324 332 for token_class, token_text in old_char_tokens[co1:co2]:
325 333 obuffer.append((token_class, '', token_text))
326 334 for token_class, token_text in new_char_tokens[cn1:cn2]:
327 335 nbuffer.append((token_class, '', token_text))
328 336 elif ctag == 'delete':
329 337 for token_class, token_text in old_char_tokens[co1:co2]:
330 338 obuffer.append((token_class, 'del', token_text))
331 339 elif ctag == 'insert':
332 340 for token_class, token_text in new_char_tokens[cn1:cn2]:
333 341 nbuffer.append((token_class, 'ins', token_text))
334 342 elif ctag == 'replace':
335 343 for token_class, token_text in old_char_tokens[co1:co2]:
336 344 obuffer.append((token_class, 'del', token_text))
337 345 for token_class, token_text in new_char_tokens[cn1:cn2]:
338 346 nbuffer.append((token_class, 'ins', token_text))
339 347
340 348 old_tokens_result.extend(obuffer)
341 349 new_tokens_result.extend(nbuffer)
342 350
343 351 return old_tokens_result, new_tokens_result, similarity
344 352
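A hedged example of tokens_diff taking the difflib character path (use_diff_match_patch=False; the results below were worked out by hand from the code above):

    old, new, similarity = tokens_diff(
        [('', u'foo')], [('', u'food')], use_diff_match_patch=False)
    # old -> [('', '', u'f'), ('', '', u'o'), ('', '', u'o')]
    # new -> [('', '', u'f'), ('', '', u'o'), ('', '', u'o'), ('', 'ins', u'd')]
    # similarity ~= 0.857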
345 353
346 354 class DiffSet(object):
347 355 """
348 356 An object for parsing the diff result from diffs.DiffProcessor and
349 357 adding highlighting, side by side/unified renderings and line diffs
350 358 """
351 359
352 360 HL_REAL = 'REAL' # highlights using original file, slow
353 361 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
354 362 # in the case of multiline code
355 363 HL_NONE = 'NONE' # no highlighting, fastest
356 364
357 365 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
358 366 source_repo_name=None,
359 367 source_node_getter=lambda filename: None,
360 368 target_node_getter=lambda filename: None,
361 369 source_nodes=None, target_nodes=None,
362 370 max_file_size_limit=150 * 1024, # files over this size will
363 371 # use fast highlighting
364 372 comments=None,
365 373 ):
366 374
367 375 self.highlight_mode = highlight_mode
368 376 self.highlighted_filenodes = {}
369 377 self.source_node_getter = source_node_getter
370 378 self.target_node_getter = target_node_getter
371 379 self.source_nodes = source_nodes or {}
372 380 self.target_nodes = target_nodes or {}
373 381 self.repo_name = repo_name
374 382 self.source_repo_name = source_repo_name or repo_name
375 383 self.comments = comments or {}
376 384 self.comments_store = self.comments.copy()
377 385 self.max_file_size_limit = max_file_size_limit
378 386
379 387 def render_patchset(self, patchset, source_ref=None, target_ref=None):
380 388 diffset = AttributeDict(dict(
381 389 lines_added=0,
382 390 lines_deleted=0,
383 391 changed_files=0,
384 392 files=[],
385 393 file_stats={},
386 394 limited_diff=isinstance(patchset, LimitedDiffContainer),
387 395 repo_name=self.repo_name,
388 396 source_repo_name=self.source_repo_name,
389 397 source_ref=source_ref,
390 398 target_ref=target_ref,
391 399 ))
392 400 for patch in patchset:
393 401 diffset.file_stats[patch['filename']] = patch['stats']
394 402 filediff = self.render_patch(patch)
395 403 filediff.diffset = diffset
396 404 diffset.files.append(filediff)
397 405 diffset.changed_files += 1
398 406 if not patch['stats']['binary']:
399 407 diffset.lines_added += patch['stats']['added']
400 408 diffset.lines_deleted += patch['stats']['deleted']
401 409
402 410 return diffset
403 411
404 412 _lexer_cache = {}
405 413
406 414 def _get_lexer_for_filename(self, filename, filenode=None):
407 415 # cached because we might need to call it twice for source/target
408 416 if filename not in self._lexer_cache:
409 417 if filenode:
410 418 lexer = filenode.lexer
411 419 extension = filenode.extension
412 420 else:
413 421 lexer = FileNode.get_lexer(filename=filename)
414 422 extension = filename.split('.')[-1]
415 423
416 424 lexer = get_custom_lexer(extension) or lexer
417 425 self._lexer_cache[filename] = lexer
418 426 return self._lexer_cache[filename]
419 427
420 428 def render_patch(self, patch):
421 429 log.debug('rendering diff for %r' % patch['filename'])
422 430
423 431 source_filename = patch['original_filename']
424 432 target_filename = patch['filename']
425 433
426 434 source_lexer = plain_text_lexer
427 435 target_lexer = plain_text_lexer
428 436
429 437 if not patch['stats']['binary']:
430 438 if self.highlight_mode == self.HL_REAL:
431 439 if (source_filename and patch['operation'] in ('D', 'M')
432 440 and source_filename not in self.source_nodes):
433 441 self.source_nodes[source_filename] = (
434 442 self.source_node_getter(source_filename))
435 443
436 444 if (target_filename and patch['operation'] in ('A', 'M')
437 445 and target_filename not in self.target_nodes):
438 446 self.target_nodes[target_filename] = (
439 447 self.target_node_getter(target_filename))
440 448
441 449 elif self.highlight_mode == self.HL_FAST:
442 450 source_lexer = self._get_lexer_for_filename(source_filename)
443 451 target_lexer = self._get_lexer_for_filename(target_filename)
444 452
445 453 source_file = self.source_nodes.get(source_filename, source_filename)
446 454 target_file = self.target_nodes.get(target_filename, target_filename)
447 455
448 456 source_filenode, target_filenode = None, None
449 457
450 458 # TODO: dan: FileNode.lexer works on the content of the file - which
451 459 # can be slow - issue #4289 explains a lexer clean up - which once
452 460 # done can allow caching a lexer for a filenode to avoid the file lookup
453 461 if isinstance(source_file, FileNode):
454 462 source_filenode = source_file
455 463 #source_lexer = source_file.lexer
456 464 source_lexer = self._get_lexer_for_filename(source_filename)
457 465 source_file.lexer = source_lexer
458 466
459 467 if isinstance(target_file, FileNode):
460 468 target_filenode = target_file
461 469 #target_lexer = target_file.lexer
462 470 target_lexer = self._get_lexer_for_filename(target_filename)
463 471 target_file.lexer = target_lexer
464 472
465 473 source_file_path, target_file_path = None, None
466 474
467 475 if source_filename != '/dev/null':
468 476 source_file_path = source_filename
469 477 if target_filename != '/dev/null':
470 478 target_file_path = target_filename
471 479
472 480 source_file_type = source_lexer.name
473 481 target_file_type = target_lexer.name
474 482
475 483 filediff = AttributeDict({
476 484 'source_file_path': source_file_path,
477 485 'target_file_path': target_file_path,
478 486 'source_filenode': source_filenode,
479 487 'target_filenode': target_filenode,
480 488 'source_file_type': target_file_type,
481 489 'target_file_type': source_file_type,
482 490 'patch': {'filename': patch['filename'], 'stats': patch['stats']},
483 491 'operation': patch['operation'],
484 492 'source_mode': patch['stats']['old_mode'],
485 493 'target_mode': patch['stats']['new_mode'],
486 494 'limited_diff': isinstance(patch, LimitedDiffContainer),
487 495 'hunks': [],
488 496 'diffset': self,
489 497 })
490 498
491 499 for hunk in patch['chunks'][1:]:
492 500 hunkbit = self.parse_hunk(hunk, source_file, target_file)
493 501 hunkbit.source_file_path = source_file_path
494 502 hunkbit.target_file_path = target_file_path
495 503 filediff.hunks.append(hunkbit)
496 504
497 505 left_comments = {}
498 506 if source_file_path in self.comments_store:
499 507 for lineno, comments in self.comments_store[source_file_path].items():
500 508 left_comments[lineno] = comments
501 509
502 510 if target_file_path in self.comments_store:
503 511 for lineno, comments in self.comments_store[target_file_path].items():
504 512 left_comments[lineno] = comments
505 513 # left comments are ones that we couldn't place in diff lines.
506 514 # they could be outdated, or the diff changed and this line is no
507 515 # longer available
508 516 filediff.left_comments = left_comments
509 517
510 518 return filediff
511 519
512 520 def parse_hunk(self, hunk, source_file, target_file):
513 521 result = AttributeDict(dict(
514 522 source_start=hunk['source_start'],
515 523 source_length=hunk['source_length'],
516 524 target_start=hunk['target_start'],
517 525 target_length=hunk['target_length'],
518 526 section_header=hunk['section_header'],
519 527 lines=[],
520 528 ))
521 529 before, after = [], []
522 530
523 531 for line in hunk['lines']:
524 532
525 533 if line['action'] == 'unmod':
526 534 result.lines.extend(
527 535 self.parse_lines(before, after, source_file, target_file))
528 536 after.append(line)
529 537 before.append(line)
530 538 elif line['action'] == 'add':
531 539 after.append(line)
532 540 elif line['action'] == 'del':
533 541 before.append(line)
534 542 elif line['action'] == 'old-no-nl':
535 543 before.append(line)
536 544 elif line['action'] == 'new-no-nl':
537 545 after.append(line)
538 546
539 547 result.lines.extend(
540 548 self.parse_lines(before, after, source_file, target_file))
541 549 result.unified = self.as_unified(result.lines)
542 550 result.sideside = result.lines
543 551
544 552 return result
545 553
546 554 def parse_lines(self, before_lines, after_lines, source_file, target_file):
547 555 # TODO: dan: investigate doing the diff comparison and fast highlighting
548 556 # on the entire before and after buffered block lines rather than by
549 557 # line, this means we can get better 'fast' highlighting if the context
550 558 # allows it - eg.
551 559 # line 4: """
552 560 # line 5: this gets highlighted as a string
553 561 # line 6: """
554 562
555 563 lines = []
556 564
557 565 before_newline = AttributeDict()
558 566 after_newline = AttributeDict()
559 567 if before_lines and before_lines[-1]['action'] == 'old-no-nl':
560 568 before_newline_line = before_lines.pop(-1)
561 569 before_newline.content = '\n {}'.format(
562 570 render_tokenstream(
563 571 [(x[0], '', x[1])
564 572 for x in [('nonl', before_newline_line['line'])]]))
565 573
566 574 if after_lines and after_lines[-1]['action'] == 'new-no-nl':
567 575 after_newline_line = after_lines.pop(-1)
568 576 after_newline.content = '\n {}'.format(
569 577 render_tokenstream(
570 578 [(x[0], '', x[1])
571 579 for x in [('nonl', after_newline_line['line'])]]))
572 580
573 581 while before_lines or after_lines:
574 582 before, after = None, None
575 583 before_tokens, after_tokens = None, None
576 584
577 585 if before_lines:
578 586 before = before_lines.pop(0)
579 587 if after_lines:
580 588 after = after_lines.pop(0)
581 589
582 590 original = AttributeDict()
583 591 modified = AttributeDict()
584 592
585 593 if before:
586 594 if before['action'] == 'old-no-nl':
587 595 before_tokens = [('nonl', before['line'])]
588 596 else:
589 597 before_tokens = self.get_line_tokens(
590 598 line_text=before['line'],
591 599 line_number=before['old_lineno'],
592 600 file=source_file)
593 601 original.lineno = before['old_lineno']
594 602 original.content = before['line']
595 603 original.action = self.action_to_op(before['action'])
596 604 original.comments = self.get_comments_for('old',
597 605 source_file, before['old_lineno'])
598 606
599 607 if after:
600 608 if after['action'] == 'new-no-nl':
601 609 after_tokens = [('nonl', after['line'])]
602 610 else:
603 611 after_tokens = self.get_line_tokens(
604 612 line_text=after['line'], line_number=after['new_lineno'],
605 613 file=target_file)
606 614 modified.lineno = after['new_lineno']
607 615 modified.content = after['line']
608 616 modified.action = self.action_to_op(after['action'])
609 617 modified.comments = self.get_comments_for('new',
610 618 target_file, after['new_lineno'])
611 619
612 620 # diff the lines
613 621 if before_tokens and after_tokens:
614 622 o_tokens, m_tokens, similarity = tokens_diff(
615 623 before_tokens, after_tokens)
616 624 original.content = render_tokenstream(o_tokens)
617 625 modified.content = render_tokenstream(m_tokens)
618 626 elif before_tokens:
619 627 original.content = render_tokenstream(
620 628 [(x[0], '', x[1]) for x in before_tokens])
621 629 elif after_tokens:
622 630 modified.content = render_tokenstream(
623 631 [(x[0], '', x[1]) for x in after_tokens])
624 632
625 633 if not before_lines and before_newline:
626 634 original.content += before_newline.content
627 635 before_newline = None
628 636 if not after_lines and after_newline:
629 637 modified.content += after_newline.content
630 638 after_newline = None
631 639
632 640 lines.append(AttributeDict({
633 641 'original': original,
634 642 'modified': modified,
635 643 }))
636 644
637 645 return lines
638 646
639 647 def get_comments_for(self, version, filename, line_number):
640 648 if hasattr(filename, 'unicode_path'):
641 649 filename = filename.unicode_path
642 650
643 651 if not isinstance(filename, basestring):
644 652 return None
645 653
646 654 line_key = {
647 655 'old': 'o',
648 656 'new': 'n',
649 657 }[version] + str(line_number)
650 658
651 659 if filename in self.comments_store:
652 660 file_comments = self.comments_store[filename]
653 661 if line_key in file_comments:
654 662 return file_comments.pop(line_key)
655 663
656 664 def get_line_tokens(self, line_text, line_number, file=None):
657 665 filenode = None
658 666 filename = None
659 667
660 668 if isinstance(file, basestring):
661 669 filename = file
662 670 elif isinstance(file, FileNode):
663 671 filenode = file
664 672 filename = file.unicode_path
665 673
666 674 if self.highlight_mode == self.HL_REAL and filenode:
667 675 lexer = self._get_lexer_for_filename(filename)
668 676 file_size_allowed = file.size < self.max_file_size_limit
669 677 if line_number and file_size_allowed:
670 678 return self.get_tokenized_filenode_line(
671 679 file, line_number, lexer)
672 680
673 681 if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename:
674 682 lexer = self._get_lexer_for_filename(filename)
675 683 return list(tokenize_string(line_text, lexer))
676 684
677 685 return list(tokenize_string(line_text, plain_text_lexer))
678 686
679 687 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
680 688
681 689 if filenode not in self.highlighted_filenodes:
682 690 tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
683 691 self.highlighted_filenodes[filenode] = tokenized_lines
684 692 return self.highlighted_filenodes[filenode][line_number - 1]
685 693
686 694 def action_to_op(self, action):
687 695 return {
688 696 'add': '+',
689 697 'del': '-',
690 698 'unmod': ' ',
691 699 'old-no-nl': ' ',
692 700 'new-no-nl': ' ',
693 701 }.get(action, action)
694 702
695 703 def as_unified(self, lines):
696 704 """
697 705 Return a generator that yields the lines of a diff in unified order
698 706 """
699 707 def generator():
700 708 buf = []
701 709 for line in lines:
702 710
703 711 if buf and not line.original or line.original.action == ' ':
704 712 for b in buf:
705 713 yield b
706 714 buf = []
707 715
708 716 if line.original:
709 717 if line.original.action == ' ':
710 718 yield (line.original.lineno, line.modified.lineno,
711 719 line.original.action, line.original.content,
712 720 line.original.comments)
713 721 continue
714 722
715 723 if line.original.action == '-':
716 724 yield (line.original.lineno, None,
717 725 line.original.action, line.original.content,
718 726 line.original.comments)
719 727
720 728 if line.modified.action == '+':
721 729 buf.append((
722 730 None, line.modified.lineno,
723 731 line.modified.action, line.modified.content,
724 732 line.modified.comments))
725 733 continue
726 734
727 735 if line.modified:
728 736 yield (None, line.modified.lineno,
729 737 line.modified.action, line.modified.content,
730 738 line.modified.comments)
731 739
732 740 for b in buf:
733 741 yield b
734 742
735 743 return generator()
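To make the buffering in as_unified() concrete, this is the emission order for a two-line replaced block (line numbers invented): deletions stream immediately, additions sit in buf until the next context line, or the end, flushes them:

    # (12, None, '-', old_content, comments)
    # (13, None, '-', old_content, comments)
    # (None, 12, '+', new_content, comments)   # flushed from buf
    # (None, 13, '+', new_content, comments)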
@@ -1,1107 +1,1131 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2018 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21
22 22 """
23 23 Set of diffing helpers, previously part of vcs
24 24 """
25 25
26 26 import re
27 27 import collections
28 28 import difflib
29 29 import logging
30 30
31 31 from itertools import tee, imap
32 32
33 33 from rhodecode.lib.vcs.exceptions import VCSError
34 34 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
35 35 from rhodecode.lib.utils2 import safe_unicode
36 36
37 37 log = logging.getLogger(__name__)
38 38
39 39 # define max context, a file with more than this number of lines is unusable
40 40 # in browser anyway
41 41 MAX_CONTEXT = 1024 * 1014
42 42
43 43
44 44 class OPS(object):
45 45 ADD = 'A'
46 46 MOD = 'M'
47 47 DEL = 'D'
48 48
49 49
50 50 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
51 51 """
52 52 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
53 53
54 54 :param ignore_whitespace: ignore whitespaces in diff
55 55 """
56 56 # make sure we pass in default context
57 57 context = context or 3
58 58 # protect against IntOverflow when passing HUGE context
59 59 if context > MAX_CONTEXT:
60 60 context = MAX_CONTEXT
61 61
62 62 submodules = filter(lambda o: isinstance(o, SubModuleNode),
63 63 [filenode_new, filenode_old])
64 64 if submodules:
65 65 return ''
66 66
67 67 for filenode in (filenode_old, filenode_new):
68 68 if not isinstance(filenode, FileNode):
69 69 raise VCSError(
70 70 "Given object should be FileNode object, not %s"
71 71 % filenode.__class__)
72 72
73 73 repo = filenode_new.commit.repository
74 74 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
75 75 new_commit = filenode_new.commit
76 76
77 77 vcs_gitdiff = repo.get_diff(
78 78 old_commit, new_commit, filenode_new.path,
79 79 ignore_whitespace, context, path1=filenode_old.path)
80 80 return vcs_gitdiff
81 81
82 82 NEW_FILENODE = 1
83 83 DEL_FILENODE = 2
84 84 MOD_FILENODE = 3
85 85 RENAMED_FILENODE = 4
86 86 COPIED_FILENODE = 5
87 87 CHMOD_FILENODE = 6
88 88 BIN_FILENODE = 7
89 89
90 90
91 91 class LimitedDiffContainer(object):
92 92
93 93 def __init__(self, diff_limit, cur_diff_size, diff):
94 94 self.diff = diff
95 95 self.diff_limit = diff_limit
96 96 self.cur_diff_size = cur_diff_size
97 97
98 98 def __getitem__(self, key):
99 99 return self.diff.__getitem__(key)
100 100
101 101 def __iter__(self):
102 102 for l in self.diff:
103 103 yield l
104 104
105 105
106 106 class Action(object):
107 107 """
108 108 Contains constants for the action value of the lines in a parsed diff.
109 109 """
110 110
111 111 ADD = 'add'
112 112 DELETE = 'del'
113 113 UNMODIFIED = 'unmod'
114 114
115 115 CONTEXT = 'context'
116 116 OLD_NO_NL = 'old-no-nl'
117 117 NEW_NO_NL = 'new-no-nl'
118 118
119 119
120 120 class DiffProcessor(object):
121 121 """
122 122 Give it a unified or git diff and it returns a list of the files that were
123 123 mentioned in the diff together with a dict of meta information that
124 124 can be used to render it in an HTML template.
125 125
126 126 .. note:: Unicode handling
127 127
128 128 The original diffs are a byte sequence and can contain filenames
129 129 in mixed encodings. This class generally returns `unicode` objects
130 130 since the result is intended for presentation to the user.
131 131
132 132 """
133 133 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
134 134 _newline_marker = re.compile(r'^\\ No newline at end of file')
135 135
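For reference, a worked example of what _chunk_re captures (the sample header is invented):

    # '@@ -12,8 +12,9 @@ def foo():'
    # -> groups ('12', '8', '12', '9', ' def foo():')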
136 136 # used for inline highlighter word split
137 137 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
138 138
139 139 # collapse ranges of commits over given number
140 140 _collapse_commits_over = 5
141 141
142 142 def __init__(self, diff, format='gitdiff', diff_limit=None,
143 143 file_limit=None, show_full_diff=True):
144 144 """
145 145 :param diff: A `Diff` object representing a diff from a vcs backend
146 146 :param format: format of diff passed, `udiff` or `gitdiff`
147 147 :param diff_limit: define the size of diff that is considered "big";
148 148 based on that parameter a cut off will be triggered, set to None
149 149 to show the full diff
150 150 """
151 151 self._diff = diff
152 152 self._format = format
153 153 self.adds = 0
154 154 self.removes = 0
155 155 # calculate diff size
156 156 self.diff_limit = diff_limit
157 157 self.file_limit = file_limit
158 158 self.show_full_diff = show_full_diff
159 159 self.cur_diff_size = 0
160 160 self.parsed = False
161 161 self.parsed_diff = []
162 162
163 163 log.debug('Initialized DiffProcessor with %s mode', format)
164 164 if format == 'gitdiff':
165 165 self.differ = self._highlight_line_difflib
166 166 self._parser = self._parse_gitdiff
167 167 else:
168 168 self.differ = self._highlight_line_udiff
169 169 self._parser = self._new_parse_gitdiff
170 170
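A hypothetical driver for the class, assuming a Diff object from a vcs backend as in get_gitdiff() above (the 'newdiff' format value is an assumption; any value other than 'gitdiff' selects _new_parse_gitdiff):

    # diff = repo.get_diff(old_commit, new_commit, path)
    # processor = DiffProcessor(diff, format='newdiff', diff_limit=512 * 1024,
    #                           file_limit=150 * 1024, show_full_diff=False)
    # parsed = processor.prepare()       # list of per-file dicts with 'chunks'
    # added, removed = processor.stat()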
171 171 def _copy_iterator(self):
172 172 """
172 172 make a fresh copy of the generator; we should not iterate through
173 173 the original as it's needed for repeating operations on
175 175 this instance of DiffProcessor
176 176 """
177 177 self.__udiff, iterator_copy = tee(self.__udiff)
178 178 return iterator_copy
179 179
180 180 def _escaper(self, string):
181 181 """
182 182 Escaper for diff escapes special chars and checks the diff limit
183 183
184 184 :param string:
185 185 """
186
187 186 self.cur_diff_size += len(string)
188 187
189 188 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
190 189 raise DiffLimitExceeded('Diff Limit Exceeded')
191 190
192 return safe_unicode(string)\
191 return string \
193 192 .replace('&', '&amp;')\
194 193 .replace('<', '&lt;')\
195 194 .replace('>', '&gt;')
196 195
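A hand-checked example of _escaper (the '&' replacement runs first, so the entities it introduces are not re-escaped):

    # _escaper(u'<del>&</del>') -> u'&lt;del&gt;&amp;&lt;/del&gt;'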
197 196 def _line_counter(self, l):
198 197 """
199 198 Checks each line and bumps total adds/removes for this diff
200 199
201 200 :param l:
202 201 """
203 202 if l.startswith('+') and not l.startswith('+++'):
204 203 self.adds += 1
205 204 elif l.startswith('-') and not l.startswith('---'):
206 205 self.removes += 1
207 206 return safe_unicode(l)
208 207
209 208 def _highlight_line_difflib(self, line, next_):
210 209 """
211 210 Highlight inline changes in both lines.
212 211 """
213 212
214 213 if line['action'] == Action.DELETE:
215 214 old, new = line, next_
216 215 else:
217 216 old, new = next_, line
218 217
219 218 oldwords = self._token_re.split(old['line'])
220 219 newwords = self._token_re.split(new['line'])
221 220 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
222 221
223 222 oldfragments, newfragments = [], []
224 223 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
225 224 oldfrag = ''.join(oldwords[i1:i2])
226 225 newfrag = ''.join(newwords[j1:j2])
227 226 if tag != 'equal':
228 227 if oldfrag:
229 228 oldfrag = '<del>%s</del>' % oldfrag
230 229 if newfrag:
231 230 newfrag = '<ins>%s</ins>' % newfrag
232 231 oldfragments.append(oldfrag)
233 232 newfragments.append(newfrag)
234 233
235 234 old['line'] = "".join(oldfragments)
236 235 new['line'] = "".join(newfragments)
237 236
238 237 def _highlight_line_udiff(self, line, next_):
239 238 """
240 239 Highlight inline changes in both lines.
241 240 """
242 241 start = 0
243 242 limit = min(len(line['line']), len(next_['line']))
244 243 while start < limit and line['line'][start] == next_['line'][start]:
245 244 start += 1
246 245 end = -1
247 246 limit -= start
248 247 while -end <= limit and line['line'][end] == next_['line'][end]:
249 248 end -= 1
250 249 end += 1
251 250 if start or end:
252 251 def do(l):
253 252 last = end + len(l['line'])
254 253 if l['action'] == Action.ADD:
255 254 tag = 'ins'
256 255 else:
257 256 tag = 'del'
258 257 l['line'] = '%s<%s>%s</%s>%s' % (
259 258 l['line'][:start],
260 259 tag,
261 260 l['line'][start:last],
262 261 tag,
263 262 l['line'][last:]
264 263 )
265 264 do(line)
266 265 do(next_)
267 266
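A hand-worked illustration of the prefix/suffix trimming above on invented lines: for old u'abcdef' vs new u'abXdef' the scans settle on start=2, end=-3, so only the differing middle character is wrapped:

    # old['line'] -> u'ab<del>c</del>def'
    # new['line'] -> u'ab<ins>X</ins>def'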
268 267 def _clean_line(self, line, command):
269 268 if command in ['+', '-', ' ']:
270 269 # only modify the line if it's actually a diff thing
271 270 line = line[1:]
272 271 return line
273 272
274 273 def _parse_gitdiff(self, inline_diff=True):
275 274 _files = []
276 275 diff_container = lambda arg: arg
277 276
278 277 for chunk in self._diff.chunks():
279 278 head = chunk.header
280 279
281 diff = imap(self._escaper, chunk.diff.splitlines(1))
280 diff = imap(self._escaper, self.diff_splitter(chunk.diff))
282 281 raw_diff = chunk.raw
283 282 limited_diff = False
284 283 exceeds_limit = False
285 284
286 285 op = None
287 286 stats = {
288 287 'added': 0,
289 288 'deleted': 0,
290 289 'binary': False,
291 290 'ops': {},
292 291 }
293 292
294 293 if head['deleted_file_mode']:
295 294 op = OPS.DEL
296 295 stats['binary'] = True
297 296 stats['ops'][DEL_FILENODE] = 'deleted file'
298 297
299 298 elif head['new_file_mode']:
300 299 op = OPS.ADD
301 300 stats['binary'] = True
302 301 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
303 302 else: # modify operation, can be copy, rename or chmod
304 303
305 304 # CHMOD
306 305 if head['new_mode'] and head['old_mode']:
307 306 op = OPS.MOD
308 307 stats['binary'] = True
309 308 stats['ops'][CHMOD_FILENODE] = (
310 309 'modified file chmod %s => %s' % (
311 310 head['old_mode'], head['new_mode']))
312 311 # RENAME
313 312 if head['rename_from'] != head['rename_to']:
314 313 op = OPS.MOD
315 314 stats['binary'] = True
316 315 stats['ops'][RENAMED_FILENODE] = (
317 316 'file renamed from %s to %s' % (
318 317 head['rename_from'], head['rename_to']))
319 318 # COPY
320 319 if head.get('copy_from') and head.get('copy_to'):
321 320 op = OPS.MOD
322 321 stats['binary'] = True
323 322 stats['ops'][COPIED_FILENODE] = (
324 323 'file copied from %s to %s' % (
325 324 head['copy_from'], head['copy_to']))
326 325
327 326 # If our new parsed headers didn't match anything fallback to
328 327 # old style detection
329 328 if op is None:
330 329 if not head['a_file'] and head['b_file']:
331 330 op = OPS.ADD
332 331 stats['binary'] = True
333 332 stats['ops'][NEW_FILENODE] = 'new file'
334 333
335 334 elif head['a_file'] and not head['b_file']:
336 335 op = OPS.DEL
337 336 stats['binary'] = True
338 337 stats['ops'][DEL_FILENODE] = 'deleted file'
339 338
340 339 # it's not ADD not DELETE
341 340 if op is None:
342 341 op = OPS.MOD
343 342 stats['binary'] = True
344 343 stats['ops'][MOD_FILENODE] = 'modified file'
345 344
346 345 # a real non-binary diff
347 346 if head['a_file'] or head['b_file']:
348 347 try:
349 348 raw_diff, chunks, _stats = self._parse_lines(diff)
350 349 stats['binary'] = False
351 350 stats['added'] = _stats[0]
352 351 stats['deleted'] = _stats[1]
353 352 # explicit mark that it's a modified file
354 353 if op == OPS.MOD:
355 354 stats['ops'][MOD_FILENODE] = 'modified file'
356 355 exceeds_limit = len(raw_diff) > self.file_limit
357 356
358 357 # changed from _escaper function so we validate size of
359 358 # each file instead of the whole diff
360 359 # diff will hide big files but still show small ones
361 360 # from my tests, big files are fairly safe to be parsed
362 361 # but the browser is the bottleneck
363 362 if not self.show_full_diff and exceeds_limit:
364 363 raise DiffLimitExceeded('File Limit Exceeded')
365 364
366 365 except DiffLimitExceeded:
367 366 diff_container = lambda _diff: \
368 367 LimitedDiffContainer(
369 368 self.diff_limit, self.cur_diff_size, _diff)
370 369
371 370 exceeds_limit = len(raw_diff) > self.file_limit
372 371 limited_diff = True
373 372 chunks = []
374 373
375 374 else: # GIT format binary patch, or possibly empty diff
376 375 if head['bin_patch']:
377 376 # we already have the operation extracted, but we simply mark
378 377 # that it's a diff we won't show for binary files
379 378 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
380 379 chunks = []
381 380
382 381 if chunks and not self.show_full_diff and op == OPS.DEL:
383 382 # if not full diff mode show deleted file contents
384 383 # TODO: anderson: if the view is not too big, there is no way
385 384 # to see the content of the file
386 385 chunks = []
387 386
388 387 chunks.insert(0, [{
389 388 'old_lineno': '',
390 389 'new_lineno': '',
391 390 'action': Action.CONTEXT,
392 391 'line': msg,
393 392 } for _op, msg in stats['ops'].iteritems()
394 393 if _op not in [MOD_FILENODE]])
395 394
396 395 _files.append({
397 396 'filename': safe_unicode(head['b_path']),
398 397 'old_revision': head['a_blob_id'],
399 398 'new_revision': head['b_blob_id'],
400 399 'chunks': chunks,
401 400 'raw_diff': safe_unicode(raw_diff),
402 401 'operation': op,
403 402 'stats': stats,
404 403 'exceeds_limit': exceeds_limit,
405 404 'is_limited_diff': limited_diff,
406 405 })
407 406
408 407 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
409 408 OPS.DEL: 2}.get(info['operation'])
410 409
411 410 if not inline_diff:
412 411 return diff_container(sorted(_files, key=sorter))
413 412
414 413 # highlight inline changes
415 414 for diff_data in _files:
416 415 for chunk in diff_data['chunks']:
417 416 lineiter = iter(chunk)
418 417 try:
419 418 while 1:
420 419 line = lineiter.next()
421 420 if line['action'] not in (
422 421 Action.UNMODIFIED, Action.CONTEXT):
423 422 nextline = lineiter.next()
424 423 if nextline['action'] in ['unmod', 'context'] or \
425 424 nextline['action'] == line['action']:
426 425 continue
427 426 self.differ(line, nextline)
428 427 except StopIteration:
429 428 pass
430 429
431 430 return diff_container(sorted(_files, key=sorter))
432 431
433 432 def _check_large_diff(self):
434 433 log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
435 434 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
436 435 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
437 436
438 437 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
439 438 def _new_parse_gitdiff(self, inline_diff=True):
440 439 _files = []
441 440
442 441 # this can be overridden later to a LimitedDiffContainer type
443 442 diff_container = lambda arg: arg
444 443
445 444 for chunk in self._diff.chunks():
446 445 head = chunk.header
447 446 log.debug('parsing diff %r' % head)
448 447
449 448 raw_diff = chunk.raw
450 449 limited_diff = False
451 450 exceeds_limit = False
452 451
453 452 op = None
454 453 stats = {
455 454 'added': 0,
456 455 'deleted': 0,
457 456 'binary': False,
458 457 'old_mode': None,
459 458 'new_mode': None,
460 459 'ops': {},
461 460 }
462 461 if head['old_mode']:
463 462 stats['old_mode'] = head['old_mode']
464 463 if head['new_mode']:
465 464 stats['new_mode'] = head['new_mode']
466 465 if head['b_mode']:
467 466 stats['new_mode'] = head['b_mode']
468 467
469 468 # delete file
470 469 if head['deleted_file_mode']:
471 470 op = OPS.DEL
472 471 stats['binary'] = True
473 472 stats['ops'][DEL_FILENODE] = 'deleted file'
474 473
475 474 # new file
476 475 elif head['new_file_mode']:
477 476 op = OPS.ADD
478 477 stats['binary'] = True
479 478 stats['old_mode'] = None
480 479 stats['new_mode'] = head['new_file_mode']
481 480 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
482 481
483 482 # modify operation, can be copy, rename or chmod
484 483 else:
485 484 # CHMOD
486 485 if head['new_mode'] and head['old_mode']:
487 486 op = OPS.MOD
488 487 stats['binary'] = True
489 488 stats['ops'][CHMOD_FILENODE] = (
490 489 'modified file chmod %s => %s' % (
491 490 head['old_mode'], head['new_mode']))
492 491
493 492 # RENAME
494 493 if head['rename_from'] != head['rename_to']:
495 494 op = OPS.MOD
496 495 stats['binary'] = True
497 496 stats['renamed'] = (head['rename_from'], head['rename_to'])
498 497 stats['ops'][RENAMED_FILENODE] = (
499 498 'file renamed from %s to %s' % (
500 499 head['rename_from'], head['rename_to']))
501 500 # COPY
502 501 if head.get('copy_from') and head.get('copy_to'):
503 502 op = OPS.MOD
504 503 stats['binary'] = True
505 504 stats['copied'] = (head['copy_from'], head['copy_to'])
506 505 stats['ops'][COPIED_FILENODE] = (
507 506 'file copied from %s to %s' % (
508 507 head['copy_from'], head['copy_to']))
509 508
510 509 # If our new parsed headers didn't match anything fallback to
511 510 # old style detection
512 511 if op is None:
513 512 if not head['a_file'] and head['b_file']:
514 513 op = OPS.ADD
515 514 stats['binary'] = True
516 515 stats['new_file'] = True
517 516 stats['ops'][NEW_FILENODE] = 'new file'
518 517
519 518 elif head['a_file'] and not head['b_file']:
520 519 op = OPS.DEL
521 520 stats['binary'] = True
522 521 stats['ops'][DEL_FILENODE] = 'deleted file'
523 522
524 523 # it's not ADD not DELETE
525 524 if op is None:
526 525 op = OPS.MOD
527 526 stats['binary'] = True
528 527 stats['ops'][MOD_FILENODE] = 'modified file'
529 528
530 529 # a real non-binary diff
531 530 if head['a_file'] or head['b_file']:
532 diff = iter(chunk.diff.splitlines(1))
531 # simulate splitlines, so we keep the line end part
532 diff = self.diff_splitter(chunk.diff)
533 533
534 534 # append each file to the diff size
535 535 raw_chunk_size = len(raw_diff)
536 536
537 537 exceeds_limit = raw_chunk_size > self.file_limit
538 538 self.cur_diff_size += raw_chunk_size
539 539
540 540 try:
541 541 # Check each file instead of the whole diff.
542 542 # Diff will hide big files but still show small ones.
543 543 # From the tests big files are fairly safe to be parsed
544 544 # but the browser is the bottleneck.
545 545 if not self.show_full_diff and exceeds_limit:
546 546 log.debug('File `%s` exceeds current file_limit of %s',
547 547 safe_unicode(head['b_path']), self.file_limit)
548 548 raise DiffLimitExceeded(
549 549 'File Limit %s Exceeded', self.file_limit)
550 550
551 551 self._check_large_diff()
552 552
553 553 raw_diff, chunks, _stats = self._new_parse_lines(diff)
554 554 stats['binary'] = False
555 555 stats['added'] = _stats[0]
556 556 stats['deleted'] = _stats[1]
557 557 # explicit mark that it's a modified file
558 558 if op == OPS.MOD:
559 559 stats['ops'][MOD_FILENODE] = 'modified file'
560 560
561 561 except DiffLimitExceeded:
562 562 diff_container = lambda _diff: \
563 563 LimitedDiffContainer(
564 564 self.diff_limit, self.cur_diff_size, _diff)
565 565
566 566 limited_diff = True
567 567 chunks = []
568 568
569 569 else: # GIT format binary patch, or possibly empty diff
570 570 if head['bin_patch']:
571 571 # we already have the operation extracted, but we simply mark
572 572 # that it's a diff we won't show for binary files
573 573 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
574 574 chunks = []
575 575
576 576 # Hide content of deleted node by setting empty chunks
577 577 if chunks and not self.show_full_diff and op == OPS.DEL:
578 578 # if not full diff mode show deleted file contents
579 579 # TODO: anderson: if the view is not too big, there is no way
580 580 # to see the content of the file
581 581 chunks = []
582 582
583 583 chunks.insert(
584 584 0, [{'old_lineno': '',
585 585 'new_lineno': '',
586 586 'action': Action.CONTEXT,
587 587 'line': msg,
588 588 } for _op, msg in stats['ops'].iteritems()
589 589 if _op not in [MOD_FILENODE]])
590 590
591 591 original_filename = safe_unicode(head['a_path'])
592 592 _files.append({
593 593 'original_filename': original_filename,
594 594 'filename': safe_unicode(head['b_path']),
595 595 'old_revision': head['a_blob_id'],
596 596 'new_revision': head['b_blob_id'],
597 597 'chunks': chunks,
598 598 'raw_diff': safe_unicode(raw_diff),
599 599 'operation': op,
600 600 'stats': stats,
601 601 'exceeds_limit': exceeds_limit,
602 602 'is_limited_diff': limited_diff,
603 603 })
604 604
605 605 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
606 606 OPS.DEL: 2}.get(info['operation'])
607 607
608 608 return diff_container(sorted(_files, key=sorter))
609 609
610 610 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
611 def _parse_lines(self, diff):
611 def _parse_lines(self, diff_iter):
612 612 """
613 613 Parse the diff and return data for the template.
614 614 """
615 615
616 lineiter = iter(diff)
617 616 stats = [0, 0]
618 617 chunks = []
619 618 raw_diff = []
620 619
621 620 try:
622 line = lineiter.next()
621 line = diff_iter.next()
623 622
624 623 while line:
625 624 raw_diff.append(line)
626 625 lines = []
627 626 chunks.append(lines)
628 627
629 628 match = self._chunk_re.match(line)
630 629
631 630 if not match:
632 631 break
633 632
634 633 gr = match.groups()
635 634 (old_line, old_end,
636 635 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
637 636 old_line -= 1
638 637 new_line -= 1
639 638
640 639 context = len(gr) == 5
641 640 old_end += old_line
642 641 new_end += new_line
643 642
644 643 if context:
645 644 # skip context only if it's the first line
646 645 if int(gr[0]) > 1:
647 646 lines.append({
648 647 'old_lineno': '...',
649 648 'new_lineno': '...',
650 649 'action': Action.CONTEXT,
651 650 'line': line,
652 651 })
653 652
654 line = lineiter.next()
653 line = diff_iter.next()
655 654
656 655 while old_line < old_end or new_line < new_end:
657 656 command = ' '
658 657 if line:
659 658 command = line[0]
660 659
661 660 affects_old = affects_new = False
662 661
663 662 # ignore those if we don't expect them
664 663 if command in '#@':
665 664 continue
666 665 elif command == '+':
667 666 affects_new = True
668 667 action = Action.ADD
669 668 stats[0] += 1
670 669 elif command == '-':
671 670 affects_old = True
672 671 action = Action.DELETE
673 672 stats[1] += 1
674 673 else:
675 674 affects_old = affects_new = True
676 675 action = Action.UNMODIFIED
677 676
678 677 if not self._newline_marker.match(line):
679 678 old_line += affects_old
680 679 new_line += affects_new
681 680 lines.append({
682 681 'old_lineno': affects_old and old_line or '',
683 682 'new_lineno': affects_new and new_line or '',
684 683 'action': action,
685 684 'line': self._clean_line(line, command)
686 685 })
687 686 raw_diff.append(line)
688 687
689 line = lineiter.next()
688 line = diff_iter.next()
690 689
691 690 if self._newline_marker.match(line):
692 691 # we need to append to lines, since this is not
693 692 # counted in the line specs of diff
694 693 lines.append({
695 694 'old_lineno': '...',
696 695 'new_lineno': '...',
697 696 'action': Action.CONTEXT,
698 697 'line': self._clean_line(line, command)
699 698 })
700 699
701 700 except StopIteration:
702 701 pass
703 702 return ''.join(raw_diff), chunks, stats
704 703
705 704 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
706 705 def _new_parse_lines(self, diff_iter):
707 706 """
708 707 Parse the diff and return data for the template.
709 708 """
710 709
711 710 stats = [0, 0]
712 711 chunks = []
713 712 raw_diff = []
714 713
715 diff_iter = imap(lambda s: safe_unicode(s), diff_iter)
716
717 714 try:
718 715 line = diff_iter.next()
719 716
720 717 while line:
721 718 raw_diff.append(line)
719 # match header, e.g. @@ -0,0 +1 @@
722 720 match = self._chunk_re.match(line)
723 721
724 722 if not match:
725 723 break
726 724
727 725 gr = match.groups()
728 726 (old_line, old_end,
729 727 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
730 728
731 729 lines = []
732 730 hunk = {
733 731 'section_header': gr[-1],
734 732 'source_start': old_line,
735 733 'source_length': old_end,
736 734 'target_start': new_line,
737 735 'target_length': new_end,
738 736 'lines': lines,
739 737 }
740 738 chunks.append(hunk)
741 739
742 740 old_line -= 1
743 741 new_line -= 1
744 742
745 743 context = len(gr) == 5
746 744 old_end += old_line
747 745 new_end += new_line
748 746
749 747 line = diff_iter.next()
750 748
751 749 while old_line < old_end or new_line < new_end:
752 750 command = ' '
753 751 if line:
754 752 command = line[0]
755 753
756 754 affects_old = affects_new = False
757 755
758 756 # ignore those if we don't expect them
759 757 if command in '#@':
760 758 continue
761 759 elif command == '+':
762 760 affects_new = True
763 761 action = Action.ADD
764 762 stats[0] += 1
765 763 elif command == '-':
766 764 affects_old = True
767 765 action = Action.DELETE
768 766 stats[1] += 1
769 767 else:
770 768 affects_old = affects_new = True
771 769 action = Action.UNMODIFIED
772 770
773 771 if not self._newline_marker.match(line):
774 772 old_line += affects_old
775 773 new_line += affects_new
776 774 lines.append({
777 775 'old_lineno': affects_old and old_line or '',
778 776 'new_lineno': affects_new and new_line or '',
779 777 'action': action,
780 778 'line': self._clean_line(line, command)
781 779 })
782 780 raw_diff.append(line)
783 781
784 782 line = diff_iter.next()
785 783
786 784 if self._newline_marker.match(line):
787 785 # we need to append to lines, since this is not
788 786 # counted in the line specs of diff
789 787 if affects_old:
790 788 action = Action.OLD_NO_NL
791 789 elif affects_new:
792 790 action = Action.NEW_NO_NL
793 791 else:
794 792 raise Exception('invalid context for no newline')
795 793
796 794 lines.append({
797 795 'old_lineno': None,
798 796 'new_lineno': None,
799 797 'action': action,
800 798 'line': self._clean_line(line, command)
801 799 })
802 800
803 801 except StopIteration:
804 802 pass
805 803
806 804 return ''.join(raw_diff), chunks, stats
807 805
808 806 def _safe_id(self, idstring):
809 807 """Make a string safe for including in an id attribute.
810 808
811 809 The HTML spec says that id attributes 'must begin with
812 810 a letter ([A-Za-z]) and may be followed by any number
813 811 of letters, digits ([0-9]), hyphens ("-"), underscores
814 812 ("_"), colons (":"), and periods (".")'. These regexps
815 813 are slightly over-zealous, in that they remove colons
816 814 and periods unnecessarily.
817 815
818 816 Whitespace is transformed into underscores, and then
819 817 anything which is not a hyphen or a character that
820 818 matches \w (alphanumerics and underscore) is removed.
821 819
822 820 """
823 821 # Transform all whitespace to underscore
824 822 idstring = re.sub(r'\s', "_", '%s' % idstring)
825 823 # Remove everything that is not a hyphen or a member of \w
826 824 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
827 825 return idstring
828 826
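A hand-checked example of _safe_id (whitespace becomes underscores, the dot is dropped, and the result is lowercased):

    # _safe_id(u'My File v2.txt') -> u'my_file_v2txt'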
827 @classmethod
828 def diff_splitter(cls, string):
829 """
830 Diff split that emulates .splitlines() but works only on \n
831 """
832 if not string:
833 return
834 elif string == '\n':
835 yield u'\n'
836 else:
837
838 has_newline = string.endswith('\n')
839 elements = string.split('\n')
840 if has_newline:
841 # skip last element as it's an empty string from the trailing newline
842 elements = elements[:-1]
843
844 len_elements = len(elements)
845
846 for cnt, line in enumerate(elements, start=1):
847 last_line = cnt == len_elements
848 if last_line and not has_newline:
849 yield safe_unicode(line)
850 else:
851 yield safe_unicode(line) + '\n'
852
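A quick sanity sketch of diff_splitter; the values follow directly from the code above:

    # list(DiffProcessor.diff_splitter(u'a\nb'))  -> [u'a\n', u'b']
    # list(DiffProcessor.diff_splitter(u'a\n'))   -> [u'a\n']
    # list(DiffProcessor.diff_splitter(u''))      -> []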
829 853 def prepare(self, inline_diff=True):
830 854 """
831 855 Prepare the passed udiff for HTML rendering.
832 856
833 857 :return: A list of dicts with diff information.
834 858 """
835 859 parsed = self._parser(inline_diff=inline_diff)
836 860 self.parsed = True
837 861 self.parsed_diff = parsed
838 862 return parsed
839 863
840 864 def as_raw(self, diff_lines=None):
841 865 """
842 866 Returns raw diff as a byte string
843 867 """
844 868 return self._diff.raw
845 869
846 870 def as_html(self, table_class='code-difftable', line_class='line',
847 871 old_lineno_class='lineno old', new_lineno_class='lineno new',
848 872 code_class='code', enable_comments=False, parsed_lines=None):
849 873 """
850 874 Return given diff as html table with customized css classes
851 875 """
852 876 # TODO(marcink): not sure how to pass in translator
853 877 # here in an efficient way, leave the _ for proper gettext extraction
854 878 _ = lambda s: s
855 879
856 880 def _link_to_if(condition, label, url):
857 881 """
858 882 Generates a link if the condition is met, or just the label if not.
859 883 """
860 884
861 885 if condition:
862 886 return '''<a href="%(url)s" class="tooltip"
863 887 title="%(title)s">%(label)s</a>''' % {
864 888 'title': _('Click to select line'),
865 889 'url': url,
866 890 'label': label
867 891 }
868 892 else:
869 893 return label
870 894 if not self.parsed:
871 895 self.prepare()
872 896
873 897 diff_lines = self.parsed_diff
874 898 if parsed_lines:
875 899 diff_lines = parsed_lines
876 900
877 901 _html_empty = True
878 902 _html = []
879 903 _html.append('''<table class="%(table_class)s">\n''' % {
880 904 'table_class': table_class
881 905 })
882 906
883 907 for diff in diff_lines:
884 908 for line in diff['chunks']:
885 909 _html_empty = False
886 910 for change in line:
887 911 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
888 912 'lc': line_class,
889 913 'action': change['action']
890 914 })
891 915 anchor_old_id = ''
892 916 anchor_new_id = ''
893 917 anchor_old = "%(filename)s_o%(oldline_no)s" % {
894 918 'filename': self._safe_id(diff['filename']),
895 919 'oldline_no': change['old_lineno']
896 920 }
897 921 anchor_new = "%(filename)s_n%(oldline_no)s" % {
898 922 'filename': self._safe_id(diff['filename']),
899 923 'oldline_no': change['new_lineno']
900 924 }
901 925 cond_old = (change['old_lineno'] != '...' and
902 926 change['old_lineno'])
903 927 cond_new = (change['new_lineno'] != '...' and
904 928 change['new_lineno'])
905 929 if cond_old:
906 930 anchor_old_id = 'id="%s"' % anchor_old
907 931 if cond_new:
908 932 anchor_new_id = 'id="%s"' % anchor_new
909 933
910 934 if change['action'] != Action.CONTEXT:
911 935 anchor_link = True
912 936 else:
913 937 anchor_link = False
914 938
915 939 ###########################################################
916 940 # COMMENT ICONS
917 941 ###########################################################
918 942 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
919 943
920 944 if enable_comments and change['action'] != Action.CONTEXT:
921 945 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
922 946
923 947 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
924 948
925 949 ###########################################################
926 950 # OLD LINE NUMBER
927 951 ###########################################################
928 952 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
929 953 'a_id': anchor_old_id,
930 954 'olc': old_lineno_class
931 955 })
932 956
933 957 _html.append('''%(link)s''' % {
934 958 'link': _link_to_if(anchor_link, change['old_lineno'],
935 959 '#%s' % anchor_old)
936 960 })
937 961 _html.append('''</td>\n''')
938 962 ###########################################################
939 963 # NEW LINE NUMBER
940 964 ###########################################################
941 965
942 966 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
943 967 'a_id': anchor_new_id,
944 968 'nlc': new_lineno_class
945 969 })
946 970
947 971 _html.append('''%(link)s''' % {
948 972 'link': _link_to_if(anchor_link, change['new_lineno'],
949 973 '#%s' % anchor_new)
950 974 })
951 975 _html.append('''</td>\n''')
952 976 ###########################################################
953 977 # CODE
954 978 ###########################################################
955 979 code_classes = [code_class]
956 980 if (not enable_comments or
957 981 change['action'] == Action.CONTEXT):
958 982 code_classes.append('no-comment')
959 983 _html.append('\t<td class="%s">' % ' '.join(code_classes))
960 984 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
961 985 'code': change['line']
962 986 })
963 987
964 988 _html.append('''\t</td>''')
965 989 _html.append('''\n</tr>\n''')
966 990 _html.append('''</table>''')
967 991 if _html_empty:
968 992 return None
969 993 return ''.join(_html)
970 994
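A hedged usage sketch for the render path above, mirroring the as_html test further down in this changeset (`raw_diff` is assumed to hold a git unified diff as text):

    diff = GitDiff(raw_diff)
    processor = DiffProcessor(diff)
    processor.prepare()                     # parse the udiff once
    html = processor.as_html(enable_comments=True)
    added, removed = processor.stat()       # line counts for the diff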
971 995 def stat(self):
972 996 """
973 997 Returns a tuple of (added, removed) line counts for this instance
974 998 """
975 999 return self.adds, self.removes
976 1000
977 1001 def get_context_of_line(
978 1002 self, path, diff_line=None, context_before=3, context_after=3):
979 1003 """
980 1004 Returns the context lines for the specified diff line.
981 1005
982 1006 :type diff_line: :class:`DiffLineNumber`
983 1007 """
984 1008 assert self.parsed, "DiffProcessor is not initialized."
985 1009
986 1010 if None not in diff_line:
987 1011 raise ValueError(
988 1012 "Cannot specify both line numbers: {}".format(diff_line))
989 1013
990 1014 file_diff = self._get_file_diff(path)
991 1015 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
992 1016
993 1017 first_line_to_include = max(idx - context_before, 0)
994 1018 first_line_after_context = idx + context_after + 1
995 1019 context_lines = chunk[first_line_to_include:first_line_after_context]
996 1020
997 1021 line_contents = [
998 1022 _context_line(line) for line in context_lines
999 1023 if _is_diff_content(line)]
1000 1024 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1001 1025 # Once they are fixed, we can drop this line here.
1002 1026 if line_contents:
1003 1027 line_contents[-1] = (
1004 1028 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1005 1029 return line_contents
1006 1030
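An illustrative call (a sketch, reusing the prepared `processor` from the usage example above); exactly one side of the `DiffLineNumber` may be set, otherwise the ValueError above is raised:

    context = processor.get_context_of_line(
        path='setup.py',
        diff_line=DiffLineNumber(old=None, new=5),
        context_before=3, context_after=3)
    # -> a list of (action, line) tuples around new line 5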
1007 1031 def find_context(self, path, context, offset=0):
1008 1032 """
1009 1033 Finds the given `context` inside of the diff.
1010 1034
1011 1035 Use the parameter `offset` to specify which offset the target line has
1012 1036 inside of the given `context`. This way the correct diff line will be
1013 1037 returned.
1014 1038
1015 1039 :param offset: Specifies the offset of the target line
1016 1040 within the given `context`.
1017 1041 """
1018 1042 if offset < 0 or offset >= len(context):
1019 1043 raise ValueError(
1020 1044 "Only positive values up to the length of the context "
1021 1045 "minus one are allowed.")
1022 1046
1023 1047 matches = []
1024 1048 file_diff = self._get_file_diff(path)
1025 1049
1026 1050 for chunk in file_diff['chunks']:
1027 1051 context_iter = iter(context)
1028 1052 for line_idx, line in enumerate(chunk):
1029 1053 try:
1030 1054 if _context_line(line) == context_iter.next():
1031 1055 continue
1032 1056 except StopIteration:
1033 1057 matches.append((line_idx, chunk))
1034 1058 context_iter = iter(context)
1035 1059
1036 1060 # Increment position and trigger StopIteration
1037 1061 # if we had a match at the end
1038 1062 line_idx += 1
1039 1063 try:
1040 1064 context_iter.next()
1041 1065 except StopIteration:
1042 1066 matches.append((line_idx, chunk))
1043 1067
1044 1068 effective_offset = len(context) - offset
1045 1069 found_at_diff_lines = [
1046 1070 _line_to_diff_line_number(chunk[idx - effective_offset])
1047 1071 for idx, chunk in matches]
1048 1072
1049 1073 return found_at_diff_lines
1050 1074
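An illustrative call (sketch; the context lines are hypothetical): find where a two-line context occurs, with the target line being the second line of that context:

    context = [
        (Action.UNMODIFIED, u'try:\n'),
        (Action.UNMODIFIED, u'    from setuptools import setup, Extension\n'),
    ]
    found = processor.find_context('setup.py', context, offset=1)
    # -> a DiffLineNumber for the second context line of each match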
1051 1075 def _get_file_diff(self, path):
1052 1076 for file_diff in self.parsed_diff:
1053 1077 if file_diff['filename'] == path:
1054 1078 break
1055 1079 else:
1056 1080 raise FileNotInDiffException("File {} not in diff".format(path))
1057 1081 return file_diff
1058 1082
1059 1083 def _find_chunk_line_index(self, file_diff, diff_line):
1060 1084 for chunk in file_diff['chunks']:
1061 1085 for idx, line in enumerate(chunk):
1062 1086 if line['old_lineno'] == diff_line.old:
1063 1087 return chunk, idx
1064 1088 if line['new_lineno'] == diff_line.new:
1065 1089 return chunk, idx
1066 1090 raise LineNotInDiffException(
1067 1091 "The line {} is not part of the diff.".format(diff_line))
1068 1092
1069 1093
1070 1094 def _is_diff_content(line):
1071 1095 return line['action'] in (
1072 1096 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1073 1097
1074 1098
1075 1099 def _context_line(line):
1076 1100 return (line['action'], line['line'])
1077 1101
1078 1102
1079 1103 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1080 1104
1081 1105
1082 1106 def _line_to_diff_line_number(line):
1083 1107 new_line_no = line['new_lineno'] or None
1084 1108 old_line_no = line['old_lineno'] or None
1085 1109 return DiffLineNumber(old=old_line_no, new=new_line_no)
1086 1110
1087 1111
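For example (illustrative), a line that exists only on the new side of the diff maps to a DiffLineNumber with the old side unset:

    line = {'old_lineno': '', 'new_lineno': 5, 'action': Action.ADD}
    assert _line_to_diff_line_number(line) == DiffLineNumber(old=None, new=5)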
1088 1112 class FileNotInDiffException(Exception):
1089 1113 """
1090 1114 Raised when the context for a missing file is requested.
1091 1115
1092 1116 If you request the context for a line in a file which is not part of the
1093 1117 given diff, then this exception is raised.
1094 1118 """
1095 1119
1096 1120
1097 1121 class LineNotInDiffException(Exception):
1098 1122 """
1099 1123 Raised when the context for a missing line is requested.
1100 1124
1101 1125 If you request the context for a line in a file and this line is not
1102 1126 part of the given diff, then this exception is raised.
1103 1127 """
1104 1128
1105 1129
1106 1130 class DiffLimitExceeded(Exception):
1107 1131 pass
@@ -1,331 +1,311 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2016-2018 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 import pytest
22 22 from pygments.lexers import get_lexer_by_name
23 23
24 24 from rhodecode.tests import no_newline_id_generator
25 25 from rhodecode.lib.codeblocks import (
26 26 tokenize_string, split_token_stream, rollup_tokenstream,
27 27 render_tokenstream)
28 28
29 29
30 30 class TestTokenizeString(object):
31 31
32 32 python_code = '''
33 33 import this
34 34
35 35 var = 6
36 36 print "this"
37 37
38 38 '''
39 39
40 40 def test_tokenize_as_python(self):
41 41 lexer = get_lexer_by_name('python')
42 42 tokens = list(tokenize_string(self.python_code, lexer))
43 43
44 44 assert tokens == [
45 45 ('', u'\n'),
46 46 ('', u' '),
47 47 ('kn', u'import'),
48 48 ('', u' '),
49 49 ('nn', u'this'),
50 50 ('', u'\n'),
51 51 ('', u'\n'),
52 52 ('', u' '),
53 53 ('n', u'var'),
54 54 ('', u' '),
55 55 ('o', u'='),
56 56 ('', u' '),
57 57 ('mi', u'6'),
58 58 ('', u'\n'),
59 59 ('', u' '),
60 60 ('k', u'print'),
61 61 ('', u' '),
62 62 ('s2', u'"'),
63 63 ('s2', u'this'),
64 64 ('s2', u'"'),
65 65 ('', u'\n'),
66 66 ('', u'\n'),
67 67 ('', u' ')
68 68 ]
69 69
70 70 def test_tokenize_as_text(self):
71 71 lexer = get_lexer_by_name('text')
72 72 tokens = list(tokenize_string(self.python_code, lexer))
73 73
74 74 assert tokens == [
75 75 ('',
76 76 u'\n import this\n\n var = 6\n print "this"\n\n ')
77 77 ]
78 78
79 79
80 80 class TestSplitTokenStream(object):
81 81
82 82 def test_split_token_stream(self):
83 83 lines = list(split_token_stream(
84 84 [('type1', 'some\ntext'), ('type2', 'more\n')]))
85 85
86 86 assert lines == [
87 87 [('type1', u'some')],
88 88 [('type1', u'text'), ('type2', u'more')],
89 89 [('type2', u'')],
90 90 ]
91 91
92 def test_split_token_stream_other_char(self):
93 lines = list(split_token_stream(
94 [('type1', 'some\ntext'), ('type2', 'more\n')],
95 split_string='m'))
96
97 assert lines == [
98 [('type1', 'so')],
99 [('type1', 'e\ntext'), ('type2', '')],
100 [('type2', 'ore\n')],
101 ]
102
103 def test_split_token_stream_without_char(self):
104 lines = list(split_token_stream(
105 [('type1', 'some\ntext'), ('type2', 'more\n')],
106 split_string='z'))
107
108 assert lines == [
109 [('type1', 'some\ntext'), ('type2', 'more\n')]
110 ]
111
112 92 def test_split_token_stream_single(self):
113 93 lines = list(split_token_stream(
114 [('type1', '\n')], split_string='\n'))
94 [('type1', '\n')]))
115 95
116 96 assert lines == [
117 97 [('type1', '')],
118 98 [('type1', '')],
119 99 ]
120 100
121 101 def test_split_token_stream_single_repeat(self):
122 102 lines = list(split_token_stream(
123 [('type1', '\n\n\n')], split_string='\n'))
103 [('type1', '\n\n\n')]))
124 104
125 105 assert lines == [
126 106 [('type1', '')],
127 107 [('type1', '')],
128 108 [('type1', '')],
129 109 [('type1', '')],
130 110 ]
131 111
132 112 def test_split_token_stream_multiple_repeat(self):
133 113 lines = list(split_token_stream(
134 [('type1', '\n\n'), ('type2', '\n\n')], split_string='\n'))
114 [('type1', '\n\n'), ('type2', '\n\n')]))
135 115
136 116 assert lines == [
137 117 [('type1', '')],
138 118 [('type1', '')],
139 119 [('type1', ''), ('type2', '')],
140 120 [('type2', '')],
141 121 [('type2', '')],
142 122 ]
143 123
144 124
145 125 class TestRollupTokens(object):
146 126
147 127 @pytest.mark.parametrize('tokenstream,output', [
148 128 ([],
149 129 []),
150 130 ([('A', 'hell'), ('A', 'o')], [
151 131 ('A', [
152 132 ('', 'hello')]),
153 133 ]),
154 134 ([('A', 'hell'), ('B', 'o')], [
155 135 ('A', [
156 136 ('', 'hell')]),
157 137 ('B', [
158 138 ('', 'o')]),
159 139 ]),
160 140 ([('A', 'hel'), ('A', 'lo'), ('B', ' '), ('A', 'there')], [
161 141 ('A', [
162 142 ('', 'hello')]),
163 143 ('B', [
164 144 ('', ' ')]),
165 145 ('A', [
166 146 ('', 'there')]),
167 147 ]),
168 148 ])
169 149 def test_rollup_tokenstream_without_ops(self, tokenstream, output):
170 150 assert list(rollup_tokenstream(tokenstream)) == output
171 151
172 152 @pytest.mark.parametrize('tokenstream,output', [
173 153 ([],
174 154 []),
175 155 ([('A', '', 'hell'), ('A', '', 'o')], [
176 156 ('A', [
177 157 ('', 'hello')]),
178 158 ]),
179 159 ([('A', '', 'hell'), ('B', '', 'o')], [
180 160 ('A', [
181 161 ('', 'hell')]),
182 162 ('B', [
183 163 ('', 'o')]),
184 164 ]),
185 165 ([('A', '', 'h'), ('B', '', 'e'), ('C', '', 'y')], [
186 166 ('A', [
187 167 ('', 'h')]),
188 168 ('B', [
189 169 ('', 'e')]),
190 170 ('C', [
191 171 ('', 'y')]),
192 172 ]),
193 173 ([('A', '', 'h'), ('A', '', 'e'), ('C', '', 'y')], [
194 174 ('A', [
195 175 ('', 'he')]),
196 176 ('C', [
197 177 ('', 'y')]),
198 178 ]),
199 179 ([('A', 'ins', 'h'), ('A', 'ins', 'e')], [
200 180 ('A', [
201 181 ('ins', 'he')
202 182 ]),
203 183 ]),
204 184 ([('A', 'ins', 'h'), ('A', 'del', 'e')], [
205 185 ('A', [
206 186 ('ins', 'h'),
207 187 ('del', 'e')
208 188 ]),
209 189 ]),
210 190 ([('A', 'ins', 'h'), ('B', 'del', 'e'), ('B', 'del', 'y')], [
211 191 ('A', [
212 192 ('ins', 'h'),
213 193 ]),
214 194 ('B', [
215 195 ('del', 'ey'),
216 196 ]),
217 197 ]),
218 198 ([('A', 'ins', 'h'), ('A', 'del', 'e'), ('B', 'del', 'y')], [
219 199 ('A', [
220 200 ('ins', 'h'),
221 201 ('del', 'e'),
222 202 ]),
223 203 ('B', [
224 204 ('del', 'y'),
225 205 ]),
226 206 ]),
227 207 ([('A', '', 'some'), ('A', 'ins', 'new'), ('A', '', 'name')], [
228 208 ('A', [
229 209 ('', 'some'),
230 210 ('ins', 'new'),
231 211 ('', 'name'),
232 212 ]),
233 213 ]),
234 214 ])
235 215 def test_rollup_tokenstream_with_ops(self, tokenstream, output):
236 216 assert list(rollup_tokenstream(tokenstream)) == output
237 217
238 218
239 219 class TestRenderTokenStream(object):
240 220
241 221 @pytest.mark.parametrize('tokenstream,output', [
242 222 (
243 223 [],
244 224 '',
245 225 ),
246 226 (
247 227 [('', '', u'')],
248 228 '<span></span>',
249 229 ),
250 230 (
251 231 [('', '', u'text')],
252 232 '<span>text</span>',
253 233 ),
254 234 (
255 235 [('A', '', u'')],
256 236 '<span class="A"></span>',
257 237 ),
258 238 (
259 239 [('A', '', u'hello')],
260 240 '<span class="A">hello</span>',
261 241 ),
262 242 (
263 243 [('A', '', u'hel'), ('A', '', u'lo')],
264 244 '<span class="A">hello</span>',
265 245 ),
266 246 (
267 247 [('A', '', u'two\n'), ('A', '', u'lines')],
268 248 '<span class="A">two\nlines</span>',
269 249 ),
270 250 (
271 251 [('A', '', u'\nthree\n'), ('A', '', u'lines')],
272 252 '<span class="A">\nthree\nlines</span>',
273 253 ),
274 254 (
275 255 [('', '', u'\n'), ('A', '', u'line')],
276 256 '<span>\n</span><span class="A">line</span>',
277 257 ),
278 258 (
279 259 [('', 'ins', u'\n'), ('A', '', u'line')],
280 260 '<span><ins>\n</ins></span><span class="A">line</span>',
281 261 ),
282 262 (
283 263 [('A', '', u'hel'), ('A', 'ins', u'lo')],
284 264 '<span class="A">hel<ins>lo</ins></span>',
285 265 ),
286 266 (
287 267 [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'ins', u'o')],
288 268 '<span class="A">hel<ins>lo</ins></span>',
289 269 ),
290 270 (
291 271 [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'del', u'o')],
292 272 '<span class="A">hel<ins>l</ins><del>o</del></span>',
293 273 ),
294 274 (
295 275 [('A', '', u'hel'), ('B', '', u'lo')],
296 276 '<span class="A">hel</span><span class="B">lo</span>',
297 277 ),
298 278 (
299 279 [('A', '', u'hel'), ('B', 'ins', u'lo')],
300 280 '<span class="A">hel</span><span class="B"><ins>lo</ins></span>',
301 281 ),
302 282 ], ids=no_newline_id_generator)
303 283 def test_render_tokenstream_with_ops(self, tokenstream, output):
304 284 html = render_tokenstream(tokenstream)
305 285 assert html == output
306 286
307 287 @pytest.mark.parametrize('tokenstream,output', [
308 288 (
309 289 [('A', u'hel'), ('A', u'lo')],
310 290 '<span class="A">hello</span>',
311 291 ),
312 292 (
313 293 [('A', u'hel'), ('A', u'l'), ('A', u'o')],
314 294 '<span class="A">hello</span>',
315 295 ),
316 296 (
317 297 [('A', u'hel'), ('A', u'l'), ('A', u'o')],
318 298 '<span class="A">hello</span>',
319 299 ),
320 300 (
321 301 [('A', u'hel'), ('B', u'lo')],
322 302 '<span class="A">hel</span><span class="B">lo</span>',
323 303 ),
324 304 (
325 305 [('A', u'hel'), ('B', u'lo')],
326 306 '<span class="A">hel</span><span class="B">lo</span>',
327 307 ),
328 308 ])
329 309 def test_render_tokenstream_without_ops(self, tokenstream, output):
330 310 html = render_tokenstream(tokenstream)
331 311 assert html == output
@@ -1,813 +1,831 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2010-2018 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 import textwrap
22 22
23 23 import pytest
24 24
25 25 from rhodecode.lib.diffs import (
26 26 DiffProcessor,
27 27 NEW_FILENODE, DEL_FILENODE, MOD_FILENODE, RENAMED_FILENODE,
28 28 CHMOD_FILENODE, BIN_FILENODE, COPIED_FILENODE)
29 from rhodecode.tests.fixture import Fixture
29 from rhodecode.tests.fixture import Fixture, no_newline_id_generator
30 30 from rhodecode.lib.vcs.backends.git.repository import GitDiff
31 31 from rhodecode.lib.vcs.backends.hg.repository import MercurialDiff
32 32 from rhodecode.lib.vcs.backends.svn.repository import SubversionDiff
33 33
34 34 fixture = Fixture()
35 35
36 36
37 37 def test_diffprocessor_as_html_with_comments():
38 38 raw_diff = textwrap.dedent('''
39 39 diff --git a/setup.py b/setup.py
40 40 index 5b36422..cfd698e 100755
41 41 --- a/setup.py
42 42 +++ b/setup.py
43 43 @@ -2,7 +2,7 @@
44 44 #!/usr/bin/python
45 45 # Setup file for X
46 46 # Copyright (C) No one
47 47 -
48 48 +x
49 49 try:
50 50 from setuptools import setup, Extension
51 51 except ImportError:
52 52 ''')
53 53 diff = GitDiff(raw_diff)
54 54 processor = DiffProcessor(diff)
55 55 processor.prepare()
56 56
57 57 # Note that the cell with the context in line 5 (in the html) has the
58 58 # no-comment class, which will prevent the add comment icon from being displayed.
59 59 expected_html = textwrap.dedent('''
60 60 <table class="code-difftable">
61 61 <tr class="line context">
62 62 <td class="add-comment-line"><span class="add-comment-content"></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
63 63 <td class="lineno old">...</td>
64 64 <td class="lineno new">...</td>
65 65 <td class="code no-comment">
66 66 <pre>@@ -2,7 +2,7 @@
67 67 </pre>
68 68 </td>
69 69 </tr>
70 70 <tr class="line unmod">
71 71 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
72 72 <td id="setuppy_o2" class="lineno old"><a href="#setuppy_o2" class="tooltip"
73 73 title="Click to select line">2</a></td>
74 74 <td id="setuppy_n2" class="lineno new"><a href="#setuppy_n2" class="tooltip"
75 75 title="Click to select line">2</a></td>
76 76 <td class="code">
77 77 <pre>#!/usr/bin/python
78 78 </pre>
79 79 </td>
80 80 </tr>
81 81 <tr class="line unmod">
82 82 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
83 83 <td id="setuppy_o3" class="lineno old"><a href="#setuppy_o3" class="tooltip"
84 84 title="Click to select line">3</a></td>
85 85 <td id="setuppy_n3" class="lineno new"><a href="#setuppy_n3" class="tooltip"
86 86 title="Click to select line">3</a></td>
87 87 <td class="code">
88 88 <pre># Setup file for X
89 89 </pre>
90 90 </td>
91 91 </tr>
92 92 <tr class="line unmod">
93 93 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
94 94 <td id="setuppy_o4" class="lineno old"><a href="#setuppy_o4" class="tooltip"
95 95 title="Click to select line">4</a></td>
96 96 <td id="setuppy_n4" class="lineno new"><a href="#setuppy_n4" class="tooltip"
97 97 title="Click to select line">4</a></td>
98 98 <td class="code">
99 99 <pre># Copyright (C) No one
100 100 </pre>
101 101 </td>
102 102 </tr>
103 103 <tr class="line del">
104 104 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
105 105 <td id="setuppy_o5" class="lineno old"><a href="#setuppy_o5" class="tooltip"
106 106 title="Click to select line">5</a></td>
107 107 <td class="lineno new"><a href="#setuppy_n" class="tooltip"
108 108 title="Click to select line"></a></td>
109 109 <td class="code">
110 110 <pre>
111 111 </pre>
112 112 </td>
113 113 </tr>
114 114 <tr class="line add">
115 115 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
116 116 <td class="lineno old"><a href="#setuppy_o" class="tooltip"
117 117 title="Click to select line"></a></td>
118 118 <td id="setuppy_n5" class="lineno new"><a href="#setuppy_n5" class="tooltip"
119 119 title="Click to select line">5</a></td>
120 120 <td class="code">
121 121 <pre><ins>x</ins>
122 122 </pre>
123 123 </td>
124 124 </tr>
125 125 <tr class="line unmod">
126 126 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
127 127 <td id="setuppy_o6" class="lineno old"><a href="#setuppy_o6" class="tooltip"
128 128 title="Click to select line">6</a></td>
129 129 <td id="setuppy_n6" class="lineno new"><a href="#setuppy_n6" class="tooltip"
130 130 title="Click to select line">6</a></td>
131 131 <td class="code">
132 132 <pre>try:
133 133 </pre>
134 134 </td>
135 135 </tr>
136 136 <tr class="line unmod">
137 137 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
138 138 <td id="setuppy_o7" class="lineno old"><a href="#setuppy_o7" class="tooltip"
139 139 title="Click to select line">7</a></td>
140 140 <td id="setuppy_n7" class="lineno new"><a href="#setuppy_n7" class="tooltip"
141 141 title="Click to select line">7</a></td>
142 142 <td class="code">
143 143 <pre> from setuptools import setup, Extension
144 144 </pre>
145 145 </td>
146 146 </tr>
147 147 <tr class="line unmod">
148 148 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
149 149 <td id="setuppy_o8" class="lineno old"><a href="#setuppy_o8" class="tooltip"
150 150 title="Click to select line">8</a></td>
151 151 <td id="setuppy_n8" class="lineno new"><a href="#setuppy_n8" class="tooltip"
152 152 title="Click to select line">8</a></td>
153 153 <td class="code">
154 154 <pre>except ImportError:
155 155 </pre>
156 156 </td>
157 157 </tr>
158 158 </table>
159 159 ''').strip()
160 160 html = processor.as_html(enable_comments=True).replace('\t', ' ')
161 161
162 162 assert html == expected_html
163 163
164 164
165 class TestMixedFilenameEncodings:
165 class TestMixedFilenameEncodings(object):
166 166
167 167 @pytest.fixture(scope="class")
168 168 def raw_diff(self):
169 169 return fixture.load_resource(
170 170 'hg_diff_mixed_filename_encodings.diff')
171 171
172 172 @pytest.fixture
173 173 def processor(self, raw_diff):
174 174 diff = MercurialDiff(raw_diff)
175 175 processor = DiffProcessor(diff)
176 176 return processor
177 177
178 178 def test_filenames_are_decoded_to_unicode(self, processor):
179 179 diff_data = processor.prepare()
180 180 filenames = [item['filename'] for item in diff_data]
181 181 assert filenames == [
182 182 u'spΓ€cial-utf8.txt', u'spοΏ½cial-cp1252.txt', u'spοΏ½cial-latin1.txt']
183 183
184 184 def test_raw_diff_is_decoded_to_unicode(self, processor):
185 185 diff_data = processor.prepare()
186 186 raw_diffs = [item['raw_diff'] for item in diff_data]
187 187 new_file_message = u'\nnew file mode 100644\n'
188 188 expected_raw_diffs = [
189 189 u' a/spΓ€cial-utf8.txt b/spΓ€cial-utf8.txt' + new_file_message,
190 190 u' a/spοΏ½cial-cp1252.txt b/spοΏ½cial-cp1252.txt' + new_file_message,
191 191 u' a/spοΏ½cial-latin1.txt b/spοΏ½cial-latin1.txt' + new_file_message]
192 192 assert raw_diffs == expected_raw_diffs
193 193
194 194 def test_as_raw_preserves_the_encoding(self, processor, raw_diff):
195 195 assert processor.as_raw() == raw_diff
196 196
197 197
198 198 # TODO: mikhail: format the following data structure properly
199 199 DIFF_FIXTURES = [
200 200 ('hg',
201 201 'hg_diff_add_single_binary_file.diff',
202 202 [('US Warszawa.jpg', 'A',
203 203 {'added': 0,
204 204 'deleted': 0,
205 205 'binary': True,
206 206 'ops': {NEW_FILENODE: 'new file 100755',
207 207 BIN_FILENODE: 'binary diff hidden'}}),
208 208 ]),
209 209 ('hg',
210 210 'hg_diff_mod_single_binary_file.diff',
211 211 [('US Warszawa.jpg', 'M',
212 212 {'added': 0,
213 213 'deleted': 0,
214 214 'binary': True,
215 215 'ops': {MOD_FILENODE: 'modified file',
216 216 BIN_FILENODE: 'binary diff hidden'}}),
217 217 ]),
218 218 ('hg',
219 219 'hg_diff_mod_single_file_and_rename_and_chmod.diff',
220 220 [('README', 'M',
221 221 {'added': 3,
222 222 'deleted': 0,
223 223 'binary': False,
224 224 'ops': {MOD_FILENODE: 'modified file',
225 225 RENAMED_FILENODE: 'file renamed from README.rst to README',
226 226 CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
227 227 ]),
228 228 ('hg',
229 229 'hg_diff_no_newline.diff',
230 230 [('server.properties', 'M',
231 231 {'added': 2,
232 232 'deleted': 1,
233 233 'binary': False,
234 234 'ops': {MOD_FILENODE: 'modified file'}}),
235 235 ]),
236 236 ('hg',
237 237 'hg_diff_mod_file_and_rename.diff',
238 238 [('README.rst', 'M',
239 239 {'added': 3,
240 240 'deleted': 0,
241 241 'binary': False,
242 242 'ops': {MOD_FILENODE: 'modified file',
243 243 RENAMED_FILENODE: 'file renamed from README to README.rst'}}),
244 244 ]),
245 245 ('hg',
246 246 'hg_diff_del_single_binary_file.diff',
247 247 [('US Warszawa.jpg', 'D',
248 248 {'added': 0,
249 249 'deleted': 0,
250 250 'binary': True,
251 251 'ops': {DEL_FILENODE: 'deleted file',
252 252 BIN_FILENODE: 'binary diff hidden'}}),
253 253 ]),
254 254 ('hg',
255 255 'hg_diff_chmod_and_mod_single_binary_file.diff',
256 256 [('gravatar.png', 'M',
257 257 {'added': 0,
258 258 'deleted': 0,
259 259 'binary': True,
260 260 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
261 261 BIN_FILENODE: 'binary diff hidden'}}),
262 262 ]),
263 263 ('hg',
264 264 'hg_diff_chmod.diff',
265 265 [('file', 'M',
266 266 {'added': 0,
267 267 'deleted': 0,
268 268 'binary': True,
269 269 'ops': {CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
270 270 ]),
271 271 ('hg',
272 272 'hg_diff_rename_file.diff',
273 273 [('file_renamed', 'M',
274 274 {'added': 0,
275 275 'deleted': 0,
276 276 'binary': True,
277 277 'ops': {RENAMED_FILENODE: 'file renamed from file to file_renamed'}}),
278 278 ]),
279 279 ('hg',
280 280 'hg_diff_rename_and_chmod_file.diff',
281 281 [('README', 'M',
282 282 {'added': 0,
283 283 'deleted': 0,
284 284 'binary': True,
285 285 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
286 286 RENAMED_FILENODE: 'file renamed from README.rst to README'}}),
287 287 ]),
288 288 ('hg',
289 289 'hg_diff_binary_and_normal.diff',
290 290 [('img/baseline-10px.png', 'A',
291 291 {'added': 0,
292 292 'deleted': 0,
293 293 'binary': True,
294 294 'ops': {NEW_FILENODE: 'new file 100644',
295 295 BIN_FILENODE: 'binary diff hidden'}}),
296 296 ('js/jquery/hashgrid.js', 'A',
297 297 {'added': 340,
298 298 'deleted': 0,
299 299 'binary': False,
300 300 'ops': {NEW_FILENODE: 'new file 100755'}}),
301 301 ('index.html', 'M',
302 302 {'added': 3,
303 303 'deleted': 2,
304 304 'binary': False,
305 305 'ops': {MOD_FILENODE: 'modified file'}}),
306 306 ('less/docs.less', 'M',
307 307 {'added': 34,
308 308 'deleted': 0,
309 309 'binary': False,
310 310 'ops': {MOD_FILENODE: 'modified file'}}),
311 311 ('less/scaffolding.less', 'M',
312 312 {'added': 1,
313 313 'deleted': 3,
314 314 'binary': False,
315 315 'ops': {MOD_FILENODE: 'modified file'}}),
316 316 ('readme.markdown', 'M',
317 317 {'added': 1,
318 318 'deleted': 10,
319 319 'binary': False,
320 320 'ops': {MOD_FILENODE: 'modified file'}}),
321 321 ('img/baseline-20px.png', 'D',
322 322 {'added': 0,
323 323 'deleted': 0,
324 324 'binary': True,
325 325 'ops': {DEL_FILENODE: 'deleted file',
326 326 BIN_FILENODE: 'binary diff hidden'}}),
327 327 ('js/global.js', 'D',
328 328 {'added': 0,
329 329 'deleted': 75,
330 330 'binary': False,
331 331 'ops': {DEL_FILENODE: 'deleted file'}})
332 332 ]),
333 333 ('git',
334 334 'git_diff_chmod.diff',
335 335 [('work-horus.xls', 'M',
336 336 {'added': 0,
337 337 'deleted': 0,
338 338 'binary': True,
339 339 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}})
340 340 ]),
341 341 ('git',
342 342 'git_diff_rename_file.diff',
343 343 [('file.xls', 'M',
344 344 {'added': 0,
345 345 'deleted': 0,
346 346 'binary': True,
347 347 'ops': {
348 348 RENAMED_FILENODE: 'file renamed from work-horus.xls to file.xls'}})
349 349 ]),
350 350 ('git',
351 351 'git_diff_mod_single_binary_file.diff',
352 352 [('US Warszawa.jpg', 'M',
353 353 {'added': 0,
354 354 'deleted': 0,
355 355 'binary': True,
356 356 'ops': {MOD_FILENODE: 'modified file',
357 357 BIN_FILENODE: 'binary diff hidden'}})
358 358 ]),
359 359 ('git',
360 360 'git_diff_binary_and_normal.diff',
361 361 [('img/baseline-10px.png', 'A',
362 362 {'added': 0,
363 363 'deleted': 0,
364 364 'binary': True,
365 365 'ops': {NEW_FILENODE: 'new file 100644',
366 366 BIN_FILENODE: 'binary diff hidden'}}),
367 367 ('js/jquery/hashgrid.js', 'A',
368 368 {'added': 340,
369 369 'deleted': 0,
370 370 'binary': False,
371 371 'ops': {NEW_FILENODE: 'new file 100755'}}),
372 372 ('index.html', 'M',
373 373 {'added': 3,
374 374 'deleted': 2,
375 375 'binary': False,
376 376 'ops': {MOD_FILENODE: 'modified file'}}),
377 377 ('less/docs.less', 'M',
378 378 {'added': 34,
379 379 'deleted': 0,
380 380 'binary': False,
381 381 'ops': {MOD_FILENODE: 'modified file'}}),
382 382 ('less/scaffolding.less', 'M',
383 383 {'added': 1,
384 384 'deleted': 3,
385 385 'binary': False,
386 386 'ops': {MOD_FILENODE: 'modified file'}}),
387 387 ('readme.markdown', 'M',
388 388 {'added': 1,
389 389 'deleted': 10,
390 390 'binary': False,
391 391 'ops': {MOD_FILENODE: 'modified file'}}),
392 392 ('img/baseline-20px.png', 'D',
393 393 {'added': 0,
394 394 'deleted': 0,
395 395 'binary': True,
396 396 'ops': {DEL_FILENODE: 'deleted file',
397 397 BIN_FILENODE: 'binary diff hidden'}}),
398 398 ('js/global.js', 'D',
399 399 {'added': 0,
400 400 'deleted': 75,
401 401 'binary': False,
402 402 'ops': {DEL_FILENODE: 'deleted file'}}),
403 403 ]),
404 404 ('hg',
405 405 'diff_with_diff_data.diff',
406 406 [('vcs/backends/base.py', 'M',
407 407 {'added': 18,
408 408 'deleted': 2,
409 409 'binary': False,
410 410 'ops': {MOD_FILENODE: 'modified file'}}),
411 411 ('vcs/backends/git/repository.py', 'M',
412 412 {'added': 46,
413 413 'deleted': 15,
414 414 'binary': False,
415 415 'ops': {MOD_FILENODE: 'modified file'}}),
416 416 ('vcs/backends/hg.py', 'M',
417 417 {'added': 22,
418 418 'deleted': 3,
419 419 'binary': False,
420 420 'ops': {MOD_FILENODE: 'modified file'}}),
421 421 ('vcs/tests/test_git.py', 'M',
422 422 {'added': 5,
423 423 'deleted': 5,
424 424 'binary': False,
425 425 'ops': {MOD_FILENODE: 'modified file'}}),
426 426 ('vcs/tests/test_repository.py', 'M',
427 427 {'added': 174,
428 428 'deleted': 2,
429 429 'binary': False,
430 430 'ops': {MOD_FILENODE: 'modified file'}}),
431 431 ]),
432 432 ('hg',
433 433 'hg_diff_copy_file.diff',
434 434 [('file2', 'M',
435 435 {'added': 0,
436 436 'deleted': 0,
437 437 'binary': True,
438 438 'ops': {COPIED_FILENODE: 'file copied from file1 to file2'}}),
439 439 ]),
440 440 ('hg',
441 441 'hg_diff_copy_and_modify_file.diff',
442 442 [('file3', 'M',
443 443 {'added': 1,
444 444 'deleted': 0,
445 445 'binary': False,
446 446 'ops': {COPIED_FILENODE: 'file copied from file2 to file3',
447 447 MOD_FILENODE: 'modified file'}}),
448 448 ]),
449 449 ('hg',
450 450 'hg_diff_copy_and_chmod_file.diff',
451 451 [('file4', 'M',
452 452 {'added': 0,
453 453 'deleted': 0,
454 454 'binary': True,
455 455 'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
456 456 CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
457 457 ]),
458 458 ('hg',
459 459 'hg_diff_copy_chmod_and_edit_file.diff',
460 460 [('file5', 'M',
461 461 {'added': 2,
462 462 'deleted': 1,
463 463 'binary': False,
464 464 'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
465 465 CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
466 466 MOD_FILENODE: 'modified file'}})]),
467 467
468 468 # Diffs to validate rename and copy file with space in its name
469 469 ('git',
470 470 'git_diff_rename_file_with_spaces.diff',
471 471 [('file_with_ two spaces.txt', 'M',
472 472 {'added': 0,
473 473 'deleted': 0,
474 474 'binary': True,
475 475 'ops': {
476 476 RENAMED_FILENODE: (
477 477 'file renamed from file_with_ spaces.txt to file_with_ '
478 478 ' two spaces.txt')}
479 479 }), ]),
480 480 ('hg',
481 481 'hg_diff_rename_file_with_spaces.diff',
482 482 [('file_changed _.txt', 'M',
483 483 {'added': 0,
484 484 'deleted': 0,
485 485 'binary': True,
486 486 'ops': {
487 487 RENAMED_FILENODE: (
488 488 'file renamed from file_ with update.txt to file_changed'
489 489 ' _.txt')}
490 490 }), ]),
491 491 ('hg',
492 492 'hg_diff_copy_file_with_spaces.diff',
493 493 [('file_copied_ with spaces.txt', 'M',
494 494 {'added': 0,
495 495 'deleted': 0,
496 496 'binary': True,
497 497 'ops': {
498 498 COPIED_FILENODE: (
499 499 'file copied from file_changed_without_spaces.txt to'
500 500 ' file_copied_ with spaces.txt')}
501 501 }),
502 502 ]),
503 503
504 504 # special signs from git
505 505 ('git',
506 506 'git_diff_binary_special_files.diff',
507 507 [('css/_Icon\\r', 'A',
508 508 {'added': 0,
509 509 'deleted': 0,
510 510 'binary': True,
511 511 'ops': {NEW_FILENODE: 'new file 100644',
512 512 BIN_FILENODE: 'binary diff hidden'}
513 513 }),
514 514 ]),
515 515 ('git',
516 516 'git_diff_binary_special_files_2.diff',
517 517 [('css/Icon\\r', 'A',
518 518 {'added': 0,
519 519 'deleted': 0,
520 520 'binary': True,
521 521 'ops': {NEW_FILENODE: 'new file 100644', }
522 522 }),
523 523 ]),
524 524
525 525 ('svn',
526 526 'svn_diff_binary_add_file.diff',
527 527 [('intl.dll', 'A',
528 528 {'added': 0,
529 529 'deleted': 0,
530 530 'binary': False,
531 531 'ops': {NEW_FILENODE: 'new file 10644',
532 532 #TODO(Marcink): depends on binary detection on svn patches
533 533 # BIN_FILENODE: 'binary diff hidden'
534 534 }
535 535 }),
536 536 ]),
537 537
538 538 ('svn',
539 539 'svn_diff_multiple_changes.diff',
540 540 [('trunk/doc/images/SettingsOverlay.png', 'M',
541 541 {'added': 0,
542 542 'deleted': 0,
543 543 'binary': False,
544 544 'ops': {MOD_FILENODE: 'modified file',
545 545 #TODO(Marcink): depends on binary detection on svn patches
546 546 # BIN_FILENODE: 'binary diff hidden'
547 547 }
548 548 }),
549 549 ('trunk/doc/source/de/tsvn_ch04.xml', 'M',
550 550 {'added': 89,
551 551 'deleted': 34,
552 552 'binary': False,
553 553 'ops': {MOD_FILENODE: 'modified file'}
554 554 }),
555 555 ('trunk/doc/source/en/tsvn_ch04.xml', 'M',
556 556 {'added': 66,
557 557 'deleted': 21,
558 558 'binary': False,
559 559 'ops': {MOD_FILENODE: 'modified file'}
560 560 }),
561 561 ('trunk/src/Changelog.txt', 'M',
562 562 {'added': 2,
563 563 'deleted': 0,
564 564 'binary': False,
565 565 'ops': {MOD_FILENODE: 'modified file'}
566 566 }),
567 567 ('trunk/src/Resources/TortoiseProcENG.rc', 'M',
568 568 {'added': 19,
569 569 'deleted': 13,
570 570 'binary': False,
571 571 'ops': {MOD_FILENODE: 'modified file'}
572 572 }),
573 573 ('trunk/src/TortoiseProc/SetOverlayPage.cpp', 'M',
574 574 {'added': 16,
575 575 'deleted': 1,
576 576 'binary': False,
577 577 'ops': {MOD_FILENODE: 'modified file'}
578 578 }),
579 579 ('trunk/src/TortoiseProc/SetOverlayPage.h', 'M',
580 580 {'added': 3,
581 581 'deleted': 0,
582 582 'binary': False,
583 583 'ops': {MOD_FILENODE: 'modified file'}
584 584 }),
585 585 ('trunk/src/TortoiseProc/resource.h', 'M',
586 586 {'added': 2,
587 587 'deleted': 0,
588 588 'binary': False,
589 589 'ops': {MOD_FILENODE: 'modified file'}
590 590 }),
591 591 ('trunk/src/TortoiseShell/ShellCache.h', 'M',
592 592 {'added': 50,
593 593 'deleted': 1,
594 594 'binary': False,
595 595 'ops': {MOD_FILENODE: 'modified file'}
596 596 }),
597 597 ]),
598 598
599 599
600 600 # TODO: mikhail: do we still need this?
601 601 # (
602 602 # 'hg',
603 603 # 'large_diff.diff',
604 604 # [
605 605 # ('.hgignore', 'A', {
606 606 # 'deleted': 0, 'binary': False, 'added': 3, 'ops': {
607 607 # 1: 'new file 100644'}}),
608 608 # (
609 609 # 'MANIFEST.in', 'A',
610 610 # {'deleted': 0, 'binary': False, 'added': 3, 'ops': {
611 611 # 1: 'new file 100644'}}),
612 612 # (
613 613 # 'README.txt', 'A',
614 614 # {'deleted': 0, 'binary': False, 'added': 19, 'ops': {
615 615 # 1: 'new file 100644'}}),
616 616 # (
617 617 # 'development.ini', 'A', {
618 618 # 'deleted': 0, 'binary': False, 'added': 116, 'ops': {
619 619 # 1: 'new file 100644'}}),
620 620 # (
621 621 # 'docs/index.txt', 'A', {
622 622 # 'deleted': 0, 'binary': False, 'added': 19, 'ops': {
623 623 # 1: 'new file 100644'}}),
624 624 # (
625 625 # 'ez_setup.py', 'A', {
626 626 # 'deleted': 0, 'binary': False, 'added': 276, 'ops': {
627 627 # 1: 'new file 100644'}}),
628 628 # (
629 629 # 'hgapp.py', 'A', {
630 630 # 'deleted': 0, 'binary': False, 'added': 26, 'ops': {
631 631 # 1: 'new file 100644'}}),
632 632 # (
633 633 # 'hgwebdir.config', 'A', {
634 634 # 'deleted': 0, 'binary': False, 'added': 21, 'ops': {
635 635 # 1: 'new file 100644'}}),
636 636 # (
637 637 # 'pylons_app.egg-info/PKG-INFO', 'A', {
638 638 # 'deleted': 0, 'binary': False, 'added': 10, 'ops': {
639 639 # 1: 'new file 100644'}}),
640 640 # (
641 641 # 'pylons_app.egg-info/SOURCES.txt', 'A', {
642 642 # 'deleted': 0, 'binary': False, 'added': 33, 'ops': {
643 643 # 1: 'new file 100644'}}),
644 644 # (
645 645 # 'pylons_app.egg-info/dependency_links.txt', 'A', {
646 646 # 'deleted': 0, 'binary': False, 'added': 1, 'ops': {
647 647 # 1: 'new file 100644'}}),
648 648 # ]
649 649 # ),
650 650 ]
651 651
652 652 DIFF_FIXTURES_WITH_CONTENT = [
653 653 (
654 654 'hg', 'hg_diff_single_file_change_newline.diff',
655 655 [
656 656 (
657 657 'file_b', # filename
658 658 'A', # change
659 659 { # stats
660 660 'added': 1,
661 661 'deleted': 0,
662 662 'binary': False,
663 663 'ops': {NEW_FILENODE: 'new file 100644', }
664 664 },
665 665 '@@ -0,0 +1 @@\n+test_content b\n' # diff
666 666 ),
667 667 ],
668 668 ),
669 669 (
670 670 'hg', 'hg_diff_double_file_change_newline.diff',
671 671 [
672 672 (
673 673 'file_b', # filename
674 674 'A', # change
675 675 { # stats
676 676 'added': 1,
677 677 'deleted': 0,
678 678 'binary': False,
679 679 'ops': {NEW_FILENODE: 'new file 100644', }
680 680 },
681 681 '@@ -0,0 +1 @@\n+test_content b\n' # diff
682 682 ),
683 683 (
684 684 'file_c', # filename
685 685 'A', # change
686 686 { # stats
687 687 'added': 1,
688 688 'deleted': 0,
689 689 'binary': False,
690 690 'ops': {NEW_FILENODE: 'new file 100644', }
691 691 },
692 692 '@@ -0,0 +1 @@\n+test_content c\n' # diff
693 693 ),
694 694 ],
695 695 ),
696 696 (
697 697 'hg', 'hg_diff_double_file_change_double_newline.diff',
698 698 [
699 699 (
700 700 'file_b', # filename
701 701 'A', # change
702 702 { # stats
703 703 'added': 1,
704 704 'deleted': 0,
705 705 'binary': False,
706 706 'ops': {NEW_FILENODE: 'new file 100644', }
707 707 },
708 708 '@@ -0,0 +1 @@\n+test_content b\n\n' # diff
709 709 ),
710 710 (
711 711 'file_c', # filename
712 712 'A', # change
713 713 { # stats
714 714 'added': 1,
715 715 'deleted': 0,
716 716 'binary': False,
717 717 'ops': {NEW_FILENODE: 'new file 100644', }
718 718 },
719 719 '@@ -0,0 +1 @@\n+test_content c\n' # diff
720 720 ),
721 721 ],
722 722 ),
723 723 (
724 724 'hg', 'hg_diff_four_file_change_newline.diff',
725 725 [
726 726 (
727 727 'file', # filename
728 728 'A', # change
729 729 { # stats
730 730 'added': 1,
731 731 'deleted': 0,
732 732 'binary': False,
733 733 'ops': {NEW_FILENODE: 'new file 100644', }
734 734 },
735 735 '@@ -0,0 +1,1 @@\n+file\n' # diff
736 736 ),
737 737 (
738 738 'file2', # filename
739 739 'A', # change
740 740 { # stats
741 741 'added': 1,
742 742 'deleted': 0,
743 743 'binary': False,
744 744 'ops': {NEW_FILENODE: 'new file 100644', }
745 745 },
746 746 '@@ -0,0 +1,1 @@\n+another line\n' # diff
747 747 ),
748 748 (
749 749 'file3', # filename
750 750 'A', # change
751 751 { # stats
752 752 'added': 1,
753 753 'deleted': 0,
754 754 'binary': False,
755 755 'ops': {NEW_FILENODE: 'new file 100644', }
756 756 },
757 757 '@@ -0,0 +1,1 @@\n+newline\n' # diff
758 758 ),
759 759 (
760 760 'file4', # filename
761 761 'A', # change
762 762 { # stats
763 763 'added': 1,
764 764 'deleted': 0,
765 765 'binary': False,
766 766 'ops': {NEW_FILENODE: 'new file 100644', }
767 767 },
768 768 '@@ -0,0 +1,1 @@\n+fil4\n\\ No newline at end of file' # diff
769 769 ),
770 770 ],
771 771 ),
772 772
773 773 ]
774 774
775 775
776 776 diff_class = {
777 777 'git': GitDiff,
778 778 'hg': MercurialDiff,
779 779 'svn': SubversionDiff,
780 780 }
781 781
782 782
783 783 @pytest.fixture(params=DIFF_FIXTURES)
784 784 def diff_fixture(request):
785 785 vcs, diff_fixture, expected = request.param
786 786 diff_txt = fixture.load_resource(diff_fixture)
787 787 diff = diff_class[vcs](diff_txt)
788 788 return diff, expected
789 789
790 790
791 791 def test_diff_lib(diff_fixture):
792 792 diff, expected_data = diff_fixture
793 793 diff_proc = DiffProcessor(diff)
794 794 diff_proc_d = diff_proc.prepare()
795 795 data = [(x['filename'], x['operation'], x['stats']) for x in diff_proc_d]
796 796 assert expected_data == data
797 797
798 798
799 799 @pytest.fixture(params=DIFF_FIXTURES_WITH_CONTENT)
800 800 def diff_fixture_w_content(request):
801 801 vcs, diff_fixture, expected = request.param
802 802 diff_txt = fixture.load_resource(diff_fixture)
803 803 diff = diff_class[vcs](diff_txt)
804 804 return diff, expected
805 805
806 806
807 807 def test_diff_lib_newlines(diff_fixture_w_content):
808 808 diff, expected_data = diff_fixture_w_content
809 809 diff_proc = DiffProcessor(diff)
810 810 diff_proc_d = diff_proc.prepare()
811 811 data = [(x['filename'], x['operation'], x['stats'], x['raw_diff'])
812 812 for x in diff_proc_d]
813 813 assert expected_data == data
814
815
816 @pytest.mark.parametrize('input_str', [
817 '',
818 '\n',
819 '\n\n',
820 'First\n+second',
821 'First\n+second\n',
822
823 '\n\n\n Multi \n\n\n',
824 '\n\n\n Multi beginning',
825 'Multi end \n\n\n',
826 'Multi end',
827 '@@ -0,0 +1 @@\n+test_content \n\n b\n'
828 ], ids=no_newline_id_generator)
829 def test_splitlines(input_str):
830 result = DiffProcessor.diff_splitter(input_str)
831 assert list(result) == input_str.splitlines(True)