##// END OF EJS Templates
logging: fixed some log calls.
marcink -
r3841:fd62517f default
parent child Browse files
Show More
@@ -1,1254 +1,1254 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2019 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21
22 22 """
23 23 Set of diffing helpers, previously part of vcs
24 24 """
25 25
26 26 import os
27 27 import re
28 28 import bz2
29 29 import time
30 30
31 31 import collections
32 32 import difflib
33 33 import logging
34 34 import cPickle as pickle
35 35 from itertools import tee, imap
36 36
37 37 from rhodecode.lib.vcs.exceptions import VCSError
38 38 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
39 39 from rhodecode.lib.utils2 import safe_unicode, safe_str
40 40
41 41 log = logging.getLogger(__name__)
42 42
43 43 # define max context, a file with more than this numbers of lines is unusable
44 44 # in browser anyway
45 45 MAX_CONTEXT = 20 * 1024
46 46 DEFAULT_CONTEXT = 3
47 47
48 48
def get_diff_context(request):
    """Return the diff context size requested by the client.

    ``fullcontext=1`` in the query string expands the diff to the maximum
    supported context; anything else falls back to the default.
    """
    wants_full_context = request.GET.get('fullcontext', '') == '1'
    return MAX_CONTEXT if wants_full_context else DEFAULT_CONTEXT
52 52
def get_diff_whitespace_flag(request):
    """Return True when the client asked to ignore whitespace changes."""
    ignore_ws = request.GET.get('ignorews', '')
    return ignore_ws == '1'
56 56
class OPS(object):
    """Single-letter codes describing the operation performed on a file."""
    ADD = 'A'  # file added
    MOD = 'M'  # file modified (includes rename/copy/chmod)
    DEL = 'D'  # file deleted
62 62
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    :param filenode_old: the "before" file node
    :param filenode_new: the "after" file node
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines around each hunk (capped at
        ``MAX_CONTEXT``)
    :raises VCSError: if either argument is not a ``FileNode``
    """
    # make sure we pass in default context
    context = context or 3
    # protect against IntOverflow when passing HUGE context
    if context > MAX_CONTEXT:
        context = MAX_CONTEXT

    # submodules have no textual diff; use a list comprehension instead of
    # `filter` so the truthiness check below also works on Python 3, where
    # `filter` returns a (always truthy) lazy iterator
    submodules = [
        node for node in (filenode_new, filenode_old)
        if isinstance(node, SubModuleNode)]
    if submodules:
        return ''

    for filenode in (filenode_old, filenode_new):
        if not isinstance(filenode, FileNode):
            raise VCSError(
                "Given object should be FileNode object, not %s"
                % filenode.__class__)

    repo = filenode_new.commit.repository
    # a file added in this commit has no old commit; diff against empty
    old_commit = filenode_old.commit or repo.EMPTY_COMMIT
    new_commit = filenode_new.commit

    vcs_gitdiff = repo.get_diff(
        old_commit, new_commit, filenode_new.path,
        ignore_whitespace, context, path1=filenode_old.path)
    return vcs_gitdiff
94 94
# Numeric codes for the kind of change detected on a file node; used as
# keys of the per-file ``stats['ops']`` dict built by the parsers below.
NEW_FILENODE = 1
DEL_FILENODE = 2
MOD_FILENODE = 3
RENAMED_FILENODE = 4
COPIED_FILENODE = 5
CHMOD_FILENODE = 6
BIN_FILENODE = 7
102 102
103 103
class LimitedDiffContainer(object):
    """
    Thin wrapper around a parsed diff list that records the limit metadata
    (``diff_limit`` / ``cur_diff_size``) which caused the diff to be cut
    off. Indexing and iteration are delegated to the wrapped list.
    """

    def __init__(self, diff_limit, cur_diff_size, diff):
        self.diff = diff
        self.diff_limit = diff_limit
        self.cur_diff_size = cur_diff_size

    def __getitem__(self, key):
        return self.diff[key]

    def __iter__(self):
        return iter(self.diff)
118 118
class Action(object):
    """
    Constants for the ``action`` value of lines in a parsed diff.
    """

    ADD = 'add'           # line added
    DELETE = 'del'        # line removed
    UNMODIFIED = 'unmod'  # line unchanged

    CONTEXT = 'context'      # hunk header / context marker row
    OLD_NO_NL = 'old-no-nl'  # "no newline at end of file" on the old side
    NEW_NO_NL = 'new-no-nl'  # "no newline at end of file" on the new side
131 131
132 132
class DiffProcessor(object):
    """
    Give it a unified or git diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.

    .. note:: Unicode handling

       The original diffs are a byte sequence and can contain filenames
       in mixed encodings. This class generally returns `unicode` objects
       since the result is intended for presentation to the user.

    """
    # matches a hunk header, e.g. ``@@ -1,5 +1,6 @@ optional section``;
    # groups: old start, old length, new start, new length, trailing text
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
    # matches the special ``\ No newline at end of file`` marker line
    _newline_marker = re.compile(r'^\\ No newline at end of file')

    # used for inline highlighter word split; note it operates on lines
    # already HTML-escaped by ``_escaper`` (hence &gt;/&lt;/&amp;)
    _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')

    # collapse ranges of commits over given number
    _collapse_commits_over = 5
154 154
155 155 def __init__(self, diff, format='gitdiff', diff_limit=None,
156 156 file_limit=None, show_full_diff=True):
157 157 """
158 158 :param diff: A `Diff` object representing a diff from a vcs backend
159 159 :param format: format of diff passed, `udiff` or `gitdiff`
160 160 :param diff_limit: define the size of diff that is considered "big"
161 161 based on that parameter cut off will be triggered, set to None
162 162 to show full diff
163 163 """
164 164 self._diff = diff
165 165 self._format = format
166 166 self.adds = 0
167 167 self.removes = 0
168 168 # calculate diff size
169 169 self.diff_limit = diff_limit
170 170 self.file_limit = file_limit
171 171 self.show_full_diff = show_full_diff
172 172 self.cur_diff_size = 0
173 173 self.parsed = False
174 174 self.parsed_diff = []
175 175
176 176 log.debug('Initialized DiffProcessor with %s mode', format)
177 177 if format == 'gitdiff':
178 178 self.differ = self._highlight_line_difflib
179 179 self._parser = self._parse_gitdiff
180 180 else:
181 181 self.differ = self._highlight_line_udiff
182 182 self._parser = self._new_parse_gitdiff
183 183
184 184 def _copy_iterator(self):
185 185 """
186 186 make a fresh copy of generator, we should not iterate thru
187 187 an original as it's needed for repeating operations on
188 188 this instance of DiffProcessor
189 189 """
190 190 self.__udiff, iterator_copy = tee(self.__udiff)
191 191 return iterator_copy
192 192
193 193 def _escaper(self, string):
194 194 """
195 195 Escaper for diff escapes special chars and checks the diff limit
196 196
197 197 :param string:
198 198 """
199 199 self.cur_diff_size += len(string)
200 200
201 201 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
202 202 raise DiffLimitExceeded('Diff Limit Exceeded')
203 203
204 204 return string \
205 205 .replace('&', '&amp;')\
206 206 .replace('<', '&lt;')\
207 207 .replace('>', '&gt;')
208 208
209 209 def _line_counter(self, l):
210 210 """
211 211 Checks each line and bumps total adds/removes for this diff
212 212
213 213 :param l:
214 214 """
215 215 if l.startswith('+') and not l.startswith('+++'):
216 216 self.adds += 1
217 217 elif l.startswith('-') and not l.startswith('---'):
218 218 self.removes += 1
219 219 return safe_unicode(l)
220 220
221 221 def _highlight_line_difflib(self, line, next_):
222 222 """
223 223 Highlight inline changes in both lines.
224 224 """
225 225
226 226 if line['action'] == Action.DELETE:
227 227 old, new = line, next_
228 228 else:
229 229 old, new = next_, line
230 230
231 231 oldwords = self._token_re.split(old['line'])
232 232 newwords = self._token_re.split(new['line'])
233 233 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
234 234
235 235 oldfragments, newfragments = [], []
236 236 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
237 237 oldfrag = ''.join(oldwords[i1:i2])
238 238 newfrag = ''.join(newwords[j1:j2])
239 239 if tag != 'equal':
240 240 if oldfrag:
241 241 oldfrag = '<del>%s</del>' % oldfrag
242 242 if newfrag:
243 243 newfrag = '<ins>%s</ins>' % newfrag
244 244 oldfragments.append(oldfrag)
245 245 newfragments.append(newfrag)
246 246
247 247 old['line'] = "".join(oldfragments)
248 248 new['line'] = "".join(newfragments)
249 249
    def _highlight_line_udiff(self, line, next_):
        """
        Highlight inline changes in both lines.

        Finds the common prefix and common suffix of the two line texts and
        wraps only the differing middle part in ``<ins>``/``<del>`` tags.
        Mutates both dicts in place.
        """
        # length of the common prefix
        start = 0
        limit = min(len(line['line']), len(next_['line']))
        while start < limit and line['line'][start] == next_['line'][start]:
            start += 1
        # length of the common suffix, scanned with negative indices
        # (``end`` ends up as a non-positive offset from the line end)
        end = -1
        limit -= start
        while -end <= limit and line['line'][end] == next_['line'][end]:
            end -= 1
        end += 1
        if start or end:
            def do(l):
                # ``last`` converts the negative suffix offset into a
                # positive slice bound for this particular line
                last = end + len(l['line'])
                if l['action'] == Action.ADD:
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next_)
279 279
280 280 def _clean_line(self, line, command):
281 281 if command in ['+', '-', ' ']:
282 282 # only modify the line if it's actually a diff thing
283 283 line = line[1:]
284 284 return line
285 285
    def _parse_gitdiff(self, inline_diff=True):
        """
        Legacy git-diff parser (see the NEWDIFFS FIXMEs: superseded by
        ``_new_parse_gitdiff``). Walks every chunk of ``self._diff``,
        classifies the file operation from the parsed header, parses the
        hunk lines, and returns a sorted list of per-file dicts, optionally
        wrapped in a ``LimitedDiffContainer`` when a size limit was hit.

        :param inline_diff: when True, also compute word-level inline
            highlighting for paired add/delete lines
        """
        _files = []
        # identity by default; replaced with a LimitedDiffContainer factory
        # once any size limit is exceeded
        diff_container = lambda arg: arg

        for chunk in self._diff.chunks():
            head = chunk.header

            # lazily escape each line while checking the global diff limit
            diff = imap(self._escaper, self.diff_splitter(chunk.diff))
            raw_diff = chunk.raw
            limited_diff = False
            exceeds_limit = False

            op = None
            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'ops': {},
            }

            if head['deleted_file_mode']:
                op = OPS.DEL
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            elif head['new_file_mode']:
                op = OPS.ADD
                stats['binary'] = True
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
            else:  # modify operation, can be copy, rename or chmod

                # CHMOD
                if head['new_mode'] and head['old_mode']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][CHMOD_FILENODE] = (
                        'modified file chmod %s => %s' % (
                            head['old_mode'], head['new_mode']))
                # RENAME
                if head['rename_from'] != head['rename_to']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][RENAMED_FILENODE] = (
                        'file renamed from %s to %s' % (
                            head['rename_from'], head['rename_to']))
                # COPY
                if head.get('copy_from') and head.get('copy_to'):
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][COPIED_FILENODE] = (
                        'file copied from %s to %s' % (
                            head['copy_from'], head['copy_to']))

            # If our new parsed headers didn't match anything fallback to
            # old style detection
            if op is None:
                if not head['a_file'] and head['b_file']:
                    op = OPS.ADD
                    stats['binary'] = True
                    stats['ops'][NEW_FILENODE] = 'new file'

                elif head['a_file'] and not head['b_file']:
                    op = OPS.DEL
                    stats['binary'] = True
                    stats['ops'][DEL_FILENODE] = 'deleted file'

            # it's not ADD not DELETE
            if op is None:
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][MOD_FILENODE] = 'modified file'

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
                try:
                    raw_diff, chunks, _stats = self._parse_lines(diff)
                    stats['binary'] = False
                    stats['added'] = _stats[0]
                    stats['deleted'] = _stats[1]
                    # explicit mark that it's a modified file
                    if op == OPS.MOD:
                        stats['ops'][MOD_FILENODE] = 'modified file'
                    exceeds_limit = len(raw_diff) > self.file_limit

                    # changed from _escaper function so we validate size of
                    # each file instead of the whole diff
                    # diff will hide big files but still show small ones
                    # from my tests, big files are fairly safe to be parsed
                    # but the browser is the bottleneck
                    if not self.show_full_diff and exceeds_limit:
                        raise DiffLimitExceeded('File Limit Exceeded')

                except DiffLimitExceeded:
                    diff_container = lambda _diff: \
                        LimitedDiffContainer(
                            self.diff_limit, self.cur_diff_size, _diff)

                    exceeds_limit = len(raw_diff) > self.file_limit
                    limited_diff = True
                    chunks = []

            else:  # GIT format binary patch, or possibly empty diff
                if head['bin_patch']:
                    # we have operation already extracted, but we mark simply
                    # it's a diff we wont show for binary files
                    stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                chunks = []

            if chunks and not self.show_full_diff and op == OPS.DEL:
                # if not full diff mode show deleted file contents
                # TODO: anderson: if the view is not too big, there is no way
                # to see the content of the file
                chunks = []

            # prepend a synthetic context chunk describing the operations
            # (except plain modification, which needs no banner)
            chunks.insert(0, [{
                'old_lineno': '',
                'new_lineno': '',
                'action': Action.CONTEXT,
                'line': msg,
            } for _op, msg in stats['ops'].iteritems()
                if _op not in [MOD_FILENODE]])

            _files.append({
                'filename': safe_unicode(head['b_path']),
                'old_revision': head['a_blob_id'],
                'new_revision': head['b_blob_id'],
                'chunks': chunks,
                'raw_diff': safe_unicode(raw_diff),
                'operation': op,
                'stats': stats,
                'exceeds_limit': exceeds_limit,
                'is_limited_diff': limited_diff,
            })

        # adds first, then modifications, deletions last
        sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                               OPS.DEL: 2}.get(info['operation'])

        if not inline_diff:
            return diff_container(sorted(_files, key=sorter))

        # highlight inline changes on consecutive del/add (or add/del) pairs
        for diff_data in _files:
            for chunk in diff_data['chunks']:
                lineiter = iter(chunk)
                try:
                    while 1:
                        line = lineiter.next()
                        if line['action'] not in (
                                Action.UNMODIFIED, Action.CONTEXT):
                            nextline = lineiter.next()
                            if nextline['action'] in ['unmod', 'context'] or \
                                    nextline['action'] == line['action']:
                                continue
                            self.differ(line, nextline)
                except StopIteration:
                    pass

        return diff_container(sorted(_files, key=sorter))
444 444
445 445 def _check_large_diff(self):
446 446 log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
447 447 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
448 448 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
449 449
    # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
    def _new_parse_gitdiff(self, inline_diff=True):
        """
        Parse the git diff into a sorted list of per-file dicts.

        For each chunk: record old/new file modes, classify the operation
        (delete / add / chmod / rename / copy, with a legacy fallback),
        parse hunk lines with ``_new_parse_lines`` while enforcing per-file
        and whole-diff size limits, and prepend a synthetic context chunk
        describing non-trivial operations. Returns the list (wrapped in a
        ``LimitedDiffContainer`` if any limit was exceeded).
        """
        _files = []

        # this can be overriden later to a LimitedDiffContainer type
        diff_container = lambda arg: arg

        for chunk in self._diff.chunks():
            head = chunk.header
            log.debug('parsing diff %r', head)

            raw_diff = chunk.raw
            limited_diff = False
            exceeds_limit = False

            op = None
            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'old_mode': None,
                'new_mode': None,
                'ops': {},
            }
            if head['old_mode']:
                stats['old_mode'] = head['old_mode']
            if head['new_mode']:
                stats['new_mode'] = head['new_mode']
            if head['b_mode']:
                # b_mode wins over new_mode when both are present
                stats['new_mode'] = head['b_mode']

            # delete file
            if head['deleted_file_mode']:
                op = OPS.DEL
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            # new file
            elif head['new_file_mode']:
                op = OPS.ADD
                stats['binary'] = True
                stats['old_mode'] = None
                stats['new_mode'] = head['new_file_mode']
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']

            # modify operation, can be copy, rename or chmod
            else:
                # CHMOD
                if head['new_mode'] and head['old_mode']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][CHMOD_FILENODE] = (
                        'modified file chmod %s => %s' % (
                            head['old_mode'], head['new_mode']))

                # RENAME
                if head['rename_from'] != head['rename_to']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['renamed'] = (head['rename_from'], head['rename_to'])
                    stats['ops'][RENAMED_FILENODE] = (
                        'file renamed from %s to %s' % (
                            head['rename_from'], head['rename_to']))
                # COPY
                if head.get('copy_from') and head.get('copy_to'):
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['copied'] = (head['copy_from'], head['copy_to'])
                    stats['ops'][COPIED_FILENODE] = (
                        'file copied from %s to %s' % (
                            head['copy_from'], head['copy_to']))

            # If our new parsed headers didn't match anything fallback to
            # old style detection
            if op is None:
                if not head['a_file'] and head['b_file']:
                    op = OPS.ADD
                    stats['binary'] = True
                    stats['new_file'] = True
                    stats['ops'][NEW_FILENODE] = 'new file'

                elif head['a_file'] and not head['b_file']:
                    op = OPS.DEL
                    stats['binary'] = True
                    stats['ops'][DEL_FILENODE] = 'deleted file'

            # it's not ADD not DELETE
            if op is None:
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][MOD_FILENODE] = 'modified file'

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
                # simulate splitlines, so we keep the line end part
                diff = self.diff_splitter(chunk.diff)

                # append each file to the diff size
                raw_chunk_size = len(raw_diff)

                exceeds_limit = raw_chunk_size > self.file_limit
                self.cur_diff_size += raw_chunk_size

                try:
                    # Check each file instead of the whole diff.
                    # Diff will hide big files but still show small ones.
                    # From the tests big files are fairly safe to be parsed
                    # but the browser is the bottleneck.
                    if not self.show_full_diff and exceeds_limit:
                        log.debug('File `%s` exceeds current file_limit of %s',
                                  safe_unicode(head['b_path']), self.file_limit)
                        # NOTE(review): exceptions do not lazily %-format args
                        # the way log calls do; this message is never
                        # interpolated — confirm and format explicitly
                        raise DiffLimitExceeded(
                            'File Limit %s Exceeded', self.file_limit)

                    self._check_large_diff()

                    raw_diff, chunks, _stats = self._new_parse_lines(diff)
                    stats['binary'] = False
                    stats['added'] = _stats[0]
                    stats['deleted'] = _stats[1]
                    # explicit mark that it's a modified file
                    if op == OPS.MOD:
                        stats['ops'][MOD_FILENODE] = 'modified file'

                except DiffLimitExceeded:
                    diff_container = lambda _diff: \
                        LimitedDiffContainer(
                            self.diff_limit, self.cur_diff_size, _diff)

                    limited_diff = True
                    chunks = []

            else:  # GIT format binary patch, or possibly empty diff
                if head['bin_patch']:
                    # we have operation already extracted, but we mark simply
                    # it's a diff we wont show for binary files
                    stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                chunks = []

            # Hide content of deleted node by setting empty chunks
            if chunks and not self.show_full_diff and op == OPS.DEL:
                # if not full diff mode show deleted file contents
                # TODO: anderson: if the view is not too big, there is no way
                # to see the content of the file
                chunks = []

            # prepend a synthetic context chunk describing the operations
            # (plain modification gets no banner)
            chunks.insert(
                0, [{'old_lineno': '',
                     'new_lineno': '',
                     'action': Action.CONTEXT,
                     'line': msg,
                     } for _op, msg in stats['ops'].iteritems()
                    if _op not in [MOD_FILENODE]])

            original_filename = safe_unicode(head['a_path'])
            _files.append({
                'original_filename': original_filename,
                'filename': safe_unicode(head['b_path']),
                'old_revision': head['a_blob_id'],
                'new_revision': head['b_blob_id'],
                'chunks': chunks,
                'raw_diff': safe_unicode(raw_diff),
                'operation': op,
                'stats': stats,
                'exceeds_limit': exceeds_limit,
                'is_limited_diff': limited_diff,
            })

        # adds first, then modifications, deletions last
        sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                               OPS.DEL: 2}.get(info['operation'])

        return diff_container(sorted(_files, key=sorter))
622 622
    # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
    def _parse_lines(self, diff_iter):
        """
        Parse the diff an return data for the template.

        :param diff_iter: iterator of (already escaped) diff lines
        :return: tuple of (joined raw diff string, list of chunk line-lists,
            ``[added, deleted]`` counters)
        """

        stats = [0, 0]
        chunks = []
        raw_diff = []

        try:
            line = diff_iter.next()

            while line:
                raw_diff.append(line)
                lines = []
                chunks.append(lines)

                match = self._chunk_re.match(line)

                if not match:
                    break

                gr = match.groups()
                # missing length groups default to 1 (e.g. ``@@ -1 +1 @@``)
                (old_line, old_end,
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                old_line -= 1
                new_line -= 1

                context = len(gr) == 5
                old_end += old_line
                new_end += new_line

                if context:
                    # skip context only if it's first line
                    if int(gr[0]) > 1:
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': Action.CONTEXT,
                            'line': line,
                        })

                line = diff_iter.next()

                while old_line < old_end or new_line < new_end:
                    command = ' '
                    if line:
                        command = line[0]

                    affects_old = affects_new = False

                    # ignore those if we don't expect them
                    # NOTE(review): this `continue` does not advance `line`,
                    # so an unexpected '#'/'@' line here would spin forever —
                    # confirm such lines cannot occur inside a hunk
                    if command in '#@':
                        continue
                    elif command == '+':
                        affects_new = True
                        action = Action.ADD
                        stats[0] += 1
                    elif command == '-':
                        affects_old = True
                        action = Action.DELETE
                        stats[1] += 1
                    else:
                        affects_old = affects_new = True
                        action = Action.UNMODIFIED

                    if not self._newline_marker.match(line):
                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': self._clean_line(line, command)
                        })
                        raw_diff.append(line)

                    line = diff_iter.next()

                    if self._newline_marker.match(line):
                        # we need to append to lines, since this is not
                        # counted in the line specs of diff
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': Action.CONTEXT,
                            'line': self._clean_line(line, command)
                        })

        except StopIteration:
            pass
        return ''.join(raw_diff), chunks, stats
716 716
    # FIXME: NEWDIFFS: dan: this replaces _parse_lines
    def _new_parse_lines(self, diff_iter):
        """
        Parse the diff an return data for the template.

        Unlike ``_parse_lines``, each chunk is a hunk dict carrying the
        parsed ``@@`` header fields (source/target start and length, section
        header) plus its ``lines`` list.

        :param diff_iter: iterator of diff lines
        :return: tuple of (joined raw diff string, list of hunk dicts,
            ``[added, deleted]`` counters)
        """

        stats = [0, 0]
        chunks = []
        raw_diff = []

        try:
            line = diff_iter.next()

            while line:
                raw_diff.append(line)
                # match header e.g @@ -0,0 +1 @@\n'
                match = self._chunk_re.match(line)

                if not match:
                    break

                gr = match.groups()
                # missing length groups default to 1 (e.g. ``@@ -1 +1 @@``)
                (old_line, old_end,
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]

                lines = []
                hunk = {
                    'section_header': gr[-1],
                    'source_start': old_line,
                    'source_length': old_end,
                    'target_start': new_line,
                    'target_length': new_end,
                    'lines': lines,
                }
                chunks.append(hunk)

                old_line -= 1
                new_line -= 1

                context = len(gr) == 5
                old_end += old_line
                new_end += new_line

                line = diff_iter.next()

                while old_line < old_end or new_line < new_end:
                    command = ' '
                    if line:
                        command = line[0]

                    affects_old = affects_new = False

                    # ignore those if we don't expect them
                    # NOTE(review): `continue` without advancing `line` —
                    # would spin forever if '#'/'@' appeared inside a hunk
                    if command in '#@':
                        continue
                    elif command == '+':
                        affects_new = True
                        action = Action.ADD
                        stats[0] += 1
                    elif command == '-':
                        affects_old = True
                        action = Action.DELETE
                        stats[1] += 1
                    else:
                        affects_old = affects_new = True
                        action = Action.UNMODIFIED

                    if not self._newline_marker.match(line):
                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': self._clean_line(line, command)
                        })
                        raw_diff.append(line)

                    line = diff_iter.next()

                    if self._newline_marker.match(line):
                        # we need to append to lines, since this is not
                        # counted in the line specs of diff
                        if affects_old:
                            action = Action.OLD_NO_NL
                        elif affects_new:
                            action = Action.NEW_NO_NL
                        else:
                            raise Exception('invalid context for no newline')

                        lines.append({
                            'old_lineno': None,
                            'new_lineno': None,
                            'action': action,
                            'line': self._clean_line(line, command)
                        })

        except StopIteration:
            pass

        return ''.join(raw_diff), chunks, stats
818 818
819 819 def _safe_id(self, idstring):
820 820 """Make a string safe for including in an id attribute.
821 821
822 822 The HTML spec says that id attributes 'must begin with
823 823 a letter ([A-Za-z]) and may be followed by any number
824 824 of letters, digits ([0-9]), hyphens ("-"), underscores
825 825 ("_"), colons (":"), and periods (".")'. These regexps
826 826 are slightly over-zealous, in that they remove colons
827 827 and periods unnecessarily.
828 828
829 829 Whitespace is transformed into underscores, and then
830 830 anything which is not a hyphen or a character that
831 831 matches \w (alphanumerics and underscore) is removed.
832 832
833 833 """
834 834 # Transform all whitespace to underscore
835 835 idstring = re.sub(r'\s', "_", '%s' % idstring)
836 836 # Remove everything that is not a hyphen or a member of \w
837 837 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
838 838 return idstring
839 839
840 840 @classmethod
841 841 def diff_splitter(cls, string):
842 842 """
843 843 Diff split that emulates .splitlines() but works only on \n
844 844 """
845 845 if not string:
846 846 return
847 847 elif string == '\n':
848 848 yield u'\n'
849 849 else:
850 850
851 851 has_newline = string.endswith('\n')
852 852 elements = string.split('\n')
853 853 if has_newline:
854 854 # skip last element as it's empty string from newlines
855 855 elements = elements[:-1]
856 856
857 857 len_elements = len(elements)
858 858
859 859 for cnt, line in enumerate(elements, start=1):
860 860 last_line = cnt == len_elements
861 861 if last_line and not has_newline:
862 862 yield safe_unicode(line)
863 863 else:
864 864 yield safe_unicode(line) + '\n'
865 865
866 866 def prepare(self, inline_diff=True):
867 867 """
868 868 Prepare the passed udiff for HTML rendering.
869 869
870 870 :return: A list of dicts with diff information.
871 871 """
872 872 parsed = self._parser(inline_diff=inline_diff)
873 873 self.parsed = True
874 874 self.parsed_diff = parsed
875 875 return parsed
876 876
877 877 def as_raw(self, diff_lines=None):
878 878 """
879 879 Returns raw diff as a byte string
880 880 """
881 881 return self._diff.raw
882 882
    def as_html(self, table_class='code-difftable', line_class='line',
                old_lineno_class='lineno old', new_lineno_class='lineno new',
                code_class='code', enable_comments=False, parsed_lines=None):
        """
        Return given diff as html table with customized css classes.

        Renders one ``<tr>`` per change with comment-icon, old/new line
        number and code cells. Returns ``None`` when the parsed diff
        contains no chunks at all.
        """
        # TODO(marcink): not sure how to pass in translator
        # here in an efficient way, leave the _ for proper gettext extraction
        _ = lambda s: s

        def _link_to_if(condition, label, url):
            """
            Generates a link if condition is meet or just the label if not.
            """

            if condition:
                return '''<a href="%(url)s" class="tooltip"
                          title="%(title)s">%(label)s</a>''' % {
                    'title': _('Click to select line'),
                    'url': url,
                    'label': label
                }
            else:
                return label
        # lazily parse on first use
        if not self.parsed:
            self.prepare()

        diff_lines = self.parsed_diff
        if parsed_lines:
            diff_lines = parsed_lines

        _html_empty = True
        _html = []
        _html.append('''<table class="%(table_class)s">\n''' % {
            'table_class': table_class
        })

        for diff in diff_lines:
            for line in diff['chunks']:
                _html_empty = False
                for change in line:
                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                        'lc': line_class,
                        'action': change['action']
                    })
                    anchor_old_id = ''
                    anchor_new_id = ''
                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['old_lineno']
                    }
                    # NOTE(review): the 'oldline_no' key here actually carries
                    # the NEW line number; only the key name is misleading
                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['new_lineno']
                    }
                    # anchors only for real line numbers, not '...' markers
                    cond_old = (change['old_lineno'] != '...' and
                                change['old_lineno'])
                    cond_new = (change['new_lineno'] != '...' and
                                change['new_lineno'])
                    if cond_old:
                        anchor_old_id = 'id="%s"' % anchor_old
                    if cond_new:
                        anchor_new_id = 'id="%s"' % anchor_new

                    if change['action'] != Action.CONTEXT:
                        anchor_link = True
                    else:
                        anchor_link = False

                    ###########################################################
                    # COMMENT ICONS
                    ###########################################################
                    _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')

                    if enable_comments and change['action'] != Action.CONTEXT:
                        _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')

                    _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')

                    ###########################################################
                    # OLD LINE NUMBER
                    ###########################################################
                    _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                        'a_id': anchor_old_id,
                        'olc': old_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(anchor_link, change['old_lineno'],
                                            '#%s' % anchor_old)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # NEW LINE NUMBER
                    ###########################################################

                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                        'a_id': anchor_new_id,
                        'nlc': new_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(anchor_link, change['new_lineno'],
                                            '#%s' % anchor_new)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # CODE
                    ###########################################################
                    code_classes = [code_class]
                    if (not enable_comments or
                            change['action'] == Action.CONTEXT):
                        code_classes.append('no-comment')
                    _html.append('\t<td class="%s">' % ' '.join(code_classes))
                    _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                        'code': change['line']
                    })

                    _html.append('''\t</td>''')
                    _html.append('''\n</tr>\n''')
        _html.append('''</table>''')
        if _html_empty:
            return None
        return ''.join(_html)
1007 1007
1008 1008 def stat(self):
1009 1009 """
1010 1010 Returns tuple of added, and removed lines for this instance
1011 1011 """
1012 1012 return self.adds, self.removes
1013 1013
    def get_context_of_line(
            self, path, diff_line=None, context_before=3, context_after=3):
        """
        Returns the context lines for the specified diff line.

        :param path: filename whose parsed diff should be searched
        :param diff_line: position to look up; exactly one of its
            ``old``/``new`` attributes must be set, the other ``None``
        :param context_before: number of lines to include before the match
        :param context_after: number of lines to include after the match
        :type diff_line: :class:`DiffLineNumber`
        :return: list of ``(action, text)`` tuples around the matched line
        """
        assert self.parsed, "DiffProcessor is not initialized."

        # A DiffLineNumber addresses a line by old OR new number; if neither
        # side is None the request is ambiguous.
        if None not in diff_line:
            raise ValueError(
                "Cannot specify both line numbers: {}".format(diff_line))

        file_diff = self._get_file_diff(path)
        chunk, idx = self._find_chunk_line_index(file_diff, diff_line)

        # Clamp the window start at 0 so context near the chunk top works.
        first_line_to_include = max(idx - context_before, 0)
        first_line_after_context = idx + context_after + 1
        context_lines = chunk[first_line_to_include:first_line_after_context]

        # Keep only real content lines (headers etc. are filtered out).
        line_contents = [
            _context_line(line) for line in context_lines
            if _is_diff_content(line)]
        # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
        # Once they are fixed, we can drop this line here.
        if line_contents:
            line_contents[-1] = (
                line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
        return line_contents
1043 1043
    def find_context(self, path, context, offset=0):
        """
        Finds the given `context` inside of the diff.

        Use the parameter `offset` to specify which offset the target line has
        inside of the given `context`. This way the correct diff line will be
        returned.

        :param path: filename whose diff should be searched
        :param context: sequence of ``(action, text)`` pairs as produced by
            :func:`_context_line`
        :param offset: Shall be used to specify the offset of the main line
            within the given `context`.
        :return: list of :class:`DiffLineNumber` positions, one per match
        """
        if offset < 0 or offset >= len(context):
            raise ValueError(
                "Only positive values up to the length of the context "
                "minus one are allowed.")

        matches = []
        file_diff = self._get_file_diff(path)

        for chunk in file_diff['chunks']:
            context_iter = iter(context)
            for line_idx, line in enumerate(chunk):
                try:
                    # Compare the chunk line against the next expected
                    # context line; StopIteration means the full context
                    # was already consumed, i.e. we found a match.
                    if _context_line(line) == context_iter.next():
                        continue
                except StopIteration:
                    matches.append((line_idx, chunk))
                    context_iter = iter(context)

            # Increment position and trigger StopIteration
            # if we had a match at the end
            line_idx += 1
            try:
                context_iter.next()
            except StopIteration:
                matches.append((line_idx, chunk))

        # `matches` records the index just past the context; step back to
        # the line at the requested offset within the context.
        effective_offset = len(context) - offset
        found_at_diff_lines = [
            _line_to_diff_line_number(chunk[idx - effective_offset])
            for idx, chunk in matches]

        return found_at_diff_lines
1087 1087
    def _get_file_diff(self, path):
        """
        Return the parsed per-file diff entry for ``path``.

        :raises FileNotInDiffException: if ``path`` is not part of this diff
        """
        for file_diff in self.parsed_diff:
            if file_diff['filename'] == path:
                break
        else:
            raise FileNotInDiffException("File {} not in diff".format(path))
        return file_diff
1095 1095
    def _find_chunk_line_index(self, file_diff, diff_line):
        """
        Locate ``diff_line`` inside ``file_diff``.

        :return: ``(chunk, index)`` of the first line matching either the
            old or the new line number of ``diff_line``
        :raises LineNotInDiffException: if no chunk contains the line
        """
        for chunk in file_diff['chunks']:
            for idx, line in enumerate(chunk):
                if line['old_lineno'] == diff_line.old:
                    return chunk, idx
                if line['new_lineno'] == diff_line.new:
                    return chunk, idx
        raise LineNotInDiffException(
            "The line {} is not part of the diff.".format(diff_line))
1105 1105
1106 1106
def _is_diff_content(line):
    """
    Tell whether a parsed diff line carries file content, i.e. its action is
    one of unmodified/add/delete.
    """
    content_actions = (Action.UNMODIFIED, Action.ADD, Action.DELETE)
    return line['action'] in content_actions
1110 1110
1111 1111
1112 1112 def _context_line(line):
1113 1113 return (line['action'], line['line'])
1114 1114
1115 1115
1116 1116 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1117 1117
1118 1118
def _line_to_diff_line_number(line):
    """Map a parsed diff line to a :class:`DiffLineNumber`."""
    # Normalize falsy line numbers (e.g. '' placeholders) to None.
    return DiffLineNumber(
        old=line['old_lineno'] or None,
        new=line['new_lineno'] or None)
1123 1123
1124 1124
class FileNotInDiffException(Exception):
    """
    Raised when the context for a missing file is requested.

    If you request the context for a line in a file which is not part of the
    given diff, then this exception is raised.
    """
1132 1132
1133 1133
class LineNotInDiffException(Exception):
    """
    Raised when the context for a missing line is requested.

    If you request the context for a line in a file and this line is not
    part of the given diff, then this exception is raised.
    """
1141 1141
1142 1142
class DiffLimitExceeded(Exception):
    """Raised when processing a diff that is over the allowed limits."""
    pass
1145 1145
1146 1146
# Version tag stored inside cached diff files; on mismatch the cached entry
# is purged by load_cached_diff.
# NOTE(marcink): if diffs.mako change, probably this
# needs a bump to next version
CURRENT_DIFF_VERSION = 'v4'
1150 1150
1151 1151
def _cleanup_cache_file(cached_diff_file):
    """Best-effort removal of a cache file so a damaged one is not kept."""
    # cleanup file to not store it "damaged"
    try:
        os.remove(cached_diff_file)
    except Exception:
        log.exception('Failed to cleanup path %s', cached_diff_file)
1158 1158
1159 1159
def cache_diff(cached_diff_file, diff, commits):
    """
    Persist ``diff`` and ``commits`` into ``cached_diff_file``.

    The payload is pickled either plain or bz2-compressed, depending on
    whether the file name carries the ``mode:plain`` marker. On failure the
    (possibly damaged) file is removed again.
    """
    mode = 'plain' if 'mode:plain' in cached_diff_file else ''

    struct = {
        'version': CURRENT_DIFF_VERSION,
        'diff': diff,
        'commits': commits
    }

    start = time.time()
    try:
        if mode == 'plain':
            with open(cached_diff_file, 'wb') as f:
                pickle.dump(struct, f)
        else:
            with bz2.BZ2File(cached_diff_file, 'wb') as f:
                pickle.dump(struct, f)
    except Exception:
        log.warn('Failed to save cache', exc_info=True)
        _cleanup_cache_file(cached_diff_file)

    log.debug('Saved diff cache under %s in %.3fs', cached_diff_file, time.time() - start)
1182 1182
1183 1183
def load_cached_diff(cached_diff_file):
    """
    Read a diff previously stored by :func:`cache_diff`.

    :return: dict with ``version``/``diff``/``commits`` keys; ``diff`` and
        ``commits`` are None when no usable cache exists. Unreadable or
        version-mismatched cache files are discarded.
    """
    mode = 'plain' if 'mode:plain' in cached_diff_file else ''

    default_struct = {
        'version': CURRENT_DIFF_VERSION,
        'diff': None,
        'commits': None
    }

    has_cache = os.path.isfile(cached_diff_file)
    if not has_cache:
        log.debug('Reading diff cache file failed %s', cached_diff_file)
        return default_struct

    data = None

    start = time.time()
    try:
        if mode == 'plain':
            with open(cached_diff_file, 'rb') as f:
                data = pickle.load(f)
        else:
            with bz2.BZ2File(cached_diff_file, 'rb') as f:
                data = pickle.load(f)
    except Exception:
        log.warn('Failed to read diff cache file', exc_info=True)

    if not data:
        data = default_struct

    if not isinstance(data, dict):
        # old version of data ?
        data = default_struct

    # check version
    if data.get('version') != CURRENT_DIFF_VERSION:
        # purge cache
        _cleanup_cache_file(cached_diff_file)
        return default_struct

    log.debug('Loaded diff cache from %s in %.3fs', cached_diff_file, time.time() - start)

    return data
1227 1227
1228 1228
def generate_diff_cache_key(*args):
    """
    Helper to generate a cache key using arguments.

    Each argument is stringified, sanitized and joined with ``_``. Empty
    arguments are mapped to None so they stay visible in the key instead
    of silently collapsing it.
    """
    def arg_mapper(input_param):
        input_param = safe_str(input_param)
        # we cannot allow '/' in arguments since it would allow
        # subdirectory usage. NOTE: str.replace returns a new string, the
        # result must be re-assigned (the original discarded it).
        input_param = input_param.replace('/', '_')
        return input_param or None  # prevent empty string arguments

    return '_'.join([
        '{}' for i in range(len(args))]).format(*map(arg_mapper, args))
1242 1242
1243 1243
def diff_cache_exist(cache_storage, *args):
    """
    Based on all generated arguments check and return a cache path.

    :raises ValueError: if the resulting path would escape ``cache_storage``
    """
    key = generate_diff_cache_key(*args)
    candidate = os.path.join(cache_storage, key)
    # prevent path traversal attacks using some param that have e.g '../../'
    resolved = os.path.abspath(candidate)
    if not resolved.startswith(cache_storage):
        raise ValueError('Final path must be within {}'.format(cache_storage))

    return candidate
@@ -1,381 +1,380 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2014-2019 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 """
22 22 HG commit module
23 23 """
24 24
25 25 import os
26 26
27 27 from zope.cachedescriptors.property import Lazy as LazyProperty
28 28
29 29 from rhodecode.lib.datelib import utcdate_fromtimestamp
30 30 from rhodecode.lib.utils import safe_str, safe_unicode
31 31 from rhodecode.lib.vcs import path as vcspath
32 32 from rhodecode.lib.vcs.backends import base
33 33 from rhodecode.lib.vcs.backends.hg.diff import MercurialDiff
34 34 from rhodecode.lib.vcs.exceptions import CommitError
35 35 from rhodecode.lib.vcs.nodes import (
36 36 AddedFileNodesGenerator, ChangedFileNodesGenerator, DirNode, FileNode,
37 37 NodeKind, RemovedFileNodesGenerator, RootNode, SubModuleNode,
38 38 LargeFileNode, LARGEFILE_PREFIX)
39 39 from rhodecode.lib.vcs.utils.paths import get_dirs_for_path
40 40
41 41
class MercurialCommit(base.BaseCommit):
    """
    Represents state of the repository at the single commit.
    """

    # Attributes that must never be bulk-requested for Mercurial.
    _filter_pre_load = [
        # git specific property not supported here
        "_commit",
    ]

    def __init__(self, repository, raw_id, idx, pre_load=None):
        """
        :param repository: owning MercurialRepository
        :param raw_id: full commit hash (coerced to str)
        :param idx: numeric revision index used for remote calls
        :param pre_load: optional list of attributes to bulk-fetch up front
        """
        raw_id = safe_str(raw_id)

        self.repository = repository
        self._remote = repository._remote

        self.raw_id = raw_id
        self.idx = idx

        self._set_bulk_properties(pre_load)

        # caches
        self.nodes = {}

    def _set_bulk_properties(self, pre_load):
        """
        Fetch the ``pre_load`` attributes in one remote call and store them
        directly in ``__dict__``, short-circuiting the lazy properties.
        """
        if not pre_load:
            return
        pre_load = [entry for entry in pre_load
                    if entry not in self._filter_pre_load]
        if not pre_load:
            return

        result = self._remote.bulk_request(self.idx, pre_load)
        for attr, value in result.items():
            if attr in ["author", "branch", "message"]:
                value = safe_unicode(value)
            elif attr == "affected_files":
                value = map(safe_unicode, value)
            elif attr == "date":
                value = utcdate_fromtimestamp(*value)
            elif attr in ["children", "parents"]:
                value = self._make_commits(value)
            elif attr in ["phase"]:
                value = self._get_phase_text(value)
            self.__dict__[attr] = value

    @LazyProperty
    def tags(self):
        """Names of repository tags pointing at this commit."""
        tags = [name for name, commit_id in self.repository.tags.iteritems()
                if commit_id == self.raw_id]
        return tags

    @LazyProperty
    def branch(self):
        """Branch name of this commit as unicode."""
        return safe_unicode(self._remote.ctx_branch(self.idx))

    @LazyProperty
    def bookmarks(self):
        """Names of bookmarks pointing at this commit."""
        bookmarks = [
            name for name, commit_id in self.repository.bookmarks.iteritems()
            if commit_id == self.raw_id]
        return bookmarks

    @LazyProperty
    def message(self):
        """Commit message as unicode."""
        return safe_unicode(self._remote.ctx_description(self.idx))

    @LazyProperty
    def committer(self):
        """Mercurial has no separate committer; mirrors ``author``."""
        return safe_unicode(self.author)

    @LazyProperty
    def author(self):
        """Commit author as unicode."""
        return safe_unicode(self._remote.ctx_user(self.idx))

    @LazyProperty
    def date(self):
        """Commit date as a UTC datetime."""
        return utcdate_fromtimestamp(*self._remote.ctx_date(self.idx))

    @LazyProperty
    def status(self):
        """
        Returns modified, added, removed, deleted files for current commit
        """
        return self._remote.ctx_status(self.idx)

    @LazyProperty
    def _file_paths(self):
        # Flat list of all file paths present in this commit.
        return self._remote.ctx_list(self.idx)

    @LazyProperty
    def _dir_paths(self):
        # Directory paths derived from the file list; '' stands for the root.
        p = list(set(get_dirs_for_path(*self._file_paths)))
        p.insert(0, '')
        return p

    @LazyProperty
    def _paths(self):
        # All known paths: directories first, then files.
        return self._dir_paths + self._file_paths

    @LazyProperty
    def id(self):
        # `last` is provided by the base commit class — presumably True for
        # the newest commit; TODO confirm against base.BaseCommit.
        if self.last:
            return u'tip'
        return self.short_id

    @LazyProperty
    def short_id(self):
        """Abbreviated (12-char) commit hash."""
        return self.raw_id[:12]

    def _make_commits(self, indexes, pre_load=None):
        # Negative indexes (null revision) are filtered out.
        return [self.repository.get_commit(commit_idx=idx, pre_load=pre_load)
                for idx in indexes if idx >= 0]

    @LazyProperty
    def parents(self):
        """
        Returns list of parent commits.
        """
        parents = self._remote.ctx_parents(self.idx)
        return self._make_commits(parents)

    def _get_phase_text(self, phase_id):
        """Map a numeric Mercurial phase id to its text name ('' if unknown)."""
        return {
            0: 'public',
            1: 'draft',
            2: 'secret',
        }.get(phase_id) or ''

    @LazyProperty
    def phase(self):
        """Mercurial phase of this commit as unicode text."""
        phase_id = self._remote.ctx_phase(self.idx)
        phase_text = self._get_phase_text(phase_id)

        return safe_unicode(phase_text)

    @LazyProperty
    def obsolete(self):
        """Whether this commit is marked obsolete (changeset evolution)."""
        obsolete = self._remote.ctx_obsolete(self.idx)
        return obsolete

    @LazyProperty
    def hidden(self):
        """Whether this commit is hidden."""
        hidden = self._remote.ctx_hidden(self.idx)
        return hidden

    @LazyProperty
    def children(self):
        """
        Returns list of child commits.
        """
        children = self._remote.ctx_children(self.idx)
        return self._make_commits(children)

    def _fix_path(self, path):
        """
        Mercurial keeps filenodes as str so we need to encode from unicode
        to str.
        """
        return safe_str(super(MercurialCommit, self)._fix_path(path))

    def _get_kind(self, path):
        """
        Return the NodeKind (FILE/DIR) for ``path``.

        :raises CommitError: if ``path`` does not exist in this commit
        """
        path = self._fix_path(path)
        if path in self._file_paths:
            return NodeKind.FILE
        elif path in self._dir_paths:
            return NodeKind.DIR
        else:
            raise CommitError(
                "Node does not exist at the given path '%s'" % (path, ))

    def _get_filectx(self, path):
        """
        Validate that ``path`` is a file in this commit and return it fixed.

        :raises CommitError: if ``path`` is not a file
        """
        path = self._fix_path(path)
        if self._get_kind(path) != NodeKind.FILE:
            raise CommitError(
                "File does not exist for idx %s at '%s'" % (self.raw_id, path))
        return path

    def get_file_mode(self, path):
        """
        Returns stat mode of the file at the given ``path``.
        """
        path = self._get_filectx(path)
        if 'x' in self._remote.fctx_flags(self.idx, path):
            return base.FILEMODE_EXECUTABLE
        else:
            return base.FILEMODE_DEFAULT

    def is_link(self, path):
        """Whether the file at ``path`` is a symlink ('l' flag)."""
        path = self._get_filectx(path)
        return 'l' in self._remote.fctx_flags(self.idx, path)

    def get_file_content(self, path):
        """
        Returns content of the file at given ``path``.
        """
        path = self._get_filectx(path)
        return self._remote.fctx_data(self.idx, path)

    def get_file_size(self, path):
        """
        Returns size of the file at given ``path``.
        """
        path = self._get_filectx(path)
        return self._remote.fctx_size(self.idx, path)

    def get_path_history(self, path, limit=None, pre_load=None):
        """
        Returns history of file as reversed list of `MercurialCommit` objects
        for which file at given ``path`` has been modified.
        """
        path = self._get_filectx(path)
        hist = self._remote.node_history(self.idx, path, limit)
        return [
            self.repository.get_commit(commit_id=commit_id, pre_load=pre_load)
            for commit_id in hist]

    def get_file_annotate(self, path, pre_load=None):
        """
        Returns a generator of four element tuples with
        lineno, commit_id, commit lazy loader and line
        """
        result = self._remote.fctx_annotate(self.idx, path)

        for ln_no, commit_id, content in result:
            yield (
                ln_no, commit_id,
                lambda: self.repository.get_commit(commit_id=commit_id,
                                                   pre_load=pre_load),
                content)

    def get_nodes(self, path):
        """
        Returns combined ``DirNode`` and ``FileNode`` objects list representing
        state of commit at the given ``path``. If node at the given ``path``
        is not instance of ``DirNode``, CommitError would be raised.
        """

        if self._get_kind(path) != NodeKind.DIR:
            raise CommitError(
                "Directory does not exist for idx %s at '%s'" % (self.raw_id, path))
        path = self._fix_path(path)

        filenodes = [
            FileNode(f, commit=self) for f in self._file_paths
            if os.path.dirname(f) == path]
        # TODO: johbo: Check if this can be done in a more obvious way
        dirs = path == '' and '' or [
            d for d in self._dir_paths
            if d and vcspath.dirname(d) == path]
        dirnodes = [
            DirNode(d, commit=self) for d in dirs
            if os.path.dirname(d) == path]

        alias = self.repository.alias
        for k, vals in self._submodules.iteritems():
            if vcspath.dirname(k) == path:
                loc = vals[0]
                commit = vals[1]
                dirnodes.append(SubModuleNode(k, url=loc, commit=commit, alias=alias))
        nodes = dirnodes + filenodes
        # cache nodes
        for node in nodes:
            self.nodes[node.path] = node
        nodes.sort()

        return nodes

    def get_node(self, path, pre_load=None):
        """
        Returns `Node` object from the given `path`. If there is no node at
        the given `path`, `NodeDoesNotExistError` would be raised.
        """
        path = self._fix_path(path)

        if path not in self.nodes:
            if path in self._file_paths:
                node = FileNode(path, commit=self, pre_load=pre_load)
            elif path in self._dir_paths:
                if path == '':
                    node = RootNode(commit=self)
                else:
                    node = DirNode(path, commit=self)
            else:
                raise self.no_node_at_path(path)

            # cache node
            self.nodes[path] = node
        return self.nodes[path]

    def get_largefile_node(self, path):
        """
        Return a LargeFileNode for ``path`` if it is tracked as a largefile;
        returns None otherwise (implicit).
        """
        if self._remote.is_large_file(path):
            # content of that file regular FileNode is the hash of largefile
            file_id = self.get_file_content(path).strip()

            if self._remote.in_largefiles_store(file_id):
                lf_path = self._remote.store_path(file_id)
                return LargeFileNode(lf_path, commit=self, org_path=path)
            elif self._remote.in_user_cache(file_id):
                lf_path = self._remote.store_path(file_id)
                # link from the user cache into the store for future lookups
                self._remote.link(file_id, path)
                return LargeFileNode(lf_path, commit=self, org_path=path)

    @LazyProperty
    def _submodules(self):
        """
        Returns a dictionary with submodule information from substate file
        of hg repository.
        """
        return self._remote.ctx_substate(self.idx)

    @LazyProperty
    def affected_files(self):
        """
        Gets a fast accessible file changes for given commit
        """
        return self._remote.ctx_files(self.idx)

    @property
    def added(self):
        """
        Returns list of added ``FileNode`` objects.
        """
        return AddedFileNodesGenerator([n for n in self.status[1]], self)

    @property
    def changed(self):
        """
        Returns list of modified ``FileNode`` objects.
        """
        return ChangedFileNodesGenerator([n for n in self.status[0]], self)

    @property
    def removed(self):
        """
        Returns list of removed ``FileNode`` objects.
        """
        return RemovedFileNodesGenerator([n for n in self.status[2]], self)
General Comments 0
You need to be logged in to leave comments. Login now