diffs: added load time for diffs
marcink
r3838:a11aca8c default
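
The only change visible in this excerpt is the new "import time" on line 29 of rhodecode/lib/diffs.py; the timing code the commit message refers to is not part of the lines shown here. A minimal sketch of how such a measurement is typically wired around the parse step (the helper name and the exact place where timing happens are assumptions, not taken from this commit):

import time
import logging

log = logging.getLogger(__name__)

def prepare_with_timing(diff_processor, inline_diff=True):
    # Hypothetical helper: time DiffProcessor.prepare() and log the duration.
    # The diff below only adds the `time` import; where the measurement
    # actually lives is outside this excerpt.
    start = time.time()
    parsed = diff_processor.prepare(inline_diff=inline_diff)
    log.debug('diff prepared in %.3fs', time.time() - start)
    return parsed
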
@@ -1,1237 +1,1240 b''
1 # -*- coding: utf-8 -*-
2
3 # Copyright (C) 2011-2019 RhodeCode GmbH
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
21
22 """
23 Set of diffing helpers, previously part of vcs
24 """
25
26 import os
27 import re
28 import bz2
29 import time
30
31 import collections
32 import difflib
33 import logging
34 import cPickle as pickle
35 from itertools import tee, imap
36
37 from rhodecode.lib.vcs.exceptions import VCSError
38 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
39 from rhodecode.lib.utils2 import safe_unicode, safe_str
40
41 log = logging.getLogger(__name__)
41
42
42 # define max context, a file with more than this numbers of lines is unusable
43 # define max context, a file with more than this numbers of lines is unusable
43 # in browser anyway
44 # in browser anyway
44 MAX_CONTEXT = 20 * 1024
45 MAX_CONTEXT = 20 * 1024
45 DEFAULT_CONTEXT = 3
46 DEFAULT_CONTEXT = 3
46
47
47
48
48 def get_diff_context(request):
49 def get_diff_context(request):
49 return MAX_CONTEXT if request.GET.get('fullcontext', '') == '1' else DEFAULT_CONTEXT
50 return MAX_CONTEXT if request.GET.get('fullcontext', '') == '1' else DEFAULT_CONTEXT
50
51
51
52
52 def get_diff_whitespace_flag(request):
53 def get_diff_whitespace_flag(request):
53 return request.GET.get('ignorews', '') == '1'
54 return request.GET.get('ignorews', '') == '1'
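
Taken together, these two helpers turn the ?fullcontext=1 and ?ignorews=1 query parameters into the arguments a diff view needs. A small sketch of how a view might combine them (the helper name is illustrative; request is any object with a GET mapping, e.g. a Pyramid request):

def diff_options_from_request(request):
    # Collect the whitespace flag and context size derived from the
    # ?ignorews=1 and ?fullcontext=1 query parameters.
    hide_whitespace_changes = get_diff_whitespace_flag(request)
    diff_context = get_diff_context(request)
    return hide_whitespace_changes, diff_context
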
54
55
55
56
56 class OPS(object):
57 class OPS(object):
57 ADD = 'A'
58 ADD = 'A'
58 MOD = 'M'
59 MOD = 'M'
59 DEL = 'D'
60 DEL = 'D'
60
61
61
62
62 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
63 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
63 """
64 """
64 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
65 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
65
66
66 :param ignore_whitespace: ignore whitespaces in diff
67 :param ignore_whitespace: ignore whitespaces in diff
67 """
68 """
68 # make sure we pass in default context
69 # make sure we pass in default context
69 context = context or 3
70 context = context or 3
70 # protect against IntOverflow when passing HUGE context
71 # protect against IntOverflow when passing HUGE context
71 if context > MAX_CONTEXT:
72 if context > MAX_CONTEXT:
72 context = MAX_CONTEXT
73 context = MAX_CONTEXT
73
74
74 submodules = filter(lambda o: isinstance(o, SubModuleNode),
75 submodules = filter(lambda o: isinstance(o, SubModuleNode),
75 [filenode_new, filenode_old])
76 [filenode_new, filenode_old])
76 if submodules:
77 if submodules:
77 return ''
78 return ''
78
79
79 for filenode in (filenode_old, filenode_new):
80 for filenode in (filenode_old, filenode_new):
80 if not isinstance(filenode, FileNode):
81 if not isinstance(filenode, FileNode):
81 raise VCSError(
82 raise VCSError(
82 "Given object should be FileNode object, not %s"
83 "Given object should be FileNode object, not %s"
83 % filenode.__class__)
84 % filenode.__class__)
84
85
85 repo = filenode_new.commit.repository
86 repo = filenode_new.commit.repository
86 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
87 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
87 new_commit = filenode_new.commit
88 new_commit = filenode_new.commit
88
89
89 vcs_gitdiff = repo.get_diff(
90 vcs_gitdiff = repo.get_diff(
90 old_commit, new_commit, filenode_new.path,
91 old_commit, new_commit, filenode_new.path,
91 ignore_whitespace, context, path1=filenode_old.path)
92 ignore_whitespace, context, path1=filenode_old.path)
92 return vcs_gitdiff
93 return vcs_gitdiff
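
A usage sketch for get_gitdiff, assuming two commits of the same repository whose commit objects expose get_node() (as the vcs backends used by this module do); the path and variable names are placeholders:

# Sketch: git-style diff of one file between two commits.
old_node = old_commit.get_node('setup.py')
new_node = new_commit.get_node('setup.py')
diff_text = get_gitdiff(old_node, new_node, ignore_whitespace=False, context=3)
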
93
94
94 NEW_FILENODE = 1
95 NEW_FILENODE = 1
95 DEL_FILENODE = 2
96 DEL_FILENODE = 2
96 MOD_FILENODE = 3
97 MOD_FILENODE = 3
97 RENAMED_FILENODE = 4
98 RENAMED_FILENODE = 4
98 COPIED_FILENODE = 5
99 COPIED_FILENODE = 5
99 CHMOD_FILENODE = 6
100 CHMOD_FILENODE = 6
100 BIN_FILENODE = 7
101 BIN_FILENODE = 7
101
102
102
103
103 class LimitedDiffContainer(object):
104 class LimitedDiffContainer(object):
104
105
105 def __init__(self, diff_limit, cur_diff_size, diff):
106 def __init__(self, diff_limit, cur_diff_size, diff):
106 self.diff = diff
107 self.diff = diff
107 self.diff_limit = diff_limit
108 self.diff_limit = diff_limit
108 self.cur_diff_size = cur_diff_size
109 self.cur_diff_size = cur_diff_size
109
110
110 def __getitem__(self, key):
111 def __getitem__(self, key):
111 return self.diff.__getitem__(key)
112 return self.diff.__getitem__(key)
112
113
113 def __iter__(self):
114 def __iter__(self):
114 for l in self.diff:
115 for l in self.diff:
115 yield l
116 yield l
116
117
117
118
118 class Action(object):
119 class Action(object):
119 """
120 """
120 Contains constants for the action value of the lines in a parsed diff.
121 Contains constants for the action value of the lines in a parsed diff.
121 """
122 """
122
123
123 ADD = 'add'
124 ADD = 'add'
124 DELETE = 'del'
125 DELETE = 'del'
125 UNMODIFIED = 'unmod'
126 UNMODIFIED = 'unmod'
126
127
127 CONTEXT = 'context'
128 CONTEXT = 'context'
128 OLD_NO_NL = 'old-no-nl'
129 OLD_NO_NL = 'old-no-nl'
129 NEW_NO_NL = 'new-no-nl'
130 NEW_NO_NL = 'new-no-nl'
130
131
131
132
132 class DiffProcessor(object):
133 class DiffProcessor(object):
133 """
134 """
134 Give it a unified or git diff and it returns a list of the files that were
135 Give it a unified or git diff and it returns a list of the files that were
135 mentioned in the diff together with a dict of meta information that
136 mentioned in the diff together with a dict of meta information that
136 can be used to render it in a HTML template.
137 can be used to render it in a HTML template.
137
138
138 .. note:: Unicode handling
139 .. note:: Unicode handling
139
140
140 The original diffs are a byte sequence and can contain filenames
141 The original diffs are a byte sequence and can contain filenames
141 in mixed encodings. This class generally returns `unicode` objects
142 in mixed encodings. This class generally returns `unicode` objects
142 since the result is intended for presentation to the user.
143 since the result is intended for presentation to the user.
143
144
144 """
145 """
145 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
146 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
146 _newline_marker = re.compile(r'^\\ No newline at end of file')
147 _newline_marker = re.compile(r'^\\ No newline at end of file')
147
148
148 # used for inline highlighter word split
149 # used for inline highlighter word split
149 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
150 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
150
151
151 # collapse ranges of commits over given number
152 # collapse ranges of commits over given number
152 _collapse_commits_over = 5
153 _collapse_commits_over = 5
153
154
154 def __init__(self, diff, format='gitdiff', diff_limit=None,
155 def __init__(self, diff, format='gitdiff', diff_limit=None,
155 file_limit=None, show_full_diff=True):
156 file_limit=None, show_full_diff=True):
156 """
157 """
157 :param diff: A `Diff` object representing a diff from a vcs backend
158 :param diff: A `Diff` object representing a diff from a vcs backend
158 :param format: format of diff passed, `udiff` or `gitdiff`
159 :param format: format of diff passed, `udiff` or `gitdiff`
159 :param diff_limit: define the size of diff that is considered "big"
160 :param diff_limit: define the size of diff that is considered "big"
160 based on that parameter cut off will be triggered, set to None
161 based on that parameter cut off will be triggered, set to None
161 to show full diff
162 to show full diff
162 """
163 """
163 self._diff = diff
164 self._diff = diff
164 self._format = format
165 self._format = format
165 self.adds = 0
166 self.adds = 0
166 self.removes = 0
167 self.removes = 0
167 # calculate diff size
168 # calculate diff size
168 self.diff_limit = diff_limit
169 self.diff_limit = diff_limit
169 self.file_limit = file_limit
170 self.file_limit = file_limit
170 self.show_full_diff = show_full_diff
171 self.show_full_diff = show_full_diff
171 self.cur_diff_size = 0
172 self.cur_diff_size = 0
172 self.parsed = False
173 self.parsed = False
173 self.parsed_diff = []
174 self.parsed_diff = []
174
175
175 log.debug('Initialized DiffProcessor with %s mode', format)
176 log.debug('Initialized DiffProcessor with %s mode', format)
176 if format == 'gitdiff':
177 if format == 'gitdiff':
177 self.differ = self._highlight_line_difflib
178 self.differ = self._highlight_line_difflib
178 self._parser = self._parse_gitdiff
179 self._parser = self._parse_gitdiff
179 else:
180 else:
180 self.differ = self._highlight_line_udiff
181 self.differ = self._highlight_line_udiff
181 self._parser = self._new_parse_gitdiff
182 self._parser = self._new_parse_gitdiff
182
183
183 def _copy_iterator(self):
184 def _copy_iterator(self):
184 """
185 """
185 make a fresh copy of generator, we should not iterate thru
186 make a fresh copy of generator, we should not iterate thru
186 an original as it's needed for repeating operations on
187 an original as it's needed for repeating operations on
187 this instance of DiffProcessor
188 this instance of DiffProcessor
188 """
189 """
189 self.__udiff, iterator_copy = tee(self.__udiff)
190 self.__udiff, iterator_copy = tee(self.__udiff)
190 return iterator_copy
191 return iterator_copy
191
192
192 def _escaper(self, string):
193 def _escaper(self, string):
193 """
194 """
194 Escaper for diff escapes special chars and checks the diff limit
195 Escaper for diff escapes special chars and checks the diff limit
195
196
196 :param string:
197 :param string:
197 """
198 """
198 self.cur_diff_size += len(string)
199 self.cur_diff_size += len(string)
199
200
200 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
201 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
201 raise DiffLimitExceeded('Diff Limit Exceeded')
202 raise DiffLimitExceeded('Diff Limit Exceeded')
202
203
203 return string \
204 return string \
204 .replace('&', '&amp;')\
205 .replace('&', '&amp;')\
205 .replace('<', '&lt;')\
206 .replace('<', '&lt;')\
206 .replace('>', '&gt;')
207 .replace('>', '&gt;')
207
208
208 def _line_counter(self, l):
209 def _line_counter(self, l):
209 """
210 """
210 Checks each line and bumps total adds/removes for this diff
211 Checks each line and bumps total adds/removes for this diff
211
212
212 :param l:
213 :param l:
213 """
214 """
214 if l.startswith('+') and not l.startswith('+++'):
215 if l.startswith('+') and not l.startswith('+++'):
215 self.adds += 1
216 self.adds += 1
216 elif l.startswith('-') and not l.startswith('---'):
217 elif l.startswith('-') and not l.startswith('---'):
217 self.removes += 1
218 self.removes += 1
218 return safe_unicode(l)
219 return safe_unicode(l)
219
220
220 def _highlight_line_difflib(self, line, next_):
221 def _highlight_line_difflib(self, line, next_):
221 """
222 """
222 Highlight inline changes in both lines.
223 Highlight inline changes in both lines.
223 """
224 """
224
225
225 if line['action'] == Action.DELETE:
226 if line['action'] == Action.DELETE:
226 old, new = line, next_
227 old, new = line, next_
227 else:
228 else:
228 old, new = next_, line
229 old, new = next_, line
229
230
230 oldwords = self._token_re.split(old['line'])
231 oldwords = self._token_re.split(old['line'])
231 newwords = self._token_re.split(new['line'])
232 newwords = self._token_re.split(new['line'])
232 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
233 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
233
234
234 oldfragments, newfragments = [], []
235 oldfragments, newfragments = [], []
235 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
236 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
236 oldfrag = ''.join(oldwords[i1:i2])
237 oldfrag = ''.join(oldwords[i1:i2])
237 newfrag = ''.join(newwords[j1:j2])
238 newfrag = ''.join(newwords[j1:j2])
238 if tag != 'equal':
239 if tag != 'equal':
239 if oldfrag:
240 if oldfrag:
240 oldfrag = '<del>%s</del>' % oldfrag
241 oldfrag = '<del>%s</del>' % oldfrag
241 if newfrag:
242 if newfrag:
242 newfrag = '<ins>%s</ins>' % newfrag
243 newfrag = '<ins>%s</ins>' % newfrag
243 oldfragments.append(oldfrag)
244 oldfragments.append(oldfrag)
244 newfragments.append(newfrag)
245 newfragments.append(newfrag)
245
246
246 old['line'] = "".join(oldfragments)
247 old['line'] = "".join(oldfragments)
247 new['line'] = "".join(newfragments)
248 new['line'] = "".join(newfragments)
248
249
249 def _highlight_line_udiff(self, line, next_):
250 def _highlight_line_udiff(self, line, next_):
250 """
251 """
251 Highlight inline changes in both lines.
252 Highlight inline changes in both lines.
252 """
253 """
253 start = 0
254 start = 0
254 limit = min(len(line['line']), len(next_['line']))
255 limit = min(len(line['line']), len(next_['line']))
255 while start < limit and line['line'][start] == next_['line'][start]:
256 while start < limit and line['line'][start] == next_['line'][start]:
256 start += 1
257 start += 1
257 end = -1
258 end = -1
258 limit -= start
259 limit -= start
259 while -end <= limit and line['line'][end] == next_['line'][end]:
260 while -end <= limit and line['line'][end] == next_['line'][end]:
260 end -= 1
261 end -= 1
261 end += 1
262 end += 1
262 if start or end:
263 if start or end:
263 def do(l):
264 def do(l):
264 last = end + len(l['line'])
265 last = end + len(l['line'])
265 if l['action'] == Action.ADD:
266 if l['action'] == Action.ADD:
266 tag = 'ins'
267 tag = 'ins'
267 else:
268 else:
268 tag = 'del'
269 tag = 'del'
269 l['line'] = '%s<%s>%s</%s>%s' % (
270 l['line'] = '%s<%s>%s</%s>%s' % (
270 l['line'][:start],
271 l['line'][:start],
271 tag,
272 tag,
272 l['line'][start:last],
273 l['line'][start:last],
273 tag,
274 tag,
274 l['line'][last:]
275 l['line'][last:]
275 )
276 )
276 do(line)
277 do(line)
277 do(next_)
278 do(next_)
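
A worked example of what the udiff highlighter does: it finds the common prefix and suffix of the two line bodies and wraps only the differing middle in del/ins tags. Assuming a DiffProcessor instance named processor:

# Two parsed-line dicts shaped like the entries produced by _parse_lines().
old = {'action': Action.DELETE, 'line': 'foo = 1'}
new = {'action': Action.ADD, 'line': 'foo = 2'}
processor._highlight_line_udiff(old, new)
# The common prefix 'foo = ' is left untouched, only the changed tail is wrapped:
#   old['line'] == 'foo = <del>1</del>'
#   new['line'] == 'foo = <ins>2</ins>'
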
278
279
279 def _clean_line(self, line, command):
280 def _clean_line(self, line, command):
280 if command in ['+', '-', ' ']:
281 if command in ['+', '-', ' ']:
281 # only modify the line if it's actually a diff thing
282 # only modify the line if it's actually a diff thing
282 line = line[1:]
283 line = line[1:]
283 return line
284 return line
284
285
285 def _parse_gitdiff(self, inline_diff=True):
286 def _parse_gitdiff(self, inline_diff=True):
286 _files = []
287 _files = []
287 diff_container = lambda arg: arg
288 diff_container = lambda arg: arg
288
289
289 for chunk in self._diff.chunks():
290 for chunk in self._diff.chunks():
290 head = chunk.header
291 head = chunk.header
291
292
292 diff = imap(self._escaper, self.diff_splitter(chunk.diff))
293 diff = imap(self._escaper, self.diff_splitter(chunk.diff))
293 raw_diff = chunk.raw
294 raw_diff = chunk.raw
294 limited_diff = False
295 limited_diff = False
295 exceeds_limit = False
296 exceeds_limit = False
296
297
297 op = None
298 op = None
298 stats = {
299 stats = {
299 'added': 0,
300 'added': 0,
300 'deleted': 0,
301 'deleted': 0,
301 'binary': False,
302 'binary': False,
302 'ops': {},
303 'ops': {},
303 }
304 }
304
305
305 if head['deleted_file_mode']:
306 if head['deleted_file_mode']:
306 op = OPS.DEL
307 op = OPS.DEL
307 stats['binary'] = True
308 stats['binary'] = True
308 stats['ops'][DEL_FILENODE] = 'deleted file'
309 stats['ops'][DEL_FILENODE] = 'deleted file'
309
310
310 elif head['new_file_mode']:
311 elif head['new_file_mode']:
311 op = OPS.ADD
312 op = OPS.ADD
312 stats['binary'] = True
313 stats['binary'] = True
313 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
314 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
314 else: # modify operation, can be copy, rename or chmod
315 else: # modify operation, can be copy, rename or chmod
315
316
316 # CHMOD
317 # CHMOD
317 if head['new_mode'] and head['old_mode']:
318 if head['new_mode'] and head['old_mode']:
318 op = OPS.MOD
319 op = OPS.MOD
319 stats['binary'] = True
320 stats['binary'] = True
320 stats['ops'][CHMOD_FILENODE] = (
321 stats['ops'][CHMOD_FILENODE] = (
321 'modified file chmod %s => %s' % (
322 'modified file chmod %s => %s' % (
322 head['old_mode'], head['new_mode']))
323 head['old_mode'], head['new_mode']))
323 # RENAME
324 # RENAME
324 if head['rename_from'] != head['rename_to']:
325 if head['rename_from'] != head['rename_to']:
325 op = OPS.MOD
326 op = OPS.MOD
326 stats['binary'] = True
327 stats['binary'] = True
327 stats['ops'][RENAMED_FILENODE] = (
328 stats['ops'][RENAMED_FILENODE] = (
328 'file renamed from %s to %s' % (
329 'file renamed from %s to %s' % (
329 head['rename_from'], head['rename_to']))
330 head['rename_from'], head['rename_to']))
330 # COPY
331 # COPY
331 if head.get('copy_from') and head.get('copy_to'):
332 if head.get('copy_from') and head.get('copy_to'):
332 op = OPS.MOD
333 op = OPS.MOD
333 stats['binary'] = True
334 stats['binary'] = True
334 stats['ops'][COPIED_FILENODE] = (
335 stats['ops'][COPIED_FILENODE] = (
335 'file copied from %s to %s' % (
336 'file copied from %s to %s' % (
336 head['copy_from'], head['copy_to']))
337 head['copy_from'], head['copy_to']))
337
338
338 # If our new parsed headers didn't match anything fallback to
339 # If our new parsed headers didn't match anything fallback to
339 # old style detection
340 # old style detection
340 if op is None:
341 if op is None:
341 if not head['a_file'] and head['b_file']:
342 if not head['a_file'] and head['b_file']:
342 op = OPS.ADD
343 op = OPS.ADD
343 stats['binary'] = True
344 stats['binary'] = True
344 stats['ops'][NEW_FILENODE] = 'new file'
345 stats['ops'][NEW_FILENODE] = 'new file'
345
346
346 elif head['a_file'] and not head['b_file']:
347 elif head['a_file'] and not head['b_file']:
347 op = OPS.DEL
348 op = OPS.DEL
348 stats['binary'] = True
349 stats['binary'] = True
349 stats['ops'][DEL_FILENODE] = 'deleted file'
350 stats['ops'][DEL_FILENODE] = 'deleted file'
350
351
351 # it's not ADD not DELETE
352 # it's not ADD not DELETE
352 if op is None:
353 if op is None:
353 op = OPS.MOD
354 op = OPS.MOD
354 stats['binary'] = True
355 stats['binary'] = True
355 stats['ops'][MOD_FILENODE] = 'modified file'
356 stats['ops'][MOD_FILENODE] = 'modified file'
356
357
357 # a real non-binary diff
358 # a real non-binary diff
358 if head['a_file'] or head['b_file']:
359 if head['a_file'] or head['b_file']:
359 try:
360 try:
360 raw_diff, chunks, _stats = self._parse_lines(diff)
361 raw_diff, chunks, _stats = self._parse_lines(diff)
361 stats['binary'] = False
362 stats['binary'] = False
362 stats['added'] = _stats[0]
363 stats['added'] = _stats[0]
363 stats['deleted'] = _stats[1]
364 stats['deleted'] = _stats[1]
364 # explicit mark that it's a modified file
365 # explicit mark that it's a modified file
365 if op == OPS.MOD:
366 if op == OPS.MOD:
366 stats['ops'][MOD_FILENODE] = 'modified file'
367 stats['ops'][MOD_FILENODE] = 'modified file'
367 exceeds_limit = len(raw_diff) > self.file_limit
368 exceeds_limit = len(raw_diff) > self.file_limit
368
369
369 # changed from _escaper function so we validate size of
370 # changed from _escaper function so we validate size of
370 # each file instead of the whole diff
371 # each file instead of the whole diff
371 # diff will hide big files but still show small ones
372 # diff will hide big files but still show small ones
372 # from my tests, big files are fairly safe to be parsed
373 # from my tests, big files are fairly safe to be parsed
373 # but the browser is the bottleneck
374 # but the browser is the bottleneck
374 if not self.show_full_diff and exceeds_limit:
375 if not self.show_full_diff and exceeds_limit:
375 raise DiffLimitExceeded('File Limit Exceeded')
376 raise DiffLimitExceeded('File Limit Exceeded')
376
377
377 except DiffLimitExceeded:
378 except DiffLimitExceeded:
378 diff_container = lambda _diff: \
379 diff_container = lambda _diff: \
379 LimitedDiffContainer(
380 LimitedDiffContainer(
380 self.diff_limit, self.cur_diff_size, _diff)
381 self.diff_limit, self.cur_diff_size, _diff)
381
382
382 exceeds_limit = len(raw_diff) > self.file_limit
383 exceeds_limit = len(raw_diff) > self.file_limit
383 limited_diff = True
384 limited_diff = True
384 chunks = []
385 chunks = []
385
386
386 else: # GIT format binary patch, or possibly empty diff
387 else: # GIT format binary patch, or possibly empty diff
387 if head['bin_patch']:
388 if head['bin_patch']:
388 # we have operation already extracted, but we mark simply
389 # we have operation already extracted, but we mark simply
389 # it's a diff we wont show for binary files
390 # it's a diff we wont show for binary files
390 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
391 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
391 chunks = []
392 chunks = []
392
393
393 if chunks and not self.show_full_diff and op == OPS.DEL:
394 if chunks and not self.show_full_diff and op == OPS.DEL:
394 # if not full diff mode show deleted file contents
395 # if not full diff mode show deleted file contents
395 # TODO: anderson: if the view is not too big, there is no way
396 # TODO: anderson: if the view is not too big, there is no way
396 # to see the content of the file
397 # to see the content of the file
397 chunks = []
398 chunks = []
398
399
399 chunks.insert(0, [{
400 chunks.insert(0, [{
400 'old_lineno': '',
401 'old_lineno': '',
401 'new_lineno': '',
402 'new_lineno': '',
402 'action': Action.CONTEXT,
403 'action': Action.CONTEXT,
403 'line': msg,
404 'line': msg,
404 } for _op, msg in stats['ops'].iteritems()
405 } for _op, msg in stats['ops'].iteritems()
405 if _op not in [MOD_FILENODE]])
406 if _op not in [MOD_FILENODE]])
406
407
407 _files.append({
408 _files.append({
408 'filename': safe_unicode(head['b_path']),
409 'filename': safe_unicode(head['b_path']),
409 'old_revision': head['a_blob_id'],
410 'old_revision': head['a_blob_id'],
410 'new_revision': head['b_blob_id'],
411 'new_revision': head['b_blob_id'],
411 'chunks': chunks,
412 'chunks': chunks,
412 'raw_diff': safe_unicode(raw_diff),
413 'raw_diff': safe_unicode(raw_diff),
413 'operation': op,
414 'operation': op,
414 'stats': stats,
415 'stats': stats,
415 'exceeds_limit': exceeds_limit,
416 'exceeds_limit': exceeds_limit,
416 'is_limited_diff': limited_diff,
417 'is_limited_diff': limited_diff,
417 })
418 })
418
419
419 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
420 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
420 OPS.DEL: 2}.get(info['operation'])
421 OPS.DEL: 2}.get(info['operation'])
421
422
422 if not inline_diff:
423 if not inline_diff:
423 return diff_container(sorted(_files, key=sorter))
424 return diff_container(sorted(_files, key=sorter))
424
425
425 # highlight inline changes
426 # highlight inline changes
426 for diff_data in _files:
427 for diff_data in _files:
427 for chunk in diff_data['chunks']:
428 for chunk in diff_data['chunks']:
428 lineiter = iter(chunk)
429 lineiter = iter(chunk)
429 try:
430 try:
430 while 1:
431 while 1:
431 line = lineiter.next()
432 line = lineiter.next()
432 if line['action'] not in (
433 if line['action'] not in (
433 Action.UNMODIFIED, Action.CONTEXT):
434 Action.UNMODIFIED, Action.CONTEXT):
434 nextline = lineiter.next()
435 nextline = lineiter.next()
435 if nextline['action'] in ['unmod', 'context'] or \
436 if nextline['action'] in ['unmod', 'context'] or \
436 nextline['action'] == line['action']:
437 nextline['action'] == line['action']:
437 continue
438 continue
438 self.differ(line, nextline)
439 self.differ(line, nextline)
439 except StopIteration:
440 except StopIteration:
440 pass
441 pass
441
442
442 return diff_container(sorted(_files, key=sorter))
443 return diff_container(sorted(_files, key=sorter))
443
444
444 def _check_large_diff(self):
445 def _check_large_diff(self):
445 log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
446 log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
446 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
447 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
447 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
448 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
448
449
449 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
450 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
450 def _new_parse_gitdiff(self, inline_diff=True):
451 def _new_parse_gitdiff(self, inline_diff=True):
451 _files = []
452 _files = []
452
453
453 # this can be overriden later to a LimitedDiffContainer type
454 # this can be overriden later to a LimitedDiffContainer type
454 diff_container = lambda arg: arg
455 diff_container = lambda arg: arg
455
456
456 for chunk in self._diff.chunks():
457 for chunk in self._diff.chunks():
457 head = chunk.header
458 head = chunk.header
458 log.debug('parsing diff %r', head)
459 log.debug('parsing diff %r', head)
459
460
460 raw_diff = chunk.raw
461 raw_diff = chunk.raw
461 limited_diff = False
462 limited_diff = False
462 exceeds_limit = False
463 exceeds_limit = False
463
464
464 op = None
465 op = None
465 stats = {
466 stats = {
466 'added': 0,
467 'added': 0,
467 'deleted': 0,
468 'deleted': 0,
468 'binary': False,
469 'binary': False,
469 'old_mode': None,
470 'old_mode': None,
470 'new_mode': None,
471 'new_mode': None,
471 'ops': {},
472 'ops': {},
472 }
473 }
473 if head['old_mode']:
474 if head['old_mode']:
474 stats['old_mode'] = head['old_mode']
475 stats['old_mode'] = head['old_mode']
475 if head['new_mode']:
476 if head['new_mode']:
476 stats['new_mode'] = head['new_mode']
477 stats['new_mode'] = head['new_mode']
477 if head['b_mode']:
478 if head['b_mode']:
478 stats['new_mode'] = head['b_mode']
479 stats['new_mode'] = head['b_mode']
479
480
480 # delete file
481 # delete file
481 if head['deleted_file_mode']:
482 if head['deleted_file_mode']:
482 op = OPS.DEL
483 op = OPS.DEL
483 stats['binary'] = True
484 stats['binary'] = True
484 stats['ops'][DEL_FILENODE] = 'deleted file'
485 stats['ops'][DEL_FILENODE] = 'deleted file'
485
486
486 # new file
487 # new file
487 elif head['new_file_mode']:
488 elif head['new_file_mode']:
488 op = OPS.ADD
489 op = OPS.ADD
489 stats['binary'] = True
490 stats['binary'] = True
490 stats['old_mode'] = None
491 stats['old_mode'] = None
491 stats['new_mode'] = head['new_file_mode']
492 stats['new_mode'] = head['new_file_mode']
492 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
493 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
493
494
494 # modify operation, can be copy, rename or chmod
495 # modify operation, can be copy, rename or chmod
495 else:
496 else:
496 # CHMOD
497 # CHMOD
497 if head['new_mode'] and head['old_mode']:
498 if head['new_mode'] and head['old_mode']:
498 op = OPS.MOD
499 op = OPS.MOD
499 stats['binary'] = True
500 stats['binary'] = True
500 stats['ops'][CHMOD_FILENODE] = (
501 stats['ops'][CHMOD_FILENODE] = (
501 'modified file chmod %s => %s' % (
502 'modified file chmod %s => %s' % (
502 head['old_mode'], head['new_mode']))
503 head['old_mode'], head['new_mode']))
503
504
504 # RENAME
505 # RENAME
505 if head['rename_from'] != head['rename_to']:
506 if head['rename_from'] != head['rename_to']:
506 op = OPS.MOD
507 op = OPS.MOD
507 stats['binary'] = True
508 stats['binary'] = True
508 stats['renamed'] = (head['rename_from'], head['rename_to'])
509 stats['renamed'] = (head['rename_from'], head['rename_to'])
509 stats['ops'][RENAMED_FILENODE] = (
510 stats['ops'][RENAMED_FILENODE] = (
510 'file renamed from %s to %s' % (
511 'file renamed from %s to %s' % (
511 head['rename_from'], head['rename_to']))
512 head['rename_from'], head['rename_to']))
512 # COPY
513 # COPY
513 if head.get('copy_from') and head.get('copy_to'):
514 if head.get('copy_from') and head.get('copy_to'):
514 op = OPS.MOD
515 op = OPS.MOD
515 stats['binary'] = True
516 stats['binary'] = True
516 stats['copied'] = (head['copy_from'], head['copy_to'])
517 stats['copied'] = (head['copy_from'], head['copy_to'])
517 stats['ops'][COPIED_FILENODE] = (
518 stats['ops'][COPIED_FILENODE] = (
518 'file copied from %s to %s' % (
519 'file copied from %s to %s' % (
519 head['copy_from'], head['copy_to']))
520 head['copy_from'], head['copy_to']))
520
521
521 # If our new parsed headers didn't match anything fallback to
522 # If our new parsed headers didn't match anything fallback to
522 # old style detection
523 # old style detection
523 if op is None:
524 if op is None:
524 if not head['a_file'] and head['b_file']:
525 if not head['a_file'] and head['b_file']:
525 op = OPS.ADD
526 op = OPS.ADD
526 stats['binary'] = True
527 stats['binary'] = True
527 stats['new_file'] = True
528 stats['new_file'] = True
528 stats['ops'][NEW_FILENODE] = 'new file'
529 stats['ops'][NEW_FILENODE] = 'new file'
529
530
530 elif head['a_file'] and not head['b_file']:
531 elif head['a_file'] and not head['b_file']:
531 op = OPS.DEL
532 op = OPS.DEL
532 stats['binary'] = True
533 stats['binary'] = True
533 stats['ops'][DEL_FILENODE] = 'deleted file'
534 stats['ops'][DEL_FILENODE] = 'deleted file'
534
535
535 # it's not ADD not DELETE
536 # it's not ADD not DELETE
536 if op is None:
537 if op is None:
537 op = OPS.MOD
538 op = OPS.MOD
538 stats['binary'] = True
539 stats['binary'] = True
539 stats['ops'][MOD_FILENODE] = 'modified file'
540 stats['ops'][MOD_FILENODE] = 'modified file'
540
541
541 # a real non-binary diff
542 # a real non-binary diff
542 if head['a_file'] or head['b_file']:
543 if head['a_file'] or head['b_file']:
543 # simulate splitlines, so we keep the line end part
544 # simulate splitlines, so we keep the line end part
544 diff = self.diff_splitter(chunk.diff)
545 diff = self.diff_splitter(chunk.diff)
545
546
546 # append each file to the diff size
547 # append each file to the diff size
547 raw_chunk_size = len(raw_diff)
548 raw_chunk_size = len(raw_diff)
548
549
549 exceeds_limit = raw_chunk_size > self.file_limit
550 exceeds_limit = raw_chunk_size > self.file_limit
550 self.cur_diff_size += raw_chunk_size
551 self.cur_diff_size += raw_chunk_size
551
552
552 try:
553 try:
553 # Check each file instead of the whole diff.
554 # Check each file instead of the whole diff.
554 # Diff will hide big files but still show small ones.
555 # Diff will hide big files but still show small ones.
555 # From the tests big files are fairly safe to be parsed
556 # From the tests big files are fairly safe to be parsed
556 # but the browser is the bottleneck.
557 # but the browser is the bottleneck.
557 if not self.show_full_diff and exceeds_limit:
558 if not self.show_full_diff and exceeds_limit:
558 log.debug('File `%s` exceeds current file_limit of %s',
559 log.debug('File `%s` exceeds current file_limit of %s',
559 safe_unicode(head['b_path']), self.file_limit)
560 safe_unicode(head['b_path']), self.file_limit)
560 raise DiffLimitExceeded(
561 raise DiffLimitExceeded(
561 'File Limit %s Exceeded', self.file_limit)
562 'File Limit %s Exceeded', self.file_limit)
562
563
563 self._check_large_diff()
564 self._check_large_diff()
564
565
565 raw_diff, chunks, _stats = self._new_parse_lines(diff)
566 raw_diff, chunks, _stats = self._new_parse_lines(diff)
566 stats['binary'] = False
567 stats['binary'] = False
567 stats['added'] = _stats[0]
568 stats['added'] = _stats[0]
568 stats['deleted'] = _stats[1]
569 stats['deleted'] = _stats[1]
569 # explicit mark that it's a modified file
570 # explicit mark that it's a modified file
570 if op == OPS.MOD:
571 if op == OPS.MOD:
571 stats['ops'][MOD_FILENODE] = 'modified file'
572 stats['ops'][MOD_FILENODE] = 'modified file'
572
573
573 except DiffLimitExceeded:
574 except DiffLimitExceeded:
574 diff_container = lambda _diff: \
575 diff_container = lambda _diff: \
575 LimitedDiffContainer(
576 LimitedDiffContainer(
576 self.diff_limit, self.cur_diff_size, _diff)
577 self.diff_limit, self.cur_diff_size, _diff)
577
578
578 limited_diff = True
579 limited_diff = True
579 chunks = []
580 chunks = []
580
581
581 else: # GIT format binary patch, or possibly empty diff
582 else: # GIT format binary patch, or possibly empty diff
582 if head['bin_patch']:
583 if head['bin_patch']:
583 # we have operation already extracted, but we mark simply
584 # we have operation already extracted, but we mark simply
584 # it's a diff we wont show for binary files
585 # it's a diff we wont show for binary files
585 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
586 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
586 chunks = []
587 chunks = []
587
588
588 # Hide content of deleted node by setting empty chunks
589 # Hide content of deleted node by setting empty chunks
589 if chunks and not self.show_full_diff and op == OPS.DEL:
590 if chunks and not self.show_full_diff and op == OPS.DEL:
590 # if not full diff mode show deleted file contents
591 # if not full diff mode show deleted file contents
591 # TODO: anderson: if the view is not too big, there is no way
592 # TODO: anderson: if the view is not too big, there is no way
592 # to see the content of the file
593 # to see the content of the file
593 chunks = []
594 chunks = []
594
595
595 chunks.insert(
596 chunks.insert(
596 0, [{'old_lineno': '',
597 0, [{'old_lineno': '',
597 'new_lineno': '',
598 'new_lineno': '',
598 'action': Action.CONTEXT,
599 'action': Action.CONTEXT,
599 'line': msg,
600 'line': msg,
600 } for _op, msg in stats['ops'].iteritems()
601 } for _op, msg in stats['ops'].iteritems()
601 if _op not in [MOD_FILENODE]])
602 if _op not in [MOD_FILENODE]])
602
603
603 original_filename = safe_unicode(head['a_path'])
604 original_filename = safe_unicode(head['a_path'])
604 _files.append({
605 _files.append({
605 'original_filename': original_filename,
606 'original_filename': original_filename,
606 'filename': safe_unicode(head['b_path']),
607 'filename': safe_unicode(head['b_path']),
607 'old_revision': head['a_blob_id'],
608 'old_revision': head['a_blob_id'],
608 'new_revision': head['b_blob_id'],
609 'new_revision': head['b_blob_id'],
609 'chunks': chunks,
610 'chunks': chunks,
610 'raw_diff': safe_unicode(raw_diff),
611 'raw_diff': safe_unicode(raw_diff),
611 'operation': op,
612 'operation': op,
612 'stats': stats,
613 'stats': stats,
613 'exceeds_limit': exceeds_limit,
614 'exceeds_limit': exceeds_limit,
614 'is_limited_diff': limited_diff,
615 'is_limited_diff': limited_diff,
615 })
616 })
616
617
617 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
618 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
618 OPS.DEL: 2}.get(info['operation'])
619 OPS.DEL: 2}.get(info['operation'])
619
620
620 return diff_container(sorted(_files, key=sorter))
621 return diff_container(sorted(_files, key=sorter))
621
622
622 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
623 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
623 def _parse_lines(self, diff_iter):
624 def _parse_lines(self, diff_iter):
624 """
625 """
625 Parse the diff an return data for the template.
626 Parse the diff an return data for the template.
626 """
627 """
627
628
628 stats = [0, 0]
629 stats = [0, 0]
629 chunks = []
630 chunks = []
630 raw_diff = []
631 raw_diff = []
631
632
632 try:
633 try:
633 line = diff_iter.next()
634 line = diff_iter.next()
634
635
635 while line:
636 while line:
636 raw_diff.append(line)
637 raw_diff.append(line)
637 lines = []
638 lines = []
638 chunks.append(lines)
639 chunks.append(lines)
639
640
640 match = self._chunk_re.match(line)
641 match = self._chunk_re.match(line)
641
642
642 if not match:
643 if not match:
643 break
644 break
644
645
645 gr = match.groups()
646 gr = match.groups()
646 (old_line, old_end,
647 (old_line, old_end,
647 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
648 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
648 old_line -= 1
649 old_line -= 1
649 new_line -= 1
650 new_line -= 1
650
651
651 context = len(gr) == 5
652 context = len(gr) == 5
652 old_end += old_line
653 old_end += old_line
653 new_end += new_line
654 new_end += new_line
654
655
655 if context:
656 if context:
656 # skip context only if it's first line
657 # skip context only if it's first line
657 if int(gr[0]) > 1:
658 if int(gr[0]) > 1:
658 lines.append({
659 lines.append({
659 'old_lineno': '...',
660 'old_lineno': '...',
660 'new_lineno': '...',
661 'new_lineno': '...',
661 'action': Action.CONTEXT,
662 'action': Action.CONTEXT,
662 'line': line,
663 'line': line,
663 })
664 })
664
665
665 line = diff_iter.next()
666 line = diff_iter.next()
666
667
667 while old_line < old_end or new_line < new_end:
668 while old_line < old_end or new_line < new_end:
668 command = ' '
669 command = ' '
669 if line:
670 if line:
670 command = line[0]
671 command = line[0]
671
672
672 affects_old = affects_new = False
673 affects_old = affects_new = False
673
674
674 # ignore those if we don't expect them
675 # ignore those if we don't expect them
675 if command in '#@':
676 if command in '#@':
676 continue
677 continue
677 elif command == '+':
678 elif command == '+':
678 affects_new = True
679 affects_new = True
679 action = Action.ADD
680 action = Action.ADD
680 stats[0] += 1
681 stats[0] += 1
681 elif command == '-':
682 elif command == '-':
682 affects_old = True
683 affects_old = True
683 action = Action.DELETE
684 action = Action.DELETE
684 stats[1] += 1
685 stats[1] += 1
685 else:
686 else:
686 affects_old = affects_new = True
687 affects_old = affects_new = True
687 action = Action.UNMODIFIED
688 action = Action.UNMODIFIED
688
689
689 if not self._newline_marker.match(line):
690 if not self._newline_marker.match(line):
690 old_line += affects_old
691 old_line += affects_old
691 new_line += affects_new
692 new_line += affects_new
692 lines.append({
693 lines.append({
693 'old_lineno': affects_old and old_line or '',
694 'old_lineno': affects_old and old_line or '',
694 'new_lineno': affects_new and new_line or '',
695 'new_lineno': affects_new and new_line or '',
695 'action': action,
696 'action': action,
696 'line': self._clean_line(line, command)
697 'line': self._clean_line(line, command)
697 })
698 })
698 raw_diff.append(line)
699 raw_diff.append(line)
699
700
700 line = diff_iter.next()
701 line = diff_iter.next()
701
702
702 if self._newline_marker.match(line):
703 if self._newline_marker.match(line):
703 # we need to append to lines, since this is not
704 # we need to append to lines, since this is not
704 # counted in the line specs of diff
705 # counted in the line specs of diff
705 lines.append({
706 lines.append({
706 'old_lineno': '...',
707 'old_lineno': '...',
707 'new_lineno': '...',
708 'new_lineno': '...',
708 'action': Action.CONTEXT,
709 'action': Action.CONTEXT,
709 'line': self._clean_line(line, command)
710 'line': self._clean_line(line, command)
710 })
711 })
711
712
712 except StopIteration:
713 except StopIteration:
713 pass
714 pass
714 return ''.join(raw_diff), chunks, stats
715 return ''.join(raw_diff), chunks, stats
715
716
716 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
717 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
717 def _new_parse_lines(self, diff_iter):
718 def _new_parse_lines(self, diff_iter):
718 """
719 """
719 Parse the diff an return data for the template.
720 Parse the diff an return data for the template.
720 """
721 """
721
722
722 stats = [0, 0]
723 stats = [0, 0]
723 chunks = []
724 chunks = []
724 raw_diff = []
725 raw_diff = []
725
726
726 try:
727 try:
727 line = diff_iter.next()
728 line = diff_iter.next()
728
729
729 while line:
730 while line:
730 raw_diff.append(line)
731 raw_diff.append(line)
731 # match header e.g @@ -0,0 +1 @@\n'
732 # match header e.g @@ -0,0 +1 @@\n'
732 match = self._chunk_re.match(line)
733 match = self._chunk_re.match(line)
733
734
734 if not match:
735 if not match:
735 break
736 break
736
737
737 gr = match.groups()
738 gr = match.groups()
738 (old_line, old_end,
739 (old_line, old_end,
739 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
740 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
740
741
741 lines = []
742 lines = []
742 hunk = {
743 hunk = {
743 'section_header': gr[-1],
744 'section_header': gr[-1],
744 'source_start': old_line,
745 'source_start': old_line,
745 'source_length': old_end,
746 'source_length': old_end,
746 'target_start': new_line,
747 'target_start': new_line,
747 'target_length': new_end,
748 'target_length': new_end,
748 'lines': lines,
749 'lines': lines,
749 }
750 }
750 chunks.append(hunk)
751 chunks.append(hunk)
751
752
752 old_line -= 1
753 old_line -= 1
753 new_line -= 1
754 new_line -= 1
754
755
755 context = len(gr) == 5
756 context = len(gr) == 5
756 old_end += old_line
757 old_end += old_line
757 new_end += new_line
758 new_end += new_line
758
759
759 line = diff_iter.next()
760 line = diff_iter.next()
760
761
761 while old_line < old_end or new_line < new_end:
762 while old_line < old_end or new_line < new_end:
762 command = ' '
763 command = ' '
763 if line:
764 if line:
764 command = line[0]
765 command = line[0]
765
766
766 affects_old = affects_new = False
767 affects_old = affects_new = False
767
768
768 # ignore those if we don't expect them
769 # ignore those if we don't expect them
769 if command in '#@':
770 if command in '#@':
770 continue
771 continue
771 elif command == '+':
772 elif command == '+':
772 affects_new = True
773 affects_new = True
773 action = Action.ADD
774 action = Action.ADD
774 stats[0] += 1
775 stats[0] += 1
775 elif command == '-':
776 elif command == '-':
776 affects_old = True
777 affects_old = True
777 action = Action.DELETE
778 action = Action.DELETE
778 stats[1] += 1
779 stats[1] += 1
779 else:
780 else:
780 affects_old = affects_new = True
781 affects_old = affects_new = True
781 action = Action.UNMODIFIED
782 action = Action.UNMODIFIED
782
783
783 if not self._newline_marker.match(line):
784 if not self._newline_marker.match(line):
784 old_line += affects_old
785 old_line += affects_old
785 new_line += affects_new
786 new_line += affects_new
786 lines.append({
787 lines.append({
787 'old_lineno': affects_old and old_line or '',
788 'old_lineno': affects_old and old_line or '',
788 'new_lineno': affects_new and new_line or '',
789 'new_lineno': affects_new and new_line or '',
789 'action': action,
790 'action': action,
790 'line': self._clean_line(line, command)
791 'line': self._clean_line(line, command)
791 })
792 })
792 raw_diff.append(line)
793 raw_diff.append(line)
793
794
794 line = diff_iter.next()
795 line = diff_iter.next()
795
796
796 if self._newline_marker.match(line):
797 if self._newline_marker.match(line):
797 # we need to append to lines, since this is not
798 # we need to append to lines, since this is not
798 # counted in the line specs of diff
799 # counted in the line specs of diff
799 if affects_old:
800 if affects_old:
800 action = Action.OLD_NO_NL
801 action = Action.OLD_NO_NL
801 elif affects_new:
802 elif affects_new:
802 action = Action.NEW_NO_NL
803 action = Action.NEW_NO_NL
803 else:
804 else:
804 raise Exception('invalid context for no newline')
805 raise Exception('invalid context for no newline')
805
806
806 lines.append({
807 lines.append({
807 'old_lineno': None,
808 'old_lineno': None,
808 'new_lineno': None,
809 'new_lineno': None,
809 'action': action,
810 'action': action,
810 'line': self._clean_line(line, command)
811 'line': self._clean_line(line, command)
811 })
812 })
812
813
813 except StopIteration:
814 except StopIteration:
814 pass
815 pass
815
816
816 return ''.join(raw_diff), chunks, stats
817 return ''.join(raw_diff), chunks, stats
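
Each element of the chunks list returned here is a hunk dictionary. For a header such as '@@ -1,2 +1,2 @@ def foo():' the shape looks like this (line contents are illustrative):

hunk = {
    'section_header': ' def foo():',
    'source_start': 1,
    'source_length': 2,
    'target_start': 1,
    'target_length': 2,
    'lines': [
        {'old_lineno': 1, 'new_lineno': 1, 'action': Action.UNMODIFIED, 'line': 'def foo():'},
        {'old_lineno': 2, 'new_lineno': '', 'action': Action.DELETE, 'line': '    return 1'},
        {'old_lineno': '', 'new_lineno': 2, 'action': Action.ADD, 'line': '    return 2'},
    ],
}
# stats is the running [added, deleted] pair, here [1, 1].
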
817
818
818 def _safe_id(self, idstring):
819 def _safe_id(self, idstring):
819 """Make a string safe for including in an id attribute.
820 """Make a string safe for including in an id attribute.
820
821
821 The HTML spec says that id attributes 'must begin with
822 The HTML spec says that id attributes 'must begin with
822 a letter ([A-Za-z]) and may be followed by any number
823 a letter ([A-Za-z]) and may be followed by any number
823 of letters, digits ([0-9]), hyphens ("-"), underscores
824 of letters, digits ([0-9]), hyphens ("-"), underscores
824 ("_"), colons (":"), and periods (".")'. These regexps
825 ("_"), colons (":"), and periods (".")'. These regexps
825 are slightly over-zealous, in that they remove colons
826 are slightly over-zealous, in that they remove colons
826 and periods unnecessarily.
827 and periods unnecessarily.
827
828
828 Whitespace is transformed into underscores, and then
829 Whitespace is transformed into underscores, and then
829 anything which is not a hyphen or a character that
830 anything which is not a hyphen or a character that
830 matches \w (alphanumerics and underscore) is removed.
831 matches \w (alphanumerics and underscore) is removed.
831
832
832 """
833 """
833 # Transform all whitespace to underscore
834 # Transform all whitespace to underscore
834 idstring = re.sub(r'\s', "_", '%s' % idstring)
835 idstring = re.sub(r'\s', "_", '%s' % idstring)
835 # Remove everything that is not a hyphen or a member of \w
836 # Remove everything that is not a hyphen or a member of \w
836 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
837 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
837 return idstring
838 return idstring
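
A doctest-style illustration of the transformation (whitespace becomes an underscore, anything that is not a hyphen or a \w character is dropped, and the result is lowercased); processor stands for any DiffProcessor instance, since the method uses no instance state:

>>> processor._safe_id(u'My File.txt')
u'my_filetxt'
>>> processor._safe_id(u'setup.py')
u'setuppy'
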
838
839
839 @classmethod
840 @classmethod
840 def diff_splitter(cls, string):
841 def diff_splitter(cls, string):
841 """
842 """
842 Diff split that emulates .splitlines() but works only on \n
843 Diff split that emulates .splitlines() but works only on \n
843 """
844 """
844 if not string:
845 if not string:
845 return
846 return
846 elif string == '\n':
847 elif string == '\n':
847 yield u'\n'
848 yield u'\n'
848 else:
849 else:
849
850
850 has_newline = string.endswith('\n')
851 has_newline = string.endswith('\n')
851 elements = string.split('\n')
852 elements = string.split('\n')
852 if has_newline:
853 if has_newline:
853 # skip last element as it's empty string from newlines
854 # skip last element as it's empty string from newlines
854 elements = elements[:-1]
855 elements = elements[:-1]
855
856
856 len_elements = len(elements)
857 len_elements = len(elements)
857
858
858 for cnt, line in enumerate(elements, start=1):
859 for cnt, line in enumerate(elements, start=1):
859 last_line = cnt == len_elements
860 last_line = cnt == len_elements
860 if last_line and not has_newline:
861 if last_line and not has_newline:
861 yield safe_unicode(line)
862 yield safe_unicode(line)
862 else:
863 else:
863 yield safe_unicode(line) + '\n'
864 yield safe_unicode(line) + '\n'
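
A quick illustration of how diff_splitter differs from str.splitlines(): it only splits on '\n', keeps the newline on every yielded line, and leaves a final unterminated line as-is:

# classmethod, so no instance is needed
list(DiffProcessor.diff_splitter('a\r\nb\nc'))
# -> [u'a\r\n', u'b\n', u'c']   (splitlines() would also break on '\r')
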
864
865
865 def prepare(self, inline_diff=True):
866 def prepare(self, inline_diff=True):
866 """
867 """
867 Prepare the passed udiff for HTML rendering.
868 Prepare the passed udiff for HTML rendering.
868
869
869 :return: A list of dicts with diff information.
870 :return: A list of dicts with diff information.
870 """
871 """
871 parsed = self._parser(inline_diff=inline_diff)
872 parsed = self._parser(inline_diff=inline_diff)
872 self.parsed = True
873 self.parsed = True
873 self.parsed_diff = parsed
874 self.parsed_diff = parsed
874 return parsed
875 return parsed
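
A minimal end-to-end sketch, assuming vcs_diff is a diff object obtained from a repository backend (for example via repo.get_diff(...)); the limits and the 'newdiff' format value (anything other than 'gitdiff' selects the newer parser) are illustrative:

processor = DiffProcessor(
    vcs_diff, format='newdiff', diff_limit=1024 * 1024,
    file_limit=256 * 1024, show_full_diff=False)
parsed = processor.prepare()
for f in parsed:
    log.debug('%s: +%s -%s', f['filename'],
              f['stats']['added'], f['stats']['deleted'])
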
875
876
876 def as_raw(self, diff_lines=None):
877 def as_raw(self, diff_lines=None):
877 """
878 """
878 Returns raw diff as a byte string
879 Returns raw diff as a byte string
879 """
880 """
880 return self._diff.raw
881 return self._diff.raw
881
882
882 def as_html(self, table_class='code-difftable', line_class='line',
883 def as_html(self, table_class='code-difftable', line_class='line',
883 old_lineno_class='lineno old', new_lineno_class='lineno new',
884 old_lineno_class='lineno old', new_lineno_class='lineno new',
884 code_class='code', enable_comments=False, parsed_lines=None):
885 code_class='code', enable_comments=False, parsed_lines=None):
885 """
886 """
886 Return given diff as html table with customized css classes
887 Return given diff as html table with customized css classes
887 """
888 """
888 # TODO(marcink): not sure how to pass in translator
889 # TODO(marcink): not sure how to pass in translator
889 # here in an efficient way, leave the _ for proper gettext extraction
890 # here in an efficient way, leave the _ for proper gettext extraction
890 _ = lambda s: s
891 _ = lambda s: s
891
892
892 def _link_to_if(condition, label, url):
893 def _link_to_if(condition, label, url):
893 """
894 """
894 Generates a link if condition is meet or just the label if not.
895 Generates a link if condition is meet or just the label if not.
895 """
896 """
896
897
897 if condition:
898 if condition:
898 return '''<a href="%(url)s" class="tooltip"
899 return '''<a href="%(url)s" class="tooltip"
899 title="%(title)s">%(label)s</a>''' % {
900 title="%(title)s">%(label)s</a>''' % {
900 'title': _('Click to select line'),
901 'title': _('Click to select line'),
901 'url': url,
902 'url': url,
902 'label': label
903 'label': label
903 }
904 }
904 else:
905 else:
905 return label
906 return label
906 if not self.parsed:
907 if not self.parsed:
907 self.prepare()
908 self.prepare()
908
909
909 diff_lines = self.parsed_diff
910 diff_lines = self.parsed_diff
910 if parsed_lines:
911 if parsed_lines:
911 diff_lines = parsed_lines
912 diff_lines = parsed_lines
912
913
913 _html_empty = True
914 _html_empty = True
914 _html = []
915 _html = []
915 _html.append('''<table class="%(table_class)s">\n''' % {
916 _html.append('''<table class="%(table_class)s">\n''' % {
916 'table_class': table_class
917 'table_class': table_class
917 })
918 })
918
919
919 for diff in diff_lines:
920 for diff in diff_lines:
920 for line in diff['chunks']:
921 for line in diff['chunks']:
921 _html_empty = False
922 _html_empty = False
922 for change in line:
923 for change in line:
923 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
924 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
924 'lc': line_class,
925 'lc': line_class,
925 'action': change['action']
926 'action': change['action']
926 })
927 })
927 anchor_old_id = ''
928 anchor_old_id = ''
928 anchor_new_id = ''
929 anchor_new_id = ''
929 anchor_old = "%(filename)s_o%(oldline_no)s" % {
930 anchor_old = "%(filename)s_o%(oldline_no)s" % {
930 'filename': self._safe_id(diff['filename']),
931 'filename': self._safe_id(diff['filename']),
931 'oldline_no': change['old_lineno']
932 'oldline_no': change['old_lineno']
932 }
933 }
933 anchor_new = "%(filename)s_n%(oldline_no)s" % {
934 anchor_new = "%(filename)s_n%(oldline_no)s" % {
934 'filename': self._safe_id(diff['filename']),
935 'filename': self._safe_id(diff['filename']),
935 'oldline_no': change['new_lineno']
936 'oldline_no': change['new_lineno']
936 }
937 }
937 cond_old = (change['old_lineno'] != '...' and
938 cond_old = (change['old_lineno'] != '...' and
938 change['old_lineno'])
939 change['old_lineno'])
939 cond_new = (change['new_lineno'] != '...' and
940 cond_new = (change['new_lineno'] != '...' and
940 change['new_lineno'])
941 change['new_lineno'])
941 if cond_old:
942 if cond_old:
942 anchor_old_id = 'id="%s"' % anchor_old
943 anchor_old_id = 'id="%s"' % anchor_old
943 if cond_new:
944 if cond_new:
944 anchor_new_id = 'id="%s"' % anchor_new
945 anchor_new_id = 'id="%s"' % anchor_new
945
946
946 if change['action'] != Action.CONTEXT:
947 if change['action'] != Action.CONTEXT:
947 anchor_link = True
948 anchor_link = True
948 else:
949 else:
949 anchor_link = False
950 anchor_link = False
950
951
951 ###########################################################
952 ###########################################################
952 # COMMENT ICONS
953 # COMMENT ICONS
953 ###########################################################
954 ###########################################################
954 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
955 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
955
956
956 if enable_comments and change['action'] != Action.CONTEXT:
957 if enable_comments and change['action'] != Action.CONTEXT:
957 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
958 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
958
959
959 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
960 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
960
961
961 ###########################################################
962 ###########################################################
962 # OLD LINE NUMBER
963 # OLD LINE NUMBER
963 ###########################################################
964 ###########################################################
964 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
965 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
965 'a_id': anchor_old_id,
966 'a_id': anchor_old_id,
966 'olc': old_lineno_class
967 'olc': old_lineno_class
967 })
968 })
968
969
969 _html.append('''%(link)s''' % {
970 _html.append('''%(link)s''' % {
970 'link': _link_to_if(anchor_link, change['old_lineno'],
971 'link': _link_to_if(anchor_link, change['old_lineno'],
971 '#%s' % anchor_old)
972 '#%s' % anchor_old)
972 })
973 })
973 _html.append('''</td>\n''')
974 _html.append('''</td>\n''')
974 ###########################################################
975 ###########################################################
975 # NEW LINE NUMBER
976 # NEW LINE NUMBER
976 ###########################################################
977 ###########################################################
977
978
978 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
979 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
979 'a_id': anchor_new_id,
980 'a_id': anchor_new_id,
980 'nlc': new_lineno_class
981 'nlc': new_lineno_class
981 })
982 })
982
983
983 _html.append('''%(link)s''' % {
984 _html.append('''%(link)s''' % {
984 'link': _link_to_if(anchor_link, change['new_lineno'],
985 'link': _link_to_if(anchor_link, change['new_lineno'],
985 '#%s' % anchor_new)
986 '#%s' % anchor_new)
986 })
987 })
987 _html.append('''</td>\n''')
988 _html.append('''</td>\n''')
988 ###########################################################
989 ###########################################################
989 # CODE
990 # CODE
990 ###########################################################
991 ###########################################################
991 code_classes = [code_class]
992 code_classes = [code_class]
992 if (not enable_comments or
993 if (not enable_comments or
993 change['action'] == Action.CONTEXT):
994 change['action'] == Action.CONTEXT):
994 code_classes.append('no-comment')
995 code_classes.append('no-comment')
995 _html.append('\t<td class="%s">' % ' '.join(code_classes))
996 _html.append('\t<td class="%s">' % ' '.join(code_classes))
996 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
997 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
997 'code': change['line']
998 'code': change['line']
998 })
999 })
999
1000
1000 _html.append('''\t</td>''')
1001 _html.append('''\t</td>''')
1001 _html.append('''\n</tr>\n''')
1002 _html.append('''\n</tr>\n''')
1002 _html.append('''</table>''')
1003 _html.append('''</table>''')
1003 if _html_empty:
1004 if _html_empty:
1004 return None
1005 return None
1005 return ''.join(_html)
1006 return ''.join(_html)
1006
1007
1007 def stat(self):
1008 def stat(self):
1008 """
1009 """
1009 Returns a tuple of added and removed lines for this instance
1010 Returns a tuple of added and removed lines for this instance
1010 """
1011 """
1011 return self.adds, self.removes
1012 return self.adds, self.removes
1012
1013
1013 def get_context_of_line(
1014 def get_context_of_line(
1014 self, path, diff_line=None, context_before=3, context_after=3):
1015 self, path, diff_line=None, context_before=3, context_after=3):
1015 """
1016 """
1016 Returns the context lines for the specified diff line.
1017 Returns the context lines for the specified diff line.
1017
1018
1018 :type diff_line: :class:`DiffLineNumber`
1019 :type diff_line: :class:`DiffLineNumber`
1019 """
1020 """
1020 assert self.parsed, "DiffProcessor is not initialized."
1021 assert self.parsed, "DiffProcessor is not initialized."
1021
1022
1022 if None not in diff_line:
1023 if None not in diff_line:
1023 raise ValueError(
1024 raise ValueError(
1024 "Cannot specify both line numbers: {}".format(diff_line))
1025 "Cannot specify both line numbers: {}".format(diff_line))
1025
1026
1026 file_diff = self._get_file_diff(path)
1027 file_diff = self._get_file_diff(path)
1027 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1028 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1028
1029
1029 first_line_to_include = max(idx - context_before, 0)
1030 first_line_to_include = max(idx - context_before, 0)
1030 first_line_after_context = idx + context_after + 1
1031 first_line_after_context = idx + context_after + 1
1031 context_lines = chunk[first_line_to_include:first_line_after_context]
1032 context_lines = chunk[first_line_to_include:first_line_after_context]
1032
1033
1033 line_contents = [
1034 line_contents = [
1034 _context_line(line) for line in context_lines
1035 _context_line(line) for line in context_lines
1035 if _is_diff_content(line)]
1036 if _is_diff_content(line)]
1036 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1037 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1037 # Once they are fixed, we can drop this line here.
1038 # Once they are fixed, we can drop this line here.
1038 if line_contents:
1039 if line_contents:
1039 line_contents[-1] = (
1040 line_contents[-1] = (
1040 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1041 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1041 return line_contents
1042 return line_contents
1042
1043
1043 def find_context(self, path, context, offset=0):
1044 def find_context(self, path, context, offset=0):
1044 """
1045 """
1045 Finds the given `context` inside of the diff.
1046 Finds the given `context` inside of the diff.
1046
1047
1047 Use the parameter `offset` to specify which offset the target line has
1048 Use the parameter `offset` to specify which offset the target line has
1048 inside of the given `context`. This way the correct diff line will be
1049 inside of the given `context`. This way the correct diff line will be
1049 returned.
1050 returned.
1050
1051
1051 :param offset: Shall be used to specify the offset of the main line
1052 :param offset: Shall be used to specify the offset of the main line
1052 within the given `context`.
1053 within the given `context`.
1053 """
1054 """
1054 if offset < 0 or offset >= len(context):
1055 if offset < 0 or offset >= len(context):
1055 raise ValueError(
1056 raise ValueError(
1056 "Only positive values up to the length of the context "
1057 "Only positive values up to the length of the context "
1057 "minus one are allowed.")
1058 "minus one are allowed.")
1058
1059
1059 matches = []
1060 matches = []
1060 file_diff = self._get_file_diff(path)
1061 file_diff = self._get_file_diff(path)
1061
1062
1062 for chunk in file_diff['chunks']:
1063 for chunk in file_diff['chunks']:
1063 context_iter = iter(context)
1064 context_iter = iter(context)
1064 for line_idx, line in enumerate(chunk):
1065 for line_idx, line in enumerate(chunk):
1065 try:
1066 try:
1066 if _context_line(line) == context_iter.next():
1067 if _context_line(line) == context_iter.next():
1067 continue
1068 continue
1068 except StopIteration:
1069 except StopIteration:
1069 matches.append((line_idx, chunk))
1070 matches.append((line_idx, chunk))
1070 context_iter = iter(context)
1071 context_iter = iter(context)
1071
1072
1072 # Increment position and trigger StopIteration
1073 # Increment position and trigger StopIteration
1073 # if we had a match at the end
1074 # if we had a match at the end
1074 line_idx += 1
1075 line_idx += 1
1075 try:
1076 try:
1076 context_iter.next()
1077 context_iter.next()
1077 except StopIteration:
1078 except StopIteration:
1078 matches.append((line_idx, chunk))
1079 matches.append((line_idx, chunk))
1079
1080
1080 effective_offset = len(context) - offset
1081 effective_offset = len(context) - offset
1081 found_at_diff_lines = [
1082 found_at_diff_lines = [
1082 _line_to_diff_line_number(chunk[idx - effective_offset])
1083 _line_to_diff_line_number(chunk[idx - effective_offset])
1083 for idx, chunk in matches]
1084 for idx, chunk in matches]
1084
1085
1085 return found_at_diff_lines
1086 return found_at_diff_lines
1086
1087
1087 def _get_file_diff(self, path):
1088 def _get_file_diff(self, path):
1088 for file_diff in self.parsed_diff:
1089 for file_diff in self.parsed_diff:
1089 if file_diff['filename'] == path:
1090 if file_diff['filename'] == path:
1090 break
1091 break
1091 else:
1092 else:
1092 raise FileNotInDiffException("File {} not in diff".format(path))
1093 raise FileNotInDiffException("File {} not in diff".format(path))
1093 return file_diff
1094 return file_diff
1094
1095
1095 def _find_chunk_line_index(self, file_diff, diff_line):
1096 def _find_chunk_line_index(self, file_diff, diff_line):
1096 for chunk in file_diff['chunks']:
1097 for chunk in file_diff['chunks']:
1097 for idx, line in enumerate(chunk):
1098 for idx, line in enumerate(chunk):
1098 if line['old_lineno'] == diff_line.old:
1099 if line['old_lineno'] == diff_line.old:
1099 return chunk, idx
1100 return chunk, idx
1100 if line['new_lineno'] == diff_line.new:
1101 if line['new_lineno'] == diff_line.new:
1101 return chunk, idx
1102 return chunk, idx
1102 raise LineNotInDiffException(
1103 raise LineNotInDiffException(
1103 "The line {} is not part of the diff.".format(diff_line))
1104 "The line {} is not part of the diff.".format(diff_line))
1104
1105
1105
1106
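A minimal usage sketch for get_context_of_line() and find_context() above; the `processor` instance, file name and line numbers are hypothetical, assuming a DiffProcessor that has already parsed a diff touching 'setup.py':

# Hypothetical example: fetch three lines of context around new line 42
# of 'setup.py', then locate that same context again in the parsed diff.
diff_line = DiffLineNumber(old=None, new=42)
context = processor.get_context_of_line(
    'setup.py', diff_line=diff_line, context_before=3, context_after=3)
# `context` is a list of (action, line) tuples; find_context() returns the
# DiffLineNumber positions at which this snippet occurs in the diff.
matches = processor.find_context('setup.py', context, offset=3)
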
1106 def _is_diff_content(line):
1107 def _is_diff_content(line):
1107 return line['action'] in (
1108 return line['action'] in (
1108 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1109 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1109
1110
1110
1111
1111 def _context_line(line):
1112 def _context_line(line):
1112 return (line['action'], line['line'])
1113 return (line['action'], line['line'])
1113
1114
1114
1115
1115 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1116 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1116
1117
1117
1118
1118 def _line_to_diff_line_number(line):
1119 def _line_to_diff_line_number(line):
1119 new_line_no = line['new_lineno'] or None
1120 new_line_no = line['new_lineno'] or None
1120 old_line_no = line['old_lineno'] or None
1121 old_line_no = line['old_lineno'] or None
1121 return DiffLineNumber(old=old_line_no, new=new_line_no)
1122 return DiffLineNumber(old=old_line_no, new=new_line_no)
1122
1123
1123
1124
1124 class FileNotInDiffException(Exception):
1125 class FileNotInDiffException(Exception):
1125 """
1126 """
1126 Raised when the context for a missing file is requested.
1127 Raised when the context for a missing file is requested.
1127
1128
1128 If you request the context for a line in a file which is not part of the
1129 If you request the context for a line in a file which is not part of the
1129 given diff, then this exception is raised.
1130 given diff, then this exception is raised.
1130 """
1131 """
1131
1132
1132
1133
1133 class LineNotInDiffException(Exception):
1134 class LineNotInDiffException(Exception):
1134 """
1135 """
1135 Raised when the context for a missing line is requested.
1136 Raised when the context for a missing line is requested.
1136
1137
1137 If you request the context for a line in a file and this line is not
1138 If you request the context for a line in a file and this line is not
1138 part of the given diff, then this exception is raised.
1139 part of the given diff, then this exception is raised.
1139 """
1140 """
1140
1141
1141
1142
1142 class DiffLimitExceeded(Exception):
1143 class DiffLimitExceeded(Exception):
1143 pass
1144 pass
1144
1145
1145
1146
1146 # NOTE(marcink): if diffs.mako changes, this probably
1147 # NOTE(marcink): if diffs.mako changes, this probably
1147 # needs a bump to the next version
1148 # needs a bump to the next version
1148 CURRENT_DIFF_VERSION = 'v4'
1149 CURRENT_DIFF_VERSION = 'v4'
1149
1150
1150
1151
1151 def _cleanup_cache_file(cached_diff_file):
1152 def _cleanup_cache_file(cached_diff_file):
1152 # remove the cache file so we do not keep it in a damaged state
1153 # remove the cache file so we do not keep it in a damaged state
1153 try:
1154 try:
1154 os.remove(cached_diff_file)
1155 os.remove(cached_diff_file)
1155 except Exception:
1156 except Exception:
1156 log.exception('Failed to cleanup path %s', cached_diff_file)
1157 log.exception('Failed to cleanup path %s', cached_diff_file)
1157
1158
1158
1159
1159 def cache_diff(cached_diff_file, diff, commits):
1160 def cache_diff(cached_diff_file, diff, commits):
1160
1161
1161 struct = {
1162 struct = {
1162 'version': CURRENT_DIFF_VERSION,
1163 'version': CURRENT_DIFF_VERSION,
1163 'diff': diff,
1164 'diff': diff,
1164 'commits': commits
1165 'commits': commits
1165 }
1166 }
1166
1167
1167 try:
1168 try:
1168 with bz2.BZ2File(cached_diff_file, 'wb') as f:
1169 with bz2.BZ2File(cached_diff_file, 'wb') as f:
1169 pickle.dump(struct, f)
1170 pickle.dump(struct, f)
1170 log.debug('Saved diff cache under %s', cached_diff_file)
1171 log.debug('Saved diff cache under %s', cached_diff_file)
1171 except Exception:
1172 except Exception:
1172 log.warn('Failed to save cache', exc_info=True)
1173 log.warn('Failed to save cache', exc_info=True)
1173 _cleanup_cache_file(cached_diff_file)
1174 _cleanup_cache_file(cached_diff_file)
1174
1175
1175
1176
1176 def load_cached_diff(cached_diff_file):
1177 def load_cached_diff(cached_diff_file):
1177
1178
1178 default_struct = {
1179 default_struct = {
1179 'version': CURRENT_DIFF_VERSION,
1180 'version': CURRENT_DIFF_VERSION,
1180 'diff': None,
1181 'diff': None,
1181 'commits': None
1182 'commits': None
1182 }
1183 }
1183
1184
1184 has_cache = os.path.isfile(cached_diff_file)
1185 has_cache = os.path.isfile(cached_diff_file)
1185 if not has_cache:
1186 if not has_cache:
1186 return default_struct
1187 return default_struct
1187
1188
1188 data = None
1189 data = None
1190 start = time.time()
1189 try:
1191 try:
1190 with bz2.BZ2File(cached_diff_file, 'rb') as f:
1192 with bz2.BZ2File(cached_diff_file, 'rb') as f:
1191 data = pickle.load(f)
1193 data = pickle.load(f)
1192 log.debug('Loaded diff cache from %s', cached_diff_file)
1194 load_time = time.time() - start
1195 log.debug('Loaded diff cache from %s in %.3fs', cached_diff_file, load_time)
1193 except Exception:
1196 except Exception:
1194 log.warn('Failed to read diff cache file', exc_info=True)
1197 log.warn('Failed to read diff cache file', exc_info=True)
1195
1198
1196 if not data:
1199 if not data:
1197 data = default_struct
1200 data = default_struct
1198
1201
1199 if not isinstance(data, dict):
1202 if not isinstance(data, dict):
1200 # old version of data ?
1203 # old version of data ?
1201 data = default_struct
1204 data = default_struct
1202
1205
1203 # check version
1206 # check version
1204 if data.get('version') != CURRENT_DIFF_VERSION:
1207 if data.get('version') != CURRENT_DIFF_VERSION:
1205 # purge cache
1208 # purge cache
1206 _cleanup_cache_file(cached_diff_file)
1209 _cleanup_cache_file(cached_diff_file)
1207 return default_struct
1210 return default_struct
1208
1211
1209 return data
1212 return data
1210
1213
1211
1214
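A hedged round-trip sketch for cache_diff() and load_cached_diff(); the file path, `parsed_diff` and `commit_list` below are made up for illustration:

# Hypothetical round trip: persist a parsed diff as a bz2-compressed pickle,
# then read it back. load_cached_diff() logs how long the read took and falls
# back to the default struct when the file is missing, unreadable, or stored
# with a version other than CURRENT_DIFF_VERSION.
cached_diff_file = '/tmp/example_diff_cache.bz2'
cache_diff(cached_diff_file, diff=parsed_diff, commits=commit_list)

cached = load_cached_diff(cached_diff_file)
if cached['diff'] is not None:
    parsed_diff, commit_list = cached['diff'], cached['commits']
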
1212 def generate_diff_cache_key(*args):
1215 def generate_diff_cache_key(*args):
1213 """
1216 """
1214 Helper to generate a cache key using arguments
1217 Helper to generate a cache key using arguments
1215 """
1218 """
1216 def arg_mapper(input_param):
1219 def arg_mapper(input_param):
1217 input_param = safe_str(input_param)
1220 input_param = safe_str(input_param)
1218 # we cannot allow '/' in arguments since it would allow
1221 # we cannot allow '/' in arguments since it would allow
1219 # subdirectory usage
1222 # subdirectory usage
1220 input_param = input_param.replace('/', '_')
1223 input_param = input_param.replace('/', '_')
1221 return input_param or None # prevent empty string arguments
1224 return input_param or None # prevent empty string arguments
1222
1225
1223 return '_'.join([
1226 return '_'.join([
1224 '{}' for i in range(len(args))]).format(*map(arg_mapper, args))
1227 '{}' for i in range(len(args))]).format(*map(arg_mapper, args))
1225
1228
1226
1229
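For illustration, a cache key built from hypothetical arguments; each part is run through arg_mapper() and the sanitized parts are joined with '_':

# Hypothetical inputs; any '/' inside a part is replaced before joining.
key = generate_diff_cache_key('pull-request', 42, 'abc123', 'def456')
# -> 'pull-request_42_abc123_def456'
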
1227 def diff_cache_exist(cache_storage, *args):
1230 def diff_cache_exist(cache_storage, *args):
1228 """
1231 """
1229 Based on all generated arguments check and return a cache path
1232 Based on all generated arguments check and return a cache path
1230 """
1233 """
1231 cache_key = generate_diff_cache_key(*args)
1234 cache_key = generate_diff_cache_key(*args)
1232 cache_file_path = os.path.join(cache_storage, cache_key)
1235 cache_file_path = os.path.join(cache_storage, cache_key)
1233 # prevent path traversal attacks via params that contain e.g. '../../'
1236 # prevent path traversal attacks via params that contain e.g. '../../'
1234 if not os.path.abspath(cache_file_path).startswith(cache_storage):
1237 if not os.path.abspath(cache_file_path).startswith(cache_storage):
1235 raise ValueError('Final path must be within {}'.format(cache_storage))
1238 raise ValueError('Final path must be within {}'.format(cache_storage))
1236
1239
1237 return cache_file_path
1240 return cache_file_path
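
A usage sketch for diff_cache_exist(); the storage directory and key parts are hypothetical:

# Hypothetical lookup: build the on-disk cache path for a diff and guard
# against key parts that would resolve outside of cache_storage.
cache_storage = '/var/cache/rc_diff_cache'
try:
    cached_diff_file = diff_cache_exist(
        cache_storage, 'pull-request', 42, 'abc123', 'def456')
except ValueError:
    cached_diff_file = None  # a crafted argument escaped the storage dir

if cached_diff_file:
    cached = load_cached_diff(cached_diff_file)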