##// END OF EJS Templates
diffs: switched bz2 to gzip, since gzip can be up to 10x faster in some cases with only a slight size penalty
marcink -
r3854:7b87073e default
parent child Browse files
Show More
@@ -1,1254 +1,1271 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2019 RhodeCode GmbH
3 # Copyright (C) 2011-2019 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21
21
22 """
22 """
23 Set of diffing helpers, previously part of vcs
23 Set of diffing helpers, previously part of vcs
24 """
24 """
25
25
26 import os
26 import os
27 import re
27 import re
28 import bz2
28 import bz2
29 import gzip
29 import time
30 import time
30
31
31 import collections
32 import collections
32 import difflib
33 import difflib
33 import logging
34 import logging
34 import cPickle as pickle
35 import cPickle as pickle
35 from itertools import tee, imap
36 from itertools import tee, imap
36
37
37 from rhodecode.lib.vcs.exceptions import VCSError
38 from rhodecode.lib.vcs.exceptions import VCSError
38 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
39 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
39 from rhodecode.lib.utils2 import safe_unicode, safe_str
40 from rhodecode.lib.utils2 import safe_unicode, safe_str
40
41
# module-level logger for this diffs module
log = logging.getLogger(__name__)

# define max context, a file with more than this numbers of lines is unusable
# in browser anyway
MAX_CONTEXT = 1024 * 20
DEFAULT_CONTEXT = 3
47
48
48
49
def get_diff_context(request):
    """
    Return the number of diff context lines the client asked for.

    ``fullcontext=1`` in the query string selects the maximum usable
    context, otherwise the default (3 lines) is used.
    """
    if request.GET.get('fullcontext', '') == '1':
        return MAX_CONTEXT
    return DEFAULT_CONTEXT
51
52
52
53
def get_diff_whitespace_flag(request):
    """Return True when the request asks to ignore whitespace (``ignorews=1``)."""
    flag = request.GET.get('ignorews', '')
    return flag == '1'
55
56
56
57
class OPS(object):
    """Single-letter codes classifying what happened to a file in a diff."""
    ADD = 'A'  # file added
    MOD = 'M'  # file modified
    DEL = 'D'  # file deleted
61
62
62
63
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Return a git-style diff between ``filenode_old`` and ``filenode_new``.

    :param filenode_old: ``FileNode`` holding the previous content
    :param filenode_new: ``FileNode`` holding the new content
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines shown around each hunk
    :raises VCSError: when either argument is not a ``FileNode``
    """
    # make sure we pass in default context
    context = context or 3
    # protect against IntOverflow when passing HUGE context
    context = min(context, MAX_CONTEXT)

    # submodules have no regular file content to diff
    has_submodules = any(
        isinstance(node, SubModuleNode)
        for node in (filenode_new, filenode_old))
    if has_submodules:
        return ''

    for filenode in (filenode_old, filenode_new):
        if not isinstance(filenode, FileNode):
            raise VCSError(
                "Given object should be FileNode object, not %s"
                % filenode.__class__)

    repo = filenode_new.commit.repository
    # fall back to the empty commit when the old node has no commit yet
    old_commit = filenode_old.commit or repo.EMPTY_COMMIT
    new_commit = filenode_new.commit

    return repo.get_diff(
        old_commit, new_commit, filenode_new.path,
        ignore_whitespace, context, path1=filenode_old.path)
94
95
# numeric operation codes attached to a file entry's ``stats['ops']`` dict
NEW_FILENODE = 1      # file was added
DEL_FILENODE = 2      # file was deleted
MOD_FILENODE = 3      # file content was modified
RENAMED_FILENODE = 4  # file was renamed
COPIED_FILENODE = 5   # file was copied
CHMOD_FILENODE = 6    # file mode (chmod) changed
BIN_FILENODE = 7      # binary file, diff hidden
102
103
103
104
class LimitedDiffContainer(object):
    """
    List-like wrapper around a (possibly truncated) parsed diff that
    remembers the limit it was cut at and the size it had reached.
    """

    def __init__(self, diff_limit, cur_diff_size, diff):
        self.diff = diff
        self.diff_limit = diff_limit
        self.cur_diff_size = cur_diff_size

    def __getitem__(self, key):
        # delegate indexing straight to the wrapped diff list
        return self.diff[key]

    def __iter__(self):
        # iterate the wrapped diff list directly
        return iter(self.diff)
117
118
118
119
class Action(object):
    """
    Constants for the ``action`` value of the lines in a parsed diff.
    """

    ADD = 'add'
    DELETE = 'del'
    UNMODIFIED = 'unmod'

    CONTEXT = 'context'
    OLD_NO_NL = 'old-no-nl'
    NEW_NO_NL = 'new-no-nl'
131
132
132
133
class DiffProcessor(object):
    """
    Turn a unified or git diff into a list of per-file dicts with meta
    information that can be used to render it in a HTML template.

    .. note:: Unicode handling

        The original diffs are a byte sequence and can contain filenames
        in mixed encodings. This class generally returns `unicode` objects
        since the result is intended for presentation to the user.
    """
    # matches a hunk header: "@@ -old_start,old_len +new_start,new_len @@ tail"
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
    # marker emitted by diff tools for files lacking a trailing newline
    _newline_marker = re.compile(r'^\\ No newline at end of file')

    # used for inline highlighter word split; operates on already-escaped
    # text, hence the &gt;/&lt;/&amp; entities
    _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')

    # collapse ranges of commits over given number
    _collapse_commits_over = 5
154
155
def __init__(self, diff, format='gitdiff', diff_limit=None,
             file_limit=None, show_full_diff=True):
    """
    :param diff: A `Diff` object representing a diff from a vcs backend
    :param format: format of diff passed, `udiff` or `gitdiff`
    :param diff_limit: define the size of diff that is considered "big"
        based on that parameter cut off will be triggered, set to None
        to show full diff
    """
    self._diff = diff
    self._format = format

    # running counters, bumped while lines are parsed
    self.adds = 0
    self.removes = 0
    self.cur_diff_size = 0

    # cut-off configuration
    self.diff_limit = diff_limit
    self.file_limit = file_limit
    self.show_full_diff = show_full_diff

    # parser state
    self.parsed = False
    self.parsed_diff = []

    log.debug('Initialized DiffProcessor with %s mode', format)
    # pick highlighter/parser pair based on the requested diff format
    if format == 'gitdiff':
        self.differ = self._highlight_line_difflib
        self._parser = self._parse_gitdiff
    else:
        self.differ = self._highlight_line_udiff
        self._parser = self._new_parse_gitdiff
183
184
184 def _copy_iterator(self):
185 def _copy_iterator(self):
185 """
186 """
186 make a fresh copy of generator, we should not iterate thru
187 make a fresh copy of generator, we should not iterate thru
187 an original as it's needed for repeating operations on
188 an original as it's needed for repeating operations on
188 this instance of DiffProcessor
189 this instance of DiffProcessor
189 """
190 """
190 self.__udiff, iterator_copy = tee(self.__udiff)
191 self.__udiff, iterator_copy = tee(self.__udiff)
191 return iterator_copy
192 return iterator_copy
192
193
193 def _escaper(self, string):
194 def _escaper(self, string):
194 """
195 """
195 Escaper for diff escapes special chars and checks the diff limit
196 Escaper for diff escapes special chars and checks the diff limit
196
197
197 :param string:
198 :param string:
198 """
199 """
199 self.cur_diff_size += len(string)
200 self.cur_diff_size += len(string)
200
201
201 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
202 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
202 raise DiffLimitExceeded('Diff Limit Exceeded')
203 raise DiffLimitExceeded('Diff Limit Exceeded')
203
204
204 return string \
205 return string \
205 .replace('&', '&amp;')\
206 .replace('&', '&amp;')\
206 .replace('<', '&lt;')\
207 .replace('<', '&lt;')\
207 .replace('>', '&gt;')
208 .replace('>', '&gt;')
208
209
def _line_counter(self, l):
    """
    Inspect a single raw diff line and bump the running add/remove
    counters of this diff.

    :param l: raw diff line, including its leading +/- marker
    :return: the line coerced to unicode
    """
    # '+++'/'---' are file headers, not content changes
    if l.startswith('+') and not l.startswith('+++'):
        self.adds += 1
    elif l.startswith('-') and not l.startswith('---'):
        self.removes += 1
    return safe_unicode(l)
220
221
def _highlight_line_difflib(self, line, next_):
    """
    Highlight inline changes in both lines, wrapping removed fragments
    in ``<del>`` and inserted fragments in ``<ins>`` tags (in place).
    """
    # orient the pair so that `old` is the deleted side
    if line['action'] == Action.DELETE:
        old, new = line, next_
    else:
        old, new = next_, line

    old_words = self._token_re.split(old['line'])
    new_words = self._token_re.split(new['line'])
    matcher = difflib.SequenceMatcher(None, old_words, new_words)

    old_fragments, new_fragments = [], []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        old_frag = ''.join(old_words[i1:i2])
        new_frag = ''.join(new_words[j1:j2])
        if tag != 'equal':
            # only wrap non-empty changed fragments
            if old_frag:
                old_frag = '<del>%s</del>' % old_frag
            if new_frag:
                new_frag = '<ins>%s</ins>' % new_frag
        old_fragments.append(old_frag)
        new_fragments.append(new_frag)

    old['line'] = "".join(old_fragments)
    new['line'] = "".join(new_fragments)
249
250
250 def _highlight_line_udiff(self, line, next_):
251 def _highlight_line_udiff(self, line, next_):
251 """
252 """
252 Highlight inline changes in both lines.
253 Highlight inline changes in both lines.
253 """
254 """
254 start = 0
255 start = 0
255 limit = min(len(line['line']), len(next_['line']))
256 limit = min(len(line['line']), len(next_['line']))
256 while start < limit and line['line'][start] == next_['line'][start]:
257 while start < limit and line['line'][start] == next_['line'][start]:
257 start += 1
258 start += 1
258 end = -1
259 end = -1
259 limit -= start
260 limit -= start
260 while -end <= limit and line['line'][end] == next_['line'][end]:
261 while -end <= limit and line['line'][end] == next_['line'][end]:
261 end -= 1
262 end -= 1
262 end += 1
263 end += 1
263 if start or end:
264 if start or end:
264 def do(l):
265 def do(l):
265 last = end + len(l['line'])
266 last = end + len(l['line'])
266 if l['action'] == Action.ADD:
267 if l['action'] == Action.ADD:
267 tag = 'ins'
268 tag = 'ins'
268 else:
269 else:
269 tag = 'del'
270 tag = 'del'
270 l['line'] = '%s<%s>%s</%s>%s' % (
271 l['line'] = '%s<%s>%s</%s>%s' % (
271 l['line'][:start],
272 l['line'][:start],
272 tag,
273 tag,
273 l['line'][start:last],
274 l['line'][start:last],
274 tag,
275 tag,
275 l['line'][last:]
276 l['line'][last:]
276 )
277 )
277 do(line)
278 do(line)
278 do(next_)
279 do(next_)
279
280
280 def _clean_line(self, line, command):
281 def _clean_line(self, line, command):
281 if command in ['+', '-', ' ']:
282 if command in ['+', '-', ' ']:
282 # only modify the line if it's actually a diff thing
283 # only modify the line if it's actually a diff thing
283 line = line[1:]
284 line = line[1:]
284 return line
285 return line
285
286
def _parse_gitdiff(self, inline_diff=True):
    """
    Parse the stored git diff into a list of per-file dicts (filename,
    revisions, chunks, stats, limit flags), optionally highlighting
    inline changes.

    :param inline_diff: when True, run the configured inline highlighter
        over adjacent changed line pairs
    :return: sorted file list, possibly wrapped in a
        ``LimitedDiffContainer`` when a size limit was hit
    """
    _files = []
    # identity wrapper; swapped for LimitedDiffContainer when a limit hits
    diff_container = lambda arg: arg

    for chunk in self._diff.chunks():
        head = chunk.header

        diff = imap(self._escaper, self.diff_splitter(chunk.diff))
        raw_diff = chunk.raw
        limited_diff = False
        exceeds_limit = False

        op = None
        stats = {
            'added': 0,
            'deleted': 0,
            'binary': False,
            'ops': {},
        }

        if head['deleted_file_mode']:
            op = OPS.DEL
            stats['binary'] = True
            stats['ops'][DEL_FILENODE] = 'deleted file'

        elif head['new_file_mode']:
            op = OPS.ADD
            stats['binary'] = True
            stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
        else:  # modify operation, can be copy, rename or chmod

            # CHMOD
            if head['new_mode'] and head['old_mode']:
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][CHMOD_FILENODE] = (
                    'modified file chmod %s => %s' % (
                        head['old_mode'], head['new_mode']))
            # RENAME
            if head['rename_from'] != head['rename_to']:
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][RENAMED_FILENODE] = (
                    'file renamed from %s to %s' % (
                        head['rename_from'], head['rename_to']))
            # COPY
            if head.get('copy_from') and head.get('copy_to'):
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][COPIED_FILENODE] = (
                    'file copied from %s to %s' % (
                        head['copy_from'], head['copy_to']))

        # If our new parsed headers didn't match anything fallback to
        # old style detection
        if op is None:
            if not head['a_file'] and head['b_file']:
                op = OPS.ADD
                stats['binary'] = True
                stats['ops'][NEW_FILENODE] = 'new file'

            elif head['a_file'] and not head['b_file']:
                op = OPS.DEL
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            # it's not ADD not DELETE
            if op is None:
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][MOD_FILENODE] = 'modified file'

        # a real non-binary diff
        if head['a_file'] or head['b_file']:
            try:
                raw_diff, chunks, _stats = self._parse_lines(diff)
                stats['binary'] = False
                stats['added'] = _stats[0]
                stats['deleted'] = _stats[1]
                # explicit mark that it's a modified file
                if op == OPS.MOD:
                    stats['ops'][MOD_FILENODE] = 'modified file'
                exceeds_limit = len(raw_diff) > self.file_limit

                # changed from _escaper function so we validate size of
                # each file instead of the whole diff.
                # diff will hide big files but still show small ones;
                # big files are fairly safe to be parsed but the browser
                # is the bottleneck
                if not self.show_full_diff and exceeds_limit:
                    raise DiffLimitExceeded('File Limit Exceeded')

            except DiffLimitExceeded:
                diff_container = lambda _diff: \
                    LimitedDiffContainer(
                        self.diff_limit, self.cur_diff_size, _diff)

                exceeds_limit = len(raw_diff) > self.file_limit
                limited_diff = True
                chunks = []

        else:  # GIT format binary patch, or possibly empty diff
            if head['bin_patch']:
                # we have operation already extracted, but we mark simply
                # it's a diff we wont show for binary files
                stats['ops'][BIN_FILENODE] = 'binary diff hidden'
            chunks = []

        if chunks and not self.show_full_diff and op == OPS.DEL:
            # if not full diff mode show deleted file contents
            # TODO: anderson: if the view is not too big, there is no way
            # to see the content of the file
            chunks = []

        # prepend a context pseudo-chunk describing the non-trivial ops
        chunks.insert(0, [{
            'old_lineno': '',
            'new_lineno': '',
            'action': Action.CONTEXT,
            'line': msg,
        } for _op, msg in stats['ops'].iteritems()
            if _op not in [MOD_FILENODE]])

        _files.append({
            'filename': safe_unicode(head['b_path']),
            'old_revision': head['a_blob_id'],
            'new_revision': head['b_blob_id'],
            'chunks': chunks,
            'raw_diff': safe_unicode(raw_diff),
            'operation': op,
            'stats': stats,
            'exceeds_limit': exceeds_limit,
            'is_limited_diff': limited_diff,
        })

    # adds first, then modifications, deletions last
    sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                           OPS.DEL: 2}.get(info['operation'])

    if not inline_diff:
        return diff_container(sorted(_files, key=sorter))

    # highlight inline changes on adjacent changed-line pairs
    for diff_data in _files:
        for chunk in diff_data['chunks']:
            lineiter = iter(chunk)
            try:
                while 1:
                    line = lineiter.next()
                    if line['action'] not in (
                            Action.UNMODIFIED, Action.CONTEXT):
                        nextline = lineiter.next()
                        if nextline['action'] in ['unmod', 'context'] or \
                                nextline['action'] == line['action']:
                            continue
                        self.differ(line, nextline)
            except StopIteration:
                pass

    return diff_container(sorted(_files, key=sorter))
444
445
def _check_large_diff(self):
    """
    Raise ``DiffLimitExceeded`` when the accumulated diff size has grown
    past ``diff_limit`` and the user did not explicitly request the full
    diff.

    :raises DiffLimitExceeded: when the limit is crossed
    """
    log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
    if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
        # bug fix: the limit used to be passed as a second exception
        # argument, so the `%s` placeholder was never interpolated into
        # the message; format it into the message string instead
        raise DiffLimitExceeded(
            'Diff Limit `%s` Exceeded' % self.diff_limit)
449
450
450 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
451 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
451 def _new_parse_gitdiff(self, inline_diff=True):
452 def _new_parse_gitdiff(self, inline_diff=True):
452 _files = []
453 _files = []
453
454
454 # this can be overriden later to a LimitedDiffContainer type
455 # this can be overriden later to a LimitedDiffContainer type
455 diff_container = lambda arg: arg
456 diff_container = lambda arg: arg
456
457
457 for chunk in self._diff.chunks():
458 for chunk in self._diff.chunks():
458 head = chunk.header
459 head = chunk.header
459 log.debug('parsing diff %r', head)
460 log.debug('parsing diff %r', head)
460
461
461 raw_diff = chunk.raw
462 raw_diff = chunk.raw
462 limited_diff = False
463 limited_diff = False
463 exceeds_limit = False
464 exceeds_limit = False
464
465
465 op = None
466 op = None
466 stats = {
467 stats = {
467 'added': 0,
468 'added': 0,
468 'deleted': 0,
469 'deleted': 0,
469 'binary': False,
470 'binary': False,
470 'old_mode': None,
471 'old_mode': None,
471 'new_mode': None,
472 'new_mode': None,
472 'ops': {},
473 'ops': {},
473 }
474 }
474 if head['old_mode']:
475 if head['old_mode']:
475 stats['old_mode'] = head['old_mode']
476 stats['old_mode'] = head['old_mode']
476 if head['new_mode']:
477 if head['new_mode']:
477 stats['new_mode'] = head['new_mode']
478 stats['new_mode'] = head['new_mode']
478 if head['b_mode']:
479 if head['b_mode']:
479 stats['new_mode'] = head['b_mode']
480 stats['new_mode'] = head['b_mode']
480
481
481 # delete file
482 # delete file
482 if head['deleted_file_mode']:
483 if head['deleted_file_mode']:
483 op = OPS.DEL
484 op = OPS.DEL
484 stats['binary'] = True
485 stats['binary'] = True
485 stats['ops'][DEL_FILENODE] = 'deleted file'
486 stats['ops'][DEL_FILENODE] = 'deleted file'
486
487
487 # new file
488 # new file
488 elif head['new_file_mode']:
489 elif head['new_file_mode']:
489 op = OPS.ADD
490 op = OPS.ADD
490 stats['binary'] = True
491 stats['binary'] = True
491 stats['old_mode'] = None
492 stats['old_mode'] = None
492 stats['new_mode'] = head['new_file_mode']
493 stats['new_mode'] = head['new_file_mode']
493 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
494 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
494
495
495 # modify operation, can be copy, rename or chmod
496 # modify operation, can be copy, rename or chmod
496 else:
497 else:
497 # CHMOD
498 # CHMOD
498 if head['new_mode'] and head['old_mode']:
499 if head['new_mode'] and head['old_mode']:
499 op = OPS.MOD
500 op = OPS.MOD
500 stats['binary'] = True
501 stats['binary'] = True
501 stats['ops'][CHMOD_FILENODE] = (
502 stats['ops'][CHMOD_FILENODE] = (
502 'modified file chmod %s => %s' % (
503 'modified file chmod %s => %s' % (
503 head['old_mode'], head['new_mode']))
504 head['old_mode'], head['new_mode']))
504
505
505 # RENAME
506 # RENAME
506 if head['rename_from'] != head['rename_to']:
507 if head['rename_from'] != head['rename_to']:
507 op = OPS.MOD
508 op = OPS.MOD
508 stats['binary'] = True
509 stats['binary'] = True
509 stats['renamed'] = (head['rename_from'], head['rename_to'])
510 stats['renamed'] = (head['rename_from'], head['rename_to'])
510 stats['ops'][RENAMED_FILENODE] = (
511 stats['ops'][RENAMED_FILENODE] = (
511 'file renamed from %s to %s' % (
512 'file renamed from %s to %s' % (
512 head['rename_from'], head['rename_to']))
513 head['rename_from'], head['rename_to']))
513 # COPY
514 # COPY
514 if head.get('copy_from') and head.get('copy_to'):
515 if head.get('copy_from') and head.get('copy_to'):
515 op = OPS.MOD
516 op = OPS.MOD
516 stats['binary'] = True
517 stats['binary'] = True
517 stats['copied'] = (head['copy_from'], head['copy_to'])
518 stats['copied'] = (head['copy_from'], head['copy_to'])
518 stats['ops'][COPIED_FILENODE] = (
519 stats['ops'][COPIED_FILENODE] = (
519 'file copied from %s to %s' % (
520 'file copied from %s to %s' % (
520 head['copy_from'], head['copy_to']))
521 head['copy_from'], head['copy_to']))
521
522
522 # If our new parsed headers didn't match anything fallback to
523 # If our new parsed headers didn't match anything fallback to
523 # old style detection
524 # old style detection
524 if op is None:
525 if op is None:
525 if not head['a_file'] and head['b_file']:
526 if not head['a_file'] and head['b_file']:
526 op = OPS.ADD
527 op = OPS.ADD
527 stats['binary'] = True
528 stats['binary'] = True
528 stats['new_file'] = True
529 stats['new_file'] = True
529 stats['ops'][NEW_FILENODE] = 'new file'
530 stats['ops'][NEW_FILENODE] = 'new file'
530
531
531 elif head['a_file'] and not head['b_file']:
532 elif head['a_file'] and not head['b_file']:
532 op = OPS.DEL
533 op = OPS.DEL
533 stats['binary'] = True
534 stats['binary'] = True
534 stats['ops'][DEL_FILENODE] = 'deleted file'
535 stats['ops'][DEL_FILENODE] = 'deleted file'
535
536
536 # it's not ADD not DELETE
537 # it's not ADD not DELETE
537 if op is None:
538 if op is None:
538 op = OPS.MOD
539 op = OPS.MOD
539 stats['binary'] = True
540 stats['binary'] = True
540 stats['ops'][MOD_FILENODE] = 'modified file'
541 stats['ops'][MOD_FILENODE] = 'modified file'
541
542
542 # a real non-binary diff
543 # a real non-binary diff
543 if head['a_file'] or head['b_file']:
544 if head['a_file'] or head['b_file']:
544 # simulate splitlines, so we keep the line end part
545 # simulate splitlines, so we keep the line end part
545 diff = self.diff_splitter(chunk.diff)
546 diff = self.diff_splitter(chunk.diff)
546
547
547 # append each file to the diff size
548 # append each file to the diff size
548 raw_chunk_size = len(raw_diff)
549 raw_chunk_size = len(raw_diff)
549
550
550 exceeds_limit = raw_chunk_size > self.file_limit
551 exceeds_limit = raw_chunk_size > self.file_limit
551 self.cur_diff_size += raw_chunk_size
552 self.cur_diff_size += raw_chunk_size
552
553
553 try:
554 try:
554 # Check each file instead of the whole diff.
555 # Check each file instead of the whole diff.
555 # Diff will hide big files but still show small ones.
556 # Diff will hide big files but still show small ones.
556 # From the tests big files are fairly safe to be parsed
557 # From the tests big files are fairly safe to be parsed
557 # but the browser is the bottleneck.
558 # but the browser is the bottleneck.
558 if not self.show_full_diff and exceeds_limit:
559 if not self.show_full_diff and exceeds_limit:
559 log.debug('File `%s` exceeds current file_limit of %s',
560 log.debug('File `%s` exceeds current file_limit of %s',
560 safe_unicode(head['b_path']), self.file_limit)
561 safe_unicode(head['b_path']), self.file_limit)
561 raise DiffLimitExceeded(
562 raise DiffLimitExceeded(
562 'File Limit %s Exceeded', self.file_limit)
563 'File Limit %s Exceeded', self.file_limit)
563
564
564 self._check_large_diff()
565 self._check_large_diff()
565
566
566 raw_diff, chunks, _stats = self._new_parse_lines(diff)
567 raw_diff, chunks, _stats = self._new_parse_lines(diff)
567 stats['binary'] = False
568 stats['binary'] = False
568 stats['added'] = _stats[0]
569 stats['added'] = _stats[0]
569 stats['deleted'] = _stats[1]
570 stats['deleted'] = _stats[1]
570 # explicit mark that it's a modified file
571 # explicit mark that it's a modified file
571 if op == OPS.MOD:
572 if op == OPS.MOD:
572 stats['ops'][MOD_FILENODE] = 'modified file'
573 stats['ops'][MOD_FILENODE] = 'modified file'
573
574
574 except DiffLimitExceeded:
575 except DiffLimitExceeded:
575 diff_container = lambda _diff: \
576 diff_container = lambda _diff: \
576 LimitedDiffContainer(
577 LimitedDiffContainer(
577 self.diff_limit, self.cur_diff_size, _diff)
578 self.diff_limit, self.cur_diff_size, _diff)
578
579
579 limited_diff = True
580 limited_diff = True
580 chunks = []
581 chunks = []
581
582
582 else: # GIT format binary patch, or possibly empty diff
583 else: # GIT format binary patch, or possibly empty diff
583 if head['bin_patch']:
584 if head['bin_patch']:
584 # we have operation already extracted, but we mark simply
585 # we have operation already extracted, but we mark simply
585 # it's a diff we wont show for binary files
586 # it's a diff we wont show for binary files
586 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
587 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
587 chunks = []
588 chunks = []
588
589
589 # Hide content of deleted node by setting empty chunks
590 # Hide content of deleted node by setting empty chunks
590 if chunks and not self.show_full_diff and op == OPS.DEL:
591 if chunks and not self.show_full_diff and op == OPS.DEL:
591 # if not full diff mode show deleted file contents
592 # if not full diff mode show deleted file contents
592 # TODO: anderson: if the view is not too big, there is no way
593 # TODO: anderson: if the view is not too big, there is no way
593 # to see the content of the file
594 # to see the content of the file
594 chunks = []
595 chunks = []
595
596
596 chunks.insert(
597 chunks.insert(
597 0, [{'old_lineno': '',
598 0, [{'old_lineno': '',
598 'new_lineno': '',
599 'new_lineno': '',
599 'action': Action.CONTEXT,
600 'action': Action.CONTEXT,
600 'line': msg,
601 'line': msg,
601 } for _op, msg in stats['ops'].iteritems()
602 } for _op, msg in stats['ops'].iteritems()
602 if _op not in [MOD_FILENODE]])
603 if _op not in [MOD_FILENODE]])
603
604
604 original_filename = safe_unicode(head['a_path'])
605 original_filename = safe_unicode(head['a_path'])
605 _files.append({
606 _files.append({
606 'original_filename': original_filename,
607 'original_filename': original_filename,
607 'filename': safe_unicode(head['b_path']),
608 'filename': safe_unicode(head['b_path']),
608 'old_revision': head['a_blob_id'],
609 'old_revision': head['a_blob_id'],
609 'new_revision': head['b_blob_id'],
610 'new_revision': head['b_blob_id'],
610 'chunks': chunks,
611 'chunks': chunks,
611 'raw_diff': safe_unicode(raw_diff),
612 'raw_diff': safe_unicode(raw_diff),
612 'operation': op,
613 'operation': op,
613 'stats': stats,
614 'stats': stats,
614 'exceeds_limit': exceeds_limit,
615 'exceeds_limit': exceeds_limit,
615 'is_limited_diff': limited_diff,
616 'is_limited_diff': limited_diff,
616 })
617 })
617
618
618 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
619 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
619 OPS.DEL: 2}.get(info['operation'])
620 OPS.DEL: 2}.get(info['operation'])
620
621
621 return diff_container(sorted(_files, key=sorter))
622 return diff_container(sorted(_files, key=sorter))
622
623
623 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
624 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
    def _parse_lines(self, diff_iter):
        """
        Parse the diff and return data for the template.

        :param diff_iter: iterator yielding the diff line by line, with
            each line keeping its trailing newline (see ``diff_splitter``)
        :return: tuple ``(raw_diff, chunks, stats)`` — the consumed diff
            joined back into one string, a list of hunks (each hunk a list
            of line dicts), and ``[added, deleted]`` counters.
        """

        stats = [0, 0]
        chunks = []
        raw_diff = []

        try:
            line = diff_iter.next()

            while line:
                raw_diff.append(line)
                lines = []
                chunks.append(lines)

                # hunk header, e.g. '@@ -1,5 +1,6 @@'
                match = self._chunk_re.match(line)

                if not match:
                    break

                gr = match.groups()
                # missing counts default to 1 ('@@ -0,0 +1 @@' style headers)
                (old_line, old_end,
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                # convert to 0-based positions; incremented back below
                old_line -= 1
                new_line -= 1

                context = len(gr) == 5
                # turn lengths into exclusive end positions
                old_end += old_line
                new_end += new_line

                if context:
                    # skip context only if it's first line
                    if int(gr[0]) > 1:
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': Action.CONTEXT,
                            'line': line,
                        })

                line = diff_iter.next()

                # consume hunk body until both old and new ranges are filled
                while old_line < old_end or new_line < new_end:
                    command = ' '
                    if line:
                        command = line[0]

                    affects_old = affects_new = False

                    # ignore those if we don't expect them
                    # NOTE(review): this `continue` does not advance `line`,
                    # so a '#'/'@' line inside a hunk body would loop forever
                    # — TODO confirm whether such input can ever reach here.
                    if command in '#@':
                        continue
                    elif command == '+':
                        affects_new = True
                        action = Action.ADD
                        stats[0] += 1
                    elif command == '-':
                        affects_old = True
                        action = Action.DELETE
                        stats[1] += 1
                    else:
                        affects_old = affects_new = True
                        action = Action.UNMODIFIED

                    if not self._newline_marker.match(line):
                        # booleans add as 0/1 to the respective counters
                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': self._clean_line(line, command)
                        })
                        raw_diff.append(line)

                    line = diff_iter.next()

                    if self._newline_marker.match(line):
                        # we need to append to lines, since this is not
                        # counted in the line specs of diff
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': Action.CONTEXT,
                            'line': self._clean_line(line, command)
                        })

        except StopIteration:
            # normal termination: the iterator ran dry
            pass
        return ''.join(raw_diff), chunks, stats
716
717
717 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
718 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
    def _new_parse_lines(self, diff_iter):
        """
        Parse the diff and return data for the template.

        Newer variant of :meth:`_parse_lines`: hunks are emitted as dicts
        carrying the section header and source/target start/length, with
        the parsed lines under the ``'lines'`` key.

        :param diff_iter: iterator yielding the diff line by line, each
            line keeping its trailing newline
        :return: tuple ``(raw_diff, chunks, stats)`` with ``stats`` being
            ``[added, deleted]`` counters.
        """

        stats = [0, 0]
        chunks = []
        raw_diff = []

        try:
            line = diff_iter.next()

            while line:
                raw_diff.append(line)
                # match header e.g @@ -0,0 +1 @@\n'
                match = self._chunk_re.match(line)

                if not match:
                    break

                gr = match.groups()
                # missing counts default to 1
                (old_line, old_end,
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]

                lines = []
                hunk = {
                    'section_header': gr[-1],
                    'source_start': old_line,
                    'source_length': old_end,
                    'target_start': new_line,
                    'target_length': new_end,
                    'lines': lines,
                }
                chunks.append(hunk)

                # convert to 0-based positions; incremented back below
                old_line -= 1
                new_line -= 1

                context = len(gr) == 5
                # turn lengths into exclusive end positions
                old_end += old_line
                new_end += new_line

                line = diff_iter.next()

                # consume hunk body until both old and new ranges are filled
                while old_line < old_end or new_line < new_end:
                    command = ' '
                    if line:
                        command = line[0]

                    affects_old = affects_new = False

                    # ignore those if we don't expect them
                    # NOTE(review): `continue` here does not advance `line`;
                    # a '#'/'@' line inside a hunk body would loop forever —
                    # TODO confirm whether such input can ever reach here.
                    if command in '#@':
                        continue
                    elif command == '+':
                        affects_new = True
                        action = Action.ADD
                        stats[0] += 1
                    elif command == '-':
                        affects_old = True
                        action = Action.DELETE
                        stats[1] += 1
                    else:
                        affects_old = affects_new = True
                        action = Action.UNMODIFIED

                    if not self._newline_marker.match(line):
                        # booleans add as 0/1 to the respective counters
                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': self._clean_line(line, command)
                        })
                        raw_diff.append(line)

                    line = diff_iter.next()

                    if self._newline_marker.match(line):
                        # we need to append to lines, since this is not
                        # counted in the line specs of diff
                        if affects_old:
                            action = Action.OLD_NO_NL
                        elif affects_new:
                            action = Action.NEW_NO_NL
                        else:
                            raise Exception('invalid context for no newline')

                        lines.append({
                            'old_lineno': None,
                            'new_lineno': None,
                            'action': action,
                            'line': self._clean_line(line, command)
                        })

        except StopIteration:
            # normal termination: the iterator ran dry
            pass

        return ''.join(raw_diff), chunks, stats
818
819
819 def _safe_id(self, idstring):
820 def _safe_id(self, idstring):
820 """Make a string safe for including in an id attribute.
821 """Make a string safe for including in an id attribute.
821
822
822 The HTML spec says that id attributes 'must begin with
823 The HTML spec says that id attributes 'must begin with
823 a letter ([A-Za-z]) and may be followed by any number
824 a letter ([A-Za-z]) and may be followed by any number
824 of letters, digits ([0-9]), hyphens ("-"), underscores
825 of letters, digits ([0-9]), hyphens ("-"), underscores
825 ("_"), colons (":"), and periods (".")'. These regexps
826 ("_"), colons (":"), and periods (".")'. These regexps
826 are slightly over-zealous, in that they remove colons
827 are slightly over-zealous, in that they remove colons
827 and periods unnecessarily.
828 and periods unnecessarily.
828
829
829 Whitespace is transformed into underscores, and then
830 Whitespace is transformed into underscores, and then
830 anything which is not a hyphen or a character that
831 anything which is not a hyphen or a character that
831 matches \w (alphanumerics and underscore) is removed.
832 matches \w (alphanumerics and underscore) is removed.
832
833
833 """
834 """
834 # Transform all whitespace to underscore
835 # Transform all whitespace to underscore
835 idstring = re.sub(r'\s', "_", '%s' % idstring)
836 idstring = re.sub(r'\s', "_", '%s' % idstring)
836 # Remove everything that is not a hyphen or a member of \w
837 # Remove everything that is not a hyphen or a member of \w
837 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
838 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
838 return idstring
839 return idstring
839
840
840 @classmethod
841 @classmethod
841 def diff_splitter(cls, string):
842 def diff_splitter(cls, string):
842 """
843 """
843 Diff split that emulates .splitlines() but works only on \n
844 Diff split that emulates .splitlines() but works only on \n
844 """
845 """
845 if not string:
846 if not string:
846 return
847 return
847 elif string == '\n':
848 elif string == '\n':
848 yield u'\n'
849 yield u'\n'
849 else:
850 else:
850
851
851 has_newline = string.endswith('\n')
852 has_newline = string.endswith('\n')
852 elements = string.split('\n')
853 elements = string.split('\n')
853 if has_newline:
854 if has_newline:
854 # skip last element as it's empty string from newlines
855 # skip last element as it's empty string from newlines
855 elements = elements[:-1]
856 elements = elements[:-1]
856
857
857 len_elements = len(elements)
858 len_elements = len(elements)
858
859
859 for cnt, line in enumerate(elements, start=1):
860 for cnt, line in enumerate(elements, start=1):
860 last_line = cnt == len_elements
861 last_line = cnt == len_elements
861 if last_line and not has_newline:
862 if last_line and not has_newline:
862 yield safe_unicode(line)
863 yield safe_unicode(line)
863 else:
864 else:
864 yield safe_unicode(line) + '\n'
865 yield safe_unicode(line) + '\n'
865
866
866 def prepare(self, inline_diff=True):
867 def prepare(self, inline_diff=True):
867 """
868 """
868 Prepare the passed udiff for HTML rendering.
869 Prepare the passed udiff for HTML rendering.
869
870
870 :return: A list of dicts with diff information.
871 :return: A list of dicts with diff information.
871 """
872 """
872 parsed = self._parser(inline_diff=inline_diff)
873 parsed = self._parser(inline_diff=inline_diff)
873 self.parsed = True
874 self.parsed = True
874 self.parsed_diff = parsed
875 self.parsed_diff = parsed
875 return parsed
876 return parsed
876
877
    def as_raw(self, diff_lines=None):
        """
        Returns raw diff as a byte string

        :param diff_lines: kept for API compatibility; currently unused —
            the raw diff is always taken from the wrapped ``self._diff``.
        """
        return self._diff.raw
882
883
    def as_html(self, table_class='code-difftable', line_class='line',
                old_lineno_class='lineno old', new_lineno_class='lineno new',
                code_class='code', enable_comments=False, parsed_lines=None):
        """
        Return given diff as html table with customized css classes

        :param table_class: css class of the outer ``<table>``
        :param line_class: css class applied to every ``<tr>``
        :param old_lineno_class: css class of the old line-number cell
        :param new_lineno_class: css class of the new line-number cell
        :param code_class: css class of the code cell
        :param enable_comments: when True, render add-comment icons on
            non-context lines
        :param parsed_lines: optional pre-parsed diff to render instead of
            ``self.parsed_diff``
        :return: the rendered HTML string, or ``None`` when the diff
            contained no chunks at all
        """
        # TODO(marcink): not sure how to pass in translator
        # here in an efficient way, leave the _ for proper gettext extraction
        _ = lambda s: s

        def _link_to_if(condition, label, url):
            """
            Generates a link if condition is meet or just the label if not.
            """

            if condition:
                return '''<a href="%(url)s" class="tooltip"
                   title="%(title)s">%(label)s</a>''' % {
                    'title': _('Click to select line'),
                    'url': url,
                    'label': label
                }
            else:
                return label
        # lazily parse on first use
        if not self.parsed:
            self.prepare()

        diff_lines = self.parsed_diff
        if parsed_lines:
            diff_lines = parsed_lines

        _html_empty = True
        _html = []
        _html.append('''<table class="%(table_class)s">\n''' % {
            'table_class': table_class
        })

        for diff in diff_lines:
            for line in diff['chunks']:
                _html_empty = False
                for change in line:
                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                        'lc': line_class,
                        'action': change['action']
                    })
                    anchor_old_id = ''
                    anchor_new_id = ''
                    # anchors: <safe filename>_o<old line> / _n<new line>
                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['old_lineno']
                    }
                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['new_lineno']
                    }
                    # only real line numbers (not '...' placeholders) get ids
                    cond_old = (change['old_lineno'] != '...' and
                                change['old_lineno'])
                    cond_new = (change['new_lineno'] != '...' and
                                change['new_lineno'])
                    if cond_old:
                        anchor_old_id = 'id="%s"' % anchor_old
                    if cond_new:
                        anchor_new_id = 'id="%s"' % anchor_new

                    # context rows are not linkable
                    if change['action'] != Action.CONTEXT:
                        anchor_link = True
                    else:
                        anchor_link = False

                    ###########################################################
                    # COMMENT ICONS
                    ###########################################################
                    _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')

                    if enable_comments and change['action'] != Action.CONTEXT:
                        _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')

                    _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')

                    ###########################################################
                    # OLD LINE NUMBER
                    ###########################################################
                    _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                        'a_id': anchor_old_id,
                        'olc': old_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(anchor_link, change['old_lineno'],
                                            '#%s' % anchor_old)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # NEW LINE NUMBER
                    ###########################################################

                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                        'a_id': anchor_new_id,
                        'nlc': new_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(anchor_link, change['new_lineno'],
                                            '#%s' % anchor_new)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # CODE
                    ###########################################################
                    code_classes = [code_class]
                    if (not enable_comments or
                            change['action'] == Action.CONTEXT):
                        code_classes.append('no-comment')
                    _html.append('\t<td class="%s">' % ' '.join(code_classes))
                    _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                        'code': change['line']
                    })

                    _html.append('''\t</td>''')
                    _html.append('''\n</tr>\n''')
        _html.append('''</table>''')
        if _html_empty:
            return None
        return ''.join(_html)
1007
1008
1008 def stat(self):
1009 def stat(self):
1009 """
1010 """
1010 Returns tuple of added, and removed lines for this instance
1011 Returns tuple of added, and removed lines for this instance
1011 """
1012 """
1012 return self.adds, self.removes
1013 return self.adds, self.removes
1013
1014
    def get_context_of_line(
            self, path, diff_line=None, context_before=3, context_after=3):
        """
        Returns the context lines for the specified diff line.

        Exactly one of ``diff_line.old`` / ``diff_line.new`` must be set;
        the other must be ``None``.

        :param path: filename the line belongs to, as stored in the diff
        :param context_before: number of lines to include before the match
        :param context_after: number of lines to include after the match
        :type diff_line: :class:`DiffLineNumber`
        """
        assert self.parsed, "DiffProcessor is not initialized."

        # reject a fully-specified tuple: one side must be None
        if None not in diff_line:
            raise ValueError(
                "Cannot specify both line numbers: {}".format(diff_line))

        file_diff = self._get_file_diff(path)
        chunk, idx = self._find_chunk_line_index(file_diff, diff_line)

        # clamp the window start at the beginning of the chunk
        first_line_to_include = max(idx - context_before, 0)
        first_line_after_context = idx + context_after + 1
        context_lines = chunk[first_line_to_include:first_line_after_context]

        # keep only real content lines (add/delete/unmodified)
        line_contents = [
            _context_line(line) for line in context_lines
            if _is_diff_content(line)]
        # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
        # Once they are fixed, we can drop this line here.
        if line_contents:
            line_contents[-1] = (
                line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
        return line_contents
1043
1044
    def find_context(self, path, context, offset=0):
        """
        Finds the given `context` inside of the diff.

        Use the parameter `offset` to specify which offset the target line has
        inside of the given `context`. This way the correct diff line will be
        returned.

        :param path: filename to search in
        :param context: sequence of ``(action, line)`` pairs as produced
            by ``_context_line``
        :param offset: Shall be used to specify the offset of the main line
            within the given `context`.
        :return: list of :class:`DiffLineNumber` for every match
        """
        if offset < 0 or offset >= len(context):
            raise ValueError(
                "Only positive values up to the length of the context "
                "minus one are allowed.")

        matches = []
        file_diff = self._get_file_diff(path)

        for chunk in file_diff['chunks']:
            context_iter = iter(context)
            for line_idx, line in enumerate(chunk):
                try:
                    # advance through the expected context while it matches
                    if _context_line(line) == context_iter.next():
                        continue
                except StopIteration:
                    # full context consumed: record match ending here
                    matches.append((line_idx, chunk))
                # mismatch (or just matched): restart the context scan
                context_iter = iter(context)

            # Increment position and triger StopIteration
            # if we had a match at the end
            line_idx += 1
            try:
                context_iter.next()
            except StopIteration:
                matches.append((line_idx, chunk))

        # translate end-of-match index back to the target line's index
        effective_offset = len(context) - offset
        found_at_diff_lines = [
            _line_to_diff_line_number(chunk[idx - effective_offset])
            for idx, chunk in matches]

        return found_at_diff_lines
1087
1088
1088 def _get_file_diff(self, path):
1089 def _get_file_diff(self, path):
1089 for file_diff in self.parsed_diff:
1090 for file_diff in self.parsed_diff:
1090 if file_diff['filename'] == path:
1091 if file_diff['filename'] == path:
1091 break
1092 break
1092 else:
1093 else:
1093 raise FileNotInDiffException("File {} not in diff".format(path))
1094 raise FileNotInDiffException("File {} not in diff".format(path))
1094 return file_diff
1095 return file_diff
1095
1096
1096 def _find_chunk_line_index(self, file_diff, diff_line):
1097 def _find_chunk_line_index(self, file_diff, diff_line):
1097 for chunk in file_diff['chunks']:
1098 for chunk in file_diff['chunks']:
1098 for idx, line in enumerate(chunk):
1099 for idx, line in enumerate(chunk):
1099 if line['old_lineno'] == diff_line.old:
1100 if line['old_lineno'] == diff_line.old:
1100 return chunk, idx
1101 return chunk, idx
1101 if line['new_lineno'] == diff_line.new:
1102 if line['new_lineno'] == diff_line.new:
1102 return chunk, idx
1103 return chunk, idx
1103 raise LineNotInDiffException(
1104 raise LineNotInDiffException(
1104 "The line {} is not part of the diff.".format(diff_line))
1105 "The line {} is not part of the diff.".format(diff_line))
1105
1106
1106
1107
1107 def _is_diff_content(line):
1108 def _is_diff_content(line):
1108 return line['action'] in (
1109 return line['action'] in (
1109 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1110 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1110
1111
1111
1112
1112 def _context_line(line):
1113 def _context_line(line):
1113 return (line['action'], line['line'])
1114 return (line['action'], line['line'])
1114
1115
1115
1116
1116 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1117 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1117
1118
1118
1119
1119 def _line_to_diff_line_number(line):
1120 def _line_to_diff_line_number(line):
1120 new_line_no = line['new_lineno'] or None
1121 new_line_no = line['new_lineno'] or None
1121 old_line_no = line['old_lineno'] or None
1122 old_line_no = line['old_lineno'] or None
1122 return DiffLineNumber(old=old_line_no, new=new_line_no)
1123 return DiffLineNumber(old=old_line_no, new=new_line_no)
1123
1124
1124
1125
1125 class FileNotInDiffException(Exception):
1126 class FileNotInDiffException(Exception):
1126 """
1127 """
1127 Raised when the context for a missing file is requested.
1128 Raised when the context for a missing file is requested.
1128
1129
1129 If you request the context for a line in a file which is not part of the
1130 If you request the context for a line in a file which is not part of the
1130 given diff, then this exception is raised.
1131 given diff, then this exception is raised.
1131 """
1132 """
1132
1133
1133
1134
1134 class LineNotInDiffException(Exception):
1135 class LineNotInDiffException(Exception):
1135 """
1136 """
1136 Raised when the context for a missing line is requested.
1137 Raised when the context for a missing line is requested.
1137
1138
1138 If you request the context for a line in a file and this line is not
1139 If you request the context for a line in a file and this line is not
1139 part of the given diff, then this exception is raised.
1140 part of the given diff, then this exception is raised.
1140 """
1141 """
1141
1142
1142
1143
1143 class DiffLimitExceeded(Exception):
1144 class DiffLimitExceeded(Exception):
1144 pass
1145 pass
1145
1146
1146
1147
1147 # NOTE(marcink): if diffs.mako change, probably this
1148 # NOTE(marcink): if diffs.mako change, probably this
1148 # needs a bump to next version
1149 # needs a bump to next version
1149 CURRENT_DIFF_VERSION = 'v4'
1150 CURRENT_DIFF_VERSION = 'v4'
1150
1151
1151
1152
1152 def _cleanup_cache_file(cached_diff_file):
1153 def _cleanup_cache_file(cached_diff_file):
1153 # cleanup file to not store it "damaged"
1154 # cleanup file to not store it "damaged"
1154 try:
1155 try:
1155 os.remove(cached_diff_file)
1156 os.remove(cached_diff_file)
1156 except Exception:
1157 except Exception:
1157 log.exception('Failed to cleanup path %s', cached_diff_file)
1158 log.exception('Failed to cleanup path %s', cached_diff_file)
1158
1159
1159
1160
1161 def _get_compression_mode(cached_diff_file):
1162 mode = 'bz2'
1163 if 'mode:plain' in cached_diff_file:
1164 mode = 'plain'
1165 elif 'mode:gzip' in cached_diff_file:
1166 mode = 'gzip'
1167 return mode
1168
1169
1160 def cache_diff(cached_diff_file, diff, commits):
1170 def cache_diff(cached_diff_file, diff, commits):
1161 mode = 'plain' if 'mode:plain' in cached_diff_file else ''
1171 compression_mode = _get_compression_mode(cached_diff_file)
1162
1172
1163 struct = {
1173 struct = {
1164 'version': CURRENT_DIFF_VERSION,
1174 'version': CURRENT_DIFF_VERSION,
1165 'diff': diff,
1175 'diff': diff,
1166 'commits': commits
1176 'commits': commits
1167 }
1177 }
1168
1178
1169 start = time.time()
1179 start = time.time()
1170 try:
1180 try:
1171 if mode == 'plain':
1181 if compression_mode == 'plain':
1172 with open(cached_diff_file, 'wb') as f:
1182 with open(cached_diff_file, 'wb') as f:
1173 pickle.dump(struct, f)
1183 pickle.dump(struct, f)
1184 elif compression_mode == 'gzip':
1185 with gzip.GzipFile(cached_diff_file, 'wb') as f:
1186 pickle.dump(struct, f)
1174 else:
1187 else:
1175 with bz2.BZ2File(cached_diff_file, 'wb') as f:
1188 with bz2.BZ2File(cached_diff_file, 'wb') as f:
1176 pickle.dump(struct, f)
1189 pickle.dump(struct, f)
1177 except Exception:
1190 except Exception:
1178 log.warn('Failed to save cache', exc_info=True)
1191 log.warn('Failed to save cache', exc_info=True)
1179 _cleanup_cache_file(cached_diff_file)
1192 _cleanup_cache_file(cached_diff_file)
1180
1193
1181 log.debug('Saved diff cache under %s in %.4fs', cached_diff_file, time.time() - start)
1194 log.debug('Saved diff cache under %s in %.4fs', cached_diff_file, time.time() - start)
1182
1195
1183
1196
1184 def load_cached_diff(cached_diff_file):
1197 def load_cached_diff(cached_diff_file):
1185 mode = 'plain' if 'mode:plain' in cached_diff_file else ''
1198 compression_mode = _get_compression_mode(cached_diff_file)
1186
1199
1187 default_struct = {
1200 default_struct = {
1188 'version': CURRENT_DIFF_VERSION,
1201 'version': CURRENT_DIFF_VERSION,
1189 'diff': None,
1202 'diff': None,
1190 'commits': None
1203 'commits': None
1191 }
1204 }
1192
1205
1193 has_cache = os.path.isfile(cached_diff_file)
1206 has_cache = os.path.isfile(cached_diff_file)
1194 if not has_cache:
1207 if not has_cache:
1195 log.debug('Reading diff cache file failed %s', cached_diff_file)
1208 log.debug('Reading diff cache file failed %s', cached_diff_file)
1196 return default_struct
1209 return default_struct
1197
1210
1198 data = None
1211 data = None
1199
1212
1200 start = time.time()
1213 start = time.time()
1201 try:
1214 try:
1202 if mode == 'plain':
1215 if compression_mode == 'plain':
1203 with open(cached_diff_file, 'rb') as f:
1216 with open(cached_diff_file, 'rb') as f:
1204 data = pickle.load(f)
1217 data = pickle.load(f)
1218 elif compression_mode == 'gzip':
1219 with gzip.GzipFile(cached_diff_file, 'rb') as f:
1220 data = pickle.load(f)
1205 else:
1221 else:
1206 with bz2.BZ2File(cached_diff_file, 'rb') as f:
1222 with bz2.BZ2File(cached_diff_file, 'rb') as f:
1207 data = pickle.load(f)
1223 data = pickle.load(f)
1208 except Exception:
1224 except Exception:
1209 log.warn('Failed to read diff cache file', exc_info=True)
1225 log.warn('Failed to read diff cache file', exc_info=True)
1210
1226
1211 if not data:
1227 if not data:
1212 data = default_struct
1228 data = default_struct
1213
1229
1214 if not isinstance(data, dict):
1230 if not isinstance(data, dict):
1215 # old version of data ?
1231 # old version of data ?
1216 data = default_struct
1232 data = default_struct
1217
1233
1218 # check version
1234 # check version
1219 if data.get('version') != CURRENT_DIFF_VERSION:
1235 if data.get('version') != CURRENT_DIFF_VERSION:
1220 # purge cache
1236 # purge cache
1221 _cleanup_cache_file(cached_diff_file)
1237 _cleanup_cache_file(cached_diff_file)
1222 return default_struct
1238 return default_struct
1223
1239
1224 log.debug('Loaded diff cache from %s in %.4fs', cached_diff_file, time.time() - start)
1240 log.debug('Loaded diff cache from %s in %.4fs', cached_diff_file, time.time() - start)
1225
1241
1226 return data
1242 return data
1227
1243
1228
1244
1229 def generate_diff_cache_key(*args):
1245 def generate_diff_cache_key(*args):
1230 """
1246 """
1231 Helper to generate a cache key using arguments
1247 Helper to generate a cache key using arguments
1232 """
1248 """
1233 def arg_mapper(input_param):
1249 def arg_mapper(input_param):
1234 input_param = safe_str(input_param)
1250 input_param = safe_str(input_param)
1235 # we cannot allow '/' in arguments since it would allow
1251 # we cannot allow '/' in arguments since it would allow
1236 # subdirectory usage
1252 # subdirectory usage
1237 input_param.replace('/', '_')
1253 input_param.replace('/', '_')
1238 return input_param or None # prevent empty string arguments
1254 return input_param or None # prevent empty string arguments
1239
1255
1240 return '_'.join([
1256 return '_'.join([
1241 '{}' for i in range(len(args))]).format(*map(arg_mapper, args))
1257 '{}' for i in range(len(args))]).format(*map(arg_mapper, args))
1242
1258
1243
1259
1244 def diff_cache_exist(cache_storage, *args):
1260 def diff_cache_exist(cache_storage, *args):
1245 """
1261 """
1246 Based on all generated arguments check and return a cache path
1262 Based on all generated arguments check and return a cache path
1247 """
1263 """
1264 args = list(args) + ['mode:gzip']
1248 cache_key = generate_diff_cache_key(*args)
1265 cache_key = generate_diff_cache_key(*args)
1249 cache_file_path = os.path.join(cache_storage, cache_key)
1266 cache_file_path = os.path.join(cache_storage, cache_key)
1250 # prevent path traversal attacks using some param that have e.g '../../'
1267 # prevent path traversal attacks using some param that have e.g '../../'
1251 if not os.path.abspath(cache_file_path).startswith(cache_storage):
1268 if not os.path.abspath(cache_file_path).startswith(cache_storage):
1252 raise ValueError('Final path must be within {}'.format(cache_storage))
1269 raise ValueError('Final path must be within {}'.format(cache_storage))
1253
1270
1254 return cache_file_path
1271 return cache_file_path
General Comments 0
You need to be logged in to leave comments. Login now