diff-cache: use bz2 to reduce diff-cache size.
marcink
r2690:01439ec4 default
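The only functional change in this commit is the new `import bz2` (new lines 28-29 in the hunk below, plus one removed blank line), presumably so that the pickled diff data can be bz2-compressed before it is written to the diff-cache. The caching code itself is not part of this file, so the snippet below is only a minimal sketch of the idea, with hypothetical helper names and Python 2 imports matching this module:

import bz2
import cPickle as pickle

def compress_diff_cache(parsed_diff):
    # hypothetical helper, not part of this commit: serialize the parsed
    # diff structure, then bz2-compress the pickle so the stored cache
    # entry is much smaller
    return bz2.compress(pickle.dumps(parsed_diff, pickle.HIGHEST_PROTOCOL))

def decompress_diff_cache(blob):
    # inverse operation: decompress first, then unpickle
    return pickle.loads(bz2.decompress(blob))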
@@ -1,1212 +1,1213 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2018 RhodeCode GmbH
3 # Copyright (C) 2011-2018 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21
21
22 """
22 """
23 Set of diffing helpers, previously part of vcs
23 Set of diffing helpers, previously part of vcs
24 """
24 """
25
25
26 import os
26 import os
27 import re
27 import re
28 import bz2
29
28 import collections
30 import collections
29 import difflib
31 import difflib
30 import logging
32 import logging
31 import cPickle as pickle
33 import cPickle as pickle
32
33 from itertools import tee, imap
34 from itertools import tee, imap
34
35
35 from rhodecode.lib.vcs.exceptions import VCSError
36 from rhodecode.lib.vcs.exceptions import VCSError
36 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
37 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
37 from rhodecode.lib.utils2 import safe_unicode, safe_str
38 from rhodecode.lib.utils2 import safe_unicode, safe_str
38
39
39 log = logging.getLogger(__name__)
40 log = logging.getLogger(__name__)
40
41
41 # define max context, a file with more than this numbers of lines is unusable
42 # define max context, a file with more than this numbers of lines is unusable
42 # in browser anyway
43 # in browser anyway
43 MAX_CONTEXT = 1024 * 1014
44 MAX_CONTEXT = 1024 * 1014
44
45
45
46
46 class OPS(object):
47 class OPS(object):
47 ADD = 'A'
48 ADD = 'A'
48 MOD = 'M'
49 MOD = 'M'
49 DEL = 'D'
50 DEL = 'D'
50
51
51
52
52 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
53 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
53 """
54 """
54 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
55 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
55
56
56 :param ignore_whitespace: ignore whitespaces in diff
57 :param ignore_whitespace: ignore whitespaces in diff
57 """
58 """
58 # make sure we pass in default context
59 # make sure we pass in default context
59 context = context or 3
60 context = context or 3
60 # protect against IntOverflow when passing HUGE context
61 # protect against IntOverflow when passing HUGE context
61 if context > MAX_CONTEXT:
62 if context > MAX_CONTEXT:
62 context = MAX_CONTEXT
63 context = MAX_CONTEXT
63
64
64 submodules = filter(lambda o: isinstance(o, SubModuleNode),
65 submodules = filter(lambda o: isinstance(o, SubModuleNode),
65 [filenode_new, filenode_old])
66 [filenode_new, filenode_old])
66 if submodules:
67 if submodules:
67 return ''
68 return ''
68
69
69 for filenode in (filenode_old, filenode_new):
70 for filenode in (filenode_old, filenode_new):
70 if not isinstance(filenode, FileNode):
71 if not isinstance(filenode, FileNode):
71 raise VCSError(
72 raise VCSError(
72 "Given object should be FileNode object, not %s"
73 "Given object should be FileNode object, not %s"
73 % filenode.__class__)
74 % filenode.__class__)
74
75
75 repo = filenode_new.commit.repository
76 repo = filenode_new.commit.repository
76 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
77 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
77 new_commit = filenode_new.commit
78 new_commit = filenode_new.commit
78
79
79 vcs_gitdiff = repo.get_diff(
80 vcs_gitdiff = repo.get_diff(
80 old_commit, new_commit, filenode_new.path,
81 old_commit, new_commit, filenode_new.path,
81 ignore_whitespace, context, path1=filenode_old.path)
82 ignore_whitespace, context, path1=filenode_old.path)
82 return vcs_gitdiff
83 return vcs_gitdiff
83
84
84 NEW_FILENODE = 1
85 NEW_FILENODE = 1
85 DEL_FILENODE = 2
86 DEL_FILENODE = 2
86 MOD_FILENODE = 3
87 MOD_FILENODE = 3
87 RENAMED_FILENODE = 4
88 RENAMED_FILENODE = 4
88 COPIED_FILENODE = 5
89 COPIED_FILENODE = 5
89 CHMOD_FILENODE = 6
90 CHMOD_FILENODE = 6
90 BIN_FILENODE = 7
91 BIN_FILENODE = 7
91
92
92
93
93 class LimitedDiffContainer(object):
94 class LimitedDiffContainer(object):
94
95
95 def __init__(self, diff_limit, cur_diff_size, diff):
96 def __init__(self, diff_limit, cur_diff_size, diff):
96 self.diff = diff
97 self.diff = diff
97 self.diff_limit = diff_limit
98 self.diff_limit = diff_limit
98 self.cur_diff_size = cur_diff_size
99 self.cur_diff_size = cur_diff_size
99
100
100 def __getitem__(self, key):
101 def __getitem__(self, key):
101 return self.diff.__getitem__(key)
102 return self.diff.__getitem__(key)
102
103
103 def __iter__(self):
104 def __iter__(self):
104 for l in self.diff:
105 for l in self.diff:
105 yield l
106 yield l
106
107
107
108
108 class Action(object):
109 class Action(object):
109 """
110 """
110 Contains constants for the action value of the lines in a parsed diff.
111 Contains constants for the action value of the lines in a parsed diff.
111 """
112 """
112
113
113 ADD = 'add'
114 ADD = 'add'
114 DELETE = 'del'
115 DELETE = 'del'
115 UNMODIFIED = 'unmod'
116 UNMODIFIED = 'unmod'
116
117
117 CONTEXT = 'context'
118 CONTEXT = 'context'
118 OLD_NO_NL = 'old-no-nl'
119 OLD_NO_NL = 'old-no-nl'
119 NEW_NO_NL = 'new-no-nl'
120 NEW_NO_NL = 'new-no-nl'
120
121
121
122
122 class DiffProcessor(object):
123 class DiffProcessor(object):
123 """
124 """
124 Give it a unified or git diff and it returns a list of the files that were
125 Give it a unified or git diff and it returns a list of the files that were
125 mentioned in the diff together with a dict of meta information that
126 mentioned in the diff together with a dict of meta information that
126 can be used to render it in a HTML template.
127 can be used to render it in a HTML template.
127
128
128 .. note:: Unicode handling
129 .. note:: Unicode handling
129
130
130 The original diffs are a byte sequence and can contain filenames
131 The original diffs are a byte sequence and can contain filenames
131 in mixed encodings. This class generally returns `unicode` objects
132 in mixed encodings. This class generally returns `unicode` objects
132 since the result is intended for presentation to the user.
133 since the result is intended for presentation to the user.
133
134
134 """
135 """
135 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
136 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
136 _newline_marker = re.compile(r'^\\ No newline at end of file')
137 _newline_marker = re.compile(r'^\\ No newline at end of file')
137
138
138 # used for inline highlighter word split
139 # used for inline highlighter word split
139 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
140 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
140
141
141 # collapse ranges of commits over given number
142 # collapse ranges of commits over given number
142 _collapse_commits_over = 5
143 _collapse_commits_over = 5
143
144
144 def __init__(self, diff, format='gitdiff', diff_limit=None,
145 def __init__(self, diff, format='gitdiff', diff_limit=None,
145 file_limit=None, show_full_diff=True):
146 file_limit=None, show_full_diff=True):
146 """
147 """
147 :param diff: A `Diff` object representing a diff from a vcs backend
148 :param diff: A `Diff` object representing a diff from a vcs backend
148 :param format: format of diff passed, `udiff` or `gitdiff`
149 :param format: format of diff passed, `udiff` or `gitdiff`
149 :param diff_limit: define the size of diff that is considered "big"
150 :param diff_limit: define the size of diff that is considered "big"
150 based on that parameter cut off will be triggered, set to None
151 based on that parameter cut off will be triggered, set to None
151 to show full diff
152 to show full diff
152 """
153 """
153 self._diff = diff
154 self._diff = diff
154 self._format = format
155 self._format = format
155 self.adds = 0
156 self.adds = 0
156 self.removes = 0
157 self.removes = 0
157 # calculate diff size
158 # calculate diff size
158 self.diff_limit = diff_limit
159 self.diff_limit = diff_limit
159 self.file_limit = file_limit
160 self.file_limit = file_limit
160 self.show_full_diff = show_full_diff
161 self.show_full_diff = show_full_diff
161 self.cur_diff_size = 0
162 self.cur_diff_size = 0
162 self.parsed = False
163 self.parsed = False
163 self.parsed_diff = []
164 self.parsed_diff = []
164
165
165 log.debug('Initialized DiffProcessor with %s mode', format)
166 log.debug('Initialized DiffProcessor with %s mode', format)
166 if format == 'gitdiff':
167 if format == 'gitdiff':
167 self.differ = self._highlight_line_difflib
168 self.differ = self._highlight_line_difflib
168 self._parser = self._parse_gitdiff
169 self._parser = self._parse_gitdiff
169 else:
170 else:
170 self.differ = self._highlight_line_udiff
171 self.differ = self._highlight_line_udiff
171 self._parser = self._new_parse_gitdiff
172 self._parser = self._new_parse_gitdiff
172
173
173 def _copy_iterator(self):
174 def _copy_iterator(self):
174 """
175 """
175 make a fresh copy of generator, we should not iterate thru
176 make a fresh copy of generator, we should not iterate thru
176 an original as it's needed for repeating operations on
177 an original as it's needed for repeating operations on
177 this instance of DiffProcessor
178 this instance of DiffProcessor
178 """
179 """
179 self.__udiff, iterator_copy = tee(self.__udiff)
180 self.__udiff, iterator_copy = tee(self.__udiff)
180 return iterator_copy
181 return iterator_copy
181
182
182 def _escaper(self, string):
183 def _escaper(self, string):
183 """
184 """
184 Escaper for diff escapes special chars and checks the diff limit
185 Escaper for diff escapes special chars and checks the diff limit
185
186
186 :param string:
187 :param string:
187 """
188 """
188 self.cur_diff_size += len(string)
189 self.cur_diff_size += len(string)
189
190
190 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
191 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
191 raise DiffLimitExceeded('Diff Limit Exceeded')
192 raise DiffLimitExceeded('Diff Limit Exceeded')
192
193
193 return string \
194 return string \
194 .replace('&', '&amp;')\
195 .replace('&', '&amp;')\
195 .replace('<', '&lt;')\
196 .replace('<', '&lt;')\
196 .replace('>', '&gt;')
197 .replace('>', '&gt;')
197
198
198 def _line_counter(self, l):
199 def _line_counter(self, l):
199 """
200 """
200 Checks each line and bumps total adds/removes for this diff
201 Checks each line and bumps total adds/removes for this diff
201
202
202 :param l:
203 :param l:
203 """
204 """
204 if l.startswith('+') and not l.startswith('+++'):
205 if l.startswith('+') and not l.startswith('+++'):
205 self.adds += 1
206 self.adds += 1
206 elif l.startswith('-') and not l.startswith('---'):
207 elif l.startswith('-') and not l.startswith('---'):
207 self.removes += 1
208 self.removes += 1
208 return safe_unicode(l)
209 return safe_unicode(l)
209
210
210 def _highlight_line_difflib(self, line, next_):
211 def _highlight_line_difflib(self, line, next_):
211 """
212 """
212 Highlight inline changes in both lines.
213 Highlight inline changes in both lines.
213 """
214 """
214
215
215 if line['action'] == Action.DELETE:
216 if line['action'] == Action.DELETE:
216 old, new = line, next_
217 old, new = line, next_
217 else:
218 else:
218 old, new = next_, line
219 old, new = next_, line
219
220
220 oldwords = self._token_re.split(old['line'])
221 oldwords = self._token_re.split(old['line'])
221 newwords = self._token_re.split(new['line'])
222 newwords = self._token_re.split(new['line'])
222 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
223 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
223
224
224 oldfragments, newfragments = [], []
225 oldfragments, newfragments = [], []
225 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
226 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
226 oldfrag = ''.join(oldwords[i1:i2])
227 oldfrag = ''.join(oldwords[i1:i2])
227 newfrag = ''.join(newwords[j1:j2])
228 newfrag = ''.join(newwords[j1:j2])
228 if tag != 'equal':
229 if tag != 'equal':
229 if oldfrag:
230 if oldfrag:
230 oldfrag = '<del>%s</del>' % oldfrag
231 oldfrag = '<del>%s</del>' % oldfrag
231 if newfrag:
232 if newfrag:
232 newfrag = '<ins>%s</ins>' % newfrag
233 newfrag = '<ins>%s</ins>' % newfrag
233 oldfragments.append(oldfrag)
234 oldfragments.append(oldfrag)
234 newfragments.append(newfrag)
235 newfragments.append(newfrag)
235
236
236 old['line'] = "".join(oldfragments)
237 old['line'] = "".join(oldfragments)
237 new['line'] = "".join(newfragments)
238 new['line'] = "".join(newfragments)
238
239
239 def _highlight_line_udiff(self, line, next_):
240 def _highlight_line_udiff(self, line, next_):
240 """
241 """
241 Highlight inline changes in both lines.
242 Highlight inline changes in both lines.
242 """
243 """
243 start = 0
244 start = 0
244 limit = min(len(line['line']), len(next_['line']))
245 limit = min(len(line['line']), len(next_['line']))
245 while start < limit and line['line'][start] == next_['line'][start]:
246 while start < limit and line['line'][start] == next_['line'][start]:
246 start += 1
247 start += 1
247 end = -1
248 end = -1
248 limit -= start
249 limit -= start
249 while -end <= limit and line['line'][end] == next_['line'][end]:
250 while -end <= limit and line['line'][end] == next_['line'][end]:
250 end -= 1
251 end -= 1
251 end += 1
252 end += 1
252 if start or end:
253 if start or end:
253 def do(l):
254 def do(l):
254 last = end + len(l['line'])
255 last = end + len(l['line'])
255 if l['action'] == Action.ADD:
256 if l['action'] == Action.ADD:
256 tag = 'ins'
257 tag = 'ins'
257 else:
258 else:
258 tag = 'del'
259 tag = 'del'
259 l['line'] = '%s<%s>%s</%s>%s' % (
260 l['line'] = '%s<%s>%s</%s>%s' % (
260 l['line'][:start],
261 l['line'][:start],
261 tag,
262 tag,
262 l['line'][start:last],
263 l['line'][start:last],
263 tag,
264 tag,
264 l['line'][last:]
265 l['line'][last:]
265 )
266 )
266 do(line)
267 do(line)
267 do(next_)
268 do(next_)
268
269
269 def _clean_line(self, line, command):
270 def _clean_line(self, line, command):
270 if command in ['+', '-', ' ']:
271 if command in ['+', '-', ' ']:
271 # only modify the line if it's actually a diff thing
272 # only modify the line if it's actually a diff thing
272 line = line[1:]
273 line = line[1:]
273 return line
274 return line
274
275
275 def _parse_gitdiff(self, inline_diff=True):
276 def _parse_gitdiff(self, inline_diff=True):
276 _files = []
277 _files = []
277 diff_container = lambda arg: arg
278 diff_container = lambda arg: arg
278
279
279 for chunk in self._diff.chunks():
280 for chunk in self._diff.chunks():
280 head = chunk.header
281 head = chunk.header
281
282
282 diff = imap(self._escaper, self.diff_splitter(chunk.diff))
283 diff = imap(self._escaper, self.diff_splitter(chunk.diff))
283 raw_diff = chunk.raw
284 raw_diff = chunk.raw
284 limited_diff = False
285 limited_diff = False
285 exceeds_limit = False
286 exceeds_limit = False
286
287
287 op = None
288 op = None
288 stats = {
289 stats = {
289 'added': 0,
290 'added': 0,
290 'deleted': 0,
291 'deleted': 0,
291 'binary': False,
292 'binary': False,
292 'ops': {},
293 'ops': {},
293 }
294 }
294
295
295 if head['deleted_file_mode']:
296 if head['deleted_file_mode']:
296 op = OPS.DEL
297 op = OPS.DEL
297 stats['binary'] = True
298 stats['binary'] = True
298 stats['ops'][DEL_FILENODE] = 'deleted file'
299 stats['ops'][DEL_FILENODE] = 'deleted file'
299
300
300 elif head['new_file_mode']:
301 elif head['new_file_mode']:
301 op = OPS.ADD
302 op = OPS.ADD
302 stats['binary'] = True
303 stats['binary'] = True
303 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
304 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
304 else: # modify operation, can be copy, rename or chmod
305 else: # modify operation, can be copy, rename or chmod
305
306
306 # CHMOD
307 # CHMOD
307 if head['new_mode'] and head['old_mode']:
308 if head['new_mode'] and head['old_mode']:
308 op = OPS.MOD
309 op = OPS.MOD
309 stats['binary'] = True
310 stats['binary'] = True
310 stats['ops'][CHMOD_FILENODE] = (
311 stats['ops'][CHMOD_FILENODE] = (
311 'modified file chmod %s => %s' % (
312 'modified file chmod %s => %s' % (
312 head['old_mode'], head['new_mode']))
313 head['old_mode'], head['new_mode']))
313 # RENAME
314 # RENAME
314 if head['rename_from'] != head['rename_to']:
315 if head['rename_from'] != head['rename_to']:
315 op = OPS.MOD
316 op = OPS.MOD
316 stats['binary'] = True
317 stats['binary'] = True
317 stats['ops'][RENAMED_FILENODE] = (
318 stats['ops'][RENAMED_FILENODE] = (
318 'file renamed from %s to %s' % (
319 'file renamed from %s to %s' % (
319 head['rename_from'], head['rename_to']))
320 head['rename_from'], head['rename_to']))
320 # COPY
321 # COPY
321 if head.get('copy_from') and head.get('copy_to'):
322 if head.get('copy_from') and head.get('copy_to'):
322 op = OPS.MOD
323 op = OPS.MOD
323 stats['binary'] = True
324 stats['binary'] = True
324 stats['ops'][COPIED_FILENODE] = (
325 stats['ops'][COPIED_FILENODE] = (
325 'file copied from %s to %s' % (
326 'file copied from %s to %s' % (
326 head['copy_from'], head['copy_to']))
327 head['copy_from'], head['copy_to']))
327
328
328 # If our new parsed headers didn't match anything fallback to
329 # If our new parsed headers didn't match anything fallback to
329 # old style detection
330 # old style detection
330 if op is None:
331 if op is None:
331 if not head['a_file'] and head['b_file']:
332 if not head['a_file'] and head['b_file']:
332 op = OPS.ADD
333 op = OPS.ADD
333 stats['binary'] = True
334 stats['binary'] = True
334 stats['ops'][NEW_FILENODE] = 'new file'
335 stats['ops'][NEW_FILENODE] = 'new file'
335
336
336 elif head['a_file'] and not head['b_file']:
337 elif head['a_file'] and not head['b_file']:
337 op = OPS.DEL
338 op = OPS.DEL
338 stats['binary'] = True
339 stats['binary'] = True
339 stats['ops'][DEL_FILENODE] = 'deleted file'
340 stats['ops'][DEL_FILENODE] = 'deleted file'
340
341
341 # it's not ADD not DELETE
342 # it's not ADD not DELETE
342 if op is None:
343 if op is None:
343 op = OPS.MOD
344 op = OPS.MOD
344 stats['binary'] = True
345 stats['binary'] = True
345 stats['ops'][MOD_FILENODE] = 'modified file'
346 stats['ops'][MOD_FILENODE] = 'modified file'
346
347
347 # a real non-binary diff
348 # a real non-binary diff
348 if head['a_file'] or head['b_file']:
349 if head['a_file'] or head['b_file']:
349 try:
350 try:
350 raw_diff, chunks, _stats = self._parse_lines(diff)
351 raw_diff, chunks, _stats = self._parse_lines(diff)
351 stats['binary'] = False
352 stats['binary'] = False
352 stats['added'] = _stats[0]
353 stats['added'] = _stats[0]
353 stats['deleted'] = _stats[1]
354 stats['deleted'] = _stats[1]
354 # explicit mark that it's a modified file
355 # explicit mark that it's a modified file
355 if op == OPS.MOD:
356 if op == OPS.MOD:
356 stats['ops'][MOD_FILENODE] = 'modified file'
357 stats['ops'][MOD_FILENODE] = 'modified file'
357 exceeds_limit = len(raw_diff) > self.file_limit
358 exceeds_limit = len(raw_diff) > self.file_limit
358
359
359 # changed from _escaper function so we validate size of
360 # changed from _escaper function so we validate size of
360 # each file instead of the whole diff
361 # each file instead of the whole diff
361 # diff will hide big files but still show small ones
362 # diff will hide big files but still show small ones
362 # from my tests, big files are fairly safe to be parsed
363 # from my tests, big files are fairly safe to be parsed
363 # but the browser is the bottleneck
364 # but the browser is the bottleneck
364 if not self.show_full_diff and exceeds_limit:
365 if not self.show_full_diff and exceeds_limit:
365 raise DiffLimitExceeded('File Limit Exceeded')
366 raise DiffLimitExceeded('File Limit Exceeded')
366
367
367 except DiffLimitExceeded:
368 except DiffLimitExceeded:
368 diff_container = lambda _diff: \
369 diff_container = lambda _diff: \
369 LimitedDiffContainer(
370 LimitedDiffContainer(
370 self.diff_limit, self.cur_diff_size, _diff)
371 self.diff_limit, self.cur_diff_size, _diff)
371
372
372 exceeds_limit = len(raw_diff) > self.file_limit
373 exceeds_limit = len(raw_diff) > self.file_limit
373 limited_diff = True
374 limited_diff = True
374 chunks = []
375 chunks = []
375
376
376 else: # GIT format binary patch, or possibly empty diff
377 else: # GIT format binary patch, or possibly empty diff
377 if head['bin_patch']:
378 if head['bin_patch']:
378 # we have operation already extracted, but we mark simply
379 # we have operation already extracted, but we mark simply
379 # it's a diff we wont show for binary files
380 # it's a diff we wont show for binary files
380 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
381 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
381 chunks = []
382 chunks = []
382
383
383 if chunks and not self.show_full_diff and op == OPS.DEL:
384 if chunks and not self.show_full_diff and op == OPS.DEL:
384 # if not full diff mode show deleted file contents
385 # if not full diff mode show deleted file contents
385 # TODO: anderson: if the view is not too big, there is no way
386 # TODO: anderson: if the view is not too big, there is no way
386 # to see the content of the file
387 # to see the content of the file
387 chunks = []
388 chunks = []
388
389
389 chunks.insert(0, [{
390 chunks.insert(0, [{
390 'old_lineno': '',
391 'old_lineno': '',
391 'new_lineno': '',
392 'new_lineno': '',
392 'action': Action.CONTEXT,
393 'action': Action.CONTEXT,
393 'line': msg,
394 'line': msg,
394 } for _op, msg in stats['ops'].iteritems()
395 } for _op, msg in stats['ops'].iteritems()
395 if _op not in [MOD_FILENODE]])
396 if _op not in [MOD_FILENODE]])
396
397
397 _files.append({
398 _files.append({
398 'filename': safe_unicode(head['b_path']),
399 'filename': safe_unicode(head['b_path']),
399 'old_revision': head['a_blob_id'],
400 'old_revision': head['a_blob_id'],
400 'new_revision': head['b_blob_id'],
401 'new_revision': head['b_blob_id'],
401 'chunks': chunks,
402 'chunks': chunks,
402 'raw_diff': safe_unicode(raw_diff),
403 'raw_diff': safe_unicode(raw_diff),
403 'operation': op,
404 'operation': op,
404 'stats': stats,
405 'stats': stats,
405 'exceeds_limit': exceeds_limit,
406 'exceeds_limit': exceeds_limit,
406 'is_limited_diff': limited_diff,
407 'is_limited_diff': limited_diff,
407 })
408 })
408
409
409 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
410 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
410 OPS.DEL: 2}.get(info['operation'])
411 OPS.DEL: 2}.get(info['operation'])
411
412
412 if not inline_diff:
413 if not inline_diff:
413 return diff_container(sorted(_files, key=sorter))
414 return diff_container(sorted(_files, key=sorter))
414
415
415 # highlight inline changes
416 # highlight inline changes
416 for diff_data in _files:
417 for diff_data in _files:
417 for chunk in diff_data['chunks']:
418 for chunk in diff_data['chunks']:
418 lineiter = iter(chunk)
419 lineiter = iter(chunk)
419 try:
420 try:
420 while 1:
421 while 1:
421 line = lineiter.next()
422 line = lineiter.next()
422 if line['action'] not in (
423 if line['action'] not in (
423 Action.UNMODIFIED, Action.CONTEXT):
424 Action.UNMODIFIED, Action.CONTEXT):
424 nextline = lineiter.next()
425 nextline = lineiter.next()
425 if nextline['action'] in ['unmod', 'context'] or \
426 if nextline['action'] in ['unmod', 'context'] or \
426 nextline['action'] == line['action']:
427 nextline['action'] == line['action']:
427 continue
428 continue
428 self.differ(line, nextline)
429 self.differ(line, nextline)
429 except StopIteration:
430 except StopIteration:
430 pass
431 pass
431
432
432 return diff_container(sorted(_files, key=sorter))
433 return diff_container(sorted(_files, key=sorter))
433
434
434 def _check_large_diff(self):
435 def _check_large_diff(self):
435 log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
436 log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
436 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
437 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
437 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
438 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
438
439
439 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
440 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
440 def _new_parse_gitdiff(self, inline_diff=True):
441 def _new_parse_gitdiff(self, inline_diff=True):
441 _files = []
442 _files = []
442
443
443 # this can be overriden later to a LimitedDiffContainer type
444 # this can be overriden later to a LimitedDiffContainer type
444 diff_container = lambda arg: arg
445 diff_container = lambda arg: arg
445
446
446 for chunk in self._diff.chunks():
447 for chunk in self._diff.chunks():
447 head = chunk.header
448 head = chunk.header
448 log.debug('parsing diff %r' % head)
449 log.debug('parsing diff %r' % head)
449
450
450 raw_diff = chunk.raw
451 raw_diff = chunk.raw
451 limited_diff = False
452 limited_diff = False
452 exceeds_limit = False
453 exceeds_limit = False
453
454
454 op = None
455 op = None
455 stats = {
456 stats = {
456 'added': 0,
457 'added': 0,
457 'deleted': 0,
458 'deleted': 0,
458 'binary': False,
459 'binary': False,
459 'old_mode': None,
460 'old_mode': None,
460 'new_mode': None,
461 'new_mode': None,
461 'ops': {},
462 'ops': {},
462 }
463 }
463 if head['old_mode']:
464 if head['old_mode']:
464 stats['old_mode'] = head['old_mode']
465 stats['old_mode'] = head['old_mode']
465 if head['new_mode']:
466 if head['new_mode']:
466 stats['new_mode'] = head['new_mode']
467 stats['new_mode'] = head['new_mode']
467 if head['b_mode']:
468 if head['b_mode']:
468 stats['new_mode'] = head['b_mode']
469 stats['new_mode'] = head['b_mode']
469
470
470 # delete file
471 # delete file
471 if head['deleted_file_mode']:
472 if head['deleted_file_mode']:
472 op = OPS.DEL
473 op = OPS.DEL
473 stats['binary'] = True
474 stats['binary'] = True
474 stats['ops'][DEL_FILENODE] = 'deleted file'
475 stats['ops'][DEL_FILENODE] = 'deleted file'
475
476
476 # new file
477 # new file
477 elif head['new_file_mode']:
478 elif head['new_file_mode']:
478 op = OPS.ADD
479 op = OPS.ADD
479 stats['binary'] = True
480 stats['binary'] = True
480 stats['old_mode'] = None
481 stats['old_mode'] = None
481 stats['new_mode'] = head['new_file_mode']
482 stats['new_mode'] = head['new_file_mode']
482 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
483 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
483
484
484 # modify operation, can be copy, rename or chmod
485 # modify operation, can be copy, rename or chmod
485 else:
486 else:
486 # CHMOD
487 # CHMOD
487 if head['new_mode'] and head['old_mode']:
488 if head['new_mode'] and head['old_mode']:
488 op = OPS.MOD
489 op = OPS.MOD
489 stats['binary'] = True
490 stats['binary'] = True
490 stats['ops'][CHMOD_FILENODE] = (
491 stats['ops'][CHMOD_FILENODE] = (
491 'modified file chmod %s => %s' % (
492 'modified file chmod %s => %s' % (
492 head['old_mode'], head['new_mode']))
493 head['old_mode'], head['new_mode']))
493
494
494 # RENAME
495 # RENAME
495 if head['rename_from'] != head['rename_to']:
496 if head['rename_from'] != head['rename_to']:
496 op = OPS.MOD
497 op = OPS.MOD
497 stats['binary'] = True
498 stats['binary'] = True
498 stats['renamed'] = (head['rename_from'], head['rename_to'])
499 stats['renamed'] = (head['rename_from'], head['rename_to'])
499 stats['ops'][RENAMED_FILENODE] = (
500 stats['ops'][RENAMED_FILENODE] = (
500 'file renamed from %s to %s' % (
501 'file renamed from %s to %s' % (
501 head['rename_from'], head['rename_to']))
502 head['rename_from'], head['rename_to']))
502 # COPY
503 # COPY
503 if head.get('copy_from') and head.get('copy_to'):
504 if head.get('copy_from') and head.get('copy_to'):
504 op = OPS.MOD
505 op = OPS.MOD
505 stats['binary'] = True
506 stats['binary'] = True
506 stats['copied'] = (head['copy_from'], head['copy_to'])
507 stats['copied'] = (head['copy_from'], head['copy_to'])
507 stats['ops'][COPIED_FILENODE] = (
508 stats['ops'][COPIED_FILENODE] = (
508 'file copied from %s to %s' % (
509 'file copied from %s to %s' % (
509 head['copy_from'], head['copy_to']))
510 head['copy_from'], head['copy_to']))
510
511
511 # If our new parsed headers didn't match anything fallback to
512 # If our new parsed headers didn't match anything fallback to
512 # old style detection
513 # old style detection
513 if op is None:
514 if op is None:
514 if not head['a_file'] and head['b_file']:
515 if not head['a_file'] and head['b_file']:
515 op = OPS.ADD
516 op = OPS.ADD
516 stats['binary'] = True
517 stats['binary'] = True
517 stats['new_file'] = True
518 stats['new_file'] = True
518 stats['ops'][NEW_FILENODE] = 'new file'
519 stats['ops'][NEW_FILENODE] = 'new file'
519
520
520 elif head['a_file'] and not head['b_file']:
521 elif head['a_file'] and not head['b_file']:
521 op = OPS.DEL
522 op = OPS.DEL
522 stats['binary'] = True
523 stats['binary'] = True
523 stats['ops'][DEL_FILENODE] = 'deleted file'
524 stats['ops'][DEL_FILENODE] = 'deleted file'
524
525
525 # it's not ADD not DELETE
526 # it's not ADD not DELETE
526 if op is None:
527 if op is None:
527 op = OPS.MOD
528 op = OPS.MOD
528 stats['binary'] = True
529 stats['binary'] = True
529 stats['ops'][MOD_FILENODE] = 'modified file'
530 stats['ops'][MOD_FILENODE] = 'modified file'
530
531
531 # a real non-binary diff
532 # a real non-binary diff
532 if head['a_file'] or head['b_file']:
533 if head['a_file'] or head['b_file']:
533 # simulate splitlines, so we keep the line end part
534 # simulate splitlines, so we keep the line end part
534 diff = self.diff_splitter(chunk.diff)
535 diff = self.diff_splitter(chunk.diff)
535
536
536 # append each file to the diff size
537 # append each file to the diff size
537 raw_chunk_size = len(raw_diff)
538 raw_chunk_size = len(raw_diff)
538
539
539 exceeds_limit = raw_chunk_size > self.file_limit
540 exceeds_limit = raw_chunk_size > self.file_limit
540 self.cur_diff_size += raw_chunk_size
541 self.cur_diff_size += raw_chunk_size
541
542
542 try:
543 try:
543 # Check each file instead of the whole diff.
544 # Check each file instead of the whole diff.
544 # Diff will hide big files but still show small ones.
545 # Diff will hide big files but still show small ones.
545 # From the tests big files are fairly safe to be parsed
546 # From the tests big files are fairly safe to be parsed
546 # but the browser is the bottleneck.
547 # but the browser is the bottleneck.
547 if not self.show_full_diff and exceeds_limit:
548 if not self.show_full_diff and exceeds_limit:
548 log.debug('File `%s` exceeds current file_limit of %s',
549 log.debug('File `%s` exceeds current file_limit of %s',
549 safe_unicode(head['b_path']), self.file_limit)
550 safe_unicode(head['b_path']), self.file_limit)
550 raise DiffLimitExceeded(
551 raise DiffLimitExceeded(
551 'File Limit %s Exceeded', self.file_limit)
552 'File Limit %s Exceeded', self.file_limit)
552
553
553 self._check_large_diff()
554 self._check_large_diff()
554
555
555 raw_diff, chunks, _stats = self._new_parse_lines(diff)
556 raw_diff, chunks, _stats = self._new_parse_lines(diff)
556 stats['binary'] = False
557 stats['binary'] = False
557 stats['added'] = _stats[0]
558 stats['added'] = _stats[0]
558 stats['deleted'] = _stats[1]
559 stats['deleted'] = _stats[1]
559 # explicit mark that it's a modified file
560 # explicit mark that it's a modified file
560 if op == OPS.MOD:
561 if op == OPS.MOD:
561 stats['ops'][MOD_FILENODE] = 'modified file'
562 stats['ops'][MOD_FILENODE] = 'modified file'
562
563
563 except DiffLimitExceeded:
564 except DiffLimitExceeded:
564 diff_container = lambda _diff: \
565 diff_container = lambda _diff: \
565 LimitedDiffContainer(
566 LimitedDiffContainer(
566 self.diff_limit, self.cur_diff_size, _diff)
567 self.diff_limit, self.cur_diff_size, _diff)
567
568
568 limited_diff = True
569 limited_diff = True
569 chunks = []
570 chunks = []
570
571
571 else: # GIT format binary patch, or possibly empty diff
572 else: # GIT format binary patch, or possibly empty diff
572 if head['bin_patch']:
573 if head['bin_patch']:
573 # we have operation already extracted, but we mark simply
574 # we have operation already extracted, but we mark simply
574 # it's a diff we wont show for binary files
575 # it's a diff we wont show for binary files
575 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
576 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
576 chunks = []
577 chunks = []
577
578
578 # Hide content of deleted node by setting empty chunks
579 # Hide content of deleted node by setting empty chunks
579 if chunks and not self.show_full_diff and op == OPS.DEL:
580 if chunks and not self.show_full_diff and op == OPS.DEL:
580 # if not full diff mode show deleted file contents
581 # if not full diff mode show deleted file contents
581 # TODO: anderson: if the view is not too big, there is no way
582 # TODO: anderson: if the view is not too big, there is no way
582 # to see the content of the file
583 # to see the content of the file
583 chunks = []
584 chunks = []
584
585
585 chunks.insert(
586 chunks.insert(
586 0, [{'old_lineno': '',
587 0, [{'old_lineno': '',
587 'new_lineno': '',
588 'new_lineno': '',
588 'action': Action.CONTEXT,
589 'action': Action.CONTEXT,
589 'line': msg,
590 'line': msg,
590 } for _op, msg in stats['ops'].iteritems()
591 } for _op, msg in stats['ops'].iteritems()
591 if _op not in [MOD_FILENODE]])
592 if _op not in [MOD_FILENODE]])
592
593
593 original_filename = safe_unicode(head['a_path'])
594 original_filename = safe_unicode(head['a_path'])
594 _files.append({
595 _files.append({
595 'original_filename': original_filename,
596 'original_filename': original_filename,
596 'filename': safe_unicode(head['b_path']),
597 'filename': safe_unicode(head['b_path']),
597 'old_revision': head['a_blob_id'],
598 'old_revision': head['a_blob_id'],
598 'new_revision': head['b_blob_id'],
599 'new_revision': head['b_blob_id'],
599 'chunks': chunks,
600 'chunks': chunks,
600 'raw_diff': safe_unicode(raw_diff),
601 'raw_diff': safe_unicode(raw_diff),
601 'operation': op,
602 'operation': op,
602 'stats': stats,
603 'stats': stats,
603 'exceeds_limit': exceeds_limit,
604 'exceeds_limit': exceeds_limit,
604 'is_limited_diff': limited_diff,
605 'is_limited_diff': limited_diff,
605 })
606 })
606
607
607 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
608 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
608 OPS.DEL: 2}.get(info['operation'])
609 OPS.DEL: 2}.get(info['operation'])
609
610
610 return diff_container(sorted(_files, key=sorter))
611 return diff_container(sorted(_files, key=sorter))
611
612
612 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
613 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
613 def _parse_lines(self, diff_iter):
614 def _parse_lines(self, diff_iter):
614 """
615 """
615 Parse the diff an return data for the template.
616 Parse the diff an return data for the template.
616 """
617 """
617
618
618 stats = [0, 0]
619 stats = [0, 0]
619 chunks = []
620 chunks = []
620 raw_diff = []
621 raw_diff = []
621
622
622 try:
623 try:
623 line = diff_iter.next()
624 line = diff_iter.next()
624
625
625 while line:
626 while line:
626 raw_diff.append(line)
627 raw_diff.append(line)
627 lines = []
628 lines = []
628 chunks.append(lines)
629 chunks.append(lines)
629
630
630 match = self._chunk_re.match(line)
631 match = self._chunk_re.match(line)
631
632
632 if not match:
633 if not match:
633 break
634 break
634
635
635 gr = match.groups()
636 gr = match.groups()
636 (old_line, old_end,
637 (old_line, old_end,
637 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
638 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
638 old_line -= 1
639 old_line -= 1
639 new_line -= 1
640 new_line -= 1
640
641
641 context = len(gr) == 5
642 context = len(gr) == 5
642 old_end += old_line
643 old_end += old_line
643 new_end += new_line
644 new_end += new_line
644
645
645 if context:
646 if context:
646 # skip context only if it's first line
647 # skip context only if it's first line
647 if int(gr[0]) > 1:
648 if int(gr[0]) > 1:
648 lines.append({
649 lines.append({
649 'old_lineno': '...',
650 'old_lineno': '...',
650 'new_lineno': '...',
651 'new_lineno': '...',
651 'action': Action.CONTEXT,
652 'action': Action.CONTEXT,
652 'line': line,
653 'line': line,
653 })
654 })
654
655
655 line = diff_iter.next()
656 line = diff_iter.next()
656
657
657 while old_line < old_end or new_line < new_end:
658 while old_line < old_end or new_line < new_end:
658 command = ' '
659 command = ' '
659 if line:
660 if line:
660 command = line[0]
661 command = line[0]
661
662
662 affects_old = affects_new = False
663 affects_old = affects_new = False
663
664
664 # ignore those if we don't expect them
665 # ignore those if we don't expect them
665 if command in '#@':
666 if command in '#@':
666 continue
667 continue
667 elif command == '+':
668 elif command == '+':
668 affects_new = True
669 affects_new = True
669 action = Action.ADD
670 action = Action.ADD
670 stats[0] += 1
671 stats[0] += 1
671 elif command == '-':
672 elif command == '-':
672 affects_old = True
673 affects_old = True
673 action = Action.DELETE
674 action = Action.DELETE
674 stats[1] += 1
675 stats[1] += 1
675 else:
676 else:
676 affects_old = affects_new = True
677 affects_old = affects_new = True
677 action = Action.UNMODIFIED
678 action = Action.UNMODIFIED
678
679
679 if not self._newline_marker.match(line):
680 if not self._newline_marker.match(line):
680 old_line += affects_old
681 old_line += affects_old
681 new_line += affects_new
682 new_line += affects_new
682 lines.append({
683 lines.append({
683 'old_lineno': affects_old and old_line or '',
684 'old_lineno': affects_old and old_line or '',
684 'new_lineno': affects_new and new_line or '',
685 'new_lineno': affects_new and new_line or '',
685 'action': action,
686 'action': action,
686 'line': self._clean_line(line, command)
687 'line': self._clean_line(line, command)
687 })
688 })
688 raw_diff.append(line)
689 raw_diff.append(line)
689
690
690 line = diff_iter.next()
691 line = diff_iter.next()
691
692
692 if self._newline_marker.match(line):
693 if self._newline_marker.match(line):
693 # we need to append to lines, since this is not
694 # we need to append to lines, since this is not
694 # counted in the line specs of diff
695 # counted in the line specs of diff
695 lines.append({
696 lines.append({
696 'old_lineno': '...',
697 'old_lineno': '...',
697 'new_lineno': '...',
698 'new_lineno': '...',
698 'action': Action.CONTEXT,
699 'action': Action.CONTEXT,
699 'line': self._clean_line(line, command)
700 'line': self._clean_line(line, command)
700 })
701 })
701
702
702 except StopIteration:
703 except StopIteration:
703 pass
704 pass
704 return ''.join(raw_diff), chunks, stats
705 return ''.join(raw_diff), chunks, stats
705
706
706 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
707 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
707 def _new_parse_lines(self, diff_iter):
708 def _new_parse_lines(self, diff_iter):
708 """
709 """
709 Parse the diff an return data for the template.
710 Parse the diff an return data for the template.
710 """
711 """
711
712
712 stats = [0, 0]
713 stats = [0, 0]
713 chunks = []
714 chunks = []
714 raw_diff = []
715 raw_diff = []
715
716
716 try:
717 try:
717 line = diff_iter.next()
718 line = diff_iter.next()
718
719
719 while line:
720 while line:
720 raw_diff.append(line)
721 raw_diff.append(line)
721 # match header e.g @@ -0,0 +1 @@\n'
722 # match header e.g @@ -0,0 +1 @@\n'
722 match = self._chunk_re.match(line)
723 match = self._chunk_re.match(line)
723
724
724 if not match:
725 if not match:
725 break
726 break
726
727
727 gr = match.groups()
728 gr = match.groups()
728 (old_line, old_end,
729 (old_line, old_end,
729 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
730 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
730
731
731 lines = []
732 lines = []
732 hunk = {
733 hunk = {
733 'section_header': gr[-1],
734 'section_header': gr[-1],
734 'source_start': old_line,
735 'source_start': old_line,
735 'source_length': old_end,
736 'source_length': old_end,
736 'target_start': new_line,
737 'target_start': new_line,
737 'target_length': new_end,
738 'target_length': new_end,
738 'lines': lines,
739 'lines': lines,
739 }
740 }
740 chunks.append(hunk)
741 chunks.append(hunk)
741
742
742 old_line -= 1
743 old_line -= 1
743 new_line -= 1
744 new_line -= 1
744
745
745 context = len(gr) == 5
746 context = len(gr) == 5
746 old_end += old_line
747 old_end += old_line
747 new_end += new_line
748 new_end += new_line
748
749
749 line = diff_iter.next()
750 line = diff_iter.next()
750
751
751 while old_line < old_end or new_line < new_end:
752 while old_line < old_end or new_line < new_end:
752 command = ' '
753 command = ' '
753 if line:
754 if line:
754 command = line[0]
755 command = line[0]
755
756
756 affects_old = affects_new = False
757 affects_old = affects_new = False
757
758
758 # ignore those if we don't expect them
759 # ignore those if we don't expect them
759 if command in '#@':
760 if command in '#@':
760 continue
761 continue
761 elif command == '+':
762 elif command == '+':
762 affects_new = True
763 affects_new = True
763 action = Action.ADD
764 action = Action.ADD
764 stats[0] += 1
765 stats[0] += 1
765 elif command == '-':
766 elif command == '-':
766 affects_old = True
767 affects_old = True
767 action = Action.DELETE
768 action = Action.DELETE
768 stats[1] += 1
769 stats[1] += 1
769 else:
770 else:
770 affects_old = affects_new = True
771 affects_old = affects_new = True
771 action = Action.UNMODIFIED
772 action = Action.UNMODIFIED
772
773
773 if not self._newline_marker.match(line):
774 if not self._newline_marker.match(line):
774 old_line += affects_old
775 old_line += affects_old
775 new_line += affects_new
776 new_line += affects_new
776 lines.append({
777 lines.append({
777 'old_lineno': affects_old and old_line or '',
778 'old_lineno': affects_old and old_line or '',
778 'new_lineno': affects_new and new_line or '',
779 'new_lineno': affects_new and new_line or '',
779 'action': action,
780 'action': action,
780 'line': self._clean_line(line, command)
781 'line': self._clean_line(line, command)
781 })
782 })
782 raw_diff.append(line)
783 raw_diff.append(line)
783
784
784 line = diff_iter.next()
785 line = diff_iter.next()
785
786
786 if self._newline_marker.match(line):
787 if self._newline_marker.match(line):
787 # we need to append to lines, since this is not
788 # we need to append to lines, since this is not
788 # counted in the line specs of diff
789 # counted in the line specs of diff
789 if affects_old:
790 if affects_old:
790 action = Action.OLD_NO_NL
791 action = Action.OLD_NO_NL
791 elif affects_new:
792 elif affects_new:
792 action = Action.NEW_NO_NL
793 action = Action.NEW_NO_NL
793 else:
794 else:
794 raise Exception('invalid context for no newline')
795 raise Exception('invalid context for no newline')
795
796
796 lines.append({
797 lines.append({
797 'old_lineno': None,
798 'old_lineno': None,
798 'new_lineno': None,
799 'new_lineno': None,
799 'action': action,
800 'action': action,
800 'line': self._clean_line(line, command)
801 'line': self._clean_line(line, command)
801 })
802 })
802
803
803 except StopIteration:
804 except StopIteration:
804 pass
805 pass
805
806
806 return ''.join(raw_diff), chunks, stats
807 return ''.join(raw_diff), chunks, stats
807
808
808 def _safe_id(self, idstring):
809 def _safe_id(self, idstring):
809 """Make a string safe for including in an id attribute.
810 """Make a string safe for including in an id attribute.
810
811
811 The HTML spec says that id attributes 'must begin with
812 The HTML spec says that id attributes 'must begin with
812 a letter ([A-Za-z]) and may be followed by any number
813 a letter ([A-Za-z]) and may be followed by any number
813 of letters, digits ([0-9]), hyphens ("-"), underscores
814 of letters, digits ([0-9]), hyphens ("-"), underscores
814 ("_"), colons (":"), and periods (".")'. These regexps
815 ("_"), colons (":"), and periods (".")'. These regexps
815 are slightly over-zealous, in that they remove colons
816 are slightly over-zealous, in that they remove colons
816 and periods unnecessarily.
817 and periods unnecessarily.
817
818
818 Whitespace is transformed into underscores, and then
819 Whitespace is transformed into underscores, and then
819 anything which is not a hyphen or a character that
820 anything which is not a hyphen or a character that
820 matches \w (alphanumerics and underscore) is removed.
821 matches \w (alphanumerics and underscore) is removed.
821
822
822 """
823 """
823 # Transform all whitespace to underscore
824 # Transform all whitespace to underscore
824 idstring = re.sub(r'\s', "_", '%s' % idstring)
825 idstring = re.sub(r'\s', "_", '%s' % idstring)
825 # Remove everything that is not a hyphen or a member of \w
826 # Remove everything that is not a hyphen or a member of \w
826 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
827 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
827 return idstring
828 return idstring
828
829
829 @classmethod
830 @classmethod
830 def diff_splitter(cls, string):
831 def diff_splitter(cls, string):
831 """
832 """
832 Diff split that emulates .splitlines() but works only on \n
833 Diff split that emulates .splitlines() but works only on \n
833 """
834 """
834 if not string:
835 if not string:
835 return
836 return
836 elif string == '\n':
837 elif string == '\n':
837 yield u'\n'
838 yield u'\n'
838 else:
839 else:
839
840
840 has_newline = string.endswith('\n')
841 has_newline = string.endswith('\n')
841 elements = string.split('\n')
842 elements = string.split('\n')
842 if has_newline:
843 if has_newline:
843 # skip last element as it's empty string from newlines
844 # skip last element as it's empty string from newlines
844 elements = elements[:-1]
845 elements = elements[:-1]
845
846
846 len_elements = len(elements)
847 len_elements = len(elements)
847
848
848 for cnt, line in enumerate(elements, start=1):
849 for cnt, line in enumerate(elements, start=1):
849 last_line = cnt == len_elements
850 last_line = cnt == len_elements
850 if last_line and not has_newline:
851 if last_line and not has_newline:
851 yield safe_unicode(line)
852 yield safe_unicode(line)
852 else:
853 else:
853 yield safe_unicode(line) + '\n'
854 yield safe_unicode(line) + '\n'
854
855
855 def prepare(self, inline_diff=True):
856 def prepare(self, inline_diff=True):
856 """
857 """
857 Prepare the passed udiff for HTML rendering.
858 Prepare the passed udiff for HTML rendering.
858
859
859 :return: A list of dicts with diff information.
860 :return: A list of dicts with diff information.
860 """
861 """
861 parsed = self._parser(inline_diff=inline_diff)
862 parsed = self._parser(inline_diff=inline_diff)
862 self.parsed = True
863 self.parsed = True
863 self.parsed_diff = parsed
864 self.parsed_diff = parsed
864 return parsed
865 return parsed
865
866
866 def as_raw(self, diff_lines=None):
867 def as_raw(self, diff_lines=None):
867 """
868 """
868 Returns raw diff as a byte string
869 Returns raw diff as a byte string
869 """
870 """
870 return self._diff.raw
871 return self._diff.raw
871
872
872 def as_html(self, table_class='code-difftable', line_class='line',
873 def as_html(self, table_class='code-difftable', line_class='line',
873 old_lineno_class='lineno old', new_lineno_class='lineno new',
874 old_lineno_class='lineno old', new_lineno_class='lineno new',
874 code_class='code', enable_comments=False, parsed_lines=None):
875 code_class='code', enable_comments=False, parsed_lines=None):
875 """
876 """
876 Return given diff as html table with customized css classes
877 Return given diff as html table with customized css classes
877 """
878 """
878 # TODO(marcink): not sure how to pass in translator
879 # TODO(marcink): not sure how to pass in translator
879 # here in an efficient way, leave the _ for proper gettext extraction
880 # here in an efficient way, leave the _ for proper gettext extraction
880 _ = lambda s: s
881 _ = lambda s: s
881
882
882 def _link_to_if(condition, label, url):
883 def _link_to_if(condition, label, url):
883 """
884 """
884 Generates a link if condition is meet or just the label if not.
885 Generates a link if condition is meet or just the label if not.
885 """
886 """
886
887
887 if condition:
888 if condition:
888 return '''<a href="%(url)s" class="tooltip"
889 return '''<a href="%(url)s" class="tooltip"
889 title="%(title)s">%(label)s</a>''' % {
890 title="%(title)s">%(label)s</a>''' % {
890 'title': _('Click to select line'),
891 'title': _('Click to select line'),
891 'url': url,
892 'url': url,
892 'label': label
893 'label': label
893 }
894 }
894 else:
895 else:
895 return label
896 return label
896 if not self.parsed:
897 if not self.parsed:
897 self.prepare()
898 self.prepare()
898
899
899 diff_lines = self.parsed_diff
900 diff_lines = self.parsed_diff
900 if parsed_lines:
901 if parsed_lines:
901 diff_lines = parsed_lines
902 diff_lines = parsed_lines
902
903
903 _html_empty = True
904 _html_empty = True
904 _html = []
905 _html = []
905 _html.append('''<table class="%(table_class)s">\n''' % {
906 _html.append('''<table class="%(table_class)s">\n''' % {
906 'table_class': table_class
907 'table_class': table_class
907 })
908 })
908
909
909 for diff in diff_lines:
910 for diff in diff_lines:
910 for line in diff['chunks']:
911 for line in diff['chunks']:
911 _html_empty = False
912 _html_empty = False
912 for change in line:
913 for change in line:
913 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
914 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
914 'lc': line_class,
915 'lc': line_class,
915 'action': change['action']
916 'action': change['action']
916 })
917 })
917 anchor_old_id = ''
918 anchor_old_id = ''
918 anchor_new_id = ''
919 anchor_new_id = ''
919 anchor_old = "%(filename)s_o%(oldline_no)s" % {
920 anchor_old = "%(filename)s_o%(oldline_no)s" % {
920 'filename': self._safe_id(diff['filename']),
921 'filename': self._safe_id(diff['filename']),
921 'oldline_no': change['old_lineno']
922 'oldline_no': change['old_lineno']
922 }
923 }
923 anchor_new = "%(filename)s_n%(oldline_no)s" % {
924 anchor_new = "%(filename)s_n%(oldline_no)s" % {
924 'filename': self._safe_id(diff['filename']),
925 'filename': self._safe_id(diff['filename']),
925 'oldline_no': change['new_lineno']
926 'oldline_no': change['new_lineno']
926 }
927 }
927 cond_old = (change['old_lineno'] != '...' and
928 cond_old = (change['old_lineno'] != '...' and
928 change['old_lineno'])
929 change['old_lineno'])
929 cond_new = (change['new_lineno'] != '...' and
930 cond_new = (change['new_lineno'] != '...' and
930 change['new_lineno'])
931 change['new_lineno'])
931 if cond_old:
932 if cond_old:
932 anchor_old_id = 'id="%s"' % anchor_old
933 anchor_old_id = 'id="%s"' % anchor_old
933 if cond_new:
934 if cond_new:
934 anchor_new_id = 'id="%s"' % anchor_new
935 anchor_new_id = 'id="%s"' % anchor_new
935
936
936 if change['action'] != Action.CONTEXT:
937 if change['action'] != Action.CONTEXT:
937 anchor_link = True
938 anchor_link = True
938 else:
939 else:
939 anchor_link = False
940 anchor_link = False
940
941
941 ###########################################################
942 ###########################################################
942 # COMMENT ICONS
943 # COMMENT ICONS
943 ###########################################################
944 ###########################################################
944 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
945 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
945
946
946 if enable_comments and change['action'] != Action.CONTEXT:
947 if enable_comments and change['action'] != Action.CONTEXT:
947 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
948 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
948
949
949 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
950 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
950
951
951 ###########################################################
952 ###########################################################
952 # OLD LINE NUMBER
953 # OLD LINE NUMBER
953 ###########################################################
954 ###########################################################
954 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
955 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
955 'a_id': anchor_old_id,
956 'a_id': anchor_old_id,
956 'olc': old_lineno_class
957 'olc': old_lineno_class
957 })
958 })
958
959
959 _html.append('''%(link)s''' % {
960 _html.append('''%(link)s''' % {
960 'link': _link_to_if(anchor_link, change['old_lineno'],
961 'link': _link_to_if(anchor_link, change['old_lineno'],
961 '#%s' % anchor_old)
962 '#%s' % anchor_old)
962 })
963 })
963 _html.append('''</td>\n''')
964 _html.append('''</td>\n''')
964 ###########################################################
965 ###########################################################
965 # NEW LINE NUMBER
966 # NEW LINE NUMBER
966 ###########################################################
967 ###########################################################
967
968
968 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
969 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
969 'a_id': anchor_new_id,
970 'a_id': anchor_new_id,
970 'nlc': new_lineno_class
971 'nlc': new_lineno_class
971 })
972 })
972
973
973 _html.append('''%(link)s''' % {
974 _html.append('''%(link)s''' % {
974 'link': _link_to_if(anchor_link, change['new_lineno'],
975 'link': _link_to_if(anchor_link, change['new_lineno'],
975 '#%s' % anchor_new)
976 '#%s' % anchor_new)
976 })
977 })
977 _html.append('''</td>\n''')
978 _html.append('''</td>\n''')
978 ###########################################################
979 ###########################################################
979 # CODE
980 # CODE
980 ###########################################################
981 ###########################################################
981 code_classes = [code_class]
982 code_classes = [code_class]
982 if (not enable_comments or
983 if (not enable_comments or
983 change['action'] == Action.CONTEXT):
984 change['action'] == Action.CONTEXT):
984 code_classes.append('no-comment')
985 code_classes.append('no-comment')
985 _html.append('\t<td class="%s">' % ' '.join(code_classes))
986 _html.append('\t<td class="%s">' % ' '.join(code_classes))
986 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
987 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
987 'code': change['line']
988 'code': change['line']
988 })
989 })
989
990
990 _html.append('''\t</td>''')
991 _html.append('''\t</td>''')
991 _html.append('''\n</tr>\n''')
992 _html.append('''\n</tr>\n''')
992 _html.append('''</table>''')
993 _html.append('''</table>''')
993 if _html_empty:
994 if _html_empty:
994 return None
995 return None
995 return ''.join(_html)
996 return ''.join(_html)
996
997
997 def stat(self):
998 def stat(self):
998 """
999 """
999 Returns a tuple of added and removed line counts for this instance
1000 Returns a tuple of added and removed line counts for this instance
1000 """
1001 """
1001 return self.adds, self.removes
1002 return self.adds, self.removes
1002
1003
1003 def get_context_of_line(
1004 def get_context_of_line(
1004 self, path, diff_line=None, context_before=3, context_after=3):
1005 self, path, diff_line=None, context_before=3, context_after=3):
1005 """
1006 """
1006 Returns the context lines for the specified diff line.
1007 Returns the context lines for the specified diff line.
1007
1008
1008 :type diff_line: :class:`DiffLineNumber`
1009 :type diff_line: :class:`DiffLineNumber`
1009 """
1010 """
1010 assert self.parsed, "DiffProcessor is not initialized."
1011 assert self.parsed, "DiffProcessor is not initialized."
1011
1012
1012 if None not in diff_line:
1013 if None not in diff_line:
1013 raise ValueError(
1014 raise ValueError(
1014 "Cannot specify both line numbers: {}".format(diff_line))
1015 "Cannot specify both line numbers: {}".format(diff_line))
1015
1016
1016 file_diff = self._get_file_diff(path)
1017 file_diff = self._get_file_diff(path)
1017 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1018 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1018
1019
1019 first_line_to_include = max(idx - context_before, 0)
1020 first_line_to_include = max(idx - context_before, 0)
1020 first_line_after_context = idx + context_after + 1
1021 first_line_after_context = idx + context_after + 1
1021 context_lines = chunk[first_line_to_include:first_line_after_context]
1022 context_lines = chunk[first_line_to_include:first_line_after_context]
1022
1023
1023 line_contents = [
1024 line_contents = [
1024 _context_line(line) for line in context_lines
1025 _context_line(line) for line in context_lines
1025 if _is_diff_content(line)]
1026 if _is_diff_content(line)]
1026 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1027 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1027 # Once they are fixed, we can drop this line here.
1028 # Once they are fixed, we can drop this line here.
1028 if line_contents:
1029 if line_contents:
1029 line_contents[-1] = (
1030 line_contents[-1] = (
1030 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1031 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1031 return line_contents
1032 return line_contents
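# Usage sketch (illustrative, not part of the file above): fetching the
# context around a single diff line. `processor` stands for an already
# parsed DiffProcessor-like object; the path and line number are assumed
# example values.
def show_line_context(processor, path='setup.py', new_lineno=42):
    # exactly one side of DiffLineNumber may be set; setting both would
    # trigger the ValueError raised above
    target = DiffLineNumber(old=None, new=new_lineno)
    # returns a list of (action, line) tuples around the matched line
    return processor.get_context_of_line(
        path=path, diff_line=target, context_before=3, context_after=3)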
1032
1033
1033 def find_context(self, path, context, offset=0):
1034 def find_context(self, path, context, offset=0):
1034 """
1035 """
1035 Finds the given `context` inside of the diff.
1036 Finds the given `context` inside of the diff.
1036
1037
1037 Use the parameter `offset` to specify which offset the target line has
1038 Use the parameter `offset` to specify which offset the target line has
1038 inside of the given `context`. This way the correct diff line will be
1039 inside of the given `context`. This way the correct diff line will be
1039 returned.
1040 returned.
1040
1041
1041 :param offset: Offset of the target line within the given
1042 :param offset: Offset of the target line within the given
1042 `context`.
1043 `context`.
1043 """
1044 """
1044 if offset < 0 or offset >= len(context):
1045 if offset < 0 or offset >= len(context):
1045 raise ValueError(
1046 raise ValueError(
1046 "Only positive values up to the length of the context "
1047 "Only positive values up to the length of the context "
1047 "minus one are allowed.")
1048 "minus one are allowed.")
1048
1049
1049 matches = []
1050 matches = []
1050 file_diff = self._get_file_diff(path)
1051 file_diff = self._get_file_diff(path)
1051
1052
1052 for chunk in file_diff['chunks']:
1053 for chunk in file_diff['chunks']:
1053 context_iter = iter(context)
1054 context_iter = iter(context)
1054 for line_idx, line in enumerate(chunk):
1055 for line_idx, line in enumerate(chunk):
1055 try:
1056 try:
1056 if _context_line(line) == context_iter.next():
1057 if _context_line(line) == context_iter.next():
1057 continue
1058 continue
1058 except StopIteration:
1059 except StopIteration:
1059 matches.append((line_idx, chunk))
1060 matches.append((line_idx, chunk))
1060 context_iter = iter(context)
1061 context_iter = iter(context)
1061
1062
1062 # Increment position and trigger StopIteration
1063 # Increment position and trigger StopIteration
1063 # if we had a match at the end
1064 # if we had a match at the end
1064 line_idx += 1
1065 line_idx += 1
1065 try:
1066 try:
1066 context_iter.next()
1067 context_iter.next()
1067 except StopIteration:
1068 except StopIteration:
1068 matches.append((line_idx, chunk))
1069 matches.append((line_idx, chunk))
1069
1070
1070 effective_offset = len(context) - offset
1071 effective_offset = len(context) - offset
1071 found_at_diff_lines = [
1072 found_at_diff_lines = [
1072 _line_to_diff_line_number(chunk[idx - effective_offset])
1073 _line_to_diff_line_number(chunk[idx - effective_offset])
1073 for idx, chunk in matches]
1074 for idx, chunk in matches]
1074
1075
1075 return found_at_diff_lines
1076 return found_at_diff_lines
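# Usage sketch (illustrative): find_context() matches a sequence of
# (action, line) tuples against the parsed chunks and maps each hit back
# to a DiffLineNumber. The context content and offset below are assumed
# values chosen only to show the calling convention.
def locate_added_import(processor, path='setup.py'):
    context = [
        (Action.UNMODIFIED, 'import os\n'),
        (Action.ADD, 'import bz2\n'),
        (Action.UNMODIFIED, 'import collections\n'),
    ]
    # offset=1: the line of interest is the second entry of `context`
    return processor.find_context(path=path, context=context, offset=1)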
1076
1077
1077 def _get_file_diff(self, path):
1078 def _get_file_diff(self, path):
1078 for file_diff in self.parsed_diff:
1079 for file_diff in self.parsed_diff:
1079 if file_diff['filename'] == path:
1080 if file_diff['filename'] == path:
1080 break
1081 break
1081 else:
1082 else:
1082 raise FileNotInDiffException("File {} not in diff".format(path))
1083 raise FileNotInDiffException("File {} not in diff".format(path))
1083 return file_diff
1084 return file_diff
1084
1085
1085 def _find_chunk_line_index(self, file_diff, diff_line):
1086 def _find_chunk_line_index(self, file_diff, diff_line):
1086 for chunk in file_diff['chunks']:
1087 for chunk in file_diff['chunks']:
1087 for idx, line in enumerate(chunk):
1088 for idx, line in enumerate(chunk):
1088 if line['old_lineno'] == diff_line.old:
1089 if line['old_lineno'] == diff_line.old:
1089 return chunk, idx
1090 return chunk, idx
1090 if line['new_lineno'] == diff_line.new:
1091 if line['new_lineno'] == diff_line.new:
1091 return chunk, idx
1092 return chunk, idx
1092 raise LineNotInDiffException(
1093 raise LineNotInDiffException(
1093 "The line {} is not part of the diff.".format(diff_line))
1094 "The line {} is not part of the diff.".format(diff_line))
1094
1095
1095
1096
1096 def _is_diff_content(line):
1097 def _is_diff_content(line):
1097 return line['action'] in (
1098 return line['action'] in (
1098 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1099 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1099
1100
1100
1101
1101 def _context_line(line):
1102 def _context_line(line):
1102 return (line['action'], line['line'])
1103 return (line['action'], line['line'])
1103
1104
1104
1105
1105 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1106 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1106
1107
1107
1108
1108 def _line_to_diff_line_number(line):
1109 def _line_to_diff_line_number(line):
1109 new_line_no = line['new_lineno'] or None
1110 new_line_no = line['new_lineno'] or None
1110 old_line_no = line['old_lineno'] or None
1111 old_line_no = line['old_lineno'] or None
1111 return DiffLineNumber(old=old_line_no, new=new_line_no)
1112 return DiffLineNumber(old=old_line_no, new=new_line_no)
1112
1113
1113
1114
1114 class FileNotInDiffException(Exception):
1115 class FileNotInDiffException(Exception):
1115 """
1116 """
1116 Raised when the context for a missing file is requested.
1117 Raised when the context for a missing file is requested.
1117
1118
1118 If you request the context for a line in a file which is not part of the
1119 If you request the context for a line in a file which is not part of the
1119 given diff, then this exception is raised.
1120 given diff, then this exception is raised.
1120 """
1121 """
1121
1122
1122
1123
1123 class LineNotInDiffException(Exception):
1124 class LineNotInDiffException(Exception):
1124 """
1125 """
1125 Raised when the context for a missing line is requested.
1126 Raised when the context for a missing line is requested.
1126
1127
1127 If you request the context for a line in a file and this line is not
1128 If you request the context for a line in a file and this line is not
1128 part of the given diff, then this exception is raised.
1129 part of the given diff, then this exception is raised.
1129 """
1130 """
1130
1131
1131
1132
1132 class DiffLimitExceeded(Exception):
1133 class DiffLimitExceeded(Exception):
1133 pass
1134 pass
1134
1135
1135
1136
1136 def cache_diff(cached_diff_file, diff, commits):
1137 def cache_diff(cached_diff_file, diff, commits):
1137
1138
1138 struct = {
1139 struct = {
1139 'version': 'v1',
1140 'version': 'v1',
1140 'diff': diff,
1141 'diff': diff,
1141 'commits': commits
1142 'commits': commits
1142 }
1143 }
1143
1144
1144 try:
1145 try:
1145 with open(cached_diff_file, 'wb') as f:
1146 with bz2.BZ2File(cached_diff_file, 'wb') as f:
1146 pickle.dump(struct, f)
1147 pickle.dump(struct, f)
1147 log.debug('Saved diff cache under %s', cached_diff_file)
1148 log.debug('Saved diff cache under %s', cached_diff_file)
1148 except Exception:
1149 except Exception:
1149 log.warn('Failed to save cache', exc_info=True)
1150 log.warn('Failed to save cache', exc_info=True)
1150 # remove the file so we do not keep a damaged cache entry around
1151 # remove the file so we do not keep a damaged cache entry around
1151 try:
1152 try:
1152 os.remove(cached_diff_file)
1153 os.remove(cached_diff_file)
1153 except Exception:
1154 except Exception:
1154 log.exception('Failed to cleanup path %s', cached_diff_file)
1155 log.exception('Failed to cleanup path %s', cached_diff_file)
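# Usage sketch (illustrative, not part of the diff): persisting a diff as
# a bz2-compressed pickle via cache_diff(). The target path and payload
# are assumptions made up for this example.
import os
import tempfile

example_cache_file = os.path.join(tempfile.mkdtemp(), 'example_diff_cache')
cache_diff(example_cache_file, diff={'files': []}, commits=['abcdef0'])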
1155
1156
1156
1157
1157 def load_cached_diff(cached_diff_file):
1158 def load_cached_diff(cached_diff_file):
1158
1159
1159 default_struct = {
1160 default_struct = {
1160 'version': 'v1',
1161 'version': 'v1',
1161 'diff': None,
1162 'diff': None,
1162 'commits': None
1163 'commits': None
1163 }
1164 }
1164
1165
1165 has_cache = os.path.isfile(cached_diff_file)
1166 has_cache = os.path.isfile(cached_diff_file)
1166 if not has_cache:
1167 if not has_cache:
1167 return default_struct
1168 return default_struct
1168
1169
1169 data = None
1170 data = None
1170 try:
1171 try:
1171 with open(cached_diff_file, 'rb') as f:
1172 with bz2.BZ2File(cached_diff_file, 'rb') as f:
1172 data = pickle.load(f)
1173 data = pickle.load(f)
1173 log.debug('Loaded diff cache from %s', cached_diff_file)
1174 log.debug('Loaded diff cache from %s', cached_diff_file)
1174 except Exception:
1175 except Exception:
1175 log.warn('Failed to read diff cache file', exc_info=True)
1176 log.warn('Failed to read diff cache file', exc_info=True)
1176
1177
1177 if not data:
1178 if not data:
1178 data = default_struct
1179 data = default_struct
1179
1180
1180 if not isinstance(data, dict):
1181 if not isinstance(data, dict):
1181 # old version of the cached data?
1182 # old version of the cached data?
1182 data = default_struct
1183 data = default_struct
1183
1184
1184 return data
1185 return data
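# Usage sketch (illustrative): reading a compressed cache entry back. A
# missing or unreadable file, or an unexpected structure, falls back to
# the default struct, so callers only need to check for None. The path
# below is an assumed example.
cached = load_cached_diff('/tmp/example_diff_cache')
if cached['diff'] is None:
    pass  # cache miss: recompute the diff, then call cache_diff() again
else:
    diff, commits = cached['diff'], cached['commits']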
1185
1186
1186
1187
1187 def generate_diff_cache_key(*args):
1188 def generate_diff_cache_key(*args):
1188 """
1189 """
1189 Helper to generate a cache key from the given arguments
1190 Helper to generate a cache key from the given arguments
1190 """
1191 """
1191 def arg_mapper(input_param):
1192 def arg_mapper(input_param):
1192 input_param = safe_str(input_param)
1193 input_param = safe_str(input_param)
1193 # we cannot allow '/' in arguments since it would allow
1194 # we cannot allow '/' in arguments since it would allow
1194 # subdirectory usage
1195 # subdirectory usage
1195 input_param = input_param.replace('/', '_')
1196 input_param = input_param.replace('/', '_')
1196 return input_param or None # prevent empty string arguments
1197 return input_param or None # prevent empty string arguments
1197
1198
1198 return '_'.join([
1199 return '_'.join([
1199 '{}' for i in range(len(args))]).format(*map(arg_mapper, args))
1200 '{}' for i in range(len(args))]).format(*map(arg_mapper, args))
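# Illustrative example of the resulting key format: every argument is
# stringified, '/' is flattened to '_', and the parts are joined with
# underscores. The argument values are assumptions for this sketch.
key = generate_diff_cache_key('pull_request', 42, 'abc123', 'def456')
# key == 'pull_request_42_abc123_def456'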
1200
1201
1201
1202
1202 def diff_cache_exist(cache_storage, *args):
1203 def diff_cache_exist(cache_storage, *args):
1203 """
1204 """
1204 Check and return the cache file path generated from all given arguments
1205 Check and return the cache file path generated from all given arguments
1205 """
1206 """
1206 cache_key = generate_diff_cache_key(*args)
1207 cache_key = generate_diff_cache_key(*args)
1207 cache_file_path = os.path.join(cache_storage, cache_key)
1208 cache_file_path = os.path.join(cache_storage, cache_key)
1208 # prevent path traversal attacks via a param that contains e.g. '../../'
1209 # prevent path traversal attacks via a param that contains e.g. '../../'
1209 if not os.path.abspath(cache_file_path).startswith(cache_storage):
1210 if not os.path.abspath(cache_file_path).startswith(cache_storage):
1210 raise ValueError('Final path must be within {}'.format(cache_storage))
1211 raise ValueError('Final path must be within {}'.format(cache_storage))
1211
1212
1212 return cache_file_path
1213 return cache_file_path
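# End-to-end sketch (illustrative): resolve the cache path for a set of
# arguments, try to load it, and fall back to recomputing on a miss. The
# storage directory and arguments are assumed example values.
cache_storage = '/var/cache/rhodecode/diff_cache'
cache_file_path = diff_cache_exist(cache_storage, 'pull_request', 42, 'abc123')
cached = load_cached_diff(cache_file_path)
if cached['diff'] is None:
    # nothing cached yet: compute `diff` and `commits`, then persist them
    # with cache_diff(cache_file_path, diff, commits)
    pass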