diffs: add validation of the diff version, so we can change the diff format and force a re-cache if stored diffs are in an old version.
marcink - r3079:b924aea3 default
# -*- coding: utf-8 -*-

# Copyright (C) 2011-2018 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/


"""
Set of diffing helpers, previously part of vcs
"""

import os
import re
import bz2

import collections
import difflib
import logging
import cPickle as pickle
from itertools import tee, imap

from rhodecode.lib.vcs.exceptions import VCSError
from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
from rhodecode.lib.utils2 import safe_unicode, safe_str

log = logging.getLogger(__name__)

# define max context, a file with more than this number of lines is unusable
# in the browser anyway
MAX_CONTEXT = 1024 * 1014


class OPS(object):
    ADD = 'A'
    MOD = 'M'
    DEL = 'D'


def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    :param ignore_whitespace: ignore whitespaces in diff
    """
    # make sure we pass in default context
    context = context or 3
    # protect against IntOverflow when passing HUGE context
    if context > MAX_CONTEXT:
        context = MAX_CONTEXT

    submodules = filter(lambda o: isinstance(o, SubModuleNode),
                        [filenode_new, filenode_old])
    if submodules:
        return ''

    for filenode in (filenode_old, filenode_new):
        if not isinstance(filenode, FileNode):
            raise VCSError(
                "Given object should be FileNode object, not %s"
                % filenode.__class__)

    repo = filenode_new.commit.repository
    old_commit = filenode_old.commit or repo.EMPTY_COMMIT
    new_commit = filenode_new.commit

    vcs_gitdiff = repo.get_diff(
        old_commit, new_commit, filenode_new.path,
        ignore_whitespace, context, path1=filenode_old.path)
    return vcs_gitdiff

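
# Illustrative sketch, not part of the original module: how get_gitdiff() is
# typically driven from a vcs repository object.  The repo/commit accessors
# and the file path below are assumptions for the example, not values taken
# from this file.
#
#   old_commit = repo.get_commit(commit_idx=10)
#   new_commit = repo.get_commit(commit_idx=11)
#   raw_patch = get_gitdiff(old_commit.get_node('setup.py'),
#                           new_commit.get_node('setup.py'),
#                           ignore_whitespace=False, context=5)
#
# SubModuleNode arguments short-circuit to an empty string, and anything that
# is not a FileNode raises VCSError.
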
NEW_FILENODE = 1
DEL_FILENODE = 2
MOD_FILENODE = 3
RENAMED_FILENODE = 4
COPIED_FILENODE = 5
CHMOD_FILENODE = 6
BIN_FILENODE = 7


class LimitedDiffContainer(object):

    def __init__(self, diff_limit, cur_diff_size, diff):
        self.diff = diff
        self.diff_limit = diff_limit
        self.cur_diff_size = cur_diff_size

    def __getitem__(self, key):
        return self.diff.__getitem__(key)

    def __iter__(self):
        for l in self.diff:
            yield l


class Action(object):
    """
    Contains constants for the action value of the lines in a parsed diff.
    """

    ADD = 'add'
    DELETE = 'del'
    UNMODIFIED = 'unmod'

    CONTEXT = 'context'
    OLD_NO_NL = 'old-no-nl'
    NEW_NO_NL = 'new-no-nl'


class DiffProcessor(object):
    """
    Give it a unified or git diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in an HTML template.

    .. note:: Unicode handling

       The original diffs are a byte sequence and can contain filenames
       in mixed encodings. This class generally returns `unicode` objects
       since the result is intended for presentation to the user.

    """
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
    _newline_marker = re.compile(r'^\\ No newline at end of file')

    # used for inline highlighter word split
    _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')

    # collapse ranges of commits over given number
    _collapse_commits_over = 5

    def __init__(self, diff, format='gitdiff', diff_limit=None,
                 file_limit=None, show_full_diff=True):
        """
        :param diff: A `Diff` object representing a diff from a vcs backend
        :param format: format of diff passed, `udiff` or `gitdiff`
        :param diff_limit: defines the size of a diff that is considered "big";
            based on that parameter, a cut-off will be triggered. Set to None
            to show the full diff.
        """
        self._diff = diff
        self._format = format
        self.adds = 0
        self.removes = 0
        # calculate diff size
        self.diff_limit = diff_limit
        self.file_limit = file_limit
        self.show_full_diff = show_full_diff
        self.cur_diff_size = 0
        self.parsed = False
        self.parsed_diff = []

        log.debug('Initialized DiffProcessor with %s mode', format)
        if format == 'gitdiff':
            self.differ = self._highlight_line_difflib
            self._parser = self._parse_gitdiff
        else:
            self.differ = self._highlight_line_udiff
            self._parser = self._new_parse_gitdiff

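
    # Illustrative sketch, not part of the original module: typical driving
    # code, assuming `vcs_diff` is the Diff object returned by a backend's
    # get_diff() call; the limit values are made up for the example.  Any
    # `format` other than 'gitdiff' selects the newer parser above.
    #
    #   processor = DiffProcessor(vcs_diff, format='newdiff',
    #                             diff_limit=1024 * 1024,
    #                             file_limit=256 * 1024,
    #                             show_full_diff=False)
    #   parsed_files = processor.prepare()          # list of per-file dicts
    #   html_table = processor.as_html(enable_comments=True)
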
    def _copy_iterator(self):
        """
        make a fresh copy of generator, we should not iterate through
        an original as it's needed for repeating operations on
        this instance of DiffProcessor
        """
        self.__udiff, iterator_copy = tee(self.__udiff)
        return iterator_copy

    def _escaper(self, string):
        """
        Escaper for diff escapes special chars and checks the diff limit

        :param string:
        """
        self.cur_diff_size += len(string)

        if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
            raise DiffLimitExceeded('Diff Limit Exceeded')

        return string \
            .replace('&', '&amp;')\
            .replace('<', '&lt;')\
            .replace('>', '&gt;')

    def _line_counter(self, l):
        """
        Checks each line and bumps total adds/removes for this diff

        :param l:
        """
        if l.startswith('+') and not l.startswith('+++'):
            self.adds += 1
        elif l.startswith('-') and not l.startswith('---'):
            self.removes += 1
        return safe_unicode(l)

    def _highlight_line_difflib(self, line, next_):
        """
        Highlight inline changes in both lines.
        """

        if line['action'] == Action.DELETE:
            old, new = line, next_
        else:
            old, new = next_, line

        oldwords = self._token_re.split(old['line'])
        newwords = self._token_re.split(new['line'])
        sequence = difflib.SequenceMatcher(None, oldwords, newwords)

        oldfragments, newfragments = [], []
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
            oldfrag = ''.join(oldwords[i1:i2])
            newfrag = ''.join(newwords[j1:j2])
            if tag != 'equal':
                if oldfrag:
                    oldfrag = '<del>%s</del>' % oldfrag
                if newfrag:
                    newfrag = '<ins>%s</ins>' % newfrag
            oldfragments.append(oldfrag)
            newfragments.append(newfrag)

        old['line'] = "".join(oldfragments)
        new['line'] = "".join(newfragments)

    def _highlight_line_udiff(self, line, next_):
        """
        Highlight inline changes in both lines.
        """
        start = 0
        limit = min(len(line['line']), len(next_['line']))
        while start < limit and line['line'][start] == next_['line'][start]:
            start += 1
        end = -1
        limit -= start
        while -end <= limit and line['line'][end] == next_['line'][end]:
            end -= 1
        end += 1
        if start or end:
            def do(l):
                last = end + len(l['line'])
                if l['action'] == Action.ADD:
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next_)

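
    # Illustrative sketch, not part of the original module: given a matched
    # delete/add pair, _highlight_line_udiff() wraps only the differing middle
    # part of each line.  The dicts below are made up for the example:
    #
    #   old = {'action': Action.DELETE, 'line': 'foo bar'}
    #   new = {'action': Action.ADD, 'line': 'foo baz'}
    #   processor._highlight_line_udiff(old, new)
    #   # old['line'] == 'foo ba<del>r</del>'
    #   # new['line'] == 'foo ba<ins>z</ins>'
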
    def _clean_line(self, line, command):
        if command in ['+', '-', ' ']:
            # only modify the line if it's actually a diff thing
            line = line[1:]
        return line

    def _parse_gitdiff(self, inline_diff=True):
        _files = []
        diff_container = lambda arg: arg

        for chunk in self._diff.chunks():
            head = chunk.header

            diff = imap(self._escaper, self.diff_splitter(chunk.diff))
            raw_diff = chunk.raw
            limited_diff = False
            exceeds_limit = False

            op = None
            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'ops': {},
            }

            if head['deleted_file_mode']:
                op = OPS.DEL
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            elif head['new_file_mode']:
                op = OPS.ADD
                stats['binary'] = True
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
            else:  # modify operation, can be copy, rename or chmod

                # CHMOD
                if head['new_mode'] and head['old_mode']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][CHMOD_FILENODE] = (
                        'modified file chmod %s => %s' % (
                            head['old_mode'], head['new_mode']))
                # RENAME
                if head['rename_from'] != head['rename_to']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][RENAMED_FILENODE] = (
                        'file renamed from %s to %s' % (
                            head['rename_from'], head['rename_to']))
                # COPY
                if head.get('copy_from') and head.get('copy_to'):
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][COPIED_FILENODE] = (
                        'file copied from %s to %s' % (
                            head['copy_from'], head['copy_to']))

            # If our new parsed headers didn't match anything fall back to
            # old-style detection
            if op is None:
                if not head['a_file'] and head['b_file']:
                    op = OPS.ADD
                    stats['binary'] = True
                    stats['ops'][NEW_FILENODE] = 'new file'

                elif head['a_file'] and not head['b_file']:
                    op = OPS.DEL
                    stats['binary'] = True
                    stats['ops'][DEL_FILENODE] = 'deleted file'

                # it's neither ADD nor DELETE
                if op is None:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][MOD_FILENODE] = 'modified file'

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
                try:
                    raw_diff, chunks, _stats = self._parse_lines(diff)
                    stats['binary'] = False
                    stats['added'] = _stats[0]
                    stats['deleted'] = _stats[1]
                    # explicit mark that it's a modified file
                    if op == OPS.MOD:
                        stats['ops'][MOD_FILENODE] = 'modified file'
                    exceeds_limit = len(raw_diff) > self.file_limit

                    # changed from _escaper function so we validate size of
                    # each file instead of the whole diff
                    # diff will hide big files but still show small ones
                    # from my tests, big files are fairly safe to be parsed
                    # but the browser is the bottleneck
                    if not self.show_full_diff and exceeds_limit:
                        raise DiffLimitExceeded('File Limit Exceeded')

                except DiffLimitExceeded:
                    diff_container = lambda _diff: \
                        LimitedDiffContainer(
                            self.diff_limit, self.cur_diff_size, _diff)

                    exceeds_limit = len(raw_diff) > self.file_limit
                    limited_diff = True
                    chunks = []

            else:  # GIT format binary patch, or possibly empty diff
                if head['bin_patch']:
                    # we have the operation already extracted, but we simply
                    # mark it as a diff we won't show for binary files
                    stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                chunks = []

            if chunks and not self.show_full_diff and op == OPS.DEL:
                # if not full diff mode show deleted file contents
                # TODO: anderson: if the view is not too big, there is no way
                # to see the content of the file
                chunks = []

            chunks.insert(0, [{
                'old_lineno': '',
                'new_lineno': '',
                'action': Action.CONTEXT,
                'line': msg,
            } for _op, msg in stats['ops'].iteritems()
                if _op not in [MOD_FILENODE]])

            _files.append({
                'filename': safe_unicode(head['b_path']),
                'old_revision': head['a_blob_id'],
                'new_revision': head['b_blob_id'],
                'chunks': chunks,
                'raw_diff': safe_unicode(raw_diff),
                'operation': op,
                'stats': stats,
                'exceeds_limit': exceeds_limit,
                'is_limited_diff': limited_diff,
            })

        sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                               OPS.DEL: 2}.get(info['operation'])

        if not inline_diff:
            return diff_container(sorted(_files, key=sorter))

        # highlight inline changes
        for diff_data in _files:
            for chunk in diff_data['chunks']:
                lineiter = iter(chunk)
                try:
                    while 1:
                        line = lineiter.next()
                        if line['action'] not in (
                                Action.UNMODIFIED, Action.CONTEXT):
                            nextline = lineiter.next()
                            if nextline['action'] in ['unmod', 'context'] or \
                                    nextline['action'] == line['action']:
                                continue
                            self.differ(line, nextline)
                except StopIteration:
                    pass

        return diff_container(sorted(_files, key=sorter))

    def _check_large_diff(self):
        log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
        if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
            raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)

    # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
    def _new_parse_gitdiff(self, inline_diff=True):
        _files = []

        # this can be overridden later to a LimitedDiffContainer type
        diff_container = lambda arg: arg

        for chunk in self._diff.chunks():
            head = chunk.header
            log.debug('parsing diff %r', head)

            raw_diff = chunk.raw
            limited_diff = False
            exceeds_limit = False

            op = None
            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'old_mode': None,
                'new_mode': None,
                'ops': {},
            }
            if head['old_mode']:
                stats['old_mode'] = head['old_mode']
            if head['new_mode']:
                stats['new_mode'] = head['new_mode']
            if head['b_mode']:
                stats['new_mode'] = head['b_mode']

            # delete file
            if head['deleted_file_mode']:
                op = OPS.DEL
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            # new file
            elif head['new_file_mode']:
                op = OPS.ADD
                stats['binary'] = True
                stats['old_mode'] = None
                stats['new_mode'] = head['new_file_mode']
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']

            # modify operation, can be copy, rename or chmod
            else:
                # CHMOD
                if head['new_mode'] and head['old_mode']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][CHMOD_FILENODE] = (
                        'modified file chmod %s => %s' % (
                            head['old_mode'], head['new_mode']))

                # RENAME
                if head['rename_from'] != head['rename_to']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['renamed'] = (head['rename_from'], head['rename_to'])
                    stats['ops'][RENAMED_FILENODE] = (
                        'file renamed from %s to %s' % (
                            head['rename_from'], head['rename_to']))
                # COPY
                if head.get('copy_from') and head.get('copy_to'):
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['copied'] = (head['copy_from'], head['copy_to'])
                    stats['ops'][COPIED_FILENODE] = (
                        'file copied from %s to %s' % (
                            head['copy_from'], head['copy_to']))

            # If our new parsed headers didn't match anything fall back to
            # old-style detection
            if op is None:
                if not head['a_file'] and head['b_file']:
                    op = OPS.ADD
                    stats['binary'] = True
                    stats['new_file'] = True
                    stats['ops'][NEW_FILENODE] = 'new file'

                elif head['a_file'] and not head['b_file']:
                    op = OPS.DEL
                    stats['binary'] = True
                    stats['ops'][DEL_FILENODE] = 'deleted file'

                # it's neither ADD nor DELETE
                if op is None:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][MOD_FILENODE] = 'modified file'

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
                # simulate splitlines, so we keep the line end part
                diff = self.diff_splitter(chunk.diff)

                # append each file to the diff size
                raw_chunk_size = len(raw_diff)

                exceeds_limit = raw_chunk_size > self.file_limit
                self.cur_diff_size += raw_chunk_size

                try:
                    # Check each file instead of the whole diff.
                    # Diff will hide big files but still show small ones.
                    # From the tests big files are fairly safe to be parsed
                    # but the browser is the bottleneck.
                    if not self.show_full_diff and exceeds_limit:
                        log.debug('File `%s` exceeds current file_limit of %s',
                                  safe_unicode(head['b_path']), self.file_limit)
                        raise DiffLimitExceeded(
                            'File Limit %s Exceeded', self.file_limit)

                    self._check_large_diff()

                    raw_diff, chunks, _stats = self._new_parse_lines(diff)
                    stats['binary'] = False
                    stats['added'] = _stats[0]
                    stats['deleted'] = _stats[1]
                    # explicit mark that it's a modified file
                    if op == OPS.MOD:
                        stats['ops'][MOD_FILENODE] = 'modified file'

                except DiffLimitExceeded:
                    diff_container = lambda _diff: \
                        LimitedDiffContainer(
                            self.diff_limit, self.cur_diff_size, _diff)

                    limited_diff = True
                    chunks = []

            else:  # GIT format binary patch, or possibly empty diff
                if head['bin_patch']:
                    # we have the operation already extracted, but we simply
                    # mark it as a diff we won't show for binary files
                    stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                chunks = []

            # Hide content of deleted node by setting empty chunks
            if chunks and not self.show_full_diff and op == OPS.DEL:
                # if not full diff mode show deleted file contents
                # TODO: anderson: if the view is not too big, there is no way
                # to see the content of the file
                chunks = []

            chunks.insert(
                0, [{'old_lineno': '',
                     'new_lineno': '',
                     'action': Action.CONTEXT,
                     'line': msg,
                     } for _op, msg in stats['ops'].iteritems()
                    if _op not in [MOD_FILENODE]])

            original_filename = safe_unicode(head['a_path'])
            _files.append({
                'original_filename': original_filename,
                'filename': safe_unicode(head['b_path']),
                'old_revision': head['a_blob_id'],
                'new_revision': head['b_blob_id'],
                'chunks': chunks,
                'raw_diff': safe_unicode(raw_diff),
                'operation': op,
                'stats': stats,
                'exceeds_limit': exceeds_limit,
                'is_limited_diff': limited_diff,
            })

        sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                               OPS.DEL: 2}.get(info['operation'])

        return diff_container(sorted(_files, key=sorter))

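
    # Illustrative sketch, not part of the original module: approximate shape
    # of one entry in the list returned by _new_parse_gitdiff() for a plain
    # modified text file.  All values are made up for the example:
    #
    #   {'original_filename': u'setup.py',
    #    'filename': u'setup.py',
    #    'old_revision': 'a_blob_id from the diff header',
    #    'new_revision': 'b_blob_id from the diff header',
    #    'operation': 'M',                       # OPS.MOD
    #    'stats': {'added': 2, 'deleted': 1, 'binary': False,
    #              'old_mode': None, 'new_mode': None,
    #              'ops': {MOD_FILENODE: 'modified file'}},
    #    'chunks': [[], hunk_1, hunk_2],         # leading [] holds op messages
    #    'raw_diff': u'... the raw hunk text ...',
    #    'exceeds_limit': False,
    #    'is_limited_diff': False}
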
    # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
    def _parse_lines(self, diff_iter):
        """
        Parse the diff and return data for the template.
        """

        stats = [0, 0]
        chunks = []
        raw_diff = []

        try:
            line = diff_iter.next()

            while line:
                raw_diff.append(line)
                lines = []
                chunks.append(lines)

                match = self._chunk_re.match(line)

                if not match:
                    break

                gr = match.groups()
                (old_line, old_end,
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                old_line -= 1
                new_line -= 1

                context = len(gr) == 5
                old_end += old_line
                new_end += new_line

                if context:
                    # skip context only if it's first line
                    if int(gr[0]) > 1:
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': Action.CONTEXT,
                            'line': line,
                        })

                line = diff_iter.next()

                while old_line < old_end or new_line < new_end:
                    command = ' '
                    if line:
                        command = line[0]

                    affects_old = affects_new = False

                    # ignore those if we don't expect them
                    if command in '#@':
                        continue
                    elif command == '+':
                        affects_new = True
                        action = Action.ADD
                        stats[0] += 1
                    elif command == '-':
                        affects_old = True
                        action = Action.DELETE
                        stats[1] += 1
                    else:
                        affects_old = affects_new = True
                        action = Action.UNMODIFIED

                    if not self._newline_marker.match(line):
                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': self._clean_line(line, command)
                        })
                        raw_diff.append(line)

                    line = diff_iter.next()

                    if self._newline_marker.match(line):
                        # we need to append to lines, since this is not
                        # counted in the line specs of diff
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': Action.CONTEXT,
                            'line': self._clean_line(line, command)
                        })

        except StopIteration:
            pass
        return ''.join(raw_diff), chunks, stats

    # FIXME: NEWDIFFS: dan: this replaces _parse_lines
    def _new_parse_lines(self, diff_iter):
        """
        Parse the diff and return data for the template.
        """

        stats = [0, 0]
        chunks = []
        raw_diff = []

        try:
            line = diff_iter.next()

            while line:
                raw_diff.append(line)
                # match header e.g @@ -0,0 +1 @@\n'
                match = self._chunk_re.match(line)

                if not match:
                    break

                gr = match.groups()
                (old_line, old_end,
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]

                lines = []
                hunk = {
                    'section_header': gr[-1],
                    'source_start': old_line,
                    'source_length': old_end,
                    'target_start': new_line,
                    'target_length': new_end,
                    'lines': lines,
                }
                chunks.append(hunk)

                old_line -= 1
                new_line -= 1

                context = len(gr) == 5
                old_end += old_line
                new_end += new_line

                line = diff_iter.next()

                while old_line < old_end or new_line < new_end:
                    command = ' '
                    if line:
                        command = line[0]

                    affects_old = affects_new = False

                    # ignore those if we don't expect them
                    if command in '#@':
                        continue
                    elif command == '+':
                        affects_new = True
                        action = Action.ADD
                        stats[0] += 1
                    elif command == '-':
                        affects_old = True
                        action = Action.DELETE
                        stats[1] += 1
                    else:
                        affects_old = affects_new = True
                        action = Action.UNMODIFIED

                    if not self._newline_marker.match(line):
                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': self._clean_line(line, command)
                        })
                        raw_diff.append(line)

                    line = diff_iter.next()

                    if self._newline_marker.match(line):
                        # we need to append to lines, since this is not
                        # counted in the line specs of diff
                        if affects_old:
                            action = Action.OLD_NO_NL
                        elif affects_new:
                            action = Action.NEW_NO_NL
                        else:
                            raise Exception('invalid context for no newline')

                        lines.append({
                            'old_lineno': None,
                            'new_lineno': None,
                            'action': action,
                            'line': self._clean_line(line, command)
                        })

        except StopIteration:
            pass

        return ''.join(raw_diff), chunks, stats

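
    # Illustrative sketch, not part of the original module: what
    # _new_parse_lines() produces for a small, made-up hunk.  Input lines, as
    # yielded by diff_splitter():
    #
    #   '@@ -1,2 +1,2 @@ def foo():\n'
    #   '-    return 1\n'
    #   '+    return 2\n'
    #   '     pass\n'
    #
    # resulting single hunk dict:
    #
    #   {'section_header': ' def foo():',
    #    'source_start': 1, 'source_length': 2,
    #    'target_start': 1, 'target_length': 2,
    #    'lines': [
    #        {'old_lineno': 1, 'new_lineno': '', 'action': 'del',
    #         'line': '    return 1\n'},
    #        {'old_lineno': '', 'new_lineno': 1, 'action': 'add',
    #         'line': '    return 2\n'},
    #        {'old_lineno': 2, 'new_lineno': 2, 'action': 'unmod',
    #         'line': '    pass\n'},
    #    ]}
    #
    # and stats == [1, 1], i.e. one added and one deleted line.
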
    def _safe_id(self, idstring):
        """Make a string safe for including in an id attribute.

        The HTML spec says that id attributes 'must begin with
        a letter ([A-Za-z]) and may be followed by any number
        of letters, digits ([0-9]), hyphens ("-"), underscores
        ("_"), colons (":"), and periods (".")'. These regexps
        are slightly over-zealous, in that they remove colons
        and periods unnecessarily.

        Whitespace is transformed into underscores, and then
        anything which is not a hyphen or a character that
        matches \w (alphanumerics and underscore) is removed.

        """
        # Transform all whitespace to underscore
        idstring = re.sub(r'\s', "_", '%s' % idstring)
        # Remove everything that is not a hyphen or a member of \w
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
        return idstring

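
    # Illustrative sketch, not part of the original module, assuming
    # `processor` is a DiffProcessor instance:
    #
    #   >>> processor._safe_id('My File.txt')
    #   'my_filetxt'
    #
    # whitespace becomes "_", the "." is dropped by the (?!-)\W substitution
    # and the result is lower-cased.
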
    @classmethod
    def diff_splitter(cls, string):
        """
        Diff split that emulates .splitlines() but works only on \n
        """
        if not string:
            return
        elif string == '\n':
            yield u'\n'
        else:

            has_newline = string.endswith('\n')
            elements = string.split('\n')
            if has_newline:
                # skip last element as it's empty string from newlines
                elements = elements[:-1]

            len_elements = len(elements)

            for cnt, line in enumerate(elements, start=1):
                last_line = cnt == len_elements
                if last_line and not has_newline:
                    yield safe_unicode(line)
                else:
                    yield safe_unicode(line) + '\n'

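
    # Illustrative sketch, not part of the original module, inputs made up:
    #
    #   >>> list(DiffProcessor.diff_splitter('line1\nline2'))
    #   [u'line1\n', u'line2']
    #   >>> list(DiffProcessor.diff_splitter('line1\nline2\n'))
    #   [u'line1\n', u'line2\n']
    #
    # unlike .splitlines(), only '\n' counts as a separator and the trailing
    # '\n' of each element is preserved.
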
856 def prepare(self, inline_diff=True):
856 def prepare(self, inline_diff=True):
857 """
857 """
858 Prepare the passed udiff for HTML rendering.
858 Prepare the passed udiff for HTML rendering.
859
859
860 :return: A list of dicts with diff information.
860 :return: A list of dicts with diff information.
861 """
861 """
862 parsed = self._parser(inline_diff=inline_diff)
862 parsed = self._parser(inline_diff=inline_diff)
863 self.parsed = True
863 self.parsed = True
864 self.parsed_diff = parsed
864 self.parsed_diff = parsed
865 return parsed
865 return parsed
866
866
867 def as_raw(self, diff_lines=None):
867 def as_raw(self, diff_lines=None):
868 """
868 """
869 Returns raw diff as a byte string
869 Returns raw diff as a byte string
870 """
870 """
871 return self._diff.raw
871 return self._diff.raw
872
872
873 def as_html(self, table_class='code-difftable', line_class='line',
873 def as_html(self, table_class='code-difftable', line_class='line',
874 old_lineno_class='lineno old', new_lineno_class='lineno new',
874 old_lineno_class='lineno old', new_lineno_class='lineno new',
875 code_class='code', enable_comments=False, parsed_lines=None):
875 code_class='code', enable_comments=False, parsed_lines=None):
876 """
876 """
877 Return given diff as html table with customized css classes
877 Return given diff as html table with customized css classes
878 """
878 """
879 # TODO(marcink): not sure how to pass in translator
879 # TODO(marcink): not sure how to pass in translator
880 # here in an efficient way, leave the _ for proper gettext extraction
880 # here in an efficient way, leave the _ for proper gettext extraction
881 _ = lambda s: s
881 _ = lambda s: s
882
882
883 def _link_to_if(condition, label, url):
883 def _link_to_if(condition, label, url):
884 """
884 """
885 Generates a link if condition is meet or just the label if not.
885 Generates a link if condition is meet or just the label if not.
886 """
886 """
887
887
888 if condition:
888 if condition:
889 return '''<a href="%(url)s" class="tooltip"
889 return '''<a href="%(url)s" class="tooltip"
890 title="%(title)s">%(label)s</a>''' % {
890 title="%(title)s">%(label)s</a>''' % {
891 'title': _('Click to select line'),
891 'title': _('Click to select line'),
892 'url': url,
892 'url': url,
893 'label': label
893 'label': label
894 }
894 }
895 else:
895 else:
896 return label
896 return label
897 if not self.parsed:
897 if not self.parsed:
898 self.prepare()
898 self.prepare()
899
899
900 diff_lines = self.parsed_diff
900 diff_lines = self.parsed_diff
901 if parsed_lines:
901 if parsed_lines:
902 diff_lines = parsed_lines
902 diff_lines = parsed_lines
903
903
904 _html_empty = True
904 _html_empty = True
905 _html = []
905 _html = []
906 _html.append('''<table class="%(table_class)s">\n''' % {
906 _html.append('''<table class="%(table_class)s">\n''' % {
907 'table_class': table_class
907 'table_class': table_class
908 })
908 })
909
909
910 for diff in diff_lines:
910 for diff in diff_lines:
911 for line in diff['chunks']:
911 for line in diff['chunks']:
912 _html_empty = False
912 _html_empty = False
913 for change in line:
913 for change in line:
914 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
914 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
915 'lc': line_class,
915 'lc': line_class,
916 'action': change['action']
916 'action': change['action']
917 })
917 })
918 anchor_old_id = ''
918 anchor_old_id = ''
919 anchor_new_id = ''
919 anchor_new_id = ''
920 anchor_old = "%(filename)s_o%(oldline_no)s" % {
920 anchor_old = "%(filename)s_o%(oldline_no)s" % {
921 'filename': self._safe_id(diff['filename']),
921 'filename': self._safe_id(diff['filename']),
922 'oldline_no': change['old_lineno']
922 'oldline_no': change['old_lineno']
923 }
923 }
924 anchor_new = "%(filename)s_n%(oldline_no)s" % {
924 anchor_new = "%(filename)s_n%(oldline_no)s" % {
925 'filename': self._safe_id(diff['filename']),
925 'filename': self._safe_id(diff['filename']),
926 'oldline_no': change['new_lineno']
926 'oldline_no': change['new_lineno']
927 }
927 }
928 cond_old = (change['old_lineno'] != '...' and
928 cond_old = (change['old_lineno'] != '...' and
929 change['old_lineno'])
929 change['old_lineno'])
930 cond_new = (change['new_lineno'] != '...' and
930 cond_new = (change['new_lineno'] != '...' and
931 change['new_lineno'])
931 change['new_lineno'])
932 if cond_old:
932 if cond_old:
933 anchor_old_id = 'id="%s"' % anchor_old
933 anchor_old_id = 'id="%s"' % anchor_old
934 if cond_new:
934 if cond_new:
935 anchor_new_id = 'id="%s"' % anchor_new
935 anchor_new_id = 'id="%s"' % anchor_new
936
936
937 if change['action'] != Action.CONTEXT:
937 if change['action'] != Action.CONTEXT:
938 anchor_link = True
938 anchor_link = True
939 else:
939 else:
940 anchor_link = False
940 anchor_link = False
941
941
942 ###########################################################
942 ###########################################################
943 # COMMENT ICONS
943 # COMMENT ICONS
944 ###########################################################
944 ###########################################################
945 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
945 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
946
946
947 if enable_comments and change['action'] != Action.CONTEXT:
947 if enable_comments and change['action'] != Action.CONTEXT:
948 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
948 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
949
949
950 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
950 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
951
951
952 ###########################################################
952 ###########################################################
953 # OLD LINE NUMBER
953 # OLD LINE NUMBER
954 ###########################################################
954 ###########################################################
955 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
955 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
956 'a_id': anchor_old_id,
956 'a_id': anchor_old_id,
957 'olc': old_lineno_class
957 'olc': old_lineno_class
958 })
958 })
959
959
960 _html.append('''%(link)s''' % {
960 _html.append('''%(link)s''' % {
961 'link': _link_to_if(anchor_link, change['old_lineno'],
961 'link': _link_to_if(anchor_link, change['old_lineno'],
962 '#%s' % anchor_old)
962 '#%s' % anchor_old)
963 })
963 })
964 _html.append('''</td>\n''')
964 _html.append('''</td>\n''')
965 ###########################################################
965 ###########################################################
966 # NEW LINE NUMBER
966 # NEW LINE NUMBER
967 ###########################################################
967 ###########################################################
968
968
969 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
969 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
970 'a_id': anchor_new_id,
970 'a_id': anchor_new_id,
971 'nlc': new_lineno_class
971 'nlc': new_lineno_class
972 })
972 })
973
973
974 _html.append('''%(link)s''' % {
974 _html.append('''%(link)s''' % {
975 'link': _link_to_if(anchor_link, change['new_lineno'],
975 'link': _link_to_if(anchor_link, change['new_lineno'],
976 '#%s' % anchor_new)
976 '#%s' % anchor_new)
977 })
977 })
978 _html.append('''</td>\n''')
978 _html.append('''</td>\n''')
979 ###########################################################
979 ###########################################################
980 # CODE
980 # CODE
981 ###########################################################
981 ###########################################################
982 code_classes = [code_class]
982 code_classes = [code_class]
983 if (not enable_comments or
983 if (not enable_comments or
984 change['action'] == Action.CONTEXT):
984 change['action'] == Action.CONTEXT):
985 code_classes.append('no-comment')
985 code_classes.append('no-comment')
986 _html.append('\t<td class="%s">' % ' '.join(code_classes))
986 _html.append('\t<td class="%s">' % ' '.join(code_classes))
987 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
987 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
988 'code': change['line']
988 'code': change['line']
989 })
989 })
990
990
991 _html.append('''\t</td>''')
991 _html.append('''\t</td>''')
992 _html.append('''\n</tr>\n''')
992 _html.append('''\n</tr>\n''')
993 _html.append('''</table>''')
993 _html.append('''</table>''')
994 if _html_empty:
994 if _html_empty:
995 return None
995 return None
996 return ''.join(_html)
996 return ''.join(_html)
997
997
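The `_link_to_if` helper used for both line-number cells above is defined earlier in this module; from its call sites it wraps the line number in an anchor only when `anchor_link` is enabled and otherwise emits the bare label. A minimal sketch of that behaviour (the exact markup here is an assumption, not the module's verbatim implementation):

def _link_to_if(condition, label, url):
    # Render an HTML anchor around `label` only when `condition` is truthy.
    if condition:
        return '<a href="%(url)s">%(label)s</a>' % {'url': url, 'label': label}
    return label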
998 def stat(self):
998 def stat(self):
999 """
999 """
1000 Returns a tuple of (added, removed) lines for this instance
1000 Returns a tuple of (added, removed) lines for this instance
1001 """
1001 """
1002 return self.adds, self.removes
1002 return self.adds, self.removes
1003
1003
1004 def get_context_of_line(
1004 def get_context_of_line(
1005 self, path, diff_line=None, context_before=3, context_after=3):
1005 self, path, diff_line=None, context_before=3, context_after=3):
1006 """
1006 """
1007 Returns the context lines for the specified diff line.
1007 Returns the context lines for the specified diff line.
1008
1008
1009 :type diff_line: :class:`DiffLineNumber`
1009 :type diff_line: :class:`DiffLineNumber`
1010 """
1010 """
1011 assert self.parsed, "DiffProcessor is not initialized."
1011 assert self.parsed, "DiffProcessor is not initialized."
1012
1012
1013 if None not in diff_line:
1013 if None not in diff_line:
1014 raise ValueError(
1014 raise ValueError(
1015 "Cannot specify both line numbers: {}".format(diff_line))
1015 "Cannot specify both line numbers: {}".format(diff_line))
1016
1016
1017 file_diff = self._get_file_diff(path)
1017 file_diff = self._get_file_diff(path)
1018 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1018 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1019
1019
1020 first_line_to_include = max(idx - context_before, 0)
1020 first_line_to_include = max(idx - context_before, 0)
1021 first_line_after_context = idx + context_after + 1
1021 first_line_after_context = idx + context_after + 1
1022 context_lines = chunk[first_line_to_include:first_line_after_context]
1022 context_lines = chunk[first_line_to_include:first_line_after_context]
1023
1023
1024 line_contents = [
1024 line_contents = [
1025 _context_line(line) for line in context_lines
1025 _context_line(line) for line in context_lines
1026 if _is_diff_content(line)]
1026 if _is_diff_content(line)]
1027 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1027 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1028 # Once they are fixed, we can drop this line here.
1028 # Once they are fixed, we can drop this line here.
1029 if line_contents:
1029 if line_contents:
1030 line_contents[-1] = (
1030 line_contents[-1] = (
1031 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1031 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1032 return line_contents
1032 return line_contents
1033
1033
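get_context_of_line() above locates the chunk entry whose old or new line number matches, slices a window of `context_before`/`context_after` lines around it (clamped at the chunk start), keeps only UNMODIFIED/ADD/DELETE entries and re-appends the newline the chunks currently drop. A standalone sketch of just the windowing step, with illustrative data:

def _window(chunk, idx, context_before=3, context_after=3):
    # Up to `context_before` lines before and `context_after` lines after
    # the matched index; max() clamps the window at the start of the chunk.
    first = max(idx - context_before, 0)
    last = idx + context_after + 1
    return chunk[first:last]

print(_window(list(range(20)), 1))   # [0, 1, 2, 3, 4] - clamped at the front
print(_window(list(range(20)), 10))  # [7, 8, 9, 10, 11, 12, 13]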
1034 def find_context(self, path, context, offset=0):
1034 def find_context(self, path, context, offset=0):
1035 """
1035 """
1036 Finds the given `context` inside of the diff.
1036 Finds the given `context` inside of the diff.
1037
1037
1038 Use the parameter `offset` to specify which offset the target line has
1038 Use the parameter `offset` to specify which offset the target line has
1039 inside of the given `context`. This way the correct diff line will be
1039 inside of the given `context`. This way the correct diff line will be
1040 returned.
1040 returned.
1041
1041
1042 :param offset: Offset of the target line
1042 :param offset: Offset of the target line
1043 within the given `context`.
1043 within the given `context`.
1044 """
1044 """
1045 if offset < 0 or offset >= len(context):
1045 if offset < 0 or offset >= len(context):
1046 raise ValueError(
1046 raise ValueError(
1047 "Only positive values up to the length of the context "
1047 "Only positive values up to the length of the context "
1048 "minus one are allowed.")
1048 "minus one are allowed.")
1049
1049
1050 matches = []
1050 matches = []
1051 file_diff = self._get_file_diff(path)
1051 file_diff = self._get_file_diff(path)
1052
1052
1053 for chunk in file_diff['chunks']:
1053 for chunk in file_diff['chunks']:
1054 context_iter = iter(context)
1054 context_iter = iter(context)
1055 for line_idx, line in enumerate(chunk):
1055 for line_idx, line in enumerate(chunk):
1056 try:
1056 try:
1057 if _context_line(line) == context_iter.next():
1057 if _context_line(line) == context_iter.next():
1058 continue
1058 continue
1059 except StopIteration:
1059 except StopIteration:
1060 matches.append((line_idx, chunk))
1060 matches.append((line_idx, chunk))
1061 context_iter = iter(context)
1061 context_iter = iter(context)
1062
1062
1063 # Increment position and trigger StopIteration
1063 # Increment position and trigger StopIteration
1064 # if we had a match at the end
1064 # if we had a match at the end
1065 line_idx += 1
1065 line_idx += 1
1066 try:
1066 try:
1067 context_iter.next()
1067 context_iter.next()
1068 except StopIteration:
1068 except StopIteration:
1069 matches.append((line_idx, chunk))
1069 matches.append((line_idx, chunk))
1070
1070
1071 effective_offset = len(context) - offset
1071 effective_offset = len(context) - offset
1072 found_at_diff_lines = [
1072 found_at_diff_lines = [
1073 _line_to_diff_line_number(chunk[idx - effective_offset])
1073 _line_to_diff_line_number(chunk[idx - effective_offset])
1074 for idx, chunk in matches]
1074 for idx, chunk in matches]
1075
1075
1076 return found_at_diff_lines
1076 return found_at_diff_lines
1077
1077
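find_context() records, for every occurrence of `context` in a chunk, the index one past the end of the match, then steps back by `len(context) - offset` so the returned DiffLineNumber addresses the line `offset` positions into the matched block. A compact standalone illustration of that bookkeeping, using plain lists (it re-checks a whole window per position, whereas the loop above walks a single iterator and only resets it after a completed match):

def _match_ends(lines, context):
    # Index one past the end of every occurrence of `context` in `lines`.
    n = len(context)
    return [i + n for i in range(len(lines) - n + 1) if lines[i:i + n] == context]

ends = _match_ends(['x', 'b', 'c', 'y', 'b', 'c'], ['b', 'c'])
print(ends)                         # [3, 6]
print([i - (2 - 1) for i in ends])  # [2, 5] - the line at offset 1 inside each match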
1078 def _get_file_diff(self, path):
1078 def _get_file_diff(self, path):
1079 for file_diff in self.parsed_diff:
1079 for file_diff in self.parsed_diff:
1080 if file_diff['filename'] == path:
1080 if file_diff['filename'] == path:
1081 break
1081 break
1082 else:
1082 else:
1083 raise FileNotInDiffException("File {} not in diff".format(path))
1083 raise FileNotInDiffException("File {} not in diff".format(path))
1084 return file_diff
1084 return file_diff
1085
1085
1086 def _find_chunk_line_index(self, file_diff, diff_line):
1086 def _find_chunk_line_index(self, file_diff, diff_line):
1087 for chunk in file_diff['chunks']:
1087 for chunk in file_diff['chunks']:
1088 for idx, line in enumerate(chunk):
1088 for idx, line in enumerate(chunk):
1089 if line['old_lineno'] == diff_line.old:
1089 if line['old_lineno'] == diff_line.old:
1090 return chunk, idx
1090 return chunk, idx
1091 if line['new_lineno'] == diff_line.new:
1091 if line['new_lineno'] == diff_line.new:
1092 return chunk, idx
1092 return chunk, idx
1093 raise LineNotInDiffException(
1093 raise LineNotInDiffException(
1094 "The line {} is not part of the diff.".format(diff_line))
1094 "The line {} is not part of the diff.".format(diff_line))
1095
1095
1096
1096
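The module-level helpers below operate on parsed chunk entries, i.e. dictionaries carrying the keys used throughout this class. An illustrative entry (the values are made up, not taken from a real diff):

example_line = {
    'action': Action.ADD,        # e.g. Action.UNMODIFIED, Action.ADD or Action.DELETE
    'line': 'print("hello")\n',  # raw content of the diff line
    'old_lineno': '',            # falsy for lines that exist only on one side
    'new_lineno': 42,
}

For such an entry, _context_line() returns the (action, line) pair and _line_to_diff_line_number() returns DiffLineNumber(old=None, new=42).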
1097 def _is_diff_content(line):
1097 def _is_diff_content(line):
1098 return line['action'] in (
1098 return line['action'] in (
1099 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1099 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1100
1100
1101
1101
1102 def _context_line(line):
1102 def _context_line(line):
1103 return (line['action'], line['line'])
1103 return (line['action'], line['line'])
1104
1104
1105
1105
1106 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1106 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1107
1107
1108
1108
1109 def _line_to_diff_line_number(line):
1109 def _line_to_diff_line_number(line):
1110 new_line_no = line['new_lineno'] or None
1110 new_line_no = line['new_lineno'] or None
1111 old_line_no = line['old_lineno'] or None
1111 old_line_no = line['old_lineno'] or None
1112 return DiffLineNumber(old=old_line_no, new=new_line_no)
1112 return DiffLineNumber(old=old_line_no, new=new_line_no)
1113
1113
1114
1114
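DiffLineNumber is the small value object used to address a line by its position in either the old or the new file; _line_to_diff_line_number() normalises falsy line numbers to None so that exactly the populated side can be tested. A short usage sketch:

at_new = DiffLineNumber(old=None, new=42)   # a line addressed on the new side
at_old = DiffLineNumber(old=10, new=None)   # a line addressed on the old side
print('%s %s' % (at_new.new, at_old.old))   # 42 10
# get_context_of_line() above rejects a value where both sides are set.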
1115 class FileNotInDiffException(Exception):
1115 class FileNotInDiffException(Exception):
1116 """
1116 """
1117 Raised when the context for a missing file is requested.
1117 Raised when the context for a missing file is requested.
1118
1118
1119 If you request the context for a line in a file which is not part of the
1119 If you request the context for a line in a file which is not part of the
1120 given diff, then this exception is raised.
1120 given diff, then this exception is raised.
1121 """
1121 """
1122
1122
1123
1123
1124 class LineNotInDiffException(Exception):
1124 class LineNotInDiffException(Exception):
1125 """
1125 """
1126 Raised when the context for a missing line is requested.
1126 Raised when the context for a missing line is requested.
1127
1127
1128 If you request the context for a line in a file and this line is not
1128 If you request the context for a line in a file and this line is not
1129 part of the given diff, then this exception is raised.
1129 part of the given diff, then this exception is raised.
1130 """
1130 """
1131
1131
1132
1132
1133 class DiffLimitExceeded(Exception):
1133 class DiffLimitExceeded(Exception):
1134 pass
1134 pass
1135
1135
1136
1136
1137 # NOTE(marcink): if diffs.mako change, probably this
1138 # needs a bump to next version
1139 CURRENT_DIFF_VERSION = 'v1'
1140
1141
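CURRENT_DIFF_VERSION ties a cached, pre-rendered diff to the template that produced it: when the rendering changes (e.g. diffs.mako), bumping this constant makes every existing cache fail the version check in load_cached_diff() below, so the file is purged and the diff re-rendered. A hypothetical illustration of the effect:

# Hypothetical: a struct written while the constant was 'v1' stops
# validating once the code ships with 'v2'.
stale = {'version': 'v1', 'diff': '<rendered html>', 'commits': []}
if stale.get('version') != 'v2':   # pretend CURRENT_DIFF_VERSION was bumped to 'v2'
    print('old cache version - purge and re-render')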
1142 def _cleanup_cache_file(cached_diff_file):
1143 # clean up the file so we don't keep a damaged cache
1144 try:
1145 os.remove(cached_diff_file)
1146 except Exception:
1147 log.exception('Failed to cleanup path %s', cached_diff_file)
1148
1149
1137 def cache_diff(cached_diff_file, diff, commits):
1150 def cache_diff(cached_diff_file, diff, commits):
1138
1151
1139 struct = {
1152 struct = {
1140 'version': 'v1',
1153 'version': CURRENT_DIFF_VERSION,
1141 'diff': diff,
1154 'diff': diff,
1142 'commits': commits
1155 'commits': commits
1143 }
1156 }
1144
1157
1145 try:
1158 try:
1146 with bz2.BZ2File(cached_diff_file, 'wb') as f:
1159 with bz2.BZ2File(cached_diff_file, 'wb') as f:
1147 pickle.dump(struct, f)
1160 pickle.dump(struct, f)
1148 log.debug('Saved diff cache under %s', cached_diff_file)
1161 log.debug('Saved diff cache under %s', cached_diff_file)
1149 except Exception:
1162 except Exception:
1150 log.warn('Failed to save cache', exc_info=True)
1163 log.warn('Failed to save cache', exc_info=True)
1151 # cleanup file to not store it "damaged"
1164 _cleanup_cache_file(cached_diff_file)
1152 try:
1153 os.remove(cached_diff_file)
1154 except Exception:
1155 log.exception('Failed to cleanup path %s', cached_diff_file)
1156
1165
1157
1166
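The cache file written above is a plain bz2-compressed pickle of the struct, so a stored entry can be inspected directly from a shell or script; a quick sketch (the path is hypothetical):

import bz2
import cPickle as pickle

with bz2.BZ2File('/tmp/example_diff_cache_key', 'rb') as f:
    struct = pickle.load(f)
print(sorted(struct))      # ['commits', 'diff', 'version']
print(struct['version'])   # e.g. v1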
1158 def load_cached_diff(cached_diff_file):
1167 def load_cached_diff(cached_diff_file):
1159
1168
1160 default_struct = {
1169 default_struct = {
1161 'version': 'v1',
1170 'version': CURRENT_DIFF_VERSION,
1162 'diff': None,
1171 'diff': None,
1163 'commits': None
1172 'commits': None
1164 }
1173 }
1165
1174
1166 has_cache = os.path.isfile(cached_diff_file)
1175 has_cache = os.path.isfile(cached_diff_file)
1167 if not has_cache:
1176 if not has_cache:
1168 return default_struct
1177 return default_struct
1169
1178
1170 data = None
1179 data = None
1171 try:
1180 try:
1172 with bz2.BZ2File(cached_diff_file, 'rb') as f:
1181 with bz2.BZ2File(cached_diff_file, 'rb') as f:
1173 data = pickle.load(f)
1182 data = pickle.load(f)
1174 log.debug('Loaded diff cache from %s', cached_diff_file)
1183 log.debug('Loaded diff cache from %s', cached_diff_file)
1175 except Exception:
1184 except Exception:
1176 log.warn('Failed to read diff cache file', exc_info=True)
1185 log.warn('Failed to read diff cache file', exc_info=True)
1177
1186
1178 if not data:
1187 if not data:
1179 data = default_struct
1188 data = default_struct
1180
1189
1181 if not isinstance(data, dict):
1190 if not isinstance(data, dict):
1182 # old version of data ?
1191 # old version of data ?
1183 data = default_struct
1192 data = default_struct
1184
1193
1194 # check version
1195 if data.get('version') != CURRENT_DIFF_VERSION:
1196 # purge cache
1197 _cleanup_cache_file(cached_diff_file)
1198 return default_struct
1199
1185 return data
1200 return data
1186
1201
1187
1202
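Putting cache_diff() and load_cached_diff() together: a round trip returns exactly what was stored, while a file carrying an older version string is purged and the default struct comes back. A sketch with hypothetical data, run next to the functions above:

path = '/tmp/example_diff_cache_key'
cache_diff(path, diff='<rendered html>', commits=['abc123'])
data = load_cached_diff(path)
assert data['version'] == CURRENT_DIFF_VERSION
assert data['diff'] == '<rendered html>'

# simulate a cache left over from an older diff version
with bz2.BZ2File(path, 'wb') as f:
    pickle.dump({'version': 'v0', 'diff': 'stale', 'commits': []}, f)
assert load_cached_diff(path)['diff'] is None   # purged, defaults returned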
1188 def generate_diff_cache_key(*args):
1203 def generate_diff_cache_key(*args):
1189 """
1204 """
1190 Helper to generate a cache key using arguments
1205 Helper to generate a cache key using arguments
1191 """
1206 """
1192 def arg_mapper(input_param):
1207 def arg_mapper(input_param):
1193 input_param = safe_str(input_param)
1208 input_param = safe_str(input_param)
1194 # we cannot allow '/' in arguments since it would allow
1209 # we cannot allow '/' in arguments since it would allow
1195 # subdirectory usage
1210 # subdirectory usage
1196 input_param = input_param.replace('/', '_')
1211 input_param = input_param.replace('/', '_')
1197 return input_param or None # prevent empty string arguments
1212 return input_param or None # prevent empty string arguments
1198
1213
1199 return '_'.join([
1214 return '_'.join([
1200 '{}' for i in range(len(args))]).format(*map(arg_mapper, args))
1215 '{}' for i in range(len(args))]).format(*map(arg_mapper, args))
1201
1216
1202
1217
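Arguments are stringified, joined with underscores, and empty values fall back to None (rendered as the literal string 'None'), so the key is always a single flat file name. With hypothetical inputs:

key = generate_diff_cache_key('pull-request-1', 'abcdef012345', '')
print(key)   # pull-request-1_abcdef012345_None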
1203 def diff_cache_exist(cache_storage, *args):
1218 def diff_cache_exist(cache_storage, *args):
1204 """
1219 """
1205 Build a cache file path from the given arguments, ensuring it stays inside `cache_storage`
1220 Build a cache file path from the given arguments, ensuring it stays inside `cache_storage`
1206 """
1221 """
1207 cache_key = generate_diff_cache_key(*args)
1222 cache_key = generate_diff_cache_key(*args)
1208 cache_file_path = os.path.join(cache_storage, cache_key)
1223 cache_file_path = os.path.join(cache_storage, cache_key)
1209 # prevent path traversal attacks via params that contain e.g. '../../'
1224 # prevent path traversal attacks via params that contain e.g. '../../'
1210 if not os.path.abspath(cache_file_path).startswith(cache_storage):
1225 if not os.path.abspath(cache_file_path).startswith(cache_storage):
1211 raise ValueError('Final path must be within {}'.format(cache_storage))
1226 raise ValueError('Final path must be within {}'.format(cache_storage))
1212
1227
1213 return cache_file_path
1228 return cache_file_path
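Despite its name, diff_cache_exist() does not check that the file exists; it builds the candidate path for the given arguments and the abspath prefix check guarantees the result cannot escape `cache_storage`. A usage sketch with a hypothetical storage directory:

cache_storage = '/var/cache/rc_diffs'
cache_file = diff_cache_exist(cache_storage, 'pull-request-1', 'abcdef012345')
print(cache_file)                  # /var/cache/rc_diffs/pull-request-1_abcdef012345
print(os.path.isfile(cache_file))  # the caller decides whether to load or regenerate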