##// END OF EJS Templates
diffs: improve logging
marcink -
r4324:6440858f default
parent child Browse files
Show More
@@ -1,1271 +1,1272 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2020 RhodeCode GmbH
3 # Copyright (C) 2011-2020 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21
21
22 """
22 """
23 Set of diffing helpers, previously part of vcs
23 Set of diffing helpers, previously part of vcs
24 """
24 """
25
25
26 import os
26 import os
27 import re
27 import re
28 import bz2
28 import bz2
29 import gzip
29 import gzip
30 import time
30 import time
31
31
32 import collections
32 import collections
33 import difflib
33 import difflib
34 import logging
34 import logging
35 import cPickle as pickle
35 import cPickle as pickle
36 from itertools import tee, imap
36 from itertools import tee, imap
37
37
38 from rhodecode.lib.vcs.exceptions import VCSError
38 from rhodecode.lib.vcs.exceptions import VCSError
39 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
39 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
40 from rhodecode.lib.utils2 import safe_unicode, safe_str
40 from rhodecode.lib.utils2 import safe_unicode, safe_str
41
41
42 log = logging.getLogger(__name__)
42 log = logging.getLogger(__name__)
43
43
44 # define max context, a file with more than this numbers of lines is unusable
44 # define max context, a file with more than this numbers of lines is unusable
45 # in browser anyway
45 # in browser anyway
46 MAX_CONTEXT = 20 * 1024
46 MAX_CONTEXT = 20 * 1024
47 DEFAULT_CONTEXT = 3
47 DEFAULT_CONTEXT = 3
48
48
49
49
50 def get_diff_context(request):
50 def get_diff_context(request):
51 return MAX_CONTEXT if request.GET.get('fullcontext', '') == '1' else DEFAULT_CONTEXT
51 return MAX_CONTEXT if request.GET.get('fullcontext', '') == '1' else DEFAULT_CONTEXT
52
52
53
53
54 def get_diff_whitespace_flag(request):
54 def get_diff_whitespace_flag(request):
55 return request.GET.get('ignorews', '') == '1'
55 return request.GET.get('ignorews', '') == '1'
56
56
57
57
58 class OPS(object):
58 class OPS(object):
59 ADD = 'A'
59 ADD = 'A'
60 MOD = 'M'
60 MOD = 'M'
61 DEL = 'D'
61 DEL = 'D'
62
62
63
63
64 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
64 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
65 """
65 """
66 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
66 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
67
67
68 :param ignore_whitespace: ignore whitespaces in diff
68 :param ignore_whitespace: ignore whitespaces in diff
69 """
69 """
70 # make sure we pass in default context
70 # make sure we pass in default context
71 context = context or 3
71 context = context or 3
72 # protect against IntOverflow when passing HUGE context
72 # protect against IntOverflow when passing HUGE context
73 if context > MAX_CONTEXT:
73 if context > MAX_CONTEXT:
74 context = MAX_CONTEXT
74 context = MAX_CONTEXT
75
75
76 submodules = filter(lambda o: isinstance(o, SubModuleNode),
76 submodules = filter(lambda o: isinstance(o, SubModuleNode),
77 [filenode_new, filenode_old])
77 [filenode_new, filenode_old])
78 if submodules:
78 if submodules:
79 return ''
79 return ''
80
80
81 for filenode in (filenode_old, filenode_new):
81 for filenode in (filenode_old, filenode_new):
82 if not isinstance(filenode, FileNode):
82 if not isinstance(filenode, FileNode):
83 raise VCSError(
83 raise VCSError(
84 "Given object should be FileNode object, not %s"
84 "Given object should be FileNode object, not %s"
85 % filenode.__class__)
85 % filenode.__class__)
86
86
87 repo = filenode_new.commit.repository
87 repo = filenode_new.commit.repository
88 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
88 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
89 new_commit = filenode_new.commit
89 new_commit = filenode_new.commit
90
90
91 vcs_gitdiff = repo.get_diff(
91 vcs_gitdiff = repo.get_diff(
92 old_commit, new_commit, filenode_new.path,
92 old_commit, new_commit, filenode_new.path,
93 ignore_whitespace, context, path1=filenode_old.path)
93 ignore_whitespace, context, path1=filenode_old.path)
94 return vcs_gitdiff
94 return vcs_gitdiff
95
95
96 NEW_FILENODE = 1
96 NEW_FILENODE = 1
97 DEL_FILENODE = 2
97 DEL_FILENODE = 2
98 MOD_FILENODE = 3
98 MOD_FILENODE = 3
99 RENAMED_FILENODE = 4
99 RENAMED_FILENODE = 4
100 COPIED_FILENODE = 5
100 COPIED_FILENODE = 5
101 CHMOD_FILENODE = 6
101 CHMOD_FILENODE = 6
102 BIN_FILENODE = 7
102 BIN_FILENODE = 7
103
103
104
104
105 class LimitedDiffContainer(object):
105 class LimitedDiffContainer(object):
106
106
107 def __init__(self, diff_limit, cur_diff_size, diff):
107 def __init__(self, diff_limit, cur_diff_size, diff):
108 self.diff = diff
108 self.diff = diff
109 self.diff_limit = diff_limit
109 self.diff_limit = diff_limit
110 self.cur_diff_size = cur_diff_size
110 self.cur_diff_size = cur_diff_size
111
111
112 def __getitem__(self, key):
112 def __getitem__(self, key):
113 return self.diff.__getitem__(key)
113 return self.diff.__getitem__(key)
114
114
115 def __iter__(self):
115 def __iter__(self):
116 for l in self.diff:
116 for l in self.diff:
117 yield l
117 yield l
118
118
119
119
120 class Action(object):
120 class Action(object):
121 """
121 """
122 Contains constants for the action value of the lines in a parsed diff.
122 Contains constants for the action value of the lines in a parsed diff.
123 """
123 """
124
124
125 ADD = 'add'
125 ADD = 'add'
126 DELETE = 'del'
126 DELETE = 'del'
127 UNMODIFIED = 'unmod'
127 UNMODIFIED = 'unmod'
128
128
129 CONTEXT = 'context'
129 CONTEXT = 'context'
130 OLD_NO_NL = 'old-no-nl'
130 OLD_NO_NL = 'old-no-nl'
131 NEW_NO_NL = 'new-no-nl'
131 NEW_NO_NL = 'new-no-nl'
132
132
133
133
134 class DiffProcessor(object):
134 class DiffProcessor(object):
135 """
135 """
136 Give it a unified or git diff and it returns a list of the files that were
136 Give it a unified or git diff and it returns a list of the files that were
137 mentioned in the diff together with a dict of meta information that
137 mentioned in the diff together with a dict of meta information that
138 can be used to render it in a HTML template.
138 can be used to render it in a HTML template.
139
139
140 .. note:: Unicode handling
140 .. note:: Unicode handling
141
141
142 The original diffs are a byte sequence and can contain filenames
142 The original diffs are a byte sequence and can contain filenames
143 in mixed encodings. This class generally returns `unicode` objects
143 in mixed encodings. This class generally returns `unicode` objects
144 since the result is intended for presentation to the user.
144 since the result is intended for presentation to the user.
145
145
146 """
146 """
147 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
147 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
148 _newline_marker = re.compile(r'^\\ No newline at end of file')
148 _newline_marker = re.compile(r'^\\ No newline at end of file')
149
149
150 # used for inline highlighter word split
150 # used for inline highlighter word split
151 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
151 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
152
152
153 # collapse ranges of commits over given number
153 # collapse ranges of commits over given number
154 _collapse_commits_over = 5
154 _collapse_commits_over = 5
155
155
156 def __init__(self, diff, format='gitdiff', diff_limit=None,
156 def __init__(self, diff, format='gitdiff', diff_limit=None,
157 file_limit=None, show_full_diff=True):
157 file_limit=None, show_full_diff=True):
158 """
158 """
159 :param diff: A `Diff` object representing a diff from a vcs backend
159 :param diff: A `Diff` object representing a diff from a vcs backend
160 :param format: format of diff passed, `udiff` or `gitdiff`
160 :param format: format of diff passed, `udiff` or `gitdiff`
161 :param diff_limit: define the size of diff that is considered "big"
161 :param diff_limit: define the size of diff that is considered "big"
162 based on that parameter cut off will be triggered, set to None
162 based on that parameter cut off will be triggered, set to None
163 to show full diff
163 to show full diff
164 """
164 """
165 self._diff = diff
165 self._diff = diff
166 self._format = format
166 self._format = format
167 self.adds = 0
167 self.adds = 0
168 self.removes = 0
168 self.removes = 0
169 # calculate diff size
169 # calculate diff size
170 self.diff_limit = diff_limit
170 self.diff_limit = diff_limit
171 self.file_limit = file_limit
171 self.file_limit = file_limit
172 self.show_full_diff = show_full_diff
172 self.show_full_diff = show_full_diff
173 self.cur_diff_size = 0
173 self.cur_diff_size = 0
174 self.parsed = False
174 self.parsed = False
175 self.parsed_diff = []
175 self.parsed_diff = []
176
176
177 log.debug('Initialized DiffProcessor with %s mode', format)
177 log.debug('Initialized DiffProcessor with %s mode', format)
178 if format == 'gitdiff':
178 if format == 'gitdiff':
179 self.differ = self._highlight_line_difflib
179 self.differ = self._highlight_line_difflib
180 self._parser = self._parse_gitdiff
180 self._parser = self._parse_gitdiff
181 else:
181 else:
182 self.differ = self._highlight_line_udiff
182 self.differ = self._highlight_line_udiff
183 self._parser = self._new_parse_gitdiff
183 self._parser = self._new_parse_gitdiff
184
184
185 def _copy_iterator(self):
185 def _copy_iterator(self):
186 """
186 """
187 make a fresh copy of generator, we should not iterate thru
187 make a fresh copy of generator, we should not iterate thru
188 an original as it's needed for repeating operations on
188 an original as it's needed for repeating operations on
189 this instance of DiffProcessor
189 this instance of DiffProcessor
190 """
190 """
191 self.__udiff, iterator_copy = tee(self.__udiff)
191 self.__udiff, iterator_copy = tee(self.__udiff)
192 return iterator_copy
192 return iterator_copy
193
193
194 def _escaper(self, string):
194 def _escaper(self, string):
195 """
195 """
196 Escaper for diff escapes special chars and checks the diff limit
196 Escaper for diff escapes special chars and checks the diff limit
197
197
198 :param string:
198 :param string:
199 """
199 """
200 self.cur_diff_size += len(string)
200 self.cur_diff_size += len(string)
201
201
202 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
202 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
203 raise DiffLimitExceeded('Diff Limit Exceeded')
203 raise DiffLimitExceeded('Diff Limit Exceeded')
204
204
205 return string \
205 return string \
206 .replace('&', '&amp;')\
206 .replace('&', '&amp;')\
207 .replace('<', '&lt;')\
207 .replace('<', '&lt;')\
208 .replace('>', '&gt;')
208 .replace('>', '&gt;')
209
209
210 def _line_counter(self, l):
210 def _line_counter(self, l):
211 """
211 """
212 Checks each line and bumps total adds/removes for this diff
212 Checks each line and bumps total adds/removes for this diff
213
213
214 :param l:
214 :param l:
215 """
215 """
216 if l.startswith('+') and not l.startswith('+++'):
216 if l.startswith('+') and not l.startswith('+++'):
217 self.adds += 1
217 self.adds += 1
218 elif l.startswith('-') and not l.startswith('---'):
218 elif l.startswith('-') and not l.startswith('---'):
219 self.removes += 1
219 self.removes += 1
220 return safe_unicode(l)
220 return safe_unicode(l)
221
221
222 def _highlight_line_difflib(self, line, next_):
222 def _highlight_line_difflib(self, line, next_):
223 """
223 """
224 Highlight inline changes in both lines.
224 Highlight inline changes in both lines.
225 """
225 """
226
226
227 if line['action'] == Action.DELETE:
227 if line['action'] == Action.DELETE:
228 old, new = line, next_
228 old, new = line, next_
229 else:
229 else:
230 old, new = next_, line
230 old, new = next_, line
231
231
232 oldwords = self._token_re.split(old['line'])
232 oldwords = self._token_re.split(old['line'])
233 newwords = self._token_re.split(new['line'])
233 newwords = self._token_re.split(new['line'])
234 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
234 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
235
235
236 oldfragments, newfragments = [], []
236 oldfragments, newfragments = [], []
237 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
237 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
238 oldfrag = ''.join(oldwords[i1:i2])
238 oldfrag = ''.join(oldwords[i1:i2])
239 newfrag = ''.join(newwords[j1:j2])
239 newfrag = ''.join(newwords[j1:j2])
240 if tag != 'equal':
240 if tag != 'equal':
241 if oldfrag:
241 if oldfrag:
242 oldfrag = '<del>%s</del>' % oldfrag
242 oldfrag = '<del>%s</del>' % oldfrag
243 if newfrag:
243 if newfrag:
244 newfrag = '<ins>%s</ins>' % newfrag
244 newfrag = '<ins>%s</ins>' % newfrag
245 oldfragments.append(oldfrag)
245 oldfragments.append(oldfrag)
246 newfragments.append(newfrag)
246 newfragments.append(newfrag)
247
247
248 old['line'] = "".join(oldfragments)
248 old['line'] = "".join(oldfragments)
249 new['line'] = "".join(newfragments)
249 new['line'] = "".join(newfragments)
250
250
251 def _highlight_line_udiff(self, line, next_):
251 def _highlight_line_udiff(self, line, next_):
252 """
252 """
253 Highlight inline changes in both lines.
253 Highlight inline changes in both lines.
254 """
254 """
255 start = 0
255 start = 0
256 limit = min(len(line['line']), len(next_['line']))
256 limit = min(len(line['line']), len(next_['line']))
257 while start < limit and line['line'][start] == next_['line'][start]:
257 while start < limit and line['line'][start] == next_['line'][start]:
258 start += 1
258 start += 1
259 end = -1
259 end = -1
260 limit -= start
260 limit -= start
261 while -end <= limit and line['line'][end] == next_['line'][end]:
261 while -end <= limit and line['line'][end] == next_['line'][end]:
262 end -= 1
262 end -= 1
263 end += 1
263 end += 1
264 if start or end:
264 if start or end:
265 def do(l):
265 def do(l):
266 last = end + len(l['line'])
266 last = end + len(l['line'])
267 if l['action'] == Action.ADD:
267 if l['action'] == Action.ADD:
268 tag = 'ins'
268 tag = 'ins'
269 else:
269 else:
270 tag = 'del'
270 tag = 'del'
271 l['line'] = '%s<%s>%s</%s>%s' % (
271 l['line'] = '%s<%s>%s</%s>%s' % (
272 l['line'][:start],
272 l['line'][:start],
273 tag,
273 tag,
274 l['line'][start:last],
274 l['line'][start:last],
275 tag,
275 tag,
276 l['line'][last:]
276 l['line'][last:]
277 )
277 )
278 do(line)
278 do(line)
279 do(next_)
279 do(next_)
280
280
281 def _clean_line(self, line, command):
281 def _clean_line(self, line, command):
282 if command in ['+', '-', ' ']:
282 if command in ['+', '-', ' ']:
283 # only modify the line if it's actually a diff thing
283 # only modify the line if it's actually a diff thing
284 line = line[1:]
284 line = line[1:]
285 return line
285 return line
286
286
287 def _parse_gitdiff(self, inline_diff=True):
287 def _parse_gitdiff(self, inline_diff=True):
288 _files = []
288 _files = []
289 diff_container = lambda arg: arg
289 diff_container = lambda arg: arg
290
290
291 for chunk in self._diff.chunks():
291 for chunk in self._diff.chunks():
292 head = chunk.header
292 head = chunk.header
293
293
294 diff = imap(self._escaper, self.diff_splitter(chunk.diff))
294 diff = imap(self._escaper, self.diff_splitter(chunk.diff))
295 raw_diff = chunk.raw
295 raw_diff = chunk.raw
296 limited_diff = False
296 limited_diff = False
297 exceeds_limit = False
297 exceeds_limit = False
298
298
299 op = None
299 op = None
300 stats = {
300 stats = {
301 'added': 0,
301 'added': 0,
302 'deleted': 0,
302 'deleted': 0,
303 'binary': False,
303 'binary': False,
304 'ops': {},
304 'ops': {},
305 }
305 }
306
306
307 if head['deleted_file_mode']:
307 if head['deleted_file_mode']:
308 op = OPS.DEL
308 op = OPS.DEL
309 stats['binary'] = True
309 stats['binary'] = True
310 stats['ops'][DEL_FILENODE] = 'deleted file'
310 stats['ops'][DEL_FILENODE] = 'deleted file'
311
311
312 elif head['new_file_mode']:
312 elif head['new_file_mode']:
313 op = OPS.ADD
313 op = OPS.ADD
314 stats['binary'] = True
314 stats['binary'] = True
315 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
315 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
316 else: # modify operation, can be copy, rename or chmod
316 else: # modify operation, can be copy, rename or chmod
317
317
318 # CHMOD
318 # CHMOD
319 if head['new_mode'] and head['old_mode']:
319 if head['new_mode'] and head['old_mode']:
320 op = OPS.MOD
320 op = OPS.MOD
321 stats['binary'] = True
321 stats['binary'] = True
322 stats['ops'][CHMOD_FILENODE] = (
322 stats['ops'][CHMOD_FILENODE] = (
323 'modified file chmod %s => %s' % (
323 'modified file chmod %s => %s' % (
324 head['old_mode'], head['new_mode']))
324 head['old_mode'], head['new_mode']))
325 # RENAME
325 # RENAME
326 if head['rename_from'] != head['rename_to']:
326 if head['rename_from'] != head['rename_to']:
327 op = OPS.MOD
327 op = OPS.MOD
328 stats['binary'] = True
328 stats['binary'] = True
329 stats['ops'][RENAMED_FILENODE] = (
329 stats['ops'][RENAMED_FILENODE] = (
330 'file renamed from %s to %s' % (
330 'file renamed from %s to %s' % (
331 head['rename_from'], head['rename_to']))
331 head['rename_from'], head['rename_to']))
332 # COPY
332 # COPY
333 if head.get('copy_from') and head.get('copy_to'):
333 if head.get('copy_from') and head.get('copy_to'):
334 op = OPS.MOD
334 op = OPS.MOD
335 stats['binary'] = True
335 stats['binary'] = True
336 stats['ops'][COPIED_FILENODE] = (
336 stats['ops'][COPIED_FILENODE] = (
337 'file copied from %s to %s' % (
337 'file copied from %s to %s' % (
338 head['copy_from'], head['copy_to']))
338 head['copy_from'], head['copy_to']))
339
339
340 # If our new parsed headers didn't match anything fallback to
340 # If our new parsed headers didn't match anything fallback to
341 # old style detection
341 # old style detection
342 if op is None:
342 if op is None:
343 if not head['a_file'] and head['b_file']:
343 if not head['a_file'] and head['b_file']:
344 op = OPS.ADD
344 op = OPS.ADD
345 stats['binary'] = True
345 stats['binary'] = True
346 stats['ops'][NEW_FILENODE] = 'new file'
346 stats['ops'][NEW_FILENODE] = 'new file'
347
347
348 elif head['a_file'] and not head['b_file']:
348 elif head['a_file'] and not head['b_file']:
349 op = OPS.DEL
349 op = OPS.DEL
350 stats['binary'] = True
350 stats['binary'] = True
351 stats['ops'][DEL_FILENODE] = 'deleted file'
351 stats['ops'][DEL_FILENODE] = 'deleted file'
352
352
353 # it's not ADD not DELETE
353 # it's not ADD not DELETE
354 if op is None:
354 if op is None:
355 op = OPS.MOD
355 op = OPS.MOD
356 stats['binary'] = True
356 stats['binary'] = True
357 stats['ops'][MOD_FILENODE] = 'modified file'
357 stats['ops'][MOD_FILENODE] = 'modified file'
358
358
359 # a real non-binary diff
359 # a real non-binary diff
360 if head['a_file'] or head['b_file']:
360 if head['a_file'] or head['b_file']:
361 try:
361 try:
362 raw_diff, chunks, _stats = self._parse_lines(diff)
362 raw_diff, chunks, _stats = self._parse_lines(diff)
363 stats['binary'] = False
363 stats['binary'] = False
364 stats['added'] = _stats[0]
364 stats['added'] = _stats[0]
365 stats['deleted'] = _stats[1]
365 stats['deleted'] = _stats[1]
366 # explicit mark that it's a modified file
366 # explicit mark that it's a modified file
367 if op == OPS.MOD:
367 if op == OPS.MOD:
368 stats['ops'][MOD_FILENODE] = 'modified file'
368 stats['ops'][MOD_FILENODE] = 'modified file'
369 exceeds_limit = len(raw_diff) > self.file_limit
369 exceeds_limit = len(raw_diff) > self.file_limit
370
370
371 # changed from _escaper function so we validate size of
371 # changed from _escaper function so we validate size of
372 # each file instead of the whole diff
372 # each file instead of the whole diff
373 # diff will hide big files but still show small ones
373 # diff will hide big files but still show small ones
374 # from my tests, big files are fairly safe to be parsed
374 # from my tests, big files are fairly safe to be parsed
375 # but the browser is the bottleneck
375 # but the browser is the bottleneck
376 if not self.show_full_diff and exceeds_limit:
376 if not self.show_full_diff and exceeds_limit:
377 raise DiffLimitExceeded('File Limit Exceeded')
377 raise DiffLimitExceeded('File Limit Exceeded')
378
378
379 except DiffLimitExceeded:
379 except DiffLimitExceeded:
380 diff_container = lambda _diff: \
380 diff_container = lambda _diff: \
381 LimitedDiffContainer(
381 LimitedDiffContainer(
382 self.diff_limit, self.cur_diff_size, _diff)
382 self.diff_limit, self.cur_diff_size, _diff)
383
383
384 exceeds_limit = len(raw_diff) > self.file_limit
384 exceeds_limit = len(raw_diff) > self.file_limit
385 limited_diff = True
385 limited_diff = True
386 chunks = []
386 chunks = []
387
387
388 else: # GIT format binary patch, or possibly empty diff
388 else: # GIT format binary patch, or possibly empty diff
389 if head['bin_patch']:
389 if head['bin_patch']:
390 # we have operation already extracted, but we mark simply
390 # we have operation already extracted, but we mark simply
391 # it's a diff we wont show for binary files
391 # it's a diff we wont show for binary files
392 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
392 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
393 chunks = []
393 chunks = []
394
394
395 if chunks and not self.show_full_diff and op == OPS.DEL:
395 if chunks and not self.show_full_diff and op == OPS.DEL:
396 # if not full diff mode show deleted file contents
396 # if not full diff mode show deleted file contents
397 # TODO: anderson: if the view is not too big, there is no way
397 # TODO: anderson: if the view is not too big, there is no way
398 # to see the content of the file
398 # to see the content of the file
399 chunks = []
399 chunks = []
400
400
401 chunks.insert(0, [{
401 chunks.insert(0, [{
402 'old_lineno': '',
402 'old_lineno': '',
403 'new_lineno': '',
403 'new_lineno': '',
404 'action': Action.CONTEXT,
404 'action': Action.CONTEXT,
405 'line': msg,
405 'line': msg,
406 } for _op, msg in stats['ops'].iteritems()
406 } for _op, msg in stats['ops'].iteritems()
407 if _op not in [MOD_FILENODE]])
407 if _op not in [MOD_FILENODE]])
408
408
409 _files.append({
409 _files.append({
410 'filename': safe_unicode(head['b_path']),
410 'filename': safe_unicode(head['b_path']),
411 'old_revision': head['a_blob_id'],
411 'old_revision': head['a_blob_id'],
412 'new_revision': head['b_blob_id'],
412 'new_revision': head['b_blob_id'],
413 'chunks': chunks,
413 'chunks': chunks,
414 'raw_diff': safe_unicode(raw_diff),
414 'raw_diff': safe_unicode(raw_diff),
415 'operation': op,
415 'operation': op,
416 'stats': stats,
416 'stats': stats,
417 'exceeds_limit': exceeds_limit,
417 'exceeds_limit': exceeds_limit,
418 'is_limited_diff': limited_diff,
418 'is_limited_diff': limited_diff,
419 })
419 })
420
420
421 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
421 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
422 OPS.DEL: 2}.get(info['operation'])
422 OPS.DEL: 2}.get(info['operation'])
423
423
424 if not inline_diff:
424 if not inline_diff:
425 return diff_container(sorted(_files, key=sorter))
425 return diff_container(sorted(_files, key=sorter))
426
426
427 # highlight inline changes
427 # highlight inline changes
428 for diff_data in _files:
428 for diff_data in _files:
429 for chunk in diff_data['chunks']:
429 for chunk in diff_data['chunks']:
430 lineiter = iter(chunk)
430 lineiter = iter(chunk)
431 try:
431 try:
432 while 1:
432 while 1:
433 line = lineiter.next()
433 line = lineiter.next()
434 if line['action'] not in (
434 if line['action'] not in (
435 Action.UNMODIFIED, Action.CONTEXT):
435 Action.UNMODIFIED, Action.CONTEXT):
436 nextline = lineiter.next()
436 nextline = lineiter.next()
437 if nextline['action'] in ['unmod', 'context'] or \
437 if nextline['action'] in ['unmod', 'context'] or \
438 nextline['action'] == line['action']:
438 nextline['action'] == line['action']:
439 continue
439 continue
440 self.differ(line, nextline)
440 self.differ(line, nextline)
441 except StopIteration:
441 except StopIteration:
442 pass
442 pass
443
443
444 return diff_container(sorted(_files, key=sorter))
444 return diff_container(sorted(_files, key=sorter))
445
445
446 def _check_large_diff(self):
446 def _check_large_diff(self):
447 log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
447 if self.diff_limit:
448 log.debug('Checking if diff exceeds current diff_limit of %s', self.diff_limit)
448 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
449 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
449 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
450 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
450
451
451 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
452 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
452 def _new_parse_gitdiff(self, inline_diff=True):
453 def _new_parse_gitdiff(self, inline_diff=True):
453 _files = []
454 _files = []
454
455
455 # this can be overriden later to a LimitedDiffContainer type
456 # this can be overriden later to a LimitedDiffContainer type
456 diff_container = lambda arg: arg
457 diff_container = lambda arg: arg
457
458
458 for chunk in self._diff.chunks():
459 for chunk in self._diff.chunks():
459 head = chunk.header
460 head = chunk.header
460 log.debug('parsing diff %r', head)
461 log.debug('parsing diff %r', head)
461
462
462 raw_diff = chunk.raw
463 raw_diff = chunk.raw
463 limited_diff = False
464 limited_diff = False
464 exceeds_limit = False
465 exceeds_limit = False
465
466
466 op = None
467 op = None
467 stats = {
468 stats = {
468 'added': 0,
469 'added': 0,
469 'deleted': 0,
470 'deleted': 0,
470 'binary': False,
471 'binary': False,
471 'old_mode': None,
472 'old_mode': None,
472 'new_mode': None,
473 'new_mode': None,
473 'ops': {},
474 'ops': {},
474 }
475 }
475 if head['old_mode']:
476 if head['old_mode']:
476 stats['old_mode'] = head['old_mode']
477 stats['old_mode'] = head['old_mode']
477 if head['new_mode']:
478 if head['new_mode']:
478 stats['new_mode'] = head['new_mode']
479 stats['new_mode'] = head['new_mode']
479 if head['b_mode']:
480 if head['b_mode']:
480 stats['new_mode'] = head['b_mode']
481 stats['new_mode'] = head['b_mode']
481
482
482 # delete file
483 # delete file
483 if head['deleted_file_mode']:
484 if head['deleted_file_mode']:
484 op = OPS.DEL
485 op = OPS.DEL
485 stats['binary'] = True
486 stats['binary'] = True
486 stats['ops'][DEL_FILENODE] = 'deleted file'
487 stats['ops'][DEL_FILENODE] = 'deleted file'
487
488
488 # new file
489 # new file
489 elif head['new_file_mode']:
490 elif head['new_file_mode']:
490 op = OPS.ADD
491 op = OPS.ADD
491 stats['binary'] = True
492 stats['binary'] = True
492 stats['old_mode'] = None
493 stats['old_mode'] = None
493 stats['new_mode'] = head['new_file_mode']
494 stats['new_mode'] = head['new_file_mode']
494 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
495 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
495
496
496 # modify operation, can be copy, rename or chmod
497 # modify operation, can be copy, rename or chmod
497 else:
498 else:
498 # CHMOD
499 # CHMOD
499 if head['new_mode'] and head['old_mode']:
500 if head['new_mode'] and head['old_mode']:
500 op = OPS.MOD
501 op = OPS.MOD
501 stats['binary'] = True
502 stats['binary'] = True
502 stats['ops'][CHMOD_FILENODE] = (
503 stats['ops'][CHMOD_FILENODE] = (
503 'modified file chmod %s => %s' % (
504 'modified file chmod %s => %s' % (
504 head['old_mode'], head['new_mode']))
505 head['old_mode'], head['new_mode']))
505
506
506 # RENAME
507 # RENAME
507 if head['rename_from'] != head['rename_to']:
508 if head['rename_from'] != head['rename_to']:
508 op = OPS.MOD
509 op = OPS.MOD
509 stats['binary'] = True
510 stats['binary'] = True
510 stats['renamed'] = (head['rename_from'], head['rename_to'])
511 stats['renamed'] = (head['rename_from'], head['rename_to'])
511 stats['ops'][RENAMED_FILENODE] = (
512 stats['ops'][RENAMED_FILENODE] = (
512 'file renamed from %s to %s' % (
513 'file renamed from %s to %s' % (
513 head['rename_from'], head['rename_to']))
514 head['rename_from'], head['rename_to']))
514 # COPY
515 # COPY
515 if head.get('copy_from') and head.get('copy_to'):
516 if head.get('copy_from') and head.get('copy_to'):
516 op = OPS.MOD
517 op = OPS.MOD
517 stats['binary'] = True
518 stats['binary'] = True
518 stats['copied'] = (head['copy_from'], head['copy_to'])
519 stats['copied'] = (head['copy_from'], head['copy_to'])
519 stats['ops'][COPIED_FILENODE] = (
520 stats['ops'][COPIED_FILENODE] = (
520 'file copied from %s to %s' % (
521 'file copied from %s to %s' % (
521 head['copy_from'], head['copy_to']))
522 head['copy_from'], head['copy_to']))
522
523
523 # If our new parsed headers didn't match anything fallback to
524 # If our new parsed headers didn't match anything fallback to
524 # old style detection
525 # old style detection
525 if op is None:
526 if op is None:
526 if not head['a_file'] and head['b_file']:
527 if not head['a_file'] and head['b_file']:
527 op = OPS.ADD
528 op = OPS.ADD
528 stats['binary'] = True
529 stats['binary'] = True
529 stats['new_file'] = True
530 stats['new_file'] = True
530 stats['ops'][NEW_FILENODE] = 'new file'
531 stats['ops'][NEW_FILENODE] = 'new file'
531
532
532 elif head['a_file'] and not head['b_file']:
533 elif head['a_file'] and not head['b_file']:
533 op = OPS.DEL
534 op = OPS.DEL
534 stats['binary'] = True
535 stats['binary'] = True
535 stats['ops'][DEL_FILENODE] = 'deleted file'
536 stats['ops'][DEL_FILENODE] = 'deleted file'
536
537
537 # it's not ADD not DELETE
538 # it's not ADD not DELETE
538 if op is None:
539 if op is None:
539 op = OPS.MOD
540 op = OPS.MOD
540 stats['binary'] = True
541 stats['binary'] = True
541 stats['ops'][MOD_FILENODE] = 'modified file'
542 stats['ops'][MOD_FILENODE] = 'modified file'
542
543
543 # a real non-binary diff
544 # a real non-binary diff
544 if head['a_file'] or head['b_file']:
545 if head['a_file'] or head['b_file']:
545 # simulate splitlines, so we keep the line end part
546 # simulate splitlines, so we keep the line end part
546 diff = self.diff_splitter(chunk.diff)
547 diff = self.diff_splitter(chunk.diff)
547
548
548 # append each file to the diff size
549 # append each file to the diff size
549 raw_chunk_size = len(raw_diff)
550 raw_chunk_size = len(raw_diff)
550
551
551 exceeds_limit = raw_chunk_size > self.file_limit
552 exceeds_limit = raw_chunk_size > self.file_limit
552 self.cur_diff_size += raw_chunk_size
553 self.cur_diff_size += raw_chunk_size
553
554
554 try:
555 try:
555 # Check each file instead of the whole diff.
556 # Check each file instead of the whole diff.
556 # Diff will hide big files but still show small ones.
557 # Diff will hide big files but still show small ones.
557 # From the tests big files are fairly safe to be parsed
558 # From the tests big files are fairly safe to be parsed
558 # but the browser is the bottleneck.
559 # but the browser is the bottleneck.
559 if not self.show_full_diff and exceeds_limit:
560 if not self.show_full_diff and exceeds_limit:
560 log.debug('File `%s` exceeds current file_limit of %s',
561 log.debug('File `%s` exceeds current file_limit of %s',
561 safe_unicode(head['b_path']), self.file_limit)
562 safe_unicode(head['b_path']), self.file_limit)
562 raise DiffLimitExceeded(
563 raise DiffLimitExceeded(
563 'File Limit %s Exceeded', self.file_limit)
564 'File Limit %s Exceeded', self.file_limit)
564
565
565 self._check_large_diff()
566 self._check_large_diff()
566
567
567 raw_diff, chunks, _stats = self._new_parse_lines(diff)
568 raw_diff, chunks, _stats = self._new_parse_lines(diff)
568 stats['binary'] = False
569 stats['binary'] = False
569 stats['added'] = _stats[0]
570 stats['added'] = _stats[0]
570 stats['deleted'] = _stats[1]
571 stats['deleted'] = _stats[1]
571 # explicit mark that it's a modified file
572 # explicit mark that it's a modified file
572 if op == OPS.MOD:
573 if op == OPS.MOD:
573 stats['ops'][MOD_FILENODE] = 'modified file'
574 stats['ops'][MOD_FILENODE] = 'modified file'
574
575
575 except DiffLimitExceeded:
576 except DiffLimitExceeded:
576 diff_container = lambda _diff: \
577 diff_container = lambda _diff: \
577 LimitedDiffContainer(
578 LimitedDiffContainer(
578 self.diff_limit, self.cur_diff_size, _diff)
579 self.diff_limit, self.cur_diff_size, _diff)
579
580
580 limited_diff = True
581 limited_diff = True
581 chunks = []
582 chunks = []
582
583
583 else: # GIT format binary patch, or possibly empty diff
584 else: # GIT format binary patch, or possibly empty diff
584 if head['bin_patch']:
585 if head['bin_patch']:
585 # we have operation already extracted, but we mark simply
586 # we have operation already extracted, but we mark simply
586 # it's a diff we wont show for binary files
587 # it's a diff we wont show for binary files
587 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
588 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
588 chunks = []
589 chunks = []
589
590
590 # Hide content of deleted node by setting empty chunks
591 # Hide content of deleted node by setting empty chunks
591 if chunks and not self.show_full_diff and op == OPS.DEL:
592 if chunks and not self.show_full_diff and op == OPS.DEL:
592 # if not full diff mode show deleted file contents
593 # if not full diff mode show deleted file contents
593 # TODO: anderson: if the view is not too big, there is no way
594 # TODO: anderson: if the view is not too big, there is no way
594 # to see the content of the file
595 # to see the content of the file
595 chunks = []
596 chunks = []
596
597
597 chunks.insert(
598 chunks.insert(
598 0, [{'old_lineno': '',
599 0, [{'old_lineno': '',
599 'new_lineno': '',
600 'new_lineno': '',
600 'action': Action.CONTEXT,
601 'action': Action.CONTEXT,
601 'line': msg,
602 'line': msg,
602 } for _op, msg in stats['ops'].iteritems()
603 } for _op, msg in stats['ops'].iteritems()
603 if _op not in [MOD_FILENODE]])
604 if _op not in [MOD_FILENODE]])
604
605
605 original_filename = safe_unicode(head['a_path'])
606 original_filename = safe_unicode(head['a_path'])
606 _files.append({
607 _files.append({
607 'original_filename': original_filename,
608 'original_filename': original_filename,
608 'filename': safe_unicode(head['b_path']),
609 'filename': safe_unicode(head['b_path']),
609 'old_revision': head['a_blob_id'],
610 'old_revision': head['a_blob_id'],
610 'new_revision': head['b_blob_id'],
611 'new_revision': head['b_blob_id'],
611 'chunks': chunks,
612 'chunks': chunks,
612 'raw_diff': safe_unicode(raw_diff),
613 'raw_diff': safe_unicode(raw_diff),
613 'operation': op,
614 'operation': op,
614 'stats': stats,
615 'stats': stats,
615 'exceeds_limit': exceeds_limit,
616 'exceeds_limit': exceeds_limit,
616 'is_limited_diff': limited_diff,
617 'is_limited_diff': limited_diff,
617 })
618 })
618
619
619 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
620 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
620 OPS.DEL: 2}.get(info['operation'])
621 OPS.DEL: 2}.get(info['operation'])
621
622
622 return diff_container(sorted(_files, key=sorter))
623 return diff_container(sorted(_files, key=sorter))
623
624
624 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
625 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
625 def _parse_lines(self, diff_iter):
626 def _parse_lines(self, diff_iter):
626 """
627 """
627 Parse the diff an return data for the template.
628 Parse the diff an return data for the template.
628 """
629 """
629
630
630 stats = [0, 0]
631 stats = [0, 0]
631 chunks = []
632 chunks = []
632 raw_diff = []
633 raw_diff = []
633
634
634 try:
635 try:
635 line = diff_iter.next()
636 line = diff_iter.next()
636
637
637 while line:
638 while line:
638 raw_diff.append(line)
639 raw_diff.append(line)
639 lines = []
640 lines = []
640 chunks.append(lines)
641 chunks.append(lines)
641
642
642 match = self._chunk_re.match(line)
643 match = self._chunk_re.match(line)
643
644
644 if not match:
645 if not match:
645 break
646 break
646
647
647 gr = match.groups()
648 gr = match.groups()
648 (old_line, old_end,
649 (old_line, old_end,
649 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
650 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
650 old_line -= 1
651 old_line -= 1
651 new_line -= 1
652 new_line -= 1
652
653
653 context = len(gr) == 5
654 context = len(gr) == 5
654 old_end += old_line
655 old_end += old_line
655 new_end += new_line
656 new_end += new_line
656
657
657 if context:
658 if context:
658 # skip context only if it's first line
659 # skip context only if it's first line
659 if int(gr[0]) > 1:
660 if int(gr[0]) > 1:
660 lines.append({
661 lines.append({
661 'old_lineno': '...',
662 'old_lineno': '...',
662 'new_lineno': '...',
663 'new_lineno': '...',
663 'action': Action.CONTEXT,
664 'action': Action.CONTEXT,
664 'line': line,
665 'line': line,
665 })
666 })
666
667
667 line = diff_iter.next()
668 line = diff_iter.next()
668
669
669 while old_line < old_end or new_line < new_end:
670 while old_line < old_end or new_line < new_end:
670 command = ' '
671 command = ' '
671 if line:
672 if line:
672 command = line[0]
673 command = line[0]
673
674
674 affects_old = affects_new = False
675 affects_old = affects_new = False
675
676
676 # ignore those if we don't expect them
677 # ignore those if we don't expect them
677 if command in '#@':
678 if command in '#@':
678 continue
679 continue
679 elif command == '+':
680 elif command == '+':
680 affects_new = True
681 affects_new = True
681 action = Action.ADD
682 action = Action.ADD
682 stats[0] += 1
683 stats[0] += 1
683 elif command == '-':
684 elif command == '-':
684 affects_old = True
685 affects_old = True
685 action = Action.DELETE
686 action = Action.DELETE
686 stats[1] += 1
687 stats[1] += 1
687 else:
688 else:
688 affects_old = affects_new = True
689 affects_old = affects_new = True
689 action = Action.UNMODIFIED
690 action = Action.UNMODIFIED
690
691
691 if not self._newline_marker.match(line):
692 if not self._newline_marker.match(line):
692 old_line += affects_old
693 old_line += affects_old
693 new_line += affects_new
694 new_line += affects_new
694 lines.append({
695 lines.append({
695 'old_lineno': affects_old and old_line or '',
696 'old_lineno': affects_old and old_line or '',
696 'new_lineno': affects_new and new_line or '',
697 'new_lineno': affects_new and new_line or '',
697 'action': action,
698 'action': action,
698 'line': self._clean_line(line, command)
699 'line': self._clean_line(line, command)
699 })
700 })
700 raw_diff.append(line)
701 raw_diff.append(line)
701
702
702 line = diff_iter.next()
703 line = diff_iter.next()
703
704
704 if self._newline_marker.match(line):
705 if self._newline_marker.match(line):
705 # we need to append to lines, since this is not
706 # we need to append to lines, since this is not
706 # counted in the line specs of diff
707 # counted in the line specs of diff
707 lines.append({
708 lines.append({
708 'old_lineno': '...',
709 'old_lineno': '...',
709 'new_lineno': '...',
710 'new_lineno': '...',
710 'action': Action.CONTEXT,
711 'action': Action.CONTEXT,
711 'line': self._clean_line(line, command)
712 'line': self._clean_line(line, command)
712 })
713 })
713
714
714 except StopIteration:
715 except StopIteration:
715 pass
716 pass
716 return ''.join(raw_diff), chunks, stats
717 return ''.join(raw_diff), chunks, stats
717
718
718 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
719 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
719 def _new_parse_lines(self, diff_iter):
720 def _new_parse_lines(self, diff_iter):
720 """
721 """
721 Parse the diff an return data for the template.
722 Parse the diff an return data for the template.
722 """
723 """
723
724
724 stats = [0, 0]
725 stats = [0, 0]
725 chunks = []
726 chunks = []
726 raw_diff = []
727 raw_diff = []
727
728
728 try:
729 try:
729 line = diff_iter.next()
730 line = diff_iter.next()
730
731
731 while line:
732 while line:
732 raw_diff.append(line)
733 raw_diff.append(line)
733 # match header e.g @@ -0,0 +1 @@\n'
734 # match header e.g @@ -0,0 +1 @@\n'
734 match = self._chunk_re.match(line)
735 match = self._chunk_re.match(line)
735
736
736 if not match:
737 if not match:
737 break
738 break
738
739
739 gr = match.groups()
740 gr = match.groups()
740 (old_line, old_end,
741 (old_line, old_end,
741 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
742 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
742
743
743 lines = []
744 lines = []
744 hunk = {
745 hunk = {
745 'section_header': gr[-1],
746 'section_header': gr[-1],
746 'source_start': old_line,
747 'source_start': old_line,
747 'source_length': old_end,
748 'source_length': old_end,
748 'target_start': new_line,
749 'target_start': new_line,
749 'target_length': new_end,
750 'target_length': new_end,
750 'lines': lines,
751 'lines': lines,
751 }
752 }
752 chunks.append(hunk)
753 chunks.append(hunk)
753
754
754 old_line -= 1
755 old_line -= 1
755 new_line -= 1
756 new_line -= 1
756
757
757 context = len(gr) == 5
758 context = len(gr) == 5
758 old_end += old_line
759 old_end += old_line
759 new_end += new_line
760 new_end += new_line
760
761
761 line = diff_iter.next()
762 line = diff_iter.next()
762
763
763 while old_line < old_end or new_line < new_end:
764 while old_line < old_end or new_line < new_end:
764 command = ' '
765 command = ' '
765 if line:
766 if line:
766 command = line[0]
767 command = line[0]
767
768
768 affects_old = affects_new = False
769 affects_old = affects_new = False
769
770
770 # ignore those if we don't expect them
771 # ignore those if we don't expect them
771 if command in '#@':
772 if command in '#@':
772 continue
773 continue
773 elif command == '+':
774 elif command == '+':
774 affects_new = True
775 affects_new = True
775 action = Action.ADD
776 action = Action.ADD
776 stats[0] += 1
777 stats[0] += 1
777 elif command == '-':
778 elif command == '-':
778 affects_old = True
779 affects_old = True
779 action = Action.DELETE
780 action = Action.DELETE
780 stats[1] += 1
781 stats[1] += 1
781 else:
782 else:
782 affects_old = affects_new = True
783 affects_old = affects_new = True
783 action = Action.UNMODIFIED
784 action = Action.UNMODIFIED
784
785
785 if not self._newline_marker.match(line):
786 if not self._newline_marker.match(line):
786 old_line += affects_old
787 old_line += affects_old
787 new_line += affects_new
788 new_line += affects_new
788 lines.append({
789 lines.append({
789 'old_lineno': affects_old and old_line or '',
790 'old_lineno': affects_old and old_line or '',
790 'new_lineno': affects_new and new_line or '',
791 'new_lineno': affects_new and new_line or '',
791 'action': action,
792 'action': action,
792 'line': self._clean_line(line, command)
793 'line': self._clean_line(line, command)
793 })
794 })
794 raw_diff.append(line)
795 raw_diff.append(line)
795
796
796 line = diff_iter.next()
797 line = diff_iter.next()
797
798
798 if self._newline_marker.match(line):
799 if self._newline_marker.match(line):
799 # we need to append to lines, since this is not
800 # we need to append to lines, since this is not
800 # counted in the line specs of diff
801 # counted in the line specs of diff
801 if affects_old:
802 if affects_old:
802 action = Action.OLD_NO_NL
803 action = Action.OLD_NO_NL
803 elif affects_new:
804 elif affects_new:
804 action = Action.NEW_NO_NL
805 action = Action.NEW_NO_NL
805 else:
806 else:
806 raise Exception('invalid context for no newline')
807 raise Exception('invalid context for no newline')
807
808
808 lines.append({
809 lines.append({
809 'old_lineno': None,
810 'old_lineno': None,
810 'new_lineno': None,
811 'new_lineno': None,
811 'action': action,
812 'action': action,
812 'line': self._clean_line(line, command)
813 'line': self._clean_line(line, command)
813 })
814 })
814
815
815 except StopIteration:
816 except StopIteration:
816 pass
817 pass
817
818
818 return ''.join(raw_diff), chunks, stats
819 return ''.join(raw_diff), chunks, stats
819
820
820 def _safe_id(self, idstring):
821 def _safe_id(self, idstring):
821 """Make a string safe for including in an id attribute.
822 """Make a string safe for including in an id attribute.
822
823
823 The HTML spec says that id attributes 'must begin with
824 The HTML spec says that id attributes 'must begin with
824 a letter ([A-Za-z]) and may be followed by any number
825 a letter ([A-Za-z]) and may be followed by any number
825 of letters, digits ([0-9]), hyphens ("-"), underscores
826 of letters, digits ([0-9]), hyphens ("-"), underscores
826 ("_"), colons (":"), and periods (".")'. These regexps
827 ("_"), colons (":"), and periods (".")'. These regexps
827 are slightly over-zealous, in that they remove colons
828 are slightly over-zealous, in that they remove colons
828 and periods unnecessarily.
829 and periods unnecessarily.
829
830
830 Whitespace is transformed into underscores, and then
831 Whitespace is transformed into underscores, and then
831 anything which is not a hyphen or a character that
832 anything which is not a hyphen or a character that
832 matches \w (alphanumerics and underscore) is removed.
833 matches \w (alphanumerics and underscore) is removed.
833
834
834 """
835 """
835 # Transform all whitespace to underscore
836 # Transform all whitespace to underscore
836 idstring = re.sub(r'\s', "_", '%s' % idstring)
837 idstring = re.sub(r'\s', "_", '%s' % idstring)
837 # Remove everything that is not a hyphen or a member of \w
838 # Remove everything that is not a hyphen or a member of \w
838 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
839 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
839 return idstring
840 return idstring
840
841
841 @classmethod
842 @classmethod
842 def diff_splitter(cls, string):
843 def diff_splitter(cls, string):
843 """
844 """
844 Diff split that emulates .splitlines() but works only on \n
845 Diff split that emulates .splitlines() but works only on \n
845 """
846 """
846 if not string:
847 if not string:
847 return
848 return
848 elif string == '\n':
849 elif string == '\n':
849 yield u'\n'
850 yield u'\n'
850 else:
851 else:
851
852
852 has_newline = string.endswith('\n')
853 has_newline = string.endswith('\n')
853 elements = string.split('\n')
854 elements = string.split('\n')
854 if has_newline:
855 if has_newline:
855 # skip last element as it's empty string from newlines
856 # skip last element as it's empty string from newlines
856 elements = elements[:-1]
857 elements = elements[:-1]
857
858
858 len_elements = len(elements)
859 len_elements = len(elements)
859
860
860 for cnt, line in enumerate(elements, start=1):
861 for cnt, line in enumerate(elements, start=1):
861 last_line = cnt == len_elements
862 last_line = cnt == len_elements
862 if last_line and not has_newline:
863 if last_line and not has_newline:
863 yield safe_unicode(line)
864 yield safe_unicode(line)
864 else:
865 else:
865 yield safe_unicode(line) + '\n'
866 yield safe_unicode(line) + '\n'
866
867
867 def prepare(self, inline_diff=True):
868 def prepare(self, inline_diff=True):
868 """
869 """
869 Prepare the passed udiff for HTML rendering.
870 Prepare the passed udiff for HTML rendering.
870
871
871 :return: A list of dicts with diff information.
872 :return: A list of dicts with diff information.
872 """
873 """
873 parsed = self._parser(inline_diff=inline_diff)
874 parsed = self._parser(inline_diff=inline_diff)
874 self.parsed = True
875 self.parsed = True
875 self.parsed_diff = parsed
876 self.parsed_diff = parsed
876 return parsed
877 return parsed
877
878
878 def as_raw(self, diff_lines=None):
879 def as_raw(self, diff_lines=None):
879 """
880 """
880 Returns raw diff as a byte string
881 Returns raw diff as a byte string
881 """
882 """
882 return self._diff.raw
883 return self._diff.raw
883
884
884 def as_html(self, table_class='code-difftable', line_class='line',
885 def as_html(self, table_class='code-difftable', line_class='line',
885 old_lineno_class='lineno old', new_lineno_class='lineno new',
886 old_lineno_class='lineno old', new_lineno_class='lineno new',
886 code_class='code', enable_comments=False, parsed_lines=None):
887 code_class='code', enable_comments=False, parsed_lines=None):
887 """
888 """
888 Return given diff as html table with customized css classes
889 Return given diff as html table with customized css classes
889 """
890 """
890 # TODO(marcink): not sure how to pass in translator
891 # TODO(marcink): not sure how to pass in translator
891 # here in an efficient way, leave the _ for proper gettext extraction
892 # here in an efficient way, leave the _ for proper gettext extraction
892 _ = lambda s: s
893 _ = lambda s: s
893
894
894 def _link_to_if(condition, label, url):
895 def _link_to_if(condition, label, url):
895 """
896 """
896 Generates a link if condition is meet or just the label if not.
897 Generates a link if condition is meet or just the label if not.
897 """
898 """
898
899
899 if condition:
900 if condition:
900 return '''<a href="%(url)s" class="tooltip"
901 return '''<a href="%(url)s" class="tooltip"
901 title="%(title)s">%(label)s</a>''' % {
902 title="%(title)s">%(label)s</a>''' % {
902 'title': _('Click to select line'),
903 'title': _('Click to select line'),
903 'url': url,
904 'url': url,
904 'label': label
905 'label': label
905 }
906 }
906 else:
907 else:
907 return label
908 return label
908 if not self.parsed:
909 if not self.parsed:
909 self.prepare()
910 self.prepare()
910
911
911 diff_lines = self.parsed_diff
912 diff_lines = self.parsed_diff
912 if parsed_lines:
913 if parsed_lines:
913 diff_lines = parsed_lines
914 diff_lines = parsed_lines
914
915
915 _html_empty = True
916 _html_empty = True
916 _html = []
917 _html = []
917 _html.append('''<table class="%(table_class)s">\n''' % {
918 _html.append('''<table class="%(table_class)s">\n''' % {
918 'table_class': table_class
919 'table_class': table_class
919 })
920 })
920
921
921 for diff in diff_lines:
922 for diff in diff_lines:
922 for line in diff['chunks']:
923 for line in diff['chunks']:
923 _html_empty = False
924 _html_empty = False
924 for change in line:
925 for change in line:
925 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
926 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
926 'lc': line_class,
927 'lc': line_class,
927 'action': change['action']
928 'action': change['action']
928 })
929 })
929 anchor_old_id = ''
930 anchor_old_id = ''
930 anchor_new_id = ''
931 anchor_new_id = ''
931 anchor_old = "%(filename)s_o%(oldline_no)s" % {
932 anchor_old = "%(filename)s_o%(oldline_no)s" % {
932 'filename': self._safe_id(diff['filename']),
933 'filename': self._safe_id(diff['filename']),
933 'oldline_no': change['old_lineno']
934 'oldline_no': change['old_lineno']
934 }
935 }
935 anchor_new = "%(filename)s_n%(oldline_no)s" % {
936 anchor_new = "%(filename)s_n%(oldline_no)s" % {
936 'filename': self._safe_id(diff['filename']),
937 'filename': self._safe_id(diff['filename']),
937 'oldline_no': change['new_lineno']
938 'oldline_no': change['new_lineno']
938 }
939 }
939 cond_old = (change['old_lineno'] != '...' and
940 cond_old = (change['old_lineno'] != '...' and
940 change['old_lineno'])
941 change['old_lineno'])
941 cond_new = (change['new_lineno'] != '...' and
942 cond_new = (change['new_lineno'] != '...' and
942 change['new_lineno'])
943 change['new_lineno'])
943 if cond_old:
944 if cond_old:
944 anchor_old_id = 'id="%s"' % anchor_old
945 anchor_old_id = 'id="%s"' % anchor_old
945 if cond_new:
946 if cond_new:
946 anchor_new_id = 'id="%s"' % anchor_new
947 anchor_new_id = 'id="%s"' % anchor_new
947
948
948 if change['action'] != Action.CONTEXT:
949 if change['action'] != Action.CONTEXT:
949 anchor_link = True
950 anchor_link = True
950 else:
951 else:
951 anchor_link = False
952 anchor_link = False
952
953
953 ###########################################################
954 ###########################################################
954 # COMMENT ICONS
955 # COMMENT ICONS
955 ###########################################################
956 ###########################################################
956 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
957 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
957
958
958 if enable_comments and change['action'] != Action.CONTEXT:
959 if enable_comments and change['action'] != Action.CONTEXT:
959 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
960 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
960
961
961 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
962 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
962
963
963 ###########################################################
964 ###########################################################
964 # OLD LINE NUMBER
965 # OLD LINE NUMBER
965 ###########################################################
966 ###########################################################
966 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
967 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
967 'a_id': anchor_old_id,
968 'a_id': anchor_old_id,
968 'olc': old_lineno_class
969 'olc': old_lineno_class
969 })
970 })
970
971
971 _html.append('''%(link)s''' % {
972 _html.append('''%(link)s''' % {
972 'link': _link_to_if(anchor_link, change['old_lineno'],
973 'link': _link_to_if(anchor_link, change['old_lineno'],
973 '#%s' % anchor_old)
974 '#%s' % anchor_old)
974 })
975 })
975 _html.append('''</td>\n''')
976 _html.append('''</td>\n''')
976 ###########################################################
977 ###########################################################
977 # NEW LINE NUMBER
978 # NEW LINE NUMBER
978 ###########################################################
979 ###########################################################
979
980
980 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
981 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
981 'a_id': anchor_new_id,
982 'a_id': anchor_new_id,
982 'nlc': new_lineno_class
983 'nlc': new_lineno_class
983 })
984 })
984
985
985 _html.append('''%(link)s''' % {
986 _html.append('''%(link)s''' % {
986 'link': _link_to_if(anchor_link, change['new_lineno'],
987 'link': _link_to_if(anchor_link, change['new_lineno'],
987 '#%s' % anchor_new)
988 '#%s' % anchor_new)
988 })
989 })
989 _html.append('''</td>\n''')
990 _html.append('''</td>\n''')
990 ###########################################################
991 ###########################################################
991 # CODE
992 # CODE
992 ###########################################################
993 ###########################################################
993 code_classes = [code_class]
994 code_classes = [code_class]
994 if (not enable_comments or
995 if (not enable_comments or
995 change['action'] == Action.CONTEXT):
996 change['action'] == Action.CONTEXT):
996 code_classes.append('no-comment')
997 code_classes.append('no-comment')
997 _html.append('\t<td class="%s">' % ' '.join(code_classes))
998 _html.append('\t<td class="%s">' % ' '.join(code_classes))
998 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
999 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
999 'code': change['line']
1000 'code': change['line']
1000 })
1001 })
1001
1002
1002 _html.append('''\t</td>''')
1003 _html.append('''\t</td>''')
1003 _html.append('''\n</tr>\n''')
1004 _html.append('''\n</tr>\n''')
1004 _html.append('''</table>''')
1005 _html.append('''</table>''')
1005 if _html_empty:
1006 if _html_empty:
1006 return None
1007 return None
1007 return ''.join(_html)
1008 return ''.join(_html)
1008
1009
1009 def stat(self):
1010 def stat(self):
1010 """
1011 """
1011 Returns tuple of added, and removed lines for this instance
1012 Returns tuple of added, and removed lines for this instance
1012 """
1013 """
1013 return self.adds, self.removes
1014 return self.adds, self.removes
1014
1015
1015 def get_context_of_line(
1016 def get_context_of_line(
1016 self, path, diff_line=None, context_before=3, context_after=3):
1017 self, path, diff_line=None, context_before=3, context_after=3):
1017 """
1018 """
1018 Returns the context lines for the specified diff line.
1019 Returns the context lines for the specified diff line.
1019
1020
1020 :type diff_line: :class:`DiffLineNumber`
1021 :type diff_line: :class:`DiffLineNumber`
1021 """
1022 """
1022 assert self.parsed, "DiffProcessor is not initialized."
1023 assert self.parsed, "DiffProcessor is not initialized."
1023
1024
1024 if None not in diff_line:
1025 if None not in diff_line:
1025 raise ValueError(
1026 raise ValueError(
1026 "Cannot specify both line numbers: {}".format(diff_line))
1027 "Cannot specify both line numbers: {}".format(diff_line))
1027
1028
1028 file_diff = self._get_file_diff(path)
1029 file_diff = self._get_file_diff(path)
1029 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1030 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1030
1031
1031 first_line_to_include = max(idx - context_before, 0)
1032 first_line_to_include = max(idx - context_before, 0)
1032 first_line_after_context = idx + context_after + 1
1033 first_line_after_context = idx + context_after + 1
1033 context_lines = chunk[first_line_to_include:first_line_after_context]
1034 context_lines = chunk[first_line_to_include:first_line_after_context]
1034
1035
1035 line_contents = [
1036 line_contents = [
1036 _context_line(line) for line in context_lines
1037 _context_line(line) for line in context_lines
1037 if _is_diff_content(line)]
1038 if _is_diff_content(line)]
1038 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1039 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1039 # Once they are fixed, we can drop this line here.
1040 # Once they are fixed, we can drop this line here.
1040 if line_contents:
1041 if line_contents:
1041 line_contents[-1] = (
1042 line_contents[-1] = (
1042 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1043 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1043 return line_contents
1044 return line_contents
1044
1045
1045 def find_context(self, path, context, offset=0):
1046 def find_context(self, path, context, offset=0):
1046 """
1047 """
1047 Finds the given `context` inside of the diff.
1048 Finds the given `context` inside of the diff.
1048
1049
1049 Use the parameter `offset` to specify which offset the target line has
1050 Use the parameter `offset` to specify which offset the target line has
1050 inside of the given `context`. This way the correct diff line will be
1051 inside of the given `context`. This way the correct diff line will be
1051 returned.
1052 returned.
1052
1053
1053 :param offset: Shall be used to specify the offset of the main line
1054 :param offset: Shall be used to specify the offset of the main line
1054 within the given `context`.
1055 within the given `context`.
1055 """
1056 """
1056 if offset < 0 or offset >= len(context):
1057 if offset < 0 or offset >= len(context):
1057 raise ValueError(
1058 raise ValueError(
1058 "Only positive values up to the length of the context "
1059 "Only positive values up to the length of the context "
1059 "minus one are allowed.")
1060 "minus one are allowed.")
1060
1061
1061 matches = []
1062 matches = []
1062 file_diff = self._get_file_diff(path)
1063 file_diff = self._get_file_diff(path)
1063
1064
1064 for chunk in file_diff['chunks']:
1065 for chunk in file_diff['chunks']:
1065 context_iter = iter(context)
1066 context_iter = iter(context)
1066 for line_idx, line in enumerate(chunk):
1067 for line_idx, line in enumerate(chunk):
1067 try:
1068 try:
1068 if _context_line(line) == context_iter.next():
1069 if _context_line(line) == context_iter.next():
1069 continue
1070 continue
1070 except StopIteration:
1071 except StopIteration:
1071 matches.append((line_idx, chunk))
1072 matches.append((line_idx, chunk))
1072 context_iter = iter(context)
1073 context_iter = iter(context)
1073
1074
1074 # Increment position and triger StopIteration
1075 # Increment position and triger StopIteration
1075 # if we had a match at the end
1076 # if we had a match at the end
1076 line_idx += 1
1077 line_idx += 1
1077 try:
1078 try:
1078 context_iter.next()
1079 context_iter.next()
1079 except StopIteration:
1080 except StopIteration:
1080 matches.append((line_idx, chunk))
1081 matches.append((line_idx, chunk))
1081
1082
1082 effective_offset = len(context) - offset
1083 effective_offset = len(context) - offset
1083 found_at_diff_lines = [
1084 found_at_diff_lines = [
1084 _line_to_diff_line_number(chunk[idx - effective_offset])
1085 _line_to_diff_line_number(chunk[idx - effective_offset])
1085 for idx, chunk in matches]
1086 for idx, chunk in matches]
1086
1087
1087 return found_at_diff_lines
1088 return found_at_diff_lines
1088
1089
1089 def _get_file_diff(self, path):
1090 def _get_file_diff(self, path):
1090 for file_diff in self.parsed_diff:
1091 for file_diff in self.parsed_diff:
1091 if file_diff['filename'] == path:
1092 if file_diff['filename'] == path:
1092 break
1093 break
1093 else:
1094 else:
1094 raise FileNotInDiffException("File {} not in diff".format(path))
1095 raise FileNotInDiffException("File {} not in diff".format(path))
1095 return file_diff
1096 return file_diff
1096
1097
1097 def _find_chunk_line_index(self, file_diff, diff_line):
1098 def _find_chunk_line_index(self, file_diff, diff_line):
1098 for chunk in file_diff['chunks']:
1099 for chunk in file_diff['chunks']:
1099 for idx, line in enumerate(chunk):
1100 for idx, line in enumerate(chunk):
1100 if line['old_lineno'] == diff_line.old:
1101 if line['old_lineno'] == diff_line.old:
1101 return chunk, idx
1102 return chunk, idx
1102 if line['new_lineno'] == diff_line.new:
1103 if line['new_lineno'] == diff_line.new:
1103 return chunk, idx
1104 return chunk, idx
1104 raise LineNotInDiffException(
1105 raise LineNotInDiffException(
1105 "The line {} is not part of the diff.".format(diff_line))
1106 "The line {} is not part of the diff.".format(diff_line))
1106
1107
1107
1108
def _is_diff_content(line):
    """Return True when *line* carries diff content (context, add or delete)."""
    content_actions = (Action.UNMODIFIED, Action.ADD, Action.DELETE)
    return line['action'] in content_actions
1111
1112
1112
1113
1113 def _context_line(line):
1114 def _context_line(line):
1114 return (line['action'], line['line'])
1115 return (line['action'], line['line'])
1115
1116
1116
1117
# (old, new) line-number pair addressing a single diff line; either side
# may be None when the line exists on only one side of the diff
# (see _line_to_diff_line_number).
DiffLineNumber = collections.namedtuple('DiffLineNumber', 'old new')
1118
1119
1119
1120
def _line_to_diff_line_number(line):
    """Map a parsed diff line to a ``DiffLineNumber`` pair.

    ``or None`` collapses falsy line numbers (e.g. 0 or '') so both
    sides share a single "not present" marker.
    """
    return DiffLineNumber(
        old=line['old_lineno'] or None,
        new=line['new_lineno'] or None)
1124
1125
1125
1126
class FileNotInDiffException(Exception):
    """
    Signals a context request for a file absent from the diff.

    Asking for the context of a line that belongs to a file which the
    given diff does not contain triggers this exception.
    """
1133
1134
1134
1135
class LineNotInDiffException(Exception):
    """
    Signals a context request for a line absent from the diff.

    Asking for the context of a line within a known file, where that
    particular line is not part of the given diff, triggers this
    exception.
    """
1142
1143
1143
1144
class DiffLimitExceeded(Exception):
    """Signals that a configured diff limit was exceeded."""
1146
1147
1147
1148
# NOTE(marcink): if diffs.mako change, probably this
# needs a bump to next version
# Version tag embedded in cached diff structs; load_cached_diff purges any
# cache entry whose stored 'version' does not match this value.
CURRENT_DIFF_VERSION = 'v4'
1151
1152
1152
1153
1153 def _cleanup_cache_file(cached_diff_file):
1154 def _cleanup_cache_file(cached_diff_file):
1154 # cleanup file to not store it "damaged"
1155 # cleanup file to not store it "damaged"
1155 try:
1156 try:
1156 os.remove(cached_diff_file)
1157 os.remove(cached_diff_file)
1157 except Exception:
1158 except Exception:
1158 log.exception('Failed to cleanup path %s', cached_diff_file)
1159 log.exception('Failed to cleanup path %s', cached_diff_file)
1159
1160
1160
1161
1161 def _get_compression_mode(cached_diff_file):
1162 def _get_compression_mode(cached_diff_file):
1162 mode = 'bz2'
1163 mode = 'bz2'
1163 if 'mode:plain' in cached_diff_file:
1164 if 'mode:plain' in cached_diff_file:
1164 mode = 'plain'
1165 mode = 'plain'
1165 elif 'mode:gzip' in cached_diff_file:
1166 elif 'mode:gzip' in cached_diff_file:
1166 mode = 'gzip'
1167 mode = 'gzip'
1167 return mode
1168 return mode
1168
1169
1169
1170
def cache_diff(cached_diff_file, diff, commits):
    """
    Persist *diff* and *commits* to `cached_diff_file`.

    The on-disk format is a pickled dict tagged with CURRENT_DIFF_VERSION;
    the compression (plain/gzip/bz2) is chosen from markers embedded in the
    file name. Failures are logged, the partial file is cleaned up, and
    nothing is raised to the caller.
    """
    compression_mode = _get_compression_mode(cached_diff_file)

    struct = {
        'version': CURRENT_DIFF_VERSION,
        'diff': diff,
        'commits': commits
    }

    start = time.time()
    try:
        if compression_mode == 'plain':
            with open(cached_diff_file, 'wb') as f:
                pickle.dump(struct, f)
        elif compression_mode == 'gzip':
            with gzip.GzipFile(cached_diff_file, 'wb') as f:
                pickle.dump(struct, f)
        else:
            with bz2.BZ2File(cached_diff_file, 'wb') as f:
                pickle.dump(struct, f)
    except Exception:
        # `warning`: `Logger.warn` is a deprecated alias
        log.warning('Failed to save cache', exc_info=True)
        _cleanup_cache_file(cached_diff_file)
        # the file was just removed -- do not emit the success message below
        return

    log.debug('Saved diff cache under %s in %.4fs', cached_diff_file, time.time() - start)
1195
1196
1196
1197
def load_cached_diff(cached_diff_file):
    """
    Load a previously cached diff structure from `cached_diff_file`.

    Returns a dict with 'version', 'diff' and 'commits' keys. A default
    struct (diff/commits set to None) is returned when the cache file is
    missing, unreadable, not a dict, or tagged with a version other than
    CURRENT_DIFF_VERSION; stale-version files are purged.
    """
    compression_mode = _get_compression_mode(cached_diff_file)

    default_struct = {
        'version': CURRENT_DIFF_VERSION,
        'diff': None,
        'commits': None
    }

    has_cache = os.path.isfile(cached_diff_file)
    if not has_cache:
        log.debug('Reading diff cache file failed %s', cached_diff_file)
        return default_struct

    data = None

    start = time.time()
    try:
        if compression_mode == 'plain':
            with open(cached_diff_file, 'rb') as f:
                data = pickle.load(f)
        elif compression_mode == 'gzip':
            with gzip.GzipFile(cached_diff_file, 'rb') as f:
                data = pickle.load(f)
        else:
            with bz2.BZ2File(cached_diff_file, 'rb') as f:
                data = pickle.load(f)
    except Exception:
        # `warning`: `Logger.warn` is a deprecated alias
        log.warning('Failed to read diff cache file', exc_info=True)

    if not data:
        data = default_struct

    if not isinstance(data, dict):
        # old version of data ?
        data = default_struct

    # check version
    if data.get('version') != CURRENT_DIFF_VERSION:
        # purge cache written by an incompatible version
        _cleanup_cache_file(cached_diff_file)
        return default_struct

    log.debug('Loaded diff cache from %s in %.4fs', cached_diff_file, time.time() - start)

    return data
1243
1244
1244
1245
def generate_diff_cache_key(*args):
    """
    Helper to generate a cache key using arguments.

    Each argument is stringified, '/' characters are replaced with '_'
    (a '/' would allow writing into sub-directories of the cache storage),
    and empty strings collapse to None.
    """
    def arg_mapper(input_param):
        input_param = safe_str(input_param)
        # we cannot allow '/' in arguments since it would allow
        # subdirectory usage
        # str.replace returns a new string -- assign it back, otherwise
        # the '/' characters survive into the key (previous code discarded
        # the result)
        input_param = input_param.replace('/', '_')
        return input_param or None  # prevent empty string arguments

    return '_'.join(['{}'] * len(args)).format(*map(arg_mapper, args))
1258
1259
1259
1260
def diff_cache_exist(cache_storage, *args):
    """
    Based on all generated arguments check and return a cache path.

    Builds the cache key (forcing gzip mode) and joins it under
    `cache_storage`; raises ValueError if the resulting absolute path
    escapes the storage directory.
    """
    args = list(args) + ['mode:gzip']
    cache_key = generate_diff_cache_key(*args)
    cache_file_path = os.path.join(cache_storage, cache_key)
    # prevent path traversal attacks using some param that have e.g '../../'
    # compare against the storage path WITH a trailing separator, otherwise
    # a sibling directory sharing the prefix (e.g. `<storage>2/...`) would
    # pass a bare startswith() check
    storage_prefix = os.path.join(os.path.abspath(cache_storage), '')
    if not os.path.abspath(cache_file_path).startswith(storage_prefix):
        raise ValueError('Final path must be within {}'.format(cache_storage))

    return cache_file_path
General Comments 0
You need to be logged in to leave comments. Login now