##// END OF EJS Templates
py3: 2to3 fixes
super-admin -
r4931:46ff414c default
parent child Browse files
Show More
@@ -1,207 +1,207 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2010-2020 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 import os
22 22 import textwrap
23 23 import string
24 24 import functools
25 25 import logging
26 26 import tempfile
27 27 import logging.config
# module-level logger for this settings module
log = logging.getLogger(__name__)

# skip keys, that are set here, so we don't double process those
# (mutable module-level registry: env_expand()/make_setting() record every
# key they have already resolved here)
set_keys = {
    '__file__': ''
}
35 35
def str2bool(_str):
    """
    Translate the given value into a boolean.

    :param _str: value to translate into boolean
    :rtype: boolean
    :returns: boolean derived from the given value
    """
    if _str is None:
        return False
    # already a boolean (or 0/1 that compares equal) - hand it back untouched
    if _str in (True, False):
        return _str
    return str(_str).strip().lower() in ('t', 'true', 'y', 'yes', 'on', '1')
51 51
52 52
def aslist(obj, sep=None, strip=True):
    """
    Return the given value as a list, splitting strings on ``sep``.

    :param obj: value to convert
    :param sep: separator used when ``obj`` is a string (None = whitespace)
    :param strip: strip whitespace from each split item
    """
    if obj is None:
        return []
    if isinstance(obj, (list, tuple)):
        return obj
    if isinstance(obj, str):
        if not obj:
            return []
        parts = obj.split(sep)
        return [part.strip() for part in parts] if strip else parts
    # any other scalar gets wrapped into a one-element list
    return [obj]
75 75
76 76
class SettingsMaker(object):
    """
    Reads raw values from an ``app_settings`` dict (parsed .ini file),
    optionally overrides them from ``RC_``-prefixed environment variables
    and coerces them to the requested type via :meth:`make_setting`.
    """

    def __init__(self, app_settings):
        # app_settings: mutable mapping of config key -> raw value
        self.settings = app_settings

    @classmethod
    def _bool_func(cls, input_val):
        # BUG FIX: the 2to3 conversion *encoded* str -> bytes here, which
        # broke str2bool() on py3: str(b'true') == "b'true'" never matches
        # the accepted tokens, so every boolean setting parsed as False.
        # Decode bytes to str instead so the comparisons work.
        if isinstance(input_val, bytes):
            input_val = input_val.decode('utf8')
        return str2bool(input_val)

    @classmethod
    def _int_func(cls, input_val):
        # raises ValueError on garbage, surfacing config errors early
        return int(input_val)

    @classmethod
    def _list_func(cls, input_val, sep=','):
        # split a separated string into a list of stripped items
        return aslist(input_val, sep=sep)

    @classmethod
    def _string_func(cls, input_val, lower=True):
        # normalize a string value, lower-cased by default
        if lower:
            input_val = input_val.lower()
        return input_val

    @classmethod
    def _float_func(cls, input_val):
        return float(input_val)

    @classmethod
    def _dir_func(cls, input_val, ensure_dir=False, mode=0o755):
        # validate (and optionally create) a directory path

        # ensure we have our dir created
        if not os.path.isdir(input_val) and ensure_dir:
            os.makedirs(input_val, mode=mode)

        if not os.path.isdir(input_val):
            raise Exception('Dir at {} does not exist'.format(input_val))
        return input_val

    @classmethod
    def _file_path_func(cls, input_val, ensure_dir=False, mode=0o755):
        # validate a file path by validating/creating its parent directory
        dirname = os.path.dirname(input_val)
        cls._dir_func(dirname, ensure_dir=ensure_dir)
        return input_val

    @classmethod
    def _key_transformator(cls, key):
        # config key 'foo.bar-baz' -> env variable name 'RC_FOO_BAR_BAZ'
        return "{}_{}".format('RC'.upper(), key.upper().replace('.', '_').replace('-', '_'))

    def maybe_env_key(self, key):
        # now maybe we have this KEY in env, search and use the value with higher priority.
        transformed_key = self._key_transformator(key)
        envvar_value = os.environ.get(transformed_key)
        if envvar_value:
            log.debug('using `%s` key instead of `%s` key for config', transformed_key, key)

        return envvar_value

    def env_expand(self):
        """Override all settings with RC_* env values, recording processed
        keys in the module-level ``set_keys`` registry."""
        replaced = {}
        for k, v in self.settings.items():
            if k not in set_keys:
                envvar_value = self.maybe_env_key(k)
                if envvar_value:
                    replaced[k] = envvar_value
                    set_keys[k] = envvar_value

        # replace ALL keys updated
        self.settings.update(replaced)

    def enable_logging(self, logging_conf=None, level='INFO', formatter='generic'):
        """
        Helper to enable debug on running instance
        :return:
        """

        if not str2bool(self.settings.get('logging.autoconfigure')):
            log.info('logging configuration based on main .ini file')
            return

        if logging_conf is None:
            logging_conf = self.settings.get('logging.logging_conf_file') or ''

        if not os.path.isfile(logging_conf):
            log.error('Unable to setup logging based on %s, '
                      'file does not exist.... specify path using logging.logging_conf_file= config setting. ', logging_conf)
            return

        # BUG FIX: read as text - the old 'rb' mode fed bytes into
        # textwrap.dedent() and string.Template(), both of which require
        # str on py3 and raise TypeError.
        with open(logging_conf, 'rt') as f:
            ini_template = textwrap.dedent(f.read())
        ini_template = string.Template(ini_template).safe_substitute(
            RC_LOGGING_LEVEL=os.environ.get('RC_LOGGING_LEVEL', '') or level,
            RC_LOGGING_FORMATTER=os.environ.get('RC_LOGGING_FORMATTER', '') or formatter
        )

        # BUG FIX: mode='w' - the template is str; the default 'w+b' mode of
        # NamedTemporaryFile would require bytes and raise on f.write().
        with tempfile.NamedTemporaryFile(
                mode='w', prefix='rc_logging_', suffix='.ini', delete=False) as f:
            log.info('Saved Temporary LOGGING config at %s', f.name)
            f.write(ini_template)

        logging.config.fileConfig(f.name)
        os.remove(f.name)

    def make_setting(self, key, default, lower=False, default_when_empty=False, parser=None):
        """
        Fetch ``key`` from settings (env override wins), coerce it with the
        named ``parser`` and store the parsed value back into ``self.settings``.

        :param key: config key to resolve
        :param default: value used when the key is missing (or empty, when
            ``default_when_empty`` is set)
        :param lower: lower-case the value (only for the 'string' parser)
        :param default_when_empty: treat empty values as missing
        :param parser: one of bool/int/list/list:newline/list:spacesep/
            string/dir/dir:ensured/file/file:ensured or None (pass-through)
        :returns: the parsed value, also stored under ``self.settings[key]``
        """
        input_val = self.settings.get(key, default)

        if default_when_empty and not input_val:
            # use default value when value is set in the config but it is empty
            input_val = default

        parser_func = {
            'bool': self._bool_func,
            'int': self._int_func,
            'list': self._list_func,
            # BUG FIX: was sep='/n' (typo) - newline-separated lists were
            # never split on actual newlines
            'list:newline': functools.partial(self._list_func, sep='\n'),
            'list:spacesep': functools.partial(self._list_func, sep=' '),
            'string': functools.partial(self._string_func, lower=lower),
            'dir': self._dir_func,
            'dir:ensured': functools.partial(self._dir_func, ensure_dir=True),
            'file': self._file_path_func,
            'file:ensured': functools.partial(self._file_path_func, ensure_dir=True),
            None: lambda i: i
        }[parser]

        envvar_value = self.maybe_env_key(key)
        if envvar_value:
            input_val = envvar_value
            set_keys[key] = input_val

        self.settings[key] = parser_func(input_val)
        return self.settings[key]
@@ -1,1272 +1,1272 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2020 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21
22 22 """
23 23 Set of diffing helpers, previously part of vcs
24 24 """
25 25
26 26 import os
27 27 import re
28 28 import bz2
29 29 import gzip
30 30 import time
31 31
32 32 import collections
33 33 import difflib
34 34 import logging
35 35 import pickle
36 36 from itertools import tee
37 37
38 38 from rhodecode.lib.vcs.exceptions import VCSError
39 39 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
40 40 from rhodecode.lib.utils2 import safe_unicode, safe_str
41 41
log = logging.getLogger(__name__)

# define max context, a file with more than this numbers of lines is unusable
# in browser anyway
MAX_CONTEXT = 20 * 1024
DEFAULT_CONTEXT = 3


def get_diff_context(request):
    """Return the diff context size: MAX_CONTEXT when ?fullcontext=1 was
    requested, DEFAULT_CONTEXT otherwise."""
    if request.GET.get('fullcontext', '') == '1':
        return MAX_CONTEXT
    return DEFAULT_CONTEXT


def get_diff_whitespace_flag(request):
    """Return True when the request asks to ignore whitespace (?ignorews=1)."""
    return request.GET.get('ignorews', '') == '1'
56 56
57 57
class OPS(object):
    """Single-letter file-operation codes used by the diff parsers."""
    ADD = 'A'   # file added
    MOD = 'M'   # file modified
    DEL = 'D'   # file deleted
62 62
63 63
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines, clamped to MAX_CONTEXT
    :raises VCSError: when either argument is not a FileNode
    :returns: the vcs diff, or '' when a submodule is involved
    """
    # make sure we pass in default context
    context = context or 3
    # protect against IntOverflow when passing HUGE context
    if context > MAX_CONTEXT:
        context = MAX_CONTEXT

    # BUG FIX: on py3 ``filter()`` returns a lazy iterator, which is *always*
    # truthy, so the old ``if submodules:`` check made this function return
    # '' for every input. Materialize the matches into a list so the
    # truthiness test is meaningful again.
    submodules = [
        o for o in (filenode_new, filenode_old)
        if isinstance(o, SubModuleNode)]
    if submodules:
        return ''

    for filenode in (filenode_old, filenode_new):
        if not isinstance(filenode, FileNode):
            raise VCSError(
                "Given object should be FileNode object, not %s"
                % filenode.__class__)

    repo = filenode_new.commit.repository
    old_commit = filenode_old.commit or repo.EMPTY_COMMIT
    new_commit = filenode_new.commit

    vcs_gitdiff = repo.get_diff(
        old_commit, new_commit, filenode_new.path,
        ignore_whitespace, context, path1=filenode_old.path)
    return vcs_gitdiff
95 95
# numeric codes describing what happened to a file in a diff; used as keys
# of the per-file ``stats['ops']`` mapping built by DiffProcessor
NEW_FILENODE = 1
DEL_FILENODE = 2
MOD_FILENODE = 3
RENAMED_FILENODE = 4
COPIED_FILENODE = 5
CHMOD_FILENODE = 6
BIN_FILENODE = 7
103 103
104 104
class LimitedDiffContainer(object):
    """
    Wrapper around parsed diff data marking it as cut off at ``diff_limit``;
    indexing and iteration delegate to the wrapped ``diff`` payload.
    """

    def __init__(self, diff_limit, cur_diff_size, diff):
        self.diff = diff
        self.diff_limit = diff_limit
        self.cur_diff_size = cur_diff_size

    def __getitem__(self, key):
        return self.diff[key]

    def __iter__(self):
        return iter(self.diff)
118 118
119 119
class Action(object):
    """
    Constants for the ``action`` value of lines in a parsed diff.
    """

    ADD = 'add'            # line added
    DELETE = 'del'         # line removed
    UNMODIFIED = 'unmod'   # line unchanged

    CONTEXT = 'context'        # meta/context row, not a real diff line
    OLD_NO_NL = 'old-no-nl'    # "no newline at EOF" marker, old side
    NEW_NO_NL = 'new-no-nl'    # "no newline at EOF" marker, new side
132 132
133 133
class DiffProcessor(object):
    """
    Give it a unified or git diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.

    .. note:: Unicode handling

        The original diffs are a byte sequence and can contain filenames
        in mixed encodings. This class generally returns `unicode` objects
        since the result is intended for presentation to the user.

    """
    # matches a hunk header, e.g. '@@ -1,5 +2,6 @@ optional section header'
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
    # matches the literal '\ No newline at end of file' marker line
    _newline_marker = re.compile(r'^\\ No newline at end of file')

    # used for inline highlighter word split
    _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')

    # collapse ranges of commits over given number
    _collapse_commits_over = 5
155 155
156 156 def __init__(self, diff, format='gitdiff', diff_limit=None,
157 157 file_limit=None, show_full_diff=True):
158 158 """
159 159 :param diff: A `Diff` object representing a diff from a vcs backend
160 160 :param format: format of diff passed, `udiff` or `gitdiff`
161 161 :param diff_limit: define the size of diff that is considered "big"
162 162 based on that parameter cut off will be triggered, set to None
163 163 to show full diff
164 164 """
165 165 self._diff = diff
166 166 self._format = format
167 167 self.adds = 0
168 168 self.removes = 0
169 169 # calculate diff size
170 170 self.diff_limit = diff_limit
171 171 self.file_limit = file_limit
172 172 self.show_full_diff = show_full_diff
173 173 self.cur_diff_size = 0
174 174 self.parsed = False
175 175 self.parsed_diff = []
176 176
177 177 log.debug('Initialized DiffProcessor with %s mode', format)
178 178 if format == 'gitdiff':
179 179 self.differ = self._highlight_line_difflib
180 180 self._parser = self._parse_gitdiff
181 181 else:
182 182 self.differ = self._highlight_line_udiff
183 183 self._parser = self._new_parse_gitdiff
184 184
185 185 def _copy_iterator(self):
186 186 """
187 187 make a fresh copy of generator, we should not iterate thru
188 188 an original as it's needed for repeating operations on
189 189 this instance of DiffProcessor
190 190 """
191 191 self.__udiff, iterator_copy = tee(self.__udiff)
192 192 return iterator_copy
193 193
194 194 def _escaper(self, string):
195 195 """
196 196 Escaper for diff escapes special chars and checks the diff limit
197 197
198 198 :param string:
199 199 """
200 200 self.cur_diff_size += len(string)
201 201
202 202 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
203 203 raise DiffLimitExceeded('Diff Limit Exceeded')
204 204
205 205 return string \
206 206 .replace('&', '&amp;')\
207 207 .replace('<', '&lt;')\
208 208 .replace('>', '&gt;')
209 209
210 210 def _line_counter(self, l):
211 211 """
212 212 Checks each line and bumps total adds/removes for this diff
213 213
214 214 :param l:
215 215 """
216 216 if l.startswith('+') and not l.startswith('+++'):
217 217 self.adds += 1
218 218 elif l.startswith('-') and not l.startswith('---'):
219 219 self.removes += 1
220 220 return safe_unicode(l)
221 221
222 222 def _highlight_line_difflib(self, line, next_):
223 223 """
224 224 Highlight inline changes in both lines.
225 225 """
226 226
227 227 if line['action'] == Action.DELETE:
228 228 old, new = line, next_
229 229 else:
230 230 old, new = next_, line
231 231
232 232 oldwords = self._token_re.split(old['line'])
233 233 newwords = self._token_re.split(new['line'])
234 234 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
235 235
236 236 oldfragments, newfragments = [], []
237 237 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
238 238 oldfrag = ''.join(oldwords[i1:i2])
239 239 newfrag = ''.join(newwords[j1:j2])
240 240 if tag != 'equal':
241 241 if oldfrag:
242 242 oldfrag = '<del>%s</del>' % oldfrag
243 243 if newfrag:
244 244 newfrag = '<ins>%s</ins>' % newfrag
245 245 oldfragments.append(oldfrag)
246 246 newfragments.append(newfrag)
247 247
248 248 old['line'] = "".join(oldfragments)
249 249 new['line'] = "".join(newfragments)
250 250
    def _highlight_line_udiff(self, line, next_):
        """
        Highlight inline changes in both lines.
        """
        # walk forward over the common prefix of both lines
        start = 0
        limit = min(len(line['line']), len(next_['line']))
        while start < limit and line['line'][start] == next_['line'][start]:
            start += 1
        # walk backward over the common suffix; ``end`` is a negative index
        end = -1
        limit -= start
        while -end <= limit and line['line'][end] == next_['line'][end]:
            end -= 1
        end += 1
        # wrap the differing middle chunk of each line in <ins>/<del>
        if start or end:
            def do(l):
                # ``last`` converts the negative suffix offset into a
                # positive slice index for this particular line
                last = end + len(l['line'])
                if l['action'] == Action.ADD:
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next_)
280 280
281 281 def _clean_line(self, line, command):
282 282 if command in ['+', '-', ' ']:
283 283 # only modify the line if it's actually a diff thing
284 284 line = line[1:]
285 285 return line
286 286
    def _parse_gitdiff(self, inline_diff=True):
        """
        Parse ``self._diff`` into a list of per-file dicts (filename, chunks,
        stats, operation type), optionally applying inline highlighting.
        """
        _files = []
        # identity wrapper; replaced by a LimitedDiffContainer factory below
        # when a size limit is exceeded
        diff_container = lambda arg: arg

        for chunk in self._diff.chunks():
            head = chunk.header

            diff = map(self._escaper, self.diff_splitter(chunk.diff))
            raw_diff = chunk.raw
            limited_diff = False
            exceeds_limit = False

            op = None
            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'ops': {},
            }

            if head['deleted_file_mode']:
                op = OPS.DEL
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            elif head['new_file_mode']:
                op = OPS.ADD
                stats['binary'] = True
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
            else:  # modify operation, can be copy, rename or chmod

                # CHMOD
                if head['new_mode'] and head['old_mode']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][CHMOD_FILENODE] = (
                        'modified file chmod %s => %s' % (
                            head['old_mode'], head['new_mode']))
                # RENAME
                if head['rename_from'] != head['rename_to']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][RENAMED_FILENODE] = (
                        'file renamed from %s to %s' % (
                            head['rename_from'], head['rename_to']))
                # COPY
                if head.get('copy_from') and head.get('copy_to'):
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][COPIED_FILENODE] = (
                        'file copied from %s to %s' % (
                            head['copy_from'], head['copy_to']))

            # If our new parsed headers didn't match anything fallback to
            # old style detection
            if op is None:
                if not head['a_file'] and head['b_file']:
                    op = OPS.ADD
                    stats['binary'] = True
                    stats['ops'][NEW_FILENODE] = 'new file'

                elif head['a_file'] and not head['b_file']:
                    op = OPS.DEL
                    stats['binary'] = True
                    stats['ops'][DEL_FILENODE] = 'deleted file'

                # it's not ADD not DELETE
                if op is None:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][MOD_FILENODE] = 'modified file'

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
                try:
                    raw_diff, chunks, _stats = self._parse_lines(diff)
                    stats['binary'] = False
                    stats['added'] = _stats[0]
                    stats['deleted'] = _stats[1]
                    # explicit mark that it's a modified file
                    if op == OPS.MOD:
                        stats['ops'][MOD_FILENODE] = 'modified file'
                    exceeds_limit = len(raw_diff) > self.file_limit

                    # changed from _escaper function so we validate size of
                    # each file instead of the whole diff
                    # diff will hide big files but still show small ones
                    # from my tests, big files are fairly safe to be parsed
                    # but the browser is the bottleneck
                    if not self.show_full_diff and exceeds_limit:
                        raise DiffLimitExceeded('File Limit Exceeded')

                except DiffLimitExceeded:
                    diff_container = lambda _diff: \
                        LimitedDiffContainer(
                            self.diff_limit, self.cur_diff_size, _diff)

                    exceeds_limit = len(raw_diff) > self.file_limit
                    limited_diff = True
                    chunks = []

            else:  # GIT format binary patch, or possibly empty diff
                if head['bin_patch']:
                    # we have operation already extracted, but we mark simply
                    # it's a diff we wont show for binary files
                    stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                chunks = []

            if chunks and not self.show_full_diff and op == OPS.DEL:
                # if not full diff mode show deleted file contents
                # TODO: anderson: if the view is not too big, there is no way
                # to see the content of the file
                chunks = []

            # prepend a meta-chunk describing the non-trivial operations
            chunks.insert(0, [{
                'old_lineno': '',
                'new_lineno': '',
                'action': Action.CONTEXT,
                'line': msg,
            } for _op, msg in stats['ops'].items()
                if _op not in [MOD_FILENODE]])

            _files.append({
                'filename': safe_unicode(head['b_path']),
                'old_revision': head['a_blob_id'],
                'new_revision': head['b_blob_id'],
                'chunks': chunks,
                'raw_diff': safe_unicode(raw_diff),
                'operation': op,
                'stats': stats,
                'exceeds_limit': exceeds_limit,
                'is_limited_diff': limited_diff,
            })

        # additions first, then modifications, deletions last
        sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                               OPS.DEL: 2}.get(info['operation'])

        if not inline_diff:
            return diff_container(sorted(_files, key=sorter))

        # highlight inline changes
        for diff_data in _files:
            for chunk in diff_data['chunks']:
                lineiter = iter(chunk)
                try:
                    while 1:
                        line = next(lineiter)
                        if line['action'] not in (
                                Action.UNMODIFIED, Action.CONTEXT):
                            nextline = next(lineiter)
                            # only highlight proper del/add pairs
                            if nextline['action'] in ['unmod', 'context'] or \
                                    nextline['action'] == line['action']:
                                continue
                            self.differ(line, nextline)
                except StopIteration:
                    pass

        return diff_container(sorted(_files, key=sorter))
445 445
446 446 def _check_large_diff(self):
447 447 if self.diff_limit:
448 448 log.debug('Checking if diff exceeds current diff_limit of %s', self.diff_limit)
449 449 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
450 450 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
451 451
    # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
    def _new_parse_gitdiff(self, inline_diff=True):
        """
        Parse ``self._diff`` into per-file dicts. Newer implementation that
        additionally tracks old/new file modes and enforces both a per-file
        and a whole-diff size limit.
        """
        _files = []

        # this can be overriden later to a LimitedDiffContainer type
        diff_container = lambda arg: arg

        for chunk in self._diff.chunks():
            head = chunk.header
            log.debug('parsing diff %r', head)

            raw_diff = chunk.raw
            limited_diff = False
            exceeds_limit = False

            op = None
            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'old_mode': None,
                'new_mode': None,
                'ops': {},
            }
            if head['old_mode']:
                stats['old_mode'] = head['old_mode']
            if head['new_mode']:
                stats['new_mode'] = head['new_mode']
            if head['b_mode']:
                # b_mode wins over new_mode when both are present
                stats['new_mode'] = head['b_mode']

            # delete file
            if head['deleted_file_mode']:
                op = OPS.DEL
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            # new file
            elif head['new_file_mode']:
                op = OPS.ADD
                stats['binary'] = True
                stats['old_mode'] = None
                stats['new_mode'] = head['new_file_mode']
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']

            # modify operation, can be copy, rename or chmod
            else:
                # CHMOD
                if head['new_mode'] and head['old_mode']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][CHMOD_FILENODE] = (
                        'modified file chmod %s => %s' % (
                            head['old_mode'], head['new_mode']))

                # RENAME
                if head['rename_from'] != head['rename_to']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['renamed'] = (head['rename_from'], head['rename_to'])
                    stats['ops'][RENAMED_FILENODE] = (
                        'file renamed from %s to %s' % (
                            head['rename_from'], head['rename_to']))
                # COPY
                if head.get('copy_from') and head.get('copy_to'):
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['copied'] = (head['copy_from'], head['copy_to'])
                    stats['ops'][COPIED_FILENODE] = (
                        'file copied from %s to %s' % (
                            head['copy_from'], head['copy_to']))

            # If our new parsed headers didn't match anything fallback to
            # old style detection
            if op is None:
                if not head['a_file'] and head['b_file']:
                    op = OPS.ADD
                    stats['binary'] = True
                    stats['new_file'] = True
                    stats['ops'][NEW_FILENODE] = 'new file'

                elif head['a_file'] and not head['b_file']:
                    op = OPS.DEL
                    stats['binary'] = True
                    stats['ops'][DEL_FILENODE] = 'deleted file'

                # it's not ADD not DELETE
                if op is None:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][MOD_FILENODE] = 'modified file'

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
                # simulate splitlines, so we keep the line end part
                diff = self.diff_splitter(chunk.diff)

                # append each file to the diff size
                raw_chunk_size = len(raw_diff)

                exceeds_limit = raw_chunk_size > self.file_limit
                self.cur_diff_size += raw_chunk_size

                try:
                    # Check each file instead of the whole diff.
                    # Diff will hide big files but still show small ones.
                    # From the tests big files are fairly safe to be parsed
                    # but the browser is the bottleneck.
                    if not self.show_full_diff and exceeds_limit:
                        log.debug('File `%s` exceeds current file_limit of %s',
                                  safe_unicode(head['b_path']), self.file_limit)
                        raise DiffLimitExceeded(
                            'File Limit %s Exceeded', self.file_limit)

                    self._check_large_diff()

                    raw_diff, chunks, _stats = self._new_parse_lines(diff)
                    stats['binary'] = False
                    stats['added'] = _stats[0]
                    stats['deleted'] = _stats[1]
                    # explicit mark that it's a modified file
                    if op == OPS.MOD:
                        stats['ops'][MOD_FILENODE] = 'modified file'

                except DiffLimitExceeded:
                    diff_container = lambda _diff: \
                        LimitedDiffContainer(
                            self.diff_limit, self.cur_diff_size, _diff)

                    limited_diff = True
                    chunks = []

            else:  # GIT format binary patch, or possibly empty diff
                if head['bin_patch']:
                    # we have operation already extracted, but we mark simply
                    # it's a diff we wont show for binary files
                    stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                chunks = []

            # Hide content of deleted node by setting empty chunks
            if chunks and not self.show_full_diff and op == OPS.DEL:
                # if not full diff mode show deleted file contents
                # TODO: anderson: if the view is not too big, there is no way
                # to see the content of the file
                chunks = []

            # prepend a meta-chunk describing the non-trivial operations
            chunks.insert(
                0, [{'old_lineno': '',
                     'new_lineno': '',
                     'action': Action.CONTEXT,
                     'line': msg,
                     } for _op, msg in stats['ops'].items()
                    if _op not in [MOD_FILENODE]])

            original_filename = safe_unicode(head['a_path'])
            _files.append({
                'original_filename': original_filename,
                'filename': safe_unicode(head['b_path']),
                'old_revision': head['a_blob_id'],
                'new_revision': head['b_blob_id'],
                'chunks': chunks,
                'raw_diff': safe_unicode(raw_diff),
                'operation': op,
                'stats': stats,
                'exceeds_limit': exceeds_limit,
                'is_limited_diff': limited_diff,
            })

        # additions first, then modifications, deletions last
        sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                               OPS.DEL: 2}.get(info['operation'])

        return diff_container(sorted(_files, key=sorter))
624 624
    # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
    def _parse_lines(self, diff_iter):
        """
        Parse the diff and return data for the template.
        """

        stats = [0, 0]  # [added, deleted]
        chunks = []
        raw_diff = []

        try:
            line = next(diff_iter)

            while line:
                raw_diff.append(line)
                lines = []
                chunks.append(lines)

                match = self._chunk_re.match(line)

                if not match:
                    break

                gr = match.groups()
                # missing counts default to 1, e.g. '@@ -1 +1 @@'
                (old_line, old_end,
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                old_line -= 1
                new_line -= 1

                context = len(gr) == 5
                old_end += old_line
                new_end += new_line

                if context:
                    # skip context only if it's first line
                    if int(gr[0]) > 1:
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': Action.CONTEXT,
                            'line': line,
                        })

                line = next(diff_iter)

                while old_line < old_end or new_line < new_end:
                    command = ' '
                    if line:
                        command = line[0]

                    affects_old = affects_new = False

                    # ignore those if we don't expect them
                    # NOTE(review): this `continue` does not advance the
                    # iterator, so a '#'/'@' line inside a hunk would loop
                    # forever - presumably such lines never occur here; verify
                    if command in '#@':
                        continue
                    elif command == '+':
                        affects_new = True
                        action = Action.ADD
                        stats[0] += 1
                    elif command == '-':
                        affects_old = True
                        action = Action.DELETE
                        stats[1] += 1
                    else:
                        affects_old = affects_new = True
                        action = Action.UNMODIFIED

                    if not self._newline_marker.match(line):
                        # booleans add as 0/1 to the running line numbers
                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': self._clean_line(line, command)
                        })
                        raw_diff.append(line)

                    line = next(diff_iter)

                    if self._newline_marker.match(line):
                        # we need to append to lines, since this is not
                        # counted in the line specs of diff
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': Action.CONTEXT,
                            'line': self._clean_line(line, command)
                        })

        except StopIteration:
            pass
        return ''.join(raw_diff), chunks, stats
718 718
    # FIXME: NEWDIFFS: dan: this replaces _parse_lines
    def _new_parse_lines(self, diff_iter):
        """
        Parse the diff and return hunk data for the template.
        """

        stats = [0, 0]  # [added, deleted]
        chunks = []
        raw_diff = []

        try:
            line = next(diff_iter)

            while line:
                raw_diff.append(line)
                # match header e.g @@ -0,0 +1 @@\n'
                match = self._chunk_re.match(line)

                if not match:
                    break

                gr = match.groups()
                # missing counts default to 1, e.g. '@@ -1 +1 @@'
                (old_line, old_end,
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]

                lines = []
                hunk = {
                    'section_header': gr[-1],
                    'source_start': old_line,
                    'source_length': old_end,
                    'target_start': new_line,
                    'target_length': new_end,
                    'lines': lines,
                }
                chunks.append(hunk)

                old_line -= 1
                new_line -= 1

                # NOTE(review): ``context`` is computed but never read in
                # this newer implementation (kept from _parse_lines)
                context = len(gr) == 5
                old_end += old_line
                new_end += new_line

                line = next(diff_iter)

                while old_line < old_end or new_line < new_end:
                    command = ' '
                    if line:
                        command = line[0]

                    affects_old = affects_new = False

                    # ignore those if we don't expect them
                    if command in '#@':
                        continue
                    elif command == '+':
                        affects_new = True
                        action = Action.ADD
                        stats[0] += 1
                    elif command == '-':
                        affects_old = True
                        action = Action.DELETE
                        stats[1] += 1
                    else:
                        affects_old = affects_new = True
                        action = Action.UNMODIFIED

                    if not self._newline_marker.match(line):
                        # booleans add as 0/1 to the running line numbers
                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': self._clean_line(line, command)
                        })
                        raw_diff.append(line)

                    line = next(diff_iter)

                    if self._newline_marker.match(line):
                        # we need to append to lines, since this is not
                        # counted in the line specs of diff
                        if affects_old:
                            action = Action.OLD_NO_NL
                        elif affects_new:
                            action = Action.NEW_NO_NL
                        else:
                            raise Exception('invalid context for no newline')

                        lines.append({
                            'old_lineno': None,
                            'new_lineno': None,
                            'action': action,
                            'line': self._clean_line(line, command)
                        })

        except StopIteration:
            pass

        return ''.join(raw_diff), chunks, stats
820 820
821 821 def _safe_id(self, idstring):
822 822 """Make a string safe for including in an id attribute.
823 823
824 824 The HTML spec says that id attributes 'must begin with
825 825 a letter ([A-Za-z]) and may be followed by any number
826 826 of letters, digits ([0-9]), hyphens ("-"), underscores
827 827 ("_"), colons (":"), and periods (".")'. These regexps
828 828 are slightly over-zealous, in that they remove colons
829 829 and periods unnecessarily.
830 830
831 831 Whitespace is transformed into underscores, and then
832 832 anything which is not a hyphen or a character that
833 833 matches \w (alphanumerics and underscore) is removed.
834 834
835 835 """
836 836 # Transform all whitespace to underscore
837 837 idstring = re.sub(r'\s', "_", '%s' % idstring)
838 838 # Remove everything that is not a hyphen or a member of \w
839 839 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
840 840 return idstring
841 841
842 842 @classmethod
843 843 def diff_splitter(cls, string):
844 844 """
845 845 Diff split that emulates .splitlines() but works only on \n
846 846 """
847 847 if not string:
848 848 return
849 849 elif string == '\n':
850 850 yield u'\n'
851 851 else:
852 852
853 853 has_newline = string.endswith('\n')
854 854 elements = string.split('\n')
855 855 if has_newline:
856 856 # skip last element as it's empty string from newlines
857 857 elements = elements[:-1]
858 858
859 859 len_elements = len(elements)
860 860
861 861 for cnt, line in enumerate(elements, start=1):
862 862 last_line = cnt == len_elements
863 863 if last_line and not has_newline:
864 864 yield safe_unicode(line)
865 865 else:
866 866 yield safe_unicode(line) + '\n'
867 867
868 868 def prepare(self, inline_diff=True):
869 869 """
870 870 Prepare the passed udiff for HTML rendering.
871 871
872 872 :return: A list of dicts with diff information.
873 873 """
874 874 parsed = self._parser(inline_diff=inline_diff)
875 875 self.parsed = True
876 876 self.parsed_diff = parsed
877 877 return parsed
878 878
879 879 def as_raw(self, diff_lines=None):
880 880 """
881 881 Returns raw diff as a byte string
882 882 """
883 883 return self._diff.raw
884 884
885 885 def as_html(self, table_class='code-difftable', line_class='line',
886 886 old_lineno_class='lineno old', new_lineno_class='lineno new',
887 887 code_class='code', enable_comments=False, parsed_lines=None):
888 888 """
889 889 Return given diff as html table with customized css classes
890 890 """
891 891 # TODO(marcink): not sure how to pass in translator
892 892 # here in an efficient way, leave the _ for proper gettext extraction
893 893 _ = lambda s: s
894 894
895 895 def _link_to_if(condition, label, url):
896 896 """
897 897 Generates a link if the condition is met, or just the label if not.
898 898 """
899 899
900 900 if condition:
901 901 return '''<a href="%(url)s" class="tooltip"
902 902 title="%(title)s">%(label)s</a>''' % {
903 903 'title': _('Click to select line'),
904 904 'url': url,
905 905 'label': label
906 906 }
907 907 else:
908 908 return label
909 909 if not self.parsed:
910 910 self.prepare()
911 911
912 912 diff_lines = self.parsed_diff
913 913 if parsed_lines:
914 914 diff_lines = parsed_lines
915 915
916 916 _html_empty = True
917 917 _html = []
918 918 _html.append('''<table class="%(table_class)s">\n''' % {
919 919 'table_class': table_class
920 920 })
921 921
922 922 for diff in diff_lines:
923 923 for line in diff['chunks']:
924 924 _html_empty = False
925 925 for change in line:
926 926 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
927 927 'lc': line_class,
928 928 'action': change['action']
929 929 })
930 930 anchor_old_id = ''
931 931 anchor_new_id = ''
932 932 anchor_old = "%(filename)s_o%(oldline_no)s" % {
933 933 'filename': self._safe_id(diff['filename']),
934 934 'oldline_no': change['old_lineno']
935 935 }
936 936 anchor_new = "%(filename)s_n%(oldline_no)s" % {
937 937 'filename': self._safe_id(diff['filename']),
938 938 'oldline_no': change['new_lineno']
939 939 }
940 940 cond_old = (change['old_lineno'] != '...' and
941 941 change['old_lineno'])
942 942 cond_new = (change['new_lineno'] != '...' and
943 943 change['new_lineno'])
944 944 if cond_old:
945 945 anchor_old_id = 'id="%s"' % anchor_old
946 946 if cond_new:
947 947 anchor_new_id = 'id="%s"' % anchor_new
948 948
949 949 if change['action'] != Action.CONTEXT:
950 950 anchor_link = True
951 951 else:
952 952 anchor_link = False
953 953
954 954 ###########################################################
955 955 # COMMENT ICONS
956 956 ###########################################################
957 957 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
958 958
959 959 if enable_comments and change['action'] != Action.CONTEXT:
960 960 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
961 961
962 962 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
963 963
964 964 ###########################################################
965 965 # OLD LINE NUMBER
966 966 ###########################################################
967 967 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
968 968 'a_id': anchor_old_id,
969 969 'olc': old_lineno_class
970 970 })
971 971
972 972 _html.append('''%(link)s''' % {
973 973 'link': _link_to_if(anchor_link, change['old_lineno'],
974 974 '#%s' % anchor_old)
975 975 })
976 976 _html.append('''</td>\n''')
977 977 ###########################################################
978 978 # NEW LINE NUMBER
979 979 ###########################################################
980 980
981 981 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
982 982 'a_id': anchor_new_id,
983 983 'nlc': new_lineno_class
984 984 })
985 985
986 986 _html.append('''%(link)s''' % {
987 987 'link': _link_to_if(anchor_link, change['new_lineno'],
988 988 '#%s' % anchor_new)
989 989 })
990 990 _html.append('''</td>\n''')
991 991 ###########################################################
992 992 # CODE
993 993 ###########################################################
994 994 code_classes = [code_class]
995 995 if (not enable_comments or
996 996 change['action'] == Action.CONTEXT):
997 997 code_classes.append('no-comment')
998 998 _html.append('\t<td class="%s">' % ' '.join(code_classes))
999 999 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
1000 1000 'code': change['line']
1001 1001 })
1002 1002
1003 1003 _html.append('''\t</td>''')
1004 1004 _html.append('''\n</tr>\n''')
1005 1005 _html.append('''</table>''')
1006 1006 if _html_empty:
1007 1007 return None
1008 1008 return ''.join(_html)
1009 1009
1010 1010 def stat(self):
1011 1011 """
1012 1012 Returns tuple of added, and removed lines for this instance
1013 1013 """
1014 1014 return self.adds, self.removes
1015 1015
1016 1016 def get_context_of_line(
1017 1017 self, path, diff_line=None, context_before=3, context_after=3):
1018 1018 """
1019 1019 Returns the context lines for the specified diff line.
1020 1020
1021 1021 :type diff_line: :class:`DiffLineNumber`
1022 1022 """
1023 1023 assert self.parsed, "DiffProcessor is not initialized."
1024 1024
1025 1025 if None not in diff_line:
1026 1026 raise ValueError(
1027 1027 "Cannot specify both line numbers: {}".format(diff_line))
1028 1028
1029 1029 file_diff = self._get_file_diff(path)
1030 1030 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1031 1031
1032 1032 first_line_to_include = max(idx - context_before, 0)
1033 1033 first_line_after_context = idx + context_after + 1
1034 1034 context_lines = chunk[first_line_to_include:first_line_after_context]
1035 1035
1036 1036 line_contents = [
1037 1037 _context_line(line) for line in context_lines
1038 1038 if _is_diff_content(line)]
1039 1039 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1040 1040 # Once they are fixed, we can drop this line here.
1041 1041 if line_contents:
1042 1042 line_contents[-1] = (
1043 1043 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1044 1044 return line_contents
1045 1045
1046 1046 def find_context(self, path, context, offset=0):
1047 1047 """
1048 1048 Finds the given `context` inside of the diff.
1049 1049
1050 1050 Use the parameter `offset` to specify which offset the target line has
1051 1051 inside of the given `context`. This way the correct diff line will be
1052 1052 returned.
1053 1053
1054 1054 :param offset: Shall be used to specify the offset of the main line
1055 1055 within the given `context`.
1056 1056 """
1057 1057 if offset < 0 or offset >= len(context):
1058 1058 raise ValueError(
1059 1059 "Only positive values up to the length of the context "
1060 1060 "minus one are allowed.")
1061 1061
1062 1062 matches = []
1063 1063 file_diff = self._get_file_diff(path)
1064 1064
1065 1065 for chunk in file_diff['chunks']:
1066 1066 context_iter = iter(context)
1067 1067 for line_idx, line in enumerate(chunk):
1068 1068 try:
1069 1069 if _context_line(line) == next(context_iter):
1070 1070 continue
1071 1071 except StopIteration:
1072 1072 matches.append((line_idx, chunk))
1073 1073 context_iter = iter(context)
1074 1074
1075 1075 # Increment position and trigger StopIteration
1076 1076 # if we had a match at the end
1077 1077 line_idx += 1
1078 1078 try:
1079 1079 next(context_iter)
1080 1080 except StopIteration:
1081 1081 matches.append((line_idx, chunk))
1082 1082
1083 1083 effective_offset = len(context) - offset
1084 1084 found_at_diff_lines = [
1085 1085 _line_to_diff_line_number(chunk[idx - effective_offset])
1086 1086 for idx, chunk in matches]
1087 1087
1088 1088 return found_at_diff_lines
1089 1089
1090 1090 def _get_file_diff(self, path):
1091 1091 for file_diff in self.parsed_diff:
1092 1092 if file_diff['filename'] == path:
1093 1093 break
1094 1094 else:
1095 1095 raise FileNotInDiffException("File {} not in diff".format(path))
1096 1096 return file_diff
1097 1097
1098 1098 def _find_chunk_line_index(self, file_diff, diff_line):
1099 1099 for chunk in file_diff['chunks']:
1100 1100 for idx, line in enumerate(chunk):
1101 1101 if line['old_lineno'] == diff_line.old:
1102 1102 return chunk, idx
1103 1103 if line['new_lineno'] == diff_line.new:
1104 1104 return chunk, idx
1105 1105 raise LineNotInDiffException(
1106 1106 "The line {} is not part of the diff.".format(diff_line))
1107 1107
1108 1108
1109 1109 def _is_diff_content(line):
1110 1110 return line['action'] in (
1111 1111 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1112 1112
1113 1113
1114 1114 def _context_line(line):
1115 1115 return (line['action'], line['line'])
1116 1116
1117 1117
1118 1118 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1119 1119
1120 1120
1121 1121 def _line_to_diff_line_number(line):
1122 1122 new_line_no = line['new_lineno'] or None
1123 1123 old_line_no = line['old_lineno'] or None
1124 1124 return DiffLineNumber(old=old_line_no, new=new_line_no)
1125 1125
1126 1126
1127 1127 class FileNotInDiffException(Exception):
1128 1128 """
1129 1129 Raised when the context for a missing file is requested.
1130 1130
1131 1131 If you request the context for a line in a file which is not part of the
1132 1132 given diff, then this exception is raised.
1133 1133 """
1134 1134
1135 1135
1136 1136 class LineNotInDiffException(Exception):
1137 1137 """
1138 1138 Raised when the context for a missing line is requested.
1139 1139
1140 1140 If you request the context for a line in a file and this line is not
1141 1141 part of the given diff, then this exception is raised.
1142 1142 """
1143 1143
1144 1144
1145 1145 class DiffLimitExceeded(Exception):
1146 1146 pass
1147 1147
1148 1148
1149 1149 # NOTE(marcink): if diffs.mako change, probably this
1150 1150 # needs a bump to next version
1151 1151 CURRENT_DIFF_VERSION = 'v5'
1152 1152
1153 1153
1154 1154 def _cleanup_cache_file(cached_diff_file):
1155 1155 # cleanup file to not store it "damaged"
1156 1156 try:
1157 1157 os.remove(cached_diff_file)
1158 1158 except Exception:
1159 1159 log.exception('Failed to cleanup path %s', cached_diff_file)
1160 1160
1161 1161
1162 1162 def _get_compression_mode(cached_diff_file):
1163 1163 mode = 'bz2'
1164 1164 if 'mode:plain' in cached_diff_file:
1165 1165 mode = 'plain'
1166 1166 elif 'mode:gzip' in cached_diff_file:
1167 1167 mode = 'gzip'
1168 1168 return mode
1169 1169
1170 1170
1171 1171 def cache_diff(cached_diff_file, diff, commits):
1172 1172 compression_mode = _get_compression_mode(cached_diff_file)
1173 1173
1174 1174 struct = {
1175 1175 'version': CURRENT_DIFF_VERSION,
1176 1176 'diff': diff,
1177 1177 'commits': commits
1178 1178 }
1179 1179
1180 1180 start = time.time()
1181 1181 try:
1182 1182 if compression_mode == 'plain':
1183 1183 with open(cached_diff_file, 'wb') as f:
1184 1184 pickle.dump(struct, f)
1185 1185 elif compression_mode == 'gzip':
1186 1186 with gzip.GzipFile(cached_diff_file, 'wb') as f:
1187 1187 pickle.dump(struct, f)
1188 1188 else:
1189 1189 with bz2.BZ2File(cached_diff_file, 'wb') as f:
1190 1190 pickle.dump(struct, f)
1191 1191 except Exception:
1192 1192 log.warn('Failed to save cache', exc_info=True)
1193 1193 _cleanup_cache_file(cached_diff_file)
1194 1194
1195 1195 log.debug('Saved diff cache under %s in %.4fs', cached_diff_file, time.time() - start)
1196 1196
1197 1197
1198 1198 def load_cached_diff(cached_diff_file):
1199 1199 compression_mode = _get_compression_mode(cached_diff_file)
1200 1200
1201 1201 default_struct = {
1202 1202 'version': CURRENT_DIFF_VERSION,
1203 1203 'diff': None,
1204 1204 'commits': None
1205 1205 }
1206 1206
1207 1207 has_cache = os.path.isfile(cached_diff_file)
1208 1208 if not has_cache:
1209 1209 log.debug('Reading diff cache file failed %s', cached_diff_file)
1210 1210 return default_struct
1211 1211
1212 1212 data = None
1213 1213
1214 1214 start = time.time()
1215 1215 try:
1216 1216 if compression_mode == 'plain':
1217 1217 with open(cached_diff_file, 'rb') as f:
1218 1218 data = pickle.load(f)
1219 1219 elif compression_mode == 'gzip':
1220 1220 with gzip.GzipFile(cached_diff_file, 'rb') as f:
1221 1221 data = pickle.load(f)
1222 1222 else:
1223 1223 with bz2.BZ2File(cached_diff_file, 'rb') as f:
1224 1224 data = pickle.load(f)
1225 1225 except Exception:
1226 1226 log.warn('Failed to read diff cache file', exc_info=True)
1227 1227
1228 1228 if not data:
1229 1229 data = default_struct
1230 1230
1231 1231 if not isinstance(data, dict):
1232 1232 # old version of data ?
1233 1233 data = default_struct
1234 1234
1235 1235 # check version
1236 1236 if data.get('version') != CURRENT_DIFF_VERSION:
1237 1237 # purge cache
1238 1238 _cleanup_cache_file(cached_diff_file)
1239 1239 return default_struct
1240 1240
1241 1241 log.debug('Loaded diff cache from %s in %.4fs', cached_diff_file, time.time() - start)
1242 1242
1243 1243 return data
1244 1244
1245 1245
1246 1246 def generate_diff_cache_key(*args):
1247 1247 """
1248 1248 Helper to generate a cache key using arguments
1249 1249 """
1250 1250 def arg_mapper(input_param):
1251 1251 input_param = safe_str(input_param)
1252 1252 # we cannot allow '/' in arguments since it would allow
1253 1253 # subdirectory usage
1254 1254 input_param.replace('/', '_')
1255 1255 return input_param or None # prevent empty string arguments
1256 1256
1257 1257 return '_'.join([
1258 1258 '{}' for i in range(len(args))]).format(*map(arg_mapper, args))
1259 1259
1260 1260
1261 1261 def diff_cache_exist(cache_storage, *args):
1262 1262 """
1263 1263 Based on all generated arguments check and return a cache path
1264 1264 """
1265 1265 args = list(args) + ['mode:gzip']
1266 1266 cache_key = generate_diff_cache_key(*args)
1267 1267 cache_file_path = os.path.join(cache_storage, cache_key)
1268 1268 # prevent path traversal attacks using some param that have e.g '../../'
1269 1269 if not os.path.abspath(cache_file_path).startswith(cache_storage):
1270 1270 raise ValueError('Final path must be within {}'.format(cache_storage))
1271 1271
1272 1272 return cache_file_path
@@ -1,365 +1,365 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2010-2020 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 import os
22 22 import time
23 23 import logging
24 24 import tempfile
25 25 import traceback
26 26 import threading
27 27 import socket
28 28 import msgpack
29 29
30 from BaseHTTPServer import BaseHTTPRequestHandler
31 from SocketServer import TCPServer
30 from http.server import BaseHTTPRequestHandler
31 from socketserver import TCPServer
32 32
33 33 import rhodecode
34 34 from rhodecode.lib.exceptions import HTTPLockedRC, HTTPBranchProtected
35 35 from rhodecode.model import meta
36 36 from rhodecode.lib.base import bootstrap_request, bootstrap_config
37 37 from rhodecode.lib import hooks_base
38 38 from rhodecode.lib.utils2 import AttributeDict
39 39 from rhodecode.lib.ext_json import json
40 40 from rhodecode.lib import rc_cache
41 41
42 42 log = logging.getLogger(__name__)
43 43
44 44
45 45 class HooksHttpHandler(BaseHTTPRequestHandler):
46 46
47 47 def do_POST(self):
48 48 hooks_proto, method, extras = self._read_request()
49 49 log.debug('Handling HooksHttpHandler %s with %s proto', method, hooks_proto)
50 50
51 51 txn_id = getattr(self.server, 'txn_id', None)
52 52 if txn_id:
53 53 log.debug('Computing TXN_ID based on `%s`:`%s`',
54 54 extras['repository'], extras['txn_id'])
55 55 computed_txn_id = rc_cache.utils.compute_key_from_params(
56 56 extras['repository'], extras['txn_id'])
57 57 if txn_id != computed_txn_id:
58 58 raise Exception(
59 59 'TXN ID fail: expected {} got {} instead'.format(
60 60 txn_id, computed_txn_id))
61 61
62 62 request = getattr(self.server, 'request', None)
63 63 try:
64 64 hooks = Hooks(request=request, log_prefix='HOOKS: {} '.format(self.server.server_address))
65 65 result = self._call_hook_method(hooks, method, extras)
66 66 except Exception as e:
67 67 exc_tb = traceback.format_exc()
68 68 result = {
69 69 'exception': e.__class__.__name__,
70 70 'exception_traceback': exc_tb,
71 71 'exception_args': e.args
72 72 }
73 73 self._write_response(hooks_proto, result)
74 74
75 75 def _read_request(self):
76 76 length = int(self.headers['Content-Length'])
77 77 hooks_proto = self.headers.get('rc-hooks-protocol') or 'json.v1'
78 78 if hooks_proto == 'msgpack.v1':
79 79 # support for new vcsserver msgpack based protocol hooks
80 80 data = msgpack.unpackb(self.rfile.read(length), raw=False)
81 81 else:
82 82 body = self.rfile.read(length).decode('utf-8')
83 83 data = json.loads(body)
84 84
85 85 return hooks_proto, data['method'], data['extras']
86 86
87 87 def _write_response(self, hooks_proto, result):
88 88 self.send_response(200)
89 89 if hooks_proto == 'msgpack.v1':
90 90 self.send_header("Content-type", "application/msgpack")
91 91 self.end_headers()
92 92 self.wfile.write(msgpack.packb(result))
93 93 else:
94 94 self.send_header("Content-type", "text/json")
95 95 self.end_headers()
96 96 self.wfile.write(json.dumps(result))
97 97
98 98 def _call_hook_method(self, hooks, method, extras):
99 99 try:
100 100 result = getattr(hooks, method)(extras)
101 101 finally:
102 102 meta.Session.remove()
103 103 return result
104 104
105 105 def log_message(self, format, *args):
106 106 """
107 107 This is an overridden method of BaseHTTPRequestHandler which logs using
108 108 logging library instead of writing directly to stderr.
109 109 """
110 110
111 111 message = format % args
112 112
113 113 log.debug(
114 114 "HOOKS: %s - - [%s] %s", self.client_address,
115 115 self.log_date_time_string(), message)
116 116
117 117
118 118 class DummyHooksCallbackDaemon(object):
119 119 hooks_uri = ''
120 120
121 121 def __init__(self):
122 122 self.hooks_module = Hooks.__module__
123 123
124 124 def __enter__(self):
125 125 log.debug('Running `%s` callback daemon', self.__class__.__name__)
126 126 return self
127 127
128 128 def __exit__(self, exc_type, exc_val, exc_tb):
129 129 log.debug('Exiting `%s` callback daemon', self.__class__.__name__)
130 130
131 131
132 132 class ThreadedHookCallbackDaemon(object):
133 133
134 134 _callback_thread = None
135 135 _daemon = None
136 136 _done = False
137 137
138 138 def __init__(self, txn_id=None, host=None, port=None):
139 139 self._prepare(txn_id=txn_id, host=host, port=port)
140 140
141 141 def __enter__(self):
142 142 log.debug('Running `%s` callback daemon', self.__class__.__name__)
143 143 self._run()
144 144 return self
145 145
146 146 def __exit__(self, exc_type, exc_val, exc_tb):
147 147 log.debug('Exiting `%s` callback daemon', self.__class__.__name__)
148 148 self._stop()
149 149
150 150 def _prepare(self, txn_id=None, host=None, port=None):
151 151 raise NotImplementedError()
152 152
153 153 def _run(self):
154 154 raise NotImplementedError()
155 155
156 156 def _stop(self):
157 157 raise NotImplementedError()
158 158
159 159
160 160 class HttpHooksCallbackDaemon(ThreadedHookCallbackDaemon):
161 161 """
162 162 Context manager which will run a callback daemon in a background thread.
163 163 """
164 164
165 165 hooks_uri = None
166 166
167 167 # From Python docs: Polling reduces our responsiveness to a shutdown
168 168 # request and wastes cpu at all other times.
169 169 POLL_INTERVAL = 0.01
170 170
171 171 @property
172 172 def _hook_prefix(self):
173 173 return 'HOOKS: {} '.format(self.hooks_uri)
174 174
175 175 def get_hostname(self):
176 176 return socket.gethostname() or '127.0.0.1'
177 177
178 178 def get_available_port(self, min_port=20000, max_port=65535):
179 179 from rhodecode.lib.utils2 import get_available_port as _get_port
180 180 return _get_port(min_port, max_port)
181 181
182 182 def _prepare(self, txn_id=None, host=None, port=None):
183 183 from pyramid.threadlocal import get_current_request
184 184
185 185 if not host or host == "*":
186 186 host = self.get_hostname()
187 187 if not port:
188 188 port = self.get_available_port()
189 189
190 190 server_address = (host, port)
191 191 self.hooks_uri = '{}:{}'.format(host, port)
192 192 self.txn_id = txn_id
193 193 self._done = False
194 194
195 195 log.debug(
196 196 "%s Preparing HTTP callback daemon registering hook object: %s",
197 197 self._hook_prefix, HooksHttpHandler)
198 198
199 199 self._daemon = TCPServer(server_address, HooksHttpHandler)
200 200 # inject transaction_id for later verification
201 201 self._daemon.txn_id = self.txn_id
202 202
203 203 # pass the WEB app request into daemon
204 204 self._daemon.request = get_current_request()
205 205
206 206 def _run(self):
207 207 log.debug("Running event loop of callback daemon in background thread")
208 208 callback_thread = threading.Thread(
209 209 target=self._daemon.serve_forever,
210 210 kwargs={'poll_interval': self.POLL_INTERVAL})
211 211 callback_thread.daemon = True
212 212 callback_thread.start()
213 213 self._callback_thread = callback_thread
214 214
215 215 def _stop(self):
216 216 log.debug("Waiting for background thread to finish.")
217 217 self._daemon.shutdown()
218 218 self._callback_thread.join()
219 219 self._daemon = None
220 220 self._callback_thread = None
221 221 if self.txn_id:
222 222 txn_id_file = get_txn_id_data_path(self.txn_id)
223 223 log.debug('Cleaning up TXN ID %s', txn_id_file)
224 224 if os.path.isfile(txn_id_file):
225 225 os.remove(txn_id_file)
226 226
227 227 log.debug("Background thread done.")
228 228
229 229
230 230 def get_txn_id_data_path(txn_id):
231 231 import rhodecode
232 232
233 233 root = rhodecode.CONFIG.get('cache_dir') or tempfile.gettempdir()
234 234 final_dir = os.path.join(root, 'svn_txn_id')
235 235
236 236 if not os.path.isdir(final_dir):
237 237 os.makedirs(final_dir)
238 238 return os.path.join(final_dir, 'rc_txn_id_{}'.format(txn_id))
239 239
240 240
241 241 def store_txn_id_data(txn_id, data_dict):
242 242 if not txn_id:
243 243 log.warning('Cannot store txn_id because it is empty')
244 244 return
245 245
246 246 path = get_txn_id_data_path(txn_id)
247 247 try:
248 248 with open(path, 'wb') as f:
249 249 f.write(json.dumps(data_dict))
250 250 except Exception:
251 251 log.exception('Failed to write txn_id metadata')
252 252
253 253
254 254 def get_txn_id_from_store(txn_id):
255 255 """
256 256 Reads txn_id from store and if present returns the data for callback manager
257 257 """
258 258 path = get_txn_id_data_path(txn_id)
259 259 try:
260 260 with open(path, 'rb') as f:
261 261 return json.loads(f.read())
262 262 except Exception:
263 263 return {}
264 264
265 265
266 266 def prepare_callback_daemon(extras, protocol, host, use_direct_calls, txn_id=None):
267 267 txn_details = get_txn_id_from_store(txn_id)
268 268 port = txn_details.get('port', 0)
269 269 if use_direct_calls:
270 270 callback_daemon = DummyHooksCallbackDaemon()
271 271 extras['hooks_module'] = callback_daemon.hooks_module
272 272 else:
273 273 if protocol == 'http':
274 274 callback_daemon = HttpHooksCallbackDaemon(
275 275 txn_id=txn_id, host=host, port=port)
276 276 else:
277 277 log.error('Unsupported callback daemon protocol "%s"', protocol)
278 278 raise Exception('Unsupported callback daemon protocol.')
279 279
280 280 extras['hooks_uri'] = callback_daemon.hooks_uri
281 281 extras['hooks_protocol'] = protocol
282 282 extras['time'] = time.time()
283 283
284 284 # register txn_id
285 285 extras['txn_id'] = txn_id
286 286 log.debug('Prepared a callback daemon: %s at url `%s`',
287 287 callback_daemon.__class__.__name__, callback_daemon.hooks_uri)
288 288 return callback_daemon, extras
289 289
290 290
291 291 class Hooks(object):
292 292 """
293 293 Exposes the hooks for remote call backs
294 294 """
295 295 def __init__(self, request=None, log_prefix=''):
296 296 self.log_prefix = log_prefix
297 297 self.request = request
298 298
299 299 def repo_size(self, extras):
300 300 log.debug("%sCalled repo_size of %s object", self.log_prefix, self)
301 301 return self._call_hook(hooks_base.repo_size, extras)
302 302
303 303 def pre_pull(self, extras):
304 304 log.debug("%sCalled pre_pull of %s object", self.log_prefix, self)
305 305 return self._call_hook(hooks_base.pre_pull, extras)
306 306
307 307 def post_pull(self, extras):
308 308 log.debug("%sCalled post_pull of %s object", self.log_prefix, self)
309 309 return self._call_hook(hooks_base.post_pull, extras)
310 310
311 311 def pre_push(self, extras):
312 312 log.debug("%sCalled pre_push of %s object", self.log_prefix, self)
313 313 return self._call_hook(hooks_base.pre_push, extras)
314 314
315 315 def post_push(self, extras):
316 316 log.debug("%sCalled post_push of %s object", self.log_prefix, self)
317 317 return self._call_hook(hooks_base.post_push, extras)
318 318
319 319 def _call_hook(self, hook, extras):
320 320 extras = AttributeDict(extras)
321 321 server_url = extras['server_url']
322 322
323 323 extras.request = self.request
324 324
325 325 try:
326 326 result = hook(extras)
327 327 if result is None:
328 328 raise Exception(
329 329 'Failed to obtain hook result from func: {}'.format(hook))
330 330 except HTTPBranchProtected as handled_error:
331 331 # These special cases don't need error reporting. It's a case of a
332 332 # locked repo or a protected branch
333 333 result = AttributeDict({
334 334 'status': handled_error.code,
335 335 'output': handled_error.explanation
336 336 })
337 337 except (HTTPLockedRC, Exception) as error:
338 338 # locked needs different handling since we need to also
339 339 # handle PULL operations
340 340 exc_tb = ''
341 341 if not isinstance(error, HTTPLockedRC):
342 342 exc_tb = traceback.format_exc()
343 343 log.exception('%sException when handling hook %s', self.log_prefix, hook)
344 344 error_args = error.args
345 345 return {
346 346 'status': 128,
347 347 'output': '',
348 348 'exception': type(error).__name__,
349 349 'exception_traceback': exc_tb,
350 350 'exception_args': error_args,
351 351 }
352 352 finally:
353 353 meta.Session.remove()
354 354
355 355 log.debug('%sGot hook call response %s', self.log_prefix, result)
356 356 return {
357 357 'status': result.status,
358 358 'output': result.output,
359 359 }
360 360
361 361 def __enter__(self):
362 362 return self
363 363
364 364 def __exit__(self, exc_type, exc_val, exc_tb):
365 365 pass
General Comments 0
You need to be logged in to leave comments. Login now