##// END OF EJS Templates
core: a few Python 3 fixes found during CE test runs
super-admin -
r1085:4d8f2d38 python3
parent child Browse files
Show More
@@ -1,136 +1,136 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17 import os
18 18 import sys
19 19 import traceback
20 20 import logging
21 21 import urllib.parse
22 22
23 23 from vcsserver.lib.rc_cache import region_meta
24 24
25 25 from vcsserver import exceptions
26 26 from vcsserver.exceptions import NoContentException
27 27 from vcsserver.hgcompat import archival
28 28 from vcsserver.str_utils import safe_bytes
29 29
30 30 log = logging.getLogger(__name__)
31 31
32 32
class RepoFactory(object):
    """
    Base factory for repository instances.

    Subclasses provide ``_create_repo``/``repo``; every instance shares the
    dogpile cache region used for `repo` objects keyed by the
    :term:`call context`.
    """
    repo_type = None

    def __init__(self):
        regions = region_meta.dogpile_cache_regions
        self._cache_region = regions['repo_object']

    def _create_config(self, path, config):
        # NOTE: incoming path/config are ignored; an empty dict is returned
        return {}

    def _create_repo(self, wire, create):
        raise NotImplementedError()

    def repo(self, wire, create=False):
        raise NotImplementedError()
54 54
55 55
def obfuscate_qs(query_string):
    """
    Return *query_string* with sensitive values masked.

    Values of ``auth_token`` and ``api_key`` are replaced with ``*****``;
    blank values are kept (rendered without ``=``). ``None`` passes through.
    """
    if query_string is None:
        return None

    sanitized = []
    for key, value in urllib.parse.parse_qsl(query_string, keep_blank_values=True):
        if key in ['auth_token', 'api_key']:
            value = "*****"
        sanitized.append((key, value))

    pairs = []
    for key, value in sanitized:
        suffix = '={}'.format(value) if value else ''
        pairs.append('{}{}'.format(key, suffix))
    return '&'.join(pairs)
68 68
69 69
def raise_from_original(new_type, org_exc: Exception):
    """
    Re-raise the active exception as ``new_type``, keeping args and traceback.

    Must be called from inside an ``except`` block. The formatted original
    traceback is stored on the new exception as ``_org_exc_tb``.
    """
    _, original, tb = sys.exc_info()
    replacement = new_type(*original.args)

    # keep a formatted copy of the original traceback on the new exception
    replacement._org_exc_tb = traceback.format_tb(tb)

    try:
        raise replacement.with_traceback(tb)
    finally:
        # break the reference cycle created by holding the traceback
        del tb
85 85
86 86
class ArchiveNode(object):
    """Value object describing a single entry to be written into an archive."""

    def __init__(self, path, mode, is_link, raw_bytes):
        self.path = path            # archive-relative path of the entry
        self.mode = mode            # file mode bits
        self.is_link = is_link      # whether the entry is a symlink
        self.raw_bytes = raw_bytes  # content provider (called by archive_repo)
93 93
94 94
def archive_repo(walker, archive_dest_path, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id, write_metadata=True, extra_metadata=None):
    """
    Write the files yielded by *walker* into an archive at *archive_dest_path*.

    walker should be a file walker, for example:
    def walker():
        for file_info in files:
            yield ArchiveNode(fn, mode, is_link, ctx[fn].data)

    :param kind: one of "tgz", "tbz2", "zip"; anything else raises.
    :param mtime: timestamp stamped onto archive members.
    :param archive_dir_name: top-level directory name inside the archive.
    :param write_metadata: when True, append a ``.archival.txt`` member with
        commit_id/mtime plus *extra_metadata*.
    :returns: whatever ``archiver.done()`` returns.
    """
    extra_metadata = extra_metadata or {}
    # archiver APIs below operate on bytes paths
    archive_dest_path = safe_bytes(archive_dest_path)

    if kind == "tgz":
        archiver = archival.tarit(archive_dest_path, mtime, b"gz")
    elif kind == "tbz2":
        archiver = archival.tarit(archive_dest_path, mtime, b"bz2")
    elif kind == 'zip':
        archiver = archival.zipit(archive_dest_path, mtime)
    else:
        raise exceptions.ArchiveException()(
            f'Remote does not support: "{kind}" archive type.')

    for f in walker(commit_id, archive_at_path):
        f_path = os.path.join(safe_bytes(archive_dir_name), safe_bytes(f.path).lstrip(b'/'))
        try:
            archiver.addfile(f_path, f.mode, f.is_link, f.raw_bytes())
        except NoContentException:
            # NOTE(marcink): this is a special case for SVN so we can create "empty"
            # directories which aren't supported by archiver
            archiver.addfile(os.path.join(f_path, b'.dir'), f.mode, f.is_link, b'')

    if write_metadata:
        metadata = dict([
            ('commit_id', commit_id),
            ('mtime', mtime),
        ])
        metadata.update(extra_metadata)

        # one "key:value" line per metadata entry
        meta = [safe_bytes(f"{f_name}:{value}") for f_name, value in metadata.items()]
        f_path = os.path.join(safe_bytes(archive_dir_name), b'.archival.txt')
        archiver.addfile(f_path, 0o644, False, b'\n'.join(meta))

    return archiver.done()
@@ -1,1366 +1,1367 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import posixpath as vcspath
22 22 import re
23 23 import stat
24 24 import traceback
25 25 import urllib.request, urllib.parse, urllib.error
26 26 import urllib.request, urllib.error, urllib.parse
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 34 from dulwich.client import HttpGitClient, LocalGitClient
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 40 from dulwich.server import update_server_info
41 41
42 42 from vcsserver import exceptions, settings, subprocessio
43 43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_str, ascii_bytes
44 44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo
45 45 from vcsserver.hgcompat import (
46 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 47 from vcsserver.git_lfs.lib import LFSOidStore
48 48 from vcsserver.vcs_base import RemoteBase
49 49
DIR_STAT = stat.S_IFDIR          # mode bits marking a directory entry
FILE_MODE = stat.S_IFMT          # stat.S_IFMT — callable extracting file-type bits
GIT_LINK = objects.S_IFGITLINK   # mode git uses for submodule (gitlink) entries
PEELED_REF_MARKER = b'^{}'       # suffix marking peeled tag refs in ref listings
HEAD_MARKER = b'HEAD'

log = logging.getLogger(__name__)
57 57
58 58
def reraise_safe_exceptions(func):
    """Decorator translating dulwich errors into neutral vcsserver exceptions."""

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as e:
            raise exceptions.LookupException(org_exc=e)(safe_str(e))
        except (HangupException, UnexpectedCommandError) as e:
            raise exceptions.VcsException(org_exc=e)(safe_str(e))
        except Exception:
            # NOTE(marcink): because of how dulwich handles some exceptions
            # (KeyError on empty repos), we cannot track this and catch all
            # exceptions, it's an exceptions from other handlers
            #if not hasattr(e, '_vcs_kind'):
            #log.exception("Unhandled exception in git remote call")
            #raise_from_original(exceptions.UnhandledException)
            raise

    return wrapper
81 81
82 82
class Repo(DulwichRepo):
    """
    A wrapper for dulwich Repo class.

    Since dulwich is sometimes keeping .idx file descriptors open, it leads to
    "Too many open files" error. We need to close all opened file descriptors
    once the repo object is destroyed.
    """

    def __del__(self):
        # guard against partially-initialized instances where `object_store`
        # was never set
        if hasattr(self, 'object_store'):
            self.close()
94 94
95 95
class Repository(LibGit2Repo):
    """pygit2 Repository usable as a context manager that frees its handle."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # release resources held by this repository handle on scope exit
        self.free()
103 103
104 104
class GitFactory(RepoFactory):
    """Factory creating git repository objects (dulwich or libgit2 backed)."""

    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        if use_libgit2:
            return Repository(safe_bytes(wire['path']))
        # dulwich mode
        repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
        return Repo(repo_path)

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        # convenience wrapper forcing the libgit2 backend
        return self.repo(wire, use_libgit2=True)
124 124
125 125
126 126 class GitRemote(RemoteBase):
127 127
128 128 def __init__(self, factory):
129 129 self._factory = factory
130 130 self._bulk_methods = {
131 131 "date": self.date,
132 132 "author": self.author,
133 133 "branch": self.branch,
134 134 "message": self.message,
135 135 "parents": self.parents,
136 136 "_commit": self.revision,
137 137 }
138 138
139 139 def _wire_to_config(self, wire):
140 140 if 'config' in wire:
141 141 return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']])
142 142 return {}
143 143
144 144 def _remote_conf(self, config):
145 145 params = [
146 146 '-c', 'core.askpass=""',
147 147 ]
148 148 ssl_cert_dir = config.get('vcs_ssl_dir')
149 149 if ssl_cert_dir:
150 150 params.extend(['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)])
151 151 return params
152 152
153 153 @reraise_safe_exceptions
154 154 def discover_git_version(self):
155 155 stdout, _ = self.run_git_command(
156 156 {}, ['--version'], _bare=True, _safe=True)
157 157 prefix = b'git version'
158 158 if stdout.startswith(prefix):
159 159 stdout = stdout[len(prefix):]
160 160 return safe_str(stdout.strip())
161 161
    @reraise_safe_exceptions
    def is_empty(self, wire):
        """Return True when the repository has no resolvable HEAD/commits."""
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:

            try:
                has_head = repo.head.name
                if has_head:
                    return False

                # NOTE(marcink): check again using more expensive method
                return repo.is_empty
            except Exception:
                # NOTE(review): any failure resolving HEAD is treated as
                # "empty"; this broad catch also hides unrelated errors —
                # confirm that is intended
                pass

            return True
178 178
    @reraise_safe_exceptions
    def assert_correct_path(self, wire):
        """Return True when the wire path opens as a valid git repository."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _assert_correct_path(_context_uid, _repo_id):
            try:
                repo_init = self._factory.repo_libgit2(wire)
                with repo_init as repo:
                    # successfully opening (and closing) the repo IS the check
                    pass
            except pygit2.GitError:
                path = wire.get('path')
                tb = traceback.format_exc()
                log.debug("Invalid Git path `%s`, tb: %s", path, tb)
                return False

            return True
        return _assert_correct_path(context_uid, repo_id)
198 198
199 199 @reraise_safe_exceptions
200 200 def bare(self, wire):
201 201 repo_init = self._factory.repo_libgit2(wire)
202 202 with repo_init as repo:
203 203 return repo.is_bare
204 204
205 205 @reraise_safe_exceptions
206 206 def blob_as_pretty_string(self, wire, sha):
207 207 repo_init = self._factory.repo_libgit2(wire)
208 208 with repo_init as repo:
209 209 blob_obj = repo[sha]
210 210 blob = blob_obj.data
211 211 return blob
212 212
    @reraise_safe_exceptions
    def blob_raw_length(self, wire, sha):
        """Return the size in bytes of the blob identified by *sha*."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _blob_raw_length(_repo_id, _sha):
            # NOTE: body reads `sha` from the closure; `_sha` only contributes
            # to the cache key
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                blob = repo[sha]
                return blob.size

        return _blob_raw_length(repo_id, sha)
227 227
228 228 def _parse_lfs_pointer(self, raw_content):
229 229 spec_string = b'version https://git-lfs.github.com/spec'
230 230 if raw_content and raw_content.startswith(spec_string):
231 231
232 232 pattern = re.compile(rb"""
233 233 (?:\n)?
234 234 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
235 235 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
236 236 ^size[ ](?P<oid_size>[0-9]+)\n
237 237 (?:\n)?
238 238 """, re.VERBOSE | re.MULTILINE)
239 239 match = pattern.match(raw_content)
240 240 if match:
241 241 return match.groupdict()
242 242
243 243 return {}
244 244
    @reraise_safe_exceptions
    def is_large_file(self, wire, commit_id):
        """Return parsed LFS pointer data for the object, or {} when binary/none."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _is_large_file(_repo_id, _sha):
            # NOTE: body reads `commit_id` from the closure; `_sha` is only
            # part of the cache key
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                blob = repo[commit_id]
                if blob.is_binary:
                    return {}

                return self._parse_lfs_pointer(blob.data)

        return _is_large_file(repo_id, commit_id)
261 261
262 262 @reraise_safe_exceptions
263 263 def is_binary(self, wire, tree_id):
264 264 cache_on, context_uid, repo_id = self._cache_on(wire)
265 265 region = self._region(wire)
266 266
267 267 @region.conditional_cache_on_arguments(condition=cache_on)
268 268 def _is_binary(_repo_id, _tree_id):
269 269 repo_init = self._factory.repo_libgit2(wire)
270 270 with repo_init as repo:
271 271 blob_obj = repo[tree_id]
272 272 return blob_obj.is_binary
273 273
274 274 return _is_binary(repo_id, tree_id)
275 275
    @reraise_safe_exceptions
    def md5_hash(self, wire, tree_id):
        # NOTE(review): always returns '' — appears to be an intentional stub
        # for the git backend; confirm callers accept an empty hash
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _md5_hash(_repo_id, _tree_id):
            return ''

        return _md5_hash(repo_id, tree_id)
286 286
287 287 @reraise_safe_exceptions
288 288 def in_largefiles_store(self, wire, oid):
289 289 conf = self._wire_to_config(wire)
290 290 repo_init = self._factory.repo_libgit2(wire)
291 291 with repo_init as repo:
292 292 repo_name = repo.path
293 293
294 294 store_location = conf.get('vcs_git_lfs_store_location')
295 295 if store_location:
296 296
297 297 store = LFSOidStore(
298 298 oid=oid, repo=repo_name, store_location=store_location)
299 299 return store.has_oid()
300 300
301 301 return False
302 302
303 303 @reraise_safe_exceptions
304 304 def store_path(self, wire, oid):
305 305 conf = self._wire_to_config(wire)
306 306 repo_init = self._factory.repo_libgit2(wire)
307 307 with repo_init as repo:
308 308 repo_name = repo.path
309 309
310 310 store_location = conf.get('vcs_git_lfs_store_location')
311 311 if store_location:
312 312 store = LFSOidStore(
313 313 oid=oid, repo=repo_name, store_location=store_location)
314 314 return store.oid_path
315 315 raise ValueError('Unable to fetch oid with path {}'.format(oid))
316 316
    @reraise_safe_exceptions
    def bulk_request(self, wire, rev, pre_load):
        """Resolve several commit attributes (names in *pre_load*) in one call."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _bulk_request(_repo_id, _rev, _pre_load):
            result = {}
            for attr in pre_load:
                try:
                    method = self._bulk_methods[attr]
                    wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
                    args = [wire, rev]
                    result[attr] = method(*args)
                except KeyError as e:
                    # NOTE(review): a KeyError raised inside `method` is also
                    # reported as an unknown attribute — confirm this is intended
                    raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
            return result

        return _bulk_request(repo_id, rev, sorted(pre_load))
336 336
337 337 def _build_opener(self, url):
338 338 handlers = []
339 339 url_obj = url_parser(url)
340 340 _, authinfo = url_obj.authinfo()
341 341
342 342 if authinfo:
343 343 # create a password manager
344 344 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
345 345 passmgr.add_password(*authinfo)
346 346
347 347 handlers.extend((httpbasicauthhandler(passmgr),
348 348 httpdigestauthhandler(passmgr)))
349 349
350 350 return urllib.request.build_opener(*handlers)
351 351
352 352 def _type_id_to_name(self, type_id: int):
353 353 return {
354 354 1: 'commit',
355 355 2: 'tree',
356 356 3: 'blob',
357 357 4: 'tag'
358 358 }[type_id]
359 359
360 360 @reraise_safe_exceptions
361 361 def check_url(self, url, config):
362 362 url_obj = url_parser(safe_bytes(url))
363 363 test_uri, _ = url_obj.authinfo()
364 364 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
365 365 url_obj.query = obfuscate_qs(url_obj.query)
366 366 cleaned_uri = str(url_obj)
367 367 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
368 368
369 369 if not test_uri.endswith('info/refs'):
370 370 test_uri = test_uri.rstrip('/') + '/info/refs'
371 371
372 372 o = self._build_opener(url)
373 373 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
374 374
375 375 q = {"service": 'git-upload-pack'}
376 376 qs = '?%s' % urllib.parse.urlencode(q)
377 377 cu = "%s%s" % (test_uri, qs)
378 378 req = urllib.request.Request(cu, None, {})
379 379
380 380 try:
381 381 log.debug("Trying to open URL %s", cleaned_uri)
382 382 resp = o.open(req)
383 383 if resp.code != 200:
384 384 raise exceptions.URLError()('Return Code is not 200')
385 385 except Exception as e:
386 386 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
387 387 # means it cannot be cloned
388 388 raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))
389 389
390 390 # now detect if it's proper git repo
391 391 gitdata = resp.read()
392 392 if 'service=git-upload-pack' in gitdata:
393 393 pass
394 394 elif re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata):
395 395 # old style git can return some other format !
396 396 pass
397 397 else:
398 398 raise exceptions.URLError()(
399 399 "url [%s] does not look like an git" % (cleaned_uri,))
400 400
401 401 return True
402 402
    @reraise_safe_exceptions
    def clone(self, wire, url, deferred, valid_refs, update_after_clone):
        """Clone by pulling all remote refs and copying the matching ones locally."""
        # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
        remote_refs = self.pull(wire, url, apply_refs=False)
        repo = self._factory.repo(wire)
        if isinstance(valid_refs, list):
            valid_refs = tuple(valid_refs)

        for k in remote_refs:
            # only parse heads/tags and skip so called deferred tags
            if k.startswith(valid_refs) and not k.endswith(deferred):
                repo[k] = remote_refs[k]

        if update_after_clone:
            # we want to checkout HEAD
            # NOTE(review): uses str "HEAD" while pull() uses the bytes
            # HEAD_MARKER — confirm dulwich accepts both key types here
            repo["HEAD"] = remote_refs["HEAD"]
            index.build_index_from_tree(repo.path, repo.index_path(),
                                        repo.object_store, repo["HEAD"].tree)
421 421
    @reraise_safe_exceptions
    def branch(self, wire, commit_id):
        """Return names of branches whose head equals *commit_id*."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _branch(_context_uid, _repo_id, _commit_id):
            regex = re.compile('^refs/heads')

            def filter_with(ref):
                # ref is a (name, sha) pair from get_refs()
                return regex.match(ref[0]) and ref[1] == _commit_id

            branches = list(filter(filter_with, list(self.get_refs(wire).items())))
            return [x[0].split('refs/heads/')[-1] for x in branches]

        return _branch(context_uid, repo_id, commit_id)
437 437
438 438 @reraise_safe_exceptions
439 439 def commit_branches(self, wire, commit_id):
440 440 cache_on, context_uid, repo_id = self._cache_on(wire)
441 441 region = self._region(wire)
442 442 @region.conditional_cache_on_arguments(condition=cache_on)
443 443 def _commit_branches(_context_uid, _repo_id, _commit_id):
444 444 repo_init = self._factory.repo_libgit2(wire)
445 445 with repo_init as repo:
446 446 branches = [x for x in repo.branches.with_commit(_commit_id)]
447 447 return branches
448 448
449 449 return _commit_branches(context_uid, repo_id, commit_id)
450 450
451 451 @reraise_safe_exceptions
452 452 def add_object(self, wire, content):
453 453 repo_init = self._factory.repo_libgit2(wire)
454 454 with repo_init as repo:
455 455 blob = objects.Blob()
456 456 blob.set_raw_string(content)
457 457 repo.object_store.add_object(blob)
458 458 return blob.id
459 459
    # TODO: this is quite complex, check if that can be simplified
    @reraise_safe_exceptions
    def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
        """
        Create a commit on *branch* from lists of updated/removed nodes.

        :param commit_data: mapping of commit attributes (author, message, ...).
        :param updated: nodes to add/update, each with path/content/mode keys.
        :param removed: paths to delete from the tree.
        :returns: the new commit id.
        """
        # Defines the root tree
        class _Root(object):
            def __repr__(self):
                return 'ROOT TREE'
        ROOT = _Root()

        repo = self._factory.repo(wire)
        object_store = repo.object_store

        # Create tree and populates it with blobs
        if commit_tree:
            commit_tree = safe_bytes(commit_tree)

        if commit_tree and repo[commit_tree]:
            # reuse the root tree of the first parent commit
            git_commit = repo[safe_bytes(commit_data['parents'][0])]
            commit_tree = repo[git_commit.tree]  # root tree
        else:
            commit_tree = objects.Tree()

        for node in updated:
            # Compute subdirs if needed
            dirpath, nodename = vcspath.split(node['path'])
            dirnames = list(map(safe_str, dirpath and dirpath.split('/') or []))
            parent = commit_tree
            ancestors = [('', parent)]

            # Tries to dig for the deepest existing tree
            while dirnames:
                curdir = dirnames.pop(0)
                try:
                    dir_id = parent[curdir][1]
                except KeyError:
                    # put curdir back into dirnames and stops
                    dirnames.insert(0, curdir)
                    break
                else:
                    # If found, updates parent
                    parent = repo[dir_id]
                    ancestors.append((curdir, parent))
            # Now parent is deepest existing tree and we need to create
            # subtrees for dirnames (in reverse order)
            # [this only applies for nodes from added]
            new_trees = []

            blob = objects.Blob.from_string(node['content'])

            if dirnames:
                # If there are trees which should be created we need to build
                # them now (in reverse order)
                reversed_dirnames = list(reversed(dirnames))
                curtree = objects.Tree()
                curtree[node['node_path']] = node['mode'], blob.id
                new_trees.append(curtree)
                for dirname in reversed_dirnames[:-1]:
                    newtree = objects.Tree()
                    newtree[dirname] = (DIR_STAT, curtree.id)
                    new_trees.append(newtree)
                    curtree = newtree
                parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
            else:
                parent.add(name=node['node_path'], mode=node['mode'], hexsha=blob.id)

            new_trees.append(parent)
            # Update ancestors
            reversed_ancestors = reversed(
                [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
            for parent, tree, path in reversed_ancestors:
                parent[path] = (DIR_STAT, tree.id)
                object_store.add_object(tree)

            object_store.add_object(blob)
            for tree in new_trees:
                object_store.add_object(tree)

        for node_path in removed:
            paths = node_path.split('/')
            tree = commit_tree  # start with top-level
            trees = [{'tree': tree, 'path': ROOT}]
            # Traverse deep into the forest...
            # resolve final tree by iterating the path.
            # e.g a/b/c.txt will get
            # - root as tree then
            # - 'a' as tree,
            # - 'b' as tree,
            # - stop at c as blob.
            for path in paths:
                try:
                    obj = repo[tree[path][1]]
                    if isinstance(obj, objects.Tree):
                        trees.append({'tree': obj, 'path': path})
                        tree = obj
                except KeyError:
                    break
            #PROBLEM:
            """
            We're not editing same reference tree object
            """
            # Cut down the blob and all rotten trees on the way back...
            for path, tree_data in reversed(list(zip(paths, trees))):
                tree = tree_data['tree']
                tree.__delitem__(path)
                # This operation edits the tree, we need to mark new commit back

                if len(tree) > 0:
                    # This tree still has elements - don't remove it or any
                    # of it's parents
                    break

        object_store.add_object(commit_tree)

        # Create commit
        commit = objects.Commit()
        commit.tree = commit_tree.id
        # these commit attributes must be stored as bytes
        bytes_keys = [
            'author',
            'committer',
            'message',
            'encoding',
            'parents'
        ]

        for k, v in commit_data.items():
            if k in bytes_keys:
                if k == 'parents':
                    v = [safe_bytes(x) for x in v]
                else:
                    v = safe_bytes(v)
            setattr(commit, k, v)

        object_store.add_object(commit)

        self.create_branch(wire, branch, safe_str(commit.id))

        # dulwich set-ref
        repo.refs[safe_bytes(f'refs/heads/{branch}')] = commit.id

        return commit.id
600 600
    @reraise_safe_exceptions
    def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
        """
        Fetch from *url* into the local repository.

        :param apply_refs: when True, copy the fetched refs into the local repo.
        :param refs: optional list of ref names to restrict the fetch to.
        :param update_after: when True, also check out HEAD afterwards.
        :returns: mapping of all remote refs reported by the client.
        """
        # local filesystem urls use the dulwich LocalGitClient; others go http
        if url != 'default' and '://' not in url:
            client = LocalGitClient(url)
        else:
            url_obj = url_parser(url)
            o = self._build_opener(url)
            url, _ = url_obj.authinfo()
            client = HttpGitClient(base_url=url, opener=o)
        repo = self._factory.repo(wire)

        determine_wants = repo.object_store.determine_wants_all
        if refs:
            refs = [ascii_bytes(x) for x in refs]

            def determine_wants_requested(remote_refs):
                # keep only the hashes of the explicitly requested refs
                determined = []
                for ref_name, ref_hash in remote_refs.items():
                    bytes_ref_name = safe_bytes(ref_name)

                    if bytes_ref_name in refs:
                        bytes_ref_hash = safe_bytes(ref_hash)
                        determined.append(bytes_ref_hash)
                return determined

            # swap with our custom requested wants
            determine_wants = determine_wants_requested

        try:
            remote_refs = client.fetch(
                path=url, target=repo, determine_wants=determine_wants)

        except NotGitRepository as e:
            log.warning(
                'Trying to fetch from "%s" failed, not a Git repository.', url)
            # Exception can contain unicode which we convert
            raise exceptions.AbortException(e)(repr(e))

        # mikhail: client.fetch() returns all the remote refs, but fetches only
        # refs filtered by `determine_wants` function. We need to filter result
        # as well
        if refs:
            remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}

        if apply_refs:
            # TODO: johbo: Needs proper test coverage with a git repository
            # that contains a tag object, so that we would end up with
            # a peeled ref at this point.
            for k in remote_refs:
                if k.endswith(PEELED_REF_MARKER):
                    log.debug("Skipping peeled reference %s", k)
                    continue
                repo[k] = remote_refs[k]

            if refs and not update_after:
                # mikhail: explicitly set the head to the last ref.
                repo[HEAD_MARKER] = remote_refs[refs[-1]]

        if update_after:
            # we want to checkout HEAD
            repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
            index.build_index_from_tree(repo.path, repo.index_path(),
                                        repo.object_store, repo[HEAD_MARKER].tree)
        return remote_refs
665 665
    @reraise_safe_exceptions
    def sync_fetch(self, wire, url, refs=None, all_refs=False):
        """
        Fetch refs from *url* using the git binary.

        :param refs: optional sha filter — only matching refs are fetched.
        :param all_refs: when True, list every remote ref instead of just
            heads and tags.
        :returns: ordered mapping of remote ref name -> sha.
        """
        repo = self._factory.repo(wire)
        if refs and not isinstance(refs, (list, tuple)):
            refs = [refs]

        config = self._wire_to_config(wire)
        # get all remote refs we'll use to fetch later
        cmd = ['ls-remote']
        if not all_refs:
            cmd += ['--heads', '--tags']
        cmd += [url]
        output, __ = self.run_git_command(
            wire, cmd, fail_on_stderr=False,
            _copts=self._remote_conf(config),
            extra_env={'GIT_TERMINAL_PROMPT': '0'})

        remote_refs = collections.OrderedDict()
        fetch_refs = []

        for ref_line in output.splitlines():
            sha, ref = ref_line.split(b'\t')
            sha = sha.strip()
            if ref in remote_refs:
                # duplicate, skip
                continue
            if ref.endswith(PEELED_REF_MARKER):
                log.debug("Skipping peeled reference %s", ref)
                continue
            # don't sync HEAD
            if ref in [HEAD_MARKER]:
                continue

            remote_refs[ref] = sha

            if refs and sha in refs:
                # we filter fetch using our specified refs
                fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
            elif not refs:
                fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
        log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))

        if fetch_refs:
            # fetch in chunks to keep the command line length bounded
            for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
                fetch_refs_chunks = list(chunk)
                log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
                self.run_git_command(
                    wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
                    fail_on_stderr=False,
                    _copts=self._remote_conf(config),
                    extra_env={'GIT_TERMINAL_PROMPT': '0'})

        return remote_refs
719 719
720 720 @reraise_safe_exceptions
721 721 def sync_push(self, wire, url, refs=None):
722 722 if not self.check_url(url, wire):
723 723 return
724 724 config = self._wire_to_config(wire)
725 725 self._factory.repo(wire)
726 726 self.run_git_command(
727 727 wire, ['push', url, '--mirror'], fail_on_stderr=False,
728 728 _copts=self._remote_conf(config),
729 729 extra_env={'GIT_TERMINAL_PROMPT': '0'})
730 730
731 731 @reraise_safe_exceptions
732 732 def get_remote_refs(self, wire, url):
733 733 repo = Repo(url)
734 734 return repo.get_refs()
735 735
736 736 @reraise_safe_exceptions
737 737 def get_description(self, wire):
738 738 repo = self._factory.repo(wire)
739 739 return repo.get_description()
740 740
    @reraise_safe_exceptions
    def get_missing_revs(self, wire, rev1, rev2, path2):
        """
        Return commit ids reachable from *rev2* but not from *rev1*.

        Both repositories are cross-fetched first so each side has the
        objects needed for the walk.
        """
        repo = self._factory.repo(wire)
        LocalGitClient(thin_packs=False).fetch(path2, repo)

        wire_remote = wire.copy()
        wire_remote['path'] = path2
        repo_remote = self._factory.repo(wire_remote)
        LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)

        revs = [
            x.commit.id
            for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
        return revs
755 755
    @reraise_safe_exceptions
    def get_object(self, wire, sha, maybe_unreachable=False):
        """
        Resolve ``sha`` (any rev-parse expression) to a commit and return a
        dict with 'id', 'type', 'commit_id' and 'idx'. Annotated tags are
        peeled to their target. Raises LookupException when the object is
        missing or — unless ``maybe_unreachable`` — dangling.
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _get_object(_context_uid, _repo_id, _sha):
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:

                missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
                try:
                    commit = repo.revparse_single(sha)
                except KeyError:
                    # NOTE(marcink): KeyError doesn't give us any meaningful information
                    # here, we instead give something more explicit
                    e = exceptions.RefNotFoundException('SHA: %s not found', sha)
                    raise exceptions.LookupException(e)(missing_commit_err)
                except ValueError as e:
                    raise exceptions.LookupException(e)(missing_commit_err)

                is_tag = False
                if isinstance(commit, pygit2.Tag):
                    # peel annotated tag to the tagged object
                    commit = repo.get(commit.target)
                    is_tag = True

                check_dangling = True
                if is_tag:
                    check_dangling = False

                if check_dangling and maybe_unreachable:
                    check_dangling = False

                # we used a reference and it parsed means we're not having a dangling commit
                if sha != commit.hex:
                    check_dangling = False

                if check_dangling:
                    # check for dangling commit
                    for branch in repo.branches.with_commit(commit.hex):
                        if branch:
                            break
                    else:
                        # NOTE(marcink): Empty error doesn't give us any meaningful information
                        # here, we instead give something more explicit
                        e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
                        raise exceptions.LookupException(e)(missing_commit_err)

                commit_id = commit.hex
                type_id = commit.type

                return {
                    'id': commit_id,
                    'type': self._type_id_to_name(type_id),
                    'commit_id': commit_id,
                    'idx': 0
                }

        return _get_object(context_uid, repo_id, sha)
815 815
816 816 @reraise_safe_exceptions
817 817 def get_refs(self, wire):
818 818 cache_on, context_uid, repo_id = self._cache_on(wire)
819 819 region = self._region(wire)
820 820
821 821 @region.conditional_cache_on_arguments(condition=cache_on)
822 822 def _get_refs(_context_uid, _repo_id):
823 823
824 824 repo_init = self._factory.repo_libgit2(wire)
825 825 with repo_init as repo:
826 826 regex = re.compile('^refs/(heads|tags)/')
827 827 return {x.name: x.target.hex for x in
828 828 [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}
829 829
830 830 return _get_refs(context_uid, repo_id)
831 831
832 832 @reraise_safe_exceptions
833 833 def get_branch_pointers(self, wire):
834 834 cache_on, context_uid, repo_id = self._cache_on(wire)
835 835 region = self._region(wire)
836 836
837 837 @region.conditional_cache_on_arguments(condition=cache_on)
838 838 def _get_branch_pointers(_context_uid, _repo_id):
839 839
840 840 repo_init = self._factory.repo_libgit2(wire)
841 841 regex = re.compile('^refs/heads')
842 842 with repo_init as repo:
843 843 branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
844 844 return {x.target.hex: x.shorthand for x in branches}
845 845
846 846 return _get_branch_pointers(context_uid, repo_id)
847 847
848 848 @reraise_safe_exceptions
849 849 def head(self, wire, show_exc=True):
850 850 cache_on, context_uid, repo_id = self._cache_on(wire)
851 851 region = self._region(wire)
852 852
853 853 @region.conditional_cache_on_arguments(condition=cache_on)
854 854 def _head(_context_uid, _repo_id, _show_exc):
855 855 repo_init = self._factory.repo_libgit2(wire)
856 856 with repo_init as repo:
857 857 try:
858 858 return repo.head.peel().hex
859 859 except Exception:
860 860 if show_exc:
861 861 raise
862 862 return _head(context_uid, repo_id, show_exc)
863 863
864 864 @reraise_safe_exceptions
865 865 def init(self, wire):
866 866 repo_path = safe_str(wire['path'])
867 867 self.repo = Repo.init(repo_path)
868 868
869 869 @reraise_safe_exceptions
870 870 def init_bare(self, wire):
871 871 repo_path = safe_str(wire['path'])
872 872 self.repo = Repo.init_bare(repo_path)
873 873
874 874 @reraise_safe_exceptions
875 875 def revision(self, wire, rev):
876 876
877 877 cache_on, context_uid, repo_id = self._cache_on(wire)
878 878 region = self._region(wire)
879 879
880 880 @region.conditional_cache_on_arguments(condition=cache_on)
881 881 def _revision(_context_uid, _repo_id, _rev):
882 882 repo_init = self._factory.repo_libgit2(wire)
883 883 with repo_init as repo:
884 884 commit = repo[rev]
885 885 obj_data = {
886 886 'id': commit.id.hex,
887 887 }
888 888 # tree objects itself don't have tree_id attribute
889 889 if hasattr(commit, 'tree_id'):
890 890 obj_data['tree'] = commit.tree_id.hex
891 891
892 892 return obj_data
893 893 return _revision(context_uid, repo_id, rev)
894 894
895 895 @reraise_safe_exceptions
896 896 def date(self, wire, commit_id):
897 897 cache_on, context_uid, repo_id = self._cache_on(wire)
898 898 region = self._region(wire)
899 899
900 900 @region.conditional_cache_on_arguments(condition=cache_on)
901 901 def _date(_repo_id, _commit_id):
902 902 repo_init = self._factory.repo_libgit2(wire)
903 903 with repo_init as repo:
904 904 commit = repo[commit_id]
905 905
906 906 if hasattr(commit, 'commit_time'):
907 907 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
908 908 else:
909 909 commit = commit.get_object()
910 910 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
911 911
912 912 # TODO(marcink): check dulwich difference of offset vs timezone
913 913 return [commit_time, commit_time_offset]
914 914 return _date(repo_id, commit_id)
915 915
    @reraise_safe_exceptions
    def author(self, wire, commit_id):
        """Return the author of ``commit_id`` as 'name <email>' or just the name (cached)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _author(_repo_id, _commit_id):
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                commit = repo[commit_id]

                # objects without an `author` attribute (e.g. tags) are peeled first
                if hasattr(commit, 'author'):
                    author = commit.author
                else:
                    author = commit.get_object().author

                if author.email:
                    return "{} <{}>".format(author.name, author.email)

                try:
                    return "{}".format(author.name)
                except Exception:
                    # fall back to the raw (undecoded) name when formatting fails
                    return "{}".format(safe_str(author.raw_name))

        return _author(repo_id, commit_id)
941 941
942 942 @reraise_safe_exceptions
943 943 def message(self, wire, commit_id):
944 944 cache_on, context_uid, repo_id = self._cache_on(wire)
945 945 region = self._region(wire)
946 946 @region.conditional_cache_on_arguments(condition=cache_on)
947 947 def _message(_repo_id, _commit_id):
948 948 repo_init = self._factory.repo_libgit2(wire)
949 949 with repo_init as repo:
950 950 commit = repo[commit_id]
951 951 return commit.message
952 952 return _message(repo_id, commit_id)
953 953
954 954 @reraise_safe_exceptions
955 955 def parents(self, wire, commit_id):
956 956 cache_on, context_uid, repo_id = self._cache_on(wire)
957 957 region = self._region(wire)
958 958
959 959 @region.conditional_cache_on_arguments(condition=cache_on)
960 960 def _parents(_repo_id, _commit_id):
961 961 repo_init = self._factory.repo_libgit2(wire)
962 962 with repo_init as repo:
963 963 commit = repo[commit_id]
964 964 if hasattr(commit, 'parent_ids'):
965 965 parent_ids = commit.parent_ids
966 966 else:
967 967 parent_ids = commit.get_object().parent_ids
968 968
969 969 return [x.hex for x in parent_ids]
970 970 return _parents(repo_id, commit_id)
971 971
    @reraise_safe_exceptions
    def children(self, wire, commit_id):
        """Return the ids of the direct children of ``commit_id``."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        # NOTE(review): `head` is resolved here but is not part of the cache
        # key of _children, so cached results may go stale as HEAD advances —
        # confirm this is intended.
        head = self.head(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _children(_repo_id, _commit_id):

            # list each commit followed by its children in commit_id^..head
            output, __ = self.run_git_command(
                wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])

            child_ids = []
            pat = re.compile(r'^{}'.format(commit_id))
            for line in output.splitlines():
                line = safe_str(line)
                if pat.match(line):
                    # line format: '<commit_id> <child1> <child2> ...'
                    found_ids = line.split(' ')[1:]
                    child_ids.extend(found_ids)
                    break

            return child_ids
        return _children(repo_id, commit_id)
996 996
997 997 @reraise_safe_exceptions
998 998 def set_refs(self, wire, key, value):
999 999 repo_init = self._factory.repo_libgit2(wire)
1000 1000 with repo_init as repo:
1001 1001 repo.references.create(key, value, force=True)
1002 1002
1003 1003 @reraise_safe_exceptions
1004 1004 def create_branch(self, wire, branch_name, commit_id, force=False):
1005 1005 repo_init = self._factory.repo_libgit2(wire)
1006 1006 with repo_init as repo:
1007 1007 commit = repo[commit_id]
1008 1008
1009 1009 if force:
1010 1010 repo.branches.local.create(branch_name, commit, force=force)
1011 1011 elif not repo.branches.get(branch_name):
1012 1012 # create only if that branch isn't existing
1013 1013 repo.branches.local.create(branch_name, commit, force=force)
1014 1014
1015 1015 @reraise_safe_exceptions
1016 1016 def remove_ref(self, wire, key):
1017 1017 repo_init = self._factory.repo_libgit2(wire)
1018 1018 with repo_init as repo:
1019 1019 repo.references.delete(key)
1020 1020
1021 1021 @reraise_safe_exceptions
1022 1022 def tag_remove(self, wire, tag_name):
1023 1023 repo_init = self._factory.repo_libgit2(wire)
1024 1024 with repo_init as repo:
1025 1025 key = 'refs/tags/{}'.format(tag_name)
1026 1026 repo.references.delete(key)
1027 1027
1028 1028 @reraise_safe_exceptions
1029 1029 def tree_changes(self, wire, source_id, target_id):
1030 1030 # TODO(marcink): remove this seems it's only used by tests
1031 1031 repo = self._factory.repo(wire)
1032 1032 source = repo[source_id].tree if source_id else None
1033 1033 target = repo[target_id].tree
1034 1034 result = repo.object_store.tree_changes(source, target)
1035 1035 return list(result)
1036 1036
1037 1037 @reraise_safe_exceptions
1038 1038 def tree_and_type_for_path(self, wire, commit_id, path):
1039 1039
1040 1040 cache_on, context_uid, repo_id = self._cache_on(wire)
1041 1041 region = self._region(wire)
1042 1042
1043 1043 @region.conditional_cache_on_arguments(condition=cache_on)
1044 1044 def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
1045 1045 repo_init = self._factory.repo_libgit2(wire)
1046 1046
1047 1047 with repo_init as repo:
1048 1048 commit = repo[commit_id]
1049 1049 try:
1050 1050 tree = commit.tree[path]
1051 1051 except KeyError:
1052 1052 return None, None, None
1053 1053
1054 1054 return tree.id.hex, tree.type_str, tree.filemode
1055 1055 return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1056 1056
1057 1057 @reraise_safe_exceptions
1058 1058 def tree_items(self, wire, tree_id):
1059 1059 cache_on, context_uid, repo_id = self._cache_on(wire)
1060 1060 region = self._region(wire)
1061 1061
1062 1062 @region.conditional_cache_on_arguments(condition=cache_on)
1063 1063 def _tree_items(_repo_id, _tree_id):
1064 1064
1065 1065 repo_init = self._factory.repo_libgit2(wire)
1066 1066 with repo_init as repo:
1067 1067 try:
1068 1068 tree = repo[tree_id]
1069 1069 except KeyError:
1070 1070 raise ObjectMissing('No tree with id: {}'.format(tree_id))
1071 1071
1072 1072 result = []
1073 1073 for item in tree:
1074 1074 item_sha = item.hex
1075 1075 item_mode = item.filemode
1076 1076 item_type = item.type_str
1077 1077
1078 1078 if item_type == 'commit':
1079 1079 # NOTE(marcink): submodules we translate to 'link' for backward compat
1080 1080 item_type = 'link'
1081 1081
1082 1082 result.append((item.name, item_mode, item_sha, item_type))
1083 1083 return result
1084 1084 return _tree_items(repo_id, tree_id)
1085 1085
1086 1086 @reraise_safe_exceptions
1087 1087 def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1088 1088 """
1089 1089 Old version that uses subprocess to call diff
1090 1090 """
1091 1091
1092 1092 flags = [
1093 1093 '-U%s' % context, '--patch',
1094 1094 '--binary',
1095 1095 '--find-renames',
1096 1096 '--no-indent-heuristic',
1097 1097 # '--indent-heuristic',
1098 1098 #'--full-index',
1099 1099 #'--abbrev=40'
1100 1100 ]
1101 1101
1102 1102 if opt_ignorews:
1103 1103 flags.append('--ignore-all-space')
1104 1104
1105 1105 if commit_id_1 == self.EMPTY_COMMIT:
1106 1106 cmd = ['show'] + flags + [commit_id_2]
1107 1107 else:
1108 1108 cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
1109 1109
1110 1110 if file_filter:
1111 1111 cmd.extend(['--', file_filter])
1112 1112
1113 1113 diff, __ = self.run_git_command(wire, cmd)
1114 1114 # If we used 'show' command, strip first few lines (until actual diff
1115 1115 # starts)
1116 1116 if commit_id_1 == self.EMPTY_COMMIT:
1117 1117 lines = diff.splitlines()
1118 1118 x = 0
1119 1119 for line in lines:
1120 1120 if line.startswith(b'diff'):
1121 1121 break
1122 1122 x += 1
1123 1123 # Append new line just like 'diff' command do
1124 1124 diff = '\n'.join(lines[x:]) + '\n'
1125 1125 return diff
1126 1126
    @reraise_safe_exceptions
    def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
        """
        Compute a diff between two commits using pygit2.

        When ``commit_id_1`` equals self.EMPTY_COMMIT, ``commit_id_2`` is
        diffed against an empty tree. With ``file_filter`` only the patch for
        that single path is returned.
        """
        repo_init = self._factory.repo_libgit2(wire)

        with repo_init as repo:
            # swap=True reverses the sides produced by diff_to_tree
            swap = True
            flags = 0
            flags |= pygit2.GIT_DIFF_SHOW_BINARY

            if opt_ignorews:
                flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE

            if commit_id_1 == self.EMPTY_COMMIT:
                comm1 = repo[commit_id_2]
                diff_obj = comm1.tree.diff_to_tree(
                    flags=flags, context_lines=context, swap=swap)

            else:
                comm1 = repo[commit_id_2]
                comm2 = repo[commit_id_1]
                diff_obj = comm1.tree.diff_to_tree(
                    comm2.tree, flags=flags, context_lines=context, swap=swap)
                similar_flags = 0
                similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
                diff_obj.find_similar(flags=similar_flags)

            if file_filter:
                for p in diff_obj:
                    if p.delta.old_file.path == file_filter:
                        # raw patch data for this single file
                        return p.data or ''
                # no matching path == no diff
                return ''
            return diff_obj.patch or ''
1159 1160
1160 1161 @reraise_safe_exceptions
1161 1162 def node_history(self, wire, commit_id, path, limit):
1162 1163 cache_on, context_uid, repo_id = self._cache_on(wire)
1163 1164 region = self._region(wire)
1164 1165
1165 1166 @region.conditional_cache_on_arguments(condition=cache_on)
1166 1167 def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
1167 1168 # optimize for n==1, rev-list is much faster for that use-case
1168 1169 if limit == 1:
1169 1170 cmd = ['rev-list', '-1', commit_id, '--', path]
1170 1171 else:
1171 1172 cmd = ['log']
1172 1173 if limit:
1173 1174 cmd.extend(['-n', str(safe_int(limit, 0))])
1174 1175 cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
1175 1176
1176 1177 output, __ = self.run_git_command(wire, cmd)
1177 1178 commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
1178 1179
1179 1180 return [x for x in commit_ids]
1180 1181 return _node_history(context_uid, repo_id, commit_id, path, limit)
1181 1182
1182 1183 @reraise_safe_exceptions
1183 1184 def node_annotate_legacy(self, wire, commit_id, path):
1184 1185 #note: replaced by pygit2 impelementation
1185 1186 cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
1186 1187 # -l ==> outputs long shas (and we need all 40 characters)
1187 1188 # --root ==> doesn't put '^' character for boundaries
1188 1189 # -r commit_id ==> blames for the given commit
1189 1190 output, __ = self.run_git_command(wire, cmd)
1190 1191
1191 1192 result = []
1192 1193 for i, blame_line in enumerate(output.splitlines()[:-1]):
1193 1194 line_no = i + 1
1194 1195 blame_commit_id, line = re.split(rb' ', blame_line, 1)
1195 1196 result.append((line_no, blame_commit_id, line))
1196 1197
1197 1198 return result
1198 1199
1199 1200 @reraise_safe_exceptions
1200 1201 def node_annotate(self, wire, commit_id, path):
1201 1202
1202 1203 result_libgit = []
1203 1204 repo_init = self._factory.repo_libgit2(wire)
1204 1205 with repo_init as repo:
1205 1206 commit = repo[commit_id]
1206 1207 blame_obj = repo.blame(path, newest_commit=commit_id)
1207 1208 for i, line in enumerate(commit.tree[path].data.splitlines()):
1208 1209 line_no = i + 1
1209 1210 hunk = blame_obj.for_line(line_no)
1210 1211 blame_commit_id = hunk.final_commit_id.hex
1211 1212
1212 1213 result_libgit.append((line_no, blame_commit_id, line))
1213 1214
1214 1215 return result_libgit
1215 1216
1216 1217 @reraise_safe_exceptions
1217 1218 def update_server_info(self, wire):
1218 1219 repo = self._factory.repo(wire)
1219 1220 update_server_info(repo)
1220 1221
1221 1222 @reraise_safe_exceptions
1222 1223 def get_all_commit_ids(self, wire):
1223 1224
1224 1225 cache_on, context_uid, repo_id = self._cache_on(wire)
1225 1226 region = self._region(wire)
1226 1227
1227 1228 @region.conditional_cache_on_arguments(condition=cache_on)
1228 1229 def _get_all_commit_ids(_context_uid, _repo_id):
1229 1230
1230 1231 cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
1231 1232 try:
1232 1233 output, __ = self.run_git_command(wire, cmd)
1233 1234 return output.splitlines()
1234 1235 except Exception:
1235 1236 # Can be raised for empty repositories
1236 1237 return []
1237 1238
1238 1239 @region.conditional_cache_on_arguments(condition=cache_on)
1239 1240 def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
1240 1241 repo_init = self._factory.repo_libgit2(wire)
1241 1242 from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
1242 1243 results = []
1243 1244 with repo_init as repo:
1244 1245 for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
1245 1246 results.append(commit.id.hex)
1246 1247
1247 1248 return _get_all_commit_ids(context_uid, repo_id)
1248 1249
    @reraise_safe_exceptions
    def run_git_command(self, wire, cmd, **opts):
        """
        Execute git with the arguments in ``cmd`` for the repo in ``wire``.

        Recognized keyword options (removed from ``opts`` before the rest is
        forwarded to SubprocessIOChunker):
          * ``_bare``     -- skip the default ``-c core.quotepath=false`` copts
          * ``_safe``     -- on OSError return ('', err) instead of raising
          * ``_copts``    -- extra config arguments prepended to ``cmd``
          * ``extra_env`` -- mapping merged into the subprocess environment

        Returns a (stdout_bytes, stderr_bytes) tuple.
        """
        path = wire.get('path', None)

        # run inside the repository directory when it exists
        if path and os.path.isdir(path):
            opts['cwd'] = path

        if '_bare' in opts:
            _copts = []
            del opts['_bare']
        else:
            _copts = ['-c', 'core.quotepath=false', ]
        safe_call = False
        if '_safe' in opts:
            # no exc on failure
            del opts['_safe']
            safe_call = True

        if '_copts' in opts:
            _copts.extend(opts['_copts'] or [])
            del opts['_copts']

        gitenv = os.environ.copy()
        gitenv.update(opts.pop('extra_env', {}))
        # need to clean fix GIT_DIR !
        if 'GIT_DIR' in gitenv:
            del gitenv['GIT_DIR']
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
        gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

        cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
        _opts = {'env': gitenv, 'shell': False}

        proc = None
        try:
            _opts.update(opts)
            proc = subprocessio.SubprocessIOChunker(cmd, **_opts)

            return b''.join(proc), b''.join(proc.stderr)
        except OSError as err:
            cmd = ' '.join(map(safe_str, cmd))  # human friendly CMD
            tb_err = ("Couldn't run git command (%s).\n"
                      "Original error was:%s\n"
                      "Call options:%s\n"
                      % (cmd, err, _opts))
            log.exception(tb_err)
            if safe_call:
                return '', err
            else:
                raise exceptions.VcsException()(tb_err)
        finally:
            # always release the subprocess resources
            if proc:
                proc.close()
1302 1303
    @reraise_safe_exceptions
    def install_hooks(self, wire, force=False):
        """Install RhodeCode git hooks into the repository at wire['path']."""
        from vcsserver.hook_utils import install_git_hooks
        bare = self.bare(wire)
        path = wire['path']
        binary_dir = settings.BINARY_DIR
        executable = None
        if binary_dir:
            executable = os.path.join(binary_dir, 'python3')
        # NOTE(review): `executable` is computed above but never passed to
        # install_git_hooks -- confirm whether it should be forwarded.
        return install_git_hooks(path, bare, force_create=force)
1313 1314
1314 1315 @reraise_safe_exceptions
1315 1316 def get_hooks_info(self, wire):
1316 1317 from vcsserver.hook_utils import (
1317 1318 get_git_pre_hook_version, get_git_post_hook_version)
1318 1319 bare = self.bare(wire)
1319 1320 path = wire['path']
1320 1321 return {
1321 1322 'pre_version': get_git_pre_hook_version(path, bare),
1322 1323 'post_version': get_git_post_hook_version(path, bare),
1323 1324 }
1324 1325
1325 1326 @reraise_safe_exceptions
1326 1327 def set_head_ref(self, wire, head_name):
1327 1328 log.debug('Setting refs/head to `%s`', head_name)
1328 1329 cmd = ['symbolic-ref', '"HEAD"', '"refs/heads/%s"' % head_name]
1329 1330 output, __ = self.run_git_command(wire, cmd)
1330 1331 return [head_name] + output.splitlines()
1331 1332
    @reraise_safe_exceptions
    def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
                     archive_dir_name, commit_id):
        """
        Create an archive of ``commit_id`` at ``archive_dest_path``.

        Walks the commit's tree (optionally restricted to ``archive_at_path``)
        through a libgit2 index and streams each entry to the shared
        ``archive_repo`` helper as ArchiveNode objects.
        """

        def file_walker(_commit_id, path):
            # generator yielding an ArchiveNode for every file under `path`
            repo_init = self._factory.repo_libgit2(wire)

            with repo_init as repo:
                commit = repo[commit_id]

                # resolve the sub-tree to archive ('' or '/' means repo root)
                if path in ['', '/']:
                    tree = commit.tree
                else:
                    tree = commit.tree[path.rstrip('/')]
                tree_id = tree.id.hex
                try:
                    tree = repo[tree_id]
                except KeyError:
                    raise ObjectMissing('No tree with id: {}'.format(tree_id))

                # flatten the tree entries via an in-memory index
                index = LibGit2Index.Index()
                index.read_tree(tree)
                file_iter = index

                for fn in file_iter:
                    file_path = fn.path
                    mode = fn.mode
                    is_link = stat.S_ISLNK(mode)
                    if mode == pygit2.GIT_FILEMODE_COMMIT:
                        # submodule entries have no blob content to archive
                        log.debug('Skipping path %s as a commit node', file_path)
                        continue
                    yield ArchiveNode(file_path, mode, is_link, repo[fn.hex].read_raw)

        return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
                            archive_dir_name, commit_id)
@@ -1,1101 +1,1103 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
17 import binascii
18 18 import io
19 19 import logging
20 20 import stat
21 21 import urllib.request
22 22 import urllib.parse
23 23 import traceback
24 24 import hashlib
25 25
26 26 from hgext import largefiles, rebase, purge
27 27
28 28 from mercurial import commands
29 29 from mercurial import unionrepo
30 30 from mercurial import verify
31 31 from mercurial import repair
32 32
33 33 import vcsserver
34 34 from vcsserver import exceptions
35 35 from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original, archive_repo, ArchiveNode
36 36 from vcsserver.hgcompat import (
37 37 archival, bin, clone, config as hgconfig, diffopts, hex, get_ctx,
38 38 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler,
39 39 makepeer, instance, match, memctx, exchange, memfilectx, nullrev, hg_merge,
40 40 patch, peer, revrange, ui, hg_tag, Abort, LookupError, RepoError,
41 41 RepoLookupError, InterventionRequired, RequirementError,
42 42 alwaysmatcher, patternmatcher, hgutil, hgext_strip)
43 43 from vcsserver.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes
44 44 from vcsserver.vcs_base import RemoteBase
45 45
46 46 log = logging.getLogger(__name__)
47 47
48 48
def make_ui_from_config(repo_config):
    """
    Build a Mercurial ui object from ``repo_config`` (an iterable of
    (section, option, value) triples), redirecting ui output channels to the
    python logger and overriding noisy defaults.
    """

    class LoggingUI(ui.ui):
        # ui subclass whose output channels go to the module logger

        def status(self, *msg, **opts):
            str_msg = map(safe_str, msg)
            log.info(' '.join(str_msg).rstrip('\n'))
            #super(LoggingUI, self).status(*msg, **opts)

        def warn(self, *msg, **opts):
            str_msg = map(safe_str, msg)
            log.warning('ui_logger:'+' '.join(str_msg).rstrip('\n'))
            #super(LoggingUI, self).warn(*msg, **opts)

        def error(self, *msg, **opts):
            str_msg = map(safe_str, msg)
            log.error('ui_logger:'+' '.join(str_msg).rstrip('\n'))
            #super(LoggingUI, self).error(*msg, **opts)

        def note(self, *msg, **opts):
            str_msg = map(safe_str, msg)
            log.info('ui_logger:'+' '.join(str_msg).rstrip('\n'))
            #super(LoggingUI, self).note(*msg, **opts)

        def debug(self, *msg, **opts):
            str_msg = map(safe_str, msg)
            log.debug('ui_logger:'+' '.join(str_msg).rstrip('\n'))
            #super(LoggingUI, self).debug(*msg, **opts)

    baseui = LoggingUI()

    # clean the baseui object
    baseui._ocfg = hgconfig.config()
    baseui._ucfg = hgconfig.config()
    baseui._tcfg = hgconfig.config()

    for section, option, value in repo_config:
        baseui.setconfig(ascii_bytes(section), ascii_bytes(option), ascii_bytes(value))

    # make our hgweb quiet so it doesn't print output
    baseui.setconfig(b'ui', b'quiet', b'true')

    baseui.setconfig(b'ui', b'paginate', b'never')
    # for better Error reporting of Mercurial
    baseui.setconfig(b'ui', b'message-output', b'stderr')

    # force mercurial to only use 1 thread, otherwise it may try to set a
    # signal in a non-main thread, thus generating a ValueError.
    baseui.setconfig(b'worker', b'numcpus', 1)

    # If there is no config for the largefiles extension, we explicitly disable
    # it here. This overrides settings from repositories hgrc file. Recent
    # mercurial versions enable largefiles in hgrc on clone from largefile
    # repo.
    if not baseui.hasconfig(b'extensions', b'largefiles'):
        log.debug('Explicitly disable largefiles extension for repo.')
        baseui.setconfig(b'extensions', b'largefiles', b'!')

    return baseui
108 108
109 109
def reraise_safe_exceptions(func):
    """Decorator for converting mercurial exceptions to something neutral."""
    import functools

    # functools.wraps preserves the wrapped function's name/docstring, which
    # keeps logging and introspection of remote methods meaningful
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except (Abort, InterventionRequired) as e:
            raise_from_original(exceptions.AbortException(e), e)
        except RepoLookupError as e:
            raise_from_original(exceptions.LookupException(e), e)
        except RequirementError as e:
            raise_from_original(exceptions.RequirementException(e), e)
        except RepoError as e:
            raise_from_original(exceptions.VcsException(e), e)
        except LookupError as e:
            raise_from_original(exceptions.LookupException(e), e)
        except Exception as e:
            # _vcs_kind marks exceptions already translated by a nested call
            if not hasattr(e, '_vcs_kind'):
                log.exception("Unhandled exception in hg remote call")
                raise_from_original(exceptions.UnhandledException(e), e)

            raise
    return wrapper
133 133
134 134
class MercurialFactory(RepoFactory):
    repo_type = 'hg'

    def _create_config(self, config, hooks=True):
        """Build a mercurial ui from ``config``; optionally drop rc hook entries."""
        if not hooks:
            hooks_to_clean = frozenset((
                'changegroup.repo_size', 'preoutgoing.pre_pull',
                'outgoing.pull_logger', 'prechangegroup.pre_push'))
            config = [
                (section, option, value)
                for section, option, value in config
                if not (section == 'hooks' and option in hooks_to_clean)]

        return make_ui_from_config(config)

    def _create_repo(self, wire, create):
        ui_obj = self._create_config(wire["config"])
        return instance(ui_obj, safe_bytes(wire["path"]), create)

    def repo(self, wire, create=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create)
162 162
163 163
def patch_ui_message_output(baseui):
    """
    Redirect all output channels of ``baseui`` into a single BytesIO buffer.

    Returns the patched ui object together with the buffer collecting the
    output.
    """
    baseui.setconfig(b'ui', b'quiet', b'false')
    captured = io.BytesIO()

    def write(data, **unused_kwargs):
        captured.write(data)

    for channel in ('status', 'write', 'warn', 'debug'):
        setattr(baseui, channel, write)

    return baseui, captured
177 177
178 178
179 179 class HgRemote(RemoteBase):
180 180
    def __init__(self, factory):
        # factory creating mercurial repo objects from a `wire` description
        self._factory = factory
        # attribute name -> resolver method; consumed by bulk_request()
        self._bulk_methods = {
            "affected_files": self.ctx_files,
            "author": self.ctx_user,
            "branch": self.ctx_branch,
            "children": self.ctx_children,
            "date": self.ctx_date,
            "message": self.ctx_description,
            "parents": self.ctx_parents,
            "status": self.ctx_status,
            "obsolete": self.ctx_obsolete,
            "phase": self.ctx_phase,
            "hidden": self.ctx_hidden,
            "_file_paths": self.ctx_list,
        }
197 197
    def _get_ctx(self, repo, ref):
        # delegate changeset-context resolution to vcsserver.hgcompat.get_ctx
        return get_ctx(repo, ref)
200 200
201 201 @reraise_safe_exceptions
202 202 def discover_hg_version(self):
203 203 from mercurial import util
204 204 return safe_str(util.version())
205 205
206 206 @reraise_safe_exceptions
207 207 def is_empty(self, wire):
208 208 repo = self._factory.repo(wire)
209 209
210 210 try:
211 211 return len(repo) == 0
212 212 except Exception:
213 213 log.exception("failed to read object_store")
214 214 return False
215 215
216 216 @reraise_safe_exceptions
217 217 def bookmarks(self, wire):
218 218 cache_on, context_uid, repo_id = self._cache_on(wire)
219 219 region = self._region(wire)
220 220
221 221 @region.conditional_cache_on_arguments(condition=cache_on)
222 222 def _bookmarks(_context_uid, _repo_id):
223 223 repo = self._factory.repo(wire)
224 224 return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo._bookmarks.items()}
225 225
226 226 return _bookmarks(context_uid, repo_id)
227 227
228 228 @reraise_safe_exceptions
229 229 def branches(self, wire, normal, closed):
230 230 cache_on, context_uid, repo_id = self._cache_on(wire)
231 231 region = self._region(wire)
232 232
233 233 @region.conditional_cache_on_arguments(condition=cache_on)
234 234 def _branches(_context_uid, _repo_id, _normal, _closed):
235 235 repo = self._factory.repo(wire)
236 236 iter_branches = repo.branchmap().iterbranches()
237 237 bt = {}
238 238 for branch_name, _heads, tip_node, is_closed in iter_branches:
239 239 if normal and not is_closed:
240 240 bt[safe_str(branch_name)] = ascii_str(hex(tip_node))
241 241 if closed and is_closed:
242 242 bt[safe_str(branch_name)] = ascii_str(hex(tip_node))
243 243
244 244 return bt
245 245
246 246 return _branches(context_uid, repo_id, normal, closed)
247 247
    @reraise_safe_exceptions
    def bulk_request(self, wire, commit_id, pre_load):
        """
        Resolve several commit attributes (keys of self._bulk_methods) for
        ``commit_id`` in one cached call; raises VcsException for unknown
        attribute names.
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _bulk_request(_repo_id, _commit_id, _pre_load):
            result = {}
            for attr in pre_load:
                try:
                    method = self._bulk_methods[attr]
                    wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
                    result[attr] = method(wire, commit_id)
                except KeyError as e:
                    raise exceptions.VcsException(e)(
                        'Unknown bulk attribute: "%s"' % attr)
            return result

        # sorted pre_load keeps the cache key stable across call orderings
        return _bulk_request(repo_id, commit_id, sorted(pre_load))
267 267
    @reraise_safe_exceptions
    def ctx_branch(self, wire, commit_id):
        """Return the branch name of the given commit (cached)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _ctx_branch(_repo_id, _commit_id):
            repo = self._factory.repo(wire)
            ctx = self._get_ctx(repo, commit_id)
            return ctx.branch()
        return _ctx_branch(repo_id, commit_id)
279 279
    @reraise_safe_exceptions
    def ctx_date(self, wire, commit_id):
        """Return the commit date tuple of the given commit (cached)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _ctx_date(_repo_id, _commit_id):
            repo = self._factory.repo(wire)
            ctx = self._get_ctx(repo, commit_id)
            return ctx.date()
        return _ctx_date(repo_id, commit_id)
291 291
    @reraise_safe_exceptions
    def ctx_description(self, wire, revision):
        """Return the commit message of the given revision (uncached)."""
        repo = self._factory.repo(wire)
        ctx = self._get_ctx(repo, revision)
        return ctx.description()
297 297
    @reraise_safe_exceptions
    def ctx_files(self, wire, commit_id):
        """Return the list of files touched by the given commit (cached)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _ctx_files(_repo_id, _commit_id):
            repo = self._factory.repo(wire)
            ctx = self._get_ctx(repo, commit_id)
            return ctx.files()

        return _ctx_files(repo_id, commit_id)
310 310
    @reraise_safe_exceptions
    def ctx_list(self, path, revision):
        """List entries of the changectx at `revision`.

        NOTE: unlike sibling methods the first argument is named ``path``
        but is passed straight to the repo factory like a ``wire`` dict.
        """
        repo = self._factory.repo(path)
        ctx = self._get_ctx(repo, revision)
        return list(ctx)
316 316
    @reraise_safe_exceptions
    def ctx_parents(self, wire, commit_id):
        """Return hex ids of visible parents, skipping hidden/obsolete ones (cached)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _ctx_parents(_repo_id, _commit_id):
            repo = self._factory.repo(wire)
            ctx = self._get_ctx(repo, commit_id)
            return [parent.hex() for parent in ctx.parents()
                    if not (parent.hidden() or parent.obsolete())]

        return _ctx_parents(repo_id, commit_id)
330 330
    @reraise_safe_exceptions
    def ctx_children(self, wire, commit_id):
        """Return hex ids of visible children, skipping hidden/obsolete ones (cached)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _ctx_children(_repo_id, _commit_id):
            repo = self._factory.repo(wire)
            ctx = self._get_ctx(repo, commit_id)
            return [child.hex() for child in ctx.children()
                    if not (child.hidden() or child.obsolete())]

        return _ctx_children(repo_id, commit_id)
344 344
    @reraise_safe_exceptions
    def ctx_phase(self, wire, commit_id):
        """Return the phase number of the given commit (cached)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _ctx_phase(_context_uid, _repo_id, _commit_id):
            repo = self._factory.repo(wire)
            ctx = self._get_ctx(repo, commit_id)
            # public=0, draft=1, secret=2 (per mercurial.phases; the old
            # comment said secret=3, which was wrong)
            return ctx.phase()
        return _ctx_phase(context_uid, repo_id, commit_id)
357 357
    @reraise_safe_exceptions
    def ctx_obsolete(self, wire, commit_id):
        """Return True if the given commit is obsolete (cached)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _ctx_obsolete(_context_uid, _repo_id, _commit_id):
            repo = self._factory.repo(wire)
            ctx = self._get_ctx(repo, commit_id)
            return ctx.obsolete()
        return _ctx_obsolete(context_uid, repo_id, commit_id)
369 369
    @reraise_safe_exceptions
    def ctx_hidden(self, wire, commit_id):
        """Return True if the given commit is hidden (cached)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _ctx_hidden(_context_uid, _repo_id, _commit_id):
            repo = self._factory.repo(wire)
            ctx = self._get_ctx(repo, commit_id)
            return ctx.hidden()
        return _ctx_hidden(context_uid, repo_id, commit_id)
381 381
    @reraise_safe_exceptions
    def ctx_substate(self, wire, revision):
        """Return the subrepository state mapping of the given revision."""
        repo = self._factory.repo(wire)
        ctx = self._get_ctx(repo, revision)
        return ctx.substate
387 387
    @reraise_safe_exceptions
    def ctx_status(self, wire, revision):
        """Return the status of `revision` against its first parent as a list."""
        repo = self._factory.repo(wire)
        ctx = self._get_ctx(repo, revision)
        status = repo[ctx.p1().node()].status(other=ctx.node())
        # object of status (odd, custom named tuple in mercurial) is not
        # correctly serializable, we make it a list, as the underling
        # API expects this to be a list
        return list(status)
397 397
    @reraise_safe_exceptions
    def ctx_user(self, wire, revision):
        """Return the author string of the given revision."""
        repo = self._factory.repo(wire)
        ctx = self._get_ctx(repo, revision)
        return ctx.user()
403 403
    @reraise_safe_exceptions
    def check_url(self, url, config):
        """Validate that `url` is reachable and points at a Mercurial repo.

        Supports ``proto+scheme://`` prefixed URLs (e.g. ``svn+http://``);
        for the ``svn`` prefix the Mercurial-repo verification is skipped.
        Credentials embedded in the URL are used for HTTP basic/digest auth;
        passwords are masked before logging.

        :returns: True on success
        :raises exceptions.URLError: when the URL cannot be opened or does
            not look like a Mercurial repository
        """
        _proto = None
        if '+' in url[:url.find('://')]:
            _proto = url[0:url.find('+')]
            url = url[url.find('+') + 1:]
        handlers = []
        url_obj = url_parser(url)
        test_uri, authinfo = url_obj.authinfo()
        # mask password so the cleaned URI is safe to log
        url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
        url_obj.query = obfuscate_qs(url_obj.query)

        cleaned_uri = str(url_obj)
        log.info("Checking URL for remote cloning/import: %s", cleaned_uri)

        if authinfo:
            # create a password manager
            passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
            passmgr.add_password(*authinfo)

            handlers.extend((httpbasicauthhandler(passmgr),
                             httpdigestauthhandler(passmgr)))

        o = urllib.request.build_opener(*handlers)
        o.addheaders = [('Content-Type', 'application/mercurial-0.1'),
                        ('Accept', 'application/mercurial-0.1')]

        # issue a cheap hg-wire 'between' command to probe the server
        q = {"cmd": 'between'}
        q.update({'pairs': "%s-%s" % ('0' * 40, '0' * 40)})
        qs = '?%s' % urllib.parse.urlencode(q)
        cu = "%s%s" % (test_uri, qs)
        req = urllib.request.Request(cu, None, {})

        try:
            log.debug("Trying to open URL %s", cleaned_uri)
            resp = o.open(req)
            if resp.code != 200:
                raise exceptions.URLError()('Return Code is not 200')
        except Exception as e:
            log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
            # means it cannot be cloned
            raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))

        # now check if it's a proper hg repo, but don't do it for svn
        try:
            if _proto == 'svn':
                pass
            else:
                # check for pure hg repos
                log.debug(
                    "Verifying if URL is a Mercurial repository: %s",
                    cleaned_uri)
                ui = make_ui_from_config(config)
                # NOTE(review): lookup is given a str 'tip'; mercurial APIs
                # generally expect bytes on py3 — confirm this call works
                peer_checker = makepeer(ui, url)
                peer_checker.lookup('tip')
        except Exception as e:
            log.warning("URL is not a valid Mercurial repository: %s",
                        cleaned_uri)
            raise exceptions.URLError(e)(
                "url [%s] does not look like an hg repo org_exc: %s"
                % (cleaned_uri, e))

        log.info("URL is a valid Mercurial repository: %s", cleaned_uri)
        return True
468 468
    @reraise_safe_exceptions
    def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_git, opt_ignorews, context):
        """Return the raw diff (bytes) between two commits.

        :param file_filter: optional ``(repo_path, node_path)`` pair limiting
            the diff to a single path, or a falsy value for a full diff
        :raises exceptions.LookupException: when a commit id is unknown
        """
        repo = self._factory.repo(wire)

        if file_filter:
            # unpack the file-filter
            repo_path, node_path = file_filter
            match_filter = match(safe_bytes(repo_path), b'', [safe_bytes(node_path)])
        else:
            match_filter = file_filter
        opts = diffopts(git=opt_git, ignorews=opt_ignorews, context=context, showfunc=1)

        try:
            diff_iter = patch.diff(
                repo, node1=commit_id_1, node2=commit_id_2, match=match_filter, opts=opts)
            return b"".join(diff_iter)
        except RepoLookupError as e:
            raise exceptions.LookupException(e)()
485 487
    @reraise_safe_exceptions
    def node_history(self, wire, revision, path, limit):
        """Return hex ids of visible commits that touched `path`.

        Walks the filelog of `path` newest-first, skipping hidden/obsolete
        changesets and anything newer than `revision`; stops after `limit`
        entries when `limit` is truthy. Cached per call context.
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _node_history(_context_uid, _repo_id, _revision, _path, _limit):
            repo = self._factory.repo(wire)

            ctx = self._get_ctx(repo, revision)
            fctx = ctx.filectx(safe_bytes(path))

            def history_iter():
                limit_rev = fctx.rev()
                for obj in reversed(list(fctx.filelog())):
                    obj = fctx.filectx(obj)
                    ctx = obj.changectx()
                    if ctx.hidden() or ctx.obsolete():
                        continue

                    # only yield revisions at or below the starting revision
                    if limit_rev >= obj.rev():
                        yield obj

            history = []
            for cnt, obj in enumerate(history_iter()):
                if limit and cnt >= limit:
                    break
                history.append(hex(obj.node()))

            return [x for x in history]
        return _node_history(context_uid, repo_id, revision, path, limit)
517 519
518 520 @reraise_safe_exceptions
519 521 def node_history_untill(self, wire, revision, path, limit):
520 522 cache_on, context_uid, repo_id = self._cache_on(wire)
521 523 region = self._region(wire)
522 524
523 525 @region.conditional_cache_on_arguments(condition=cache_on)
524 526 def _node_history_until(_context_uid, _repo_id):
525 527 repo = self._factory.repo(wire)
526 528 ctx = self._get_ctx(repo, revision)
527 529 fctx = ctx.filectx(safe_bytes(path))
528 530
529 531 file_log = list(fctx.filelog())
530 532 if limit:
531 533 # Limit to the last n items
532 534 file_log = file_log[-limit:]
533 535
534 536 return [hex(fctx.filectx(cs).node()) for cs in reversed(file_log)]
535 537 return _node_history_until(context_uid, repo_id, revision, path, limit)
536 538
    @reraise_safe_exceptions
    def fctx_annotate(self, wire, revision, path):
        """Return per-line blame for `path` at `revision`.

        :returns: list of ``(line_no, hex_commit_id, line_content)`` tuples,
            with line numbers starting at 1
        """
        repo = self._factory.repo(wire)
        ctx = self._get_ctx(repo, revision)
        fctx = ctx.filectx(safe_bytes(path))

        result = []
        for i, annotate_obj in enumerate(fctx.annotate(), 1):
            ln_no = i
            sha = hex(annotate_obj.fctx.node())
            content = annotate_obj.text
            result.append((ln_no, sha, content))
        return result
550 552
    @reraise_safe_exceptions
    def fctx_node_data(self, wire, revision, path):
        """Return the raw content (bytes) of `path` at `revision`."""
        repo = self._factory.repo(wire)
        ctx = self._get_ctx(repo, revision)
        fctx = ctx.filectx(safe_bytes(path))
        return fctx.data()
557 559
    @reraise_safe_exceptions
    def fctx_flags(self, wire, commit_id, path):
        """Return the flags (e.g. 'x', 'l') of `path` at `commit_id` (cached)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _fctx_flags(_repo_id, _commit_id, _path):
            repo = self._factory.repo(wire)
            ctx = self._get_ctx(repo, commit_id)
            fctx = ctx.filectx(safe_bytes(path))
            return fctx.flags()

        return _fctx_flags(repo_id, commit_id, path)
571 573
    @reraise_safe_exceptions
    def fctx_size(self, wire, commit_id, path):
        """Return the size in bytes of `path` at `commit_id` (cached)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _fctx_size(_repo_id, _revision, _path):
            repo = self._factory.repo(wire)
            ctx = self._get_ctx(repo, commit_id)
            fctx = ctx.filectx(safe_bytes(path))
            return fctx.size()
        return _fctx_size(repo_id, commit_id, path)
584 586
    @reraise_safe_exceptions
    def get_all_commit_ids(self, wire, name):
        """Return hex ids of all visible commits, oldest first (cached)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _get_all_commit_ids(_context_uid, _repo_id, _name):
            repo = self._factory.repo(wire)
            revs = [ascii_str(repo[x].hex()) for x in repo.filtered(b'visible').changelog.revs()]
            return revs
        return _get_all_commit_ids(context_uid, repo_id, name)
596 598
    @reraise_safe_exceptions
    def get_config_value(self, wire, section, name, untrusted=False):
        """Return a repository ui config value for `section`/`name`."""
        repo = self._factory.repo(wire)
        return repo.ui.config(ascii_bytes(section), ascii_bytes(name), untrusted=untrusted)
601 603
    @reraise_safe_exceptions
    def is_large_file(self, wire, commit_id, path):
        """Return True if `path` is a largefiles standin (cached).

        Only the path is inspected; `commit_id` is part of the cache key.
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _is_large_file(_context_uid, _repo_id, _commit_id, _path):
            return largefiles.lfutil.isstandin(safe_bytes(path))

        return _is_large_file(context_uid, repo_id, commit_id, path)
612 614
    @reraise_safe_exceptions
    def is_binary(self, wire, revision, path):
        """Return True if `path` at `revision` is binary content (cached)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _is_binary(_repo_id, _sha, _path):
            repo = self._factory.repo(wire)
            ctx = self._get_ctx(repo, revision)
            fctx = ctx.filectx(safe_bytes(path))
            return fctx.isbinary()

        return _is_binary(repo_id, revision, path)
626 628
    @reraise_safe_exceptions
    def md5_hash(self, wire, revision, path):
        """Return the hex MD5 digest of `path` content at `revision` (cached)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _md5_hash(_repo_id, _sha, _path):
            repo = self._factory.repo(wire)
            ctx = self._get_ctx(repo, revision)
            fctx = ctx.filectx(safe_bytes(path))
            return hashlib.md5(fctx.data()).hexdigest()

        return _md5_hash(repo_id, revision, path)
640 642
    @reraise_safe_exceptions
    def in_largefiles_store(self, wire, sha):
        """Return True if the largefile `sha` is present in the repo store."""
        repo = self._factory.repo(wire)
        return largefiles.lfutil.instore(repo, sha)
645 647
    @reraise_safe_exceptions
    def in_user_cache(self, wire, sha):
        """Return True if the largefile `sha` is present in the user cache."""
        repo = self._factory.repo(wire)
        return largefiles.lfutil.inusercache(repo.ui, sha)
650 652
    @reraise_safe_exceptions
    def store_path(self, wire, sha):
        """Return the filesystem path of largefile `sha` in the repo store."""
        repo = self._factory.repo(wire)
        return largefiles.lfutil.storepath(repo, sha)
655 657
    @reraise_safe_exceptions
    def link(self, wire, sha, path):
        """Link largefile `sha` from the user cache to `path`."""
        repo = self._factory.repo(wire)
        largefiles.lfutil.link(
            largefiles.lfutil.usercachepath(repo.ui, sha), path)
661 663
    @reraise_safe_exceptions
    def localrepository(self, wire, create=False):
        """Open (or, with create=True, initialize) the repository; result discarded."""
        self._factory.repo(wire, create=create)
665 667
    @reraise_safe_exceptions
    def lookup(self, wire, revision, both):
        """Resolve `revision` (hex id or integer index) to a hex id.

        :param both: when True return ``(hex_id, rev_number)`` instead of
            just the hex id
        :raises exceptions.LookupException: when the revision is unknown or
            malformed (bad hex raises binascii.Error internally)
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _lookup(_context_uid, _repo_id, _revision, _both):

            repo = self._factory.repo(wire)
            rev = _revision
            if isinstance(rev, int):
                # NOTE(marcink):
                # since Mercurial doesn't support negative indexes properly
                # we need to shift accordingly by one to get proper index, e.g
                # repo[-1] => repo[-2]
                # repo[0] => repo[-1]
                if rev <= 0:
                    rev = rev + -1
            try:
                ctx = self._get_ctx(repo, rev)
            except (TypeError, RepoLookupError, binascii.Error) as e:
                e._org_exc_tb = traceback.format_exc()
                raise exceptions.LookupException(e)(rev)
            except LookupError as e:
                e._org_exc_tb = traceback.format_exc()
                raise exceptions.LookupException(e)(e.name)

            if not both:
                return ctx.hex()

            ctx = repo[ctx.hex()]
            return ctx.hex(), ctx.rev()

        return _lookup(context_uid, repo_id, revision, both)
700 702
    @reraise_safe_exceptions
    def sync_push(self, wire, url):
        """Push all bookmarks and new branches to the remote at `url`.

        The URL is validated first via :meth:`check_url` (which raises on
        failure). Interactive prompts are disabled on both peers.
        :returns: changegroup push result code, or None if validation bailed
        """
        if not self.check_url(url, wire['config']):
            return

        repo = self._factory.repo(wire)

        # Disable any prompts for this repo
        repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')

        bookmarks = list(dict(repo._bookmarks).keys())
        remote = peer(repo, {}, safe_bytes(url))
        # Disable any prompts for this remote
        remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')

        return exchange.push(
            repo, remote, newbranch=True, bookmarks=bookmarks).cgresult
718 720
    @reraise_safe_exceptions
    def revision(self, wire, rev):
        """Return the integer revision number for `rev`."""
        repo = self._factory.repo(wire)
        ctx = self._get_ctx(repo, rev)
        return ctx.rev()
724 726
    @reraise_safe_exceptions
    def rev_range(self, wire, commit_filter):
        """Return hex ids matching the given revset filter expressions (cached)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _rev_range(_context_uid, _repo_id, _filter):
            repo = self._factory.repo(wire)
            revisions = [
                ascii_str(repo[rev].hex())
                for rev in revrange(repo, list(map(ascii_bytes, commit_filter)))
            ]
            return revisions

        # sorted() gives a stable cache key for equivalent filter sets
        return _rev_range(context_uid, repo_id, sorted(commit_filter))
740 742
    @reraise_safe_exceptions
    def rev_range_hash(self, wire, node):
        """Return hex ids for all revisions from `node` to tip, ascending."""
        repo = self._factory.repo(wire)

        def get_revs(repo, rev_opt):
            # resolve a revset option into a (max, min) revision pair;
            # an empty result maps to (nullrev, nullrev)
            if rev_opt:
                revs = revrange(repo, rev_opt)
                if len(revs) == 0:
                    return (nullrev, nullrev)
                return max(revs), min(revs)
            else:
                return len(repo) - 1, 0

        stop, start = get_revs(repo, [node + ':'])
        revs = [ascii_str(repo[r].hex()) for r in range(start, stop + 1)]
        return revs
757 759
758 760 @reraise_safe_exceptions
759 761 def revs_from_revspec(self, wire, rev_spec, *args, **kwargs):
760 762 org_path = safe_bytes(wire["path"])
761 763 other_path = safe_bytes(kwargs.pop('other_path', ''))
762 764
763 765 # case when we want to compare two independent repositories
764 766 if other_path and other_path != wire["path"]:
765 767 baseui = self._factory._create_config(wire["config"])
766 768 repo = unionrepo.makeunionrepository(baseui, other_path, org_path)
767 769 else:
768 770 repo = self._factory.repo(wire)
769 771 return list(repo.revs(rev_spec, *args))
770 772
    @reraise_safe_exceptions
    def verify(self, wire,):
        """Run ``hg verify`` on the repository and return its captured output."""
        repo = self._factory.repo(wire)
        baseui = self._factory._create_config(wire['config'])

        # capture ui output instead of writing to the server console
        baseui, output = patch_ui_message_output(baseui)

        repo.ui = baseui
        verify.verify(repo)
        return output.getvalue()
781 783
    @reraise_safe_exceptions
    def hg_update_cache(self, wire,):
        """Rebuild all repository caches (under wlock+lock) and return ui output."""
        repo = self._factory.repo(wire)
        baseui = self._factory._create_config(wire['config'])
        baseui, output = patch_ui_message_output(baseui)

        repo.ui = baseui
        with repo.wlock(), repo.lock():
            repo.updatecaches(full=True)

        return output.getvalue()
793 795
    @reraise_safe_exceptions
    def hg_rebuild_fn_cache(self, wire,):
        """Rebuild the filename cache (fncache) and return captured ui output."""
        repo = self._factory.repo(wire)
        baseui = self._factory._create_config(wire['config'])
        baseui, output = patch_ui_message_output(baseui)

        repo.ui = baseui

        repair.rebuildfncache(baseui, repo)

        return output.getvalue()
805 807
    @reraise_safe_exceptions
    def tags(self, wire):
        """Return ``{tag_name: hex_commit_id}``, cached per call context."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _tags(_context_uid, _repo_id):
            repo = self._factory.repo(wire)
            return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo.tags().items()}

        return _tags(context_uid, repo_id)
817 819
    @reraise_safe_exceptions
    def update(self, wire, node='', clean=False):
        """Update the working directory to `node` (``hg update``).

        NOTE(review): an empty `node` becomes ``b''`` via safe_bytes —
        confirm commands.update treats that the same as "no revision given".
        """
        repo = self._factory.repo(wire)
        baseui = self._factory._create_config(wire['config'])
        node = safe_bytes(node)

        commands.update(baseui, repo, node=node, clean=clean)
825 827
    @reraise_safe_exceptions
    def identify(self, wire):
        """Return the full node id of the working directory (``hg identify``)."""
        repo = self._factory.repo(wire)
        baseui = self._factory._create_config(wire['config'])
        output = io.BytesIO()
        baseui.write = output.write
        # This is required to get a full node id
        baseui.debugflag = True
        commands.identify(baseui, repo, id=True)

        return output.getvalue()
837 839
    @reraise_safe_exceptions
    def heads(self, wire, branch=None):
        """Return space-separated head node ids, optionally for one branch."""
        repo = self._factory.repo(wire)
        baseui = self._factory._create_config(wire['config'])
        output = io.BytesIO()

        def write(data, **unused_kwargs):
            # capture command output instead of printing it
            output.write(data)

        baseui.write = write
        if branch:
            args = [safe_bytes(branch)]
        else:
            args = []
        commands.heads(baseui, repo, template=b'{node} ', *args)

        return output.getvalue()
855 857
    @reraise_safe_exceptions
    def ancestor(self, wire, revision1, revision2):
        """Return the hex id of the common ancestor of the two revisions."""
        repo = self._factory.repo(wire)
        changelog = repo.changelog
        lookup = repo.lookup
        a = changelog.ancestor(lookup(safe_bytes(revision1)), lookup(safe_bytes(revision2)))
        return hex(a)
863 865
    @reraise_safe_exceptions
    def clone(self, wire, source, dest, update_after_clone=False, hooks=True):
        """Clone `source` into `dest`; update the working dir only on request."""
        baseui = self._factory._create_config(wire["config"], hooks=hooks)
        clone(baseui, safe_bytes(source), safe_bytes(dest), noupdate=not update_after_clone)
868 870
    @reraise_safe_exceptions
    def commitctx(self, wire, message, parents, commit_time, commit_timezone, user, files, extra, removed, updated):
        """Create an in-memory commit and return its hex id.

        :param parents: parent node ids for the new commit
        :param files: all paths to record in the commit
        :param extra: mapping copied into the commit's extras
        :param removed: paths to delete; :param updated: dicts with keys
            ``path``, ``content``, ``mode`` describing added/changed files
        The commit phase follows the repo's ``phases.publish`` setting.
        """
        repo = self._factory.repo(wire)
        baseui = self._factory._create_config(wire['config'])
        publishing = baseui.configbool(b'phases', b'publish')

        def _filectxfn(_repo, ctx, path: bytes):
            """
            Marks given path as added/changed/removed in a given _repo. This is
            for internal mercurial commit function.
            """

            # check if this path is removed
            if safe_str(path) in removed:
                # returning None is a way to mark node for removal
                return None

            # check if this path is added
            for node in updated:
                if safe_bytes(node['path']) == path:
                    return memfilectx(
                        _repo,
                        changectx=ctx,
                        path=safe_bytes(node['path']),
                        data=safe_bytes(node['content']),
                        islink=False,
                        isexec=bool(node['mode'] & stat.S_IXUSR),
                        copysource=False)
            abort_exc = exceptions.AbortException()
            raise abort_exc(f"Given path haven't been marked as added, changed or removed ({path})")

        if publishing:
            new_commit_phase = b'public'
        else:
            new_commit_phase = b'draft'
        with repo.ui.configoverride({(b'phases', b'new-commit'): new_commit_phase}):
            kwargs = {safe_bytes(k): safe_bytes(v) for k, v in extra.items()}
            commit_ctx = memctx(
                repo=repo,
                parents=parents,
                text=safe_bytes(message),
                files=[safe_bytes(x) for x in files],
                filectxfn=_filectxfn,
                user=safe_bytes(user),
                date=(commit_time, commit_timezone),
                extra=kwargs)

            n = repo.commitctx(commit_ctx)
            new_id = hex(n)

        return new_id
921 923
    @reraise_safe_exceptions
    def pull(self, wire, url, commit_ids=None):
        """Pull from `url`, optionally limited to the given commit ids.

        Interactive prompts are disabled on both peers.
        :returns: changegroup pull result code
        """
        repo = self._factory.repo(wire)
        # Disable any prompts for this repo
        repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')

        remote = peer(repo, {}, safe_bytes(url))
        # Disable any prompts for this remote
        remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')

        if commit_ids:
            commit_ids = [bin(commit_id) for commit_id in commit_ids]

        return exchange.pull(
            repo, remote, heads=commit_ids, force=None).cgresult
937 939
938 940 @reraise_safe_exceptions
939 941 def pull_cmd(self, wire, source, bookmark='', branch='', revision='', hooks=True):
940 942 repo = self._factory.repo(wire)
941 943 baseui = self._factory._create_config(wire['config'], hooks=hooks)
942 944
943 945 source = safe_bytes(source)
944 946
945 947 # Mercurial internally has a lot of logic that checks ONLY if
946 948 # option is defined, we just pass those if they are defined then
947 949 opts = {}
948 950 if bookmark:
949 951 if isinstance(branch, list):
950 952 bookmark = [safe_bytes(x) for x in bookmark]
951 953 else:
952 954 bookmark = safe_bytes(bookmark)
953 955 opts['bookmark'] = bookmark
954 956 if branch:
955 957 if isinstance(branch, list):
956 958 branch = [safe_bytes(x) for x in branch]
957 959 else:
958 960 branch = safe_bytes(branch)
959 961 opts['branch'] = branch
960 962 if revision:
961 963 opts['rev'] = safe_bytes(revision)
962 964
963 965 commands.pull(baseui, repo, source, **opts)
964 966
    @reraise_safe_exceptions
    def push(self, wire, revisions, dest_path, hooks=True, push_branches=False):
        """Run ``hg push`` of `revisions` to `dest_path`."""
        repo = self._factory.repo(wire)
        baseui = self._factory._create_config(wire['config'], hooks=hooks)
        commands.push(baseui, repo, dest=dest_path, rev=revisions,
                      new_branch=push_branches)
971 973
    @reraise_safe_exceptions
    def strip(self, wire, revision, update, backup):
        """Strip `revision` (and descendants) from the repository."""
        repo = self._factory.repo(wire)
        ctx = self._get_ctx(repo, revision)
        hgext_strip(
            repo.baseui, repo, ctx.node(), update=update, backup=backup)
978 980
    @reraise_safe_exceptions
    def get_unresolved_files(self, wire):
        """Return ``hg resolve --list`` output lines (unresolved merge files)."""
        repo = self._factory.repo(wire)

        log.debug('Calculating unresolved files for repo: %s', repo)
        output = io.BytesIO()

        def write(data, **unused_kwargs):
            # capture command output instead of printing it
            output.write(data)

        baseui = self._factory._create_config(wire['config'])
        baseui.write = write

        commands.resolve(baseui, repo, list=True)
        unresolved = output.getvalue().splitlines(0)
        return unresolved
995 997
    @reraise_safe_exceptions
    def merge(self, wire, revision):
        """Merge `revision` into the working directory, non-interactively."""
        repo = self._factory.repo(wire)
        baseui = self._factory._create_config(wire['config'])
        repo.ui.setconfig(b'ui', b'merge', b'internal:dump')

        # In case of sub repositories are used mercurial prompts the user in
        # case of merge conflicts or different sub repository sources. By
        # setting the interactive flag to `False` mercurial doesn't prompt the
        # used but instead uses a default value.
        repo.ui.setconfig(b'ui', b'interactive', False)
        commands.merge(baseui, repo, rev=revision)
1008 1010
    @reraise_safe_exceptions
    def merge_state(self, wire):
        """Return the list of unresolved files from the current merge state."""
        repo = self._factory.repo(wire)
        repo.ui.setconfig(b'ui', b'merge', b'internal:dump')

        # In case of sub repositories are used mercurial prompts the user in
        # case of merge conflicts or different sub repository sources. By
        # setting the interactive flag to `False` mercurial doesn't prompt the
        # used but instead uses a default value.
        repo.ui.setconfig(b'ui', b'interactive', False)
        ms = hg_merge.mergestate(repo)
        return [x for x in ms.unresolved()]
1021 1023
    @reraise_safe_exceptions
    def commit(self, wire, message, username, close_branch=False):
        """Commit working-directory changes as `username` with `message`."""
        repo = self._factory.repo(wire)
        baseui = self._factory._create_config(wire['config'])
        repo.ui.setconfig(b'ui', b'username', username)
        commands.commit(baseui, repo, message=message, close_branch=close_branch)
1028 1030
    @reraise_safe_exceptions
    def rebase(self, wire, source=None, dest=None, abort=False):
        """Rebase `source` onto `dest`, or abort an in-progress rebase."""
        repo = self._factory.repo(wire)
        baseui = self._factory._create_config(wire['config'])
        repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
        # In case of sub repositories are used mercurial prompts the user in
        # case of merge conflicts or different sub repository sources. By
        # setting the interactive flag to `False` mercurial doesn't prompt the
        # used but instead uses a default value.
        repo.ui.setconfig(b'ui', b'interactive', False)
        # keep original commits unless we are aborting the rebase
        rebase.rebase(baseui, repo, base=source, dest=dest, abort=abort, keep=not abort)
1040 1042
    @reraise_safe_exceptions
    def tag(self, wire, name, revision, message, local, user, tag_time, tag_timezone):
        """Create a (local or global) tag `name` pointing at `revision`.

        NOTE(review): `name`, `message` and `user` are passed through as-is;
        mercurial generally expects bytes on py3 — confirm callers send the
        right types.
        :raises exceptions.AbortException: when mercurial aborts the tag op
        """
        repo = self._factory.repo(wire)
        ctx = self._get_ctx(repo, revision)
        node = ctx.node()

        date = (tag_time, tag_timezone)
        try:
            hg_tag.tag(repo, name, node, message, local, user, date)
        except Abort as e:
            log.exception("Tag operation aborted")
            # Exception can contain unicode which we convert
            raise exceptions.AbortException(e)(repr(e))
1054 1056
    @reraise_safe_exceptions
    def bookmark(self, wire, bookmark, revision=''):
        """Set (force-move if needed) bookmark `bookmark` to `revision`."""
        repo = self._factory.repo(wire)
        baseui = self._factory._create_config(wire['config'])
        commands.bookmark(baseui, repo, safe_bytes(bookmark), rev=safe_bytes(revision), force=True)
1060 1062
    @reraise_safe_exceptions
    def install_hooks(self, wire, force=False):
        """No-op: Mercurial repositories need no special hook installation."""
        # we don't need any special hooks for Mercurial
        pass
1065 1067
    @reraise_safe_exceptions
    def get_hooks_info(self, wire):
        """Return pre/post hook version info (always the vcsserver version)."""
        return {
            'pre_version': vcsserver.__version__,
            'post_version': vcsserver.__version__,
        }
1072 1074
    @reraise_safe_exceptions
    def set_head_ref(self, wire, head_name):
        """No-op: head refs are a Git concept and do not apply to Mercurial."""
        pass
1076 1078
    @reraise_safe_exceptions
    def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
                     archive_dir_name, commit_id):
        """Create an archive of `commit_id` at `archive_dest_path`.

        Builds a lazy file walker over the commit's manifest (optionally
        restricted to `archive_at_path`) and delegates packaging to the
        shared ``archive_repo`` helper.
        """

        def file_walker(_commit_id, path):
            # yield ArchiveNode entries for every file under `path`
            repo = self._factory.repo(wire)
            ctx = repo[_commit_id]
            is_root = path in ['', '/']
            if is_root:
                matcher = alwaysmatcher(badfn=None)
            else:
                # NOTE(review): the pattern mixes str (path+'/**') with bytes
                # markers (b'glob', b'') — confirm patternmatcher accepts this
                # on py3 or whether the pattern needs safe_bytes()
                matcher = patternmatcher('', [(b'glob', path+'/**', b'')], badfn=None)
            file_iter = ctx.manifest().walk(matcher)

            for fn in file_iter:
                file_path = fn
                flags = ctx.flags(fn)
                # 'x' flag -> executable; 'l' flag -> symlink
                mode = b'x' in flags and 0o755 or 0o644
                is_link = b'l' in flags

                yield ArchiveNode(file_path, mode, is_link, ctx[fn].data)

        return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
                            archive_dir_name, commit_id)
1101 1103
@@ -1,879 +1,888 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18
19 19 import os
20 20 import subprocess
21 21 from urllib.error import URLError
22 22 import urllib.parse
23 23 import logging
24 24 import posixpath as vcspath
25 25 import io
26 26 import urllib.request
27 27 import urllib.parse
28 28 import urllib.error
29 29 import traceback
30 30
31 31 import svn.client
32 32 import svn.core
33 33 import svn.delta
34 34 import svn.diff
35 35 import svn.fs
36 36 import svn.repos
37 37
38 38 from vcsserver import svn_diff, exceptions, subprocessio, settings
39 39 from vcsserver.base import RepoFactory, raise_from_original, ArchiveNode, archive_repo
40 40 from vcsserver.exceptions import NoContentException
41 41 from vcsserver.str_utils import safe_str, safe_bytes
42 42 from vcsserver.vcs_base import RemoteBase
43 43 from vcsserver.lib.svnremoterepo import svnremoterepo
44 44 log = logging.getLogger(__name__)
45 45
46 46
47 47 svn_compatible_versions_map = {
48 48 'pre-1.4-compatible': '1.3',
49 49 'pre-1.5-compatible': '1.4',
50 50 'pre-1.6-compatible': '1.5',
51 51 'pre-1.8-compatible': '1.7',
52 52 'pre-1.9-compatible': '1.8',
53 53 }
54 54
55 55 current_compatible_version = '1.14'
56 56
57 57
58 58 def reraise_safe_exceptions(func):
59 59 """Decorator for converting svn exceptions to something neutral."""
60 60 def wrapper(*args, **kwargs):
61 61 try:
62 62 return func(*args, **kwargs)
63 63 except Exception as e:
64 64 if not hasattr(e, '_vcs_kind'):
65 65 log.exception("Unhandled exception in svn remote call")
66 66 raise_from_original(exceptions.UnhandledException(e), e)
67 67 raise
68 68 return wrapper
69 69
70 70
71 71 class SubversionFactory(RepoFactory):
72 72 repo_type = 'svn'
73 73
74 74 def _create_repo(self, wire, create, compatible_version):
75 75 path = svn.core.svn_path_canonicalize(wire['path'])
76 76 if create:
77 77 fs_config = {'compatible-version': current_compatible_version}
78 78 if compatible_version:
79 79
80 80 compatible_version_string = \
81 81 svn_compatible_versions_map.get(compatible_version) \
82 82 or compatible_version
83 83 fs_config['compatible-version'] = compatible_version_string
84 84
85 85 log.debug('Create SVN repo with config "%s"', fs_config)
86 86 repo = svn.repos.create(path, "", "", None, fs_config)
87 87 else:
88 88 repo = svn.repos.open(path)
89 89
90 90 log.debug('Got SVN object: %s', repo)
91 91 return repo
92 92
93 93 def repo(self, wire, create=False, compatible_version=None):
94 94 """
95 95 Get a repository instance for the given path.
96 96 """
97 97 return self._create_repo(wire, create, compatible_version)
98 98
99 99
100 100 NODE_TYPE_MAPPING = {
101 101 svn.core.svn_node_file: 'file',
102 102 svn.core.svn_node_dir: 'dir',
103 103 }
104 104
105 105
106 106 class SvnRemote(RemoteBase):
107 107
108 108 def __init__(self, factory, hg_factory=None):
109 109 self._factory = factory
110 110
111 111 @reraise_safe_exceptions
112 112 def discover_svn_version(self):
113 113 try:
114 114 import svn.core
115 115 svn_ver = svn.core.SVN_VERSION
116 116 except ImportError:
117 117 svn_ver = None
118 118 return safe_str(svn_ver)
119 119
120 120 @reraise_safe_exceptions
121 121 def is_empty(self, wire):
122 122
123 123 try:
124 124 return self.lookup(wire, -1) == 0
125 125 except Exception:
126 126 log.exception("failed to read object_store")
127 127 return False
128 128
129 129 def check_url(self, url):
130 130
131 131 # uuid function get's only valid UUID from proper repo, else
132 132 # throws exception
133 133 username, password, src_url = self.get_url_and_credentials(url)
134 134 try:
135 135 svnremoterepo(username, password, src_url).svn().uuid
136 136 except Exception:
137 137 tb = traceback.format_exc()
138 138 log.debug("Invalid Subversion url: `%s`, tb: %s", url, tb)
139 139 raise URLError(
140 140 '"%s" is not a valid Subversion source url.' % (url, ))
141 141 return True
142 142
143 143 def is_path_valid_repository(self, wire, path):
144 144
145 145 # NOTE(marcink): short circuit the check for SVN repo
146 146 # the repos.open might be expensive to check, but we have one cheap
147 147 # pre condition that we can use, to check for 'format' file
148 148
149 149 if not os.path.isfile(os.path.join(path, 'format')):
150 150 return False
151 151
152 152 try:
153 153 svn.repos.open(path)
154 154 except svn.core.SubversionException:
155 155 tb = traceback.format_exc()
156 156 log.debug("Invalid Subversion path `%s`, tb: %s", path, tb)
157 157 return False
158 158 return True
159 159
160 160 @reraise_safe_exceptions
161 161 def verify(self, wire,):
162 162 repo_path = wire['path']
163 163 if not self.is_path_valid_repository(wire, repo_path):
164 164 raise Exception(
165 165 "Path %s is not a valid Subversion repository." % repo_path)
166 166
167 167 cmd = ['svnadmin', 'info', repo_path]
168 168 stdout, stderr = subprocessio.run_command(cmd)
169 169 return stdout
170 170
171 171 def lookup(self, wire, revision):
172 172 if revision not in [-1, None, 'HEAD']:
173 173 raise NotImplementedError
174 174 repo = self._factory.repo(wire)
175 175 fs_ptr = svn.repos.fs(repo)
176 176 head = svn.fs.youngest_rev(fs_ptr)
177 177 return head
178 178
179 179 def lookup_interval(self, wire, start_ts, end_ts):
180 180 repo = self._factory.repo(wire)
181 181 fsobj = svn.repos.fs(repo)
182 182 start_rev = None
183 183 end_rev = None
184 184 if start_ts:
185 185 start_ts_svn = apr_time_t(start_ts)
186 186 start_rev = svn.repos.dated_revision(repo, start_ts_svn) + 1
187 187 else:
188 188 start_rev = 1
189 189 if end_ts:
190 190 end_ts_svn = apr_time_t(end_ts)
191 191 end_rev = svn.repos.dated_revision(repo, end_ts_svn)
192 192 else:
193 193 end_rev = svn.fs.youngest_rev(fsobj)
194 194 return start_rev, end_rev
195 195
196 196 def revision_properties(self, wire, revision):
197 197
198 198 cache_on, context_uid, repo_id = self._cache_on(wire)
199 199 region = self._region(wire)
200 200 @region.conditional_cache_on_arguments(condition=cache_on)
201 201 def _revision_properties(_repo_id, _revision):
202 202 repo = self._factory.repo(wire)
203 203 fs_ptr = svn.repos.fs(repo)
204 204 return svn.fs.revision_proplist(fs_ptr, revision)
205 205 return _revision_properties(repo_id, revision)
206 206
207 207 def revision_changes(self, wire, revision):
208 208
209 209 repo = self._factory.repo(wire)
210 210 fsobj = svn.repos.fs(repo)
211 211 rev_root = svn.fs.revision_root(fsobj, revision)
212 212
213 213 editor = svn.repos.ChangeCollector(fsobj, rev_root)
214 214 editor_ptr, editor_baton = svn.delta.make_editor(editor)
215 215 base_dir = ""
216 216 send_deltas = False
217 217 svn.repos.replay2(
218 218 rev_root, base_dir, svn.core.SVN_INVALID_REVNUM, send_deltas,
219 219 editor_ptr, editor_baton, None)
220 220
221 221 added = []
222 222 changed = []
223 223 removed = []
224 224
225 225 # TODO: CHANGE_ACTION_REPLACE: Figure out where it belongs
226 226 for path, change in editor.changes.items():
227 227 # TODO: Decide what to do with directory nodes. Subversion can add
228 228 # empty directories.
229 229
230 230 if change.item_kind == svn.core.svn_node_dir:
231 231 continue
232 232 if change.action in [svn.repos.CHANGE_ACTION_ADD]:
233 233 added.append(path)
234 234 elif change.action in [svn.repos.CHANGE_ACTION_MODIFY,
235 235 svn.repos.CHANGE_ACTION_REPLACE]:
236 236 changed.append(path)
237 237 elif change.action in [svn.repos.CHANGE_ACTION_DELETE]:
238 238 removed.append(path)
239 239 else:
240 240 raise NotImplementedError(
241 241 "Action %s not supported on path %s" % (
242 242 change.action, path))
243 243
244 244 changes = {
245 245 'added': added,
246 246 'changed': changed,
247 247 'removed': removed,
248 248 }
249 249 return changes
250 250
251 251 @reraise_safe_exceptions
252 252 def node_history(self, wire, path, revision, limit):
253 253 cache_on, context_uid, repo_id = self._cache_on(wire)
254 254 region = self._region(wire)
255 255 @region.conditional_cache_on_arguments(condition=cache_on)
256 256 def _assert_correct_path(_context_uid, _repo_id, _path, _revision, _limit):
257 257 cross_copies = False
258 258 repo = self._factory.repo(wire)
259 259 fsobj = svn.repos.fs(repo)
260 260 rev_root = svn.fs.revision_root(fsobj, revision)
261 261
262 262 history_revisions = []
263 263 history = svn.fs.node_history(rev_root, path)
264 264 history = svn.fs.history_prev(history, cross_copies)
265 265 while history:
266 266 __, node_revision = svn.fs.history_location(history)
267 267 history_revisions.append(node_revision)
268 268 if limit and len(history_revisions) >= limit:
269 269 break
270 270 history = svn.fs.history_prev(history, cross_copies)
271 271 return history_revisions
272 272 return _assert_correct_path(context_uid, repo_id, path, revision, limit)
273 273
274 274 def node_properties(self, wire, path, revision):
275 275 cache_on, context_uid, repo_id = self._cache_on(wire)
276 276 region = self._region(wire)
277 277 @region.conditional_cache_on_arguments(condition=cache_on)
278 278 def _node_properties(_repo_id, _path, _revision):
279 279 repo = self._factory.repo(wire)
280 280 fsobj = svn.repos.fs(repo)
281 281 rev_root = svn.fs.revision_root(fsobj, revision)
282 282 return svn.fs.node_proplist(rev_root, path)
283 283 return _node_properties(repo_id, path, revision)
284 284
285 285 def file_annotate(self, wire, path, revision):
286 286 abs_path = 'file://' + urllib.request.pathname2url(
287 287 vcspath.join(wire['path'], path))
288 288 file_uri = svn.core.svn_path_canonicalize(abs_path)
289 289
290 290 start_rev = svn_opt_revision_value_t(0)
291 291 peg_rev = svn_opt_revision_value_t(revision)
292 292 end_rev = peg_rev
293 293
294 294 annotations = []
295 295
296 296 def receiver(line_no, revision, author, date, line, pool):
297 297 annotations.append((line_no, revision, line))
298 298
299 299 # TODO: Cannot use blame5, missing typemap function in the swig code
300 300 try:
301 301 svn.client.blame2(
302 302 file_uri, peg_rev, start_rev, end_rev,
303 303 receiver, svn.client.create_context())
304 304 except svn.core.SubversionException as exc:
305 305 log.exception("Error during blame operation.")
306 306 raise Exception(
307 307 "Blame not supported or file does not exist at path %s. "
308 308 "Error %s." % (path, exc))
309 309
310 310 return annotations
311 311
312 312 def get_node_type(self, wire, path, revision=None):
313 313
314 314 cache_on, context_uid, repo_id = self._cache_on(wire)
315 315 region = self._region(wire)
316 316 @region.conditional_cache_on_arguments(condition=cache_on)
317 317 def _get_node_type(_repo_id, _path, _revision):
318 318 repo = self._factory.repo(wire)
319 319 fs_ptr = svn.repos.fs(repo)
320 320 if _revision is None:
321 321 _revision = svn.fs.youngest_rev(fs_ptr)
322 322 root = svn.fs.revision_root(fs_ptr, _revision)
323 323 node = svn.fs.check_path(root, path)
324 324 return NODE_TYPE_MAPPING.get(node, None)
325 325 return _get_node_type(repo_id, path, revision)
326 326
327 327 def get_nodes(self, wire, path, revision=None):
328 328
329 329 cache_on, context_uid, repo_id = self._cache_on(wire)
330 330 region = self._region(wire)
331 331
332 332 @region.conditional_cache_on_arguments(condition=cache_on)
333 333 def _get_nodes(_repo_id, _path, _revision):
334 334 repo = self._factory.repo(wire)
335 335 fsobj = svn.repos.fs(repo)
336 336 if _revision is None:
337 337 _revision = svn.fs.youngest_rev(fsobj)
338 338 root = svn.fs.revision_root(fsobj, _revision)
339 339 entries = svn.fs.dir_entries(root, path)
340 340 result = []
341 341 for entry_path, entry_info in entries.items():
342 342 result.append(
343 343 (entry_path, NODE_TYPE_MAPPING.get(entry_info.kind, None)))
344 344 return result
345 345 return _get_nodes(repo_id, path, revision)
346 346
347 347 def get_file_content(self, wire, path, rev=None):
348 348 repo = self._factory.repo(wire)
349 349 fsobj = svn.repos.fs(repo)
350 350 if rev is None:
351 351 rev = svn.fs.youngest_revision(fsobj)
352 352 root = svn.fs.revision_root(fsobj, rev)
353 353 content = svn.core.Stream(svn.fs.file_contents(root, path))
354 354 return content.read()
355 355
356 356 def get_file_size(self, wire, path, revision=None):
357 357
358 358 cache_on, context_uid, repo_id = self._cache_on(wire)
359 359 region = self._region(wire)
360 360
361 361 @region.conditional_cache_on_arguments(condition=cache_on)
362 362 def _get_file_size(_repo_id, _path, _revision):
363 363 repo = self._factory.repo(wire)
364 364 fsobj = svn.repos.fs(repo)
365 365 if _revision is None:
366 366 _revision = svn.fs.youngest_revision(fsobj)
367 367 root = svn.fs.revision_root(fsobj, _revision)
368 368 size = svn.fs.file_length(root, path)
369 369 return size
370 370 return _get_file_size(repo_id, path, revision)
371 371
372 372 def create_repository(self, wire, compatible_version=None):
373 373 log.info('Creating Subversion repository in path "%s"', wire['path'])
374 374 self._factory.repo(wire, create=True,
375 375 compatible_version=compatible_version)
376 376
377 377 def get_url_and_credentials(self, src_url):
378 378 obj = urllib.parse.urlparse(src_url)
379 379 username = obj.username or None
380 380 password = obj.password or None
381 381 return username, password, src_url
382 382
383 383 def import_remote_repository(self, wire, src_url):
384 384 repo_path = wire['path']
385 385 if not self.is_path_valid_repository(wire, repo_path):
386 386 raise Exception(
387 387 "Path %s is not a valid Subversion repository." % repo_path)
388 388
389 389 username, password, src_url = self.get_url_and_credentials(src_url)
390 390 rdump_cmd = ['svnrdump', 'dump', '--non-interactive',
391 391 '--trust-server-cert-failures=unknown-ca']
392 392 if username and password:
393 393 rdump_cmd += ['--username', username, '--password', password]
394 394 rdump_cmd += [src_url]
395 395
396 396 rdump = subprocess.Popen(
397 397 rdump_cmd,
398 398 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
399 399 load = subprocess.Popen(
400 400 ['svnadmin', 'load', repo_path], stdin=rdump.stdout)
401 401
402 402 # TODO: johbo: This can be a very long operation, might be better
403 403 # to track some kind of status and provide an api to check if the
404 404 # import is done.
405 405 rdump.wait()
406 406 load.wait()
407 407
408 408 log.debug('Return process ended with code: %s', rdump.returncode)
409 409 if rdump.returncode != 0:
410 410 errors = rdump.stderr.read()
411 411 log.error('svnrdump dump failed: statuscode %s: message: %s', rdump.returncode, errors)
412 412
413 413 reason = 'UNKNOWN'
414 414 if b'svnrdump: E230001:' in errors:
415 415 reason = 'INVALID_CERTIFICATE'
416 416
417 417 if reason == 'UNKNOWN':
418 418 reason = 'UNKNOWN:{}'.format(safe_str(errors))
419 419
420 420 raise Exception(
421 421 'Failed to dump the remote repository from %s. Reason:%s' % (
422 422 src_url, reason))
423 423 if load.returncode != 0:
424 424 raise Exception(
425 425 'Failed to load the dump of remote repository from %s.' %
426 426 (src_url, ))
427 427
428 428 def commit(self, wire, message, author, timestamp, updated, removed):
429 429
430 430 updated = [{k: safe_bytes(v) for k, v in x.items() if isinstance(v, str)} for x in updated]
431 431
432 432 message = safe_bytes(message)
433 433 author = safe_bytes(author)
434 434
435 435 repo = self._factory.repo(wire)
436 436 fsobj = svn.repos.fs(repo)
437 437
438 438 rev = svn.fs.youngest_rev(fsobj)
439 439 txn = svn.repos.fs_begin_txn_for_commit(repo, rev, author, message)
440 440 txn_root = svn.fs.txn_root(txn)
441 441
442 442 for node in updated:
443 443 TxnNodeProcessor(node, txn_root).update()
444 444 for node in removed:
445 445 TxnNodeProcessor(node, txn_root).remove()
446 446
447 447 commit_id = svn.repos.fs_commit_txn(repo, txn)
448 448
449 449 if timestamp:
450 450 apr_time = apr_time_t(timestamp)
451 451 ts_formatted = svn.core.svn_time_to_cstring(apr_time)
452 452 svn.fs.change_rev_prop(fsobj, commit_id, 'svn:date', ts_formatted)
453 453
454 454 log.debug('Committed revision "%s" to "%s".', commit_id, wire['path'])
455 455 return commit_id
456 456
457 457 def diff(self, wire, rev1, rev2, path1=None, path2=None,
458 458 ignore_whitespace=False, context=3):
459 459
460 460 wire.update(cache=False)
461 461 repo = self._factory.repo(wire)
462 462 diff_creator = SvnDiffer(
463 463 repo, rev1, path1, rev2, path2, ignore_whitespace, context)
464 464 try:
465 465 return diff_creator.generate_diff()
466 466 except svn.core.SubversionException as e:
467 467 log.exception(
468 468 "Error during diff operation operation. "
469 469 "Path might not exist %s, %s" % (path1, path2))
470 470 return ""
471 471
472 472 @reraise_safe_exceptions
473 473 def is_large_file(self, wire, path):
474 474 return False
475 475
476 476 @reraise_safe_exceptions
477 477 def is_binary(self, wire, rev, path):
478 478 cache_on, context_uid, repo_id = self._cache_on(wire)
479 479 region = self._region(wire)
480 480
481 481 @region.conditional_cache_on_arguments(condition=cache_on)
482 482 def _is_binary(_repo_id, _rev, _path):
483 483 raw_bytes = self.get_file_content(wire, path, rev)
484 484 return raw_bytes and b'\0' in raw_bytes
485 485
486 486 return _is_binary(repo_id, rev, path)
487 487
488 488 @reraise_safe_exceptions
489 489 def md5_hash(self, wire, rev, path):
490 490 cache_on, context_uid, repo_id = self._cache_on(wire)
491 491 region = self._region(wire)
492 492
493 493 @region.conditional_cache_on_arguments(condition=cache_on)
494 494 def _md5_hash(_repo_id, _rev, _path):
495 495 return ''
496 496
497 497 return _md5_hash(repo_id, rev, path)
498 498
499 499 @reraise_safe_exceptions
500 500 def run_svn_command(self, wire, cmd, **opts):
501 501 path = wire.get('path', None)
502 502
503 503 if path and os.path.isdir(path):
504 504 opts['cwd'] = path
505 505
506 506 safe_call = opts.pop('_safe', False)
507 507
508 508 svnenv = os.environ.copy()
509 509 svnenv.update(opts.pop('extra_env', {}))
510 510
511 511 _opts = {'env': svnenv, 'shell': False}
512 512
513 513 try:
514 514 _opts.update(opts)
515 515 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
516 516
517 517 return b''.join(proc), b''.join(proc.stderr)
518 518 except OSError as err:
519 519 if safe_call:
520 520 return '', safe_str(err).strip()
521 521 else:
522 522 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
523 523 tb_err = ("Couldn't run svn command (%s).\n"
524 524 "Original error was:%s\n"
525 525 "Call options:%s\n"
526 526 % (cmd, err, _opts))
527 527 log.exception(tb_err)
528 528 raise exceptions.VcsException()(tb_err)
529 529
530 530 @reraise_safe_exceptions
531 531 def install_hooks(self, wire, force=False):
532 532 from vcsserver.hook_utils import install_svn_hooks
533 533 repo_path = wire['path']
534 534 binary_dir = settings.BINARY_DIR
535 535 executable = None
536 536 if binary_dir:
537 537 executable = os.path.join(binary_dir, 'python3')
538 538 return install_svn_hooks(repo_path, force_create=force)
539 539
540 540 @reraise_safe_exceptions
541 541 def get_hooks_info(self, wire):
542 542 from vcsserver.hook_utils import (
543 543 get_svn_pre_hook_version, get_svn_post_hook_version)
544 544 repo_path = wire['path']
545 545 return {
546 546 'pre_version': get_svn_pre_hook_version(repo_path),
547 547 'post_version': get_svn_post_hook_version(repo_path),
548 548 }
549 549
550 550 @reraise_safe_exceptions
551 551 def set_head_ref(self, wire, head_name):
552 552 pass
553 553
554 554 @reraise_safe_exceptions
555 555 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
556 556 archive_dir_name, commit_id):
557 557
558 558 def walk_tree(root, root_dir, _commit_id):
559 559 """
560 560 Special recursive svn repo walker
561 561 """
562 root_dir = safe_bytes(root_dir)
562 563
563 564 filemode_default = 0o100644
564 565 filemode_executable = 0o100755
565 566
566 567 file_iter = svn.fs.dir_entries(root, root_dir)
567 568 for f_name in file_iter:
568 569 f_type = NODE_TYPE_MAPPING.get(file_iter[f_name].kind, None)
569 570
570 571 if f_type == 'dir':
571 572 # return only DIR, and then all entries in that dir
572 573 yield os.path.join(root_dir, f_name), {'mode': filemode_default}, f_type
573 574 new_root = os.path.join(root_dir, f_name)
574 575 for _f_name, _f_data, _f_type in walk_tree(root, new_root, _commit_id):
575 576 yield _f_name, _f_data, _f_type
576 577 else:
577 f_path = os.path.join(root_dir, f_name).rstrip('/')
578
579 f_path = os.path.join(root_dir, f_name).rstrip(b'/')
578 580 prop_list = svn.fs.node_proplist(root, f_path)
579 581
580 582 f_mode = filemode_default
581 583 if prop_list.get('svn:executable'):
582 584 f_mode = filemode_executable
583 585
584 586 f_is_link = False
585 587 if prop_list.get('svn:special'):
586 588 f_is_link = True
587 589
588 590 data = {
589 591 'is_link': f_is_link,
590 592 'mode': f_mode,
591 593 'content_stream': svn.core.Stream(svn.fs.file_contents(root, f_path)).read
592 594 }
593 595
594 596 yield f_path, data, f_type
595 597
596 598 def file_walker(_commit_id, path):
597 599 repo = self._factory.repo(wire)
598 600 root = svn.fs.revision_root(svn.repos.fs(repo), int(commit_id))
599 601
600 602 def no_content():
601 603 raise NoContentException()
602 604
603 605 for f_name, f_data, f_type in walk_tree(root, path, _commit_id):
604 606 file_path = f_name
605 607
606 608 if f_type == 'dir':
607 609 mode = f_data['mode']
608 610 yield ArchiveNode(file_path, mode, False, no_content)
609 611 else:
610 612 mode = f_data['mode']
611 613 is_link = f_data['is_link']
612 614 data_stream = f_data['content_stream']
613 615 yield ArchiveNode(file_path, mode, is_link, data_stream)
614 616
615 617 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
616 618 archive_dir_name, commit_id)
617 619
618 620
619 621 class SvnDiffer(object):
620 622 """
621 623 Utility to create diffs based on difflib and the Subversion api
622 624 """
623 625
624 626 binary_content = False
625 627
626 628 def __init__(
627 629 self, repo, src_rev, src_path, tgt_rev, tgt_path,
628 630 ignore_whitespace, context):
629 631 self.repo = repo
630 632 self.ignore_whitespace = ignore_whitespace
631 633 self.context = context
632 634
633 635 fsobj = svn.repos.fs(repo)
634 636
635 637 self.tgt_rev = tgt_rev
636 638 self.tgt_path = tgt_path or ''
637 639 self.tgt_root = svn.fs.revision_root(fsobj, tgt_rev)
638 640 self.tgt_kind = svn.fs.check_path(self.tgt_root, self.tgt_path)
639 641
640 642 self.src_rev = src_rev
641 643 self.src_path = src_path or self.tgt_path
642 644 self.src_root = svn.fs.revision_root(fsobj, src_rev)
643 645 self.src_kind = svn.fs.check_path(self.src_root, self.src_path)
644 646
645 647 self._validate()
646 648
647 649 def _validate(self):
648 650 if (self.tgt_kind != svn.core.svn_node_none and
649 651 self.src_kind != svn.core.svn_node_none and
650 652 self.src_kind != self.tgt_kind):
651 653 # TODO: johbo: proper error handling
652 654 raise Exception(
653 655 "Source and target are not compatible for diff generation. "
654 656 "Source type: %s, target type: %s" %
655 657 (self.src_kind, self.tgt_kind))
656 658
657 659 def generate_diff(self):
658 660 buf = io.StringIO()
659 661 if self.tgt_kind == svn.core.svn_node_dir:
660 662 self._generate_dir_diff(buf)
661 663 else:
662 664 self._generate_file_diff(buf)
663 665 return buf.getvalue()
664 666
665 667 def _generate_dir_diff(self, buf):
666 668 editor = DiffChangeEditor()
667 669 editor_ptr, editor_baton = svn.delta.make_editor(editor)
668 670 svn.repos.dir_delta2(
669 671 self.src_root,
670 672 self.src_path,
671 673 '', # src_entry
672 674 self.tgt_root,
673 675 self.tgt_path,
674 676 editor_ptr, editor_baton,
675 677 authorization_callback_allow_all,
676 678 False, # text_deltas
677 679 svn.core.svn_depth_infinity, # depth
678 680 False, # entry_props
679 681 False, # ignore_ancestry
680 682 )
681 683
682 684 for path, __, change in sorted(editor.changes):
683 685 self._generate_node_diff(
684 686 buf, change, path, self.tgt_path, path, self.src_path)
685 687
686 688 def _generate_file_diff(self, buf):
687 689 change = None
688 690 if self.src_kind == svn.core.svn_node_none:
689 691 change = "add"
690 692 elif self.tgt_kind == svn.core.svn_node_none:
691 693 change = "delete"
692 694 tgt_base, tgt_path = vcspath.split(self.tgt_path)
693 695 src_base, src_path = vcspath.split(self.src_path)
694 696 self._generate_node_diff(
695 697 buf, change, tgt_path, tgt_base, src_path, src_base)
696 698
697 699 def _generate_node_diff(
698 700 self, buf, change, tgt_path, tgt_base, src_path, src_base):
699 701
702
703 tgt_path = safe_str(tgt_path)
704 src_path = safe_str(src_path)
705
706
700 707 if self.src_rev == self.tgt_rev and tgt_base == src_base:
701 708 # makes consistent behaviour with git/hg to return empty diff if
702 709 # we compare same revisions
703 710 return
704 711
705 712 tgt_full_path = vcspath.join(tgt_base, tgt_path)
706 713 src_full_path = vcspath.join(src_base, src_path)
707 714
708 715 self.binary_content = False
709 716 mime_type = self._get_mime_type(tgt_full_path)
710 717
711 718 if mime_type and not mime_type.startswith('text'):
712 719 self.binary_content = True
713 720 buf.write("=" * 67 + '\n')
714 721 buf.write("Cannot display: file marked as a binary type.\n")
715 722 buf.write("svn:mime-type = %s\n" % mime_type)
716 723 buf.write("Index: %s\n" % (tgt_path, ))
717 724 buf.write("=" * 67 + '\n')
718 725 buf.write("diff --git a/%(tgt_path)s b/%(tgt_path)s\n" % {
719 726 'tgt_path': tgt_path})
720 727
721 728 if change == 'add':
722 729 # TODO: johbo: SVN is missing a zero here compared to git
723 730 buf.write("new file mode 10644\n")
724 731
725 732 #TODO(marcink): intro to binary detection of svn patches
726 733 # if self.binary_content:
727 734 # buf.write('GIT binary patch\n')
728 735
729 736 buf.write("--- /dev/null\t(revision 0)\n")
730 737 src_lines = []
731 738 else:
732 739 if change == 'delete':
733 740 buf.write("deleted file mode 10644\n")
734 741
735 742 #TODO(marcink): intro to binary detection of svn patches
736 743 # if self.binary_content:
737 744 # buf.write('GIT binary patch\n')
738 745
739 746 buf.write("--- a/%s\t(revision %s)\n" % (
740 747 src_path, self.src_rev))
741 748 src_lines = self._svn_readlines(self.src_root, src_full_path)
742 749
743 750 if change == 'delete':
744 751 buf.write("+++ /dev/null\t(revision %s)\n" % (self.tgt_rev, ))
745 752 tgt_lines = []
746 753 else:
747 754 buf.write("+++ b/%s\t(revision %s)\n" % (
748 755 tgt_path, self.tgt_rev))
749 756 tgt_lines = self._svn_readlines(self.tgt_root, tgt_full_path)
750 757
751 758 if not self.binary_content:
752 759 udiff = svn_diff.unified_diff(
753 760 src_lines, tgt_lines, context=self.context,
754 761 ignore_blank_lines=self.ignore_whitespace,
755 762 ignore_case=False,
756 763 ignore_space_changes=self.ignore_whitespace)
764
757 765 buf.writelines(udiff)
758 766
759 767 def _get_mime_type(self, path):
760 768 try:
761 769 mime_type = svn.fs.node_prop(
762 770 self.tgt_root, path, svn.core.SVN_PROP_MIME_TYPE)
763 771 except svn.core.SubversionException:
764 772 mime_type = svn.fs.node_prop(
765 773 self.src_root, path, svn.core.SVN_PROP_MIME_TYPE)
766 774 return mime_type
767 775
768 776 def _svn_readlines(self, fs_root, node_path):
769 777 if self.binary_content:
770 778 return []
771 779 node_kind = svn.fs.check_path(fs_root, node_path)
772 780 if node_kind not in (
773 781 svn.core.svn_node_file, svn.core.svn_node_symlink):
774 782 return []
775 783 content = svn.core.Stream(
776 784 svn.fs.file_contents(fs_root, node_path)).read()
785
777 786 return content.splitlines(True)
778 787
779 788
780 789 class DiffChangeEditor(svn.delta.Editor):
781 790 """
782 791 Records changes between two given revisions
783 792 """
784 793
785 794 def __init__(self):
786 795 self.changes = []
787 796
788 797 def delete_entry(self, path, revision, parent_baton, pool=None):
789 798 self.changes.append((path, None, 'delete'))
790 799
791 800 def add_file(
792 801 self, path, parent_baton, copyfrom_path, copyfrom_revision,
793 802 file_pool=None):
794 803 self.changes.append((path, 'file', 'add'))
795 804
796 805 def open_file(self, path, parent_baton, base_revision, file_pool=None):
797 806 self.changes.append((path, 'file', 'change'))
798 807
799 808
800 809 def authorization_callback_allow_all(root, path, pool):
801 810 return True
802 811
803 812
804 813 class TxnNodeProcessor(object):
805 814 """
806 815 Utility to process the change of one node within a transaction root.
807 816
808 817 It encapsulates the knowledge of how to add, update or remove
809 818 a node for a given transaction root. The purpose is to support the method
810 819 `SvnRemote.commit`.
811 820 """
812 821
813 822 def __init__(self, node, txn_root):
814 823 assert isinstance(node['path'], bytes)
815 824
816 825 self.node = node
817 826 self.txn_root = txn_root
818 827
819 828 def update(self):
820 829 self._ensure_parent_dirs()
821 830 self._add_file_if_node_does_not_exist()
822 831 self._update_file_content()
823 832 self._update_file_properties()
824 833
825 834 def remove(self):
826 835 svn.fs.delete(self.txn_root, self.node['path'])
827 836 # TODO: Clean up directory if empty
828 837
829 838 def _ensure_parent_dirs(self):
830 839 curdir = vcspath.dirname(self.node['path'])
831 840 dirs_to_create = []
832 841 while not self._svn_path_exists(curdir):
833 842 dirs_to_create.append(curdir)
834 843 curdir = vcspath.dirname(curdir)
835 844
836 845 for curdir in reversed(dirs_to_create):
837 846 log.debug('Creating missing directory "%s"', curdir)
838 847 svn.fs.make_dir(self.txn_root, curdir)
839 848
840 849 def _svn_path_exists(self, path):
841 850 path_status = svn.fs.check_path(self.txn_root, path)
842 851 return path_status != svn.core.svn_node_none
843 852
844 853 def _add_file_if_node_does_not_exist(self):
845 854 kind = svn.fs.check_path(self.txn_root, self.node['path'])
846 855 if kind == svn.core.svn_node_none:
847 856 svn.fs.make_file(self.txn_root, self.node['path'])
848 857
849 858 def _update_file_content(self):
850 859 assert isinstance(self.node['content'], bytes)
851 860
852 861 handler, baton = svn.fs.apply_textdelta(
853 862 self.txn_root, self.node['path'], None, None)
854 863 svn.delta.svn_txdelta_send_string(self.node['content'], handler, baton)
855 864
856 865 def _update_file_properties(self):
857 866 properties = self.node.get('properties', {})
858 867 for key, value in properties.items():
859 868 svn.fs.change_node_prop(
860 869 self.txn_root, self.node['path'], key, value)
861 870
862 871
863 872 def apr_time_t(timestamp):
864 873 """
865 874 Convert a Python timestamp into APR timestamp type apr_time_t
866 875 """
867 876 return timestamp * 1E6
868 877
869 878
870 879 def svn_opt_revision_value_t(num):
871 880 """
872 881 Put `num` into a `svn_opt_revision_value_t` structure.
873 882 """
874 883 value = svn.core.svn_opt_revision_value_t()
875 884 value.number = num
876 885 revision = svn.core.svn_opt_revision_t()
877 886 revision.kind = svn.core.svn_opt_revision_number
878 887 revision.value = value
879 888 return revision
@@ -1,209 +1,210 b''
1 1 # -*- coding: utf-8 -*-
2 2 #
3 3 # Copyright (C) 2004-2009 Edgewall Software
4 4 # Copyright (C) 2004-2006 Christopher Lenz <cmlenz@gmx.de>
5 5 # All rights reserved.
6 6 #
7 7 # This software is licensed as described in the file COPYING, which
8 8 # you should have received as part of this distribution. The terms
9 9 # are also available at http://trac.edgewall.org/wiki/TracLicense.
10 10 #
11 11 # This software consists of voluntary contributions made by many
12 12 # individuals. For the exact contribution history, see the revision
13 13 # history and logs, available at http://trac.edgewall.org/log/.
14 14 #
15 15 # Author: Christopher Lenz <cmlenz@gmx.de>
16 16
17 17 import difflib
18 18
19 19
def get_filtered_hunks(fromlines, tolines, context=None,
                       ignore_blank_lines=False, ignore_case=False,
                       ignore_space_changes=False):
    """Compute differences as `difflib.SequenceMatcher` opcodes, grouped
    per the ``context`` setting and filtered per the ``ignore_*`` flags.

    :param fromlines: list of lines of the old content
    :param tolines: list of lines of the new content
    :param context: number of "equal" lines kept around each change
    :param ignore_blank_lines: drop changes that only add/remove empty lines
    :param ignore_case: drop changes that only differ in letter case
    :param ignore_space_changes: drop changes that only differ in whitespace
    :return: generator of grouped `difflib.SequenceMatcher` opcodes

    With every ``ignore_*`` flag off, the opcodes come straight from the
    SequenceMatcher with no filtering pass.
    """
    hunks = get_hunks(fromlines, tolines, context)
    needs_filtering = (ignore_blank_lines or ignore_case
                       or ignore_space_changes)
    if needs_filtering:
        hunks = filter_ignorable_lines(
            hunks, fromlines, tolines, context,
            ignore_blank_lines, ignore_case, ignore_space_changes)
    return hunks
46 46
47 47
def get_hunks(fromlines, tolines, context=None):
    """Yield grouped opcodes describing the differences between the inputs.

    With ``context=None`` a single hunk covering the full opcode list is
    produced; otherwise hunks are grouped with ``context`` equal lines.
    See `get_filtered_hunks` for the parameter descriptions.
    """
    matcher = difflib.SequenceMatcher(None, fromlines, tolines)
    if context is None:
        return iter([matcher.get_opcodes()])
    return matcher.get_grouped_opcodes(context)
58 58
59 59
def filter_ignorable_lines(hunks, fromlines, tolines, context,
                           ignore_blank_lines, ignore_case,
                           ignore_space_changes):
    """Detect line changes that should be ignored and emit them re-tagged
    as "equal", joined with the preceding and/or following "equal" block.

    See `get_filtered_hunks` for the parameter descriptions.

    Bug fix: the nested ``all_equal()`` helper was missing its ``return``,
    so it always produced ``None``; ``not None`` made the "skip pure-equal
    groups" guards behave as plain yields.
    """
    def is_ignorable(tag, fromlines, tolines):
        # A deletion/insertion of only blank lines is ignorable when
        # ignore_blank_lines is set; a replace is ignorable when every
        # paired line matches after case folding / whitespace collapsing.
        if tag == 'delete' and ignore_blank_lines:
            if ''.join(fromlines) == '':
                return True
        elif tag == 'insert' and ignore_blank_lines:
            if ''.join(tolines) == '':
                return True
        elif tag == 'replace' and (ignore_case or ignore_space_changes):
            if len(fromlines) != len(tolines):
                return False

            def f(input_str):
                if ignore_case:
                    input_str = input_str.lower()
                if ignore_space_changes:
                    input_str = ' '.join(input_str.split())
                return input_str

            for i in range(len(fromlines)):
                if f(fromlines[i]) != f(tolines[i]):
                    return False
            return True

    # Materialize so we can re-iterate in the no-op fallthrough below.
    hunks = list(hunks)
    opcodes = []
    ignored_lines = False
    prev = None
    for hunk in hunks:
        for tag, i1, i2, j1, j2 in hunk:
            if tag == 'equal':
                # Merge consecutive equal blocks into `prev`.
                if prev:
                    prev = (tag, prev[1], i2, prev[3], j2)
                else:
                    prev = (tag, i1, i2, j1, j2)
            else:
                if is_ignorable(tag, fromlines[i1:i2], tolines[j1:j2]):
                    # Re-tag the ignorable change as equal and fold it
                    # into the surrounding equal block.
                    ignored_lines = True
                    if prev:
                        prev = 'equal', prev[1], i2, prev[3], j2
                    else:
                        prev = 'equal', i1, i2, j1, j2
                    continue
                if prev:
                    opcodes.append(prev)
                opcodes.append((tag, i1, i2, j1, j2))
                prev = None
    if prev:
        opcodes.append(prev)

    if ignored_lines:
        if context is None:
            yield opcodes
        else:
            # Keep at most n lines tagged 'equal' before and after every
            # change (re-grouping, mirroring difflib.get_grouped_opcodes).
            n = context
            nn = n + n

            group = []

            def all_equal():
                # Missing `return` in the original made this always None.
                return all(op[0] == 'equal' for op in group)

            for idx, (tag, i1, i2, j1, j2) in enumerate(opcodes):
                if idx == 0 and tag == 'equal':  # Fixup leading unchanged block
                    i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
                elif tag == 'equal' and i2 - i1 > nn:
                    # A large equal block splits two groups: close the
                    # current one with n trailing context lines.
                    group.append((tag, i1, min(i2, i1 + n), j1,
                                  min(j2, j1 + n)))
                    if not all_equal():
                        yield group
                        group = []
                    i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
                group.append((tag, i1, i2, j1, j2))

            if group and not (len(group) == 1 and group[0][0] == 'equal'):
                if group[-1][0] == 'equal':  # Fixup trailing unchanged block
                    tag, i1, i2, j1, j2 = group[-1]
                    group[-1] = tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)
                if not all_equal():
                    yield group
    else:
        # Nothing was ignored: pass the original hunks through unchanged.
        for hunk in hunks:
            yield hunk
151 151
152 152
# Marker emitted after a diff line that lacks the expected line terminator,
# mirroring GNU diff's "\ No newline at end of file" annotation.
NO_NEWLINE_AT_END = '\\ No newline at end of file'
154 154
155 155
def unified_diff(fromlines, tolines, context=None, ignore_blank_lines=0,
                 ignore_case=0, ignore_space_changes=0, lineterm='\n'):
    """
    Generator producing lines corresponding to a textual unified diff.

    Yields the ``@@ -a,b +c,d @@`` header for each hunk, then the hunk's
    lines prefixed with ' ', '-' or '+'. A line missing `lineterm` has it
    appended and is followed by the NO_NEWLINE_AT_END marker line.

    See `get_filtered_hunks` for the parameter descriptions.
    """
    # TODO: johbo: Check if this can be nicely integrated into the matching

    # NOTE(review): stripping here means the emitted -/+/context lines are
    # the stripped versions, not the originals — presumably acceptable to
    # callers using this flag; confirm before changing.
    if ignore_space_changes:
        fromlines = [l.strip() for l in fromlines]
        tolines = [l.strip() for l in tolines]

    for group in get_filtered_hunks(fromlines, tolines, context,
                                    ignore_blank_lines, ignore_case,
                                    ignore_space_changes):
        # Hunk span: first opcode's start to last opcode's end, both sides.
        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
        # An empty side is rendered as "-0,0"/"+0,0" after the +1 below.
        if i1 == 0 and i2 == 0:
            i1, i2 = -1, -1  # support for Add changes
        if j1 == 0 and j2 == 0:
            j1, j2 = -1, -1  # support for Delete changes
        yield '@@ -%s +%s @@%s' % (
            _hunk_range(i1 + 1, i2 - i1),
            _hunk_range(j1 + 1, j2 - j1),
            lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                # Context lines come from the old side.
                for line in fromlines[i1:i2]:
                    if not line.endswith(lineterm):
                        yield ' ' + line + lineterm
                        yield NO_NEWLINE_AT_END + lineterm
                    else:
                        yield ' ' + line
            else:
                if tag in ('replace', 'delete'):
                    for line in fromlines[i1:i2]:
                        if not line.endswith(lineterm):
                            yield '-' + line + lineterm
                            yield NO_NEWLINE_AT_END + lineterm
                        else:
                            yield '-' + line
                if tag in ('replace', 'insert'):
                    for line in tolines[j1:j2]:
                        if not line.endswith(lineterm):
                            yield '+' + line + lineterm
                            yield NO_NEWLINE_AT_END + lineterm
                        else:
                            yield '+' + line
203 204
204 205
205 206 def _hunk_range(start, length):
206 207 if length != 1:
207 208 return '%d,%d' % (start, length)
208 209 else:
209 210 return '%d' % (start, )
General Comments 0
You need to be logged in to leave comments. Login now