app: new optimized remote endpoints for python3 rewrite
super-admin
r1124:8fcf8b08 python3
@@ -1,1382 +1,1463 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 import posixpath as vcspath
22 21 import re
23 22 import stat
24 23 import traceback
25 24 import urllib.request
26 25 import urllib.parse
27 26 import urllib.error
28 27 from functools import wraps
29 28
30 29 import more_itertools
31 30 import pygit2
32 31 from pygit2 import Repository as LibGit2Repo
33 32 from pygit2 import index as LibGit2Index
34 33 from dulwich import index, objects
35 from dulwich.client import HttpGitClient, LocalGitClient
34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
36 35 from dulwich.errors import (
37 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
38 37 MissingCommitError, ObjectMissing, HangupException,
39 38 UnexpectedCommandError)
40 39 from dulwich.repo import Repo as DulwichRepo
41 40 from dulwich.server import update_server_info
42 41
43 42 from vcsserver import exceptions, settings, subprocessio
44 43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
45 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo, BinaryEnvelope
44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
46 45 from vcsserver.hgcompat import (
47 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
48 47 from vcsserver.git_lfs.lib import LFSOidStore
49 48 from vcsserver.vcs_base import RemoteBase
50 49
51 50 DIR_STAT = stat.S_IFDIR
52 51 FILE_MODE = stat.S_IFMT
53 52 GIT_LINK = objects.S_IFGITLINK
54 53 PEELED_REF_MARKER = b'^{}'
55 54 HEAD_MARKER = b'HEAD'
56 55
57 56 log = logging.getLogger(__name__)
58 57
59 58
60 59 def reraise_safe_exceptions(func):
61 60 """Converts Dulwich exceptions to something neutral."""
62 61
63 62 @wraps(func)
64 63 def wrapper(*args, **kwargs):
65 64 try:
66 65 return func(*args, **kwargs)
67 66 except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as e:
68 67 exc = exceptions.LookupException(org_exc=e)
69 68 raise exc(safe_str(e))
70 69 except (HangupException, UnexpectedCommandError) as e:
71 70 exc = exceptions.VcsException(org_exc=e)
72 71 raise exc(safe_str(e))
73 72 except Exception:
74 73 # NOTE(marcink): because of how dulwich handles some exceptions
75 74 # (KeyError on empty repos), we cannot track this and catch all
76 75             # exceptions; these may be exceptions raised by other handlers
77 76 #if not hasattr(e, '_vcs_kind'):
78 77 #log.exception("Unhandled exception in git remote call")
79 78 #raise_from_original(exceptions.UnhandledException)
80 79 raise
81 80 return wrapper
82 81
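For context, the exception factories used above (exceptions.LookupException, exceptions.VcsException) return a callable that is then invoked with the message, which is why the code reads `exc(safe_str(e))`. A simplified sketch of that pattern, not the actual vcsserver implementation:

    def LookupException(org_exc=None):
        def _make_exception(message):
            # tag the exception so the RPC layer can detect its vcs kind
            exc = Exception(message)
            exc._vcs_kind = 'lookup'
            exc._org_exc = org_exc
            return exc
        return _make_exception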
83 82
84 83 class Repo(DulwichRepo):
85 84 """
86 85 A wrapper for dulwich Repo class.
87 86
88 87     Since dulwich sometimes keeps .idx file descriptors open, this leads to a
89 88 "Too many open files" error. We need to close all opened file descriptors
90 89 once the repo object is destroyed.
91 90 """
92 91 def __del__(self):
93 92 if hasattr(self, 'object_store'):
94 93 self.close()
95 94
96 95
97 96 class Repository(LibGit2Repo):
98 97
99 98 def __enter__(self):
100 99 return self
101 100
102 101 def __exit__(self, exc_type, exc_val, exc_tb):
103 102 self.free()
104 103
105 104
106 105 class GitFactory(RepoFactory):
107 106 repo_type = 'git'
108 107
109 108 def _create_repo(self, wire, create, use_libgit2=False):
110 109 if use_libgit2:
111 110 repo = Repository(safe_bytes(wire['path']))
112 111 else:
113 112 # dulwich mode
114 113 repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
115 114 repo = Repo(repo_path)
116 115
117 116 log.debug('repository created: got GIT object: %s', repo)
118 117 return repo
119 118
120 119 def repo(self, wire, create=False, use_libgit2=False):
121 120 """
122 121 Get a repository instance for the given path.
123 122 """
124 123 return self._create_repo(wire, create, use_libgit2)
125 124
126 125 def repo_libgit2(self, wire):
127 126 return self.repo(wire, use_libgit2=True)
128 127
129 128
129 def create_signature_from_string(author_str, **kwargs):
130 """
131 Creates a pygit2.Signature object from a string of the format 'Name <email>'.
132
133 :param author_str: String of the format 'Name <email>'
134 :return: pygit2.Signature object
135 """
136 match = re.match(r'^(.+) <(.+)>$', author_str)
137 if match is None:
138 raise ValueError(f"Invalid format: {author_str}")
139
140 name, email = match.groups()
141 return pygit2.Signature(name, email, **kwargs)
142
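A hedged usage example: extra keyword arguments are passed straight to pygit2.Signature, which accepts time (a Unix timestamp) and offset (timezone offset in minutes); the values below are illustrative:

    sig = create_signature_from_string('Joe Doe <joe@example.com>',
                                       time=1672531200, offset=60)
    # sig.name == 'Joe Doe', sig.email == 'joe@example.com'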
143
144 def get_obfuscated_url(url_obj):
145 url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd
146 url_obj.query = obfuscate_qs(url_obj.query)
147 obfuscated_uri = str(url_obj)
148 return obfuscated_uri
149
150
130 151 class GitRemote(RemoteBase):
131 152
132 153 def __init__(self, factory):
133 154 self._factory = factory
134 155 self._bulk_methods = {
135 156 "date": self.date,
136 157 "author": self.author,
137 158 "branch": self.branch,
138 159 "message": self.message,
139 160 "parents": self.parents,
140 161 "_commit": self.revision,
141 162 }
163 self._bulk_file_methods = {
164 "size": self.get_node_size,
165 "data": self.get_node_data,
166 "flags": self.get_node_flags,
167 "is_binary": self.get_node_is_binary,
168 "md5": self.md5_hash
169 }
142 170
143 171 def _wire_to_config(self, wire):
144 172 if 'config' in wire:
145 173 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
146 174 return {}
147 175
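The wire 'config' entries are (section, key, value) triples flattened into 'section_key' names; a hypothetical illustration (section and key names assumed):

    wire = {'config': [('vcs', 'ssl_dir', '/etc/certs'),
                       ('vcs_git_lfs', 'store_location', '/var/lfs')]}
    # _wire_to_config(wire) ->
    # {'vcs_ssl_dir': '/etc/certs', 'vcs_git_lfs_store_location': '/var/lfs'}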
148 176 def _remote_conf(self, config):
149 177 params = [
150 178 '-c', 'core.askpass=""',
151 179 ]
152 180 ssl_cert_dir = config.get('vcs_ssl_dir')
153 181 if ssl_cert_dir:
154 182 params.extend(['-c', f'http.sslCAinfo={ssl_cert_dir}'])
155 183 return params
156 184
157 185 @reraise_safe_exceptions
158 186 def discover_git_version(self):
159 187 stdout, _ = self.run_git_command(
160 188 {}, ['--version'], _bare=True, _safe=True)
161 189 prefix = b'git version'
162 190 if stdout.startswith(prefix):
163 191 stdout = stdout[len(prefix):]
164 192 return safe_str(stdout.strip())
165 193
166 194 @reraise_safe_exceptions
167 195 def is_empty(self, wire):
168 196 repo_init = self._factory.repo_libgit2(wire)
169 197 with repo_init as repo:
170 198
171 199 try:
172 200 has_head = repo.head.name
173 201 if has_head:
174 202 return False
175 203
176 204 # NOTE(marcink): check again using more expensive method
177 205 return repo.is_empty
178 206 except Exception:
179 207 pass
180 208
181 209 return True
182 210
183 211 @reraise_safe_exceptions
184 212 def assert_correct_path(self, wire):
185 213 cache_on, context_uid, repo_id = self._cache_on(wire)
186 214 region = self._region(wire)
187 215
188 216 @region.conditional_cache_on_arguments(condition=cache_on)
189 217 def _assert_correct_path(_context_uid, _repo_id, fast_check):
190 218 if fast_check:
191 219 path = safe_str(wire['path'])
192 220 if pygit2.discover_repository(path):
193 221 return True
194 222 return False
195 223 else:
196 224 try:
197 225 repo_init = self._factory.repo_libgit2(wire)
198 226 with repo_init:
199 227 pass
200 228 except pygit2.GitError:
201 229 path = wire.get('path')
202 230 tb = traceback.format_exc()
203 231 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
204 232 return False
205 233 return True
206 234
207 235 return _assert_correct_path(context_uid, repo_id, True)
208 236
209 237 @reraise_safe_exceptions
210 238 def bare(self, wire):
211 239 repo_init = self._factory.repo_libgit2(wire)
212 240 with repo_init as repo:
213 241 return repo.is_bare
214 242
215 243 @reraise_safe_exceptions
244 def get_node_data(self, wire, commit_id, path):
245 repo_init = self._factory.repo_libgit2(wire)
246 with repo_init as repo:
247 commit = repo[commit_id]
248 blob_obj = commit.tree[path]
249
250 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
251 raise exceptions.LookupException()(
252 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
253
254 return BytesEnvelope(blob_obj.data)
255
256 @reraise_safe_exceptions
257 def get_node_size(self, wire, commit_id, path):
258 repo_init = self._factory.repo_libgit2(wire)
259 with repo_init as repo:
260 commit = repo[commit_id]
261 blob_obj = commit.tree[path]
262
263 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
264 raise exceptions.LookupException()(
265 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
266
267 return blob_obj.size
268
269 @reraise_safe_exceptions
270 def get_node_flags(self, wire, commit_id, path):
271 repo_init = self._factory.repo_libgit2(wire)
272 with repo_init as repo:
273 commit = repo[commit_id]
274 blob_obj = commit.tree[path]
275
276 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
277 raise exceptions.LookupException()(
278 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
279
280 return blob_obj.filemode
281
282 @reraise_safe_exceptions
283 def get_node_is_binary(self, wire, commit_id, path):
284 repo_init = self._factory.repo_libgit2(wire)
285 with repo_init as repo:
286 commit = repo[commit_id]
287 blob_obj = commit.tree[path]
288
289 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
290 raise exceptions.LookupException()(
291 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
292
293 return blob_obj.is_binary
294
295 @reraise_safe_exceptions
216 296 def blob_as_pretty_string(self, wire, sha):
217 297 repo_init = self._factory.repo_libgit2(wire)
218 298 with repo_init as repo:
219 299 blob_obj = repo[sha]
220 return BinaryEnvelope(blob_obj.data)
300 return BytesEnvelope(blob_obj.data)
221 301
222 302 @reraise_safe_exceptions
223 303 def blob_raw_length(self, wire, sha):
224 304 cache_on, context_uid, repo_id = self._cache_on(wire)
225 305 region = self._region(wire)
226 306
227 307 @region.conditional_cache_on_arguments(condition=cache_on)
228 308 def _blob_raw_length(_repo_id, _sha):
229 309
230 310 repo_init = self._factory.repo_libgit2(wire)
231 311 with repo_init as repo:
232 312 blob = repo[sha]
233 313 return blob.size
234 314
235 315 return _blob_raw_length(repo_id, sha)
236 316
237 317 def _parse_lfs_pointer(self, raw_content):
238 318 spec_string = b'version https://git-lfs.github.com/spec'
239 319 if raw_content and raw_content.startswith(spec_string):
240 320
241 321 pattern = re.compile(rb"""
242 322 (?:\n)?
243 323 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
244 324 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
245 325 ^size[ ](?P<oid_size>[0-9]+)\n
246 326 (?:\n)?
247 327 """, re.VERBOSE | re.MULTILINE)
248 328 match = pattern.match(raw_content)
249 329 if match:
250 330 return match.groupdict()
251 331
252 332 return {}
253 333
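For reference, a pointer payload the regex above accepts looks like the following; the oid is illustrative (a real one is 64 hex characters):

    pointer = (b'version https://git-lfs.github.com/spec/v1\n'
               b'oid sha256:' + b'a' * 64 + b'\n'
               b'size 12345\n')
    # _parse_lfs_pointer(pointer) ->
    # {'spec_ver': b'v1', 'oid_hash': b'aaa...a', 'oid_size': b'12345'}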
254 334 @reraise_safe_exceptions
255 335 def is_large_file(self, wire, commit_id):
256 336 cache_on, context_uid, repo_id = self._cache_on(wire)
257 337 region = self._region(wire)
258 338
259 339 @region.conditional_cache_on_arguments(condition=cache_on)
260 340 def _is_large_file(_repo_id, _sha):
261 341 repo_init = self._factory.repo_libgit2(wire)
262 342 with repo_init as repo:
263 343 blob = repo[commit_id]
264 344 if blob.is_binary:
265 345 return {}
266 346
267 347 return self._parse_lfs_pointer(blob.data)
268 348
269 349 return _is_large_file(repo_id, commit_id)
270 350
271 351 @reraise_safe_exceptions
272 352 def is_binary(self, wire, tree_id):
273 353 cache_on, context_uid, repo_id = self._cache_on(wire)
274 354 region = self._region(wire)
275 355
276 356 @region.conditional_cache_on_arguments(condition=cache_on)
277 357 def _is_binary(_repo_id, _tree_id):
278 358 repo_init = self._factory.repo_libgit2(wire)
279 359 with repo_init as repo:
280 360 blob_obj = repo[tree_id]
281 361 return blob_obj.is_binary
282 362
283 363 return _is_binary(repo_id, tree_id)
284 364
285 365 @reraise_safe_exceptions
286 def md5_hash(self, wire, tree_id):
366 def md5_hash(self, wire, commit_id, path):
287 367 cache_on, context_uid, repo_id = self._cache_on(wire)
288 368 region = self._region(wire)
289 369
290 370 @region.conditional_cache_on_arguments(condition=cache_on)
291 def _md5_hash(_repo_id, _tree_id):
371 def _md5_hash(_repo_id, _commit_id, _path):
372 repo_init = self._factory.repo_libgit2(wire)
373 with repo_init as repo:
374 commit = repo[_commit_id]
375 blob_obj = commit.tree[_path]
376
377 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
378 raise exceptions.LookupException()(
379 f'Tree for commit_id:{_commit_id} is not a blob: {blob_obj.type_str}')
380
292 381 return ''
293 382
294 return _md5_hash(repo_id, tree_id)
383 return _md5_hash(repo_id, commit_id, path)
295 384
296 385 @reraise_safe_exceptions
297 386 def in_largefiles_store(self, wire, oid):
298 387 conf = self._wire_to_config(wire)
299 388 repo_init = self._factory.repo_libgit2(wire)
300 389 with repo_init as repo:
301 390 repo_name = repo.path
302 391
303 392 store_location = conf.get('vcs_git_lfs_store_location')
304 393 if store_location:
305 394
306 395 store = LFSOidStore(
307 396 oid=oid, repo=repo_name, store_location=store_location)
308 397 return store.has_oid()
309 398
310 399 return False
311 400
312 401 @reraise_safe_exceptions
313 402 def store_path(self, wire, oid):
314 403 conf = self._wire_to_config(wire)
315 404 repo_init = self._factory.repo_libgit2(wire)
316 405 with repo_init as repo:
317 406 repo_name = repo.path
318 407
319 408 store_location = conf.get('vcs_git_lfs_store_location')
320 409 if store_location:
321 410 store = LFSOidStore(
322 411 oid=oid, repo=repo_name, store_location=store_location)
323 412 return store.oid_path
324 413 raise ValueError(f'Unable to fetch oid with path {oid}')
325 414
326 415 @reraise_safe_exceptions
327 416 def bulk_request(self, wire, rev, pre_load):
328 417 cache_on, context_uid, repo_id = self._cache_on(wire)
329 418 region = self._region(wire)
330 419
331 420 @region.conditional_cache_on_arguments(condition=cache_on)
332 421 def _bulk_request(_repo_id, _rev, _pre_load):
333 422 result = {}
334 423 for attr in pre_load:
335 424 try:
336 425 method = self._bulk_methods[attr]
337 426 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
338 427 args = [wire, rev]
339 428 result[attr] = method(*args)
340 429 except KeyError as e:
341 430 raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
342 431 return result
343 432
344 433 return _bulk_request(repo_id, rev, sorted(pre_load))
345 434
346 def _build_opener(self, url):
435 @reraise_safe_exceptions
436 def bulk_file_request(self, wire, commit_id, path, pre_load):
437 cache_on, context_uid, repo_id = self._cache_on(wire)
438 region = self._region(wire)
439
440 @region.conditional_cache_on_arguments(condition=cache_on)
441 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
442 result = {}
443 for attr in pre_load:
444 try:
445 method = self._bulk_file_methods[attr]
446 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
447 result[attr] = method(wire, _commit_id, _path)
448 except KeyError as e:
449 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
450 return BinaryEnvelope(result)
451
452 return _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
453
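A hedged usage sketch of the two bulk endpoints above; the attribute names come from the _bulk_methods/_bulk_file_methods tables, and the wire, commit id and results are illustrative:

    commit_attrs = remote.bulk_request(wire, commit_id,
                                       pre_load=['author', 'date', 'message'])
    # -> {'author': 'Joe Doe <joe@example.com>', 'date': [1672531200, 60], 'message': '...'}

    file_attrs = remote.bulk_file_request(wire, commit_id, 'README.rst',
                                          pre_load=['size', 'is_binary'])
    # -> BinaryEnvelope({'size': 1024, 'is_binary': False})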
454 def _build_opener(self, url: str):
347 455 handlers = []
348 url_obj = url_parser(url)
349 _, authinfo = url_obj.authinfo()
456 url_obj = url_parser(safe_bytes(url))
457 authinfo = url_obj.authinfo()[1]
350 458
351 459 if authinfo:
352 460 # create a password manager
353 461 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
354 462 passmgr.add_password(*authinfo)
355 463
356 464 handlers.extend((httpbasicauthhandler(passmgr),
357 465 httpdigestauthhandler(passmgr)))
358 466
359 467 return urllib.request.build_opener(*handlers)
360 468
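A hedged sketch of how the opener is used: credentials embedded in the URL land in the password manager, so follow-up requests can answer basic or digest auth challenges (URL is illustrative):

    opener = remote._build_opener('https://user:secret@example.com/repo.git')
    resp = opener.open('https://example.com/repo.git/info/refs?service=git-upload-pack')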
361 def _type_id_to_name(self, type_id: int):
362 return {
363 1: 'commit',
364 2: 'tree',
365 3: 'blob',
366 4: 'tag'
367 }[type_id]
368
369 469 @reraise_safe_exceptions
370 470 def check_url(self, url, config):
371 471 url_obj = url_parser(safe_bytes(url))
372 test_uri, _ = url_obj.authinfo()
373 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
374 url_obj.query = obfuscate_qs(url_obj.query)
375 cleaned_uri = str(url_obj)
376 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
472
473 test_uri = safe_str(url_obj.authinfo()[0])
474 obfuscated_uri = get_obfuscated_url(url_obj)
475
476 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
377 477
378 478 if not test_uri.endswith('info/refs'):
379 479 test_uri = test_uri.rstrip('/') + '/info/refs'
380 480
381 o = self._build_opener(url)
481 o = self._build_opener(test_uri)
382 482 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
383 483
384 484 q = {"service": 'git-upload-pack'}
385 485 qs = '?%s' % urllib.parse.urlencode(q)
386 486 cu = "{}{}".format(test_uri, qs)
387 487 req = urllib.request.Request(cu, None, {})
388 488
389 489 try:
390 log.debug("Trying to open URL %s", cleaned_uri)
490 log.debug("Trying to open URL %s", obfuscated_uri)
391 491 resp = o.open(req)
392 492 if resp.code != 200:
393 493 raise exceptions.URLError()('Return Code is not 200')
394 494 except Exception as e:
395 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
495 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
396 496 # means it cannot be cloned
397 raise exceptions.URLError(e)("[{}] org_exc: {}".format(cleaned_uri, e))
497 raise exceptions.URLError(e)("[{}] org_exc: {}".format(obfuscated_uri, e))
398 498
399 499 # now detect if it's proper git repo
400 gitdata = resp.read()
401 if 'service=git-upload-pack' in gitdata:
500 gitdata: bytes = resp.read()
501
502 if b'service=git-upload-pack' in gitdata:
402 503 pass
403 elif re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata):
504 elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
404 505 # old style git can return some other format !
405 506 pass
406 507 else:
407 raise exceptions.URLError()(
408 "url [{}] does not look like an git".format(cleaned_uri))
508 e = None
509 raise exceptions.URLError(e)(
510 "url [%s] does not look like an hg repo org_exc: %s"
511 % (obfuscated_uri, e))
409 512
410 513 return True
411 514
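For reference, a smart-HTTP info/refs response that passes the check starts with a pkt-line advertising the service, e.g.:

    001e# service=git-upload-pack
    0000<ref advertisement follows>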
412 515 @reraise_safe_exceptions
413 516 def clone(self, wire, url, deferred, valid_refs, update_after_clone):
414 517 # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
415 518 remote_refs = self.pull(wire, url, apply_refs=False)
416 519 repo = self._factory.repo(wire)
417 520 if isinstance(valid_refs, list):
418 521 valid_refs = tuple(valid_refs)
419 522
420 523 for k in remote_refs:
421 524             # only parse heads/tags and skip so-called deferred tags
422 525 if k.startswith(valid_refs) and not k.endswith(deferred):
423 526 repo[k] = remote_refs[k]
424 527
425 528 if update_after_clone:
426 529 # we want to checkout HEAD
427 530 repo["HEAD"] = remote_refs["HEAD"]
428 531 index.build_index_from_tree(repo.path, repo.index_path(),
429 532 repo.object_store, repo["HEAD"].tree)
430 533
431 534 @reraise_safe_exceptions
432 535 def branch(self, wire, commit_id):
433 536 cache_on, context_uid, repo_id = self._cache_on(wire)
434 537 region = self._region(wire)
435 538
436 539 @region.conditional_cache_on_arguments(condition=cache_on)
437 540 def _branch(_context_uid, _repo_id, _commit_id):
438 541 regex = re.compile('^refs/heads')
439 542
440 543 def filter_with(ref):
441 544 return regex.match(ref[0]) and ref[1] == _commit_id
442 545
443 546 branches = list(filter(filter_with, list(self.get_refs(wire).items())))
444 547 return [x[0].split('refs/heads/')[-1] for x in branches]
445 548
446 549 return _branch(context_uid, repo_id, commit_id)
447 550
448 551 @reraise_safe_exceptions
449 552 def commit_branches(self, wire, commit_id):
450 553 cache_on, context_uid, repo_id = self._cache_on(wire)
451 554 region = self._region(wire)
452 555
453 556 @region.conditional_cache_on_arguments(condition=cache_on)
454 557 def _commit_branches(_context_uid, _repo_id, _commit_id):
455 558 repo_init = self._factory.repo_libgit2(wire)
456 559 with repo_init as repo:
457 560 branches = [x for x in repo.branches.with_commit(_commit_id)]
458 561 return branches
459 562
460 563 return _commit_branches(context_uid, repo_id, commit_id)
461 564
462 565 @reraise_safe_exceptions
463 566 def add_object(self, wire, content):
464 567 repo_init = self._factory.repo_libgit2(wire)
465 568 with repo_init as repo:
466 569 blob = objects.Blob()
467 570 blob.set_raw_string(content)
468 571 repo.object_store.add_object(blob)
469 572 return blob.id
470 573
471 # TODO: this is quite complex, check if that can be simplified
574 @reraise_safe_exceptions
575 def create_commit(self, wire, author, committer, message, branch, new_tree_id, date_args: list[int, int] = None):
576 repo_init = self._factory.repo_libgit2(wire)
577 with repo_init as repo:
578
579 if date_args:
580 current_time, offset = date_args
581
582 kw = {
583 'time': current_time,
584 'offset': offset
585 }
586 author = create_signature_from_string(author, **kw)
587 committer = create_signature_from_string(committer, **kw)
588
589 tree = new_tree_id
590 if isinstance(tree, (bytes, str)):
591 # validate this tree is in the repo...
592 tree = repo[safe_str(tree)].id
593
594 parents = []
595 # ensure we COMMIT on top of given branch head
596             # check if this repo has ANY branches; otherwise it's a new-branch case we need to handle
597 if branch in repo.branches.local:
598 parents += [repo.branches[branch].target]
599 elif [x for x in repo.branches.local]:
600 parents += [repo.head.target]
601 #else:
602 # in case we want to commit on new branch we create it on top of HEAD
603 #repo.branches.local.create(branch, repo.revparse_single('HEAD'))
604
605             # Create a new commit
606 commit_oid = repo.create_commit(
607 f'refs/heads/{branch}', # the name of the reference to update
608 author, # the author of the commit
609 committer, # the committer of the commit
610 message, # the commit message
611 tree, # the tree produced by the index
612 parents # list of parents for the new commit, usually just one,
613 )
614
615 new_commit_id = safe_str(commit_oid)
616
617 return new_commit_id
618
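A hedged call sketch for the endpoint above; the wire, tree id and timestamp are illustrative, and date_args is [unix_timestamp, offset_minutes]:

    new_commit_id = remote.create_commit(
        wire,
        author='Joe Doe <joe@example.com>',
        committer='Joe Doe <joe@example.com>',
        message='Add README',
        branch='master',
        new_tree_id=tree_id,  # e.g. a hex tree id from repo.index.write_tree()
        date_args=[1672531200, 0])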
472 619 @reraise_safe_exceptions
473 620 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
474 # Defines the root tree
475 class _Root(object):
476 def __repr__(self):
477 return 'ROOT TREE'
478 ROOT = _Root()
479 621
480 repo = self._factory.repo(wire)
481 object_store = repo.object_store
482
483 # Create tree and populates it with blobs
484 if commit_tree:
485 commit_tree = safe_bytes(commit_tree)
486
487 if commit_tree and repo[commit_tree]:
488 git_commit = repo[safe_bytes(commit_data['parents'][0])]
489 commit_tree = repo[git_commit.tree] # root tree
490 else:
491 commit_tree = objects.Tree()
492
493 for node in updated:
494 # Compute subdirs if needed
495 dirpath, nodename = vcspath.split(node['path'])
496 dirnames = list(map(safe_str, dirpath and dirpath.split('/') or []))
497 parent = commit_tree
498 ancestors = [('', parent)]
622 def mode2pygit(mode):
623 """
624         git only supports two regular file modes, 644 and 755
499 625
500 # Tries to dig for the deepest existing tree
501 while dirnames:
502 curdir = dirnames.pop(0)
503 try:
504 dir_id = parent[curdir][1]
505 except KeyError:
506 # put curdir back into dirnames and stops
507 dirnames.insert(0, curdir)
508 break
509 else:
510 # If found, updates parent
511 parent = repo[dir_id]
512 ancestors.append((curdir, parent))
513 # Now parent is deepest existing tree and we need to create
514 # subtrees for dirnames (in reverse order)
515 # [this only applies for nodes from added]
516 new_trees = []
626 0o100755 -> 33261
627 0o100644 -> 33188
628 """
629 return {
630 0o100644: pygit2.GIT_FILEMODE_BLOB,
631 0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
632 0o120000: pygit2.GIT_FILEMODE_LINK
633 }.get(mode) or pygit2.GIT_FILEMODE_BLOB
517 634
518 blob = objects.Blob.from_string(node['content'])
519
520 node_path = safe_bytes(node['node_path'])
635 repo_init = self._factory.repo_libgit2(wire)
636 with repo_init as repo:
637 repo_index = repo.index
521 638
522 if dirnames:
523 # If there are trees which should be created we need to build
524 # them now (in reverse order)
525 reversed_dirnames = list(reversed(dirnames))
526 curtree = objects.Tree()
527 curtree[node_path] = node['mode'], blob.id
528 new_trees.append(curtree)
529 for dirname in reversed_dirnames[:-1]:
530 newtree = objects.Tree()
531 newtree[dirname] = (DIR_STAT, curtree.id)
532 new_trees.append(newtree)
533 curtree = newtree
534 parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
535 else:
536 parent.add(name=node_path, mode=node['mode'], hexsha=blob.id)
639 for pathspec in updated:
640 blob_id = repo.create_blob(pathspec['content'])
641 ie = pygit2.IndexEntry(pathspec['path'], blob_id, mode2pygit(pathspec['mode']))
642 repo_index.add(ie)
537 643
538 new_trees.append(parent)
539 # Update ancestors
540 reversed_ancestors = reversed(
541 [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
542 for parent, tree, path in reversed_ancestors:
543 parent[path] = (DIR_STAT, tree.id)
544 object_store.add_object(tree)
644 for pathspec in removed:
645 repo_index.remove(pathspec)
545 646
546 object_store.add_object(blob)
547 for tree in new_trees:
548 object_store.add_object(tree)
647 # Write changes to the index
648 repo_index.write()
649
650 # Create a tree from the updated index
651 commit_tree = repo_index.write_tree()
652
653 new_tree_id = commit_tree
549 654
550 for node_path in removed:
551 paths = node_path.split('/')
552 tree = commit_tree # start with top-level
553 trees = [{'tree': tree, 'path': ROOT}]
554 # Traverse deep into the forest...
555 # resolve final tree by iterating the path.
556 # e.g a/b/c.txt will get
557 # - root as tree then
558 # - 'a' as tree,
559 # - 'b' as tree,
560 # - stop at c as blob.
561 for path in paths:
562 try:
563 obj = repo[tree[path][1]]
564 if isinstance(obj, objects.Tree):
565 trees.append({'tree': obj, 'path': path})
566 tree = obj
567 except KeyError:
568 break
569 #PROBLEM:
570 """
571 We're not editing same reference tree object
572 """
573 # Cut down the blob and all rotten trees on the way back...
574 for path, tree_data in reversed(list(zip(paths, trees))):
575 tree = tree_data['tree']
576 tree.__delitem__(path)
577 # This operation edits the tree, we need to mark new commit back
655 author = commit_data['author']
656 committer = commit_data['committer']
657 message = commit_data['message']
658
659 date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]
578 660
579 if len(tree) > 0:
580 # This tree still has elements - don't remove it or any
581 # of it's parents
582 break
583
584 object_store.add_object(commit_tree)
661 new_commit_id = self.create_commit(wire, author, committer, message, branch,
662 new_tree_id, date_args=date_args)
585 663
586 # Create commit
587 commit = objects.Commit()
588 commit.tree = commit_tree.id
589 bytes_keys = [
590 'author',
591 'committer',
592 'message',
593 'encoding',
594 'parents'
595 ]
664 # libgit2, ensure the branch is there and exists
665 self.create_branch(wire, branch, new_commit_id)
596 666
597 for k, v in commit_data.items():
598 if k in bytes_keys:
599 if k == 'parents':
600 v = [safe_bytes(x) for x in v]
601 else:
602 v = safe_bytes(v)
603 setattr(commit, k, v)
667 # libgit2, set new ref to this created commit
668 self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)
604 669
605 object_store.add_object(commit)
606
607 self.create_branch(wire, branch, safe_str(commit.id))
608
609 # dulwich set-ref
610 repo.refs[safe_bytes(f'refs/heads/{branch}')] = commit.id
611
612 return commit.id
670 return new_commit_id
613 671
614 672 @reraise_safe_exceptions
615 673 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
616 674 if url != 'default' and '://' not in url:
617 675 client = LocalGitClient(url)
618 676 else:
619 url_obj = url_parser(url)
677 url_obj = url_parser(safe_bytes(url))
620 678 o = self._build_opener(url)
621 url, _ = url_obj.authinfo()
679 url = url_obj.authinfo()[0]
622 680 client = HttpGitClient(base_url=url, opener=o)
623 681 repo = self._factory.repo(wire)
624 682
625 683 determine_wants = repo.object_store.determine_wants_all
626 684 if refs:
627 685 refs = [ascii_bytes(x) for x in refs]
628 686
629 687 def determine_wants_requested(remote_refs):
630 688 determined = []
631 689 for ref_name, ref_hash in remote_refs.items():
632 690 bytes_ref_name = safe_bytes(ref_name)
633 691
634 692 if bytes_ref_name in refs:
635 693 bytes_ref_hash = safe_bytes(ref_hash)
636 694 determined.append(bytes_ref_hash)
637 695 return determined
638 696
639 697 # swap with our custom requested wants
640 698 determine_wants = determine_wants_requested
641 699
642 700 try:
643 701 remote_refs = client.fetch(
644 702 path=url, target=repo, determine_wants=determine_wants)
645 703
646 704 except NotGitRepository as e:
647 705 log.warning(
648 706 'Trying to fetch from "%s" failed, not a Git repository.', url)
649 707 # Exception can contain unicode which we convert
650 708 raise exceptions.AbortException(e)(repr(e))
651 709
652 710 # mikhail: client.fetch() returns all the remote refs, but fetches only
653 711 # refs filtered by `determine_wants` function. We need to filter result
654 712 # as well
655 713 if refs:
656 714 remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}
657 715
658 716 if apply_refs:
659 717 # TODO: johbo: Needs proper test coverage with a git repository
660 718 # that contains a tag object, so that we would end up with
661 719 # a peeled ref at this point.
662 720 for k in remote_refs:
663 721 if k.endswith(PEELED_REF_MARKER):
664 722 log.debug("Skipping peeled reference %s", k)
665 723 continue
666 724 repo[k] = remote_refs[k]
667 725
668 726 if refs and not update_after:
669 727 # mikhail: explicitly set the head to the last ref.
670 728 repo[HEAD_MARKER] = remote_refs[refs[-1]]
671 729
672 730 if update_after:
673 731 # we want to check out HEAD
674 732 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
675 733 index.build_index_from_tree(repo.path, repo.index_path(),
676 734 repo.object_store, repo[HEAD_MARKER].tree)
735
736 if isinstance(remote_refs, FetchPackResult):
737 return remote_refs.refs
677 738 return remote_refs
678 739
679 740 @reraise_safe_exceptions
680 741 def sync_fetch(self, wire, url, refs=None, all_refs=False):
681 742 self._factory.repo(wire)
682 743 if refs and not isinstance(refs, (list, tuple)):
683 744 refs = [refs]
684 745
685 746 config = self._wire_to_config(wire)
686 747 # get all remote refs we'll use to fetch later
687 748 cmd = ['ls-remote']
688 749 if not all_refs:
689 750 cmd += ['--heads', '--tags']
690 751 cmd += [url]
691 752 output, __ = self.run_git_command(
692 753 wire, cmd, fail_on_stderr=False,
693 754 _copts=self._remote_conf(config),
694 755 extra_env={'GIT_TERMINAL_PROMPT': '0'})
695 756
696 757 remote_refs = collections.OrderedDict()
697 758 fetch_refs = []
698 759
699 760 for ref_line in output.splitlines():
700 761 sha, ref = ref_line.split(b'\t')
701 762 sha = sha.strip()
702 763 if ref in remote_refs:
703 764 # duplicate, skip
704 765 continue
705 766 if ref.endswith(PEELED_REF_MARKER):
706 767 log.debug("Skipping peeled reference %s", ref)
707 768 continue
708 769 # don't sync HEAD
709 770 if ref in [HEAD_MARKER]:
710 771 continue
711 772
712 773 remote_refs[ref] = sha
713 774
714 775 if refs and sha in refs:
715 776 # we filter fetch using our specified refs
716 777 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
717 778 elif not refs:
718 779 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
719 780 log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
720 781
721 782 if fetch_refs:
722 783 for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
723 784 fetch_refs_chunks = list(chunk)
724 785 log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
725 786 self.run_git_command(
726 787 wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
727 788 fail_on_stderr=False,
728 789 _copts=self._remote_conf(config),
729 790 extra_env={'GIT_TERMINAL_PROMPT': '0'})
730 791
731 792 return remote_refs
732 793
733 794 @reraise_safe_exceptions
734 795 def sync_push(self, wire, url, refs=None):
735 796 if not self.check_url(url, wire):
736 797 return
737 798 config = self._wire_to_config(wire)
738 799 self._factory.repo(wire)
739 800 self.run_git_command(
740 801 wire, ['push', url, '--mirror'], fail_on_stderr=False,
741 802 _copts=self._remote_conf(config),
742 803 extra_env={'GIT_TERMINAL_PROMPT': '0'})
743 804
744 805 @reraise_safe_exceptions
745 806 def get_remote_refs(self, wire, url):
746 807 repo = Repo(url)
747 808 return repo.get_refs()
748 809
749 810 @reraise_safe_exceptions
750 811 def get_description(self, wire):
751 812 repo = self._factory.repo(wire)
752 813 return repo.get_description()
753 814
754 815 @reraise_safe_exceptions
755 816 def get_missing_revs(self, wire, rev1, rev2, path2):
756 817 repo = self._factory.repo(wire)
757 818 LocalGitClient(thin_packs=False).fetch(path2, repo)
758 819
759 820 wire_remote = wire.copy()
760 821 wire_remote['path'] = path2
761 822 repo_remote = self._factory.repo(wire_remote)
762 LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)
823 LocalGitClient(thin_packs=False).fetch(path2, repo_remote)
763 824
764 825 revs = [
765 826 x.commit.id
766 for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
827 for x in repo_remote.get_walker(include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])]
767 828 return revs
768 829
769 830 @reraise_safe_exceptions
770 831 def get_object(self, wire, sha, maybe_unreachable=False):
771 832 cache_on, context_uid, repo_id = self._cache_on(wire)
772 833 region = self._region(wire)
773 834
774 835 @region.conditional_cache_on_arguments(condition=cache_on)
775 836 def _get_object(_context_uid, _repo_id, _sha):
776 837 repo_init = self._factory.repo_libgit2(wire)
777 838 with repo_init as repo:
778 839
779 840 missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
780 841 try:
781 842 commit = repo.revparse_single(sha)
782 843 except KeyError:
783 844 # NOTE(marcink): KeyError doesn't give us any meaningful information
784 845 # here, we instead give something more explicit
785 846 e = exceptions.RefNotFoundException('SHA: %s not found', sha)
786 847 raise exceptions.LookupException(e)(missing_commit_err)
787 848 except ValueError as e:
788 849 raise exceptions.LookupException(e)(missing_commit_err)
789 850
790 851 is_tag = False
791 852 if isinstance(commit, pygit2.Tag):
792 853 commit = repo.get(commit.target)
793 854 is_tag = True
794 855
795 856 check_dangling = True
796 857 if is_tag:
797 858 check_dangling = False
798 859
799 860 if check_dangling and maybe_unreachable:
800 861 check_dangling = False
801 862
802 863                 # the sha was given as a reference that parsed, so it's not a dangling commit
803 864 if sha != commit.hex:
804 865 check_dangling = False
805 866
806 867 if check_dangling:
807 868 # check for dangling commit
808 869 for branch in repo.branches.with_commit(commit.hex):
809 870 if branch:
810 871 break
811 872 else:
812 873 # NOTE(marcink): Empty error doesn't give us any meaningful information
813 874 # here, we instead give something more explicit
814 875 e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
815 876 raise exceptions.LookupException(e)(missing_commit_err)
816 877
817 878 commit_id = commit.hex
818 type_id = commit.type
879 type_str = commit.type_str
819 880
820 881 return {
821 882 'id': commit_id,
822 'type': self._type_id_to_name(type_id),
883 'type': type_str,
823 884 'commit_id': commit_id,
824 885 'idx': 0
825 886 }
826 887
827 888 return _get_object(context_uid, repo_id, sha)
828 889
829 890 @reraise_safe_exceptions
830 891 def get_refs(self, wire):
831 892 cache_on, context_uid, repo_id = self._cache_on(wire)
832 893 region = self._region(wire)
833 894
834 895 @region.conditional_cache_on_arguments(condition=cache_on)
835 896 def _get_refs(_context_uid, _repo_id):
836 897
837 898 repo_init = self._factory.repo_libgit2(wire)
838 899 with repo_init as repo:
839 900 regex = re.compile('^refs/(heads|tags)/')
840 901 return {x.name: x.target.hex for x in
841 902 [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}
842 903
843 904 return _get_refs(context_uid, repo_id)
844 905
845 906 @reraise_safe_exceptions
846 907 def get_branch_pointers(self, wire):
847 908 cache_on, context_uid, repo_id = self._cache_on(wire)
848 909 region = self._region(wire)
849 910
850 911 @region.conditional_cache_on_arguments(condition=cache_on)
851 912 def _get_branch_pointers(_context_uid, _repo_id):
852 913
853 914 repo_init = self._factory.repo_libgit2(wire)
854 915 regex = re.compile('^refs/heads')
855 916 with repo_init as repo:
856 917 branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
857 918 return {x.target.hex: x.shorthand for x in branches}
858 919
859 920 return _get_branch_pointers(context_uid, repo_id)
860 921
861 922 @reraise_safe_exceptions
862 923 def head(self, wire, show_exc=True):
863 924 cache_on, context_uid, repo_id = self._cache_on(wire)
864 925 region = self._region(wire)
865 926
866 927 @region.conditional_cache_on_arguments(condition=cache_on)
867 928 def _head(_context_uid, _repo_id, _show_exc):
868 929 repo_init = self._factory.repo_libgit2(wire)
869 930 with repo_init as repo:
870 931 try:
871 932 return repo.head.peel().hex
872 933 except Exception:
873 934 if show_exc:
874 935 raise
875 936 return _head(context_uid, repo_id, show_exc)
876 937
877 938 @reraise_safe_exceptions
878 939 def init(self, wire):
879 940 repo_path = safe_str(wire['path'])
880 941 self.repo = Repo.init(repo_path)
881 942
882 943 @reraise_safe_exceptions
883 944 def init_bare(self, wire):
884 945 repo_path = safe_str(wire['path'])
885 946 self.repo = Repo.init_bare(repo_path)
886 947
887 948 @reraise_safe_exceptions
888 949 def revision(self, wire, rev):
889 950
890 951 cache_on, context_uid, repo_id = self._cache_on(wire)
891 952 region = self._region(wire)
892 953
893 954 @region.conditional_cache_on_arguments(condition=cache_on)
894 955 def _revision(_context_uid, _repo_id, _rev):
895 956 repo_init = self._factory.repo_libgit2(wire)
896 957 with repo_init as repo:
897 958 commit = repo[rev]
898 959 obj_data = {
899 960 'id': commit.id.hex,
900 961 }
901 962                 # tree objects themselves don't have a tree_id attribute
902 963 if hasattr(commit, 'tree_id'):
903 964 obj_data['tree'] = commit.tree_id.hex
904 965
905 966 return obj_data
906 967 return _revision(context_uid, repo_id, rev)
907 968
908 969 @reraise_safe_exceptions
909 970 def date(self, wire, commit_id):
910 971 cache_on, context_uid, repo_id = self._cache_on(wire)
911 972 region = self._region(wire)
912 973
913 974 @region.conditional_cache_on_arguments(condition=cache_on)
914 975 def _date(_repo_id, _commit_id):
915 976 repo_init = self._factory.repo_libgit2(wire)
916 977 with repo_init as repo:
917 978 commit = repo[commit_id]
918 979
919 980 if hasattr(commit, 'commit_time'):
920 981 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
921 982 else:
922 983 commit = commit.get_object()
923 984 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
924 985
925 986 # TODO(marcink): check dulwich difference of offset vs timezone
926 987 return [commit_time, commit_time_offset]
927 988 return _date(repo_id, commit_id)
928 989
929 990 @reraise_safe_exceptions
930 991 def author(self, wire, commit_id):
931 992 cache_on, context_uid, repo_id = self._cache_on(wire)
932 993 region = self._region(wire)
933 994
934 995 @region.conditional_cache_on_arguments(condition=cache_on)
935 996 def _author(_repo_id, _commit_id):
936 997 repo_init = self._factory.repo_libgit2(wire)
937 998 with repo_init as repo:
938 999 commit = repo[commit_id]
939 1000
940 1001 if hasattr(commit, 'author'):
941 1002 author = commit.author
942 1003 else:
943 1004 author = commit.get_object().author
944 1005
945 1006 if author.email:
946 1007 return f"{author.name} <{author.email}>"
947 1008
948 1009 try:
949 1010 return f"{author.name}"
950 1011 except Exception:
951 1012 return f"{safe_str(author.raw_name)}"
952 1013
953 1014 return _author(repo_id, commit_id)
954 1015
955 1016 @reraise_safe_exceptions
956 1017 def message(self, wire, commit_id):
957 1018 cache_on, context_uid, repo_id = self._cache_on(wire)
958 1019 region = self._region(wire)
959 1020
960 1021 @region.conditional_cache_on_arguments(condition=cache_on)
961 1022 def _message(_repo_id, _commit_id):
962 1023 repo_init = self._factory.repo_libgit2(wire)
963 1024 with repo_init as repo:
964 1025 commit = repo[commit_id]
965 1026 return commit.message
966 1027 return _message(repo_id, commit_id)
967 1028
968 1029 @reraise_safe_exceptions
969 1030 def parents(self, wire, commit_id):
970 1031 cache_on, context_uid, repo_id = self._cache_on(wire)
971 1032 region = self._region(wire)
972 1033
973 1034 @region.conditional_cache_on_arguments(condition=cache_on)
974 1035 def _parents(_repo_id, _commit_id):
975 1036 repo_init = self._factory.repo_libgit2(wire)
976 1037 with repo_init as repo:
977 1038 commit = repo[commit_id]
978 1039 if hasattr(commit, 'parent_ids'):
979 1040 parent_ids = commit.parent_ids
980 1041 else:
981 1042 parent_ids = commit.get_object().parent_ids
982 1043
983 1044 return [x.hex for x in parent_ids]
984 1045 return _parents(repo_id, commit_id)
985 1046
986 1047 @reraise_safe_exceptions
987 1048 def children(self, wire, commit_id):
988 1049 cache_on, context_uid, repo_id = self._cache_on(wire)
989 1050 region = self._region(wire)
990 1051
991 1052 head = self.head(wire)
992 1053
993 1054 @region.conditional_cache_on_arguments(condition=cache_on)
994 1055 def _children(_repo_id, _commit_id):
995 1056
996 1057 output, __ = self.run_git_command(
997 1058 wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])
998 1059
999 1060 child_ids = []
1000 1061 pat = re.compile(fr'^{commit_id}')
1001 1062 for line in output.splitlines():
1002 1063 line = safe_str(line)
1003 1064 if pat.match(line):
1004 1065 found_ids = line.split(' ')[1:]
1005 1066 child_ids.extend(found_ids)
1006 1067 break
1007 1068
1008 1069 return child_ids
1009 1070 return _children(repo_id, commit_id)
1010 1071
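The rev-list output parsed above lists each commit followed by its children on one line; the regex keeps only the line starting with the queried commit (shas shown as placeholders):

    <commit_id> <child_sha_1> <child_sha_2>
    # -> child_ids == ['<child_sha_1>', '<child_sha_2>']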
1011 1072 @reraise_safe_exceptions
1012 1073 def set_refs(self, wire, key, value):
1013 1074 repo_init = self._factory.repo_libgit2(wire)
1014 1075 with repo_init as repo:
1015 1076 repo.references.create(key, value, force=True)
1016 1077
1017 1078 @reraise_safe_exceptions
1018 1079 def create_branch(self, wire, branch_name, commit_id, force=False):
1019 1080 repo_init = self._factory.repo_libgit2(wire)
1020 1081 with repo_init as repo:
1082 if commit_id:
1021 1083 commit = repo[commit_id]
1084 else:
1085 # if commit is not given just use the HEAD
1086 commit = repo.head()
1022 1087
1023 1088 if force:
1024 1089 repo.branches.local.create(branch_name, commit, force=force)
1025 1090 elif not repo.branches.get(branch_name):
1026 1091                 # create only if that branch doesn't already exist
1027 1092 repo.branches.local.create(branch_name, commit, force=force)
1028 1093
1029 1094 @reraise_safe_exceptions
1030 1095 def remove_ref(self, wire, key):
1031 1096 repo_init = self._factory.repo_libgit2(wire)
1032 1097 with repo_init as repo:
1033 1098 repo.references.delete(key)
1034 1099
1035 1100 @reraise_safe_exceptions
1036 1101 def tag_remove(self, wire, tag_name):
1037 1102 repo_init = self._factory.repo_libgit2(wire)
1038 1103 with repo_init as repo:
1039 1104 key = f'refs/tags/{tag_name}'
1040 1105 repo.references.delete(key)
1041 1106
1042 1107 @reraise_safe_exceptions
1043 1108 def tree_changes(self, wire, source_id, target_id):
1044 # TODO(marcink): remove this seems it's only used by tests
1045 1109 repo = self._factory.repo(wire)
1110 # source can be empty
1111 source_id = safe_bytes(source_id if source_id else b'')
1112 target_id = safe_bytes(target_id)
1113
1046 1114 source = repo[source_id].tree if source_id else None
1047 1115 target = repo[target_id].tree
1048 1116 result = repo.object_store.tree_changes(source, target)
1049 return list(result)
1117
1118 added = set()
1119 modified = set()
1120 deleted = set()
1121 for (old_path, new_path), (_, _), (_, _) in list(result):
1122 if new_path and old_path:
1123 modified.add(new_path)
1124 elif new_path and not old_path:
1125 added.add(new_path)
1126 elif not new_path and old_path:
1127 deleted.add(old_path)
1128
1129 return list(added), list(modified), list(deleted)
1050 1130
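dulwich's object_store.tree_changes() yields ((old_path, new_path), (old_mode, new_mode), (old_sha, new_sha)) tuples; the loop above classifies them as in this illustration:

    # (None, b'new.txt')    -> added
    # (b'a.txt', b'a.txt')  -> modified (content or mode changed)
    # (b'gone.txt', None)   -> deleted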
1051 1131 @reraise_safe_exceptions
1052 1132 def tree_and_type_for_path(self, wire, commit_id, path):
1053 1133
1054 1134 cache_on, context_uid, repo_id = self._cache_on(wire)
1055 1135 region = self._region(wire)
1056 1136
1057 1137 @region.conditional_cache_on_arguments(condition=cache_on)
1058 1138 def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
1059 1139 repo_init = self._factory.repo_libgit2(wire)
1060 1140
1061 1141 with repo_init as repo:
1062 1142 commit = repo[commit_id]
1063 1143 try:
1064 1144 tree = commit.tree[path]
1065 1145 except KeyError:
1066 1146 return None, None, None
1067 1147
1068 1148 return tree.id.hex, tree.type_str, tree.filemode
1069 1149 return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1070 1150
1071 1151 @reraise_safe_exceptions
1072 1152 def tree_items(self, wire, tree_id):
1073 1153 cache_on, context_uid, repo_id = self._cache_on(wire)
1074 1154 region = self._region(wire)
1075 1155
1076 1156 @region.conditional_cache_on_arguments(condition=cache_on)
1077 1157 def _tree_items(_repo_id, _tree_id):
1078 1158
1079 1159 repo_init = self._factory.repo_libgit2(wire)
1080 1160 with repo_init as repo:
1081 1161 try:
1082 1162 tree = repo[tree_id]
1083 1163 except KeyError:
1084 1164 raise ObjectMissing(f'No tree with id: {tree_id}')
1085 1165
1086 1166 result = []
1087 1167 for item in tree:
1088 1168 item_sha = item.hex
1089 1169 item_mode = item.filemode
1090 1170 item_type = item.type_str
1091 1171
1092 1172 if item_type == 'commit':
1093 1173 # NOTE(marcink): submodules we translate to 'link' for backward compat
1094 1174 item_type = 'link'
1095 1175
1096 1176 result.append((item.name, item_mode, item_sha, item_type))
1097 1177 return result
1098 1178 return _tree_items(repo_id, tree_id)
1099 1179
1100 1180 @reraise_safe_exceptions
1101 1181 def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1102 1182 """
1103 1183 Old version that uses subprocess to call diff
1104 1184 """
1105 1185
1106 1186 flags = [
1107 1187 '-U%s' % context, '--patch',
1108 1188 '--binary',
1109 1189 '--find-renames',
1110 1190 '--no-indent-heuristic',
1111 1191 # '--indent-heuristic',
1112 1192 #'--full-index',
1113 1193 #'--abbrev=40'
1114 1194 ]
1115 1195
1116 1196 if opt_ignorews:
1117 1197 flags.append('--ignore-all-space')
1118 1198
1119 1199 if commit_id_1 == self.EMPTY_COMMIT:
1120 1200 cmd = ['show'] + flags + [commit_id_2]
1121 1201 else:
1122 1202 cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
1123 1203
1124 1204 if file_filter:
1125 1205 cmd.extend(['--', file_filter])
1126 1206
1127 1207 diff, __ = self.run_git_command(wire, cmd)
1128 1208 # If we used 'show' command, strip first few lines (until actual diff
1129 1209 # starts)
1130 1210 if commit_id_1 == self.EMPTY_COMMIT:
1131 1211 lines = diff.splitlines()
1132 1212 x = 0
1133 1213 for line in lines:
1134 1214 if line.startswith(b'diff'):
1135 1215 break
1136 1216 x += 1
1137 1217             # Append a trailing newline just like the 'diff' command does
1138 1218             diff = b'\n'.join(lines[x:]) + b'\n'
1139 1219 return diff
1140 1220
1141 1221 @reraise_safe_exceptions
1142 1222 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1143 1223 repo_init = self._factory.repo_libgit2(wire)
1144 1224
1145 1225 with repo_init as repo:
1146 1226 swap = True
1147 1227 flags = 0
1148 1228 flags |= pygit2.GIT_DIFF_SHOW_BINARY
1149 1229
1150 1230 if opt_ignorews:
1151 1231 flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE
1152 1232
1153 1233 if commit_id_1 == self.EMPTY_COMMIT:
1154 1234 comm1 = repo[commit_id_2]
1155 1235 diff_obj = comm1.tree.diff_to_tree(
1156 1236 flags=flags, context_lines=context, swap=swap)
1157 1237
1158 1238 else:
1159 1239 comm1 = repo[commit_id_2]
1160 1240 comm2 = repo[commit_id_1]
1161 1241 diff_obj = comm1.tree.diff_to_tree(
1162 1242 comm2.tree, flags=flags, context_lines=context, swap=swap)
1163 1243 similar_flags = 0
1164 1244 similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
1165 1245 diff_obj.find_similar(flags=similar_flags)
1166 1246
1167 1247 if file_filter:
1168 1248 for p in diff_obj:
1169 1249 if p.delta.old_file.path == file_filter:
1170 return BinaryEnvelope(p.data) or BinaryEnvelope(b'')
1250 return BytesEnvelope(p.data) or BytesEnvelope(b'')
1171 1251                 # no matching path == no diff
1172 return BinaryEnvelope(b'')
1173 return BinaryEnvelope(diff_obj.patch) or BinaryEnvelope(b'')
1252 return BytesEnvelope(b'')
1253
1254 return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')
1174 1255
1175 1256 @reraise_safe_exceptions
1176 1257 def node_history(self, wire, commit_id, path, limit):
1177 1258 cache_on, context_uid, repo_id = self._cache_on(wire)
1178 1259 region = self._region(wire)
1179 1260
1180 1261 @region.conditional_cache_on_arguments(condition=cache_on)
1181 1262 def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
1182 1263 # optimize for n==1, rev-list is much faster for that use-case
1183 1264 if limit == 1:
1184 1265 cmd = ['rev-list', '-1', commit_id, '--', path]
1185 1266 else:
1186 1267 cmd = ['log']
1187 1268 if limit:
1188 1269 cmd.extend(['-n', str(safe_int(limit, 0))])
1189 1270 cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
1190 1271
1191 1272 output, __ = self.run_git_command(wire, cmd)
1192 1273 commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
1193 1274
1194 1275 return [x for x in commit_ids]
1195 1276 return _node_history(context_uid, repo_id, commit_id, path, limit)
1196 1277
1197 1278 @reraise_safe_exceptions
1198 1279 def node_annotate_legacy(self, wire, commit_id, path):
1199 1280 # note: replaced by pygit2 implementation
1200 1281 cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
1201 1282 # -l ==> outputs long shas (and we need all 40 characters)
1202 1283 # --root ==> doesn't put '^' character for boundaries
1203 1284 # -r commit_id ==> blames for the given commit
1204 1285 output, __ = self.run_git_command(wire, cmd)
1205 1286
1206 1287 result = []
1207 1288 for i, blame_line in enumerate(output.splitlines()[:-1]):
1208 1289 line_no = i + 1
1209 1290 blame_commit_id, line = re.split(rb' ', blame_line, 1)
1210 1291 result.append((line_no, blame_commit_id, line))
1211 1292
1212 1293 return result
1213 1294
1214 1295 @reraise_safe_exceptions
1215 1296 def node_annotate(self, wire, commit_id, path):
1216 1297
1217 1298 result_libgit = []
1218 1299 repo_init = self._factory.repo_libgit2(wire)
1219 1300 with repo_init as repo:
1220 1301 commit = repo[commit_id]
1221 1302 blame_obj = repo.blame(path, newest_commit=commit_id)
1222 1303 for i, line in enumerate(commit.tree[path].data.splitlines()):
1223 1304 line_no = i + 1
1224 1305 hunk = blame_obj.for_line(line_no)
1225 1306 blame_commit_id = hunk.final_commit_id.hex
1226 1307
1227 1308 result_libgit.append((line_no, blame_commit_id, line))
1228 1309
1229 1310 return result_libgit
1230 1311
1231 1312 @reraise_safe_exceptions
1232 1313 def update_server_info(self, wire):
1233 1314 repo = self._factory.repo(wire)
1234 1315 update_server_info(repo)
1235 1316
1236 1317 @reraise_safe_exceptions
1237 1318 def get_all_commit_ids(self, wire):
1238 1319
1239 1320 cache_on, context_uid, repo_id = self._cache_on(wire)
1240 1321 region = self._region(wire)
1241 1322
1242 1323 @region.conditional_cache_on_arguments(condition=cache_on)
1243 1324 def _get_all_commit_ids(_context_uid, _repo_id):
1244 1325
1245 1326 cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
1246 1327 try:
1247 1328 output, __ = self.run_git_command(wire, cmd)
1248 1329 return output.splitlines()
1249 1330 except Exception:
1250 1331 # Can be raised for empty repositories
1251 1332 return []
1252 1333
1253 1334 @region.conditional_cache_on_arguments(condition=cache_on)
1254 1335 def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
1255 1336 repo_init = self._factory.repo_libgit2(wire)
1256 1337 from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
1257 1338 results = []
1258 1339 with repo_init as repo:
1259 1340 for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
1260 1341 results.append(commit.id.hex)
1261 1342
1262 1343 return _get_all_commit_ids(context_uid, repo_id)
1263 1344
1264 1345 @reraise_safe_exceptions
1265 1346 def run_git_command(self, wire, cmd, **opts):
1266 1347 path = wire.get('path', None)
1267 1348
1268 1349 if path and os.path.isdir(path):
1269 1350 opts['cwd'] = path
1270 1351
1271 1352 if '_bare' in opts:
1272 1353 _copts = []
1273 1354 del opts['_bare']
1274 1355 else:
1275 1356 _copts = ['-c', 'core.quotepath=false',]
1276 1357 safe_call = False
1277 1358 if '_safe' in opts:
1278 1359 # no exc on failure
1279 1360 del opts['_safe']
1280 1361 safe_call = True
1281 1362
1282 1363 if '_copts' in opts:
1283 1364 _copts.extend(opts['_copts'] or [])
1284 1365 del opts['_copts']
1285 1366
1286 1367 gitenv = os.environ.copy()
1287 1368 gitenv.update(opts.pop('extra_env', {}))
1288 1369 # need to clean fix GIT_DIR !
1289 1370 if 'GIT_DIR' in gitenv:
1290 1371 del gitenv['GIT_DIR']
1291 1372 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
1292 1373 gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'
1293 1374
1294 1375 cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
1295 1376 _opts = {'env': gitenv, 'shell': False}
1296 1377
1297 1378 proc = None
1298 1379 try:
1299 1380 _opts.update(opts)
1300 1381 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
1301 1382
1302 1383 return b''.join(proc), b''.join(proc.stderr)
1303 1384 except OSError as err:
1304 1385 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
1305 1386 tb_err = ("Couldn't run git command (%s).\n"
1306 1387 "Original error was:%s\n"
1307 1388 "Call options:%s\n"
1308 1389 % (cmd, err, _opts))
1309 1390 log.exception(tb_err)
1310 1391 if safe_call:
1311 1392 return '', err
1312 1393 else:
1313 1394 raise exceptions.VcsException()(tb_err)
1314 1395 finally:
1315 1396 if proc:
1316 1397 proc.close()
1317 1398
1318 1399 @reraise_safe_exceptions
1319 1400 def install_hooks(self, wire, force=False):
1320 1401 from vcsserver.hook_utils import install_git_hooks
1321 1402 bare = self.bare(wire)
1322 1403 path = wire['path']
1323 1404 binary_dir = settings.BINARY_DIR
1324 1405 if binary_dir:
1325 1406 os.path.join(binary_dir, 'python3')
1326 1407 return install_git_hooks(path, bare, force_create=force)
1327 1408
1328 1409 @reraise_safe_exceptions
1329 1410 def get_hooks_info(self, wire):
1330 1411 from vcsserver.hook_utils import (
1331 1412 get_git_pre_hook_version, get_git_post_hook_version)
1332 1413 bare = self.bare(wire)
1333 1414 path = wire['path']
1334 1415 return {
1335 1416 'pre_version': get_git_pre_hook_version(path, bare),
1336 1417 'post_version': get_git_post_hook_version(path, bare),
1337 1418 }
1338 1419
1339 1420 @reraise_safe_exceptions
1340 1421 def set_head_ref(self, wire, head_name):
1341 1422         log.debug('Setting refs/heads to `%s`', head_name)
1342 1423 repo_init = self._factory.repo_libgit2(wire)
1343 1424 with repo_init as repo:
1344 1425 repo.set_head(f'refs/heads/{head_name}')
1345 1426
1346 1427 return [head_name] + [f'set HEAD to refs/heads/{head_name}']
1347 1428
1348 1429 @reraise_safe_exceptions
1349 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
1350 archive_dir_name, commit_id):
1430 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1431 archive_dir_name, commit_id, cache_config):
1351 1432
1352 1433 def file_walker(_commit_id, path):
1353 1434 repo_init = self._factory.repo_libgit2(wire)
1354 1435
1355 1436 with repo_init as repo:
1356 1437 commit = repo[commit_id]
1357 1438
1358 1439 if path in ['', '/']:
1359 1440 tree = commit.tree
1360 1441 else:
1361 1442 tree = commit.tree[path.rstrip('/')]
1362 1443 tree_id = tree.id.hex
1363 1444 try:
1364 1445 tree = repo[tree_id]
1365 1446 except KeyError:
1366 1447 raise ObjectMissing(f'No tree with id: {tree_id}')
1367 1448
1368 1449 index = LibGit2Index.Index()
1369 1450 index.read_tree(tree)
1370 1451 file_iter = index
1371 1452
1372 1453 for file_node in file_iter:
1373 1454 file_path = file_node.path
1374 1455 mode = file_node.mode
1375 1456 is_link = stat.S_ISLNK(mode)
1376 1457 if mode == pygit2.GIT_FILEMODE_COMMIT:
1377 1458 log.debug('Skipping path %s as a commit node', file_path)
1378 1459 continue
1379 1460 yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)
1380 1461
1381 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
1382 archive_dir_name, commit_id)
1462 return store_archive_in_cache(
1463 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
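For illustration, any generator yielding the same ArchiveNode shape (path, mode, is_link, raw-bytes reader) as imported from vcsserver.base above can feed store_archive_in_cache; the walker below over a plain directory is purely hypothetical and not part of this module:

    import os
    import stat

    def fs_file_walker(_commit_id, path):
        # walks a working directory instead of an object database,
        # yielding the ArchiveNode tuples store_archive_in_cache consumes
        for root, _dirs, files in os.walk(path):
            for name in files:
                file_path = os.path.join(root, name)
                mode = os.lstat(file_path).st_mode
                is_link = stat.S_ISLNK(mode)

                def read_raw(p=file_path):
                    with open(p, 'rb') as f:
                        return f.read()

                yield ArchiveNode(os.path.relpath(file_path, path), mode, is_link, read_raw)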
@@ -1,1105 +1,1159 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17 import binascii
18 18 import io
19 19 import logging
20 20 import stat
21 21 import urllib.request
22 22 import urllib.parse
23 23 import traceback
24 24 import hashlib
25 25
26 26 from hgext import largefiles, rebase, purge
27 27
28 28 from mercurial import commands
29 29 from mercurial import unionrepo
30 30 from mercurial import verify
31 31 from mercurial import repair
32 32
33 33 import vcsserver
34 34 from vcsserver import exceptions
35 from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original, archive_repo, ArchiveNode, BinaryEnvelope
35 from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original, store_archive_in_cache, ArchiveNode, BytesEnvelope, \
36 BinaryEnvelope
36 37 from vcsserver.hgcompat import (
37 38 archival, bin, clone, config as hgconfig, diffopts, hex, get_ctx,
38 39 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler,
39 40 makepeer, instance, match, memctx, exchange, memfilectx, nullrev, hg_merge,
40 41 patch, peer, revrange, ui, hg_tag, Abort, LookupError, RepoError,
41 42 RepoLookupError, InterventionRequired, RequirementError,
42 43 alwaysmatcher, patternmatcher, hgutil, hgext_strip)
43 44 from vcsserver.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes
44 45 from vcsserver.vcs_base import RemoteBase
46 from vcsserver.config import hooks as hooks_config
47
45 48
46 49 log = logging.getLogger(__name__)
47 50
48 51
49 52 def make_ui_from_config(repo_config):
50 53
51 54 class LoggingUI(ui.ui):
52 55
53 56 def status(self, *msg, **opts):
54 57 str_msg = map(safe_str, msg)
55 58 log.info(' '.join(str_msg).rstrip('\n'))
56 59 #super(LoggingUI, self).status(*msg, **opts)
57 60
58 61 def warn(self, *msg, **opts):
59 62 str_msg = map(safe_str, msg)
60 63 log.warning('ui_logger:'+' '.join(str_msg).rstrip('\n'))
61 64 #super(LoggingUI, self).warn(*msg, **opts)
62 65
63 66 def error(self, *msg, **opts):
64 67 str_msg = map(safe_str, msg)
65 68 log.error('ui_logger:'+' '.join(str_msg).rstrip('\n'))
66 69 #super(LoggingUI, self).error(*msg, **opts)
67 70
68 71 def note(self, *msg, **opts):
69 72 str_msg = map(safe_str, msg)
70 73 log.info('ui_logger:'+' '.join(str_msg).rstrip('\n'))
71 74 #super(LoggingUI, self).note(*msg, **opts)
72 75
73 76 def debug(self, *msg, **opts):
74 77 str_msg = map(safe_str, msg)
75 78 log.debug('ui_logger:'+' '.join(str_msg).rstrip('\n'))
76 79 #super(LoggingUI, self).debug(*msg, **opts)
77 80
78 81 baseui = LoggingUI()
79 82
80 83 # clean the baseui object
81 84 baseui._ocfg = hgconfig.config()
82 85 baseui._ucfg = hgconfig.config()
83 86 baseui._tcfg = hgconfig.config()
84 87
85 88 for section, option, value in repo_config:
86 89 baseui.setconfig(ascii_bytes(section), ascii_bytes(option), ascii_bytes(value))
87 90
88 91 # make our hgweb quiet so it doesn't print output
89 92 baseui.setconfig(b'ui', b'quiet', b'true')
90 93
91 94 baseui.setconfig(b'ui', b'paginate', b'never')
92 95 # for better Error reporting of Mercurial
93 96 baseui.setconfig(b'ui', b'message-output', b'stderr')
94 97
95 98 # force mercurial to only use 1 thread, otherwise it may try to set a
96 99 # signal in a non-main thread, thus generating a ValueError.
97 100 baseui.setconfig(b'worker', b'numcpus', 1)
98 101
99 102 # If there is no config for the largefiles extension, we explicitly disable
100 103 # it here. This overrides settings from the repository's hgrc file. Recent
101 104 # Mercurial versions enable largefiles in hgrc on clone from a largefile
102 105 # repo.
103 106 if not baseui.hasconfig(b'extensions', b'largefiles'):
104 107 log.debug('Explicitly disable largefiles extension for repo.')
105 108 baseui.setconfig(b'extensions', b'largefiles', b'!')
106 109
107 110 return baseui
108 111
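A short usage sketch: repo_config is an iterable of (section, option, value) string triples that make_ui_from_config byte-encodes before handing them to Mercurial; the sample values below are assumptions:

    repo_config = [
        ('ui', 'username', 'RhodeCode <noreply@example.com>'),  # sample values
        ('phases', 'publish', 'true'),
    ]
    baseui = make_ui_from_config(repo_config)
    assert baseui.configbool(b'ui', b'quiet')                 # forced quiet mode
    assert baseui.config(b'extensions', b'largefiles') == b'!'  # explicitly disabled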
109 112
110 113 def reraise_safe_exceptions(func):
111 114 """Decorator for converting mercurial exceptions to something neutral."""
112 115
113 116 def wrapper(*args, **kwargs):
114 117 try:
115 118 return func(*args, **kwargs)
116 119 except (Abort, InterventionRequired) as e:
117 120 raise_from_original(exceptions.AbortException(e), e)
118 121 except RepoLookupError as e:
119 122 raise_from_original(exceptions.LookupException(e), e)
120 123 except RequirementError as e:
121 124 raise_from_original(exceptions.RequirementException(e), e)
122 125 except RepoError as e:
123 126 raise_from_original(exceptions.VcsException(e), e)
124 127 except LookupError as e:
125 128 raise_from_original(exceptions.LookupException(e), e)
126 129 except Exception as e:
127 130 if not hasattr(e, '_vcs_kind'):
128 131 log.exception("Unhandled exception in hg remote call")
129 132 raise_from_original(exceptions.UnhandledException(e), e)
130 133
131 134 raise
132 135 return wrapper
133 136
134 137
135 138 class MercurialFactory(RepoFactory):
136 139 repo_type = 'hg'
137 140
138 141 def _create_config(self, config, hooks=True):
139 142 if not hooks:
140 hooks_to_clean = frozenset((
141 'changegroup.repo_size', 'preoutgoing.pre_pull',
142 'outgoing.pull_logger', 'prechangegroup.pre_push'))
143
144 hooks_to_clean = {
145
146 hooks_config.HOOK_REPO_SIZE,
147 hooks_config.HOOK_PRE_PULL,
148 hooks_config.HOOK_PULL,
149
150 hooks_config.HOOK_PRE_PUSH,
151 # TODO: what about PRETX? This was disabled in pre-5.0.0
152 hooks_config.HOOK_PRETX_PUSH,
153
154 }
143 155 new_config = []
144 156 for section, option, value in config:
145 157 if section == 'hooks' and option in hooks_to_clean:
146 158 continue
147 159 new_config.append((section, option, value))
148 160 config = new_config
149 161
150 162 baseui = make_ui_from_config(config)
151 163 return baseui
152 164
153 165 def _create_repo(self, wire, create):
154 166 baseui = self._create_config(wire["config"])
155 167 repo = instance(baseui, safe_bytes(wire["path"]), create)
156 168 log.debug('repository created: got HG object: %s', repo)
157 169 return repo
158 170
159 171 def repo(self, wire, create=False):
160 172 """
161 173 Get a repository instance for the given path.
162 174 """
163 175 return self._create_repo(wire, create)
164 176
165 177
166 178 def patch_ui_message_output(baseui):
167 179 baseui.setconfig(b'ui', b'quiet', b'false')
168 180 output = io.BytesIO()
169 181
170 182 def write(data, **unused_kwargs):
171 183 output.write(data)
172 184
173 185 baseui.status = write
174 186 baseui.write = write
175 187 baseui.warn = write
176 188 baseui.debug = write
177 189
178 190 return baseui, output
179 191
180 192
193 def get_obfuscated_url(url_obj):
194 url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd
195 url_obj.query = obfuscate_qs(url_obj.query)
196 obfuscated_uri = str(url_obj)
197 return obfuscated_uri
198
199
200 def normalize_url_for_hg(url: str):
201 _proto = None
202
203 if '+' in url[:url.find('://')]:
204 _proto = url[0:url.find('+')]
205 url = url[url.find('+') + 1:]
206 return url, _proto
207
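normalize_url_for_hg strips an optional `<proto>+` prefix from the scheme, returning the bare URL plus the prefix so callers like check_url below can special-case e.g. svn-backed remotes. A quick sketch:

    # prefix present: proto is split off
    assert normalize_url_for_hg('svn+http://host/repo') == ('http://host/repo', 'svn')
    # no prefix: the URL passes through untouched
    assert normalize_url_for_hg('https://host/repo') == ('https://host/repo', None)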
208
181 209 class HgRemote(RemoteBase):
182 210
183 211 def __init__(self, factory):
184 212 self._factory = factory
185 213 self._bulk_methods = {
186 214 "affected_files": self.ctx_files,
187 215 "author": self.ctx_user,
188 216 "branch": self.ctx_branch,
189 217 "children": self.ctx_children,
190 218 "date": self.ctx_date,
191 219 "message": self.ctx_description,
192 220 "parents": self.ctx_parents,
193 221 "status": self.ctx_status,
194 222 "obsolete": self.ctx_obsolete,
195 223 "phase": self.ctx_phase,
196 224 "hidden": self.ctx_hidden,
197 225 "_file_paths": self.ctx_list,
198 226 }
227 self._bulk_file_methods = {
228 "size": self.fctx_size,
229 "data": self.fctx_node_data,
230 "flags": self.fctx_flags,
231 "is_binary": self.is_binary,
232 "md5": self.md5_hash,
233 }
199 234
200 235 def _get_ctx(self, repo, ref):
201 236 return get_ctx(repo, ref)
202 237
203 238 @reraise_safe_exceptions
204 239 def discover_hg_version(self):
205 240 from mercurial import util
206 241 return safe_str(util.version())
207 242
208 243 @reraise_safe_exceptions
209 244 def is_empty(self, wire):
210 245 repo = self._factory.repo(wire)
211 246
212 247 try:
213 248 return len(repo) == 0
214 249 except Exception:
215 250 log.exception("failed to read object_store")
216 251 return False
217 252
218 253 @reraise_safe_exceptions
219 254 def bookmarks(self, wire):
220 255 cache_on, context_uid, repo_id = self._cache_on(wire)
221 256 region = self._region(wire)
222 257
223 258 @region.conditional_cache_on_arguments(condition=cache_on)
224 259 def _bookmarks(_context_uid, _repo_id):
225 260 repo = self._factory.repo(wire)
226 261 return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo._bookmarks.items()}
227 262
228 263 return _bookmarks(context_uid, repo_id)
229 264
230 265 @reraise_safe_exceptions
231 266 def branches(self, wire, normal, closed):
232 267 cache_on, context_uid, repo_id = self._cache_on(wire)
233 268 region = self._region(wire)
234 269
235 270 @region.conditional_cache_on_arguments(condition=cache_on)
236 271 def _branches(_context_uid, _repo_id, _normal, _closed):
237 272 repo = self._factory.repo(wire)
238 273 iter_branches = repo.branchmap().iterbranches()
239 274 bt = {}
240 275 for branch_name, _heads, tip_node, is_closed in iter_branches:
241 276 if normal and not is_closed:
242 277 bt[safe_str(branch_name)] = ascii_str(hex(tip_node))
243 278 if closed and is_closed:
244 279 bt[safe_str(branch_name)] = ascii_str(hex(tip_node))
245 280
246 281 return bt
247 282
248 283 return _branches(context_uid, repo_id, normal, closed)
249 284
250 285 @reraise_safe_exceptions
251 286 def bulk_request(self, wire, commit_id, pre_load):
252 287 cache_on, context_uid, repo_id = self._cache_on(wire)
253 288 region = self._region(wire)
254 289
255 290 @region.conditional_cache_on_arguments(condition=cache_on)
256 291 def _bulk_request(_repo_id, _commit_id, _pre_load):
257 292 result = {}
258 293 for attr in pre_load:
259 294 try:
260 295 method = self._bulk_methods[attr]
261 296 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
262 297 result[attr] = method(wire, commit_id)
263 298 except KeyError as e:
264 299 raise exceptions.VcsException(e)(
265 300 'Unknown bulk attribute: "%s"' % attr)
266 301 return result
267 302
268 303 return _bulk_request(repo_id, commit_id, sorted(pre_load))
269 304
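Caller-side sketch of the bulk API (the `remote` proxy and commit id are assumptions): one cached round-trip resolves several per-commit attributes, each dispatched through the _bulk_methods table above:

    attrs = remote.bulk_request(
        wire, commit_id='deadbeef' * 5,  # hypothetical 40-char hex id
        pre_load=['author', 'branch', 'date', 'message', 'parents'])
    print(attrs['branch'], attrs['author'])
    # an unknown attribute raises VcsException('Unknown bulk attribute: ...')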
270 305 @reraise_safe_exceptions
271 306 def ctx_branch(self, wire, commit_id):
272 307 cache_on, context_uid, repo_id = self._cache_on(wire)
273 308 region = self._region(wire)
274 309
275 310 @region.conditional_cache_on_arguments(condition=cache_on)
276 311 def _ctx_branch(_repo_id, _commit_id):
277 312 repo = self._factory.repo(wire)
278 313 ctx = self._get_ctx(repo, commit_id)
279 314 return ctx.branch()
280 315 return _ctx_branch(repo_id, commit_id)
281 316
282 317 @reraise_safe_exceptions
283 318 def ctx_date(self, wire, commit_id):
284 319 cache_on, context_uid, repo_id = self._cache_on(wire)
285 320 region = self._region(wire)
286 321
287 322 @region.conditional_cache_on_arguments(condition=cache_on)
288 323 def _ctx_date(_repo_id, _commit_id):
289 324 repo = self._factory.repo(wire)
290 325 ctx = self._get_ctx(repo, commit_id)
291 326 return ctx.date()
292 327 return _ctx_date(repo_id, commit_id)
293 328
294 329 @reraise_safe_exceptions
295 330 def ctx_description(self, wire, revision):
296 331 repo = self._factory.repo(wire)
297 332 ctx = self._get_ctx(repo, revision)
298 333 return ctx.description()
299 334
300 335 @reraise_safe_exceptions
301 336 def ctx_files(self, wire, commit_id):
302 337 cache_on, context_uid, repo_id = self._cache_on(wire)
303 338 region = self._region(wire)
304 339
305 340 @region.conditional_cache_on_arguments(condition=cache_on)
306 341 def _ctx_files(_repo_id, _commit_id):
307 342 repo = self._factory.repo(wire)
308 343 ctx = self._get_ctx(repo, commit_id)
309 344 return ctx.files()
310 345
311 346 return _ctx_files(repo_id, commit_id)
312 347
313 348 @reraise_safe_exceptions
314 349 def ctx_list(self, path, revision):
315 350 repo = self._factory.repo(path)
316 351 ctx = self._get_ctx(repo, revision)
317 352 return list(ctx)
318 353
319 354 @reraise_safe_exceptions
320 355 def ctx_parents(self, wire, commit_id):
321 356 cache_on, context_uid, repo_id = self._cache_on(wire)
322 357 region = self._region(wire)
323 358
324 359 @region.conditional_cache_on_arguments(condition=cache_on)
325 360 def _ctx_parents(_repo_id, _commit_id):
326 361 repo = self._factory.repo(wire)
327 362 ctx = self._get_ctx(repo, commit_id)
328 363 return [parent.hex() for parent in ctx.parents()
329 364 if not (parent.hidden() or parent.obsolete())]
330 365
331 366 return _ctx_parents(repo_id, commit_id)
332 367
333 368 @reraise_safe_exceptions
334 369 def ctx_children(self, wire, commit_id):
335 370 cache_on, context_uid, repo_id = self._cache_on(wire)
336 371 region = self._region(wire)
337 372
338 373 @region.conditional_cache_on_arguments(condition=cache_on)
339 374 def _ctx_children(_repo_id, _commit_id):
340 375 repo = self._factory.repo(wire)
341 376 ctx = self._get_ctx(repo, commit_id)
342 377 return [child.hex() for child in ctx.children()
343 378 if not (child.hidden() or child.obsolete())]
344 379
345 380 return _ctx_children(repo_id, commit_id)
346 381
347 382 @reraise_safe_exceptions
348 383 def ctx_phase(self, wire, commit_id):
349 384 cache_on, context_uid, repo_id = self._cache_on(wire)
350 385 region = self._region(wire)
351 386
352 387 @region.conditional_cache_on_arguments(condition=cache_on)
353 388 def _ctx_phase(_context_uid, _repo_id, _commit_id):
354 389 repo = self._factory.repo(wire)
355 390 ctx = self._get_ctx(repo, commit_id)
356 391 # public=0, draft=1, secret=3
357 392 return ctx.phase()
358 393 return _ctx_phase(context_uid, repo_id, commit_id)
359 394
360 395 @reraise_safe_exceptions
361 396 def ctx_obsolete(self, wire, commit_id):
362 397 cache_on, context_uid, repo_id = self._cache_on(wire)
363 398 region = self._region(wire)
364 399
365 400 @region.conditional_cache_on_arguments(condition=cache_on)
366 401 def _ctx_obsolete(_context_uid, _repo_id, _commit_id):
367 402 repo = self._factory.repo(wire)
368 403 ctx = self._get_ctx(repo, commit_id)
369 404 return ctx.obsolete()
370 405 return _ctx_obsolete(context_uid, repo_id, commit_id)
371 406
372 407 @reraise_safe_exceptions
373 408 def ctx_hidden(self, wire, commit_id):
374 409 cache_on, context_uid, repo_id = self._cache_on(wire)
375 410 region = self._region(wire)
376 411
377 412 @region.conditional_cache_on_arguments(condition=cache_on)
378 413 def _ctx_hidden(_context_uid, _repo_id, _commit_id):
379 414 repo = self._factory.repo(wire)
380 415 ctx = self._get_ctx(repo, commit_id)
381 416 return ctx.hidden()
382 417 return _ctx_hidden(context_uid, repo_id, commit_id)
383 418
384 419 @reraise_safe_exceptions
385 420 def ctx_substate(self, wire, revision):
386 421 repo = self._factory.repo(wire)
387 422 ctx = self._get_ctx(repo, revision)
388 423 return ctx.substate
389 424
390 425 @reraise_safe_exceptions
391 426 def ctx_status(self, wire, revision):
392 427 repo = self._factory.repo(wire)
393 428 ctx = self._get_ctx(repo, revision)
394 429 status = repo[ctx.p1().node()].status(other=ctx.node())
395 430 # object of status (odd, custom named tuple in mercurial) is not
396 431 # correctly serializable, we make it a list, as the underling
397 432 # API expects this to be a list
398 433 return list(status)
399 434
400 435 @reraise_safe_exceptions
401 436 def ctx_user(self, wire, revision):
402 437 repo = self._factory.repo(wire)
403 438 ctx = self._get_ctx(repo, revision)
404 439 return ctx.user()
405 440
406 441 @reraise_safe_exceptions
407 442 def check_url(self, url, config):
408 _proto = None
409 if '+' in url[:url.find('://')]:
410 _proto = url[0:url.find('+')]
411 url = url[url.find('+') + 1:]
443 url, _proto = normalize_url_for_hg(url)
444 url_obj = url_parser(safe_bytes(url))
445
446 test_uri = safe_str(url_obj.authinfo()[0])
447 authinfo = url_obj.authinfo()[1]
448 obfuscated_uri = get_obfuscated_url(url_obj)
449 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
450
412 451 handlers = []
413 url_obj = url_parser(url)
414 test_uri, authinfo = url_obj.authinfo()
415 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
416 url_obj.query = obfuscate_qs(url_obj.query)
417
418 cleaned_uri = str(url_obj)
419 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
420
421 452 if authinfo:
422 453 # create a password manager
423 454 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
424 455 passmgr.add_password(*authinfo)
425 456
426 457 handlers.extend((httpbasicauthhandler(passmgr),
427 458 httpdigestauthhandler(passmgr)))
428 459
429 460 o = urllib.request.build_opener(*handlers)
430 461 o.addheaders = [('Content-Type', 'application/mercurial-0.1'),
431 462 ('Accept', 'application/mercurial-0.1')]
432 463
433 464 q = {"cmd": 'between'}
434 465 q.update({'pairs': "{}-{}".format('0' * 40, '0' * 40)})
435 466 qs = '?%s' % urllib.parse.urlencode(q)
436 467 cu = "{}{}".format(test_uri, qs)
437 468 req = urllib.request.Request(cu, None, {})
438 469
439 470 try:
440 log.debug("Trying to open URL %s", cleaned_uri)
471 log.debug("Trying to open URL %s", obfuscated_uri)
441 472 resp = o.open(req)
442 473 if resp.code != 200:
443 474 raise exceptions.URLError()('Return Code is not 200')
444 475 except Exception as e:
445 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
476 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
446 477 # means it cannot be cloned
447 raise exceptions.URLError(e)("[{}] org_exc: {}".format(cleaned_uri, e))
478 raise exceptions.URLError(e)("[{}] org_exc: {}".format(obfuscated_uri, e))
448 479
449 480 # now check if it's a proper hg repo, but don't do it for svn
450 481 try:
451 482 if _proto == 'svn':
452 483 pass
453 484 else:
454 485 # check for pure hg repos
455 486 log.debug(
456 "Verifying if URL is a Mercurial repository: %s",
457 cleaned_uri)
487 "Verifying if URL is a Mercurial repository: %s", obfuscated_uri)
458 488 ui = make_ui_from_config(config)
459 peer_checker = makepeer(ui, url)
460 peer_checker.lookup('tip')
489 peer_checker = makepeer(ui, safe_bytes(url))
490 peer_checker.lookup(b'tip')
461 491 except Exception as e:
462 492 log.warning("URL is not a valid Mercurial repository: %s",
463 cleaned_uri)
493 obfuscated_uri)
464 494 raise exceptions.URLError(e)(
465 495 "url [%s] does not look like an hg repo org_exc: %s"
466 % (cleaned_uri, e))
496 % (obfuscated_uri, e))
467 497
468 log.info("URL is a valid Mercurial repository: %s", cleaned_uri)
498 log.info("URL is a valid Mercurial repository: %s", obfuscated_uri)
469 499 return True
470 500
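The reachability probe above issues Mercurial's cheap `between` wire command with a null-revision pair; with a hypothetical host, the probe URL is built like this:

    import urllib.parse

    q = {'cmd': 'between', 'pairs': '{}-{}'.format('0' * 40, '0' * 40)}
    probe_url = 'https://host/repo' + '?' + urllib.parse.urlencode(q)
    # -> https://host/repo?cmd=between&pairs=0000...0000-0000...0000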
471 501 @reraise_safe_exceptions
472 502 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_git, opt_ignorews, context):
473 503 repo = self._factory.repo(wire)
474 504
475 505 if file_filter:
476 506 # unpack the file-filter
477 507 repo_path, node_path = file_filter
478 508 match_filter = match(safe_bytes(repo_path), b'', [safe_bytes(node_path)])
479 509 else:
480 510 match_filter = file_filter
481 511 opts = diffopts(git=opt_git, ignorews=opt_ignorews, context=context, showfunc=1)
482 512
483 513 try:
484 514 diff_iter = patch.diff(
485 515 repo, node1=commit_id_1, node2=commit_id_2, match=match_filter, opts=opts)
486 return BinaryEnvelope(b"".join(diff_iter))
516 return BytesEnvelope(b"".join(diff_iter))
487 517 except RepoLookupError as e:
488 518 raise exceptions.LookupException(e)()
489 519
490 520 @reraise_safe_exceptions
491 521 def node_history(self, wire, revision, path, limit):
492 522 cache_on, context_uid, repo_id = self._cache_on(wire)
493 523 region = self._region(wire)
494 524
495 525 @region.conditional_cache_on_arguments(condition=cache_on)
496 526 def _node_history(_context_uid, _repo_id, _revision, _path, _limit):
497 527 repo = self._factory.repo(wire)
498 528
499 529 ctx = self._get_ctx(repo, revision)
500 530 fctx = ctx.filectx(safe_bytes(path))
501 531
502 532 def history_iter():
503 533 limit_rev = fctx.rev()
504 534 for obj in reversed(list(fctx.filelog())):
505 535 obj = fctx.filectx(obj)
506 536 ctx = obj.changectx()
507 537 if ctx.hidden() or ctx.obsolete():
508 538 continue
509 539
510 540 if limit_rev >= obj.rev():
511 541 yield obj
512 542
513 543 history = []
514 544 for cnt, obj in enumerate(history_iter()):
515 545 if limit and cnt >= limit:
516 546 break
517 547 history.append(hex(obj.node()))
518 548
519 549 return [x for x in history]
520 550 return _node_history(context_uid, repo_id, revision, path, limit)
521 551
522 552 @reraise_safe_exceptions
523 553 def node_history_untill(self, wire, revision, path, limit):
524 554 cache_on, context_uid, repo_id = self._cache_on(wire)
525 555 region = self._region(wire)
526 556
527 557 @region.conditional_cache_on_arguments(condition=cache_on)
528 558 def _node_history_until(_context_uid, _repo_id, _revision, _path, _limit):
529 559 repo = self._factory.repo(wire)
530 560 ctx = self._get_ctx(repo, revision)
531 561 fctx = ctx.filectx(safe_bytes(path))
532 562
533 563 file_log = list(fctx.filelog())
534 564 if limit:
535 565 # Limit to the last n items
536 566 file_log = file_log[-limit:]
537 567
538 568 return [hex(fctx.filectx(cs).node()) for cs in reversed(file_log)]
539 569 return _node_history_until(context_uid, repo_id, revision, path, limit)
540 570
541 571 @reraise_safe_exceptions
572 def bulk_file_request(self, wire, commit_id, path, pre_load):
573 cache_on, context_uid, repo_id = self._cache_on(wire)
574 region = self._region(wire)
575
576 @region.conditional_cache_on_arguments(condition=cache_on)
577 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
578 result = {}
579 for attr in pre_load:
580 try:
581 method = self._bulk_file_methods[attr]
582 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
583 result[attr] = method(wire, _commit_id, _path)
584 except KeyError as e:
585 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
586 return BinaryEnvelope(result)
587
588 return _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
589
590 @reraise_safe_exceptions
542 591 def fctx_annotate(self, wire, revision, path):
543 592 repo = self._factory.repo(wire)
544 593 ctx = self._get_ctx(repo, revision)
545 594 fctx = ctx.filectx(safe_bytes(path))
546 595
547 596 result = []
548 597 for i, annotate_obj in enumerate(fctx.annotate(), 1):
549 598 ln_no = i
550 599 sha = hex(annotate_obj.fctx.node())
551 600 content = annotate_obj.text
552 601 result.append((ln_no, sha, content))
553 602 return result
554 603
555 604 @reraise_safe_exceptions
556 605 def fctx_node_data(self, wire, revision, path):
557 606 repo = self._factory.repo(wire)
558 607 ctx = self._get_ctx(repo, revision)
559 608 fctx = ctx.filectx(safe_bytes(path))
560 return BinaryEnvelope(fctx.data())
609 return BytesEnvelope(fctx.data())
561 610
562 611 @reraise_safe_exceptions
563 612 def fctx_flags(self, wire, commit_id, path):
564 613 cache_on, context_uid, repo_id = self._cache_on(wire)
565 614 region = self._region(wire)
566 615
567 616 @region.conditional_cache_on_arguments(condition=cache_on)
568 617 def _fctx_flags(_repo_id, _commit_id, _path):
569 618 repo = self._factory.repo(wire)
570 619 ctx = self._get_ctx(repo, commit_id)
571 620 fctx = ctx.filectx(safe_bytes(path))
572 621 return fctx.flags()
573 622
574 623 return _fctx_flags(repo_id, commit_id, path)
575 624
576 625 @reraise_safe_exceptions
577 626 def fctx_size(self, wire, commit_id, path):
578 627 cache_on, context_uid, repo_id = self._cache_on(wire)
579 628 region = self._region(wire)
580 629
581 630 @region.conditional_cache_on_arguments(condition=cache_on)
582 631 def _fctx_size(_repo_id, _revision, _path):
583 632 repo = self._factory.repo(wire)
584 633 ctx = self._get_ctx(repo, commit_id)
585 634 fctx = ctx.filectx(safe_bytes(path))
586 635 return fctx.size()
587 636 return _fctx_size(repo_id, commit_id, path)
588 637
589 638 @reraise_safe_exceptions
590 639 def get_all_commit_ids(self, wire, name):
591 640 cache_on, context_uid, repo_id = self._cache_on(wire)
592 641 region = self._region(wire)
593 642
594 643 @region.conditional_cache_on_arguments(condition=cache_on)
595 644 def _get_all_commit_ids(_context_uid, _repo_id, _name):
596 645 repo = self._factory.repo(wire)
597 646 revs = [ascii_str(repo[x].hex()) for x in repo.filtered(b'visible').changelog.revs()]
598 647 return revs
599 648 return _get_all_commit_ids(context_uid, repo_id, name)
600 649
601 650 @reraise_safe_exceptions
602 651 def get_config_value(self, wire, section, name, untrusted=False):
603 652 repo = self._factory.repo(wire)
604 653 return repo.ui.config(ascii_bytes(section), ascii_bytes(name), untrusted=untrusted)
605 654
606 655 @reraise_safe_exceptions
607 656 def is_large_file(self, wire, commit_id, path):
608 657 cache_on, context_uid, repo_id = self._cache_on(wire)
609 658 region = self._region(wire)
610 659
611 660 @region.conditional_cache_on_arguments(condition=cache_on)
612 661 def _is_large_file(_context_uid, _repo_id, _commit_id, _path):
613 662 return largefiles.lfutil.isstandin(safe_bytes(path))
614 663
615 664 return _is_large_file(context_uid, repo_id, commit_id, path)
616 665
617 666 @reraise_safe_exceptions
618 667 def is_binary(self, wire, revision, path):
619 668 cache_on, context_uid, repo_id = self._cache_on(wire)
620 669 region = self._region(wire)
621 670
622 671 @region.conditional_cache_on_arguments(condition=cache_on)
623 672 def _is_binary(_repo_id, _sha, _path):
624 673 repo = self._factory.repo(wire)
625 674 ctx = self._get_ctx(repo, revision)
626 675 fctx = ctx.filectx(safe_bytes(path))
627 676 return fctx.isbinary()
628 677
629 678 return _is_binary(repo_id, revision, path)
630 679
631 680 @reraise_safe_exceptions
632 681 def md5_hash(self, wire, revision, path):
633 682 cache_on, context_uid, repo_id = self._cache_on(wire)
634 683 region = self._region(wire)
635 684
636 685 @region.conditional_cache_on_arguments(condition=cache_on)
637 686 def _md5_hash(_repo_id, _sha, _path):
638 687 repo = self._factory.repo(wire)
639 688 ctx = self._get_ctx(repo, revision)
640 689 fctx = ctx.filectx(safe_bytes(path))
641 690 return hashlib.md5(fctx.data()).hexdigest()
642 691
643 692 return _md5_hash(repo_id, revision, path)
644 693
645 694 @reraise_safe_exceptions
646 695 def in_largefiles_store(self, wire, sha):
647 696 repo = self._factory.repo(wire)
648 697 return largefiles.lfutil.instore(repo, sha)
649 698
650 699 @reraise_safe_exceptions
651 700 def in_user_cache(self, wire, sha):
652 701 repo = self._factory.repo(wire)
653 702 return largefiles.lfutil.inusercache(repo.ui, sha)
654 703
655 704 @reraise_safe_exceptions
656 705 def store_path(self, wire, sha):
657 706 repo = self._factory.repo(wire)
658 707 return largefiles.lfutil.storepath(repo, sha)
659 708
660 709 @reraise_safe_exceptions
661 710 def link(self, wire, sha, path):
662 711 repo = self._factory.repo(wire)
663 712 largefiles.lfutil.link(
664 713 largefiles.lfutil.usercachepath(repo.ui, sha), path)
665 714
666 715 @reraise_safe_exceptions
667 716 def localrepository(self, wire, create=False):
668 717 self._factory.repo(wire, create=create)
669 718
670 719 @reraise_safe_exceptions
671 720 def lookup(self, wire, revision, both):
672 721 cache_on, context_uid, repo_id = self._cache_on(wire)
673 722 region = self._region(wire)
674 723
675 724 @region.conditional_cache_on_arguments(condition=cache_on)
676 725 def _lookup(_context_uid, _repo_id, _revision, _both):
677
678 726 repo = self._factory.repo(wire)
679 727 rev = _revision
680 728 if isinstance(rev, int):
681 729 # NOTE(marcink):
682 730 # since Mercurial doesn't support negative indexes properly
683 731 # we need to shift accordingly by one to get proper index, e.g
684 732 # repo[-1] => repo[-2]
685 733 # repo[0] => repo[-1]
686 734 if rev <= 0:
687 735 rev = rev + -1
688 736 try:
689 737 ctx = self._get_ctx(repo, rev)
690 738 except (TypeError, RepoLookupError, binascii.Error) as e:
691 739 e._org_exc_tb = traceback.format_exc()
692 740 raise exceptions.LookupException(e)(rev)
693 741 except LookupError as e:
694 742 e._org_exc_tb = traceback.format_exc()
695 743 raise exceptions.LookupException(e)(e.name)
696 744
697 745 if not both:
698 746 return ctx.hex()
699 747
700 748 ctx = repo[ctx.hex()]
701 749 return ctx.hex(), ctx.rev()
702 750
703 751 return _lookup(context_uid, repo_id, revision, both)
704 752
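The index shift in _lookup is easiest to see in isolation; a tiny sketch mirroring only the arithmetic from the NOTE above:

    def shift_rev(rev: int) -> int:
        # non-positive indexes move down by one, positives pass through
        return rev + -1 if rev <= 0 else rev

    assert shift_rev(0) == -1    # repo[0]  is looked up as repo[-1]
    assert shift_rev(-1) == -2   # repo[-1] is looked up as repo[-2]
    assert shift_rev(5) == 5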
705 753 @reraise_safe_exceptions
706 754 def sync_push(self, wire, url):
707 755 if not self.check_url(url, wire['config']):
708 756 return
709 757
710 758 repo = self._factory.repo(wire)
711 759
712 760 # Disable any prompts for this repo
713 761 repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
714 762
715 763 bookmarks = list(dict(repo._bookmarks).keys())
716 764 remote = peer(repo, {}, safe_bytes(url))
717 765 # Disable any prompts for this remote
718 766 remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
719 767
720 768 return exchange.push(
721 769 repo, remote, newbranch=True, bookmarks=bookmarks).cgresult
722 770
723 771 @reraise_safe_exceptions
724 772 def revision(self, wire, rev):
725 773 repo = self._factory.repo(wire)
726 774 ctx = self._get_ctx(repo, rev)
727 775 return ctx.rev()
728 776
729 777 @reraise_safe_exceptions
730 778 def rev_range(self, wire, commit_filter):
731 779 cache_on, context_uid, repo_id = self._cache_on(wire)
732 780 region = self._region(wire)
733 781
734 782 @region.conditional_cache_on_arguments(condition=cache_on)
735 783 def _rev_range(_context_uid, _repo_id, _filter):
736 784 repo = self._factory.repo(wire)
737 785 revisions = [
738 786 ascii_str(repo[rev].hex())
739 787 for rev in revrange(repo, list(map(ascii_bytes, commit_filter)))
740 788 ]
741 789 return revisions
742 790
743 791 return _rev_range(context_uid, repo_id, sorted(commit_filter))
744 792
745 793 @reraise_safe_exceptions
746 794 def rev_range_hash(self, wire, node):
747 795 repo = self._factory.repo(wire)
748 796
749 797 def get_revs(repo, rev_opt):
750 798 if rev_opt:
751 799 revs = revrange(repo, rev_opt)
752 800 if len(revs) == 0:
753 801 return (nullrev, nullrev)
754 802 return max(revs), min(revs)
755 803 else:
756 804 return len(repo) - 1, 0
757 805
758 806 stop, start = get_revs(repo, [node + ':'])
759 807 revs = [ascii_str(repo[r].hex()) for r in range(start, stop + 1)]
760 808 return revs
761 809
762 810 @reraise_safe_exceptions
763 811 def revs_from_revspec(self, wire, rev_spec, *args, **kwargs):
764 812 org_path = safe_bytes(wire["path"])
765 813 other_path = safe_bytes(kwargs.pop('other_path', ''))
766 814
767 815 # case when we want to compare two independent repositories
768 816 if other_path and other_path != wire["path"]:
769 817 baseui = self._factory._create_config(wire["config"])
770 818 repo = unionrepo.makeunionrepository(baseui, other_path, org_path)
771 819 else:
772 820 repo = self._factory.repo(wire)
773 821 return list(repo.revs(rev_spec, *args))
774 822
775 823 @reraise_safe_exceptions
776 824 def verify(self, wire,):
777 825 repo = self._factory.repo(wire)
778 826 baseui = self._factory._create_config(wire['config'])
779 827
780 828 baseui, output = patch_ui_message_output(baseui)
781 829
782 830 repo.ui = baseui
783 831 verify.verify(repo)
784 832 return output.getvalue()
785 833
786 834 @reraise_safe_exceptions
787 835 def hg_update_cache(self, wire,):
788 836 repo = self._factory.repo(wire)
789 837 baseui = self._factory._create_config(wire['config'])
790 838 baseui, output = patch_ui_message_output(baseui)
791 839
792 840 repo.ui = baseui
793 841 with repo.wlock(), repo.lock():
794 842 repo.updatecaches(full=True)
795 843
796 844 return output.getvalue()
797 845
798 846 @reraise_safe_exceptions
799 847 def hg_rebuild_fn_cache(self, wire,):
800 848 repo = self._factory.repo(wire)
801 849 baseui = self._factory._create_config(wire['config'])
802 850 baseui, output = patch_ui_message_output(baseui)
803 851
804 852 repo.ui = baseui
805 853
806 854 repair.rebuildfncache(baseui, repo)
807 855
808 856 return output.getvalue()
809 857
810 858 @reraise_safe_exceptions
811 859 def tags(self, wire):
812 860 cache_on, context_uid, repo_id = self._cache_on(wire)
813 861 region = self._region(wire)
814 862
815 863 @region.conditional_cache_on_arguments(condition=cache_on)
816 864 def _tags(_context_uid, _repo_id):
817 865 repo = self._factory.repo(wire)
818 866 return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo.tags().items()}
819 867
820 868 return _tags(context_uid, repo_id)
821 869
822 870 @reraise_safe_exceptions
823 871 def update(self, wire, node='', clean=False):
824 872 repo = self._factory.repo(wire)
825 873 baseui = self._factory._create_config(wire['config'])
826 874 node = safe_bytes(node)
827 875
828 876 commands.update(baseui, repo, node=node, clean=clean)
829 877
830 878 @reraise_safe_exceptions
831 879 def identify(self, wire):
832 880 repo = self._factory.repo(wire)
833 881 baseui = self._factory._create_config(wire['config'])
834 882 output = io.BytesIO()
835 883 baseui.write = output.write
836 884 # This is required to get a full node id
837 885 baseui.debugflag = True
838 886 commands.identify(baseui, repo, id=True)
839 887
840 888 return output.getvalue()
841 889
842 890 @reraise_safe_exceptions
843 891 def heads(self, wire, branch=None):
844 892 repo = self._factory.repo(wire)
845 893 baseui = self._factory._create_config(wire['config'])
846 894 output = io.BytesIO()
847 895
848 896 def write(data, **unused_kwargs):
849 897 output.write(data)
850 898
851 899 baseui.write = write
852 900 if branch:
853 901 args = [safe_bytes(branch)]
854 902 else:
855 903 args = []
856 904 commands.heads(baseui, repo, template=b'{node} ', *args)
857 905
858 906 return output.getvalue()
859 907
860 908 @reraise_safe_exceptions
861 909 def ancestor(self, wire, revision1, revision2):
862 910 repo = self._factory.repo(wire)
863 911 changelog = repo.changelog
864 912 lookup = repo.lookup
865 913 a = changelog.ancestor(lookup(safe_bytes(revision1)), lookup(safe_bytes(revision2)))
866 914 return hex(a)
867 915
868 916 @reraise_safe_exceptions
869 917 def clone(self, wire, source, dest, update_after_clone=False, hooks=True):
870 918 baseui = self._factory._create_config(wire["config"], hooks=hooks)
871 919 clone(baseui, safe_bytes(source), safe_bytes(dest), noupdate=not update_after_clone)
872 920
873 921 @reraise_safe_exceptions
874 922 def commitctx(self, wire, message, parents, commit_time, commit_timezone, user, files, extra, removed, updated):
875 923
876 924 repo = self._factory.repo(wire)
877 925 baseui = self._factory._create_config(wire['config'])
878 926 publishing = baseui.configbool(b'phases', b'publish')
879 927
880 928 def _filectxfn(_repo, ctx, path: bytes):
881 929 """
882 930 Marks the given path as added/changed/removed in the given _repo. This is
883 931 for Mercurial's internal commit function.
884 932 """
885 933
886 934 # check if this path is removed
887 935 if safe_str(path) in removed:
888 936 # returning None is a way to mark node for removal
889 937 return None
890 938
891 939 # check if this path is added
892 940 for node in updated:
893 941 if safe_bytes(node['path']) == path:
894 942 return memfilectx(
895 943 _repo,
896 944 changectx=ctx,
897 945 path=safe_bytes(node['path']),
898 946 data=safe_bytes(node['content']),
899 947 islink=False,
900 948 isexec=bool(node['mode'] & stat.S_IXUSR),
901 949 copysource=False)
902 950 abort_exc = exceptions.AbortException()
903 951 raise abort_exc(f"Given path hasn't been marked as added, changed or removed ({path})")
904 952
905 953 if publishing:
906 954 new_commit_phase = b'public'
907 955 else:
908 956 new_commit_phase = b'draft'
909 957 with repo.ui.configoverride({(b'phases', b'new-commit'): new_commit_phase}):
910 958 kwargs = {safe_bytes(k): safe_bytes(v) for k, v in extra.items()}
911 959 commit_ctx = memctx(
912 960 repo=repo,
913 961 parents=parents,
914 962 text=safe_bytes(message),
915 963 files=[safe_bytes(x) for x in files],
916 964 filectxfn=_filectxfn,
917 965 user=safe_bytes(user),
918 966 date=(commit_time, commit_timezone),
919 967 extra=kwargs)
920 968
921 969 n = repo.commitctx(commit_ctx)
922 970 new_id = hex(n)
923 971
924 972 return new_id
925 973
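A caller-side sketch of commitctx; the node dicts mirror what _filectxfn expects (path/content/mode keys), while the `remote` proxy, parent ids, user and timestamps below are assumptions:

    new_commit_id = remote.commitctx(
        wire,
        message='Add greeting',
        parents=[b'<parent-node>', None],        # hypothetical parents
        commit_time=1700000000, commit_timezone=0,
        user='Dev <dev@example.com>',
        files=['greeting.txt'],
        extra={'branch': 'default'},
        removed=[],
        updated=[{'path': 'greeting.txt',
                  'content': 'hello\n',
                  'mode': 0o100644}])            # S_IXUSR unset -> not executable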
926 974 @reraise_safe_exceptions
927 975 def pull(self, wire, url, commit_ids=None):
928 976 repo = self._factory.repo(wire)
929 977 # Disable any prompts for this repo
930 978 repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
931 979
932 980 remote = peer(repo, {}, safe_bytes(url))
933 981 # Disable any prompts for this remote
934 982 remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
935 983
936 984 if commit_ids:
937 985 commit_ids = [bin(commit_id) for commit_id in commit_ids]
938 986
939 987 return exchange.pull(
940 988 repo, remote, heads=commit_ids, force=None).cgresult
941 989
942 990 @reraise_safe_exceptions
943 991 def pull_cmd(self, wire, source, bookmark='', branch='', revision='', hooks=True):
944 992 repo = self._factory.repo(wire)
945 993 baseui = self._factory._create_config(wire['config'], hooks=hooks)
946 994
947 995 source = safe_bytes(source)
948 996
949 997 # Mercurial internally has a lot of logic that checks ONLY whether
950 998 # an option is defined, so we only pass options that are defined
951 999 opts = {}
1000
952 1001 if bookmark:
953 if isinstance(branch, list):
954 bookmark = [safe_bytes(x) for x in bookmark]
955 else:
956 bookmark = safe_bytes(bookmark)
957 opts['bookmark'] = bookmark
1002 opts['bookmark'] = [safe_bytes(x) for x in bookmark] \
1003 if isinstance(bookmark, list) else safe_bytes(bookmark)
1004
958 1005 if branch:
959 if isinstance(branch, list):
960 branch = [safe_bytes(x) for x in branch]
961 else:
962 branch = safe_bytes(branch)
963 opts['branch'] = branch
1006 opts['branch'] = [safe_bytes(x) for x in branch] \
1007 if isinstance(branch, list) else safe_bytes(branch)
1008
964 1009 if revision:
965 opts['rev'] = safe_bytes(revision)
1010 opts['rev'] = [safe_bytes(x) for x in revision] \
1011 if isinstance(revision, list) else safe_bytes(revision)
966 1012
967 1013 commands.pull(baseui, repo, source, **opts)
968 1014
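The str-or-list-to-bytes normalization used above for bookmark/branch/rev (and again in push below) can be seen in isolation; safe_bytes is this module's own helper, the function name is an assumption:

    def to_bytes_opt(value):
        # list of str -> list of bytes; bare str -> bytes
        if isinstance(value, list):
            return [safe_bytes(x) for x in value]
        return safe_bytes(value)

    assert to_bytes_opt('stable') == b'stable'
    assert to_bytes_opt(['default', 'dev']) == [b'default', b'dev']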
969 1015 @reraise_safe_exceptions
970 def push(self, wire, revisions, dest_path, hooks=True, push_branches=False):
1016 def push(self, wire, revisions, dest_path, hooks: bool = True, push_branches: bool = False):
971 1017 repo = self._factory.repo(wire)
972 1018 baseui = self._factory._create_config(wire['config'], hooks=hooks)
973 commands.push(baseui, repo, dest=dest_path, rev=revisions,
1019
1020 revisions = [safe_bytes(x) for x in revisions] \
1021 if isinstance(revisions, list) else safe_bytes(revisions)
1022
1023 commands.push(baseui, repo, safe_bytes(dest_path),
1024 rev=revisions,
974 1025 new_branch=push_branches)
975 1026
976 1027 @reraise_safe_exceptions
977 1028 def strip(self, wire, revision, update, backup):
978 1029 repo = self._factory.repo(wire)
979 1030 ctx = self._get_ctx(repo, revision)
980 hgext_strip(
1031 hgext_strip.strip(
981 1032 repo.baseui, repo, ctx.node(), update=update, backup=backup)
982 1033
983 1034 @reraise_safe_exceptions
984 1035 def get_unresolved_files(self, wire):
985 1036 repo = self._factory.repo(wire)
986 1037
987 1038 log.debug('Calculating unresolved files for repo: %s', repo)
988 1039 output = io.BytesIO()
989 1040
990 1041 def write(data, **unused_kwargs):
991 1042 output.write(data)
992 1043
993 1044 baseui = self._factory._create_config(wire['config'])
994 1045 baseui.write = write
995 1046
996 1047 commands.resolve(baseui, repo, list=True)
997 1048 unresolved = output.getvalue().splitlines(0)
998 1049 return unresolved
999 1050
1000 1051 @reraise_safe_exceptions
1001 1052 def merge(self, wire, revision):
1002 1053 repo = self._factory.repo(wire)
1003 1054 baseui = self._factory._create_config(wire['config'])
1004 1055 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
1005 1056
1006 1057 # If subrepositories are used, Mercurial prompts the user in
1007 1058 # case of merge conflicts or differing subrepository sources. By
1008 1059 # setting the interactive flag to `False` Mercurial doesn't prompt the
1009 1060 # user but instead uses a default value.
1010 1061 repo.ui.setconfig(b'ui', b'interactive', False)
1011 commands.merge(baseui, repo, rev=revision)
1062 commands.merge(baseui, repo, rev=safe_bytes(revision))
1012 1063
1013 1064 @reraise_safe_exceptions
1014 1065 def merge_state(self, wire):
1015 1066 repo = self._factory.repo(wire)
1016 1067 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
1017 1068
1018 1069 # If subrepositories are used, Mercurial prompts the user in
1019 1070 # case of merge conflicts or differing subrepository sources. By
1020 1071 # setting the interactive flag to `False` Mercurial doesn't prompt the
1021 1072 # user but instead uses a default value.
1022 1073 repo.ui.setconfig(b'ui', b'interactive', False)
1023 1074 ms = hg_merge.mergestate(repo)
1024 1075 return [x for x in ms.unresolved()]
1025 1076
1026 1077 @reraise_safe_exceptions
1027 1078 def commit(self, wire, message, username, close_branch=False):
1028 1079 repo = self._factory.repo(wire)
1029 1080 baseui = self._factory._create_config(wire['config'])
1030 repo.ui.setconfig(b'ui', b'username', username)
1031 commands.commit(baseui, repo, message=message, close_branch=close_branch)
1081 repo.ui.setconfig(b'ui', b'username', safe_bytes(username))
1082 commands.commit(baseui, repo, message=safe_bytes(message), close_branch=close_branch)
1032 1083
1033 1084 @reraise_safe_exceptions
1034 def rebase(self, wire, source=None, dest=None, abort=False):
1085 def rebase(self, wire, source='', dest='', abort=False):
1035 1086 repo = self._factory.repo(wire)
1036 1087 baseui = self._factory._create_config(wire['config'])
1037 1088 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
1038 1089 # If subrepositories are used, Mercurial prompts the user in
1039 1090 # case of merge conflicts or differing subrepository sources. By
1040 1091 # setting the interactive flag to `False` Mercurial doesn't prompt the
1041 1092 # user but instead uses a default value.
1042 1093 repo.ui.setconfig(b'ui', b'interactive', False)
1043 rebase.rebase(baseui, repo, base=source, dest=dest, abort=abort, keep=not abort)
1094
1095 rebase.rebase(baseui, repo, base=safe_bytes(source or ''), dest=safe_bytes(dest or ''),
1096 abort=abort, keep=not abort)
1044 1097
1045 1098 @reraise_safe_exceptions
1046 1099 def tag(self, wire, name, revision, message, local, user, tag_time, tag_timezone):
1047 1100 repo = self._factory.repo(wire)
1048 1101 ctx = self._get_ctx(repo, revision)
1049 1102 node = ctx.node()
1050 1103
1051 1104 date = (tag_time, tag_timezone)
1052 1105 try:
1053 hg_tag.tag(repo, name, node, message, local, user, date)
1106 hg_tag.tag(repo, safe_bytes(name), node, safe_bytes(message), local, safe_bytes(user), date)
1054 1107 except Abort as e:
1055 1108 log.exception("Tag operation aborted")
1056 1109 # Exception can contain unicode which we convert
1057 1110 raise exceptions.AbortException(e)(repr(e))
1058 1111
1059 1112 @reraise_safe_exceptions
1060 1113 def bookmark(self, wire, bookmark, revision=''):
1061 1114 repo = self._factory.repo(wire)
1062 1115 baseui = self._factory._create_config(wire['config'])
1116 revision = revision or ''
1063 1117 commands.bookmark(baseui, repo, safe_bytes(bookmark), rev=safe_bytes(revision), force=True)
1064 1118
1065 1119 @reraise_safe_exceptions
1066 1120 def install_hooks(self, wire, force=False):
1067 1121 # we don't need any special hooks for Mercurial
1068 1122 pass
1069 1123
1070 1124 @reraise_safe_exceptions
1071 1125 def get_hooks_info(self, wire):
1072 1126 return {
1073 1127 'pre_version': vcsserver.__version__,
1074 1128 'post_version': vcsserver.__version__,
1075 1129 }
1076 1130
1077 1131 @reraise_safe_exceptions
1078 1132 def set_head_ref(self, wire, head_name):
1079 1133 pass
1080 1134
1081 1135 @reraise_safe_exceptions
1082 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
1083 archive_dir_name, commit_id):
1136 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1137 archive_dir_name, commit_id, cache_config):
1084 1138
1085 1139 def file_walker(_commit_id, path):
1086 1140 repo = self._factory.repo(wire)
1087 1141 ctx = repo[_commit_id]
1088 1142 is_root = path in ['', '/']
1089 1143 if is_root:
1090 1144 matcher = alwaysmatcher(badfn=None)
1091 1145 else:
1092 1146 matcher = patternmatcher('', [(b'glob', path+'/**', b'')], badfn=None)
1093 1147 file_iter = ctx.manifest().walk(matcher)
1094 1148
1095 1149 for fn in file_iter:
1096 1150 file_path = fn
1097 1151 flags = ctx.flags(fn)
1098 1152 mode = 0o755 if b'x' in flags else 0o644
1099 1153 is_link = b'l' in flags
1100 1154
1101 1155 yield ArchiveNode(file_path, mode, is_link, ctx[fn].data)
1102 1156
1103 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
1104 archive_dir_name, commit_id)
1157 return store_archive_in_cache(
1158 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
1105 1159
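The manifest walk above folds Mercurial's flag characters into archive metadata; a small sketch of just that mapping, with the helper name as an assumption:

    def node_mode(flags: bytes) -> tuple:
        # b'x' in flags -> executable file, b'l' -> symlink
        mode = 0o755 if b'x' in flags else 0o644
        return mode, b'l' in flags

    assert node_mode(b'') == (0o644, False)    # regular file
    assert node_mode(b'x') == (0o755, False)   # executable
    assert node_mode(b'l') == (0o644, True)    # symlink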
@@ -1,890 +1,935 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18
19 19 import os
20 20 import subprocess
21 21 from urllib.error import URLError
22 22 import urllib.parse
23 23 import logging
24 24 import posixpath as vcspath
25 25 import io
26 26 import urllib.request
27 27 import urllib.parse
28 28 import urllib.error
29 29 import traceback
30 30
31 31
32 32 import svn.client # noqa
33 33 import svn.core # noqa
34 34 import svn.delta # noqa
35 35 import svn.diff # noqa
36 36 import svn.fs # noqa
37 37 import svn.repos # noqa
38 38
39 39 from vcsserver import svn_diff, exceptions, subprocessio, settings
40 from vcsserver.base import RepoFactory, raise_from_original, ArchiveNode, archive_repo, BinaryEnvelope
40 from vcsserver.base import RepoFactory, raise_from_original, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
41 41 from vcsserver.exceptions import NoContentException
42 42 from vcsserver.str_utils import safe_str, safe_bytes
43 from vcsserver.type_utils import assert_bytes
43 44 from vcsserver.vcs_base import RemoteBase
44 45 from vcsserver.lib.svnremoterepo import svnremoterepo
45 46 log = logging.getLogger(__name__)
46 47
47 48
48 49 svn_compatible_versions_map = {
49 50 'pre-1.4-compatible': '1.3',
50 51 'pre-1.5-compatible': '1.4',
51 52 'pre-1.6-compatible': '1.5',
52 53 'pre-1.8-compatible': '1.7',
53 54 'pre-1.9-compatible': '1.8',
54 55 }
55 56
56 57 current_compatible_version = '1.14'
57 58
58 59
59 60 def reraise_safe_exceptions(func):
60 61 """Decorator for converting svn exceptions to something neutral."""
61 62 def wrapper(*args, **kwargs):
62 63 try:
63 64 return func(*args, **kwargs)
64 65 except Exception as e:
65 66 if not hasattr(e, '_vcs_kind'):
66 67 log.exception("Unhandled exception in svn remote call")
67 68 raise_from_original(exceptions.UnhandledException(e), e)
68 69 raise
69 70 return wrapper
70 71
71 72
72 73 class SubversionFactory(RepoFactory):
73 74 repo_type = 'svn'
74 75
75 76 def _create_repo(self, wire, create, compatible_version):
76 77 path = svn.core.svn_path_canonicalize(wire['path'])
77 78 if create:
78 79 fs_config = {'compatible-version': current_compatible_version}
79 80 if compatible_version:
80 81
81 82 compatible_version_string = \
82 83 svn_compatible_versions_map.get(compatible_version) \
83 84 or compatible_version
84 85 fs_config['compatible-version'] = compatible_version_string
85 86
86 87 log.debug('Create SVN repo with config `%s`', fs_config)
87 88 repo = svn.repos.create(path, "", "", None, fs_config)
88 89 else:
89 90 repo = svn.repos.open(path)
90 91
91 92 log.debug('repository created: got SVN object: %s', repo)
92 93 return repo
93 94
94 95 def repo(self, wire, create=False, compatible_version=None):
95 96 """
96 97 Get a repository instance for the given path.
97 98 """
98 99 return self._create_repo(wire, create, compatible_version)
99 100
100 101
101 102 NODE_TYPE_MAPPING = {
102 103 svn.core.svn_node_file: 'file',
103 104 svn.core.svn_node_dir: 'dir',
104 105 }
105 106
106 107
107 108 class SvnRemote(RemoteBase):
108 109
109 110 def __init__(self, factory, hg_factory=None):
110 111 self._factory = factory
111 112
113 self._bulk_methods = {
114 # NOT supported in SVN ATM...
115 }
116 self._bulk_file_methods = {
117 "size": self.get_file_size,
118 "data": self.get_file_content,
119 "flags": self.get_node_type,
120 "is_binary": self.is_binary,
121 "md5": self.md5_hash
122 }
123
124 @reraise_safe_exceptions
125 def bulk_file_request(self, wire, commit_id, path, pre_load):
126 cache_on, context_uid, repo_id = self._cache_on(wire)
127 region = self._region(wire)
128
129 # since we use a unified API, we need to cast from str to int for SVN
130 commit_id = int(commit_id)
131
132 @region.conditional_cache_on_arguments(condition=cache_on)
133 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
134 result = {}
135 for attr in pre_load:
136 try:
137 method = self._bulk_file_methods[attr]
138 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
139 result[attr] = method(wire, _commit_id, _path)
140 except KeyError as e:
141 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
142 return BinaryEnvelope(result)
143
144 return _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
145
112 146 @reraise_safe_exceptions
113 147 def discover_svn_version(self):
114 148 try:
115 149 import svn.core
116 150 svn_ver = svn.core.SVN_VERSION
117 151 except ImportError:
118 152 svn_ver = None
119 153 return safe_str(svn_ver)
120 154
121 155 @reraise_safe_exceptions
122 156 def is_empty(self, wire):
123
124 157 try:
125 158 return self.lookup(wire, -1) == 0
126 159 except Exception:
127 160 log.exception("failed to read object_store")
128 161 return False
129 162
130 def check_url(self, url):
163 def check_url(self, url, config):
131 164
132 # uuid function get's only valid UUID from proper repo, else
165 # uuid function gets only valid UUID from proper repo, else
133 166 # throws exception
134 167 username, password, src_url = self.get_url_and_credentials(url)
135 168 try:
136 svnremoterepo(username, password, src_url).svn().uuid
169 svnremoterepo(safe_bytes(username), safe_bytes(password), safe_bytes(src_url)).svn().uuid
137 170 except Exception:
138 171 tb = traceback.format_exc()
139 172 log.debug("Invalid Subversion url: `%s`, tb: %s", url, tb)
140 raise URLError(
141 '"{}" is not a valid Subversion source url.'.format(url))
173 raise URLError(f'"{url}" is not a valid Subversion source url.')
142 174 return True
143 175
144 176 def is_path_valid_repository(self, wire, path):
145 177
146 178 # NOTE(marcink): short circuit the check for SVN repo
147 179 # the repos.open might be expensive to run, but we have one cheap
148 180 # precondition we can use: checking for the 'format' file
149 181
150 182 if not os.path.isfile(os.path.join(path, 'format')):
151 183 return False
152 184
153 185 try:
154 186 svn.repos.open(path)
155 187 except svn.core.SubversionException:
156 188 tb = traceback.format_exc()
157 189 log.debug("Invalid Subversion path `%s`, tb: %s", path, tb)
158 190 return False
159 191 return True
160 192
161 193 @reraise_safe_exceptions
162 194 def verify(self, wire,):
163 195 repo_path = wire['path']
164 196 if not self.is_path_valid_repository(wire, repo_path):
165 197 raise Exception(
166 198 "Path %s is not a valid Subversion repository." % repo_path)
167 199
168 200 cmd = ['svnadmin', 'info', repo_path]
169 201 stdout, stderr = subprocessio.run_command(cmd)
170 202 return stdout
171 203
204 @reraise_safe_exceptions
172 205 def lookup(self, wire, revision):
173 206 if revision not in [-1, None, 'HEAD']:
174 207 raise NotImplementedError
175 208 repo = self._factory.repo(wire)
176 209 fs_ptr = svn.repos.fs(repo)
177 210 head = svn.fs.youngest_rev(fs_ptr)
178 211 return head
179 212
213 @reraise_safe_exceptions
180 214 def lookup_interval(self, wire, start_ts, end_ts):
181 215 repo = self._factory.repo(wire)
182 216 fsobj = svn.repos.fs(repo)
183 217 start_rev = None
184 218 end_rev = None
185 219 if start_ts:
186 220 start_ts_svn = apr_time_t(start_ts)
187 221 start_rev = svn.repos.dated_revision(repo, start_ts_svn) + 1
188 222 else:
189 223 start_rev = 1
190 224 if end_ts:
191 225 end_ts_svn = apr_time_t(end_ts)
192 226 end_rev = svn.repos.dated_revision(repo, end_ts_svn)
193 227 else:
194 228 end_rev = svn.fs.youngest_rev(fsobj)
195 229 return start_rev, end_rev
196 230
231 @reraise_safe_exceptions
197 232 def revision_properties(self, wire, revision):
198 233
199 234 cache_on, context_uid, repo_id = self._cache_on(wire)
200 235 region = self._region(wire)
236
201 237 @region.conditional_cache_on_arguments(condition=cache_on)
202 238 def _revision_properties(_repo_id, _revision):
203 239 repo = self._factory.repo(wire)
204 240 fs_ptr = svn.repos.fs(repo)
205 241 return svn.fs.revision_proplist(fs_ptr, revision)
206 242 return _revision_properties(repo_id, revision)
207 243
208 244 def revision_changes(self, wire, revision):
209 245
210 246 repo = self._factory.repo(wire)
211 247 fsobj = svn.repos.fs(repo)
212 248 rev_root = svn.fs.revision_root(fsobj, revision)
213 249
214 250 editor = svn.repos.ChangeCollector(fsobj, rev_root)
215 251 editor_ptr, editor_baton = svn.delta.make_editor(editor)
216 252 base_dir = ""
217 253 send_deltas = False
218 254 svn.repos.replay2(
219 255 rev_root, base_dir, svn.core.SVN_INVALID_REVNUM, send_deltas,
220 256 editor_ptr, editor_baton, None)
221 257
222 258 added = []
223 259 changed = []
224 260 removed = []
225 261
226 262 # TODO: CHANGE_ACTION_REPLACE: Figure out where it belongs
227 263 for path, change in editor.changes.items():
228 264 # TODO: Decide what to do with directory nodes. Subversion can add
229 265 # empty directories.
230 266
231 267 if change.item_kind == svn.core.svn_node_dir:
232 268 continue
233 269 if change.action in [svn.repos.CHANGE_ACTION_ADD]:
234 270 added.append(path)
235 271 elif change.action in [svn.repos.CHANGE_ACTION_MODIFY,
236 272 svn.repos.CHANGE_ACTION_REPLACE]:
237 273 changed.append(path)
238 274 elif change.action in [svn.repos.CHANGE_ACTION_DELETE]:
239 275 removed.append(path)
240 276 else:
241 277 raise NotImplementedError(
242 278 "Action {} not supported on path {}".format(
243 279 change.action, path))
244 280
245 281 changes = {
246 282 'added': added,
247 283 'changed': changed,
248 284 'removed': removed,
249 285 }
250 286 return changes
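The returned mapping groups file paths by action; directory nodes are skipped above. An illustrative (assumed) result for a revision that adds one file and edits another:

    {'added': ['trunk/new_file.txt'], 'changed': ['trunk/existing.py'], 'removed': []}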
251 287
252 288 @reraise_safe_exceptions
253 289 def node_history(self, wire, path, revision, limit):
254 290 cache_on, context_uid, repo_id = self._cache_on(wire)
255 291 region = self._region(wire)
292
256 293 @region.conditional_cache_on_arguments(condition=cache_on)
257 294 def _assert_correct_path(_context_uid, _repo_id, _path, _revision, _limit):
258 295 cross_copies = False
259 296 repo = self._factory.repo(wire)
260 297 fsobj = svn.repos.fs(repo)
261 298 rev_root = svn.fs.revision_root(fsobj, revision)
262 299
263 300 history_revisions = []
264 301 history = svn.fs.node_history(rev_root, path)
265 302 history = svn.fs.history_prev(history, cross_copies)
266 303 while history:
267 304 __, node_revision = svn.fs.history_location(history)
268 305 history_revisions.append(node_revision)
269 306 if limit and len(history_revisions) >= limit:
270 307 break
271 308 history = svn.fs.history_prev(history, cross_copies)
272 309 return history_revisions
273 310 return _assert_correct_path(context_uid, repo_id, path, revision, limit)
274 311
312 @reraise_safe_exceptions
275 313 def node_properties(self, wire, path, revision):
276 314 cache_on, context_uid, repo_id = self._cache_on(wire)
277 315 region = self._region(wire)
278 316
279 317 @region.conditional_cache_on_arguments(condition=cache_on)
280 318 def _node_properties(_repo_id, _path, _revision):
281 319 repo = self._factory.repo(wire)
282 320 fsobj = svn.repos.fs(repo)
283 321 rev_root = svn.fs.revision_root(fsobj, revision)
284 322 return svn.fs.node_proplist(rev_root, path)
285 323 return _node_properties(repo_id, path, revision)
286 324
287 325 def file_annotate(self, wire, path, revision):
288 326 abs_path = 'file://' + urllib.request.pathname2url(
289 327 vcspath.join(wire['path'], path))
290 328 file_uri = svn.core.svn_path_canonicalize(abs_path)
291 329
292 330 start_rev = svn_opt_revision_value_t(0)
293 331 peg_rev = svn_opt_revision_value_t(revision)
294 332 end_rev = peg_rev
295 333
296 334 annotations = []
297 335
298 336 def receiver(line_no, revision, author, date, line, pool):
299 337 annotations.append((line_no, revision, line))
300 338
301 339 # TODO: Cannot use blame5, missing typemap function in the swig code
302 340 try:
303 341 svn.client.blame2(
304 342 file_uri, peg_rev, start_rev, end_rev,
305 343 receiver, svn.client.create_context())
306 344 except svn.core.SubversionException as exc:
307 345 log.exception("Error during blame operation.")
308 346 raise Exception(
309 347 "Blame not supported or file does not exist at path %s. "
310 348 "Error %s." % (path, exc))
311 349
312 350 return annotations
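Each entry is the `(line_no, revision, line)` tuple appended by `receiver`; a hypothetical result (values assumed) for a two-line file last touched in revisions 3 and 7:

    [(0, 3, 'first line\n'), (1, 7, 'second line\n')]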
313 351
314 def get_node_type(self, wire, path, revision=None):
352 @reraise_safe_exceptions
353 def get_node_type(self, wire, revision=None, path=''):
315 354
316 355 cache_on, context_uid, repo_id = self._cache_on(wire)
317 356 region = self._region(wire)
318 357
319 358 @region.conditional_cache_on_arguments(condition=cache_on)
320 def _get_node_type(_repo_id, _path, _revision):
359 def _get_node_type(_repo_id, _revision, _path):
321 360 repo = self._factory.repo(wire)
322 361 fs_ptr = svn.repos.fs(repo)
323 362 if _revision is None:
324 363 _revision = svn.fs.youngest_rev(fs_ptr)
325 364 root = svn.fs.revision_root(fs_ptr, _revision)
326 365 node = svn.fs.check_path(root, path)
327 366 return NODE_TYPE_MAPPING.get(node, None)
328 return _get_node_type(repo_id, path, revision)
367 return _get_node_type(repo_id, revision, path)
329 368
330 def get_nodes(self, wire, path, revision=None):
369 @reraise_safe_exceptions
370 def get_nodes(self, wire, revision=None, path=''):
331 371
332 372 cache_on, context_uid, repo_id = self._cache_on(wire)
333 373 region = self._region(wire)
334 374
335 375 @region.conditional_cache_on_arguments(condition=cache_on)
336 376 def _get_nodes(_repo_id, _path, _revision):
337 377 repo = self._factory.repo(wire)
338 378 fsobj = svn.repos.fs(repo)
339 379 if _revision is None:
340 380 _revision = svn.fs.youngest_rev(fsobj)
341 381 root = svn.fs.revision_root(fsobj, _revision)
342 382 entries = svn.fs.dir_entries(root, path)
343 383 result = []
344 384 for entry_path, entry_info in entries.items():
345 385 result.append(
346 386 (entry_path, NODE_TYPE_MAPPING.get(entry_info.kind, None)))
347 387 return result
348 388 return _get_nodes(repo_id, path, revision)
349 389
350 def get_file_content(self, wire, path, rev=None):
390 @reraise_safe_exceptions
391 def get_file_content(self, wire, rev=None, path=''):
351 392 repo = self._factory.repo(wire)
352 393 fsobj = svn.repos.fs(repo)
394
353 395 if rev is None:
354 rev = svn.fs.youngest_revision(fsobj)
396 rev = svn.fs.youngest_rev(fsobj)
397
355 398 root = svn.fs.revision_root(fsobj, rev)
356 399 content = svn.core.Stream(svn.fs.file_contents(root, path))
357 return BinaryEnvelope(content.read())
400 return BytesEnvelope(content.read())
358 401
359 def get_file_size(self, wire, path, revision=None):
402 @reraise_safe_exceptions
403 def get_file_size(self, wire, revision=None, path=''):
360 404
361 405 cache_on, context_uid, repo_id = self._cache_on(wire)
362 406 region = self._region(wire)
363 407
364 408 @region.conditional_cache_on_arguments(condition=cache_on)
365 def _get_file_size(_repo_id, _path, _revision):
409 def _get_file_size(_repo_id, _revision, _path):
366 410 repo = self._factory.repo(wire)
367 411 fsobj = svn.repos.fs(repo)
368 412 if _revision is None:
 369 413 _revision = svn.fs.youngest_rev(fsobj)
370 414 root = svn.fs.revision_root(fsobj, _revision)
371 415 size = svn.fs.file_length(root, path)
372 416 return size
373 return _get_file_size(repo_id, path, revision)
417 return _get_file_size(repo_id, revision, path)
374 418
375 419 def create_repository(self, wire, compatible_version=None):
376 420 log.info('Creating Subversion repository in path "%s"', wire['path'])
377 421 self._factory.repo(wire, create=True,
378 422 compatible_version=compatible_version)
379 423
380 def get_url_and_credentials(self, src_url):
424 def get_url_and_credentials(self, src_url) -> tuple[str, str, str]:
381 425 obj = urllib.parse.urlparse(src_url)
382 username = obj.username or None
383 password = obj.password or None
426 username = obj.username or ''
427 password = obj.password or ''
384 428 return username, password, src_url
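A quick sketch of the extraction (it mirrors the parametrized tests further down in this changeset); missing credentials fall back to empty strings:

    from urllib.parse import urlparse
    obj = urlparse('http://user:pass@server.com')
    # obj.username -> 'user', obj.password -> 'pass'
    # get_url_and_credentials -> ('user', 'pass', 'http://user:pass@server.com')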
385 429
386 430 def import_remote_repository(self, wire, src_url):
387 431 repo_path = wire['path']
388 432 if not self.is_path_valid_repository(wire, repo_path):
389 433 raise Exception(
390 434 "Path %s is not a valid Subversion repository." % repo_path)
391 435
392 436 username, password, src_url = self.get_url_and_credentials(src_url)
393 437 rdump_cmd = ['svnrdump', 'dump', '--non-interactive',
394 438 '--trust-server-cert-failures=unknown-ca']
395 439 if username and password:
396 440 rdump_cmd += ['--username', username, '--password', password]
397 441 rdump_cmd += [src_url]
398 442
399 443 rdump = subprocess.Popen(
400 444 rdump_cmd,
401 445 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
402 446 load = subprocess.Popen(
403 447 ['svnadmin', 'load', repo_path], stdin=rdump.stdout)
404 448
405 449 # TODO: johbo: This can be a very long operation, might be better
406 450 # to track some kind of status and provide an api to check if the
407 451 # import is done.
408 452 rdump.wait()
409 453 load.wait()
410 454
411 455 log.debug('Return process ended with code: %s', rdump.returncode)
412 456 if rdump.returncode != 0:
413 457 errors = rdump.stderr.read()
414 458 log.error('svnrdump dump failed: statuscode %s: message: %s', rdump.returncode, errors)
415 459
416 460 reason = 'UNKNOWN'
417 461 if b'svnrdump: E230001:' in errors:
418 462 reason = 'INVALID_CERTIFICATE'
419 463
420 464 if reason == 'UNKNOWN':
421 465 reason = f'UNKNOWN:{safe_str(errors)}'
422 466
423 467 raise Exception(
424 468 'Failed to dump the remote repository from {}. Reason:{}'.format(
425 469 src_url, reason))
426 470 if load.returncode != 0:
427 471 raise Exception(
428 472 'Failed to load the dump of remote repository from %s.' %
429 473 (src_url, ))
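Conceptually the two subprocesses form this shell pipeline (illustrative only, flags as built above; credentials are added only when both are present):

    # svnrdump dump --non-interactive --trust-server-cert-failures=unknown-ca \
    #     --username USER --password PASS SRC_URL | svnadmin load REPO_PATH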
430 474
431 475 def commit(self, wire, message, author, timestamp, updated, removed):
432 476
433 updated = [{k: safe_bytes(v) for k, v in x.items() if isinstance(v, str)} for x in updated]
434
435 477 message = safe_bytes(message)
436 478 author = safe_bytes(author)
437 479
438 480 repo = self._factory.repo(wire)
439 481 fsobj = svn.repos.fs(repo)
440 482
441 483 rev = svn.fs.youngest_rev(fsobj)
442 484 txn = svn.repos.fs_begin_txn_for_commit(repo, rev, author, message)
443 485 txn_root = svn.fs.txn_root(txn)
444 486
445 487 for node in updated:
446 488 TxnNodeProcessor(node, txn_root).update()
447 489 for node in removed:
448 490 TxnNodeProcessor(node, txn_root).remove()
449 491
450 492 commit_id = svn.repos.fs_commit_txn(repo, txn)
451 493
452 494 if timestamp:
453 apr_time = int(apr_time_t(timestamp))
495 apr_time = apr_time_t(timestamp)
454 496 ts_formatted = svn.core.svn_time_to_cstring(apr_time)
455 497 svn.fs.change_rev_prop(fsobj, commit_id, 'svn:date', ts_formatted)
456 498
457 499 log.debug('Committed revision "%s" to "%s".', commit_id, wire['path'])
458 500 return commit_id
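A sketch of the node payloads `commit` consumes (shape inferred from `TxnNodeProcessor` below; the concrete values are assumptions). `path` and `content` must already be bytes:

    updated = [{
        'path': b'trunk/hello.txt',                      # bytes, checked by assert_bytes
        'content': b'hello world\n',                     # bytes file content
        'properties': {'svn:mime-type': 'text/plain'},   # optional node properties
    }]
    removed = [{'path': b'trunk/obsolete.txt'}]
    commit_id = remote.commit(wire, 'msg', 'author', None, updated, removed)  # remote is assumed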
459 501
502 @reraise_safe_exceptions
460 503 def diff(self, wire, rev1, rev2, path1=None, path2=None,
461 504 ignore_whitespace=False, context=3):
462 505
463 506 wire.update(cache=False)
464 507 repo = self._factory.repo(wire)
465 508 diff_creator = SvnDiffer(
466 509 repo, rev1, path1, rev2, path2, ignore_whitespace, context)
467 510 try:
468 return BinaryEnvelope(diff_creator.generate_diff())
511 return BytesEnvelope(diff_creator.generate_diff())
469 512 except svn.core.SubversionException as e:
470 513 log.exception(
471 514 "Error during diff operation operation. "
472 515 "Path might not exist %s, %s", path1, path2)
473 return BinaryEnvelope(b'')
516 return BytesEnvelope(b'')
474 517
475 518 @reraise_safe_exceptions
476 519 def is_large_file(self, wire, path):
477 520 return False
478 521
479 522 @reraise_safe_exceptions
480 523 def is_binary(self, wire, rev, path):
481 524 cache_on, context_uid, repo_id = self._cache_on(wire)
482 525 region = self._region(wire)
483 526
484 527 @region.conditional_cache_on_arguments(condition=cache_on)
485 528 def _is_binary(_repo_id, _rev, _path):
486 raw_bytes = self.get_file_content(wire, path, rev)
487 return raw_bytes and b'\0' in raw_bytes
529 raw_bytes = self.get_file_content(wire, rev, path)
530 if not raw_bytes:
531 return False
532 return b'\0' in raw_bytes
488 533
489 534 return _is_binary(repo_id, rev, path)
490 535
491 536 @reraise_safe_exceptions
492 537 def md5_hash(self, wire, rev, path):
493 538 cache_on, context_uid, repo_id = self._cache_on(wire)
494 539 region = self._region(wire)
495 540
496 541 @region.conditional_cache_on_arguments(condition=cache_on)
497 542 def _md5_hash(_repo_id, _rev, _path):
498 543 return ''
499 544
500 545 return _md5_hash(repo_id, rev, path)
501 546
502 547 @reraise_safe_exceptions
503 548 def run_svn_command(self, wire, cmd, **opts):
504 549 path = wire.get('path', None)
505 550
506 551 if path and os.path.isdir(path):
507 552 opts['cwd'] = path
508 553
509 554 safe_call = opts.pop('_safe', False)
510 555
511 556 svnenv = os.environ.copy()
512 557 svnenv.update(opts.pop('extra_env', {}))
513 558
514 559 _opts = {'env': svnenv, 'shell': False}
515 560
516 561 try:
517 562 _opts.update(opts)
518 563 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
519 564
520 565 return b''.join(proc), b''.join(proc.stderr)
521 566 except OSError as err:
522 567 if safe_call:
523 568 return '', safe_str(err).strip()
524 569 else:
525 570 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
526 571 tb_err = ("Couldn't run svn command (%s).\n"
527 572 "Original error was:%s\n"
528 573 "Call options:%s\n"
529 574 % (cmd, err, _opts))
530 575 log.exception(tb_err)
531 576 raise exceptions.VcsException()(tb_err)
532 577
533 578 @reraise_safe_exceptions
534 579 def install_hooks(self, wire, force=False):
535 580 from vcsserver.hook_utils import install_svn_hooks
536 581 repo_path = wire['path']
537 582 binary_dir = settings.BINARY_DIR
538 583 executable = None
539 584 if binary_dir:
540 585 executable = os.path.join(binary_dir, 'python3')
541 586 return install_svn_hooks(repo_path, force_create=force)
542 587
543 588 @reraise_safe_exceptions
544 589 def get_hooks_info(self, wire):
545 590 from vcsserver.hook_utils import (
546 591 get_svn_pre_hook_version, get_svn_post_hook_version)
547 592 repo_path = wire['path']
548 593 return {
549 594 'pre_version': get_svn_pre_hook_version(repo_path),
550 595 'post_version': get_svn_post_hook_version(repo_path),
551 596 }
552 597
553 598 @reraise_safe_exceptions
554 599 def set_head_ref(self, wire, head_name):
555 600 pass
556 601
557 602 @reraise_safe_exceptions
558 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
559 archive_dir_name, commit_id):
603 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
604 archive_dir_name, commit_id, cache_config):
560 605
561 606 def walk_tree(root, root_dir, _commit_id):
562 607 """
563 608 Special recursive svn repo walker
564 609 """
565 610 root_dir = safe_bytes(root_dir)
566 611
567 612 filemode_default = 0o100644
568 613 filemode_executable = 0o100755
569 614
570 615 file_iter = svn.fs.dir_entries(root, root_dir)
571 616 for f_name in file_iter:
572 617 f_type = NODE_TYPE_MAPPING.get(file_iter[f_name].kind, None)
573 618
574 619 if f_type == 'dir':
575 620 # return only DIR, and then all entries in that dir
576 621 yield os.path.join(root_dir, f_name), {'mode': filemode_default}, f_type
577 622 new_root = os.path.join(root_dir, f_name)
578 623 yield from walk_tree(root, new_root, _commit_id)
579 624 else:
580 625
581 626 f_path = os.path.join(root_dir, f_name).rstrip(b'/')
582 627 prop_list = svn.fs.node_proplist(root, f_path)
583 628
584 629 f_mode = filemode_default
585 630 if prop_list.get('svn:executable'):
586 631 f_mode = filemode_executable
587 632
588 633 f_is_link = False
589 634 if prop_list.get('svn:special'):
590 635 f_is_link = True
591 636
592 637 data = {
593 638 'is_link': f_is_link,
594 639 'mode': f_mode,
595 640 'content_stream': svn.core.Stream(svn.fs.file_contents(root, f_path)).read
596 641 }
597 642
598 643 yield f_path, data, f_type
599 644
600 645 def file_walker(_commit_id, path):
601 646 repo = self._factory.repo(wire)
602 647 root = svn.fs.revision_root(svn.repos.fs(repo), int(commit_id))
603 648
604 649 def no_content():
605 650 raise NoContentException()
606 651
607 652 for f_name, f_data, f_type in walk_tree(root, path, _commit_id):
608 653 file_path = f_name
609 654
610 655 if f_type == 'dir':
611 656 mode = f_data['mode']
612 657 yield ArchiveNode(file_path, mode, False, no_content)
613 658 else:
614 659 mode = f_data['mode']
615 660 is_link = f_data['is_link']
616 661 data_stream = f_data['content_stream']
617 662 yield ArchiveNode(file_path, mode, is_link, data_stream)
618 663
619 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
620 archive_dir_name, commit_id)
664 return store_archive_in_cache(
665 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
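An illustrative (assumed) slice of the node stream `file_walker` yields: directory entries carry a no-content callable, files a reader for their contents, and `svn:executable` nodes get the executable mode.

    # ArchiveNode(b'trunk', 0o100644, False, no_content)            # directory entry
    # ArchiveNode(b'trunk/run.sh', 0o100755, False, <stream read>)  # svn:executable file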
621 666
622 667
623 668 class SvnDiffer(object):
624 669 """
625 670 Utility to create diffs based on difflib and the Subversion api
626 671 """
627 672
628 673 binary_content = False
629 674
630 675 def __init__(
631 676 self, repo, src_rev, src_path, tgt_rev, tgt_path,
632 677 ignore_whitespace, context):
633 678 self.repo = repo
634 679 self.ignore_whitespace = ignore_whitespace
635 680 self.context = context
636 681
637 682 fsobj = svn.repos.fs(repo)
638 683
639 684 self.tgt_rev = tgt_rev
640 685 self.tgt_path = tgt_path or ''
641 686 self.tgt_root = svn.fs.revision_root(fsobj, tgt_rev)
642 687 self.tgt_kind = svn.fs.check_path(self.tgt_root, self.tgt_path)
643 688
644 689 self.src_rev = src_rev
645 690 self.src_path = src_path or self.tgt_path
646 691 self.src_root = svn.fs.revision_root(fsobj, src_rev)
647 692 self.src_kind = svn.fs.check_path(self.src_root, self.src_path)
648 693
649 694 self._validate()
650 695
651 696 def _validate(self):
652 697 if (self.tgt_kind != svn.core.svn_node_none and
653 698 self.src_kind != svn.core.svn_node_none and
654 699 self.src_kind != self.tgt_kind):
655 700 # TODO: johbo: proper error handling
656 701 raise Exception(
657 702 "Source and target are not compatible for diff generation. "
658 703 "Source type: %s, target type: %s" %
659 704 (self.src_kind, self.tgt_kind))
660 705
661 def generate_diff(self):
662 buf = io.StringIO()
706 def generate_diff(self) -> bytes:
707 buf = io.BytesIO()
663 708 if self.tgt_kind == svn.core.svn_node_dir:
664 709 self._generate_dir_diff(buf)
665 710 else:
666 711 self._generate_file_diff(buf)
667 712 return buf.getvalue()
668 713
669 def _generate_dir_diff(self, buf):
714 def _generate_dir_diff(self, buf: io.BytesIO):
670 715 editor = DiffChangeEditor()
671 716 editor_ptr, editor_baton = svn.delta.make_editor(editor)
672 717 svn.repos.dir_delta2(
673 718 self.src_root,
674 719 self.src_path,
675 720 '', # src_entry
676 721 self.tgt_root,
677 722 self.tgt_path,
678 723 editor_ptr, editor_baton,
679 724 authorization_callback_allow_all,
680 725 False, # text_deltas
681 726 svn.core.svn_depth_infinity, # depth
682 727 False, # entry_props
683 728 False, # ignore_ancestry
684 729 )
685 730
686 731 for path, __, change in sorted(editor.changes):
687 732 self._generate_node_diff(
688 733 buf, change, path, self.tgt_path, path, self.src_path)
689 734
690 def _generate_file_diff(self, buf):
735 def _generate_file_diff(self, buf: io.BytesIO):
691 736 change = None
692 737 if self.src_kind == svn.core.svn_node_none:
693 738 change = "add"
694 739 elif self.tgt_kind == svn.core.svn_node_none:
695 740 change = "delete"
696 741 tgt_base, tgt_path = vcspath.split(self.tgt_path)
697 742 src_base, src_path = vcspath.split(self.src_path)
698 743 self._generate_node_diff(
699 744 buf, change, tgt_path, tgt_base, src_path, src_base)
700 745
701 746 def _generate_node_diff(
702 self, buf, change, tgt_path, tgt_base, src_path, src_base):
703
747 self, buf: io.BytesIO, change, tgt_path, tgt_base, src_path, src_base):
704 748
749 tgt_path_bytes = safe_bytes(tgt_path)
705 750 tgt_path = safe_str(tgt_path)
751
752 src_path_bytes = safe_bytes(src_path)
706 753 src_path = safe_str(src_path)
707 754
708
709 755 if self.src_rev == self.tgt_rev and tgt_base == src_base:
710 756 # makes consistent behaviour with git/hg to return empty diff if
711 757 # we compare same revisions
712 758 return
713 759
714 760 tgt_full_path = vcspath.join(tgt_base, tgt_path)
715 761 src_full_path = vcspath.join(src_base, src_path)
716 762
717 763 self.binary_content = False
718 764 mime_type = self._get_mime_type(tgt_full_path)
719 765
720 if mime_type and not mime_type.startswith('text'):
766 if mime_type and not mime_type.startswith(b'text'):
721 767 self.binary_content = True
722 buf.write("=" * 67 + '\n')
723 buf.write("Cannot display: file marked as a binary type.\n")
724 buf.write("svn:mime-type = %s\n" % mime_type)
725 buf.write("Index: {}\n".format(tgt_path))
726 buf.write("=" * 67 + '\n')
727 buf.write("diff --git a/{tgt_path} b/{tgt_path}\n".format(
728 tgt_path=tgt_path))
768 buf.write(b"=" * 67 + b'\n')
769 buf.write(b"Cannot display: file marked as a binary type.\n")
770 buf.write(b"svn:mime-type = %s\n" % mime_type)
771 buf.write(b"Index: %b\n" % tgt_path_bytes)
772 buf.write(b"=" * 67 + b'\n')
773 buf.write(b"diff --git a/%b b/%b\n" % (tgt_path_bytes, tgt_path_bytes))
729 774
730 775 if change == 'add':
731 776 # TODO: johbo: SVN is missing a zero here compared to git
732 buf.write("new file mode 10644\n")
777 buf.write(b"new file mode 10644\n")
778
 779 # TODO(marcink): introduce binary detection of svn patches
780 # if self.binary_content:
781 # buf.write(b'GIT binary patch\n')
782
783 buf.write(b"--- /dev/null\t(revision 0)\n")
784 src_lines = []
785 else:
786 if change == 'delete':
787 buf.write(b"deleted file mode 10644\n")
733 788
 734 789 # TODO(marcink): introduce binary detection of svn patches
735 790 # if self.binary_content:
736 791 # buf.write('GIT binary patch\n')
737 792
738 buf.write("--- /dev/null\t(revision 0)\n")
739 src_lines = []
740 else:
741 if change == 'delete':
742 buf.write("deleted file mode 10644\n")
743
744 #TODO(marcink): intro to binary detection of svn patches
745 # if self.binary_content:
746 # buf.write('GIT binary patch\n')
747
748 buf.write("--- a/{}\t(revision {})\n".format(
749 src_path, self.src_rev))
793 buf.write(b"--- a/%b\t(revision %d)\n" % (src_path_bytes, self.src_rev))
750 794 src_lines = self._svn_readlines(self.src_root, src_full_path)
751 795
752 796 if change == 'delete':
753 buf.write("+++ /dev/null\t(revision {})\n".format(self.tgt_rev))
797 buf.write(b"+++ /dev/null\t(revision %d)\n" % self.tgt_rev)
754 798 tgt_lines = []
755 799 else:
756 buf.write("+++ b/{}\t(revision {})\n".format(
757 tgt_path, self.tgt_rev))
800 buf.write(b"+++ b/%b\t(revision %d)\n" % (tgt_path_bytes, self.tgt_rev))
758 801 tgt_lines = self._svn_readlines(self.tgt_root, tgt_full_path)
759 802
 803 # the diff header is written; now generate the diff content into our buffer
804
760 805 if not self.binary_content:
761 806 udiff = svn_diff.unified_diff(
762 807 src_lines, tgt_lines, context=self.context,
763 808 ignore_blank_lines=self.ignore_whitespace,
764 809 ignore_case=False,
765 810 ignore_space_changes=self.ignore_whitespace)
766 811
767 812 buf.writelines(udiff)
768 813
769 def _get_mime_type(self, path):
814 def _get_mime_type(self, path) -> bytes:
770 815 try:
771 816 mime_type = svn.fs.node_prop(
772 817 self.tgt_root, path, svn.core.SVN_PROP_MIME_TYPE)
773 818 except svn.core.SubversionException:
774 819 mime_type = svn.fs.node_prop(
775 820 self.src_root, path, svn.core.SVN_PROP_MIME_TYPE)
776 821 return mime_type
777 822
778 823 def _svn_readlines(self, fs_root, node_path):
779 824 if self.binary_content:
780 825 return []
781 826 node_kind = svn.fs.check_path(fs_root, node_path)
782 827 if node_kind not in (
783 828 svn.core.svn_node_file, svn.core.svn_node_symlink):
784 829 return []
785 830 content = svn.core.Stream(
786 831 svn.fs.file_contents(fs_root, node_path)).read()
787 832
788 833 return content.splitlines(True)
789 834
790 835
791 836 class DiffChangeEditor(svn.delta.Editor):
792 837 """
793 838 Records changes between two given revisions
794 839 """
795 840
796 841 def __init__(self):
797 842 self.changes = []
798 843
799 844 def delete_entry(self, path, revision, parent_baton, pool=None):
800 845 self.changes.append((path, None, 'delete'))
801 846
802 847 def add_file(
803 848 self, path, parent_baton, copyfrom_path, copyfrom_revision,
804 849 file_pool=None):
805 850 self.changes.append((path, 'file', 'add'))
806 851
807 852 def open_file(self, path, parent_baton, base_revision, file_pool=None):
808 853 self.changes.append((path, 'file', 'change'))
809 854
810 855
811 856 def authorization_callback_allow_all(root, path, pool):
812 857 return True
813 858
814 859
815 860 class TxnNodeProcessor(object):
816 861 """
817 862 Utility to process the change of one node within a transaction root.
818 863
819 864 It encapsulates the knowledge of how to add, update or remove
820 865 a node for a given transaction root. The purpose is to support the method
821 866 `SvnRemote.commit`.
822 867 """
823 868
824 869 def __init__(self, node, txn_root):
825 assert isinstance(node['path'], bytes)
870 assert_bytes(node['path'])
826 871
827 872 self.node = node
828 873 self.txn_root = txn_root
829 874
830 875 def update(self):
831 876 self._ensure_parent_dirs()
832 877 self._add_file_if_node_does_not_exist()
833 878 self._update_file_content()
834 879 self._update_file_properties()
835 880
836 881 def remove(self):
837 882 svn.fs.delete(self.txn_root, self.node['path'])
838 883 # TODO: Clean up directory if empty
839 884
840 885 def _ensure_parent_dirs(self):
841 886 curdir = vcspath.dirname(self.node['path'])
842 887 dirs_to_create = []
843 888 while not self._svn_path_exists(curdir):
844 889 dirs_to_create.append(curdir)
845 890 curdir = vcspath.dirname(curdir)
846 891
847 892 for curdir in reversed(dirs_to_create):
848 893 log.debug('Creating missing directory "%s"', curdir)
849 894 svn.fs.make_dir(self.txn_root, curdir)
850 895
851 896 def _svn_path_exists(self, path):
852 897 path_status = svn.fs.check_path(self.txn_root, path)
853 898 return path_status != svn.core.svn_node_none
854 899
855 900 def _add_file_if_node_does_not_exist(self):
856 901 kind = svn.fs.check_path(self.txn_root, self.node['path'])
857 902 if kind == svn.core.svn_node_none:
858 903 svn.fs.make_file(self.txn_root, self.node['path'])
859 904
860 905 def _update_file_content(self):
861 assert isinstance(self.node['content'], bytes)
906 assert_bytes(self.node['content'])
862 907
863 908 handler, baton = svn.fs.apply_textdelta(
864 909 self.txn_root, self.node['path'], None, None)
865 910 svn.delta.svn_txdelta_send_string(self.node['content'], handler, baton)
866 911
867 912 def _update_file_properties(self):
868 913 properties = self.node.get('properties', {})
869 914 for key, value in properties.items():
870 915 svn.fs.change_node_prop(
871 self.txn_root, self.node['path'], key, value)
916 self.txn_root, self.node['path'], safe_bytes(key), safe_bytes(value))
872 917
873 918
874 919 def apr_time_t(timestamp):
875 920 """
876 921 Convert a Python timestamp into APR timestamp type apr_time_t
877 922 """
878 return timestamp * 1E6
923 return int(timestamp * 1E6)
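APR timestamps are integer microseconds since the epoch, so the conversion scales by 1e6 and truncates; for example:

    apr_time_t(1700000000.25)  # -> 1700000000250000 (microseconds)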
879 924
880 925
881 926 def svn_opt_revision_value_t(num):
882 927 """
883 928 Put `num` into a `svn_opt_revision_value_t` structure.
884 929 """
885 930 value = svn.core.svn_opt_revision_value_t()
886 931 value.number = num
887 932 revision = svn.core.svn_opt_revision_t()
888 933 revision.kind = svn.core.svn_opt_revision_number
889 934 revision.value = value
890 935 return revision
@@ -1,209 +1,211 b''
1 1 #
2 2 # Copyright (C) 2004-2009 Edgewall Software
3 3 # Copyright (C) 2004-2006 Christopher Lenz <cmlenz@gmx.de>
4 4 # All rights reserved.
5 5 #
6 6 # This software is licensed as described in the file COPYING, which
7 7 # you should have received as part of this distribution. The terms
8 8 # are also available at http://trac.edgewall.org/wiki/TracLicense.
9 9 #
10 10 # This software consists of voluntary contributions made by many
11 11 # individuals. For the exact contribution history, see the revision
12 12 # history and logs, available at http://trac.edgewall.org/log/.
13 13 #
14 14 # Author: Christopher Lenz <cmlenz@gmx.de>
15 15
16 16 import difflib
17 17
18 18
19 def get_filtered_hunks(fromlines, tolines, context=None,
20 ignore_blank_lines=False, ignore_case=False,
21 ignore_space_changes=False):
19 def get_filtered_hunks(from_lines, to_lines, context=None,
20 ignore_blank_lines: bool = False, ignore_case: bool = False,
21 ignore_space_changes: bool = False):
22 22 """Retrieve differences in the form of `difflib.SequenceMatcher`
23 23 opcodes, grouped according to the ``context`` and ``ignore_*``
24 24 parameters.
25 25
26 :param fromlines: list of lines corresponding to the old content
27 :param tolines: list of lines corresponding to the new content
26 :param from_lines: list of lines corresponding to the old content
27 :param to_lines: list of lines corresponding to the new content
28 28 :param ignore_blank_lines: differences about empty lines only are ignored
29 29 :param ignore_case: upper case / lower case only differences are ignored
30 30 :param ignore_space_changes: differences in amount of spaces are ignored
31 31 :param context: the number of "equal" lines kept for representing
32 32 the context of the change
33 33 :return: generator of grouped `difflib.SequenceMatcher` opcodes
34 34
35 35 If none of the ``ignore_*`` parameters is `True`, there's nothing
 36 36 to filter out; the results will come straight from the
37 37 SequenceMatcher.
38 38 """
39 hunks = get_hunks(fromlines, tolines, context)
39 hunks = get_hunks(from_lines, to_lines, context)
40 40 if ignore_space_changes or ignore_case or ignore_blank_lines:
41 hunks = filter_ignorable_lines(hunks, fromlines, tolines, context,
41 hunks = filter_ignorable_lines(hunks, from_lines, to_lines, context,
42 42 ignore_blank_lines, ignore_case,
43 43 ignore_space_changes)
44 44 return hunks
45 45
46 46
47 def get_hunks(fromlines, tolines, context=None):
47 def get_hunks(from_lines, to_lines, context=None):
48 48 """Generator yielding grouped opcodes describing differences .
49 49
50 50 See `get_filtered_hunks` for the parameter descriptions.
51 51 """
52 matcher = difflib.SequenceMatcher(None, fromlines, tolines)
52 matcher = difflib.SequenceMatcher(None, from_lines, to_lines)
53 53 if context is None:
54 54 return (hunk for hunk in [matcher.get_opcodes()])
55 55 else:
56 56 return matcher.get_grouped_opcodes(context)
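For byte line lists, the underlying opcodes look like this (illustrative inputs):

    import difflib
    m = difflib.SequenceMatcher(None, [b'a\n', b'b\n'], [b'a\n', b'B\n'])
    m.get_opcodes()  # -> [('equal', 0, 1, 0, 1), ('replace', 1, 2, 1, 2)]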
57 57
58 58
59 def filter_ignorable_lines(hunks, fromlines, tolines, context,
59 def filter_ignorable_lines(hunks, from_lines, to_lines, context,
60 60 ignore_blank_lines, ignore_case,
61 61 ignore_space_changes):
62 62 """Detect line changes that should be ignored and emits them as
63 63 tagged as "equal", possibly joined with the preceding and/or
64 64 following "equal" block.
65 65
66 66 See `get_filtered_hunks` for the parameter descriptions.
67 67 """
68 68 def is_ignorable(tag, fromlines, tolines):
69
69 70 if tag == 'delete' and ignore_blank_lines:
70 if ''.join(fromlines) == '':
71 if b''.join(fromlines) == b'':
71 72 return True
72 73 elif tag == 'insert' and ignore_blank_lines:
73 if ''.join(tolines) == '':
74 if b''.join(tolines) == b'':
74 75 return True
75 76 elif tag == 'replace' and (ignore_case or ignore_space_changes):
76 77 if len(fromlines) != len(tolines):
77 78 return False
78 79
79 80 def f(input_str):
80 81 if ignore_case:
81 82 input_str = input_str.lower()
82 83 if ignore_space_changes:
83 input_str = ' '.join(input_str.split())
84 input_str = b' '.join(input_str.split())
84 85 return input_str
85 86
86 87 for i in range(len(fromlines)):
87 88 if f(fromlines[i]) != f(tolines[i]):
88 89 return False
89 90 return True
90 91
91 92 hunks = list(hunks)
92 93 opcodes = []
93 94 ignored_lines = False
94 95 prev = None
95 96 for hunk in hunks:
96 97 for tag, i1, i2, j1, j2 in hunk:
97 98 if tag == 'equal':
98 99 if prev:
99 100 prev = (tag, prev[1], i2, prev[3], j2)
100 101 else:
101 102 prev = (tag, i1, i2, j1, j2)
102 103 else:
103 if is_ignorable(tag, fromlines[i1:i2], tolines[j1:j2]):
104 if is_ignorable(tag, from_lines[i1:i2], to_lines[j1:j2]):
104 105 ignored_lines = True
105 106 if prev:
106 107 prev = 'equal', prev[1], i2, prev[3], j2
107 108 else:
108 109 prev = 'equal', i1, i2, j1, j2
109 110 continue
110 111 if prev:
111 112 opcodes.append(prev)
112 113 opcodes.append((tag, i1, i2, j1, j2))
113 114 prev = None
114 115 if prev:
115 116 opcodes.append(prev)
116 117
117 118 if ignored_lines:
118 119 if context is None:
119 120 yield opcodes
120 121 else:
121 122 # we leave at most n lines with the tag 'equal' before and after
122 123 # every change
123 124 n = context
124 125 nn = n + n
125 126
126 127 group = []
128
127 129 def all_equal():
 128 130 return all(op[0] == 'equal' for op in group)
129 131 for idx, (tag, i1, i2, j1, j2) in enumerate(opcodes):
130 132 if idx == 0 and tag == 'equal': # Fixup leading unchanged block
131 133 i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
132 134 elif tag == 'equal' and i2 - i1 > nn:
133 135 group.append((tag, i1, min(i2, i1 + n), j1,
134 136 min(j2, j1 + n)))
135 137 if not all_equal():
136 138 yield group
137 139 group = []
138 140 i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
139 141 group.append((tag, i1, i2, j1, j2))
140 142
141 143 if group and not (len(group) == 1 and group[0][0] == 'equal'):
142 144 if group[-1][0] == 'equal': # Fixup trailing unchanged block
143 145 tag, i1, i2, j1, j2 = group[-1]
144 146 group[-1] = tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)
145 147 if not all_equal():
146 148 yield group
147 149 else:
148 150 for hunk in hunks:
149 151 yield hunk
150 152
151 153
152 NO_NEWLINE_AT_END = '\\ No newline at end of file'
154 NO_NEWLINE_AT_END = b'\\ No newline at end of file'
155 LINE_TERM = b'\n'
153 156
154 157
155 def unified_diff(fromlines, tolines, context=None, ignore_blank_lines=0,
156 ignore_case=0, ignore_space_changes=0, lineterm='\n'):
158 def unified_diff(from_lines, to_lines, context=None, ignore_blank_lines: bool = False,
159 ignore_case: bool = False, ignore_space_changes: bool = False, lineterm=LINE_TERM) -> bytes:
157 160 """
158 161 Generator producing lines corresponding to a textual diff.
159 162
160 163 See `get_filtered_hunks` for the parameter descriptions.
161 164 """
162 165 # TODO: johbo: Check if this can be nicely integrated into the matching
163 166
164 167 if ignore_space_changes:
165 fromlines = [l.strip() for l in fromlines]
166 tolines = [l.strip() for l in tolines]
168 from_lines = [l.strip() for l in from_lines]
169 to_lines = [l.strip() for l in to_lines]
167 170
168 for group in get_filtered_hunks(fromlines, tolines, context,
171 def _hunk_range(start, length) -> bytes:
172 if length != 1:
173 return b'%d,%d' % (start, length)
174 else:
175 return b'%d' % (start,)
176
177 for group in get_filtered_hunks(from_lines, to_lines, context,
169 178 ignore_blank_lines, ignore_case,
170 179 ignore_space_changes):
171 180 i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
172 181 if i1 == 0 and i2 == 0:
173 182 i1, i2 = -1, -1 # support for Add changes
174 183 if j1 == 0 and j2 == 0:
175 184 j1, j2 = -1, -1 # support for Delete changes
176 yield '@@ -{} +{} @@{}'.format(
185 yield b'@@ -%b +%b @@%b' % (
177 186 _hunk_range(i1 + 1, i2 - i1),
178 187 _hunk_range(j1 + 1, j2 - j1),
179 188 lineterm)
180 189 for tag, i1, i2, j1, j2 in group:
181 190 if tag == 'equal':
182 for line in fromlines[i1:i2]:
191 for line in from_lines[i1:i2]:
183 192 if not line.endswith(lineterm):
184 yield ' ' + line + lineterm
185 yield NO_NEWLINE_AT_END + lineterm
186 else:
187 yield ' ' + line
188 else:
189 if tag in ('replace', 'delete'):
190 for line in fromlines[i1:i2]:
191 if not line.endswith(lineterm):
192 yield '-' + line + lineterm
193 yield b' ' + line + lineterm
193 194 yield NO_NEWLINE_AT_END + lineterm
194 195 else:
195 yield '-' + line
196 if tag in ('replace', 'insert'):
197 for line in tolines[j1:j2]:
196 yield b' ' + line
197 else:
198 if tag in ('replace', 'delete'):
199 for line in from_lines[i1:i2]:
198 200 if not line.endswith(lineterm):
199 yield '+' + line + lineterm
201 yield b'-' + line + lineterm
200 202 yield NO_NEWLINE_AT_END + lineterm
201 203 else:
202 yield '+' + line
203
204
205 def _hunk_range(start, length):
206 if length != 1:
207 return '%d,%d' % (start, length)
204 yield b'-' + line
205 if tag in ('replace', 'insert'):
206 for line in to_lines[j1:j2]:
207 if not line.endswith(lineterm):
208 yield b'+' + line + lineterm
209 yield NO_NEWLINE_AT_END + lineterm
208 210 else:
209 return '%d' % (start, )
211 yield b'+' + line
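A minimal end-to-end sketch of the generator (inputs assumed; the output is traced from the code above):

    out = b''.join(unified_diff([b'a\n', b'b\n'], [b'a\n', b'B\n'], context=1))
    # out == b'@@ -1,2 +1,2 @@\n a\n-b\n+B\n'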
@@ -1,103 +1,103 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import io
19 19 import mock
20 20 import pytest
21 21 import sys
22 22
23 23 from vcsserver.str_utils import ascii_bytes
24 24
25 25
26 26 class MockPopen(object):
27 27 def __init__(self, stderr):
28 28 self.stdout = io.BytesIO(b'')
29 29 self.stderr = io.BytesIO(stderr)
30 30 self.returncode = 1
31 31
32 32 def wait(self):
33 33 pass
34 34
35 35
36 36 INVALID_CERTIFICATE_STDERR = '\n'.join([
37 37 'svnrdump: E230001: Unable to connect to a repository at URL url',
38 38 'svnrdump: E230001: Server SSL certificate verification failed: issuer is not trusted',
39 39 ])
40 40
41 41
42 42 @pytest.mark.parametrize('stderr,expected_reason', [
43 43 (INVALID_CERTIFICATE_STDERR, 'INVALID_CERTIFICATE'),
44 44 ('svnrdump: E123456', 'UNKNOWN:svnrdump: E123456'),
45 45 ], ids=['invalid-cert-stderr', 'svnrdump-err-123456'])
46 46 @pytest.mark.xfail(sys.platform == "cygwin",
47 47 reason="SVN not packaged for Cygwin")
48 48 def test_import_remote_repository_certificate_error(stderr, expected_reason):
49 49 from vcsserver.remote import svn
50 50 factory = mock.Mock()
51 51 factory.repo = mock.Mock(return_value=mock.Mock())
52 52
53 53 remote = svn.SvnRemote(factory)
54 54 remote.is_path_valid_repository = lambda wire, path: True
55 55
56 56 with mock.patch('subprocess.Popen',
57 57 return_value=MockPopen(ascii_bytes(stderr))):
58 58 with pytest.raises(Exception) as excinfo:
59 59 remote.import_remote_repository({'path': 'path'}, 'url')
60 60
61 61 expected_error_args = 'Failed to dump the remote repository from url. Reason:{}'.format(expected_reason)
62 62
63 63 assert excinfo.value.args[0] == expected_error_args
64 64
65 65
66 66 def test_svn_libraries_can_be_imported():
67 67 import svn.client
68 68 assert svn.client is not None
69 69
70 70
71 71 @pytest.mark.parametrize('example_url, parts', [
72 ('http://server.com', (None, None, 'http://server.com')),
73 ('http://user@server.com', ('user', None, 'http://user@server.com')),
72 ('http://server.com', ('', '', 'http://server.com')),
73 ('http://user@server.com', ('user', '', 'http://user@server.com')),
74 74 ('http://user:pass@server.com', ('user', 'pass', 'http://user:pass@server.com')),
75 ('<script>', (None, None, '<script>')),
76 ('http://', (None, None, 'http://')),
75 ('<script>', ('', '', '<script>')),
76 ('http://', ('', '', 'http://')),
77 77 ])
78 78 def test_username_password_extraction_from_url(example_url, parts):
79 79 from vcsserver.remote import svn
80 80
81 81 factory = mock.Mock()
82 82 factory.repo = mock.Mock(return_value=mock.Mock())
83 83
84 84 remote = svn.SvnRemote(factory)
85 85 remote.is_path_valid_repository = lambda wire, path: True
86 86
87 87 assert remote.get_url_and_credentials(example_url) == parts
88 88
89 89
90 90 @pytest.mark.parametrize('call_url', [
91 91 b'https://svn.code.sf.net/p/svnbook/source/trunk/',
92 92 b'https://marcink@svn.code.sf.net/p/svnbook/source/trunk/',
93 93 b'https://marcink:qweqwe@svn.code.sf.net/p/svnbook/source/trunk/',
94 94 ])
95 95 def test_check_url(call_url):
96 96 from vcsserver.remote import svn
97 97 factory = mock.Mock()
98 98 factory.repo = mock.Mock(return_value=mock.Mock())
99 99
100 100 remote = svn.SvnRemote(factory)
101 101 remote.is_path_valid_repository = lambda wire, path: True
102 assert remote.check_url(call_url)
102 assert remote.check_url(call_url, {'dummy': 'config'})
103 103
@@ -1,112 +1,123 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
17 import base64
18 18 import time
19 19 import logging
20 20
21 import msgpack
22
21 23 import vcsserver
22 24 from vcsserver.str_utils import safe_str, ascii_str
23 25
24 26 log = logging.getLogger(__name__)
25 27
26 28
27 29 def get_access_path(environ):
28 30 path = environ.get('PATH_INFO')
29 31 return path
30 32
31 33
32 34 def get_user_agent(environ):
33 35 return environ.get('HTTP_USER_AGENT')
34 36
35 37
36 def get_call_context(registry) -> dict:
38 def get_call_context(request) -> dict:
37 39 cc = {}
40 registry = request.registry
38 41 if hasattr(registry, 'vcs_call_context'):
39 42 cc.update({
40 43 'X-RC-Method': registry.vcs_call_context.get('method'),
41 44 'X-RC-Repo-Name': registry.vcs_call_context.get('repo_name')
42 45 })
43 46
44 47 return cc
45 48
46 49
50 def get_headers_call_context(environ, strict=True):
51 if 'HTTP_X_RC_VCS_STREAM_CALL_CONTEXT' in environ:
52 packed_cc = base64.b64decode(environ['HTTP_X_RC_VCS_STREAM_CALL_CONTEXT'])
53 return msgpack.unpackb(packed_cc)
54 elif strict:
55 raise ValueError('Expected header HTTP_X_RC_VCS_STREAM_CALL_CONTEXT not found')
56
57
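A hedged sketch of the producer side this helper expects: the caller msgpack-packs the call context and base64-encodes it into the header (header key as used above; the payload values are hypothetical).

    import base64, msgpack
    cc = {'method': 'pull', 'repo_name': 'my-repo'}
    environ = {'HTTP_X_RC_VCS_STREAM_CALL_CONTEXT': base64.b64encode(msgpack.packb(cc))}
    get_headers_call_context(environ)  # -> {'method': 'pull', 'repo_name': 'my-repo'}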
47 58 class RequestWrapperTween(object):
48 59 def __init__(self, handler, registry):
49 60 self.handler = handler
50 61 self.registry = registry
51 62
52 63 # one-time configuration code goes here
53 64
54 65 def __call__(self, request):
55 66 start = time.time()
56 67 log.debug('Starting request time measurement')
57 68 response = None
58 69
59 70 try:
60 71 response = self.handler(request)
61 72 finally:
62 73 ua = get_user_agent(request.environ)
63 call_context = get_call_context(request.registry)
74 call_context = get_call_context(request)
64 75 vcs_method = call_context.get('X-RC-Method', '_NO_VCS_METHOD')
65 76 repo_name = call_context.get('X-RC-Repo-Name', '')
66 77
67 78 count = request.request_count()
68 79 _ver_ = vcsserver.__version__
69 80 _path = safe_str(get_access_path(request.environ))
70 81
71 82 ip = '127.0.0.1'
72 83 match_route = request.matched_route.name if request.matched_route else "NOT_FOUND"
73 84 resp_code = getattr(response, 'status_code', 'UNDEFINED')
74 85
75 86 _view_path = f"{repo_name}@{_path}/{vcs_method}"
76 87
77 88 total = time.time() - start
78 89
79 90 log.info(
80 91 'Req[%4s] IP: %s %s Request to %s time: %.4fs [%s], VCSServer %s',
81 92 count, ip, request.environ.get('REQUEST_METHOD'),
82 93 _view_path, total, ua, _ver_,
83 94 extra={"time": total, "ver": _ver_, "code": resp_code,
84 95 "path": _path, "view_name": match_route, "user_agent": ua,
85 96 "vcs_method": vcs_method, "repo_name": repo_name}
86 97 )
87 98
88 99 statsd = request.registry.statsd
89 100 if statsd:
90 101 match_route = request.matched_route.name if request.matched_route else _path
91 102 elapsed_time_ms = round(1000.0 * total) # use ms only
92 103 statsd.timing(
93 104 "vcsserver_req_timing.histogram", elapsed_time_ms,
94 105 tags=[
95 106 "view_name:{}".format(match_route),
96 107 "code:{}".format(resp_code)
97 108 ],
98 109 use_decimals=False
99 110 )
100 111 statsd.incr(
101 112 "vcsserver_req_total", tags=[
102 113 "view_name:{}".format(match_route),
103 114 "code:{}".format(resp_code)
104 115 ])
105 116
106 117 return response
107 118
108 119
109 120 def includeme(config):
110 121 config.add_tween(
111 122 'vcsserver.tweens.request_wrapper.RequestWrapperTween',
112 123 )