##// END OF EJS Templates
app: new optimized remote endpoints for python3 rewrite
super-admin -
r1124:8fcf8b08 python3
parent child Browse files
Show More
@@ -18,7 +18,6 b''
18 18 import collections
19 19 import logging
20 20 import os
21 import posixpath as vcspath
22 21 import re
23 22 import stat
24 23 import traceback
@@ -32,7 +31,7 b' import pygit2'
32 31 from pygit2 import Repository as LibGit2Repo
33 32 from pygit2 import index as LibGit2Index
34 33 from dulwich import index, objects
35 from dulwich.client import HttpGitClient, LocalGitClient
34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
36 35 from dulwich.errors import (
37 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
38 37 MissingCommitError, ObjectMissing, HangupException,
@@ -42,7 +41,7 b' from dulwich.server import update_server'
42 41
43 42 from vcsserver import exceptions, settings, subprocessio
44 43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
45 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo, BinaryEnvelope
44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
46 45 from vcsserver.hgcompat import (
47 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
48 47 from vcsserver.git_lfs.lib import LFSOidStore
@@ -127,6 +126,28 b' class GitFactory(RepoFactory):'
127 126 return self.repo(wire, use_libgit2=True)
128 127
129 128
def create_signature_from_string(author_str, **kwargs):
    """
    Creates a pygit2.Signature object from a string of the format 'Name <email>'.

    Leading/trailing whitespace around the whole string is ignored, so values
    such as ``'Name <email>\\n'`` or ``' Name <email> '`` are accepted.

    :param author_str: String of the format 'Name <email>'
    :param kwargs: extra keyword arguments passed unchanged to ``pygit2.Signature``
        (``create_commit`` in this file passes ``time`` and ``offset``)
    :return: pygit2.Signature object
    :raises ValueError: if ``author_str`` is not a string of the expected format
    """
    # Reject non-str input explicitly; otherwise `re` raises a TypeError that
    # says nothing about the expected 'Name <email>' format.
    if not isinstance(author_str, str):
        raise ValueError(f"Invalid format: {author_str}")

    match = re.match(r'^(.+) <(.+)>$', author_str.strip())
    if match is None:
        raise ValueError(f"Invalid format: {author_str}")

    name, email = match.groups()
    return pygit2.Signature(name, email, **kwargs)
142
143
def get_obfuscated_url(url_obj):
    """
    Return a string form of ``url_obj`` that is safe to write to logs.

    The password (when set) is replaced with ``b'*****'`` and the query string
    is passed through ``obfuscate_qs``. The masking is applied to a shallow
    copy, so the caller's ``url_obj`` keeps its real credentials and can still
    be used (e.g. for ``authinfo()``) after this call.

    :param url_obj: parsed url object exposing ``passwd`` and ``query`` attributes
    :return: ``str(url_obj)`` of the masked copy
    """
    # local import: the module-level import block is not touched by this change
    import copy

    masked = copy.copy(url_obj)
    if masked.passwd:
        masked.passwd = b'*****'
    masked.query = obfuscate_qs(masked.query)
    return str(masked)
149
150
130 151 class GitRemote(RemoteBase):
131 152
132 153 def __init__(self, factory):
@@ -139,6 +160,13 b' class GitRemote(RemoteBase):'
139 160 "parents": self.parents,
140 161 "_commit": self.revision,
141 162 }
163 self._bulk_file_methods = {
164 "size": self.get_node_size,
165 "data": self.get_node_data,
166 "flags": self.get_node_flags,
167 "is_binary": self.get_node_is_binary,
168 "md5": self.md5_hash
169 }
142 170
143 171 def _wire_to_config(self, wire):
144 172 if 'config' in wire:
@@ -213,11 +241,63 b' class GitRemote(RemoteBase):'
213 241 return repo.is_bare
214 242
def _read_blob_attr(self, wire, commit_id, path, reader):
    """
    Resolve ``path`` inside the tree of ``commit_id`` and return ``reader(blob)``.

    Shared implementation of the ``get_node_*`` accessors below, which differ
    only in the attribute they read from the resolved object.

    :param wire: wire dict handed to ``self._factory.repo_libgit2``
    :param commit_id: id used to look the commit up in the repository
    :param path: path looked up in the commit's tree
    :param reader: callable receiving the resolved blob object
    :raises: ``exceptions.LookupException`` when the object at ``path`` is not a blob
    """
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:
        commit = repo[commit_id]
        blob_obj = commit.tree[path]

        if blob_obj.type != pygit2.GIT_OBJ_BLOB:
            raise exceptions.LookupException()(
                f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')

        # read the attribute while the repository context is still open
        return reader(blob_obj)

@reraise_safe_exceptions
def get_node_data(self, wire, commit_id, path):
    """Return the content of the blob at ``path`` wrapped in a ``BytesEnvelope``."""
    return self._read_blob_attr(
        wire, commit_id, path, lambda blob_obj: BytesEnvelope(blob_obj.data))

@reraise_safe_exceptions
def get_node_size(self, wire, commit_id, path):
    """Return the ``size`` attribute of the blob at ``path``."""
    return self._read_blob_attr(
        wire, commit_id, path, lambda blob_obj: blob_obj.size)

@reraise_safe_exceptions
def get_node_flags(self, wire, commit_id, path):
    """Return the ``filemode`` attribute of the blob at ``path``."""
    return self._read_blob_attr(
        wire, commit_id, path, lambda blob_obj: blob_obj.filemode)

@reraise_safe_exceptions
def get_node_is_binary(self, wire, commit_id, path):
    """Return the ``is_binary`` attribute of the blob at ``path``."""
    return self._read_blob_attr(
        wire, commit_id, path, lambda blob_obj: blob_obj.is_binary)
294
@reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
    """
    Return the raw data of the object stored under ``sha``.

    The data is wrapped in a ``BytesEnvelope`` (the post-change variant of
    this changeset; the removed line used ``BinaryEnvelope``).
    """
    with self._factory.repo_libgit2(wire) as repo:
        return BytesEnvelope(repo[sha].data)
221 301
222 302 @reraise_safe_exceptions
223 303 def blob_raw_length(self, wire, sha):
@@ -283,15 +363,24 b' class GitRemote(RemoteBase):'
283 363 return _is_binary(repo_id, tree_id)
284 364
@reraise_safe_exceptions
def md5_hash(self, wire, commit_id, path):
    """
    Validate that ``path`` at ``commit_id`` is a blob and return ``''``.

    NOTE(review): no digest is computed here; the function only performs the
    lookup/type check and then returns an empty string. Presumably callers
    treat ``''`` as "not available" - confirm before relying on it.

    :raises: ``exceptions.LookupException`` when the object at ``path`` is not a blob
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _md5_hash(_repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            node = repo[_commit_id].tree[_path]

            if node.type != pygit2.GIT_OBJ_BLOB:
                raise exceptions.LookupException()(
                    f'Tree for commit_id:{_commit_id} is not a blob: {node.type_str}')

        return ''

    return _md5_hash(repo_id, commit_id, path)
295 384
296 385 @reraise_safe_exceptions
297 386 def in_largefiles_store(self, wire, oid):
@@ -343,10 +432,29 b' class GitRemote(RemoteBase):'
343 432
344 433 return _bulk_request(repo_id, rev, sorted(pre_load))
345 434
346 def _build_opener(self, url):
@reraise_safe_exceptions
def bulk_file_request(self, wire, commit_id, path, pre_load):
    """
    Collect several file attributes for ``path`` at ``commit_id`` in one call.

    :param wire: wire dict; its ``cache`` key is set to ``False`` before each
        per-attribute call so only the bulk result is cached
    :param commit_id: commit the file is looked up in
    :param path: path of the file
    :param pre_load: iterable of attribute names, each a key of
        ``self._bulk_file_methods``
    :return: ``BinaryEnvelope`` wrapping a dict ``{attribute: value}``
    :raises: ``exceptions.VcsException`` for an attribute name that is not
        registered in ``self._bulk_file_methods``
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
        result = {}
        for attr in pre_load:
            # Guard only the registry lookup: a KeyError raised while the
            # accessor itself runs must not be reported as an unknown attribute.
            try:
                method = self._bulk_file_methods[attr]
            except KeyError as e:
                raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')

            wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
            result[attr] = method(wire, _commit_id, _path)
        return BinaryEnvelope(result)

    return _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
453
454 def _build_opener(self, url: str):
347 455 handlers = []
348 url_obj = url_parser(url)
349 _, authinfo = url_obj.authinfo()
456 url_obj = url_parser(safe_bytes(url))
457 authinfo = url_obj.authinfo()[1]
350 458
351 459 if authinfo:
352 460 # create a password manager
@@ -358,27 +466,19 b' class GitRemote(RemoteBase):'
358 466
359 467 return urllib.request.build_opener(*handlers)
360 468
361 def _type_id_to_name(self, type_id: int):
362 return {
363 1: 'commit',
364 2: 'tree',
365 3: 'blob',
366 4: 'tag'
367 }[type_id]
368
369 469 @reraise_safe_exceptions
370 470 def check_url(self, url, config):
371 471 url_obj = url_parser(safe_bytes(url))
372 test_uri, _ = url_obj.authinfo()
373 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
374 url_obj.query = obfuscate_qs(url_obj.query)
375 cleaned_uri = str(url_obj)
376 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
472
473 test_uri = safe_str(url_obj.authinfo()[0])
474 obfuscated_uri = get_obfuscated_url(url_obj)
475
476 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
377 477
378 478 if not test_uri.endswith('info/refs'):
379 479 test_uri = test_uri.rstrip('/') + '/info/refs'
380 480
381 o = self._build_opener(url)
481 o = self._build_opener(test_uri)
382 482 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
383 483
384 484 q = {"service": 'git-upload-pack'}
@@ -387,25 +487,28 b' class GitRemote(RemoteBase):'
387 487 req = urllib.request.Request(cu, None, {})
388 488
389 489 try:
390 log.debug("Trying to open URL %s", cleaned_uri)
490 log.debug("Trying to open URL %s", obfuscated_uri)
391 491 resp = o.open(req)
392 492 if resp.code != 200:
393 493 raise exceptions.URLError()('Return Code is not 200')
394 494 except Exception as e:
395 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
495 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
396 496 # means it cannot be cloned
397 raise exceptions.URLError(e)("[{}] org_exc: {}".format(cleaned_uri, e))
497 raise exceptions.URLError(e)("[{}] org_exc: {}".format(obfuscated_uri, e))
398 498
399 499 # now detect if it's proper git repo
400 gitdata = resp.read()
401 if 'service=git-upload-pack' in gitdata:
500 gitdata: bytes = resp.read()
501
502 if b'service=git-upload-pack' in gitdata:
402 503 pass
403 elif re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata):
504 elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
404 505 # old style git can return some other format !
405 506 pass
406 507 else:
407 raise exceptions.URLError()(
408 "url [{}] does not look like an git".format(cleaned_uri))
508 e = None
509 raise exceptions.URLError(e)(
510 "url [%s] does not look like an hg repo org_exc: %s"
511 % (obfuscated_uri, e))
409 512
410 513 return True
411 514
@@ -468,157 +571,112 b' class GitRemote(RemoteBase):'
468 571 repo.object_store.add_object(blob)
469 572 return blob.id
470 573
471 # TODO: this is quite complex, check if that can be simplified
@reraise_safe_exceptions
def create_commit(self, wire, author, committer, message, branch, new_tree_id, date_args: list[int, int] = None):
    """
    Create a commit for an already written tree and point ``refs/heads/<branch>`` at it.

    :param wire: wire dict handed to ``self._factory.repo_libgit2``
    :param author: ``'Name <email>'`` string (str or bytes); any other value is
        passed to pygit2 unchanged
    :param committer: same convention as ``author``
    :param message: commit message
    :param branch: short branch name; the updated reference is ``refs/heads/<branch>``
    :param new_tree_id: tree id; a str/bytes id is resolved through the repository first
    :param date_args: optional ``[time, offset]`` pair used for both signatures;
        when omitted the signatures are built with pygit2's defaults
    :return: id of the new commit as a string
    """
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:

        signature_kwargs = {}
        if date_args:
            current_time, offset = date_args
            signature_kwargs = {
                'time': current_time,
                'offset': offset
            }

        # Always turn textual identities into signatures. Previously this only
        # happened when date_args was given, so without it the raw strings
        # reached repo.create_commit().
        if isinstance(author, (bytes, str)):
            author = create_signature_from_string(safe_str(author), **signature_kwargs)
        if isinstance(committer, (bytes, str)):
            committer = create_signature_from_string(safe_str(committer), **signature_kwargs)

        tree = new_tree_id
        if isinstance(tree, (bytes, str)):
            # validate this tree is in the repo...
            tree = repo[safe_str(tree)].id

        parents = []
        # ensure we COMMIT on top of given branch head
        # check if this repo has ANY branches, otherwise it's a new branch case we need to make
        if branch in repo.branches.local:
            parents.append(repo.branches[branch].target)
        elif any(True for _ in repo.branches.local):
            parents.append(repo.head.target)
        # otherwise the repository has no local branch yet and the commit
        # is created without parents

        # Create a new commit
        commit_oid = repo.create_commit(
            f'refs/heads/{branch}',  # the name of the reference to update
            author,  # the author of the commit
            committer,  # the committer of the commit
            message,  # the commit message
            tree,  # the tree produced by the index
            parents  # list of parents for the new commit, usually just one,
        )

        new_commit_id = safe_str(commit_oid)

    return new_commit_id
618
@reraise_safe_exceptions
def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
    """
    Write ``updated``/``removed`` paths through the repository index and commit the result.

    This is the post-change (libgit2 index based) implementation of the
    changeset; the removed dulwich tree-walking code is not reproduced.

    :param wire: wire dict handed to ``self._factory.repo_libgit2``
    :param commit_data: dict read for the keys ``author``, ``committer``,
        ``message``, ``commit_time`` and ``commit_timezone``
    :param branch: short branch name the commit is created on
    :param commit_tree: not read; the tree is taken from the written index
    :param updated: iterable of dicts with the keys ``content``, ``path`` and ``mode``
    :param removed: iterable of paths removed from the index
    :return: id of the new commit as returned by ``self.create_commit``
    """

    def mode2pygit(mode):
        """
        Translate a stat-style file mode into a pygit2 filemode constant.

        0o100755 -> 33261
        0o100644 -> 33188

        Modes that are not in the table fall back to a regular blob.
        """
        known_modes = {
            0o100644: pygit2.GIT_FILEMODE_BLOB,
            0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
            0o120000: pygit2.GIT_FILEMODE_LINK
        }
        return known_modes.get(mode) or pygit2.GIT_FILEMODE_BLOB

    with self._factory.repo_libgit2(wire) as repo:
        repo_index = repo.index

        for node in updated:
            blob_id = repo.create_blob(node['content'])
            entry = pygit2.IndexEntry(node['path'], blob_id, mode2pygit(node['mode']))
            repo_index.add(entry)

        for removed_path in removed:
            repo_index.remove(removed_path)

        # Write changes to the index
        repo_index.write()

        # Create a tree from the updated index
        new_tree_id = repo_index.write_tree()

    date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]

    new_commit_id = self.create_commit(
        wire,
        commit_data['author'],
        commit_data['committer'],
        commit_data['message'],
        branch,
        new_tree_id,
        date_args=date_args)

    # libgit2, ensure the branch is there and exists
    self.create_branch(wire, branch, new_commit_id)

    # libgit2, set new ref to this created commit
    self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)

    return new_commit_id
613 671
614 672 @reraise_safe_exceptions
615 673 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
616 674 if url != 'default' and '://' not in url:
617 675 client = LocalGitClient(url)
618 676 else:
619 url_obj = url_parser(url)
677 url_obj = url_parser(safe_bytes(url))
620 678 o = self._build_opener(url)
621 url, _ = url_obj.authinfo()
679 url = url_obj.authinfo()[0]
622 680 client = HttpGitClient(base_url=url, opener=o)
623 681 repo = self._factory.repo(wire)
624 682
@@ -674,6 +732,9 b' class GitRemote(RemoteBase):'
674 732 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
675 733 index.build_index_from_tree(repo.path, repo.index_path(),
676 734 repo.object_store, repo[HEAD_MARKER].tree)
735
736 if isinstance(remote_refs, FetchPackResult):
737 return remote_refs.refs
677 738 return remote_refs
678 739
679 740 @reraise_safe_exceptions
@@ -759,11 +820,11 b' class GitRemote(RemoteBase):'
759 820 wire_remote = wire.copy()
760 821 wire_remote['path'] = path2
761 822 repo_remote = self._factory.repo(wire_remote)
762 LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)
823 LocalGitClient(thin_packs=False).fetch(path2, repo_remote)
763 824
764 825 revs = [
765 826 x.commit.id
766 for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
827 for x in repo_remote.get_walker(include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])]
767 828 return revs
768 829
769 830 @reraise_safe_exceptions
@@ -815,11 +876,11 b' class GitRemote(RemoteBase):'
815 876 raise exceptions.LookupException(e)(missing_commit_err)
816 877
817 878 commit_id = commit.hex
818 type_id = commit.type
879 type_str = commit.type_str
819 880
820 881 return {
821 882 'id': commit_id,
822 'type': self._type_id_to_name(type_id),
883 'type': type_str,
823 884 'commit_id': commit_id,
824 885 'idx': 0
825 886 }
@@ -1018,7 +1079,11 b' class GitRemote(RemoteBase):'
1018 1079 def create_branch(self, wire, branch_name, commit_id, force=False):
1019 1080 repo_init = self._factory.repo_libgit2(wire)
1020 1081 with repo_init as repo:
1082 if commit_id:
1021 1083 commit = repo[commit_id]
1084 else:
1085 # if commit is not given just use the HEAD
1086 commit = repo.head()
1022 1087
1023 1088 if force:
1024 1089 repo.branches.local.create(branch_name, commit, force=force)
@@ -1041,12 +1106,27 b' class GitRemote(RemoteBase):'
1041 1106
@reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Classify the paths that differ between the trees of two commits.

    :param source_id: id of the source commit; may be empty, in which case
        the target tree is compared against ``None``
    :param target_id: id of the target commit
    :return: tuple ``(added, modified, deleted)`` of path lists; the lists are
        built from sets, so their order is not defined
    """
    repo = self._factory.repo(wire)
    # source can be empty
    source_id = safe_bytes(source_id if source_id else b'')
    target_id = safe_bytes(target_id)

    source = repo[source_id].tree if source_id else None
    target = repo[target_id].tree
    changes = repo.object_store.tree_changes(source, target)

    added, modified, deleted = set(), set(), set()
    for (old_path, new_path), (_, _), (_, _) in changes:
        if new_path:
            # a path present on both sides counts as modified
            bucket = modified if old_path else added
            bucket.add(new_path)
        elif old_path:
            deleted.add(old_path)

    return list(added), list(modified), list(deleted)
1050 1130
1051 1131 @reraise_safe_exceptions
1052 1132 def tree_and_type_for_path(self, wire, commit_id, path):
@@ -1167,10 +1247,11 b' class GitRemote(RemoteBase):'
1167 1247 if file_filter:
1168 1248 for p in diff_obj:
1169 1249 if p.delta.old_file.path == file_filter:
1170 return BinaryEnvelope(p.data) or BinaryEnvelope(b'')
1250 return BytesEnvelope(p.data) or BytesEnvelope(b'')
1171 1251 # no matching path == no diff
1172 return BinaryEnvelope(b'')
1173 return BinaryEnvelope(diff_obj.patch) or BinaryEnvelope(b'')
1252 return BytesEnvelope(b'')
1253
1254 return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')
1174 1255
1175 1256 @reraise_safe_exceptions
1176 1257 def node_history(self, wire, commit_id, path, limit):
@@ -1346,8 +1427,8 b' class GitRemote(RemoteBase):'
1346 1427 return [head_name] + [f'set HEAD to refs/heads/{head_name}']
1347 1428
1348 1429 @reraise_safe_exceptions
1349 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
1350 archive_dir_name, commit_id):
1430 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1431 archive_dir_name, commit_id, cache_config):
1351 1432
1352 1433 def file_walker(_commit_id, path):
1353 1434 repo_init = self._factory.repo_libgit2(wire)
@@ -1378,5 +1459,5 b' class GitRemote(RemoteBase):'
1378 1459 continue
1379 1460 yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)
1380 1461
1381 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
1382 archive_dir_name, commit_id)
1462 return store_archive_in_cache(
1463 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
@@ -32,7 +32,8 b' from mercurial import repair'
32 32
33 33 import vcsserver
34 34 from vcsserver import exceptions
35 from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original, archive_repo, ArchiveNode, BinaryEnvelope
35 from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original, store_archive_in_cache, ArchiveNode, BytesEnvelope, \
36 BinaryEnvelope
36 37 from vcsserver.hgcompat import (
37 38 archival, bin, clone, config as hgconfig, diffopts, hex, get_ctx,
38 39 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler,
@@ -42,6 +43,8 b' from vcsserver.hgcompat import ('
42 43 alwaysmatcher, patternmatcher, hgutil, hgext_strip)
43 44 from vcsserver.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes
44 45 from vcsserver.vcs_base import RemoteBase
46 from vcsserver.config import hooks as hooks_config
47
45 48
46 49 log = logging.getLogger(__name__)
47 50
@@ -137,9 +140,18 b' class MercurialFactory(RepoFactory):'
137 140
138 141 def _create_config(self, config, hooks=True):
139 142 if not hooks:
140 hooks_to_clean = frozenset((
141 'changegroup.repo_size', 'preoutgoing.pre_pull',
142 'outgoing.pull_logger', 'prechangegroup.pre_push'))
143
144 hooks_to_clean = {
145
146 hooks_config.HOOK_REPO_SIZE,
147 hooks_config.HOOK_PRE_PULL,
148 hooks_config.HOOK_PULL,
149
150 hooks_config.HOOK_PRE_PUSH,
151 # TODO: what about PRETXT, this was disabled in pre 5.0.0
152 hooks_config.HOOK_PRETX_PUSH,
153
154 }
143 155 new_config = []
144 156 for section, option, value in config:
145 157 if section == 'hooks' and option in hooks_to_clean:
@@ -178,6 +190,22 b' def patch_ui_message_output(baseui):'
178 190 return baseui, output
179 191
180 192
def get_obfuscated_url(url_obj):
    """
    Return a string form of ``url_obj`` that is safe to write to logs.

    The password (when set) is replaced with ``b'*****'`` and the query string
    is passed through ``obfuscate_qs``. The masking is applied to a shallow
    copy, so the caller's ``url_obj`` keeps its real credentials and can still
    be used (e.g. for ``authinfo()``) after this call.

    :param url_obj: parsed url object exposing ``passwd`` and ``query`` attributes
    :return: ``str(url_obj)`` of the masked copy
    """
    # local import: the module-level import block is not touched by this change
    import copy

    masked = copy.copy(url_obj)
    if masked.passwd:
        masked.passwd = b'*****'
    masked.query = obfuscate_qs(masked.query)
    return str(masked)
198
199
def normalize_url_for_hg(url: str):
    """
    Split an optional ``<proto>+`` prefix off the scheme part of ``url``.

    ``'svn+http://host/repo'`` becomes ``('http://host/repo', 'svn')``. Only the
    text before ``'://'`` is inspected, so a ``'+'`` elsewhere in the URL, or
    in a value without any ``'://'`` (e.g. a local path), leaves the input
    untouched.

    :param url: url or path as given by the caller
    :return: tuple ``(url, proto)``; ``proto`` is ``None`` when there is no prefix
    """
    _proto = None

    # partition() reports a missing '://' explicitly; str.find() would return
    # -1 there and make the scheme slice cover almost the whole string.
    scheme, separator, _ = url.partition('://')
    if separator and '+' in scheme:
        _proto, _, url = url.partition('+')
    return url, _proto
207
208
181 209 class HgRemote(RemoteBase):
182 210
183 211 def __init__(self, factory):
@@ -196,6 +224,13 b' class HgRemote(RemoteBase):'
196 224 "hidden": self.ctx_hidden,
197 225 "_file_paths": self.ctx_list,
198 226 }
227 self._bulk_file_methods = {
228 "size": self.fctx_size,
229 "data": self.fctx_node_data,
230 "flags": self.fctx_flags,
231 "is_binary": self.is_binary,
232 "md5": self.md5_hash,
233 }
199 234
200 235 def _get_ctx(self, repo, ref):
201 236 return get_ctx(repo, ref)
@@ -405,19 +440,15 b' class HgRemote(RemoteBase):'
405 440
406 441 @reraise_safe_exceptions
407 442 def check_url(self, url, config):
408 _proto = None
409 if '+' in url[:url.find('://')]:
410 _proto = url[0:url.find('+')]
411 url = url[url.find('+') + 1:]
443 url, _proto = normalize_url_for_hg(url)
444 url_obj = url_parser(safe_bytes(url))
445
446 test_uri = safe_str(url_obj.authinfo()[0])
447 authinfo = url_obj.authinfo()[1]
448 obfuscated_uri = get_obfuscated_url(url_obj)
449 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
450
412 451 handlers = []
413 url_obj = url_parser(url)
414 test_uri, authinfo = url_obj.authinfo()
415 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
416 url_obj.query = obfuscate_qs(url_obj.query)
417
418 cleaned_uri = str(url_obj)
419 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
420
421 452 if authinfo:
422 453 # create a password manager
423 454 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
@@ -437,14 +468,14 b' class HgRemote(RemoteBase):'
437 468 req = urllib.request.Request(cu, None, {})
438 469
439 470 try:
440 log.debug("Trying to open URL %s", cleaned_uri)
471 log.debug("Trying to open URL %s", obfuscated_uri)
441 472 resp = o.open(req)
442 473 if resp.code != 200:
443 474 raise exceptions.URLError()('Return Code is not 200')
444 475 except Exception as e:
445 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
476 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
446 477 # means it cannot be cloned
447 raise exceptions.URLError(e)("[{}] org_exc: {}".format(cleaned_uri, e))
478 raise exceptions.URLError(e)("[{}] org_exc: {}".format(obfuscated_uri, e))
448 479
449 480 # now check if it's a proper hg repo, but don't do it for svn
450 481 try:
@@ -453,19 +484,18 b' class HgRemote(RemoteBase):'
453 484 else:
454 485 # check for pure hg repos
455 486 log.debug(
456 "Verifying if URL is a Mercurial repository: %s",
457 cleaned_uri)
487 "Verifying if URL is a Mercurial repository: %s", obfuscated_uri)
458 488 ui = make_ui_from_config(config)
459 peer_checker = makepeer(ui, url)
460 peer_checker.lookup('tip')
489 peer_checker = makepeer(ui, safe_bytes(url))
490 peer_checker.lookup(b'tip')
461 491 except Exception as e:
462 492 log.warning("URL is not a valid Mercurial repository: %s",
463 cleaned_uri)
493 obfuscated_uri)
464 494 raise exceptions.URLError(e)(
465 495 "url [%s] does not look like an hg repo org_exc: %s"
466 % (cleaned_uri, e))
496 % (obfuscated_uri, e))
467 497
468 log.info("URL is a valid Mercurial repository: %s", cleaned_uri)
498 log.info("URL is a valid Mercurial repository: %s", obfuscated_uri)
469 499 return True
470 500
471 501 @reraise_safe_exceptions
@@ -483,7 +513,7 b' class HgRemote(RemoteBase):'
483 513 try:
484 514 diff_iter = patch.diff(
485 515 repo, node1=commit_id_1, node2=commit_id_2, match=match_filter, opts=opts)
486 return BinaryEnvelope(b"".join(diff_iter))
516 return BytesEnvelope(b"".join(diff_iter))
487 517 except RepoLookupError as e:
488 518 raise exceptions.LookupException(e)()
489 519
@@ -539,6 +569,25 b' class HgRemote(RemoteBase):'
539 569 return _node_history_until(context_uid, repo_id, revision, path, limit)
540 570
@reraise_safe_exceptions
def bulk_file_request(self, wire, commit_id, path, pre_load):
    """
    Collect several file attributes for ``path`` at ``commit_id`` in one call.

    :param wire: wire dict; its ``cache`` key is set to ``False`` before each
        per-attribute call so only the bulk result is cached
    :param commit_id: commit the file is looked up in
    :param path: path of the file
    :param pre_load: iterable of attribute names, each a key of
        ``self._bulk_file_methods``
    :return: ``BinaryEnvelope`` wrapping a dict ``{attribute: value}``
    :raises: ``exceptions.VcsException`` for an attribute name that is not
        registered in ``self._bulk_file_methods``
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
        result = {}
        for attr in pre_load:
            # Guard only the registry lookup: a KeyError raised while the
            # accessor itself runs must not be reported as an unknown attribute.
            try:
                method = self._bulk_file_methods[attr]
            except KeyError as e:
                raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')

            wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
            result[attr] = method(wire, _commit_id, _path)
        return BinaryEnvelope(result)

    return _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
589
590 @reraise_safe_exceptions
542 591 def fctx_annotate(self, wire, revision, path):
543 592 repo = self._factory.repo(wire)
544 593 ctx = self._get_ctx(repo, revision)
@@ -557,7 +606,7 b' class HgRemote(RemoteBase):'
557 606 repo = self._factory.repo(wire)
558 607 ctx = self._get_ctx(repo, revision)
559 608 fctx = ctx.filectx(safe_bytes(path))
560 return BinaryEnvelope(fctx.data())
609 return BytesEnvelope(fctx.data())
561 610
562 611 @reraise_safe_exceptions
563 612 def fctx_flags(self, wire, commit_id, path):
@@ -674,7 +723,6 b' class HgRemote(RemoteBase):'
674 723
675 724 @region.conditional_cache_on_arguments(condition=cache_on)
676 725 def _lookup(_context_uid, _repo_id, _revision, _both):
677
678 726 repo = self._factory.repo(wire)
679 727 rev = _revision
680 728 if isinstance(rev, int):
@@ -949,35 +997,38 b' class HgRemote(RemoteBase):'
949 997 # Mercurial internally has a lot of logic that checks ONLY if
950 998 # option is defined, we just pass those if they are defined then
951 999 opts = {}
1000
952 1001 if bookmark:
953 if isinstance(branch, list):
954 bookmark = [safe_bytes(x) for x in bookmark]
955 else:
956 bookmark = safe_bytes(bookmark)
957 opts['bookmark'] = bookmark
1002 opts['bookmark'] = [safe_bytes(x) for x in bookmark] \
1003 if isinstance(bookmark, list) else safe_bytes(bookmark)
1004
958 1005 if branch:
959 if isinstance(branch, list):
960 branch = [safe_bytes(x) for x in branch]
961 else:
962 branch = safe_bytes(branch)
963 opts['branch'] = branch
1006 opts['branch'] = [safe_bytes(x) for x in branch] \
1007 if isinstance(branch, list) else safe_bytes(branch)
1008
964 1009 if revision:
965 opts['rev'] = safe_bytes(revision)
1010 opts['rev'] = [safe_bytes(x) for x in revision] \
1011 if isinstance(revision, list) else safe_bytes(revision)
966 1012
967 1013 commands.pull(baseui, repo, source, **opts)
968 1014
@reraise_safe_exceptions
def push(self, wire, revisions, dest_path, hooks: bool = True, push_branches: bool = False):
    """
    Push ``revisions`` to ``dest_path`` using ``commands.push``.

    :param revisions: a list of revisions (each converted with ``safe_bytes``)
        or a single value converted as a whole
    :param dest_path: destination, converted with ``safe_bytes``
    :param hooks: forwarded to ``self._factory._create_config``
    :param push_branches: forwarded to ``commands.push`` as ``new_branch``
    """
    repo = self._factory.repo(wire)
    baseui = self._factory._create_config(wire['config'], hooks=hooks)

    if isinstance(revisions, list):
        revisions = [safe_bytes(rev) for rev in revisions]
    else:
        revisions = safe_bytes(revisions)

    destination = safe_bytes(dest_path)
    commands.push(baseui, repo, destination, rev=revisions, new_branch=push_branches)
975 1026
976 1027 @reraise_safe_exceptions
977 1028 def strip(self, wire, revision, update, backup):
978 1029 repo = self._factory.repo(wire)
979 1030 ctx = self._get_ctx(repo, revision)
980 hgext_strip(
1031 hgext_strip.strip(
981 1032 repo.baseui, repo, ctx.node(), update=update, backup=backup)
982 1033
983 1034 @reraise_safe_exceptions
@@ -1008,7 +1059,7 b' class HgRemote(RemoteBase):'
1008 1059 # setting the interactive flag to `False` mercurial doesn't prompt the
1009 1060 # user but instead uses a default value.
1010 1061 repo.ui.setconfig(b'ui', b'interactive', False)
1011 commands.merge(baseui, repo, rev=revision)
1062 commands.merge(baseui, repo, rev=safe_bytes(revision))
1012 1063
1013 1064 @reraise_safe_exceptions
1014 1065 def merge_state(self, wire):
@@ -1027,11 +1078,11 b' class HgRemote(RemoteBase):'
1027 1078 def commit(self, wire, message, username, close_branch=False):
1028 1079 repo = self._factory.repo(wire)
1029 1080 baseui = self._factory._create_config(wire['config'])
1030 repo.ui.setconfig(b'ui', b'username', username)
1031 commands.commit(baseui, repo, message=message, close_branch=close_branch)
1081 repo.ui.setconfig(b'ui', b'username', safe_bytes(username))
1082 commands.commit(baseui, repo, message=safe_bytes(message), close_branch=close_branch)
1032 1083
1033 1084 @reraise_safe_exceptions
1034 def rebase(self, wire, source=None, dest=None, abort=False):
1085 def rebase(self, wire, source='', dest='', abort=False):
1035 1086 repo = self._factory.repo(wire)
1036 1087 baseui = self._factory._create_config(wire['config'])
1037 1088 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
@@ -1040,7 +1091,9 b' class HgRemote(RemoteBase):'
1040 1091 # setting the interactive flag to `False` mercurial doesn't prompt the
1041 1092 # user but instead uses a default value.
1042 1093 repo.ui.setconfig(b'ui', b'interactive', False)
1043 rebase.rebase(baseui, repo, base=source, dest=dest, abort=abort, keep=not abort)
1094
1095 rebase.rebase(baseui, repo, base=safe_bytes(source or ''), dest=safe_bytes(dest or ''),
1096 abort=abort, keep=not abort)
1044 1097
1045 1098 @reraise_safe_exceptions
1046 1099 def tag(self, wire, name, revision, message, local, user, tag_time, tag_timezone):
@@ -1050,7 +1103,7 b' class HgRemote(RemoteBase):'
1050 1103
1051 1104 date = (tag_time, tag_timezone)
1052 1105 try:
1053 hg_tag.tag(repo, name, node, message, local, user, date)
1106 hg_tag.tag(repo, safe_bytes(name), node, safe_bytes(message), local, safe_bytes(user), date)
1054 1107 except Abort as e:
1055 1108 log.exception("Tag operation aborted")
1056 1109 # Exception can contain unicode which we convert
@@ -1060,6 +1113,7 b' class HgRemote(RemoteBase):'
1060 1113 def bookmark(self, wire, bookmark, revision=''):
1061 1114 repo = self._factory.repo(wire)
1062 1115 baseui = self._factory._create_config(wire['config'])
1116 revision = revision or ''
1063 1117 commands.bookmark(baseui, repo, safe_bytes(bookmark), rev=safe_bytes(revision), force=True)
1064 1118
1065 1119 @reraise_safe_exceptions
@@ -1079,8 +1133,8 b' class HgRemote(RemoteBase):'
1079 1133 pass
1080 1134
1081 1135 @reraise_safe_exceptions
1082 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
1083 archive_dir_name, commit_id):
1136 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1137 archive_dir_name, commit_id, cache_config):
1084 1138
1085 1139 def file_walker(_commit_id, path):
1086 1140 repo = self._factory.repo(wire)
@@ -1100,6 +1154,6 b' class HgRemote(RemoteBase):'
1100 1154
1101 1155 yield ArchiveNode(file_path, mode, is_link, ctx[fn].data)
1102 1156
1103 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
1104 archive_dir_name, commit_id)
1157 return store_archive_in_cache(
1158 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
1105 1159
@@ -37,9 +37,10 b' import svn.fs # noqa'
37 37 import svn.repos # noqa
38 38
39 39 from vcsserver import svn_diff, exceptions, subprocessio, settings
40 from vcsserver.base import RepoFactory, raise_from_original, ArchiveNode, archive_repo, BinaryEnvelope
40 from vcsserver.base import RepoFactory, raise_from_original, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
41 41 from vcsserver.exceptions import NoContentException
42 42 from vcsserver.str_utils import safe_str, safe_bytes
43 from vcsserver.type_utils import assert_bytes
43 44 from vcsserver.vcs_base import RemoteBase
44 45 from vcsserver.lib.svnremoterepo import svnremoterepo
45 46 log = logging.getLogger(__name__)
@@ -109,6 +110,39 b' class SvnRemote(RemoteBase):'
109 110 def __init__(self, factory, hg_factory=None):
110 111 self._factory = factory
111 112
113 self._bulk_methods = {
114 # NOT supported in SVN ATM...
115 }
116 self._bulk_file_methods = {
117 "size": self.get_file_size,
118 "data": self.get_file_content,
119 "flags": self.get_node_type,
120 "is_binary": self.is_binary,
121 "md5": self.md5_hash
122 }
123
124 @reraise_safe_exceptions
125 def bulk_file_request(self, wire, commit_id, path, pre_load):
126 cache_on, context_uid, repo_id = self._cache_on(wire)
127 region = self._region(wire)
128
129 # since we use unified API, we need to cast from str to int for SVN
130 commit_id = int(commit_id)
131
132 @region.conditional_cache_on_arguments(condition=cache_on)
133 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
134 result = {}
135 for attr in pre_load:
136 try:
137 method = self._bulk_file_methods[attr]
138 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
139 result[attr] = method(wire, _commit_id, _path)
140 except KeyError as e:
141 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
142 return BinaryEnvelope(result)
143
144 return _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
145
112 146 @reraise_safe_exceptions
113 147 def discover_svn_version(self):
114 148 try:
@@ -120,25 +154,23 b' class SvnRemote(RemoteBase):'
120 154
121 155 @reraise_safe_exceptions
122 156 def is_empty(self, wire):
123
124 157 try:
125 158 return self.lookup(wire, -1) == 0
126 159 except Exception:
127 160 log.exception("failed to read object_store")
128 161 return False
129 162
130 def check_url(self, url):
163 def check_url(self, url, config):
131 164
132 # uuid function get's only valid UUID from proper repo, else
165 # uuid function gets only valid UUID from proper repo, else
133 166 # throws exception
134 167 username, password, src_url = self.get_url_and_credentials(url)
135 168 try:
136 svnremoterepo(username, password, src_url).svn().uuid
169 svnremoterepo(safe_bytes(username), safe_bytes(password), safe_bytes(src_url)).svn().uuid
137 170 except Exception:
138 171 tb = traceback.format_exc()
139 172 log.debug("Invalid Subversion url: `%s`, tb: %s", url, tb)
140 raise URLError(
141 '"{}" is not a valid Subversion source url.'.format(url))
173 raise URLError(f'"{url}" is not a valid Subversion source url.')
142 174 return True
143 175
144 176 def is_path_valid_repository(self, wire, path):
@@ -169,6 +201,7 b' class SvnRemote(RemoteBase):'
169 201 stdout, stderr = subprocessio.run_command(cmd)
170 202 return stdout
171 203
204 @reraise_safe_exceptions
172 205 def lookup(self, wire, revision):
173 206 if revision not in [-1, None, 'HEAD']:
174 207 raise NotImplementedError
@@ -177,6 +210,7 b' class SvnRemote(RemoteBase):'
177 210 head = svn.fs.youngest_rev(fs_ptr)
178 211 return head
179 212
213 @reraise_safe_exceptions
180 214 def lookup_interval(self, wire, start_ts, end_ts):
181 215 repo = self._factory.repo(wire)
182 216 fsobj = svn.repos.fs(repo)
@@ -194,10 +228,12 b' class SvnRemote(RemoteBase):'
194 228 end_rev = svn.fs.youngest_rev(fsobj)
195 229 return start_rev, end_rev
196 230
231 @reraise_safe_exceptions
197 232 def revision_properties(self, wire, revision):
198 233
199 234 cache_on, context_uid, repo_id = self._cache_on(wire)
200 235 region = self._region(wire)
236
201 237 @region.conditional_cache_on_arguments(condition=cache_on)
202 238 def _revision_properties(_repo_id, _revision):
203 239 repo = self._factory.repo(wire)
@@ -253,6 +289,7 b' class SvnRemote(RemoteBase):'
253 289 def node_history(self, wire, path, revision, limit):
254 290 cache_on, context_uid, repo_id = self._cache_on(wire)
255 291 region = self._region(wire)
292
256 293 @region.conditional_cache_on_arguments(condition=cache_on)
257 294 def _assert_correct_path(_context_uid, _repo_id, _path, _revision, _limit):
258 295 cross_copies = False
@@ -272,6 +309,7 b' class SvnRemote(RemoteBase):'
272 309 return history_revisions
273 310 return _assert_correct_path(context_uid, repo_id, path, revision, limit)
274 311
312 @reraise_safe_exceptions
275 313 def node_properties(self, wire, path, revision):
276 314 cache_on, context_uid, repo_id = self._cache_on(wire)
277 315 region = self._region(wire)
@@ -311,13 +349,14 b' class SvnRemote(RemoteBase):'
311 349
312 350 return annotations
313 351
314 def get_node_type(self, wire, path, revision=None):
352 @reraise_safe_exceptions
353 def get_node_type(self, wire, revision=None, path=''):
315 354
316 355 cache_on, context_uid, repo_id = self._cache_on(wire)
317 356 region = self._region(wire)
318 357
319 358 @region.conditional_cache_on_arguments(condition=cache_on)
320 def _get_node_type(_repo_id, _path, _revision):
359 def _get_node_type(_repo_id, _revision, _path):
321 360 repo = self._factory.repo(wire)
322 361 fs_ptr = svn.repos.fs(repo)
323 362 if _revision is None:
@@ -325,9 +364,10 b' class SvnRemote(RemoteBase):'
325 364 root = svn.fs.revision_root(fs_ptr, _revision)
326 365 node = svn.fs.check_path(root, path)
327 366 return NODE_TYPE_MAPPING.get(node, None)
328 return _get_node_type(repo_id, path, revision)
367 return _get_node_type(repo_id, revision, path)
329 368
330 def get_nodes(self, wire, path, revision=None):
369 @reraise_safe_exceptions
370 def get_nodes(self, wire, revision=None, path=''):
331 371
332 372 cache_on, context_uid, repo_id = self._cache_on(wire)
333 373 region = self._region(wire)
@@ -347,22 +387,26 b' class SvnRemote(RemoteBase):'
347 387 return result
348 388 return _get_nodes(repo_id, path, revision)
349 389
350 def get_file_content(self, wire, path, rev=None):
390 @reraise_safe_exceptions
391 def get_file_content(self, wire, rev=None, path=''):
351 392 repo = self._factory.repo(wire)
352 393 fsobj = svn.repos.fs(repo)
394
353 395 if rev is None:
354 rev = svn.fs.youngest_revision(fsobj)
396 rev = svn.fs.youngest_rev(fsobj)
397
355 398 root = svn.fs.revision_root(fsobj, rev)
356 399 content = svn.core.Stream(svn.fs.file_contents(root, path))
357 return BinaryEnvelope(content.read())
400 return BytesEnvelope(content.read())
358 401
359 def get_file_size(self, wire, path, revision=None):
402 @reraise_safe_exceptions
403 def get_file_size(self, wire, revision=None, path=''):
360 404
361 405 cache_on, context_uid, repo_id = self._cache_on(wire)
362 406 region = self._region(wire)
363 407
364 408 @region.conditional_cache_on_arguments(condition=cache_on)
365 def _get_file_size(_repo_id, _path, _revision):
409 def _get_file_size(_repo_id, _revision, _path):
366 410 repo = self._factory.repo(wire)
367 411 fsobj = svn.repos.fs(repo)
368 412 if _revision is None:
@@ -370,17 +414,17 b' class SvnRemote(RemoteBase):'
370 414 root = svn.fs.revision_root(fsobj, _revision)
371 415 size = svn.fs.file_length(root, path)
372 416 return size
373 return _get_file_size(repo_id, path, revision)
417 return _get_file_size(repo_id, revision, path)
374 418
375 419 def create_repository(self, wire, compatible_version=None):
376 420 log.info('Creating Subversion repository in path "%s"', wire['path'])
377 421 self._factory.repo(wire, create=True,
378 422 compatible_version=compatible_version)
379 423
380 def get_url_and_credentials(self, src_url):
424 def get_url_and_credentials(self, src_url) -> tuple[str, str, str]:
381 425 obj = urllib.parse.urlparse(src_url)
382 username = obj.username or None
383 password = obj.password or None
426 username = obj.username or ''
427 password = obj.password or ''
384 428 return username, password, src_url
385 429
386 430 def import_remote_repository(self, wire, src_url):
@@ -430,8 +474,6 b' class SvnRemote(RemoteBase):'
430 474
431 475 def commit(self, wire, message, author, timestamp, updated, removed):
432 476
433 updated = [{k: safe_bytes(v) for k, v in x.items() if isinstance(v, str)} for x in updated]
434
435 477 message = safe_bytes(message)
436 478 author = safe_bytes(author)
437 479
@@ -450,13 +492,14 b' class SvnRemote(RemoteBase):'
450 492 commit_id = svn.repos.fs_commit_txn(repo, txn)
451 493
452 494 if timestamp:
453 apr_time = int(apr_time_t(timestamp))
495 apr_time = apr_time_t(timestamp)
454 496 ts_formatted = svn.core.svn_time_to_cstring(apr_time)
455 497 svn.fs.change_rev_prop(fsobj, commit_id, 'svn:date', ts_formatted)
456 498
457 499 log.debug('Committed revision "%s" to "%s".', commit_id, wire['path'])
458 500 return commit_id
459 501
502 @reraise_safe_exceptions
460 503 def diff(self, wire, rev1, rev2, path1=None, path2=None,
461 504 ignore_whitespace=False, context=3):
462 505
@@ -465,12 +508,12 b' class SvnRemote(RemoteBase):'
465 508 diff_creator = SvnDiffer(
466 509 repo, rev1, path1, rev2, path2, ignore_whitespace, context)
467 510 try:
468 return BinaryEnvelope(diff_creator.generate_diff())
511 return BytesEnvelope(diff_creator.generate_diff())
469 512 except svn.core.SubversionException as e:
470 513 log.exception(
471 514 "Error during diff operation operation. "
472 515 "Path might not exist %s, %s", path1, path2)
473 return BinaryEnvelope(b'')
516 return BytesEnvelope(b'')
474 517
475 518 @reraise_safe_exceptions
476 519 def is_large_file(self, wire, path):
@@ -483,8 +526,10 b' class SvnRemote(RemoteBase):'
483 526
484 527 @region.conditional_cache_on_arguments(condition=cache_on)
485 528 def _is_binary(_repo_id, _rev, _path):
486 raw_bytes = self.get_file_content(wire, path, rev)
487 return raw_bytes and b'\0' in raw_bytes
529 raw_bytes = self.get_file_content(wire, rev, path)
530 if not raw_bytes:
531 return False
532 return b'\0' in raw_bytes
488 533
489 534 return _is_binary(repo_id, rev, path)
490 535
@@ -555,8 +600,8 b' class SvnRemote(RemoteBase):'
555 600 pass
556 601
557 602 @reraise_safe_exceptions
558 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
559 archive_dir_name, commit_id):
603 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
604 archive_dir_name, commit_id, cache_config):
560 605
561 606 def walk_tree(root, root_dir, _commit_id):
562 607 """
@@ -616,8 +661,8 b' class SvnRemote(RemoteBase):'
616 661 data_stream = f_data['content_stream']
617 662 yield ArchiveNode(file_path, mode, is_link, data_stream)
618 663
619 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
620 archive_dir_name, commit_id)
664 return store_archive_in_cache(
665 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
621 666
622 667
623 668 class SvnDiffer(object):
@@ -658,15 +703,15 b' class SvnDiffer(object):'
658 703 "Source type: %s, target type: %s" %
659 704 (self.src_kind, self.tgt_kind))
660 705
661 def generate_diff(self):
662 buf = io.StringIO()
706 def generate_diff(self) -> bytes:
707 buf = io.BytesIO()
663 708 if self.tgt_kind == svn.core.svn_node_dir:
664 709 self._generate_dir_diff(buf)
665 710 else:
666 711 self._generate_file_diff(buf)
667 712 return buf.getvalue()
668 713
669 def _generate_dir_diff(self, buf):
714 def _generate_dir_diff(self, buf: io.BytesIO):
670 715 editor = DiffChangeEditor()
671 716 editor_ptr, editor_baton = svn.delta.make_editor(editor)
672 717 svn.repos.dir_delta2(
@@ -687,7 +732,7 b' class SvnDiffer(object):'
687 732 self._generate_node_diff(
688 733 buf, change, path, self.tgt_path, path, self.src_path)
689 734
690 def _generate_file_diff(self, buf):
735 def _generate_file_diff(self, buf: io.BytesIO):
691 736 change = None
692 737 if self.src_kind == svn.core.svn_node_none:
693 738 change = "add"
@@ -699,13 +744,14 b' class SvnDiffer(object):'
699 744 buf, change, tgt_path, tgt_base, src_path, src_base)
700 745
701 746 def _generate_node_diff(
702 self, buf, change, tgt_path, tgt_base, src_path, src_base):
703
747 self, buf: io.BytesIO, change, tgt_path, tgt_base, src_path, src_base):
704 748
749 tgt_path_bytes = safe_bytes(tgt_path)
705 750 tgt_path = safe_str(tgt_path)
751
752 src_path_bytes = safe_bytes(src_path)
706 753 src_path = safe_str(src_path)
707 754
708
709 755 if self.src_rev == self.tgt_rev and tgt_base == src_base:
710 756 # makes consistent behaviour with git/hg to return empty diff if
711 757 # we compare same revisions
@@ -717,46 +763,45 b' class SvnDiffer(object):'
717 763 self.binary_content = False
718 764 mime_type = self._get_mime_type(tgt_full_path)
719 765
720 if mime_type and not mime_type.startswith('text'):
766 if mime_type and not mime_type.startswith(b'text'):
721 767 self.binary_content = True
722 buf.write("=" * 67 + '\n')
723 buf.write("Cannot display: file marked as a binary type.\n")
724 buf.write("svn:mime-type = %s\n" % mime_type)
725 buf.write("Index: {}\n".format(tgt_path))
726 buf.write("=" * 67 + '\n')
727 buf.write("diff --git a/{tgt_path} b/{tgt_path}\n".format(
728 tgt_path=tgt_path))
768 buf.write(b"=" * 67 + b'\n')
769 buf.write(b"Cannot display: file marked as a binary type.\n")
770 buf.write(b"svn:mime-type = %s\n" % mime_type)
771 buf.write(b"Index: %b\n" % tgt_path_bytes)
772 buf.write(b"=" * 67 + b'\n')
773 buf.write(b"diff --git a/%b b/%b\n" % (tgt_path_bytes, tgt_path_bytes))
729 774
730 775 if change == 'add':
731 776 # TODO: johbo: SVN is missing a zero here compared to git
732 buf.write("new file mode 10644\n")
777 buf.write(b"new file mode 10644\n")
778
779 # TODO(marcink): intro to binary detection of svn patches
780 # if self.binary_content:
781 # buf.write(b'GIT binary patch\n')
782
783 buf.write(b"--- /dev/null\t(revision 0)\n")
784 src_lines = []
785 else:
786 if change == 'delete':
787 buf.write(b"deleted file mode 10644\n")
733 788
734 789 #TODO(marcink): intro to binary detection of svn patches
735 790 # if self.binary_content:
736 791 # buf.write('GIT binary patch\n')
737 792
738 buf.write("--- /dev/null\t(revision 0)\n")
739 src_lines = []
740 else:
741 if change == 'delete':
742 buf.write("deleted file mode 10644\n")
743
744 #TODO(marcink): intro to binary detection of svn patches
745 # if self.binary_content:
746 # buf.write('GIT binary patch\n')
747
748 buf.write("--- a/{}\t(revision {})\n".format(
749 src_path, self.src_rev))
793 buf.write(b"--- a/%b\t(revision %d)\n" % (src_path_bytes, self.src_rev))
750 794 src_lines = self._svn_readlines(self.src_root, src_full_path)
751 795
752 796 if change == 'delete':
753 buf.write("+++ /dev/null\t(revision {})\n".format(self.tgt_rev))
797 buf.write(b"+++ /dev/null\t(revision %d)\n" % self.tgt_rev)
754 798 tgt_lines = []
755 799 else:
756 buf.write("+++ b/{}\t(revision {})\n".format(
757 tgt_path, self.tgt_rev))
800 buf.write(b"+++ b/%b\t(revision %d)\n" % (tgt_path_bytes, self.tgt_rev))
758 801 tgt_lines = self._svn_readlines(self.tgt_root, tgt_full_path)
759 802
803 # we made our diff header, time to generate the diff content into our buffer
804
760 805 if not self.binary_content:
761 806 udiff = svn_diff.unified_diff(
762 807 src_lines, tgt_lines, context=self.context,
@@ -766,7 +811,7 b' class SvnDiffer(object):'
766 811
767 812 buf.writelines(udiff)
768 813
769 def _get_mime_type(self, path):
814 def _get_mime_type(self, path) -> bytes:
770 815 try:
771 816 mime_type = svn.fs.node_prop(
772 817 self.tgt_root, path, svn.core.SVN_PROP_MIME_TYPE)
@@ -822,7 +867,7 b' class TxnNodeProcessor(object):'
822 867 """
823 868
824 869 def __init__(self, node, txn_root):
825 assert isinstance(node['path'], bytes)
870 assert_bytes(node['path'])
826 871
827 872 self.node = node
828 873 self.txn_root = txn_root
@@ -858,7 +903,7 b' class TxnNodeProcessor(object):'
858 903 svn.fs.make_file(self.txn_root, self.node['path'])
859 904
860 905 def _update_file_content(self):
861 assert isinstance(self.node['content'], bytes)
906 assert_bytes(self.node['content'])
862 907
863 908 handler, baton = svn.fs.apply_textdelta(
864 909 self.txn_root, self.node['path'], None, None)
@@ -868,14 +913,14 b' class TxnNodeProcessor(object):'
868 913 properties = self.node.get('properties', {})
869 914 for key, value in properties.items():
870 915 svn.fs.change_node_prop(
871 self.txn_root, self.node['path'], key, value)
916 self.txn_root, self.node['path'], safe_bytes(key), safe_bytes(value))
872 917
873 918
874 919 def apr_time_t(timestamp):
875 920 """
876 921 Convert a Python timestamp into APR timestamp type apr_time_t
877 922 """
878 return timestamp * 1E6
923 return int(timestamp * 1E6)
879 924
880 925
881 926 def svn_opt_revision_value_t(num):
@@ -16,15 +16,15 b''
16 16 import difflib
17 17
18 18
19 def get_filtered_hunks(fromlines, tolines, context=None,
20 ignore_blank_lines=False, ignore_case=False,
21 ignore_space_changes=False):
19 def get_filtered_hunks(from_lines, to_lines, context=None,
20 ignore_blank_lines: bool = False, ignore_case: bool = False,
21 ignore_space_changes: bool = False):
22 22 """Retrieve differences in the form of `difflib.SequenceMatcher`
23 23 opcodes, grouped according to the ``context`` and ``ignore_*``
24 24 parameters.
25 25
26 :param fromlines: list of lines corresponding to the old content
27 :param tolines: list of lines corresponding to the new content
26 :param from_lines: list of lines corresponding to the old content
27 :param to_lines: list of lines corresponding to the new content
28 28 :param ignore_blank_lines: differences about empty lines only are ignored
29 29 :param ignore_case: upper case / lower case only differences are ignored
30 30 :param ignore_space_changes: differences in amount of spaces are ignored
@@ -36,27 +36,27 b' def get_filtered_hunks(fromlines, toline'
36 36 to filter out the results will come straight from the
37 37 SequenceMatcher.
38 38 """
39 hunks = get_hunks(fromlines, tolines, context)
39 hunks = get_hunks(from_lines, to_lines, context)
40 40 if ignore_space_changes or ignore_case or ignore_blank_lines:
41 hunks = filter_ignorable_lines(hunks, fromlines, tolines, context,
41 hunks = filter_ignorable_lines(hunks, from_lines, to_lines, context,
42 42 ignore_blank_lines, ignore_case,
43 43 ignore_space_changes)
44 44 return hunks
45 45
46 46
47 def get_hunks(fromlines, tolines, context=None):
47 def get_hunks(from_lines, to_lines, context=None):
48 48 """Generator yielding grouped opcodes describing differences .
49 49
50 50 See `get_filtered_hunks` for the parameter descriptions.
51 51 """
52 matcher = difflib.SequenceMatcher(None, fromlines, tolines)
52 matcher = difflib.SequenceMatcher(None, from_lines, to_lines)
53 53 if context is None:
54 54 return (hunk for hunk in [matcher.get_opcodes()])
55 55 else:
56 56 return matcher.get_grouped_opcodes(context)
57 57
58 58
59 def filter_ignorable_lines(hunks, fromlines, tolines, context,
59 def filter_ignorable_lines(hunks, from_lines, to_lines, context,
60 60 ignore_blank_lines, ignore_case,
61 61 ignore_space_changes):
62 62 """Detect line changes that should be ignored and emits them as
@@ -66,11 +66,12 b' def filter_ignorable_lines(hunks, fromli'
66 66 See `get_filtered_hunks` for the parameter descriptions.
67 67 """
68 68 def is_ignorable(tag, fromlines, tolines):
69
69 70 if tag == 'delete' and ignore_blank_lines:
70 if ''.join(fromlines) == '':
71 if b''.join(fromlines) == b'':
71 72 return True
72 73 elif tag == 'insert' and ignore_blank_lines:
73 if ''.join(tolines) == '':
74 if b''.join(tolines) == b'':
74 75 return True
75 76 elif tag == 'replace' and (ignore_case or ignore_space_changes):
76 77 if len(fromlines) != len(tolines):
@@ -80,7 +81,7 b' def filter_ignorable_lines(hunks, fromli'
80 81 if ignore_case:
81 82 input_str = input_str.lower()
82 83 if ignore_space_changes:
83 input_str = ' '.join(input_str.split())
84 input_str = b' '.join(input_str.split())
84 85 return input_str
85 86
86 87 for i in range(len(fromlines)):
@@ -100,7 +101,7 b' def filter_ignorable_lines(hunks, fromli'
100 101 else:
101 102 prev = (tag, i1, i2, j1, j2)
102 103 else:
103 if is_ignorable(tag, fromlines[i1:i2], tolines[j1:j2]):
104 if is_ignorable(tag, from_lines[i1:i2], to_lines[j1:j2]):
104 105 ignored_lines = True
105 106 if prev:
106 107 prev = 'equal', prev[1], i2, prev[3], j2
@@ -124,6 +125,7 b' def filter_ignorable_lines(hunks, fromli'
124 125 nn = n + n
125 126
126 127 group = []
128
127 129 def all_equal():
128 130 all(op[0] == 'equal' for op in group)
129 131 for idx, (tag, i1, i2, j1, j2) in enumerate(opcodes):
@@ -149,11 +151,12 b' def filter_ignorable_lines(hunks, fromli'
149 151 yield hunk
150 152
151 153
152 NO_NEWLINE_AT_END = '\\ No newline at end of file'
154 NO_NEWLINE_AT_END = b'\\ No newline at end of file'
155 LINE_TERM = b'\n'
153 156
154 157
155 def unified_diff(fromlines, tolines, context=None, ignore_blank_lines=0,
156 ignore_case=0, ignore_space_changes=0, lineterm='\n'):
158 def unified_diff(from_lines, to_lines, context=None, ignore_blank_lines: bool = False,
159 ignore_case: bool = False, ignore_space_changes: bool = False, lineterm=LINE_TERM) -> bytes:
157 160 """
158 161 Generator producing lines corresponding to a textual diff.
159 162
@@ -162,10 +165,16 b' def unified_diff(fromlines, tolines, con'
162 165 # TODO: johbo: Check if this can be nicely integrated into the matching
163 166
164 167 if ignore_space_changes:
165 fromlines = [l.strip() for l in fromlines]
166 tolines = [l.strip() for l in tolines]
168 from_lines = [l.strip() for l in from_lines]
169 to_lines = [l.strip() for l in to_lines]
167 170
168 for group in get_filtered_hunks(fromlines, tolines, context,
171 def _hunk_range(start, length) -> bytes:
172 if length != 1:
173 return b'%d,%d' % (start, length)
174 else:
175 return b'%d' % (start,)
176
177 for group in get_filtered_hunks(from_lines, to_lines, context,
169 178 ignore_blank_lines, ignore_case,
170 179 ignore_space_changes):
171 180 i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
@@ -173,37 +182,30 b' def unified_diff(fromlines, tolines, con'
173 182 i1, i2 = -1, -1 # support for Add changes
174 183 if j1 == 0 and j2 == 0:
175 184 j1, j2 = -1, -1 # support for Delete changes
176 yield '@@ -{} +{} @@{}'.format(
185 yield b'@@ -%b +%b @@%b' % (
177 186 _hunk_range(i1 + 1, i2 - i1),
178 187 _hunk_range(j1 + 1, j2 - j1),
179 188 lineterm)
180 189 for tag, i1, i2, j1, j2 in group:
181 190 if tag == 'equal':
182 for line in fromlines[i1:i2]:
191 for line in from_lines[i1:i2]:
183 192 if not line.endswith(lineterm):
184 yield ' ' + line + lineterm
185 yield NO_NEWLINE_AT_END + lineterm
186 else:
187 yield ' ' + line
188 else:
189 if tag in ('replace', 'delete'):
190 for line in fromlines[i1:i2]:
191 if not line.endswith(lineterm):
192 yield '-' + line + lineterm
193 yield b' ' + line + lineterm
193 194 yield NO_NEWLINE_AT_END + lineterm
194 195 else:
195 yield '-' + line
196 if tag in ('replace', 'insert'):
197 for line in tolines[j1:j2]:
196 yield b' ' + line
197 else:
198 if tag in ('replace', 'delete'):
199 for line in from_lines[i1:i2]:
198 200 if not line.endswith(lineterm):
199 yield '+' + line + lineterm
201 yield b'-' + line + lineterm
200 202 yield NO_NEWLINE_AT_END + lineterm
201 203 else:
202 yield '+' + line
203
204
205 def _hunk_range(start, length):
206 if length != 1:
207 return '%d,%d' % (start, length)
204 yield b'-' + line
205 if tag in ('replace', 'insert'):
206 for line in to_lines[j1:j2]:
207 if not line.endswith(lineterm):
208 yield b'+' + line + lineterm
209 yield NO_NEWLINE_AT_END + lineterm
208 210 else:
209 return '%d' % (start, )
211 yield b'+' + line
@@ -69,11 +69,11 b' def test_svn_libraries_can_be_imported()'
69 69
70 70
71 71 @pytest.mark.parametrize('example_url, parts', [
72 ('http://server.com', (None, None, 'http://server.com')),
73 ('http://user@server.com', ('user', None, 'http://user@server.com')),
72 ('http://server.com', ('', '', 'http://server.com')),
73 ('http://user@server.com', ('user', '', 'http://user@server.com')),
74 74 ('http://user:pass@server.com', ('user', 'pass', 'http://user:pass@server.com')),
75 ('<script>', (None, None, '<script>')),
76 ('http://', (None, None, 'http://')),
75 ('<script>', ('', '', '<script>')),
76 ('http://', ('', '', 'http://')),
77 77 ])
78 78 def test_username_password_extraction_from_url(example_url, parts):
79 79 from vcsserver.remote import svn
@@ -99,5 +99,5 b' def test_check_url(call_url):'
99 99
100 100 remote = svn.SvnRemote(factory)
101 101 remote.is_path_valid_repository = lambda wire, path: True
102 assert remote.check_url(call_url)
102 assert remote.check_url(call_url, {'dummy': 'config'})
103 103
@@ -14,10 +14,12 b''
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
17 import base64
18 18 import time
19 19 import logging
20 20
21 import msgpack
22
21 23 import vcsserver
22 24 from vcsserver.str_utils import safe_str, ascii_str
23 25
@@ -33,8 +35,9 b' def get_user_agent(environ):'
33 35 return environ.get('HTTP_USER_AGENT')
34 36
35 37
36 def get_call_context(registry) -> dict:
38 def get_call_context(request) -> dict:
37 39 cc = {}
40 registry = request.registry
38 41 if hasattr(registry, 'vcs_call_context'):
39 42 cc.update({
40 43 'X-RC-Method': registry.vcs_call_context.get('method'),
@@ -44,6 +47,14 b' def get_call_context(registry) -> dict:'
44 47 return cc
45 48
46 49
50 def get_headers_call_context(environ, strict=True):
51 if 'HTTP_X_RC_VCS_STREAM_CALL_CONTEXT' in environ:
52 packed_cc = base64.b64decode(environ['HTTP_X_RC_VCS_STREAM_CALL_CONTEXT'])
53 return msgpack.unpackb(packed_cc)
54 elif strict:
55 raise ValueError('Expected header HTTP_X_RC_VCS_STREAM_CALL_CONTEXT not found')
56
57
47 58 class RequestWrapperTween(object):
48 59 def __init__(self, handler, registry):
49 60 self.handler = handler
@@ -60,7 +71,7 b' class RequestWrapperTween(object):'
60 71 response = self.handler(request)
61 72 finally:
62 73 ua = get_user_agent(request.environ)
63 call_context = get_call_context(request.registry)
74 call_context = get_call_context(request)
64 75 vcs_method = call_context.get('X-RC-Method', '_NO_VCS_METHOD')
65 76 repo_name = call_context.get('X-RC-Repo-Name', '')
66 77
General Comments 0
You need to be logged in to leave comments. Login now