##// END OF EJS Templates
app: new optimized remote endpoints for python3 rewrite
super-admin -
r1124:8fcf8b08 python3
parent child Browse files
Show More
@@ -18,7 +18,6 b''
18 import collections
18 import collections
19 import logging
19 import logging
20 import os
20 import os
21 import posixpath as vcspath
22 import re
21 import re
23 import stat
22 import stat
24 import traceback
23 import traceback
@@ -32,7 +31,7 b' import pygit2'
32 from pygit2 import Repository as LibGit2Repo
31 from pygit2 import Repository as LibGit2Repo
33 from pygit2 import index as LibGit2Index
32 from pygit2 import index as LibGit2Index
34 from dulwich import index, objects
33 from dulwich import index, objects
35 from dulwich.client import HttpGitClient, LocalGitClient
34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
36 from dulwich.errors import (
35 from dulwich.errors import (
37 NotGitRepository, ChecksumMismatch, WrongObjectException,
36 NotGitRepository, ChecksumMismatch, WrongObjectException,
38 MissingCommitError, ObjectMissing, HangupException,
37 MissingCommitError, ObjectMissing, HangupException,
@@ -42,7 +41,7 b' from dulwich.server import update_server'
42
41
43 from vcsserver import exceptions, settings, subprocessio
42 from vcsserver import exceptions, settings, subprocessio
44 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
45 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo, BinaryEnvelope
44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
46 from vcsserver.hgcompat import (
45 from vcsserver.hgcompat import (
47 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
48 from vcsserver.git_lfs.lib import LFSOidStore
47 from vcsserver.git_lfs.lib import LFSOidStore
@@ -127,6 +126,28 b' class GitFactory(RepoFactory):'
127 return self.repo(wire, use_libgit2=True)
126 return self.repo(wire, use_libgit2=True)
128
127
129
128
def create_signature_from_string(author_str, **kwargs):
    """
    Creates a pygit2.Signature object from a string of the format 'Name <email>'.

    Leading/trailing whitespace around the whole string, the name and the
    email is ignored, so ``' Jane Doe <jane@example.com> '`` is accepted and
    yields a clean name and email.

    :param author_str: String of the format 'Name <email>'
    :param kwargs: extra keyword arguments forwarded unchanged to
        ``pygit2.Signature``
    :return: pygit2.Signature object
    :raises ValueError: when ``author_str`` does not match 'Name <email>'
    """
    match = re.match(r'^(.+) <(.+)>$', author_str.strip())
    if match is None:
        raise ValueError(f"Invalid format: {author_str}")

    # strip each part as well: 'Name   <email>' must not keep the padding
    name, email = (part.strip() for part in match.groups())
    return pygit2.Signature(name, email, **kwargs)
142
143
def get_obfuscated_url(url_obj):
    """
    Return a string form of ``url_obj`` that is safe to log.

    The password (when one is set) is replaced by ``b'*****'`` and the query
    string is passed through ``obfuscate_qs``. The masking is applied to a
    shallow copy, so the caller's ``url_obj`` keeps its real credentials and
    can still be used afterwards.

    :param url_obj: parsed url object exposing ``passwd`` and ``query``
        attributes and supporting ``str()``
    :return: obfuscated url as ``str``
    """
    import copy

    masked = copy.copy(url_obj)
    masked.passwd = b'*****' if masked.passwd else masked.passwd
    masked.query = obfuscate_qs(masked.query)
    return str(masked)
149
150
130 class GitRemote(RemoteBase):
151 class GitRemote(RemoteBase):
131
152
132 def __init__(self, factory):
153 def __init__(self, factory):
@@ -139,6 +160,13 b' class GitRemote(RemoteBase):'
139 "parents": self.parents,
160 "parents": self.parents,
140 "_commit": self.revision,
161 "_commit": self.revision,
141 }
162 }
163 self._bulk_file_methods = {
164 "size": self.get_node_size,
165 "data": self.get_node_data,
166 "flags": self.get_node_flags,
167 "is_binary": self.get_node_is_binary,
168 "md5": self.md5_hash
169 }
142
170
143 def _wire_to_config(self, wire):
171 def _wire_to_config(self, wire):
144 if 'config' in wire:
172 if 'config' in wire:
@@ -213,11 +241,63 b' class GitRemote(RemoteBase):'
213 return repo.is_bare
241 return repo.is_bare
214
242
215 @reraise_safe_exceptions
243 @reraise_safe_exceptions
def get_node_data(self, wire, commit_id, path):
    """
    Return the content of the blob found at ``path`` in the tree of
    ``commit_id``, wrapped in a ``BytesEnvelope``.

    :param wire: wire/connection dict handed to the libgit2 repo factory
    :param commit_id: id used to look the commit up in the repository
    :param path: path looked up in the commit's tree
    :raises: the exception built by ``exceptions.LookupException()`` when the
        object at ``path`` is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        blob_obj = repo[commit_id].tree[path]

        if blob_obj.type == pygit2.GIT_OBJ_BLOB:
            return BytesEnvelope(blob_obj.data)

        # anything other than a blob has no file content to return
        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
255
@reraise_safe_exceptions
def get_node_size(self, wire, commit_id, path):
    """
    Return the ``size`` attribute of the blob found at ``path`` in the tree
    of ``commit_id``.

    :param wire: wire/connection dict handed to the libgit2 repo factory
    :param commit_id: id used to look the commit up in the repository
    :param path: path looked up in the commit's tree
    :raises: the exception built by ``exceptions.LookupException()`` when the
        object at ``path`` is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        blob_obj = repo[commit_id].tree[path]

        if blob_obj.type == pygit2.GIT_OBJ_BLOB:
            return blob_obj.size

        # only blobs are accepted here
        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
268
@reraise_safe_exceptions
def get_node_flags(self, wire, commit_id, path):
    """
    Return the ``filemode`` of the blob found at ``path`` in the tree of
    ``commit_id``.

    :param wire: wire/connection dict handed to the libgit2 repo factory
    :param commit_id: id used to look the commit up in the repository
    :param path: path looked up in the commit's tree
    :raises: the exception built by ``exceptions.LookupException()`` when the
        object at ``path`` is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        blob_obj = repo[commit_id].tree[path]

        if blob_obj.type == pygit2.GIT_OBJ_BLOB:
            return blob_obj.filemode

        # only blobs are accepted here
        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
281
@reraise_safe_exceptions
def get_node_is_binary(self, wire, commit_id, path):
    """
    Return the ``is_binary`` attribute of the blob found at ``path`` in the
    tree of ``commit_id``.

    :param wire: wire/connection dict handed to the libgit2 repo factory
    :param commit_id: id used to look the commit up in the repository
    :param path: path looked up in the commit's tree
    :raises: the exception built by ``exceptions.LookupException()`` when the
        object at ``path`` is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        blob_obj = repo[commit_id].tree[path]

        if blob_obj.type == pygit2.GIT_OBJ_BLOB:
            return blob_obj.is_binary

        # only blobs are accepted here
        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
294
295 @reraise_safe_exceptions
216 def blob_as_pretty_string(self, wire, sha):
296 def blob_as_pretty_string(self, wire, sha):
217 repo_init = self._factory.repo_libgit2(wire)
297 repo_init = self._factory.repo_libgit2(wire)
218 with repo_init as repo:
298 with repo_init as repo:
219 blob_obj = repo[sha]
299 blob_obj = repo[sha]
220 return BinaryEnvelope(blob_obj.data)
300 return BytesEnvelope(blob_obj.data)
221
301
222 @reraise_safe_exceptions
302 @reraise_safe_exceptions
223 def blob_raw_length(self, wire, sha):
303 def blob_raw_length(self, wire, sha):
@@ -283,15 +363,24 b' class GitRemote(RemoteBase):'
283 return _is_binary(repo_id, tree_id)
363 return _is_binary(repo_id, tree_id)
284
364
285 @reraise_safe_exceptions
365 @reraise_safe_exceptions
286 def md5_hash(self, wire, tree_id):
366 def md5_hash(self, wire, commit_id, path):
287 cache_on, context_uid, repo_id = self._cache_on(wire)
367 cache_on, context_uid, repo_id = self._cache_on(wire)
288 region = self._region(wire)
368 region = self._region(wire)
289
369
290 @region.conditional_cache_on_arguments(condition=cache_on)
370 @region.conditional_cache_on_arguments(condition=cache_on)
291 def _md5_hash(_repo_id, _tree_id):
371 def _md5_hash(_repo_id, _commit_id, _path):
292 return ''
372 repo_init = self._factory.repo_libgit2(wire)
373 with repo_init as repo:
374 commit = repo[_commit_id]
375 blob_obj = commit.tree[_path]
293
376
294 return _md5_hash(repo_id, tree_id)
377 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
378 raise exceptions.LookupException()(
379 f'Tree for commit_id:{_commit_id} is not a blob: {blob_obj.type_str}')
380
381 return ''
382
383 return _md5_hash(repo_id, commit_id, path)
295
384
296 @reraise_safe_exceptions
385 @reraise_safe_exceptions
297 def in_largefiles_store(self, wire, oid):
386 def in_largefiles_store(self, wire, oid):
@@ -343,10 +432,29 b' class GitRemote(RemoteBase):'
343
432
344 return _bulk_request(repo_id, rev, sorted(pre_load))
433 return _bulk_request(repo_id, rev, sorted(pre_load))
345
434
346 def _build_opener(self, url):
@reraise_safe_exceptions
def bulk_file_request(self, wire, commit_id, path, pre_load):
    """
    Fetch several file attributes for one ``commit_id``/``path`` in one call.

    Each name in ``pre_load`` is resolved through ``self._bulk_file_methods``
    and called as ``method(wire, commit_id, path)``; the results are collected
    in a dict keyed by attribute name and returned inside a ``BinaryEnvelope``.

    :param wire: wire/connection dict; its ``'cache'`` key is set to ``False``
        before the per-attribute methods are called
    :param commit_id: commit id passed to every per-attribute method
    :param path: file path passed to every per-attribute method
    :param pre_load: iterable of attribute names to compute
    :raises: the exception built by ``exceptions.VcsException`` when a name in
        ``pre_load`` is not a key of ``self._bulk_file_methods``
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
        result = {}
        # iterate the sorted list that is also part of the cache arguments
        for attr in _pre_load:
            # only the lookup is guarded: a KeyError raised by the delegated
            # method itself must not be reported as an unknown attribute
            try:
                method = self._bulk_file_methods[attr]
            except KeyError as e:
                raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')

            wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
            result[attr] = method(wire, _commit_id, _path)
        return BinaryEnvelope(result)

    return _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
453
454 def _build_opener(self, url: str):
347 handlers = []
455 handlers = []
348 url_obj = url_parser(url)
456 url_obj = url_parser(safe_bytes(url))
349 _, authinfo = url_obj.authinfo()
457 authinfo = url_obj.authinfo()[1]
350
458
351 if authinfo:
459 if authinfo:
352 # create a password manager
460 # create a password manager
@@ -358,27 +466,19 b' class GitRemote(RemoteBase):'
358
466
359 return urllib.request.build_opener(*handlers)
467 return urllib.request.build_opener(*handlers)
360
468
361 def _type_id_to_name(self, type_id: int):
362 return {
363 1: 'commit',
364 2: 'tree',
365 3: 'blob',
366 4: 'tag'
367 }[type_id]
368
369 @reraise_safe_exceptions
469 @reraise_safe_exceptions
370 def check_url(self, url, config):
470 def check_url(self, url, config):
371 url_obj = url_parser(safe_bytes(url))
471 url_obj = url_parser(safe_bytes(url))
372 test_uri, _ = url_obj.authinfo()
472
373 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
473 test_uri = safe_str(url_obj.authinfo()[0])
374 url_obj.query = obfuscate_qs(url_obj.query)
474 obfuscated_uri = get_obfuscated_url(url_obj)
375 cleaned_uri = str(url_obj)
475
376 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
476 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
377
477
378 if not test_uri.endswith('info/refs'):
478 if not test_uri.endswith('info/refs'):
379 test_uri = test_uri.rstrip('/') + '/info/refs'
479 test_uri = test_uri.rstrip('/') + '/info/refs'
380
480
381 o = self._build_opener(url)
481 o = self._build_opener(test_uri)
382 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
482 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
383
483
384 q = {"service": 'git-upload-pack'}
484 q = {"service": 'git-upload-pack'}
@@ -387,25 +487,28 b' class GitRemote(RemoteBase):'
387 req = urllib.request.Request(cu, None, {})
487 req = urllib.request.Request(cu, None, {})
388
488
389 try:
489 try:
390 log.debug("Trying to open URL %s", cleaned_uri)
490 log.debug("Trying to open URL %s", obfuscated_uri)
391 resp = o.open(req)
491 resp = o.open(req)
392 if resp.code != 200:
492 if resp.code != 200:
393 raise exceptions.URLError()('Return Code is not 200')
493 raise exceptions.URLError()('Return Code is not 200')
394 except Exception as e:
494 except Exception as e:
395 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
495 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
396 # means it cannot be cloned
496 # means it cannot be cloned
397 raise exceptions.URLError(e)("[{}] org_exc: {}".format(cleaned_uri, e))
497 raise exceptions.URLError(e)("[{}] org_exc: {}".format(obfuscated_uri, e))
398
498
399 # now detect if it's proper git repo
499 # now detect if it's proper git repo
400 gitdata = resp.read()
500 gitdata: bytes = resp.read()
401 if 'service=git-upload-pack' in gitdata:
501
502 if b'service=git-upload-pack' in gitdata:
402 pass
503 pass
403 elif re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata):
504 elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
404 # old style git can return some other format !
505 # old style git can return some other format !
405 pass
506 pass
406 else:
507 else:
407 raise exceptions.URLError()(
508 e = None
408 "url [{}] does not look like an git".format(cleaned_uri))
509 raise exceptions.URLError(e)(
510 "url [%s] does not look like an hg repo org_exc: %s"
511 % (obfuscated_uri, e))
409
512
410 return True
513 return True
411
514
@@ -468,157 +571,112 b' class GitRemote(RemoteBase):'
468 repo.object_store.add_object(blob)
571 repo.object_store.add_object(blob)
469 return blob.id
572 return blob.id
470
573
471 # TODO: this is quite complex, check if that can be simplified
@reraise_safe_exceptions
def create_commit(self, wire, author, committer, message, branch, new_tree_id, date_args: list[int] = None):
    """
    Create a commit for ``new_tree_id`` and point ``refs/heads/<branch>`` at it.

    :param wire: wire/connection dict handed to the libgit2 repo factory
    :param author: author as a ``'Name <email>'`` string; a non-string value
        is passed through to pygit2 untouched
    :param committer: committer, same conventions as ``author``
    :param message: commit message
    :param branch: short branch name; the updated ref is ``refs/heads/<branch>``
    :param new_tree_id: tree for the commit; a ``str``/``bytes`` id is looked
        up in the repo first, which also checks that the object exists
    :param date_args: optional ``[time, offset]`` pair applied to both
        signatures. When omitted the signatures are still built, without
        explicit time/offset (presumably pygit2 then uses the current time;
        confirm against the pygit2 Signature documentation)
    :return: id of the new commit as ``str``
    """
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:

        signature_kw = {}
        if date_args:
            current_time, offset = date_args
            signature_kw = {
                'time': current_time,
                'offset': offset
            }

        # strings are always converted to signatures, with or without
        # explicit date information
        if isinstance(author, str):
            author = create_signature_from_string(author, **signature_kw)
        if isinstance(committer, str):
            committer = create_signature_from_string(committer, **signature_kw)

        tree = new_tree_id
        if isinstance(tree, (bytes, str)):
            # validate this tree is in the repo...
            tree = repo[safe_str(tree)].id

        parents = []
        # ensure we COMMIT on top of given branch head
        # check if this repo has ANY branches, otherwise it's a new branch case we need to make
        if branch in repo.branches.local:
            parents.append(repo.branches[branch].target)
        elif any(True for _ in repo.branches.local):
            # branch does not exist yet but others do: start it from HEAD
            parents.append(repo.head.target)
        # no local branches at all: the commit is created without parents

        commit_oid = repo.create_commit(
            f'refs/heads/{branch}',  # the name of the reference to update
            author,  # the author of the commit
            committer,  # the committer of the commit
            message,  # the commit message
            tree,  # the tree produced by the index
            parents  # list of parents for the new commit, usually just one,
        )

        new_commit_id = safe_str(commit_oid)

    return new_commit_id
618
472 @reraise_safe_exceptions
619 @reraise_safe_exceptions
473 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
620 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
474 # Defines the root tree
475 class _Root(object):
476 def __repr__(self):
477 return 'ROOT TREE'
478 ROOT = _Root()
479
621
480 repo = self._factory.repo(wire)
622 def mode2pygit(mode):
481 object_store = repo.object_store
623 """
482
624 git only supports two filemode 644 and 755
483 # Create tree and populates it with blobs
484 if commit_tree:
485 commit_tree = safe_bytes(commit_tree)
486
487 if commit_tree and repo[commit_tree]:
488 git_commit = repo[safe_bytes(commit_data['parents'][0])]
489 commit_tree = repo[git_commit.tree] # root tree
490 else:
491 commit_tree = objects.Tree()
492
493 for node in updated:
494 # Compute subdirs if needed
495 dirpath, nodename = vcspath.split(node['path'])
496 dirnames = list(map(safe_str, dirpath and dirpath.split('/') or []))
497 parent = commit_tree
498 ancestors = [('', parent)]
499
625
500 # Tries to dig for the deepest existing tree
626 0o100755 -> 33261
501 while dirnames:
627 0o100644 -> 33188
502 curdir = dirnames.pop(0)
628 """
503 try:
629 return {
504 dir_id = parent[curdir][1]
630 0o100644: pygit2.GIT_FILEMODE_BLOB,
505 except KeyError:
631 0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
506 # put curdir back into dirnames and stops
632 0o120000: pygit2.GIT_FILEMODE_LINK
507 dirnames.insert(0, curdir)
633 }.get(mode) or pygit2.GIT_FILEMODE_BLOB
508 break
509 else:
510 # If found, updates parent
511 parent = repo[dir_id]
512 ancestors.append((curdir, parent))
513 # Now parent is deepest existing tree and we need to create
514 # subtrees for dirnames (in reverse order)
515 # [this only applies for nodes from added]
516 new_trees = []
517
634
518 blob = objects.Blob.from_string(node['content'])
635 repo_init = self._factory.repo_libgit2(wire)
519
636 with repo_init as repo:
520 node_path = safe_bytes(node['node_path'])
637 repo_index = repo.index
521
638
522 if dirnames:
639 for pathspec in updated:
523 # If there are trees which should be created we need to build
640 blob_id = repo.create_blob(pathspec['content'])
524 # them now (in reverse order)
641 ie = pygit2.IndexEntry(pathspec['path'], blob_id, mode2pygit(pathspec['mode']))
525 reversed_dirnames = list(reversed(dirnames))
642 repo_index.add(ie)
526 curtree = objects.Tree()
527 curtree[node_path] = node['mode'], blob.id
528 new_trees.append(curtree)
529 for dirname in reversed_dirnames[:-1]:
530 newtree = objects.Tree()
531 newtree[dirname] = (DIR_STAT, curtree.id)
532 new_trees.append(newtree)
533 curtree = newtree
534 parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
535 else:
536 parent.add(name=node_path, mode=node['mode'], hexsha=blob.id)
537
643
538 new_trees.append(parent)
644 for pathspec in removed:
539 # Update ancestors
645 repo_index.remove(pathspec)
540 reversed_ancestors = reversed(
541 [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
542 for parent, tree, path in reversed_ancestors:
543 parent[path] = (DIR_STAT, tree.id)
544 object_store.add_object(tree)
545
646
546 object_store.add_object(blob)
647 # Write changes to the index
547 for tree in new_trees:
648 repo_index.write()
548 object_store.add_object(tree)
649
650 # Create a tree from the updated index
651 commit_tree = repo_index.write_tree()
652
653 new_tree_id = commit_tree
549
654
550 for node_path in removed:
655 author = commit_data['author']
551 paths = node_path.split('/')
656 committer = commit_data['committer']
552 tree = commit_tree # start with top-level
657 message = commit_data['message']
553 trees = [{'tree': tree, 'path': ROOT}]
658
554 # Traverse deep into the forest...
659 date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]
555 # resolve final tree by iterating the path.
556 # e.g a/b/c.txt will get
557 # - root as tree then
558 # - 'a' as tree,
559 # - 'b' as tree,
560 # - stop at c as blob.
561 for path in paths:
562 try:
563 obj = repo[tree[path][1]]
564 if isinstance(obj, objects.Tree):
565 trees.append({'tree': obj, 'path': path})
566 tree = obj
567 except KeyError:
568 break
569 #PROBLEM:
570 """
571 We're not editing same reference tree object
572 """
573 # Cut down the blob and all rotten trees on the way back...
574 for path, tree_data in reversed(list(zip(paths, trees))):
575 tree = tree_data['tree']
576 tree.__delitem__(path)
577 # This operation edits the tree, we need to mark new commit back
578
660
579 if len(tree) > 0:
661 new_commit_id = self.create_commit(wire, author, committer, message, branch,
580 # This tree still has elements - don't remove it or any
662 new_tree_id, date_args=date_args)
581 # of it's parents
582 break
583
584 object_store.add_object(commit_tree)
585
663
586 # Create commit
664 # libgit2, ensure the branch is there and exists
587 commit = objects.Commit()
665 self.create_branch(wire, branch, new_commit_id)
588 commit.tree = commit_tree.id
589 bytes_keys = [
590 'author',
591 'committer',
592 'message',
593 'encoding',
594 'parents'
595 ]
596
666
597 for k, v in commit_data.items():
667 # libgit2, set new ref to this created commit
598 if k in bytes_keys:
668 self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)
599 if k == 'parents':
600 v = [safe_bytes(x) for x in v]
601 else:
602 v = safe_bytes(v)
603 setattr(commit, k, v)
604
669
605 object_store.add_object(commit)
670 return new_commit_id
606
607 self.create_branch(wire, branch, safe_str(commit.id))
608
609 # dulwich set-ref
610 repo.refs[safe_bytes(f'refs/heads/{branch}')] = commit.id
611
612 return commit.id
613
671
614 @reraise_safe_exceptions
672 @reraise_safe_exceptions
615 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
673 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
616 if url != 'default' and '://' not in url:
674 if url != 'default' and '://' not in url:
617 client = LocalGitClient(url)
675 client = LocalGitClient(url)
618 else:
676 else:
619 url_obj = url_parser(url)
677 url_obj = url_parser(safe_bytes(url))
620 o = self._build_opener(url)
678 o = self._build_opener(url)
621 url, _ = url_obj.authinfo()
679 url = url_obj.authinfo()[0]
622 client = HttpGitClient(base_url=url, opener=o)
680 client = HttpGitClient(base_url=url, opener=o)
623 repo = self._factory.repo(wire)
681 repo = self._factory.repo(wire)
624
682
@@ -674,6 +732,9 b' class GitRemote(RemoteBase):'
674 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
732 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
675 index.build_index_from_tree(repo.path, repo.index_path(),
733 index.build_index_from_tree(repo.path, repo.index_path(),
676 repo.object_store, repo[HEAD_MARKER].tree)
734 repo.object_store, repo[HEAD_MARKER].tree)
735
736 if isinstance(remote_refs, FetchPackResult):
737 return remote_refs.refs
677 return remote_refs
738 return remote_refs
678
739
679 @reraise_safe_exceptions
740 @reraise_safe_exceptions
@@ -759,11 +820,11 b' class GitRemote(RemoteBase):'
759 wire_remote = wire.copy()
820 wire_remote = wire.copy()
760 wire_remote['path'] = path2
821 wire_remote['path'] = path2
761 repo_remote = self._factory.repo(wire_remote)
822 repo_remote = self._factory.repo(wire_remote)
762 LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)
823 LocalGitClient(thin_packs=False).fetch(path2, repo_remote)
763
824
764 revs = [
825 revs = [
765 x.commit.id
826 x.commit.id
766 for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
827 for x in repo_remote.get_walker(include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])]
767 return revs
828 return revs
768
829
769 @reraise_safe_exceptions
830 @reraise_safe_exceptions
@@ -815,11 +876,11 b' class GitRemote(RemoteBase):'
815 raise exceptions.LookupException(e)(missing_commit_err)
876 raise exceptions.LookupException(e)(missing_commit_err)
816
877
817 commit_id = commit.hex
878 commit_id = commit.hex
818 type_id = commit.type
879 type_str = commit.type_str
819
880
820 return {
881 return {
821 'id': commit_id,
882 'id': commit_id,
822 'type': self._type_id_to_name(type_id),
883 'type': type_str,
823 'commit_id': commit_id,
884 'commit_id': commit_id,
824 'idx': 0
885 'idx': 0
825 }
886 }
@@ -1018,7 +1079,11 b' class GitRemote(RemoteBase):'
1018 def create_branch(self, wire, branch_name, commit_id, force=False):
1079 def create_branch(self, wire, branch_name, commit_id, force=False):
1019 repo_init = self._factory.repo_libgit2(wire)
1080 repo_init = self._factory.repo_libgit2(wire)
1020 with repo_init as repo:
1081 with repo_init as repo:
1021 commit = repo[commit_id]
1082 if commit_id:
1083 commit = repo[commit_id]
1084 else:
1085 # if commit is not given just use the HEAD
1086 commit = repo.head()
1022
1087
1023 if force:
1088 if force:
1024 repo.branches.local.create(branch_name, commit, force=force)
1089 repo.branches.local.create(branch_name, commit, force=force)
@@ -1041,12 +1106,27 b' class GitRemote(RemoteBase):'
1041
1106
1042 @reraise_safe_exceptions
1107 @reraise_safe_exceptions
1043 def tree_changes(self, wire, source_id, target_id):
1108 def tree_changes(self, wire, source_id, target_id):
1044 # TODO(marcink): remove this seems it's only used by tests
1045 repo = self._factory.repo(wire)
1109 repo = self._factory.repo(wire)
1110 # source can be empty
1111 source_id = safe_bytes(source_id if source_id else b'')
1112 target_id = safe_bytes(target_id)
1113
1046 source = repo[source_id].tree if source_id else None
1114 source = repo[source_id].tree if source_id else None
1047 target = repo[target_id].tree
1115 target = repo[target_id].tree
1048 result = repo.object_store.tree_changes(source, target)
1116 result = repo.object_store.tree_changes(source, target)
1049 return list(result)
1117
1118 added = set()
1119 modified = set()
1120 deleted = set()
1121 for (old_path, new_path), (_, _), (_, _) in list(result):
1122 if new_path and old_path:
1123 modified.add(new_path)
1124 elif new_path and not old_path:
1125 added.add(new_path)
1126 elif not new_path and old_path:
1127 deleted.add(old_path)
1128
1129 return list(added), list(modified), list(deleted)
1050
1130
1051 @reraise_safe_exceptions
1131 @reraise_safe_exceptions
1052 def tree_and_type_for_path(self, wire, commit_id, path):
1132 def tree_and_type_for_path(self, wire, commit_id, path):
@@ -1167,10 +1247,11 b' class GitRemote(RemoteBase):'
1167 if file_filter:
1247 if file_filter:
1168 for p in diff_obj:
1248 for p in diff_obj:
1169 if p.delta.old_file.path == file_filter:
1249 if p.delta.old_file.path == file_filter:
1170 return BinaryEnvelope(p.data) or BinaryEnvelope(b'')
1250 return BytesEnvelope(p.data) or BytesEnvelope(b'')
1171 # fo matching path == no diff
1251 # fo matching path == no diff
1172 return BinaryEnvelope(b'')
1252 return BytesEnvelope(b'')
1173 return BinaryEnvelope(diff_obj.patch) or BinaryEnvelope(b'')
1253
1254 return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')
1174
1255
1175 @reraise_safe_exceptions
1256 @reraise_safe_exceptions
1176 def node_history(self, wire, commit_id, path, limit):
1257 def node_history(self, wire, commit_id, path, limit):
@@ -1346,8 +1427,8 b' class GitRemote(RemoteBase):'
1346 return [head_name] + [f'set HEAD to refs/heads/{head_name}']
1427 return [head_name] + [f'set HEAD to refs/heads/{head_name}']
1347
1428
1348 @reraise_safe_exceptions
1429 @reraise_safe_exceptions
1349 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
1430 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1350 archive_dir_name, commit_id):
1431 archive_dir_name, commit_id, cache_config):
1351
1432
1352 def file_walker(_commit_id, path):
1433 def file_walker(_commit_id, path):
1353 repo_init = self._factory.repo_libgit2(wire)
1434 repo_init = self._factory.repo_libgit2(wire)
@@ -1378,5 +1459,5 b' class GitRemote(RemoteBase):'
1378 continue
1459 continue
1379 yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)
1460 yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)
1380
1461
1381 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
1462 return store_archive_in_cache(
1382 archive_dir_name, commit_id)
1463 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
@@ -32,7 +32,8 b' from mercurial import repair'
32
32
33 import vcsserver
33 import vcsserver
34 from vcsserver import exceptions
34 from vcsserver import exceptions
35 from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original, archive_repo, ArchiveNode, BinaryEnvelope
35 from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original, store_archive_in_cache, ArchiveNode, BytesEnvelope, \
36 BinaryEnvelope
36 from vcsserver.hgcompat import (
37 from vcsserver.hgcompat import (
37 archival, bin, clone, config as hgconfig, diffopts, hex, get_ctx,
38 archival, bin, clone, config as hgconfig, diffopts, hex, get_ctx,
38 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler,
39 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler,
@@ -42,6 +43,8 b' from vcsserver.hgcompat import ('
42 alwaysmatcher, patternmatcher, hgutil, hgext_strip)
43 alwaysmatcher, patternmatcher, hgutil, hgext_strip)
43 from vcsserver.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes
44 from vcsserver.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes
44 from vcsserver.vcs_base import RemoteBase
45 from vcsserver.vcs_base import RemoteBase
46 from vcsserver.config import hooks as hooks_config
47
45
48
46 log = logging.getLogger(__name__)
49 log = logging.getLogger(__name__)
47
50
@@ -137,9 +140,18 b' class MercurialFactory(RepoFactory):'
137
140
138 def _create_config(self, config, hooks=True):
141 def _create_config(self, config, hooks=True):
139 if not hooks:
142 if not hooks:
140 hooks_to_clean = frozenset((
143
141 'changegroup.repo_size', 'preoutgoing.pre_pull',
144 hooks_to_clean = {
142 'outgoing.pull_logger', 'prechangegroup.pre_push'))
145
146 hooks_config.HOOK_REPO_SIZE,
147 hooks_config.HOOK_PRE_PULL,
148 hooks_config.HOOK_PULL,
149
150 hooks_config.HOOK_PRE_PUSH,
151 # TODO: what about PRETXT, this was disabled in pre 5.0.0
152 hooks_config.HOOK_PRETX_PUSH,
153
154 }
143 new_config = []
155 new_config = []
144 for section, option, value in config:
156 for section, option, value in config:
145 if section == 'hooks' and option in hooks_to_clean:
157 if section == 'hooks' and option in hooks_to_clean:
@@ -178,6 +190,22 b' def patch_ui_message_output(baseui):'
178 return baseui, output
190 return baseui, output
179
191
180
192
def get_obfuscated_url(url_obj):
    """
    Return a string form of ``url_obj`` that is safe to log.

    The password (when one is set) is replaced by ``b'*****'`` and the query
    string is passed through ``obfuscate_qs``. The masking is applied to a
    shallow copy, so the caller's ``url_obj`` keeps its real credentials and
    can still be used afterwards.

    :param url_obj: parsed url object exposing ``passwd`` and ``query``
        attributes and supporting ``str()``
    :return: obfuscated url as ``str``
    """
    import copy

    masked = copy.copy(url_obj)
    masked.passwd = b'*****' if masked.passwd else masked.passwd
    masked.query = obfuscate_qs(masked.query)
    return str(masked)
198
199
def normalize_url_for_hg(url: str):
    """
    Split a ``proto+scheme://...`` url into the plain url and the proto prefix.

    Only a ``+`` inside the scheme part (the text before ``://``) counts, e.g.
    ``svn+http://host/repo`` gives ``('http://host/repo', 'svn')``. Urls
    without ``://`` (such as local paths) and urls whose scheme has no ``+``
    are returned unchanged with ``None`` as the proto, even if a ``+`` occurs
    later in the path.

    :param url: url or path to normalize
    :return: tuple ``(url, proto)`` where ``proto`` is ``None`` when there is
        no prefix
    """
    scheme, separator, _ = url.partition('://')
    # without '://' there is no scheme, so a '+' is just part of a path
    if not separator or '+' not in scheme:
        return url, None

    _proto, _, url = url.partition('+')
    return url, _proto
207
208
181 class HgRemote(RemoteBase):
209 class HgRemote(RemoteBase):
182
210
183 def __init__(self, factory):
211 def __init__(self, factory):
@@ -196,6 +224,13 b' class HgRemote(RemoteBase):'
196 "hidden": self.ctx_hidden,
224 "hidden": self.ctx_hidden,
197 "_file_paths": self.ctx_list,
225 "_file_paths": self.ctx_list,
198 }
226 }
227 self._bulk_file_methods = {
228 "size": self.fctx_size,
229 "data": self.fctx_node_data,
230 "flags": self.fctx_flags,
231 "is_binary": self.is_binary,
232 "md5": self.md5_hash,
233 }
199
234
200 def _get_ctx(self, repo, ref):
235 def _get_ctx(self, repo, ref):
201 return get_ctx(repo, ref)
236 return get_ctx(repo, ref)
@@ -405,19 +440,15 b' class HgRemote(RemoteBase):'
405
440
406 @reraise_safe_exceptions
441 @reraise_safe_exceptions
407 def check_url(self, url, config):
442 def check_url(self, url, config):
408 _proto = None
443 url, _proto = normalize_url_for_hg(url)
409 if '+' in url[:url.find('://')]:
444 url_obj = url_parser(safe_bytes(url))
410 _proto = url[0:url.find('+')]
445
411 url = url[url.find('+') + 1:]
446 test_uri = safe_str(url_obj.authinfo()[0])
447 authinfo = url_obj.authinfo()[1]
448 obfuscated_uri = get_obfuscated_url(url_obj)
449 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
450
412 handlers = []
451 handlers = []
413 url_obj = url_parser(url)
414 test_uri, authinfo = url_obj.authinfo()
415 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
416 url_obj.query = obfuscate_qs(url_obj.query)
417
418 cleaned_uri = str(url_obj)
419 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
420
421 if authinfo:
452 if authinfo:
422 # create a password manager
453 # create a password manager
423 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
454 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
@@ -437,14 +468,14 b' class HgRemote(RemoteBase):'
437 req = urllib.request.Request(cu, None, {})
468 req = urllib.request.Request(cu, None, {})
438
469
439 try:
470 try:
440 log.debug("Trying to open URL %s", cleaned_uri)
471 log.debug("Trying to open URL %s", obfuscated_uri)
441 resp = o.open(req)
472 resp = o.open(req)
442 if resp.code != 200:
473 if resp.code != 200:
443 raise exceptions.URLError()('Return Code is not 200')
474 raise exceptions.URLError()('Return Code is not 200')
444 except Exception as e:
475 except Exception as e:
445 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
476 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
446 # means it cannot be cloned
477 # means it cannot be cloned
447 raise exceptions.URLError(e)("[{}] org_exc: {}".format(cleaned_uri, e))
478 raise exceptions.URLError(e)("[{}] org_exc: {}".format(obfuscated_uri, e))
448
479
449 # now check if it's a proper hg repo, but don't do it for svn
480 # now check if it's a proper hg repo, but don't do it for svn
450 try:
481 try:
@@ -453,19 +484,18 b' class HgRemote(RemoteBase):'
453 else:
484 else:
454 # check for pure hg repos
485 # check for pure hg repos
455 log.debug(
486 log.debug(
456 "Verifying if URL is a Mercurial repository: %s",
487 "Verifying if URL is a Mercurial repository: %s", obfuscated_uri)
457 cleaned_uri)
458 ui = make_ui_from_config(config)
488 ui = make_ui_from_config(config)
459 peer_checker = makepeer(ui, url)
489 peer_checker = makepeer(ui, safe_bytes(url))
460 peer_checker.lookup('tip')
490 peer_checker.lookup(b'tip')
461 except Exception as e:
491 except Exception as e:
462 log.warning("URL is not a valid Mercurial repository: %s",
492 log.warning("URL is not a valid Mercurial repository: %s",
463 cleaned_uri)
493 obfuscated_uri)
464 raise exceptions.URLError(e)(
494 raise exceptions.URLError(e)(
465 "url [%s] does not look like an hg repo org_exc: %s"
495 "url [%s] does not look like an hg repo org_exc: %s"
466 % (cleaned_uri, e))
496 % (obfuscated_uri, e))
467
497
468 log.info("URL is a valid Mercurial repository: %s", cleaned_uri)
498 log.info("URL is a valid Mercurial repository: %s", obfuscated_uri)
469 return True
499 return True
470
500
471 @reraise_safe_exceptions
501 @reraise_safe_exceptions
@@ -483,7 +513,7 b' class HgRemote(RemoteBase):'
483 try:
513 try:
484 diff_iter = patch.diff(
514 diff_iter = patch.diff(
485 repo, node1=commit_id_1, node2=commit_id_2, match=match_filter, opts=opts)
515 repo, node1=commit_id_1, node2=commit_id_2, match=match_filter, opts=opts)
486 return BinaryEnvelope(b"".join(diff_iter))
516 return BytesEnvelope(b"".join(diff_iter))
487 except RepoLookupError as e:
517 except RepoLookupError as e:
488 raise exceptions.LookupException(e)()
518 raise exceptions.LookupException(e)()
489
519
@@ -539,6 +569,25 b' class HgRemote(RemoteBase):'
539 return _node_history_until(context_uid, repo_id, revision, path, limit)
569 return _node_history_until(context_uid, repo_id, revision, path, limit)
540
570
541 @reraise_safe_exceptions
571 @reraise_safe_exceptions
572 def bulk_file_request(self, wire, commit_id, path, pre_load):
573 cache_on, context_uid, repo_id = self._cache_on(wire)
574 region = self._region(wire)
575
576 @region.conditional_cache_on_arguments(condition=cache_on)
577 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
578 result = {}
579 for attr in pre_load:
580 try:
581 method = self._bulk_file_methods[attr]
582 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
583 result[attr] = method(wire, _commit_id, _path)
584 except KeyError as e:
585 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
586 return BinaryEnvelope(result)
587
588 return _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
589
590 @reraise_safe_exceptions
542 def fctx_annotate(self, wire, revision, path):
591 def fctx_annotate(self, wire, revision, path):
543 repo = self._factory.repo(wire)
592 repo = self._factory.repo(wire)
544 ctx = self._get_ctx(repo, revision)
593 ctx = self._get_ctx(repo, revision)
@@ -557,7 +606,7 b' class HgRemote(RemoteBase):'
557 repo = self._factory.repo(wire)
606 repo = self._factory.repo(wire)
558 ctx = self._get_ctx(repo, revision)
607 ctx = self._get_ctx(repo, revision)
559 fctx = ctx.filectx(safe_bytes(path))
608 fctx = ctx.filectx(safe_bytes(path))
560 return BinaryEnvelope(fctx.data())
609 return BytesEnvelope(fctx.data())
561
610
562 @reraise_safe_exceptions
611 @reraise_safe_exceptions
563 def fctx_flags(self, wire, commit_id, path):
612 def fctx_flags(self, wire, commit_id, path):
@@ -674,7 +723,6 b' class HgRemote(RemoteBase):'
674
723
675 @region.conditional_cache_on_arguments(condition=cache_on)
724 @region.conditional_cache_on_arguments(condition=cache_on)
676 def _lookup(_context_uid, _repo_id, _revision, _both):
725 def _lookup(_context_uid, _repo_id, _revision, _both):
677
678 repo = self._factory.repo(wire)
726 repo = self._factory.repo(wire)
679 rev = _revision
727 rev = _revision
680 if isinstance(rev, int):
728 if isinstance(rev, int):
@@ -949,35 +997,38 b' class HgRemote(RemoteBase):'
949 # Mercurial internally has a lot of logic that checks ONLY if
997 # Mercurial internally has a lot of logic that checks ONLY if
950 # option is defined, we just pass those if they are defined then
998 # option is defined, we just pass those if they are defined then
951 opts = {}
999 opts = {}
1000
952 if bookmark:
1001 if bookmark:
953 if isinstance(branch, list):
1002 opts['bookmark'] = [safe_bytes(x) for x in bookmark] \
954 bookmark = [safe_bytes(x) for x in bookmark]
1003 if isinstance(bookmark, list) else safe_bytes(bookmark)
955 else:
1004
956 bookmark = safe_bytes(bookmark)
957 opts['bookmark'] = bookmark
958 if branch:
1005 if branch:
959 if isinstance(branch, list):
1006 opts['branch'] = [safe_bytes(x) for x in branch] \
960 branch = [safe_bytes(x) for x in branch]
1007 if isinstance(branch, list) else safe_bytes(branch)
961 else:
1008
962 branch = safe_bytes(branch)
963 opts['branch'] = branch
964 if revision:
1009 if revision:
965 opts['rev'] = safe_bytes(revision)
1010 opts['rev'] = [safe_bytes(x) for x in revision] \
1011 if isinstance(revision, list) else safe_bytes(revision)
966
1012
967 commands.pull(baseui, repo, source, **opts)
1013 commands.pull(baseui, repo, source, **opts)
968
1014
969 @reraise_safe_exceptions
1015 @reraise_safe_exceptions
970 def push(self, wire, revisions, dest_path, hooks=True, push_branches=False):
1016 def push(self, wire, revisions, dest_path, hooks: bool = True, push_branches: bool = False):
971 repo = self._factory.repo(wire)
1017 repo = self._factory.repo(wire)
972 baseui = self._factory._create_config(wire['config'], hooks=hooks)
1018 baseui = self._factory._create_config(wire['config'], hooks=hooks)
973 commands.push(baseui, repo, dest=dest_path, rev=revisions,
1019
1020 revisions = [safe_bytes(x) for x in revisions] \
1021 if isinstance(revisions, list) else safe_bytes(revisions)
1022
1023 commands.push(baseui, repo, safe_bytes(dest_path),
1024 rev=revisions,
974 new_branch=push_branches)
1025 new_branch=push_branches)
975
1026
976 @reraise_safe_exceptions
1027 @reraise_safe_exceptions
977 def strip(self, wire, revision, update, backup):
1028 def strip(self, wire, revision, update, backup):
978 repo = self._factory.repo(wire)
1029 repo = self._factory.repo(wire)
979 ctx = self._get_ctx(repo, revision)
1030 ctx = self._get_ctx(repo, revision)
980 hgext_strip(
1031 hgext_strip.strip(
981 repo.baseui, repo, ctx.node(), update=update, backup=backup)
1032 repo.baseui, repo, ctx.node(), update=update, backup=backup)
982
1033
983 @reraise_safe_exceptions
1034 @reraise_safe_exceptions
@@ -1008,7 +1059,7 b' class HgRemote(RemoteBase):'
1008 # setting the interactive flag to `False` mercurial doesn't prompt the
1059 # setting the interactive flag to `False` mercurial doesn't prompt the
1009 # used but instead uses a default value.
1060 # used but instead uses a default value.
1010 repo.ui.setconfig(b'ui', b'interactive', False)
1061 repo.ui.setconfig(b'ui', b'interactive', False)
1011 commands.merge(baseui, repo, rev=revision)
1062 commands.merge(baseui, repo, rev=safe_bytes(revision))
1012
1063
1013 @reraise_safe_exceptions
1064 @reraise_safe_exceptions
1014 def merge_state(self, wire):
1065 def merge_state(self, wire):
@@ -1027,11 +1078,11 b' class HgRemote(RemoteBase):'
1027 def commit(self, wire, message, username, close_branch=False):
1078 def commit(self, wire, message, username, close_branch=False):
1028 repo = self._factory.repo(wire)
1079 repo = self._factory.repo(wire)
1029 baseui = self._factory._create_config(wire['config'])
1080 baseui = self._factory._create_config(wire['config'])
1030 repo.ui.setconfig(b'ui', b'username', username)
1081 repo.ui.setconfig(b'ui', b'username', safe_bytes(username))
1031 commands.commit(baseui, repo, message=message, close_branch=close_branch)
1082 commands.commit(baseui, repo, message=safe_bytes(message), close_branch=close_branch)
1032
1083
1033 @reraise_safe_exceptions
1084 @reraise_safe_exceptions
1034 def rebase(self, wire, source=None, dest=None, abort=False):
1085 def rebase(self, wire, source='', dest='', abort=False):
1035 repo = self._factory.repo(wire)
1086 repo = self._factory.repo(wire)
1036 baseui = self._factory._create_config(wire['config'])
1087 baseui = self._factory._create_config(wire['config'])
1037 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
1088 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
@@ -1040,7 +1091,9 b' class HgRemote(RemoteBase):'
1040 # setting the interactive flag to `False` mercurial doesn't prompt the
1091 # setting the interactive flag to `False` mercurial doesn't prompt the
1041 # used but instead uses a default value.
1092 # used but instead uses a default value.
1042 repo.ui.setconfig(b'ui', b'interactive', False)
1093 repo.ui.setconfig(b'ui', b'interactive', False)
1043 rebase.rebase(baseui, repo, base=source, dest=dest, abort=abort, keep=not abort)
1094
1095 rebase.rebase(baseui, repo, base=safe_bytes(source or ''), dest=safe_bytes(dest or ''),
1096 abort=abort, keep=not abort)
1044
1097
1045 @reraise_safe_exceptions
1098 @reraise_safe_exceptions
1046 def tag(self, wire, name, revision, message, local, user, tag_time, tag_timezone):
1099 def tag(self, wire, name, revision, message, local, user, tag_time, tag_timezone):
@@ -1050,7 +1103,7 b' class HgRemote(RemoteBase):'
1050
1103
1051 date = (tag_time, tag_timezone)
1104 date = (tag_time, tag_timezone)
1052 try:
1105 try:
1053 hg_tag.tag(repo, name, node, message, local, user, date)
1106 hg_tag.tag(repo, safe_bytes(name), node, safe_bytes(message), local, safe_bytes(user), date)
1054 except Abort as e:
1107 except Abort as e:
1055 log.exception("Tag operation aborted")
1108 log.exception("Tag operation aborted")
1056 # Exception can contain unicode which we convert
1109 # Exception can contain unicode which we convert
@@ -1060,6 +1113,7 b' class HgRemote(RemoteBase):'
1060 def bookmark(self, wire, bookmark, revision=''):
1113 def bookmark(self, wire, bookmark, revision=''):
1061 repo = self._factory.repo(wire)
1114 repo = self._factory.repo(wire)
1062 baseui = self._factory._create_config(wire['config'])
1115 baseui = self._factory._create_config(wire['config'])
1116 revision = revision or ''
1063 commands.bookmark(baseui, repo, safe_bytes(bookmark), rev=safe_bytes(revision), force=True)
1117 commands.bookmark(baseui, repo, safe_bytes(bookmark), rev=safe_bytes(revision), force=True)
1064
1118
1065 @reraise_safe_exceptions
1119 @reraise_safe_exceptions
@@ -1079,8 +1133,8 b' class HgRemote(RemoteBase):'
1079 pass
1133 pass
1080
1134
1081 @reraise_safe_exceptions
1135 @reraise_safe_exceptions
1082 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
1136 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1083 archive_dir_name, commit_id):
1137 archive_dir_name, commit_id, cache_config):
1084
1138
1085 def file_walker(_commit_id, path):
1139 def file_walker(_commit_id, path):
1086 repo = self._factory.repo(wire)
1140 repo = self._factory.repo(wire)
@@ -1100,6 +1154,6 b' class HgRemote(RemoteBase):'
1100
1154
1101 yield ArchiveNode(file_path, mode, is_link, ctx[fn].data)
1155 yield ArchiveNode(file_path, mode, is_link, ctx[fn].data)
1102
1156
1103 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
1157 return store_archive_in_cache(
1104 archive_dir_name, commit_id)
1158 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
1105
1159
@@ -37,9 +37,10 b' import svn.fs # noqa'
37 import svn.repos # noqa
37 import svn.repos # noqa
38
38
39 from vcsserver import svn_diff, exceptions, subprocessio, settings
39 from vcsserver import svn_diff, exceptions, subprocessio, settings
40 from vcsserver.base import RepoFactory, raise_from_original, ArchiveNode, archive_repo, BinaryEnvelope
40 from vcsserver.base import RepoFactory, raise_from_original, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
41 from vcsserver.exceptions import NoContentException
41 from vcsserver.exceptions import NoContentException
42 from vcsserver.str_utils import safe_str, safe_bytes
42 from vcsserver.str_utils import safe_str, safe_bytes
43 from vcsserver.type_utils import assert_bytes
43 from vcsserver.vcs_base import RemoteBase
44 from vcsserver.vcs_base import RemoteBase
44 from vcsserver.lib.svnremoterepo import svnremoterepo
45 from vcsserver.lib.svnremoterepo import svnremoterepo
45 log = logging.getLogger(__name__)
46 log = logging.getLogger(__name__)
@@ -109,6 +110,39 b' class SvnRemote(RemoteBase):'
109 def __init__(self, factory, hg_factory=None):
110 def __init__(self, factory, hg_factory=None):
110 self._factory = factory
111 self._factory = factory
111
112
113 self._bulk_methods = {
114 # NOT supported in SVN ATM...
115 }
116 self._bulk_file_methods = {
117 "size": self.get_file_size,
118 "data": self.get_file_content,
119 "flags": self.get_node_type,
120 "is_binary": self.is_binary,
121 "md5": self.md5_hash
122 }
123
124 @reraise_safe_exceptions
125 def bulk_file_request(self, wire, commit_id, path, pre_load):
126 cache_on, context_uid, repo_id = self._cache_on(wire)
127 region = self._region(wire)
128
129 # since we use unified API, we need to cast from str to in for SVN
130 commit_id = int(commit_id)
131
132 @region.conditional_cache_on_arguments(condition=cache_on)
133 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
134 result = {}
135 for attr in pre_load:
136 try:
137 method = self._bulk_file_methods[attr]
138 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
139 result[attr] = method(wire, _commit_id, _path)
140 except KeyError as e:
141 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
142 return BinaryEnvelope(result)
143
144 return _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
145
112 @reraise_safe_exceptions
146 @reraise_safe_exceptions
113 def discover_svn_version(self):
147 def discover_svn_version(self):
114 try:
148 try:
@@ -120,25 +154,23 b' class SvnRemote(RemoteBase):'
120
154
121 @reraise_safe_exceptions
155 @reraise_safe_exceptions
122 def is_empty(self, wire):
156 def is_empty(self, wire):
123
124 try:
157 try:
125 return self.lookup(wire, -1) == 0
158 return self.lookup(wire, -1) == 0
126 except Exception:
159 except Exception:
127 log.exception("failed to read object_store")
160 log.exception("failed to read object_store")
128 return False
161 return False
129
162
130 def check_url(self, url):
163 def check_url(self, url, config):
131
164
132 # uuid function get's only valid UUID from proper repo, else
165 # uuid function gets only valid UUID from proper repo, else
133 # throws exception
166 # throws exception
134 username, password, src_url = self.get_url_and_credentials(url)
167 username, password, src_url = self.get_url_and_credentials(url)
135 try:
168 try:
136 svnremoterepo(username, password, src_url).svn().uuid
169 svnremoterepo(safe_bytes(username), safe_bytes(password), safe_bytes(src_url)).svn().uuid
137 except Exception:
170 except Exception:
138 tb = traceback.format_exc()
171 tb = traceback.format_exc()
139 log.debug("Invalid Subversion url: `%s`, tb: %s", url, tb)
172 log.debug("Invalid Subversion url: `%s`, tb: %s", url, tb)
140 raise URLError(
173 raise URLError(f'"{url}" is not a valid Subversion source url.')
141 '"{}" is not a valid Subversion source url.'.format(url))
142 return True
174 return True
143
175
144 def is_path_valid_repository(self, wire, path):
176 def is_path_valid_repository(self, wire, path):
@@ -169,6 +201,7 b' class SvnRemote(RemoteBase):'
169 stdout, stderr = subprocessio.run_command(cmd)
201 stdout, stderr = subprocessio.run_command(cmd)
170 return stdout
202 return stdout
171
203
204 @reraise_safe_exceptions
172 def lookup(self, wire, revision):
205 def lookup(self, wire, revision):
173 if revision not in [-1, None, 'HEAD']:
206 if revision not in [-1, None, 'HEAD']:
174 raise NotImplementedError
207 raise NotImplementedError
@@ -177,6 +210,7 b' class SvnRemote(RemoteBase):'
177 head = svn.fs.youngest_rev(fs_ptr)
210 head = svn.fs.youngest_rev(fs_ptr)
178 return head
211 return head
179
212
213 @reraise_safe_exceptions
180 def lookup_interval(self, wire, start_ts, end_ts):
214 def lookup_interval(self, wire, start_ts, end_ts):
181 repo = self._factory.repo(wire)
215 repo = self._factory.repo(wire)
182 fsobj = svn.repos.fs(repo)
216 fsobj = svn.repos.fs(repo)
@@ -194,10 +228,12 b' class SvnRemote(RemoteBase):'
194 end_rev = svn.fs.youngest_rev(fsobj)
228 end_rev = svn.fs.youngest_rev(fsobj)
195 return start_rev, end_rev
229 return start_rev, end_rev
196
230
231 @reraise_safe_exceptions
197 def revision_properties(self, wire, revision):
232 def revision_properties(self, wire, revision):
198
233
199 cache_on, context_uid, repo_id = self._cache_on(wire)
234 cache_on, context_uid, repo_id = self._cache_on(wire)
200 region = self._region(wire)
235 region = self._region(wire)
236
201 @region.conditional_cache_on_arguments(condition=cache_on)
237 @region.conditional_cache_on_arguments(condition=cache_on)
202 def _revision_properties(_repo_id, _revision):
238 def _revision_properties(_repo_id, _revision):
203 repo = self._factory.repo(wire)
239 repo = self._factory.repo(wire)
@@ -253,6 +289,7 b' class SvnRemote(RemoteBase):'
253 def node_history(self, wire, path, revision, limit):
289 def node_history(self, wire, path, revision, limit):
254 cache_on, context_uid, repo_id = self._cache_on(wire)
290 cache_on, context_uid, repo_id = self._cache_on(wire)
255 region = self._region(wire)
291 region = self._region(wire)
292
256 @region.conditional_cache_on_arguments(condition=cache_on)
293 @region.conditional_cache_on_arguments(condition=cache_on)
257 def _assert_correct_path(_context_uid, _repo_id, _path, _revision, _limit):
294 def _assert_correct_path(_context_uid, _repo_id, _path, _revision, _limit):
258 cross_copies = False
295 cross_copies = False
@@ -272,6 +309,7 b' class SvnRemote(RemoteBase):'
272 return history_revisions
309 return history_revisions
273 return _assert_correct_path(context_uid, repo_id, path, revision, limit)
310 return _assert_correct_path(context_uid, repo_id, path, revision, limit)
274
311
312 @reraise_safe_exceptions
275 def node_properties(self, wire, path, revision):
313 def node_properties(self, wire, path, revision):
276 cache_on, context_uid, repo_id = self._cache_on(wire)
314 cache_on, context_uid, repo_id = self._cache_on(wire)
277 region = self._region(wire)
315 region = self._region(wire)
@@ -311,13 +349,14 b' class SvnRemote(RemoteBase):'
311
349
312 return annotations
350 return annotations
313
351
314 def get_node_type(self, wire, path, revision=None):
352 @reraise_safe_exceptions
353 def get_node_type(self, wire, revision=None, path=''):
315
354
316 cache_on, context_uid, repo_id = self._cache_on(wire)
355 cache_on, context_uid, repo_id = self._cache_on(wire)
317 region = self._region(wire)
356 region = self._region(wire)
318
357
319 @region.conditional_cache_on_arguments(condition=cache_on)
358 @region.conditional_cache_on_arguments(condition=cache_on)
320 def _get_node_type(_repo_id, _path, _revision):
359 def _get_node_type(_repo_id, _revision, _path):
321 repo = self._factory.repo(wire)
360 repo = self._factory.repo(wire)
322 fs_ptr = svn.repos.fs(repo)
361 fs_ptr = svn.repos.fs(repo)
323 if _revision is None:
362 if _revision is None:
@@ -325,9 +364,10 b' class SvnRemote(RemoteBase):'
325 root = svn.fs.revision_root(fs_ptr, _revision)
364 root = svn.fs.revision_root(fs_ptr, _revision)
326 node = svn.fs.check_path(root, path)
365 node = svn.fs.check_path(root, path)
327 return NODE_TYPE_MAPPING.get(node, None)
366 return NODE_TYPE_MAPPING.get(node, None)
328 return _get_node_type(repo_id, path, revision)
367 return _get_node_type(repo_id, revision, path)
329
368
330 def get_nodes(self, wire, path, revision=None):
369 @reraise_safe_exceptions
370 def get_nodes(self, wire, revision=None, path=''):
331
371
332 cache_on, context_uid, repo_id = self._cache_on(wire)
372 cache_on, context_uid, repo_id = self._cache_on(wire)
333 region = self._region(wire)
373 region = self._region(wire)
@@ -347,22 +387,26 b' class SvnRemote(RemoteBase):'
347 return result
387 return result
348 return _get_nodes(repo_id, path, revision)
388 return _get_nodes(repo_id, path, revision)
349
389
350 def get_file_content(self, wire, path, rev=None):
390 @reraise_safe_exceptions
391 def get_file_content(self, wire, rev=None, path=''):
351 repo = self._factory.repo(wire)
392 repo = self._factory.repo(wire)
352 fsobj = svn.repos.fs(repo)
393 fsobj = svn.repos.fs(repo)
394
353 if rev is None:
395 if rev is None:
354 rev = svn.fs.youngest_revision(fsobj)
396 rev = svn.fs.youngest_rev(fsobj)
397
355 root = svn.fs.revision_root(fsobj, rev)
398 root = svn.fs.revision_root(fsobj, rev)
356 content = svn.core.Stream(svn.fs.file_contents(root, path))
399 content = svn.core.Stream(svn.fs.file_contents(root, path))
357 return BinaryEnvelope(content.read())
400 return BytesEnvelope(content.read())
358
401
359 def get_file_size(self, wire, path, revision=None):
402 @reraise_safe_exceptions
403 def get_file_size(self, wire, revision=None, path=''):
360
404
361 cache_on, context_uid, repo_id = self._cache_on(wire)
405 cache_on, context_uid, repo_id = self._cache_on(wire)
362 region = self._region(wire)
406 region = self._region(wire)
363
407
364 @region.conditional_cache_on_arguments(condition=cache_on)
408 @region.conditional_cache_on_arguments(condition=cache_on)
365 def _get_file_size(_repo_id, _path, _revision):
409 def _get_file_size(_repo_id, _revision, _path):
366 repo = self._factory.repo(wire)
410 repo = self._factory.repo(wire)
367 fsobj = svn.repos.fs(repo)
411 fsobj = svn.repos.fs(repo)
368 if _revision is None:
412 if _revision is None:
@@ -370,17 +414,17 b' class SvnRemote(RemoteBase):'
370 root = svn.fs.revision_root(fsobj, _revision)
414 root = svn.fs.revision_root(fsobj, _revision)
371 size = svn.fs.file_length(root, path)
415 size = svn.fs.file_length(root, path)
372 return size
416 return size
373 return _get_file_size(repo_id, path, revision)
417 return _get_file_size(repo_id, revision, path)
374
418
375 def create_repository(self, wire, compatible_version=None):
419 def create_repository(self, wire, compatible_version=None):
376 log.info('Creating Subversion repository in path "%s"', wire['path'])
420 log.info('Creating Subversion repository in path "%s"', wire['path'])
377 self._factory.repo(wire, create=True,
421 self._factory.repo(wire, create=True,
378 compatible_version=compatible_version)
422 compatible_version=compatible_version)
379
423
380 def get_url_and_credentials(self, src_url):
424 def get_url_and_credentials(self, src_url) -> tuple[str, str, str]:
381 obj = urllib.parse.urlparse(src_url)
425 obj = urllib.parse.urlparse(src_url)
382 username = obj.username or None
426 username = obj.username or ''
383 password = obj.password or None
427 password = obj.password or ''
384 return username, password, src_url
428 return username, password, src_url
385
429
386 def import_remote_repository(self, wire, src_url):
430 def import_remote_repository(self, wire, src_url):
@@ -430,8 +474,6 b' class SvnRemote(RemoteBase):'
430
474
431 def commit(self, wire, message, author, timestamp, updated, removed):
475 def commit(self, wire, message, author, timestamp, updated, removed):
432
476
433 updated = [{k: safe_bytes(v) for k, v in x.items() if isinstance(v, str)} for x in updated]
434
435 message = safe_bytes(message)
477 message = safe_bytes(message)
436 author = safe_bytes(author)
478 author = safe_bytes(author)
437
479
@@ -450,13 +492,14 b' class SvnRemote(RemoteBase):'
450 commit_id = svn.repos.fs_commit_txn(repo, txn)
492 commit_id = svn.repos.fs_commit_txn(repo, txn)
451
493
452 if timestamp:
494 if timestamp:
453 apr_time = int(apr_time_t(timestamp))
495 apr_time = apr_time_t(timestamp)
454 ts_formatted = svn.core.svn_time_to_cstring(apr_time)
496 ts_formatted = svn.core.svn_time_to_cstring(apr_time)
455 svn.fs.change_rev_prop(fsobj, commit_id, 'svn:date', ts_formatted)
497 svn.fs.change_rev_prop(fsobj, commit_id, 'svn:date', ts_formatted)
456
498
457 log.debug('Committed revision "%s" to "%s".', commit_id, wire['path'])
499 log.debug('Committed revision "%s" to "%s".', commit_id, wire['path'])
458 return commit_id
500 return commit_id
459
501
502 @reraise_safe_exceptions
460 def diff(self, wire, rev1, rev2, path1=None, path2=None,
503 def diff(self, wire, rev1, rev2, path1=None, path2=None,
461 ignore_whitespace=False, context=3):
504 ignore_whitespace=False, context=3):
462
505
@@ -465,12 +508,12 b' class SvnRemote(RemoteBase):'
465 diff_creator = SvnDiffer(
508 diff_creator = SvnDiffer(
466 repo, rev1, path1, rev2, path2, ignore_whitespace, context)
509 repo, rev1, path1, rev2, path2, ignore_whitespace, context)
467 try:
510 try:
468 return BinaryEnvelope(diff_creator.generate_diff())
511 return BytesEnvelope(diff_creator.generate_diff())
469 except svn.core.SubversionException as e:
512 except svn.core.SubversionException as e:
470 log.exception(
513 log.exception(
471 "Error during diff operation operation. "
514 "Error during diff operation operation. "
472 "Path might not exist %s, %s", path1, path2)
515 "Path might not exist %s, %s", path1, path2)
473 return BinaryEnvelope(b'')
516 return BytesEnvelope(b'')
474
517
475 @reraise_safe_exceptions
518 @reraise_safe_exceptions
476 def is_large_file(self, wire, path):
519 def is_large_file(self, wire, path):
@@ -483,8 +526,10 b' class SvnRemote(RemoteBase):'
483
526
484 @region.conditional_cache_on_arguments(condition=cache_on)
527 @region.conditional_cache_on_arguments(condition=cache_on)
485 def _is_binary(_repo_id, _rev, _path):
528 def _is_binary(_repo_id, _rev, _path):
486 raw_bytes = self.get_file_content(wire, path, rev)
529 raw_bytes = self.get_file_content(wire, rev, path)
487 return raw_bytes and b'\0' in raw_bytes
530 if not raw_bytes:
531 return False
532 return b'\0' in raw_bytes
488
533
489 return _is_binary(repo_id, rev, path)
534 return _is_binary(repo_id, rev, path)
490
535
@@ -555,8 +600,8 b' class SvnRemote(RemoteBase):'
555 pass
600 pass
556
601
557 @reraise_safe_exceptions
602 @reraise_safe_exceptions
558 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
603 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
559 archive_dir_name, commit_id):
604 archive_dir_name, commit_id, cache_config):
560
605
561 def walk_tree(root, root_dir, _commit_id):
606 def walk_tree(root, root_dir, _commit_id):
562 """
607 """
@@ -616,8 +661,8 b' class SvnRemote(RemoteBase):'
616 data_stream = f_data['content_stream']
661 data_stream = f_data['content_stream']
617 yield ArchiveNode(file_path, mode, is_link, data_stream)
662 yield ArchiveNode(file_path, mode, is_link, data_stream)
618
663
619 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
664 return store_archive_in_cache(
620 archive_dir_name, commit_id)
665 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
621
666
622
667
623 class SvnDiffer(object):
668 class SvnDiffer(object):
@@ -658,15 +703,15 b' class SvnDiffer(object):'
658 "Source type: %s, target type: %s" %
703 "Source type: %s, target type: %s" %
659 (self.src_kind, self.tgt_kind))
704 (self.src_kind, self.tgt_kind))
660
705
661 def generate_diff(self):
706 def generate_diff(self) -> bytes:
662 buf = io.StringIO()
707 buf = io.BytesIO()
663 if self.tgt_kind == svn.core.svn_node_dir:
708 if self.tgt_kind == svn.core.svn_node_dir:
664 self._generate_dir_diff(buf)
709 self._generate_dir_diff(buf)
665 else:
710 else:
666 self._generate_file_diff(buf)
711 self._generate_file_diff(buf)
667 return buf.getvalue()
712 return buf.getvalue()
668
713
669 def _generate_dir_diff(self, buf):
714 def _generate_dir_diff(self, buf: io.BytesIO):
670 editor = DiffChangeEditor()
715 editor = DiffChangeEditor()
671 editor_ptr, editor_baton = svn.delta.make_editor(editor)
716 editor_ptr, editor_baton = svn.delta.make_editor(editor)
672 svn.repos.dir_delta2(
717 svn.repos.dir_delta2(
@@ -687,7 +732,7 b' class SvnDiffer(object):'
687 self._generate_node_diff(
732 self._generate_node_diff(
688 buf, change, path, self.tgt_path, path, self.src_path)
733 buf, change, path, self.tgt_path, path, self.src_path)
689
734
690 def _generate_file_diff(self, buf):
735 def _generate_file_diff(self, buf: io.BytesIO):
691 change = None
736 change = None
692 if self.src_kind == svn.core.svn_node_none:
737 if self.src_kind == svn.core.svn_node_none:
693 change = "add"
738 change = "add"
@@ -699,13 +744,14 b' class SvnDiffer(object):'
699 buf, change, tgt_path, tgt_base, src_path, src_base)
744 buf, change, tgt_path, tgt_base, src_path, src_base)
700
745
701 def _generate_node_diff(
746 def _generate_node_diff(
702 self, buf, change, tgt_path, tgt_base, src_path, src_base):
747 self, buf: io.BytesIO, change, tgt_path, tgt_base, src_path, src_base):
703
704
748
749 tgt_path_bytes = safe_bytes(tgt_path)
705 tgt_path = safe_str(tgt_path)
750 tgt_path = safe_str(tgt_path)
751
752 src_path_bytes = safe_bytes(src_path)
706 src_path = safe_str(src_path)
753 src_path = safe_str(src_path)
707
754
708
709 if self.src_rev == self.tgt_rev and tgt_base == src_base:
755 if self.src_rev == self.tgt_rev and tgt_base == src_base:
710 # makes consistent behaviour with git/hg to return empty diff if
756 # makes consistent behaviour with git/hg to return empty diff if
711 # we compare same revisions
757 # we compare same revisions
@@ -717,46 +763,45 b' class SvnDiffer(object):'
717 self.binary_content = False
763 self.binary_content = False
718 mime_type = self._get_mime_type(tgt_full_path)
764 mime_type = self._get_mime_type(tgt_full_path)
719
765
720 if mime_type and not mime_type.startswith('text'):
766 if mime_type and not mime_type.startswith(b'text'):
721 self.binary_content = True
767 self.binary_content = True
722 buf.write("=" * 67 + '\n')
768 buf.write(b"=" * 67 + b'\n')
723 buf.write("Cannot display: file marked as a binary type.\n")
769 buf.write(b"Cannot display: file marked as a binary type.\n")
724 buf.write("svn:mime-type = %s\n" % mime_type)
770 buf.write(b"svn:mime-type = %s\n" % mime_type)
725 buf.write("Index: {}\n".format(tgt_path))
771 buf.write(b"Index: %b\n" % tgt_path_bytes)
726 buf.write("=" * 67 + '\n')
772 buf.write(b"=" * 67 + b'\n')
727 buf.write("diff --git a/{tgt_path} b/{tgt_path}\n".format(
773 buf.write(b"diff --git a/%b b/%b\n" % (tgt_path_bytes, tgt_path_bytes))
728 tgt_path=tgt_path))
729
774
730 if change == 'add':
775 if change == 'add':
731 # TODO: johbo: SVN is missing a zero here compared to git
776 # TODO: johbo: SVN is missing a zero here compared to git
732 buf.write("new file mode 10644\n")
777 buf.write(b"new file mode 10644\n")
778
779 # TODO(marcink): intro to binary detection of svn patches
780 # if self.binary_content:
781 # buf.write(b'GIT binary patch\n')
733
782
734 #TODO(marcink): intro to binary detection of svn patches
783 buf.write(b"--- /dev/null\t(revision 0)\n")
784 src_lines = []
785 else:
786 if change == 'delete':
787 buf.write(b"deleted file mode 10644\n")
788
789 # TODO(marcink): intro to binary detection of svn patches
735 # if self.binary_content:
790 # if self.binary_content:
736 # buf.write('GIT binary patch\n')
791 # buf.write('GIT binary patch\n')
737
792
738 buf.write("--- /dev/null\t(revision 0)\n")
793 buf.write(b"--- a/%b\t(revision %d)\n" % (src_path_bytes, self.src_rev))
739 src_lines = []
740 else:
741 if change == 'delete':
742 buf.write("deleted file mode 10644\n")
743
744 #TODO(marcink): intro to binary detection of svn patches
745 # if self.binary_content:
746 # buf.write('GIT binary patch\n')
747
748 buf.write("--- a/{}\t(revision {})\n".format(
749 src_path, self.src_rev))
750 src_lines = self._svn_readlines(self.src_root, src_full_path)
794 src_lines = self._svn_readlines(self.src_root, src_full_path)
751
795
752 if change == 'delete':
796 if change == 'delete':
753 buf.write("+++ /dev/null\t(revision {})\n".format(self.tgt_rev))
797 buf.write(b"+++ /dev/null\t(revision %d)\n" % self.tgt_rev)
754 tgt_lines = []
798 tgt_lines = []
755 else:
799 else:
756 buf.write("+++ b/{}\t(revision {})\n".format(
800 buf.write(b"+++ b/%b\t(revision %d)\n" % (tgt_path_bytes, self.tgt_rev))
757 tgt_path, self.tgt_rev))
758 tgt_lines = self._svn_readlines(self.tgt_root, tgt_full_path)
801 tgt_lines = self._svn_readlines(self.tgt_root, tgt_full_path)
759
802
803 # we made our diff header, time to generate the diff content into our buffer
804
760 if not self.binary_content:
805 if not self.binary_content:
761 udiff = svn_diff.unified_diff(
806 udiff = svn_diff.unified_diff(
762 src_lines, tgt_lines, context=self.context,
807 src_lines, tgt_lines, context=self.context,
@@ -766,7 +811,7 b' class SvnDiffer(object):'
766
811
767 buf.writelines(udiff)
812 buf.writelines(udiff)
768
813
769 def _get_mime_type(self, path):
814 def _get_mime_type(self, path) -> bytes:
770 try:
815 try:
771 mime_type = svn.fs.node_prop(
816 mime_type = svn.fs.node_prop(
772 self.tgt_root, path, svn.core.SVN_PROP_MIME_TYPE)
817 self.tgt_root, path, svn.core.SVN_PROP_MIME_TYPE)
@@ -822,7 +867,7 b' class TxnNodeProcessor(object):'
822 """
867 """
823
868
824 def __init__(self, node, txn_root):
869 def __init__(self, node, txn_root):
825 assert isinstance(node['path'], bytes)
870 assert_bytes(node['path'])
826
871
827 self.node = node
872 self.node = node
828 self.txn_root = txn_root
873 self.txn_root = txn_root
@@ -858,7 +903,7 b' class TxnNodeProcessor(object):'
858 svn.fs.make_file(self.txn_root, self.node['path'])
903 svn.fs.make_file(self.txn_root, self.node['path'])
859
904
860 def _update_file_content(self):
905 def _update_file_content(self):
861 assert isinstance(self.node['content'], bytes)
906 assert_bytes(self.node['content'])
862
907
863 handler, baton = svn.fs.apply_textdelta(
908 handler, baton = svn.fs.apply_textdelta(
864 self.txn_root, self.node['path'], None, None)
909 self.txn_root, self.node['path'], None, None)
@@ -868,14 +913,14 b' class TxnNodeProcessor(object):'
868 properties = self.node.get('properties', {})
913 properties = self.node.get('properties', {})
869 for key, value in properties.items():
914 for key, value in properties.items():
870 svn.fs.change_node_prop(
915 svn.fs.change_node_prop(
871 self.txn_root, self.node['path'], key, value)
916 self.txn_root, self.node['path'], safe_bytes(key), safe_bytes(value))
872
917
873
918
874 def apr_time_t(timestamp):
919 def apr_time_t(timestamp):
875 """
920 """
876 Convert a Python timestamp into APR timestamp type apr_time_t
921 Convert a Python timestamp into APR timestamp type apr_time_t
877 """
922 """
878 return timestamp * 1E6
923 return int(timestamp * 1E6)
879
924
880
925
881 def svn_opt_revision_value_t(num):
926 def svn_opt_revision_value_t(num):
@@ -16,15 +16,15 b''
16 import difflib
16 import difflib
17
17
18
18
19 def get_filtered_hunks(fromlines, tolines, context=None,
19 def get_filtered_hunks(from_lines, to_lines, context=None,
20 ignore_blank_lines=False, ignore_case=False,
20 ignore_blank_lines: bool = False, ignore_case: bool = False,
21 ignore_space_changes=False):
21 ignore_space_changes: bool = False):
22 """Retrieve differences in the form of `difflib.SequenceMatcher`
22 """Retrieve differences in the form of `difflib.SequenceMatcher`
23 opcodes, grouped according to the ``context`` and ``ignore_*``
23 opcodes, grouped according to the ``context`` and ``ignore_*``
24 parameters.
24 parameters.
25
25
26 :param fromlines: list of lines corresponding to the old content
26 :param from_lines: list of lines corresponding to the old content
27 :param tolines: list of lines corresponding to the new content
27 :param to_lines: list of lines corresponding to the new content
28 :param ignore_blank_lines: differences about empty lines only are ignored
28 :param ignore_blank_lines: differences about empty lines only are ignored
29 :param ignore_case: upper case / lower case only differences are ignored
29 :param ignore_case: upper case / lower case only differences are ignored
30 :param ignore_space_changes: differences in amount of spaces are ignored
30 :param ignore_space_changes: differences in amount of spaces are ignored
@@ -36,27 +36,27 b' def get_filtered_hunks(fromlines, toline'
36 to filter out the results will come straight from the
36 to filter out the results will come straight from the
37 SequenceMatcher.
37 SequenceMatcher.
38 """
38 """
39 hunks = get_hunks(fromlines, tolines, context)
39 hunks = get_hunks(from_lines, to_lines, context)
40 if ignore_space_changes or ignore_case or ignore_blank_lines:
40 if ignore_space_changes or ignore_case or ignore_blank_lines:
41 hunks = filter_ignorable_lines(hunks, fromlines, tolines, context,
41 hunks = filter_ignorable_lines(hunks, from_lines, to_lines, context,
42 ignore_blank_lines, ignore_case,
42 ignore_blank_lines, ignore_case,
43 ignore_space_changes)
43 ignore_space_changes)
44 return hunks
44 return hunks
45
45
46
46
47 def get_hunks(fromlines, tolines, context=None):
47 def get_hunks(from_lines, to_lines, context=None):
48 """Generator yielding grouped opcodes describing differences .
48 """Generator yielding grouped opcodes describing differences .
49
49
50 See `get_filtered_hunks` for the parameter descriptions.
50 See `get_filtered_hunks` for the parameter descriptions.
51 """
51 """
52 matcher = difflib.SequenceMatcher(None, fromlines, tolines)
52 matcher = difflib.SequenceMatcher(None, from_lines, to_lines)
53 if context is None:
53 if context is None:
54 return (hunk for hunk in [matcher.get_opcodes()])
54 return (hunk for hunk in [matcher.get_opcodes()])
55 else:
55 else:
56 return matcher.get_grouped_opcodes(context)
56 return matcher.get_grouped_opcodes(context)
57
57
58
58
59 def filter_ignorable_lines(hunks, fromlines, tolines, context,
59 def filter_ignorable_lines(hunks, from_lines, to_lines, context,
60 ignore_blank_lines, ignore_case,
60 ignore_blank_lines, ignore_case,
61 ignore_space_changes):
61 ignore_space_changes):
62 """Detect line changes that should be ignored and emits them as
62 """Detect line changes that should be ignored and emits them as
@@ -66,11 +66,12 b' def filter_ignorable_lines(hunks, fromli'
66 See `get_filtered_hunks` for the parameter descriptions.
66 See `get_filtered_hunks` for the parameter descriptions.
67 """
67 """
68 def is_ignorable(tag, fromlines, tolines):
68 def is_ignorable(tag, fromlines, tolines):
69
69 if tag == 'delete' and ignore_blank_lines:
70 if tag == 'delete' and ignore_blank_lines:
70 if ''.join(fromlines) == '':
71 if b''.join(fromlines) == b'':
71 return True
72 return True
72 elif tag == 'insert' and ignore_blank_lines:
73 elif tag == 'insert' and ignore_blank_lines:
73 if ''.join(tolines) == '':
74 if b''.join(tolines) == b'':
74 return True
75 return True
75 elif tag == 'replace' and (ignore_case or ignore_space_changes):
76 elif tag == 'replace' and (ignore_case or ignore_space_changes):
76 if len(fromlines) != len(tolines):
77 if len(fromlines) != len(tolines):
@@ -80,7 +81,7 b' def filter_ignorable_lines(hunks, fromli'
80 if ignore_case:
81 if ignore_case:
81 input_str = input_str.lower()
82 input_str = input_str.lower()
82 if ignore_space_changes:
83 if ignore_space_changes:
83 input_str = ' '.join(input_str.split())
84 input_str = b' '.join(input_str.split())
84 return input_str
85 return input_str
85
86
86 for i in range(len(fromlines)):
87 for i in range(len(fromlines)):
@@ -100,7 +101,7 b' def filter_ignorable_lines(hunks, fromli'
100 else:
101 else:
101 prev = (tag, i1, i2, j1, j2)
102 prev = (tag, i1, i2, j1, j2)
102 else:
103 else:
103 if is_ignorable(tag, fromlines[i1:i2], tolines[j1:j2]):
104 if is_ignorable(tag, from_lines[i1:i2], to_lines[j1:j2]):
104 ignored_lines = True
105 ignored_lines = True
105 if prev:
106 if prev:
106 prev = 'equal', prev[1], i2, prev[3], j2
107 prev = 'equal', prev[1], i2, prev[3], j2
@@ -124,10 +125,11 b' def filter_ignorable_lines(hunks, fromli'
124 nn = n + n
125 nn = n + n
125
126
126 group = []
127 group = []
128
127 def all_equal():
129 def all_equal():
128 all(op[0] == 'equal' for op in group)
130 all(op[0] == 'equal' for op in group)
129 for idx, (tag, i1, i2, j1, j2) in enumerate(opcodes):
131 for idx, (tag, i1, i2, j1, j2) in enumerate(opcodes):
130 if idx == 0 and tag == 'equal': # Fixup leading unchanged block
132 if idx == 0 and tag == 'equal': # Fixup leading unchanged block
131 i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
133 i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
132 elif tag == 'equal' and i2 - i1 > nn:
134 elif tag == 'equal' and i2 - i1 > nn:
133 group.append((tag, i1, min(i2, i1 + n), j1,
135 group.append((tag, i1, min(i2, i1 + n), j1,
@@ -139,7 +141,7 b' def filter_ignorable_lines(hunks, fromli'
139 group.append((tag, i1, i2, j1, j2))
141 group.append((tag, i1, i2, j1, j2))
140
142
141 if group and not (len(group) == 1 and group[0][0] == 'equal'):
143 if group and not (len(group) == 1 and group[0][0] == 'equal'):
142 if group[-1][0] == 'equal': # Fixup trailing unchanged block
144 if group[-1][0] == 'equal': # Fixup trailing unchanged block
143 tag, i1, i2, j1, j2 = group[-1]
145 tag, i1, i2, j1, j2 = group[-1]
144 group[-1] = tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)
146 group[-1] = tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)
145 if not all_equal():
147 if not all_equal():
@@ -149,11 +151,12 b' def filter_ignorable_lines(hunks, fromli'
149 yield hunk
151 yield hunk
150
152
151
153
152 NO_NEWLINE_AT_END = '\\ No newline at end of file'
154 NO_NEWLINE_AT_END = b'\\ No newline at end of file'
155 LINE_TERM = b'\n'
153
156
154
157
155 def unified_diff(fromlines, tolines, context=None, ignore_blank_lines=0,
158 def unified_diff(from_lines, to_lines, context=None, ignore_blank_lines: bool = False,
156 ignore_case=0, ignore_space_changes=0, lineterm='\n'):
159 ignore_case: bool = False, ignore_space_changes: bool = False, lineterm=LINE_TERM) -> bytes:
157 """
160 """
158 Generator producing lines corresponding to a textual diff.
161 Generator producing lines corresponding to a textual diff.
159
162
@@ -162,10 +165,16 b' def unified_diff(fromlines, tolines, con'
162 # TODO: johbo: Check if this can be nicely integrated into the matching
165 # TODO: johbo: Check if this can be nicely integrated into the matching
163
166
164 if ignore_space_changes:
167 if ignore_space_changes:
165 fromlines = [l.strip() for l in fromlines]
168 from_lines = [l.strip() for l in from_lines]
166 tolines = [l.strip() for l in tolines]
169 to_lines = [l.strip() for l in to_lines]
167
170
168 for group in get_filtered_hunks(fromlines, tolines, context,
171 def _hunk_range(start, length) -> bytes:
172 if length != 1:
173 return b'%d,%d' % (start, length)
174 else:
175 return b'%d' % (start,)
176
177 for group in get_filtered_hunks(from_lines, to_lines, context,
169 ignore_blank_lines, ignore_case,
178 ignore_blank_lines, ignore_case,
170 ignore_space_changes):
179 ignore_space_changes):
171 i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
180 i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
@@ -173,37 +182,30 b' def unified_diff(fromlines, tolines, con'
173 i1, i2 = -1, -1 # support for Add changes
182 i1, i2 = -1, -1 # support for Add changes
174 if j1 == 0 and j2 == 0:
183 if j1 == 0 and j2 == 0:
175 j1, j2 = -1, -1 # support for Delete changes
184 j1, j2 = -1, -1 # support for Delete changes
176 yield '@@ -{} +{} @@{}'.format(
185 yield b'@@ -%b +%b @@%b' % (
177 _hunk_range(i1 + 1, i2 - i1),
186 _hunk_range(i1 + 1, i2 - i1),
178 _hunk_range(j1 + 1, j2 - j1),
187 _hunk_range(j1 + 1, j2 - j1),
179 lineterm)
188 lineterm)
180 for tag, i1, i2, j1, j2 in group:
189 for tag, i1, i2, j1, j2 in group:
181 if tag == 'equal':
190 if tag == 'equal':
182 for line in fromlines[i1:i2]:
191 for line in from_lines[i1:i2]:
183 if not line.endswith(lineterm):
192 if not line.endswith(lineterm):
184 yield ' ' + line + lineterm
193 yield b' ' + line + lineterm
185 yield NO_NEWLINE_AT_END + lineterm
194 yield NO_NEWLINE_AT_END + lineterm
186 else:
195 else:
187 yield ' ' + line
196 yield b' ' + line
188 else:
197 else:
189 if tag in ('replace', 'delete'):
198 if tag in ('replace', 'delete'):
190 for line in fromlines[i1:i2]:
199 for line in from_lines[i1:i2]:
191 if not line.endswith(lineterm):
200 if not line.endswith(lineterm):
192 yield '-' + line + lineterm
201 yield b'-' + line + lineterm
193 yield NO_NEWLINE_AT_END + lineterm
202 yield NO_NEWLINE_AT_END + lineterm
194 else:
203 else:
195 yield '-' + line
204 yield b'-' + line
196 if tag in ('replace', 'insert'):
205 if tag in ('replace', 'insert'):
197 for line in tolines[j1:j2]:
206 for line in to_lines[j1:j2]:
198 if not line.endswith(lineterm):
207 if not line.endswith(lineterm):
199 yield '+' + line + lineterm
208 yield b'+' + line + lineterm
200 yield NO_NEWLINE_AT_END + lineterm
209 yield NO_NEWLINE_AT_END + lineterm
201 else:
210 else:
202 yield '+' + line
211 yield b'+' + line
203
204
205 def _hunk_range(start, length):
206 if length != 1:
207 return '%d,%d' % (start, length)
208 else:
209 return '%d' % (start, )
@@ -69,11 +69,11 b' def test_svn_libraries_can_be_imported()'
69
69
70
70
71 @pytest.mark.parametrize('example_url, parts', [
71 @pytest.mark.parametrize('example_url, parts', [
72 ('http://server.com', (None, None, 'http://server.com')),
72 ('http://server.com', ('', '', 'http://server.com')),
73 ('http://user@server.com', ('user', None, 'http://user@server.com')),
73 ('http://user@server.com', ('user', '', 'http://user@server.com')),
74 ('http://user:pass@server.com', ('user', 'pass', 'http://user:pass@server.com')),
74 ('http://user:pass@server.com', ('user', 'pass', 'http://user:pass@server.com')),
75 ('<script>', (None, None, '<script>')),
75 ('<script>', ('', '', '<script>')),
76 ('http://', (None, None, 'http://')),
76 ('http://', ('', '', 'http://')),
77 ])
77 ])
78 def test_username_password_extraction_from_url(example_url, parts):
78 def test_username_password_extraction_from_url(example_url, parts):
79 from vcsserver.remote import svn
79 from vcsserver.remote import svn
@@ -99,5 +99,5 b' def test_check_url(call_url):'
99
99
100 remote = svn.SvnRemote(factory)
100 remote = svn.SvnRemote(factory)
101 remote.is_path_valid_repository = lambda wire, path: True
101 remote.is_path_valid_repository = lambda wire, path: True
102 assert remote.check_url(call_url)
102 assert remote.check_url(call_url, {'dummy': 'config'})
103
103
@@ -14,10 +14,12 b''
14 # You should have received a copy of the GNU General Public License
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
17 import base64
18 import time
18 import time
19 import logging
19 import logging
20
20
21 import msgpack
22
21 import vcsserver
23 import vcsserver
22 from vcsserver.str_utils import safe_str, ascii_str
24 from vcsserver.str_utils import safe_str, ascii_str
23
25
@@ -33,8 +35,9 b' def get_user_agent(environ):'
33 return environ.get('HTTP_USER_AGENT')
35 return environ.get('HTTP_USER_AGENT')
34
36
35
37
36 def get_call_context(registry) -> dict:
38 def get_call_context(request) -> dict:
37 cc = {}
39 cc = {}
40 registry = request.registry
38 if hasattr(registry, 'vcs_call_context'):
41 if hasattr(registry, 'vcs_call_context'):
39 cc.update({
42 cc.update({
40 'X-RC-Method': registry.vcs_call_context.get('method'),
43 'X-RC-Method': registry.vcs_call_context.get('method'),
@@ -44,6 +47,14 b' def get_call_context(registry) -> dict:'
44 return cc
47 return cc
45
48
46
49
50 def get_headers_call_context(environ, strict=True):
51 if 'HTTP_X_RC_VCS_STREAM_CALL_CONTEXT' in environ:
52 packed_cc = base64.b64decode(environ['HTTP_X_RC_VCS_STREAM_CALL_CONTEXT'])
53 return msgpack.unpackb(packed_cc)
54 elif strict:
55 raise ValueError('Expected header HTTP_X_RC_VCS_STREAM_CALL_CONTEXT not found')
56
57
47 class RequestWrapperTween(object):
58 class RequestWrapperTween(object):
48 def __init__(self, handler, registry):
59 def __init__(self, handler, registry):
49 self.handler = handler
60 self.handler = handler
@@ -60,7 +71,7 b' class RequestWrapperTween(object):'
60 response = self.handler(request)
71 response = self.handler(request)
61 finally:
72 finally:
62 ua = get_user_agent(request.environ)
73 ua = get_user_agent(request.environ)
63 call_context = get_call_context(request.registry)
74 call_context = get_call_context(request)
64 vcs_method = call_context.get('X-RC-Method', '_NO_VCS_METHOD')
75 vcs_method = call_context.get('X-RC-Method', '_NO_VCS_METHOD')
65 repo_name = call_context.get('X-RC-Repo-Name', '')
76 repo_name = call_context.get('X-RC-Repo-Name', '')
66
77
General Comments 0
You need to be logged in to leave comments. Login now