archive: implemented an efficient way to perform archiving for each repository.
milka
r894:5a847e1a default
@@ -14,12 +14,15 @@
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software Foundation,
 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
+import os
 import sys
 import traceback
 import logging
 import urlparse
 
+from vcsserver import exceptions
+from vcsserver.exceptions import NoContentException
+from vcsserver.hgcompat import (archival)
 from vcsserver.lib.rc_cache import region_meta
 log = logging.getLogger(__name__)
 
@@ -74,3 +77,54 @@ def raise_from_original(new_type):
         raise new_exc, None, exc_traceback
     finally:
         del exc_traceback
+
+
+class ArchiveNode(object):
+    def __init__(self, path, mode, is_link, raw_bytes):
+        self.path = path
+        self.mode = mode
+        self.is_link = is_link
+        self.raw_bytes = raw_bytes
+
+
+def archive_repo(walker, archive_dest_path, kind, mtime, archive_at_path,
+                 archive_dir_name, commit_id, write_metadata=True, extra_metadata=None):
+    """
+    walker should be a file walker, for example:
+    def walker():
+        for file_info in files:
+            yield ArchiveNode(fn, mode, is_link, ctx[fn].data)
+    """
+    extra_metadata = extra_metadata or {}
+
+    if kind == "tgz":
+        archiver = archival.tarit(archive_dest_path, mtime, "gz")
+    elif kind == "tbz2":
+        archiver = archival.tarit(archive_dest_path, mtime, "bz2")
+    elif kind == 'zip':
+        archiver = archival.zipit(archive_dest_path, mtime)
+    else:
+        raise exceptions.ArchiveException()(
+            'Remote does not support: "%s" archive type.' % kind)
+
+    for f in walker(commit_id, archive_at_path):
+        f_path = os.path.join(archive_dir_name, f.path.lstrip('/'))
+        try:
+            archiver.addfile(f_path, f.mode, f.is_link, f.raw_bytes())
+        except NoContentException:
+            # NOTE(marcink): this is a special case for SVN so we can create "empty"
+            # directories which aren't supported by archiver
+            archiver.addfile(os.path.join(f_path, '.dir'), f.mode, f.is_link, '')
+
+    if write_metadata:
+        metadata = dict([
+            ('commit_id', commit_id),
+            ('mtime', mtime),
+        ])
+        metadata.update(extra_metadata)
+
+        meta = ["%s:%s" % (f_name, value) for f_name, value in metadata.items()]
+        f_path = os.path.join(archive_dir_name, '.archival.txt')
+        archiver.addfile(f_path, 0o644, False, '\n'.join(meta))
+
+    return archiver.done()
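
Note: the new vcsserver.base.archive_repo() helper is backend-agnostic; each backend only has to supply a walker. A minimal usage sketch under that assumption follows; the file list, destination path, commit id and extra metadata below are made up for illustration and not part of the commit:

from vcsserver.base import ArchiveNode, archive_repo

# Hypothetical in-memory "repository": path -> raw bytes.
files = {'README.rst': b'hello archive\n'}

def file_walker(_commit_id, _path):
    for path, raw in files.items():
        # raw_bytes must be a callable returning the file content lazily.
        yield ArchiveNode(path, 0o644, False, lambda raw=raw: raw)

archive_repo(file_walker, '/tmp/example.tgz', 'tgz', mtime=0,
             archive_at_path='/', archive_dir_name='example-repo',
             commit_id='deadbeef', extra_metadata={'repo_name': 'example'})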
@@ -119,3 +119,7 @@ class HTTPRepoBranchProtected(HTTPForbid
 
 class RefNotFoundException(KeyError):
     pass
+
+
+class NoContentException(ValueError):
+    pass
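
Note: NoContentException ties into base.archive_repo() above: a walker may yield a node whose raw_bytes callable raises it, and the archiver then stores a '<dir>/.dir' placeholder instead of file content (this is how the SVN backend below represents empty directories). A small sketch of that contract; the directory name is illustrative only:

from vcsserver.base import ArchiveNode
from vcsserver.exceptions import NoContentException

def no_content():
    # Raised instead of returning bytes: "this node has no file content".
    raise NoContentException()

# archive_repo() catches the exception and adds 'docs/empty-dir/.dir' for us.
empty_dir_node = ArchiveNode('docs/empty-dir', 0o100644, False, no_content)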
@@ -29,6 +29,7 @@ from functools import wraps
 import more_itertools
 import pygit2
 from pygit2 import Repository as LibGit2Repo
+from pygit2 import index as LibGit2Index
 from dulwich import index, objects
 from dulwich.client import HttpGitClient, LocalGitClient
 from dulwich.errors import (
@@ -40,7 +41,7 @@ from dulwich.server import update_server
 
 from vcsserver import exceptions, settings, subprocessio
 from vcsserver.utils import safe_str, safe_int, safe_unicode
-from vcsserver.base import RepoFactory, obfuscate_qs
+from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo
 from vcsserver.hgcompat import (
     hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
 from vcsserver.git_lfs.lib import LFSOidStore
@@ -1190,3 +1191,36 @@ class GitRemote(RemoteBase):
             'pre_version': get_git_pre_hook_version(path, bare),
             'post_version': get_git_post_hook_version(path, bare),
         }
+
+    @reraise_safe_exceptions
+    def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
+                     archive_dir_name, commit_id):
+
+        def file_walker(_commit_id, path):
+            repo_init = self._factory.repo_libgit2(wire)
+
+            with repo_init as repo:
+                commit = repo[commit_id]
+
+                if path in ['', '/']:
+                    tree = commit.tree
+                else:
+                    tree = commit.tree[path.rstrip('/')]
+                    tree_id = tree.id.hex
+                    try:
+                        tree = repo[tree_id]
+                    except KeyError:
+                        raise ObjectMissing('No tree with id: {}'.format(tree_id))
+
+                index = LibGit2Index.Index()
+                index.read_tree(tree)
+                file_iter = index
+
+                for fn in file_iter:
+                    file_path = fn.path
+                    mode = fn.mode
+                    is_link = stat.S_ISLNK(mode)
+                    yield ArchiveNode(file_path, mode, is_link, repo[fn.id].read_raw)
+
+        return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
+                            archive_dir_name, commit_id)
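
Note: the Git walker flattens the whole commit tree through pygit2's in-memory index, so no temporary checkout is needed. A rough standalone sketch of the same technique outside the vcsserver wiring; the repository path is hypothetical:

import stat
import pygit2

repo = pygit2.Repository('/path/to/repo.git')   # hypothetical path
commit = repo.revparse_single('HEAD')           # any commit object will do

# read_tree() expands the tree recursively into plain index entries,
# which is exactly what the walker above feeds to the archiver.
index = pygit2.Index()
index.read_tree(commit.tree)

for entry in index:
    is_link = stat.S_ISLNK(entry.mode)
    raw = repo[entry.id].read_raw()             # raw blob bytes for this entry
    print(entry.path, oct(entry.mode), is_link, len(raw))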
@@ -14,9 +14,10 @@
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software Foundation,
 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
+import functools
 import io
 import logging
+import os
 import stat
 import urllib
 import urllib2
@@ -31,13 +32,14 @@ from mercurial import repair
 
 import vcsserver
 from vcsserver import exceptions
-from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original
+from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original, archive_repo, ArchiveNode
 from vcsserver.hgcompat import (
     archival, bin, clone, config as hgconfig, diffopts, hex, get_ctx,
     hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler,
     makepeer, instance, match, memctx, exchange, memfilectx, nullrev, hg_merge,
     patch, peer, revrange, ui, hg_tag, Abort, LookupError, RepoError,
-    RepoLookupError, InterventionRequired, RequirementError)
+    RepoLookupError, InterventionRequired, RequirementError,
+    alwaysmatcher, patternmatcher, hgutil)
 from vcsserver.vcs_base import RemoteBase
 
 log = logging.getLogger(__name__)
@@ -205,22 +207,6 @@ class HgRemote(RemoteBase):
         return False
 
     @reraise_safe_exceptions
-    def archive_repo(self, archive_path, mtime, file_info, kind):
-        if kind == "tgz":
-            archiver = archival.tarit(archive_path, mtime, "gz")
-        elif kind == "tbz2":
-            archiver = archival.tarit(archive_path, mtime, "bz2")
-        elif kind == 'zip':
-            archiver = archival.zipit(archive_path, mtime)
-        else:
-            raise exceptions.ArchiveException()(
-                'Remote does not support: "%s".' % kind)
-
-        for f_path, f_mode, f_is_link, f_content in file_info:
-            archiver.addfile(f_path, f_mode, f_is_link, f_content)
-        archiver.done()
-
-    @reraise_safe_exceptions
     def bookmarks(self, wire):
         cache_on, context_uid, repo_id = self._cache_on(wire)
         @self.region.conditional_cache_on_arguments(condition=cache_on)
@@ -1007,3 +993,29 @@ class HgRemote(RemoteBase):
             'pre_version': vcsserver.__version__,
             'post_version': vcsserver.__version__,
         }
+
+    @reraise_safe_exceptions
+    def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
+                     archive_dir_name, commit_id):
+
+        def file_walker(_commit_id, path):
+            repo = self._factory.repo(wire)
+            ctx = repo[_commit_id]
+            is_root = path in ['', '/']
+            if is_root:
+                matcher = alwaysmatcher(badfn=None)
+            else:
+                matcher = patternmatcher('', [(b'glob', path+'/**', b'')], badfn=None)
+            file_iter = ctx.manifest().walk(matcher)
+
+            for fn in file_iter:
+                file_path = fn
+                flags = ctx.flags(fn)
+                mode = b'x' in flags and 0o755 or 0o644
+                is_link = b'l' in flags
+
+                yield ArchiveNode(file_path, mode, is_link, ctx[fn].data)
+
+        return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
+                            archive_dir_name, commit_id)
+
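
Note: the Mercurial walker narrows the manifest walk with a matcher, so archiving a sub-path does not touch the rest of the tree. A rough standalone sketch of the same idea; the repository path and sub-directory are hypothetical, and it relies on Mercurial's internal API, whose exact signatures vary between versions:

from mercurial import hg, ui as uimod
from mercurial.match import alwaysmatcher, patternmatcher

repo = hg.repository(uimod.ui.load(), b'/path/to/repo')  # hypothetical path
ctx = repo[b'tip']

path = b'docs'  # archive only this sub-directory; use b'' for the full tree
if path in (b'', b'/'):
    matcher = alwaysmatcher(badfn=None)
else:
    matcher = patternmatcher(b'', [(b'glob', path + b'/**', b'')], badfn=None)

for fn in ctx.manifest().walk(matcher):
    flags = ctx.flags(fn)
    mode = 0o755 if b'x' in flags else 0o644
    print(fn, oct(mode), b'l' in flags)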
@@ -38,7 +38,7 @@ from mercurial import merge as hg_merge
 from mercurial import subrepo
 from mercurial import subrepoutil
 from mercurial import tags as hg_tag
-
+from mercurial import util as hgutil
 from mercurial.commands import clone, nullid, pull
 from mercurial.context import memctx, memfilectx
 from mercurial.error import (
@@ -46,7 +46,7 @@ from mercurial.error import (
     RequirementError, ProgrammingError)
 from mercurial.hgweb import hgweb_mod
 from mercurial.localrepo import instance
-from mercurial.match import match
+from mercurial.match import match, alwaysmatcher, patternmatcher
 from mercurial.mdiff import diffopts
 from mercurial.node import bin, hex
 from mercurial.encoding import tolocal
@@ -375,7 +375,7 @@ class HTTPApplication(object):
         # NOTE(marcink): trading complexity for slight performance
         if log.isEnabledFor(logging.DEBUG):
             no_args_methods = [
-                'archive_repo'
+
             ]
             if method in no_args_methods:
                 call_args = ''
@@ -19,6 +19,7 @@ from __future__ import absolute_import
 
 import os
 import subprocess
+import time
 from urllib2 import URLError
 import urlparse
 import logging
@@ -35,7 +36,8 @@ import svn.fs
 import svn.repos
 
 from vcsserver import svn_diff, exceptions, subprocessio, settings
-from vcsserver.base import RepoFactory, raise_from_original
+from vcsserver.base import RepoFactory, raise_from_original, ArchiveNode, archive_repo
+from vcsserver.exceptions import NoContentException
 from vcsserver.vcs_base import RemoteBase
 
 log = logging.getLogger(__name__)
@@ -528,6 +530,70 @@ class SvnRemote(RemoteBase):
             'post_version': get_svn_post_hook_version(repo_path),
         }
 
+    @reraise_safe_exceptions
+    def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
+                     archive_dir_name, commit_id):
+
+        def walk_tree(root, root_dir, _commit_id):
+            """
+            Special recursive svn repo walker
+            """
+
+            filemode_default = 0o100644
+            filemode_executable = 0o100755
+
+            file_iter = svn.fs.dir_entries(root, root_dir)
+            for f_name in file_iter:
+                f_type = NODE_TYPE_MAPPING.get(file_iter[f_name].kind, None)
+
+                if f_type == 'dir':
+                    # return only DIR, and then all entries in that dir
+                    yield os.path.join(root_dir, f_name), {'mode': filemode_default}, f_type
+                    new_root = os.path.join(root_dir, f_name)
+                    for _f_name, _f_data, _f_type in walk_tree(root, new_root, _commit_id):
+                        yield _f_name, _f_data, _f_type
+                else:
+                    f_path = os.path.join(root_dir, f_name).rstrip('/')
+                    prop_list = svn.fs.node_proplist(root, f_path)
+
+                    f_mode = filemode_default
+                    if prop_list.get('svn:executable'):
+                        f_mode = filemode_executable
+
+                    f_is_link = False
+                    if prop_list.get('svn:special'):
+                        f_is_link = True
+
+                    data = {
+                        'is_link': f_is_link,
+                        'mode': f_mode,
+                        'content_stream': svn.core.Stream(svn.fs.file_contents(root, f_path)).read
+                    }
+
+                    yield f_path, data, f_type
+
+        def file_walker(_commit_id, path):
+            repo = self._factory.repo(wire)
+            root = svn.fs.revision_root(svn.repos.fs(repo), int(commit_id))
+
+            def no_content():
+                raise NoContentException()
+
+            for f_name, f_data, f_type in walk_tree(root, path, _commit_id):
+                file_path = f_name
+
+                if f_type == 'dir':
+                    mode = f_data['mode']
+                    yield ArchiveNode(file_path, mode, False, no_content)
+                else:
+                    mode = f_data['mode']
+                    is_link = f_data['is_link']
+                    data_stream = f_data['content_stream']
+                    yield ArchiveNode(file_path, mode, is_link, data_stream)
+
+        return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
+                            archive_dir_name, commit_id)
+
 
 class SvnDiffer(object):
     """
@@ -685,8 +751,7 @@ class SvnDiffer(object):
         if node_kind not in (
                 svn.core.svn_node_file, svn.core.svn_node_symlink):
             return []
-        content = svn.core.Stream(
-            svn.fs.file_contents(fs_root, node_path)).read()
+        content = svn.core.Stream(svn.fs.file_contents(fs_root, node_path)).read()
         return content.splitlines(True)
 
 
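
Note: since the Git, Mercurial and Subversion backends now all delegate to the shared archive_repo(), the resulting archives can be inspected uniformly. A small standard-library sketch, assuming a tgz was written to '/tmp/example.tgz' with write_metadata left enabled; the path is hypothetical:

import tarfile

with tarfile.open('/tmp/example.tgz', 'r:gz') as tar:
    for member in tar.getmembers():
        # '.archival.txt' carries the commit_id/mtime metadata written above;
        # '<dir>/.dir' entries mark empty SVN directories.
        print(member.name, oct(member.mode))
        if member.name.endswith('.archival.txt'):
            print(tar.extractfile(member).read())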