git: make use of pygit2 more...
super-admin
r1109:7aab64a8 python3
@@ -1,1375 +1,1382 @@
1 # RhodeCode VCSServer provides access to different vcs backends via network.
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 #
3 #
4 # This program is free software; you can redistribute it and/or modify
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
7 # (at your option) any later version.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU General Public License
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
17
18 import collections
18 import collections
19 import logging
19 import logging
20 import os
20 import os
21 import posixpath as vcspath
21 import posixpath as vcspath
22 import re
22 import re
23 import stat
23 import stat
24 import traceback
24 import traceback
25 import urllib.request
25 import urllib.request
26 import urllib.parse
26 import urllib.parse
27 import urllib.error
27 import urllib.error
28 from functools import wraps
28 from functools import wraps
29
29
30 import more_itertools
30 import more_itertools
31 import pygit2
31 import pygit2
32 from pygit2 import Repository as LibGit2Repo
32 from pygit2 import Repository as LibGit2Repo
33 from pygit2 import index as LibGit2Index
33 from pygit2 import index as LibGit2Index
34 from dulwich import index, objects
34 from dulwich import index, objects
35 from dulwich.client import HttpGitClient, LocalGitClient
35 from dulwich.client import HttpGitClient, LocalGitClient
36 from dulwich.errors import (
36 from dulwich.errors import (
37 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 NotGitRepository, ChecksumMismatch, WrongObjectException,
38 MissingCommitError, ObjectMissing, HangupException,
38 MissingCommitError, ObjectMissing, HangupException,
39 UnexpectedCommandError)
39 UnexpectedCommandError)
40 from dulwich.repo import Repo as DulwichRepo
40 from dulwich.repo import Repo as DulwichRepo
41 from dulwich.server import update_server_info
41 from dulwich.server import update_server_info
42
42
43 from vcsserver import exceptions, settings, subprocessio
43 from vcsserver import exceptions, settings, subprocessio
44 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
44 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
45 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo, BinaryEnvelope
45 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo, BinaryEnvelope
46 from vcsserver.hgcompat import (
46 from vcsserver.hgcompat import (
47 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
48 from vcsserver.git_lfs.lib import LFSOidStore
48 from vcsserver.git_lfs.lib import LFSOidStore
49 from vcsserver.vcs_base import RemoteBase
49 from vcsserver.vcs_base import RemoteBase
50
50
51 DIR_STAT = stat.S_IFDIR
51 DIR_STAT = stat.S_IFDIR
52 FILE_MODE = stat.S_IFMT
52 FILE_MODE = stat.S_IFMT
53 GIT_LINK = objects.S_IFGITLINK
53 GIT_LINK = objects.S_IFGITLINK
54 PEELED_REF_MARKER = b'^{}'
54 PEELED_REF_MARKER = b'^{}'
55 HEAD_MARKER = b'HEAD'
55 HEAD_MARKER = b'HEAD'
56
56
57 log = logging.getLogger(__name__)
57 log = logging.getLogger(__name__)
58
58
59
59
60 def reraise_safe_exceptions(func):
60 def reraise_safe_exceptions(func):
61 """Converts Dulwich exceptions to something neutral."""
61 """Converts Dulwich exceptions to something neutral."""
62
62
63 @wraps(func)
63 @wraps(func)
64 def wrapper(*args, **kwargs):
64 def wrapper(*args, **kwargs):
65 try:
65 try:
66 return func(*args, **kwargs)
66 return func(*args, **kwargs)
67 except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as e:
67 except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as e:
68 exc = exceptions.LookupException(org_exc=e)
68 exc = exceptions.LookupException(org_exc=e)
69 raise exc(safe_str(e))
69 raise exc(safe_str(e))
70 except (HangupException, UnexpectedCommandError) as e:
70 except (HangupException, UnexpectedCommandError) as e:
71 exc = exceptions.VcsException(org_exc=e)
71 exc = exceptions.VcsException(org_exc=e)
72 raise exc(safe_str(e))
72 raise exc(safe_str(e))
73 except Exception:
73 except Exception:
74 # NOTE(marcink): because of how dulwich handles some exceptions
74 # NOTE(marcink): because of how dulwich handles some exceptions
75 # (KeyError on empty repos), we cannot track this and catch all
75 # (KeyError on empty repos), we cannot track this and catch all
76 # exceptions; these are exceptions from other handlers
76 # exceptions; these are exceptions from other handlers
77 #if not hasattr(e, '_vcs_kind'):
77 #if not hasattr(e, '_vcs_kind'):
78 #log.exception("Unhandled exception in git remote call")
78 #log.exception("Unhandled exception in git remote call")
79 #raise_from_original(exceptions.UnhandledException)
79 #raise_from_original(exceptions.UnhandledException)
80 raise
80 raise
81 return wrapper
81 return wrapper
82
82
83
83
84 class Repo(DulwichRepo):
84 class Repo(DulwichRepo):
85 """
85 """
86 A wrapper for dulwich Repo class.
86 A wrapper for dulwich Repo class.
87
87
88 Since dulwich sometimes keeps .idx file descriptors open, this leads to
88 Since dulwich sometimes keeps .idx file descriptors open, this leads to
89 "Too many open files" errors. We need to close all opened file descriptors
89 "Too many open files" errors. We need to close all opened file descriptors
90 once the repo object is destroyed.
90 once the repo object is destroyed.
91 """
91 """
92 def __del__(self):
92 def __del__(self):
93 if hasattr(self, 'object_store'):
93 if hasattr(self, 'object_store'):
94 self.close()
94 self.close()
95
95
96
96
97 class Repository(LibGit2Repo):
97 class Repository(LibGit2Repo):
98
98
99 def __enter__(self):
99 def __enter__(self):
100 return self
100 return self
101
101
102 def __exit__(self, exc_type, exc_val, exc_tb):
102 def __exit__(self, exc_type, exc_val, exc_tb):
103 self.free()
103 self.free()
104
104
105
105
106 class GitFactory(RepoFactory):
106 class GitFactory(RepoFactory):
107 repo_type = 'git'
107 repo_type = 'git'
108
108
109 def _create_repo(self, wire, create, use_libgit2=False):
109 def _create_repo(self, wire, create, use_libgit2=False):
110 if use_libgit2:
110 if use_libgit2:
111 repo = Repository(safe_bytes(wire['path']))
111 repo = Repository(safe_bytes(wire['path']))
112 else:
112 else:
113 # dulwich mode
113 # dulwich mode
114 repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
114 repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
115 repo = Repo(repo_path)
115 repo = Repo(repo_path)
116
116
117 log.debug('repository created: got GIT object: %s', repo)
117 log.debug('repository created: got GIT object: %s', repo)
118 return repo
118 return repo
119
119
120 def repo(self, wire, create=False, use_libgit2=False):
120 def repo(self, wire, create=False, use_libgit2=False):
121 """
121 """
122 Get a repository instance for the given path.
122 Get a repository instance for the given path.
123 """
123 """
124 return self._create_repo(wire, create, use_libgit2)
124 return self._create_repo(wire, create, use_libgit2)
125
125
126 def repo_libgit2(self, wire):
126 def repo_libgit2(self, wire):
127 return self.repo(wire, use_libgit2=True)
127 return self.repo(wire, use_libgit2=True)
128
128
129
129
130 class GitRemote(RemoteBase):
130 class GitRemote(RemoteBase):
131
131
132 def __init__(self, factory):
132 def __init__(self, factory):
133 self._factory = factory
133 self._factory = factory
134 self._bulk_methods = {
134 self._bulk_methods = {
135 "date": self.date,
135 "date": self.date,
136 "author": self.author,
136 "author": self.author,
137 "branch": self.branch,
137 "branch": self.branch,
138 "message": self.message,
138 "message": self.message,
139 "parents": self.parents,
139 "parents": self.parents,
140 "_commit": self.revision,
140 "_commit": self.revision,
141 }
141 }
142
142
143 def _wire_to_config(self, wire):
143 def _wire_to_config(self, wire):
144 if 'config' in wire:
144 if 'config' in wire:
145 return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']])
145 return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']])
146 return {}
146 return {}
147
147
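
Reviewer note: _wire_to_config above flattens the (section, key, value) triples passed in wire['config'] into '<section>_<key>' entries, which is what _remote_conf and the LFS code look up later. A minimal sketch with hypothetical values (the section/key split shown here is assumed, not taken from this diff):

    wire = {'config': [('vcs', 'ssl_dir', '/etc/ssl/certs')]}
    # equivalent to the dict([...]) expression in _wire_to_config
    conf = {f'{section}_{key}': value for section, key, value in wire['config']}
    # conf == {'vcs_ssl_dir': '/etc/ssl/certs'}, i.e. the key queried by _remote_conf()
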
148 def _remote_conf(self, config):
148 def _remote_conf(self, config):
149 params = [
149 params = [
150 '-c', 'core.askpass=""',
150 '-c', 'core.askpass=""',
151 ]
151 ]
152 ssl_cert_dir = config.get('vcs_ssl_dir')
152 ssl_cert_dir = config.get('vcs_ssl_dir')
153 if ssl_cert_dir:
153 if ssl_cert_dir:
154 params.extend(['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)])
154 params.extend(['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)])
155 return params
155 return params
156
156
157 @reraise_safe_exceptions
157 @reraise_safe_exceptions
158 def discover_git_version(self):
158 def discover_git_version(self):
159 stdout, _ = self.run_git_command(
159 stdout, _ = self.run_git_command(
160 {}, ['--version'], _bare=True, _safe=True)
160 {}, ['--version'], _bare=True, _safe=True)
161 prefix = b'git version'
161 prefix = b'git version'
162 if stdout.startswith(prefix):
162 if stdout.startswith(prefix):
163 stdout = stdout[len(prefix):]
163 stdout = stdout[len(prefix):]
164 return safe_str(stdout.strip())
164 return safe_str(stdout.strip())
165
165
166 @reraise_safe_exceptions
166 @reraise_safe_exceptions
167 def is_empty(self, wire):
167 def is_empty(self, wire):
168 repo_init = self._factory.repo_libgit2(wire)
168 repo_init = self._factory.repo_libgit2(wire)
169 with repo_init as repo:
169 with repo_init as repo:
170
170
171 try:
171 try:
172 has_head = repo.head.name
172 has_head = repo.head.name
173 if has_head:
173 if has_head:
174 return False
174 return False
175
175
176 # NOTE(marcink): check again using more expensive method
176 # NOTE(marcink): check again using more expensive method
177 return repo.is_empty
177 return repo.is_empty
178 except Exception:
178 except Exception:
179 pass
179 pass
180
180
181 return True
181 return True
182
182
183 @reraise_safe_exceptions
183 @reraise_safe_exceptions
184 def assert_correct_path(self, wire):
184 def assert_correct_path(self, wire):
185 cache_on, context_uid, repo_id = self._cache_on(wire)
185 cache_on, context_uid, repo_id = self._cache_on(wire)
186 region = self._region(wire)
186 region = self._region(wire)
187
187
188 @region.conditional_cache_on_arguments(condition=cache_on)
188 @region.conditional_cache_on_arguments(condition=cache_on)
189 def _assert_correct_path(_context_uid, _repo_id):
190 try:
191 repo_init = self._factory.repo_libgit2(wire)
192 with repo_init as repo:
193 pass
194 except pygit2.GitError:
195 path = wire.get('path')
196 tb = traceback.format_exc()
197 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
198 return False
199
200 return True
201 return _assert_correct_path(context_uid, repo_id)
189 def _assert_correct_path(_context_uid, _repo_id, fast_check):
190 if fast_check:
191 path = safe_str(wire['path'])
192 if pygit2.discover_repository(path):
193 return True
194 return False
195 else:
196 try:
197 repo_init = self._factory.repo_libgit2(wire)
198 with repo_init:
199 pass
200 except pygit2.GitError:
201 path = wire.get('path')
202 tb = traceback.format_exc()
203 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
204 return False
205 return True
206
207 return _assert_correct_path(context_uid, repo_id, True)
202
208
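
Reviewer note on the new fast_check branch above: pygit2.discover_repository() walks up from the given path and returns the discovered .git directory (or None in the pygit2 version assumed here), which is presumably why it serves as the cheap validity probe before falling back to opening the repository via libgit2. A minimal standalone sketch, using a hypothetical path:

    import pygit2

    def looks_like_git_repo(path: str) -> bool:
        # returns e.g. '/srv/repos/foo/.git/' when a repository is found, None otherwise
        return pygit2.discover_repository(path) is not None

    print(looks_like_git_repo('/srv/repos/foo'))  # hypothetical repository path
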
203 @reraise_safe_exceptions
209 @reraise_safe_exceptions
204 def bare(self, wire):
210 def bare(self, wire):
205 repo_init = self._factory.repo_libgit2(wire)
211 repo_init = self._factory.repo_libgit2(wire)
206 with repo_init as repo:
212 with repo_init as repo:
207 return repo.is_bare
213 return repo.is_bare
208
214
209 @reraise_safe_exceptions
215 @reraise_safe_exceptions
210 def blob_as_pretty_string(self, wire, sha):
216 def blob_as_pretty_string(self, wire, sha):
211 repo_init = self._factory.repo_libgit2(wire)
217 repo_init = self._factory.repo_libgit2(wire)
212 with repo_init as repo:
218 with repo_init as repo:
213 blob_obj = repo[sha]
219 blob_obj = repo[sha]
214 return BinaryEnvelope(blob_obj.data)
220 return BinaryEnvelope(blob_obj.data)
215
221
216 @reraise_safe_exceptions
222 @reraise_safe_exceptions
217 def blob_raw_length(self, wire, sha):
223 def blob_raw_length(self, wire, sha):
218 cache_on, context_uid, repo_id = self._cache_on(wire)
224 cache_on, context_uid, repo_id = self._cache_on(wire)
219 region = self._region(wire)
225 region = self._region(wire)
220
226
221 @region.conditional_cache_on_arguments(condition=cache_on)
227 @region.conditional_cache_on_arguments(condition=cache_on)
222 def _blob_raw_length(_repo_id, _sha):
228 def _blob_raw_length(_repo_id, _sha):
223
229
224 repo_init = self._factory.repo_libgit2(wire)
230 repo_init = self._factory.repo_libgit2(wire)
225 with repo_init as repo:
231 with repo_init as repo:
226 blob = repo[sha]
232 blob = repo[sha]
227 return blob.size
233 return blob.size
228
234
229 return _blob_raw_length(repo_id, sha)
235 return _blob_raw_length(repo_id, sha)
230
236
231 def _parse_lfs_pointer(self, raw_content):
237 def _parse_lfs_pointer(self, raw_content):
232 spec_string = b'version https://git-lfs.github.com/spec'
238 spec_string = b'version https://git-lfs.github.com/spec'
233 if raw_content and raw_content.startswith(spec_string):
239 if raw_content and raw_content.startswith(spec_string):
234
240
235 pattern = re.compile(rb"""
241 pattern = re.compile(rb"""
236 (?:\n)?
242 (?:\n)?
237 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
243 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
238 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
244 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
239 ^size[ ](?P<oid_size>[0-9]+)\n
245 ^size[ ](?P<oid_size>[0-9]+)\n
240 (?:\n)?
246 (?:\n)?
241 """, re.VERBOSE | re.MULTILINE)
247 """, re.VERBOSE | re.MULTILINE)
242 match = pattern.match(raw_content)
248 match = pattern.match(raw_content)
243 if match:
249 if match:
244 return match.groupdict()
250 return match.groupdict()
245
251
246 return {}
252 return {}
247
253
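
Reviewer note: for reference, the regex in _parse_lfs_pointer targets the standard Git LFS pointer layout; a sketch of the kind of blob content it matches (the oid and size below are made-up values):

    raw_content = (
        b"version https://git-lfs.github.com/spec/v1\n"
        b"oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393\n"
        b"size 12345\n"
    )
    # a successful match yields the groups 'spec_ver', 'oid_hash' and 'oid_size'
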
248 @reraise_safe_exceptions
254 @reraise_safe_exceptions
249 def is_large_file(self, wire, commit_id):
255 def is_large_file(self, wire, commit_id):
250 cache_on, context_uid, repo_id = self._cache_on(wire)
256 cache_on, context_uid, repo_id = self._cache_on(wire)
251 region = self._region(wire)
257 region = self._region(wire)
252
258
253 @region.conditional_cache_on_arguments(condition=cache_on)
259 @region.conditional_cache_on_arguments(condition=cache_on)
254 def _is_large_file(_repo_id, _sha):
260 def _is_large_file(_repo_id, _sha):
255 repo_init = self._factory.repo_libgit2(wire)
261 repo_init = self._factory.repo_libgit2(wire)
256 with repo_init as repo:
262 with repo_init as repo:
257 blob = repo[commit_id]
263 blob = repo[commit_id]
258 if blob.is_binary:
264 if blob.is_binary:
259 return {}
265 return {}
260
266
261 return self._parse_lfs_pointer(blob.data)
267 return self._parse_lfs_pointer(blob.data)
262
268
263 return _is_large_file(repo_id, commit_id)
269 return _is_large_file(repo_id, commit_id)
264
270
265 @reraise_safe_exceptions
271 @reraise_safe_exceptions
266 def is_binary(self, wire, tree_id):
272 def is_binary(self, wire, tree_id):
267 cache_on, context_uid, repo_id = self._cache_on(wire)
273 cache_on, context_uid, repo_id = self._cache_on(wire)
268 region = self._region(wire)
274 region = self._region(wire)
269
275
270 @region.conditional_cache_on_arguments(condition=cache_on)
276 @region.conditional_cache_on_arguments(condition=cache_on)
271 def _is_binary(_repo_id, _tree_id):
277 def _is_binary(_repo_id, _tree_id):
272 repo_init = self._factory.repo_libgit2(wire)
278 repo_init = self._factory.repo_libgit2(wire)
273 with repo_init as repo:
279 with repo_init as repo:
274 blob_obj = repo[tree_id]
280 blob_obj = repo[tree_id]
275 return blob_obj.is_binary
281 return blob_obj.is_binary
276
282
277 return _is_binary(repo_id, tree_id)
283 return _is_binary(repo_id, tree_id)
278
284
279 @reraise_safe_exceptions
285 @reraise_safe_exceptions
280 def md5_hash(self, wire, tree_id):
286 def md5_hash(self, wire, tree_id):
281 cache_on, context_uid, repo_id = self._cache_on(wire)
287 cache_on, context_uid, repo_id = self._cache_on(wire)
282 region = self._region(wire)
288 region = self._region(wire)
283
289
284 @region.conditional_cache_on_arguments(condition=cache_on)
290 @region.conditional_cache_on_arguments(condition=cache_on)
285 def _md5_hash(_repo_id, _tree_id):
291 def _md5_hash(_repo_id, _tree_id):
286 return ''
292 return ''
287
293
288 return _md5_hash(repo_id, tree_id)
294 return _md5_hash(repo_id, tree_id)
289
295
290 @reraise_safe_exceptions
296 @reraise_safe_exceptions
291 def in_largefiles_store(self, wire, oid):
297 def in_largefiles_store(self, wire, oid):
292 conf = self._wire_to_config(wire)
298 conf = self._wire_to_config(wire)
293 repo_init = self._factory.repo_libgit2(wire)
299 repo_init = self._factory.repo_libgit2(wire)
294 with repo_init as repo:
300 with repo_init as repo:
295 repo_name = repo.path
301 repo_name = repo.path
296
302
297 store_location = conf.get('vcs_git_lfs_store_location')
303 store_location = conf.get('vcs_git_lfs_store_location')
298 if store_location:
304 if store_location:
299
305
300 store = LFSOidStore(
306 store = LFSOidStore(
301 oid=oid, repo=repo_name, store_location=store_location)
307 oid=oid, repo=repo_name, store_location=store_location)
302 return store.has_oid()
308 return store.has_oid()
303
309
304 return False
310 return False
305
311
306 @reraise_safe_exceptions
312 @reraise_safe_exceptions
307 def store_path(self, wire, oid):
313 def store_path(self, wire, oid):
308 conf = self._wire_to_config(wire)
314 conf = self._wire_to_config(wire)
309 repo_init = self._factory.repo_libgit2(wire)
315 repo_init = self._factory.repo_libgit2(wire)
310 with repo_init as repo:
316 with repo_init as repo:
311 repo_name = repo.path
317 repo_name = repo.path
312
318
313 store_location = conf.get('vcs_git_lfs_store_location')
319 store_location = conf.get('vcs_git_lfs_store_location')
314 if store_location:
320 if store_location:
315 store = LFSOidStore(
321 store = LFSOidStore(
316 oid=oid, repo=repo_name, store_location=store_location)
322 oid=oid, repo=repo_name, store_location=store_location)
317 return store.oid_path
323 return store.oid_path
318 raise ValueError('Unable to fetch oid with path {}'.format(oid))
324 raise ValueError('Unable to fetch oid with path {}'.format(oid))
319
325
320 @reraise_safe_exceptions
326 @reraise_safe_exceptions
321 def bulk_request(self, wire, rev, pre_load):
327 def bulk_request(self, wire, rev, pre_load):
322 cache_on, context_uid, repo_id = self._cache_on(wire)
328 cache_on, context_uid, repo_id = self._cache_on(wire)
323 region = self._region(wire)
329 region = self._region(wire)
324
330
325 @region.conditional_cache_on_arguments(condition=cache_on)
331 @region.conditional_cache_on_arguments(condition=cache_on)
326 def _bulk_request(_repo_id, _rev, _pre_load):
332 def _bulk_request(_repo_id, _rev, _pre_load):
327 result = {}
333 result = {}
328 for attr in pre_load:
334 for attr in pre_load:
329 try:
335 try:
330 method = self._bulk_methods[attr]
336 method = self._bulk_methods[attr]
331 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
337 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
332 args = [wire, rev]
338 args = [wire, rev]
333 result[attr] = method(*args)
339 result[attr] = method(*args)
334 except KeyError as e:
340 except KeyError as e:
335 raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
341 raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
336 return result
342 return result
337
343
338 return _bulk_request(repo_id, rev, sorted(pre_load))
344 return _bulk_request(repo_id, rev, sorted(pre_load))
339
345
340 def _build_opener(self, url):
346 def _build_opener(self, url):
341 handlers = []
347 handlers = []
342 url_obj = url_parser(url)
348 url_obj = url_parser(url)
343 _, authinfo = url_obj.authinfo()
349 _, authinfo = url_obj.authinfo()
344
350
345 if authinfo:
351 if authinfo:
346 # create a password manager
352 # create a password manager
347 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
353 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
348 passmgr.add_password(*authinfo)
354 passmgr.add_password(*authinfo)
349
355
350 handlers.extend((httpbasicauthhandler(passmgr),
356 handlers.extend((httpbasicauthhandler(passmgr),
351 httpdigestauthhandler(passmgr)))
357 httpdigestauthhandler(passmgr)))
352
358
353 return urllib.request.build_opener(*handlers)
359 return urllib.request.build_opener(*handlers)
354
360
355 def _type_id_to_name(self, type_id: int):
361 def _type_id_to_name(self, type_id: int):
356 return {
362 return {
357 1: 'commit',
363 1: 'commit',
358 2: 'tree',
364 2: 'tree',
359 3: 'blob',
365 3: 'blob',
360 4: 'tag'
366 4: 'tag'
361 }[type_id]
367 }[type_id]
362
368
363 @reraise_safe_exceptions
369 @reraise_safe_exceptions
364 def check_url(self, url, config):
370 def check_url(self, url, config):
365 url_obj = url_parser(safe_bytes(url))
371 url_obj = url_parser(safe_bytes(url))
366 test_uri, _ = url_obj.authinfo()
372 test_uri, _ = url_obj.authinfo()
367 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
373 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
368 url_obj.query = obfuscate_qs(url_obj.query)
374 url_obj.query = obfuscate_qs(url_obj.query)
369 cleaned_uri = str(url_obj)
375 cleaned_uri = str(url_obj)
370 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
376 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
371
377
372 if not test_uri.endswith('info/refs'):
378 if not test_uri.endswith('info/refs'):
373 test_uri = test_uri.rstrip('/') + '/info/refs'
379 test_uri = test_uri.rstrip('/') + '/info/refs'
374
380
375 o = self._build_opener(url)
381 o = self._build_opener(url)
376 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
382 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
377
383
378 q = {"service": 'git-upload-pack'}
384 q = {"service": 'git-upload-pack'}
379 qs = '?%s' % urllib.parse.urlencode(q)
385 qs = '?%s' % urllib.parse.urlencode(q)
380 cu = "%s%s" % (test_uri, qs)
386 cu = "%s%s" % (test_uri, qs)
381 req = urllib.request.Request(cu, None, {})
387 req = urllib.request.Request(cu, None, {})
382
388
383 try:
389 try:
384 log.debug("Trying to open URL %s", cleaned_uri)
390 log.debug("Trying to open URL %s", cleaned_uri)
385 resp = o.open(req)
391 resp = o.open(req)
386 if resp.code != 200:
392 if resp.code != 200:
387 raise exceptions.URLError()('Return Code is not 200')
393 raise exceptions.URLError()('Return Code is not 200')
388 except Exception as e:
394 except Exception as e:
389 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
395 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
390 # means it cannot be cloned
396 # means it cannot be cloned
391 raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))
397 raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))
392
398
393 # now detect if it's a proper git repo
399 # now detect if it's a proper git repo
394 gitdata = resp.read()
400 gitdata = resp.read()
395 if b'service=git-upload-pack' in gitdata:
401 if b'service=git-upload-pack' in gitdata:
396 pass
402 pass
397 elif re.findall(rb'[0-9a-fA-F]{40}\s+refs', gitdata):
403 elif re.findall(rb'[0-9a-fA-F]{40}\s+refs', gitdata):
398 # old-style git can return some other format!
404 # old-style git can return some other format!
399 pass
405 pass
400 else:
406 else:
401 raise exceptions.URLError()(
407 raise exceptions.URLError()(
402 "url [%s] does not look like an git" % (cleaned_uri,))
408 "url [%s] does not look like an git" % (cleaned_uri,))
403
409
404 return True
410 return True
405
411
406 @reraise_safe_exceptions
412 @reraise_safe_exceptions
407 def clone(self, wire, url, deferred, valid_refs, update_after_clone):
413 def clone(self, wire, url, deferred, valid_refs, update_after_clone):
408 # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
414 # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
409 remote_refs = self.pull(wire, url, apply_refs=False)
415 remote_refs = self.pull(wire, url, apply_refs=False)
410 repo = self._factory.repo(wire)
416 repo = self._factory.repo(wire)
411 if isinstance(valid_refs, list):
417 if isinstance(valid_refs, list):
412 valid_refs = tuple(valid_refs)
418 valid_refs = tuple(valid_refs)
413
419
414 for k in remote_refs:
420 for k in remote_refs:
415 # only parse heads/tags and skip so-called deferred tags
421 # only parse heads/tags and skip so-called deferred tags
416 if k.startswith(valid_refs) and not k.endswith(deferred):
422 if k.startswith(valid_refs) and not k.endswith(deferred):
417 repo[k] = remote_refs[k]
423 repo[k] = remote_refs[k]
418
424
419 if update_after_clone:
425 if update_after_clone:
420 # we want to checkout HEAD
426 # we want to checkout HEAD
421 repo["HEAD"] = remote_refs["HEAD"]
427 repo["HEAD"] = remote_refs["HEAD"]
422 index.build_index_from_tree(repo.path, repo.index_path(),
428 index.build_index_from_tree(repo.path, repo.index_path(),
423 repo.object_store, repo["HEAD"].tree)
429 repo.object_store, repo["HEAD"].tree)
424
430
425 @reraise_safe_exceptions
431 @reraise_safe_exceptions
426 def branch(self, wire, commit_id):
432 def branch(self, wire, commit_id):
427 cache_on, context_uid, repo_id = self._cache_on(wire)
433 cache_on, context_uid, repo_id = self._cache_on(wire)
428 region = self._region(wire)
434 region = self._region(wire)
429
435
430 @region.conditional_cache_on_arguments(condition=cache_on)
436 @region.conditional_cache_on_arguments(condition=cache_on)
431 def _branch(_context_uid, _repo_id, _commit_id):
437 def _branch(_context_uid, _repo_id, _commit_id):
432 regex = re.compile('^refs/heads')
438 regex = re.compile('^refs/heads')
433
439
434 def filter_with(ref):
440 def filter_with(ref):
435 return regex.match(ref[0]) and ref[1] == _commit_id
441 return regex.match(ref[0]) and ref[1] == _commit_id
436
442
437 branches = list(filter(filter_with, list(self.get_refs(wire).items())))
443 branches = list(filter(filter_with, list(self.get_refs(wire).items())))
438 return [x[0].split('refs/heads/')[-1] for x in branches]
444 return [x[0].split('refs/heads/')[-1] for x in branches]
439
445
440 return _branch(context_uid, repo_id, commit_id)
446 return _branch(context_uid, repo_id, commit_id)
441
447
442 @reraise_safe_exceptions
448 @reraise_safe_exceptions
443 def commit_branches(self, wire, commit_id):
449 def commit_branches(self, wire, commit_id):
444 cache_on, context_uid, repo_id = self._cache_on(wire)
450 cache_on, context_uid, repo_id = self._cache_on(wire)
445 region = self._region(wire)
451 region = self._region(wire)
446
452
447 @region.conditional_cache_on_arguments(condition=cache_on)
453 @region.conditional_cache_on_arguments(condition=cache_on)
448 def _commit_branches(_context_uid, _repo_id, _commit_id):
454 def _commit_branches(_context_uid, _repo_id, _commit_id):
449 repo_init = self._factory.repo_libgit2(wire)
455 repo_init = self._factory.repo_libgit2(wire)
450 with repo_init as repo:
456 with repo_init as repo:
451 branches = [x for x in repo.branches.with_commit(_commit_id)]
457 branches = [x for x in repo.branches.with_commit(_commit_id)]
452 return branches
458 return branches
453
459
454 return _commit_branches(context_uid, repo_id, commit_id)
460 return _commit_branches(context_uid, repo_id, commit_id)
455
461
456 @reraise_safe_exceptions
462 @reraise_safe_exceptions
457 def add_object(self, wire, content):
463 def add_object(self, wire, content):
458 repo_init = self._factory.repo_libgit2(wire)
464 repo_init = self._factory.repo_libgit2(wire)
459 with repo_init as repo:
465 with repo_init as repo:
460 blob = objects.Blob()
466 blob = objects.Blob()
461 blob.set_raw_string(content)
467 blob.set_raw_string(content)
462 repo.object_store.add_object(blob)
468 repo.object_store.add_object(blob)
463 return blob.id
469 return blob.id
464
470
465 # TODO: this is quite complex, check if that can be simplified
471 # TODO: this is quite complex, check if that can be simplified
466 @reraise_safe_exceptions
472 @reraise_safe_exceptions
467 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
473 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
468 # Defines the root tree
474 # Defines the root tree
469 class _Root(object):
475 class _Root(object):
470 def __repr__(self):
476 def __repr__(self):
471 return 'ROOT TREE'
477 return 'ROOT TREE'
472 ROOT = _Root()
478 ROOT = _Root()
473
479
474 repo = self._factory.repo(wire)
480 repo = self._factory.repo(wire)
475 object_store = repo.object_store
481 object_store = repo.object_store
476
482
477 # Create tree and populate it with blobs
483 # Create tree and populate it with blobs
478 if commit_tree:
484 if commit_tree:
479 commit_tree = safe_bytes(commit_tree)
485 commit_tree = safe_bytes(commit_tree)
480
486
481 if commit_tree and repo[commit_tree]:
487 if commit_tree and repo[commit_tree]:
482 git_commit = repo[safe_bytes(commit_data['parents'][0])]
488 git_commit = repo[safe_bytes(commit_data['parents'][0])]
483 commit_tree = repo[git_commit.tree] # root tree
489 commit_tree = repo[git_commit.tree] # root tree
484 else:
490 else:
485 commit_tree = objects.Tree()
491 commit_tree = objects.Tree()
486
492
487 for node in updated:
493 for node in updated:
488 # Compute subdirs if needed
494 # Compute subdirs if needed
489 dirpath, nodename = vcspath.split(node['path'])
495 dirpath, nodename = vcspath.split(node['path'])
490 dirnames = list(map(safe_str, dirpath and dirpath.split('/') or []))
496 dirnames = list(map(safe_str, dirpath and dirpath.split('/') or []))
491 parent = commit_tree
497 parent = commit_tree
492 ancestors = [('', parent)]
498 ancestors = [('', parent)]
493
499
494 # Tries to dig for the deepest existing tree
500 # Tries to dig for the deepest existing tree
495 while dirnames:
501 while dirnames:
496 curdir = dirnames.pop(0)
502 curdir = dirnames.pop(0)
497 try:
503 try:
498 dir_id = parent[curdir][1]
504 dir_id = parent[curdir][1]
499 except KeyError:
505 except KeyError:
500 # put curdir back into dirnames and stop
506 # put curdir back into dirnames and stop
501 dirnames.insert(0, curdir)
507 dirnames.insert(0, curdir)
502 break
508 break
503 else:
509 else:
504 # If found, updates parent
510 # If found, updates parent
505 parent = repo[dir_id]
511 parent = repo[dir_id]
506 ancestors.append((curdir, parent))
512 ancestors.append((curdir, parent))
507 # Now parent is the deepest existing tree and we need to create
513 # Now parent is the deepest existing tree and we need to create
508 # subtrees for dirnames (in reverse order)
514 # subtrees for dirnames (in reverse order)
509 # [this only applies to nodes from added]
515 # [this only applies to nodes from added]
510 new_trees = []
516 new_trees = []
511
517
512 blob = objects.Blob.from_string(node['content'])
518 blob = objects.Blob.from_string(node['content'])
513
519
514 node_path = safe_bytes(node['node_path'])
520 node_path = safe_bytes(node['node_path'])
515
521
516 if dirnames:
522 if dirnames:
517 # If there are trees which should be created we need to build
523 # If there are trees which should be created we need to build
518 # them now (in reverse order)
524 # them now (in reverse order)
519 reversed_dirnames = list(reversed(dirnames))
525 reversed_dirnames = list(reversed(dirnames))
520 curtree = objects.Tree()
526 curtree = objects.Tree()
521 curtree[node_path] = node['mode'], blob.id
527 curtree[node_path] = node['mode'], blob.id
522 new_trees.append(curtree)
528 new_trees.append(curtree)
523 for dirname in reversed_dirnames[:-1]:
529 for dirname in reversed_dirnames[:-1]:
524 newtree = objects.Tree()
530 newtree = objects.Tree()
525 newtree[dirname] = (DIR_STAT, curtree.id)
531 newtree[dirname] = (DIR_STAT, curtree.id)
526 new_trees.append(newtree)
532 new_trees.append(newtree)
527 curtree = newtree
533 curtree = newtree
528 parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
534 parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
529 else:
535 else:
530 parent.add(name=node_path, mode=node['mode'], hexsha=blob.id)
536 parent.add(name=node_path, mode=node['mode'], hexsha=blob.id)
531
537
532 new_trees.append(parent)
538 new_trees.append(parent)
533 # Update ancestors
539 # Update ancestors
534 reversed_ancestors = reversed(
540 reversed_ancestors = reversed(
535 [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
541 [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
536 for parent, tree, path in reversed_ancestors:
542 for parent, tree, path in reversed_ancestors:
537 parent[path] = (DIR_STAT, tree.id)
543 parent[path] = (DIR_STAT, tree.id)
538 object_store.add_object(tree)
544 object_store.add_object(tree)
539
545
540 object_store.add_object(blob)
546 object_store.add_object(blob)
541 for tree in new_trees:
547 for tree in new_trees:
542 object_store.add_object(tree)
548 object_store.add_object(tree)
543
549
544 for node_path in removed:
550 for node_path in removed:
545 paths = node_path.split('/')
551 paths = node_path.split('/')
546 tree = commit_tree # start with top-level
552 tree = commit_tree # start with top-level
547 trees = [{'tree': tree, 'path': ROOT}]
553 trees = [{'tree': tree, 'path': ROOT}]
548 # Traverse deep into the forest...
554 # Traverse deep into the forest...
549 # resolve final tree by iterating the path.
555 # resolve final tree by iterating the path.
550 # e.g. a/b/c.txt will get
556 # e.g. a/b/c.txt will get
551 # - root as tree then
557 # - root as tree then
552 # - 'a' as tree,
558 # - 'a' as tree,
553 # - 'b' as tree,
559 # - 'b' as tree,
554 # - stop at c as blob.
560 # - stop at c as blob.
555 for path in paths:
561 for path in paths:
556 try:
562 try:
557 obj = repo[tree[path][1]]
563 obj = repo[tree[path][1]]
558 if isinstance(obj, objects.Tree):
564 if isinstance(obj, objects.Tree):
559 trees.append({'tree': obj, 'path': path})
565 trees.append({'tree': obj, 'path': path})
560 tree = obj
566 tree = obj
561 except KeyError:
567 except KeyError:
562 break
568 break
563 #PROBLEM:
569 #PROBLEM:
564 """
570 """
565 We're not editing the same reference tree object
571 We're not editing the same reference tree object
566 """
572 """
567 # Cut down the blob and all rotten trees on the way back...
573 # Cut down the blob and all rotten trees on the way back...
568 for path, tree_data in reversed(list(zip(paths, trees))):
574 for path, tree_data in reversed(list(zip(paths, trees))):
569 tree = tree_data['tree']
575 tree = tree_data['tree']
570 tree.__delitem__(path)
576 tree.__delitem__(path)
571 # This operation edits the tree, we need to mark new commit back
577 # This operation edits the tree, we need to mark new commit back
572
578
573 if len(tree) > 0:
579 if len(tree) > 0:
574 # This tree still has elements - don't remove it or any
580 # This tree still has elements - don't remove it or any
575 # of its parents
581 # of its parents
576 break
582 break
577
583
578 object_store.add_object(commit_tree)
584 object_store.add_object(commit_tree)
579
585
580 # Create commit
586 # Create commit
581 commit = objects.Commit()
587 commit = objects.Commit()
582 commit.tree = commit_tree.id
588 commit.tree = commit_tree.id
583 bytes_keys = [
589 bytes_keys = [
584 'author',
590 'author',
585 'committer',
591 'committer',
586 'message',
592 'message',
587 'encoding',
593 'encoding',
588 'parents'
594 'parents'
589 ]
595 ]
590
596
591 for k, v in commit_data.items():
597 for k, v in commit_data.items():
592 if k in bytes_keys:
598 if k in bytes_keys:
593 if k == 'parents':
599 if k == 'parents':
594 v = [safe_bytes(x) for x in v]
600 v = [safe_bytes(x) for x in v]
595 else:
601 else:
596 v = safe_bytes(v)
602 v = safe_bytes(v)
597 setattr(commit, k, v)
603 setattr(commit, k, v)
598
604
599 object_store.add_object(commit)
605 object_store.add_object(commit)
600
606
601 self.create_branch(wire, branch, safe_str(commit.id))
607 self.create_branch(wire, branch, safe_str(commit.id))
602
608
603 # dulwich set-ref
609 # dulwich set-ref
604 repo.refs[safe_bytes(f'refs/heads/{branch}')] = commit.id
610 repo.refs[safe_bytes(f'refs/heads/{branch}')] = commit.id
605
611
606 return commit.id
612 return commit.id
607
613
608 @reraise_safe_exceptions
614 @reraise_safe_exceptions
609 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
615 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
610 if url != 'default' and '://' not in url:
616 if url != 'default' and '://' not in url:
611 client = LocalGitClient(url)
617 client = LocalGitClient(url)
612 else:
618 else:
613 url_obj = url_parser(url)
619 url_obj = url_parser(url)
614 o = self._build_opener(url)
620 o = self._build_opener(url)
615 url, _ = url_obj.authinfo()
621 url, _ = url_obj.authinfo()
616 client = HttpGitClient(base_url=url, opener=o)
622 client = HttpGitClient(base_url=url, opener=o)
617 repo = self._factory.repo(wire)
623 repo = self._factory.repo(wire)
618
624
619 determine_wants = repo.object_store.determine_wants_all
625 determine_wants = repo.object_store.determine_wants_all
620 if refs:
626 if refs:
621 refs = [ascii_bytes(x) for x in refs]
627 refs = [ascii_bytes(x) for x in refs]
622
628
623 def determine_wants_requested(remote_refs):
629 def determine_wants_requested(remote_refs):
624 determined = []
630 determined = []
625 for ref_name, ref_hash in remote_refs.items():
631 for ref_name, ref_hash in remote_refs.items():
626 bytes_ref_name = safe_bytes(ref_name)
632 bytes_ref_name = safe_bytes(ref_name)
627
633
628 if bytes_ref_name in refs:
634 if bytes_ref_name in refs:
629 bytes_ref_hash = safe_bytes(ref_hash)
635 bytes_ref_hash = safe_bytes(ref_hash)
630 determined.append(bytes_ref_hash)
636 determined.append(bytes_ref_hash)
631 return determined
637 return determined
632
638
633 # swap with our custom requested wants
639 # swap with our custom requested wants
634 determine_wants = determine_wants_requested
640 determine_wants = determine_wants_requested
635
641
636 try:
642 try:
637 remote_refs = client.fetch(
643 remote_refs = client.fetch(
638 path=url, target=repo, determine_wants=determine_wants)
644 path=url, target=repo, determine_wants=determine_wants)
639
645
640 except NotGitRepository as e:
646 except NotGitRepository as e:
641 log.warning(
647 log.warning(
642 'Trying to fetch from "%s" failed, not a Git repository.', url)
648 'Trying to fetch from "%s" failed, not a Git repository.', url)
643 # Exception can contain unicode which we convert
649 # Exception can contain unicode which we convert
644 raise exceptions.AbortException(e)(repr(e))
650 raise exceptions.AbortException(e)(repr(e))
645
651
646 # mikhail: client.fetch() returns all the remote refs, but fetches only
652 # mikhail: client.fetch() returns all the remote refs, but fetches only
647 # refs filtered by `determine_wants` function. We need to filter result
653 # refs filtered by `determine_wants` function. We need to filter result
648 # as well
654 # as well
649 if refs:
655 if refs:
650 remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}
656 remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}
651
657
652 if apply_refs:
658 if apply_refs:
653 # TODO: johbo: Needs proper test coverage with a git repository
659 # TODO: johbo: Needs proper test coverage with a git repository
654 # that contains a tag object, so that we would end up with
660 # that contains a tag object, so that we would end up with
655 # a peeled ref at this point.
661 # a peeled ref at this point.
656 for k in remote_refs:
662 for k in remote_refs:
657 if k.endswith(PEELED_REF_MARKER):
663 if k.endswith(PEELED_REF_MARKER):
658 log.debug("Skipping peeled reference %s", k)
664 log.debug("Skipping peeled reference %s", k)
659 continue
665 continue
660 repo[k] = remote_refs[k]
666 repo[k] = remote_refs[k]
661
667
662 if refs and not update_after:
668 if refs and not update_after:
663 # mikhail: explicitly set the head to the last ref.
669 # mikhail: explicitly set the head to the last ref.
664 repo[HEAD_MARKER] = remote_refs[refs[-1]]
670 repo[HEAD_MARKER] = remote_refs[refs[-1]]
665
671
666 if update_after:
672 if update_after:
667 # we want to check out HEAD
673 # we want to check out HEAD
668 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
674 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
669 index.build_index_from_tree(repo.path, repo.index_path(),
675 index.build_index_from_tree(repo.path, repo.index_path(),
670 repo.object_store, repo[HEAD_MARKER].tree)
676 repo.object_store, repo[HEAD_MARKER].tree)
671 return remote_refs
677 return remote_refs
672
678
673 @reraise_safe_exceptions
679 @reraise_safe_exceptions
674 def sync_fetch(self, wire, url, refs=None, all_refs=False):
680 def sync_fetch(self, wire, url, refs=None, all_refs=False):
675 repo = self._factory.repo(wire)
681 self._factory.repo(wire)
676 if refs and not isinstance(refs, (list, tuple)):
682 if refs and not isinstance(refs, (list, tuple)):
677 refs = [refs]
683 refs = [refs]
678
684
679 config = self._wire_to_config(wire)
685 config = self._wire_to_config(wire)
680 # get all remote refs we'll use to fetch later
686 # get all remote refs we'll use to fetch later
681 cmd = ['ls-remote']
687 cmd = ['ls-remote']
682 if not all_refs:
688 if not all_refs:
683 cmd += ['--heads', '--tags']
689 cmd += ['--heads', '--tags']
684 cmd += [url]
690 cmd += [url]
685 output, __ = self.run_git_command(
691 output, __ = self.run_git_command(
686 wire, cmd, fail_on_stderr=False,
692 wire, cmd, fail_on_stderr=False,
687 _copts=self._remote_conf(config),
693 _copts=self._remote_conf(config),
688 extra_env={'GIT_TERMINAL_PROMPT': '0'})
694 extra_env={'GIT_TERMINAL_PROMPT': '0'})
689
695
690 remote_refs = collections.OrderedDict()
696 remote_refs = collections.OrderedDict()
691 fetch_refs = []
697 fetch_refs = []
692
698
693 for ref_line in output.splitlines():
699 for ref_line in output.splitlines():
694 sha, ref = ref_line.split(b'\t')
700 sha, ref = ref_line.split(b'\t')
695 sha = sha.strip()
701 sha = sha.strip()
696 if ref in remote_refs:
702 if ref in remote_refs:
697 # duplicate, skip
703 # duplicate, skip
698 continue
704 continue
699 if ref.endswith(PEELED_REF_MARKER):
705 if ref.endswith(PEELED_REF_MARKER):
700 log.debug("Skipping peeled reference %s", ref)
706 log.debug("Skipping peeled reference %s", ref)
701 continue
707 continue
702 # don't sync HEAD
708 # don't sync HEAD
703 if ref in [HEAD_MARKER]:
709 if ref in [HEAD_MARKER]:
704 continue
710 continue
705
711
706 remote_refs[ref] = sha
712 remote_refs[ref] = sha
707
713
708 if refs and sha in refs:
714 if refs and sha in refs:
709 # we filter fetch using our specified refs
715 # we filter fetch using our specified refs
710 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
716 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
711 elif not refs:
717 elif not refs:
712 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
718 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
713 log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
719 log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
714
720
715 if fetch_refs:
721 if fetch_refs:
716 for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
722 for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
717 fetch_refs_chunks = list(chunk)
723 fetch_refs_chunks = list(chunk)
718 log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
724 log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
719 self.run_git_command(
725 self.run_git_command(
720 wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
726 wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
721 fail_on_stderr=False,
727 fail_on_stderr=False,
722 _copts=self._remote_conf(config),
728 _copts=self._remote_conf(config),
723 extra_env={'GIT_TERMINAL_PROMPT': '0'})
729 extra_env={'GIT_TERMINAL_PROMPT': '0'})
724
730
725 return remote_refs
731 return remote_refs
726
732
727 @reraise_safe_exceptions
733 @reraise_safe_exceptions
728 def sync_push(self, wire, url, refs=None):
734 def sync_push(self, wire, url, refs=None):
729 if not self.check_url(url, wire):
735 if not self.check_url(url, wire):
730 return
736 return
731 config = self._wire_to_config(wire)
737 config = self._wire_to_config(wire)
732 self._factory.repo(wire)
738 self._factory.repo(wire)
733 self.run_git_command(
739 self.run_git_command(
734 wire, ['push', url, '--mirror'], fail_on_stderr=False,
740 wire, ['push', url, '--mirror'], fail_on_stderr=False,
735 _copts=self._remote_conf(config),
741 _copts=self._remote_conf(config),
736 extra_env={'GIT_TERMINAL_PROMPT': '0'})
742 extra_env={'GIT_TERMINAL_PROMPT': '0'})
737
743
738 @reraise_safe_exceptions
744 @reraise_safe_exceptions
739 def get_remote_refs(self, wire, url):
745 def get_remote_refs(self, wire, url):
740 repo = Repo(url)
746 repo = Repo(url)
741 return repo.get_refs()
747 return repo.get_refs()
742
748
743 @reraise_safe_exceptions
749 @reraise_safe_exceptions
744 def get_description(self, wire):
750 def get_description(self, wire):
745 repo = self._factory.repo(wire)
751 repo = self._factory.repo(wire)
746 return repo.get_description()
752 return repo.get_description()
747
753
748 @reraise_safe_exceptions
754 @reraise_safe_exceptions
749 def get_missing_revs(self, wire, rev1, rev2, path2):
755 def get_missing_revs(self, wire, rev1, rev2, path2):
750 repo = self._factory.repo(wire)
756 repo = self._factory.repo(wire)
751 LocalGitClient(thin_packs=False).fetch(path2, repo)
757 LocalGitClient(thin_packs=False).fetch(path2, repo)
752
758
753 wire_remote = wire.copy()
759 wire_remote = wire.copy()
754 wire_remote['path'] = path2
760 wire_remote['path'] = path2
755 repo_remote = self._factory.repo(wire_remote)
761 repo_remote = self._factory.repo(wire_remote)
756 LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)
762 LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)
757
763
758 revs = [
764 revs = [
759 x.commit.id
765 x.commit.id
760 for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
766 for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
761 return revs
767 return revs
762
768
763 @reraise_safe_exceptions
769 @reraise_safe_exceptions
764 def get_object(self, wire, sha, maybe_unreachable=False):
770 def get_object(self, wire, sha, maybe_unreachable=False):
765 cache_on, context_uid, repo_id = self._cache_on(wire)
771 cache_on, context_uid, repo_id = self._cache_on(wire)
766 region = self._region(wire)
772 region = self._region(wire)
767
773
768 @region.conditional_cache_on_arguments(condition=cache_on)
774 @region.conditional_cache_on_arguments(condition=cache_on)
769 def _get_object(_context_uid, _repo_id, _sha):
775 def _get_object(_context_uid, _repo_id, _sha):
770 repo_init = self._factory.repo_libgit2(wire)
776 repo_init = self._factory.repo_libgit2(wire)
771 with repo_init as repo:
777 with repo_init as repo:
772
778
773 missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
779 missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
774 try:
780 try:
775 commit = repo.revparse_single(sha)
781 commit = repo.revparse_single(sha)
776 except KeyError:
782 except KeyError:
777 # NOTE(marcink): KeyError doesn't give us any meaningful information
783 # NOTE(marcink): KeyError doesn't give us any meaningful information
778 # here, we instead give something more explicit
784 # here, we instead give something more explicit
779 e = exceptions.RefNotFoundException('SHA: %s not found', sha)
785 e = exceptions.RefNotFoundException('SHA: %s not found', sha)
780 raise exceptions.LookupException(e)(missing_commit_err)
786 raise exceptions.LookupException(e)(missing_commit_err)
781 except ValueError as e:
787 except ValueError as e:
782 raise exceptions.LookupException(e)(missing_commit_err)
788 raise exceptions.LookupException(e)(missing_commit_err)
783
789
784 is_tag = False
790 is_tag = False
785 if isinstance(commit, pygit2.Tag):
791 if isinstance(commit, pygit2.Tag):
786 commit = repo.get(commit.target)
792 commit = repo.get(commit.target)
787 is_tag = True
793 is_tag = True
788
794
789 check_dangling = True
795 check_dangling = True
790 if is_tag:
796 if is_tag:
791 check_dangling = False
797 check_dangling = False
792
798
793 if check_dangling and maybe_unreachable:
799 if check_dangling and maybe_unreachable:
794 check_dangling = False
800 check_dangling = False
795
801
796 # we used a reference and it parsed, which means we don't have a dangling commit
802 # we used a reference and it parsed, which means we don't have a dangling commit
797 if sha != commit.hex:
803 if sha != commit.hex:
798 check_dangling = False
804 check_dangling = False
799
805
800 if check_dangling:
806 if check_dangling:
801 # check for dangling commit
807 # check for dangling commit
802 for branch in repo.branches.with_commit(commit.hex):
808 for branch in repo.branches.with_commit(commit.hex):
803 if branch:
809 if branch:
804 break
810 break
805 else:
811 else:
806 # NOTE(marcink): Empty error doesn't give us any meaningful information
812 # NOTE(marcink): Empty error doesn't give us any meaningful information
807 # here, we instead give something more explicit
813 # here, we instead give something more explicit
808 e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
814 e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
809 raise exceptions.LookupException(e)(missing_commit_err)
815 raise exceptions.LookupException(e)(missing_commit_err)
810
816
811 commit_id = commit.hex
817 commit_id = commit.hex
812 type_id = commit.type
818 type_id = commit.type
813
819
814 return {
820 return {
815 'id': commit_id,
821 'id': commit_id,
816 'type': self._type_id_to_name(type_id),
822 'type': self._type_id_to_name(type_id),
817 'commit_id': commit_id,
823 'commit_id': commit_id,
818 'idx': 0
824 'idx': 0
819 }
825 }
820
826
821 return _get_object(context_uid, repo_id, sha)
827 return _get_object(context_uid, repo_id, sha)
822
828
823 @reraise_safe_exceptions
829 @reraise_safe_exceptions
824 def get_refs(self, wire):
830 def get_refs(self, wire):
825 cache_on, context_uid, repo_id = self._cache_on(wire)
831 cache_on, context_uid, repo_id = self._cache_on(wire)
826 region = self._region(wire)
832 region = self._region(wire)
827
833
828 @region.conditional_cache_on_arguments(condition=cache_on)
834 @region.conditional_cache_on_arguments(condition=cache_on)
829 def _get_refs(_context_uid, _repo_id):
835 def _get_refs(_context_uid, _repo_id):
830
836
831 repo_init = self._factory.repo_libgit2(wire)
837 repo_init = self._factory.repo_libgit2(wire)
832 with repo_init as repo:
838 with repo_init as repo:
833 regex = re.compile('^refs/(heads|tags)/')
839 regex = re.compile('^refs/(heads|tags)/')
834 return {x.name: x.target.hex for x in
840 return {x.name: x.target.hex for x in
835 [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}
841 [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}
836
842
837 return _get_refs(context_uid, repo_id)
843 return _get_refs(context_uid, repo_id)
838
844
839 @reraise_safe_exceptions
845 @reraise_safe_exceptions
840 def get_branch_pointers(self, wire):
846 def get_branch_pointers(self, wire):
841 cache_on, context_uid, repo_id = self._cache_on(wire)
847 cache_on, context_uid, repo_id = self._cache_on(wire)
842 region = self._region(wire)
848 region = self._region(wire)
843
849
844 @region.conditional_cache_on_arguments(condition=cache_on)
850 @region.conditional_cache_on_arguments(condition=cache_on)
845 def _get_branch_pointers(_context_uid, _repo_id):
851 def _get_branch_pointers(_context_uid, _repo_id):
846
852
847 repo_init = self._factory.repo_libgit2(wire)
853 repo_init = self._factory.repo_libgit2(wire)
848 regex = re.compile('^refs/heads')
854 regex = re.compile('^refs/heads')
849 with repo_init as repo:
855 with repo_init as repo:
850 branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
856 branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
851 return {x.target.hex: x.shorthand for x in branches}
857 return {x.target.hex: x.shorthand for x in branches}
852
858
853 return _get_branch_pointers(context_uid, repo_id)
859 return _get_branch_pointers(context_uid, repo_id)
854
860
855 @reraise_safe_exceptions
861 @reraise_safe_exceptions
856 def head(self, wire, show_exc=True):
862 def head(self, wire, show_exc=True):
857 cache_on, context_uid, repo_id = self._cache_on(wire)
863 cache_on, context_uid, repo_id = self._cache_on(wire)
858 region = self._region(wire)
864 region = self._region(wire)
859
865
860 @region.conditional_cache_on_arguments(condition=cache_on)
866 @region.conditional_cache_on_arguments(condition=cache_on)
861 def _head(_context_uid, _repo_id, _show_exc):
867 def _head(_context_uid, _repo_id, _show_exc):
862 repo_init = self._factory.repo_libgit2(wire)
868 repo_init = self._factory.repo_libgit2(wire)
863 with repo_init as repo:
869 with repo_init as repo:
864 try:
870 try:
865 return repo.head.peel().hex
871 return repo.head.peel().hex
866 except Exception:
872 except Exception:
867 if show_exc:
873 if show_exc:
868 raise
874 raise
869 return _head(context_uid, repo_id, show_exc)
875 return _head(context_uid, repo_id, show_exc)
870
876
871 @reraise_safe_exceptions
877 @reraise_safe_exceptions
872 def init(self, wire):
878 def init(self, wire):
873 repo_path = safe_str(wire['path'])
879 repo_path = safe_str(wire['path'])
874 self.repo = Repo.init(repo_path)
880 self.repo = Repo.init(repo_path)
875
881
876 @reraise_safe_exceptions
882 @reraise_safe_exceptions
877 def init_bare(self, wire):
883 def init_bare(self, wire):
878 repo_path = safe_str(wire['path'])
884 repo_path = safe_str(wire['path'])
879 self.repo = Repo.init_bare(repo_path)
885 self.repo = Repo.init_bare(repo_path)
880
886
881 @reraise_safe_exceptions
887 @reraise_safe_exceptions
882 def revision(self, wire, rev):
888 def revision(self, wire, rev):
883
889
884 cache_on, context_uid, repo_id = self._cache_on(wire)
890 cache_on, context_uid, repo_id = self._cache_on(wire)
885 region = self._region(wire)
891 region = self._region(wire)
886
892
887 @region.conditional_cache_on_arguments(condition=cache_on)
893 @region.conditional_cache_on_arguments(condition=cache_on)
888 def _revision(_context_uid, _repo_id, _rev):
894 def _revision(_context_uid, _repo_id, _rev):
889 repo_init = self._factory.repo_libgit2(wire)
895 repo_init = self._factory.repo_libgit2(wire)
890 with repo_init as repo:
896 with repo_init as repo:
891 commit = repo[rev]
897 commit = repo[rev]
892 obj_data = {
898 obj_data = {
893 'id': commit.id.hex,
899 'id': commit.id.hex,
894 }
900 }
895 # tree objects themselves don't have a tree_id attribute
901 # tree objects themselves don't have a tree_id attribute
896 if hasattr(commit, 'tree_id'):
902 if hasattr(commit, 'tree_id'):
897 obj_data['tree'] = commit.tree_id.hex
903 obj_data['tree'] = commit.tree_id.hex
898
904
899 return obj_data
905 return obj_data
900 return _revision(context_uid, repo_id, rev)
906 return _revision(context_uid, repo_id, rev)
901
907
902 @reraise_safe_exceptions
908 @reraise_safe_exceptions
903 def date(self, wire, commit_id):
909 def date(self, wire, commit_id):
904 cache_on, context_uid, repo_id = self._cache_on(wire)
910 cache_on, context_uid, repo_id = self._cache_on(wire)
905 region = self._region(wire)
911 region = self._region(wire)
906
912
907 @region.conditional_cache_on_arguments(condition=cache_on)
913 @region.conditional_cache_on_arguments(condition=cache_on)
908 def _date(_repo_id, _commit_id):
914 def _date(_repo_id, _commit_id):
909 repo_init = self._factory.repo_libgit2(wire)
915 repo_init = self._factory.repo_libgit2(wire)
910 with repo_init as repo:
916 with repo_init as repo:
911 commit = repo[commit_id]
917 commit = repo[commit_id]
912
918
913 if hasattr(commit, 'commit_time'):
919 if hasattr(commit, 'commit_time'):
914 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
920 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
915 else:
921 else:
916 commit = commit.get_object()
922 commit = commit.get_object()
917 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
923 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
918
924
919 # TODO(marcink): check dulwich difference of offset vs timezone
925 # TODO(marcink): check dulwich difference of offset vs timezone
920 return [commit_time, commit_time_offset]
926 return [commit_time, commit_time_offset]
921 return _date(repo_id, commit_id)
927 return _date(repo_id, commit_id)
922
928
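# Editor's sketch (assumption, not part of this diff): turning the
# [commit_time, commit_time_offset] pair returned above into an aware
# datetime; pygit2 reports the offset in minutes east of UTC, which is the
# offset-vs-timezone point the TODO refers to.
from datetime import datetime, timedelta, timezone

def commit_datetime(commit_time, commit_time_offset):
    tz = timezone(timedelta(minutes=commit_time_offset))
    return datetime.fromtimestamp(commit_time, tz)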
923 @reraise_safe_exceptions
929 @reraise_safe_exceptions
924 def author(self, wire, commit_id):
930 def author(self, wire, commit_id):
925 cache_on, context_uid, repo_id = self._cache_on(wire)
931 cache_on, context_uid, repo_id = self._cache_on(wire)
926 region = self._region(wire)
932 region = self._region(wire)
927
933
928 @region.conditional_cache_on_arguments(condition=cache_on)
934 @region.conditional_cache_on_arguments(condition=cache_on)
929 def _author(_repo_id, _commit_id):
935 def _author(_repo_id, _commit_id):
930 repo_init = self._factory.repo_libgit2(wire)
936 repo_init = self._factory.repo_libgit2(wire)
931 with repo_init as repo:
937 with repo_init as repo:
932 commit = repo[commit_id]
938 commit = repo[commit_id]
933
939
934 if hasattr(commit, 'author'):
940 if hasattr(commit, 'author'):
935 author = commit.author
941 author = commit.author
936 else:
942 else:
937 author = commit.get_object().author
943 author = commit.get_object().author
938
944
939 if author.email:
945 if author.email:
940 return "{} <{}>".format(author.name, author.email)
946 return "{} <{}>".format(author.name, author.email)
941
947
942 try:
948 try:
943 return "{}".format(author.name)
949 return "{}".format(author.name)
944 except Exception:
950 except Exception:
945 return "{}".format(safe_str(author.raw_name))
951 return "{}".format(safe_str(author.raw_name))
946
952
947 return _author(repo_id, commit_id)
953 return _author(repo_id, commit_id)
948
954
949 @reraise_safe_exceptions
955 @reraise_safe_exceptions
950 def message(self, wire, commit_id):
956 def message(self, wire, commit_id):
951 cache_on, context_uid, repo_id = self._cache_on(wire)
957 cache_on, context_uid, repo_id = self._cache_on(wire)
952 region = self._region(wire)
958 region = self._region(wire)
953
959
954 @region.conditional_cache_on_arguments(condition=cache_on)
960 @region.conditional_cache_on_arguments(condition=cache_on)
955 def _message(_repo_id, _commit_id):
961 def _message(_repo_id, _commit_id):
956 repo_init = self._factory.repo_libgit2(wire)
962 repo_init = self._factory.repo_libgit2(wire)
957 with repo_init as repo:
963 with repo_init as repo:
958 commit = repo[commit_id]
964 commit = repo[commit_id]
959 return commit.message
965 return commit.message
960 return _message(repo_id, commit_id)
966 return _message(repo_id, commit_id)
961
967
962 @reraise_safe_exceptions
968 @reraise_safe_exceptions
963 def parents(self, wire, commit_id):
969 def parents(self, wire, commit_id):
964 cache_on, context_uid, repo_id = self._cache_on(wire)
970 cache_on, context_uid, repo_id = self._cache_on(wire)
965 region = self._region(wire)
971 region = self._region(wire)
966
972
967 @region.conditional_cache_on_arguments(condition=cache_on)
973 @region.conditional_cache_on_arguments(condition=cache_on)
968 def _parents(_repo_id, _commit_id):
974 def _parents(_repo_id, _commit_id):
969 repo_init = self._factory.repo_libgit2(wire)
975 repo_init = self._factory.repo_libgit2(wire)
970 with repo_init as repo:
976 with repo_init as repo:
971 commit = repo[commit_id]
977 commit = repo[commit_id]
972 if hasattr(commit, 'parent_ids'):
978 if hasattr(commit, 'parent_ids'):
973 parent_ids = commit.parent_ids
979 parent_ids = commit.parent_ids
974 else:
980 else:
975 parent_ids = commit.get_object().parent_ids
981 parent_ids = commit.get_object().parent_ids
976
982
977 return [x.hex for x in parent_ids]
983 return [x.hex for x in parent_ids]
978 return _parents(repo_id, commit_id)
984 return _parents(repo_id, commit_id)
979
985
980 @reraise_safe_exceptions
986 @reraise_safe_exceptions
981 def children(self, wire, commit_id):
987 def children(self, wire, commit_id):
982 cache_on, context_uid, repo_id = self._cache_on(wire)
988 cache_on, context_uid, repo_id = self._cache_on(wire)
983 region = self._region(wire)
989 region = self._region(wire)
984
990
985 head = self.head(wire)
991 head = self.head(wire)
986
992
987 @region.conditional_cache_on_arguments(condition=cache_on)
993 @region.conditional_cache_on_arguments(condition=cache_on)
988 def _children(_repo_id, _commit_id):
994 def _children(_repo_id, _commit_id):
989
995
990 output, __ = self.run_git_command(
996 output, __ = self.run_git_command(
991 wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])
997 wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])
992
998
993 child_ids = []
999 child_ids = []
994 pat = re.compile(r'^{}'.format(commit_id))
1000 pat = re.compile(r'^{}'.format(commit_id))
995 for line in output.splitlines():
1001 for line in output.splitlines():
996 line = safe_str(line)
1002 line = safe_str(line)
997 if pat.match(line):
1003 if pat.match(line):
998 found_ids = line.split(' ')[1:]
1004 found_ids = line.split(' ')[1:]
999 child_ids.extend(found_ids)
1005 child_ids.extend(found_ids)
1000 break
1006 break
1001
1007
1002 return child_ids
1008 return child_ids
1003 return _children(repo_id, commit_id)
1009 return _children(repo_id, commit_id)
1004
1010
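# Editor's sketch (made-up hashes, not part of this diff): the
# `rev-list --all --children` output the parser above matches. Each line is
# "<commit> <child-1> <child-2> ...", so everything after the first token on
# the line that starts with commit_id is a direct child.
sample_line = 'deadbeef cafebabe 0badf00d'
child_ids = sample_line.split(' ')[1:]   # ['cafebabe', '0badf00d']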
1005 @reraise_safe_exceptions
1011 @reraise_safe_exceptions
1006 def set_refs(self, wire, key, value):
1012 def set_refs(self, wire, key, value):
1007 repo_init = self._factory.repo_libgit2(wire)
1013 repo_init = self._factory.repo_libgit2(wire)
1008 with repo_init as repo:
1014 with repo_init as repo:
1009 repo.references.create(key, value, force=True)
1015 repo.references.create(key, value, force=True)
1010
1016
1011 @reraise_safe_exceptions
1017 @reraise_safe_exceptions
1012 def create_branch(self, wire, branch_name, commit_id, force=False):
1018 def create_branch(self, wire, branch_name, commit_id, force=False):
1013 repo_init = self._factory.repo_libgit2(wire)
1019 repo_init = self._factory.repo_libgit2(wire)
1014 with repo_init as repo:
1020 with repo_init as repo:
1015 commit = repo[commit_id]
1021 commit = repo[commit_id]
1016
1022
1017 if force:
1023 if force:
1018 repo.branches.local.create(branch_name, commit, force=force)
1024 repo.branches.local.create(branch_name, commit, force=force)
1019 elif not repo.branches.get(branch_name):
1025 elif not repo.branches.get(branch_name):
1020 # create only if that branch doesn't exist yet
1026 # create only if that branch doesn't exist yet
1021 repo.branches.local.create(branch_name, commit, force=force)
1027 repo.branches.local.create(branch_name, commit, force=force)
1022
1028
1023 @reraise_safe_exceptions
1029 @reraise_safe_exceptions
1024 def remove_ref(self, wire, key):
1030 def remove_ref(self, wire, key):
1025 repo_init = self._factory.repo_libgit2(wire)
1031 repo_init = self._factory.repo_libgit2(wire)
1026 with repo_init as repo:
1032 with repo_init as repo:
1027 repo.references.delete(key)
1033 repo.references.delete(key)
1028
1034
1029 @reraise_safe_exceptions
1035 @reraise_safe_exceptions
1030 def tag_remove(self, wire, tag_name):
1036 def tag_remove(self, wire, tag_name):
1031 repo_init = self._factory.repo_libgit2(wire)
1037 repo_init = self._factory.repo_libgit2(wire)
1032 with repo_init as repo:
1038 with repo_init as repo:
1033 key = 'refs/tags/{}'.format(tag_name)
1039 key = 'refs/tags/{}'.format(tag_name)
1034 repo.references.delete(key)
1040 repo.references.delete(key)
1035
1041
1036 @reraise_safe_exceptions
1042 @reraise_safe_exceptions
1037 def tree_changes(self, wire, source_id, target_id):
1043 def tree_changes(self, wire, source_id, target_id):
1038 # TODO(marcink): remove this; it seems to be used only by tests
1044 # TODO(marcink): remove this; it seems to be used only by tests
1039 repo = self._factory.repo(wire)
1045 repo = self._factory.repo(wire)
1040 source = repo[source_id].tree if source_id else None
1046 source = repo[source_id].tree if source_id else None
1041 target = repo[target_id].tree
1047 target = repo[target_id].tree
1042 result = repo.object_store.tree_changes(source, target)
1048 result = repo.object_store.tree_changes(source, target)
1043 return list(result)
1049 return list(result)
1044
1050
1045 @reraise_safe_exceptions
1051 @reraise_safe_exceptions
1046 def tree_and_type_for_path(self, wire, commit_id, path):
1052 def tree_and_type_for_path(self, wire, commit_id, path):
1047
1053
1048 cache_on, context_uid, repo_id = self._cache_on(wire)
1054 cache_on, context_uid, repo_id = self._cache_on(wire)
1049 region = self._region(wire)
1055 region = self._region(wire)
1050
1056
1051 @region.conditional_cache_on_arguments(condition=cache_on)
1057 @region.conditional_cache_on_arguments(condition=cache_on)
1052 def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
1058 def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
1053 repo_init = self._factory.repo_libgit2(wire)
1059 repo_init = self._factory.repo_libgit2(wire)
1054
1060
1055 with repo_init as repo:
1061 with repo_init as repo:
1056 commit = repo[commit_id]
1062 commit = repo[commit_id]
1057 try:
1063 try:
1058 tree = commit.tree[path]
1064 tree = commit.tree[path]
1059 except KeyError:
1065 except KeyError:
1060 return None, None, None
1066 return None, None, None
1061
1067
1062 return tree.id.hex, tree.type_str, tree.filemode
1068 return tree.id.hex, tree.type_str, tree.filemode
1063 return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1069 return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1064
1070
1065 @reraise_safe_exceptions
1071 @reraise_safe_exceptions
1066 def tree_items(self, wire, tree_id):
1072 def tree_items(self, wire, tree_id):
1067 cache_on, context_uid, repo_id = self._cache_on(wire)
1073 cache_on, context_uid, repo_id = self._cache_on(wire)
1068 region = self._region(wire)
1074 region = self._region(wire)
1069
1075
1070 @region.conditional_cache_on_arguments(condition=cache_on)
1076 @region.conditional_cache_on_arguments(condition=cache_on)
1071 def _tree_items(_repo_id, _tree_id):
1077 def _tree_items(_repo_id, _tree_id):
1072
1078
1073 repo_init = self._factory.repo_libgit2(wire)
1079 repo_init = self._factory.repo_libgit2(wire)
1074 with repo_init as repo:
1080 with repo_init as repo:
1075 try:
1081 try:
1076 tree = repo[tree_id]
1082 tree = repo[tree_id]
1077 except KeyError:
1083 except KeyError:
1078 raise ObjectMissing('No tree with id: {}'.format(tree_id))
1084 raise ObjectMissing('No tree with id: {}'.format(tree_id))
1079
1085
1080 result = []
1086 result = []
1081 for item in tree:
1087 for item in tree:
1082 item_sha = item.hex
1088 item_sha = item.hex
1083 item_mode = item.filemode
1089 item_mode = item.filemode
1084 item_type = item.type_str
1090 item_type = item.type_str
1085
1091
1086 if item_type == 'commit':
1092 if item_type == 'commit':
1087 # NOTE(marcink): we translate submodules to 'link' for backward compat
1093 # NOTE(marcink): we translate submodules to 'link' for backward compat
1088 item_type = 'link'
1094 item_type = 'link'
1089
1095
1090 result.append((item.name, item_mode, item_sha, item_type))
1096 result.append((item.name, item_mode, item_sha, item_type))
1091 return result
1097 return result
1092 return _tree_items(repo_id, tree_id)
1098 return _tree_items(repo_id, tree_id)
1093
1099
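# Editor's usage sketch (hypothetical names and shas, not part of this diff):
# the shape of the tree_items() result, with submodules reported as 'link'
# per the note above.
expected_shape = [
    ('README.rst', 33188, 'a1b2c3...', 'blob'),  # 0o100644 regular file
    ('docs',       16384, 'c3d4e5...', 'tree'),  # 0o040000 directory
    ('vendored',   57344, 'e5f6a7...', 'link'),  # 0o160000 submodule entry
]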
1094 @reraise_safe_exceptions
1100 @reraise_safe_exceptions
1095 def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1101 def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1096 """
1102 """
1097 Old version that uses subprocess to call diff
1103 Old version that uses subprocess to call diff
1098 """
1104 """
1099
1105
1100 flags = [
1106 flags = [
1101 '-U%s' % context, '--patch',
1107 '-U%s' % context, '--patch',
1102 '--binary',
1108 '--binary',
1103 '--find-renames',
1109 '--find-renames',
1104 '--no-indent-heuristic',
1110 '--no-indent-heuristic',
1105 # '--indent-heuristic',
1111 # '--indent-heuristic',
1106 #'--full-index',
1112 #'--full-index',
1107 #'--abbrev=40'
1113 #'--abbrev=40'
1108 ]
1114 ]
1109
1115
1110 if opt_ignorews:
1116 if opt_ignorews:
1111 flags.append('--ignore-all-space')
1117 flags.append('--ignore-all-space')
1112
1118
1113 if commit_id_1 == self.EMPTY_COMMIT:
1119 if commit_id_1 == self.EMPTY_COMMIT:
1114 cmd = ['show'] + flags + [commit_id_2]
1120 cmd = ['show'] + flags + [commit_id_2]
1115 else:
1121 else:
1116 cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
1122 cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
1117
1123
1118 if file_filter:
1124 if file_filter:
1119 cmd.extend(['--', file_filter])
1125 cmd.extend(['--', file_filter])
1120
1126
1121 diff, __ = self.run_git_command(wire, cmd)
1127 diff, __ = self.run_git_command(wire, cmd)
1122 # If we used the 'show' command, strip the first few lines (until the
1128 # If we used the 'show' command, strip the first few lines (until the
1123 # actual diff starts)
1129 # actual diff starts)
1124 if commit_id_1 == self.EMPTY_COMMIT:
1130 if commit_id_1 == self.EMPTY_COMMIT:
1125 lines = diff.splitlines()
1131 lines = diff.splitlines()
1126 x = 0
1132 x = 0
1127 for line in lines:
1133 for line in lines:
1128 if line.startswith(b'diff'):
1134 if line.startswith(b'diff'):
1129 break
1135 break
1130 x += 1
1136 x += 1
1131 # Append a trailing newline just like the 'diff' command does
1137 # Append a trailing newline just like the 'diff' command does
1132 diff = '\n'.join(lines[x:]) + '\n'
1138 diff = '\n'.join(lines[x:]) + '\n'
1133 return diff
1139 return diff
1134
1140
1135 @reraise_safe_exceptions
1141 @reraise_safe_exceptions
1136 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1142 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1137 repo_init = self._factory.repo_libgit2(wire)
1143 repo_init = self._factory.repo_libgit2(wire)
1138
1144
1139 with repo_init as repo:
1145 with repo_init as repo:
1140 swap = True
1146 swap = True
1141 flags = 0
1147 flags = 0
1142 flags |= pygit2.GIT_DIFF_SHOW_BINARY
1148 flags |= pygit2.GIT_DIFF_SHOW_BINARY
1143
1149
1144 if opt_ignorews:
1150 if opt_ignorews:
1145 flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE
1151 flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE
1146
1152
1147 if commit_id_1 == self.EMPTY_COMMIT:
1153 if commit_id_1 == self.EMPTY_COMMIT:
1148 comm1 = repo[commit_id_2]
1154 comm1 = repo[commit_id_2]
1149 diff_obj = comm1.tree.diff_to_tree(
1155 diff_obj = comm1.tree.diff_to_tree(
1150 flags=flags, context_lines=context, swap=swap)
1156 flags=flags, context_lines=context, swap=swap)
1151
1157
1152 else:
1158 else:
1153 comm1 = repo[commit_id_2]
1159 comm1 = repo[commit_id_2]
1154 comm2 = repo[commit_id_1]
1160 comm2 = repo[commit_id_1]
1155 diff_obj = comm1.tree.diff_to_tree(
1161 diff_obj = comm1.tree.diff_to_tree(
1156 comm2.tree, flags=flags, context_lines=context, swap=swap)
1162 comm2.tree, flags=flags, context_lines=context, swap=swap)
1157 similar_flags = 0
1163 similar_flags = 0
1158 similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
1164 similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
1159 diff_obj.find_similar(flags=similar_flags)
1165 diff_obj.find_similar(flags=similar_flags)
1160
1166
1161 if file_filter:
1167 if file_filter:
1162 for p in diff_obj:
1168 for p in diff_obj:
1163 if p.delta.old_file.path == file_filter:
1169 if p.delta.old_file.path == file_filter:
1164 return BinaryEnvelope(p.data) or BinaryEnvelope(b'')
1170 return BinaryEnvelope(p.data) or BinaryEnvelope(b'')
1165 # no matching path == no diff
1171 # no matching path == no diff
1166 return BinaryEnvelope(b'')
1172 return BinaryEnvelope(b'')
1167 return BinaryEnvelope(diff_obj.patch) or BinaryEnvelope(b'')
1173 return BinaryEnvelope(diff_obj.patch) or BinaryEnvelope(b'')
1168
1174
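# Editor's sketch (assumption, standalone use of the same pygit2 calls as
# diff() above): a rename-aware textual diff between two commits without the
# wire/BinaryEnvelope machinery. `repo_path`, `old_id` and `new_id` are
# hypothetical.
import pygit2

def plain_diff(repo_path, old_id, new_id, context=3):
    repo = pygit2.Repository(repo_path)
    diff = repo[new_id].tree.diff_to_tree(
        repo[old_id].tree, flags=pygit2.GIT_DIFF_SHOW_BINARY,
        context_lines=context, swap=True)
    diff.find_similar(flags=pygit2.GIT_DIFF_FIND_RENAMES)  # detect renames
    return diff.patch or ''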
1169 @reraise_safe_exceptions
1175 @reraise_safe_exceptions
1170 def node_history(self, wire, commit_id, path, limit):
1176 def node_history(self, wire, commit_id, path, limit):
1171 cache_on, context_uid, repo_id = self._cache_on(wire)
1177 cache_on, context_uid, repo_id = self._cache_on(wire)
1172 region = self._region(wire)
1178 region = self._region(wire)
1173
1179
1174 @region.conditional_cache_on_arguments(condition=cache_on)
1180 @region.conditional_cache_on_arguments(condition=cache_on)
1175 def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
1181 def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
1176 # optimize for n==1, rev-list is much faster for that use-case
1182 # optimize for n==1, rev-list is much faster for that use-case
1177 if limit == 1:
1183 if limit == 1:
1178 cmd = ['rev-list', '-1', commit_id, '--', path]
1184 cmd = ['rev-list', '-1', commit_id, '--', path]
1179 else:
1185 else:
1180 cmd = ['log']
1186 cmd = ['log']
1181 if limit:
1187 if limit:
1182 cmd.extend(['-n', str(safe_int(limit, 0))])
1188 cmd.extend(['-n', str(safe_int(limit, 0))])
1183 cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
1189 cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
1184
1190
1185 output, __ = self.run_git_command(wire, cmd)
1191 output, __ = self.run_git_command(wire, cmd)
1186 commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
1192 commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
1187
1193
1188 return [x for x in commit_ids]
1194 return [x for x in commit_ids]
1189 return _node_history(context_uid, repo_id, commit_id, path, limit)
1195 return _node_history(context_uid, repo_id, commit_id, path, limit)
1190
1196
1191 @reraise_safe_exceptions
1197 @reraise_safe_exceptions
1192 def node_annotate_legacy(self, wire, commit_id, path):
1198 def node_annotate_legacy(self, wire, commit_id, path):
1193 # note: replaced by pygit2 implementation
1199 # note: replaced by pygit2 implementation
1194 cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
1200 cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
1195 # -l ==> outputs long shas (and we need all 40 characters)
1201 # -l ==> outputs long shas (and we need all 40 characters)
1196 # --root ==> doesn't put '^' character for boundaries
1202 # --root ==> doesn't put '^' character for boundaries
1197 # -r commit_id ==> blames for the given commit
1203 # -r commit_id ==> blames for the given commit
1198 output, __ = self.run_git_command(wire, cmd)
1204 output, __ = self.run_git_command(wire, cmd)
1199
1205
1200 result = []
1206 result = []
1201 for i, blame_line in enumerate(output.splitlines()[:-1]):
1207 for i, blame_line in enumerate(output.splitlines()[:-1]):
1202 line_no = i + 1
1208 line_no = i + 1
1203 blame_commit_id, line = re.split(rb' ', blame_line, 1)
1209 blame_commit_id, line = re.split(rb' ', blame_line, 1)
1204 result.append((line_no, blame_commit_id, line))
1210 result.append((line_no, blame_commit_id, line))
1205
1211
1206 return result
1212 return result
1207
1213
1208 @reraise_safe_exceptions
1214 @reraise_safe_exceptions
1209 def node_annotate(self, wire, commit_id, path):
1215 def node_annotate(self, wire, commit_id, path):
1210
1216
1211 result_libgit = []
1217 result_libgit = []
1212 repo_init = self._factory.repo_libgit2(wire)
1218 repo_init = self._factory.repo_libgit2(wire)
1213 with repo_init as repo:
1219 with repo_init as repo:
1214 commit = repo[commit_id]
1220 commit = repo[commit_id]
1215 blame_obj = repo.blame(path, newest_commit=commit_id)
1221 blame_obj = repo.blame(path, newest_commit=commit_id)
1216 for i, line in enumerate(commit.tree[path].data.splitlines()):
1222 for i, line in enumerate(commit.tree[path].data.splitlines()):
1217 line_no = i + 1
1223 line_no = i + 1
1218 hunk = blame_obj.for_line(line_no)
1224 hunk = blame_obj.for_line(line_no)
1219 blame_commit_id = hunk.final_commit_id.hex
1225 blame_commit_id = hunk.final_commit_id.hex
1220
1226
1221 result_libgit.append((line_no, blame_commit_id, line))
1227 result_libgit.append((line_no, blame_commit_id, line))
1222
1228
1223 return result_libgit
1229 return result_libgit
1224
1230
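# Editor's sketch (assumption, not part of this diff): the same pygit2 blame
# object used above can also be consumed hunk-by-hunk instead of line-by-line.
import pygit2

def blame_hunks(repo_path, commit_id, path):
    repo = pygit2.Repository(repo_path)
    blame = repo.blame(path, newest_commit=commit_id)
    return [(hunk.final_start_line_number, hunk.lines_in_hunk,
             hunk.final_commit_id.hex) for hunk in blame]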
1225 @reraise_safe_exceptions
1231 @reraise_safe_exceptions
1226 def update_server_info(self, wire):
1232 def update_server_info(self, wire):
1227 repo = self._factory.repo(wire)
1233 repo = self._factory.repo(wire)
1228 update_server_info(repo)
1234 update_server_info(repo)
1229
1235
1230 @reraise_safe_exceptions
1236 @reraise_safe_exceptions
1231 def get_all_commit_ids(self, wire):
1237 def get_all_commit_ids(self, wire):
1232
1238
1233 cache_on, context_uid, repo_id = self._cache_on(wire)
1239 cache_on, context_uid, repo_id = self._cache_on(wire)
1234 region = self._region(wire)
1240 region = self._region(wire)
1235
1241
1236 @region.conditional_cache_on_arguments(condition=cache_on)
1242 @region.conditional_cache_on_arguments(condition=cache_on)
1237 def _get_all_commit_ids(_context_uid, _repo_id):
1243 def _get_all_commit_ids(_context_uid, _repo_id):
1238
1244
1239 cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
1245 cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
1240 try:
1246 try:
1241 output, __ = self.run_git_command(wire, cmd)
1247 output, __ = self.run_git_command(wire, cmd)
1242 return output.splitlines()
1248 return output.splitlines()
1243 except Exception:
1249 except Exception:
1244 # Can be raised for empty repositories
1250 # Can be raised for empty repositories
1245 return []
1251 return []
1246
1252
1247 @region.conditional_cache_on_arguments(condition=cache_on)
1253 @region.conditional_cache_on_arguments(condition=cache_on)
1248 def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
1254 def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
1249 repo_init = self._factory.repo_libgit2(wire)
1255 repo_init = self._factory.repo_libgit2(wire)
1250 from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
1256 from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
1251 results = []
1257 results = []
1252 with repo_init as repo:
1258 with repo_init as repo:
1253 for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
1259 for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
1254 results.append(commit.id.hex)
1260 results.append(commit.id.hex)
1255
1261
1256 return _get_all_commit_ids(context_uid, repo_id)
1262 return _get_all_commit_ids(context_uid, repo_id)
1257
1263
1258 @reraise_safe_exceptions
1264 @reraise_safe_exceptions
1259 def run_git_command(self, wire, cmd, **opts):
1265 def run_git_command(self, wire, cmd, **opts):
1260 path = wire.get('path', None)
1266 path = wire.get('path', None)
1261
1267
1262 if path and os.path.isdir(path):
1268 if path and os.path.isdir(path):
1263 opts['cwd'] = path
1269 opts['cwd'] = path
1264
1270
1265 if '_bare' in opts:
1271 if '_bare' in opts:
1266 _copts = []
1272 _copts = []
1267 del opts['_bare']
1273 del opts['_bare']
1268 else:
1274 else:
1269 _copts = ['-c', 'core.quotepath=false', ]
1275 _copts = ['-c', 'core.quotepath=false',]
1270 safe_call = False
1276 safe_call = False
1271 if '_safe' in opts:
1277 if '_safe' in opts:
1272 # no exc on failure
1278 # no exc on failure
1273 del opts['_safe']
1279 del opts['_safe']
1274 safe_call = True
1280 safe_call = True
1275
1281
1276 if '_copts' in opts:
1282 if '_copts' in opts:
1277 _copts.extend(opts['_copts'] or [])
1283 _copts.extend(opts['_copts'] or [])
1278 del opts['_copts']
1284 del opts['_copts']
1279
1285
1280 gitenv = os.environ.copy()
1286 gitenv = os.environ.copy()
1281 gitenv.update(opts.pop('extra_env', {}))
1287 gitenv.update(opts.pop('extra_env', {}))
1282 # need to clean up / unset GIT_DIR from the environment!
1288 # need to clean up / unset GIT_DIR from the environment!
1283 if 'GIT_DIR' in gitenv:
1289 if 'GIT_DIR' in gitenv:
1284 del gitenv['GIT_DIR']
1290 del gitenv['GIT_DIR']
1285 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
1291 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
1286 gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'
1292 gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'
1287
1293
1288 cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
1294 cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
1289 _opts = {'env': gitenv, 'shell': False}
1295 _opts = {'env': gitenv, 'shell': False}
1290
1296
1291 proc = None
1297 proc = None
1292 try:
1298 try:
1293 _opts.update(opts)
1299 _opts.update(opts)
1294 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
1300 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
1295
1301
1296 return b''.join(proc), b''.join(proc.stderr)
1302 return b''.join(proc), b''.join(proc.stderr)
1297 except OSError as err:
1303 except OSError as err:
1298 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
1304 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
1299 tb_err = ("Couldn't run git command (%s).\n"
1305 tb_err = ("Couldn't run git command (%s).\n"
1300 "Original error was:%s\n"
1306 "Original error was:%s\n"
1301 "Call options:%s\n"
1307 "Call options:%s\n"
1302 % (cmd, err, _opts))
1308 % (cmd, err, _opts))
1303 log.exception(tb_err)
1309 log.exception(tb_err)
1304 if safe_call:
1310 if safe_call:
1305 return '', err
1311 return '', err
1306 else:
1312 else:
1307 raise exceptions.VcsException()(tb_err)
1313 raise exceptions.VcsException()(tb_err)
1308 finally:
1314 finally:
1309 if proc:
1315 if proc:
1310 proc.close()
1316 proc.close()
1311
1317
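# Editor's usage sketch (hypothetical wire/repo values, not part of this
# diff): callers above invoke this roughly as
#
#   output, stderr = self.run_git_command(
#       {'path': '/srv/repos/example.git'}, ['rev-parse', 'HEAD'],
#       _safe=True, extra_env={'GIT_TERMINAL_PROMPT': '0'})
#
# _safe=True returns ('', err) instead of raising on failure, _bare drops the
# default "-c core.quotepath=false", and _copts extends the git -c options.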
1312 @reraise_safe_exceptions
1318 @reraise_safe_exceptions
1313 def install_hooks(self, wire, force=False):
1319 def install_hooks(self, wire, force=False):
1314 from vcsserver.hook_utils import install_git_hooks
1320 from vcsserver.hook_utils import install_git_hooks
1315 bare = self.bare(wire)
1321 bare = self.bare(wire)
1316 path = wire['path']
1322 path = wire['path']
1317 binary_dir = settings.BINARY_DIR
1323 binary_dir = settings.BINARY_DIR
1318 executable = None
1319 if binary_dir:
1324 if binary_dir:
1320 executable = os.path.join(binary_dir, 'python3')
1325 os.path.join(binary_dir, 'python3')
1321 return install_git_hooks(path, bare, force_create=force)
1326 return install_git_hooks(path, bare, force_create=force)
1322
1327
1323 @reraise_safe_exceptions
1328 @reraise_safe_exceptions
1324 def get_hooks_info(self, wire):
1329 def get_hooks_info(self, wire):
1325 from vcsserver.hook_utils import (
1330 from vcsserver.hook_utils import (
1326 get_git_pre_hook_version, get_git_post_hook_version)
1331 get_git_pre_hook_version, get_git_post_hook_version)
1327 bare = self.bare(wire)
1332 bare = self.bare(wire)
1328 path = wire['path']
1333 path = wire['path']
1329 return {
1334 return {
1330 'pre_version': get_git_pre_hook_version(path, bare),
1335 'pre_version': get_git_pre_hook_version(path, bare),
1331 'post_version': get_git_post_hook_version(path, bare),
1336 'post_version': get_git_post_hook_version(path, bare),
1332 }
1337 }
1333
1338
1334 @reraise_safe_exceptions
1339 @reraise_safe_exceptions
1335 def set_head_ref(self, wire, head_name):
1340 def set_head_ref(self, wire, head_name):
1336 log.debug('Setting refs/head to `%s`', head_name)
1341 log.debug('Setting refs/head to `%s`', head_name)
1337 cmd = ['symbolic-ref', '"HEAD"', '"refs/heads/%s"' % head_name]
1342 repo_init = self._factory.repo_libgit2(wire)
1338 output, __ = self.run_git_command(wire, cmd)
1343 with repo_init as repo:
1339 return [head_name] + output.splitlines()
1344 repo.set_head(f'refs/heads/{head_name}')
1345
1346 return [head_name] + [f'set HEAD to refs/heads/{head_name}']
1340
1347
1341 @reraise_safe_exceptions
1348 @reraise_safe_exceptions
1342 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
1349 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
1343 archive_dir_name, commit_id):
1350 archive_dir_name, commit_id):
1344
1351
1345 def file_walker(_commit_id, path):
1352 def file_walker(_commit_id, path):
1346 repo_init = self._factory.repo_libgit2(wire)
1353 repo_init = self._factory.repo_libgit2(wire)
1347
1354
1348 with repo_init as repo:
1355 with repo_init as repo:
1349 commit = repo[commit_id]
1356 commit = repo[commit_id]
1350
1357
1351 if path in ['', '/']:
1358 if path in ['', '/']:
1352 tree = commit.tree
1359 tree = commit.tree
1353 else:
1360 else:
1354 tree = commit.tree[path.rstrip('/')]
1361 tree = commit.tree[path.rstrip('/')]
1355 tree_id = tree.id.hex
1362 tree_id = tree.id.hex
1356 try:
1363 try:
1357 tree = repo[tree_id]
1364 tree = repo[tree_id]
1358 except KeyError:
1365 except KeyError:
1359 raise ObjectMissing('No tree with id: {}'.format(tree_id))
1366 raise ObjectMissing('No tree with id: {}'.format(tree_id))
1360
1367
1361 index = LibGit2Index.Index()
1368 index = LibGit2Index.Index()
1362 index.read_tree(tree)
1369 index.read_tree(tree)
1363 file_iter = index
1370 file_iter = index
1364
1371
1365 for fn in file_iter:
1372 for file_node in file_iter:
1366 file_path = fn.path
1373 file_path = file_node.path
1367 mode = fn.mode
1374 mode = file_node.mode
1368 is_link = stat.S_ISLNK(mode)
1375 is_link = stat.S_ISLNK(mode)
1369 if mode == pygit2.GIT_FILEMODE_COMMIT:
1376 if mode == pygit2.GIT_FILEMODE_COMMIT:
1370 log.debug('Skipping path %s as a commit node', file_path)
1377 log.debug('Skipping path %s as a commit node', file_path)
1371 continue
1378 continue
1372 yield ArchiveNode(file_path, mode, is_link, repo[fn.hex].read_raw)
1379 yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)
1373
1380
1374 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
1381 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
1375 archive_dir_name, commit_id)
1382 archive_dir_name, commit_id)
@@ -1,287 +1,295 b''
1 # RhodeCode VCSServer provides access to different vcs backends via network.
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 #
3 #
4 # This program is free software; you can redistribute it and/or modify
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
7 # (at your option) any later version.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU General Public License
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
17
18 import io
18 import io
19 import more_itertools
19 import more_itertools
20
20
21 import dulwich.protocol
21 import dulwich.protocol
22 import mock
22 import mock
23 import pytest
23 import pytest
24 import webob
24 import webob
25 import webtest
25 import webtest
26
26
27 from vcsserver import hooks, pygrack
27 from vcsserver import hooks, pygrack
28
28
29 from vcsserver.str_utils import ascii_bytes
29 from vcsserver.str_utils import ascii_bytes
30
30
31
31
32 @pytest.fixture()
32 @pytest.fixture()
33 def pygrack_instance(tmpdir):
33 def pygrack_instance(tmpdir):
34 """
34 """
35 Creates a pygrack app instance.
35 Creates a pygrack app instance.
36
36
37 Right now, it does not do much with the passed directory.
37 Right now, it does not do much with the passed directory.
38 It just contains the required folders to pass the signature test.
38 It just contains the required folders to pass the signature test.
39 """
39 """
40 for dir_name in ('config', 'head', 'info', 'objects', 'refs'):
40 for dir_name in ('config', 'head', 'info', 'objects', 'refs'):
41 tmpdir.mkdir(dir_name)
41 tmpdir.mkdir(dir_name)
42
42
43 return pygrack.GitRepository('repo_name', str(tmpdir), 'git', False, {})
43 return pygrack.GitRepository('repo_name', str(tmpdir), 'git', False, {})
44
44
45
45
46 @pytest.fixture()
46 @pytest.fixture()
47 def pygrack_app(pygrack_instance):
47 def pygrack_app(pygrack_instance):
48 """
48 """
49 Creates a pygrack app wrapped in webtest.TestApp.
49 Creates a pygrack app wrapped in webtest.TestApp.
50 """
50 """
51 return webtest.TestApp(pygrack_instance)
51 return webtest.TestApp(pygrack_instance)
52
52
53
53
54 def test_invalid_service_info_refs_returns_403(pygrack_app):
54 def test_invalid_service_info_refs_returns_403(pygrack_app):
55 response = pygrack_app.get('/info/refs?service=git-upload-packs',
55 response = pygrack_app.get('/info/refs?service=git-upload-packs',
56 expect_errors=True)
56 expect_errors=True)
57
57
58 assert response.status_int == 403
58 assert response.status_int == 403
59
59
60
60
61 def test_invalid_endpoint_returns_403(pygrack_app):
61 def test_invalid_endpoint_returns_403(pygrack_app):
62 response = pygrack_app.post('/git-upload-packs', expect_errors=True)
62 response = pygrack_app.post('/git-upload-packs', expect_errors=True)
63
63
64 assert response.status_int == 403
64 assert response.status_int == 403
65
65
66
66
67 @pytest.mark.parametrize('sideband', [
67 @pytest.mark.parametrize('sideband', [
68 'side-band-64k',
68 'side-band-64k',
69 'side-band',
69 'side-band',
70 'side-band no-progress',
70 'side-band no-progress',
71 ])
71 ])
72 def test_pre_pull_hook_fails_with_sideband(pygrack_app, sideband):
72 def test_pre_pull_hook_fails_with_sideband(pygrack_app, sideband):
73 request = ''.join([
73 request = ''.join([
74 '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ',
74 '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ',
75 'multi_ack %s ofs-delta\n' % sideband,
75 'multi_ack %s ofs-delta\n' % sideband,
76 '0000',
76 '0000',
77 '0009done\n',
77 '0009done\n',
78 ])
78 ])
79 with mock.patch('vcsserver.hooks.git_pre_pull', return_value=hooks.HookResponse(1, 'foo')):
79 with mock.patch('vcsserver.hooks.git_pre_pull', return_value=hooks.HookResponse(1, 'foo')):
80 response = pygrack_app.post(
80 response = pygrack_app.post(
81 '/git-upload-pack', params=request,
81 '/git-upload-pack', params=request,
82 content_type='application/x-git-upload-pack')
82 content_type='application/x-git-upload-pack')
83
83
84 data = io.BytesIO(response.body)
84 data = io.BytesIO(response.body)
85 proto = dulwich.protocol.Protocol(data.read, None)
85 proto = dulwich.protocol.Protocol(data.read, None)
86 packets = list(proto.read_pkt_seq())
86 packets = list(proto.read_pkt_seq())
87
87
88 expected_packets = [
88 expected_packets = [
89 b'NAK\n', b'\x02foo', b'\x02Pre pull hook failed: aborting\n',
89 b'NAK\n', b'\x02foo', b'\x02Pre pull hook failed: aborting\n',
90 b'\x01' + pygrack.GitRepository.EMPTY_PACK,
90 b'\x01' + pygrack.GitRepository.EMPTY_PACK,
91 ]
91 ]
92 assert packets == expected_packets
92 assert packets == expected_packets
93
93
94
94
95 def test_pre_pull_hook_fails_no_sideband(pygrack_app):
95 def test_pre_pull_hook_fails_no_sideband(pygrack_app):
96 request = ''.join([
96 request = ''.join([
97 '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' +
97 '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' +
98 'multi_ack ofs-delta\n'
98 'multi_ack ofs-delta\n'
99 '0000',
99 '0000',
100 '0009done\n',
100 '0009done\n',
101 ])
101 ])
102 with mock.patch('vcsserver.hooks.git_pre_pull',
102 with mock.patch('vcsserver.hooks.git_pre_pull',
103 return_value=hooks.HookResponse(1, 'foo')):
103 return_value=hooks.HookResponse(1, 'foo')):
104 response = pygrack_app.post(
104 response = pygrack_app.post(
105 '/git-upload-pack', params=request,
105 '/git-upload-pack', params=request,
106 content_type='application/x-git-upload-pack')
106 content_type='application/x-git-upload-pack')
107
107
108 assert response.body == pygrack.GitRepository.EMPTY_PACK
108 assert response.body == pygrack.GitRepository.EMPTY_PACK
109
109
110
110
111 def test_pull_has_hook_messages(pygrack_app):
111 def test_pull_has_hook_messages(pygrack_app):
112 request = ''.join([
112 request = ''.join([
113 '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' +
113 '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' +
114 'multi_ack side-band-64k ofs-delta\n'
114 'multi_ack side-band-64k ofs-delta\n'
115 '0000',
115 '0000',
116 '0009done\n',
116 '0009done\n',
117 ])
117 ])
118
119 pre_pull = 'pre_pull_output'
120 post_pull = 'post_pull_output'
121
118 with mock.patch('vcsserver.hooks.git_pre_pull',
122 with mock.patch('vcsserver.hooks.git_pre_pull',
119 return_value=hooks.HookResponse(0, 'foo')):
123 return_value=hooks.HookResponse(0, pre_pull)):
120 with mock.patch('vcsserver.hooks.git_post_pull',
124 with mock.patch('vcsserver.hooks.git_post_pull',
121 return_value=hooks.HookResponse(1, 'bar')):
125 return_value=hooks.HookResponse(1, post_pull)):
122 with mock.patch('vcsserver.subprocessio.SubprocessIOChunker',
126 with mock.patch('vcsserver.subprocessio.SubprocessIOChunker',
123 return_value=more_itertools.always_iterable([b'0008NAK\n0009subp\n0000'])):
127 return_value=more_itertools.always_iterable([b'0008NAK\n0009subp\n0000'])):
124 response = pygrack_app.post(
128 response = pygrack_app.post(
125 '/git-upload-pack', params=request,
129 '/git-upload-pack', params=request,
126 content_type='application/x-git-upload-pack')
130 content_type='application/x-git-upload-pack')
127
131
128 data = io.BytesIO(response.body)
132 data = io.BytesIO(response.body)
129 proto = dulwich.protocol.Protocol(data.read, None)
133 proto = dulwich.protocol.Protocol(data.read, None)
130 packets = list(proto.read_pkt_seq())
134 packets = list(proto.read_pkt_seq())
131
135
132 assert packets == [b'NAK\n', b'\x02foo', b'subp\n', b'\x02bar']
136 assert packets == [b'NAK\n',
137 # pre-pull output is only sent when the hook FAILS (non-zero return code)
138 #b'\x02pre_pull_output',
139 b'subp\n',
140 b'\x02post_pull_output']
133
141
134
142
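# Editor's note (protocol background, not part of the original test): git
# side-band multiplexing prefixes every pkt-line payload with a channel byte:
# b'\x01' pack data, b'\x02' progress/messages, b'\x03' fatal error. That is
# why the hook output above arrives as b'\x02post_pull_output'.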
135 def test_get_want_capabilities(pygrack_instance):
143 def test_get_want_capabilities(pygrack_instance):
136 data = io.BytesIO(
144 data = io.BytesIO(
137 b'0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' +
145 b'0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' +
138 b'multi_ack side-band-64k ofs-delta\n00000009done\n')
146 b'multi_ack side-band-64k ofs-delta\n00000009done\n')
139
147
140 request = webob.Request({
148 request = webob.Request({
141 'wsgi.input': data,
149 'wsgi.input': data,
142 'REQUEST_METHOD': 'POST',
150 'REQUEST_METHOD': 'POST',
143 'webob.is_body_seekable': True
151 'webob.is_body_seekable': True
144 })
152 })
145
153
146 capabilities = pygrack_instance._get_want_capabilities(request)
154 capabilities = pygrack_instance._get_want_capabilities(request)
147
155
148 assert capabilities == frozenset(
156 assert capabilities == frozenset(
149 (b'ofs-delta', b'multi_ack', b'side-band-64k'))
157 (b'ofs-delta', b'multi_ack', b'side-band-64k'))
150 assert data.tell() == 0
158 assert data.tell() == 0
151
159
152
160
153 @pytest.mark.parametrize('data,capabilities,expected', [
161 @pytest.mark.parametrize('data,capabilities,expected', [
154 ('foo', [], []),
162 ('foo', [], []),
155 ('', [pygrack.CAPABILITY_SIDE_BAND_64K], []),
163 ('', [pygrack.CAPABILITY_SIDE_BAND_64K], []),
156 ('', [pygrack.CAPABILITY_SIDE_BAND], []),
164 ('', [pygrack.CAPABILITY_SIDE_BAND], []),
157 ('foo', [pygrack.CAPABILITY_SIDE_BAND_64K], [b'0008\x02foo']),
165 ('foo', [pygrack.CAPABILITY_SIDE_BAND_64K], [b'0008\x02foo']),
158 ('foo', [pygrack.CAPABILITY_SIDE_BAND], [b'0008\x02foo']),
166 ('foo', [pygrack.CAPABILITY_SIDE_BAND], [b'0008\x02foo']),
159 ('f'*1000, [pygrack.CAPABILITY_SIDE_BAND_64K], [b'03ed\x02' + b'f' * 1000]),
167 ('f'*1000, [pygrack.CAPABILITY_SIDE_BAND_64K], [b'03ed\x02' + b'f' * 1000]),
160 ('f'*1000, [pygrack.CAPABILITY_SIDE_BAND], [b'03e8\x02' + b'f' * 995, b'000a\x02fffff']),
168 ('f'*1000, [pygrack.CAPABILITY_SIDE_BAND], [b'03e8\x02' + b'f' * 995, b'000a\x02fffff']),
161 ('f'*65520, [pygrack.CAPABILITY_SIDE_BAND_64K], [b'fff0\x02' + b'f' * 65515, b'000a\x02fffff']),
169 ('f'*65520, [pygrack.CAPABILITY_SIDE_BAND_64K], [b'fff0\x02' + b'f' * 65515, b'000a\x02fffff']),
162 ('f'*65520, [pygrack.CAPABILITY_SIDE_BAND], [b'03e8\x02' + b'f' * 995] * 65 + [b'0352\x02' + b'f' * 845]),
170 ('f'*65520, [pygrack.CAPABILITY_SIDE_BAND], [b'03e8\x02' + b'f' * 995] * 65 + [b'0352\x02' + b'f' * 845]),
163 ], ids=[
171 ], ids=[
164 'foo-empty',
172 'foo-empty',
165 'empty-64k', 'empty',
173 'empty-64k', 'empty',
166 'foo-64k', 'foo',
174 'foo-64k', 'foo',
167 'f-1000-64k', 'f-1000',
175 'f-1000-64k', 'f-1000',
168 'f-65520-64k', 'f-65520'])
176 'f-65520-64k', 'f-65520'])
169 def test_get_messages(pygrack_instance, data, capabilities, expected):
177 def test_get_messages(pygrack_instance, data, capabilities, expected):
170 messages = pygrack_instance._get_messages(data, capabilities)
178 messages = pygrack_instance._get_messages(data, capabilities)
171
179
172 assert messages == expected
180 assert messages == expected
173
181
174
182
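# Editor's sketch (not part of the original test file): the pkt-line framing
# behind the expected values above; the four hex digits give the total length
# including the 4-byte length header itself.
def pkt_line(payload: bytes) -> bytes:
    return b'%04x%s' % (len(payload) + 4, payload)

assert pkt_line(b'\x02foo') == b'0008\x02foo'
assert pkt_line(b'\x02' + b'f' * 1000) == b'03ed\x02' + b'f' * 1000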
175 @pytest.mark.parametrize('response,capabilities,pre_pull_messages,post_pull_messages', [
183 @pytest.mark.parametrize('response,capabilities,pre_pull_messages,post_pull_messages', [
176 # Unexpected response
184 # Unexpected response
177 ([b'unexpected_response[no_initial_header]'], [pygrack.CAPABILITY_SIDE_BAND_64K], 'foo', 'bar'),
185 ([b'unexpected_response[no_initial_header]'], [pygrack.CAPABILITY_SIDE_BAND_64K], 'foo', 'bar'),
178 # No sideband
186 # No sideband
179 ([b'no-sideband'], [], 'foo', 'bar'),
187 ([b'no-sideband'], [], 'foo', 'bar'),
180 # No messages
188 # No messages
181 ([b'no-messages'], [pygrack.CAPABILITY_SIDE_BAND_64K], '', ''),
189 ([b'no-messages'], [pygrack.CAPABILITY_SIDE_BAND_64K], '', ''),
182 ])
190 ])
183 def test_inject_messages_to_response_nothing_to_do(
191 def test_inject_messages_to_response_nothing_to_do(
184 pygrack_instance, response, capabilities, pre_pull_messages, post_pull_messages):
192 pygrack_instance, response, capabilities, pre_pull_messages, post_pull_messages):
185
193
186 new_response = pygrack_instance._build_post_pull_response(
194 new_response = pygrack_instance._build_post_pull_response(
187 more_itertools.always_iterable(response), capabilities, pre_pull_messages, post_pull_messages)
195 more_itertools.always_iterable(response), capabilities, pre_pull_messages, post_pull_messages)
188
196
189 assert list(new_response) == response
197 assert list(new_response) == response
190
198
191
199
192 @pytest.mark.parametrize('capabilities', [
200 @pytest.mark.parametrize('capabilities', [
193 [pygrack.CAPABILITY_SIDE_BAND],
201 [pygrack.CAPABILITY_SIDE_BAND],
194 [pygrack.CAPABILITY_SIDE_BAND_64K],
202 [pygrack.CAPABILITY_SIDE_BAND_64K],
195 ])
203 ])
196 def test_inject_messages_to_response_single_element(pygrack_instance, capabilities):
204 def test_inject_messages_to_response_single_element(pygrack_instance, capabilities):
197 response = [b'0008NAK\n0009subp\n0000']
205 response = [b'0008NAK\n0009subp\n0000']
198 new_response = pygrack_instance._build_post_pull_response(
206 new_response = pygrack_instance._build_post_pull_response(
199 more_itertools.always_iterable(response), capabilities, 'foo', 'bar')
207 more_itertools.always_iterable(response), capabilities, 'foo', 'bar')
200
208
201 expected_response = b''.join([
209 expected_response = b''.join([
202 b'0008NAK\n',
210 b'0008NAK\n',
203 b'0008\x02foo',
211 b'0008\x02foo',
204 b'0009subp\n',
212 b'0009subp\n',
205 b'0008\x02bar',
213 b'0008\x02bar',
206 b'0000'])
214 b'0000'])
207
215
208 assert b''.join(new_response) == expected_response
216 assert b''.join(new_response) == expected_response
209
217
210
218
211 @pytest.mark.parametrize('capabilities', [
219 @pytest.mark.parametrize('capabilities', [
212 [pygrack.CAPABILITY_SIDE_BAND],
220 [pygrack.CAPABILITY_SIDE_BAND],
213 [pygrack.CAPABILITY_SIDE_BAND_64K],
221 [pygrack.CAPABILITY_SIDE_BAND_64K],
214 ])
222 ])
215 def test_inject_messages_to_response_multi_element(pygrack_instance, capabilities):
223 def test_inject_messages_to_response_multi_element(pygrack_instance, capabilities):
216 response = more_itertools.always_iterable([
224 response = more_itertools.always_iterable([
217 b'0008NAK\n000asubp1\n', b'000asubp2\n', b'000asubp3\n', b'000asubp4\n0000'
225 b'0008NAK\n000asubp1\n', b'000asubp2\n', b'000asubp3\n', b'000asubp4\n0000'
218 ])
226 ])
219 new_response = pygrack_instance._build_post_pull_response(response, capabilities, 'foo', 'bar')
227 new_response = pygrack_instance._build_post_pull_response(response, capabilities, 'foo', 'bar')
220
228
221 expected_response = b''.join([
229 expected_response = b''.join([
222 b'0008NAK\n',
230 b'0008NAK\n',
223 b'0008\x02foo',
231 b'0008\x02foo',
224 b'000asubp1\n', b'000asubp2\n', b'000asubp3\n', b'000asubp4\n',
232 b'000asubp1\n', b'000asubp2\n', b'000asubp3\n', b'000asubp4\n',
225 b'0008\x02bar',
233 b'0008\x02bar',
226 b'0000'
234 b'0000'
227 ])
235 ])
228
236
229 assert b''.join(new_response) == expected_response
237 assert b''.join(new_response) == expected_response
230
238
231
239
232 def test_build_failed_pre_pull_response_no_sideband(pygrack_instance):
240 def test_build_failed_pre_pull_response_no_sideband(pygrack_instance):
233 response = pygrack_instance._build_failed_pre_pull_response([], 'foo')
241 response = pygrack_instance._build_failed_pre_pull_response([], 'foo')
234
242
235 assert response == [pygrack.GitRepository.EMPTY_PACK]
243 assert response == [pygrack.GitRepository.EMPTY_PACK]
236
244
237
245
238 @pytest.mark.parametrize('capabilities', [
246 @pytest.mark.parametrize('capabilities', [
239 [pygrack.CAPABILITY_SIDE_BAND],
247 [pygrack.CAPABILITY_SIDE_BAND],
240 [pygrack.CAPABILITY_SIDE_BAND_64K],
248 [pygrack.CAPABILITY_SIDE_BAND_64K],
241 [pygrack.CAPABILITY_SIDE_BAND_64K, b'no-progress'],
249 [pygrack.CAPABILITY_SIDE_BAND_64K, b'no-progress'],
242 ])
250 ])
243 def test_build_failed_pre_pull_response(pygrack_instance, capabilities):
251 def test_build_failed_pre_pull_response(pygrack_instance, capabilities):
244 response = pygrack_instance._build_failed_pre_pull_response(capabilities, 'foo')
252 response = pygrack_instance._build_failed_pre_pull_response(capabilities, 'foo')
245
253
246 expected_response = [
254 expected_response = [
247 b'0008NAK\n', b'0008\x02foo', b'0024\x02Pre pull hook failed: aborting\n',
255 b'0008NAK\n', b'0008\x02foo', b'0024\x02Pre pull hook failed: aborting\n',
248 b'%04x\x01%s' % (len(pygrack.GitRepository.EMPTY_PACK) + 5, pygrack.GitRepository.EMPTY_PACK),
256 b'%04x\x01%s' % (len(pygrack.GitRepository.EMPTY_PACK) + 5, pygrack.GitRepository.EMPTY_PACK),
249 pygrack.GitRepository.FLUSH_PACKET,
257 pygrack.GitRepository.FLUSH_PACKET,
250 ]
258 ]
251
259
252 assert response == expected_response
260 assert response == expected_response
253
261
254
262
255 def test_inject_messages_to_response_generator(pygrack_instance):
263 def test_inject_messages_to_response_generator(pygrack_instance):
256
264
257 def response_generator():
265 def response_generator():
258 response = [
266 response = [
259 # protocol start
267 # protocol start
260 b'0008NAK\n',
268 b'0008NAK\n',
261 ]
269 ]
262 response += [ascii_bytes(f'000asubp{x}\n') for x in range(1000)]
270 response += [ascii_bytes(f'000asubp{x}\n') for x in range(1000)]
263 response += [
271 response += [
264 # protocol end
272 # protocol end
265 pygrack.GitRepository.FLUSH_PACKET
273 pygrack.GitRepository.FLUSH_PACKET
266 ]
274 ]
267 for elem in response:
275 for elem in response:
268 yield elem
276 yield elem
269
277
270 new_response = pygrack_instance._build_post_pull_response(
278 new_response = pygrack_instance._build_post_pull_response(
271 response_generator(), [pygrack.CAPABILITY_SIDE_BAND_64K, b'no-progress'], 'PRE_PULL_MSG\n', 'POST_PULL_MSG\n')
279 response_generator(), [pygrack.CAPABILITY_SIDE_BAND_64K, b'no-progress'], 'PRE_PULL_MSG\n', 'POST_PULL_MSG\n')
272
280
273 assert iter(new_response)
281 assert iter(new_response)
274
282
275 expected_response = b''.join([
283 expected_response = b''.join([
276 # start
284 # start
277 b'0008NAK\n0012\x02PRE_PULL_MSG\n',
285 b'0008NAK\n0012\x02PRE_PULL_MSG\n',
278 ] + [
286 ] + [
279 # ... rest
287 # ... rest
280 ascii_bytes(f'000asubp{x}\n') for x in range(1000)
288 ascii_bytes(f'000asubp{x}\n') for x in range(1000)
281 ] + [
289 ] + [
282 # final message,
290 # final message,
283 b'0013\x02POST_PULL_MSG\n0000',
291 b'0013\x02POST_PULL_MSG\n0000',
284
292
285 ])
293 ])
286
294
287 assert b''.join(new_response) == expected_response
295 assert b''.join(new_response) == expected_response