##// END OF EJS Templates
git: speed up extraction of git children call...
super-admin -
r1071:f2aec1d6 python3
parent child Browse files
Show More
@@ -1,1327 +1,1332 b''
1 # RhodeCode VCSServer provides access to different vcs backends via network.
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 #
3 #
4 # This program is free software; you can redistribute it and/or modify
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
7 # (at your option) any later version.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU General Public License
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
17
18 import collections
18 import collections
19 import logging
19 import logging
20 import os
20 import os
21 import posixpath as vcspath
21 import posixpath as vcspath
22 import re
22 import re
23 import stat
23 import stat
24 import traceback
24 import traceback
25 import urllib.request, urllib.parse, urllib.error
25 import urllib.request, urllib.parse, urllib.error
26 import urllib.request, urllib.error, urllib.parse
26 import urllib.request, urllib.error, urllib.parse
27 from functools import wraps
27 from functools import wraps
28
28
29 import more_itertools
29 import more_itertools
30 import pygit2
30 import pygit2
31 from pygit2 import Repository as LibGit2Repo
31 from pygit2 import Repository as LibGit2Repo
32 from pygit2 import index as LibGit2Index
32 from pygit2 import index as LibGit2Index
33 from dulwich import index, objects
33 from dulwich import index, objects
34 from dulwich.client import HttpGitClient, LocalGitClient
34 from dulwich.client import HttpGitClient, LocalGitClient
35 from dulwich.errors import (
35 from dulwich.errors import (
36 NotGitRepository, ChecksumMismatch, WrongObjectException,
36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 MissingCommitError, ObjectMissing, HangupException,
37 MissingCommitError, ObjectMissing, HangupException,
38 UnexpectedCommandError)
38 UnexpectedCommandError)
39 from dulwich.repo import Repo as DulwichRepo
39 from dulwich.repo import Repo as DulwichRepo
40 from dulwich.server import update_server_info
40 from dulwich.server import update_server_info
41
41
42 from vcsserver import exceptions, settings, subprocessio
42 from vcsserver import exceptions, settings, subprocessio
43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes
43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes
44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo
44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo
45 from vcsserver.hgcompat import (
45 from vcsserver.hgcompat import (
46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 from vcsserver.git_lfs.lib import LFSOidStore
47 from vcsserver.git_lfs.lib import LFSOidStore
48 from vcsserver.vcs_base import RemoteBase
48 from vcsserver.vcs_base import RemoteBase
49
49
50 DIR_STAT = stat.S_IFDIR
50 DIR_STAT = stat.S_IFDIR
51 FILE_MODE = stat.S_IFMT
51 FILE_MODE = stat.S_IFMT
52 GIT_LINK = objects.S_IFGITLINK
52 GIT_LINK = objects.S_IFGITLINK
53 PEELED_REF_MARKER = b'^{}'
53 PEELED_REF_MARKER = b'^{}'
54
54
55
55
56 log = logging.getLogger(__name__)
56 log = logging.getLogger(__name__)
57
57
58
58
def reraise_safe_exceptions(func):
    """
    Decorator re-raising selected Dulwich errors as vcsserver exceptions.

    ChecksumMismatch, WrongObjectException, MissingCommitError and
    ObjectMissing are re-raised through ``exceptions.LookupException``;
    HangupException and UnexpectedCommandError through
    ``exceptions.VcsException``. Every other exception propagates untouched.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException,
                MissingCommitError, ObjectMissing) as err:
            raise exceptions.LookupException(org_exc=err)(safe_str(err))
        except (HangupException, UnexpectedCommandError) as err:
            raise exceptions.VcsException(org_exc=err)(safe_str(err))
        # NOTE(marcink): because of how dulwich handles some exceptions
        # (KeyError on empty repos), we cannot track and convert every
        # exception here; anything not matched above is left to propagate
        # as-is so that other handlers can deal with it.

    return wrapper
81
81
82
82
class Repo(DulwichRepo):
    """
    Dulwich ``Repo`` subclass that closes itself when garbage collected.

    Dulwich sometimes keeps .idx file descriptors open, which eventually
    leads to "Too many open files" errors. Closing the repository when the
    object is destroyed releases those descriptors.
    """

    def __del__(self):
        # Only close when ``object_store`` exists on the instance.
        if not hasattr(self, 'object_store'):
            return
        self.close()
94
94
95
95
class Repository(LibGit2Repo):
    """libgit2 repository usable in a ``with`` block; calls ``free()`` on exit."""

    def __enter__(self):
        """Return the repository itself as the managed object."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Free the repository; exceptions from the block are not suppressed."""
        self.free()
103
103
104
104
class GitFactory(RepoFactory):
    """Factory returning either a dulwich ``Repo`` or a libgit2 ``Repository``."""

    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """
        Build the repository object for ``wire['path']``.

        ``create`` is accepted but not used by this implementation.
        """
        if not use_libgit2:
            # dulwich gets the path converted with the wire encoding
            dulwich_path = safe_str(
                wire['path'], to_encoding=settings.WIRE_ENCODING)
            return Repo(dulwich_path)
        return Repository(wire['path'])

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """Shortcut for ``repo(wire, use_libgit2=True)``."""
        return self.repo(wire, use_libgit2=True)
123
123
124
124
125 class GitRemote(RemoteBase):
125 class GitRemote(RemoteBase):
126
126
127 def __init__(self, factory):
127 def __init__(self, factory):
128 self._factory = factory
128 self._factory = factory
129 self._bulk_methods = {
129 self._bulk_methods = {
130 "date": self.date,
130 "date": self.date,
131 "author": self.author,
131 "author": self.author,
132 "branch": self.branch,
132 "branch": self.branch,
133 "message": self.message,
133 "message": self.message,
134 "parents": self.parents,
134 "parents": self.parents,
135 "_commit": self.revision,
135 "_commit": self.revision,
136 }
136 }
137
137
138 def _wire_to_config(self, wire):
138 def _wire_to_config(self, wire):
139 if 'config' in wire:
139 if 'config' in wire:
140 return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']])
140 return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']])
141 return {}
141 return {}
142
142
143 def _remote_conf(self, config):
143 def _remote_conf(self, config):
144 params = [
144 params = [
145 '-c', 'core.askpass=""',
145 '-c', 'core.askpass=""',
146 ]
146 ]
147 ssl_cert_dir = config.get('vcs_ssl_dir')
147 ssl_cert_dir = config.get('vcs_ssl_dir')
148 if ssl_cert_dir:
148 if ssl_cert_dir:
149 params.extend(['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)])
149 params.extend(['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)])
150 return params
150 return params
151
151
@reraise_safe_exceptions
def discover_git_version(self):
    """
    Run ``git --version`` and return its output as a string, with a leading
    ``git version`` prefix removed and surrounding whitespace stripped.
    """
    output, _ = self.run_git_command(
        {}, ['--version'], _bare=True, _safe=True)
    marker = b'git version'
    version = output[len(marker):] if output.startswith(marker) else output
    return safe_str(version.strip())
160
160
@reraise_safe_exceptions
def is_empty(self, wire):
    """
    Tell whether the repository behind ``wire`` is empty.

    Returns False as soon as HEAD has a name, otherwise falls back to
    libgit2's ``is_empty``. Any exception raised while probing is treated
    as "empty" and yields True.
    """
    with self._factory.repo_libgit2(wire) as repo:
        try:
            if repo.head.name:
                return False

            # NOTE(marcink): check again using more expensive method
            return repo.is_empty
        except Exception:
            # best effort: a repo we cannot probe is reported as empty
            return True
177
177
@reraise_safe_exceptions
def assert_correct_path(self, wire):
    """
    Check that ``wire['path']`` can be opened as a git repository by libgit2.

    Returns True when opening succeeds and False when pygit2 raises
    ``GitError``. The result goes through the wire's cache region.
    """
    use_cache, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=use_cache)
    def _assert_correct_path(_context_uid, _repo_id):
        try:
            # opening (and immediately freeing) the repo is the whole check
            with self._factory.repo_libgit2(wire):
                pass
        except pygit2.GitError:
            log.debug(
                "Invalid Git path `%s`, tb: %s",
                wire.get('path'), traceback.format_exc())
            return False
        return True

    return _assert_correct_path(context_uid, repo_id)
197
197
@reraise_safe_exceptions
def bare(self, wire):
    """Return libgit2's ``is_bare`` flag for the repository behind ``wire``."""
    with self._factory.repo_libgit2(wire) as repo:
        is_bare = repo.is_bare
        return is_bare
203
203
@reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
    """Return the ``data`` attribute of the object stored under ``sha``."""
    with self._factory.repo_libgit2(wire) as repo:
        return repo[sha].data
211
211
@reraise_safe_exceptions
def blob_raw_length(self, wire, sha):
    """
    Return the ``size`` attribute of the object stored under ``sha``.

    The lookup goes through the wire's cache region, keyed on repo id and sha.
    """
    use_cache, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=use_cache)
    def _blob_raw_length(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[sha].size

    return _blob_raw_length(repo_id, sha)
226
226
227 def _parse_lfs_pointer(self, raw_content):
227 def _parse_lfs_pointer(self, raw_content):
228 spec_string = b'version https://git-lfs.github.com/spec'
228 spec_string = b'version https://git-lfs.github.com/spec'
229 if raw_content and raw_content.startswith(spec_string):
229 if raw_content and raw_content.startswith(spec_string):
230
230
231 pattern = re.compile(rb"""
231 pattern = re.compile(rb"""
232 (?:\n)?
232 (?:\n)?
233 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
233 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
234 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
234 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
235 ^size[ ](?P<oid_size>[0-9]+)\n
235 ^size[ ](?P<oid_size>[0-9]+)\n
236 (?:\n)?
236 (?:\n)?
237 """, re.VERBOSE | re.MULTILINE)
237 """, re.VERBOSE | re.MULTILINE)
238 match = pattern.match(raw_content)
238 match = pattern.match(raw_content)
239 if match:
239 if match:
240 return match.groupdict()
240 return match.groupdict()
241
241
242 return {}
242 return {}
243
243
@reraise_safe_exceptions
def is_large_file(self, wire, commit_id):
    """
    Return the parsed LFS pointer for the object stored under ``commit_id``.

    The result is an empty dict when the object is binary or is not an LFS
    pointer. The lookup goes through the wire's cache region.
    """
    use_cache, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=use_cache)
    def _is_large_file(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            blob = repo[commit_id]
            # binary objects are never parsed as pointers
            return {} if blob.is_binary else self._parse_lfs_pointer(blob.data)

    return _is_large_file(repo_id, commit_id)
260
260
@reraise_safe_exceptions
def is_binary(self, wire, tree_id):
    """
    Return the ``is_binary`` flag of the object stored under ``tree_id``.

    The lookup goes through the wire's cache region.
    """
    use_cache, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=use_cache)
    def _is_binary(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[tree_id].is_binary

    return _is_binary(repo_id, tree_id)
274
274
@reraise_safe_exceptions
def in_largefiles_store(self, wire, oid):
    """
    Tell whether ``oid`` exists in the configured LFS store.

    Returns False when ``vcs_git_lfs_store_location`` is not configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        return False

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.has_oid()
290
290
@reraise_safe_exceptions
def store_path(self, wire, oid):
    """
    Return ``oid_path`` of the LFS store entry for ``oid``.

    Raises ValueError when ``vcs_git_lfs_store_location`` is not configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        raise ValueError('Unable to fetch oid with path {}'.format(oid))

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.oid_path
304
304
@reraise_safe_exceptions
def bulk_request(self, wire, rev, pre_load):
    """
    Collect several commit attributes for ``rev`` in one call.

    Each name in ``pre_load`` is looked up in ``self._bulk_methods`` and the
    matching method is called with ``(wire, rev)``. Returns a dict mapping
    attribute name to result; a KeyError is re-raised through
    ``exceptions.VcsException`` as an unknown bulk attribute.
    """
    use_cache, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=use_cache)
    def _bulk_request(_repo_id, _rev, _pre_load):
        collected = {}
        for attr in pre_load:
            # NOTE(review): the try covers the method call as well as the
            # lookup, so a KeyError raised inside the method is also
            # reported as "Unknown bulk attribute" - confirm this is wanted.
            try:
                collected[attr] = self._bulk_methods[attr](wire, rev)
            except KeyError as e:
                raise exceptions.VcsException(e)(
                    "Unknown bulk attribute: %s" % attr)
        return collected

    # sorted so that the cache key does not depend on the caller's ordering
    return _bulk_request(repo_id, rev, sorted(pre_load))
324
324
def _build_opener(self, url):
    """
    Return a urllib opener for ``url``.

    When the parsed URL carries auth info, basic and digest auth handlers
    sharing one password manager are installed; otherwise a plain opener
    is returned.
    """
    _, authinfo = url_parser(url).authinfo()
    if not authinfo:
        return urllib.request.build_opener()

    # create a password manager
    passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    passmgr.add_password(*authinfo)

    return urllib.request.build_opener(
        httpbasicauthhandler(passmgr),
        httpdigestauthhandler(passmgr))
339
339
340 def _type_id_to_name(self, type_id: int):
340 def _type_id_to_name(self, type_id: int):
341 return {
341 return {
342 1: 'commit',
342 1: 'commit',
343 2: 'tree',
343 2: 'tree',
344 3: 'blob',
344 3: 'blob',
345 4: 'tag'
345 4: 'tag'
346 }[type_id]
346 }[type_id]
347
347
@reraise_safe_exceptions
def check_url(self, url, config):
    """
    Check that ``url`` responds like a git repository over HTTP.

    Requests ``<url>/info/refs?service=git-upload-pack`` and inspects the
    response body. Returns True on success. Raises through
    ``exceptions.URLError`` when the URL cannot be opened, does not answer
    with status 200, or the body does not look like git ref advertisement.

    ``config`` is accepted but not used here.
    """
    url_obj = url_parser(url)
    test_uri, _ = url_obj.authinfo()
    # mask credentials and query secrets before the URL is logged or put
    # into error messages
    url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
    url_obj.query = obfuscate_qs(url_obj.query)
    cleaned_uri = str(url_obj)
    log.info("Checking URL for remote cloning/import: %s", cleaned_uri)

    if not test_uri.endswith('info/refs'):
        test_uri = test_uri.rstrip('/') + '/info/refs'

    o = self._build_opener(url)
    o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

    q = {"service": 'git-upload-pack'}
    qs = '?%s' % urllib.parse.urlencode(q)
    cu = "%s%s" % (test_uri, qs)
    req = urllib.request.Request(cu, None, {})

    try:
        log.debug("Trying to open URL %s", cleaned_uri)
        resp = o.open(req)
        if resp.code != 200:
            raise exceptions.URLError()('Return Code is not 200')
    except Exception as e:
        log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
        # means it cannot be cloned
        raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))

    # now detect if it's proper git repo
    # The response body is bytes on Python 3, so both checks below must use
    # bytes patterns; str patterns would raise TypeError.
    gitdata = resp.read()
    if b'service=git-upload-pack' in gitdata:
        pass
    elif re.findall(rb'[0-9a-fA-F]{40}\s+refs', gitdata):
        # old style git can return some other format !
        pass
    else:
        raise exceptions.URLError()(
            "url [%s] does not look like an git" % (cleaned_uri,))

    return True
390
390
@reraise_safe_exceptions
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
    """
    Pull refs from ``url`` and store the accepted ones in the repository.

    A ref is stored when its name starts with one of ``valid_refs`` and
    does not end with ``deferred``. With ``update_after_clone`` set, HEAD
    is set as well and the index is rebuilt from HEAD's tree.
    """
    # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
    remote_refs = self.pull(wire, url, apply_refs=False)
    repo = self._factory.repo(wire)
    # str.startswith accepts a tuple of prefixes, not a list
    ref_prefixes = tuple(valid_refs) if isinstance(valid_refs, list) else valid_refs

    for ref_name in remote_refs:
        # only parse heads/tags and skip so called deferred tags
        if not ref_name.startswith(ref_prefixes) or ref_name.endswith(deferred):
            continue
        repo[ref_name] = remote_refs[ref_name]

    if not update_after_clone:
        return

    # we want to checkout HEAD
    repo["HEAD"] = remote_refs["HEAD"]
    index.build_index_from_tree(
        repo.path, repo.index_path(), repo.object_store, repo["HEAD"].tree)
409
409
@reraise_safe_exceptions
def branch(self, wire, commit_id):
    """
    Return the names of all ``refs/heads`` refs whose value equals
    ``commit_id``, with the ``refs/heads/`` prefix removed.

    The lookup goes through the wire's cache region.
    """
    use_cache, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=use_cache)
    def _branch(_context_uid, _repo_id, _commit_id):
        heads_re = re.compile('^refs/heads')
        return [
            ref[0].split('refs/heads/')[-1]
            for ref in list(self.get_refs(wire).items())
            if heads_re.match(ref[0]) and ref[1] == _commit_id
        ]

    return _branch(context_uid, repo_id, commit_id)
425
425
@reraise_safe_exceptions
def commit_branches(self, wire, commit_id):
    """
    Return, as a list, what libgit2's ``branches.with_commit(commit_id)``
    yields for the repository behind ``wire``.

    The lookup goes through the wire's cache region.
    """
    use_cache, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=use_cache)
    def _commit_branches(_context_uid, _repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return list(repo.branches.with_commit(_commit_id))

    return _commit_branches(context_uid, repo_id, commit_id)
438
438
@reraise_safe_exceptions
def add_object(self, wire, content):
    """
    Store ``content`` as a new blob in the repository and return the blob id.

    The dulwich repository is used because ``objects.Blob`` and
    ``object_store`` are dulwich APIs. The libgit2 ``Repository`` used
    previously has no ``object_store`` attribute, so the call failed with
    AttributeError.
    """
    # same access pattern as ``commit``: dulwich repo + its object store
    repo = self._factory.repo(wire)
    blob = objects.Blob()
    blob.set_raw_string(content)
    repo.object_store.add_object(blob)
    return blob.id
447
447
448 # TODO: this is quite complex, check if that can be simplified
448 # TODO: this is quite complex, check if that can be simplified
449 @reraise_safe_exceptions
449 @reraise_safe_exceptions
450 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
450 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
451 # Defines the root tree
451 # Defines the root tree
452 class _Root(object):
452 class _Root(object):
453 def __repr__(self):
453 def __repr__(self):
454 return 'ROOT TREE'
454 return 'ROOT TREE'
455 ROOT = _Root()
455 ROOT = _Root()
456
456
457 repo = self._factory.repo(wire)
457 repo = self._factory.repo(wire)
458 object_store = repo.object_store
458 object_store = repo.object_store
459
459
460 # Create tree and populates it with blobs
460 # Create tree and populates it with blobs
461
461
462 if commit_tree and repo[commit_tree]:
462 if commit_tree and repo[commit_tree]:
463 git_commit = repo[commit_data['parents'][0]]
463 git_commit = repo[commit_data['parents'][0]]
464 commit_tree = repo[git_commit.tree] # root tree
464 commit_tree = repo[git_commit.tree] # root tree
465 else:
465 else:
466 commit_tree = objects.Tree()
466 commit_tree = objects.Tree()
467
467
468 for node in updated:
468 for node in updated:
469 # Compute subdirs if needed
469 # Compute subdirs if needed
470 dirpath, nodename = vcspath.split(node['path'])
470 dirpath, nodename = vcspath.split(node['path'])
471 dirnames = list(map(safe_str, dirpath and dirpath.split('/') or []))
471 dirnames = list(map(safe_str, dirpath and dirpath.split('/') or []))
472 parent = commit_tree
472 parent = commit_tree
473 ancestors = [('', parent)]
473 ancestors = [('', parent)]
474
474
475 # Tries to dig for the deepest existing tree
475 # Tries to dig for the deepest existing tree
476 while dirnames:
476 while dirnames:
477 curdir = dirnames.pop(0)
477 curdir = dirnames.pop(0)
478 try:
478 try:
479 dir_id = parent[curdir][1]
479 dir_id = parent[curdir][1]
480 except KeyError:
480 except KeyError:
481 # put curdir back into dirnames and stops
481 # put curdir back into dirnames and stops
482 dirnames.insert(0, curdir)
482 dirnames.insert(0, curdir)
483 break
483 break
484 else:
484 else:
485 # If found, updates parent
485 # If found, updates parent
486 parent = repo[dir_id]
486 parent = repo[dir_id]
487 ancestors.append((curdir, parent))
487 ancestors.append((curdir, parent))
488 # Now parent is deepest existing tree and we need to create
488 # Now parent is deepest existing tree and we need to create
489 # subtrees for dirnames (in reverse order)
489 # subtrees for dirnames (in reverse order)
490 # [this only applies for nodes from added]
490 # [this only applies for nodes from added]
491 new_trees = []
491 new_trees = []
492
492
493 blob = objects.Blob.from_string(node['content'])
493 blob = objects.Blob.from_string(node['content'])
494
494
495 if dirnames:
495 if dirnames:
496 # If there are trees which should be created we need to build
496 # If there are trees which should be created we need to build
497 # them now (in reverse order)
497 # them now (in reverse order)
498 reversed_dirnames = list(reversed(dirnames))
498 reversed_dirnames = list(reversed(dirnames))
499 curtree = objects.Tree()
499 curtree = objects.Tree()
500 curtree[node['node_path']] = node['mode'], blob.id
500 curtree[node['node_path']] = node['mode'], blob.id
501 new_trees.append(curtree)
501 new_trees.append(curtree)
502 for dirname in reversed_dirnames[:-1]:
502 for dirname in reversed_dirnames[:-1]:
503 newtree = objects.Tree()
503 newtree = objects.Tree()
504 newtree[dirname] = (DIR_STAT, curtree.id)
504 newtree[dirname] = (DIR_STAT, curtree.id)
505 new_trees.append(newtree)
505 new_trees.append(newtree)
506 curtree = newtree
506 curtree = newtree
507 parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
507 parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
508 else:
508 else:
509 parent.add(name=node['node_path'], mode=node['mode'], hexsha=blob.id)
509 parent.add(name=node['node_path'], mode=node['mode'], hexsha=blob.id)
510
510
511 new_trees.append(parent)
511 new_trees.append(parent)
512 # Update ancestors
512 # Update ancestors
513 reversed_ancestors = reversed(
513 reversed_ancestors = reversed(
514 [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
514 [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
515 for parent, tree, path in reversed_ancestors:
515 for parent, tree, path in reversed_ancestors:
516 parent[path] = (DIR_STAT, tree.id)
516 parent[path] = (DIR_STAT, tree.id)
517 object_store.add_object(tree)
517 object_store.add_object(tree)
518
518
519 object_store.add_object(blob)
519 object_store.add_object(blob)
520 for tree in new_trees:
520 for tree in new_trees:
521 object_store.add_object(tree)
521 object_store.add_object(tree)
522
522
523 for node_path in removed:
523 for node_path in removed:
524 paths = node_path.split('/')
524 paths = node_path.split('/')
525 tree = commit_tree # start with top-level
525 tree = commit_tree # start with top-level
526 trees = [{'tree': tree, 'path': ROOT}]
526 trees = [{'tree': tree, 'path': ROOT}]
527 # Traverse deep into the forest...
527 # Traverse deep into the forest...
528 # resolve final tree by iterating the path.
528 # resolve final tree by iterating the path.
529 # e.g a/b/c.txt will get
529 # e.g a/b/c.txt will get
530 # - root as tree then
530 # - root as tree then
531 # - 'a' as tree,
531 # - 'a' as tree,
532 # - 'b' as tree,
532 # - 'b' as tree,
533 # - stop at c as blob.
533 # - stop at c as blob.
534 for path in paths:
534 for path in paths:
535 try:
535 try:
536 obj = repo[tree[path][1]]
536 obj = repo[tree[path][1]]
537 if isinstance(obj, objects.Tree):
537 if isinstance(obj, objects.Tree):
538 trees.append({'tree': obj, 'path': path})
538 trees.append({'tree': obj, 'path': path})
539 tree = obj
539 tree = obj
540 except KeyError:
540 except KeyError:
541 break
541 break
542 #PROBLEM:
542 #PROBLEM:
543 """
543 """
544 We're not editing same reference tree object
544 We're not editing same reference tree object
545 """
545 """
546 # Cut down the blob and all rotten trees on the way back...
546 # Cut down the blob and all rotten trees on the way back...
547 for path, tree_data in reversed(list(zip(paths, trees))):
547 for path, tree_data in reversed(list(zip(paths, trees))):
548 tree = tree_data['tree']
548 tree = tree_data['tree']
549 tree.__delitem__(path)
549 tree.__delitem__(path)
550 # This operation edits the tree, we need to mark new commit back
550 # This operation edits the tree, we need to mark new commit back
551
551
552 if len(tree) > 0:
552 if len(tree) > 0:
553 # This tree still has elements - don't remove it or any
553 # This tree still has elements - don't remove it or any
554 # of it's parents
554 # of it's parents
555 break
555 break
556
556
557 object_store.add_object(commit_tree)
557 object_store.add_object(commit_tree)
558
558
559 # Create commit
559 # Create commit
560 commit = objects.Commit()
560 commit = objects.Commit()
561 commit.tree = commit_tree.id
561 commit.tree = commit_tree.id
562 bytes_keys = [
562 bytes_keys = [
563 'author',
563 'author',
564 'committer',
564 'committer',
565 'message',
565 'message',
566 'encoding'
566 'encoding'
567 ]
567 ]
568
568
569 for k, v in commit_data.items():
569 for k, v in commit_data.items():
570 if k in bytes_keys:
570 if k in bytes_keys:
571 v = safe_bytes(v)
571 v = safe_bytes(v)
572 setattr(commit, k, v)
572 setattr(commit, k, v)
573
573
574 object_store.add_object(commit)
574 object_store.add_object(commit)
575
575
576 self.create_branch(wire, branch, safe_str(commit.id))
576 self.create_branch(wire, branch, safe_str(commit.id))
577
577
578 # dulwich set-ref
578 # dulwich set-ref
579 repo.refs[safe_bytes(f'refs/heads/{branch}')] = commit.id
579 repo.refs[safe_bytes(f'refs/heads/{branch}')] = commit.id
580
580
581 return commit.id
581 return commit.id
582
582
@reraise_safe_exceptions
def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
    """
    Fetch from `url` into the repository described by `wire`.

    :param wire: context dict handed to the repo factory
    :param url: 'default', a URL containing '://', or anything else, which
        is treated as a local path
    :param apply_refs: when true, every fetched non-peeled ref is stored
        in the target repository
    :param refs: optional collection of ref names; restricts both what is
        requested and what is returned
    :param update_after: when true, HEAD is set to the remote HEAD and the
        index is rebuilt from its tree
    :return: the remote refs (filtered by `refs` when given)
    """
    treat_as_local_path = url != 'default' and '://' not in url
    if treat_as_local_path:
        client = LocalGitClient(url)
    else:
        parsed_url = url_parser(url)
        # the opener is built from the original url, before credentials
        # are split off by authinfo()
        opener = self._build_opener(url)
        url, _ = parsed_url.authinfo()
        client = HttpGitClient(base_url=url, opener=opener)
    repo = self._factory.repo(wire)

    if refs:
        def determine_wants(references):
            # only ask for the objects behind explicitly requested refs
            return [references[r] for r in references if r in refs]
    else:
        determine_wants = repo.object_store.determine_wants_all

    try:
        remote_refs = client.fetch(
            path=url, target=repo, determine_wants=determine_wants)
    except NotGitRepository as e:
        log.warning(
            'Trying to fetch from "%s" failed, not a Git repository.', url)
        # Exception can contain unicode which we convert
        raise exceptions.AbortException(e)(repr(e))

    # mikhail: client.fetch() returns all the remote refs, but fetches only
    # refs filtered by `determine_wants` function. We need to filter result
    # as well
    if refs:
        remote_refs = {
            name: remote_refs[name] for name in remote_refs if name in refs}

    if apply_refs:
        # TODO: johbo: Needs proper test coverage with a git repository
        # that contains a tag object, so that we would end up with
        # a peeled ref at this point.
        for name in remote_refs:
            if not name.endswith(PEELED_REF_MARKER):
                repo[name] = remote_refs[name]
            else:
                log.debug("Skipping peeled reference %s", name)

        if refs and not update_after:
            # mikhail: explicitly set the head to the last ref.
            repo["HEAD"] = remote_refs[refs[-1]]

    if update_after:
        # we want to checkout HEAD
        repo["HEAD"] = remote_refs["HEAD"]
        index.build_index_from_tree(
            repo.path, repo.index_path(),
            repo.object_store, repo["HEAD"].tree)
    return remote_refs
635
635
@reraise_safe_exceptions
def sync_fetch(self, wire, url, refs=None, all_refs=False):
    """
    List the refs of `url` with `git ls-remote` and fetch them with the
    git command line, in chunks.

    :param wire: context dict handed to the repo factory / git command
    :param url: remote to list and fetch from
    :param refs: optional value or list/tuple of values; a listed ref is
        fetched only when its sha is contained in `refs`
    :param all_refs: when false, ls-remote is limited to heads and tags
    :return: ordered mapping of remote ref name to sha (HEAD, peeled and
        duplicate entries left out)
    """
    repo = self._factory.repo(wire)
    if refs and not isinstance(refs, (list, tuple)):
        refs = [refs]

    config = self._wire_to_config(wire)

    # get all remote refs we'll use to fetch later
    ls_remote_cmd = ['ls-remote']
    if not all_refs:
        ls_remote_cmd += ['--heads', '--tags']
    ls_remote_cmd += [url]
    output, __ = self.run_git_command(
        wire, ls_remote_cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    remote_refs = collections.OrderedDict()
    fetch_refs = []

    for ref_line in output.splitlines():
        sha, ref = ref_line.split(b'\t')
        sha = sha.strip()
        if ref in remote_refs:
            # duplicate, skip
            continue
        if ref.endswith(PEELED_REF_MARKER):
            log.debug("Skipping peeled reference %s", ref)
            continue
        # don't sync HEAD
        if ref == b'HEAD':
            continue

        remote_refs[ref] = sha

        # without an explicit filter everything is fetched, otherwise
        # only refs whose sha was requested
        if not refs or sha in refs:
            ref_name = safe_str(ref)
            fetch_refs.append(f'{ref_name}:{ref_name}')
    log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))

    if not fetch_refs:
        return remote_refs

    for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
        refspecs = list(chunk)
        log.debug('Fetching %s refs from import url', len(refspecs))
        self.run_git_command(
            wire, ['fetch', url, '--force', '--prune', '--'] + refspecs,
            fail_on_stderr=False,
            _copts=self._remote_conf(config),
            extra_env={'GIT_TERMINAL_PROMPT': '0'})

    return remote_refs
689
689
@reraise_safe_exceptions
def sync_push(self, wire, url, refs=None):
    """
    Mirror-push the repository described by `wire` to `url`.

    Does nothing (returns None) when `check_url` rejects the url.
    `refs` is accepted but not used by this implementation.
    """
    url_ok = self.check_url(url, wire)
    if not url_ok:
        return

    config = self._wire_to_config(wire)
    # result is intentionally discarded; the factory call is kept as-is
    self._factory.repo(wire)

    push_cmd = ['push', url, '--mirror']
    self.run_git_command(
        wire, push_cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})
700
700
@reraise_safe_exceptions
def get_remote_refs(self, wire, url):
    """
    Open the repository located at `url` and return its refs.

    `wire` is not used here; the repository is opened directly from `url`.
    """
    return Repo(url).get_refs()
705
705
@reraise_safe_exceptions
def get_description(self, wire):
    """Return the description of the repository described by `wire`."""
    return self._factory.repo(wire).get_description()
710
710
@reraise_safe_exceptions
def get_missing_revs(self, wire, rev1, rev2, path2):
    """
    Return ids of commits reachable from `rev2` but not from `rev1`.

    Objects are first exchanged in both directions between the repository
    of `wire` and the one at `path2`; the walk itself runs on the
    repository at `path2`.

    :return: list of commit ids produced by the walker
    """
    # pull objects of the other repository into ours
    local_repo = self._factory.repo(wire)
    LocalGitClient(thin_packs=False).fetch(path2, local_repo)

    # and ours into the other one, addressed through a copy of `wire`
    wire_remote = wire.copy()
    wire_remote['path'] = path2
    other_repo = self._factory.repo(wire_remote)
    LocalGitClient(thin_packs=False).fetch(wire["path"], other_repo)

    walker = other_repo.get_walker(include=[rev2], exclude=[rev1])
    return [entry.commit.id for entry in walker]
725
725
@reraise_safe_exceptions
def get_object(self, wire, sha, maybe_unreachable=False):
    """
    Resolve `sha` to an object and return basic information about it.

    :param wire: context dict handed to the repo factory / cache helpers
    :param sha: revision expression passed to `revparse_single`
    :param maybe_unreachable: when true, the check that the commit is
        contained in at least one branch is skipped
    :return: dict with the keys `id`, `type`, `commit_id` and `idx`
    :raises: the exception built by `exceptions.LookupException` when the
        revision cannot be resolved, or when the dangling check finds no
        branch containing the commit
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    # `maybe_unreachable` changes the outcome (it turns the dangling-commit
    # check off), so it has to be part of the cache key. Otherwise a result
    # cached by a permissive call would be served to a later strict call
    # that should have raised.
    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_object(_context_uid, _repo_id, _sha, _maybe_unreachable):
        with self._factory.repo_libgit2(wire) as repo:

            missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
            try:
                commit = repo.revparse_single(sha)
            except KeyError:
                # NOTE(marcink): KeyError doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found', sha)
                raise exceptions.LookupException(e)(missing_commit_err)
            except ValueError as e:
                raise exceptions.LookupException(e)(missing_commit_err)

            is_tag = isinstance(commit, pygit2.Tag)
            if is_tag:
                # work on the object the tag points to
                commit = repo.get(commit.target)

            # The dangling check only applies to plain objects addressed by
            # their exact id: tags are skipped, callers may opt out, and if
            # a reference was used and it parsed we're not having a
            # dangling commit.
            check_dangling = (
                not is_tag
                and not maybe_unreachable
                and sha == commit.hex
            )

            if check_dangling and not any(repo.branches.with_commit(commit.hex)):
                # NOTE(marcink): Empty error doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
                raise exceptions.LookupException(e)(missing_commit_err)

            commit_id = commit.hex
            type_id = commit.type

            return {
                'id': commit_id,
                'type': self._type_id_to_name(type_id),
                'commit_id': commit_id,
                'idx': 0
            }

    return _get_object(context_uid, repo_id, sha, maybe_unreachable)
785
785
@reraise_safe_exceptions
def get_refs(self, wire):
    """
    Return a mapping of full ref name to target hex for every reference
    under `refs/heads/` or `refs/tags/`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_refs(_context_uid, _repo_id):
        heads_or_tags = re.compile('^refs/(heads|tags)/')
        with self._factory.repo_libgit2(wire) as repo:
            matching = [
                ref for ref in repo.listall_reference_objects()
                if heads_or_tags.match(ref.name)
            ]
            return {ref.name: ref.target.hex for ref in matching}

    return _get_refs(context_uid, repo_id)
801
801
@reraise_safe_exceptions
def get_branch_pointers(self, wire):
    """
    Return a mapping of target hex to branch shorthand for every
    reference whose name starts with `refs/heads`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_branch_pointers(_context_uid, _repo_id):
        repo_init = self._factory.repo_libgit2(wire)
        heads_only = re.compile('^refs/heads')
        with repo_init as repo:
            pointers = {}
            for ref in repo.listall_reference_objects():
                if not heads_only.match(ref.name):
                    continue
                # later refs win when several share the same target
                pointers[ref.target.hex] = ref.shorthand
            return pointers

    return _get_branch_pointers(context_uid, repo_id)
817
817
@reraise_safe_exceptions
def head(self, wire, show_exc=True):
    """
    Return the hex id of the object HEAD peels to.

    :param show_exc: when true, any error raised while resolving HEAD is
        re-raised; when false the error is swallowed and None is returned
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _head(_context_uid, _repo_id, _show_exc):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                head_id = repo.head.peel().hex
            except Exception:
                if show_exc:
                    raise
                # falls through: implicit None when errors are hidden
            else:
                return head_id

    return _head(context_uid, repo_id, show_exc)
833
833
@reraise_safe_exceptions
def init(self, wire):
    """Initialise a repository at `wire['path']` and keep it on `self.repo`."""
    self.repo = Repo.init(safe_str(wire['path']))
838
838
@reraise_safe_exceptions
def init_bare(self, wire):
    """Initialise a bare repository at `wire['path']` and keep it on `self.repo`."""
    self.repo = Repo.init_bare(safe_str(wire['path']))
843
843
@reraise_safe_exceptions
def revision(self, wire, rev):
    """
    Look up `rev` in the repository and return its ids.

    :return: dict with `id` (hex id of the object) and, when the object
        exposes a `tree_id`, `tree` (hex id of that tree)
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _revision(_context_uid, _repo_id, _rev):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[rev]
            result = {'id': obj.id.hex}
            # tree objects itself don't have tree_id attribute
            if hasattr(obj, 'tree_id'):
                result['tree'] = obj.tree_id.hex
            return result

    return _revision(context_uid, repo_id, rev)
864
864
@reraise_safe_exceptions
def date(self, wire, commit_id):
    """
    Return `[commit_time, commit_time_offset]` for `commit_id`.

    When the looked-up object has no `commit_time` attribute, the values
    are read from the result of its `get_object()` instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _date(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            if not hasattr(commit, 'commit_time'):
                commit = commit.get_object()

            # TODO(marcink): check dulwich difference of offset vs timezone
            return [commit.commit_time, commit.commit_time_offset]

    return _date(repo_id, commit_id)
885
885
@reraise_safe_exceptions
def author(self, wire, commit_id):
    """
    Return the author of `commit_id` as a string.

    The result is "name <email>" when an email is set, otherwise just the
    name; if formatting the name fails, the raw name converted with
    `safe_str` is used.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _author(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]

            # objects without an `author` attribute are resolved through
            # get_object() first
            if not hasattr(commit, 'author'):
                commit = commit.get_object()
            author = commit.author

            if author.email:
                return f"{author.name} <{author.email}>"

            try:
                return f"{author.name}"
            except Exception:
                return f"{safe_str(author.raw_name)}"

    return _author(repo_id, commit_id)
911
911
@reraise_safe_exceptions
def message(self, wire, commit_id):
    """Return the `message` attribute of the object stored under `commit_id`."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _message(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[commit_id].message

    return _message(repo_id, commit_id)
923
923
@reraise_safe_exceptions
def parents(self, wire, commit_id):
    """
    Return the hex ids of the parents of `commit_id`.

    When the looked-up object has no `parent_ids` attribute, they are read
    from the result of its `get_object()` instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _parents(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            source = commit if hasattr(commit, 'parent_ids') else commit.get_object()
            return [parent_id.hex for parent_id in source.parent_ids]

    return _parents(repo_id, commit_id)
941
941
@reraise_safe_exceptions
def children(self, wire, commit_id):
    """
    Return the ids of the direct children of `commit_id`.

    Runs `git rev-list --all --children` limited to the range
    `<commit_id>^..<head>` and parses the first output line that starts
    with `commit_id`; the remaining space-separated fields of that line
    are the child ids.

    :return: list of child commit ids (empty when no line matches)
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    head = self.head(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _children(_repo_id, _commit_id):
        # NOTE(review): `<commit_id>^` presumably cannot be resolved for a
        # commit without parents - confirm how root commits should behave.
        output, __ = self.run_git_command(
            wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])

        child_ids = []
        for line in output.splitlines():
            line = safe_str(line)
            # literal prefix test; the id is never interpreted as a regex
            if line.startswith(commit_id):
                child_ids.extend(line.split(' ')[1:])
                # a commit is listed once, no need to scan the rest
                break

        return child_ids
    return _children(repo_id, commit_id)
961
966
@reraise_safe_exceptions
def set_refs(self, wire, key, value):
    """Create reference `key` pointing at `value`, overwriting an existing one."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.create(key, value, force=True)
967
972
@reraise_safe_exceptions
def create_branch(self, wire, branch_name, commit_id, force=False):
    """
    Create local branch `branch_name` at `commit_id`.

    With `force` the branch is always (re)created; without it the branch
    is created only when `repo.branches.get(branch_name)` finds nothing.
    """
    with self._factory.repo_libgit2(wire) as repo:
        commit = repo[commit_id]

        # short-circuit keeps the existence lookup out of the forced path
        if force or not repo.branches.get(branch_name):
            repo.branches.local.create(branch_name, commit, force=force)
979
984
@reraise_safe_exceptions
def remove_ref(self, wire, key):
    """Delete the reference named `key` from the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(key)
985
990
@reraise_safe_exceptions
def tag_remove(self, wire, tag_name):
    """Delete the reference `refs/tags/<tag_name>` from the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        tag_ref = f'refs/tags/{tag_name}'
        repo.references.delete(tag_ref)
992
997
@reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Return the object store's tree changes between two commits as a list.

    :param source_id: commit whose tree is the starting point; a falsy
        value means there is no source tree (None is passed on)
    :param target_id: commit whose tree is compared against the source
    """
    # TODO(marcink): remove this seems it's only used by tests
    repo = self._factory.repo(wire)
    if source_id:
        source_tree = repo[source_id].tree
    else:
        source_tree = None
    target_tree = repo[target_id].tree
    changes = repo.object_store.tree_changes(source_tree, target_tree)
    return list(changes)
1001
1006
1002 @reraise_safe_exceptions
1007 @reraise_safe_exceptions
def tree_and_type_for_path(self, wire, commit_id, path):
    """
    Look up ``path`` inside the tree of ``commit_id``.

    :return: ``(object id hex, type string, filemode)`` of the entry found
        at ``path``, or ``(None, None, None)`` when the tree has no such
        entry.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
        # The lookup reads commit_id/path from the enclosing call; the
        # underscore arguments are presumably what the cache keys on.
        with self._factory.repo_libgit2(wire) as git_repo:
            commit = git_repo[commit_id]
            try:
                entry = commit.tree[path]
            except KeyError:
                return None, None, None
            return entry.id.hex, entry.type_str, entry.filemode

    return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1021
1026
1022 @reraise_safe_exceptions
1027 @reraise_safe_exceptions
def tree_items(self, wire, tree_id):
    """
    List the direct entries of the tree object ``tree_id``.

    :return: list of ``(name, filemode, sha, type)`` tuples, where the
        ``commit`` type is reported as ``link``
    :raises ObjectMissing: when the repository has no object ``tree_id``
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_items(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as git_repo:
            try:
                tree = git_repo[tree_id]
            except KeyError:
                raise ObjectMissing('No tree with id: {}'.format(tree_id))

            entries = []
            for node in tree:
                node_type = node.type_str
                # NOTE(marcink): submodules we translate to 'link' for backward compat
                if node_type == 'commit':
                    node_type = 'link'
                entries.append((node.name, node.filemode, node.hex, node_type))
            return entries

    return _tree_items(repo_id, tree_id)
1050
1055
1051 @reraise_safe_exceptions
1056 @reraise_safe_exceptions
def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Old version that uses subprocess to call diff

    When ``commit_id_1`` equals ``self.EMPTY_COMMIT`` the patch is produced
    with ``git show`` and everything before the first line starting with
    ``diff`` is dropped; otherwise ``git diff`` output is returned as is.

    :param file_filter: optional path; when truthy it is appended after ``--``
    :param opt_ignorews: when truthy adds ``--ignore-all-space``
    :param context: number of context lines (``-U<context>``)
    :return: the patch as bytes, exactly as produced by the git command
    """
    flags = [
        '-U%s' % context, '--patch',
        '--binary',
        '--find-renames',
        '--no-indent-heuristic',
        # '--indent-heuristic',
        # '--full-index',
        # '--abbrev=40'
    ]

    if opt_ignorews:
        flags.append('--ignore-all-space')

    from_empty = commit_id_1 == self.EMPTY_COMMIT
    if from_empty:
        cmd = ['show'] + flags + [commit_id_2]
    else:
        cmd = ['diff'] + flags + [commit_id_1, commit_id_2]

    if file_filter:
        cmd.extend(['--', file_filter])

    diff, __ = self.run_git_command(wire, cmd)

    # If we used 'show' command, strip first few lines (until actual diff
    # starts)
    if from_empty:
        lines = diff.splitlines()
        start = next(
            (idx for idx, line in enumerate(lines) if line.startswith(b'diff')),
            len(lines))
        # The output is bytes, so it has to be re-joined with bytes as well.
        # Append new line just like 'diff' command do
        diff = b'\n'.join(lines[start:]) + b'\n'
    return diff
1091
1096
1092 @reraise_safe_exceptions
1097 @reraise_safe_exceptions
def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Produce a patch between two commits using pygit2.

    When ``commit_id_1`` equals ``self.EMPTY_COMMIT`` the tree of
    ``commit_id_2`` is diffed against an empty tree; otherwise it is diffed
    against the tree of ``commit_id_1`` and rename detection is applied.

    :param file_filter: optional path; when truthy only the patch whose old
        file path equals it is returned
    :param opt_ignorews: when truthy whitespace changes are ignored
    :param context: number of context lines
    :return: patch text, or ``''`` when there is nothing to report
    """
    with self._factory.repo_libgit2(wire) as git_repo:
        diff_flags = 0 | pygit2.GIT_DIFF_SHOW_BINARY
        if opt_ignorews:
            diff_flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE

        target_commit = git_repo[commit_id_2]
        if commit_id_1 == self.EMPTY_COMMIT:
            diff_obj = target_commit.tree.diff_to_tree(
                flags=diff_flags, context_lines=context, swap=True)
        else:
            source_commit = git_repo[commit_id_1]
            diff_obj = target_commit.tree.diff_to_tree(
                source_commit.tree, flags=diff_flags,
                context_lines=context, swap=True)
            diff_obj.find_similar(flags=0 | pygit2.GIT_DIFF_FIND_RENAMES)

        if not file_filter:
            return diff_obj.patch or ''

        for file_patch in diff_obj:
            if file_patch.delta.old_file.path == file_filter:
                return file_patch.patch or ''
        # no matching path == no diff
        return ''
1124
1129
1125 @reraise_safe_exceptions
1130 @reraise_safe_exceptions
def node_history(self, wire, commit_id, path, limit):
    """
    Return ids of commits that touched ``path``, starting from ``commit_id``.

    :param limit: maximum number of commits; ``1`` takes a ``rev-list``
        shortcut, any other truthy value is passed to ``git log -n``
    :return: list of 40-character hex ids (bytes) found in the git output
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
        if limit == 1:
            # optimize for n==1, rev-list is much faster for that use-case
            git_args = ['rev-list', '-1', commit_id, '--', path]
        else:
            git_args = ['log']
            if limit:
                git_args += ['-n', str(safe_int(limit, 0))]
            git_args += ['--pretty=format: %H', '-s', commit_id, '--', path]

        stdout, __ = self.run_git_command(wire, git_args)
        return list(re.findall(rb'[0-9a-fA-F]{40}', stdout))

    return _node_history(context_uid, repo_id, commit_id, path, limit)
1146
1151
1147 @reraise_safe_exceptions
1152 @reraise_safe_exceptions
def node_annotate_legacy(self, wire, commit_id, path):
    """
    Annotate ``path`` at ``commit_id`` by parsing ``git blame`` output.

    :return: list of ``(line_no, blame_commit_id, line)`` tuples; line
        numbers start at 1 and the last line of the output is left out
    """
    # note: replaced by pygit2 implementation
    # -l ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r commit_id ==> blames for the given commit
    blame_cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
    stdout, __ = self.run_git_command(wire, blame_cmd)

    annotated = []
    for line_no, blame_line in enumerate(stdout.splitlines()[:-1], start=1):
        blame_commit_id, content = re.split(rb' ', blame_line, maxsplit=1)
        annotated.append((line_no, blame_commit_id, content))
    return annotated
1163
1168
1164 @reraise_safe_exceptions
1169 @reraise_safe_exceptions
def node_annotate(self, wire, commit_id, path):
    """
    Annotate every line of ``path`` at ``commit_id`` using libgit2 blame.

    :return: list of ``(line_no, blame_commit_id, line)`` tuples with line
        numbers starting at 1; ``line`` is taken from the blob data split
        into lines
    """
    with self._factory.repo_libgit2(wire) as git_repo:
        commit = git_repo[commit_id]
        blame = git_repo.blame(path, newest_commit=commit_id)
        file_lines = commit.tree[path].data.splitlines()
        return [
            (line_no, blame.for_line(line_no).final_commit_id.hex, content)
            for line_no, content in enumerate(file_lines, start=1)
        ]
1180
1185
1181 @reraise_safe_exceptions
1186 @reraise_safe_exceptions
def update_server_info(self, wire):
    """
    Run the module-level ``update_server_info`` helper on the repository
    obtained from the factory for ``wire``.
    """
    update_server_info(self._factory.repo(wire))
1185
1190
1186 @reraise_safe_exceptions
1191 @reraise_safe_exceptions
def get_all_commit_ids(self, wire):
    """
    Return the ids of all commits reachable from branches and tags.

    The ids come from ``git rev-list --reverse --date-order --branches
    --tags``, one entry per output line (bytes). Any error raised by the
    git call results in an empty list.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids(_context_uid, _repo_id):
        cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
        try:
            output, __ = self.run_git_command(wire, cmd)
            return output.splitlines()
        except Exception:
            # Can be raised for empty repositories
            return []

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
        # NOTE: alternative pygit2 based implementation, not called by this
        # method at the moment.
        repo_init = self._factory.repo_libgit2(wire)
        from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
        results = []
        with repo_init as repo:
            # NOTE(review): GIT_BRANCH_ALL is OR-ed into the sort mode here;
            # confirm this is intended before switching to this helper.
            for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
                results.append(commit.id.hex)
        # previously the collected ids were dropped (implicit ``None``)
        return results

    return _get_all_commit_ids(context_uid, repo_id)
1213
1218
1214 @reraise_safe_exceptions
1219 @reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
    """
    Run the git executable with the arguments in ``cmd``.

    Special keys are removed from ``opts`` before the remainder is passed
    on to ``SubprocessIOChunker``:

    * ``_bare``: when present, ``-c core.quotepath=false`` is not added
    * ``_safe``: when present, an ``OSError`` is logged and returned
      instead of being raised
    * ``_copts``: extra arguments placed right after the git executable
    * ``extra_env``: mapping merged into the environment of the process

    :param wire: mapping; its ``path`` is used as working directory when it
        is an existing directory
    :param cmd: list of git arguments, without the executable itself
    :return: ``(stdout, stderr)`` as bytes; in ``_safe`` mode ``('', err)``
        when the process could not be run
    """
    repo_path = wire.get('path', None)
    if repo_path and os.path.isdir(repo_path):
        opts['cwd'] = repo_path

    bare = '_bare' in opts
    opts.pop('_bare', None)
    config_opts = [] if bare else ['-c', 'core.quotepath=false', ]

    # no exc on failure
    safe_call = '_safe' in opts
    opts.pop('_safe', None)

    if '_copts' in opts:
        config_opts.extend(opts.pop('_copts') or [])

    gitenv = os.environ.copy()
    gitenv.update(opts.pop('extra_env', {}))
    # need to clean fix GIT_DIR !
    gitenv.pop('GIT_DIR', None)
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
    gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

    full_cmd = [settings.GIT_EXECUTABLE] + config_opts + cmd
    _opts = {'env': gitenv, 'shell': False}

    proc = None
    try:
        _opts.update(opts)
        proc = subprocessio.SubprocessIOChunker(full_cmd, **_opts)

        return b''.join(proc), b''.join(proc.stderr)
    except OSError as err:
        readable_cmd = ' '.join(map(safe_str, full_cmd))  # human friendly CMD
        tb_err = ("Couldn't run git command (%s).\n"
                  "Original error was:%s\n"
                  "Call options:%s\n"
                  % (readable_cmd, err, _opts))
        log.exception(tb_err)
        if safe_call:
            return '', err
        raise exceptions.VcsException()(tb_err)
    finally:
        if proc:
            proc.close()
1267
1272
1268 @reraise_safe_exceptions
1273 @reraise_safe_exceptions
def install_hooks(self, wire, force=False):
    """
    Install the git hooks into the repository at ``wire['path']``.

    :param force: forwarded to ``install_git_hooks`` as ``force_create``
    :return: whatever ``install_git_hooks`` returns
    """
    from vcsserver.hook_utils import install_git_hooks
    is_bare = self.bare(wire)
    repo_path = wire['path']
    return install_git_hooks(repo_path, is_bare, force_create=force)
1274
1279
1275 @reraise_safe_exceptions
1280 @reraise_safe_exceptions
def get_hooks_info(self, wire):
    """
    Report the versions of the installed pre and post git hooks.

    :return: dict with the keys ``pre_version`` and ``post_version``
    """
    from vcsserver.hook_utils import (
        get_git_pre_hook_version, get_git_post_hook_version)
    is_bare = self.bare(wire)
    repo_path = wire['path']

    hooks_info = {}
    hooks_info['pre_version'] = get_git_pre_hook_version(repo_path, is_bare)
    hooks_info['post_version'] = get_git_post_hook_version(repo_path, is_bare)
    return hooks_info
1285
1290
1286 @reraise_safe_exceptions
1291 @reraise_safe_exceptions
def set_head_ref(self, wire, head_name):
    """
    Point ``HEAD`` at the branch ``head_name`` via ``git symbolic-ref``.

    :param head_name: short branch name, without the ``refs/heads/`` prefix
    :return: list starting with ``head_name`` followed by the lines of the
        git command output
    """
    log.debug('Setting refs/head to `%s`', head_name)
    # run_git_command starts git with shell=False, so every list item reaches
    # git verbatim; wrapping the names in double quotes would make the quotes
    # part of the ref names instead of updating the real HEAD.
    cmd = ['symbolic-ref', 'HEAD', 'refs/heads/%s' % head_name]
    output, __ = self.run_git_command(wire, cmd)
    return [head_name] + output.splitlines()
1292
1297
1293 @reraise_safe_exceptions
1298 @reraise_safe_exceptions
def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id):
    """
    Build an archive of ``commit_id`` by handing a file walker to the
    module-level ``archive_repo`` helper.

    The walker yields one ``ArchiveNode`` per index entry of the requested
    tree and skips entries whose mode is ``GIT_FILEMODE_COMMIT``.

    :return: whatever the module-level ``archive_repo`` helper returns
    """

    def file_walker(_commit_id, path):
        # NOTE(review): the commit is looked up through the enclosing
        # ``commit_id``; the ``_commit_id`` argument is not read here.
        with self._factory.repo_libgit2(wire) as git_repo:
            commit = git_repo[commit_id]

            if path in ['', '/']:
                root = commit.tree
            else:
                root = commit.tree[path.rstrip('/')]

            tree_id = root.id.hex
            try:
                tree = git_repo[tree_id]
            except KeyError:
                raise ObjectMissing('No tree with id: {}'.format(tree_id))

            index = LibGit2Index.Index()
            index.read_tree(tree)

            for entry in index:
                entry_path = entry.path
                entry_mode = entry.mode
                if entry_mode == pygit2.GIT_FILEMODE_COMMIT:
                    log.debug('Skipping path %s as a commit node', entry_path)
                    continue
                yield ArchiveNode(
                    entry_path, entry_mode, stat.S_ISLNK(entry_mode),
                    git_repo[entry.hex].read_raw)

    return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
                        archive_dir_name, commit_id)
General Comments 0
You need to be logged in to leave comments. Login now