##// END OF EJS Templates
fix(git ops): moved git operations into vcsserver and use libgit2 when possible....
super-admin -
r1337:1fc1a507 default
parent child Browse files
Show More
@@ -1,1526 +1,1543 b''
# RhodeCode VCSServer provides access to different vcs backends via network.
# Copyright (C) 2014-2024 RhodeCode GmbH
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

import collections
import logging
import os
import re
import stat
import traceback
import urllib.request
import urllib.parse
import urllib.error
from functools import wraps

import more_itertools
import pygit2
from pygit2 import Repository as LibGit2Repo
from pygit2 import index as LibGit2Index
from dulwich import index, objects
from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
from dulwich.errors import (
    NotGitRepository, ChecksumMismatch, WrongObjectException,
    MissingCommitError, ObjectMissing, HangupException,
    UnexpectedCommandError)
from dulwich.repo import Repo as DulwichRepo

import vcsserver
from vcsserver import exceptions, settings, subprocessio
from vcsserver.lib.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str, splitnewlines
from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
from vcsserver.hgcompat import (
    hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
from vcsserver.git_lfs.lib import LFSOidStore
from vcsserver.vcs_base import RemoteBase

# File-mode constants taken from ``stat`` and dulwich's ``objects`` module.
DIR_STAT = stat.S_IFDIR
FILE_MODE = stat.S_IFMT
GIT_LINK = objects.S_IFGITLINK
# Byte markers used when handling ref names.
PEELED_REF_MARKER = b'^{}'
HEAD_MARKER = b'HEAD'

log = logging.getLogger(__name__)
57
57
58
58
def reraise_safe_exceptions(func):
    """Converts Dulwich exceptions to something neutral.

    Decorator. Object/commit lookup failures raised by ``func`` are re-raised
    through ``exceptions.LookupException``; hangup / unexpected-command
    failures through ``exceptions.VcsException``.  Every other exception
    propagates unchanged.

    :param func: callable to wrap
    :return: wrapped callable with the same metadata as ``func``
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing) as e:
            exc = exceptions.LookupException(org_exc=e)
            raise exc(safe_str(e))
        except (HangupException, UnexpectedCommandError) as e:
            exc = exceptions.VcsException(org_exc=e)
            raise exc(safe_str(e))
        # NOTE(marcink): because of how dulwich handles some exceptions
        # (KeyError on empty repos), we cannot track this and catch all
        # exceptions; anything not matched above is an exception from other
        # handlers and is deliberately left to propagate untouched.
    return wrapper
81
81
82
82
class Repo(DulwichRepo):
    """
    A wrapper for dulwich Repo class.

    Since dulwich is sometimes keeping .idx file descriptors open, it leads to
    "Too many open files" error. We need to close all opened file descriptors
    once the repo object is destroyed.
    """

    def __del__(self):
        # Only close when ``object_store`` exists on the instance
        # (presumably absent when construction failed part-way -- TODO confirm).
        if hasattr(self, 'object_store'):
            self.close()
94
94
95
95
class Repository(LibGit2Repo):
    """libgit2 repository that can be used as a context manager.

    Entering the ``with`` block yields the repository itself; leaving it
    calls ``free()`` regardless of whether an exception occurred.
    """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.free()
103
103
104
104
class GitFactory(RepoFactory):
    """Factory returning Git repository objects for a ``wire`` description."""
    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """Instantiate a repository object for ``wire['path']``.

        :param wire: mapping carrying at least the ``path`` key
        :param create: accepted for interface compatibility; not used here
        :param use_libgit2: when true return a libgit2 ``Repository``,
            otherwise a dulwich ``Repo``
        """
        if use_libgit2:
            repo = Repository(safe_bytes(wire['path']))
        else:
            # dulwich mode
            repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
            repo = Repo(repo_path)

        log.debug('repository created: got GIT object: %s', repo)
        return repo

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """Shortcut for ``repo(wire, use_libgit2=True)``."""
        return self.repo(wire, use_libgit2=True)
127
127
128
128
def create_signature_from_string(author_str, **kwargs):
    """
    Creates a pygit2.Signature object from a string of the format 'Name <email>'.

    :param author_str: String of the format 'Name <email>'
    :param kwargs: extra keyword arguments forwarded to ``pygit2.Signature``
    :return: pygit2.Signature object
    :raises ValueError: when ``author_str`` does not match 'Name <email>'
    """
    match = re.match(r'^(.+) <(.+)>$', author_str)
    if match is None:
        raise ValueError(f"Invalid format: {author_str}")

    name, email = match.groups()
    return pygit2.Signature(name, email, **kwargs)
142
142
143
143
def get_obfuscated_url(url_obj):
    """Return ``str(url_obj)`` with the password and query string masked.

    NOTE: ``url_obj`` is modified in place -- a truthy ``passwd`` is replaced
    by ``b'*****'`` and ``query`` by the result of ``obfuscate_qs``.

    :param url_obj: parsed url object exposing ``passwd`` and ``query``
    :return: string form of the masked url
    """
    url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd
    url_obj.query = obfuscate_qs(url_obj.query)
    obfuscated_uri = str(url_obj)
    return obfuscated_uri
149
149
150
150
151 class GitRemote(RemoteBase):
151 class GitRemote(RemoteBase):
152 COMMIT_ID_PAT = re.compile(rb'[0-9a-fA-F]{40}')
152
153
def __init__(self, factory):
    """Store the repository factory and build the bulk dispatch tables.

    :param factory: object used by the methods of this class to obtain
        repository instances (via ``repo`` / ``repo_libgit2``)
    """
    self._factory = factory
    # attribute name -> bound method, looked up by ``bulk_request``
    self._bulk_methods = {
        "date": self.date,
        "author": self.author,
        "branch": self.branch,
        "message": self.message,
        "parents": self.parents,
        "_commit": self.revision,
    }
    # attribute name -> bound method, looked up by ``bulk_file_request``
    self._bulk_file_methods = {
        "size": self.get_node_size,
        "data": self.get_node_data,
        "flags": self.get_node_flags,
        "is_binary": self.get_node_is_binary,
        "md5": self.md5_hash
    }
170
171
171 def _wire_to_config(self, wire):
172 def _wire_to_config(self, wire):
172 if 'config' in wire:
173 if 'config' in wire:
173 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
174 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
174 return {}
175 return {}
175
176
176 def _remote_conf(self, config):
177 def _remote_conf(self, config):
177 params = [
178 params = [
178 '-c', 'core.askpass=""',
179 '-c', 'core.askpass=""',
179 ]
180 ]
180 config_attrs = {
181 config_attrs = {
181 'vcs_ssl_dir': 'http.sslCAinfo={}',
182 'vcs_ssl_dir': 'http.sslCAinfo={}',
182 'vcs_git_lfs_store_location': 'lfs.storage={}'
183 'vcs_git_lfs_store_location': 'lfs.storage={}'
183 }
184 }
184 for key, param in config_attrs.items():
185 for key, param in config_attrs.items():
185 if value := config.get(key):
186 if value := config.get(key):
186 params.extend(['-c', param.format(value)])
187 params.extend(['-c', param.format(value)])
187 return params
188 return params
188
189
@reraise_safe_exceptions
def discover_git_version(self):
    """Return the output of ``git --version`` without the ``git version`` prefix.

    :return: stripped version string
    """
    stdout, _ = self.run_git_command(
        {}, ['--version'], _bare=True, _safe=True)
    prefix = b'git version'
    if stdout.startswith(prefix):
        stdout = stdout[len(prefix):]
    return safe_str(stdout.strip())
197
198
@reraise_safe_exceptions
def is_empty(self, wire):
    """Tell whether the repository described by ``wire`` is empty.

    Returns ``False`` as soon as ``repo.head.name`` is truthy, otherwise
    falls back to ``repo.is_empty``.  Any exception raised while probing
    is treated as "empty" (best effort) and only logged at debug level.
    """
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:
        try:
            has_head = repo.head.name
            if has_head:
                return False

            # NOTE(marcink): check again using more expensive method
            return repo.is_empty
        except Exception:
            # deliberate best-effort: a failing probe means "empty"
            log.debug('is_empty probe failed, treating repository as empty', exc_info=True)

        return True
213
214
@reraise_safe_exceptions
def assert_correct_path(self, wire):
    """Check that ``wire['path']`` points at a Git repository.

    The (optionally cached) inner check is always invoked with
    ``fast_check=True`` here, i.e. via ``pygit2.discover_repository``; the
    slower branch, which actually opens the repository, is kept for
    ``fast_check=False``.

    :return: ``True`` when a repository is found, ``False`` otherwise
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _assert_correct_path(_context_uid, _repo_id, fast_check):
        if fast_check:
            path = safe_str(wire['path'])
            if pygit2.discover_repository(path):
                return True
            return False
        else:
            try:
                repo_init = self._factory.repo_libgit2(wire)
                with repo_init:
                    pass
            except pygit2.GitError:
                path = wire.get('path')
                tb = traceback.format_exc()
                log.debug("Invalid Git path `%s`, tb: %s", path, tb)
                return False
            return True

    return _assert_correct_path(context_uid, repo_id, True)
239
240
@reraise_safe_exceptions
def bare(self, wire):
    """Return the ``is_bare`` flag of the repository described by ``wire``."""
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:
        return repo.is_bare
245
246
@reraise_safe_exceptions
def get_node_data(self, wire, commit_id, path):
    """Return the content of the blob at ``path`` in commit ``commit_id``.

    :return: ``BytesEnvelope`` wrapping the blob data
    :raises: a ``LookupException``-derived error when the tree entry at
        ``path`` is not a blob
    """
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:
        commit = repo[commit_id]
        blob_obj = commit.tree[path]

        if blob_obj.type != pygit2.GIT_OBJ_BLOB:
            raise exceptions.LookupException()(
                f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')

        return BytesEnvelope(blob_obj.data)
258
259
@reraise_safe_exceptions
def get_node_size(self, wire, commit_id, path):
    """Return the ``size`` of the blob at ``path`` in commit ``commit_id``.

    :raises: a ``LookupException``-derived error when the tree entry at
        ``path`` is not a blob
    """
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:
        commit = repo[commit_id]
        blob_obj = commit.tree[path]

        if blob_obj.type != pygit2.GIT_OBJ_BLOB:
            raise exceptions.LookupException()(
                f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')

        return blob_obj.size
271
272
@reraise_safe_exceptions
def get_node_flags(self, wire, commit_id, path):
    """Return the ``filemode`` of the blob at ``path`` in commit ``commit_id``.

    :raises: a ``LookupException``-derived error when the tree entry at
        ``path`` is not a blob
    """
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:
        commit = repo[commit_id]
        blob_obj = commit.tree[path]

        if blob_obj.type != pygit2.GIT_OBJ_BLOB:
            raise exceptions.LookupException()(
                f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')

        return blob_obj.filemode
284
285
@reraise_safe_exceptions
def get_node_is_binary(self, wire, commit_id, path):
    """Return the ``is_binary`` flag of the blob at ``path`` in ``commit_id``.

    :raises: a ``LookupException``-derived error when the tree entry at
        ``path`` is not a blob
    """
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:
        commit = repo[commit_id]
        blob_obj = commit.tree[path]

        if blob_obj.type != pygit2.GIT_OBJ_BLOB:
            raise exceptions.LookupException()(
                f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')

        return blob_obj.is_binary
297
298
@reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
    """Return the data of object ``sha`` wrapped in a ``BytesEnvelope``."""
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:
        blob_obj = repo[sha]
        return BytesEnvelope(blob_obj.data)
304
305
@reraise_safe_exceptions
def blob_raw_length(self, wire, sha):
    """Return the ``size`` of object ``sha``; result is cached per repo and sha."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _blob_raw_length(_repo_id, _sha):
        # use the cache-key argument rather than the enclosing ``sha``
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            blob = repo[_sha]
            return blob.size

    return _blob_raw_length(repo_id, sha)
319
320
320 def _parse_lfs_pointer(self, raw_content):
321 def _parse_lfs_pointer(self, raw_content):
321 spec_string = b'version https://git-lfs.github.com/spec'
322 spec_string = b'version https://git-lfs.github.com/spec'
322 if raw_content and raw_content.startswith(spec_string):
323 if raw_content and raw_content.startswith(spec_string):
323
324
324 pattern = re.compile(rb"""
325 pattern = re.compile(rb"""
325 (?:\n)?
326 (?:\n)?
326 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
327 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
327 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
328 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
328 ^size[ ](?P<oid_size>[0-9]+)\n
329 ^size[ ](?P<oid_size>[0-9]+)\n
329 (?:\n)?
330 (?:\n)?
330 """, re.VERBOSE | re.MULTILINE)
331 """, re.VERBOSE | re.MULTILINE)
331 match = pattern.match(raw_content)
332 match = pattern.match(raw_content)
332 if match:
333 if match:
333 return match.groupdict()
334 return match.groupdict()
334
335
335 return {}
336 return {}
336
337
@reraise_safe_exceptions
def is_large_file(self, wire, commit_id):
    """Return parsed LFS pointer data for the object ``commit_id``.

    The object is looked up directly by id.  Binary objects yield ``{}``;
    otherwise the result of ``_parse_lfs_pointer`` on the object data is
    returned.  Result is cached per repo and object id.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_large_file(_repo_id, _sha):
        # use the cache-key argument rather than the enclosing ``commit_id``
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            blob = repo[_sha]
            if blob.is_binary:
                return {}

            return self._parse_lfs_pointer(blob.data)

    return _is_large_file(repo_id, commit_id)
353
354
@reraise_safe_exceptions
def is_binary(self, wire, tree_id):
    """Return the ``is_binary`` flag of object ``tree_id``; cached per repo and id."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_binary(_repo_id, _tree_id):
        # use the cache-key argument rather than the enclosing ``tree_id``
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            blob_obj = repo[_tree_id]
            return blob_obj.is_binary

    return _is_binary(repo_id, tree_id)
367
368
@reraise_safe_exceptions
def md5_hash(self, wire, commit_id, path):
    """Validate that ``path`` in ``commit_id`` is a blob and return ``''``.

    NOTE: no hash is computed here -- the method always returns an empty
    string once the blob check passes.

    :raises: a ``LookupException``-derived error when the tree entry at
        ``path`` is not a blob
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _md5_hash(_repo_id, _commit_id, _path):
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            commit = repo[_commit_id]
            blob_obj = commit.tree[_path]

            if blob_obj.type != pygit2.GIT_OBJ_BLOB:
                raise exceptions.LookupException()(
                    f'Tree for commit_id:{_commit_id} is not a blob: {blob_obj.type_str}')

        return ''

    return _md5_hash(repo_id, commit_id, path)
387
388
@reraise_safe_exceptions
def in_largefiles_store(self, wire, oid):
    """Tell whether ``oid`` exists in the configured LFS store.

    :return: ``store.has_oid()`` when ``vcs_git_lfs_store_location`` is
        configured in ``wire``, otherwise ``False``
    """
    conf = self._wire_to_config(wire)
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if store_location:

        store = LFSOidStore(
            oid=oid, repo=repo_name, store_location=store_location)
        return store.has_oid()

    return False
403
404
@reraise_safe_exceptions
def store_path(self, wire, oid):
    """Return the ``oid_path`` of ``oid`` inside the configured LFS store.

    :raises ValueError: when ``vcs_git_lfs_store_location`` is not
        configured in ``wire``
    """
    conf = self._wire_to_config(wire)
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if store_location:
        store = LFSOidStore(
            oid=oid, repo=repo_name, store_location=store_location)
        return store.oid_path
    raise ValueError(f'Unable to fetch oid with path {oid}')
417
418
@reraise_safe_exceptions
def bulk_request(self, wire, rev, pre_load):
    """Collect several commit attributes for ``rev`` in one call.

    :param wire: wire dict; its ``cache`` key is set to ``False`` while
        the individual attribute methods are called
    :param rev: revision passed to every attribute method
    :param pre_load: iterable of attribute names, each a key of
        ``self._bulk_methods``
    :return: dict mapping attribute name to the method result
    :raises: a ``VcsException``-derived error when a ``KeyError`` occurs
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_request(_repo_id, _rev, _pre_load):
        result = {}
        # iterate the (sorted) cache-key argument so the result does not
        # depend on the order the first caller happened to use
        for attr in _pre_load:
            # NOTE(review): a KeyError raised inside the attribute method is
            # also reported as "Unknown bulk attribute" -- kept as-is.
            try:
                method = self._bulk_methods[attr]
                wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
                args = [wire, _rev]
                result[attr] = method(*args)
            except KeyError as e:
                raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
        return result

    return _bulk_request(repo_id, rev, sorted(pre_load))
437
438
@reraise_safe_exceptions
def bulk_file_request(self, wire, commit_id, path, pre_load):
    """Collect several file attributes for ``path`` at ``commit_id`` in one call.

    :param wire: wire dict; its ``cache`` key is set to ``False`` while
        the individual attribute methods are called
    :param pre_load: iterable of attribute names, each a key of
        ``self._bulk_file_methods``
    :return: ``BinaryEnvelope`` wrapping a dict of attribute name -> result
    :raises: a ``VcsException``-derived error when a ``KeyError`` occurs
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
        result = {}
        # iterate the (sorted) cache-key argument, not the enclosing ``pre_load``
        for attr in _pre_load:
            try:
                method = self._bulk_file_methods[attr]
                wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
                result[attr] = method(wire, _commit_id, _path)
            except KeyError as e:
                raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
        return result

    return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
456
457
def _build_opener(self, url: str):
    """Build a ``urllib`` opener for ``url``.

    When the parsed url carries auth info, basic and digest auth handlers
    sharing one password manager are installed.

    :param url: url to build the opener for
    :return: result of ``urllib.request.build_opener``
    """
    handlers = []
    url_obj = url_parser(safe_bytes(url))
    authinfo = url_obj.authinfo()[1]

    if authinfo:
        # create a password manager
        passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        passmgr.add_password(*convert_to_str(authinfo))

        handlers.extend((httpbasicauthhandler(passmgr),
                         httpdigestauthhandler(passmgr)))

    return urllib.request.build_opener(*handlers)
471
472
@reraise_safe_exceptions
def check_url(self, url, config):
    """Verify that ``url`` answers like a Git smart/dumb HTTP remote.

    Requests ``<url>/info/refs?service=git-upload-pack`` and inspects the
    response body.

    :param url: remote url, may contain credentials
    :param config: not used by this check
    :return: ``True`` when the remote looks like a Git repository
    :raises: a ``URLError``-derived error when the url cannot be opened,
        does not answer with code 200, or the body does not look like Git
    """
    url_obj = url_parser(safe_bytes(url))

    # take the test uri before ``get_obfuscated_url`` masks ``url_obj`` in place
    test_uri = safe_str(url_obj.authinfo()[0])
    obfuscated_uri = get_obfuscated_url(url_obj)

    log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)

    if not test_uri.endswith('info/refs'):
        test_uri = test_uri.rstrip('/') + '/info/refs'

    o = self._build_opener(url=url)
    o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

    q = {"service": 'git-upload-pack'}
    qs = f'?{urllib.parse.urlencode(q)}'
    cu = f"{test_uri}{qs}"

    try:
        req = urllib.request.Request(cu, None, {})
        log.debug("Trying to open URL %s", obfuscated_uri)
        resp = o.open(req)
        if resp.code != 200:
            raise exceptions.URLError()('Return Code is not 200')
    except Exception as e:
        log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
        # means it cannot be cloned
        raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")

    # now detect if it's proper git repo
    try:
        gitdata: bytes = resp.read()
    finally:
        # release the connection once the body has been consumed
        resp.close()

    if b'service=git-upload-pack' in gitdata:
        pass
    elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
        # old style git can return some other format!
        pass
    else:
        e = None
        raise exceptions.URLError(e)(
            f"url [{obfuscated_uri}] does not look like a git repo org_exc: {e}")

    return True
516
517
517 @reraise_safe_exceptions
518 @reraise_safe_exceptions
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
    """
    Fetch from `url` via `self.pull` (without applying refs) and copy selected refs.

    :param deferred: suffix (or tuple of suffixes) of ref names to leave out.
    :param valid_refs: prefix, or list/tuple of prefixes, a ref name must start with.
    :param update_after_clone: when truthy, point HEAD at the remote HEAD and
        rebuild the index from its tree.
    """
    # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
    fetched_refs = self.pull(wire, url, apply_refs=False)
    repo = self._factory.repo(wire)

    # str.startswith() takes a tuple of prefixes, not a list
    prefixes = tuple(valid_refs) if isinstance(valid_refs, list) else valid_refs

    for ref_name in fetched_refs:
        # only parse heads/tags and skip so called deferred tags
        if not ref_name.startswith(prefixes):
            continue
        if ref_name.endswith(deferred):
            continue
        repo[ref_name] = fetched_refs[ref_name]

    if not update_after_clone:
        return

    # we want to checkout HEAD
    repo["HEAD"] = fetched_refs["HEAD"]
    index.build_index_from_tree(
        repo.path, repo.index_path(), repo.object_store, repo["HEAD"].tree)
535
536
536 @reraise_safe_exceptions
537 @reraise_safe_exceptions
def branch(self, wire, commit_id):
    """
    Return the short names of refs under `refs/heads` whose value equals `commit_id`.

    The lookup goes through `self.get_refs(wire)` and is cached per
    (context_uid, repo_id, commit_id) when caching is enabled for `wire`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _branch(_context_uid, _repo_id, _commit_id):
        heads_pattern = re.compile('^refs/heads')

        names = []
        for ref_name, ref_sha in list(self.get_refs(wire).items()):
            if heads_pattern.match(ref_name) and ref_sha == _commit_id:
                # keep only what follows the last `refs/heads/` marker
                names.append(ref_name.split('refs/heads/')[-1])
        return names

    return _branch(context_uid, repo_id, commit_id)
552
553
553 @reraise_safe_exceptions
554 @reraise_safe_exceptions
def delete_branch(self, wire, branch_name):
    """
    Delete the branch `branch_name` if `repo.lookup_branch` finds it.

    Nothing happens when the lookup returns a falsy value.
    """
    with self._factory.repo_libgit2(wire) as repo:
        found = repo.lookup_branch(branch_name)
        if found:
            found.delete()
559
560
560 @reraise_safe_exceptions
561 @reraise_safe_exceptions
def commit_branches(self, wire, commit_id):
    """
    Return, as a list, what `repo.branches.with_commit(commit_id)` yields.

    The result is cached per (context_uid, repo_id, commit_id) when caching is
    enabled for `wire`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _commit_branches(_context_uid, _repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            containing = repo.branches.with_commit(_commit_id)
            return list(containing)

    return _commit_branches(context_uid, repo_id, commit_id)
573
574
574 @reraise_safe_exceptions
575 @reraise_safe_exceptions
def add_object(self, wire, content):
    """
    Store `content` as a new blob object in the repository.

    :param content: raw data handed to `Blob.set_raw_string`.
    :returns: the id of the created blob (`blob.id`).
    """
    # `object_store` is the API of the repository object returned by
    # `self._factory.repo()` (see `pull`/`clone`). The libgit2 handle that was
    # opened here before is driven through a different API (`branches`,
    # `create_blob`, `create_commit`, ...), so `repo.object_store` on it
    # presumably failed with AttributeError.
    # NOTE(review): confirm `objects` is the module matching `self._factory.repo()`.
    repo = self._factory.repo(wire)
    blob = objects.Blob()
    blob.set_raw_string(content)
    repo.object_store.add_object(blob)
    return blob.id
582
583
583 @reraise_safe_exceptions
584 @reraise_safe_exceptions
def create_commit(self, wire, author, committer, message, branch, new_tree_id,
                  date_args: list[int, int] = None,
                  parents: list | None = None):
    """
    Create a commit on `refs/heads/<branch>` and return its id as a string.

    :param author: author string; converted with `create_signature_from_string`
        when `date_args` is given.
    :param committer: committer string; converted the same way as `author`.
    :param new_tree_id: tree for the commit; a str/bytes value is looked up in the
        repository first.
    :param date_args: optional `[time, offset]` pair used for both signatures.
    :param parents: optional commit ids to use as parents. When empty, the head of
        `branch` is used, or `repo.head` if the branch does not exist but other
        local branches do.
    """
    with self._factory.repo_libgit2(wire) as repo:

        if date_args:
            current_time, offset = date_args
            signature_kwargs = dict(time=current_time, offset=offset)
            author = create_signature_from_string(author, **signature_kwargs)
            committer = create_signature_from_string(committer, **signature_kwargs)

        tree = new_tree_id
        if isinstance(tree, (bytes, str)):
            # validate this tree is in the repo...
            tree = repo[safe_str(tree)].id

        if parents:
            # run via sha's and validate them in repo
            parents = [repo[c].id for c in parents]
        else:
            parents = []
            # ensure we COMMIT on top of given branch head
            # check if this repo has ANY branches, otherwise it's a new branch case
            if branch in repo.branches.local:
                parents.append(repo.branches[branch].target)
            elif list(repo.branches.local):
                parents.append(repo.head.target)
            # with no local branches at all the commit is created without parents

        commit_oid = repo.create_commit(
            f'refs/heads/{branch}',  # the name of the reference to update
            author,  # the author of the commit
            committer,  # the committer of the commit
            message,  # the commit message
            tree,  # the tree produced by the index
            parents,  # list of parents for the new commit, usually just one
        )

        new_commit_id = safe_str(commit_oid)

    return new_commit_id
634
635
635 @reraise_safe_exceptions
636 @reraise_safe_exceptions
def compare_commits(self, wire, commit_id1, commit_id2):
    """
    Return the commit ids found in the output of
    `git log --reverse <commit_id1>..<commit_id2>`, as strings.

    Ids are extracted from the command output with `self.COMMIT_ID_PAT`.
    """
    log_cmd = [
        'log', '--reverse', '--pretty=format: %H', '-s',
        f'{commit_id1}..{commit_id2}',
    ]
    output, __ = self.run_git_command(wire, log_cmd)
    return [safe_str(found) for found in self.COMMIT_ID_PAT.findall(output)]
643
644 @reraise_safe_exceptions
def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
    """
    Write `updated`/`removed` paths into the index, commit the resulting tree on
    `branch` and return the new commit id.

    :param commit_data: mapping read for `parents`, `author`, `committer`,
        `message`, `commit_time` and `commit_timezone`.
    :param commit_tree: when truthy and parents are given, the index is first
        loaded from the tree of the first parent.
    :param updated: iterable of mappings with `path`, `content` and `mode`.
    :param removed: iterable of paths to drop from the index.
    """

    def mode2pygit(mode):
        """
        git only supports two filemode 644 and 755

        0o100755 -> 33261
        0o100644 -> 33188
        """
        known_modes = {
            0o100644: pygit2.GIT_FILEMODE_BLOB,
            0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
            0o120000: pygit2.GIT_FILEMODE_LINK
        }
        # anything unknown is stored as a regular blob
        return known_modes.get(mode) or pygit2.GIT_FILEMODE_BLOB

    with self._factory.repo_libgit2(wire) as repo:
        repo_index = repo.index

        commit_parents = None
        if commit_tree and commit_data['parents']:
            commit_parents = commit_data['parents']
            first_parent = repo[commit_parents[0]]
            repo_index.read_tree(first_parent.tree)

        for entry in updated:
            blob_id = repo.create_blob(entry['content'])
            repo_index.add(
                pygit2.IndexEntry(entry['path'], blob_id, mode2pygit(entry['mode'])))

        for removed_path in removed:
            repo_index.remove(removed_path)

        # Write changes to the index
        repo_index.write()

        # Create a tree from the updated index
        new_tree_id = repo_index.write_tree()

    author = commit_data['author']
    committer = commit_data['committer']
    message = commit_data['message']

    date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]

    new_commit_id = self.create_commit(
        wire, author, committer, message, branch,
        new_tree_id, date_args=date_args, parents=commit_parents)

    # libgit2, ensure the branch is there and exists
    self.create_branch(wire, branch, new_commit_id)

    # libgit2, set new ref to this created commit
    self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)

    return new_commit_id
693
702
694 @reraise_safe_exceptions
703 @reraise_safe_exceptions
def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
    """
    Fetch objects from `url` into the repository and optionally apply the refs.

    :param url: a local path (no `://`, not the literal `default`) is fetched with
        `LocalGitClient`; anything else goes through `HttpGitClient`.
    :param apply_refs: when truthy, write the fetched refs into the repository
        (peeled refs are skipped).
    :param refs: optional ref names limiting what is fetched and returned.
    :param update_after: when truthy, point HEAD at the remote HEAD and rebuild
        the index from its tree.
    :returns: the remote refs (`.refs` of the result for a `FetchPackResult`).
    :raises: the error produced by `exceptions.AbortException` when the remote
        is not a Git repository.
    """
    is_local_path = url != 'default' and '://' not in url
    if is_local_path:
        client = LocalGitClient(url)
    else:
        url_obj = url_parser(safe_bytes(url))
        opener = self._build_opener(url)
        url = url_obj.authinfo()[0]
        client = HttpGitClient(base_url=url, opener=opener)
    repo = self._factory.repo(wire)

    determine_wants = repo.object_store.determine_wants_all

    if refs:
        refs = [ascii_bytes(x) for x in refs]

        def determine_wants_requested(_remote_refs):
            # only ask for the hashes of the explicitly requested refs
            return [
                safe_bytes(ref_hash)
                for ref_name, ref_hash in _remote_refs.items()
                if safe_bytes(ref_name) in refs
            ]

        # swap with our custom requested wants
        determine_wants = determine_wants_requested

    try:
        remote_refs = client.fetch(
            path=url, target=repo, determine_wants=determine_wants)
    except NotGitRepository as e:
        log.warning(
            'Trying to fetch from "%s" failed, not a Git repository.', url)
        # Exception can contain unicode which we convert
        raise exceptions.AbortException(e)(repr(e))

    # mikhail: client.fetch() returns all the remote refs, but fetches only
    # refs filtered by `determine_wants` function. We need to filter result
    # as well
    if refs:
        remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}

    if apply_refs:
        # TODO: johbo: Needs proper test coverage with a git repository
        # that contains a tag object, so that we would end up with
        # a peeled ref at this point.
        for ref_name in remote_refs:
            if ref_name.endswith(PEELED_REF_MARKER):
                log.debug("Skipping peeled reference %s", ref_name)
                continue
            repo[ref_name] = remote_refs[ref_name]

        if refs and not update_after:
            # update to ref
            # mikhail: explicitly set the head to the last ref.
            update_to_ref = update_after if isinstance(update_after, str) else refs[-1]
            repo[HEAD_MARKER] = remote_refs[update_to_ref]

    if update_after:
        # we want to check out HEAD
        repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
        index.build_index_from_tree(
            repo.path, repo.index_path(), repo.object_store, repo[HEAD_MARKER].tree)

    if isinstance(remote_refs, FetchPackResult):
        return remote_refs.refs
    return remote_refs
767
776
768 @reraise_safe_exceptions
777 @reraise_safe_exceptions
def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs):
    """
    List the refs of `url` with `git ls-remote` and fetch them in chunks of 128.

    :param refs: optional value or list/tuple of values; when given, only
        ls-remote lines whose sha is contained in it are fetched.
    :param all_refs: when falsy, ls-remote is limited to `--heads --tags`.
    :param kwargs: `sync_large_objects` truthy additionally runs
        `git lfs fetch <url> --all`.
    :returns: ordered mapping of ref name to sha for every remote ref that was
        not skipped (duplicates, peeled refs and HEAD are skipped).
    """
    self._factory.repo(wire)
    if refs and not isinstance(refs, (list, tuple)):
        refs = [refs]

    config = self._wire_to_config(wire)
    # get all remote refs we'll use to fetch later
    ls_remote_cmd = ['ls-remote']
    if not all_refs:
        ls_remote_cmd.extend(['--heads', '--tags'])
    ls_remote_cmd.append(url)
    output, __ = self.run_git_command(
        wire, ls_remote_cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    remote_refs = collections.OrderedDict()
    fetch_refs = []

    for ref_line in output.splitlines():
        sha, ref = ref_line.split(b'\t')
        sha = sha.strip()
        if ref in remote_refs:
            # duplicate, skip
            continue
        if ref.endswith(PEELED_REF_MARKER):
            log.debug("Skipping peeled reference %s", ref)
            continue
        # don't sync HEAD
        if ref == HEAD_MARKER:
            continue

        remote_refs[ref] = sha

        # without an explicit filter everything is fetched, otherwise only
        # the lines whose sha was requested
        if not refs or sha in refs:
            fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
    log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))

    # chunking an empty list yields nothing, so no separate emptiness check
    for chunk in more_itertools.chunked(fetch_refs, 128):
        fetch_refs_chunks = list(chunk)
        log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
        self.run_git_command(
            wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
            fail_on_stderr=False,
            _copts=self._remote_conf(config),
            extra_env={'GIT_TERMINAL_PROMPT': '0'})

    if kwargs.get('sync_large_objects'):
        self.run_git_command(
            wire, ['lfs', 'fetch', url, '--all'],
            fail_on_stderr=False,
            _copts=self._remote_conf(config),
        )

    return remote_refs
827
836
828 @reraise_safe_exceptions
837 @reraise_safe_exceptions
def sync_push(self, wire, url, refs=None, **kwargs):
    """
    Mirror-push the repository to `url` (`git push <url> --mirror`).

    The URL is validated with `self.check_url` first. When `sync_large_objects`
    is passed truthy in `kwargs`, `git lfs push <url> --all` runs afterwards.

    :param refs: not used by this method.
    """
    url_ok = self.check_url(url, wire)
    if not url_ok:
        return

    config = self._wire_to_config(wire)
    self._factory.repo(wire)

    push_cmd = ['push', url, '--mirror']
    self.run_git_command(
        wire, push_cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    if kwargs.get('sync_large_objects'):
        lfs_cmd = ['lfs', 'push', url, '--all']
        self.run_git_command(
            wire, lfs_cmd,
            fail_on_stderr=False,
            _copts=self._remote_conf(config),
        )
844
853
845 @reraise_safe_exceptions
854 @reraise_safe_exceptions
def get_remote_refs(self, wire, url):
    """
    Open the repository at `url` with `Repo` and return its `get_refs()` result.

    `wire` is not used.
    """
    return Repo(url).get_refs()
849
858
850 @reraise_safe_exceptions
859 @reraise_safe_exceptions
def get_description(self, wire):
    """Return `get_description()` of the repository behind `wire`."""
    return self._factory.repo(wire).get_description()
854
863
855 @reraise_safe_exceptions
864 @reraise_safe_exceptions
def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
    """
    Return the commit ids walked from `rev2` while excluding `rev1`, in the
    repository at `other_repo_path`.

    Both repositories first fetch from each other, so each side has the
    other's objects before the walk.
    """
    origin_repo_path = wire['path']
    origin_repo = self._factory.repo(wire)
    # fetch from other_repo_path to our origin repo
    LocalGitClient(thin_packs=False).fetch(other_repo_path, origin_repo)

    other_wire = wire.copy()
    other_wire['path'] = other_repo_path
    other_repo = self._factory.repo(other_wire)

    # fetch from origin_repo_path to our remote repo
    LocalGitClient(thin_packs=False).fetch(origin_repo_path, other_repo)

    walker = other_repo.get_walker(
        include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])
    return [entry.commit.id for entry in walker]
873
882
874 @reraise_safe_exceptions
883 @reraise_safe_exceptions
def get_common_ancestor(self, wire, rev1, rev2):
    """
    Return `str()` of what `repo.merge_base(rev1, rev2)` gives back.

    NOTE(review): if `merge_base` can return None for unrelated revisions this
    yields the string 'None' - confirm callers expect that.
    """
    with self._factory.repo_libgit2(wire) as repo:
        merge_base_id = repo.merge_base(rev1, rev2)

    return str(merge_base_id)
890
891 @reraise_safe_exceptions
def get_object(self, wire, sha, maybe_unreachable=False):
    """
    Resolve `sha` with `revparse_single` and return basic data about the object.

    :param sha: revision expression to resolve.
    :param maybe_unreachable: when truthy, skip the check that the resolved
        commit is contained in at least one branch.
    :returns: dict with `id`, `type`, `commit_id` and `idx` (always 0).
    :raises: the error produced by `exceptions.LookupException` when `sha`
        cannot be resolved, or when the branch check runs and finds no branch.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_object(_context_uid, _repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:

            missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
            try:
                commit = repo.revparse_single(sha)
            except KeyError:
                # NOTE(marcink): KeyError doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found', sha)
                raise exceptions.LookupException(e)(missing_commit_err)
            except ValueError as e:
                raise exceptions.LookupException(e)(missing_commit_err)

            is_tag = isinstance(commit, pygit2.Tag)
            if is_tag:
                # continue with the object the tag points at
                commit = repo.get(commit.target)

            resolved_hex = commit.hex

            # The dangling check only applies to a full sha that resolved to itself:
            # tags are skipped, callers may opt out, and if we used a reference and
            # it parsed it means we're not having a dangling commit.
            check_dangling = (
                not is_tag
                and not maybe_unreachable
                and sha == resolved_hex
            )

            if check_dangling and not any(repo.branches.with_commit(resolved_hex)):
                # NOTE(marcink): Empty error doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
                raise exceptions.LookupException(e)(missing_commit_err)

            return {
                'id': resolved_hex,
                'type': commit.type_str,
                'commit_id': resolved_hex,
                'idx': 0
            }

    return _get_object(context_uid, repo_id, sha)
933
950
934 @reraise_safe_exceptions
951 @reraise_safe_exceptions
def get_refs(self, wire):
    """
    Return a mapping of ref name to target hex for every reference whose name
    starts with `refs/heads/` or `refs/tags/`.

    The result is cached per (context_uid, repo_id) when caching is enabled
    for `wire`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_refs(_context_uid, _repo_id):

        with self._factory.repo_libgit2(wire) as repo:
            wanted = re.compile('^refs/(heads|tags)/')
            selected = [
                ref for ref in repo.listall_reference_objects()
                if wanted.match(ref.name)
            ]
            found = {}
            for ref in selected:
                found[ref.name] = ref.target.hex
            return found

    return _get_refs(context_uid, repo_id)
949
966
950 @reraise_safe_exceptions
967 @reraise_safe_exceptions
def get_branch_pointers(self, wire):
    """
    Return a mapping of target hex to shorthand name for every reference whose
    name starts with `refs/heads`.

    When several references share a target, the last one listed wins. The result
    is cached per (context_uid, repo_id) when caching is enabled for `wire`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_branch_pointers(_context_uid, _repo_id):

        repo_init = self._factory.repo_libgit2(wire)
        heads_pattern = re.compile('^refs/heads')
        with repo_init as repo:
            head_refs = [
                ref for ref in repo.listall_reference_objects()
                if heads_pattern.match(ref.name)
            ]
            pointers = {}
            for ref in head_refs:
                pointers[ref.target.hex] = ref.shorthand
            return pointers

    return _get_branch_pointers(context_uid, repo_id)
965
982
966 @reraise_safe_exceptions
983 @reraise_safe_exceptions
def head(self, wire, show_exc=True):
    """
    Return the hex id of the object `repo.head` peels to.

    :param show_exc: when truthy, any error while resolving HEAD is re-raised;
        otherwise the error is swallowed and None is returned.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _head(_context_uid, _repo_id, _show_exc):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                peeled = repo.head.peel()
                return peeled.hex
            except Exception:
                if not show_exc:
                    return None
                raise

    return _head(context_uid, repo_id, show_exc)
981
998
@reraise_safe_exceptions
def init(self, wire):
    """
    Create the directory named by ``wire['path']`` and initialise a
    non-bare git repository inside it.
    """
    target_dir = safe_str(wire['path'])
    # exist_ok is left at its default, so an existing directory raises
    os.makedirs(name=target_dir, mode=0o755)
    pygit2.init_repository(target_dir, bare=False)
987
1004
@reraise_safe_exceptions
def init_bare(self, wire):
    """
    Create the directory named by ``wire['path']`` and initialise a
    bare git repository inside it.
    """
    target_dir = safe_str(wire['path'])
    # exist_ok is left at its default, so an existing directory raises
    os.makedirs(name=target_dir, mode=0o755)
    pygit2.init_repository(target_dir, bare=True)
993
1010
@reraise_safe_exceptions
def revision(self, wire, rev):
    """
    Look up ``rev`` in the repository and return a dict holding its hex
    ``id`` and, when the object exposes ``tree_id``, the hex of its ``tree``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _revision(_context_uid, _repo_id, _rev):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[rev]
            info = {'id': git_obj.id.hex}
            # tree objects themselves don't have a tree_id attribute
            if hasattr(git_obj, 'tree_id'):
                info['tree'] = git_obj.tree_id.hex
            return info

    return _revision(context_uid, repo_id, rev)
1014
1031
@reraise_safe_exceptions
def date(self, wire, commit_id):
    """
    Return ``[commit_time, commit_time_offset]`` for ``commit_id``.

    If the looked-up object has no ``commit_time`` attribute, the values
    are read from the object returned by its ``get_object()`` instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _date(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[commit_id]
            if not hasattr(git_obj, 'commit_time'):
                git_obj = git_obj.get_object()

            # TODO(marcink): check dulwich difference of offset vs timezone
            return [git_obj.commit_time, git_obj.commit_time_offset]

    return _date(repo_id, commit_id)
1035
1052
@reraise_safe_exceptions
def author(self, wire, commit_id):
    """
    Return the author of ``commit_id`` as ``"name <email>"``, or just
    ``"name"`` when the email is empty.

    If the looked-up object has no ``author`` attribute, the author of the
    object returned by its ``get_object()`` is used.  When reading the
    decoded ``name``/``email`` raises, the corresponding ``raw_*`` attribute
    passed through ``safe_str`` is used instead; this fallback applies to
    both output formats.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _author(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]

            if hasattr(commit, 'author'):
                author = commit.author
            else:
                author = commit.get_object().author

            def _decoded(attr):
                # prefer the decoded attribute, fall back to the raw one
                try:
                    return getattr(author, attr)
                except Exception:
                    return safe_str(getattr(author, f'raw_{attr}'))

            name = _decoded('name')
            email = _decoded('email')

            if email:
                return f"{name} <{email}>"
            return f"{name}"

    return _author(repo_id, commit_id)
1061
1078
@reraise_safe_exceptions
def message(self, wire, commit_id):
    """
    Return the ``message`` attribute of the object stored under ``commit_id``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _message(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[commit_id].message

    return _message(repo_id, commit_id)
1074
1091
@reraise_safe_exceptions
def parents(self, wire, commit_id):
    """
    Return the hex ids of the parents of ``commit_id``.

    If the looked-up object has no ``parent_ids`` attribute, they are read
    from the object returned by its ``get_object()`` instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _parents(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[commit_id]
            if not hasattr(git_obj, 'parent_ids'):
                git_obj = git_obj.get_object()
            return [parent.hex for parent in git_obj.parent_ids]

    return _parents(repo_id, commit_id)
1092
1109
@reraise_safe_exceptions
def children(self, wire, commit_id):
    """
    Return the ids listed as children of ``commit_id`` by
    ``git rev-list --all --children <commit_id>^..<head>``.

    The first output line that starts with ``commit_id`` is used; the ids
    following the commit id on that line are returned.  An empty list is
    returned when no line matches.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    head = self.head(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _children(_repo_id, _commit_id):
        output, __ = self.run_git_command(
            wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])

        # literal prefix test: the id must never be interpreted as a regex
        prefix = f'{commit_id}'
        for raw_line in output.splitlines():
            line = safe_str(raw_line)
            if line.startswith(prefix):
                return line.split(' ')[1:]

        return []

    return _children(repo_id, commit_id)
1117
1134
@reraise_safe_exceptions
def set_refs(self, wire, key, value):
    """
    Create reference ``key`` pointing at ``value``; ``force=True`` is
    passed, so an existing reference is overwritten.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.create(key, value, force=True)
1123
1140
@reraise_safe_exceptions
def update_refs(self, wire, key, value):
    """
    Point the already existing reference ``key`` at ``value``.

    Raises ``ValueError`` when ``key`` is not among the repository's
    references.
    """
    with self._factory.repo_libgit2(wire) as repo:
        known = key in repo.references
        if not known:
            raise ValueError(f'Reference {key} not found in the repository')
        repo.references.create(key, value, force=True)
1131
1148
@reraise_safe_exceptions
def create_branch(self, wire, branch_name, commit_id, force=False):
    """
    Create local branch ``branch_name`` at ``commit_id``.

    When ``commit_id`` is falsy the branch is created at the object HEAD
    peels to.  Without ``force`` the branch is only created if
    ``repo.branches.get(branch_name)`` finds nothing; with ``force`` the
    create call is always issued with ``force=True``.
    """
    with self._factory.repo_libgit2(wire) as repo:
        if commit_id:
            commit = repo[commit_id]
        else:
            # if commit is not given just use the HEAD; `repo.head` is an
            # attribute (a reference), not a callable, so peel it the same
            # way `head()` does
            commit = repo.head.peel()

        # without force, create only if that branch isn't existing
        if force or not repo.branches.get(branch_name):
            repo.branches.local.create(branch_name, commit, force=force)
1147
1164
@reraise_safe_exceptions
def remove_ref(self, wire, key):
    """
    Delete the reference named ``key`` from the repository.
    """
    with self._factory.repo_libgit2(wire) as repo:
        refs = repo.references
        refs.delete(key)
1153
1170
@reraise_safe_exceptions
def tag_remove(self, wire, tag_name):
    """
    Delete the reference ``refs/tags/<tag_name>`` from the repository.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(f'refs/tags/{tag_name}')
1160
1177
@reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Compare the trees of ``source_id`` and ``target_id`` and return three
    lists of paths: ``(added, modified, deleted)``.

    A falsy ``source_id`` means "no source": ``None`` is handed to the
    object store as the source tree.
    """
    repo = self._factory.repo(wire)
    # source can be empty
    source_id = safe_bytes(source_id if source_id else b'')
    target_id = safe_bytes(target_id)

    source_tree = repo[source_id].tree if source_id else None
    target_tree = repo[target_id].tree
    changes = repo.object_store.tree_changes(source_tree, target_tree)

    added, modified, deleted = set(), set(), set()
    for (old_path, new_path), (_, _), (_, _) in list(changes):
        if new_path:
            # present on the new side: modified if it also existed before
            bucket = modified if old_path else added
            bucket.add(new_path)
        elif old_path:
            deleted.add(old_path)

    return list(added), list(modified), list(deleted)
1184
1201
@reraise_safe_exceptions
def tree_and_type_for_path(self, wire, commit_id, path):
    """
    Return ``(hex id, type_str, filemode)`` of the entry found at ``path``
    in the tree of ``commit_id``, or ``(None, None, None)`` when the
    lookup raises ``KeyError``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            try:
                entry = commit.tree[path]
            except KeyError:
                return None, None, None
            return entry.id.hex, entry.type_str, entry.filemode

    return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1204
1221
@reraise_safe_exceptions
def tree_items(self, wire, tree_id):
    """
    Return the entries of tree ``tree_id`` as a list of
    ``(name, filemode, hex, type)`` tuples.

    Entries whose type is ``'commit'`` are reported with type ``'link'``.
    Raises ``ObjectMissing`` when ``tree_id`` is not found.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_items(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            entries = []
            for item in tree:
                kind = item.type_str
                # NOTE(marcink): submodules we translate to 'link' for backward compat
                if kind == 'commit':
                    kind = 'link'
                entries.append((item.name, item.filemode, item.hex, kind))
            return entries

    return _tree_items(repo_id, tree_id)
1233
1250
@reraise_safe_exceptions
def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Old version that uses subprocess to call diff

    Runs ``git diff`` between the two commits, or ``git show`` of
    ``commit_id_2`` when ``commit_id_1`` equals ``self.EMPTY_COMMIT``, and
    returns the command output as bytes.  ``context`` sets the ``-U`` value,
    ``opt_ignorews`` adds ``--ignore-all-space`` and a truthy
    ``file_filter`` restricts the command to that path.
    """

    flags = [
        f'-U{context}', '--patch',
        '--binary',
        '--find-renames',
        '--no-indent-heuristic',
        # '--indent-heuristic',
        #'--full-index',
        #'--abbrev=40'
    ]

    if opt_ignorews:
        flags.append('--ignore-all-space')

    from_empty = commit_id_1 == self.EMPTY_COMMIT
    if from_empty:
        cmd = ['show'] + flags + [commit_id_2]
    else:
        cmd = ['diff'] + flags + [commit_id_1, commit_id_2]

    if file_filter:
        cmd.extend(['--', file_filter])

    diff, __ = self.run_git_command(wire, cmd)
    # If we used 'show' command, strip first few lines (until actual diff
    # starts)
    if from_empty:
        lines = diff.splitlines()
        start = next(
            (idx for idx, line in enumerate(lines) if line.startswith(b'diff')),
            len(lines))
        # Append new line just like 'diff' command do; the output is bytes,
        # so the separator has to be bytes as well
        diff = b'\n'.join(lines[start:]) + b'\n'
    return diff
1274
1291
@reraise_safe_exceptions
def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Build a diff with libgit2 and return it wrapped in ``BytesEnvelope``.

    The tree of ``commit_id_2`` is diffed against the tree of
    ``commit_id_1`` (with ``swap=True``), or against no tree at all when
    ``commit_id_1`` equals ``self.EMPTY_COMMIT``.  With a truthy
    ``file_filter`` only the patch whose old file path equals it is
    returned, or an empty envelope when none matches.
    """
    with self._factory.repo_libgit2(wire) as repo:
        diff_flags = 0 | pygit2.GIT_DIFF_SHOW_BINARY
        if opt_ignorews:
            diff_flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE
        diff_kwargs = {'flags': diff_flags, 'context_lines': context, 'swap': True}

        if commit_id_1 == self.EMPTY_COMMIT:
            new_commit = repo[commit_id_2]
            diff_obj = new_commit.tree.diff_to_tree(**diff_kwargs)
        else:
            new_commit = repo[commit_id_2]
            old_commit = repo[commit_id_1]
            diff_obj = new_commit.tree.diff_to_tree(old_commit.tree, **diff_kwargs)
            # rename detection is only run when there is a tree to compare against
            diff_obj.find_similar(flags=0 | pygit2.GIT_DIFF_FIND_RENAMES)

        if not file_filter:
            return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')

        for patch in diff_obj:
            if patch.delta.old_file.path == file_filter:
                return BytesEnvelope(patch.data) or BytesEnvelope(b'')
        # no matching path == no diff
        return BytesEnvelope(b'')
1309
1326
@reraise_safe_exceptions
def node_history(self, wire, commit_id, path, limit):
    """
    Return the 40-character hex ids (as bytes) found in the git output
    listing commits that touched ``path``, starting from ``commit_id``.

    ``limit == 1`` uses ``git rev-list -1``; any other value uses
    ``git log``, with ``-n <limit>`` added when ``limit`` is truthy.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
        # optimize for n==1, rev-list is much faster for that use-case
        if limit == 1:
            cmd = ['rev-list', '-1', commit_id, '--', path]
        else:
            count_args = ['-n', str(safe_int(limit, 0))] if limit else []
            cmd = ['log', *count_args,
                   '--pretty=format: %H', '-s', commit_id, '--', path]

        output, __ = self.run_git_command(wire, cmd)
        return list(re.findall(rb'[0-9a-fA-F]{40}', output))

    return _node_history(context_uid, repo_id, commit_id, path, limit)
1331
1348
@reraise_safe_exceptions
def node_annotate_legacy(self, wire, commit_id, path):
    """
    Annotate ``path`` at ``commit_id`` by parsing ``git blame`` output.

    Returns a list of ``(line_no, blame_commit_id, line)`` tuples with
    1-based line numbers.  The last line of the command output is skipped.
    """
    # note: replaced by pygit2 implementation
    # -l ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r commit_id ==> blames for the given commit
    blame_cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
    output, __ = self.run_git_command(wire, blame_cmd)

    annotated = []
    for line_no, blame_line in enumerate(output.splitlines()[:-1], start=1):
        # split on the first space only: "<sha> <rest of the blame line>"
        blame_commit_id, line = blame_line.split(b' ', 1)
        annotated.append((line_no, blame_commit_id, line))

    return annotated
1348
1365
@reraise_safe_exceptions
def node_annotate(self, wire, commit_id, path):
    """
    Annotate ``path`` at ``commit_id`` using libgit2 blame.

    Returns a ``BinaryEnvelope`` wrapping a list of
    ``(line_no, blame_commit_id, line)`` tuples, one per line of the file
    content, with 1-based line numbers.
    """
    annotated = []
    with self._factory.repo_libgit2(wire) as repo:
        commit = repo[commit_id]
        blame_obj = repo.blame(path, newest_commit=commit_id)
        file_content = commit.tree[path].data

        for line_no, line in enumerate(splitnewlines(file_content), start=1):
            hunk = blame_obj.for_line(line_no)
            annotated.append((line_no, hunk.final_commit_id.hex, line))

    return BinaryEnvelope(annotated)
1366
1383
@reraise_safe_exceptions
def update_server_info(self, wire, force=False):
    """
    Run ``git update-server-info`` (with ``--force`` when ``force`` is
    truthy) and return the lines of its output.
    """
    extra_args = ['--force'] if force else []
    output, __ = self.run_git_command(wire, ['update-server-info'] + extra_args)
    return output.splitlines()
1374
1391
@reraise_safe_exceptions
def get_all_commit_ids(self, wire):
    """
    Return the output lines of
    ``git rev-list --reverse --date-order --branches --tags``.

    Any exception raised while running the command results in an empty
    list being returned.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids(_context_uid, _repo_id):

        cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
        try:
            output, __ = self.run_git_command(wire, cmd)
            return output.splitlines()
        except Exception:
            # Can be raised for empty repositories
            return []

    return _get_all_commit_ids(context_uid, repo_id)
1402
1419
@reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
    """
    Run the git executable with ``cmd`` and return ``(stdout, stderr)``
    joined into bytes.

    Recognised keys in ``opts`` (all removed before the rest is forwarded
    to ``subprocessio.SubprocessIOChunker``):

    * ``_bare``: when present, the default ``-c`` options are not added
    * ``_safe``: when present, an ``OSError`` does not raise; ``('', err)``
      is returned instead
    * ``_copts``: extra options placed between the executable and ``cmd``
    * ``extra_env``: mapping merged into the environment of the call

    When ``wire['path']`` is an existing directory it is used as ``cwd``.
    On ``OSError`` the failure is logged and, unless ``_safe`` was given, a
    ``VcsException`` is raised.
    """
    repo_path = wire.get('path', None)
    debug_mode = vcsserver.ConfigGet().get_bool('debug')

    if repo_path and os.path.isdir(repo_path):
        opts['cwd'] = repo_path

    # only the presence of the marker keys matters, not their value
    if '_bare' in opts:
        opts.pop('_bare')
        config_opts = []
    else:
        config_opts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']

    # no exc on failure
    safe_call = '_safe' in opts
    if safe_call:
        opts.pop('_safe')

    if '_copts' in opts:
        config_opts.extend(opts.pop('_copts') or [])

    gitenv = os.environ.copy()
    gitenv.update(opts.pop('extra_env', {}))
    # need to clean fix GIT_DIR !
    gitenv.pop('GIT_DIR', None)
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
    gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

    cmd = [settings.GIT_EXECUTABLE()] + config_opts + cmd
    _opts = {'env': gitenv, 'shell': False}

    proc = None
    try:
        # caller supplied options win over the defaults above
        _opts.update(opts)
        proc = subprocessio.SubprocessIOChunker(cmd, **_opts)

        stdout = b''.join(proc)
        stderr = b''.join(proc.stderr)
        return stdout, stderr
    except OSError as err:
        cmd = ' '.join(map(safe_str, cmd))  # human friendly CMD
        # call options are only reported in debug mode
        call_opts = _opts if debug_mode else {}

        tb_err = (f"Couldn't run git command ({cmd}).\n"
                  f"Original error was:{err}\n"
                  f"Call options:{call_opts}\n")
        log.exception(tb_err)
        if not safe_call:
            raise exceptions.VcsException()(tb_err)
        return '', err
    finally:
        if proc:
            proc.close()
1461
1478
@reraise_safe_exceptions
def install_hooks(self, wire, force=False):
    """
    Install the git hooks for the repository at ``wire['path']`` and
    return the result of ``install_git_hooks``.

    ``force`` is forwarded as ``force_create``; whether the repository is
    bare is taken from ``self.bare(wire)``.
    """
    from vcsserver.hook_utils import install_git_hooks
    bare = self.bare(wire)
    path = wire['path']
    return install_git_hooks(path, bare, force_create=force)
1471
1488
@reraise_safe_exceptions
def get_hooks_info(self, wire):
    """
    Return a dict with the ``pre_version`` and ``post_version`` reported
    by the hook utilities for the repository at ``wire['path']``.
    """
    from vcsserver.hook_utils import get_git_post_hook_version
    from vcsserver.hook_utils import get_git_pre_hook_version

    is_bare = self.bare(wire)
    repo_path = wire['path']

    pre_version = get_git_pre_hook_version(repo_path, is_bare)
    post_version = get_git_post_hook_version(repo_path, is_bare)
    return {'pre_version': pre_version, 'post_version': post_version}
1482
1499
@reraise_safe_exceptions
def set_head_ref(self, wire, head_name):
    """
    Point the repository HEAD at the branch ``head_name``.

    :param wire: connection descriptor used to open the libgit2 repository.
    :param head_name: short branch name; ``refs/heads/`` is prepended.
    :return: two-element list: the branch name and a human readable
        message describing the new HEAD target.
    """
    log.debug('Setting refs/head to `%s`', head_name)

    with self._factory.repo_libgit2(wire) as repo:
        repo.set_head(f'refs/heads/{head_name}')

    return [head_name, f'set HEAD to refs/heads/{head_name}']
1491
1508
@reraise_safe_exceptions
def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id, cache_config):
    """
    Build an archive of ``commit_id`` through ``store_archive_in_cache``.

    A generator walking the files of the commit is defined here and passed,
    together with all remaining arguments, to ``store_archive_in_cache``;
    its result is returned unchanged.
    """

    def file_walker(_commit_id, path):
        # NOTE: the commit is resolved from the enclosing ``commit_id``;
        # the ``_commit_id`` argument is not used.
        with self._factory.repo_libgit2(wire) as repo:
            root_commit = repo[commit_id]

            tree = root_commit.tree
            if path not in ('', '/'):
                # Narrow down to the requested sub-path and re-resolve the
                # tree object by its id.
                tree = tree[path.rstrip('/')]
                tree_id = tree.id.hex
                try:
                    tree = repo[tree_id]
                except KeyError:
                    raise ObjectMissing(f'No tree with id: {tree_id}')

            # Reading the tree into an index gives a flat iteration over
            # its entries.
            tree_index = LibGit2Index.Index()
            tree_index.read_tree(tree)

            for entry in tree_index:
                entry_path = entry.path
                entry_mode = entry.mode
                if entry_mode == pygit2.GIT_FILEMODE_COMMIT:
                    log.debug('Skipping path %s as a commit node', entry_path)
                    continue

                yield ArchiveNode(
                    entry_path,
                    entry_mode,
                    stat.S_ISLNK(entry_mode),
                    repo[entry.hex].read_raw,
                )

    return store_archive_in_cache(
        file_walker, archive_name_key, kind, mtime, archive_at_path,
        archive_dir_name, commit_id, cache_config=cache_config)
General Comments 0
You need to be logged in to leave comments. Login now