##// END OF EJS Templates
fix(git): fixed the case where we want to set an explicit ref instead of the last one, which is not stable
super-admin -
r1173:76143c56 default
parent child Browse files
Show More
@@ -1,1466 +1,1485 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23 import traceback
24 24 import urllib.request
25 25 import urllib.parse
26 26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 40 from dulwich.server import update_server_info
41 41
42 42 from vcsserver import exceptions, settings, subprocessio
43 43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
44 44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
45 45 from vcsserver.hgcompat import (
46 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 47 from vcsserver.git_lfs.lib import LFSOidStore
48 48 from vcsserver.vcs_base import RemoteBase
49 49
# File-type helpers taken from the stdlib `stat` module and from dulwich.
DIR_STAT = stat.S_IFDIR
FILE_MODE = stat.S_IFMT
# NOTE(review): presumably the mode dulwich uses for gitlink (submodule) entries -- confirm
GIT_LINK = objects.S_IFGITLINK
# Ref names ending with this suffix are skipped as "peeled" references
# by the pull/sync_fetch code in this module.
PEELED_REF_MARKER = b'^{}'
# Key under which the HEAD reference is read from / written to a repo.
HEAD_MARKER = b'HEAD'

# Module-level logger.
log = logging.getLogger(__name__)
57 57
58 58
def reraise_safe_exceptions(func):
    """
    Decorator translating Dulwich errors into neutral vcsserver exceptions.

    Lookup-type Dulwich errors are re-raised as ``LookupException``,
    connection/protocol errors as ``VcsException``; both carry the original
    exception as ``org_exc``. Every other exception propagates unchanged.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing) as lookup_err:
            make_exc = exceptions.LookupException(org_exc=lookup_err)
            raise make_exc(safe_str(lookup_err))
        except (HangupException, UnexpectedCommandError) as vcs_err:
            make_exc = exceptions.VcsException(org_exc=vcs_err)
            raise make_exc(safe_str(vcs_err))
        # NOTE(marcink): because of how dulwich handles some exceptions
        # (KeyError on empty repos), we cannot track this and catch all
        # exceptions here; anything else is left for other handlers.
        return result

    return wrapper
81 81
82 82
class Repo(DulwichRepo):
    """
    Thin wrapper around the dulwich ``Repo`` class.

    Dulwich sometimes keeps .idx file descriptors open, which leads to
    "Too many open files" errors. To avoid that, all opened descriptors are
    closed as soon as the repo object is destroyed.
    """

    def __del__(self):
        # a partially constructed instance has no object_store and nothing to close
        if not hasattr(self, 'object_store'):
            return
        self.close()
94 94
95 95
class Repository(LibGit2Repo):
    """libgit2 repository usable as a context manager; it is freed on exit."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # release the underlying libgit2 resources whether or not an error occurred
        self.free()
103 103
104 104
class GitFactory(RepoFactory):
    """Factory producing either dulwich or libgit2 repository objects."""

    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """
        Build the repository object for ``wire['path']``.

        :param wire: dict-like call context; only ``path`` is read here
        :param create: accepted for interface compatibility, not used here
        :param use_libgit2: return a libgit2 ``Repository`` instead of a dulwich ``Repo``
        """
        if use_libgit2:
            git_repo = Repository(safe_bytes(wire['path']))
        else:
            # dulwich mode
            git_repo = Repo(safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING))

        log.debug('repository created: got GIT object: %s', git_repo)
        return git_repo

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """Shortcut for ``repo(wire, use_libgit2=True)``."""
        return self.repo(wire, use_libgit2=True)
127 127
128 128
def create_signature_from_string(author_str, **kwargs):
    """
    Build a pygit2.Signature out of a 'Name <email>' string.

    :param author_str: String of the format 'Name <email>'
    :param kwargs: extra keyword arguments forwarded to ``pygit2.Signature``
    :return: pygit2.Signature object
    :raises ValueError: when ``author_str`` does not have the expected format
    """
    parsed = re.match(r'^(.+) <(.+)>$', author_str)
    if not parsed:
        raise ValueError(f"Invalid format: {author_str}")

    return pygit2.Signature(parsed.group(1), parsed.group(2), **kwargs)
142 142
143 143
def get_obfuscated_url(url_obj):
    """
    Return the string form of ``url_obj`` with credentials masked.

    NOTE: ``url_obj`` is modified in place; a set password is replaced with
    ``*****`` and the query string is passed through ``obfuscate_qs``.
    """
    if url_obj.passwd:
        url_obj.passwd = b'*****'
    url_obj.query = obfuscate_qs(url_obj.query)
    return str(url_obj)
149 149
150 150
151 151 class GitRemote(RemoteBase):
152 152
def __init__(self, factory):
    """
    :param factory: repository factory used to open repos for each call
    """
    self._factory = factory
    # attribute name -> bound method, used by bulk_request
    self._bulk_methods = dict(
        date=self.date,
        author=self.author,
        branch=self.branch,
        message=self.message,
        parents=self.parents,
        _commit=self.revision,
    )
    # attribute name -> bound method, used by bulk_file_request
    self._bulk_file_methods = dict(
        size=self.get_node_size,
        data=self.get_node_data,
        flags=self.get_node_flags,
        is_binary=self.get_node_is_binary,
        md5=self.md5_hash,
    )
170 170
171 171 def _wire_to_config(self, wire):
172 172 if 'config' in wire:
173 173 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
174 174 return {}
175 175
176 176 def _remote_conf(self, config):
177 177 params = [
178 178 '-c', 'core.askpass=""',
179 179 ]
180 180 ssl_cert_dir = config.get('vcs_ssl_dir')
181 181 if ssl_cert_dir:
182 182 params.extend(['-c', f'http.sslCAinfo={ssl_cert_dir}'])
183 183 return params
184 184
@reraise_safe_exceptions
def discover_git_version(self):
    """Return the output of ``git --version`` without the 'git version' prefix."""
    version_prefix = b'git version'
    stdout, _stderr = self.run_git_command(
        {}, ['--version'], _bare=True, _safe=True)
    version = stdout[len(version_prefix):] if stdout.startswith(version_prefix) else stdout
    return safe_str(version.strip())
193 193
@reraise_safe_exceptions
def is_empty(self, wire):
    """
    Tell whether the repository has no content.

    A repository with a resolvable HEAD is reported as non-empty. Any error
    raised while inspecting it is deliberately swallowed and the repository
    is then treated as empty.
    """
    with self._factory.repo_libgit2(wire) as repo:
        try:
            if repo.head.name:
                return False
            # NOTE(marcink): check again using more expensive method
            return repo.is_empty
        except Exception:
            # best effort: any failure here is treated as an empty repo
            return True
210 210
@reraise_safe_exceptions
def assert_correct_path(self, wire):
    """
    Check that ``wire['path']`` points at a Git repository.

    The result is cached per context/repo when caching is enabled for the
    call. Only the fast check (repository discovery) is used by this entry
    point; the slower variant opens the repository via libgit2.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _assert_correct_path(_context_uid, _repo_id, fast_check):
        if fast_check:
            return bool(pygit2.discover_repository(safe_str(wire['path'])))

        try:
            with self._factory.repo_libgit2(wire):
                pass
        except pygit2.GitError:
            log.debug("Invalid Git path `%s`, tb: %s", wire.get('path'), traceback.format_exc())
            return False
        return True

    return _assert_correct_path(context_uid, repo_id, True)
236 236
@reraise_safe_exceptions
def bare(self, wire):
    """Return the libgit2 ``is_bare`` flag of the repository."""
    with self._factory.repo_libgit2(wire) as git_repo:
        is_bare = git_repo.is_bare
    return is_bare
242 242
@reraise_safe_exceptions
def get_node_data(self, wire, commit_id, path):
    """
    Return the content of the blob at ``path`` in commit ``commit_id``.

    :return: blob data wrapped in a ``BytesEnvelope``
    :raises LookupException: when the tree entry at ``path`` is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]
        if node.type == pygit2.GIT_OBJ_BLOB:
            return BytesEnvelope(node.data)

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
255 255
@reraise_safe_exceptions
def get_node_size(self, wire, commit_id, path):
    """
    Return the size of the blob at ``path`` in commit ``commit_id``.

    :raises LookupException: when the tree entry at ``path`` is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]
        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.size

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
268 268
@reraise_safe_exceptions
def get_node_flags(self, wire, commit_id, path):
    """
    Return the file mode of the blob at ``path`` in commit ``commit_id``.

    :raises LookupException: when the tree entry at ``path`` is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]
        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.filemode

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
281 281
@reraise_safe_exceptions
def get_node_is_binary(self, wire, commit_id, path):
    """
    Return libgit2's ``is_binary`` flag for the blob at ``path`` in ``commit_id``.

    :raises LookupException: when the tree entry at ``path`` is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]
        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.is_binary

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
294 294
@reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
    """Return the data of the object ``sha`` wrapped in a ``BytesEnvelope``."""
    with self._factory.repo_libgit2(wire) as repo:
        return BytesEnvelope(repo[sha].data)
301 301
@reraise_safe_exceptions
def blob_raw_length(self, wire, sha):
    """Return the size of the object ``sha``; cached per repo when caching is on."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _blob_raw_length(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[sha].size

    return _blob_raw_length(repo_id, sha)
316 316
317 317 def _parse_lfs_pointer(self, raw_content):
318 318 spec_string = b'version https://git-lfs.github.com/spec'
319 319 if raw_content and raw_content.startswith(spec_string):
320 320
321 321 pattern = re.compile(rb"""
322 322 (?:\n)?
323 323 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
324 324 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
325 325 ^size[ ](?P<oid_size>[0-9]+)\n
326 326 (?:\n)?
327 327 """, re.VERBOSE | re.MULTILINE)
328 328 match = pattern.match(raw_content)
329 329 if match:
330 330 return match.groupdict()
331 331
332 332 return {}
333 333
@reraise_safe_exceptions
def is_large_file(self, wire, commit_id):
    """
    Return the parsed LFS pointer data for the object ``commit_id``.

    An empty dict is returned for binary blobs and for blobs that are not
    LFS pointers. The result is cached per repo when caching is enabled.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_large_file(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            blob = repo[commit_id]
            # binary content cannot be an LFS pointer file
            return {} if blob.is_binary else self._parse_lfs_pointer(blob.data)

    return _is_large_file(repo_id, commit_id)
350 350
@reraise_safe_exceptions
def is_binary(self, wire, tree_id):
    """Return libgit2's ``is_binary`` flag for object ``tree_id`` (cached per repo)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_binary(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[tree_id].is_binary

    return _is_binary(repo_id, tree_id)
364 364
@reraise_safe_exceptions
def md5_hash(self, wire, commit_id, path):
    """
    Validate that ``path`` in ``commit_id`` is a blob and return ``''``.

    NOTE(review): no digest is computed here; the method always returns an
    empty string for a valid blob -- confirm this placeholder is intended.

    :raises LookupException: when the tree entry at ``path`` is not a blob
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _md5_hash(_repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            node = repo[_commit_id].tree[_path]
            if node.type == pygit2.GIT_OBJ_BLOB:
                return ''

            raise exceptions.LookupException()(
                f'Tree for commit_id:{_commit_id} is not a blob: {node.type_str}')

    return _md5_hash(repo_id, commit_id, path)
384 384
@reraise_safe_exceptions
def in_largefiles_store(self, wire, oid):
    """
    Tell whether ``oid`` exists in the configured LFS store.

    Returns False when no ``vcs_git_lfs_store_location`` is configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

        store_location = conf.get('vcs_git_lfs_store_location')
        if not store_location:
            return False

        lfs_store = LFSOidStore(
            oid=oid, repo=repo_name, store_location=store_location)
        return lfs_store.has_oid()
400 400
@reraise_safe_exceptions
def store_path(self, wire, oid):
    """
    Return the path of ``oid`` inside the configured LFS store.

    :raises ValueError: when no ``vcs_git_lfs_store_location`` is configured
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

        store_location = conf.get('vcs_git_lfs_store_location')
        if not store_location:
            raise ValueError(f'Unable to fetch oid with path {oid}')

        lfs_store = LFSOidStore(
            oid=oid, repo=repo_name, store_location=store_location)
        return lfs_store.oid_path
414 414
@reraise_safe_exceptions
def bulk_request(self, wire, rev, pre_load):
    """
    Compute several commit attributes for ``rev`` in one call.

    :param pre_load: iterable of attribute names; each must be a key of
        ``self._bulk_methods``
    :return: dict mapping attribute name -> value
    :raises VcsException: when a ``KeyError`` is raised while resolving or
        computing an attribute
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_request(_repo_id, _rev, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                handler = self._bulk_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire.update({'cache': False})
                collected[attr] = handler(wire, rev)
            except KeyError as e:
                raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
        return collected

    # the sorted list only normalises the cache key; iteration uses pre_load
    return _bulk_request(repo_id, rev, sorted(pre_load))
434 434
@reraise_safe_exceptions
def bulk_file_request(self, wire, commit_id, path, pre_load):
    """
    Compute several file attributes for ``path`` at ``commit_id`` in one call.

    :param pre_load: iterable of attribute names; each must be a key of
        ``self._bulk_file_methods``
    :return: ``BinaryEnvelope`` wrapping a dict of attribute name -> value
    :raises VcsException: when a ``KeyError`` is raised while resolving or
        computing an attribute
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                handler = self._bulk_file_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire.update({'cache': False})
                collected[attr] = handler(wire, _commit_id, _path)
            except KeyError as e:
                raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
        return collected

    file_attrs = _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
    return BinaryEnvelope(file_attrs)
453 453
def _build_opener(self, url: str):
    """
    Create a urllib opener for ``url``.

    When the URL carries credentials, basic and digest auth handlers backed
    by a shared password manager are installed.
    """
    authinfo = url_parser(safe_bytes(url)).authinfo()[1]

    auth_handlers = []
    if authinfo:
        # create a password manager
        passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        passmgr.add_password(*authinfo)
        auth_handlers = [httpbasicauthhandler(passmgr),
                         httpdigestauthhandler(passmgr)]

    return urllib.request.build_opener(*auth_handlers)
468 468
@reraise_safe_exceptions
def check_url(self, url, config):
    """
    Verify that ``url`` can be reached and serves a Git repository.

    The ``info/refs?service=git-upload-pack`` endpoint of the URL is
    requested and the response is inspected for smart or old-style Git
    output.

    :param url: remote URL, may contain credentials
    :param config: not used by this method
    :return: True when the URL looks like a Git repository
    :raises URLError: when the URL cannot be opened, does not answer with
        HTTP 200 or the response does not look like Git output
    """
    url_obj = url_parser(safe_bytes(url))

    # read the plain URI first; get_obfuscated_url() modifies url_obj in place
    test_uri = safe_str(url_obj.authinfo()[0])
    obfuscated_uri = get_obfuscated_url(url_obj)

    log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)

    if not test_uri.endswith('info/refs'):
        test_uri = test_uri.rstrip('/') + '/info/refs'

    o = self._build_opener(test_uri)
    o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

    q = {"service": 'git-upload-pack'}
    qs = f'?{urllib.parse.urlencode(q)}'
    cu = f"{test_uri}{qs}"

    try:
        req = urllib.request.Request(cu, None, {})
        log.debug("Trying to open URL %s", obfuscated_uri)
        resp = o.open(req)
        if resp.code != 200:
            raise exceptions.URLError()('Return Code is not 200')
    except Exception as e:
        log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
        # means it cannot be cloned
        raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")

    # now detect if it's proper git repo
    try:
        gitdata: bytes = resp.read()
    finally:
        # don't leak the connection once the payload has been read
        resp.close()

    is_smart_http = b'service=git-upload-pack' in gitdata
    # old style git can return some other format!
    is_dumb_http = re.search(br'[0-9a-fA-F]{40}\s+refs', gitdata) is not None

    if not (is_smart_http or is_dumb_http):
        raise exceptions.URLError()(
            f"url [{obfuscated_uri}] does not look like a git repo")

    return True
513 513
@reraise_safe_exceptions
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
    """
    Fetch from ``url`` and store the selected remote refs in the repository.

    :param deferred: suffix (or tuple of suffixes) of ref names to skip
    :param valid_refs: prefix (or list/tuple of prefixes) of ref names to keep
    :param update_after_clone: when true, set HEAD to the remote HEAD and
        build the index from its tree
    """
    # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
    remote_refs = self.pull(wire, url, apply_refs=False)
    repo = self._factory.repo(wire)

    # str.startswith only accepts a str or a tuple of them
    ref_prefixes = tuple(valid_refs) if isinstance(valid_refs, list) else valid_refs

    for ref_name in remote_refs:
        # only parse heads/tags and skip so called deferred tags
        if not ref_name.startswith(ref_prefixes) or ref_name.endswith(deferred):
            continue
        repo[ref_name] = remote_refs[ref_name]

    if update_after_clone:
        # we want to checkout HEAD
        repo["HEAD"] = remote_refs["HEAD"]
        index.build_index_from_tree(repo.path, repo.index_path(),
                                    repo.object_store, repo["HEAD"].tree)
532 532
@reraise_safe_exceptions
def branch(self, wire, commit_id):
    """
    Return the names of all branches whose head is exactly ``commit_id``.

    Names are returned without the ``refs/heads/`` prefix; the result is
    cached per context/repo when caching is enabled.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _branch(_context_uid, _repo_id, _commit_id):
        heads_re = re.compile('^refs/heads')
        return [
            ref_name.split('refs/heads/')[-1]
            for ref_name, ref_sha in self.get_refs(wire).items()
            if heads_re.match(ref_name) and ref_sha == _commit_id
        ]

    return _branch(context_uid, repo_id, commit_id)
549 549
@reraise_safe_exceptions
def commit_branches(self, wire, commit_id):
    """Return the list of branches that contain ``commit_id`` (cached per repo)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _commit_branches(_context_uid, _repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return list(repo.branches.with_commit(_commit_id))

    return _commit_branches(context_uid, repo_id, commit_id)
563 563
@reraise_safe_exceptions
def add_object(self, wire, content):
    """
    Store ``content`` as a new blob in the repository.

    The blob is built and stored with dulwich, so the dulwich repository
    (which owns ``object_store``) has to be used here; a libgit2 repository
    object does not provide that attribute.

    :param content: raw blob content
    :return: id of the created blob as reported by dulwich
    """
    repo = self._factory.repo(wire)
    blob = objects.Blob()
    blob.set_raw_string(content)
    repo.object_store.add_object(blob)
    return blob.id
572 572
@reraise_safe_exceptions
def create_commit(self, wire, author, committer, message, branch, new_tree_id,
                  date_args: list[int] | None = None,
                  parents: list | None = None):
    """
    Create a commit on ``refs/heads/<branch>`` pointing at ``new_tree_id``.

    :param author: 'Name <email>' string (or a ready pygit2 signature)
    :param committer: 'Name <email>' string (or a ready pygit2 signature)
    :param message: commit message
    :param branch: short branch name the new commit is written to
    :param new_tree_id: tree id; str/bytes ids are validated against the repo
    :param date_args: optional ``[time, offset]`` pair used for both signatures
    :param parents: optional explicit parent commit ids; when not given the
        parent is the head of ``branch``, or the repo HEAD if the branch
        does not exist yet but other branches do
    :return: id of the new commit as str
    """
    with self._factory.repo_libgit2(wire) as repo:

        signature_kw = {}
        if date_args:
            current_time, offset = date_args
            signature_kw = {'time': current_time, 'offset': offset}

        # string identities must always be turned into signatures, also when
        # no explicit date was requested
        if isinstance(author, str):
            author = create_signature_from_string(author, **signature_kw)
        if isinstance(committer, str):
            committer = create_signature_from_string(committer, **signature_kw)

        tree = new_tree_id
        if isinstance(tree, (bytes, str)):
            # validate this tree is in the repo...
            tree = repo[safe_str(tree)].id

        if parents:
            # run via sha's and validate them in repo
            parents = [repo[c].id for c in parents]
        else:
            parents = []
            # ensure we COMMIT on top of given branch head
            # check if this repo has ANY branches, otherwise it's a new branch case we need to make
            if branch in repo.branches.local:
                parents.append(repo.branches[branch].target)
            elif any(True for _ in repo.branches.local):
                parents.append(repo.head.target)
            # a repo without any branch gets a parent-less commit

        # Create a new commit
        commit_oid = repo.create_commit(
            f'refs/heads/{branch}',  # the name of the reference to update
            author,  # the author of the commit
            committer,  # the committer of the commit
            message,  # the commit message
            tree,  # the tree produced by the index
            parents  # list of parents for the new commit, usually just one,
        )

        return safe_str(commit_oid)
617 624
@reraise_safe_exceptions
def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
    """
    Write file changes to the index and commit them to ``branch``.

    :param commit_data: dict with ``author``, ``committer``, ``message``,
        ``commit_time``, ``commit_timezone`` and ``parents``
    :param commit_tree: when truthy and parents are given, the index is
        first loaded from the tree of the first parent
    :param updated: iterable of dicts with ``path``, ``content`` and ``mode``
    :param removed: iterable of paths to drop from the index
    :return: id of the new commit
    """

    def mode2pygit(mode):
        """
        git only supports two filemode 644 and 755

        0o100755 -> 33261
        0o100644 -> 33188
        """
        known_modes = {
            0o100644: pygit2.GIT_FILEMODE_BLOB,
            0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
            0o120000: pygit2.GIT_FILEMODE_LINK,
        }
        return known_modes.get(mode) or pygit2.GIT_FILEMODE_BLOB

    with self._factory.repo_libgit2(wire) as repo:
        repo_index = repo.index

        commit_parents = None
        if commit_tree and commit_data['parents']:
            commit_parents = commit_data['parents']
            # start from the state of the first explicit parent
            repo_index.read_tree(repo[commit_parents[0]].tree)

        for changed in updated:
            blob_id = repo.create_blob(changed['content'])
            entry = pygit2.IndexEntry(changed['path'], blob_id, mode2pygit(changed['mode']))
            repo_index.add(entry)

        for removed_path in removed:
            repo_index.remove(removed_path)

        # Write changes to the index
        repo_index.write()

        # Create a tree from the updated index
        new_tree_id = repo_index.write_tree()

        date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]

        new_commit_id = self.create_commit(
            wire, commit_data['author'], commit_data['committer'], commit_data['message'],
            branch, new_tree_id, date_args=date_args, parents=commit_parents)

        # libgit2, ensure the branch is there and exists
        self.create_branch(wire, branch, new_commit_id)

        # libgit2, set new ref to this created commit
        self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)

        return new_commit_id
670 683
@reraise_safe_exceptions
def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
    """
    Fetch from ``url`` into the repository using dulwich.

    :param url: local path or remote URL to fetch from
    :param apply_refs: store the fetched remote refs in the repository
    :param refs: optional list of ref names; only those are fetched/applied
    :param update_after: ``True`` checks out the remote HEAD after the fetch.
        A non-empty ref name (str) together with ``refs`` sets HEAD to that
        explicit ref instead of the last entry of ``refs``.
    :return: dict of the (filtered) remote refs
    :raises AbortException: when ``url`` is not a Git repository
    """
    if url != 'default' and '://' not in url:
        client = LocalGitClient(url)
    else:
        url_obj = url_parser(safe_bytes(url))
        o = self._build_opener(url)
        url = url_obj.authinfo()[0]
        client = HttpGitClient(base_url=url, opener=o)
    repo = self._factory.repo(wire)

    determine_wants = repo.object_store.determine_wants_all

    wanted_refs = set()
    if refs:
        # ref names as bytes, order is kept because the last one may become HEAD
        refs = [ascii_bytes(x) for x in refs]
        wanted_refs = set(refs)

        def determine_wants_requested(_remote_refs):
            return [
                safe_bytes(ref_hash)
                for ref_name, ref_hash in _remote_refs.items()
                if safe_bytes(ref_name) in wanted_refs
            ]

        # swap with our custom requested wants
        determine_wants = determine_wants_requested

    try:
        remote_refs = client.fetch(
            path=url, target=repo, determine_wants=determine_wants)

    except NotGitRepository as e:
        log.warning(
            'Trying to fetch from "%s" failed, not a Git repository.', url)
        # Exception can contain unicode which we convert
        raise exceptions.AbortException(e)(repr(e))

    # mikhail: client.fetch() returns all the remote refs, but fetches only
    # refs filtered by `determine_wants` function. We need to filter result
    # as well
    if refs:
        remote_refs = {k: remote_refs[k] for k in remote_refs if k in wanted_refs}

    if apply_refs:
        # TODO: johbo: Needs proper test coverage with a git repository
        # that contains a tag object, so that we would end up with
        # a peeled ref at this point.
        for k in remote_refs:
            if k.endswith(PEELED_REF_MARKER):
                log.debug("Skipping peeled reference %s", k)
                continue
            repo[k] = remote_refs[k]

    # An explicit ref name has to be detected up-front: a non-empty string is
    # truthy, so testing it only under `not update_after` can never match.
    explicit_ref = update_after if isinstance(update_after, str) and update_after else None

    if refs and (explicit_ref or not update_after):
        # mikhail: explicitly set the head to the last ref, unless the caller
        # asked for a specific one (the last ref is not stable).
        update_to_ref = ascii_bytes(explicit_ref) if explicit_ref else refs[-1]
        repo[HEAD_MARKER] = remote_refs[update_to_ref]
    elif update_after:
        # we want to check out HEAD
        repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
        index.build_index_from_tree(repo.path, repo.index_path(),
                                    repo.object_store, repo[HEAD_MARKER].tree)

    if isinstance(remote_refs, FetchPackResult):
        return remote_refs.refs
    return remote_refs
738 757
@reraise_safe_exceptions
def sync_fetch(self, wire, url, refs=None, all_refs=False):
    """
    Mirror refs from ``url`` into the repository using the git binary.

    Remote refs are listed with ``git ls-remote`` and then fetched in
    chunks with ``git fetch --force --prune``.

    :param refs: optional commit id or list of commit ids (str or bytes);
        when given, only remote refs pointing at one of them are fetched
    :param all_refs: list every remote ref instead of only heads and tags
    :return: ordered dict of remote ref name -> sha (both bytes), without
        HEAD and peeled refs
    """
    self._factory.repo(wire)
    if refs and not isinstance(refs, (list, tuple)):
        refs = [refs]

    # ls-remote output is bytes, so compare ids as bytes; callers may send str
    wanted_shas = {safe_bytes(x) for x in refs} if refs else set()

    config = self._wire_to_config(wire)
    # get all remote refs we'll use to fetch later
    cmd = ['ls-remote']
    if not all_refs:
        cmd += ['--heads', '--tags']
    cmd += [url]
    output, __ = self.run_git_command(
        wire, cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    remote_refs = collections.OrderedDict()
    fetch_refs = []

    for ref_line in output.splitlines():
        sha, ref = ref_line.split(b'\t')
        sha = sha.strip()
        if ref in remote_refs:
            # duplicate, skip
            continue
        if ref.endswith(PEELED_REF_MARKER):
            log.debug("Skipping peeled reference %s", ref)
            continue
        # don't sync HEAD
        if ref == HEAD_MARKER:
            continue

        remote_refs[ref] = sha

        # without explicit refs everything is fetched, otherwise we filter
        # fetch using our specified refs
        if not refs or sha in wanted_shas:
            fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
    log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))

    if fetch_refs:
        # fetch in chunks to keep the command line at a sane length
        for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
            fetch_refs_chunks = list(chunk)
            log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
            self.run_git_command(
                wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
                fail_on_stderr=False,
                _copts=self._remote_conf(config),
                extra_env={'GIT_TERMINAL_PROMPT': '0'})

    return remote_refs
792 811
@reraise_safe_exceptions
def sync_push(self, wire, url, refs=None):
    """
    Mirror-push the repository to ``url`` using ``git push --mirror``.

    Nothing is pushed when the URL check does not succeed. ``refs`` is not
    used by this method.
    """
    if not self.check_url(url, wire):
        return

    remote_opts = self._remote_conf(self._wire_to_config(wire))
    self._factory.repo(wire)
    self.run_git_command(
        wire, ['push', url, '--mirror'], fail_on_stderr=False,
        _copts=remote_opts,
        extra_env={'GIT_TERMINAL_PROMPT': '0'})
803 822
@reraise_safe_exceptions
def get_remote_refs(self, wire, url):
    """Return the refs of the dulwich repository opened at ``url``; ``wire`` is unused."""
    return Repo(url).get_refs()
808 827
@reraise_safe_exceptions
def get_description(self, wire):
    """Return the description of the repository as reported by dulwich."""
    return self._factory.repo(wire).get_description()
813 832
@reraise_safe_exceptions
def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
    """
    Return ids of commits reachable from ``rev2`` but not from ``rev1``.

    Both repositories are first fetched into each other, then the commits
    are walked in the repository at ``other_repo_path``.
    """
    origin_repo_path = wire['path']
    repo = self._factory.repo(wire)
    # fetch from other_repo_path to our origin repo
    LocalGitClient(thin_packs=False).fetch(other_repo_path, repo)

    wire_remote = wire.copy()
    wire_remote['path'] = other_repo_path
    repo_remote = self._factory.repo(wire_remote)

    # fetch from origin_repo_path to our remote repo
    LocalGitClient(thin_packs=False).fetch(origin_repo_path, repo_remote)

    walker = repo_remote.get_walker(
        include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])
    return [entry.commit.id for entry in walker]
832 851
@reraise_safe_exceptions
def get_object(self, wire, sha, maybe_unreachable=False):
    """
    Resolve ``sha`` (a commit id or any revision expression) to an object.

    Tags are resolved to their target. When ``sha`` is a full commit id
    that is not a tag, and ``maybe_unreachable`` is false, the commit must
    be contained in at least one branch, otherwise it is reported as missing.

    :return: dict with ``id``, ``type``, ``commit_id`` and ``idx``
    :raises LookupException: when the object cannot be resolved or is dangling
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_object(_context_uid, _repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:

            missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
            try:
                commit = repo.revparse_single(sha)
            except KeyError:
                # NOTE(marcink): KeyError doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found', sha)
                raise exceptions.LookupException(e)(missing_commit_err)
            except ValueError as e:
                raise exceptions.LookupException(e)(missing_commit_err)

            is_tag = isinstance(commit, pygit2.Tag)
            if is_tag:
                commit = repo.get(commit.target)

            commit_id = commit.hex

            # The dangling check is skipped for tags, for callers accepting
            # unreachable commits, and when a reference was used: if it
            # parsed, the commit cannot be dangling.
            check_dangling = not is_tag and not maybe_unreachable and sha == commit_id

            if check_dangling and not any(repo.branches.with_commit(commit_id)):
                # NOTE(marcink): Empty error doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
                raise exceptions.LookupException(e)(missing_commit_err)

            return {
                'id': commit_id,
                'type': commit.type_str,
                'commit_id': commit_id,
                'idx': 0
            }

    return _get_object(context_uid, repo_id, sha)
892 911
893 912 @reraise_safe_exceptions
def get_refs(self, wire):
    """
    Map branch and tag reference names to the hex id of their target.

    Only references whose name matches ``^refs/(heads|tags)/`` are included.
    The lookup is wrapped in the region's ``conditional_cache_on_arguments``
    decorator, switched by the ``cache_on`` flag derived from ``wire``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_refs(_context_uid, _repo_id):
        with self._factory.repo_libgit2(wire) as repo:
            is_head_or_tag = re.compile('^refs/(heads|tags)/').match
            selected = []
            for ref in repo.listall_reference_objects():
                if is_head_or_tag(ref.name):
                    selected.append(ref)
            return {ref.name: ref.target.hex for ref in selected}

    return _get_refs(context_uid, repo_id)
908 927
909 928 @reraise_safe_exceptions
def get_branch_pointers(self, wire):
    """
    Map the target hex id of every branch reference to the branch shorthand.

    Only references whose name matches ``^refs/heads`` are considered. When
    several branches share a target, the one listed last wins because the
    result is a plain dict keyed by the target id.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_branch_pointers(_context_uid, _repo_id):
        repo_init = self._factory.repo_libgit2(wire)
        is_branch = re.compile('^refs/heads').match
        with repo_init as repo:
            pointers = {}
            for ref in [r for r in repo.listall_reference_objects() if is_branch(r.name)]:
                pointers[ref.target.hex] = ref.shorthand
            return pointers

    return _get_branch_pointers(context_uid, repo_id)
924 943
925 944 @reraise_safe_exceptions
def head(self, wire, show_exc=True):
    """
    Return the hex id of the object that HEAD peels to.

    If resolving HEAD fails, the error is re-raised when ``show_exc`` is
    true; otherwise ``None`` is returned.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _head(_context_uid, _repo_id, _show_exc):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                head_id = repo.head.peel().hex
            except Exception:
                if not show_exc:
                    # caller asked for a silent failure
                    return None
                raise
            return head_id

    return _head(context_uid, repo_id, show_exc)
940 959
941 960 @reraise_safe_exceptions
def init(self, wire):
    """Initialise a non-bare git repository at ``wire['path']``."""
    pygit2.init_repository(safe_str(wire['path']), bare=False)
945 964
946 965 @reraise_safe_exceptions
def init_bare(self, wire):
    """Initialise a bare git repository at ``wire['path']``."""
    pygit2.init_repository(safe_str(wire['path']), bare=True)
950 969
951 970 @reraise_safe_exceptions
def revision(self, wire, rev):
    """
    Look up ``rev`` in the repository and describe the object found.

    Returns a dict with ``id`` (hex id of the object) and, when the object
    has a ``tree_id`` attribute, ``tree`` (hex id of that tree).
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _revision(_context_uid, _repo_id, _rev):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[rev]
            info = {'id': obj.id.hex}
            # tree objects themselves don't have a tree_id attribute
            if hasattr(obj, 'tree_id'):
                info.update(tree=obj.tree_id.hex)
            return info

    return _revision(context_uid, repo_id, rev)
971 990
972 991 @reraise_safe_exceptions
def date(self, wire, commit_id):
    """
    Return ``[commit_time, commit_time_offset]`` for ``commit_id``.

    If the object found has no ``commit_time`` attribute, the values are
    read from the object returned by its ``get_object()`` instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _date(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]
            if not hasattr(obj, 'commit_time'):
                # presumably a tag-like object; use what it points at
                obj = obj.get_object()

            # TODO(marcink): check dulwich difference of offset vs timezone
            return [obj.commit_time, obj.commit_time_offset]

    return _date(repo_id, commit_id)
992 1011
993 1012 @reraise_safe_exceptions
def author(self, wire, commit_id):
    """
    Return the author of ``commit_id`` as a display string.

    The result is ``"name <email>"`` when the author has an email and just
    the name otherwise. If formatting the name raises, the raw name is
    converted with ``safe_str`` and returned instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _author(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]
            # objects without an `author` attribute delegate to `get_object()`
            signature = obj.author if hasattr(obj, 'author') else obj.get_object().author

            if not signature.email:
                try:
                    return f"{signature.name}"
                except Exception:
                    return f"{safe_str(signature.raw_name)}"

            return f"{signature.name} <{signature.email}>"

    return _author(repo_id, commit_id)
1018 1037
1019 1038 @reraise_safe_exceptions
def message(self, wire, commit_id):
    """Return the ``message`` attribute of the object stored under ``commit_id``."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _message(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[commit_id].message

    return _message(repo_id, commit_id)
1031 1050
1032 1051 @reraise_safe_exceptions
def parents(self, wire, commit_id):
    """
    Return the hex ids of the parents of ``commit_id`` as a list.

    If the object found has no ``parent_ids`` attribute, they are read from
    the object returned by its ``get_object()`` instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _parents(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]
            holder = obj if hasattr(obj, 'parent_ids') else obj.get_object()
            return [parent_id.hex for parent_id in holder.parent_ids]

    return _parents(repo_id, commit_id)
1049 1068
1050 1069 @reraise_safe_exceptions
def children(self, wire, commit_id):
    """
    Return the ids of the direct children of ``commit_id``.

    Runs ``git rev-list --all --children <commit_id>^..<head>`` and takes
    the child ids from the first output line that starts with ``commit_id``.
    An empty list is returned when no such line exists.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    head = self.head(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _children(_repo_id, _commit_id):
        rev_range = f'{commit_id}^..{head}'
        stdout, _stderr = self.run_git_command(
            wire, ['rev-list', '--all', '--children', rev_range])

        starts_with_commit = re.compile(fr'^{commit_id}').match
        for raw_line in stdout.splitlines():
            text = safe_str(raw_line)
            if starts_with_commit(text):
                # first field is the commit itself, the remaining ones are its children
                return text.split(' ')[1:]
        return []

    return _children(repo_id, commit_id)
1074 1093
1075 1094 @reraise_safe_exceptions
def set_refs(self, wire, key, value):
    """Create reference ``key`` pointing at ``value``; ``force=True`` is always passed."""
    with self._factory.repo_libgit2(wire) as repo:
        references = repo.references
        references.create(key, value, force=True)
1080 1099
1081 1100 @reraise_safe_exceptions
def create_branch(self, wire, branch_name, commit_id, force=False):
    """
    Create the local branch ``branch_name``.

    :param wire: connection info used to open the repository
    :param branch_name: name of the branch to create
    :param commit_id: id of the commit to branch from; when empty, the commit
        that HEAD currently peels to is used
    :param force: overwrite an already existing branch of that name; without
        it an existing branch is left untouched
    """
    with self._factory.repo_libgit2(wire) as repo:
        if commit_id:
            commit = repo[commit_id]
        else:
            # if commit is not given just use the HEAD. `repo.head` is a
            # reference attribute (not a callable), so peel it to get the
            # object it points at.
            commit = repo.head.peel()

        # create when forced, or only if that branch isn't existing yet
        if force or not repo.branches.get(branch_name):
            repo.branches.local.create(branch_name, commit, force=force)
1096 1115
1097 1116 @reraise_safe_exceptions
def remove_ref(self, wire, key):
    """Delete the reference named ``key`` from the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        references = repo.references
        references.delete(key)
1102 1121
1103 1122 @reraise_safe_exceptions
def tag_remove(self, wire, tag_name):
    """Delete the reference ``refs/tags/<tag_name>`` from the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        tag_ref = f'refs/tags/{tag_name}'
        references = repo.references
        references.delete(tag_ref)
1109 1128
1110 1129 @reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Classify the paths that differ between the trees of two commits.

    ``source_id`` may be empty, in which case the target tree is compared
    against no tree at all. Returns three lists: added, modified and deleted
    paths, as reported by the object store's ``tree_changes``.
    """
    repo = self._factory.repo(wire)
    # source can be empty
    source_id = safe_bytes(source_id or b'')
    target_id = safe_bytes(target_id)

    source_tree = repo[source_id].tree if source_id else None
    target_tree = repo[target_id].tree
    changes = repo.object_store.tree_changes(source_tree, target_tree)

    added, modified, deleted = set(), set(), set()
    for (old_path, new_path), (_, _), (_, _) in list(changes):
        if old_path and new_path:
            # present on both sides
            modified.add(new_path)
        elif new_path:
            added.add(new_path)
        elif old_path:
            deleted.add(old_path)

    return list(added), list(modified), list(deleted)
1133 1152
1134 1153 @reraise_safe_exceptions
def tree_and_type_for_path(self, wire, commit_id, path):
    """
    Describe the tree entry found at ``path`` inside ``commit_id``.

    Returns ``(hex id, type string, filemode)`` of the entry, or
    ``(None, None, None)`` when the lookup of ``path`` raises ``KeyError``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            try:
                entry = commit.tree[path]
            except KeyError:
                # nothing stored under that path
                return None, None, None

            return entry.id.hex, entry.type_str, entry.filemode

    return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1153 1172
1154 1173 @reraise_safe_exceptions
def tree_items(self, wire, tree_id):
    """
    List the entries of the tree ``tree_id``.

    Each entry is a tuple ``(name, filemode, hex id, type)``; entries of type
    ``'commit'`` are reported as ``'link'``. ``ObjectMissing`` is raised when
    looking up ``tree_id`` raises ``KeyError``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_items(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            entries = []
            for entry in tree:
                entry_sha = entry.hex
                entry_mode = entry.filemode
                entry_type = entry.type_str
                # NOTE(marcink): submodules we translate to 'link' for backward compat
                reported_type = 'link' if entry_type == 'commit' else entry_type
                entries.append((entry.name, entry_mode, entry_sha, reported_type))
            return entries

    return _tree_items(repo_id, tree_id)
1182 1201
1183 1202 @reraise_safe_exceptions
def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Old version that uses subprocess to call diff

    Runs ``git diff`` between the two commits, or ``git show`` of
    ``commit_id_2`` when ``commit_id_1`` equals ``self.EMPTY_COMMIT``.

    :param file_filter: when set, the command is limited to that path
    :param opt_ignorews: when true, ``--ignore-all-space`` is added
    :param context: number of context lines (passed as ``-U<context>``)
    :return: the patch as bytes, as produced by ``run_git_command``
    """

    flags = [
        f'-U{context}', '--patch',
        '--binary',
        '--find-renames',
        '--no-indent-heuristic',
        # '--indent-heuristic',
        #'--full-index',
        #'--abbrev=40'
    ]

    if opt_ignorews:
        flags.append('--ignore-all-space')

    from_empty = commit_id_1 == self.EMPTY_COMMIT
    if from_empty:
        cmd = ['show'] + flags + [commit_id_2]
    else:
        cmd = ['diff'] + flags + [commit_id_1, commit_id_2]

    if file_filter:
        cmd.extend(['--', file_filter])

    diff, __ = self.run_git_command(wire, cmd)
    if from_empty:
        # If we used 'show' command, strip first few lines (until actual diff
        # starts)
        lines = diff.splitlines()
        start = next(
            (idx for idx, line in enumerate(lines) if line.startswith(b'diff')),
            len(lines))
        # The command output is bytes, so the lines must be joined with a
        # bytes separator. Append new line just like 'diff' command do
        diff = b'\n'.join(lines[start:]) + b'\n'
    return diff
1223 1242
1224 1243 @reraise_safe_exceptions
def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Build the diff between two commits and return it in a ``BytesEnvelope``.

    When ``commit_id_1`` equals ``self.EMPTY_COMMIT`` the tree of
    ``commit_id_2`` is diffed without a second tree; otherwise it is diffed
    against the tree of ``commit_id_1`` and rename detection is run.

    :param file_filter: when set, only the patch whose old file path equals
        it is returned (an empty envelope if none matches)
    :param opt_ignorews: when true, whitespace changes are ignored
    :param context: number of context lines
    """
    with self._factory.repo_libgit2(wire) as repo:
        diff_flags = pygit2.GIT_DIFF_SHOW_BINARY
        if opt_ignorews:
            diff_flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE

        if commit_id_1 == self.EMPTY_COMMIT:
            new_commit = repo[commit_id_2]
            diff_obj = new_commit.tree.diff_to_tree(
                flags=diff_flags, context_lines=context, swap=True)
        else:
            new_commit = repo[commit_id_2]
            old_commit = repo[commit_id_1]
            diff_obj = new_commit.tree.diff_to_tree(
                old_commit.tree, flags=diff_flags, context_lines=context, swap=True)
            diff_obj.find_similar(flags=pygit2.GIT_DIFF_FIND_RENAMES)

        if not file_filter:
            return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')

        for patch in diff_obj:
            if patch.delta.old_file.path == file_filter:
                return BytesEnvelope(patch.data) or BytesEnvelope(b'')
        # no matching path == no diff
        return BytesEnvelope(b'')
1258 1277
1259 1278 @reraise_safe_exceptions
def node_history(self, wire, commit_id, path, limit):
    """
    Return the ids of the commits that touched ``path``, starting at ``commit_id``.

    The ids are every 40-character hex string found in the git output, as
    bytes. ``limit`` caps the number of commits; a falsy ``limit`` adds no
    ``-n`` option at all.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
        path_spec = [commit_id, '--', path]
        if limit == 1:
            # optimize for n==1, rev-list is much faster for that use-case
            cmd = ['rev-list', '-1'] + path_spec
        else:
            cmd = ['log']
            if limit:
                cmd += ['-n', str(safe_int(limit, 0))]
            cmd += ['--pretty=format: %H', '-s'] + path_spec

        stdout, _stderr = self.run_git_command(wire, cmd)
        return list(re.findall(rb'[0-9a-fA-F]{40}', stdout))

    return _node_history(context_uid, repo_id, commit_id, path, limit)
1280 1299
1281 1300 @reraise_safe_exceptions
def node_annotate_legacy(self, wire, commit_id, path):
    """
    Annotate ``path`` at ``commit_id`` by parsing the output of ``git blame``.

    Returns a list of ``(line_no, blame_commit_id, line)`` tuples, where the
    last two items are the blame line split at its first space.
    """
    # note: replaced by pygit2 implementation
    # -l ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r commit_id ==> blames for the given commit
    blame_cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
    stdout, _stderr = self.run_git_command(wire, blame_cmd)

    # NOTE(review): the slice drops the final output line — presumably
    # intentional, verify against the blame output format
    blame_lines = stdout.splitlines()[:-1]

    annotated = []
    for line_no, blame_line in enumerate(blame_lines, start=1):
        blame_commit_id, content = blame_line.split(b' ', 1)
        annotated.append((line_no, blame_commit_id, content))
    return annotated
1297 1316
1298 1317 @reraise_safe_exceptions
def node_annotate(self, wire, commit_id, path):
    """
    Annotate every line of ``path`` at ``commit_id`` using the repo's blame.

    Returns a ``BinaryEnvelope`` wrapping a list of
    ``(line_no, blame_commit_id, line)`` tuples; line numbers start at 1 and
    ``line`` is the raw line taken from the file data.
    """
    with self._factory.repo_libgit2(wire) as repo:
        commit = repo[commit_id]
        blame = repo.blame(path, newest_commit=commit_id)
        file_lines = commit.tree[path].data.splitlines()
        annotated = [
            (line_no, blame.for_line(line_no).final_commit_id.hex, content)
            for line_no, content in enumerate(file_lines, start=1)
        ]

    return BinaryEnvelope(annotated)
1314 1333
1315 1334 @reraise_safe_exceptions
def update_server_info(self, wire):
    """
    Run the module-level ``update_server_info`` on the repository behind ``wire``.

    Inside this method the bare name ``update_server_info`` refers to the
    function imported at module level, not to this method.
    """
    update_server_info(self._factory.repo(wire))
1319 1338
1320 1339 @reraise_safe_exceptions
def get_all_commit_ids(self, wire):
    """
    Return the ids of all commits reachable from branches and tags.

    The ids are the output lines (bytes) of
    ``git rev-list --reverse --date-order --branches --tags``, in the order
    git prints them. Any exception raised while running the command yields
    an empty list.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids(_context_uid, _repo_id):
        cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
        try:
            output, __ = self.run_git_command(wire, cmd)
            return output.splitlines()
        except Exception:
            # Can be raised for empty repositories
            return []

    return _get_all_commit_ids(context_uid, repo_id)
1347 1366
1348 1367 @reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
    """
    Run the git executable with ``cmd`` and return ``(stdout, stderr)`` as bytes.

    The command runs in ``wire['path']`` when that is an existing directory.
    The following control options are consumed here; every other keyword is
    handed on to ``subprocessio.SubprocessIOChunker``:

    * ``_bare``: when present, the default ``-c`` options are not prepended
    * ``_safe``: when present, an ``OSError`` results in ``('', err)`` being
      returned instead of an exception
    * ``_copts``: extra options placed between the executable and ``cmd``
    * ``extra_env``: mapping merged into the environment of the process

    :raises: the exception built by ``exceptions.VcsException`` when the
        command fails with ``OSError`` and ``_safe`` was not given
    """
    repo_path = wire.get('path', None)
    if repo_path and os.path.isdir(repo_path):
        opts['cwd'] = repo_path

    bare = '_bare' in opts
    opts.pop('_bare', None)
    if bare:
        config_opts = []
    else:
        config_opts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']

    # no exc on failure
    safe_call = '_safe' in opts
    opts.pop('_safe', None)

    config_opts.extend(opts.pop('_copts', None) or [])

    gitenv = os.environ.copy()
    gitenv.update(opts.pop('extra_env', {}))
    # need to clean fix GIT_DIR !
    gitenv.pop('GIT_DIR', None)
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
    gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

    full_cmd = [settings.GIT_EXECUTABLE] + config_opts + cmd
    call_opts = {'env': gitenv, 'shell': False}

    proc = None
    try:
        call_opts.update(opts)
        proc = subprocessio.SubprocessIOChunker(full_cmd, **call_opts)

        stdout = b''.join(proc)
        stderr = b''.join(proc.stderr)
        return stdout, stderr
    except OSError as err:
        readable_cmd = ' '.join(map(safe_str, full_cmd))  # human friendly CMD
        tb_err = ("Couldn't run git command (%s).\n"
                  "Original error was:%s\n"
                  "Call options:%s\n"
                  % (readable_cmd, err, call_opts))
        log.exception(tb_err)
        if not safe_call:
            raise exceptions.VcsException()(tb_err)
        return '', err
    finally:
        if proc:
            proc.close()
1401 1420
1402 1421 @reraise_safe_exceptions
def install_hooks(self, wire, force=False):
    """
    Install the git hooks into the repository at ``wire['path']``.

    :param wire: connection info; ``wire['path']`` is the repository path
    :param force: forwarded to ``install_git_hooks`` as ``force_create``
    :return: whatever ``install_git_hooks`` returns
    """
    from vcsserver.hook_utils import install_git_hooks
    bare = self.bare(wire)
    path = wire['path']
    return install_git_hooks(path, bare, force_create=force)
1411 1430
1412 1431 @reraise_safe_exceptions
def get_hooks_info(self, wire):
    """
    Report the versions of the installed pre and post hooks.

    Returns a dict with the keys ``pre_version`` and ``post_version``.
    """
    from vcsserver.hook_utils import (
        get_git_pre_hook_version, get_git_post_hook_version)

    is_bare = self.bare(wire)
    repo_path = wire['path']

    pre_version = get_git_pre_hook_version(repo_path, is_bare)
    post_version = get_git_post_hook_version(repo_path, is_bare)
    return {
        'pre_version': pre_version,
        'post_version': post_version,
    }
1422 1441
1423 1442 @reraise_safe_exceptions
def set_head_ref(self, wire, head_name):
    """
    Point HEAD of the repository at the branch ``head_name``.

    Returns a two-item list: ``head_name`` and a message naming the new
    HEAD reference.
    """
    log.debug('Setting refs/head to `%s`', head_name)
    with self._factory.repo_libgit2(wire) as repo:
        repo.set_head(f'refs/heads/{head_name}')

    summary = f'set HEAD to refs/heads/{head_name}'
    return [head_name, summary]
1431 1450
1432 1451 @reraise_safe_exceptions
def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id, cache_config):
    """
    Build an archive of ``commit_id`` via ``store_archive_in_cache``.

    A generator walking the files of the commit is handed to
    ``store_archive_in_cache`` together with all other arguments; its return
    value is passed back unchanged.
    """

    def file_walker(_commit_id, path):
        """Yield an ``ArchiveNode`` for every file below ``path``, skipping commit (submodule) entries."""
        with self._factory.repo_libgit2(wire) as repo:
            # the commit is looked up through the enclosing `commit_id`;
            # the `_commit_id` argument is not used
            commit = repo[commit_id]

            if path in ('', '/'):
                tree = commit.tree
            else:
                tree_id = commit.tree[path.rstrip('/')].id.hex
                try:
                    tree = repo[tree_id]
                except KeyError:
                    raise ObjectMissing(f'No tree with id: {tree_id}')

            tree_index = LibGit2Index.Index()
            tree_index.read_tree(tree)

            for entry in tree_index:
                entry_path = entry.path
                entry_mode = entry.mode
                if entry_mode == pygit2.GIT_FILEMODE_COMMIT:
                    log.debug('Skipping path %s as a commit node', entry_path)
                    continue
                is_link = stat.S_ISLNK(entry_mode)
                yield ArchiveNode(entry_path, entry_mode, is_link, repo[entry.hex].read_raw)

    return store_archive_in_cache(
        file_walker, archive_name_key, kind, mtime, archive_at_path,
        archive_dir_name, commit_id, cache_config=cache_config)
General Comments 0
You need to be logged in to leave comments. Login now