##// END OF EJS Templates
fix(git ops): moved git operations into vcsserver and use libgit2 when possible....
super-admin -
r1337:1fc1a507 default
parent child Browse files
Show More
@@ -1,1526 +1,1543 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2024 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23 import traceback
24 24 import urllib.request
25 25 import urllib.parse
26 26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 40
41 41 import vcsserver
42 42 from vcsserver import exceptions, settings, subprocessio
43 43 from vcsserver.lib.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str, splitnewlines
44 44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
45 45 from vcsserver.hgcompat import (
46 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 47 from vcsserver.git_lfs.lib import LFSOidStore
48 48 from vcsserver.vcs_base import RemoteBase
49 49
# File-type bit that marks a directory in a stat-style mode value.
50 50 DIR_STAT = stat.S_IFDIR
# NOTE: stat.S_IFMT is a function (it extracts the file-type bits of a mode), not a constant.
51 51 FILE_MODE = stat.S_IFMT
# Mode constant taken from dulwich.objects; presumably marks gitlink (submodule) entries -- confirm.
52 52 GIT_LINK = objects.S_IFGITLINK
# Suffix of peeled references; refs ending with it are skipped when syncing.
53 53 PEELED_REF_MARKER = b'^{}'
# Name of the HEAD reference, as bytes.
54 54 HEAD_MARKER = b'HEAD'
55 55
# Module-level logger.
56 56 log = logging.getLogger(__name__)
57 57
58 58
def reraise_safe_exceptions(func):
    """
    Decorator translating Dulwich exceptions into neutral vcsserver ones.

    Lookup-style Dulwich failures are re-raised as ``LookupException`` and
    protocol failures as ``VcsException``. Any other exception propagates
    to the caller untouched.
    """

    @wraps(func)
    def _guarded(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as err:
            raise exceptions.LookupException(org_exc=err)(safe_str(err))
        except (HangupException, UnexpectedCommandError) as err:
            raise exceptions.VcsException(org_exc=err)(safe_str(err))
        # NOTE(marcink): because of how dulwich handles some exceptions
        # (KeyError on empty repos), we cannot track this and catch all
        # exceptions; everything else is left to propagate as-is.
        return result

    return _guarded
81 81
82 82
class Repo(DulwichRepo):
    """
    Thin subclass of the dulwich ``Repo``.

    Dulwich sometimes keeps .idx file descriptors open, which leads to
    "Too many open files" errors. To avoid that, every opened descriptor is
    closed when the repo object gets destroyed.
    """

    def __del__(self):
        # object_store may be missing if construction failed half-way
        if not hasattr(self, 'object_store'):
            return
        self.close()
94 94
95 95
class Repository(LibGit2Repo):
    """pygit2 repository usable as a context manager; it is freed on exit."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # release the underlying libgit2 handle; exceptions are not suppressed
        self.free()
103 103
104 104
class GitFactory(RepoFactory):
    """Factory producing git repository objects (dulwich or libgit2 flavoured)."""

    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """Instantiate a repository object for ``wire['path']``."""
        if not use_libgit2:
            # dulwich mode
            repo = Repo(safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING))
        else:
            repo = Repository(safe_bytes(wire['path']))

        log.debug('repository created: got GIT object: %s', repo)
        return repo

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """Shortcut returning the libgit2-backed repository for ``wire``."""
        return self.repo(wire, use_libgit2=True)
127 127
128 128
def create_signature_from_string(author_str, **kwargs):
    """
    Build a pygit2.Signature out of an author string like 'Name <email>'.

    :param author_str: String of the format 'Name <email>'
    :param kwargs: extra keyword arguments passed on to pygit2.Signature
    :return: pygit2.Signature object
    :raises ValueError: when ``author_str`` does not follow the expected format
    """
    parsed = re.match(r'^(.+) <(.+)>$', author_str)
    if parsed is None:
        raise ValueError(f"Invalid format: {author_str}")

    return pygit2.Signature(parsed.group(1), parsed.group(2), **kwargs)
142 142
143 143
def get_obfuscated_url(url_obj):
    """
    Return ``url_obj`` rendered as a string that is safe for logging.

    The password (if any) is masked and the query string is passed through
    ``obfuscate_qs``. Note that ``url_obj`` itself is modified in place.
    """
    if url_obj.passwd:
        url_obj.passwd = b'*****'
    url_obj.query = obfuscate_qs(url_obj.query)
    return str(url_obj)
149 149
150 150
151 151 class GitRemote(RemoteBase):
152 COMMIT_ID_PAT = re.compile(rb'[0-9a-fA-F]{40}')
152 153
def __init__(self, factory):
    """Keep the repo factory and register the bulk attribute dispatch tables."""
    self._factory = factory

    # commit attributes resolvable through bulk_request()
    self._bulk_methods = dict(
        date=self.date,
        author=self.author,
        branch=self.branch,
        message=self.message,
        parents=self.parents,
        _commit=self.revision,
    )

    # file node attributes resolvable through bulk_file_request()
    self._bulk_file_methods = dict(
        size=self.get_node_size,
        data=self.get_node_data,
        flags=self.get_node_flags,
        is_binary=self.get_node_is_binary,
        md5=self.md5_hash,
    )
170 171
171 172 def _wire_to_config(self, wire):
172 173 if 'config' in wire:
173 174 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
174 175 return {}
175 176
176 177 def _remote_conf(self, config):
177 178 params = [
178 179 '-c', 'core.askpass=""',
179 180 ]
180 181 config_attrs = {
181 182 'vcs_ssl_dir': 'http.sslCAinfo={}',
182 183 'vcs_git_lfs_store_location': 'lfs.storage={}'
183 184 }
184 185 for key, param in config_attrs.items():
185 186 if value := config.get(key):
186 187 params.extend(['-c', param.format(value)])
187 188 return params
188 189
189 190 @reraise_safe_exceptions
def discover_git_version(self):
    """Return the output of ``git --version`` without the 'git version' prefix."""
    prefix = b'git version'
    stdout, _ = self.run_git_command({}, ['--version'], _bare=True, _safe=True)
    version = stdout[len(prefix):] if stdout.startswith(prefix) else stdout
    return safe_str(version.strip())
197 198
198 199 @reraise_safe_exceptions
def is_empty(self, wire):
    """
    Tell whether the repository has no commits.

    A resolvable HEAD means "not empty"; otherwise libgit2 is asked directly.
    Any error raised while checking is treated as "empty".
    """
    empty = True
    with self._factory.repo_libgit2(wire) as repo:
        try:
            # NOTE(marcink): repo.is_empty is the more expensive check, only
            # used when HEAD has no name
            empty = False if repo.head.name else repo.is_empty
        except Exception:
            pass

    return empty
213 214
214 215 @reraise_safe_exceptions
def assert_correct_path(self, wire):
    """
    Check (with optional caching) that ``wire['path']`` points at a git repo.

    Only the fast check based on ``pygit2.discover_repository`` is currently
    requested by this method.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _assert_correct_path(_context_uid, _repo_id, fast_check):
        if fast_check:
            return bool(pygit2.discover_repository(safe_str(wire['path'])))

        # slow path: actually open the repository
        try:
            with self._factory.repo_libgit2(wire):
                pass
        except pygit2.GitError:
            log.debug("Invalid Git path `%s`, tb: %s",
                      wire.get('path'), traceback.format_exc())
            return False
        return True

    return _assert_correct_path(context_uid, repo_id, True)
239 240
240 241 @reraise_safe_exceptions
def bare(self, wire):
    """Return libgit2's ``is_bare`` flag for the repository behind ``wire``."""
    with self._factory.repo_libgit2(wire) as repo:
        is_bare = repo.is_bare
        return is_bare
245 246
246 247 @reraise_safe_exceptions
def get_node_data(self, wire, commit_id, path):
    """
    Return the content of the blob at ``path`` in ``commit_id``.

    :return: ``BytesEnvelope`` wrapping the blob data
    :raises LookupException: when the tree entry is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return BytesEnvelope(node.data)

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
258 259
259 260 @reraise_safe_exceptions
def get_node_size(self, wire, commit_id, path):
    """
    Return the size of the blob at ``path`` in ``commit_id``.

    :raises LookupException: when the tree entry is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.size

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
271 272
272 273 @reraise_safe_exceptions
def get_node_flags(self, wire, commit_id, path):
    """
    Return the file mode of the blob at ``path`` in ``commit_id``.

    :raises LookupException: when the tree entry is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.filemode

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
284 285
285 286 @reraise_safe_exceptions
def get_node_is_binary(self, wire, commit_id, path):
    """
    Return libgit2's ``is_binary`` flag of the blob at ``path`` in ``commit_id``.

    :raises LookupException: when the tree entry is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.is_binary

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
297 298
298 299 @reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
    """Return the data of the object ``sha`` wrapped in a ``BytesEnvelope``."""
    with self._factory.repo_libgit2(wire) as repo:
        raw_data = repo[sha].data
        return BytesEnvelope(raw_data)
304 305
305 306 @reraise_safe_exceptions
def blob_raw_length(self, wire, sha):
    """Return the size of the object ``sha`` (cached per repo and sha)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _blob_raw_length(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[sha].size

    return _blob_raw_length(repo_id, sha)
319 320
320 321 def _parse_lfs_pointer(self, raw_content):
321 322 spec_string = b'version https://git-lfs.github.com/spec'
322 323 if raw_content and raw_content.startswith(spec_string):
323 324
324 325 pattern = re.compile(rb"""
325 326 (?:\n)?
326 327 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
327 328 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
328 329 ^size[ ](?P<oid_size>[0-9]+)\n
329 330 (?:\n)?
330 331 """, re.VERBOSE | re.MULTILINE)
331 332 match = pattern.match(raw_content)
332 333 if match:
333 334 return match.groupdict()
334 335
335 336 return {}
336 337
337 338 @reraise_safe_exceptions
def is_large_file(self, wire, commit_id):
    """
    Return the parsed LFS pointer for the object ``commit_id``.

    Binary blobs and blobs that are not LFS pointers yield an empty dict.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_large_file(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            blob = repo[commit_id]
            return {} if blob.is_binary else self._parse_lfs_pointer(blob.data)

    return _is_large_file(repo_id, commit_id)
353 354
354 355 @reraise_safe_exceptions
def is_binary(self, wire, tree_id):
    """Return libgit2's ``is_binary`` flag of the object ``tree_id`` (cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_binary(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[tree_id].is_binary

    return _is_binary(repo_id, tree_id)
367 368
368 369 @reraise_safe_exceptions
def md5_hash(self, wire, commit_id, path):
    """
    Validate that ``path`` in ``commit_id`` is a blob and return an empty string.

    No hash is actually computed here; the result is always ``''``.

    :raises LookupException: when the tree entry is not a blob
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _md5_hash(_repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            node = repo[_commit_id].tree[_path]

            if node.type == pygit2.GIT_OBJ_BLOB:
                return ''

            raise exceptions.LookupException()(
                f'Tree for commit_id:{_commit_id} is not a blob: {node.type_str}')

    return _md5_hash(repo_id, commit_id, path)
387 388
388 389 @reraise_safe_exceptions
def in_largefiles_store(self, wire, oid):
    """
    Tell whether ``oid`` is present in the configured LFS store.

    Returns False when no ``vcs_git_lfs_store_location`` is configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        return False

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.has_oid()
403 404
404 405 @reraise_safe_exceptions
def store_path(self, wire, oid):
    """
    Return the path of ``oid`` inside the configured LFS store.

    :raises ValueError: when no ``vcs_git_lfs_store_location`` is configured
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        raise ValueError(f'Unable to fetch oid with path {oid}')

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.oid_path
417 418
418 419 @reraise_safe_exceptions
def bulk_request(self, wire, rev, pre_load):
    """
    Resolve several commit attributes for ``rev`` in one call.

    :param pre_load: iterable of attribute names registered in ``_bulk_methods``
    :return: dict mapping each requested attribute to its value
    :raises VcsException: when a KeyError occurs while resolving an attribute
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_request(_repo_id, _rev, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                resolver = self._bulk_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire.update({'cache': False})
                collected[attr] = resolver(wire, rev)
            except KeyError as err:
                raise exceptions.VcsException(err)(f"Unknown bulk attribute: {attr}")
        return collected

    return _bulk_request(repo_id, rev, sorted(pre_load))
437 438
438 439 @reraise_safe_exceptions
def bulk_file_request(self, wire, commit_id, path, pre_load):
    """
    Resolve several file node attributes for ``path`` at ``commit_id`` in one call.

    :param pre_load: iterable of attribute names registered in ``_bulk_file_methods``
    :return: ``BinaryEnvelope`` wrapping a dict of attribute -> value
    :raises VcsException: when a KeyError occurs while resolving an attribute
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                resolver = self._bulk_file_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire.update({'cache': False})
                collected[attr] = resolver(wire, _commit_id, _path)
            except KeyError as err:
                raise exceptions.VcsException(err)(f'Unknown bulk attribute: "{attr}"')
        return collected

    file_attrs = _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
    return BinaryEnvelope(file_attrs)
456 457
def _build_opener(self, url: str):
    """
    Create a urllib opener for ``url``.

    When the URL carries auth info, basic and digest auth handlers sharing
    one password manager are installed.
    """
    authinfo = url_parser(safe_bytes(url)).authinfo()[1]

    handlers = []
    if authinfo:
        # create a password manager
        passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        passmgr.add_password(*convert_to_str(authinfo))

        handlers += [httpbasicauthhandler(passmgr),
                     httpdigestauthhandler(passmgr)]

    return urllib.request.build_opener(*handlers)
471 472
472 473 @reraise_safe_exceptions
def check_url(self, url, config):
    """
    Verify that ``url`` answers like a git smart/dumb HTTP remote.

    The ``info/refs?service=git-upload-pack`` endpoint is requested and the
    response body is inspected for git ref advertisement markers.

    :param url: remote URL, may contain credentials
    :param config: unused here, kept for interface compatibility
    :return: True when the URL looks like a git repository
    :raises URLError: when the URL cannot be opened, or the response does
        not look like it comes from a git repository
    """
    url_obj = url_parser(safe_bytes(url))

    # NOTE: test_uri has to be read before the url object gets obfuscated,
    # because get_obfuscated_url() modifies url_obj in place
    test_uri = safe_str(url_obj.authinfo()[0])
    obfuscated_uri = get_obfuscated_url(url_obj)

    log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)

    if not test_uri.endswith('info/refs'):
        test_uri = test_uri.rstrip('/') + '/info/refs'

    o = self._build_opener(url=url)
    o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

    q = {"service": 'git-upload-pack'}
    qs = f'?{urllib.parse.urlencode(q)}'
    cu = f"{test_uri}{qs}"

    try:
        req = urllib.request.Request(cu, None, {})
        log.debug("Trying to open URL %s", obfuscated_uri)
        resp = o.open(req)
        if resp.code != 200:
            raise exceptions.URLError()('Return Code is not 200')
    except Exception as e:
        log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
        # means it cannot be cloned
        raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")

    # now detect if it's proper git repo
    gitdata: bytes = resp.read()

    looks_like_git = (
        b'service=git-upload-pack' in gitdata
        # old style git can return some other format!
        or re.search(br'[0-9a-fA-F]{40}\s+refs', gitdata)
    )
    if not looks_like_git:
        # this is the git backend: report a git repo, not an hg one
        raise exceptions.URLError(None)(
            f"url [{obfuscated_uri}] does not look like a git repo")

    return True
516 517
517 518 @reraise_safe_exceptions
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
    """
    Pull from ``url`` and store the matching remote refs in the local repo.

    Only refs starting with ``valid_refs`` and not ending with ``deferred``
    are applied; optionally HEAD is set and the index is built from it.
    """
    # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
    remote_refs = self.pull(wire, url, apply_refs=False)
    repo = self._factory.repo(wire)
    if isinstance(valid_refs, list):
        valid_refs = tuple(valid_refs)

    for ref_name in remote_refs:
        # only parse heads/tags and skip so called deferred tags
        if not ref_name.startswith(valid_refs) or ref_name.endswith(deferred):
            continue
        repo[ref_name] = remote_refs[ref_name]

    if not update_after_clone:
        return

    # we want to checkout HEAD
    repo["HEAD"] = remote_refs["HEAD"]
    index.build_index_from_tree(repo.path, repo.index_path(),
                                repo.object_store, repo["HEAD"].tree)
535 536
536 537 @reraise_safe_exceptions
def branch(self, wire, commit_id):
    """Return names of the ``refs/heads`` refs pointing at ``commit_id`` (cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _branch(_context_uid, _repo_id, _commit_id):
        heads_pat = re.compile('^refs/heads')
        return [
            ref_name.split('refs/heads/')[-1]
            for ref_name, ref_sha in list(self.get_refs(wire).items())
            if heads_pat.match(ref_name) and ref_sha == _commit_id
        ]

    return _branch(context_uid, repo_id, commit_id)
552 553
553 554 @reraise_safe_exceptions
def delete_branch(self, wire, branch_name):
    """Delete ``branch_name`` when the lookup finds it; otherwise do nothing."""
    with self._factory.repo_libgit2(wire) as repo:
        found = repo.lookup_branch(branch_name)
        if found:
            found.delete()
559 560
560 561 @reraise_safe_exceptions
def commit_branches(self, wire, commit_id):
    """Return the list of branches that contain ``commit_id`` (cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _commit_branches(_context_uid, _repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return list(repo.branches.with_commit(_commit_id))

    return _commit_branches(context_uid, repo_id, commit_id)
573 574
574 575 @reraise_safe_exceptions
def add_object(self, wire, content):
    """
    Store ``content`` as a new blob in the repository.

    The repository is opened through libgit2, so the blob has to be written
    with the pygit2 API; a pygit2 repository has no dulwich-style
    ``object_store``.

    :param content: raw blob content
    :return: hex id of the created blob as ascii bytes (the same shape the
        dulwich ``Blob.id`` had)
    """
    with self._factory.repo_libgit2(wire) as repo:
        blob_oid = repo.create_blob(content)
        return str(blob_oid).encode('ascii')
582 583
583 584 @reraise_safe_exceptions
584 585 def create_commit(self, wire, author, committer, message, branch, new_tree_id,
585 586 date_args: list[int, int] = None,
586 587 parents: list | None = None):
# Create a commit object on `refs/heads/<branch>` using libgit2 and return
# its id as a string.
#
# :param author: author string in 'Name <email>' form
# :param committer: committer string in 'Name <email>' form
# :param message: commit message
# :param branch: branch name, the updated reference is refs/heads/<branch>
# :param new_tree_id: tree for the commit; bytes/str ids are looked up in the repo
# :param date_args: (time, offset) pair passed on to the created signatures
# :param parents: optional list of parent commit ids
# :return: id of the new commit (via safe_str)
587 588
588 589 repo_init = self._factory.repo_libgit2(wire)
589 590 with repo_init as repo:
590 591
# author/committer strings get converted to pygit2 signatures that carry
# the given time and offset
591 592 if date_args:
592 593 current_time, offset = date_args
593 594
594 595 kw = {
595 596 'time': current_time,
596 597 'offset': offset
597 598 }
598 599 author = create_signature_from_string(author, **kw)
599 600 committer = create_signature_from_string(committer, **kw)
600 601
601 602 tree = new_tree_id
602 603 if isinstance(tree, (bytes, str)):
603 604 # validate this tree is in the repo...
604 605 tree = repo[safe_str(tree)].id
605 606
606 607 if parents:
607 608 # run via sha's and validate them in repo
608 609 parents = [repo[c].id for c in parents]
609 610 else:
610 611 parents = []
611 612 # ensure we COMMIT on top of given branch head
612 613 # check if this repo has ANY branches, otherwise it's a new branch case we need to make
613 614 if branch in repo.branches.local:
614 615 parents += [repo.branches[branch].target]
615 616 elif [x for x in repo.branches.local]:
616 617 parents += [repo.head.target]
617 618 #else:
618 619 # in case we want to commit on new branch we create it on top of HEAD
619 620 #repo.branches.local.create(branch, repo.revparse_single('HEAD'))
620 621
621 622 # # Create a new commit
622 623 commit_oid = repo.create_commit(
623 624 f'refs/heads/{branch}', # the name of the reference to update
624 625 author, # the author of the commit
625 626 committer, # the committer of the commit
626 627 message, # the commit message
627 628 tree, # the tree produced by the index
628 629 parents # list of parents for the new commit, usually just one,
629 630 )
630 631
# the id returned by libgit2 is converted to a plain string for the caller
631 632 new_commit_id = safe_str(commit_oid)
632 633
633 634 return new_commit_id
634 635
635 636 @reraise_safe_exceptions
def compare_commits(self, wire, commit_id1, commit_id2):
    """
    List commit ids found in the ``commit_id1..commit_id2`` range.

    The ids are extracted from ``git log --reverse`` output, so they come in
    the order git prints them.
    """
    rev_range = f'{commit_id1}..{commit_id2}'
    log_cmd = ['log', '--reverse', '--pretty=format: %H', '-s', rev_range]
    output, __ = self.run_git_command(wire, log_cmd)
    return list(map(safe_str, self.COMMIT_ID_PAT.findall(output)))
643
644 @reraise_safe_exceptions
636 645 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
# Write the given file changes into the repository index, create a commit
# from the resulting tree and point `branch` at it.
#
# :param commit_data: dict read for 'parents', 'author', 'committer',
#     'message', 'commit_time' and 'commit_timezone'
# :param branch: name of the branch that receives the commit
# :param commit_tree: when truthy (and parents are given) the index is
#     first loaded from the tree of the first parent
# :param updated: iterable of dicts with 'content', 'path' and 'mode' keys
# :param removed: iterable of paths to drop from the index
# :return: id of the newly created commit
637 646
638 647 def mode2pygit(mode):
639 648 """
640 649 git only supports two filemode 644 and 755
641 650
642 651 0o100755 -> 33261
643 652 0o100644 -> 33188
644 653 """
# unknown modes fall back to a regular (non executable) blob
645 654 return {
646 655 0o100644: pygit2.GIT_FILEMODE_BLOB,
647 656 0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
648 657 0o120000: pygit2.GIT_FILEMODE_LINK
649 658 }.get(mode) or pygit2.GIT_FILEMODE_BLOB
650 659
651 660 repo_init = self._factory.repo_libgit2(wire)
652 661 with repo_init as repo:
653 662 repo_index = repo.index
654 663
655 664 commit_parents = None
656 665 if commit_tree and commit_data['parents']:
657 666 commit_parents = commit_data['parents']
658 667 parent_commit = repo[commit_parents[0]]
659 668 repo_index.read_tree(parent_commit.tree)
660 669
# every updated node becomes a new blob plus an index entry for its path
661 670 for pathspec in updated:
662 671 blob_id = repo.create_blob(pathspec['content'])
663 672 ie = pygit2.IndexEntry(pathspec['path'], blob_id, mode2pygit(pathspec['mode']))
664 673 repo_index.add(ie)
665 674
666 675 for pathspec in removed:
667 676 repo_index.remove(pathspec)
668 677
669 678 # Write changes to the index
670 679 repo_index.write()
671 680
672 681 # Create a tree from the updated index
673 682 written_commit_tree = repo_index.write_tree()
674 683
675 684 new_tree_id = written_commit_tree
676 685
677 686 author = commit_data['author']
678 687 committer = commit_data['committer']
679 688 message = commit_data['message']
680 689
# [time, offset] pair handed to create_commit() as date_args
681 690 date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]
682 691
683 692 new_commit_id = self.create_commit(wire, author, committer, message, branch,
684 693 new_tree_id, date_args=date_args, parents=commit_parents)
685 694
686 695 # libgit2, ensure the branch is there and exists
687 696 self.create_branch(wire, branch, new_commit_id)
688 697
689 698 # libgit2, set new ref to this created commit
690 699 self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)
691 700
692 701 return new_commit_id
693 702
694 703 @reraise_safe_exceptions
695 704 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
# Fetch objects from `url` into the dulwich repository behind `wire`.
#
# :param url: 'default', a local path, or a URL containing '://'
# :param apply_refs: when truthy, fetched refs are written into the repo
# :param refs: optional list of ref names restricting what gets fetched
# :param update_after: when truthy, HEAD is set from the remote HEAD and the
#     index is built from its tree
# :return: the remote refs (the `.refs` attribute for a FetchPackResult)
# :raises AbortException: when the remote is not a git repository
#
# a url that is not 'default' and has no scheme uses the local client,
# everything else goes through the HTTP client
696 705 if url != 'default' and '://' not in url:
697 706 client = LocalGitClient(url)
698 707 else:
699 708 url_obj = url_parser(safe_bytes(url))
700 709 o = self._build_opener(url)
701 710 url = url_obj.authinfo()[0]
702 711 client = HttpGitClient(base_url=url, opener=o)
703 712 repo = self._factory.repo(wire)
704 713
# default: want everything the remote offers
705 714 determine_wants = repo.object_store.determine_wants_all
706 715
707 716 if refs:
708 717 refs: list[bytes] = [ascii_bytes(x) for x in refs]
709 718
# collects the hashes of the remote refs whose names were requested
710 719 def determine_wants_requested(_remote_refs):
711 720 determined = []
712 721 for ref_name, ref_hash in _remote_refs.items():
713 722 bytes_ref_name = safe_bytes(ref_name)
714 723
715 724 if bytes_ref_name in refs:
716 725 bytes_ref_hash = safe_bytes(ref_hash)
717 726 determined.append(bytes_ref_hash)
718 727 return determined
719 728
720 729 # swap with our custom requested wants
721 730 determine_wants = determine_wants_requested
722 731
723 732 try:
724 733 remote_refs = client.fetch(
725 734 path=url, target=repo, determine_wants=determine_wants)
726 735
727 736 except NotGitRepository as e:
728 737 log.warning(
729 738 'Trying to fetch from "%s" failed, not a Git repository.', url)
730 739 # Exception can contain unicode which we convert
731 740 raise exceptions.AbortException(e)(repr(e))
732 741
733 742 # mikhail: client.fetch() returns all the remote refs, but fetches only
734 743 # refs filtered by `determine_wants` function. We need to filter result
735 744 # as well
736 745 if refs:
737 746 remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}
738 747
739 748 if apply_refs:
740 749 # TODO: johbo: Needs proper test coverage with a git repository
741 750 # that contains a tag object, so that we would end up with
742 751 # a peeled ref at this point.
743 752 for k in remote_refs:
744 753 if k.endswith(PEELED_REF_MARKER):
745 754 log.debug("Skipping peeled reference %s", k)
746 755 continue
747 756 repo[k] = remote_refs[k]
748 757
749 758 if refs and not update_after:
750 759 # update to ref
751 760 # mikhail: explicitly set the head to the last ref.
752 761 update_to_ref = refs[-1]
# NOTE(review): this branch only runs when update_after is falsy, so the
# isinstance check below can only match an empty string -- confirm intent
753 762 if isinstance(update_after, str):
754 763 update_to_ref = update_after
755 764
756 765 repo[HEAD_MARKER] = remote_refs[update_to_ref]
757 766
758 767 if update_after:
759 768 # we want to check out HEAD
760 769 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
761 770 index.build_index_from_tree(repo.path, repo.index_path(),
762 771 repo.object_store, repo[HEAD_MARKER].tree)
763 772
# hand back a plain refs mapping when dulwich returned a result object
764 773 if isinstance(remote_refs, FetchPackResult):
765 774 return remote_refs.refs
766 775 return remote_refs
767 776
768 777 @reraise_safe_exceptions
769 778 def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs):
# Fetch refs from `url` using the git command line.
#
# :param url: remote to read from
# :param refs: optional value or list of values; only remote refs whose sha
#     is contained in it are fetched
# :param all_refs: when falsy, `ls-remote` is limited to heads and tags
# :param kwargs: `sync_large_objects` additionally triggers `git lfs fetch`
# :return: OrderedDict mapping remote ref name -> sha (both bytes)
770 779 self._factory.repo(wire)
771 780 if refs and not isinstance(refs, (list, tuple)):
772 781 refs = [refs]
773 782
774 783 config = self._wire_to_config(wire)
775 784 # get all remote refs we'll use to fetch later
776 785 cmd = ['ls-remote']
777 786 if not all_refs:
778 787 cmd += ['--heads', '--tags']
779 788 cmd += [url]
780 789 output, __ = self.run_git_command(
781 790 wire, cmd, fail_on_stderr=False,
782 791 _copts=self._remote_conf(config),
783 792 extra_env={'GIT_TERMINAL_PROMPT': '0'})
784 793
785 794 remote_refs = collections.OrderedDict()
786 795 fetch_refs = []
787 796
# each output line is split on a tab into the sha and the ref name
788 797 for ref_line in output.splitlines():
789 798 sha, ref = ref_line.split(b'\t')
790 799 sha = sha.strip()
791 800 if ref in remote_refs:
792 801 # duplicate, skip
793 802 continue
794 803 if ref.endswith(PEELED_REF_MARKER):
795 804 log.debug("Skipping peeled reference %s", ref)
796 805 continue
797 806 # don't sync HEAD
798 807 if ref in [HEAD_MARKER]:
799 808 continue
800 809
801 810 remote_refs[ref] = sha
802 811
# NOTE(review): `refs` is compared against the sha here, not the ref name -- confirm with callers
803 812 if refs and sha in refs:
804 813 # we filter fetch using our specified refs
805 814 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
806 815 elif not refs:
807 816 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
808 817 log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
809 818
810 819 if fetch_refs:
# refspecs are fetched in chunks of 128 per git invocation
811 820 for chunk in more_itertools.chunked(fetch_refs, 128):
812 821 fetch_refs_chunks = list(chunk)
813 822 log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
814 823 self.run_git_command(
815 824 wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
816 825 fail_on_stderr=False,
817 826 _copts=self._remote_conf(config),
818 827 extra_env={'GIT_TERMINAL_PROMPT': '0'})
819 828 if kwargs.get('sync_large_objects'):
820 829 self.run_git_command(
821 830 wire, ['lfs', 'fetch', url, '--all'],
822 831 fail_on_stderr=False,
823 832 _copts=self._remote_conf(config),
824 833 )
825 834
826 835 return remote_refs
827 836
828 837 @reraise_safe_exceptions
def sync_push(self, wire, url, refs=None, **kwargs):
    """
    Mirror-push the repository to ``url`` using the git command line.

    Nothing is pushed when ``check_url`` returns a falsy value. With the
    ``sync_large_objects`` keyword set, LFS objects are pushed as well.
    """
    if not self.check_url(url, wire):
        return

    config = self._wire_to_config(wire)
    self._factory.repo(wire)

    no_prompt_env = {'GIT_TERMINAL_PROMPT': '0'}
    push_cmd = ['push', url, '--mirror']
    self.run_git_command(
        wire, push_cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env=no_prompt_env)

    if not kwargs.get('sync_large_objects'):
        return

    lfs_push_cmd = ['lfs', 'push', url, '--all']
    self.run_git_command(
        wire, lfs_push_cmd,
        fail_on_stderr=False,
        _copts=self._remote_conf(config),
    )
844 853
845 854 @reraise_safe_exceptions
def get_remote_refs(self, wire, url):
    """Open the dulwich repository at ``url`` and return its refs."""
    remote_repo = Repo(url)
    refs = remote_repo.get_refs()
    return refs
849 858
850 859 @reraise_safe_exceptions
def get_description(self, wire):
    """Return the description of the dulwich repository behind ``wire``."""
    return self._factory.repo(wire).get_description()
854 863
855 864 @reraise_safe_exceptions
def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
    """
    List commit ids reachable from ``rev2`` but not from ``rev1``.

    Objects are first exchanged in both directions between the repository
    behind ``wire`` and the one at ``other_repo_path``; the walk itself is
    done on the other repository.
    """
    origin_repo_path = wire['path']
    origin_repo = self._factory.repo(wire)
    # fetch from other_repo_path to our origin repo
    LocalGitClient(thin_packs=False).fetch(other_repo_path, origin_repo)

    wire_remote = wire.copy()
    wire_remote['path'] = other_repo_path
    repo_remote = self._factory.repo(wire_remote)

    # fetch from origin_repo_path to our remote repo
    LocalGitClient(thin_packs=False).fetch(origin_repo_path, repo_remote)

    walker = repo_remote.get_walker(
        include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])
    return [entry.commit.id for entry in walker]
873 882
@reraise_safe_exceptions
def get_common_ancestor(self, wire, rev1, rev2):
    """
    Return `str()` of what libgit2's `merge_base` yields for `rev1` and `rev2`.

    NOTE(review): if `merge_base` returns None when there is no common
    ancestor, this returns the string 'None' -- confirm callers expect that.
    """
    with self._factory.repo_libgit2(wire) as repo:
        merge_base_id = repo.merge_base(rev1, rev2)

    return str(merge_base_id)
890
@reraise_safe_exceptions
def get_object(self, wire, sha, maybe_unreachable=False):
    """
    Resolve `sha` (anything `revparse_single` accepts) to basic object info.

    Returns a dict with the keys `id`, `type`, `commit_id` and `idx`. Tag
    objects are followed to their target. A lookup exception is raised when
    the name cannot be resolved, or when a full id that is neither a tag nor
    flagged `maybe_unreachable` is not contained in any branch.

    The lookup is wrapped in the region's conditional cache, keyed on the
    context uid, repo id and sha.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_object(_context_uid, _repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            not_found_msg = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])

            try:
                obj = repo.revparse_single(sha)
            except KeyError:
                # NOTE(marcink): KeyError doesn't give us any meaningful information
                # here, we instead give something more explicit
                cause = exceptions.RefNotFoundException('SHA: %s not found', sha)
                raise exceptions.LookupException(cause)(not_found_msg)
            except ValueError as e:
                raise exceptions.LookupException(e)(not_found_msg)

            is_tag = isinstance(obj, pygit2.Tag)
            if is_tag:
                obj = repo.get(obj.target)

            # a name that differs from the resolved id means a reference was
            # used and it parsed, so the commit cannot be dangling
            resolved_from_ref = sha != obj.hex
            check_dangling = not (is_tag or maybe_unreachable or resolved_from_ref)

            if check_dangling and not any(repo.branches.with_commit(obj.hex)):
                # NOTE(marcink): Empty error doesn't give us any meaningful information
                # here, we instead give something more explicit
                cause = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
                raise exceptions.LookupException(cause)(not_found_msg)

            object_id = obj.hex
            return {
                'id': object_id,
                'type': obj.type_str,
                'commit_id': object_id,
                'idx': 0
            }

    return _get_object(context_uid, repo_id, sha)
933 950
@reraise_safe_exceptions
def get_refs(self, wire):
    """
    Return a dict mapping each `refs/heads/*` and `refs/tags/*` reference
    name to the hex id of its target.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_refs(_context_uid, _repo_id):
        with self._factory.repo_libgit2(wire) as repo:
            heads_or_tags = re.compile('^refs/(heads|tags)/')
            refs = {}
            for ref in repo.listall_reference_objects():
                if heads_or_tags.match(ref.name):
                    refs[ref.name] = ref.target.hex
            return refs

    return _get_refs(context_uid, repo_id)
949 966
@reraise_safe_exceptions
def get_branch_pointers(self, wire):
    """
    Return a dict mapping the target hex id of every reference whose name
    starts with `refs/heads` to that reference's shorthand name.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_branch_pointers(_context_uid, _repo_id):
        heads = re.compile('^refs/heads')
        with self._factory.repo_libgit2(wire) as repo:
            pointers = {}
            for ref in repo.listall_reference_objects():
                if not heads.match(ref.name):
                    continue
                target_id = ref.target.hex
                pointers[target_id] = ref.shorthand
            return pointers

    return _get_branch_pointers(context_uid, repo_id)
965 982
@reraise_safe_exceptions
def head(self, wire, show_exc=True):
    """
    Return the hex id of the object that HEAD peels to.

    Any error raised while resolving HEAD is re-raised when `show_exc` is
    truthy; otherwise it is swallowed and None is returned.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _head(_context_uid, _repo_id, _show_exc):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                head_id = repo.head.peel().hex
            except Exception:
                if show_exc:
                    raise
                return None
            return head_id

    return _head(context_uid, repo_id, show_exc)
981 998
@reraise_safe_exceptions
def init(self, wire):
    """
    Create the directory wire['path'] with mode 0o755 and initialise a
    non-bare git repository in it.
    """
    target_dir = safe_str(wire['path'])
    os.makedirs(target_dir, mode=0o755)
    pygit2.init_repository(target_dir, bare=False)
987 1004
@reraise_safe_exceptions
def init_bare(self, wire):
    """
    Create the directory wire['path'] with mode 0o755 and initialise a
    bare git repository in it.
    """
    target_dir = safe_str(wire['path'])
    os.makedirs(target_dir, mode=0o755)
    pygit2.init_repository(target_dir, bare=True)
993 1010
@reraise_safe_exceptions
def revision(self, wire, rev):
    """
    Look up `rev` in the repository and return a dict with its hex `id`
    and, when the object exposes a `tree_id`, the hex id of its `tree`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _revision(_context_uid, _repo_id, _rev):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[rev]
            info = {'id': obj.id.hex}

            # tree objects themselves don't have a tree_id attribute
            if hasattr(obj, 'tree_id'):
                info['tree'] = obj.tree_id.hex

            return info

    return _revision(context_uid, repo_id, rev)
1014 1031
@reraise_safe_exceptions
def date(self, wire, commit_id):
    """
    Return `[commit_time, commit_time_offset]` for `commit_id`.

    Objects without a `commit_time` attribute are first resolved through
    their `get_object()` method.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _date(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]
            if not hasattr(obj, 'commit_time'):
                obj = obj.get_object()

            # TODO(marcink): check dulwich difference of offset vs timezone
            return [obj.commit_time, obj.commit_time_offset]

    return _date(repo_id, commit_id)
1035 1052
@reraise_safe_exceptions
def author(self, wire, commit_id):
    """
    Return the author of `commit_id` as `Name <email>`, or just the name
    when the author has no email.

    If formatting the name fails, the raw name is converted with `safe_str`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _author(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]
            # objects without an `author` are resolved via get_object() first
            signature = obj.author if hasattr(obj, 'author') else obj.get_object().author

            if signature.email:
                return f"{signature.name} <{signature.email}>"

            try:
                return f"{signature.name}"
            except Exception:
                return f"{safe_str(signature.raw_name)}"

    return _author(repo_id, commit_id)
1061 1078
@reraise_safe_exceptions
def message(self, wire, commit_id):
    """
    Return the `message` attribute of the object stored under `commit_id`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _message(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[commit_id].message

    return _message(repo_id, commit_id)
1074 1091
@reraise_safe_exceptions
def parents(self, wire, commit_id):
    """
    Return the hex ids of the parents of `commit_id`.

    Objects without `parent_ids` are first resolved through `get_object()`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _parents(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]
            if not hasattr(obj, 'parent_ids'):
                obj = obj.get_object()

            return [parent.hex for parent in obj.parent_ids]

    return _parents(repo_id, commit_id)
1092 1109
@reraise_safe_exceptions
def children(self, wire, commit_id):
    """
    Return the ids of the children of `commit_id`.

    Runs `git rev-list --all --children` over `<commit_id>^..<head>` and
    takes the ids listed after `commit_id` on the first output line that
    starts with it. An empty list is returned when no line matches.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    head = self.head(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _children(_repo_id, _commit_id):
        rev_range = f'{commit_id}^..{head}'
        output, __ = self.run_git_command(
            wire, ['rev-list', '--all', '--children', rev_range])

        own_line = re.compile(fr'^{commit_id}')
        for raw_line in output.splitlines():
            text = safe_str(raw_line)
            if own_line.match(text):
                # first field is the commit itself, the rest are its children
                return text.split(' ')[1:]

        return []

    return _children(repo_id, commit_id)
1117 1134
@reraise_safe_exceptions
def set_refs(self, wire, key, value):
    """
    Create the reference `key` pointing at `value`, overwriting an
    existing reference of the same name.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.create(key, value, force=True)
1123 1140
@reraise_safe_exceptions
def update_refs(self, wire, key, value):
    """
    Point the already existing reference `key` at `value`.

    Raises ValueError when `key` is not among the repository references.
    """
    with self._factory.repo_libgit2(wire) as repo:
        references = repo.references
        if key not in references:
            raise ValueError(f'Reference {key} not found in the repository')
        references.create(key, value, force=True)
1131 1148
@reraise_safe_exceptions
def create_branch(self, wire, branch_name, commit_id, force=False):
    """
    Create the local branch `branch_name`.

    :param branch_name: name of the branch to create
    :param commit_id: commit the branch should point at; when empty the
        commit HEAD currently peels to is used
    :param force: when true the branch is created even if it exists
        (passed through to libgit2); when false an already existing branch
        is left untouched
    """
    with self._factory.repo_libgit2(wire) as repo:
        if commit_id:
            commit = repo[commit_id]
        else:
            # if commit is not given just use the HEAD; `repo.head` is a
            # property holding a reference (not a callable), so peel it to
            # the object it points at
            commit = repo.head.peel()

        # without `force`, create only if that branch isn't existing
        if force or not repo.branches.get(branch_name):
            repo.branches.local.create(branch_name, commit, force=force)
1147 1164
@reraise_safe_exceptions
def remove_ref(self, wire, key):
    """
    Delete the reference named `key` from the repository.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(key)
1153 1170
@reraise_safe_exceptions
def tag_remove(self, wire, tag_name):
    """
    Delete the reference `refs/tags/<tag_name>` from the repository.
    """
    with self._factory.repo_libgit2(wire) as repo:
        tag_ref = f'refs/tags/{tag_name}'
        repo.references.delete(tag_ref)
1160 1177
@reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Compare the trees of two commits and return `(added, modified, deleted)`
    lists of paths.

    An empty `source_id` compares the target tree against no tree at all.
    A change with both an old and a new path counts as modified, one with
    only a new path as added and one with only an old path as deleted.
    """
    repo = self._factory.repo(wire)
    # source can be empty
    source_id = safe_bytes(source_id if source_id else b'')
    target_id = safe_bytes(target_id)

    source_tree = repo[source_id].tree if source_id else None
    target_tree = repo[target_id].tree
    changes = repo.object_store.tree_changes(source_tree, target_tree)

    added, modified, deleted = set(), set(), set()
    for (old_path, new_path), (_, _), (_, _) in list(changes):
        if old_path and new_path:
            modified.add(new_path)
        elif new_path:
            added.add(new_path)
        elif old_path:
            deleted.add(old_path)

    return list(added), list(modified), list(deleted)
1184 1201
@reraise_safe_exceptions
def tree_and_type_for_path(self, wire, commit_id, path):
    """
    Return `(hex id, type string, filemode)` of the entry at `path` in the
    tree of `commit_id`, or `(None, None, None)` when the tree lookup
    raises KeyError.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            try:
                entry = commit.tree[path]
            except KeyError:
                return None, None, None

            return entry.id.hex, entry.type_str, entry.filemode

    return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1204 1221
@reraise_safe_exceptions
def tree_items(self, wire, tree_id):
    """
    Return `(name, filemode, hex id, type)` tuples for the entries of the
    tree `tree_id`.

    Entries of type `commit` are reported with type `link`. Raises
    ObjectMissing when `tree_id` is not found in the repository.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_items(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            entries = []
            for entry in tree:
                entry_sha = entry.hex
                entry_mode = entry.filemode
                # NOTE(marcink): submodules we translate to 'link' for backward compat
                entry_type = 'link' if entry.type_str == 'commit' else entry.type_str
                entries.append((entry.name, entry_mode, entry_sha, entry_type))
            return entries

    return _tree_items(repo_id, tree_id)
1233 1250
@reraise_safe_exceptions
def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Old version that uses subprocess to call diff

    Runs `git diff` between the two commits, or `git show` on
    `commit_id_2` when `commit_id_1` is the empty commit, and returns the
    patch as bytes.

    :param file_filter: optional single path the diff is restricted to
    :param opt_ignorews: when truthy, `--ignore-all-space` is added
    :param context: number of context lines (`-U<context>`)
    """

    flags = [
        f'-U{context}', '--patch',
        '--binary',
        '--find-renames',
        '--no-indent-heuristic',
        # '--indent-heuristic',
        #'--full-index',
        #'--abbrev=40'
    ]

    if opt_ignorews:
        flags.append('--ignore-all-space')

    against_empty = commit_id_1 == self.EMPTY_COMMIT
    if against_empty:
        cmd = ['show'] + flags + [commit_id_2]
    else:
        cmd = ['diff'] + flags + [commit_id_1, commit_id_2]

    if file_filter:
        cmd.extend(['--', file_filter])

    diff, __ = self.run_git_command(wire, cmd)
    # If we used 'show' command, strip first few lines (until actual diff
    # starts)
    if against_empty:
        lines = diff.splitlines()
        start = next(
            (idx for idx, line in enumerate(lines) if line.startswith(b'diff')),
            len(lines))
        # The command output is bytes, so join with a bytes separator (a str
        # separator raises TypeError). Append new line just like 'diff' does.
        diff = b'\n'.join(lines[start:]) + b'\n'
    return diff
1274 1291
@reraise_safe_exceptions
def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Diff two commits with libgit2 and return the patch in a BytesEnvelope.

    When `commit_id_1` is the empty commit, the tree of `commit_id_2` is
    diffed against an empty tree; otherwise the trees of both commits are
    diffed and rename detection is run on the result. With `file_filter`
    set, only the patch whose old file path equals it is returned (an empty
    envelope when none matches).

    :param opt_ignorews: when truthy, whitespace is ignored
    :param context: number of context lines
    """
    with self._factory.repo_libgit2(wire) as repo:
        diff_flags = 0 | pygit2.GIT_DIFF_SHOW_BINARY
        if opt_ignorews:
            diff_flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE

        if commit_id_1 == self.EMPTY_COMMIT:
            new_commit = repo[commit_id_2]
            patchset = new_commit.tree.diff_to_tree(
                flags=diff_flags, context_lines=context, swap=True)
        else:
            new_commit = repo[commit_id_2]
            old_commit = repo[commit_id_1]
            patchset = new_commit.tree.diff_to_tree(
                old_commit.tree, flags=diff_flags, context_lines=context, swap=True)
            patchset.find_similar(flags=0 | pygit2.GIT_DIFF_FIND_RENAMES)

        if not file_filter:
            return BytesEnvelope(safe_bytes(patchset.patch)) or BytesEnvelope(b'')

        for patch in patchset:
            if patch.delta.old_file.path == file_filter:
                return BytesEnvelope(patch.data) or BytesEnvelope(b'')

        # no matching path == no diff
        return BytesEnvelope(b'')
1309 1326
@reraise_safe_exceptions
def node_history(self, wire, commit_id, path, limit):
    """
    Return the 40-character hex commit ids (as bytes) that git reports for
    the history of `path`, starting at `commit_id`.

    `limit == 1` uses `git rev-list -1`; otherwise `git log` is used, with
    `-n <limit>` added when `limit` is truthy.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
        if limit == 1:
            # optimize for n==1, rev-list is much faster for that use-case
            cmd = ['rev-list', '-1', commit_id, '--', path]
        else:
            limit_args = ['-n', str(safe_int(limit, 0))] if limit else []
            cmd = ['log', *limit_args, '--pretty=format: %H', '-s', commit_id, '--', path]

        output, __ = self.run_git_command(wire, cmd)
        return re.findall(rb'[0-9a-fA-F]{40}', output)

    return _node_history(context_uid, repo_id, commit_id, path, limit)
1331 1348
@reraise_safe_exceptions
def node_annotate_legacy(self, wire, commit_id, path):
    """
    Annotate `path` at `commit_id` by parsing `git blame` output.

    Returns `(line_no, blame_commit_id, line)` tuples with 1-based line
    numbers. The last line of the command output is not included.
    """
    # note: replaced by pygit2 implementation
    # -l ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r commit_id ==> blames for the given commit
    blame_cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
    output, __ = self.run_git_command(wire, blame_cmd)

    annotated = []
    for line_no, blame_line in enumerate(output.splitlines()[:-1], start=1):
        blame_commit_id, line = re.split(rb' ', blame_line, 1)
        annotated.append((line_no, blame_commit_id, line))

    return annotated
1348 1365
@reraise_safe_exceptions
def node_annotate(self, wire, commit_id, path):
    """
    Annotate `path` at `commit_id` using libgit2 blame.

    Returns a BinaryEnvelope wrapping a list of
    `(line_no, blame_commit_id, line)` tuples, one per line of the file
    content, with 1-based line numbers.
    """
    annotated = []
    with self._factory.repo_libgit2(wire) as repo:
        commit = repo[commit_id]
        blame = repo.blame(path, newest_commit=commit_id)
        content = commit.tree[path].data

        for line_no, line in enumerate(splitnewlines(content), start=1):
            hunk = blame.for_line(line_no)
            annotated.append((line_no, hunk.final_commit_id.hex, line))

    return BinaryEnvelope(annotated)
1366 1383
@reraise_safe_exceptions
def update_server_info(self, wire, force=False):
    """
    Run `git update-server-info` (adding `--force` when `force` is truthy)
    and return the lines of its output.
    """
    cmd = ['update-server-info', '--force'] if force else ['update-server-info']
    stdout, __ = self.run_git_command(wire, cmd)
    return stdout.splitlines()
1374 1391
@reraise_safe_exceptions
def get_all_commit_ids(self, wire):
    """
    Return the output lines of
    `git rev-list --reverse --date-order --branches --tags`, one commit id
    (bytes) per line.

    Any error from the git call yields an empty list instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids(_context_uid, _repo_id):
        cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
        try:
            output, __ = self.run_git_command(wire, cmd)
            return output.splitlines()
        except Exception:
            # Can be raised for empty repositories
            return []

    # NOTE: an unused pygit2-based variant used to be defined here; it was
    # never called and did not return its results, so it has been dropped.
    return _get_all_commit_ids(context_uid, repo_id)
1402 1419
@reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
    """
    Run the configured git executable with `cmd` and return
    `(stdout, stderr)` as bytes.

    The command runs in wire['path'] when that is an existing directory.
    Special keys are removed from `opts` before the remainder is handed to
    the subprocess chunker:

    `_bare`: do not prepend the default `-c` options
    `_safe`: on OSError return `('', err)` instead of raising
    `_copts`: extra options placed before the git sub-command
    `extra_env`: mapping merged into a copy of the process environment

    Raises a VcsException when the command cannot be started and `_safe`
    was not given.
    """
    path = wire.get('path', None)
    debug_mode = vcsserver.ConfigGet().get_bool('debug')

    if path and os.path.isdir(path):
        opts['cwd'] = path

    bare_call = '_bare' in opts
    opts.pop('_bare', None)
    if bare_call:
        config_opts = []
    else:
        config_opts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']

    # presence of the key is what counts, not its value: no exc on failure
    safe_call = '_safe' in opts
    opts.pop('_safe', None)

    config_opts.extend(opts.pop('_copts', None) or [])

    gitenv = os.environ.copy()
    gitenv.update(opts.pop('extra_env', {}))
    # need to clean fix GIT_DIR !
    gitenv.pop('GIT_DIR', None)
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
    gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

    full_cmd = [settings.GIT_EXECUTABLE()] + config_opts + cmd
    _opts = {'env': gitenv, 'shell': False}

    proc = None
    try:
        _opts.update(opts)
        proc = subprocessio.SubprocessIOChunker(full_cmd, **_opts)

        return b''.join(proc), b''.join(proc.stderr)
    except OSError as err:
        cmd_str = ' '.join(map(safe_str, full_cmd))  # human friendly CMD
        # call options are only included in the message in debug mode
        call_opts = _opts if debug_mode else {}

        tb_err = ("Couldn't run git command ({}).\n"
                  "Original error was:{}\n"
                  "Call options:{}\n"
                  .format(cmd_str, err, call_opts))
        log.exception(tb_err)
        if safe_call:
            return '', err
        raise exceptions.VcsException()(tb_err)
    finally:
        if proc:
            proc.close()
1461 1478
@reraise_safe_exceptions
def install_hooks(self, wire, force=False):
    """
    Install the git hooks into the repository at wire['path'].

    :param force: passed to `install_git_hooks` as `force_create`
    :return: whatever `install_git_hooks` returns
    """
    from vcsserver.hook_utils import install_git_hooks
    bare = self.bare(wire)
    path = wire['path']
    # NOTE: a path to `python3` under settings.BINARY_DIR used to be built
    # here but the result was never used, so that no-op has been removed.
    return install_git_hooks(path, bare, force_create=force)
1471 1488
@reraise_safe_exceptions
def get_hooks_info(self, wire):
    """
    Return a dict with the `pre_version` and `post_version` of the git
    hooks found for the repository at wire['path'].
    """
    from vcsserver.hook_utils import get_git_pre_hook_version
    from vcsserver.hook_utils import get_git_post_hook_version

    is_bare = self.bare(wire)
    repo_path = wire['path']

    pre_version = get_git_pre_hook_version(repo_path, is_bare)
    post_version = get_git_post_hook_version(repo_path, is_bare)
    return {
        'pre_version': pre_version,
        'post_version': post_version,
    }
1482 1499
@reraise_safe_exceptions
def set_head_ref(self, wire, head_name):
    """
    Point HEAD at `refs/heads/<head_name>`.

    Returns a two-item list: `head_name` and a message describing the
    change.
    """
    log.debug('Setting refs/head to `%s`', head_name)
    with self._factory.repo_libgit2(wire) as repo:
        repo.set_head(f'refs/heads/{head_name}')

    summary = f'set HEAD to refs/heads/{head_name}'
    return [head_name, summary]
1491 1508
@reraise_safe_exceptions
def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id, cache_config):
    """
    Hand a file walker over the tree of `commit_id` to
    `store_archive_in_cache`, together with the archive parameters, and
    return its result.

    The walker yields an ArchiveNode for every index entry below the
    requested path; entries whose mode is `GIT_FILEMODE_COMMIT` are
    skipped.
    """

    def file_walker(_commit_id, path):
        with self._factory.repo_libgit2(wire) as repo:
            # NOTE: the enclosing `commit_id` is used here, `_commit_id` is ignored
            commit = repo[commit_id]

            at_root = path in ['', '/']
            subtree = commit.tree if at_root else commit.tree[path.rstrip('/')]
            tree_id = subtree.id.hex
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            # an in-memory index flattens the tree into a list of files
            tree_index = LibGit2Index.Index()
            tree_index.read_tree(tree)

            for entry in tree_index:
                entry_path = entry.path
                mode = entry.mode
                is_link = stat.S_ISLNK(mode)
                if mode == pygit2.GIT_FILEMODE_COMMIT:
                    log.debug('Skipping path %s as a commit node', entry_path)
                    continue
                yield ArchiveNode(entry_path, mode, is_link, repo[entry.hex].read_raw)

    return store_archive_in_cache(
        file_walker, archive_name_key, kind, mtime, archive_at_path,
        archive_dir_name, commit_id, cache_config=cache_config)
General Comments 0
You need to be logged in to leave comments. Login now