##// END OF EJS Templates
fix(LFS): added git lfs push --all <GIT-URL> option into sync_push. Fixes: RCCE-9
ilin.s -
r1195:a578d58d default
parent child Browse files
Show More
@@ -1,1511 +1,1525 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23 import traceback
24 24 import urllib.request
25 25 import urllib.parse
26 26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 40 from dulwich.server import update_server_info
41 41
42 42 import rhodecode
43 43 from vcsserver import exceptions, settings, subprocessio
44 44 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
45 45 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
46 46 from vcsserver.hgcompat import (
47 47 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
48 48 from vcsserver.git_lfs.lib import LFSOidStore
49 49 from vcsserver.vcs_base import RemoteBase
50 50
# Aliases for mode constants coming from `stat` and dulwich `objects`.
DIR_STAT = stat.S_IFDIR
FILE_MODE = stat.S_IFMT
GIT_LINK = objects.S_IFGITLINK
# Ref names ending with this suffix are treated as peeled references and skipped.
PEELED_REF_MARKER = b'^{}'
# Name of the HEAD reference, as bytes.
HEAD_MARKER = b'HEAD'

# Module-level logger.
log = logging.getLogger(__name__)
58 58
59 59
def reraise_safe_exceptions(func):
    """
    Decorator translating Dulwich exceptions into vcsserver neutral exceptions.

    Object lookup failures become `LookupException`, protocol level failures
    become `VcsException`; every other exception propagates unchanged.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as lookup_err:
            raise exceptions.LookupException(org_exc=lookup_err)(safe_str(lookup_err))
        except (HangupException, UnexpectedCommandError) as vcs_err:
            raise exceptions.VcsException(org_exc=vcs_err)(safe_str(vcs_err))
        except Exception:
            # NOTE(marcink): because of how dulwich handles some exceptions
            # (KeyError on empty repos), we cannot track this and catch all
            # exceptions, it's an exceptions from other handlers
            raise
        return result

    return wrapper
82 82
83 83
class Repo(DulwichRepo):
    """
    Thin subclass of the dulwich Repo class.

    Dulwich sometimes keeps .idx file descriptors open, which leads to
    "Too many open files" errors. To avoid that, the repository is closed
    as soon as this object gets destroyed.
    """

    def __del__(self):
        # `object_store` may be missing when construction failed half-way
        if not hasattr(self, 'object_store'):
            return
        self.close()
95 95
96 96
class Repository(LibGit2Repo):
    """
    libgit2 repository usable as a context manager; it is freed on exit.
    """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # release the underlying libgit2 resources, no matter how the block ended
        self.free()
104 104
105 105
class GitFactory(RepoFactory):
    """
    Factory building Git repository objects for a given `wire`,
    backed either by dulwich (default) or by libgit2.
    """
    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """
        Build the repository object for `wire['path']`.

        :param wire: dict describing the repository, must contain `path`
        :param create: accepted for interface compatibility, not used here
        :param use_libgit2: when True return a libgit2 `Repository`,
            otherwise a dulwich `Repo`
        """
        if not use_libgit2:
            # dulwich mode
            dulwich_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
            repo = Repo(dulwich_path)
        else:
            repo = Repository(safe_bytes(wire['path']))

        log.debug('repository created: got GIT object: %s', repo)
        return repo

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """
        Get a libgit2 backed repository instance for the given path.
        """
        return self.repo(wire, use_libgit2=True)
128 128
129 129
def create_signature_from_string(author_str, **kwargs):
    """
    Build a pygit2.Signature out of a 'Name <email>' formatted string.

    :param author_str: String of the format 'Name <email>'
    :param kwargs: extra keyword arguments passed to pygit2.Signature
    :return: pygit2.Signature object
    :raises ValueError: when `author_str` does not follow the expected format
    """
    parsed = re.match(r'^(.+) <(.+)>$', author_str)
    if parsed is not None:
        name, email = parsed.groups()
        return pygit2.Signature(name, email, **kwargs)

    raise ValueError(f"Invalid format: {author_str}")
143 143
144 144
def get_obfuscated_url(url_obj):
    """
    Return `url_obj` rendered as a string with the password masked and the
    query string obfuscated.

    NOTE: `url_obj` is modified in place.
    """
    if url_obj.passwd:
        url_obj.passwd = b'*****'
    url_obj.query = obfuscate_qs(url_obj.query)
    return str(url_obj)
150 150
151 151
152 152 class GitRemote(RemoteBase):
153 153
def __init__(self, factory):
    """
    :param factory: repository factory used to obtain repo objects for a wire
    """
    self._factory = factory

    # attribute name -> method used by `bulk_request`
    bulk_methods = {}
    bulk_methods["date"] = self.date
    bulk_methods["author"] = self.author
    bulk_methods["branch"] = self.branch
    bulk_methods["message"] = self.message
    bulk_methods["parents"] = self.parents
    bulk_methods["_commit"] = self.revision
    self._bulk_methods = bulk_methods

    # attribute name -> method used by `bulk_file_request`
    bulk_file_methods = {}
    bulk_file_methods["size"] = self.get_node_size
    bulk_file_methods["data"] = self.get_node_data
    bulk_file_methods["flags"] = self.get_node_flags
    bulk_file_methods["is_binary"] = self.get_node_is_binary
    bulk_file_methods["md5"] = self.md5_hash
    self._bulk_file_methods = bulk_file_methods
171 171
172 172 def _wire_to_config(self, wire):
173 173 if 'config' in wire:
174 174 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
175 175 return {}
176 176
177 177 def _remote_conf(self, config):
178 178 params = [
179 179 '-c', 'core.askpass=""',
180 180 ]
181 181 config_attrs = {
182 182 'vcs_ssl_dir': 'http.sslCAinfo={}',
183 183 'vcs_git_lfs_store_location': 'lfs.storage={}'
184 184 }
185 185 for key, param in config_attrs.items():
186 186 if value := config.get(key):
187 187 params.extend(['-c', param.format(value)])
188 188 return params
189 189
@reraise_safe_exceptions
def discover_git_version(self):
    """
    Return the output of `git --version` as a string, with the leading
    'git version' prefix removed.
    """
    raw_version, _ = self.run_git_command(
        {}, ['--version'], _bare=True, _safe=True)
    version = raw_version.removeprefix(b'git version')
    return safe_str(version.strip())
198 198
@reraise_safe_exceptions
def is_empty(self, wire):
    """
    Tell if the repository is empty.

    A repository whose HEAD has a name is reported as non-empty. Any error
    raised while inspecting HEAD results in the repository being reported
    as empty.
    """
    with self._factory.repo_libgit2(wire) as repo:
        try:
            if repo.head.name:
                return False

            # NOTE(marcink): check again using more expensive method
            return repo.is_empty
        except Exception:
            return True
215 215
@reraise_safe_exceptions
def assert_correct_path(self, wire):
    """
    Check that `wire['path']` points to a Git repository.

    The (optionally cached) check is always invoked in its fast mode,
    which relies on `pygit2.discover_repository`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _assert_correct_path(_context_uid, _repo_id, fast_check):
        if fast_check:
            return bool(pygit2.discover_repository(safe_str(wire['path'])))

        # slow check: actually try to open the repository
        try:
            with self._factory.repo_libgit2(wire):
                pass
        except pygit2.GitError:
            log.debug("Invalid Git path `%s`, tb: %s", wire.get('path'), traceback.format_exc())
            return False
        return True

    return _assert_correct_path(context_uid, repo_id, True)
241 241
@reraise_safe_exceptions
def bare(self, wire):
    """
    Return the `is_bare` flag of the repository behind `wire`.
    """
    with self._factory.repo_libgit2(wire) as repo:
        is_bare = repo.is_bare
    return is_bare
247 247
@reraise_safe_exceptions
def get_node_data(self, wire, commit_id, path):
    """
    Return the content of the blob stored under `path` in commit `commit_id`,
    wrapped in a `BytesEnvelope`.

    A LookupException is raised when the entry under `path` is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return BytesEnvelope(node.data)

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
260 260
@reraise_safe_exceptions
def get_node_size(self, wire, commit_id, path):
    """
    Return the size of the blob stored under `path` in commit `commit_id`.

    A LookupException is raised when the entry under `path` is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.size

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
273 273
@reraise_safe_exceptions
def get_node_flags(self, wire, commit_id, path):
    """
    Return the file mode of the blob stored under `path` in commit `commit_id`.

    A LookupException is raised when the entry under `path` is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.filemode

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
286 286
@reraise_safe_exceptions
def get_node_is_binary(self, wire, commit_id, path):
    """
    Return the `is_binary` flag of the blob stored under `path` in commit `commit_id`.

    A LookupException is raised when the entry under `path` is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.is_binary

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
299 299
@reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
    """
    Return the data of the object identified by `sha`, wrapped in a `BytesEnvelope`.
    """
    with self._factory.repo_libgit2(wire) as repo:
        raw_data = repo[sha].data
        return BytesEnvelope(raw_data)
306 306
@reraise_safe_exceptions
def blob_raw_length(self, wire, sha):
    """
    Return the size of the object identified by `sha` (optionally cached).
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _blob_raw_length(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[sha].size

    return _blob_raw_length(repo_id, sha)
321 321
322 322 def _parse_lfs_pointer(self, raw_content):
323 323 spec_string = b'version https://git-lfs.github.com/spec'
324 324 if raw_content and raw_content.startswith(spec_string):
325 325
326 326 pattern = re.compile(rb"""
327 327 (?:\n)?
328 328 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
329 329 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
330 330 ^size[ ](?P<oid_size>[0-9]+)\n
331 331 (?:\n)?
332 332 """, re.VERBOSE | re.MULTILINE)
333 333 match = pattern.match(raw_content)
334 334 if match:
335 335 return match.groupdict()
336 336
337 337 return {}
338 338
@reraise_safe_exceptions
def is_large_file(self, wire, commit_id):
    """
    Return the parsed LFS pointer data of the object identified by `commit_id`.

    Binary objects, and objects which are not LFS pointers, give an empty dict.
    The result is optionally cached.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_large_file(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            blob = repo[commit_id]
            if not blob.is_binary:
                return self._parse_lfs_pointer(blob.data)
            return {}

    return _is_large_file(repo_id, commit_id)
355 355
@reraise_safe_exceptions
def is_binary(self, wire, tree_id):
    """
    Return the `is_binary` flag of the object identified by `tree_id`
    (optionally cached).
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_binary(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[tree_id].is_binary

    return _is_binary(repo_id, tree_id)
369 369
@reraise_safe_exceptions
def md5_hash(self, wire, commit_id, path):
    """
    Validate that `path` in commit `commit_id` is a blob.

    No hash is computed here: the returned value is always an empty string.
    A LookupException is raised when the entry under `path` is not a blob.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _md5_hash(_repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            node = repo[_commit_id].tree[_path]

            if node.type == pygit2.GIT_OBJ_BLOB:
                return ''

            raise exceptions.LookupException()(
                f'Tree for commit_id:{_commit_id} is not a blob: {node.type_str}')

    return _md5_hash(repo_id, commit_id, path)
389 389
@reraise_safe_exceptions
def in_largefiles_store(self, wire, oid):
    """
    Tell if `oid` is present in the configured LFS store.

    Returns False when no `vcs_git_lfs_store_location` is configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        return False

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.has_oid()
405 405
@reraise_safe_exceptions
def store_path(self, wire, oid):
    """
    Return the path of `oid` inside the configured LFS store.

    :raises ValueError: when no `vcs_git_lfs_store_location` is configured
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        raise ValueError(f'Unable to fetch oid with path {oid}')

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.oid_path
419 419
@reraise_safe_exceptions
def bulk_request(self, wire, rev, pre_load):
    """
    Fetch several commit attributes of `rev` in one call.

    :param pre_load: names of attributes, each must be a key of `self._bulk_methods`
    :return: dict mapping attribute name to the value computed by its method
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_request(_repo_id, _rev, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                bulk_method = self._bulk_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire.update({'cache': False})
                collected[attr] = bulk_method(wire, rev)
            except KeyError as e:
                raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
        return collected

    return _bulk_request(repo_id, rev, sorted(pre_load))
439 439
@reraise_safe_exceptions
def bulk_file_request(self, wire, commit_id, path, pre_load):
    """
    Fetch several file attributes of `path` at `commit_id` in one call.

    :param pre_load: names of attributes, each must be a key of `self._bulk_file_methods`
    :return: `BinaryEnvelope` wrapping a dict of attribute name -> value
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                bulk_method = self._bulk_file_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire.update({'cache': False})
                collected[attr] = bulk_method(wire, _commit_id, _path)
            except KeyError as e:
                raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
        return collected

    file_data = _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
    return BinaryEnvelope(file_data)
458 458
def _build_opener(self, url: str):
    """
    Build an urllib opener for `url`.

    When the url carries authentication info, basic and digest auth handlers
    sharing one password manager are installed on the opener.
    """

    def _convert_to_strings(data):
        # bytes (also nested inside tuples) are decoded, anything else is kept as is
        if isinstance(data, tuple):
            return tuple(_convert_to_strings(item) for item in data)
        if isinstance(data, bytes):
            return safe_str(data)
        return data

    authinfo = url_parser(safe_bytes(url)).authinfo()[1]

    handlers = []
    if authinfo:
        # create a password manager
        passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        passmgr.add_password(*_convert_to_strings(authinfo))

        handlers.append(httpbasicauthhandler(passmgr))
        handlers.append(httpdigestauthhandler(passmgr))

    return urllib.request.build_opener(*handlers)
473 481
@reraise_safe_exceptions
def check_url(self, url, config):
    """
    Check that `url` can be opened and answers like a Git repository.

    The `<url>/info/refs?service=git-upload-pack` resource is requested and
    its body inspected.

    :param url: remote url, may contain credentials
    :param config: accepted for interface compatibility, not used here
    :return: True when the remote looks like a Git repository
    :raises: URLError when the url cannot be opened, or the response
        does not look like a Git repository
    """
    url_obj = url_parser(safe_bytes(url))

    # NOTE: test_uri has to be read before obfuscation, which modifies url_obj in place
    test_uri = safe_str(url_obj.authinfo()[0])
    obfuscated_uri = get_obfuscated_url(url_obj)

    log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)

    if not test_uri.endswith('info/refs'):
        test_uri = test_uri.rstrip('/') + '/info/refs'

    o = self._build_opener(url=url)
    o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

    q = {"service": 'git-upload-pack'}
    qs = f'?{urllib.parse.urlencode(q)}'
    cu = f"{test_uri}{qs}"

    try:
        req = urllib.request.Request(cu, None, {})
        log.debug("Trying to open URL %s", obfuscated_uri)
        resp = o.open(req)
        if resp.code != 200:
            raise exceptions.URLError()('Return Code is not 200')
    except Exception as e:
        log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
        # means it cannot be cloned
        raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")

    # now detect if it's proper git repo; always release the connection
    try:
        gitdata: bytes = resp.read()
    finally:
        resp.close()

    looks_like_git = (
        b'service=git-upload-pack' in gitdata
        # old style git can return some other format!
        or re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata)
    )
    if not looks_like_git:
        raise exceptions.URLError(None)(
            f"url [{obfuscated_uri}] does not look like a git repo org_exc: None")

    return True
518 526
@reraise_safe_exceptions
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
    """
    Pull from `url` and store the matching remote refs in the repository.

    :param deferred: suffix of ref names which should be skipped
    :param valid_refs: prefix (or list/tuple of prefixes) of ref names to store
    :param update_after_clone: when set, HEAD is updated and the index is
        built from its tree
    """
    # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
    remote_refs = self.pull(wire, url, apply_refs=False)
    repo = self._factory.repo(wire)

    # str.startswith accepts a tuple of prefixes, but not a list
    prefixes = tuple(valid_refs) if isinstance(valid_refs, list) else valid_refs

    for ref_name in remote_refs:
        # only parse heads/tags and skip so called deferred tags
        if not ref_name.startswith(prefixes) or ref_name.endswith(deferred):
            continue
        repo[ref_name] = remote_refs[ref_name]

    if not update_after_clone:
        return

    # we want to checkout HEAD
    repo["HEAD"] = remote_refs["HEAD"]
    index.build_index_from_tree(
        repo.path, repo.index_path(), repo.object_store, repo["HEAD"].tree)
537 545
@reraise_safe_exceptions
def branch(self, wire, commit_id):
    """
    Return names of the branches (refs under `refs/heads`) which point
    exactly at `commit_id`. The result is optionally cached.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _branch(_context_uid, _repo_id, _commit_id):
        all_refs = self.get_refs(wire)
        return [
            ref_name.split('refs/heads/')[-1]
            for ref_name, ref_sha in all_refs.items()
            if ref_name.startswith('refs/heads') and ref_sha == _commit_id
        ]

    return _branch(context_uid, repo_id, commit_id)
554 562
@reraise_safe_exceptions
def commit_branches(self, wire, commit_id):
    """
    Return the list of branches reported by libgit2 as containing `commit_id`
    (optionally cached).
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _commit_branches(_context_uid, _repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return list(repo.branches.with_commit(_commit_id))

    return _commit_branches(context_uid, repo_id, commit_id)
568 576
@reraise_safe_exceptions
def add_object(self, wire, content):
    """
    Store `content` as a new blob in the repository.

    :param content: raw bytes of the blob
    :return: id of the created dulwich blob
    """
    # NOTE: `objects.Blob` and `object_store` are dulwich APIs, so the dulwich
    # backed repository has to be used here; the libgit2 Repository object
    # does not provide an `object_store` attribute.
    repo = self._factory.repo(wire)

    blob = objects.Blob()
    blob.set_raw_string(content)
    repo.object_store.add_object(blob)
    return blob.id
577 585
@reraise_safe_exceptions
def create_commit(self, wire, author, committer, message, branch, new_tree_id,
                  date_args: list[int] | None = None,
                  parents: list | None = None):
    """
    Create a new commit on `refs/heads/<branch>`.

    :param author: 'Name <email>' string (or an already built signature)
    :param committer: 'Name <email>' string (or an already built signature)
    :param message: commit message
    :param branch: short name of the branch which gets updated
    :param new_tree_id: id of the tree for the new commit
    :param date_args: optional `[time, offset]` pair used for both signatures
    :param parents: optional list of parent commit ids; when empty, the head
        of `branch` (or the repository HEAD) is used if it exists
    :return: id of the created commit, as a string
    """
    with self._factory.repo_libgit2(wire) as repo:

        signature_kw = {}
        if date_args:
            current_time, offset = date_args
            signature_kw = {
                'time': current_time,
                'offset': offset
            }

        # always hand signature objects over to libgit2, also when no
        # explicit date was given
        if isinstance(author, str):
            author = create_signature_from_string(author, **signature_kw)
        if isinstance(committer, str):
            committer = create_signature_from_string(committer, **signature_kw)

        tree = new_tree_id
        if isinstance(tree, (bytes, str)):
            # validate this tree is in the repo...
            tree = repo[safe_str(tree)].id

        if parents:
            # run via sha's and validate them in repo
            parents = [repo[c].id for c in parents]
        else:
            parents = []
            # ensure we COMMIT on top of given branch head
            # check if this repo has ANY branches, otherwise it's a new branch case we need to make
            if branch in repo.branches.local:
                parents.append(repo.branches[branch].target)
            elif any(True for _ in repo.branches.local):
                parents.append(repo.head.target)

        # Create a new commit
        commit_oid = repo.create_commit(
            f'refs/heads/{branch}',  # the name of the reference to update
            author,  # the author of the commit
            committer,  # the committer of the commit
            message,  # the commit message
            tree,  # the tree produced by the index
            parents  # list of parents for the new commit, usually just one,
        )

        return safe_str(commit_oid)
629 637
@reraise_safe_exceptions
def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
    """
    Write `updated` / `removed` paths into the index and commit the result on `branch`.

    :param commit_data: dict with `author`, `committer`, `message`, `parents`,
        `commit_time` and `commit_timezone` keys
    :param commit_tree: when set (and parents are given) the index is first
        loaded from the tree of the first parent
    :param updated: iterable of dicts with `path`, `content` and `mode` keys
    :param removed: iterable of paths to drop from the index
    :return: id of the created commit
    """

    def mode2pygit(mode):
        """
        Map a file mode onto a pygit2 filemode, unknown modes become a plain blob

        0o100755 -> 33261
        0o100644 -> 33188
        """
        known_modes = {
            0o100644: pygit2.GIT_FILEMODE_BLOB,
            0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
            0o120000: pygit2.GIT_FILEMODE_LINK
        }
        return known_modes.get(mode) or pygit2.GIT_FILEMODE_BLOB

    with self._factory.repo_libgit2(wire) as repo:
        repo_index = repo.index

        commit_parents = None
        if commit_tree and commit_data['parents']:
            commit_parents = commit_data['parents']
            first_parent = repo[commit_parents[0]]
            repo_index.read_tree(first_parent.tree)

        for pathspec in updated:
            blob_id = repo.create_blob(pathspec['content'])
            entry_mode = mode2pygit(pathspec['mode'])
            repo_index.add(pygit2.IndexEntry(pathspec['path'], blob_id, entry_mode))

        for pathspec in removed:
            repo_index.remove(pathspec)

        # Write changes to the index
        repo_index.write()

        # Create a tree from the updated index
        new_tree_id = repo_index.write_tree()

        date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]

        new_commit_id = self.create_commit(
            wire, commit_data['author'], commit_data['committer'], commit_data['message'],
            branch, new_tree_id, date_args=date_args, parents=commit_parents)

        # libgit2, ensure the branch is there and exists
        self.create_branch(wire, branch, new_commit_id)

        # libgit2, set new ref to this created commit
        self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)

        return new_commit_id
688 696
@reraise_safe_exceptions
def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
    """
    Fetch from `url` into the repository using the dulwich client.

    :param url: local path, or remote url (anything containing `://`, or 'default')
    :param apply_refs: when set, fetched refs are stored in the repository
    :param refs: optional list of ref names limiting what gets fetched
    :param update_after: when set, HEAD is updated and the index is built from its tree
    :return: the remote refs (limited to `refs` when given)
    """
    is_local = url != 'default' and '://' not in url
    if is_local:
        client = LocalGitClient(url)
    else:
        url_obj = url_parser(safe_bytes(url))
        o = self._build_opener(url)
        url = url_obj.authinfo()[0]
        client = HttpGitClient(base_url=url, opener=o)
    repo = self._factory.repo(wire)

    determine_wants = repo.object_store.determine_wants_all

    if refs:
        refs: list[bytes] = [ascii_bytes(x) for x in refs]

        def determine_wants_requested(_remote_refs):
            return [
                safe_bytes(ref_hash)
                for ref_name, ref_hash in _remote_refs.items()
                if safe_bytes(ref_name) in refs
            ]

        # swap with our custom requested wants
        determine_wants = determine_wants_requested

    try:
        remote_refs = client.fetch(
            path=url, target=repo, determine_wants=determine_wants)

    except NotGitRepository as e:
        log.warning(
            'Trying to fetch from "%s" failed, not a Git repository.', url)
        # Exception can contain unicode which we convert
        raise exceptions.AbortException(e)(repr(e))

    # mikhail: client.fetch() returns all the remote refs, but fetches only
    # refs filtered by `determine_wants` function. We need to filter result
    # as well
    if refs:
        remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}

    if apply_refs:
        # TODO: johbo: Needs proper test coverage with a git repository
        # that contains a tag object, so that we would end up with
        # a peeled ref at this point.
        for ref_name in remote_refs:
            if ref_name.endswith(PEELED_REF_MARKER):
                log.debug("Skipping peeled reference %s", ref_name)
                continue
            repo[ref_name] = remote_refs[ref_name]

        if refs and not update_after:
            # update to ref
            # mikhail: explicitly set the head to the last ref.
            update_to_ref = update_after if isinstance(update_after, str) else refs[-1]
            repo[HEAD_MARKER] = remote_refs[update_to_ref]

    if update_after:
        # we want to check out HEAD
        repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
        index.build_index_from_tree(repo.path, repo.index_path(),
                                    repo.object_store, repo[HEAD_MARKER].tree)

    if isinstance(remote_refs, FetchPackResult):
        return remote_refs.refs
    return remote_refs
762 770
@reraise_safe_exceptions
def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs):
    """
    Fetch refs from `url` into the repository using the git binary.

    Remote refs are listed with `git ls-remote`, then fetched in chunks.

    :param refs: optional commit id (or list/tuple of them); when given only
        remote refs pointing at one of those ids are fetched
    :param all_refs: when False only heads and tags are listed
    :param kwargs: `sync_large_objects` triggers an additional `git lfs fetch --all`
    :return: ordered dict of remote ref name -> sha (bytes), without HEAD
        and peeled refs
    """
    self._factory.repo(wire)
    if refs and not isinstance(refs, (list, tuple)):
        refs = [refs]

    # `ls-remote` output is bytes, so the requested ids are normalized to
    # bytes as well; otherwise str ids would never match
    wanted_shas = {safe_bytes(x) for x in refs} if refs else set()

    config = self._wire_to_config(wire)
    # get all remote refs we'll use to fetch later
    cmd = ['ls-remote']
    if not all_refs:
        cmd += ['--heads', '--tags']
    cmd += [url]
    output, __ = self.run_git_command(
        wire, cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    remote_refs = collections.OrderedDict()
    fetch_refs = []

    for ref_line in output.splitlines():
        if not ref_line.strip():
            # nothing to parse on blank lines
            continue
        sha, ref = ref_line.split(b'\t')
        sha = sha.strip()
        if ref in remote_refs:
            # duplicate, skip
            continue
        if ref.endswith(PEELED_REF_MARKER):
            log.debug("Skipping peeled reference %s", ref)
            continue
        # don't sync HEAD
        if ref == HEAD_MARKER:
            continue

        remote_refs[ref] = sha

        # we filter fetch using our specified refs, if there are any
        if not refs or sha in wanted_shas:
            fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
    log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))

    if fetch_refs:
        for chunk in more_itertools.chunked(fetch_refs, 128):
            fetch_refs_chunks = list(chunk)
            log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
            self.run_git_command(
                wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
                fail_on_stderr=False,
                _copts=self._remote_conf(config),
                extra_env={'GIT_TERMINAL_PROMPT': '0'})
        if kwargs.get('sync_large_objects'):
            self.run_git_command(
                wire, ['lfs', 'fetch', url, '--all'],
                fail_on_stderr=False,
                _copts=self._remote_conf(config),
            )

    return remote_refs
822 830
@reraise_safe_exceptions
def sync_push(self, wire, url, refs=None, **kwargs):
    """
    Mirror-push the repository to `url` using the git binary.

    :param refs: accepted for interface compatibility, not used here
    :param kwargs: `sync_large_objects` triggers an additional `git lfs push --all`
    """
    url_is_valid = self.check_url(url, wire)
    if not url_is_valid:
        return

    config = self._wire_to_config(wire)
    self._factory.repo(wire)

    push_cmd = ['push', url, '--mirror']
    self.run_git_command(
        wire, push_cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    if not kwargs.get('sync_large_objects'):
        return

    lfs_push_cmd = ['lfs', 'push', url, '--all']
    self.run_git_command(
        wire, lfs_push_cmd,
        fail_on_stderr=False,
        _copts=self._remote_conf(config),
    )
833 847
@reraise_safe_exceptions
def get_remote_refs(self, wire, url):
    """
    Return the refs of the repository found at `url`, opened with dulwich.
    """
    return Repo(url).get_refs()
838 852
@reraise_safe_exceptions
def get_description(self, wire):
    """
    Return the description of the repository behind `wire`.
    """
    return self._factory.repo(wire).get_description()
843 857
@reraise_safe_exceptions
def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
    """
    Return ids of commits reachable from `rev2` but not from `rev1`.

    Both repositories fetch from each other first; the walk itself is done
    on the repository at `other_repo_path`.
    """
    origin_repo_path = wire['path']
    origin_repo = self._factory.repo(wire)
    # fetch from other_repo_path to our origin repo
    LocalGitClient(thin_packs=False).fetch(other_repo_path, origin_repo)

    wire_remote = wire.copy()
    wire_remote['path'] = other_repo_path
    repo_remote = self._factory.repo(wire_remote)

    # fetch from origin_repo_path to our remote repo
    LocalGitClient(thin_packs=False).fetch(origin_repo_path, repo_remote)

    walker = repo_remote.get_walker(
        include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])
    return [entry.commit.id for entry in walker]
862 876
@reraise_safe_exceptions
def get_object(self, wire, sha, maybe_unreachable=False):
    """
    Resolve `sha` (a commit id or a reference) into basic object data.

    Tags are resolved to their target. When `sha` is a full commit id, is
    not a tag, and `maybe_unreachable` is not set, the commit must be
    contained in at least one branch.

    :return: dict with `id`, `type`, `commit_id` and `idx` keys
    :raises: LookupException when the object cannot be found, or is dangling
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_object(_context_uid, _repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:

            missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
            try:
                commit = repo.revparse_single(sha)
            except KeyError:
                # NOTE(marcink): KeyError doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found', sha)
                raise exceptions.LookupException(e)(missing_commit_err)
            except ValueError as e:
                raise exceptions.LookupException(e)(missing_commit_err)

            is_tag = isinstance(commit, pygit2.Tag)
            if is_tag:
                commit = repo.get(commit.target)

            # if we used a reference and it parsed, we're not having a dangling commit
            used_reference = sha != commit.hex
            check_dangling = not (is_tag or maybe_unreachable or used_reference)

            if check_dangling:
                # check for dangling commit
                in_any_branch = any(
                    branch for branch in repo.branches.with_commit(commit.hex))
                if not in_any_branch:
                    # NOTE(marcink): Empty error doesn't give us any meaningful information
                    # here, we instead give something more explicit
                    e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
                    raise exceptions.LookupException(e)(missing_commit_err)

            commit_id = commit.hex
            return {
                'id': commit_id,
                'type': commit.type_str,
                'commit_id': commit_id,
                'idx': 0
            }

    return _get_object(context_uid, repo_id, sha)
922 936
923 937 @reraise_safe_exceptions
def get_refs(self, wire):
    """
    Map the name of every ``refs/heads/`` and ``refs/tags/`` reference to
    the hex id of its target.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_refs(_context_uid, _repo_id):
        heads_or_tags = re.compile('^refs/(heads|tags)/')
        with self._factory.repo_libgit2(wire) as repo:
            return {
                ref.name: ref.target.hex
                for ref in repo.listall_reference_objects()
                if heads_or_tags.match(ref.name)
            }

    return _get_refs(context_uid, repo_id)
938 952
939 953 @reraise_safe_exceptions
def get_branch_pointers(self, wire):
    """
    Map the target hex id of every reference whose name starts with
    ``refs/heads`` to that reference's shorthand name.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_branch_pointers(_context_uid, _repo_id):
        is_head = re.compile('^refs/heads').match
        with self._factory.repo_libgit2(wire) as repo:
            return {
                ref.target.hex: ref.shorthand
                for ref in repo.listall_reference_objects()
                if is_head(ref.name)
            }

    return _get_branch_pointers(context_uid, repo_id)
954 968
955 969 @reraise_safe_exceptions
def head(self, wire, show_exc=True):
    """
    Return the hex id of the object the repository HEAD peels to.

    :param wire: connection dict describing the repository.
    :param show_exc: when true, any error raised while resolving HEAD is
        re-raised; when false the error is swallowed and ``None`` is
        returned implicitly.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _head(_context_uid, _repo_id, _show_exc):
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            try:
                return repo.head.peel().hex
            except Exception:
                # falls through to an implicit `None` when show_exc is false
                if show_exc:
                    raise
    return _head(context_uid, repo_id, show_exc)
970 984
971 985 @reraise_safe_exceptions
def init(self, wire):
    """Create the directory ``wire['path']`` and initialise a non-bare repository in it."""
    target_dir = safe_str(wire['path'])
    os.makedirs(target_dir, mode=0o755)
    pygit2.init_repository(target_dir, bare=False)
976 990
977 991 @reraise_safe_exceptions
def init_bare(self, wire):
    """Create the directory ``wire['path']`` and initialise a bare repository in it."""
    target_dir = safe_str(wire['path'])
    os.makedirs(target_dir, mode=0o755)
    pygit2.init_repository(target_dir, bare=True)
982 996
983 997 @reraise_safe_exceptions
def revision(self, wire, rev):
    """
    Look up ``rev`` in the repository and return a dict with its hex ``id``
    and, when the object exposes ``tree_id``, the hex id of its ``tree``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _revision(_context_uid, _repo_id, _rev):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[rev]
            data = {'id': obj.id.hex}
            # tree objects itself don't have tree_id attribute
            if hasattr(obj, 'tree_id'):
                data['tree'] = obj.tree_id.hex
            return data

    return _revision(context_uid, repo_id, rev)
1003 1017
1004 1018 @reraise_safe_exceptions
def date(self, wire, commit_id):
    """
    Return ``[commit_time, commit_time_offset]`` for ``commit_id``.

    Objects without a ``commit_time`` attribute are first resolved through
    their ``get_object()``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _date(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]
            if not hasattr(obj, 'commit_time'):
                obj = obj.get_object()

            # TODO(marcink): check dulwich difference of offset vs timezone
            return [obj.commit_time, obj.commit_time_offset]

    return _date(repo_id, commit_id)
1024 1038
1025 1039 @reraise_safe_exceptions
def author(self, wire, commit_id):
    """
    Return the author of ``commit_id`` as ``"name <email>"``, or just the
    name when the signature carries no email.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _author(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]
            # objects without an `author` are resolved via get_object() first
            signature = obj.author if hasattr(obj, 'author') else obj.get_object().author

            if signature.email:
                return f"{signature.name} <{signature.email}>"

            try:
                return f"{signature.name}"
            except Exception:
                # reading the name failed, fall back to the raw name
                return f"{safe_str(signature.raw_name)}"

    return _author(repo_id, commit_id)
1050 1064
1051 1065 @reraise_safe_exceptions
def message(self, wire, commit_id):
    """Return the ``message`` attribute of the object stored under ``commit_id``."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _message(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[commit_id].message

    return _message(repo_id, commit_id)
1063 1077
1064 1078 @reraise_safe_exceptions
def parents(self, wire, commit_id):
    """
    Return the hex ids of the parents of ``commit_id``.

    Objects without ``parent_ids`` are first resolved through their
    ``get_object()``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _parents(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]
            if not hasattr(obj, 'parent_ids'):
                obj = obj.get_object()
            return [oid.hex for oid in obj.parent_ids]

    return _parents(repo_id, commit_id)
1081 1095
1082 1096 @reraise_safe_exceptions
def children(self, wire, commit_id):
    """
    Return the ids listed as children of ``commit_id`` by
    ``git rev-list --all --children <commit_id>^..<head>``.

    Only the first output line starting with ``commit_id`` is used; an
    empty list is returned when no line matches.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    head = self.head(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _children(_repo_id, _commit_id):
        stdout, _stderr = self.run_git_command(
            wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])

        starts_with_commit = re.compile(fr'^{commit_id}')
        for raw_line in stdout.splitlines():
            text = safe_str(raw_line)
            if starts_with_commit.match(text):
                # "<commit> <child> <child> ...": drop the leading commit id
                return text.split(' ')[1:]

        return []

    return _children(repo_id, commit_id)
1106 1120
1107 1121 @reraise_safe_exceptions
def set_refs(self, wire, key, value):
    """Create the reference ``key`` pointing at ``value``, overwriting an existing one."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.create(key, value, force=True)
1112 1126
1113 1127 @reraise_safe_exceptions
def update_refs(self, wire, key, value):
    """
    Re-point the existing reference ``key`` at ``value``.

    :raises ValueError: when ``key`` is not a reference of the repository.
    """
    with self._factory.repo_libgit2(wire) as repo:
        known = key in repo.references
        if not known:
            raise ValueError(f'Reference {key} not found in the repository')
        repo.references.create(key, value, force=True)
1120 1134
1121 1135 @reraise_safe_exceptions
def create_branch(self, wire, branch_name, commit_id, force=False):
    """
    Create the local branch ``branch_name``.

    :param wire: connection dict describing the repository.
    :param branch_name: name of the branch to create.
    :param commit_id: commit the branch should point at; when empty the
        commit HEAD resolves to is used.
    :param force: overwrite an already existing branch; without it an
        existing branch is left untouched.
    """
    with self._factory.repo_libgit2(wire) as repo:
        if commit_id:
            commit = repo[commit_id]
        else:
            # if commit is not given just use the HEAD; `repo.head` is a
            # property holding a reference (not a callable), so peel it
            # down to the commit object branch creation expects
            commit = repo.head.peel()

        # without force, create only if that branch isn't existing
        if force or not repo.branches.get(branch_name):
            repo.branches.local.create(branch_name, commit, force=force)
1136 1150
1137 1151 @reraise_safe_exceptions
def remove_ref(self, wire, key):
    """Delete the reference ``key`` from the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(key)
1142 1156
1143 1157 @reraise_safe_exceptions
def tag_remove(self, wire, tag_name):
    """Delete the reference ``refs/tags/<tag_name>`` from the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(f'refs/tags/{tag_name}')
1149 1163
1150 1164 @reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Compare the trees of two commits.

    :param source_id: commit to compare from; may be empty, in which case
        no source tree is passed to the comparison.
    :param target_id: commit to compare to.
    :return: three lists of paths: added, modified and deleted.
    """
    repo = self._factory.repo(wire)
    # source can be empty
    source_id = safe_bytes(source_id if source_id else b'')
    target_id = safe_bytes(target_id)

    source_tree = repo[source_id].tree if source_id else None
    target_tree = repo[target_id].tree
    changes = repo.object_store.tree_changes(source_tree, target_tree)

    added, modified, deleted = set(), set(), set()
    for (old_path, new_path), (_old_mode, _new_mode), (_old_sha, _new_sha) in changes:
        if old_path and new_path:
            modified.add(new_path)
        elif new_path:
            added.add(new_path)
        elif old_path:
            deleted.add(old_path)

    return list(added), list(modified), list(deleted)
1173 1187
1174 1188 @reraise_safe_exceptions
def tree_and_type_for_path(self, wire, commit_id, path):
    """
    Look up ``path`` in the tree of ``commit_id``.

    :return: ``(hex id, type string, filemode)`` of the entry, or
        ``(None, None, None)`` when the tree has no such path.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            # the commit lookup stays outside the try: a missing commit
            # must still surface as an error
            commit = repo[commit_id]
            try:
                node = commit.tree[path]
            except KeyError:
                return None, None, None

            return node.id.hex, node.type_str, node.filemode

    return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1193 1207
1194 1208 @reraise_safe_exceptions
def tree_items(self, wire, tree_id):
    """
    List the entries of the tree ``tree_id`` as
    ``(name, filemode, hex id, type)`` tuples.

    :raises ObjectMissing: when ``tree_id`` is not in the repository.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_items(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            entries = []
            for node in tree:
                node_type = node.type_str
                # NOTE(marcink): submodules we translate to 'link' for backward compat
                kind = 'link' if node_type == 'commit' else node_type
                entries.append((node.name, node.filemode, node.hex, kind))
            return entries

    return _tree_items(repo_id, tree_id)
1222 1236
1223 1237 @reraise_safe_exceptions
def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Old version that uses subprocess to call diff

    :param commit_id_1: commit to diff from; ``self.EMPTY_COMMIT`` switches
        to ``git show`` of ``commit_id_2``.
    :param commit_id_2: commit to diff to.
    :param file_filter: optional single path the diff is limited to.
    :param opt_ignorews: when true, whitespace changes are ignored.
    :param context: number of context lines.
    :return: the diff as produced by ``run_git_command`` (bytes).
    """

    flags = [
        f'-U{context}', '--patch',
        '--binary',
        '--find-renames',
        '--no-indent-heuristic',
        # '--indent-heuristic',
        #'--full-index',
        #'--abbrev=40'
    ]

    if opt_ignorews:
        flags.append('--ignore-all-space')

    used_show = commit_id_1 == self.EMPTY_COMMIT
    if used_show:
        cmd = ['show'] + flags + [commit_id_2]
    else:
        cmd = ['diff'] + flags + [commit_id_1, commit_id_2]

    if file_filter:
        cmd.extend(['--', file_filter])

    diff, __ = self.run_git_command(wire, cmd)
    # If we used 'show' command, strip first few lines (until actual diff
    # starts)
    if used_show:
        lines = diff.splitlines()
        start = next(
            (idx for idx, line in enumerate(lines) if line.startswith(b'diff')),
            len(lines))
        # Append new line just like 'diff' command do; the output is bytes,
        # so it has to be joined with a bytes separator
        diff = b'\n'.join(lines[start:]) + b'\n'
    return diff
1263 1277
1264 1278 @reraise_safe_exceptions
def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Produce the diff between two commits through libgit2.

    :param commit_id_1: commit to diff from; ``self.EMPTY_COMMIT`` diffs
        the tree of ``commit_id_2`` with no other tree given.
    :param commit_id_2: commit to diff to.
    :param file_filter: optional path; when set, only the patch whose old
        file path equals it is returned.
    :param opt_ignorews: when true, whitespace changes are ignored.
    :param context: number of context lines.
    :return: ``BytesEnvelope`` wrapping the patch data.
    """
    repo_init = self._factory.repo_libgit2(wire)

    with repo_init as repo:
        # the diff is taken from the tree of commit_id_2 with swap=True
        # NOTE(review): presumably this yields the commit_id_1 -> commit_id_2
        # direction - confirm against pygit2 `Tree.diff_to_tree`
        swap = True
        flags = 0
        flags |= pygit2.GIT_DIFF_SHOW_BINARY

        if opt_ignorews:
            flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE

        if commit_id_1 == self.EMPTY_COMMIT:
            comm1 = repo[commit_id_2]
            diff_obj = comm1.tree.diff_to_tree(
                flags=flags, context_lines=context, swap=swap)

        else:
            comm1 = repo[commit_id_2]
            comm2 = repo[commit_id_1]
            diff_obj = comm1.tree.diff_to_tree(
                comm2.tree, flags=flags, context_lines=context, swap=swap)
            # rename detection is a separate pass over the computed diff
            similar_flags = 0
            similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
            diff_obj.find_similar(flags=similar_flags)

        if file_filter:
            for p in diff_obj:
                if p.delta.old_file.path == file_filter:
                    return BytesEnvelope(p.data) or BytesEnvelope(b'')
            # no matching path == no diff
            return BytesEnvelope(b'')

        return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')
1298 1312
1299 1313 @reraise_safe_exceptions
def node_history(self, wire, commit_id, path, limit):
    """
    Return the ids (bytes) of the commits that touched ``path``, starting
    at ``commit_id``.

    :param limit: maximum number of commits; a falsy value means no limit.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
        # optimize for n==1, rev-list is much faster for that use-case
        if limit == 1:
            cmd = ['rev-list', '-1', commit_id, '--', path]
        else:
            limit_args = ['-n', str(safe_int(limit, 0))] if limit else []
            cmd = ['log', *limit_args, '--pretty=format: %H', '-s', commit_id, '--', path]

        stdout, _stderr = self.run_git_command(wire, cmd)
        # every 40-character hex run in the output is taken as a commit id
        return re.findall(rb'[0-9a-fA-F]{40}', stdout)

    return _node_history(context_uid, repo_id, commit_id, path, limit)
1320 1334
1321 1335 @reraise_safe_exceptions
def node_annotate_legacy(self, wire, commit_id, path):
    """
    Annotate ``path`` at ``commit_id`` by parsing ``git blame`` output.

    :return: list of ``(line_no, blame_commit_id, line)`` tuples, one per
        blame output line, with ``line_no`` starting at 1; the last two
        items are bytes.
    """
    # note: replaced by pygit2 implementation
    cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
    # -l ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r commit_id ==> blames for the given commit
    output, __ = self.run_git_command(wire, cmd)

    result = []
    # splitlines() produces no trailing empty element, so every entry is a
    # real blame line and none of them may be cut off
    for line_no, blame_line in enumerate(output.splitlines(), start=1):
        blame_commit_id, line = blame_line.split(b' ', 1)
        result.append((line_no, blame_commit_id, line))

    return result
1337 1351
1338 1352 @reraise_safe_exceptions
def node_annotate(self, wire, commit_id, path):
    """
    Annotate ``path`` at ``commit_id`` through libgit2 blame.

    :return: ``BinaryEnvelope`` wrapping a list of
        ``(line_no, blame_commit_id, line)`` tuples, ``line_no`` from 1.
    """
    with self._factory.repo_libgit2(wire) as repo:
        commit = repo[commit_id]
        blame = repo.blame(path, newest_commit=commit_id)
        content_lines = commit.tree[path].data.splitlines()
        annotated = [
            (line_no, blame.for_line(line_no).final_commit_id.hex, content)
            for line_no, content in enumerate(content_lines, start=1)
        ]

    return BinaryEnvelope(annotated)
1354 1368
1355 1369 @reraise_safe_exceptions
def update_server_info(self, wire):
    """Run the module-level ``update_server_info`` on the repository described by ``wire``."""
    update_server_info(self._factory.repo(wire))
1359 1373
1360 1374 @reraise_safe_exceptions
def get_all_commit_ids(self, wire):
    """
    Return the ids of all commits reachable from branches and tags, as the
    lines printed by ``git rev-list --reverse --date-order --branches --tags``.

    Any failure of the git command results in an empty list.
    """

    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids(_context_uid, _repo_id):

        cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
        try:
            output, __ = self.run_git_command(wire, cmd)
            return output.splitlines()
        except Exception:
            # Can be raised for empty repositories
            return []

    # NOTE(review): this pygit2 variant is never called from this method and
    # has no return statement, so `results` is discarded - looks like an
    # unfinished alternative implementation
    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
        repo_init = self._factory.repo_libgit2(wire)
        from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
        results = []
        with repo_init as repo:
            for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
                results.append(commit.id.hex)

    return _get_all_commit_ids(context_uid, repo_id)
1387 1401
1388 1402 @reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
    """
    Run the git executable with ``cmd`` and return its output.

    :param wire: connection dict; when ``wire['path']`` is an existing
        directory it becomes the working directory of the process.
    :param cmd: list of git arguments, without the executable itself.
    :param opts: options handed to ``subprocessio.SubprocessIOChunker``,
        apart from these, which are consumed here:

        * ``_bare``: when present, the default ``-c`` options are left out
        * ``_safe``: when present, an ``OSError`` does not raise
        * ``_copts``: extra options placed before the git sub-command
        * ``extra_env``: variables merged into the process environment

    :return: ``(stdout, stderr)`` as bytes.
    :raises: ``exceptions.VcsException`` when the process cannot be run and
        ``_safe`` was not given.
    """
    path = wire.get('path', None)
    debug_mode = rhodecode.ConfigGet().get_bool('debug')

    if path and os.path.isdir(path):
        opts['cwd'] = path

    # only the presence of the `_bare` / `_safe` keys matters, not their value
    if '_bare' in opts:
        _copts = []
        del opts['_bare']
    else:
        _copts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']
    safe_call = False
    if '_safe' in opts:
        # no exc on failure
        del opts['_safe']
        safe_call = True

    if '_copts' in opts:
        _copts.extend(opts['_copts'] or [])
        del opts['_copts']

    # start from a copy of our own environment, callers' variables win
    gitenv = os.environ.copy()
    gitenv.update(opts.pop('extra_env', {}))
    # need to clean fix GIT_DIR !
    if 'GIT_DIR' in gitenv:
        del gitenv['GIT_DIR']
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
    gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

    cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
    _opts = {'env': gitenv, 'shell': False}

    proc = None
    try:
        # remaining caller options may override `env` and `shell`
        _opts.update(opts)
        proc = subprocessio.SubprocessIOChunker(cmd, **_opts)

        return b''.join(proc), b''.join(proc.stderr)
    except OSError as err:
        cmd = ' '.join(map(safe_str, cmd))  # human friendly CMD
        # call options (they include the environment) are only shown in debug mode
        call_opts = {}
        if debug_mode:
            call_opts = _opts

        tb_err = ("Couldn't run git command ({}).\n"
                  "Original error was:{}\n"
                  "Call options:{}\n"
                  .format(cmd, err, call_opts))
        log.exception(tb_err)
        if safe_call:
            # NOTE(review): returns a str and the exception object here,
            # while the success path returns two bytes values
            return '', err
        else:
            raise exceptions.VcsException()(tb_err)
    finally:
        if proc:
            proc.close()
1446 1460
1447 1461 @reraise_safe_exceptions
def install_hooks(self, wire, force=False):
    """
    Install the git hooks into the repository at ``wire['path']``.

    :param wire: connection dict describing the repository.
    :param force: passed on as ``force_create`` to ``install_git_hooks``.
    :return: the result of ``install_git_hooks``.
    """
    from vcsserver.hook_utils import install_git_hooks
    bare = self.bare(wire)
    path = wire['path']
    # the former `os.path.join(settings.BINARY_DIR, 'python3')` statement
    # discarded its result, so it was dropped
    return install_git_hooks(path, bare, force_create=force)
1456 1470
1457 1471 @reraise_safe_exceptions
def get_hooks_info(self, wire):
    """
    Return the versions of the installed git hooks as a dict with
    ``pre_version`` and ``post_version`` keys.
    """
    from vcsserver.hook_utils import (
        get_git_pre_hook_version, get_git_post_hook_version)

    is_bare = self.bare(wire)
    repo_path = wire['path']
    pre_version = get_git_pre_hook_version(repo_path, is_bare)
    post_version = get_git_post_hook_version(repo_path, is_bare)
    return {
        'pre_version': pre_version,
        'post_version': post_version,
    }
1467 1481
1468 1482 @reraise_safe_exceptions
def set_head_ref(self, wire, head_name):
    """
    Point HEAD at ``refs/heads/<head_name>``.

    :return: two-item list: ``head_name`` and a message describing the change.
    """
    log.debug('Setting refs/head to `%s`', head_name)
    with self._factory.repo_libgit2(wire) as repo:
        repo.set_head(f'refs/heads/{head_name}')

    return [head_name, f'set HEAD to refs/heads/{head_name}']
1476 1490
1477 1491 @reraise_safe_exceptions
def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id, cache_config):
    """
    Build an archive of ``commit_id`` by handing a file walker over the
    commit's tree to ``store_archive_in_cache``.

    All arguments except ``wire`` are passed through to
    ``store_archive_in_cache`` unchanged.

    :return: the result of ``store_archive_in_cache``.
    """

    def file_walker(_commit_id, path):
        # NOTE(review): the body reads the enclosing `commit_id`, not the
        # `_commit_id` argument - presumably both carry the same value,
        # confirm against store_archive_in_cache
        repo_init = self._factory.repo_libgit2(wire)

        with repo_init as repo:
            commit = repo[commit_id]

            # an empty path or '/' means the whole tree of the commit
            if path in ['', '/']:
                tree = commit.tree
            else:
                tree = commit.tree[path.rstrip('/')]
            tree_id = tree.id.hex
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            # load the tree into a fresh index and iterate that index
            index = LibGit2Index.Index()
            index.read_tree(tree)
            file_iter = index

        for file_node in file_iter:
            file_path = file_node.path
            mode = file_node.mode
            is_link = stat.S_ISLNK(mode)
            if mode == pygit2.GIT_FILEMODE_COMMIT:
                log.debug('Skipping path %s as a commit node', file_path)
                continue
            # the content reader is passed uncalled, so the blob is only
            # read when the consumer calls it
            yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)

    return store_archive_in_cache(
        file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
General Comments 0
You need to be logged in to leave comments. Login now