##// END OF EJS Templates
fix(server-info): removed dulwich from update-server-info logic, and added force-flag feature
super-admin -
r1197:bad64234 default
parent child Browse files
Show More
@@ -1,1517 +1,1518 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23 import traceback
24 24 import urllib.request
25 25 import urllib.parse
26 26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 from dulwich.server import update_server_info
41 40
42 41 import rhodecode
43 42 from vcsserver import exceptions, settings, subprocessio
44 43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str
45 44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
46 45 from vcsserver.hgcompat import (
47 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
48 47 from vcsserver.git_lfs.lib import LFSOidStore
49 48 from vcsserver.vcs_base import RemoteBase
50 49
51 50 DIR_STAT = stat.S_IFDIR
52 51 FILE_MODE = stat.S_IFMT
53 52 GIT_LINK = objects.S_IFGITLINK
54 53 PEELED_REF_MARKER = b'^{}'
55 54 HEAD_MARKER = b'HEAD'
56 55
57 56 log = logging.getLogger(__name__)
58 57
59 58
def reraise_safe_exceptions(func):
    """Decorate ``func`` so Dulwich errors are re-raised as neutral vcsserver ones.

    Lookup-style Dulwich errors are re-raised via ``exceptions.LookupException``
    and protocol-style errors via ``exceptions.VcsException``; the original
    exception is attached as ``org_exc``. Every other exception propagates
    untouched.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing) as err:
            raise exceptions.LookupException(org_exc=err)(safe_str(err))
        except (HangupException, UnexpectedCommandError) as err:
            raise exceptions.VcsException(org_exc=err)(safe_str(err))
        # NOTE(marcink): because of how dulwich handles some exceptions
        # (KeyError on empty repos), we cannot track and convert everything
        # here; any other exception is deliberately left to propagate as-is.

    return wrapper
82 81
83 82
class Repo(DulwichRepo):
    """
    Thin subclass of the dulwich ``Repo`` that closes itself on destruction.

    Dulwich sometimes keeps .idx file descriptors open, which leads to
    "Too many open files" errors. Closing the repo when the object is
    destroyed releases those descriptors.
    """

    def __del__(self):
        # `object_store` is missing when construction failed part-way;
        # only close a repo that was actually set up.
        if not hasattr(self, 'object_store'):
            return
        self.close()
95 94
96 95
class Repository(LibGit2Repo):
    """pygit2 repository usable as a context manager; it is freed on exit."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Release the underlying libgit2 handle; exceptions are not suppressed.
        self.free()
104 103
105 104
class GitFactory(RepoFactory):
    """Factory producing Git repository objects (dulwich or libgit2 backed)."""

    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """Open the repository at ``wire['path']``.

        :param wire: mapping that carries the repository ``'path'``
        :param create: accepted for interface compatibility; not used here
        :param use_libgit2: when true return a pygit2-based ``Repository``,
            otherwise a dulwich-based ``Repo``
        """
        if not use_libgit2:
            # dulwich mode
            repo = Repo(safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING))
        else:
            repo = Repository(safe_bytes(wire['path']))

        log.debug('repository created: got GIT object: %s', repo)
        return repo

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """Shortcut for ``repo(wire, use_libgit2=True)``."""
        return self.repo(wire, use_libgit2=True)
128 127
129 128
def create_signature_from_string(author_str, **kwargs):
    """
    Build a pygit2.Signature from a 'Name <email>' string.

    :param author_str: String of the format 'Name <email>'
    :param kwargs: extra keyword arguments forwarded to ``pygit2.Signature``
    :return: pygit2.Signature object
    :raises ValueError: when ``author_str`` does not have the expected format
    """
    parsed = re.match(r'^(.+) <(.+)>$', author_str)
    if parsed is None:
        raise ValueError(f"Invalid format: {author_str}")

    return pygit2.Signature(parsed.group(1), parsed.group(2), **kwargs)
143 142
144 143
def get_obfuscated_url(url_obj):
    """Return ``str(url_obj)`` after masking its password and query string.

    Note: ``url_obj`` is modified in place (``passwd`` and ``query``).
    """
    if url_obj.passwd:
        url_obj.passwd = b'*****'
    url_obj.query = obfuscate_qs(url_obj.query)
    return str(url_obj)
150 149
151 150
152 151 class GitRemote(RemoteBase):
153 152
154 153 def __init__(self, factory):
155 154 self._factory = factory
156 155 self._bulk_methods = {
157 156 "date": self.date,
158 157 "author": self.author,
159 158 "branch": self.branch,
160 159 "message": self.message,
161 160 "parents": self.parents,
162 161 "_commit": self.revision,
163 162 }
164 163 self._bulk_file_methods = {
165 164 "size": self.get_node_size,
166 165 "data": self.get_node_data,
167 166 "flags": self.get_node_flags,
168 167 "is_binary": self.get_node_is_binary,
169 168 "md5": self.md5_hash
170 169 }
171 170
172 171 def _wire_to_config(self, wire):
173 172 if 'config' in wire:
174 173 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
175 174 return {}
176 175
177 176 def _remote_conf(self, config):
178 177 params = [
179 178 '-c', 'core.askpass=""',
180 179 ]
181 180 config_attrs = {
182 181 'vcs_ssl_dir': 'http.sslCAinfo={}',
183 182 'vcs_git_lfs_store_location': 'lfs.storage={}'
184 183 }
185 184 for key, param in config_attrs.items():
186 185 if value := config.get(key):
187 186 params.extend(['-c', param.format(value)])
188 187 return params
189 188
@reraise_safe_exceptions
def discover_git_version(self):
    """Run ``git --version`` and return its output without the 'git version' prefix."""
    output, _ = self.run_git_command({}, ['--version'], _bare=True, _safe=True)
    marker = b'git version'
    version = output[len(marker):] if output.startswith(marker) else output
    return safe_str(version.strip())
198 197
@reraise_safe_exceptions
def is_empty(self, wire):
    """Tell whether the repository has no commits.

    Returns False as soon as HEAD has a name, otherwise falls back to
    libgit2's ``is_empty``. Any error raised while probing is treated as
    "empty" (best effort).
    """
    with self._factory.repo_libgit2(wire) as repo:
        try:
            if repo.head.name:
                return False

            # NOTE(marcink): check again using more expensive method
            return repo.is_empty
        except Exception:
            # deliberate best-effort: fall through to "empty"
            pass

    return True
215 213
@reraise_safe_exceptions
def assert_correct_path(self, wire):
    """Return True when ``wire['path']`` points at a Git repository.

    The result goes through the conditional cache region. This entry point
    always requests the fast check (``pygit2.discover_repository``); the
    slow branch opens the repository through libgit2 instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _assert_correct_path(_context_uid, _repo_id, fast_check):
        if fast_check:
            return bool(pygit2.discover_repository(safe_str(wire['path'])))

        try:
            with self._factory.repo_libgit2(wire):
                pass
        except pygit2.GitError:
            path = wire.get('path')
            tb = traceback.format_exc()
            log.debug("Invalid Git path `%s`, tb: %s", path, tb)
            return False
        return True

    return _assert_correct_path(context_uid, repo_id, True)
241 239
@reraise_safe_exceptions
def bare(self, wire):
    """Return the libgit2 ``is_bare`` flag of the repository."""
    with self._factory.repo_libgit2(wire) as git_repo:
        return git_repo.is_bare
247 245
@reraise_safe_exceptions
def get_node_data(self, wire, commit_id, path):
    """Return the blob content at ``path`` in ``commit_id`` wrapped in a BytesEnvelope.

    Raises a lookup exception when the tree entry is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]
        if node.type != pygit2.GIT_OBJ_BLOB:
            raise exceptions.LookupException()(
                f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')

        return BytesEnvelope(node.data)
260 258
@reraise_safe_exceptions
def get_node_size(self, wire, commit_id, path):
    """Return the ``size`` of the blob at ``path`` in ``commit_id``.

    Raises a lookup exception when the tree entry is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]
        if node.type != pygit2.GIT_OBJ_BLOB:
            raise exceptions.LookupException()(
                f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')

        return node.size
273 271
@reraise_safe_exceptions
def get_node_flags(self, wire, commit_id, path):
    """Return the ``filemode`` of the blob at ``path`` in ``commit_id``.

    Raises a lookup exception when the tree entry is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]
        if node.type != pygit2.GIT_OBJ_BLOB:
            raise exceptions.LookupException()(
                f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')

        return node.filemode
286 284
@reraise_safe_exceptions
def get_node_is_binary(self, wire, commit_id, path):
    """Return the ``is_binary`` flag of the blob at ``path`` in ``commit_id``.

    Raises a lookup exception when the tree entry is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]
        if node.type != pygit2.GIT_OBJ_BLOB:
            raise exceptions.LookupException()(
                f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')

        return node.is_binary
299 297
@reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
    """Return the ``data`` of object ``sha`` wrapped in a BytesEnvelope."""
    with self._factory.repo_libgit2(wire) as repo:
        return BytesEnvelope(repo[sha].data)
306 304
@reraise_safe_exceptions
def blob_raw_length(self, wire, sha):
    """Return the ``size`` of object ``sha``, going through the cache region."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _blob_raw_length(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[sha].size

    return _blob_raw_length(repo_id, sha)
321 319
322 320 def _parse_lfs_pointer(self, raw_content):
323 321 spec_string = b'version https://git-lfs.github.com/spec'
324 322 if raw_content and raw_content.startswith(spec_string):
325 323
326 324 pattern = re.compile(rb"""
327 325 (?:\n)?
328 326 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
329 327 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
330 328 ^size[ ](?P<oid_size>[0-9]+)\n
331 329 (?:\n)?
332 330 """, re.VERBOSE | re.MULTILINE)
333 331 match = pattern.match(raw_content)
334 332 if match:
335 333 return match.groupdict()
336 334
337 335 return {}
338 336
@reraise_safe_exceptions
def is_large_file(self, wire, commit_id):
    """Return parsed LFS pointer data for object ``commit_id``.

    Binary objects yield ``{}``; otherwise the object's data is handed to
    ``_parse_lfs_pointer``. The result goes through the cache region.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_large_file(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            candidate = repo[commit_id]
            if candidate.is_binary:
                return {}
            return self._parse_lfs_pointer(candidate.data)

    return _is_large_file(repo_id, commit_id)
355 353
@reraise_safe_exceptions
def is_binary(self, wire, tree_id):
    """Return the ``is_binary`` flag of object ``tree_id`` (cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_binary(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[tree_id].is_binary

    return _is_binary(repo_id, tree_id)
369 367
@reraise_safe_exceptions
def md5_hash(self, wire, commit_id, path):
    """Validate that ``path`` in ``commit_id`` is a blob and return ``''``.

    No hash is actually computed here: the cached helper only checks the
    object type (raising a lookup exception for non-blobs) and returns an
    empty string.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _md5_hash(_repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            node = repo[_commit_id].tree[_path]
            if node.type != pygit2.GIT_OBJ_BLOB:
                raise exceptions.LookupException()(
                    f'Tree for commit_id:{_commit_id} is not a blob: {node.type_str}')

            return ''

    return _md5_hash(repo_id, commit_id, path)
389 387
@reraise_safe_exceptions
def in_largefiles_store(self, wire, oid):
    """Return whether ``oid`` is present in the configured LFS store.

    Returns False when no ``vcs_git_lfs_store_location`` is configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        return False

    return LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location).has_oid()
405 403
@reraise_safe_exceptions
def store_path(self, wire, oid):
    """Return the LFS store path (``oid_path``) for ``oid``.

    :raises ValueError: when no ``vcs_git_lfs_store_location`` is configured
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        raise ValueError(f'Unable to fetch oid with path {oid}')

    return LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location).oid_path
419 417
@reraise_safe_exceptions
def bulk_request(self, wire, rev, pre_load):
    """Collect several commit attributes for ``rev`` in one cached call.

    :param pre_load: iterable of attribute names; each must be a key of
        ``self._bulk_methods``
    :return: dict mapping every requested attribute to its value
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_request(_repo_id, _rev, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                handler = self._bulk_methods[attr]
                wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
                collected[attr] = handler(wire, rev)
            except KeyError as e:
                raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
        return collected

    return _bulk_request(repo_id, rev, sorted(pre_load))
439 437
@reraise_safe_exceptions
def bulk_file_request(self, wire, commit_id, path, pre_load):
    """Collect several file attributes in one cached call.

    :param pre_load: iterable of attribute names; each must be a key of
        ``self._bulk_file_methods``
    :return: BinaryEnvelope wrapping a dict of attribute -> value
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                handler = self._bulk_file_methods[attr]
                wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
                collected[attr] = handler(wire, _commit_id, _path)
            except KeyError as e:
                raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
        return collected

    payload = _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
    return BinaryEnvelope(payload)
458 456
def _build_opener(self, url: str):
    """Build a urllib opener for ``url``.

    Basic and digest auth handlers are installed only when the parsed URL
    carries auth info.
    """
    authinfo = url_parser(safe_bytes(url)).authinfo()[1]
    if not authinfo:
        return urllib.request.build_opener()

    # create a password manager
    passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    passmgr.add_password(*convert_to_str(authinfo))

    return urllib.request.build_opener(
        httpbasicauthhandler(passmgr), httpdigestauthhandler(passmgr))
473 471
@reraise_safe_exceptions
def check_url(self, url, config):
    """Check that ``url`` answers like a Git smart/dumb HTTP endpoint.

    Requests ``<url>/info/refs?service=git-upload-pack`` and inspects the
    response body.

    :param url: remote URL to probe
    :param config: accepted for interface compatibility; not used here
    :return: True when the URL looks like a Git repository
    :raises: a ``exceptions.URLError``-built exception when the URL cannot
        be opened or the response does not look like Git
    """
    url_obj = url_parser(safe_bytes(url))

    # Read the real URI first: get_obfuscated_url() mutates url_obj.
    test_uri = safe_str(url_obj.authinfo()[0])
    obfuscated_uri = get_obfuscated_url(url_obj)

    log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)

    if not test_uri.endswith('info/refs'):
        test_uri = test_uri.rstrip('/') + '/info/refs'

    o = self._build_opener(url=url)
    o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

    q = {"service": 'git-upload-pack'}
    qs = f'?{urllib.parse.urlencode(q)}'
    cu = f"{test_uri}{qs}"

    try:
        req = urllib.request.Request(cu, None, {})
        log.debug("Trying to open URL %s", obfuscated_uri)
        resp = o.open(req)
        if resp.code != 200:
            raise exceptions.URLError()('Return Code is not 200')
    except Exception as e:
        log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
        # means it cannot be cloned
        raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")

    # now detect if it's proper git repo
    gitdata: bytes = resp.read()

    if b'service=git-upload-pack' in gitdata:
        pass
    elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
        # old style git can return some other format!
        pass
    else:
        e = None
        # This is the Git backend: the message previously said "hg repo".
        raise exceptions.URLError(e)(
            f"url [{obfuscated_uri}] does not look like a git repo org_exc: {e}")

    return True
518 516
@reraise_safe_exceptions
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
    """Fetch ``url`` into the repository and apply the selected refs.

    :param deferred: suffix (or tuple of suffixes) of refs to skip
    :param valid_refs: prefix or list/tuple of prefixes of refs to apply
    :param update_after_clone: when true, point HEAD at the remote HEAD and
        build the index from its tree
    """
    # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
    remote_refs = self.pull(wire, url, apply_refs=False)
    repo = self._factory.repo(wire)
    if isinstance(valid_refs, list):
        # str/bytes.startswith accepts a tuple of prefixes, not a list
        valid_refs = tuple(valid_refs)

    for k in remote_refs:
        # only parse heads/tags and skip so called deferred tags
        if k.startswith(valid_refs) and not k.endswith(deferred):
            repo[k] = remote_refs[k]

    if update_after_clone:
        # we want to checkout HEAD; dulwich refs are keyed by bytes, so use
        # HEAD_MARKER (as `pull` does) instead of the str "HEAD"
        repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
        index.build_index_from_tree(repo.path, repo.index_path(),
                                    repo.object_store, repo[HEAD_MARKER].tree)
537 535
@reraise_safe_exceptions
def branch(self, wire, commit_id):
    """Return names of the ``refs/heads`` refs whose value equals ``commit_id`` (cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _branch(_context_uid, _repo_id, _commit_id):
        heads = re.compile('^refs/heads')
        return [
            ref_name.split('refs/heads/')[-1]
            for ref_name, ref_sha in self.get_refs(wire).items()
            if heads.match(ref_name) and ref_sha == _commit_id
        ]

    return _branch(context_uid, repo_id, commit_id)
554 552
@reraise_safe_exceptions
def commit_branches(self, wire, commit_id):
    """Return the list produced by ``repo.branches.with_commit(commit_id)`` (cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _commit_branches(_context_uid, _repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return list(repo.branches.with_commit(_commit_id))

    return _commit_branches(context_uid, repo_id, commit_id)
568 566
@reraise_safe_exceptions
def add_object(self, wire, content):
    """Store ``content`` as a blob in the repository and return the blob id.

    The blob is a dulwich object added through ``object_store``, which only
    the dulwich repo provides, so the dulwich repo is opened here (the
    libgit2 ``Repository`` has no ``object_store``).
    """
    repo = self._factory.repo(wire)
    blob = objects.Blob()
    blob.set_raw_string(content)
    repo.object_store.add_object(blob)
    return blob.id
577 575
@reraise_safe_exceptions
def create_commit(self, wire, author, committer, message, branch, new_tree_id,
                  date_args: list[int, int] = None,
                  parents: list | None = None):
    """Create a commit on ``refs/heads/<branch>`` and return its id as str.

    :param author: 'Name <email>' string, converted to a signature when
        ``date_args`` is given
    :param committer: same format and handling as ``author``
    :param new_tree_id: tree for the commit; str/bytes ids are resolved
        against the repository first
    :param date_args: ``[time, offset]`` used for both signatures
    :param parents: explicit parent ids; when empty the branch head (or
        HEAD, if the repo has any local branch) is used
    """
    with self._factory.repo_libgit2(wire) as repo:

        if date_args:
            current_time, offset = date_args
            signature_kw = {'time': current_time, 'offset': offset}
            author = create_signature_from_string(author, **signature_kw)
            committer = create_signature_from_string(committer, **signature_kw)

        tree = new_tree_id
        if isinstance(tree, (bytes, str)):
            # validate this tree is in the repo...
            tree = repo[safe_str(tree)].id

        if parents:
            # run via sha's and validate them in repo
            parents = [repo[parent].id for parent in parents]
        else:
            parents = []
            # ensure we COMMIT on top of given branch head
            # check if this repo has ANY branches, otherwise it's a new branch case we need to make
            if branch in repo.branches.local:
                parents.append(repo.branches[branch].target)
            elif list(repo.branches.local):
                parents.append(repo.head.target)
            # otherwise: first commit of a brand new branch, no parents

        commit_oid = repo.create_commit(
            f'refs/heads/{branch}',  # the name of the reference to update
            author,  # the author of the commit
            committer,  # the committer of the commit
            message,  # the commit message
            tree,  # the tree produced by the index
            parents  # list of parents for the new commit, usually just one,
        )

        return safe_str(commit_oid)
629 627
@reraise_safe_exceptions
def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
    """Write the given file changes as a new commit on ``branch``.

    :param commit_data: dict with ``parents``, ``author``, ``committer``,
        ``message``, ``commit_time`` and ``commit_timezone``
    :param commit_tree: when truthy (and parents are given) the index is
        seeded from the first parent's tree
    :param updated: iterable of dicts with ``path``, ``content``, ``mode``
    :param removed: iterable of paths to drop from the index
    :return: id of the new commit
    """

    def mode2pygit(mode):
        """
        git only supports two filemode 644 and 755

        0o100755 -> 33261
        0o100644 -> 33188
        """
        known_modes = {
            0o100644: pygit2.GIT_FILEMODE_BLOB,
            0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
            0o120000: pygit2.GIT_FILEMODE_LINK
        }
        return known_modes.get(mode) or pygit2.GIT_FILEMODE_BLOB

    with self._factory.repo_libgit2(wire) as repo:
        repo_index = repo.index

        commit_parents = None
        if commit_tree and commit_data['parents']:
            commit_parents = commit_data['parents']
            repo_index.read_tree(repo[commit_parents[0]].tree)

        for change in updated:
            blob_id = repo.create_blob(change['content'])
            repo_index.add(
                pygit2.IndexEntry(change['path'], blob_id, mode2pygit(change['mode'])))

        for removed_path in removed:
            repo_index.remove(removed_path)

        # Write changes to the index
        repo_index.write()

        # Create a tree from the updated index
        new_tree_id = repo_index.write_tree()

    date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]

    new_commit_id = self.create_commit(
        wire, commit_data['author'], commit_data['committer'], commit_data['message'],
        branch, new_tree_id, date_args=date_args, parents=commit_parents)

    # libgit2, ensure the branch is there and exists
    self.create_branch(wire, branch, new_commit_id)

    # libgit2, set new ref to this created commit
    self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)

    return new_commit_id
688 686
@reraise_safe_exceptions
def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
    """Fetch from ``url`` into the dulwich repository and return the remote refs.

    :param url: local path, or a URL containing ``://`` (or ``'default'``),
        in which case an HTTP client is used
    :param apply_refs: when true, copy the fetched refs into the repository
    :param refs: optional ref names limiting what is fetched and returned
    :param update_after: when truthy, set HEAD to the remote HEAD and build
        the index from its tree
    """
    is_local = url != 'default' and '://' not in url
    if is_local:
        client = LocalGitClient(url)
    else:
        url_obj = url_parser(safe_bytes(url))
        opener = self._build_opener(url)
        url = url_obj.authinfo()[0]
        client = HttpGitClient(base_url=url, opener=opener)
    repo = self._factory.repo(wire)

    determine_wants = repo.object_store.determine_wants_all

    if refs:
        refs: list[bytes] = [ascii_bytes(x) for x in refs]

        def determine_wants_requested(_remote_refs):
            # keep only hashes of the refs that were explicitly requested
            return [
                safe_bytes(ref_hash)
                for ref_name, ref_hash in _remote_refs.items()
                if safe_bytes(ref_name) in refs
            ]

        # swap with our custom requested wants
        determine_wants = determine_wants_requested

    try:
        remote_refs = client.fetch(
            path=url, target=repo, determine_wants=determine_wants)
    except NotGitRepository as e:
        log.warning(
            'Trying to fetch from "%s" failed, not a Git repository.', url)
        # Exception can contain unicode which we convert
        raise exceptions.AbortException(e)(repr(e))

    # mikhail: client.fetch() returns all the remote refs, but fetches only
    # refs filtered by `determine_wants` function. We need to filter result
    # as well
    if refs:
        remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}

    if apply_refs:
        # TODO: johbo: Needs proper test coverage with a git repository
        # that contains a tag object, so that we would end up with
        # a peeled ref at this point.
        for ref_key in remote_refs:
            if ref_key.endswith(PEELED_REF_MARKER):
                log.debug("Skipping peeled reference %s", ref_key)
                continue
            repo[ref_key] = remote_refs[ref_key]

        if refs and not update_after:
            # update to ref
            # mikhail: explicitly set the head to the last ref.
            update_to_ref = update_after if isinstance(update_after, str) else refs[-1]
            repo[HEAD_MARKER] = remote_refs[update_to_ref]

    if update_after:
        # we want to check out HEAD
        repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
        index.build_index_from_tree(repo.path, repo.index_path(),
                                    repo.object_store, repo[HEAD_MARKER].tree)

    if isinstance(remote_refs, FetchPackResult):
        return remote_refs.refs
    return remote_refs
762 760
@reraise_safe_exceptions
def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs):
    """Fetch refs from ``url`` with the git CLI and return the remote refs.

    :param refs: optional commit id (or list/tuple of ids); when given, only
        remote refs pointing at one of them are fetched. Ids may be str or
        bytes.
    :param all_refs: when false, ``ls-remote`` is limited to heads and tags
    :param kwargs: ``sync_large_objects`` additionally runs ``git lfs fetch``
    :return: ordered mapping of remote ref name (bytes) -> sha (bytes)
    """
    self._factory.repo(wire)
    if refs and not isinstance(refs, (list, tuple)):
        refs = [refs]

    # `ls-remote` output is parsed as bytes, so compare against bytes ids;
    # with str ids the `in` test below could never match.
    wanted_shas = {safe_bytes(x) for x in refs} if refs else None

    config = self._wire_to_config(wire)
    # get all remote refs we'll use to fetch later
    cmd = ['ls-remote']
    if not all_refs:
        cmd += ['--heads', '--tags']
    cmd += [url]
    output, __ = self.run_git_command(
        wire, cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    remote_refs = collections.OrderedDict()
    fetch_refs = []

    for ref_line in output.splitlines():
        sha, ref = ref_line.split(b'\t')
        sha = sha.strip()
        if ref in remote_refs:
            # duplicate, skip
            continue
        if ref.endswith(PEELED_REF_MARKER):
            log.debug("Skipping peeled reference %s", ref)
            continue
        # don't sync HEAD
        if ref in [HEAD_MARKER]:
            continue

        remote_refs[ref] = sha

        # without a filter fetch everything, otherwise only the wanted shas
        if wanted_shas is None or sha in wanted_shas:
            fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
    log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))

    if fetch_refs:
        # chunk the refspecs to keep each command line bounded
        for chunk in more_itertools.chunked(fetch_refs, 128):
            fetch_refs_chunks = list(chunk)
            log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
            self.run_git_command(
                wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
                fail_on_stderr=False,
                _copts=self._remote_conf(config),
                extra_env={'GIT_TERMINAL_PROMPT': '0'})
        if kwargs.get('sync_large_objects'):
            self.run_git_command(
                wire, ['lfs', 'fetch', url, '--all'],
                fail_on_stderr=False,
                _copts=self._remote_conf(config),
            )

    return remote_refs
822 820
@reraise_safe_exceptions
def sync_push(self, wire, url, refs=None, **kwargs):
    """Mirror-push the repository to ``url`` using the git CLI.

    Does nothing when ``check_url`` returns a falsy value. With
    ``sync_large_objects`` in ``kwargs`` an ``git lfs push --all`` follows.
    """
    if not self.check_url(url, wire):
        return

    config = self._wire_to_config(wire)
    self._factory.repo(wire)

    remote_opts = self._remote_conf(config)
    self.run_git_command(
        wire, ['push', url, '--mirror'], fail_on_stderr=False,
        _copts=remote_opts,
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    if kwargs.get('sync_large_objects'):
        self.run_git_command(
            wire, ['lfs', 'push', url, '--all'],
            fail_on_stderr=False,
            _copts=self._remote_conf(config),
        )
839 837
@reraise_safe_exceptions
def get_remote_refs(self, wire, url):
    """Open the dulwich repo located at ``url`` and return its refs."""
    return Repo(url).get_refs()
844 842
@reraise_safe_exceptions
def get_description(self, wire):
    """Return the description of the dulwich repository."""
    return self._factory.repo(wire).get_description()
849 847
@reraise_safe_exceptions
def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
    """Return ids of commits reachable from ``rev2`` but not from ``rev1``.

    Both repositories first fetch from each other, then the walk is done in
    the repository at ``other_repo_path``.
    """
    origin_repo_path = wire['path']
    origin_repo = self._factory.repo(wire)
    # fetch from other_repo_path to our origin repo
    LocalGitClient(thin_packs=False).fetch(other_repo_path, origin_repo)

    wire_remote = wire.copy()
    wire_remote['path'] = other_repo_path
    repo_remote = self._factory.repo(wire_remote)

    # fetch from origin_repo_path to our remote repo
    LocalGitClient(thin_packs=False).fetch(origin_repo_path, repo_remote)

    walker = repo_remote.get_walker(
        include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])
    return [entry.commit.id for entry in walker]
868 866
@reraise_safe_exceptions
def get_object(self, wire, sha, maybe_unreachable=False):
    """Resolve ``sha`` and return a dict describing the object (cached).

    Tags are dereferenced to their target. Unless the object came from a
    tag, ``maybe_unreachable`` is set, or ``sha`` was a reference rather
    than the full hex id, the commit must be contained in some branch;
    otherwise a lookup exception is raised.

    :return: dict with ``id``, ``type``, ``commit_id`` and ``idx`` (always 0)
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_object(_context_uid, _repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:

            missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
            try:
                commit = repo.revparse_single(sha)
            except KeyError:
                # NOTE(marcink): KeyError doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found', sha)
                raise exceptions.LookupException(e)(missing_commit_err)
            except ValueError as e:
                raise exceptions.LookupException(e)(missing_commit_err)

            is_tag = isinstance(commit, pygit2.Tag)
            if is_tag:
                commit = repo.get(commit.target)

            # skip the dangling check for tags and when the caller allows
            # unreachable objects
            check_dangling = not is_tag and not maybe_unreachable

            # we used a reference and it parsed means we're not having a dangling commit
            if sha != commit.hex:
                check_dangling = False

            if check_dangling:
                # check for dangling commit
                in_any_branch = any(
                    branch for branch in repo.branches.with_commit(commit.hex))
                if not in_any_branch:
                    # NOTE(marcink): Empty error doesn't give us any meaningful information
                    # here, we instead give something more explicit
                    e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
                    raise exceptions.LookupException(e)(missing_commit_err)

            commit_id = commit.hex
            return {
                'id': commit_id,
                'type': commit.type_str,
                'commit_id': commit_id,
                'idx': 0
            }

    return _get_object(context_uid, repo_id, sha)
928 926
929 927 @reraise_safe_exceptions
def get_refs(self, wire):
    """
    Return a dict mapping reference name to target hex id for every
    reference whose name starts with `refs/heads/` or `refs/tags/`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_refs(_context_uid, _repo_id):
        with self._factory.repo_libgit2(wire) as repo:
            wanted = re.compile('^refs/(heads|tags)/')
            return {
                ref.name: ref.target.hex
                for ref in repo.listall_reference_objects()
                if wanted.match(ref.name)
            }

    return _get_refs(context_uid, repo_id)
944 942
945 943 @reraise_safe_exceptions
def get_branch_pointers(self, wire):
    """
    Return a dict mapping target hex id to reference shorthand for every
    reference whose name starts with `refs/heads`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_branch_pointers(_context_uid, _repo_id):
        heads_only = re.compile('^refs/heads')
        with self._factory.repo_libgit2(wire) as repo:
            return {
                ref.target.hex: ref.shorthand
                for ref in repo.listall_reference_objects()
                if heads_only.match(ref.name)
            }

    return _get_branch_pointers(context_uid, repo_id)
960 958
961 959 @reraise_safe_exceptions
def head(self, wire, show_exc=True):
    """
    Return the hex id of the object that HEAD peels to.

    If the lookup raises, the exception propagates when `show_exc` is
    true; otherwise it is swallowed and None is returned.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _head(_context_uid, _repo_id, _show_exc):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                head_hex = repo.head.peel().hex
            except Exception:
                if not show_exc:
                    return None
                raise
            return head_hex
    return _head(context_uid, repo_id, show_exc)
976 974
977 975 @reraise_safe_exceptions
def init(self, wire):
    """
    Create the directory `wire['path']` with mode 0o755 and initialise a
    non-bare git repository in it.
    """
    target_dir = safe_str(wire['path'])
    os.makedirs(target_dir, mode=0o755)
    pygit2.init_repository(target_dir, bare=False)
982 980
983 981 @reraise_safe_exceptions
def init_bare(self, wire):
    """
    Create the directory `wire['path']` with mode 0o755 and initialise a
    bare git repository in it.
    """
    target_dir = safe_str(wire['path'])
    os.makedirs(target_dir, mode=0o755)
    pygit2.init_repository(target_dir, bare=True)
988 986
989 987 @reraise_safe_exceptions
def revision(self, wire, rev):
    """
    Look up `rev` in the repository and return a dict with its hex `id`
    and, when the object exposes a `tree_id`, the hex id of that `tree`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _revision(_context_uid, _repo_id, _rev):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[rev]
            info = {'id': git_obj.id.hex}
            # tree objects itself don't have tree_id attribute
            if hasattr(git_obj, 'tree_id'):
                info['tree'] = git_obj.tree_id.hex
            return info

    return _revision(context_uid, repo_id, rev)
1009 1007
1010 1008 @reraise_safe_exceptions
def date(self, wire, commit_id):
    """
    Return `[commit_time, commit_time_offset]` for `commit_id`.

    If the looked-up object has no `commit_time`, the values are read from
    the object returned by its `get_object()` instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _date(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[commit_id]
            if not hasattr(git_obj, 'commit_time'):
                git_obj = git_obj.get_object()

            # TODO(marcink): check dulwich difference of offset vs timezone
            return [git_obj.commit_time, git_obj.commit_time_offset]

    return _date(repo_id, commit_id)
1030 1028
1031 1029 @reraise_safe_exceptions
def author(self, wire, commit_id):
    """
    Return the author of `commit_id` as a string.

    The result is `"name <email>"` when the author has an email, else just
    the name; if formatting the name raises, the `raw_name` passed through
    `safe_str` is returned instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _author(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[commit_id]
            # objects without an `author` expose it via `get_object()`
            author = git_obj.author if hasattr(git_obj, 'author') else git_obj.get_object().author

            if author.email:
                return f"{author.name} <{author.email}>"

            try:
                return f"{author.name}"
            except Exception:
                return f"{safe_str(author.raw_name)}"

    return _author(repo_id, commit_id)
1056 1054
1057 1055 @reraise_safe_exceptions
def message(self, wire, commit_id):
    """
    Return the `message` attribute of the object stored under `commit_id`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _message(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[commit_id].message

    return _message(repo_id, commit_id)
1069 1067
1070 1068 @reraise_safe_exceptions
def parents(self, wire, commit_id):
    """
    Return the hex ids of the parents of `commit_id`.

    If the looked-up object has no `parent_ids`, they are read from the
    object returned by its `get_object()` instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _parents(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[commit_id]
            if not hasattr(git_obj, 'parent_ids'):
                git_obj = git_obj.get_object()
            return [parent.hex for parent in git_obj.parent_ids]

    return _parents(repo_id, commit_id)
1087 1085
1088 1086 @reraise_safe_exceptions
def children(self, wire, commit_id):
    """
    Return the ids listed after `commit_id` on its own line in the output
    of `git rev-list --all --children <commit_id>^..<head>`.

    An empty list is returned when no output line starts with `commit_id`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    head = self.head(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _children(_repo_id, _commit_id):
        stdout, _stderr = self.run_git_command(
            wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])

        own_line = re.compile(fr'^{commit_id}')
        for raw_line in stdout.splitlines():
            text = safe_str(raw_line)
            if own_line.match(text):
                # first field is the commit itself, the rest are collected
                return list(text.split(' ')[1:])

        return []

    return _children(repo_id, commit_id)
1112 1110
1113 1111 @reraise_safe_exceptions
def set_refs(self, wire, key, value):
    """
    Create reference `key` pointing at `value`, with `force=True` so an
    existing reference of that name is overwritten.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.create(key, value, force=True)
1118 1116
1119 1117 @reraise_safe_exceptions
def update_refs(self, wire, key, value):
    """
    Point the existing reference `key` at `value`.

    :raises ValueError: when `key` is not among the repository references.
    """
    with self._factory.repo_libgit2(wire) as repo:
        references = repo.references
        if key not in references:
            raise ValueError(f'Reference {key} not found in the repository')
        references.create(key, value, force=True)
1126 1124
1127 1125 @reraise_safe_exceptions
def create_branch(self, wire, branch_name, commit_id, force=False):
    """
    Create the local branch `branch_name`.

    :param commit_id: id of the commit the branch should point at; when
        empty, the object HEAD peels to is used.
    :param force: passed through to the branch creation; when true the
        branch is created even if one of that name already exists,
        otherwise an existing branch is left untouched.
    """
    with self._factory.repo_libgit2(wire) as repo:
        if commit_id:
            commit = repo[commit_id]
        else:
            # if commit is not given just use the HEAD.
            # `repo.head` is a property holding a reference, not a callable,
            # so it is peeled (as `head()` in this class does) to get the
            # object to branch from.
            commit = repo.head.peel()

        # create when forced, or only if that branch isn't existing
        if force or not repo.branches.get(branch_name):
            repo.branches.local.create(branch_name, commit, force=force)
1142 1140
1143 1141 @reraise_safe_exceptions
def remove_ref(self, wire, key):
    """
    Delete the reference named `key` from the repository.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(key)
1148 1146
1149 1147 @reraise_safe_exceptions
def tag_remove(self, wire, tag_name):
    """
    Delete the reference `refs/tags/<tag_name>` from the repository.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(f'refs/tags/{tag_name}')
1155 1153
1156 1154 @reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Compare the trees of two commits and return three lists of paths:
    `(added, modified, deleted)`.

    A change with both an old and a new path counts as modified, one with
    only a new path as added, one with only an old path as deleted.
    `source_id` may be empty, in which case no source tree is passed to
    the comparison.
    """
    repo = self._factory.repo(wire)
    # source can be empty
    source_id = safe_bytes(source_id if source_id else b'')
    target_id = safe_bytes(target_id)

    source_tree = repo[source_id].tree if source_id else None
    target_tree = repo[target_id].tree
    changes = repo.object_store.tree_changes(source_tree, target_tree)

    added, modified, deleted = set(), set(), set()
    for (old_path, new_path), (_, _), (_, _) in list(changes):
        if new_path:
            (modified if old_path else added).add(new_path)
        elif old_path:
            deleted.add(old_path)

    return list(added), list(modified), list(deleted)
1179 1177
1180 1178 @reraise_safe_exceptions
def tree_and_type_for_path(self, wire, commit_id, path):
    """
    Look up `path` in the tree of `commit_id` and return a tuple of
    `(hex id, type string, filemode)` for the entry found there.

    `(None, None, None)` is returned when the tree lookup raises KeyError.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            # the commit lookup stays outside the `try`: only a KeyError
            # from the tree access is mapped to the "not found" triple
            commit = repo[commit_id]
            try:
                entry = commit.tree[path]
            except KeyError:
                return None, None, None

            return entry.id.hex, entry.type_str, entry.filemode

    return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1199 1197
1200 1198 @reraise_safe_exceptions
def tree_items(self, wire, tree_id):
    """
    Return the entries of the tree `tree_id` as a list of
    `(name, filemode, hex id, type)` tuples.

    Entries whose type string is `commit` are reported with type `link`.

    :raises ObjectMissing: when `tree_id` is not found in the repository.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_items(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            entries = []
            for item in tree:
                sha, mode, kind = item.hex, item.filemode, item.type_str
                # NOTE(marcink): submodules we translate to 'link' for backward compat
                shown_kind = 'link' if kind == 'commit' else kind
                entries.append((item.name, mode, sha, shown_kind))
            return entries

    return _tree_items(repo_id, tree_id)
1228 1226
1229 1227 @reraise_safe_exceptions
def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Old version that uses subprocess to call diff

    Runs `git diff` between the two commits, or `git show` on
    `commit_id_2` when `commit_id_1` equals `self.EMPTY_COMMIT`, and
    returns the stdout of the git command as bytes.

    :param file_filter: optional path; when given it is appended after `--`.
    :param opt_ignorews: when true `--ignore-all-space` is added.
    :param context: number used for the `-U<context>` flag.
    """
    flags = [
        f'-U{context}', '--patch',
        '--binary',
        '--find-renames',
        '--no-indent-heuristic',
    ]

    if opt_ignorews:
        flags.append('--ignore-all-space')

    from_empty = commit_id_1 == self.EMPTY_COMMIT
    if from_empty:
        cmd = ['show'] + flags + [commit_id_2]
    else:
        cmd = ['diff'] + flags + [commit_id_1, commit_id_2]

    if file_filter:
        cmd.extend(['--', file_filter])

    diff, __ = self.run_git_command(wire, cmd)
    # If we used 'show' command, strip first few lines (until actual diff
    # starts)
    if from_empty:
        lines = diff.splitlines()
        first_diff_line = next(
            (idx for idx, line in enumerate(lines) if line.startswith(b'diff')),
            len(lines))
        # The command output is bytes, so the lines have to be re-joined
        # with a bytes separator (a str separator raises TypeError).
        # Append new line just like 'diff' command do
        diff = b'\n'.join(lines[first_diff_line:]) + b'\n'
    return diff
1269 1267
1270 1268 @reraise_safe_exceptions
def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Build a diff with libgit2 and return it wrapped in a BytesEnvelope.

    When `commit_id_1` equals `self.EMPTY_COMMIT`, the tree of
    `commit_id_2` is diffed without a second tree; otherwise it is diffed
    against the tree of `commit_id_1` and rename detection is run on the
    result. With `file_filter` set, only the patch whose old file path
    equals it is returned (an empty envelope if none matches).
    """
    with self._factory.repo_libgit2(wire) as repo:
        diff_flags = 0 | pygit2.GIT_DIFF_SHOW_BINARY
        if opt_ignorews:
            diff_flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE

        if commit_id_1 == self.EMPTY_COMMIT:
            new_commit = repo[commit_id_2]
            diff_obj = new_commit.tree.diff_to_tree(
                flags=diff_flags, context_lines=context, swap=True)
        else:
            new_commit = repo[commit_id_2]
            old_commit = repo[commit_id_1]
            diff_obj = new_commit.tree.diff_to_tree(
                old_commit.tree, flags=diff_flags, context_lines=context, swap=True)
            diff_obj.find_similar(flags=0 | pygit2.GIT_DIFF_FIND_RENAMES)

        if not file_filter:
            return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')

        for patch in diff_obj:
            if patch.delta.old_file.path == file_filter:
                return BytesEnvelope(patch.data) or BytesEnvelope(b'')
        # no matching path == no diff
        return BytesEnvelope(b'')
1304 1302
1305 1303 @reraise_safe_exceptions
def node_history(self, wire, commit_id, path, limit):
    """
    Return the 40-character hex ids found in the output of a git history
    query for `path`, starting at `commit_id`.

    `git rev-list -1` is used when `limit` is exactly 1, otherwise
    `git log`, with `-n <limit>` added when `limit` is truthy.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
        if limit == 1:
            # optimize for n==1, rev-list is much faster for that use-case
            cmd = ['rev-list', '-1', commit_id, '--', path]
        else:
            limit_args = ['-n', str(safe_int(limit, 0))] if limit else []
            cmd = ['log', *limit_args, '--pretty=format: %H', '-s', commit_id, '--', path]

        stdout, _stderr = self.run_git_command(wire, cmd)
        return list(re.findall(rb'[0-9a-fA-F]{40}', stdout))

    return _node_history(context_uid, repo_id, commit_id, path, limit)
1326 1324
1327 1325 @reraise_safe_exceptions
def node_annotate_legacy(self, wire, commit_id, path):
    """
    Annotate `path` at `commit_id` by parsing `git blame` output.

    Returns a list of `(line_no, blame_commit_id, rest_of_line)` tuples,
    with `line_no` starting at 1. Each output line is split at its first
    space; the last output line is not included.
    """
    # note: replaced by pygit2 implementation
    # -l ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r commit_id ==> blames for the given commit
    blame_cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
    stdout, _stderr = self.run_git_command(wire, blame_cmd)

    annotated = []
    for line_no, blame_line in enumerate(stdout.splitlines()[:-1], start=1):
        blame_commit_id, content = blame_line.split(b' ', 1)
        annotated.append((line_no, blame_commit_id, content))
    return annotated
1343 1341
1344 1342 @reraise_safe_exceptions
def node_annotate(self, wire, commit_id, path):
    """
    Annotate `path` at `commit_id` using the libgit2 blame.

    Returns a BinaryEnvelope wrapping a list of
    `(line_no, blame_commit_id, line)` tuples, one per line of the file
    data, with `line_no` starting at 1.
    """
    with self._factory.repo_libgit2(wire) as repo:
        commit = repo[commit_id]
        blame_obj = repo.blame(path, newest_commit=commit_id)
        file_lines = commit.tree[path].data.splitlines()
        annotated = [
            (line_no, blame_obj.for_line(line_no).final_commit_id.hex, line)
            for line_no, line in enumerate(file_lines, start=1)
        ]

    return BinaryEnvelope(annotated)
1360 1358
1361 1359 @reraise_safe_exceptions
def update_server_info(self, wire, force=False):
    """
    Run `git update-server-info` for the repository, adding `--force`
    when `force` is true, and return the command's stdout split into lines.
    """
    args = ['update-server-info', '--force'] if force else ['update-server-info']
    stdout, _stderr = self.run_git_command(wire, args)
    return stdout.splitlines()
1365 1366
1366 1367 @reraise_safe_exceptions
def get_all_commit_ids(self, wire):
    """
    Return the stdout lines of
    `git rev-list --reverse --date-order --branches --tags`.

    Any exception raised while running the command (as can happen for
    empty repositories) results in an empty list.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids(_context_uid, _repo_id):
        cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
        try:
            output, __ = self.run_git_command(wire, cmd)
            return output.splitlines()
        except Exception:
            # Can be raised for empty repositories
            return []

    return _get_all_commit_ids(context_uid, repo_id)
1393 1394
1394 1395 @reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
    """
    Run the configured git executable with the arguments in `cmd` and
    return a `(stdout, stderr)` tuple of bytes.

    Private options, removed from `opts` before the subprocess is created:

    * `_bare`: when present, the default `-c` config options are not added
    * `_safe`: when present, an OSError does not raise; `('', err)` is
      returned instead
    * `_copts`: extra options placed between the executable and `cmd`
    * `extra_env`: mapping merged into the environment of the subprocess

    Remaining `opts` are passed to `subprocessio.SubprocessIOChunker`. The
    working directory is set to `wire['path']` when that is a directory.

    :raises VcsException: on OSError, unless `_safe` was given.
    """
    path = wire.get('path', None)
    debug_mode = rhodecode.ConfigGet().get_bool('debug')

    if path and os.path.isdir(path):
        opts['cwd'] = path

    if '_bare' in opts:
        opts.pop('_bare')
        _copts = []
    else:
        _copts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']

    # no exc on failure when `_safe` is given
    safe_call = '_safe' in opts
    opts.pop('_safe', None)

    if '_copts' in opts:
        _copts.extend(opts.pop('_copts') or [])

    gitenv = os.environ.copy()
    gitenv.update(opts.pop('extra_env', {}))
    # need to clean fix GIT_DIR !
    gitenv.pop('GIT_DIR', None)
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
    gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

    cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
    _opts = {'env': gitenv, 'shell': False}

    proc = None
    try:
        _opts.update(opts)
        proc = subprocessio.SubprocessIOChunker(cmd, **_opts)

        return b''.join(proc), b''.join(proc.stderr)
    except OSError as err:
        cmd = ' '.join(map(safe_str, cmd))  # human friendly CMD
        # call options are only included in the message in debug mode
        call_opts = _opts if debug_mode else {}

        tb_err = ("Couldn't run git command ({}).\n"
                  "Original error was:{}\n"
                  "Call options:{}\n"
                  .format(cmd, err, call_opts))
        log.exception(tb_err)
        if safe_call:
            return '', err
        raise exceptions.VcsException()(tb_err)
    finally:
        if proc:
            proc.close()
1452 1453
1453 1454 @reraise_safe_exceptions
def install_hooks(self, wire, force=False):
    """
    Install the git hooks for the repository at `wire['path']`.

    :param force: forwarded to `install_git_hooks` as `force_create`.
    :return: the return value of `install_git_hooks`.
    """
    from vcsserver.hook_utils import install_git_hooks
    bare = self.bare(wire)
    path = wire['path']
    # NOTE: a previously computed `<BINARY_DIR>/python3` path was never
    # passed anywhere, so that no-op computation has been dropped.
    return install_git_hooks(path, bare, force_create=force)
1462 1463
1463 1464 @reraise_safe_exceptions
def get_hooks_info(self, wire):
    """
    Return a dict with the `pre_version` and `post_version` reported by
    the hook-version helpers for the repository at `wire['path']`.
    """
    from vcsserver.hook_utils import (
        get_git_pre_hook_version, get_git_post_hook_version)
    bare = self.bare(wire)
    path = wire['path']
    pre_version = get_git_pre_hook_version(path, bare)
    post_version = get_git_post_hook_version(path, bare)
    return {
        'pre_version': pre_version,
        'post_version': post_version,
    }
1473 1474
1474 1475 @reraise_safe_exceptions
def set_head_ref(self, wire, head_name):
    """
    Point HEAD of the repository at `refs/heads/<head_name>`.

    Returns a two-element list: `head_name` and a short message naming
    the reference HEAD was set to.
    """
    log.debug('Setting refs/head to `%s`', head_name)
    with self._factory.repo_libgit2(wire) as repo:
        repo.set_head(f'refs/heads/{head_name}')

    return [head_name, f'set HEAD to refs/heads/{head_name}']
1482 1483
1483 1484 @reraise_safe_exceptions
def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id, cache_config):
    """
    Build an archive of `commit_id` by handing a file walker to
    `store_archive_in_cache` and return that function's result.

    The walker reads the commit's tree (or the sub-tree at the given path)
    into an index and yields one ArchiveNode per index entry, skipping
    entries whose mode is `GIT_FILEMODE_COMMIT`.
    """

    def file_walker(_commit_id, path):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]

            if path in ('', '/'):
                tree = commit.tree
            else:
                tree = commit.tree[path.rstrip('/')]
                tree_id = tree.id.hex
                try:
                    tree = repo[tree_id]
                except KeyError:
                    raise ObjectMissing(f'No tree with id: {tree_id}')

            index = LibGit2Index.Index()
            index.read_tree(tree)

            for entry in index:
                entry_path = entry.path
                mode = entry.mode
                is_link = stat.S_ISLNK(mode)
                if mode == pygit2.GIT_FILEMODE_COMMIT:
                    log.debug('Skipping path %s as a commit node', entry_path)
                    continue
                yield ArchiveNode(entry_path, mode, is_link, repo[entry.hex].read_raw)

    return store_archive_in_cache(
        file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id,
        cache_config=cache_config)
General Comments 0
You need to be logged in to leave comments. Login now