##// END OF EJS Templates
fix(git): make the fetch revs use smaller revs to be less memory hungry, and less prone to fail
super-admin -
r1185:729a204e default
parent child Browse files
Show More
@@ -1,1493 +1,1493 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23 import traceback
24 24 import urllib.request
25 25 import urllib.parse
26 26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 40 from dulwich.server import update_server_info
41 41
42 42 import rhodecode
43 43 from vcsserver import exceptions, settings, subprocessio
44 44 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
45 45 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
46 46 from vcsserver.hgcompat import (
47 47 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
48 48 from vcsserver.git_lfs.lib import LFSOidStore
49 49 from vcsserver.vcs_base import RemoteBase
50 50
51 51 DIR_STAT = stat.S_IFDIR
52 52 FILE_MODE = stat.S_IFMT
53 53 GIT_LINK = objects.S_IFGITLINK
54 54 PEELED_REF_MARKER = b'^{}'
55 55 HEAD_MARKER = b'HEAD'
56 56
57 57 log = logging.getLogger(__name__)
58 58
59 59
def reraise_safe_exceptions(func):
    """
    Decorator converting Dulwich exceptions raised by `func` to something neutral.

    Object/commit lookup problems are re-raised as `exceptions.LookupException`,
    hangups and unexpected commands as `exceptions.VcsException`. Every other
    exception propagates unchanged.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing) as lookup_err:
            raise exceptions.LookupException(org_exc=lookup_err)(safe_str(lookup_err))
        except (HangupException, UnexpectedCommandError) as remote_err:
            raise exceptions.VcsException(org_exc=remote_err)(safe_str(remote_err))
        # NOTE(marcink): because of how dulwich handles some exceptions
        # (KeyError on empty repos), we cannot track this and catch all
        # exceptions; anything not handled above is left to other handlers.
        return result

    return wrapper
82 82
83 83
class Repo(DulwichRepo):
    """
    Thin wrapper around the dulwich Repo class.

    Dulwich sometimes keeps .idx file descriptors open, which leads to
    "Too many open files" errors. Closing the repo when the object is
    destroyed releases those descriptors.
    """

    def __del__(self):
        # `object_store` may be absent when construction failed half-way;
        # in that case there is nothing to close.
        if not hasattr(self, 'object_store'):
            return
        self.close()
95 95
96 96
class Repository(LibGit2Repo):
    """libgit2 repository usable as a context manager; it is freed on exit."""

    def __enter__(self):
        """Return the repository itself for use inside the `with` block."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Free the underlying libgit2 resources, whether or not an error occurred."""
        self.free()
104 104
105 105
class GitFactory(RepoFactory):
    """Factory producing either dulwich or libgit2 repository objects for a wire."""

    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """
        Instantiate the repository object for `wire['path']`.

        :param wire: wire dict; only its `path` entry is read here
        :param create: accepted for interface compatibility, not used here
        :param use_libgit2: return a libgit2 `Repository` instead of a dulwich `Repo`
        """
        if not use_libgit2:
            # dulwich mode
            dulwich_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
            repo = Repo(dulwich_path)
        else:
            repo = Repository(safe_bytes(wire['path']))

        log.debug('repository created: got GIT object: %s', repo)
        return repo

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """Shortcut returning the libgit2 flavour of the repository."""
        return self.repo(wire, use_libgit2=True)
128 128
129 129
def create_signature_from_string(author_str, **kwargs):
    """
    Build a pygit2.Signature out of a 'Name <email>' string.

    :param author_str: String of the format 'Name <email>'
    :param kwargs: extra keyword arguments forwarded to pygit2.Signature
    :return: pygit2.Signature object
    :raises ValueError: when `author_str` does not follow the expected format
    """
    parsed = re.match(r'^(.+) <(.+)>$', author_str)
    if not parsed:
        raise ValueError(f"Invalid format: {author_str}")

    return pygit2.Signature(parsed.group(1), parsed.group(2), **kwargs)
143 143
144 144
def get_obfuscated_url(url_obj):
    """
    Return the string form of `url_obj` with secrets masked.

    NOTE: `url_obj` is modified in place - a set password is replaced by
    `*****` and the query string is passed through `obfuscate_qs`.
    """
    if url_obj.passwd:
        url_obj.passwd = b'*****'
    url_obj.query = obfuscate_qs(url_obj.query)
    return str(url_obj)
150 150
151 151
152 152 class GitRemote(RemoteBase):
153 153
def __init__(self, factory):
    """
    Store the repo factory and register the handlers used by bulk requests.

    :param factory: repository factory, kept as `self._factory`
    """
    self._factory = factory

    # commit attribute name -> bound method resolving it (see `bulk_request`)
    self._bulk_methods = dict(
        date=self.date,
        author=self.author,
        branch=self.branch,
        message=self.message,
        parents=self.parents,
        _commit=self.revision,
    )

    # file attribute name -> bound method resolving it (see `bulk_file_request`)
    self._bulk_file_methods = dict(
        size=self.get_node_size,
        data=self.get_node_data,
        flags=self.get_node_flags,
        is_binary=self.get_node_is_binary,
        md5=self.md5_hash,
    )
171 171
172 172 def _wire_to_config(self, wire):
173 173 if 'config' in wire:
174 174 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
175 175 return {}
176 176
177 177 def _remote_conf(self, config):
178 178 params = [
179 179 '-c', 'core.askpass=""',
180 180 ]
181 181 ssl_cert_dir = config.get('vcs_ssl_dir')
182 182 if ssl_cert_dir:
183 183 params.extend(['-c', f'http.sslCAinfo={ssl_cert_dir}'])
184 184 return params
185 185
@reraise_safe_exceptions
def discover_git_version(self):
    """Return the output of `git --version` as str, without the `git version` prefix."""
    stdout, _ = self.run_git_command(
        {}, ['--version'], _bare=True, _safe=True)
    version = stdout.removeprefix(b'git version')
    return safe_str(version.strip())
194 194
@reraise_safe_exceptions
def is_empty(self, wire):
    """
    Tell whether the repository has no content.

    A repository with a named HEAD is reported as non-empty right away;
    otherwise libgit2's `is_empty` decides. Any error raised while checking
    is treated as "empty".
    """
    with self._factory.repo_libgit2(wire) as repo:
        try:
            if repo.head.name:
                return False

            # NOTE(marcink): check again using more expensive method
            return repo.is_empty
        except Exception:
            # best effort: presumably HEAD cannot be resolved on a fresh repo
            return True
211 211
@reraise_safe_exceptions
def assert_correct_path(self, wire):
    """
    Check that `wire['path']` points to a Git repository.

    The (optionally cached) check is always invoked in fast mode, which only
    asks libgit2 to discover a repository at the path.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _assert_correct_path(_context_uid, _repo_id, fast_check):
        if fast_check:
            return bool(pygit2.discover_repository(safe_str(wire['path'])))

        # slow mode: actually open the repository with libgit2
        try:
            with self._factory.repo_libgit2(wire):
                pass
        except pygit2.GitError:
            log.debug(
                "Invalid Git path `%s`, tb: %s",
                wire.get('path'), traceback.format_exc())
            return False
        return True

    return _assert_correct_path(context_uid, repo_id, True)
237 237
@reraise_safe_exceptions
def bare(self, wire):
    """Return libgit2's `is_bare` flag for the repository behind `wire`."""
    with self._factory.repo_libgit2(wire) as repo:
        return repo.is_bare
243 243
@reraise_safe_exceptions
def get_node_data(self, wire, commit_id, path):
    """
    Return the content of the blob stored under `path` in `commit_id`.

    :return: blob data wrapped in a `BytesEnvelope`
    :raises LookupException: when the tree entry at `path` is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]
        if node.type == pygit2.GIT_OBJ_BLOB:
            return BytesEnvelope(node.data)

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
256 256
@reraise_safe_exceptions
def get_node_size(self, wire, commit_id, path):
    """
    Return the size of the blob stored under `path` in `commit_id`.

    :raises LookupException: when the tree entry at `path` is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]
        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.size

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
269 269
@reraise_safe_exceptions
def get_node_flags(self, wire, commit_id, path):
    """
    Return the file mode of the blob stored under `path` in `commit_id`.

    :raises LookupException: when the tree entry at `path` is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]
        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.filemode

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
282 282
@reraise_safe_exceptions
def get_node_is_binary(self, wire, commit_id, path):
    """
    Return libgit2's `is_binary` flag of the blob under `path` in `commit_id`.

    :raises LookupException: when the tree entry at `path` is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]
        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.is_binary

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
295 295
@reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
    """Return the data of the object `sha`, wrapped in a `BytesEnvelope`."""
    with self._factory.repo_libgit2(wire) as repo:
        return BytesEnvelope(repo[sha].data)
302 302
@reraise_safe_exceptions
def blob_raw_length(self, wire, sha):
    """Return the size of the object `sha` (optionally served from cache)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _blob_raw_length(_repo_id, _sha):
        # the sha is read from the enclosing scope; arguments only feed the cache
        with self._factory.repo_libgit2(wire) as repo:
            return repo[sha].size

    return _blob_raw_length(repo_id, sha)
317 317
318 318 def _parse_lfs_pointer(self, raw_content):
319 319 spec_string = b'version https://git-lfs.github.com/spec'
320 320 if raw_content and raw_content.startswith(spec_string):
321 321
322 322 pattern = re.compile(rb"""
323 323 (?:\n)?
324 324 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
325 325 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
326 326 ^size[ ](?P<oid_size>[0-9]+)\n
327 327 (?:\n)?
328 328 """, re.VERBOSE | re.MULTILINE)
329 329 match = pattern.match(raw_content)
330 330 if match:
331 331 return match.groupdict()
332 332
333 333 return {}
334 334
@reraise_safe_exceptions
def is_large_file(self, wire, commit_id):
    """
    Return parsed LFS pointer data for the object `commit_id`.

    Binary objects and objects that are not LFS pointers yield an empty dict.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_large_file(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            candidate = repo[commit_id]
            if not candidate.is_binary:
                return self._parse_lfs_pointer(candidate.data)
            return {}

    return _is_large_file(repo_id, commit_id)
351 351
@reraise_safe_exceptions
def is_binary(self, wire, tree_id):
    """Return libgit2's `is_binary` flag of the object `tree_id` (optionally cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_binary(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[tree_id].is_binary

    return _is_binary(repo_id, tree_id)
365 365
@reraise_safe_exceptions
def md5_hash(self, wire, commit_id, path):
    """
    Validate that `path` in `commit_id` is a blob and return an empty string.

    No hash is computed here; the return value is always `''`.

    :raises LookupException: when the tree entry at `path` is not a blob
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _md5_hash(_repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            node = repo[_commit_id].tree[_path]
            if node.type == pygit2.GIT_OBJ_BLOB:
                return ''

            raise exceptions.LookupException()(
                f'Tree for commit_id:{_commit_id} is not a blob: {node.type_str}')

    return _md5_hash(repo_id, commit_id, path)
385 385
@reraise_safe_exceptions
def in_largefiles_store(self, wire, oid):
    """
    Tell whether `oid` is present in the configured Git LFS store.

    Returns False when no `vcs_git_lfs_store_location` is configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        return False

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.has_oid()
401 401
@reraise_safe_exceptions
def store_path(self, wire, oid):
    """
    Return the path of `oid` inside the configured Git LFS store.

    :raises ValueError: when no `vcs_git_lfs_store_location` is configured
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        raise ValueError(f'Unable to fetch oid with path {oid}')

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.oid_path
415 415
@reraise_safe_exceptions
def bulk_request(self, wire, rev, pre_load):
    """
    Resolve several commit attributes of `rev` in one call.

    :param pre_load: iterable of attribute names registered in `self._bulk_methods`
    :return: dict mapping each requested attribute to its value
    :raises VcsException: when a KeyError is raised while resolving an attribute
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_request(_repo_id, _rev, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                handler = self._bulk_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire.update({'cache': False})
                collected[attr] = handler(wire, rev)
            except KeyError as e:
                raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
        return collected

    return _bulk_request(repo_id, rev, sorted(pre_load))
435 435
@reraise_safe_exceptions
def bulk_file_request(self, wire, commit_id, path, pre_load):
    """
    Resolve several attributes of the file `path` at `commit_id` in one call.

    :param pre_load: iterable of attribute names registered in `self._bulk_file_methods`
    :return: `BinaryEnvelope` around a dict mapping attribute name to value
    :raises VcsException: when a KeyError is raised while resolving an attribute
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                handler = self._bulk_file_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire.update({'cache': False})
                collected[attr] = handler(wire, _commit_id, _path)
            except KeyError as e:
                raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
        return collected

    file_attrs = _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
    return BinaryEnvelope(file_attrs)
454 454
def _build_opener(self, url: str):
    """
    Create a urllib opener for `url`.

    When the parsed url carries auth info, basic and digest auth handlers
    sharing one password manager are installed; otherwise a plain opener
    is returned.
    """
    authinfo = url_parser(safe_bytes(url)).authinfo()[1]
    if not authinfo:
        return urllib.request.build_opener()

    # create a password manager
    passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    passmgr.add_password(*authinfo)

    return urllib.request.build_opener(
        httpbasicauthhandler(passmgr),
        httpdigestauthhandler(passmgr))
469 469
@reraise_safe_exceptions
def check_url(self, url, config):
    """
    Verify that `url` can be opened and answers like a Git repository.

    The `info/refs?service=git-upload-pack` endpoint of the url is requested
    and the response body is inspected for Git markers.

    :param url: remote url, may carry credentials
    :param config: not used by this check
    :return: True when the url looks like a Git repository
    :raises URLError: when the url cannot be opened, does not answer with
        HTTP 200, or the response does not look like a Git repository
    """
    url_obj = url_parser(safe_bytes(url))

    test_uri = safe_str(url_obj.authinfo()[0])
    obfuscated_uri = get_obfuscated_url(url_obj)

    log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)

    if not test_uri.endswith('info/refs'):
        test_uri = test_uri.rstrip('/') + '/info/refs'

    # Build the opener from the ORIGINAL url (same as `pull` does): `test_uri`
    # comes from `authinfo()[0]`, i.e. the credential-free form of the url, so
    # an opener built from it would never get the auth handlers installed.
    o = self._build_opener(url)
    o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

    q = {"service": 'git-upload-pack'}
    qs = f'?{urllib.parse.urlencode(q)}'
    cu = f"{test_uri}{qs}"

    try:
        req = urllib.request.Request(cu, None, {})
        log.debug("Trying to open URL %s", obfuscated_uri)
        resp = o.open(req)
        if resp.code != 200:
            raise exceptions.URLError()('Return Code is not 200')
    except Exception as e:
        log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
        # means it cannot be cloned
        raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")

    # now detect if it's proper git repo
    gitdata: bytes = resp.read()

    if b'service=git-upload-pack' in gitdata:
        # smart HTTP answer
        pass
    elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
        # old style git can return some other format!
        pass
    else:
        # there is no underlying exception here, only an unexpected payload
        raise exceptions.URLError()(
            f"url [{obfuscated_uri}] does not look like a git repo")

    return True
514 514
@reraise_safe_exceptions
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
    """
    Pull from `url` and store the accepted remote refs in the local repository.

    :param deferred: suffix marking refs that must be skipped
    :param valid_refs: ref name prefix(es) to accept; a list is converted to a tuple
    :param update_after_clone: when true, set HEAD to the remote HEAD and
        build the index from its tree
    """
    # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
    remote_refs = self.pull(wire, url, apply_refs=False)
    repo = self._factory.repo(wire)
    if isinstance(valid_refs, list):
        valid_refs = tuple(valid_refs)

    for ref_name in remote_refs:
        # only parse heads/tags and skip so called deferred tags
        if not ref_name.startswith(valid_refs):
            continue
        if ref_name.endswith(deferred):
            continue
        repo[ref_name] = remote_refs[ref_name]

    if not update_after_clone:
        return

    # we want to checkout HEAD
    repo["HEAD"] = remote_refs["HEAD"]
    index.build_index_from_tree(
        repo.path, repo.index_path(), repo.object_store, repo["HEAD"].tree)
533 533
@reraise_safe_exceptions
def branch(self, wire, commit_id):
    """
    Return the names of the branches whose head is exactly `commit_id`.

    Names are returned without the `refs/heads/` prefix.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _branch(_context_uid, _repo_id, _commit_id):
        heads_re = re.compile('^refs/heads')
        return [
            ref_name.split('refs/heads/')[-1]
            for ref_name, ref_sha in self.get_refs(wire).items()
            if heads_re.match(ref_name) and ref_sha == _commit_id
        ]

    return _branch(context_uid, repo_id, commit_id)
550 550
@reraise_safe_exceptions
def commit_branches(self, wire, commit_id):
    """Return the list of branches reported by libgit2 as containing `commit_id`."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _commit_branches(_context_uid, _repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return list(repo.branches.with_commit(_commit_id))

    return _commit_branches(context_uid, repo_id, commit_id)
564 564
@reraise_safe_exceptions
def add_object(self, wire, content):
    """
    Store `content` as a new blob in the repository.

    :param wire: wire dict describing the repository
    :param content: raw bytes of the blob
    :return: id of the created dulwich blob
    """
    # `object_store` is a dulwich API that a libgit2 repository does not
    # provide, so the dulwich flavour of the repo has to be used here.
    repo = self._factory.repo(wire)

    blob = objects.Blob()
    blob.set_raw_string(content)
    repo.object_store.add_object(blob)
    return blob.id
573 573
@reraise_safe_exceptions
def create_commit(self, wire, author, committer, message, branch, new_tree_id,
                  date_args: list[int, int] = None,
                  parents: list | None = None):
    """
    Create a commit for `new_tree_id` and point `refs/heads/<branch>` at it.

    :param author: author as 'Name <email>'; converted to a signature only
        when `date_args` is given
    :param committer: committer, handled like `author`
    :param message: the commit message
    :param branch: short branch name that receives the commit
    :param new_tree_id: tree of the commit; bytes/str ids are looked up in the repo
    :param date_args: optional `[time, offset]` used for both signatures
    :param parents: optional parent ids; when empty the branch head, or HEAD
        if the repo has any local branch, becomes the parent
    :return: id of the new commit as str
    """
    with self._factory.repo_libgit2(wire) as repo:

        if date_args:
            commit_time, tz_offset = date_args
            author = create_signature_from_string(
                author, time=commit_time, offset=tz_offset)
            committer = create_signature_from_string(
                committer, time=commit_time, offset=tz_offset)

        tree = new_tree_id
        if isinstance(tree, (bytes, str)):
            # validate this tree is in the repo...
            tree = repo[safe_str(tree)].id

        if parents:
            # run via sha's and validate them in repo
            parent_ids = [repo[sha].id for sha in parents]
        elif branch in repo.branches.local:
            # ensure we COMMIT on top of given branch head
            parent_ids = [repo.branches[branch].target]
        elif list(repo.branches.local):
            # branch is new, but the repo already has branches: build on HEAD
            parent_ids = [repo.head.target]
        else:
            # no branches at all: this becomes a parent-less commit
            parent_ids = []

        commit_oid = repo.create_commit(
            f'refs/heads/{branch}',  # the name of the reference to update
            author,  # the author of the commit
            committer,  # the committer of the commit
            message,  # the commit message
            tree,  # the tree produced by the index
            parent_ids  # list of parents for the new commit, usually just one,
        )

        return safe_str(commit_oid)
625 625
@reraise_safe_exceptions
def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
    """
    Write the given file changes through the index and commit them on `branch`.

    :param commit_data: dict with `parents`, `author`, `committer`, `message`,
        `commit_time` and `commit_timezone`
    :param commit_tree: when truthy (and parents are given) the index is first
        loaded from the tree of the first parent
    :param updated: iterable of dicts with `content`, `path` and `mode`
    :param removed: iterable of paths to drop from the index
    :return: id of the new commit
    """
    known_modes = {
        0o100644: pygit2.GIT_FILEMODE_BLOB,
        0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
        0o120000: pygit2.GIT_FILEMODE_LINK
    }

    def mode2pygit(mode):
        """
        git only supports two filemode 644 and 755

        0o100755 -> 33261
        0o100644 -> 33188
        """
        return known_modes.get(mode) or pygit2.GIT_FILEMODE_BLOB

    with self._factory.repo_libgit2(wire) as repo:
        repo_index = repo.index

        commit_parents = None
        if commit_tree and commit_data['parents']:
            commit_parents = commit_data['parents']
            repo_index.read_tree(repo[commit_parents[0]].tree)

        for changed in updated:
            blob_id = repo.create_blob(changed['content'])
            entry = pygit2.IndexEntry(
                changed['path'], blob_id, mode2pygit(changed['mode']))
            repo_index.add(entry)

        for removed_path in removed:
            repo_index.remove(removed_path)

        # Write changes to the index
        repo_index.write()

        # Create a tree from the updated index
        new_tree_id = repo_index.write_tree()

        date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]

        new_commit_id = self.create_commit(
            wire, commit_data['author'], commit_data['committer'],
            commit_data['message'], branch, new_tree_id,
            date_args=date_args, parents=commit_parents)

        # libgit2, ensure the branch is there and exists
        self.create_branch(wire, branch, new_commit_id)

        # libgit2, set new ref to this created commit
        self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)

        return new_commit_id
684 684
@reraise_safe_exceptions
def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
    """
    Fetch from `url` into the local (dulwich) repository.

    :param wire: wire dict describing the target repository
    :param url: remote location; anything other than 'default' that has no
        `://` in it is handled with a local git client, everything else over HTTP
    :param apply_refs: when true, store the fetched refs in the local repository
    :param refs: optional list of ref names; limits both what is requested
        from the remote and what is returned
    :param update_after: when true, set HEAD to the remote HEAD and build the
        index from its tree
    :return: the remote refs (narrowed to `refs` when given)
    :raises AbortException: when the remote is not a Git repository
    """
    if url != 'default' and '://' not in url:
        client = LocalGitClient(url)
    else:
        url_obj = url_parser(safe_bytes(url))
        # the opener is built from the original url, before `url` is replaced
        # by the first element of `authinfo()` below
        o = self._build_opener(url)
        url = url_obj.authinfo()[0]
        client = HttpGitClient(base_url=url, opener=o)
    repo = self._factory.repo(wire)

    # default: ask for everything the remote advertises
    determine_wants = repo.object_store.determine_wants_all

    if refs:
        refs: list[bytes] = [ascii_bytes(x) for x in refs]

        def determine_wants_requested(_remote_refs):
            # collect the hashes of only those remote refs that were requested
            determined = []
            for ref_name, ref_hash in _remote_refs.items():
                bytes_ref_name = safe_bytes(ref_name)

                if bytes_ref_name in refs:
                    bytes_ref_hash = safe_bytes(ref_hash)
                    determined.append(bytes_ref_hash)
            return determined

        # swap with our custom requested wants
        determine_wants = determine_wants_requested

    try:
        remote_refs = client.fetch(
            path=url, target=repo, determine_wants=determine_wants)

    except NotGitRepository as e:
        log.warning(
            'Trying to fetch from "%s" failed, not a Git repository.', url)
        # Exception can contain unicode which we convert
        raise exceptions.AbortException(e)(repr(e))

    # mikhail: client.fetch() returns all the remote refs, but fetches only
    # refs filtered by `determine_wants` function. We need to filter result
    # as well
    if refs:
        # from here on `remote_refs` is a plain dict
        remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}

    if apply_refs:
        # TODO: johbo: Needs proper test coverage with a git repository
        # that contains a tag object, so that we would end up with
        # a peeled ref at this point.
        for k in remote_refs:
            if k.endswith(PEELED_REF_MARKER):
                log.debug("Skipping peeled reference %s", k)
                continue
            repo[k] = remote_refs[k]

        if refs and not update_after:
            # update to ref
            # mikhail: explicitly set the head to the last ref.
            update_to_ref = refs[-1]
            # NOTE(review): we are under `not update_after`, so this can only
            # trigger for a falsy str (i.e. '') - confirm the intent
            if isinstance(update_after, str):
                update_to_ref = update_after

            repo[HEAD_MARKER] = remote_refs[update_to_ref]

    if update_after:
        # we want to check out HEAD
        # NOTE(review): when `refs` was given, `remote_refs` was narrowed to
        # those names above; this lookup assumes HEAD is still among them - confirm
        repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
        index.build_index_from_tree(repo.path, repo.index_path(),
                                    repo.object_store, repo[HEAD_MARKER].tree)

    # unwrap the dulwich result object into its refs mapping
    if isinstance(remote_refs, FetchPackResult):
        return remote_refs.refs
    return remote_refs
758 758
@reraise_safe_exceptions
def sync_fetch(self, wire, url, refs=None, all_refs=False):
    """
    Fetch the refs advertised by `url` into this repository via the git binary.

    The remote refs are listed with `git ls-remote` and then fetched with
    `git fetch` in small chunks.

    :param wire: wire dict describing the target repository
    :param url: remote to list and fetch from
    :param refs: optional commit sha, or list/tuple of shas (str or bytes);
        when given, only remote refs pointing at one of them are fetched
    :param all_refs: when false, `ls-remote` is limited to `--heads --tags`
    :return: OrderedDict mapping each remote ref name to its sha (both bytes),
        without peeled refs and HEAD
    """
    self._factory.repo(wire)
    if refs and not isinstance(refs, (list, tuple)):
        refs = [refs]

    # `ls-remote` output is bytes, so normalise the requested shas to bytes
    # as well (`pull` does the same for its refs); otherwise str shas coming
    # from callers would never match and nothing would be fetched.
    wanted_shas = {safe_bytes(x) for x in refs} if refs else set()

    config = self._wire_to_config(wire)
    # get all remote refs we'll use to fetch later
    cmd = ['ls-remote']
    if not all_refs:
        cmd += ['--heads', '--tags']
    cmd += [url]
    output, __ = self.run_git_command(
        wire, cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    remote_refs = collections.OrderedDict()
    fetch_refs = []

    for ref_line in output.splitlines():
        sha, ref = ref_line.split(b'\t')
        sha = sha.strip()
        if ref in remote_refs:
            # duplicate, skip
            continue
        if ref.endswith(PEELED_REF_MARKER):
            log.debug("Skipping peeled reference %s", ref)
            continue
        # don't sync HEAD
        if ref in [HEAD_MARKER]:
            continue

        remote_refs[ref] = sha

        # without a filter every ref is fetched, otherwise only the ones
        # pointing at a requested sha
        if not refs or sha in wanted_shas:
            fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
    log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))

    if fetch_refs:
        # small chunks keep each fetch less memory hungry and less prone to fail
        fetch_chunk_size = 128
        for chunk in more_itertools.chunked(fetch_refs, fetch_chunk_size):
            fetch_refs_chunks = list(chunk)
            log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
            self.run_git_command(
                wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
                fail_on_stderr=False,
                _copts=self._remote_conf(config),
                extra_env={'GIT_TERMINAL_PROMPT': '0'})

    return remote_refs
812 812
@reraise_safe_exceptions
def sync_push(self, wire, url, refs=None):
    """
    Mirror-push this repository to `url` using the git binary.

    Nothing is pushed when `check_url` returns a falsy value.

    :param refs: accepted for interface compatibility, not used here
    """
    if not self.check_url(url, wire):
        return

    remote_opts = self._remote_conf(self._wire_to_config(wire))
    self._factory.repo(wire)
    self.run_git_command(
        wire, ['push', url, '--mirror'],
        fail_on_stderr=False,
        _copts=remote_opts,
        extra_env={'GIT_TERMINAL_PROMPT': '0'})
823 823
@reraise_safe_exceptions
def get_remote_refs(self, wire, url):
    """Open the repository located at `url` with dulwich and return its refs."""
    other_repo = Repo(url)
    remote_refs = other_repo.get_refs()
    return remote_refs
828 828
@reraise_safe_exceptions
def get_description(self, wire):
    """Return the description reported by the dulwich repository behind `wire`."""
    dulwich_repo = self._factory.repo(wire)
    description = dulwich_repo.get_description()
    return description
833 833
@reraise_safe_exceptions
def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
    """
    Return ids of commits reachable from `rev2` but not from `rev1`.

    Objects are first exchanged in both directions between this repository
    and the one at `other_repo_path`; the walk itself runs on the other one.
    """
    origin_repo_path = wire['path']
    origin_repo = self._factory.repo(wire)
    # fetch from other_repo_path to our origin repo
    LocalGitClient(thin_packs=False).fetch(other_repo_path, origin_repo)

    wire_remote = wire.copy()
    wire_remote['path'] = other_repo_path
    other_repo = self._factory.repo(wire_remote)

    # fetch from origin_repo_path to our remote repo
    LocalGitClient(thin_packs=False).fetch(origin_repo_path, other_repo)

    walker = other_repo.get_walker(
        include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])
    missing_ids = []
    for walk_entry in walker:
        missing_ids.append(walk_entry.commit.id)
    return missing_ids
852 852
@reraise_safe_exceptions
def get_object(self, wire, sha, maybe_unreachable=False):
    """
    Resolve `sha` (a commit id or anything `revparse_single` accepts) to an object.

    :param wire: wire dict describing the repository
    :param sha: revision spec to resolve
    :param maybe_unreachable: when true, skip the check that the resolved
        commit is contained in at least one branch
    :return: dict with `id`, `type`, `commit_id` and `idx` (always 0)
    :raises LookupException: when `sha` cannot be resolved, or resolves to a
        commit that is not contained in any branch while the check is active
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    # NOTE(review): `maybe_unreachable` is not among the arguments passed to
    # the cached function - verify a cached result cannot be served for a call
    # made with a different flag
    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_object(_context_uid, _repo_id, _sha):
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:

            missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
            try:
                commit = repo.revparse_single(sha)
            except KeyError:
                # NOTE(marcink): KeyError doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found', sha)
                raise exceptions.LookupException(e)(missing_commit_err)
            except ValueError as e:
                raise exceptions.LookupException(e)(missing_commit_err)

            is_tag = False
            if isinstance(commit, pygit2.Tag):
                # continue with the object the tag points to
                commit = repo.get(commit.target)
                is_tag = True

            # the dangling check is on by default and switched off by any of
            # the conditions below
            check_dangling = True
            if is_tag:
                check_dangling = False

            if check_dangling and maybe_unreachable:
                check_dangling = False

            # we used a reference and it parsed means we're not having a dangling commit
            if sha != commit.hex:
                check_dangling = False

            if check_dangling:
                # check for dangling commit
                for branch in repo.branches.with_commit(commit.hex):
                    if branch:
                        # one containing branch is enough
                        break
                else:
                    # the loop ended without `break`: no branch contains the commit
                    # NOTE(marcink): Empty error doesn't give us any meaningful information
                    # here, we instead give something more explicit
                    e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
                    raise exceptions.LookupException(e)(missing_commit_err)

            commit_id = commit.hex
            type_str = commit.type_str

            return {
                'id': commit_id,
                'type': type_str,
                'commit_id': commit_id,
                'idx': 0
            }

    return _get_object(context_uid, repo_id, sha)
912 912
@reraise_safe_exceptions
def get_refs(self, wire):
    """
    Map every ``refs/heads/*`` and ``refs/tags/*`` reference name to the hex
    id of its target. The lookup goes through the conditional cache region.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_refs(_context_uid, _repo_id):
        heads_or_tags = re.compile('^refs/(heads|tags)/')

        with self._factory.repo_libgit2(wire) as repo:
            wanted = [
                ref for ref in repo.listall_reference_objects()
                if heads_or_tags.match(ref.name)
            ]
            return {ref.name: ref.target.hex for ref in wanted}

    return _get_refs(context_uid, repo_id)
928 928
@reraise_safe_exceptions
def get_branch_pointers(self, wire):
    """
    Map the target hex id of every reference under ``refs/heads`` to that
    reference's shorthand name. The lookup goes through the conditional
    cache region.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_branch_pointers(_context_uid, _repo_id):
        heads = re.compile('^refs/heads')

        with self._factory.repo_libgit2(wire) as repo:
            branch_refs = [
                ref for ref in repo.listall_reference_objects()
                if heads.match(ref.name)
            ]
            return {ref.target.hex: ref.shorthand for ref in branch_refs}

    return _get_branch_pointers(context_uid, repo_id)
944 944
@reraise_safe_exceptions
def head(self, wire, show_exc=True):
    """
    Return the hex id of the object that HEAD peels to.

    When resolving HEAD fails the error is re-raised if ``show_exc`` is
    true; otherwise ``None`` is returned.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _head(_context_uid, _repo_id, _show_exc):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                return repo.head.peel().hex
            except Exception:
                if not show_exc:
                    return None
                raise

    return _head(context_uid, repo_id, show_exc)
960 960
@reraise_safe_exceptions
def init(self, wire):
    """
    Create the directory ``wire['path']`` (mode 0o755) and initialise a
    non-bare git repository in it.
    """
    target_dir = safe_str(wire['path'])
    os.makedirs(target_dir, mode=0o755)
    pygit2.init_repository(target_dir, bare=False)
966 966
@reraise_safe_exceptions
def init_bare(self, wire):
    """
    Create the directory ``wire['path']`` (mode 0o755) and initialise a
    bare git repository in it.
    """
    target_dir = safe_str(wire['path'])
    os.makedirs(target_dir, mode=0o755)
    pygit2.init_repository(target_dir, bare=True)
972 972
@reraise_safe_exceptions
def revision(self, wire, rev):
    """
    Look up ``rev`` and return a dict with its hex ``id`` and, when the
    object exposes a ``tree_id``, the hex id of that tree under ``tree``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _revision(_context_uid, _repo_id, _rev):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[rev]
            info = {'id': git_obj.id.hex}

            # tree objects itself don't have tree_id attribute
            if hasattr(git_obj, 'tree_id'):
                info['tree'] = git_obj.tree_id.hex

            return info

    return _revision(context_uid, repo_id, rev)
993 993
@reraise_safe_exceptions
def date(self, wire, commit_id):
    """
    Return ``[commit_time, commit_time_offset]`` for ``commit_id``.

    Objects without a ``commit_time`` attribute (presumably tags) are first
    resolved through ``get_object()``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _date(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[commit_id]

            if not hasattr(git_obj, 'commit_time'):
                git_obj = git_obj.get_object()

            # TODO(marcink): check dulwich difference of offset vs timezone
            return [git_obj.commit_time, git_obj.commit_time_offset]

    return _date(repo_id, commit_id)
1014 1014
@reraise_safe_exceptions
def author(self, wire, commit_id):
    """
    Return the author of ``commit_id`` as ``"name <email>"``, or just the
    name when no email is set. If formatting the name fails, the raw name
    is converted with ``safe_str`` instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _author(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[commit_id]

            # objects without an `author` attribute are resolved via get_object()
            author = git_obj.author if hasattr(git_obj, 'author') else git_obj.get_object().author

            if author.email:
                return f"{author.name} <{author.email}>"

            try:
                return f"{author.name}"
            except Exception:
                return f"{safe_str(author.raw_name)}"

    return _author(repo_id, commit_id)
1040 1040
@reraise_safe_exceptions
def message(self, wire, commit_id):
    """
    Return the ``message`` attribute of the object stored under ``commit_id``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _message(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[commit_id].message

    return _message(repo_id, commit_id)
1053 1053
@reraise_safe_exceptions
def parents(self, wire, commit_id):
    """
    Return the hex ids of the parents of ``commit_id``.

    Objects without a ``parent_ids`` attribute are first resolved through
    ``get_object()``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _parents(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[commit_id]
            if not hasattr(git_obj, 'parent_ids'):
                git_obj = git_obj.get_object()

            return [parent.hex for parent in git_obj.parent_ids]

    return _parents(repo_id, commit_id)
1071 1071
@reraise_safe_exceptions
def children(self, wire, commit_id):
    """
    Return the ids of the children of ``commit_id``.

    Runs ``git rev-list --all --children`` over ``commit_id^..HEAD`` and
    takes the child ids from the first output line that starts with
    ``commit_id``; an empty list is returned when no such line exists.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    head = self.head(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _children(_repo_id, _commit_id):
        output, __ = self.run_git_command(
            wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])

        own_line = re.compile(fr'^{commit_id}')
        for raw_line in output.splitlines():
            text = safe_str(raw_line)
            if own_line.match(text):
                # the first token is the commit itself, the rest are its children
                return text.split(' ')[1:]

        return []

    return _children(repo_id, commit_id)
1096 1096
@reraise_safe_exceptions
def set_refs(self, wire, key, value):
    """
    Create the reference ``key`` pointing at ``value``, overwriting any
    existing reference of that name (``force=True``).
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.create(key, value, force=True)
1102 1102
@reraise_safe_exceptions
def create_branch(self, wire, branch_name, commit_id, force=False):
    """
    Create the local branch ``branch_name``.

    :param commit_id: commit the branch should point at; when empty, the
        commit that HEAD peels to is used.
    :param force: when true the branch is always (re)created; otherwise it
        is created only if no branch of that name exists yet.
    """
    with self._factory.repo_libgit2(wire) as repo:
        if commit_id:
            commit = repo[commit_id]
        else:
            # if commit is not given just use the HEAD; `repo.head` is a
            # property returning a reference (not a callable), so it has to
            # be peeled to get the commit, same as in `head()`
            commit = repo.head.peel()

        # create only if forced, or if that branch isn't existing
        if force or not repo.branches.get(branch_name):
            repo.branches.local.create(branch_name, commit, force=force)
1118 1118
@reraise_safe_exceptions
def remove_ref(self, wire, key):
    """
    Delete the reference named ``key`` from the repository.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(key)
1124 1124
@reraise_safe_exceptions
def tag_remove(self, wire, tag_name):
    """
    Delete the reference ``refs/tags/<tag_name>`` from the repository.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(f'refs/tags/{tag_name}')
1131 1131
@reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Compare the trees of two commits.

    ``source_id`` may be empty, in which case the target tree is compared
    against no tree at all.

    :returns: ``(added, modified, deleted)`` lists of paths.
    """
    repo = self._factory.repo(wire)
    # source can be empty
    source_id = safe_bytes(source_id if source_id else b'')
    target_id = safe_bytes(target_id)

    source_tree = repo[source_id].tree if source_id else None
    target_tree = repo[target_id].tree

    added, modified, deleted = set(), set(), set()
    for (old_path, new_path), (_, _), (_, _) in repo.object_store.tree_changes(source_tree, target_tree):
        if new_path:
            # a path on both sides is a modification, otherwise an addition
            bucket = modified if old_path else added
            bucket.add(new_path)
        elif old_path:
            deleted.add(old_path)

    return list(added), list(modified), list(deleted)
1155 1155
@reraise_safe_exceptions
def tree_and_type_for_path(self, wire, commit_id, path):
    """
    Look up ``path`` inside the tree of ``commit_id``.

    :returns: ``(hex id, type string, filemode)`` of the entry, or
        ``(None, None, None)`` when the tree has no such path.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            # the commit lookup stays outside the try: a missing commit must
            # still surface as an error, only a missing path is tolerated
            commit = repo[commit_id]
            try:
                entry = commit.tree[path]
            except KeyError:
                return None, None, None

            return entry.id.hex, entry.type_str, entry.filemode

    return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1175 1175
@reraise_safe_exceptions
def tree_items(self, wire, tree_id):
    """
    List the entries of the tree ``tree_id``.

    :returns: list of ``(name, filemode, hex id, type)`` tuples; entries of
        type ``commit`` are reported with type ``link``.
    :raises ObjectMissing: when no object is stored under ``tree_id``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_items(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            entries = []
            for item in tree:
                kind = item.type_str
                if kind == 'commit':
                    # NOTE(marcink): submodules we translate to 'link' for backward compat
                    kind = 'link'
                entries.append((item.name, item.filemode, item.hex, kind))

            return entries

    return _tree_items(repo_id, tree_id)
1204 1204
@reraise_safe_exceptions
def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Old version that uses subprocess to call diff

    Runs ``git diff`` between the two commits, or ``git show`` on
    ``commit_id_2`` when ``commit_id_1`` is the empty commit.

    :param file_filter: optional path to restrict the diff to.
    :param opt_ignorews: when true, whitespace changes are ignored.
    :param context: number of context lines (passed as ``-U<context>``).
    :returns: the diff as bytes.
    """

    flags = [
        f'-U{context}', '--patch',
        '--binary',
        '--find-renames',
        '--no-indent-heuristic',
        # '--indent-heuristic',
        #'--full-index',
        #'--abbrev=40'
    ]

    if opt_ignorews:
        flags.append('--ignore-all-space')

    from_empty = commit_id_1 == self.EMPTY_COMMIT
    if from_empty:
        cmd = ['show'] + flags + [commit_id_2]
    else:
        cmd = ['diff'] + flags + [commit_id_1, commit_id_2]

    if file_filter:
        cmd.extend(['--', file_filter])

    diff, __ = self.run_git_command(wire, cmd)

    # If we used 'show' command, strip first few lines (until actual diff
    # starts)
    if from_empty:
        lines = diff.splitlines()
        start = next(
            (idx for idx, line in enumerate(lines) if line.startswith(b'diff')),
            len(lines))
        # Append new line just like 'diff' command do. The command output is
        # bytes, so the separator has to be bytes too.
        diff = b'\n'.join(lines[start:]) + b'\n'
    return diff
1245 1245
@reraise_safe_exceptions
def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Produce a diff between two commits using libgit2.

    When ``commit_id_1`` is the empty commit, the tree of ``commit_id_2`` is
    diffed against nothing; otherwise both trees are diffed and rename
    detection is applied to the result.

    :param file_filter: when set, only the patch whose old file path equals
        it is returned (empty payload if none matches).
    :param opt_ignorews: when true, whitespace changes are ignored.
    :param context: number of context lines.
    :returns: a ``BytesEnvelope`` holding the patch data.
    """
    diff_flags = pygit2.GIT_DIFF_SHOW_BINARY
    if opt_ignorews:
        diff_flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE

    with self._factory.repo_libgit2(wire) as repo:
        new_commit = repo[commit_id_2]

        if commit_id_1 == self.EMPTY_COMMIT:
            diff_obj = new_commit.tree.diff_to_tree(
                flags=diff_flags, context_lines=context, swap=True)
        else:
            old_commit = repo[commit_id_1]
            diff_obj = new_commit.tree.diff_to_tree(
                old_commit.tree, flags=diff_flags, context_lines=context, swap=True)
            diff_obj.find_similar(flags=pygit2.GIT_DIFF_FIND_RENAMES)

        if not file_filter:
            return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')

        for patch in diff_obj:
            if patch.delta.old_file.path == file_filter:
                return BytesEnvelope(patch.data) or BytesEnvelope(b'')

        # no matching path == no diff
        return BytesEnvelope(b'')
1280 1280
@reraise_safe_exceptions
def node_history(self, wire, commit_id, path, limit):
    """
    Return the ids (as bytes) of the commits that touched ``path``, starting
    the history walk at ``commit_id``.

    :param limit: maximum number of commits; a falsy value means no limit.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
        # optimize for n==1, rev-list is much faster for that use-case
        if limit == 1:
            cmd = ['rev-list', '-1', commit_id, '--', path]
        else:
            limit_args = ['-n', str(safe_int(limit, 0))] if limit else []
            cmd = ['log', *limit_args, '--pretty=format: %H', '-s', commit_id, '--', path]

        output, __ = self.run_git_command(wire, cmd)
        # every 40-character hex run in the output is taken as a commit id
        return re.findall(rb'[0-9a-fA-F]{40}', output)

    return _node_history(context_uid, repo_id, commit_id, path, limit)
1302 1302
@reraise_safe_exceptions
def node_annotate_legacy(self, wire, commit_id, path):
    """
    Annotate ``path`` at ``commit_id`` by parsing ``git blame`` output.

    :returns: list of ``(line_no, blame_commit_id, line)`` tuples with
        1-based line numbers; the last output line is not included.
    """
    # note: replaced by pygit2 implementation
    # -l ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r commit_id ==> blames for the given commit
    cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
    output, __ = self.run_git_command(wire, cmd)

    annotated = []
    for line_no, blame_line in enumerate(output.splitlines()[:-1], start=1):
        # the commit id is everything up to the first space
        blame_commit_id, line = blame_line.split(b' ', 1)
        annotated.append((line_no, blame_commit_id, line))

    return annotated
1319 1319
@reraise_safe_exceptions
def node_annotate(self, wire, commit_id, path):
    """
    Annotate ``path`` at ``commit_id`` using libgit2 blame.

    :returns: ``BinaryEnvelope`` wrapping a list of
        ``(line_no, blame_commit_id, line)`` tuples with 1-based line numbers.
    """
    annotated = []
    with self._factory.repo_libgit2(wire) as repo:
        commit = repo[commit_id]
        blame_obj = repo.blame(path, newest_commit=commit_id)

        content_lines = commit.tree[path].data.splitlines()
        for line_no, line in enumerate(content_lines, start=1):
            hunk = blame_obj.for_line(line_no)
            annotated.append((line_no, hunk.final_commit_id.hex, line))

    return BinaryEnvelope(annotated)
1336 1336
@reraise_safe_exceptions
def update_server_info(self, wire):
    """
    Run dulwich's ``update_server_info`` on the repository addressed by
    ``wire``.
    """
    # inside the method body this name resolves to the module-level import,
    # not to this method
    update_server_info(self._factory.repo(wire))
1341 1341
@reraise_safe_exceptions
def get_all_commit_ids(self, wire):
    """
    Return the ids of all commits reachable from branches and tags, as a
    list of bytes lines in the order produced by
    ``git rev-list --reverse --date-order``.

    Any failure of the git command yields an empty list.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids(_context_uid, _repo_id):
        cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
        try:
            output, __ = self.run_git_command(wire, cmd)
        except Exception:
            # Can be raised for empty repositories
            return []
        return output.splitlines()

    return _get_all_commit_ids(context_uid, repo_id)
1369 1369
@reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
    """
    Run the git executable with ``cmd`` and return ``(stdout, stderr)``.

    Special keys consumed from ``opts`` (everything else is forwarded to
    ``SubprocessIOChunker``):

    * ``_bare``: when present, the default ``-c`` config options are not added
    * ``_safe``: when present, an ``OSError`` is returned instead of raised
    * ``_copts``: extra options placed between the executable and ``cmd``
    * ``extra_env``: mapping merged into the environment of the subprocess

    :raises: a ``VcsException`` wrapped error when the command cannot be run
        and ``_safe`` was not given.
    """
    path = wire.get('path', None)
    debug_mode = rhodecode.ConfigGet().get_bool('debug')

    if path and os.path.isdir(path):
        opts['cwd'] = path

    # only the presence of the `_bare` / `_safe` keys matters, not their value
    bare_call = '_bare' in opts
    opts.pop('_bare', None)
    config_opts = [] if bare_call else ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']

    # no exc on failure
    safe_call = '_safe' in opts
    opts.pop('_safe', None)

    config_opts.extend(opts.pop('_copts', None) or [])

    gitenv = os.environ.copy()
    gitenv.update(opts.pop('extra_env', {}))
    # need to clean fix GIT_DIR !
    gitenv.pop('GIT_DIR', None)
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
    gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

    full_cmd = [settings.GIT_EXECUTABLE] + config_opts + cmd
    # caller supplied options win over the defaults
    call_kwargs = {'env': gitenv, 'shell': False}
    call_kwargs.update(opts)

    proc = None
    try:
        proc = subprocessio.SubprocessIOChunker(full_cmd, **call_kwargs)
        return b''.join(proc), b''.join(proc.stderr)
    except OSError as err:
        readable_cmd = ' '.join(map(safe_str, full_cmd))  # human friendly CMD
        # call options are only reported in debug mode
        reported_opts = call_kwargs if debug_mode else {}

        tb_err = ("Couldn't run git command ({}).\n"
                  "Original error was:{}\n"
                  "Call options:{}\n"
                  .format(readable_cmd, err, reported_opts))
        log.exception(tb_err)
        if safe_call:
            return '', err
        raise exceptions.VcsException()(tb_err)
    finally:
        if proc:
            proc.close()
1428 1428
@reraise_safe_exceptions
def install_hooks(self, wire, force=False):
    """
    Install the git hooks into the repository at ``wire['path']``.

    :param force: forwarded to ``install_git_hooks`` as ``force_create``.
    :returns: whatever ``install_git_hooks`` returns.
    """
    from vcsserver.hook_utils import install_git_hooks
    bare = self.bare(wire)
    path = wire['path']
    return install_git_hooks(path, bare, force_create=force)
1438 1438
@reraise_safe_exceptions
def get_hooks_info(self, wire):
    """
    Return a dict with the ``pre_version`` and ``post_version`` of the git
    hooks found for the repository at ``wire['path']``.
    """
    from vcsserver.hook_utils import (
        get_git_pre_hook_version, get_git_post_hook_version)

    bare = self.bare(wire)
    path = wire['path']

    pre_version = get_git_pre_hook_version(path, bare)
    post_version = get_git_post_hook_version(path, bare)
    return {
        'pre_version': pre_version,
        'post_version': post_version,
    }
1449 1449
@reraise_safe_exceptions
def set_head_ref(self, wire, head_name):
    """
    Point HEAD at ``refs/heads/<head_name>``.

    :returns: two-element list: ``head_name`` and a human readable message.
    """
    log.debug('Setting refs/head to `%s`', head_name)
    with self._factory.repo_libgit2(wire) as repo:
        repo.set_head(f'refs/heads/{head_name}')

    return [head_name, f'set HEAD to refs/heads/{head_name}']
1458 1458
@reraise_safe_exceptions
def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id, cache_config):
    """
    Build an archive of ``commit_id`` through ``store_archive_in_cache``.

    The nested ``file_walker`` generator yields one ``ArchiveNode`` per file
    found under the requested path; entries whose mode is
    ``GIT_FILEMODE_COMMIT`` are skipped.
    """

    def file_walker(_commit_id, path):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]

            # an empty path or '/' means the root tree of the commit
            at_root = path in ['', '/']
            tree = commit.tree if at_root else commit.tree[path.rstrip('/')]

            tree_id = tree.id.hex
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            # read the tree into an index and iterate its entries
            tree_index = LibGit2Index.Index()
            tree_index.read_tree(tree)

            for entry in tree_index:
                entry_path = entry.path
                entry_mode = entry.mode
                if entry_mode == pygit2.GIT_FILEMODE_COMMIT:
                    log.debug('Skipping path %s as a commit node', entry_path)
                    continue
                yield ArchiveNode(
                    entry_path, entry_mode, stat.S_ISLNK(entry_mode), repo[entry.hex].read_raw)

    return store_archive_in_cache(
        file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
General Comments 0
You need to be logged in to leave comments. Login now