##// END OF EJS Templates
git: use libgit2 to init repositories for git for better compat with future full libgit2 rewrite
super-admin -
r1149:755d25a9 default
parent child Browse files
Show More
@@ -1,1463 +1,1463 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23 import traceback
24 24 import urllib.request
25 25 import urllib.parse
26 26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 40 from dulwich.server import update_server_info
41 41
42 42 from vcsserver import exceptions, settings, subprocessio
43 43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
44 44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
45 45 from vcsserver.hgcompat import (
46 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 47 from vcsserver.git_lfs.lib import LFSOidStore
48 48 from vcsserver.vcs_base import RemoteBase
49 49
# File-type bits identifying a directory entry.
DIR_STAT = stat.S_IFDIR
# NOTE: ``stat.S_IFMT`` is a function (it extracts the file-type bits of a
# mode), so FILE_MODE is a callable rather than a numeric mask.
FILE_MODE = stat.S_IFMT
# Mode used by dulwich for git links.
GIT_LINK = objects.S_IFGITLINK
# Suffix on ref names that denote a peeled ref; such refs are skipped
# when syncing.
PEELED_REF_MARKER = b'^{}'
HEAD_MARKER = b'HEAD'

log = logging.getLogger(__name__)
57 57
58 58
def reraise_safe_exceptions(func):
    """
    Decorator converting Dulwich exceptions to something neutral.

    Lookup-type Dulwich errors are re-raised via ``exceptions.LookupException``
    and protocol-type errors via ``exceptions.VcsException``; in both cases the
    original exception is attached as ``org_exc``. Any other exception
    propagates untouched.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as lookup_err:
            raise exceptions.LookupException(org_exc=lookup_err)(safe_str(lookup_err))
        except (HangupException, UnexpectedCommandError) as protocol_err:
            raise exceptions.VcsException(org_exc=protocol_err)(safe_str(protocol_err))
        # NOTE(marcink): because of how dulwich handles some exceptions
        # (KeyError on empty repos), we cannot track this and catch all
        # exceptions; everything else is deliberately left to propagate
        # to the other handlers unchanged.
        return result

    return wrapper
81 81
82 82
class Repo(DulwichRepo):
    """
    Thin subclass of the dulwich ``Repo`` class.

    Dulwich sometimes keeps .idx file descriptors open, which leads to
    "Too many open files" errors. To avoid that, all opened file descriptors
    are closed once the repo object is destroyed.
    """

    def __del__(self):
        # Only close when an object store was actually attached to this instance.
        if not hasattr(self, 'object_store'):
            return
        self.close()
94 94
95 95
class Repository(LibGit2Repo):
    """libgit2 repository usable as a context manager; ``free()`` is called on exit."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Release the underlying handle; returning None lets exceptions propagate.
        self.free()
        return None
103 103
104 104
class GitFactory(RepoFactory):
    """Factory producing either dulwich or libgit2 repository objects from a wire dict."""

    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """
        Open the repository located at ``wire['path']``.

        :param wire: call context; only ``wire['path']`` is read here
        :param create: not used by this implementation
        :param use_libgit2: open via libgit2 (``Repository``) instead of dulwich (``Repo``)
        """
        if not use_libgit2:
            # dulwich mode
            repo = Repo(safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING))
        else:
            repo = Repository(safe_bytes(wire['path']))

        log.debug('repository created: got GIT object: %s', repo)
        return repo

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """Shortcut for ``repo(wire, use_libgit2=True)``."""
        return self.repo(wire, use_libgit2=True)
127 127
128 128
def create_signature_from_string(author_str, **kwargs):
    """
    Build a pygit2.Signature from an author string of the form 'Name <email>'.

    :param author_str: String of the format 'Name <email>'
    :param kwargs: extra keyword arguments forwarded to ``pygit2.Signature``
    :return: pygit2.Signature object
    :raises ValueError: when ``author_str`` does not have the expected format
    """
    parsed = re.match(r'^(.+) <(.+)>$', author_str)
    if parsed is not None:
        name, email = parsed.groups()
        return pygit2.Signature(name, email, **kwargs)

    raise ValueError(f"Invalid format: {author_str}")
142 142
143 143
def get_obfuscated_url(url_obj):
    """
    Return ``str(url_obj)`` with the password masked and the query obfuscated.

    NOTE: ``url_obj`` is modified in place (``passwd`` and ``query``), so read
    anything needed from the original URL before calling this.
    """
    if url_obj.passwd:
        url_obj.passwd = b'*****'
    url_obj.query = obfuscate_qs(url_obj.query)
    return str(url_obj)
149 149
150 150
151 151 class GitRemote(RemoteBase):
152 152
def __init__(self, factory):
    """
    :param factory: repository factory used to open repositories for each call
    """
    self._factory = factory
    # attribute name -> bound method used by ``bulk_request``
    self._bulk_methods = dict(
        date=self.date,
        author=self.author,
        branch=self.branch,
        message=self.message,
        parents=self.parents,
        _commit=self.revision,
    )
    # attribute name -> bound method used by ``bulk_file_request``
    self._bulk_file_methods = dict(
        size=self.get_node_size,
        data=self.get_node_data,
        flags=self.get_node_flags,
        is_binary=self.get_node_is_binary,
        md5=self.md5_hash,
    )
170 170
171 171 def _wire_to_config(self, wire):
172 172 if 'config' in wire:
173 173 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
174 174 return {}
175 175
176 176 def _remote_conf(self, config):
177 177 params = [
178 178 '-c', 'core.askpass=""',
179 179 ]
180 180 ssl_cert_dir = config.get('vcs_ssl_dir')
181 181 if ssl_cert_dir:
182 182 params.extend(['-c', f'http.sslCAinfo={ssl_cert_dir}'])
183 183 return params
184 184
@reraise_safe_exceptions
def discover_git_version(self):
    """Return the output of ``git --version`` as text, without the ``git version`` prefix."""
    version_prefix = b'git version'
    raw_version, _ = self.run_git_command(
        {}, ['--version'], _bare=True, _safe=True)
    if raw_version.startswith(version_prefix):
        raw_version = raw_version[len(version_prefix):]
    return safe_str(raw_version.strip())
193 193
@reraise_safe_exceptions
def is_empty(self, wire):
    """
    Tell whether the repository has no usable HEAD / content.

    Any error while inspecting the repository is treated as "empty".
    """
    with self._factory.repo_libgit2(wire) as repo:
        try:
            if repo.head.name:
                return False

            # NOTE(marcink): check again using more expensive method
            return repo.is_empty
        except Exception:
            # best effort: failing to resolve HEAD counts as an empty repository
            return True
210 210
@reraise_safe_exceptions
def assert_correct_path(self, wire):
    """
    Check that ``wire['path']`` points at a Git repository.

    The (optionally cached) check is always invoked in fast mode, which relies
    on ``pygit2.discover_repository``; the slow mode opens the repository.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _assert_correct_path(_context_uid, _repo_id, fast_check):
        if fast_check:
            return bool(pygit2.discover_repository(safe_str(wire['path'])))

        try:
            with self._factory.repo_libgit2(wire):
                pass
        except pygit2.GitError:
            log.debug("Invalid Git path `%s`, tb: %s", wire.get('path'), traceback.format_exc())
            return False
        return True

    return _assert_correct_path(context_uid, repo_id, True)
236 236
@reraise_safe_exceptions
def bare(self, wire):
    """Return libgit2's ``is_bare`` flag for the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        is_bare = repo.is_bare
    return is_bare
242 242
@reraise_safe_exceptions
def get_node_data(self, wire, commit_id, path):
    """
    Return the content of the blob at ``path`` in ``commit_id`` wrapped in a BytesEnvelope.

    Raises a LookupException when the tree entry is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return BytesEnvelope(node.data)

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
255 255
@reraise_safe_exceptions
def get_node_size(self, wire, commit_id, path):
    """
    Return the size of the blob at ``path`` in ``commit_id``.

    Raises a LookupException when the tree entry is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.size

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
268 268
@reraise_safe_exceptions
def get_node_flags(self, wire, commit_id, path):
    """
    Return the file mode of the blob at ``path`` in ``commit_id``.

    Raises a LookupException when the tree entry is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.filemode

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
281 281
@reraise_safe_exceptions
def get_node_is_binary(self, wire, commit_id, path):
    """
    Return libgit2's ``is_binary`` flag for the blob at ``path`` in ``commit_id``.

    Raises a LookupException when the tree entry is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.is_binary

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
294 294
@reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
    """Return the raw data of the object ``sha`` wrapped in a BytesEnvelope."""
    with self._factory.repo_libgit2(wire) as repo:
        raw_data = repo[sha].data
        return BytesEnvelope(raw_data)
301 301
@reraise_safe_exceptions
def blob_raw_length(self, wire, sha):
    """Return the size of the object ``sha`` (optionally cached per repo and sha)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _blob_raw_length(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[sha].size

    return _blob_raw_length(repo_id, sha)
316 316
317 317 def _parse_lfs_pointer(self, raw_content):
318 318 spec_string = b'version https://git-lfs.github.com/spec'
319 319 if raw_content and raw_content.startswith(spec_string):
320 320
321 321 pattern = re.compile(rb"""
322 322 (?:\n)?
323 323 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
324 324 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
325 325 ^size[ ](?P<oid_size>[0-9]+)\n
326 326 (?:\n)?
327 327 """, re.VERBOSE | re.MULTILINE)
328 328 match = pattern.match(raw_content)
329 329 if match:
330 330 return match.groupdict()
331 331
332 332 return {}
333 333
@reraise_safe_exceptions
def is_large_file(self, wire, commit_id):
    """
    Return parsed LFS pointer data for the object ``commit_id``.

    The result is an empty dict for binary objects and for content that is
    not an LFS pointer.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_large_file(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            blob = repo[commit_id]
            return {} if blob.is_binary else self._parse_lfs_pointer(blob.data)

    return _is_large_file(repo_id, commit_id)
350 350
@reraise_safe_exceptions
def is_binary(self, wire, tree_id):
    """Return libgit2's ``is_binary`` flag for the object ``tree_id`` (optionally cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_binary(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[tree_id].is_binary

    return _is_binary(repo_id, tree_id)
364 364
@reraise_safe_exceptions
def md5_hash(self, wire, commit_id, path):
    """
    Validate that ``path`` in ``commit_id`` is a blob and return an empty string.

    No hash is computed here; a LookupException is raised for non-blob entries.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _md5_hash(_repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            node = repo[_commit_id].tree[_path]

            if node.type == pygit2.GIT_OBJ_BLOB:
                return ''

            raise exceptions.LookupException()(
                f'Tree for commit_id:{_commit_id} is not a blob: {node.type_str}')

    return _md5_hash(repo_id, commit_id, path)
384 384
@reraise_safe_exceptions
def in_largefiles_store(self, wire, oid):
    """
    Tell whether ``oid`` exists in the configured LFS store.

    Returns False when no ``vcs_git_lfs_store_location`` is configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        return False

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.has_oid()
400 400
@reraise_safe_exceptions
def store_path(self, wire, oid):
    """
    Return the path of ``oid`` inside the configured LFS store.

    :raises ValueError: when no ``vcs_git_lfs_store_location`` is configured
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        raise ValueError(f'Unable to fetch oid with path {oid}')

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.oid_path
414 414
@reraise_safe_exceptions
def bulk_request(self, wire, rev, pre_load):
    """
    Collect several commit attributes for ``rev`` in one call.

    :param pre_load: iterable of attribute names, each a key of ``self._bulk_methods``
    :return: dict mapping attribute name to the value returned by its method
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_request(_repo_id, _rev, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                handler = self._bulk_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire.update({'cache': False})
                collected[attr] = handler(wire, rev)
            except KeyError as e:
                raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
        return collected

    return _bulk_request(repo_id, rev, sorted(pre_load))
434 434
@reraise_safe_exceptions
def bulk_file_request(self, wire, commit_id, path, pre_load):
    """
    Collect several file attributes for ``path`` at ``commit_id`` in one call.

    :param pre_load: iterable of attribute names, each a key of ``self._bulk_file_methods``
    :return: BinaryEnvelope wrapping a dict of attribute name -> value
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                handler = self._bulk_file_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire.update({'cache': False})
                collected[attr] = handler(wire, _commit_id, _path)
            except KeyError as e:
                raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
        return collected

    payload = _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
    return BinaryEnvelope(payload)
453 453
def _build_opener(self, url: str):
    """
    Build a urllib opener for ``url``.

    When the parsed URL yields auth info, basic and digest auth handlers
    backed by a shared password manager are installed.
    """
    authinfo = url_parser(safe_bytes(url)).authinfo()[1]
    if not authinfo:
        return urllib.request.build_opener()

    # create a password manager
    passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    passmgr.add_password(*authinfo)

    return urllib.request.build_opener(
        httpbasicauthhandler(passmgr),
        httpdigestauthhandler(passmgr))
468 468
@reraise_safe_exceptions
def check_url(self, url, config):
    """
    Verify that ``url`` can be opened and answers like a Git repository.

    Requests ``<url>/info/refs?service=git-upload-pack`` and inspects the body.

    :param url: remote URL to verify
    :param config: not used by this check; kept for interface compatibility
    :return: ``True`` when the remote looks like a Git repository
    :raises: ``exceptions.URLError`` when the URL cannot be opened, does not
        answer with HTTP 200, or the response does not look like Git refs
    """
    url_obj = url_parser(safe_bytes(url))

    # read the request URI before get_obfuscated_url() mutates url_obj
    test_uri = safe_str(url_obj.authinfo()[0])
    obfuscated_uri = get_obfuscated_url(url_obj)

    log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)

    if not test_uri.endswith('info/refs'):
        test_uri = test_uri.rstrip('/') + '/info/refs'

    o = self._build_opener(test_uri)
    o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

    q = {"service": 'git-upload-pack'}
    qs = '?%s' % urllib.parse.urlencode(q)
    cu = f"{test_uri}{qs}"
    req = urllib.request.Request(cu, None, {})

    try:
        log.debug("Trying to open URL %s", obfuscated_uri)
        resp = o.open(req)
        if resp.code != 200:
            raise exceptions.URLError()('Return Code is not 200')
    except Exception as e:
        log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
        # means it cannot be cloned
        raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")

    # now detect if it's proper git repo; always release the connection
    try:
        gitdata: bytes = resp.read()
    finally:
        resp.close()

    looks_like_git = (
        b'service=git-upload-pack' in gitdata
        # old style git can return some other format !
        or re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata)
    )
    if not looks_like_git:
        # there is no underlying exception here, hence org_exc is None
        raise exceptions.URLError(None)(
            f"url [{obfuscated_uri}] does not look like a git repo org_exc: None")

    return True
514 514
@reraise_safe_exceptions
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
    """
    Pull from ``url`` and set the selected refs on the local repository.

    :param deferred: suffix marking refs that must be skipped
    :param valid_refs: ref name prefix(es) to accept; a list is converted to a tuple
    :param update_after_clone: when true, set HEAD and build the index from its tree
    """
    # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
    remote_refs = self.pull(wire, url, apply_refs=False)
    repo = self._factory.repo(wire)
    if isinstance(valid_refs, list):
        valid_refs = tuple(valid_refs)

    for ref_name in remote_refs:
        # only parse heads/tags and skip so called deferred tags
        if not ref_name.startswith(valid_refs) or ref_name.endswith(deferred):
            continue
        repo[ref_name] = remote_refs[ref_name]

    if not update_after_clone:
        return

    # we want to checkout HEAD
    repo["HEAD"] = remote_refs["HEAD"]
    index.build_index_from_tree(repo.path, repo.index_path(),
                                repo.object_store, repo["HEAD"].tree)
533 533
@reraise_safe_exceptions
def branch(self, wire, commit_id):
    """Return the names of the ``refs/heads`` refs that point exactly at ``commit_id``."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _branch(_context_uid, _repo_id, _commit_id):
        regex = re.compile('^refs/heads')
        return [
            ref_name.split('refs/heads/')[-1]
            for ref_name, ref_sha in self.get_refs(wire).items()
            if regex.match(ref_name) and ref_sha == _commit_id
        ]

    return _branch(context_uid, repo_id, commit_id)
550 550
@reraise_safe_exceptions
def commit_branches(self, wire, commit_id):
    """Return the list produced by libgit2's ``branches.with_commit(commit_id)``."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _commit_branches(_context_uid, _repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return list(repo.branches.with_commit(_commit_id))

    return _commit_branches(context_uid, repo_id, commit_id)
564 564
@reraise_safe_exceptions
def add_object(self, wire, content):
    """
    Store ``content`` as a new blob in the repository.

    The blob is created with dulwich, so the repository is opened in dulwich
    mode: ``object_store`` is part of the dulwich ``Repo`` API and is not
    available on the libgit2 ``Repository`` used previously.

    :param content: raw blob content
    :return: id of the created dulwich blob
    """
    repo = self._factory.repo(wire)
    blob = objects.Blob()
    blob.set_raw_string(content)
    repo.object_store.add_object(blob)
    return blob.id
573 573
@reraise_safe_exceptions
def create_commit(self, wire, author, committer, message, branch, new_tree_id, date_args: list[int, int] = None):
    """
    Create a commit on ``refs/heads/<branch>`` pointing at ``new_tree_id``.

    :param author: author as a 'Name <email>' string (an already built
        signature object is passed through unchanged)
    :param committer: committer, same conventions as ``author``
    :param message: commit message
    :param branch: short branch name to commit onto
    :param new_tree_id: tree id; bytes/str ids are validated against the repo
    :param date_args: optional ``[time, offset]`` used for both signatures;
        when omitted the signatures are built with pygit2's defaults
    :return: id of the new commit as a string
    """
    with self._factory.repo_libgit2(wire) as repo:

        signature_kw = {}
        if date_args:
            current_time, offset = date_args
            signature_kw = {
                'time': current_time,
                'offset': offset
            }

        # Always hand pygit2 real Signature objects; previously the strings
        # were only converted when date_args was given.
        if isinstance(author, str):
            author = create_signature_from_string(author, **signature_kw)
        if isinstance(committer, str):
            committer = create_signature_from_string(committer, **signature_kw)

        tree = new_tree_id
        if isinstance(tree, (bytes, str)):
            # validate this tree is in the repo...
            tree = repo[safe_str(tree)].id

        # ensure we COMMIT on top of given branch head
        # check if this repo has ANY branches, otherwise it's a new branch case we need to make
        if branch in repo.branches.local:
            parents = [repo.branches[branch].target]
        elif list(repo.branches.local):
            parents = [repo.head.target]
        else:
            # repository without any branch: the commit has no parent
            parents = []

        # Create a new commit
        commit_oid = repo.create_commit(
            f'refs/heads/{branch}',  # the name of the reference to update
            author,  # the author of the commit
            committer,  # the committer of the commit
            message,  # the commit message
            tree,  # the tree produced by the index
            parents  # list of parents for the new commit, usually just one,
        )

        return safe_str(commit_oid)
618 618
@reraise_safe_exceptions
def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
    """
    Apply file updates/removals to the index and commit the resulting tree.

    :param commit_data: dict with ``author``, ``committer``, ``message``,
        ``commit_time`` and ``commit_timezone``
    :param branch: short branch name to commit onto
    :param commit_tree: ignored; the tree is always rebuilt from the index
    :param updated: iterable of dicts with ``path``, ``content`` and ``mode``
    :param removed: iterable of paths to remove from the index
    :return: id of the new commit
    """

    def mode2pygit(mode):
        """
        git only supports two filemode 644 and 755

        0o100755 -> 33261
        0o100644 -> 33188
        """
        known_modes = {
            0o100644: pygit2.GIT_FILEMODE_BLOB,
            0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
            0o120000: pygit2.GIT_FILEMODE_LINK
        }
        return known_modes.get(mode) or pygit2.GIT_FILEMODE_BLOB

    with self._factory.repo_libgit2(wire) as repo:
        repo_index = repo.index

        for pathspec in updated:
            blob_id = repo.create_blob(pathspec['content'])
            entry = pygit2.IndexEntry(pathspec['path'], blob_id, mode2pygit(pathspec['mode']))
            repo_index.add(entry)

        for pathspec in removed:
            repo_index.remove(pathspec)

        # Write changes to the index
        repo_index.write()

        # Create a tree from the updated index
        new_tree_id = repo_index.write_tree()

        date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]

        new_commit_id = self.create_commit(
            wire, commit_data['author'], commit_data['committer'], commit_data['message'],
            branch, new_tree_id, date_args=date_args)

    # libgit2, ensure the branch is there and exists
    self.create_branch(wire, branch, new_commit_id)

    # libgit2, set new ref to this created commit
    self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)

    return new_commit_id
671 671
@reraise_safe_exceptions
def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
    """
    Fetch from ``url`` into the local repository using dulwich.

    :param url: ``'default'``, a URL containing ``://`` (HTTP client), or
        anything else (local client)
    :param apply_refs: when true, set every non-peeled fetched ref locally
    :param refs: optional ref names limiting what is fetched and returned
    :param update_after: when true, set HEAD and build the index from its tree
    :return: the remote refs (filtered by ``refs`` when given)
    """
    if url == 'default' or '://' in url:
        url_obj = url_parser(safe_bytes(url))
        opener = self._build_opener(url)
        url = url_obj.authinfo()[0]
        client = HttpGitClient(base_url=url, opener=opener)
    else:
        client = LocalGitClient(url)
    repo = self._factory.repo(wire)

    determine_wants = repo.object_store.determine_wants_all
    if refs:
        refs = [ascii_bytes(x) for x in refs]

        def determine_wants_requested(remote_refs):
            return [
                safe_bytes(ref_hash)
                for ref_name, ref_hash in remote_refs.items()
                if safe_bytes(ref_name) in refs
            ]

        # swap with our custom requested wants
        determine_wants = determine_wants_requested

    try:
        remote_refs = client.fetch(
            path=url, target=repo, determine_wants=determine_wants)
    except NotGitRepository as e:
        log.warning(
            'Trying to fetch from "%s" failed, not a Git repository.', url)
        # Exception can contain unicode which we convert
        raise exceptions.AbortException(e)(repr(e))

    # mikhail: client.fetch() returns all the remote refs, but fetches only
    # refs filtered by `determine_wants` function. We need to filter result
    # as well
    if refs:
        remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}

    if apply_refs:
        # TODO: johbo: Needs proper test coverage with a git repository
        # that contains a tag object, so that we would end up with
        # a peeled ref at this point.
        for ref_name in remote_refs:
            if ref_name.endswith(PEELED_REF_MARKER):
                log.debug("Skipping peeled reference %s", ref_name)
                continue
            repo[ref_name] = remote_refs[ref_name]

        if refs and not update_after:
            # mikhail: explicitly set the head to the last ref.
            repo[HEAD_MARKER] = remote_refs[refs[-1]]

    if update_after:
        # we want to check out HEAD
        repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
        index.build_index_from_tree(repo.path, repo.index_path(),
                                    repo.object_store, repo[HEAD_MARKER].tree)

    if isinstance(remote_refs, FetchPackResult):
        return remote_refs.refs
    return remote_refs
739 739
@reraise_safe_exceptions
def sync_fetch(self, wire, url, refs=None, all_refs=False):
    """
    Fetch refs from ``url`` into the local repository using the git binary.

    Remote refs are listed with ``git ls-remote`` and then fetched in chunks.

    :param url: remote to fetch from
    :param refs: optional commit id or list/tuple of commit ids; when given,
        only remote refs pointing at one of these ids are fetched. Ids may be
        str or bytes.
    :param all_refs: list every remote ref instead of only heads and tags
    :return: ordered dict of remote ref name -> sha (both bytes), excluding
        peeled refs and HEAD
    """
    self._factory.repo(wire)
    if refs and not isinstance(refs, (list, tuple)):
        refs = [refs]

    # `git ls-remote` output is bytes, so the requested ids are normalised to
    # bytes as well; otherwise str ids would never match any remote sha.
    wanted_shas = {safe_bytes(x) for x in refs} if refs else set()

    config = self._wire_to_config(wire)
    # get all remote refs we'll use to fetch later
    cmd = ['ls-remote']
    if not all_refs:
        cmd += ['--heads', '--tags']
    cmd += [url]
    output, __ = self.run_git_command(
        wire, cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    remote_refs = collections.OrderedDict()
    fetch_refs = []

    for ref_line in output.splitlines():
        sha, ref = ref_line.split(b'\t')
        sha = sha.strip()
        if ref in remote_refs:
            # duplicate, skip
            continue
        if ref.endswith(PEELED_REF_MARKER):
            log.debug("Skipping peeled reference %s", ref)
            continue
        # don't sync HEAD
        if ref == HEAD_MARKER:
            continue

        remote_refs[ref] = sha

        # without a filter every ref is fetched, otherwise only the requested ones
        if not wanted_shas or sha in wanted_shas:
            fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
    log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))

    # chunk the refspecs so a single git invocation does not get too many arguments
    for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
        fetch_refs_chunks = list(chunk)
        log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
        self.run_git_command(
            wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
            fail_on_stderr=False,
            _copts=self._remote_conf(config),
            extra_env={'GIT_TERMINAL_PROMPT': '0'})

    return remote_refs
793 793
@reraise_safe_exceptions
def sync_push(self, wire, url, refs=None):
    """
    Mirror-push the local repository to ``url`` using the git binary.

    ``refs`` is accepted but not used by this implementation.
    """
    url_ok = self.check_url(url, wire)
    if not url_ok:
        return

    config = self._wire_to_config(wire)
    self._factory.repo(wire)
    push_cmd = ['push', url, '--mirror']
    self.run_git_command(
        wire, push_cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})
804 804
@reraise_safe_exceptions
def get_remote_refs(self, wire, url):
    """Open ``url`` as a dulwich repository and return its refs; ``wire`` is not used."""
    remote_repo = Repo(url)
    return remote_repo.get_refs()
809 809
@reraise_safe_exceptions
def get_description(self, wire):
    """Return the description reported by the dulwich repository."""
    return self._factory.repo(wire).get_description()
814 814
@reraise_safe_exceptions
def get_missing_revs(self, wire, rev1, rev2, path2):
    """
    Return ids of the commits reachable from ``rev2`` but not from ``rev1``.

    The walk is done on the repository at ``path2`` after fetching ``path2``
    into both the local repository and the ``path2`` repository itself.
    """
    local_repo = self._factory.repo(wire)
    LocalGitClient(thin_packs=False).fetch(path2, local_repo)

    wire_remote = wire.copy()
    wire_remote['path'] = path2
    repo_remote = self._factory.repo(wire_remote)
    LocalGitClient(thin_packs=False).fetch(path2, repo_remote)

    walker = repo_remote.get_walker(
        include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])
    return [entry.commit.id for entry in walker]
829 829
@reraise_safe_exceptions
def get_object(self, wire, sha, maybe_unreachable=False):
    """
    Resolve ``sha`` (commit id or reference) to basic object information.

    Tags are dereferenced to their target. A full commit id that is not
    contained in any branch is rejected as dangling, unless it came from a
    tag or ``maybe_unreachable`` is set.

    :return: dict with ``id``, ``type``, ``commit_id`` and ``idx``
    :raises: LookupException when the object is missing or dangling
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_object(_context_uid, _repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:

            missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
            try:
                commit = repo.revparse_single(sha)
            except KeyError:
                # NOTE(marcink): KeyError doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found', sha)
                raise exceptions.LookupException(e)(missing_commit_err)
            except ValueError as e:
                raise exceptions.LookupException(e)(missing_commit_err)

            is_tag = isinstance(commit, pygit2.Tag)
            if is_tag:
                commit = repo.get(commit.target)

            commit_id = commit.hex

            # Only a plain, full commit id needs the dangling check: tags and
            # explicitly allowed unreachable commits are exempt, and if a
            # reference was used and it parsed, the commit is not dangling.
            check_dangling = not is_tag and not maybe_unreachable and sha == commit_id

            if check_dangling and not any(repo.branches.with_commit(commit_id)):
                # NOTE(marcink): Empty error doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
                raise exceptions.LookupException(e)(missing_commit_err)

            return {
                'id': commit_id,
                'type': commit.type_str,
                'commit_id': commit_id,
                'idx': 0
            }

    return _get_object(context_uid, repo_id, sha)
889 889
@reraise_safe_exceptions
def get_refs(self, wire):
    """
    Return a `{reference name: target hex}` mapping for every reference whose
    name starts with `refs/heads/` or `refs/tags/`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_refs(_context_uid, _repo_id):
        wanted = re.compile('^refs/(heads|tags)/')
        with self._factory.repo_libgit2(wire) as repo:
            return {
                ref.name: ref.target.hex
                for ref in repo.listall_reference_objects()
                if wanted.match(ref.name)
            }

    return _get_refs(context_uid, repo_id)
905 905
@reraise_safe_exceptions
def get_branch_pointers(self, wire):
    """
    Return a `{target hex: shorthand name}` mapping for every reference whose
    name starts with `refs/heads`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_branch_pointers(_context_uid, _repo_id):
        heads_only = re.compile('^refs/heads')
        with self._factory.repo_libgit2(wire) as repo:
            return {
                ref.target.hex: ref.shorthand
                for ref in repo.listall_reference_objects()
                if heads_only.match(ref.name)
            }

    return _get_branch_pointers(context_uid, repo_id)
921 921
@reraise_safe_exceptions
def head(self, wire, show_exc=True):
    """
    Return the hex id of the object that HEAD peels to.

    :param show_exc: when false, any error while resolving HEAD is swallowed
        and `None` is returned instead
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _head(_context_uid, _repo_id, _show_exc):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                return repo.head.peel().hex
            except Exception:
                if not show_exc:
                    return None
                raise

    return _head(context_uid, repo_id, show_exc)
937 937
@reraise_safe_exceptions
def init(self, wire):
    """
    Create a new non-bare git repository at `wire['path']` using libgit2.
    """
    repo_path = safe_str(wire['path'])
    pygit2.init_repository(repo_path, bare=False)
942 942
@reraise_safe_exceptions
def init_bare(self, wire):
    """
    Create a new bare git repository at `wire['path']` using libgit2.
    """
    repo_path = safe_str(wire['path'])
    pygit2.init_repository(repo_path, bare=True)
947 947
@reraise_safe_exceptions
def revision(self, wire, rev):
    """
    Look up `rev` in the repository and return a dict with its hex `id`, plus
    a `tree` key holding the tree id when the object has a `tree_id` attribute.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _revision(_context_uid, _repo_id, _rev):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[rev]
            described = {'id': obj.id.hex}
            # tree objects itself don't have tree_id attribute
            if hasattr(obj, 'tree_id'):
                described['tree'] = obj.tree_id.hex
            return described

    return _revision(context_uid, repo_id, rev)
968 968
@reraise_safe_exceptions
def date(self, wire, commit_id):
    """
    Return `[commit_time, commit_time_offset]` for the object stored under
    `commit_id`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _date(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]

            # objects without `commit_time` are dereferenced first
            # (presumably annotated tags -- confirm)
            if not hasattr(obj, 'commit_time'):
                obj = obj.get_object()

            # TODO(marcink): check dulwich difference of offset vs timezone
            return [obj.commit_time, obj.commit_time_offset]

    return _date(repo_id, commit_id)
989 989
@reraise_safe_exceptions
def author(self, wire, commit_id):
    """
    Return the author of `commit_id` as `Name <email>`, or only the name when
    the author has no email.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _author(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]

            # objects without an `author` attribute are dereferenced first
            signature = obj.author if hasattr(obj, 'author') else obj.get_object().author

            if signature.email:
                return f"{signature.name} <{signature.email}>"

            try:
                return f"{signature.name}"
            except Exception:
                # fall back to the raw name when the name can't be read
                return f"{safe_str(signature.raw_name)}"

    return _author(repo_id, commit_id)
1015 1015
@reraise_safe_exceptions
def message(self, wire, commit_id):
    """
    Return the `message` attribute of the object stored under `commit_id`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _message(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[commit_id].message

    return _message(repo_id, commit_id)
1028 1028
@reraise_safe_exceptions
def parents(self, wire, commit_id):
    """
    Return the hex ids of the parents of `commit_id` as a list.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _parents(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]
            # objects without `parent_ids` are dereferenced first
            if not hasattr(obj, 'parent_ids'):
                obj = obj.get_object()
            return [parent.hex for parent in obj.parent_ids]

    return _parents(repo_id, commit_id)
1046 1046
@reraise_safe_exceptions
def children(self, wire, commit_id):
    """
    Return the ids of the children of `commit_id`.

    Runs `git rev-list --all --children` over `<commit_id>^..<HEAD>` and takes
    the ids listed after `commit_id` on the first output line starting with it.
    An empty list is returned when no such line exists.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    # HEAD is resolved on every call, outside of the cached function
    head = self.head(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _children(_repo_id, _commit_id):
        stdout, __ = self.run_git_command(
            wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])

        own_line = re.compile(fr'^{commit_id}')
        for raw_line in stdout.splitlines():
            text = safe_str(raw_line)
            if own_line.match(text):
                # first field is the commit itself, the rest are its children
                return text.split(' ')[1:]

        return []

    return _children(repo_id, commit_id)
1071 1071
@reraise_safe_exceptions
def set_refs(self, wire, key, value):
    """
    Create the reference `key` pointing at `value`, overwriting an existing one.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.create(key, value, force=True)
1077 1077
@reraise_safe_exceptions
def create_branch(self, wire, branch_name, commit_id, force=False):
    """
    Create the local branch `branch_name`.

    :param commit_id: commit the branch should point at; when empty, the
        commit HEAD peels to is used
    :param force: passed through to libgit2; when false, an already existing
        branch is left untouched
    """
    with self._factory.repo_libgit2(wire) as repo:
        if commit_id:
            commit = repo[commit_id]
        else:
            # if commit is not given just use the HEAD; `repo.head` is a
            # reference attribute (not a callable), so peel it to the object
            commit = repo.head.peel()

        # without `force`, create only if that branch isn't existing
        if force or not repo.branches.get(branch_name):
            repo.branches.local.create(branch_name, commit, force=force)
1093 1093
@reraise_safe_exceptions
def remove_ref(self, wire, key):
    """
    Delete the reference named `key` from the repository.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(key)
1099 1099
@reraise_safe_exceptions
def tag_remove(self, wire, tag_name):
    """
    Delete the reference `refs/tags/<tag_name>` from the repository.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(f'refs/tags/{tag_name}')
1106 1106
@reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Compare the trees of `source_id` and `target_id` and return three lists:
    `(added, modified, deleted)` paths.

    An empty `source_id` compares against no tree at all.
    """
    repo = self._factory.repo(wire)
    # source can be empty
    source_id = safe_bytes(source_id if source_id else b'')
    target_id = safe_bytes(target_id)

    source_tree = repo[source_id].tree if source_id else None
    target_tree = repo[target_id].tree
    changes = repo.object_store.tree_changes(source_tree, target_tree)

    added, modified, deleted = set(), set(), set()
    for (old_path, new_path), (_, _), (_, _) in list(changes):
        if old_path and new_path:
            # present on both sides
            modified.add(new_path)
        elif new_path:
            added.add(new_path)
        elif old_path:
            deleted.add(old_path)

    return list(added), list(modified), list(deleted)
1130 1130
@reraise_safe_exceptions
def tree_and_type_for_path(self, wire, commit_id, path):
    """
    Return `(hex id, type string, filemode)` of the tree entry found at `path`
    in the tree of `commit_id`, or `(None, None, None)` when there is no entry.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            try:
                entry = commit.tree[path]
            except KeyError:
                # nothing stored under this path
                return None, None, None

            return entry.id.hex, entry.type_str, entry.filemode

    return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1150 1150
@reraise_safe_exceptions
def tree_items(self, wire, tree_id):
    """
    List the entries of the tree `tree_id` as
    `(name, filemode, hex id, type string)` tuples.

    Raises `ObjectMissing` when `tree_id` is not found in the repository.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_items(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            entries = []
            for entry in tree:
                entry_type = entry.type_str
                # NOTE(marcink): submodules we translate to 'link' for backward compat
                if entry_type == 'commit':
                    entry_type = 'link'
                entries.append((entry.name, entry.filemode, entry.hex, entry_type))
            return entries

    return _tree_items(repo_id, tree_id)
1179 1179
@reraise_safe_exceptions
def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Old version that uses subprocess to call diff

    :param commit_id_1: source commit; when equal to `self.EMPTY_COMMIT`,
        `git show` of `commit_id_2` is used instead of `git diff`
    :param commit_id_2: target commit
    :param file_filter: optional path the diff is limited to
    :param opt_ignorews: when true, `--ignore-all-space` is added
    :param context: number of context lines (`-U<context>`)
    :return: raw output of the git command as bytes
    """
    from_empty = commit_id_1 == self.EMPTY_COMMIT

    flags = [
        f'-U{context}', '--patch',
        '--binary',
        '--find-renames',
        '--no-indent-heuristic',
    ]

    if opt_ignorews:
        flags.append('--ignore-all-space')

    if from_empty:
        cmd = ['show'] + flags + [commit_id_2]
    else:
        cmd = ['diff'] + flags + [commit_id_1, commit_id_2]

    if file_filter:
        cmd.extend(['--', file_filter])

    diff, __ = self.run_git_command(wire, cmd)

    # If we used 'show' command, strip first few lines (until actual diff
    # starts)
    if from_empty:
        lines = diff.splitlines()
        start = next(
            (idx for idx, line in enumerate(lines) if line.startswith(b'diff')),
            len(lines))
        # The command output is bytes, so it has to be re-joined with bytes.
        # Append new line just like 'diff' command do
        diff = b'\n'.join(lines[start:]) + b'\n'
    return diff
1220 1220
@reraise_safe_exceptions
def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Build a diff between two commits with libgit2 and return it wrapped in a
    `BytesEnvelope`.

    :param commit_id_1: source commit; when equal to `self.EMPTY_COMMIT` the
        tree of `commit_id_2` is diffed without a second tree
    :param commit_id_2: target commit
    :param file_filter: when given, only the patch whose old file path equals
        it is returned (empty payload when nothing matches)
    :param opt_ignorews: when true, whitespace is ignored
    :param context: number of context lines
    """
    with self._factory.repo_libgit2(wire) as repo:
        diff_flags = pygit2.GIT_DIFF_SHOW_BINARY
        if opt_ignorews:
            diff_flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE

        if commit_id_1 == self.EMPTY_COMMIT:
            target = repo[commit_id_2]
            diff_obj = target.tree.diff_to_tree(
                flags=diff_flags, context_lines=context, swap=True)
        else:
            target = repo[commit_id_2]
            source = repo[commit_id_1]
            diff_obj = target.tree.diff_to_tree(
                source.tree, flags=diff_flags, context_lines=context, swap=True)
            # rename detection is only applied when two commits are compared
            diff_obj.find_similar(flags=pygit2.GIT_DIFF_FIND_RENAMES)

        if file_filter:
            for patch in diff_obj:
                if patch.delta.old_file.path == file_filter:
                    return BytesEnvelope(patch.data) or BytesEnvelope(b'')
            # no matching path == no diff
            return BytesEnvelope(b'')

        return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')
1255 1255
@reraise_safe_exceptions
def node_history(self, wire, commit_id, path, limit):
    """
    Return the 40-character hex commit ids (as bytes) found in the git output
    for the history of `path`, starting at `commit_id`.

    :param limit: maximum number of entries; a false value means no `-n` limit
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
        if limit == 1:
            # optimize for n==1, rev-list is much faster for that use-case
            cmd = ['rev-list', '-1', commit_id, '--', path]
        else:
            cmd = ['log']
            if limit:
                cmd += ['-n', str(safe_int(limit, 0))]
            cmd += ['--pretty=format: %H', '-s', commit_id, '--', path]

        stdout, __ = self.run_git_command(wire, cmd)
        return list(re.findall(rb'[0-9a-fA-F]{40}', stdout))

    return _node_history(context_uid, repo_id, commit_id, path, limit)
1277 1277
@reraise_safe_exceptions
def node_annotate_legacy(self, wire, commit_id, path):
    """
    Annotate `path` at `commit_id` by parsing `git blame` output.

    Returns `(line number, blame commit id, rest of the line)` tuples; the
    last line of the command output is not included.
    """
    # note: replaced by pygit2 implementation
    # -l ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r commit_id ==> blames for the given commit
    cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
    stdout, __ = self.run_git_command(wire, cmd)

    annotated = []
    for line_no, blame_line in enumerate(stdout.splitlines()[:-1], start=1):
        # split off the commit id at the first space
        blame_commit_id, line = blame_line.split(b' ', 1)
        annotated.append((line_no, blame_commit_id, line))

    return annotated
1294 1294
@reraise_safe_exceptions
def node_annotate(self, wire, commit_id, path):
    """
    Annotate `path` at `commit_id` using the libgit2 blame.

    Returns a `BinaryEnvelope` wrapping a list of
    `(line number, blame commit hex id, line content)` tuples.
    """
    annotated = []
    with self._factory.repo_libgit2(wire) as repo:
        commit = repo[commit_id]
        blame_obj = repo.blame(path, newest_commit=commit_id)
        file_lines = commit.tree[path].data.splitlines()
        for line_no, line in enumerate(file_lines, start=1):
            hunk = blame_obj.for_line(line_no)
            annotated.append((line_no, hunk.final_commit_id.hex, line))

    return BinaryEnvelope(annotated)
1311 1311
@reraise_safe_exceptions
def update_server_info(self, wire):
    """
    Run the module-level `update_server_info` helper (imported from
    `dulwich.server`) on the repository described by `wire`.
    """
    dulwich_repo = self._factory.repo(wire)
    update_server_info(dulwich_repo)
1316 1316
@reraise_safe_exceptions
def get_all_commit_ids(self, wire):
    """
    Return the output lines of
    `git rev-list --reverse --date-order --branches --tags`, i.e. one commit
    id (bytes) per entry.

    Any error from the git command results in an empty list.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids(_context_uid, _repo_id):
        cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
        try:
            output, __ = self.run_git_command(wire, cmd)
        except Exception:
            # Can be raised for empty repositories
            return []
        return output.splitlines()

    return _get_all_commit_ids(context_uid, repo_id)
1344 1344
@reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
    """
    Run the configured git executable with `cmd` and return
    `(stdout, stderr)` as bytes.

    Special keys in `opts` are consumed here; everything else is passed on to
    `subprocessio.SubprocessIOChunker`:

    * `_bare`: when present, the default `-c core.quotepath=false` is not added
    * `_safe`: when present, an `OSError` is logged and `('', err)` is
      returned instead of raising
    * `_copts`: extra options placed before the git sub-command
    * `extra_env`: mapping merged into the environment of the subprocess
    """
    repo_path = wire.get('path', None)
    if repo_path and os.path.isdir(repo_path):
        opts['cwd'] = repo_path

    if '_bare' in opts:
        opts.pop('_bare')
        config_opts = []
    else:
        config_opts = ['-c', 'core.quotepath=false']

    # no exc on failure
    safe_call = '_safe' in opts
    opts.pop('_safe', None)

    config_opts.extend(opts.pop('_copts', None) or [])

    gitenv = os.environ.copy()
    gitenv.update(opts.pop('extra_env', {}))
    # need to clean fix GIT_DIR !
    gitenv.pop('GIT_DIR', None)
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
    gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

    cmd = [settings.GIT_EXECUTABLE] + config_opts + cmd
    _opts = {'env': gitenv, 'shell': False}

    proc = None
    try:
        _opts.update(opts)
        proc = subprocessio.SubprocessIOChunker(cmd, **_opts)

        return b''.join(proc), b''.join(proc.stderr)
    except OSError as err:
        human_cmd = ' '.join(map(safe_str, cmd))  # human friendly CMD
        tb_err = ("Couldn't run git command (%s).\n"
                  "Original error was:%s\n"
                  "Call options:%s\n"
                  % (human_cmd, err, _opts))
        log.exception(tb_err)
        if not safe_call:
            raise exceptions.VcsException()(tb_err)
        return '', err
    finally:
        # always release the subprocess, whether or not it produced output
        if proc:
            proc.close()
1398 1398
@reraise_safe_exceptions
def install_hooks(self, wire, force=False):
    """
    Install the git hooks into the repository at `wire['path']`.

    :param force: passed to `install_git_hooks` as `force_create`
    :return: whatever `install_git_hooks` returns
    """
    from vcsserver.hook_utils import install_git_hooks
    bare = self.bare(wire)
    path = wire['path']
    # NOTE(review): a python3 path used to be joined from `settings.BINARY_DIR`
    # here but its result was discarded; if it was meant to be handed to
    # `install_git_hooks`, confirm that function's signature before wiring it in.
    return install_git_hooks(path, bare, force_create=force)
1408 1408
@reraise_safe_exceptions
def get_hooks_info(self, wire):
    """
    Return a dict with the `pre_version` and `post_version` of the git hooks
    found for the repository at `wire['path']`.
    """
    from vcsserver.hook_utils import (
        get_git_pre_hook_version, get_git_post_hook_version)

    bare = self.bare(wire)
    path = wire['path']
    pre_version = get_git_pre_hook_version(path, bare)
    post_version = get_git_post_hook_version(path, bare)
    return {
        'pre_version': pre_version,
        'post_version': post_version,
    }
1419 1419
@reraise_safe_exceptions
def set_head_ref(self, wire, head_name):
    """
    Point HEAD at `refs/heads/<head_name>`.

    Returns a two-item list: the head name and a message describing the change.
    """
    log.debug('Setting refs/head to `%s`', head_name)
    target_ref = f'refs/heads/{head_name}'
    with self._factory.repo_libgit2(wire) as repo:
        repo.set_head(target_ref)

    return [head_name, f'set HEAD to {target_ref}']
1428 1428
@reraise_safe_exceptions
def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id, cache_config):
    """
    Build an archive of `commit_id` by handing a file walker over its tree to
    `store_archive_in_cache`, and return that function's result.
    """

    def file_walker(_commit_id, path):
        """
        Yield an `ArchiveNode` for every index entry of the tree at `path`,
        skipping entries whose mode marks them as a commit.
        """
        with self._factory.repo_libgit2(wire) as repo:
            # NOTE: the commit is taken from the enclosing `commit_id`,
            # the `_commit_id` argument is not read
            commit = repo[commit_id]

            at_root = path in ['', '/']
            root = commit.tree if at_root else commit.tree[path.rstrip('/')]
            tree_id = root.id.hex
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            tree_index = LibGit2Index.Index()
            tree_index.read_tree(tree)

            for entry in tree_index:
                entry_path = entry.path
                entry_mode = entry.mode
                is_link = stat.S_ISLNK(entry_mode)
                if entry_mode == pygit2.GIT_FILEMODE_COMMIT:
                    log.debug('Skipping path %s as a commit node', entry_path)
                    continue
                yield ArchiveNode(entry_path, entry_mode, is_link, repo[entry.hex].read_raw)

    return store_archive_in_cache(
        file_walker, archive_name_key, kind, mtime, archive_at_path,
        archive_dir_name, commit_id, cache_config=cache_config)
General Comments 0
You need to be logged in to leave comments. Login now