##// END OF EJS Templates
git: fix remote repo compare logic that wrongly used repos to fetch for cross fork review
super-admin -
r1163:c9256ee2 default
parent child Browse files
Show More
@@ -1,1462 +1,1466 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23 import traceback
24 24 import urllib.request
25 25 import urllib.parse
26 26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 40 from dulwich.server import update_server_info
41 41
42 42 from vcsserver import exceptions, settings, subprocessio
43 43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
44 44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
45 45 from vcsserver.hgcompat import (
46 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 47 from vcsserver.git_lfs.lib import LFSOidStore
48 48 from vcsserver.vcs_base import RemoteBase
49 49
# File-type bits used to classify tree entries.
DIR_STAT = stat.S_IFDIR
# NOTE(review): this binds the `stat.S_IFMT` callable itself, not a mode value.
FILE_MODE = stat.S_IFMT
# dulwich's file mode marking a git link (submodule) entry.
GIT_LINK = objects.S_IFGITLINK
# Suffix carried by peeled tag references, e.g. `refs/tags/v1^{}`.
PEELED_REF_MARKER = b'^{}'
HEAD_MARKER = b'HEAD'

log = logging.getLogger(__name__)
57 57
58 58
def reraise_safe_exceptions(func):
    """Decorator that converts Dulwich exceptions to neutral vcsserver ones.

    Object lookup failures are re-raised as ``LookupException`` and protocol
    failures as ``VcsException``; both carry the original exception.
    Anything else propagates untouched.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing) as err:
            lookup_exc = exceptions.LookupException(org_exc=err)
            raise lookup_exc(safe_str(err))
        except (HangupException, UnexpectedCommandError) as err:
            vcs_exc = exceptions.VcsException(org_exc=err)
            raise vcs_exc(safe_str(err))
        # NOTE(marcink): because of how dulwich handles some exceptions
        # (KeyError on empty repos), we cannot track and wrap every other
        # exception here; they are left to propagate to other handlers.
        return result
    return wrapper
81 81
82 82
class Repo(DulwichRepo):
    """
    Thin wrapper around the dulwich Repo class.

    dulwich sometimes keeps .idx file descriptors open, which leads to
    "Too many open files" errors. To avoid that, all opened file descriptors
    are closed once the repo object is destroyed.
    """
    def __del__(self):
        # `object_store` is missing when construction failed part-way,
        # in which case there is nothing to close.
        if not hasattr(self, 'object_store'):
            return
        self.close()
94 94
95 95
class Repository(LibGit2Repo):
    """libgit2 repository usable as a context manager; freed on exit."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always release the underlying libgit2 handle; exceptions are not
        # suppressed (implicit ``None`` return).
        self.free()
103 103
104 104
class GitFactory(RepoFactory):
    """Factory producing either dulwich or libgit2 repository objects."""
    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """Instantiate the repository found at ``wire['path']``.

        ``create`` is accepted for interface compatibility and is not used.
        """
        if not use_libgit2:
            # dulwich mode
            dulwich_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
            repo = Repo(dulwich_path)
        else:
            repo = Repository(safe_bytes(wire['path']))

        log.debug('repository created: got GIT object: %s', repo)
        return repo

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Return a repository instance for the path given in `wire`.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """Shortcut for ``repo(wire, use_libgit2=True)``."""
        return self.repo(wire, use_libgit2=True)
127 127
128 128
def create_signature_from_string(author_str, **kwargs):
    """
    Build a pygit2.Signature from a 'Name <email>' string.

    :param author_str: String of the format 'Name <email>'
    :param kwargs: extra keyword arguments forwarded to pygit2.Signature
    :return: pygit2.Signature object
    :raises ValueError: when `author_str` does not match the expected format
    """
    parsed = re.match(r'^(.+) <(.+)>$', author_str)
    if parsed is None:
        raise ValueError(f"Invalid format: {author_str}")

    return pygit2.Signature(parsed.group(1), parsed.group(2), **kwargs)
142 142
143 143
def get_obfuscated_url(url_obj):
    """Mask the password and query string of `url_obj` and return it as str.

    Note that `url_obj` itself is modified in place.
    """
    if url_obj.passwd:
        url_obj.passwd = b'*****'
    url_obj.query = obfuscate_qs(url_obj.query)
    return str(url_obj)
149 149
150 150
151 151 class GitRemote(RemoteBase):
152 152
def __init__(self, factory):
    """Store the repo factory and build the bulk-attribute dispatch tables."""
    self._factory = factory
    # attribute name -> bound method used by `bulk_request`
    self._bulk_methods = dict(
        date=self.date,
        author=self.author,
        branch=self.branch,
        message=self.message,
        parents=self.parents,
        _commit=self.revision,
    )
    # attribute name -> bound method used by `bulk_file_request`
    self._bulk_file_methods = dict(
        size=self.get_node_size,
        data=self.get_node_data,
        flags=self.get_node_flags,
        is_binary=self.get_node_is_binary,
        md5=self.md5_hash,
    )
170 170
171 171 def _wire_to_config(self, wire):
172 172 if 'config' in wire:
173 173 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
174 174 return {}
175 175
176 176 def _remote_conf(self, config):
177 177 params = [
178 178 '-c', 'core.askpass=""',
179 179 ]
180 180 ssl_cert_dir = config.get('vcs_ssl_dir')
181 181 if ssl_cert_dir:
182 182 params.extend(['-c', f'http.sslCAinfo={ssl_cert_dir}'])
183 183 return params
184 184
@reraise_safe_exceptions
def discover_git_version(self):
    """Return the output of `git --version` without the 'git version' prefix."""
    version_prefix = b'git version'
    raw_output, _ = self.run_git_command(
        {}, ['--version'], _bare=True, _safe=True)
    if raw_output.startswith(version_prefix):
        raw_output = raw_output[len(version_prefix):]
    return safe_str(raw_output.strip())
193 193
@reraise_safe_exceptions
def is_empty(self, wire):
    """Tell whether the repository has no commits.

    A repository with a named HEAD is reported as non-empty; otherwise the
    more expensive libgit2 `is_empty` check decides. Any error while
    inspecting the repository is treated as "empty".
    """
    with self._factory.repo_libgit2(wire) as repo:
        try:
            if repo.head.name:
                return False

            # NOTE(marcink): check again using more expensive method
            return repo.is_empty
        except Exception:
            # deliberate best effort: failure to resolve HEAD means empty
            return True
210 210
@reraise_safe_exceptions
def assert_correct_path(self, wire):
    """Return True when ``wire['path']`` points at a Git repository.

    The result is cached per context/repo when caching is enabled for `wire`.
    This entry point always uses the fast discovery check.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _assert_correct_path(_context_uid, _repo_id, fast_check):
        if fast_check:
            # cheap check: only ask libgit2 to discover the repository
            return bool(pygit2.discover_repository(safe_str(wire['path'])))

        # slow check: actually open the repository
        try:
            with self._factory.repo_libgit2(wire):
                pass
        except pygit2.GitError:
            log.debug("Invalid Git path `%s`, tb: %s",
                      wire.get('path'), traceback.format_exc())
            return False
        return True

    return _assert_correct_path(context_uid, repo_id, True)
236 236
@reraise_safe_exceptions
def bare(self, wire):
    """Return the libgit2 `is_bare` flag of the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        return repo.is_bare
242 242
@reraise_safe_exceptions
def get_node_data(self, wire, commit_id, path):
    """Return the content of the blob at `path` in `commit_id`.

    :return: blob bytes wrapped in a BytesEnvelope
    :raises: LookupException when the tree entry is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return BytesEnvelope(node.data)

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
255 255
@reraise_safe_exceptions
def get_node_size(self, wire, commit_id, path):
    """Return the size of the blob at `path` in `commit_id`.

    :raises: LookupException when the tree entry is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.size

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
268 268
@reraise_safe_exceptions
def get_node_flags(self, wire, commit_id, path):
    """Return the file mode of the blob at `path` in `commit_id`.

    :raises: LookupException when the tree entry is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.filemode

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
281 281
@reraise_safe_exceptions
def get_node_is_binary(self, wire, commit_id, path):
    """Return libgit2's `is_binary` flag for the blob at `path` in `commit_id`.

    :raises: LookupException when the tree entry is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.is_binary

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
294 294
@reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
    """Return the data of the object `sha`, wrapped in a BytesEnvelope."""
    with self._factory.repo_libgit2(wire) as repo:
        return BytesEnvelope(repo[sha].data)
301 301
@reraise_safe_exceptions
def blob_raw_length(self, wire, sha):
    """Return the size of the object `sha` (cached per repo and sha)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _blob_raw_length(_repo_id, _sha):
        # the underscore arguments only form the cache key
        with self._factory.repo_libgit2(wire) as repo:
            return repo[sha].size

    return _blob_raw_length(repo_id, sha)
316 316
317 317 def _parse_lfs_pointer(self, raw_content):
318 318 spec_string = b'version https://git-lfs.github.com/spec'
319 319 if raw_content and raw_content.startswith(spec_string):
320 320
321 321 pattern = re.compile(rb"""
322 322 (?:\n)?
323 323 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
324 324 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
325 325 ^size[ ](?P<oid_size>[0-9]+)\n
326 326 (?:\n)?
327 327 """, re.VERBOSE | re.MULTILINE)
328 328 match = pattern.match(raw_content)
329 329 if match:
330 330 return match.groupdict()
331 331
332 332 return {}
333 333
@reraise_safe_exceptions
def is_large_file(self, wire, commit_id):
    """Return parsed LFS pointer data for the object `commit_id`.

    Binary blobs and blobs that are not LFS pointers yield an empty dict.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_large_file(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            blob = repo[commit_id]
            # a pointer file is plain text, so binary blobs are never pointers
            if not blob.is_binary:
                return self._parse_lfs_pointer(blob.data)
            return {}

    return _is_large_file(repo_id, commit_id)
350 350
@reraise_safe_exceptions
def is_binary(self, wire, tree_id):
    """Return libgit2's `is_binary` flag for the object `tree_id` (cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_binary(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[tree_id].is_binary

    return _is_binary(repo_id, tree_id)
364 364
@reraise_safe_exceptions
def md5_hash(self, wire, commit_id, path):
    """Validate that `path` in `commit_id` is a blob and return ''.

    No hash is computed here; the method only raises LookupException when
    the tree entry is not a blob.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _md5_hash(_repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            node = repo[_commit_id].tree[_path]

            if node.type != pygit2.GIT_OBJ_BLOB:
                raise exceptions.LookupException()(
                    f'Tree for commit_id:{_commit_id} is not a blob: {node.type_str}')

        return ''

    return _md5_hash(repo_id, commit_id, path)
384 384
@reraise_safe_exceptions
def in_largefiles_store(self, wire, oid):
    """Tell whether `oid` exists in the configured LFS store.

    Returns False when no 'vcs_git_lfs_store_location' is configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        return False

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.has_oid()
400 400
@reraise_safe_exceptions
def store_path(self, wire, oid):
    """Return the path of `oid` inside the configured LFS store.

    :raises ValueError: when no 'vcs_git_lfs_store_location' is configured
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        raise ValueError(f'Unable to fetch oid with path {oid}')

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.oid_path
414 414
@reraise_safe_exceptions
def bulk_request(self, wire, rev, pre_load):
    """Collect several commit attributes for `rev` in a single call.

    :param pre_load: names of attributes registered in `_bulk_methods`
    :return: dict mapping attribute name to its value
    :raises: VcsException when a KeyError occurs while resolving an attribute
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_request(_repo_id, _rev, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                getter = self._bulk_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire.update({'cache': False})
                collected[attr] = getter(wire, rev)
            except KeyError as e:
                raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
        return collected

    return _bulk_request(repo_id, rev, sorted(pre_load))
434 434
@reraise_safe_exceptions
def bulk_file_request(self, wire, commit_id, path, pre_load):
    """Collect several file attributes for `path` at `commit_id` in one call.

    :param pre_load: names of attributes registered in `_bulk_file_methods`
    :return: BinaryEnvelope wrapping a dict of attribute name -> value
    :raises: VcsException when a KeyError occurs while resolving an attribute
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                getter = self._bulk_file_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire.update({'cache': False})
                collected[attr] = getter(wire, _commit_id, _path)
            except KeyError as e:
                raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
        return collected

    file_attrs = _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
    return BinaryEnvelope(file_attrs)
453 453
def _build_opener(self, url: str):
    """Build a urllib opener for `url`.

    Basic and digest auth handlers are installed only when the URL carries
    authentication info.
    """
    authinfo = url_parser(safe_bytes(url)).authinfo()[1]

    handlers = []
    if authinfo:
        # create a password manager shared by both auth handlers
        passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        passmgr.add_password(*authinfo)
        handlers += [httpbasicauthhandler(passmgr),
                     httpdigestauthhandler(passmgr)]

    return urllib.request.build_opener(*handlers)
468 468
@reraise_safe_exceptions
def check_url(self, url, config):
    """Verify that `url` answers like a Git repository over HTTP.

    Requests ``<url>/info/refs?service=git-upload-pack`` and inspects the
    response body.

    :param url: remote URL, may contain credentials
    :param config: accepted for interface compatibility; not used here
    :return: True when the remote looks like a Git repository
    :raises: URLError when the URL cannot be opened, does not return 200,
        or the response does not look like Git ref advertisement
    """
    url_obj = url_parser(safe_bytes(url))

    # take the clean URI before `get_obfuscated_url` mutates url_obj
    test_uri = safe_str(url_obj.authinfo()[0])
    obfuscated_uri = get_obfuscated_url(url_obj)

    log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)

    if not test_uri.endswith('info/refs'):
        test_uri = test_uri.rstrip('/') + '/info/refs'

    opener = self._build_opener(test_uri)
    opener.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

    query = urllib.parse.urlencode({"service": 'git-upload-pack'})
    probe_url = f"{test_uri}?{query}"

    try:
        req = urllib.request.Request(probe_url, None, {})
        log.debug("Trying to open URL %s", obfuscated_uri)
        resp = opener.open(req)
        if resp.code != 200:
            raise exceptions.URLError()('Return Code is not 200')
    except Exception as e:
        log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
        # means it cannot be cloned
        raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")

    # now detect if it's proper git repo
    gitdata: bytes = resp.read()

    # smart-HTTP servers announce the service; old style git can return a
    # plain "<sha> refs/..." listing instead
    looks_like_git = (
        b'service=git-upload-pack' in gitdata
        or re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata)
    )
    if not looks_like_git:
        # the previous message claimed an "hg repo" and appended a
        # meaningless "org_exc: None"; there is no original exception here
        raise exceptions.URLError(None)(
            f"url [{obfuscated_uri}] does not look like a git repo")

    return True
513 513
@reraise_safe_exceptions
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
    """Fetch from `url` and set the matching refs on the local repository.

    Only refs starting with one of `valid_refs` and not ending with
    `deferred` are applied. With `update_after_clone`, HEAD is set and the
    index is built from its tree.
    """
    # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
    remote_refs = self.pull(wire, url, apply_refs=False)
    repo = self._factory.repo(wire)
    if isinstance(valid_refs, list):
        # str.startswith accepts a tuple of prefixes, not a list
        valid_refs = tuple(valid_refs)

    for ref_name in remote_refs:
        # only parse heads/tags and skip so called deferred tags
        if not ref_name.startswith(valid_refs) or ref_name.endswith(deferred):
            continue
        repo[ref_name] = remote_refs[ref_name]

    if update_after_clone:
        # we want to checkout HEAD
        repo["HEAD"] = remote_refs["HEAD"]
        index.build_index_from_tree(repo.path, repo.index_path(),
                                    repo.object_store, repo["HEAD"].tree)
532 532
@reraise_safe_exceptions
def branch(self, wire, commit_id):
    """Return names of the branches whose head is exactly `commit_id`."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _branch(_context_uid, _repo_id, _commit_id):
        regex = re.compile('^refs/heads')

        return [
            ref_name.split('refs/heads/')[-1]
            for ref_name, ref_sha in self.get_refs(wire).items()
            if regex.match(ref_name) and ref_sha == _commit_id
        ]

    return _branch(context_uid, repo_id, commit_id)
549 549
@reraise_safe_exceptions
def commit_branches(self, wire, commit_id):
    """Return the branches libgit2 reports via `branches.with_commit`."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _commit_branches(_context_uid, _repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return list(repo.branches.with_commit(_commit_id))

    return _commit_branches(context_uid, repo_id, commit_id)
563 563
@reraise_safe_exceptions
def add_object(self, wire, content):
    """Store `content` as a blob in the repository and return its id.

    :param content: raw bytes of the blob
    :return: the dulwich blob id of the stored object
    """
    # The blob is a dulwich object and `object_store.add_object` is dulwich
    # API, so the dulwich repository has to be used here; the libgit2
    # repository used previously does not provide an `object_store`.
    repo = self._factory.repo(wire)
    blob = objects.Blob()
    blob.set_raw_string(content)
    repo.object_store.add_object(blob)
    return blob.id
572 572
@reraise_safe_exceptions
def create_commit(self, wire, author, committer, message, branch, new_tree_id, date_args: list[int, int] = None):
    """Create a commit for `new_tree_id` on `refs/heads/<branch>`.

    :param author: 'Name <email>' string
    :param committer: 'Name <email>' string
    :param message: commit message
    :param branch: short branch name the commit is created on
    :param new_tree_id: tree id; str/bytes ids are validated against the repo
    :param date_args: optional ``[time, offset]`` pair applied to both
        signatures
    :return: id of the new commit as str
    """
    with self._factory.repo_libgit2(wire) as repo:

        signature_kwargs = {}
        if date_args:
            current_time, offset = date_args
            signature_kwargs = {
                'time': current_time,
                'offset': offset
            }

        # pygit2 needs Signature objects. Previously the strings were only
        # converted when date_args was given, so omitting it passed raw
        # strings down to libgit2.
        if date_args or isinstance(author, str):
            author = create_signature_from_string(author, **signature_kwargs)
        if date_args or isinstance(committer, str):
            committer = create_signature_from_string(committer, **signature_kwargs)

        tree = new_tree_id
        if isinstance(tree, (bytes, str)):
            # validate this tree is in the repo...
            tree = repo[safe_str(tree)].id

        parents = []
        # ensure we COMMIT on top of given branch head
        # check if this repo has ANY branches, otherwise it's a new branch case we need to make
        if branch in repo.branches.local:
            parents.append(repo.branches[branch].target)
        elif any(True for _ in repo.branches.local):
            # new branch in a non-empty repo: start it on top of HEAD
            parents.append(repo.head.target)

        # Create a new commit
        commit_oid = repo.create_commit(
            f'refs/heads/{branch}',  # the name of the reference to update
            author,  # the author of the commit
            committer,  # the committer of the commit
            message,  # the commit message
            tree,  # the tree produced by the index
            parents  # list of parents for the new commit, usually just one,
        )

        return safe_str(commit_oid)
617 617
@reraise_safe_exceptions
def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
    """Apply file changes to the index and commit them on `branch`.

    :param commit_data: dict with 'author', 'committer', 'message',
        'commit_time' and 'commit_timezone'
    :param commit_tree: ignored on input; replaced by the tree written
        from the index
    :param updated: iterable of dicts with 'path', 'content' and 'mode'
    :param removed: iterable of paths to drop from the index
    :return: id of the new commit
    """

    # git only supports a few file modes; unknown ones fall back to a
    # regular blob.
    #   0o100755 -> 33261
    #   0o100644 -> 33188
    pygit_modes = {
        0o100644: pygit2.GIT_FILEMODE_BLOB,
        0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
        0o120000: pygit2.GIT_FILEMODE_LINK
    }

    def mode2pygit(mode):
        return pygit_modes.get(mode) or pygit2.GIT_FILEMODE_BLOB

    with self._factory.repo_libgit2(wire) as repo:
        repo_index = repo.index

        for pathspec in updated:
            blob_id = repo.create_blob(pathspec['content'])
            entry = pygit2.IndexEntry(
                pathspec['path'], blob_id, mode2pygit(pathspec['mode']))
            repo_index.add(entry)

        for pathspec in removed:
            repo_index.remove(pathspec)

        # Write changes to the index
        repo_index.write()

        # Create a tree from the updated index
        new_tree_id = repo_index.write_tree()

        date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]

        new_commit_id = self.create_commit(
            wire, commit_data['author'], commit_data['committer'],
            commit_data['message'], branch, new_tree_id, date_args=date_args)

        # libgit2, ensure the branch is there and exists
        self.create_branch(wire, branch, new_commit_id)

        # libgit2, set new ref to this created commit
        self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)

        return new_commit_id
670 670
@reraise_safe_exceptions
def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
    """Fetch objects from `url` into the repository using dulwich.

    :param url: local path, or a URL containing '://' (or 'default') for HTTP
    :param apply_refs: when true, set the fetched refs on the local repo
    :param refs: optional ref names restricting what is fetched and returned
    :param update_after: when true, set HEAD to the remote HEAD and build
        the index from its tree
    :return: the remote refs (filtered by `refs` when given)
    :raises: AbortException when the remote is not a Git repository
    """
    use_local_client = url != 'default' and '://' not in url
    if use_local_client:
        client = LocalGitClient(url)
    else:
        url_obj = url_parser(safe_bytes(url))
        opener = self._build_opener(url)
        # strip credentials from the URL; the opener carries them
        url = url_obj.authinfo()[0]
        client = HttpGitClient(base_url=url, opener=opener)
    repo = self._factory.repo(wire)

    determine_wants = repo.object_store.determine_wants_all
    if refs:
        refs = [ascii_bytes(x) for x in refs]

        def determine_wants_requested(remote_refs):
            return [
                safe_bytes(ref_hash)
                for ref_name, ref_hash in remote_refs.items()
                if safe_bytes(ref_name) in refs
            ]

        # swap with our custom requested wants
        determine_wants = determine_wants_requested

    try:
        remote_refs = client.fetch(
            path=url, target=repo, determine_wants=determine_wants)

    except NotGitRepository as e:
        log.warning(
            'Trying to fetch from "%s" failed, not a Git repository.', url)
        # Exception can contain unicode which we convert
        raise exceptions.AbortException(e)(repr(e))

    # mikhail: client.fetch() returns all the remote refs, but fetches only
    # refs filtered by `determine_wants` function. We need to filter result
    # as well
    if refs:
        remote_refs = {name: remote_refs[name] for name in remote_refs if name in refs}

    if apply_refs:
        # TODO: johbo: Needs proper test coverage with a git repository
        # that contains a tag object, so that we would end up with
        # a peeled ref at this point.
        for ref_name in remote_refs:
            if ref_name.endswith(PEELED_REF_MARKER):
                log.debug("Skipping peeled reference %s", ref_name)
                continue
            repo[ref_name] = remote_refs[ref_name]

        # NOTE(review): nesting under `apply_refs` is reconstructed from a
        # listing without indentation -- confirm against the original file.
        if refs and not update_after:
            # mikhail: explicitly set the head to the last ref.
            repo[HEAD_MARKER] = remote_refs[refs[-1]]

    if update_after:
        # we want to check out HEAD
        repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
        index.build_index_from_tree(repo.path, repo.index_path(),
                                    repo.object_store, repo[HEAD_MARKER].tree)

    if isinstance(remote_refs, FetchPackResult):
        return remote_refs.refs
    return remote_refs
738 738
@reraise_safe_exceptions
def sync_fetch(self, wire, url, refs=None, all_refs=False):
    """Fetch refs from `url` using the git command line.

    Lists the remote refs with `git ls-remote` and then force-fetches them
    in chunks.

    :param refs: optional sha or list of shas; only refs pointing at one of
        them are fetched
    :param all_refs: when false, only heads and tags are listed
    :return: OrderedDict of remote ref name -> sha (bytes), without peeled
        refs and HEAD
    """
    self._factory.repo(wire)
    if refs and not isinstance(refs, (list, tuple)):
        refs = [refs]

    config = self._wire_to_config(wire)
    # get all remote refs we'll use to fetch later
    ls_remote_cmd = ['ls-remote']
    if not all_refs:
        ls_remote_cmd += ['--heads', '--tags']
    ls_remote_cmd += [url]
    output, __ = self.run_git_command(
        wire, ls_remote_cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    remote_refs = collections.OrderedDict()
    fetch_refs = []

    for ref_line in output.splitlines():
        sha, ref = ref_line.split(b'\t')
        sha = sha.strip()
        if ref in remote_refs:
            # duplicate, skip
            continue
        if ref.endswith(PEELED_REF_MARKER):
            log.debug("Skipping peeled reference %s", ref)
            continue
        # don't sync HEAD
        if ref == HEAD_MARKER:
            continue

        remote_refs[ref] = sha

        # without explicit refs everything is fetched, otherwise we filter
        # fetch using our specified refs
        if not refs or sha in refs:
            ref_str = safe_str(ref)
            fetch_refs.append(f'{ref_str}:{ref_str}')
    log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))

    if fetch_refs:
        # fetch in chunks to keep the command line at a manageable size
        for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
            fetch_refs_chunks = list(chunk)
            log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
            self.run_git_command(
                wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
                fail_on_stderr=False,
                _copts=self._remote_conf(config),
                extra_env={'GIT_TERMINAL_PROMPT': '0'})

    return remote_refs
792 792
@reraise_safe_exceptions
def sync_push(self, wire, url, refs=None):
    """Mirror-push the repository to `url` after validating the URL.

    `refs` is accepted for interface compatibility and is not used.
    """
    url_ok = self.check_url(url, wire)
    if not url_ok:
        return

    config = self._wire_to_config(wire)
    self._factory.repo(wire)
    self.run_git_command(
        wire, ['push', url, '--mirror'], fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})
803 803
@reraise_safe_exceptions
def get_remote_refs(self, wire, url):
    """Return the refs of the repository located at the local path `url`.

    `wire` is not used; the repository is opened directly from `url`.
    """
    return Repo(url).get_refs()
808 808
@reraise_safe_exceptions
def get_description(self, wire):
    """Return the description reported by the dulwich repository."""
    return self._factory.repo(wire).get_description()
813 813
@reraise_safe_exceptions
def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
    """Return ids of commits reachable from `rev2` but not from `rev1`.

    Used for cross-repository (fork) comparison: objects are exchanged in
    both directions so that both revisions can be resolved in the other
    repository, where the walk is performed.

    :param rev1: revision to exclude
    :param rev2: revision to include
    :param other_repo_path: local path of the repository to compare against
    :return: list of commit ids
    """
    origin_repo_path = wire['path']
    repo = self._factory.repo(wire)
    # fetch from other_repo_path to our origin repo
    LocalGitClient(thin_packs=False).fetch(other_repo_path, repo)

    wire_remote = wire.copy()
    wire_remote['path'] = other_repo_path
    repo_remote = self._factory.repo(wire_remote)

    # fetch from origin_repo_path to our remote repo; fetching the remote
    # repo from its own path (as done before) added no new objects
    LocalGitClient(thin_packs=False).fetch(origin_repo_path, repo_remote)

    walker = repo_remote.get_walker(
        include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])
    return [entry.commit.id for entry in walker]
828 832
@reraise_safe_exceptions
def get_object(self, wire, sha, maybe_unreachable=False):
    """Resolve `sha` (a commit id or any revision spec) to object info.

    Tags are resolved to their target. A full commit id that is not
    contained in any branch is treated as missing unless
    `maybe_unreachable` is set.

    :return: dict with 'id', 'type', 'commit_id' and 'idx'
    :raises: LookupException when the object cannot be resolved or is
        dangling
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_object(_context_uid, _repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:

            missing_commit_err = f"Commit {sha} does not exist for `{wire['path']}`"
            try:
                commit = repo.revparse_single(sha)
            except KeyError:
                # NOTE(marcink): KeyError doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found', sha)
                raise exceptions.LookupException(e)(missing_commit_err)
            except ValueError as e:
                raise exceptions.LookupException(e)(missing_commit_err)

            is_tag = isinstance(commit, pygit2.Tag)
            if is_tag:
                commit = repo.get(commit.target)

            # Skip the dangling check for tags, for callers that accept
            # unreachable commits, and when a reference was given: if it
            # parsed we're not having a dangling commit.
            check_dangling = (
                not is_tag
                and not maybe_unreachable
                and sha == commit.hex
            )

            if check_dangling and not any(repo.branches.with_commit(commit.hex)):
                # NOTE(marcink): Empty error doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
                raise exceptions.LookupException(e)(missing_commit_err)

            commit_id = commit.hex

            return {
                'id': commit_id,
                'type': commit.type_str,
                'commit_id': commit_id,
                'idx': 0
            }

    return _get_object(context_uid, repo_id, sha)
888 892
@reraise_safe_exceptions
def get_refs(self, wire):
    """
    Return a mapping of full reference name -> target hex for every
    reference located under `refs/heads/` or `refs/tags/`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_refs(_context_uid, _repo_id):
        with self._factory.repo_libgit2(wire) as repo:
            heads_or_tags = re.compile('^refs/(heads|tags)/')
            matching = [
                ref for ref in repo.listall_reference_objects()
                if heads_or_tags.match(ref.name)
            ]

            refs = {}
            for ref in matching:
                refs[ref.name] = ref.target.hex
            return refs

    return _get_refs(context_uid, repo_id)
904 908
@reraise_safe_exceptions
def get_branch_pointers(self, wire):
    """
    Return a mapping of target hex -> short branch name for every
    reference whose name starts with `refs/heads`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_branch_pointers(_context_uid, _repo_id):
        repo_init = self._factory.repo_libgit2(wire)
        heads_only = re.compile('^refs/heads')

        with repo_init as repo:
            pointers = {}
            for ref in repo.listall_reference_objects():
                if heads_only.match(ref.name):
                    pointers[ref.target.hex] = ref.shorthand
            return pointers

    return _get_branch_pointers(context_uid, repo_id)
920 924
@reraise_safe_exceptions
def head(self, wire, show_exc=True):
    """
    Return the hex id of the object HEAD peels to.

    When resolving HEAD fails, the error is re-raised if `show_exc` is true,
    otherwise `None` is returned.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _head(_context_uid, _repo_id, _show_exc):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                head_ref = repo.head
                return head_ref.peel().hex
            except Exception:
                if not show_exc:
                    return None
                raise

    return _head(context_uid, repo_id, show_exc)
936 940
@reraise_safe_exceptions
def init(self, wire):
    """Initialize a non-bare git repository at the path given in `wire`."""
    pygit2.init_repository(safe_str(wire['path']), bare=False)
941 945
@reraise_safe_exceptions
def init_bare(self, wire):
    """Initialize a bare git repository at the path given in `wire`."""
    pygit2.init_repository(safe_str(wire['path']), bare=True)
946 950
@reraise_safe_exceptions
def revision(self, wire, rev):
    """
    Look up `rev` in the repository and return a dict with its `id`,
    plus `tree` when the object exposes a `tree_id`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _revision(_context_uid, _repo_id, _rev):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[rev]
            info = {'id': git_obj.id.hex}

            # tree objects themselves have no tree_id attribute
            if hasattr(git_obj, 'tree_id'):
                info['tree'] = git_obj.tree_id.hex

            return info

    return _revision(context_uid, repo_id, rev)
967 971
@reraise_safe_exceptions
def date(self, wire, commit_id):
    """
    Return `[commit_time, commit_time_offset]` for `commit_id`.

    Objects without a `commit_time` attribute are first resolved
    through their `get_object()` method.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _date(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[commit_id]

            if not hasattr(git_obj, 'commit_time'):
                git_obj = git_obj.get_object()

            # TODO(marcink): check dulwich difference of offset vs timezone
            return [git_obj.commit_time, git_obj.commit_time_offset]

    return _date(repo_id, commit_id)
988 992
@reraise_safe_exceptions
def author(self, wire, commit_id):
    """
    Return the author of `commit_id` as `Name <email>`, or just the name
    when no email is set. If formatting the name fails, the raw name
    converted with `safe_str` is returned instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _author(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[commit_id]

            # objects without an author attribute are resolved via get_object()
            signature = git_obj.author if hasattr(git_obj, 'author') else git_obj.get_object().author

            if signature.email:
                return f"{signature.name} <{signature.email}>"

            try:
                return f"{signature.name}"
            except Exception:
                return f"{safe_str(signature.raw_name)}"

    return _author(repo_id, commit_id)
1014 1018
@reraise_safe_exceptions
def message(self, wire, commit_id):
    """Return the `message` attribute of the object stored under `commit_id`."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _message(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[commit_id].message

    return _message(repo_id, commit_id)
1027 1031
@reraise_safe_exceptions
def parents(self, wire, commit_id):
    """
    Return the hex ids of the parents of `commit_id`.

    Objects without a `parent_ids` attribute are first resolved
    through their `get_object()` method.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _parents(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[commit_id]
            if not hasattr(git_obj, 'parent_ids'):
                git_obj = git_obj.get_object()

            hexes = []
            for parent_id in git_obj.parent_ids:
                hexes.append(parent_id.hex)
            return hexes

    return _parents(repo_id, commit_id)
1045 1049
@reraise_safe_exceptions
def children(self, wire, commit_id):
    """
    Return the ids of the children of `commit_id`.

    Runs `git rev-list --all --children` over `<commit_id>^..<head>` and
    returns the ids listed after `commit_id` on the first output line that
    starts with it; an empty list when no such line exists.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    head = self.head(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _children(_repo_id, _commit_id):
        rev_list_cmd = ['rev-list', '--all', '--children', f'{commit_id}^..{head}']
        output, __ = self.run_git_command(wire, rev_list_cmd)

        starts_with_commit = re.compile(fr'^{commit_id}')
        for raw_line in output.splitlines():
            text = safe_str(raw_line)
            if starts_with_commit.match(text):
                # first column is the commit itself, the rest are its children
                return text.split(' ')[1:]

        return []

    return _children(repo_id, commit_id)
1070 1074
@reraise_safe_exceptions
def set_refs(self, wire, key, value):
    """Create the reference `key` pointing at `value`, overwriting an existing one."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.create(key, value, force=True)
1076 1080
@reraise_safe_exceptions
def create_branch(self, wire, branch_name, commit_id, force=False):
    """
    Create the local branch `branch_name`.

    :param commit_id: commit the branch should point at; when empty the
        commit HEAD currently resolves to is used.
    :param force: when true the branch is (re)created unconditionally,
        otherwise it is created only if no branch of that name exists.
    """
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:
        if commit_id:
            commit = repo[commit_id]
        else:
            # if commit is not given just use the HEAD.
            # `repo.head` is a property returning a reference, not a callable;
            # peel it to the underlying object, as the `head` method does.
            commit = repo.head.peel()

        # without force, create only if that branch isn't existing
        if force or not repo.branches.get(branch_name):
            repo.branches.local.create(branch_name, commit, force=force)
1092 1096
@reraise_safe_exceptions
def remove_ref(self, wire, key):
    """Delete the reference named `key` from the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(key)
1098 1102
@reraise_safe_exceptions
def tag_remove(self, wire, tag_name):
    """Delete the reference `refs/tags/<tag_name>` from the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        tag_ref = f'refs/tags/{tag_name}'
        repo.references.delete(tag_ref)
1105 1109
@reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Compare the trees of two commits and classify the changed paths.

    :param source_id: id of the base commit; may be empty, in which case
        the target tree is compared against nothing.
    :param target_id: id of the commit to compare to.
    :return: tuple of three lists: added, modified and deleted paths
    """
    repo = self._factory.repo(wire)

    # source can be empty
    source_id = safe_bytes(source_id if source_id else b'')
    target_id = safe_bytes(target_id)

    source_tree = repo[source_id].tree if source_id else None
    target_tree = repo[target_id].tree
    changes = repo.object_store.tree_changes(source_tree, target_tree)

    added, modified, deleted = set(), set(), set()
    for (old_path, new_path), (_, _), (_, _) in changes:
        if new_path:
            # present on the new side: modified if it existed before, else added
            bucket = modified if old_path else added
            bucket.add(new_path)
        elif old_path:
            deleted.add(old_path)

    return list(added), list(modified), list(deleted)
1129 1133
@reraise_safe_exceptions
def tree_and_type_for_path(self, wire, commit_id, path):
    """
    Look up `path` in the tree of `commit_id`.

    :return: tuple `(hex id, type string, filemode)` of the entry, or
        `(None, None, None)` when the path is not present in the tree.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            root_tree_owner = repo[commit_id]
            try:
                entry = root_tree_owner.tree[path]
            except KeyError:
                return None, None, None

            return entry.id.hex, entry.type_str, entry.filemode

    return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1149 1153
@reraise_safe_exceptions
def tree_items(self, wire, tree_id):
    """
    List the entries of the tree `tree_id`.

    :return: list of `(name, filemode, hex id, type)` tuples; entries of
        type `commit` are reported as `link`.
    :raises ObjectMissing: when no object is stored under `tree_id`
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    def _describe(entry):
        sha, mode, kind = entry.hex, entry.filemode, entry.type_str
        if kind == 'commit':
            # NOTE(marcink): submodules we translate to 'link' for backward compat
            kind = 'link'
        return entry.name, mode, sha, kind

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_items(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            return [_describe(entry) for entry in tree]

    return _tree_items(repo_id, tree_id)
1178 1182
@reraise_safe_exceptions
def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Old version that uses subprocess to call diff

    :param commit_id_1: base commit; when equal to `self.EMPTY_COMMIT`,
        `git show` of `commit_id_2` is used instead of `git diff`.
    :param commit_id_2: target commit.
    :param file_filter: optional path the diff is limited to.
    :param opt_ignorews: when true, whitespace changes are ignored.
    :param context: number of context lines (`-U<context>`).
    :return: raw output of the git command as bytes
    """

    flags = [
        f'-U{context}', '--patch',
        '--binary',
        '--find-renames',
        '--no-indent-heuristic',
        # '--indent-heuristic',
        #'--full-index',
        #'--abbrev=40'
    ]

    if opt_ignorews:
        flags.append('--ignore-all-space')

    from_empty = commit_id_1 == self.EMPTY_COMMIT
    if from_empty:
        cmd = ['show'] + flags + [commit_id_2]
    else:
        cmd = ['diff'] + flags + [commit_id_1, commit_id_2]

    if file_filter:
        cmd.extend(['--', file_filter])

    diff, __ = self.run_git_command(wire, cmd)

    # If we used 'show' command, strip first few lines (until actual diff
    # starts)
    if from_empty:
        lines = diff.splitlines()
        start = next(
            (idx for idx, line in enumerate(lines) if line.startswith(b'diff')),
            len(lines))
        # The command output is bytes, so the lines must be joined with a
        # bytes separator; joining with a str separator raises TypeError.
        # Append new line just like 'diff' command do
        diff = b'\n'.join(lines[start:]) + b'\n'
    return diff
1219 1223
@reraise_safe_exceptions
def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Produce a diff between two commits using libgit2.

    When `commit_id_1` equals `self.EMPTY_COMMIT` the tree of `commit_id_2`
    is diffed without a second tree; otherwise it is diffed against the tree
    of `commit_id_1` and rename detection is applied.

    :param file_filter: when given, only the patch whose old file path
        equals it is returned (empty payload if none matches).
    :param opt_ignorews: when true, whitespace changes are ignored.
    :param context: number of context lines.
    :return: the patch data wrapped in `BytesEnvelope`
    """
    repo_init = self._factory.repo_libgit2(wire)

    with repo_init as repo:
        diff_flags = pygit2.GIT_DIFF_SHOW_BINARY
        if opt_ignorews:
            diff_flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE

        diff_kwargs = {'flags': diff_flags, 'context_lines': context, 'swap': True}

        new_commit = repo[commit_id_2]
        if commit_id_1 == self.EMPTY_COMMIT:
            diff_obj = new_commit.tree.diff_to_tree(**diff_kwargs)
        else:
            old_commit = repo[commit_id_1]
            diff_obj = new_commit.tree.diff_to_tree(old_commit.tree, **diff_kwargs)
            diff_obj.find_similar(flags=pygit2.GIT_DIFF_FIND_RENAMES)

        if not file_filter:
            return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')

        for patch in diff_obj:
            if patch.delta.old_file.path == file_filter:
                return BytesEnvelope(patch.data) or BytesEnvelope(b'')

        # no matching path == no diff
        return BytesEnvelope(b'')
1254 1258
@reraise_safe_exceptions
def node_history(self, wire, commit_id, path, limit):
    """
    Return the commit ids found in the git history output for `path`,
    starting at `commit_id`.

    :param limit: maximum number of commits; a falsy value applies no `-n`.
    :return: list of 40-character hex ids as bytes, as matched in the
        command output
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
        if limit == 1:
            # optimize for n==1, rev-list is much faster for that use-case
            cmd = ['rev-list', '-1', commit_id, '--', path]
        else:
            limit_args = ['-n', str(safe_int(limit, 0))] if limit else []
            cmd = ['log'] + limit_args + ['--pretty=format: %H', '-s', commit_id, '--', path]

        output, __ = self.run_git_command(wire, cmd)
        return list(re.findall(rb'[0-9a-fA-F]{40}', output))

    return _node_history(context_uid, repo_id, commit_id, path, limit)
1276 1280
@reraise_safe_exceptions
def node_annotate_legacy(self, wire, commit_id, path):
    """
    Annotate `path` at `commit_id` by parsing `git blame` output.

    note: replaced by pygit2 implementation

    :return: list of `(line_no, blame_commit_id, line)` tuples, where the
        last output line of the blame command is not included
    """
    # -l ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r commit_id ==> blames for the given commit
    blame_cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
    output, __ = self.run_git_command(wire, blame_cmd)

    annotated = []
    for line_no, blame_line in enumerate(output.splitlines()[:-1], start=1):
        # everything before the first space is the commit id
        blame_commit_id, line = blame_line.split(b' ', 1)
        annotated.append((line_no, blame_commit_id, line))

    return annotated
1293 1297
@reraise_safe_exceptions
def node_annotate(self, wire, commit_id, path):
    """
    Annotate `path` at `commit_id` using the libgit2 blame.

    :return: `BinaryEnvelope` wrapping a list of
        `(line_no, blame_commit_id, line)` tuples, one per line of the
        file data, with line numbers starting at 1
    """
    with self._factory.repo_libgit2(wire) as repo:
        commit = repo[commit_id]
        blame = repo.blame(path, newest_commit=commit_id)
        file_lines = commit.tree[path].data.splitlines()

        annotated = [
            (line_no, blame.for_line(line_no).final_commit_id.hex, line)
            for line_no, line in enumerate(file_lines, start=1)
        ]

    return BinaryEnvelope(annotated)
1310 1314
@reraise_safe_exceptions
def update_server_info(self, wire):
    """Run the module-level `update_server_info` helper on the repository for `wire`."""
    update_server_info(self._factory.repo(wire))
1315 1319
@reraise_safe_exceptions
def get_all_commit_ids(self, wire):
    """
    Return the ids of all commits reachable from branches and tags.

    The ids come from `git rev-list --reverse --date-order --branches --tags`,
    one bytes entry per output line. Any failure of the command yields an
    empty list.
    """

    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids(_context_uid, _repo_id):

        cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
        try:
            output, __ = self.run_git_command(wire, cmd)
            return output.splitlines()
        except Exception:
            # Can be raised for empty repositories
            return []

    # NOTE(review): this pygit2 variant is not called anywhere in this method.
    # It previously built `results` without returning it; the return is added
    # so the helper is usable if it ever gets wired in.
    # NOTE(review): GIT_BRANCH_ALL is OR-ed into the sort mode below although
    # its name suggests a branch-type flag rather than a sort flag; confirm.
    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
        repo_init = self._factory.repo_libgit2(wire)
        from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
        results = []
        with repo_init as repo:
            for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
                results.append(commit.id.hex)
        return results

    return _get_all_commit_ids(context_uid, repo_id)
1343 1347
@reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
    """
    Run the git executable with `cmd` and return `(stdout, stderr)` as bytes.

    Recognised special options (removed from `opts` before the call):

    * `_bare`: when present, the default `-c` config options are not added
    * `_safe`: when present, an `OSError` does not raise; `('', err)` is
      returned instead
    * `_copts`: extra options placed between the executable and `cmd`
    * `extra_env`: mapping merged into the environment of the process

    All remaining options are passed on to `SubprocessIOChunker`. When the
    path from `wire` is an existing directory it is used as `cwd`.

    :raises VcsException: on `OSError` when `_safe` was not given
    """
    path = wire.get('path', None)
    if path and os.path.isdir(path):
        opts['cwd'] = path

    bare_call = '_bare' in opts
    opts.pop('_bare', None)
    if bare_call:
        _copts = []
    else:
        _copts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']

    # no exc on failure when _safe is given
    safe_call = '_safe' in opts
    opts.pop('_safe', None)

    if '_copts' in opts:
        _copts.extend(opts.pop('_copts') or [])

    gitenv = os.environ.copy()
    gitenv.update(opts.pop('extra_env', {}))
    # need to clean fix GIT_DIR !
    gitenv.pop('GIT_DIR', None)
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
    gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

    cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
    # caller supplied options take precedence over the defaults
    _opts = {'env': gitenv, 'shell': False}
    _opts.update(opts)

    proc = None
    try:
        proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
        stdout = b''.join(proc)
        stderr = b''.join(proc.stderr)
        return stdout, stderr
    except OSError as err:
        cmd = ' '.join(map(safe_str, cmd))  # human friendly CMD
        tb_err = ("Couldn't run git command (%s).\n"
                  "Original error was:%s\n"
                  "Call options:%s\n"
                  % (cmd, err, _opts))
        log.exception(tb_err)
        if not safe_call:
            raise exceptions.VcsException()(tb_err)
        return '', err
    finally:
        if proc:
            proc.close()
1397 1401
@reraise_safe_exceptions
def install_hooks(self, wire, force=False):
    """
    Install the git hooks into the repository given in `wire`.

    :param force: passed to `install_git_hooks` as `force_create`.
    :return: whatever `install_git_hooks` returns
    """
    from vcsserver.hook_utils import install_git_hooks
    bare = self.bare(wire)
    path = wire['path']
    # NOTE(review): a python3 path used to be joined from settings.BINARY_DIR
    # here, but the result was discarded and never passed on, so that dead
    # computation was dropped.
    return install_git_hooks(path, bare, force_create=force)
1407 1411
@reraise_safe_exceptions
def get_hooks_info(self, wire):
    """
    Return the versions of the installed git hooks as a dict with the
    keys `pre_version` and `post_version`.
    """
    from vcsserver.hook_utils import (
        get_git_pre_hook_version, get_git_post_hook_version)

    is_bare = self.bare(wire)
    repo_path = wire['path']

    pre_version = get_git_pre_hook_version(repo_path, is_bare)
    post_version = get_git_post_hook_version(repo_path, is_bare)
    return {
        'pre_version': pre_version,
        'post_version': post_version,
    }
1418 1422
@reraise_safe_exceptions
def set_head_ref(self, wire, head_name):
    """
    Point HEAD at `refs/heads/<head_name>`.

    :return: two-element list: `head_name` and a message describing the change
    """
    log.debug('Setting refs/head to `%s`', head_name)
    with self._factory.repo_libgit2(wire) as repo:
        repo.set_head(f'refs/heads/{head_name}')

    return [head_name, f'set HEAD to refs/heads/{head_name}']
1427 1431
@reraise_safe_exceptions
def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id, cache_config):
    """
    Build an archive of `commit_id` through `store_archive_in_cache`.

    The nested `file_walker` generator yields one `ArchiveNode` per index
    entry of the requested tree, skipping entries whose mode marks them as
    a commit.
    """

    def file_walker(_commit_id, path):
        # NOTE: the commit is looked up via the enclosing `commit_id`,
        # the `_commit_id` argument is not used
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]

            if path in ['', '/']:
                tree = commit.tree
            else:
                tree = commit.tree[path.rstrip('/')]
                tree_id = tree.id.hex
                try:
                    tree = repo[tree_id]
                except KeyError:
                    raise ObjectMissing(f'No tree with id: {tree_id}')

            tree_index = LibGit2Index.Index()
            tree_index.read_tree(tree)

            for entry in tree_index:
                entry_path = entry.path
                entry_mode = entry.mode
                if entry_mode == pygit2.GIT_FILEMODE_COMMIT:
                    log.debug('Skipping path %s as a commit node', entry_path)
                    continue

                yield ArchiveNode(
                    entry_path, entry_mode, stat.S_ISLNK(entry_mode),
                    repo[entry.hex].read_raw)

    return store_archive_in_cache(
        file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
General Comments 0
You need to be logged in to leave comments. Login now