##// END OF EJS Templates
git: turn off advice diverging
super-admin -
r1157:0bd1d6c7 default
parent child Browse files
Show More
@@ -1,1462 +1,1462 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23 import traceback
24 24 import urllib.request
25 25 import urllib.parse
26 26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 40 from dulwich.server import update_server_info
41 41
42 42 from vcsserver import exceptions, settings, subprocessio
43 43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
44 44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
45 45 from vcsserver.hgcompat import (
46 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 47 from vcsserver.git_lfs.lib import LFSOidStore
48 48 from vcsserver.vcs_base import RemoteBase
49 49
# File-type bit mask for directories, taken from the stdlib ``stat`` module.
DIR_STAT = stat.S_IFDIR
# NOTE(review): ``stat.S_IFMT`` is a function in the stdlib (it extracts the
# file-type bits of a mode), not a constant mask - confirm callers expect that.
FILE_MODE = stat.S_IFMT
# Mode value dulwich uses for gitlink (submodule) tree entries.
GIT_LINK = objects.S_IFGITLINK
# Suffix marking a peeled ref name; such refs are skipped in pull/sync_fetch.
PEELED_REF_MARKER = b'^{}'
# Name of the HEAD reference, as bytes.
HEAD_MARKER = b'HEAD'

# Module-level logger.
log = logging.getLogger(__name__)
57 57
58 58
def reraise_safe_exceptions(func):
    """Decorator translating selected Dulwich errors into vcsserver exceptions.

    * ``ChecksumMismatch``, ``WrongObjectException``, ``MissingCommitError`` and
      ``ObjectMissing`` are re-raised through ``exceptions.LookupException``.
    * ``HangupException`` and ``UnexpectedCommandError`` are re-raised through
      ``exceptions.VcsException``.

    In both cases the original exception is passed as ``org_exc`` and its
    ``safe_str`` form becomes the message. Any other exception propagates
    unchanged.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        # NOTE(marcink): because of how dulwich handles some exceptions
        # (KeyError on empty repos), we cannot track this and catch all
        # exceptions; everything not listed below is left to propagate as-is.
        try:
            return func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing) as lookup_err:
            raise exceptions.LookupException(org_exc=lookup_err)(safe_str(lookup_err))
        except (HangupException, UnexpectedCommandError) as protocol_err:
            raise exceptions.VcsException(org_exc=protocol_err)(safe_str(protocol_err))

    return wrapper
81 81
82 82
class Repo(DulwichRepo):
    """
    Dulwich ``Repo`` subclass that closes itself when garbage collected.

    Dulwich sometimes keeps ``.idx`` file descriptors open, which leads to
    "Too many open files" errors. Closing the repo once the object is
    destroyed releases those descriptors.
    """

    def __del__(self):
        # Skip close() when the instance never got an ``object_store``
        # attribute (presumably a partially constructed repo).
        if not hasattr(self, 'object_store'):
            return
        self.close()
94 94
95 95
class Repository(LibGit2Repo):
    """pygit2 repository usable as a context manager; ``free()`` is called on exit."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Release the libgit2 handle regardless of how the block ended;
        # returning None means exceptions are not suppressed.
        self.free()
103 103
104 104
class GitFactory(RepoFactory):
    """Factory returning git repository objects for a ``wire`` dict."""

    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """Open the repository at ``wire['path']``.

        :param wire: mapping holding at least the ``path`` key
        :param create: accepted for interface compatibility; not used here
        :param use_libgit2: when true return the pygit2 based ``Repository``,
            otherwise the dulwich based ``Repo``
        """
        if not use_libgit2:
            # dulwich mode
            dulwich_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
            repo = Repo(dulwich_path)
        else:
            repo = Repository(safe_bytes(wire['path']))

        log.debug('repository created: got GIT object: %s', repo)
        return repo

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """Shortcut for ``repo(wire, use_libgit2=True)``."""
        return self.repo(wire, use_libgit2=True)
127 127
128 128
def create_signature_from_string(author_str, **kwargs):
    """
    Build a ``pygit2.Signature`` from a ``'Name <email>'`` string.

    :param author_str: String of the format 'Name <email>'
    :param kwargs: extra keyword arguments forwarded to ``pygit2.Signature``
    :return: pygit2.Signature object
    :raises ValueError: when ``author_str`` does not match the expected format
    """
    parsed = re.match(r'^(.+) <(.+)>$', author_str)
    if parsed is None:
        raise ValueError(f"Invalid format: {author_str}")

    return pygit2.Signature(parsed.group(1), parsed.group(2), **kwargs)
142 142
143 143
def get_obfuscated_url(url_obj):
    """Return ``str(url_obj)`` after masking its password and query string.

    Note that ``url_obj`` is modified in place: a truthy ``passwd`` is replaced
    with ``b'*****'`` and ``query`` is passed through ``obfuscate_qs``.
    """
    masked_passwd = url_obj.passwd
    if masked_passwd:
        masked_passwd = b'*****'
    url_obj.passwd = masked_passwd
    url_obj.query = obfuscate_qs(url_obj.query)
    return str(url_obj)
149 149
150 150
151 151 class GitRemote(RemoteBase):
152 152
def __init__(self, factory):
    """Store the repo factory and build the bulk-attribute dispatch tables.

    :param factory: object used throughout the class to open repositories
        (``repo`` / ``repo_libgit2``)
    """
    self._factory = factory

    # attribute name -> loader, consumed by ``bulk_request``
    self._bulk_methods = dict(
        date=self.date,
        author=self.author,
        branch=self.branch,
        message=self.message,
        parents=self.parents,
        _commit=self.revision,
    )

    # attribute name -> loader, consumed by ``bulk_file_request``
    self._bulk_file_methods = dict(
        size=self.get_node_size,
        data=self.get_node_data,
        flags=self.get_node_flags,
        is_binary=self.get_node_is_binary,
        md5=self.md5_hash,
    )
170 170
171 171 def _wire_to_config(self, wire):
172 172 if 'config' in wire:
173 173 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
174 174 return {}
175 175
176 176 def _remote_conf(self, config):
177 177 params = [
178 178 '-c', 'core.askpass=""',
179 179 ]
180 180 ssl_cert_dir = config.get('vcs_ssl_dir')
181 181 if ssl_cert_dir:
182 182 params.extend(['-c', f'http.sslCAinfo={ssl_cert_dir}'])
183 183 return params
184 184
@reraise_safe_exceptions
def discover_git_version(self):
    """Return the output of ``git --version`` without the ``git version`` prefix."""
    raw_output, _ = self.run_git_command(
        {}, ['--version'], _bare=True, _safe=True)

    marker = b'git version'
    version = raw_output[len(marker):] if raw_output.startswith(marker) else raw_output
    return safe_str(version.strip())
193 193
@reraise_safe_exceptions
def is_empty(self, wire):
    """Tell whether the repository has no commits.

    A resolvable, named HEAD means "not empty". When HEAD has no name the
    answer comes from ``repo.is_empty``. Any error while inspecting HEAD is
    treated as "empty".
    """
    with self._factory.repo_libgit2(wire) as repo:
        try:
            if repo.head.name:
                return False

            # NOTE(marcink): check again using more expensive method
            return repo.is_empty
        except Exception:
            # best effort: failing to read HEAD is reported as an empty repo
            return True
210 210
@reraise_safe_exceptions
def assert_correct_path(self, wire):
    """Return True when ``wire['path']`` points at a git repository.

    The result is cached per context/repo when caching is enabled. The public
    call always uses the fast check (``pygit2.discover_repository``); the slow
    branch, which actually opens the repository, is kept for completeness.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _assert_correct_path(_context_uid, _repo_id, fast_check):
        if fast_check:
            return bool(pygit2.discover_repository(safe_str(wire['path'])))

        try:
            with self._factory.repo_libgit2(wire):
                pass
        except pygit2.GitError:
            log.debug("Invalid Git path `%s`, tb: %s",
                      wire.get('path'), traceback.format_exc())
            return False
        return True

    return _assert_correct_path(context_uid, repo_id, True)
236 236
@reraise_safe_exceptions
def bare(self, wire):
    """Return the ``is_bare`` flag of the repository described by ``wire``."""
    with self._factory.repo_libgit2(wire) as repo:
        is_bare_repo = repo.is_bare
        return is_bare_repo
242 242
@reraise_safe_exceptions
def get_node_data(self, wire, commit_id, path):
    """Return the content of the blob at ``path`` in ``commit_id``.

    :return: ``BytesEnvelope`` wrapping the blob data
    :raises: ``exceptions.LookupException`` when the tree entry is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return BytesEnvelope(node.data)

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
255 255
@reraise_safe_exceptions
def get_node_size(self, wire, commit_id, path):
    """Return the ``size`` of the blob at ``path`` in ``commit_id``.

    :raises: ``exceptions.LookupException`` when the tree entry is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.size

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
268 268
@reraise_safe_exceptions
def get_node_flags(self, wire, commit_id, path):
    """Return the ``filemode`` of the blob at ``path`` in ``commit_id``.

    :raises: ``exceptions.LookupException`` when the tree entry is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.filemode

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
281 281
@reraise_safe_exceptions
def get_node_is_binary(self, wire, commit_id, path):
    """Return the ``is_binary`` flag of the blob at ``path`` in ``commit_id``.

    :raises: ``exceptions.LookupException`` when the tree entry is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.is_binary

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
294 294
@reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
    """Return the data of the object ``sha`` wrapped in a ``BytesEnvelope``."""
    with self._factory.repo_libgit2(wire) as repo:
        raw_data = repo[sha].data
        return BytesEnvelope(raw_data)
301 301
@reraise_safe_exceptions
def blob_raw_length(self, wire, sha):
    """Return the ``size`` of the object ``sha`` (cached per repo and sha)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _blob_raw_length(_repo_id, _sha):
        # the arguments only form the cache key; the lookup uses the outer ``sha``
        with self._factory.repo_libgit2(wire) as repo:
            return repo[sha].size

    return _blob_raw_length(repo_id, sha)
316 316
317 317 def _parse_lfs_pointer(self, raw_content):
318 318 spec_string = b'version https://git-lfs.github.com/spec'
319 319 if raw_content and raw_content.startswith(spec_string):
320 320
321 321 pattern = re.compile(rb"""
322 322 (?:\n)?
323 323 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
324 324 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
325 325 ^size[ ](?P<oid_size>[0-9]+)\n
326 326 (?:\n)?
327 327 """, re.VERBOSE | re.MULTILINE)
328 328 match = pattern.match(raw_content)
329 329 if match:
330 330 return match.groupdict()
331 331
332 332 return {}
333 333
@reraise_safe_exceptions
def is_large_file(self, wire, commit_id):
    """Return parsed LFS pointer data for the object ``commit_id``.

    Binary objects are never treated as pointers and yield ``{}``; for other
    objects the result of ``_parse_lfs_pointer`` on their data is returned.
    NOTE(review): despite its name, ``commit_id`` is used to look up an object
    whose ``is_binary``/``data`` are read - presumably a blob id; confirm
    against callers.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_large_file(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            candidate = repo[commit_id]
            if not candidate.is_binary:
                return self._parse_lfs_pointer(candidate.data)
            return {}

    return _is_large_file(repo_id, commit_id)
350 350
@reraise_safe_exceptions
def is_binary(self, wire, tree_id):
    """Return the ``is_binary`` flag of the object ``tree_id`` (cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_binary(_repo_id, _tree_id):
        # the arguments only form the cache key; the lookup uses the outer ``tree_id``
        with self._factory.repo_libgit2(wire) as repo:
            return repo[tree_id].is_binary

    return _is_binary(repo_id, tree_id)
364 364
@reraise_safe_exceptions
def md5_hash(self, wire, commit_id, path):
    """Validate that ``path`` in ``commit_id`` is a blob and return ``''``.

    No hash is computed here: for a valid blob the result is always the empty
    string.

    :raises: ``exceptions.LookupException`` when the tree entry is not a blob
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _md5_hash(_repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            node = repo[_commit_id].tree[_path]

            if node.type == pygit2.GIT_OBJ_BLOB:
                return ''

            raise exceptions.LookupException()(
                f'Tree for commit_id:{_commit_id} is not a blob: {node.type_str}')

    return _md5_hash(repo_id, commit_id, path)
384 384
@reraise_safe_exceptions
def in_largefiles_store(self, wire, oid):
    """Tell whether ``oid`` is present in the configured LFS store.

    Returns False when no ``vcs_git_lfs_store_location`` is configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        return False

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.has_oid()
400 400
@reraise_safe_exceptions
def store_path(self, wire, oid):
    """Return the ``oid_path`` of ``oid`` inside the configured LFS store.

    :raises ValueError: when no ``vcs_git_lfs_store_location`` is configured
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        raise ValueError(f'Unable to fetch oid with path {oid}')

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.oid_path
414 414
@reraise_safe_exceptions
def bulk_request(self, wire, rev, pre_load):
    """Collect several commit attributes for ``rev`` in one call.

    :param pre_load: iterable of attribute names, each a key of
        ``self._bulk_methods``
    :return: dict mapping attribute name to the loader's result
    :raises: ``exceptions.VcsException`` on ``KeyError``
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_request(_repo_id, _rev, _pre_load):
        collected = {}
        for attr in pre_load:
            # NOTE(review): the handler below also catches a KeyError raised
            # inside the loader itself and reports it as an unknown attribute.
            try:
                loader = self._bulk_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire.update({'cache': False})
                collected[attr] = loader(wire, rev)
            except KeyError as e:
                raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
        return collected

    return _bulk_request(repo_id, rev, sorted(pre_load))
434 434
@reraise_safe_exceptions
def bulk_file_request(self, wire, commit_id, path, pre_load):
    """Collect several file attributes for ``path`` at ``commit_id`` in one call.

    :param pre_load: iterable of attribute names, each a key of
        ``self._bulk_file_methods``
    :return: ``BinaryEnvelope`` wrapping a dict of attribute name -> value
    :raises: ``exceptions.VcsException`` on ``KeyError``
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                loader = self._bulk_file_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire.update({'cache': False})
                collected[attr] = loader(wire, _commit_id, _path)
            except KeyError as e:
                raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
        return collected

    payload = _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
    return BinaryEnvelope(payload)
453 453
def _build_opener(self, url: str):
    """Create a urllib opener for ``url``.

    When the parsed url carries auth info, basic and digest auth handlers
    backed by a default-realm password manager are installed.
    """
    parsed_url = url_parser(safe_bytes(url))
    authinfo = parsed_url.authinfo()[1]

    auth_handlers = []
    if authinfo:
        # create a password manager
        password_manager = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        password_manager.add_password(*authinfo)

        auth_handlers.append(httpbasicauthhandler(password_manager))
        auth_handlers.append(httpdigestauthhandler(password_manager))

    return urllib.request.build_opener(*auth_handlers)
468 468
@reraise_safe_exceptions
def check_url(self, url, config):
    """Verify that ``url`` answers like a git smart/dumb HTTP remote.

    Requests ``<url>/info/refs?service=git-upload-pack`` and inspects the
    response body.

    :param url: remote url, may contain credentials
    :param config: accepted for interface compatibility; not used here
    :return: True when the remote looks like a git repository
    :raises: ``exceptions.URLError`` when the url cannot be opened, does not
        answer with code 200, or the response does not look like git
    """
    url_obj = url_parser(safe_bytes(url))

    test_uri = safe_str(url_obj.authinfo()[0])
    # only the obfuscated form may be logged or put into error messages
    obfuscated_uri = get_obfuscated_url(url_obj)

    log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)

    if not test_uri.endswith('info/refs'):
        test_uri = test_uri.rstrip('/') + '/info/refs'

    o = self._build_opener(test_uri)
    o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

    q = {"service": 'git-upload-pack'}
    qs = f'?{urllib.parse.urlencode(q)}'
    cu = f"{test_uri}{qs}"
    req = urllib.request.Request(cu, None, {})

    try:
        log.debug("Trying to open URL %s", obfuscated_uri)
        resp = o.open(req)
        if resp.code != 200:
            raise exceptions.URLError()('Return Code is not 200')
    except Exception as e:
        log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
        # means it cannot be cloned
        raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")

    # now detect if it's proper git repo
    gitdata: bytes = resp.read()

    is_smart_http = b'service=git-upload-pack' in gitdata
    # old style git can return some other format !
    is_dumb_http = re.search(br'[0-9a-fA-F]{40}\s+refs', gitdata) is not None

    if not (is_smart_http or is_dumb_http):
        # this is the git backend, so the message must talk about git; there
        # is no original exception to report at this point
        raise exceptions.URLError(None)(
            f"url [{obfuscated_uri}] does not look like a git repo")

    return True
513 513
@reraise_safe_exceptions
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
    """Fetch ``url`` and apply a filtered set of its refs to the local repo.

    Only refs starting with one of ``valid_refs`` and not ending with
    ``deferred`` are written. With ``update_after_clone`` HEAD is set from the
    remote and the index is built from its tree.
    """
    # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
    remote_refs = self.pull(wire, url, apply_refs=False)
    repo = self._factory.repo(wire)

    # str.startswith only accepts a str or a tuple of them
    if isinstance(valid_refs, list):
        valid_refs = tuple(valid_refs)

    for ref_name in remote_refs:
        # only parse heads/tags and skip so called deferred tags
        if not ref_name.startswith(valid_refs) or ref_name.endswith(deferred):
            continue
        repo[ref_name] = remote_refs[ref_name]

    if not update_after_clone:
        return

    # we want to checkout HEAD
    repo["HEAD"] = remote_refs["HEAD"]
    index.build_index_from_tree(repo.path, repo.index_path(),
                                repo.object_store, repo["HEAD"].tree)
532 532
@reraise_safe_exceptions
def branch(self, wire, commit_id):
    """Return names of the branches whose ref points exactly at ``commit_id``.

    Names are returned without the ``refs/heads/`` prefix.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _branch(_context_uid, _repo_id, _commit_id):
        heads_re = re.compile('^refs/heads')
        return [
            ref_name.split('refs/heads/')[-1]
            for ref_name, target in self.get_refs(wire).items()
            if heads_re.match(ref_name) and target == _commit_id
        ]

    return _branch(context_uid, repo_id, commit_id)
549 549
@reraise_safe_exceptions
def commit_branches(self, wire, commit_id):
    """Return the list produced by ``repo.branches.with_commit(commit_id)``."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _commit_branches(_context_uid, _repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return list(repo.branches.with_commit(_commit_id))

    return _commit_branches(context_uid, repo_id, commit_id)
563 563
@reraise_safe_exceptions
def add_object(self, wire, content):
    """Store ``content`` as a new blob and return the blob id.

    The blob is built with dulwich (``objects.Blob``) and written through
    ``object_store``, which is a dulwich repository attribute. The dulwich
    repository is therefore opened here; the pygit2 repository used before
    has no ``object_store`` and made this call fail with ``AttributeError``.

    :param content: raw blob content, passed to ``Blob.set_raw_string``
    :return: ``blob.id`` of the stored dulwich blob
    """
    repo = self._factory.repo(wire)
    blob = objects.Blob()
    blob.set_raw_string(content)
    repo.object_store.add_object(blob)
    return blob.id
572 572
@reraise_safe_exceptions
def create_commit(self, wire, author, committer, message, branch, new_tree_id, date_args: list[int, int] = None):
    """Create a commit on ``refs/heads/<branch>`` and return its id as str.

    :param author: ``'Name <email>'`` string (a ready signature object is
        passed through untouched)
    :param committer: same format as ``author``
    :param message: commit message
    :param branch: short branch name the commit is created on
    :param new_tree_id: tree for the commit; bytes/str ids are validated
        against the repository first
    :param date_args: optional ``[time, offset]`` pair applied to both
        signatures; when omitted ``pygit2.Signature`` defaults are used
    :return: id of the new commit
    """
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:

        signature_kw = {}
        if date_args:
            current_time, offset = date_args
            signature_kw = {'time': current_time, 'offset': offset}

        # pygit2 needs Signature objects, so string input is always converted,
        # not only when explicit dates were given
        if isinstance(author, str):
            author = create_signature_from_string(author, **signature_kw)
        if isinstance(committer, str):
            committer = create_signature_from_string(committer, **signature_kw)

        tree = new_tree_id
        if isinstance(tree, (bytes, str)):
            # validate this tree is in the repo...
            tree = repo[safe_str(tree)].id

        parents = []
        # ensure we COMMIT on top of given branch head
        # check if this repo has ANY branches, otherwise it's a new branch case we need to make
        if branch in repo.branches.local:
            parents.append(repo.branches[branch].target)
        elif list(repo.branches.local):
            # new branch in a non-empty repo: start from the current HEAD
            parents.append(repo.head.target)

        commit_oid = repo.create_commit(
            f'refs/heads/{branch}',  # the name of the reference to update
            author,  # the author of the commit
            committer,  # the committer of the commit
            message,  # the commit message
            tree,  # the tree produced by the index
            parents  # list of parents for the new commit, usually just one,
        )

        new_commit_id = safe_str(commit_oid)

    return new_commit_id
617 617
@reraise_safe_exceptions
def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
    """Apply file changes to the index and commit them on ``branch``.

    :param commit_data: mapping with ``author``, ``committer``, ``message``,
        ``commit_time`` and ``commit_timezone``
    :param commit_tree: not read; the tree is always written from the index
    :param updated: iterable of mappings with ``content``, ``path``, ``mode``
    :param removed: iterable of paths removed from the index
    :return: id of the new commit
    """

    def mode2pygit(mode):
        """
        git only supports two filemode 644 and 755

        0o100755 -> 33261
        0o100644 -> 33188
        """
        known_modes = {
            0o100644: pygit2.GIT_FILEMODE_BLOB,
            0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
            0o120000: pygit2.GIT_FILEMODE_LINK
        }
        return known_modes.get(mode) or pygit2.GIT_FILEMODE_BLOB

    with self._factory.repo_libgit2(wire) as repo:
        repo_index = repo.index

        for file_spec in updated:
            blob_id = repo.create_blob(file_spec['content'])
            entry = pygit2.IndexEntry(
                file_spec['path'], blob_id, mode2pygit(file_spec['mode']))
            repo_index.add(entry)

        for removed_path in removed:
            repo_index.remove(removed_path)

        # Write changes to the index
        repo_index.write()

        # Create a tree from the updated index
        new_tree_id = repo_index.write_tree()

        date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]

        new_commit_id = self.create_commit(
            wire, commit_data['author'], commit_data['committer'],
            commit_data['message'], branch, new_tree_id, date_args=date_args)

        # libgit2, ensure the branch is there and exists
        self.create_branch(wire, branch, new_commit_id)

        # libgit2, set new ref to this created commit
        self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)

        return new_commit_id
670 670
@reraise_safe_exceptions
def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
    """Fetch from ``url`` into the local (dulwich) repository.

    :param url: remote location; values other than ``'default'`` without
        ``://`` use the local client, everything else the HTTP client
    :param apply_refs: write the fetched refs into the local repository
    :param refs: optional list of ref names limiting what is fetched/returned
    :param update_after: set HEAD from the remote and rebuild the index
    :return: mapping of remote refs (``.refs`` of a ``FetchPackResult``)
    :raises: ``exceptions.AbortException`` when the remote is not a git repo
    """
    if url != 'default' and '://' not in url:
        client = LocalGitClient(url)
    else:
        url_obj = url_parser(safe_bytes(url))
        opener = self._build_opener(url)
        # continue with the url as returned by authinfo()
        url = url_obj.authinfo()[0]
        client = HttpGitClient(base_url=url, opener=opener)
    repo = self._factory.repo(wire)

    determine_wants = repo.object_store.determine_wants_all
    if refs:
        refs = [ascii_bytes(x) for x in refs]

        def determine_wants_requested(remote_refs):
            # only ask for the hashes of explicitly requested refs
            return [
                safe_bytes(ref_hash)
                for ref_name, ref_hash in remote_refs.items()
                if safe_bytes(ref_name) in refs
            ]

        # swap with our custom requested wants
        determine_wants = determine_wants_requested

    try:
        remote_refs = client.fetch(
            path=url, target=repo, determine_wants=determine_wants)
    except NotGitRepository as e:
        log.warning(
            'Trying to fetch from "%s" failed, not a Git repository.', url)
        # Exception can contain unicode which we convert
        raise exceptions.AbortException(e)(repr(e))

    # mikhail: client.fetch() returns all the remote refs, but fetches only
    # refs filtered by `determine_wants` function. We need to filter result
    # as well
    if refs:
        remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}

    if apply_refs:
        # TODO: johbo: Needs proper test coverage with a git repository
        # that contains a tag object, so that we would end up with
        # a peeled ref at this point.
        for ref_name in remote_refs:
            if ref_name.endswith(PEELED_REF_MARKER):
                log.debug("Skipping peeled reference %s", ref_name)
                continue
            repo[ref_name] = remote_refs[ref_name]

        if refs and not update_after:
            # mikhail: explicitly set the head to the last ref.
            repo[HEAD_MARKER] = remote_refs[refs[-1]]

    if update_after:
        # we want to check out HEAD
        repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
        index.build_index_from_tree(repo.path, repo.index_path(),
                                    repo.object_store, repo[HEAD_MARKER].tree)

    if isinstance(remote_refs, FetchPackResult):
        return remote_refs.refs
    return remote_refs
738 738
@reraise_safe_exceptions
def sync_fetch(self, wire, url, refs=None, all_refs=False):
    """Fetch refs from ``url`` using the git command line.

    ``git ls-remote`` lists the remote refs first; the selected ones are then
    fetched in chunks with ``git fetch --force --prune``.

    :param refs: optional value or list compared against the remote hashes to
        restrict what is fetched. NOTE(review): hashes parsed from the command
        output are bytes - confirm callers pass matching types.
    :param all_refs: list all remote refs instead of only heads and tags
    :return: ordered mapping of remote ref name -> hash (both as read from
        the ``ls-remote`` output)
    """
    self._factory.repo(wire)
    if refs and not isinstance(refs, (list, tuple)):
        refs = [refs]

    config = self._wire_to_config(wire)

    # get all remote refs we'll use to fetch later
    ls_remote_cmd = ['ls-remote']
    if not all_refs:
        ls_remote_cmd += ['--heads', '--tags']
    ls_remote_cmd += [url]
    output, __ = self.run_git_command(
        wire, ls_remote_cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    remote_refs = collections.OrderedDict()
    fetch_refs = []

    for ref_line in output.splitlines():
        sha, ref = ref_line.split(b'\t')
        sha = sha.strip()

        if ref in remote_refs:
            # duplicate, skip
            continue
        if ref.endswith(PEELED_REF_MARKER):
            log.debug("Skipping peeled reference %s", ref)
            continue
        # don't sync HEAD
        if ref == HEAD_MARKER:
            continue

        remote_refs[ref] = sha

        # without a filter everything is fetched, otherwise only matching hashes
        if not refs or sha in refs:
            fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
    log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))

    if fetch_refs:
        # fetch in chunks of 4096 refspecs per git call
        for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
            refspecs = list(chunk)
            log.debug('Fetching %s refs from import url', len(refspecs))
            self.run_git_command(
                wire, ['fetch', url, '--force', '--prune', '--'] + refspecs,
                fail_on_stderr=False,
                _copts=self._remote_conf(config),
                extra_env={'GIT_TERMINAL_PROMPT': '0'})

    return remote_refs
792 792
@reraise_safe_exceptions
def sync_push(self, wire, url, refs=None):
    """Mirror-push the repository to ``url`` (``git push <url> --mirror``).

    Nothing is pushed when ``check_url`` returns a falsy value. ``refs`` is
    accepted but not used.
    """
    url_ok = self.check_url(url, wire)
    if not url_ok:
        return

    config = self._wire_to_config(wire)
    self._factory.repo(wire)
    push_cmd = ['push', url, '--mirror']
    self.run_git_command(
        wire, push_cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})
803 803
@reraise_safe_exceptions
def get_remote_refs(self, wire, url):
    """Open ``url`` as a dulwich repository and return its refs.

    ``wire`` is not used by this call.
    """
    remote_repo = Repo(url)
    return remote_repo.get_refs()
808 808
@reraise_safe_exceptions
def get_description(self, wire):
    """Return ``get_description()`` of the dulwich repository for ``wire``."""
    return self._factory.repo(wire).get_description()
813 813
@reraise_safe_exceptions
def get_missing_revs(self, wire, rev1, rev2, path2):
    """Return commit ids reachable from ``rev2`` but not from ``rev1``.

    ``path2`` is first fetched into the local repository; the walk itself is
    done on the repository located at ``path2``.
    """
    local_repo = self._factory.repo(wire)
    LocalGitClient(thin_packs=False).fetch(path2, local_repo)

    # same wire, but pointing at the other repository
    wire_remote = wire.copy()
    wire_remote['path'] = path2
    repo_remote = self._factory.repo(wire_remote)
    LocalGitClient(thin_packs=False).fetch(path2, repo_remote)

    walker = repo_remote.get_walker(
        include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])
    return [entry.commit.id for entry in walker]
828 828
@reraise_safe_exceptions
def get_object(self, wire, sha, maybe_unreachable=False):
    """Resolve ``sha`` (id or reference) and return basic object information.

    Tags are resolved to their target. A plain commit id that is not a tag
    and not contained in any branch is rejected unless ``maybe_unreachable``
    is set.

    :return: dict with ``id``, ``type``, ``commit_id`` and ``idx`` (always 0)
    :raises: ``exceptions.LookupException`` when ``sha`` cannot be resolved or
        points to a dangling commit
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_object(_context_uid, _repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:

            missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
            try:
                commit = repo.revparse_single(sha)
            except KeyError:
                # NOTE(marcink): KeyError doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found', sha)
                raise exceptions.LookupException(e)(missing_commit_err)
            except ValueError as e:
                raise exceptions.LookupException(e)(missing_commit_err)

            is_tag = isinstance(commit, pygit2.Tag)
            if is_tag:
                commit = repo.get(commit.target)

            resolved_hex = commit.hex

            # if we used a reference and it parsed, it cannot be a dangling
            # commit; the same goes for tags and explicitly allowed lookups
            check_dangling = (
                not is_tag
                and not maybe_unreachable
                and sha == resolved_hex
            )

            if check_dangling and not any(repo.branches.with_commit(resolved_hex)):
                # NOTE(marcink): Empty error doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
                raise exceptions.LookupException(e)(missing_commit_err)

            commit_id = commit.hex
            return {
                'id': commit_id,
                'type': commit.type_str,
                'commit_id': commit_id,
                'idx': 0
            }

    return _get_object(context_uid, repo_id, sha)
888 888
@reraise_safe_exceptions
def get_refs(self, wire):
    """
    Return a mapping of reference name to target hex id for every reference
    whose name starts with `refs/heads/` or `refs/tags/`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_refs(_context_uid, _repo_id):
        with self._factory.repo_libgit2(wire) as repo:
            wanted = re.compile('^refs/(heads|tags)/')
            return {
                ref.name: ref.target.hex
                for ref in repo.listall_reference_objects()
                if wanted.match(ref.name)
            }

    return _get_refs(context_uid, repo_id)
904 904
@reraise_safe_exceptions
def get_branch_pointers(self, wire):
    """
    Return a mapping of target hex id to reference shorthand for every
    reference whose name starts with `refs/heads`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_branch_pointers(_context_uid, _repo_id):
        repo_init = self._factory.repo_libgit2(wire)
        heads_only = re.compile('^refs/heads')
        with repo_init as repo:
            # when several branches share a target, the one listed last wins
            return {
                ref.target.hex: ref.shorthand
                for ref in repo.listall_reference_objects()
                if heads_only.match(ref.name)
            }

    return _get_branch_pointers(context_uid, repo_id)
920 920
@reraise_safe_exceptions
def head(self, wire, show_exc=True):
    """
    Return the hex id of the object HEAD peels to.

    :param show_exc: when False, any error while resolving HEAD is swallowed
        and None is returned instead
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _head(_context_uid, _repo_id, _show_exc):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                return repo.head.peel().hex
            except Exception:
                if not show_exc:
                    # caller asked for a silent failure
                    return None
                raise

    return _head(context_uid, repo_id, show_exc)
936 936
@reraise_safe_exceptions
def init(self, wire):
    """Initialize a non-bare git repository at `wire['path']`."""
    pygit2.init_repository(safe_str(wire['path']), bare=False)
941 941
@reraise_safe_exceptions
def init_bare(self, wire):
    """Initialize a bare git repository at `wire['path']`."""
    pygit2.init_repository(safe_str(wire['path']), bare=True)
946 946
@reraise_safe_exceptions
def revision(self, wire, rev):
    """
    Look up `rev` in the repository and return a dict with its hex `id`,
    plus a `tree` hex id when the object exposes `tree_id`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _revision(_context_uid, _repo_id, _rev):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[rev]
            info = {'id': git_obj.id.hex}

            # tree objects itself don't have tree_id attribute
            if hasattr(git_obj, 'tree_id'):
                info['tree'] = git_obj.tree_id.hex
            return info

    return _revision(context_uid, repo_id, rev)
967 967
@reraise_safe_exceptions
def date(self, wire, commit_id):
    """
    Return `[commit_time, commit_time_offset]` for the given commit id.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _date(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[commit_id]

            if not hasattr(git_obj, 'commit_time'):
                # object carries no commit time itself (presumably a tag);
                # read it from the object it refers to
                git_obj = git_obj.get_object()

            # TODO(marcink): check dulwich difference of offset vs timezone
            return [git_obj.commit_time, git_obj.commit_time_offset]

    return _date(repo_id, commit_id)
988 988
@reraise_safe_exceptions
def author(self, wire, commit_id):
    """
    Return the author of the given commit as `name <email>`, or just the
    name when the author has no email.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _author(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[commit_id]

            # objects without an `author` attribute are resolved via get_object()
            signature = git_obj.author if hasattr(git_obj, 'author') else git_obj.get_object().author

            if signature.email:
                return f"{signature.name} <{signature.email}>"

            try:
                return f"{signature.name}"
            except Exception:
                # reading the name failed, fall back to the raw name
                return f"{safe_str(signature.raw_name)}"

    return _author(repo_id, commit_id)
1014 1014
@reraise_safe_exceptions
def message(self, wire, commit_id):
    """Return the `message` attribute of the object stored under `commit_id`."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _message(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[commit_id].message

    return _message(repo_id, commit_id)
1027 1027
@reraise_safe_exceptions
def parents(self, wire, commit_id):
    """Return the list of parent hex ids of the given commit."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _parents(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[commit_id]
            if not hasattr(git_obj, 'parent_ids'):
                # no parents on this object type; use the object it refers to
                git_obj = git_obj.get_object()

            return [parent.hex for parent in git_obj.parent_ids]

    return _parents(repo_id, commit_id)
1045 1045
@reraise_safe_exceptions
def children(self, wire, commit_id):
    """
    Return the ids of the children of `commit_id`, as reported by
    `git rev-list --all --children` for the range `<commit_id>^..<HEAD>`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    head = self.head(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _children(_repo_id, _commit_id):
        rev_list_cmd = ['rev-list', '--all', '--children', f'{commit_id}^..{head}']
        output, __ = self.run_git_command(wire, rev_list_cmd)

        starts_with_commit = re.compile(fr'^{commit_id}')
        for raw_line in output.splitlines():
            text = safe_str(raw_line)
            if starts_with_commit.match(text):
                # line format is `<commit> <child> <child> ...`; only the first
                # matching line is used
                return text.split(' ')[1:]

        return []

    return _children(repo_id, commit_id)
1070 1070
@reraise_safe_exceptions
def set_refs(self, wire, key, value):
    """Create the reference `key` pointing at `value`, overwriting an existing one."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.create(key, value, force=True)
1076 1076
@reraise_safe_exceptions
def create_branch(self, wire, branch_name, commit_id, force=False):
    """
    Create a local branch named `branch_name`.

    :param branch_name: name of the branch to create
    :param commit_id: commit the branch should point at; when empty, the
        object HEAD peels to is used
    :param force: passed through to the branch creation; when False, nothing
        is done if `repo.branches.get(branch_name)` already finds a branch
    """
    with self._factory.repo_libgit2(wire) as repo:
        if commit_id:
            commit = repo[commit_id]
        else:
            # if commit is not given just use the HEAD. `repo.head` is an
            # attribute holding a reference (see `head()`), not a callable, so
            # it has to be peeled to obtain the object the branch points at.
            commit = repo.head.peel()

        # without force, create only if that branch isn't existing
        if force or not repo.branches.get(branch_name):
            repo.branches.local.create(branch_name, commit, force=force)
1092 1092
@reraise_safe_exceptions
def remove_ref(self, wire, key):
    """Delete the reference named `key` from the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(key)
1098 1098
@reraise_safe_exceptions
def tag_remove(self, wire, tag_name):
    """Delete the reference `refs/tags/<tag_name>` from the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(f'refs/tags/{tag_name}')
1105 1105
@reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Compare the trees of two commits and classify the changed paths.

    :param source_id: id of the source commit; may be empty, in which case
        the target tree is compared against nothing
    :param target_id: id of the target commit
    :return: tuple of three lists `(added, modified, deleted)` holding paths
    """
    repo = self._factory.repo(wire)

    # source can be empty
    source_id = safe_bytes(source_id if source_id else b'')
    target_id = safe_bytes(target_id)

    source_tree = repo[source_id].tree if source_id else None
    target_tree = repo[target_id].tree
    changes = list(repo.object_store.tree_changes(source_tree, target_tree))

    added, modified, deleted = set(), set(), set()
    for (old_path, new_path), (_, _), (_, _) in changes:
        if new_path:
            # path exists on the target side: changed if it existed before, new otherwise
            bucket = modified if old_path else added
            bucket.add(new_path)
        elif old_path:
            deleted.add(old_path)

    return list(added), list(modified), list(deleted)
1129 1129
@reraise_safe_exceptions
def tree_and_type_for_path(self, wire, commit_id, path):
    """
    Look up `path` inside the tree of `commit_id`.

    :return: tuple `(hex id, type string, filemode)` of the tree entry, or
        `(None, None, None)` when the path is not present
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            try:
                entry = commit.tree[path]
            except KeyError:
                # path is missing in this commit
                return None, None, None

            return entry.id.hex, entry.type_str, entry.filemode

    return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1149 1149
@reraise_safe_exceptions
def tree_items(self, wire, tree_id):
    """
    List the entries of the tree stored under `tree_id`.

    :return: list of `(name, filemode, hex id, type)` tuples
    :raises ObjectMissing: when no object is stored under `tree_id`
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    def _entry_type(entry):
        # NOTE(marcink): submodules we translate to 'link' for backward compat
        kind = entry.type_str
        return 'link' if kind == 'commit' else kind

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_items(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            return [
                (entry.name, entry.filemode, entry.hex, _entry_type(entry))
                for entry in tree
            ]

    return _tree_items(repo_id, tree_id)
1178 1178
@reraise_safe_exceptions
def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Old version that uses subprocess to call diff

    :param commit_id_1: id of the base commit; when equal to `self.EMPTY_COMMIT`
        `git show` of `commit_id_2` is used instead of `git diff`
    :param commit_id_2: id of the commit to diff against the base
    :param file_filter: optional path; when set the diff is limited to it
    :param opt_ignorews: when truthy, `--ignore-all-space` is passed to git
    :param context: number of context lines, passed as `-U<context>`
    :return: the diff as bytes, as produced by `run_git_command`
    """
    from_empty = commit_id_1 == self.EMPTY_COMMIT

    flags = [
        f'-U{context}', '--patch',
        '--binary',
        '--find-renames',
        '--no-indent-heuristic',
        # '--indent-heuristic',
        #'--full-index',
        #'--abbrev=40'
    ]

    if opt_ignorews:
        flags.append('--ignore-all-space')

    if from_empty:
        cmd = ['show'] + flags + [commit_id_2]
    else:
        cmd = ['diff'] + flags + [commit_id_1, commit_id_2]

    if file_filter:
        cmd.extend(['--', file_filter])

    diff, __ = self.run_git_command(wire, cmd)

    # If we used 'show' command, strip first few lines (until actual diff
    # starts)
    if from_empty:
        lines = diff.splitlines()
        diff_start = next(
            (pos for pos, line in enumerate(lines) if line.startswith(b'diff')),
            len(lines))
        # Append new line just like 'diff' command do. The command output is
        # bytes, so the separator and the trailing newline must be bytes too.
        diff = b'\n'.join(lines[diff_start:]) + b'\n'
    return diff
1219 1219
@reraise_safe_exceptions
def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Diff two commits using libgit2.

    :param commit_id_1: id of the base commit; when equal to `self.EMPTY_COMMIT`
        the tree of `commit_id_2` is diffed against an empty tree
    :param commit_id_2: id of the commit whose tree is diffed
    :param file_filter: optional path; when set only the patch whose old file
        path equals it is returned
    :param opt_ignorews: when truthy, whitespace is ignored
    :param context: number of context lines
    :return: `BytesEnvelope` with the patch data
    """
    with self._factory.repo_libgit2(wire) as repo:
        diff_flags = pygit2.GIT_DIFF_SHOW_BINARY
        if opt_ignorews:
            diff_flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE

        target_commit = repo[commit_id_2]
        if commit_id_1 == self.EMPTY_COMMIT:
            diff_obj = target_commit.tree.diff_to_tree(
                flags=diff_flags, context_lines=context, swap=True)
        else:
            base_commit = repo[commit_id_1]
            diff_obj = target_commit.tree.diff_to_tree(
                base_commit.tree, flags=diff_flags, context_lines=context, swap=True)
            # rename detection is only run when there is a base commit
            diff_obj.find_similar(flags=pygit2.GIT_DIFF_FIND_RENAMES)

        if not file_filter:
            return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')

        for patch in diff_obj:
            if patch.delta.old_file.path == file_filter:
                return BytesEnvelope(patch.data) or BytesEnvelope(b'')

        # no matching path == no diff
        return BytesEnvelope(b'')
1254 1254
@reraise_safe_exceptions
def node_history(self, wire, commit_id, path, limit):
    """
    Return the commit ids (as bytes) that touched `path`, starting from
    `commit_id`.

    :param limit: maximum number of commits to return; a falsy value means
        no limit is passed to git
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
        if limit == 1:
            # optimize for n==1, rev-list is much faster for that use-case
            cmd = ['rev-list', '-1', commit_id, '--', path]
        else:
            count_args = ['-n', str(safe_int(limit, 0))] if limit else []
            cmd = ['log', *count_args, '--pretty=format: %H', '-s', commit_id, '--', path]

        output, __ = self.run_git_command(wire, cmd)

        # every 40-char hex run in the output is taken as a commit id
        return re.findall(rb'[0-9a-fA-F]{40}', output)

    return _node_history(context_uid, repo_id, commit_id, path, limit)
1276 1276
@reraise_safe_exceptions
def node_annotate_legacy(self, wire, commit_id, path):
    """
    Annotate `path` at `commit_id` by parsing the output of `git blame`.

    :return: list of `(line_no, blame_commit_id, line)` tuples, where
        `line_no` starts at 1 and the other two items are bytes
    """
    # note: replaced by pygit2 implementation
    cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
    # -l ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r commit_id ==> blames for the given commit
    output, __ = self.run_git_command(wire, cmd)

    result = []
    # `splitlines()` yields no trailing empty element, so every item is a real
    # blame line and none of them may be sliced off.
    for line_no, blame_line in enumerate(output.splitlines(), start=1):
        # each line is `<commit id> <rest>`; split on the first space only
        blame_commit_id, line = blame_line.split(b' ', 1)
        result.append((line_no, blame_commit_id, line))

    return result
1293 1293
@reraise_safe_exceptions
def node_annotate(self, wire, commit_id, path):
    """
    Annotate `path` at `commit_id` using libgit2 blame.

    :return: `BinaryEnvelope` wrapping a list of
        `(line_no, blame_commit_id, line)` tuples, `line_no` starting at 1
    """
    annotated = []
    with self._factory.repo_libgit2(wire) as repo:
        commit = repo[commit_id]
        blame = repo.blame(path, newest_commit=commit_id)
        file_lines = commit.tree[path].data.splitlines()

        for line_no, line in enumerate(file_lines, start=1):
            origin_id = blame.for_line(line_no).final_commit_id.hex
            annotated.append((line_no, origin_id, line))

    return BinaryEnvelope(annotated)
1310 1310
@reraise_safe_exceptions
def update_server_info(self, wire):
    """Run dulwich's `update_server_info` on the repository behind `wire`."""
    update_server_info(self._factory.repo(wire))
1315 1315
@reraise_safe_exceptions
def get_all_commit_ids(self, wire):
    """
    Return the ids of all commits reachable from branches and tags, in the
    order produced by `git rev-list --reverse --date-order`.

    :return: list of commit ids (bytes lines of the git output); an empty
        list when the git command fails
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids(_context_uid, _repo_id):
        cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
        try:
            output, __ = self.run_git_command(wire, cmd)
            return output.splitlines()
        except Exception:
            # Can be raised for empty repositories
            return []

    return _get_all_commit_ids(context_uid, repo_id)
1343 1343
@reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
    """
    Run the git executable with `cmd` and return its output.

    Recognized special keys in `opts` (all are removed before the remaining
    options are handed to the subprocess wrapper):

    * `_bare`: when present, the default `-c` config options are not added
    * `_safe`: when present, an `OSError` does not raise
    * `_copts`: extra options placed between the executable and `cmd`
    * `extra_env`: mapping merged into the environment of the process

    :param wire: call context; when `wire['path']` is an existing directory
        it is used as the working directory
    :param cmd: list of git arguments
    :return: tuple `(stdout, stderr)` as bytes. NOTE: with `_safe` set and an
        `OSError` raised, the tuple is `('', <the OSError>)` instead
    :raises: the exception built by `exceptions.VcsException` when the
        command can't be run and `_safe` wasn't given
    """
    repo_path = wire.get('path', None)
    if repo_path and os.path.isdir(repo_path):
        opts['cwd'] = repo_path

    if '_bare' in opts:
        opts.pop('_bare')
        config_opts = []
    else:
        config_opts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']

    # no exc on failure
    safe_call = '_safe' in opts
    opts.pop('_safe', None)

    if '_copts' in opts:
        config_opts.extend(opts.pop('_copts') or [])

    gitenv = os.environ.copy()
    gitenv.update(opts.pop('extra_env', {}))
    # need to clean fix GIT_DIR !
    gitenv.pop('GIT_DIR', None)
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
    gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

    cmd = [settings.GIT_EXECUTABLE] + config_opts + cmd
    _opts = {'env': gitenv, 'shell': False}

    proc = None
    try:
        # caller supplied options override the defaults above
        _opts.update(opts)
        proc = subprocessio.SubprocessIOChunker(cmd, **_opts)

        stdout = b''.join(proc)
        stderr = b''.join(proc.stderr)
        return stdout, stderr
    except OSError as err:
        cmd = ' '.join(map(safe_str, cmd))  # human friendly CMD
        tb_err = ("Couldn't run git command (%s).\n"
                  "Original error was:%s\n"
                  "Call options:%s\n"
                  % (cmd, err, _opts))
        log.exception(tb_err)
        if not safe_call:
            raise exceptions.VcsException()(tb_err)
        return '', err
    finally:
        if proc:
            proc.close()
1397 1397
@reraise_safe_exceptions
def install_hooks(self, wire, force=False):
    """
    Install the git hooks into the repository at `wire['path']`.

    :param force: passed to `install_git_hooks` as `force_create`
    :return: whatever `install_git_hooks` returns
    """
    from vcsserver.hook_utils import install_git_hooks
    bare = self.bare(wire)
    path = wire['path']
    return install_git_hooks(path, bare, force_create=force)
1407 1407
@reraise_safe_exceptions
def get_hooks_info(self, wire):
    """
    Return the versions of the installed pre and post hooks as a dict with
    the keys `pre_version` and `post_version`.
    """
    from vcsserver.hook_utils import (
        get_git_pre_hook_version, get_git_post_hook_version)

    is_bare = self.bare(wire)
    repo_path = wire['path']

    pre_version = get_git_pre_hook_version(repo_path, is_bare)
    post_version = get_git_post_hook_version(repo_path, is_bare)
    return {
        'pre_version': pre_version,
        'post_version': post_version,
    }
1418 1418
@reraise_safe_exceptions
def set_head_ref(self, wire, head_name):
    """
    Point HEAD at `refs/heads/<head_name>`.

    :return: two-item list: the head name and a message describing the change
    """
    log.debug('Setting refs/head to `%s`', head_name)
    with self._factory.repo_libgit2(wire) as repo:
        repo.set_head(f'refs/heads/{head_name}')

    return [head_name, f'set HEAD to refs/heads/{head_name}']
1427 1427
@reraise_safe_exceptions
def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id, cache_config):
    """
    Build an archive of the repository content at `commit_id`.

    The actual archiving is delegated to `store_archive_in_cache`; this method
    only provides the generator that walks the files to put into the archive.
    """

    def file_walker(_commit_id, path):
        """
        Yield an `ArchiveNode` for every file below `path`; an empty path or
        `/` means the whole tree. Entries with the commit filemode are skipped.
        """
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]

            if path in ('', '/'):
                start = commit.tree
            else:
                start = commit.tree[path.rstrip('/')]

            tree_id = start.id.hex
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            # reading the tree into an index gives a flat listing of its files
            tree_index = LibGit2Index.Index()
            tree_index.read_tree(tree)

            for entry in tree_index:
                entry_path = entry.path
                entry_mode = entry.mode
                if entry_mode == pygit2.GIT_FILEMODE_COMMIT:
                    log.debug('Skipping path %s as a commit node', entry_path)
                    continue
                yield ArchiveNode(
                    entry_path, entry_mode, stat.S_ISLNK(entry_mode), repo[entry.hex].read_raw)

    return store_archive_in_cache(
        file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id,
        cache_config=cache_config)
General Comments 0
You need to be logged in to leave comments. Login now