##// END OF EJS Templates
fix(annotation): fixed annotation logic bug that could cause exceptions for files with mixed newlines for git. Fixes RCCE-59
super-admin -
r1216:72621ce1 default
parent child Browse files
Show More
@@ -1,1518 +1,1519 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23 import traceback
24 24 import urllib.request
25 25 import urllib.parse
26 26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 40
41 41 import rhodecode
42 42 from vcsserver import exceptions, settings, subprocessio
43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str
43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str, splitnewlines
44 44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
45 45 from vcsserver.hgcompat import (
46 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 47 from vcsserver.git_lfs.lib import LFSOidStore
48 48 from vcsserver.vcs_base import RemoteBase
49 49
50 50 DIR_STAT = stat.S_IFDIR
51 51 FILE_MODE = stat.S_IFMT
52 52 GIT_LINK = objects.S_IFGITLINK
53 53 PEELED_REF_MARKER = b'^{}'
54 54 HEAD_MARKER = b'HEAD'
55 55
56 56 log = logging.getLogger(__name__)
57 57
58 58
def reraise_safe_exceptions(func):
    """Decorator translating Dulwich errors into neutral vcsserver exceptions.

    Dulwich lookup failures are re-raised as ``LookupException`` and
    protocol failures as ``VcsException``; both carry the original exception
    as ``org_exc``. Any other exception propagates unchanged.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing) as lookup_err:
            raise exceptions.LookupException(org_exc=lookup_err)(safe_str(lookup_err))
        except (HangupException, UnexpectedCommandError) as protocol_err:
            raise exceptions.VcsException(org_exc=protocol_err)(safe_str(protocol_err))
        # NOTE(marcink): because of how dulwich handles some exceptions
        # (KeyError on empty repos), we cannot track this and catch all
        # exceptions here; everything else is deliberately left to propagate
        # untouched so other handlers can deal with it.
        return result

    return wrapper
81 81
82 82
class Repo(DulwichRepo):
    """
    Thin subclass of the dulwich Repo class.

    dulwich sometimes keeps .idx file descriptors open, which leads to
    "Too many open files" errors. To avoid that, every opened file descriptor
    is closed as soon as the repo object is destroyed.
    """
    def __del__(self):
        # Nothing to close when the object never got an object store
        # (presumably a constructor that failed part-way -- TODO confirm).
        if not hasattr(self, 'object_store'):
            return
        self.close()
94 94
95 95
class Repository(LibGit2Repo):
    """libgit2 repository usable in a ``with`` statement; calls ``free()`` on exit."""

    def __enter__(self):
        # the repository object itself is the managed resource
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # always release the repository handle; exceptions are not suppressed
        self.free()
103 103
104 104
class GitFactory(RepoFactory):
    """Factory building git repository objects, backed by dulwich or libgit2."""
    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """Instantiate the repository object for ``wire['path']``."""
        if not use_libgit2:
            # dulwich mode
            dulwich_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
            repo = Repo(dulwich_path)
        else:
            repo = Repository(safe_bytes(wire['path']))

        log.debug('repository created: got GIT object: %s', repo)
        return repo

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Return a repository instance for the path given in ``wire``.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """Shortcut for ``repo(wire, use_libgit2=True)``."""
        return self.repo(wire, use_libgit2=True)
127 127
128 128
def create_signature_from_string(author_str, **kwargs):
    """
    Build a pygit2.Signature out of a 'Name <email>' string.

    :param author_str: String of the format 'Name <email>'
    :param kwargs: passed through unchanged to ``pygit2.Signature``
    :return: pygit2.Signature object
    :raises ValueError: when ``author_str`` does not follow the expected format
    """
    parsed = re.match(r'^(.+) <(.+)>$', author_str)
    if parsed is None:
        raise ValueError(f"Invalid format: {author_str}")

    return pygit2.Signature(parsed.group(1), parsed.group(2), **kwargs)
142 142
143 143
def get_obfuscated_url(url_obj):
    """Mask the password and query string of ``url_obj`` in place and return it as ``str``.

    Note that ``url_obj`` itself is modified.
    """
    masked_passwd = url_obj.passwd
    if masked_passwd:
        masked_passwd = b'*****'
    url_obj.passwd = masked_passwd
    url_obj.query = obfuscate_qs(url_obj.query)
    return str(url_obj)
149 149
150 150
151 151 class GitRemote(RemoteBase):
152 152
def __init__(self, factory):
    """Keep the repository factory and build the bulk-attribute dispatch tables."""
    self._factory = factory
    # commit attribute name -> bound getter, looked up by bulk_request()
    self._bulk_methods = dict(
        date=self.date,
        author=self.author,
        branch=self.branch,
        message=self.message,
        parents=self.parents,
        _commit=self.revision,
    )
    # file attribute name -> bound getter, looked up by bulk_file_request()
    self._bulk_file_methods = dict(
        size=self.get_node_size,
        data=self.get_node_data,
        flags=self.get_node_flags,
        is_binary=self.get_node_is_binary,
        md5=self.md5_hash,
    )
170 170
def _wire_to_config(self, wire):
    """Flatten ``wire['config']`` entries into a ``{'<first>_<second>': third}`` dict.

    Returns an empty dict when ``wire`` has no ``'config'`` key.
    """
    if 'config' not in wire:
        return {}

    flat = {}
    for entry in wire['config']:
        flat[entry[0] + '_' + entry[1]] = entry[2]
    return flat
175 175
def _remote_conf(self, config):
    """Build the ``-c key=value`` git options used for remote operations.

    ``core.askpass`` is always set to an empty value; the SSL CA info and
    the LFS storage options are appended only when ``config`` holds a truthy
    value for them.
    """
    params = ['-c', 'core.askpass=""']
    option_templates = (
        ('vcs_ssl_dir', 'http.sslCAinfo={}'),
        ('vcs_git_lfs_store_location', 'lfs.storage={}'),
    )
    for config_key, template in option_templates:
        value = config.get(config_key)
        if value:
            params += ['-c', template.format(value)]
    return params
188 188
@reraise_safe_exceptions
def discover_git_version(self):
    """Return the output of ``git --version`` without the leading ``git version`` text."""
    raw_version, _ = self.run_git_command(
        {}, ['--version'], _bare=True, _safe=True)
    # drop the fixed banner when present, leaving only the version number part
    raw_version = raw_version.removeprefix(b'git version')
    return safe_str(raw_version.strip())
197 197
@reraise_safe_exceptions
def is_empty(self, wire):
    """Tell whether the repository has no content.

    A repository whose HEAD has a name is reported as non-empty; otherwise
    the libgit2 ``is_empty`` flag decides. Any error during the check is
    treated as "empty".
    """
    with self._factory.repo_libgit2(wire) as repo:
        try:
            if repo.head.name:
                return False

            # NOTE(marcink): check again using more expensive method
            return repo.is_empty
        except Exception:
            # best-effort check: a failure to read HEAD counts as empty
            return True
213 213
@reraise_safe_exceptions
def assert_correct_path(self, wire):
    """Return True when ``wire['path']`` points at a git repository.

    The (optionally cached) helper is always invoked in its fast mode, which
    relies on ``pygit2.discover_repository``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _assert_correct_path(_context_uid, _repo_id, fast_check):
        if fast_check:
            repo_path = safe_str(wire['path'])
            return bool(pygit2.discover_repository(repo_path))

        # slow mode: actually open the repository with libgit2
        try:
            with self._factory.repo_libgit2(wire):
                pass
        except pygit2.GitError:
            bad_path = wire.get('path')
            tb = traceback.format_exc()
            log.debug("Invalid Git path `%s`, tb: %s", bad_path, tb)
            return False
        return True

    return _assert_correct_path(context_uid, repo_id, True)
239 239
@reraise_safe_exceptions
def bare(self, wire):
    """Return the libgit2 ``is_bare`` flag of the repository described by ``wire``."""
    with self._factory.repo_libgit2(wire) as libgit_repo:
        return libgit_repo.is_bare
245 245
@reraise_safe_exceptions
def get_node_data(self, wire, commit_id, path):
    """Return the content of the blob at ``path`` in ``commit_id`` as a ``BytesEnvelope``.

    A ``LookupException`` is raised when the tree entry is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return BytesEnvelope(node.data)

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
258 258
@reraise_safe_exceptions
def get_node_size(self, wire, commit_id, path):
    """Return the ``size`` of the blob at ``path`` in ``commit_id``.

    A ``LookupException`` is raised when the tree entry is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.size

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
271 271
@reraise_safe_exceptions
def get_node_flags(self, wire, commit_id, path):
    """Return the ``filemode`` of the blob at ``path`` in ``commit_id``.

    A ``LookupException`` is raised when the tree entry is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.filemode

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
284 284
@reraise_safe_exceptions
def get_node_is_binary(self, wire, commit_id, path):
    """Return the ``is_binary`` flag of the blob at ``path`` in ``commit_id``.

    A ``LookupException`` is raised when the tree entry is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.is_binary

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
297 297
@reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
    """Return the data of the object stored under ``sha`` wrapped in a ``BytesEnvelope``."""
    with self._factory.repo_libgit2(wire) as repo:
        return BytesEnvelope(repo[sha].data)
304 304
@reraise_safe_exceptions
def blob_raw_length(self, wire, sha):
    """Return the ``size`` of the object stored under ``sha`` (optionally cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _blob_raw_length(_repo_id, _sha):
        # the arguments only feed the cache decorator; the lookup uses the outer `sha`
        with self._factory.repo_libgit2(wire) as repo:
            return repo[sha].size

    return _blob_raw_length(repo_id, sha)
319 319
def _parse_lfs_pointer(self, raw_content):
    """Parse a git-lfs pointer file.

    :param raw_content: bytes of the candidate pointer file (may be empty/None)
    :return: dict with ``spec_ver``, ``oid_hash`` and ``oid_size`` (bytes values)
        when ``raw_content`` is a well-formed pointer, otherwise an empty dict
    """
    spec_string = b'version https://git-lfs.github.com/spec'
    # cheap prefix test first, the regex only runs on likely candidates
    if not raw_content or not raw_content.startswith(spec_string):
        return {}

    pattern = re.compile(rb"""
    (?:\n)?
    ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
    ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
    ^size[ ](?P<oid_size>[0-9]+)\n
    (?:\n)?
    """, re.VERBOSE | re.MULTILINE)

    found = pattern.match(raw_content)
    return found.groupdict() if found else {}
336 336
@reraise_safe_exceptions
def is_large_file(self, wire, commit_id):
    """Return parsed LFS pointer data for the object ``commit_id``.

    Binary objects, and objects that are not LFS pointers, give an empty dict.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_large_file(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            candidate = repo[commit_id]
            if not candidate.is_binary:
                return self._parse_lfs_pointer(candidate.data)
            return {}

    return _is_large_file(repo_id, commit_id)
353 353
@reraise_safe_exceptions
def is_binary(self, wire, tree_id):
    """Return the ``is_binary`` flag of the object stored under ``tree_id`` (optionally cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_binary(_repo_id, _tree_id):
        # the arguments only feed the cache decorator; the lookup uses the outer `tree_id`
        with self._factory.repo_libgit2(wire) as repo:
            return repo[tree_id].is_binary

    return _is_binary(repo_id, tree_id)
367 367
@reraise_safe_exceptions
def md5_hash(self, wire, commit_id, path):
    """Validate that ``path`` in ``commit_id`` is a blob and return an empty string.

    No hash is computed here; a ``LookupException`` is raised when the tree
    entry is not a blob.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _md5_hash(_repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            node = repo[_commit_id].tree[_path]

            if node.type == pygit2.GIT_OBJ_BLOB:
                return ''

            raise exceptions.LookupException()(
                f'Tree for commit_id:{_commit_id} is not a blob: {node.type_str}')

    return _md5_hash(repo_id, commit_id, path)
387 387
@reraise_safe_exceptions
def in_largefiles_store(self, wire, oid):
    """Return whether ``oid`` is present in the configured LFS store.

    Returns False when no ``vcs_git_lfs_store_location`` is configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        return False

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.has_oid()
403 403
@reraise_safe_exceptions
def store_path(self, wire, oid):
    """Return the ``oid_path`` of ``oid`` inside the configured LFS store.

    :raises ValueError: when no ``vcs_git_lfs_store_location`` is configured
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        raise ValueError(f'Unable to fetch oid with path {oid}')

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.oid_path
417 417
@reraise_safe_exceptions
def bulk_request(self, wire, rev, pre_load):
    """Collect several commit attributes of ``rev`` in one call.

    :param pre_load: names of attributes registered in ``self._bulk_methods``
    :return: dict mapping each requested attribute to its value
    :raises VcsException: when a ``KeyError`` occurs while resolving an attribute
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_request(_repo_id, _rev, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                getter = self._bulk_methods[attr]
                wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
                collected[attr] = getter(wire, rev)
            except KeyError as e:
                raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
        return collected

    return _bulk_request(repo_id, rev, sorted(pre_load))
437 437
@reraise_safe_exceptions
def bulk_file_request(self, wire, commit_id, path, pre_load):
    """Collect several file attributes of ``path`` at ``commit_id`` in one call.

    :param pre_load: names of attributes registered in ``self._bulk_file_methods``
    :return: ``BinaryEnvelope`` wrapping a dict of attribute name -> value
    :raises VcsException: when a ``KeyError`` occurs while resolving an attribute
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                getter = self._bulk_file_methods[attr]
                wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
                collected[attr] = getter(wire, _commit_id, _path)
            except KeyError as e:
                raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
        return collected

    file_attrs = _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
    return BinaryEnvelope(file_attrs)
456 456
def _build_opener(self, url: str):
    """Return a urllib opener for ``url``.

    When the URL carries auth info, basic and digest auth handlers sharing
    one password manager are installed; otherwise a plain opener is built.
    """
    authinfo = url_parser(safe_bytes(url)).authinfo()[1]
    if not authinfo:
        return urllib.request.build_opener()

    # create a password manager
    passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    passmgr.add_password(*convert_to_str(authinfo))

    return urllib.request.build_opener(
        httpbasicauthhandler(passmgr),
        httpdigestauthhandler(passmgr))
471 471
@reraise_safe_exceptions
def check_url(self, url, config):
    """Check that ``url`` answers like a git smart-HTTP (or old style) repository.

    Requests ``<url>/info/refs?service=git-upload-pack`` and inspects the body.

    :param url: remote URL, may contain credentials
    :param config: not used by this check
    :return: True when the URL looks like a git repository
    :raises URLError: when the URL cannot be opened, does not answer with
        code 200, or the response does not look like a git repository
    """
    url_obj = url_parser(safe_bytes(url))

    test_uri = safe_str(url_obj.authinfo()[0])
    # only the obfuscated form (password/query masked) may reach logs and errors
    obfuscated_uri = get_obfuscated_url(url_obj)

    log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)

    if not test_uri.endswith('info/refs'):
        test_uri = test_uri.rstrip('/') + '/info/refs'

    o = self._build_opener(url=url)
    o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

    q = {"service": 'git-upload-pack'}
    qs = f'?{urllib.parse.urlencode(q)}'
    cu = f"{test_uri}{qs}"

    try:
        req = urllib.request.Request(cu, None, {})
        log.debug("Trying to open URL %s", obfuscated_uri)
        resp = o.open(req)
        if resp.code != 200:
            raise exceptions.URLError()('Return Code is not 200')
    except Exception as e:
        log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
        # means it cannot be cloned
        raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")

    # now detect if it's proper git repo
    gitdata: bytes = resp.read()

    if b'service=git-upload-pack' in gitdata:
        pass
    elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
        # old style git can return some other format!
        pass
    else:
        e = None
        # this is the git backend: the message previously (wrongly) mentioned hg
        raise exceptions.URLError(e)(
            f"url [{obfuscated_uri}] does not look like a git repo org_exc: {e}")

    return True
516 516
@reraise_safe_exceptions
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
    """Fetch ``url`` into the repository and store the selected remote refs.

    Only refs starting with one of ``valid_refs`` and not ending with
    ``deferred`` are written. With ``update_after_clone`` HEAD is set and the
    index is built from its tree.
    """
    # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
    remote_refs = self.pull(wire, url, apply_refs=False)
    repo = self._factory.repo(wire)
    if isinstance(valid_refs, list):
        # str.startswith only accepts a str or a tuple of them
        valid_refs = tuple(valid_refs)

    for ref_name in remote_refs:
        # only parse heads/tags and skip so called deferred tags
        if not ref_name.startswith(valid_refs) or ref_name.endswith(deferred):
            continue
        repo[ref_name] = remote_refs[ref_name]

    if update_after_clone:
        # we want to checkout HEAD
        repo["HEAD"] = remote_refs["HEAD"]
        index.build_index_from_tree(repo.path, repo.index_path(),
                                    repo.object_store, repo["HEAD"].tree)
535 535
@reraise_safe_exceptions
def branch(self, wire, commit_id):
    """Return the names of the ``refs/heads`` refs that point exactly at ``commit_id``."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _branch(_context_uid, _repo_id, _commit_id):
        heads_re = re.compile('^refs/heads')
        all_refs = self.get_refs(wire)
        return [
            ref_name.split('refs/heads/')[-1]
            for ref_name, ref_sha in all_refs.items()
            if heads_re.match(ref_name) and ref_sha == _commit_id
        ]

    return _branch(context_uid, repo_id, commit_id)
552 552
@reraise_safe_exceptions
def commit_branches(self, wire, commit_id):
    """Return the list produced by ``repo.branches.with_commit(commit_id)``."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _commit_branches(_context_uid, _repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return list(repo.branches.with_commit(_commit_id))

    return _commit_branches(context_uid, repo_id, commit_id)
566 566
@reraise_safe_exceptions
def add_object(self, wire, content):
    """Store ``content`` as a new blob in the repository and return the blob id.

    :param content: raw bytes of the blob
    :return: id of the dulwich blob (``blob.id``)
    """
    # The blob is built and stored through the dulwich API (`object_store`),
    # so the dulwich repository must be used here: the libgit2 repository
    # object does not provide an `object_store` attribute.
    repo = self._factory.repo(wire)
    blob = objects.Blob()
    blob.set_raw_string(content)
    repo.object_store.add_object(blob)
    return blob.id
575 575
@reraise_safe_exceptions
def create_commit(self, wire, author, committer, message, branch, new_tree_id,
                  date_args: list[int, int] = None,
                  parents: list | None = None):
    """Create a commit on ``refs/heads/<branch>`` and return its id as ``str``.

    :param author: 'Name <email>' string, turned into a signature when ``date_args`` is given
    :param committer: same format and handling as ``author``
    :param new_tree_id: tree id; bytes/str values are validated against the repo
    :param date_args: ``[time, offset]`` used for both signatures
    :param parents: explicit parent commit ids; when empty the branch head
        (or HEAD, if the repo has any local branch) becomes the parent
    """
    with self._factory.repo_libgit2(wire) as repo:

        if date_args:
            current_time, offset = date_args
            signature_kwargs = dict(time=current_time, offset=offset)
            author = create_signature_from_string(author, **signature_kwargs)
            committer = create_signature_from_string(committer, **signature_kwargs)

        tree = new_tree_id
        if isinstance(tree, (bytes, str)):
            # validate this tree is in the repo...
            tree = repo[safe_str(tree)].id

        if parents:
            # run via sha's and validate them in repo
            parents = [repo[parent_sha].id for parent_sha in parents]
        else:
            parents = []
            # ensure we COMMIT on top of given branch head
            # check if this repo has ANY branches, otherwise it's a new branch case we need to make
            if branch in repo.branches.local:
                parents.append(repo.branches[branch].target)
            elif list(repo.branches.local):
                parents.append(repo.head.target)

        commit_oid = repo.create_commit(
            f'refs/heads/{branch}',  # the name of the reference to update
            author,  # the author of the commit
            committer,  # the committer of the commit
            message,  # the commit message
            tree,  # the tree produced by the index
            parents,  # list of parents for the new commit, usually just one
        )

        return safe_str(commit_oid)
627 627
@reraise_safe_exceptions
def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
    """Write the given file changes as a new commit on ``branch``.

    :param commit_data: dict with ``author``, ``committer``, ``message``,
        ``parents``, ``commit_time`` and ``commit_timezone``
    :param commit_tree: when truthy and parents are given, the index is first
        loaded from the first parent's tree
    :param updated: dicts with ``path``, ``content`` and ``mode`` to add to the index
    :param removed: paths to remove from the index
    :return: id of the new commit
    """

    def mode2pygit(mode):
        """
        git only supports two filemode 644 and 755

        0o100755 -> 33261
        0o100644 -> 33188
        """
        known_modes = {
            0o100644: pygit2.GIT_FILEMODE_BLOB,
            0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
            0o120000: pygit2.GIT_FILEMODE_LINK
        }
        return known_modes.get(mode) or pygit2.GIT_FILEMODE_BLOB

    with self._factory.repo_libgit2(wire) as repo:
        repo_index = repo.index

        commit_parents = None
        if commit_tree and commit_data['parents']:
            commit_parents = commit_data['parents']
            repo_index.read_tree(repo[commit_parents[0]].tree)

        for file_spec in updated:
            blob_id = repo.create_blob(file_spec['content'])
            entry = pygit2.IndexEntry(file_spec['path'], blob_id, mode2pygit(file_spec['mode']))
            repo_index.add(entry)

        for removed_path in removed:
            repo_index.remove(removed_path)

        # Write changes to the index
        repo_index.write()

        # Create a tree from the updated index
        new_tree_id = repo_index.write_tree()

    date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]

    new_commit_id = self.create_commit(
        wire, commit_data['author'], commit_data['committer'], commit_data['message'],
        branch, new_tree_id, date_args=date_args, parents=commit_parents)

    # libgit2, ensure the branch is there and exists
    self.create_branch(wire, branch, new_commit_id)

    # libgit2, set new ref to this created commit
    self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)

    return new_commit_id
686 686
# pull(): fetch objects from `url` into the dulwich repository described by `wire`.
# - A LocalGitClient is used when `url` is not 'default' and contains no '://';
#   otherwise an HttpGitClient is built with the opener from _build_opener().
# - When `refs` is given, only hashes of those remote refs are requested and the
#   returned mapping is filtered down to them.
# - NotGitRepository is logged and re-raised as AbortException.
# - Returns the remote refs (the `.refs` attribute for a FetchPackResult).
# NOTE(review): indentation was lost in this rendering; whether the
# `if refs and not update_after:` part sits inside `if apply_refs:` must be
# confirmed against the real file before restructuring this method.
687 687 @reraise_safe_exceptions
688 688 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
689 689 if url != 'default' and '://' not in url:
690 690 client = LocalGitClient(url)
691 691 else:
692 692 url_obj = url_parser(safe_bytes(url))
693 693 o = self._build_opener(url)
694 694 url = url_obj.authinfo()[0]
695 695 client = HttpGitClient(base_url=url, opener=o)
696 696 repo = self._factory.repo(wire)
697 697
# default: ask for everything the remote offers
698 698 determine_wants = repo.object_store.determine_wants_all
699 699
700 700 if refs:
701 701 refs: list[bytes] = [ascii_bytes(x) for x in refs]
702 702
703 703 def determine_wants_requested(_remote_refs):
704 704 determined = []
705 705 for ref_name, ref_hash in _remote_refs.items():
706 706 bytes_ref_name = safe_bytes(ref_name)
707 707
708 708 if bytes_ref_name in refs:
709 709 bytes_ref_hash = safe_bytes(ref_hash)
710 710 determined.append(bytes_ref_hash)
711 711 return determined
712 712
713 713 # swap with our custom requested wants
714 714 determine_wants = determine_wants_requested
715 715
716 716 try:
717 717 remote_refs = client.fetch(
718 718 path=url, target=repo, determine_wants=determine_wants)
719 719
720 720 except NotGitRepository as e:
721 721 log.warning(
722 722 'Trying to fetch from "%s" failed, not a Git repository.', url)
723 723 # Exception can contain unicode which we convert
724 724 raise exceptions.AbortException(e)(repr(e))
725 725
726 726 # mikhail: client.fetch() returns all the remote refs, but fetches only
727 727 # refs filtered by `determine_wants` function. We need to filter result
728 728 # as well
729 729 if refs:
730 730 remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}
731 731
732 732 if apply_refs:
733 733 # TODO: johbo: Needs proper test coverage with a git repository
734 734 # that contains a tag object, so that we would end up with
735 735 # a peeled ref at this point.
736 736 for k in remote_refs:
737 737 if k.endswith(PEELED_REF_MARKER):
738 738 log.debug("Skipping peeled reference %s", k)
739 739 continue
740 740 repo[k] = remote_refs[k]
741 741
742 742 if refs and not update_after:
743 743 # update to ref
744 744 # mikhail: explicitly set the head to the last ref.
745 745 update_to_ref = refs[-1]
# NOTE(review): this branch only runs when `update_after` is falsy, so the
# isinstance check below can only match an empty string -- confirm intent.
746 746 if isinstance(update_after, str):
747 747 update_to_ref = update_after
748 748
749 749 repo[HEAD_MARKER] = remote_refs[update_to_ref]
750 750
751 751 if update_after:
752 752 # we want to check out HEAD
753 753 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
754 754 index.build_index_from_tree(repo.path, repo.index_path(),
755 755 repo.object_store, repo[HEAD_MARKER].tree)
756 756
757 757 if isinstance(remote_refs, FetchPackResult):
758 758 return remote_refs.refs
759 759 return remote_refs
760 760
# sync_fetch(): mirror refs from `url` into the repository using the git CLI.
# 1. `git ls-remote` (restricted to --heads/--tags unless `all_refs`) lists the remote refs.
# 2. Peeled refs, HEAD and duplicates are skipped; the rest is recorded in `remote_refs`.
# 3. The selected refs are fetched with `git fetch --force --prune` in chunks of 128.
# 4. With kwargs['sync_large_objects'] a `git lfs fetch --all` is run as well.
# Returns an OrderedDict of ref name -> sha (both bytes, as parsed from the ls-remote output).
# NOTE(review): indentation was lost in this rendering; whether the LFS fetch
# sits inside `if fetch_refs:` must be confirmed against the real file.
761 761 @reraise_safe_exceptions
762 762 def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs):
763 763 self._factory.repo(wire)
764 764 if refs and not isinstance(refs, (list, tuple)):
765 765 refs = [refs]
766 766
767 767 config = self._wire_to_config(wire)
768 768 # get all remote refs we'll use to fetch later
769 769 cmd = ['ls-remote']
770 770 if not all_refs:
771 771 cmd += ['--heads', '--tags']
772 772 cmd += [url]
773 773 output, __ = self.run_git_command(
774 774 wire, cmd, fail_on_stderr=False,
775 775 _copts=self._remote_conf(config),
776 776 extra_env={'GIT_TERMINAL_PROMPT': '0'})
777 777
778 778 remote_refs = collections.OrderedDict()
779 779 fetch_refs = []
780 780
# each ls-remote output line is split on a tab into (sha, ref)
781 781 for ref_line in output.splitlines():
782 782 sha, ref = ref_line.split(b'\t')
783 783 sha = sha.strip()
784 784 if ref in remote_refs:
785 785 # duplicate, skip
786 786 continue
787 787 if ref.endswith(PEELED_REF_MARKER):
788 788 log.debug("Skipping peeled reference %s", ref)
789 789 continue
790 790 # don't sync HEAD
791 791 if ref in [HEAD_MARKER]:
792 792 continue
793 793
794 794 remote_refs[ref] = sha
795 795
# NOTE(review): `sha` is bytes here; this membership test presumably expects
# `refs` to hold bytes shas as well -- verify against callers.
796 796 if refs and sha in refs:
797 797 # we filter fetch using our specified refs
798 798 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
799 799 elif not refs:
800 800 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
801 801 log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
802 802
803 803 if fetch_refs:
804 804 for chunk in more_itertools.chunked(fetch_refs, 128):
805 805 fetch_refs_chunks = list(chunk)
806 806 log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
807 807 self.run_git_command(
808 808 wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
809 809 fail_on_stderr=False,
810 810 _copts=self._remote_conf(config),
811 811 extra_env={'GIT_TERMINAL_PROMPT': '0'})
812 812 if kwargs.get('sync_large_objects'):
813 813 self.run_git_command(
814 814 wire, ['lfs', 'fetch', url, '--all'],
815 815 fail_on_stderr=False,
816 816 _copts=self._remote_conf(config),
817 817 )
818 818
819 819 return remote_refs
820 820
@reraise_safe_exceptions
def sync_push(self, wire, url, refs=None, **kwargs):
    """Mirror-push the repository to ``url`` using the git CLI.

    Nothing is pushed when ``check_url`` returns a falsy value. With
    ``kwargs['sync_large_objects']`` all LFS objects are pushed as well.
    ``refs`` is accepted but not used.
    """
    if not self.check_url(url, wire):
        return

    config = self._wire_to_config(wire)
    self._factory.repo(wire)

    push_cmd = ['push', url, '--mirror']
    self.run_git_command(
        wire, push_cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    if not kwargs.get('sync_large_objects'):
        return

    lfs_cmd = ['lfs', 'push', url, '--all']
    self.run_git_command(
        wire, lfs_cmd,
        fail_on_stderr=False,
        _copts=self._remote_conf(config),
    )
837 837
@reraise_safe_exceptions
def get_remote_refs(self, wire, url):
    """Open ``url`` as a dulwich repository and return its refs; ``wire`` is not used."""
    return Repo(url).get_refs()
842 842
@reraise_safe_exceptions
def get_description(self, wire):
    """Return the description reported by the dulwich repository for ``wire``."""
    dulwich_repo = self._factory.repo(wire)
    description = dulwich_repo.get_description()
    return description
847 847
@reraise_safe_exceptions
def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
    """Return commit ids reachable from ``rev2`` but not from ``rev1``.

    Both repositories first fetch from each other, then the walk is done on
    the repository at ``other_repo_path``.
    """
    origin_repo_path = wire['path']
    origin_repo = self._factory.repo(wire)
    # fetch from other_repo_path to our origin repo
    LocalGitClient(thin_packs=False).fetch(other_repo_path, origin_repo)

    wire_remote = wire.copy()
    wire_remote['path'] = other_repo_path
    other_repo = self._factory.repo(wire_remote)

    # fetch from origin_repo_path to our remote repo
    LocalGitClient(thin_packs=False).fetch(origin_repo_path, other_repo)

    walker = other_repo.get_walker(
        include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])
    return [entry.commit.id for entry in walker]
866 866
@reraise_safe_exceptions
def get_object(self, wire, sha, maybe_unreachable=False):
    """Resolve ``sha`` (commit id or reference) and return basic object info.

    :param sha: revision expression handed to ``revparse_single``
    :param maybe_unreachable: when truthy, commits not contained in any
        branch are accepted
    :return: dict with ``id``, ``type``, ``commit_id`` and ``idx`` (always 0)
    :raises LookupException: when ``sha`` cannot be resolved, or resolves to a
        commit that is in no branch while the dangling check is active
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_object(_context_uid, _repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:

            missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
            try:
                commit = repo.revparse_single(sha)
            except KeyError:
                # NOTE(marcink): KeyError doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found', sha)
                raise exceptions.LookupException(e)(missing_commit_err)
            except ValueError as e:
                raise exceptions.LookupException(e)(missing_commit_err)

            is_tag = isinstance(commit, pygit2.Tag)
            if is_tag:
                commit = repo.get(commit.target)

            # The dangling check is skipped for tags, when the caller accepts
            # unreachable commits, and when a reference was used: if it parsed
            # we're not having a dangling commit.
            check_dangling = (
                not is_tag
                and not maybe_unreachable
                and sha == commit.hex
            )

            if check_dangling and not any(repo.branches.with_commit(commit.hex)):
                # NOTE(marcink): Empty error doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
                raise exceptions.LookupException(e)(missing_commit_err)

            commit_id = commit.hex
            return dict(
                id=commit_id,
                type=commit.type_str,
                commit_id=commit_id,
                idx=0,
            )

    return _get_object(context_uid, repo_id, sha)
926 926
927 927 @reraise_safe_exceptions
def get_refs(self, wire):
    """
    Return a ``{ref name: target hex}`` mapping for every reference whose
    name starts with ``refs/heads/`` or ``refs/tags/``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_refs(_context_uid, _repo_id):
        with self._factory.repo_libgit2(wire) as repo:
            wanted = re.compile('^refs/(heads|tags)/')
            matching = [
                ref for ref in repo.listall_reference_objects()
                if wanted.match(ref.name)
            ]
            return {ref.name: ref.target.hex for ref in matching}

    return _get_refs(context_uid, repo_id)
942 942
943 943 @reraise_safe_exceptions
def get_branch_pointers(self, wire):
    """
    Return a ``{target hex: shorthand}`` mapping for every reference whose
    name starts with ``refs/heads``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_branch_pointers(_context_uid, _repo_id):
        repo_init = self._factory.repo_libgit2(wire)
        heads = re.compile('^refs/heads')
        with repo_init as repo:
            pointers = {}
            branch_refs = [
                ref for ref in repo.listall_reference_objects()
                if heads.match(ref.name)
            ]
            for ref in branch_refs:
                pointers[ref.target.hex] = ref.shorthand
            return pointers

    return _get_branch_pointers(context_uid, repo_id)
958 958
959 959 @reraise_safe_exceptions
def head(self, wire, show_exc=True):
    """
    Return the hex id of the object HEAD peels to.

    If resolving HEAD fails the exception is re-raised when ``show_exc`` is
    true; otherwise ``None`` is returned.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _head(_context_uid, _repo_id, _show_exc):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                return repo.head.peel().hex
            except Exception:
                if not show_exc:
                    return None
                raise

    return _head(context_uid, repo_id, show_exc)
974 974
975 975 @reraise_safe_exceptions
def init(self, wire):
    """Create the directory at ``wire['path']`` and init a non-bare repository in it."""
    target_dir = safe_str(wire['path'])
    os.makedirs(target_dir, mode=0o755)
    pygit2.init_repository(target_dir, bare=False)
980 980
981 981 @reraise_safe_exceptions
def init_bare(self, wire):
    """Create the directory at ``wire['path']`` and init a bare repository in it."""
    target_dir = safe_str(wire['path'])
    os.makedirs(target_dir, mode=0o755)
    pygit2.init_repository(target_dir, bare=True)
986 986
987 987 @reraise_safe_exceptions
def revision(self, wire, rev):
    """
    Look up ``rev`` and return ``{'id': <hex>}``; a ``'tree'`` key with the
    tree hex id is added when the object exposes ``tree_id``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _revision(_context_uid, _repo_id, _rev):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[rev]
            info = {'id': obj.id.hex}
            # tree objects itself don't have tree_id attribute
            if hasattr(obj, 'tree_id'):
                info['tree'] = obj.tree_id.hex
            return info

    return _revision(context_uid, repo_id, rev)
1007 1007
1008 1008 @reraise_safe_exceptions
def date(self, wire, commit_id):
    """
    Return ``[commit_time, commit_time_offset]`` for ``commit_id``.

    Objects without a ``commit_time`` attribute are first dereferenced
    through ``get_object()``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _date(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]
            if not hasattr(obj, 'commit_time'):
                obj = obj.get_object()

            # TODO(marcink): check dulwich difference of offset vs timezone
            return [obj.commit_time, obj.commit_time_offset]

    return _date(repo_id, commit_id)
1028 1028
1029 1029 @reraise_safe_exceptions
def author(self, wire, commit_id):
    """
    Return the author of ``commit_id`` as ``"name <email>"``, or just the
    name when the email is empty. If formatting the name alone fails, the
    raw name converted with ``safe_str`` is returned instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _author(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]
            # objects lacking `author` are dereferenced via get_object()
            signature = obj.author if hasattr(obj, 'author') else obj.get_object().author

            if signature.email:
                return f"{signature.name} <{signature.email}>"

            try:
                return f"{signature.name}"
            except Exception:
                return f"{safe_str(signature.raw_name)}"

    return _author(repo_id, commit_id)
1054 1054
1055 1055 @reraise_safe_exceptions
def message(self, wire, commit_id):
    """Return the ``message`` attribute of the object stored under ``commit_id``."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _message(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[commit_id].message

    return _message(repo_id, commit_id)
1067 1067
1068 1068 @reraise_safe_exceptions
def parents(self, wire, commit_id):
    """
    Return the hex ids of the parents of ``commit_id``.

    Objects without ``parent_ids`` are first dereferenced through
    ``get_object()``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _parents(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]
            if not hasattr(obj, 'parent_ids'):
                obj = obj.get_object()
            return [parent.hex for parent in obj.parent_ids]

    return _parents(repo_id, commit_id)
1085 1085
1086 1086 @reraise_safe_exceptions
def children(self, wire, commit_id):
    """
    Return the child commit ids of ``commit_id``.

    Runs ``git rev-list --all --children <commit_id>^..<head>`` and returns
    the ids listed after ``commit_id`` on the first output line that starts
    with it; an empty list when no such line exists.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    head = self.head(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _children(_repo_id, _commit_id):
        rev_range = f'{commit_id}^..{head}'
        output, __ = self.run_git_command(
            wire, ['rev-list', '--all', '--children', rev_range])

        own_line = re.compile(fr'^{commit_id}')
        for raw_line in output.splitlines():
            text = safe_str(raw_line)
            if own_line.match(text):
                # first token is the commit itself, the rest are its children
                return list(text.split(' ')[1:])
        return []

    return _children(repo_id, commit_id)
1110 1110
1111 1111 @reraise_safe_exceptions
def set_refs(self, wire, key, value):
    """Create reference ``key`` pointing at ``value``, overwriting an existing one."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.create(key, value, force=True)
1116 1116
1117 1117 @reraise_safe_exceptions
def update_refs(self, wire, key, value):
    """
    Point the existing reference ``key`` at ``value``.

    :raises ValueError: when ``key`` is not among the repository references
    """
    with self._factory.repo_libgit2(wire) as repo:
        references = repo.references
        if key not in references:
            raise ValueError(f'Reference {key} not found in the repository')
        references.create(key, value, force=True)
1124 1124
1125 1125 @reraise_safe_exceptions
def create_branch(self, wire, branch_name, commit_id, force=False):
    """
    Create local branch ``branch_name``.

    :param branch_name: name of the branch to create
    :param commit_id: commit the branch should point at; when empty the
        object HEAD peels to is used
    :param force: when true the branch is always (re)created; otherwise it
        is created only if no branch of that name exists yet
    """
    with self._factory.repo_libgit2(wire) as repo:
        if commit_id:
            commit = repo[commit_id]
        else:
            # if commit is not given just use the HEAD.
            # `repo.head` is a property returning a reference, so it must be
            # peeled rather than called to obtain the commit object.
            commit = repo.head.peel()

        # create only if forced or that branch isn't existing
        if force or not repo.branches.get(branch_name):
            repo.branches.local.create(branch_name, commit, force=force)
1140 1140
1141 1141 @reraise_safe_exceptions
def remove_ref(self, wire, key):
    """Delete the reference named ``key`` from the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(key)
1146 1146
1147 1147 @reraise_safe_exceptions
def tag_remove(self, wire, tag_name):
    """Delete the reference ``refs/tags/<tag_name>`` from the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(f'refs/tags/{tag_name}')
1153 1153
1154 1154 @reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Compare the trees of ``source_id`` and ``target_id``.

    :return: three lists ``(added, modified, deleted)`` of paths, classified
        by whether a change carries a new path, an old path, or both
    """
    repo = self._factory.repo(wire)
    # source can be empty
    source_id = safe_bytes(source_id if source_id else b'')
    target_id = safe_bytes(target_id)

    source_tree = repo[source_id].tree if source_id else None
    target_tree = repo[target_id].tree
    changes = repo.object_store.tree_changes(source_tree, target_tree)

    added, modified, deleted = set(), set(), set()
    for (old_path, new_path), (_, _), (_, _) in list(changes):
        if new_path:
            bucket = modified if old_path else added
            bucket.add(new_path)
        elif old_path:
            deleted.add(old_path)

    return list(added), list(modified), list(deleted)
1177 1177
1178 1178 @reraise_safe_exceptions
def tree_and_type_for_path(self, wire, commit_id, path):
    """
    Look up ``path`` in the tree of ``commit_id``.

    :return: ``(hex id, type_str, filemode)`` of the entry, or
        ``(None, None, None)`` when the tree lookup raises ``KeyError``
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            try:
                entry = commit.tree[path]
            except KeyError:
                return None, None, None

            return entry.id.hex, entry.type_str, entry.filemode

    return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1197 1197
1198 1198 @reraise_safe_exceptions
def tree_items(self, wire, tree_id):
    """
    List the entries of the tree ``tree_id`` as
    ``(name, filemode, hex id, type)`` tuples.

    Entries of type ``'commit'`` are reported with type ``'link'``.

    :raises ObjectMissing: when ``tree_id`` is not found in the repository
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_items(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            entries = []
            for entry in tree:
                sha = entry.hex
                mode = entry.filemode
                kind = entry.type_str
                # NOTE(marcink): submodules we translate to 'link' for backward compat
                if kind == 'commit':
                    kind = 'link'
                entries.append((entry.name, mode, sha, kind))
            return entries

    return _tree_items(repo_id, tree_id)
1226 1226
1227 1227 @reraise_safe_exceptions
def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Old version that uses subprocess to call diff

    Runs ``git show`` when ``commit_id_1`` equals ``self.EMPTY_COMMIT`` and
    ``git diff`` between the two commits otherwise.

    :param file_filter: optional path appended after ``--``
    :param opt_ignorews: when true ``--ignore-all-space`` is passed
    :param context: number of context lines (``-U<context>``)
    :return: the command output as bytes; for ``show`` everything before the
        first line starting with ``diff`` is stripped
    """
    flags = [
        f'-U{context}', '--patch',
        '--binary',
        '--find-renames',
        '--no-indent-heuristic',
    ]

    if opt_ignorews:
        flags.append('--ignore-all-space')

    used_show = commit_id_1 == self.EMPTY_COMMIT
    if used_show:
        cmd = ['show'] + flags + [commit_id_2]
    else:
        cmd = ['diff'] + flags + [commit_id_1, commit_id_2]

    if file_filter:
        cmd.extend(['--', file_filter])

    diff, __ = self.run_git_command(wire, cmd)
    # If we used 'show' command, strip first few lines (until actual diff
    # starts)
    if used_show:
        lines = diff.splitlines()
        start = next(
            (idx for idx, line in enumerate(lines) if line.startswith(b'diff')),
            len(lines))
        # Append new line just like 'diff' command do. The output is bytes,
        # so the separator and terminator must be bytes as well.
        diff = b'\n'.join(lines[start:]) + b'\n'
    return diff
1267 1267
1268 1268 @reraise_safe_exceptions
def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Build a diff with pygit2 and return it wrapped in ``BytesEnvelope``.

    When ``commit_id_1`` equals ``self.EMPTY_COMMIT`` the tree of
    ``commit_id_2`` is diffed against an empty tree; otherwise it is diffed
    against the tree of ``commit_id_1`` and rename detection is run. With
    ``file_filter`` only the patch whose old file path equals it is
    returned, or an empty envelope when none matches.
    """
    with self._factory.repo_libgit2(wire) as repo:
        swap = True
        flags = pygit2.GIT_DIFF_SHOW_BINARY
        if opt_ignorews:
            flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE

        new_commit = repo[commit_id_2]
        if commit_id_1 == self.EMPTY_COMMIT:
            diff_obj = new_commit.tree.diff_to_tree(
                flags=flags, context_lines=context, swap=swap)
        else:
            old_commit = repo[commit_id_1]
            diff_obj = new_commit.tree.diff_to_tree(
                old_commit.tree, flags=flags, context_lines=context, swap=swap)
            diff_obj.find_similar(flags=pygit2.GIT_DIFF_FIND_RENAMES)

        if not file_filter:
            return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')

        for patch in diff_obj:
            if patch.delta.old_file.path == file_filter:
                return BytesEnvelope(patch.data) or BytesEnvelope(b'')
        # no matching path == no diff
        return BytesEnvelope(b'')
1302 1302
1303 1303 @reraise_safe_exceptions
def node_history(self, wire, commit_id, path, limit):
    """
    Return the commit ids (40 hex characters each, as bytes) found in the
    output of a git history query for ``path`` starting at ``commit_id``.

    ``limit == 1`` uses ``git rev-list -1``; any other value uses
    ``git log``, passing ``-n <limit>`` when ``limit`` is truthy.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
        if limit == 1:
            # optimize for n==1, rev-list is much faster for that use-case
            git_args = ['rev-list', '-1', commit_id, '--', path]
        else:
            git_args = ['log']
            if limit:
                git_args += ['-n', str(safe_int(limit, 0))]
            git_args += ['--pretty=format: %H', '-s', commit_id, '--', path]

        output, __ = self.run_git_command(wire, git_args)
        return list(re.findall(rb'[0-9a-fA-F]{40}', output))

    return _node_history(context_uid, repo_id, commit_id, path, limit)
1324 1324
1325 1325 @reraise_safe_exceptions
def node_annotate_legacy(self, wire, commit_id, path):
    """
    Annotate ``path`` at ``commit_id`` using ``git blame``.

    :return: list of ``(line_no, blame_commit_id, line)`` tuples built from
        every blame output line except the last one
    """
    # note: replaced by pygit2 implementation
    # -l ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r commit_id ==> blames for the given commit
    blame_cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
    output, __ = self.run_git_command(wire, blame_cmd)

    annotated = []
    blame_lines = output.splitlines()[:-1]
    for line_no, blame_line in enumerate(blame_lines, start=1):
        # split on the first space only: "<sha> <rest of the blame line>"
        blame_commit_id, line = blame_line.split(b' ', 1)
        annotated.append((line_no, blame_commit_id, line))

    return annotated
1341 1341
1342 1342 @reraise_safe_exceptions
def node_annotate(self, wire, commit_id, path):
    """
    Annotate ``path`` at ``commit_id`` using the pygit2 blame.

    The file content is split on ``\\n`` only (via ``splitnewlines``), and
    each resulting line is paired with the final commit of the blame hunk
    for its 1-based line number.

    :return: ``BinaryEnvelope`` wrapping a list of
        ``(line_no, blame_commit_id, line)`` tuples
    """
    # local import keeps this method self-contained
    from vcsserver.str_utils import splitnewlines

    result_libgit = []
    with self._factory.repo_libgit2(wire) as repo:
        commit = repo[commit_id]
        blame_obj = repo.blame(path, newest_commit=commit_id)
        file_content = commit.tree[path].data
        for line_no, line in enumerate(splitnewlines(file_content), start=1):
            hunk = blame_obj.for_line(line_no)
            blame_commit_id = hunk.final_commit_id.hex

            result_libgit.append((line_no, blame_commit_id, line))

    return BinaryEnvelope(result_libgit)
1358 1359
1359 1360 @reraise_safe_exceptions
def update_server_info(self, wire, force=False):
    """
    Run ``git update-server-info`` (with ``--force`` when ``force`` is
    true) and return the lines of its output.
    """
    extra = ['--force'] if force else []
    output, __ = self.run_git_command(wire, ['update-server-info'] + extra)
    return output.splitlines()
1366 1367
1367 1368 @reraise_safe_exceptions
def get_all_commit_ids(self, wire):
    """
    Return the output lines of
    ``git rev-list --reverse --date-order --branches --tags``, or an empty
    list when running the command raises.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids(_context_uid, _repo_id):
        rev_list = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
        try:
            output, __ = self.run_git_command(wire, rev_list)
        except Exception:
            # Can be raised for empty repositories
            return []
        return output.splitlines()

    # NOTE(review): this pygit2 variant is defined but not called from this
    # method, and it does not return the collected ids.
    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
        repo_init = self._factory.repo_libgit2(wire)
        from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
        results = []
        with repo_init as repo:
            for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
                results.append(commit.id.hex)

    return _get_all_commit_ids(context_uid, repo_id)
1394 1395
1395 1396 @reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
    """
    Run the git executable with ``cmd`` as its arguments.

    Recognised keys in ``opts`` (removed before the process is spawned):

    * ``_bare``   - when present, the default ``-c`` options are not added
    * ``_safe``   - when present, an ``OSError`` is returned instead of raised
    * ``_copts``  - extra options placed before ``cmd``
    * ``extra_env`` - mapping merged into the process environment

    Every remaining option is passed to ``SubprocessIOChunker``.

    :return: ``(output, stderr)`` of the process, each joined into bytes;
        on ``OSError`` with ``_safe`` set: ``('', err)``
    :raises: ``exceptions.VcsException`` on ``OSError`` without ``_safe``
    """
    path = wire.get('path', None)
    debug_mode = rhodecode.ConfigGet().get_bool('debug')

    if path and os.path.isdir(path):
        opts['cwd'] = path

    if '_bare' in opts:
        del opts['_bare']
        _copts = []
    else:
        _copts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']

    # presence of the key (not its value) selects "no exc on failure"
    safe_call = '_safe' in opts
    opts.pop('_safe', None)

    _copts.extend(opts.pop('_copts', None) or [])

    gitenv = os.environ.copy()
    gitenv.update(opts.pop('extra_env', {}))
    # need to clean fix GIT_DIR !
    gitenv.pop('GIT_DIR', None)
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
    gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

    cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
    _opts = {'env': gitenv, 'shell': False}

    proc = None
    try:
        _opts.update(opts)
        proc = subprocessio.SubprocessIOChunker(cmd, **_opts)

        return b''.join(proc), b''.join(proc.stderr)
    except OSError as err:
        cmd = ' '.join(map(safe_str, cmd))  # human friendly CMD
        # call options are only included in the message in debug mode
        call_opts = _opts if debug_mode else {}

        tb_err = ("Couldn't run git command ({}).\n"
                  "Original error was:{}\n"
                  "Call options:{}\n"
                  .format(cmd, err, call_opts))
        log.exception(tb_err)
        if safe_call:
            return '', err
        raise exceptions.VcsException()(tb_err)
    finally:
        if proc:
            proc.close()
1453 1454
1454 1455 @reraise_safe_exceptions
def install_hooks(self, wire, force=False):
    """
    Install the git hooks into the repository at ``wire['path']``.

    :param force: forwarded to ``install_git_hooks`` as ``force_create``
    :return: whatever ``install_git_hooks`` returns
    """
    from vcsserver.hook_utils import install_git_hooks
    bare = self.bare(wire)
    path = wire['path']
    # The previous `os.path.join(settings.BINARY_DIR, 'python3')` call
    # discarded its result and had no effect, so it was dropped.
    return install_git_hooks(path, bare, force_create=force)
1463 1464
1464 1465 @reraise_safe_exceptions
def get_hooks_info(self, wire):
    """
    Return ``{'pre_version': ..., 'post_version': ...}`` with the versions
    reported for the pre and post hooks of the repository at ``wire['path']``.
    """
    from vcsserver.hook_utils import (
        get_git_pre_hook_version, get_git_post_hook_version)

    is_bare = self.bare(wire)
    repo_path = wire['path']
    pre_version = get_git_pre_hook_version(repo_path, is_bare)
    post_version = get_git_post_hook_version(repo_path, is_bare)
    return {
        'pre_version': pre_version,
        'post_version': post_version,
    }
1474 1475
1475 1476 @reraise_safe_exceptions
def set_head_ref(self, wire, head_name):
    """
    Point HEAD at ``refs/heads/<head_name>``.

    :return: two-item list: ``head_name`` and a message describing the change
    """
    log.debug('Setting refs/head to `%s`', head_name)
    with self._factory.repo_libgit2(wire) as repo:
        repo.set_head(f'refs/heads/{head_name}')

    return [head_name, f'set HEAD to refs/heads/{head_name}']
1483 1484
1484 1485 @reraise_safe_exceptions
def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id, cache_config):
    """
    Build an archive of ``commit_id`` through ``store_archive_in_cache``,
    supplying it with a generator that yields an ``ArchiveNode`` for every
    index entry of the selected tree, skipping entries whose mode is
    ``GIT_FILEMODE_COMMIT``.
    """

    def file_walker(_commit_id, path):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]

            # '' and '/' select the root tree, anything else a sub-tree
            if path in ['', '/']:
                tree = commit.tree
            else:
                tree = commit.tree[path.rstrip('/')]
            tree_id = tree.id.hex
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            tree_index = LibGit2Index.Index()
            tree_index.read_tree(tree)

            for entry in tree_index:
                entry_path = entry.path
                entry_mode = entry.mode
                is_link = stat.S_ISLNK(entry_mode)
                if entry_mode == pygit2.GIT_FILEMODE_COMMIT:
                    log.debug('Skipping path %s as a commit node', entry_path)
                    continue
                yield ArchiveNode(entry_path, entry_mode, is_link, repo[entry.hex].read_raw)

    return store_archive_in_cache(
        file_walker, archive_name_key, kind, mtime, archive_at_path,
        archive_dir_name, commit_id, cache_config=cache_config)
@@ -1,144 +1,158 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import typing
19 19 import base64
20 20 import logging
21 21
22 22
23 23 log = logging.getLogger(__name__)
24 24
25 25
def safe_int(val, default=None) -> int:
    """
    Return ``int(val)``; if ``val`` is not convertible to int return
    ``default`` instead.

    :param val: value to convert
    :param default: value returned when the conversion raises
        ``ValueError`` or ``TypeError``
    """
    try:
        return int(val)
    except (ValueError, TypeError):
        return default
41 41
42 42
def base64_to_str(text) -> str:
    """Base64-encode ``text`` and return the result as a stripped str."""
    encoded = base64.encodebytes(safe_bytes(text))
    return safe_str(encoded).strip()
45 45
46 46
def get_default_encodings() -> list[str]:
    """Return the list of encodings tried by default: only ``utf8``."""
    encodings = ['utf8']
    return encodings
49 49
50 50
def safe_str(str_, to_encoding=None) -> str:
    """
    Turn ``str_`` into ``str``.

    ``str`` is returned unchanged and non-bytes objects go through
    ``str()``. Bytes are decoded with each candidate encoding in turn; if
    all fail, the first encoding is used with ``errors='replace'``.

    :param str_: value to convert
    :param to_encoding: encoding name or list/tuple of names; defaults to
        ``get_default_encodings()``
    """
    if isinstance(str_, str):
        return str_

    # anything that is neither str nor bytes is simply stringified
    if not isinstance(str_, bytes):
        return str(str_)

    encodings = to_encoding or get_default_encodings()
    if not isinstance(encodings, (list, tuple)):
        encodings = [encodings]

    for encoding in encodings:
        try:
            return str_.decode(encoding)
        except UnicodeDecodeError:
            continue

    return str_.decode(encodings[0], 'replace')
76 76
77 77
def safe_bytes(str_, from_encoding=None) -> bytes:
    """
    Turn ``str_`` into ``bytes``.

    Bytes are returned unchanged. A ``str`` is encoded with each candidate
    encoding in turn; if all fail, the first encoding is used with
    ``errors='replace'``.

    :param str_: string to encode
    :param from_encoding: encoding name or list/tuple of names; defaults to
        ``get_default_encodings()``
    :raises ValueError: when ``str_`` is neither ``bytes`` nor ``str``
    """
    if isinstance(str_, bytes):
        return str_

    if not isinstance(str_, str):
        raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')

    from_encoding = from_encoding or get_default_encodings()
    if not isinstance(from_encoding, (list, tuple)):
        from_encoding = [from_encoding]

    for enc in from_encoding:
        try:
            return str_.encode(enc)
        except UnicodeEncodeError:
            # str.encode raises UnicodeEncodeError (not UnicodeDecodeError);
            # catching it lets the next encoding / the fallback be tried
            pass

    return str_.encode(from_encoding[0], 'replace')
102 102
103 103
def ascii_bytes(str_, allow_bytes=False) -> bytes:
    """
    Simple conversion from str to bytes, with assumption that str_ is pure ASCII.
    Fails with UnicodeError on invalid input.
    This should be used where encoding and "safe" ambiguity should be avoided.
    Where strings already have been encoded in other ways but still are unicode
    string - for example to hex, base64, json, urlencoding, or are known to be
    identifiers.

    With ``allow_bytes`` set, bytes input is returned unchanged; any other
    non-str input raises ``ValueError``.
    """
    if isinstance(str_, bytes) and allow_bytes:
        return str_

    if isinstance(str_, str):
        return str_.encode('ascii')

    raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')
119 119
120 120
def ascii_str(str_) -> str:
    """
    Simple conversion from bytes to str, with assumption that str_ is pure ASCII.
    Fails with UnicodeError on invalid input.
    This should be used where encoding and "safe" ambiguity should be avoided.
    Where strings are encoded but also in other ways are known to be ASCII, and
    where a unicode string is wanted without caring about encoding. For example
    to hex, base64, urlencoding, or are known to be identifiers.

    Non-bytes input raises ``ValueError``.
    """
    if isinstance(str_, bytes):
        return str_.decode('ascii')

    raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')
134 134
135 135
def convert_to_str(data):
    """
    Recursively convert bytes found in ``data`` to str via ``safe_str``.

    Tuples and lists are rebuilt (as ``tuple`` / ``list``) with every item
    converted; any other value is returned unchanged.
    """
    if isinstance(data, bytes):
        return safe_str(data)
    if isinstance(data, tuple):
        return tuple([convert_to_str(item) for item in data])
    if isinstance(data, list):
        return [convert_to_str(item) for item in data]
    return data
145
146
def splitnewlines(text: bytes):
    """
    like splitlines, but only split on newlines.

    Every returned line keeps its trailing ``\\n``; a final segment without
    a newline is returned as-is, and an empty final segment is dropped.
    """
    segments = text.split(b'\n')
    # the last segment is whatever follows the final newline (maybe empty)
    tail = segments.pop()
    lines = [segment + b'\n' for segment in segments]
    if tail:
        lines.append(tail)
    return lines
General Comments 0
You need to be logged in to leave comments. Login now