git: make use of pygit2 more...
super-admin
r1109:7aab64a8 python3
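For context: the change below replaces several subprocess and dulwich code paths with direct pygit2 calls, notably pygit2.discover_repository() for the new fast path in _assert_correct_path and Repository.set_head() in set_head_ref. A minimal standalone sketch of those two calls (the repository path and branch name are made up for illustration):

    import pygit2

    repo_path = '/tmp/example-repo'  # hypothetical path, illustration only

    # discover_repository() returns the resolved repository path, or None if
    # no repository is found - enough for a cheap existence check without
    # opening the repository.
    if pygit2.discover_repository(repo_path):
        repo = pygit2.Repository(repo_path)
        # set_head() points HEAD at the given ref; it does not touch the
        # working tree.
        repo.set_head('refs/heads/main')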
@@ -1,1375 +1,1382 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import posixpath as vcspath
22 22 import re
23 23 import stat
24 24 import traceback
25 25 import urllib.request
26 26 import urllib.parse
27 27 import urllib.error
28 28 from functools import wraps
29 29
30 30 import more_itertools
31 31 import pygit2
32 32 from pygit2 import Repository as LibGit2Repo
33 33 from pygit2 import index as LibGit2Index
34 34 from dulwich import index, objects
35 35 from dulwich.client import HttpGitClient, LocalGitClient
36 36 from dulwich.errors import (
37 37 NotGitRepository, ChecksumMismatch, WrongObjectException,
38 38 MissingCommitError, ObjectMissing, HangupException,
39 39 UnexpectedCommandError)
40 40 from dulwich.repo import Repo as DulwichRepo
41 41 from dulwich.server import update_server_info
42 42
43 43 from vcsserver import exceptions, settings, subprocessio
44 44 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
45 45 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo, BinaryEnvelope
46 46 from vcsserver.hgcompat import (
47 47 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
48 48 from vcsserver.git_lfs.lib import LFSOidStore
49 49 from vcsserver.vcs_base import RemoteBase
50 50
51 51 DIR_STAT = stat.S_IFDIR
52 52 FILE_MODE = stat.S_IFMT
53 53 GIT_LINK = objects.S_IFGITLINK
54 54 PEELED_REF_MARKER = b'^{}'
55 55 HEAD_MARKER = b'HEAD'
56 56
57 57 log = logging.getLogger(__name__)
58 58
59 59
60 60 def reraise_safe_exceptions(func):
61 61 """Converts Dulwich exceptions to something neutral."""
62 62
63 63 @wraps(func)
64 64 def wrapper(*args, **kwargs):
65 65 try:
66 66 return func(*args, **kwargs)
67 67 except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as e:
68 68 exc = exceptions.LookupException(org_exc=e)
69 69 raise exc(safe_str(e))
70 70 except (HangupException, UnexpectedCommandError) as e:
71 71 exc = exceptions.VcsException(org_exc=e)
72 72 raise exc(safe_str(e))
73 73 except Exception:
74 74 # NOTE(marcink): because of how dulwich handles some exceptions
75 75 # (KeyError on empty repos), we cannot track this and catch all
76 76 # exceptions; these are exceptions from other handlers
77 77 #if not hasattr(e, '_vcs_kind'):
78 78 #log.exception("Unhandled exception in git remote call")
79 79 #raise_from_original(exceptions.UnhandledException)
80 80 raise
81 81 return wrapper
82 82
83 83
84 84 class Repo(DulwichRepo):
85 85 """
86 86 A wrapper for dulwich Repo class.
87 87
88 88 Since dulwich sometimes keeps .idx file descriptors open, this can lead to a
89 89 "Too many open files" error. We need to close all opened file descriptors
90 90 once the repo object is destroyed.
91 91 """
92 92 def __del__(self):
93 93 if hasattr(self, 'object_store'):
94 94 self.close()
95 95
96 96
97 97 class Repository(LibGit2Repo):
98 98
99 99 def __enter__(self):
100 100 return self
101 101
102 102 def __exit__(self, exc_type, exc_val, exc_tb):
103 103 self.free()
104 104
105 105
106 106 class GitFactory(RepoFactory):
107 107 repo_type = 'git'
108 108
109 109 def _create_repo(self, wire, create, use_libgit2=False):
110 110 if use_libgit2:
111 111 repo = Repository(safe_bytes(wire['path']))
112 112 else:
113 113 # dulwich mode
114 114 repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
115 115 repo = Repo(repo_path)
116 116
117 117 log.debug('repository created: got GIT object: %s', repo)
118 118 return repo
119 119
120 120 def repo(self, wire, create=False, use_libgit2=False):
121 121 """
122 122 Get a repository instance for the given path.
123 123 """
124 124 return self._create_repo(wire, create, use_libgit2)
125 125
126 126 def repo_libgit2(self, wire):
127 127 return self.repo(wire, use_libgit2=True)
128 128
129 129
130 130 class GitRemote(RemoteBase):
131 131
132 132 def __init__(self, factory):
133 133 self._factory = factory
134 134 self._bulk_methods = {
135 135 "date": self.date,
136 136 "author": self.author,
137 137 "branch": self.branch,
138 138 "message": self.message,
139 139 "parents": self.parents,
140 140 "_commit": self.revision,
141 141 }
142 142
143 143 def _wire_to_config(self, wire):
144 144 if 'config' in wire:
145 145 return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']])
146 146 return {}
147 147
148 148 def _remote_conf(self, config):
149 149 params = [
150 150 '-c', 'core.askpass=""',
151 151 ]
152 152 ssl_cert_dir = config.get('vcs_ssl_dir')
153 153 if ssl_cert_dir:
154 154 params.extend(['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)])
155 155 return params
156 156
157 157 @reraise_safe_exceptions
158 158 def discover_git_version(self):
159 159 stdout, _ = self.run_git_command(
160 160 {}, ['--version'], _bare=True, _safe=True)
161 161 prefix = b'git version'
162 162 if stdout.startswith(prefix):
163 163 stdout = stdout[len(prefix):]
164 164 return safe_str(stdout.strip())
165 165
166 166 @reraise_safe_exceptions
167 167 def is_empty(self, wire):
168 168 repo_init = self._factory.repo_libgit2(wire)
169 169 with repo_init as repo:
170 170
171 171 try:
172 172 has_head = repo.head.name
173 173 if has_head:
174 174 return False
175 175
176 176 # NOTE(marcink): check again using more expensive method
177 177 return repo.is_empty
178 178 except Exception:
179 179 pass
180 180
181 181 return True
182 182
183 183 @reraise_safe_exceptions
184 184 def assert_correct_path(self, wire):
185 185 cache_on, context_uid, repo_id = self._cache_on(wire)
186 186 region = self._region(wire)
187 187
188 188 @region.conditional_cache_on_arguments(condition=cache_on)
189 def _assert_correct_path(_context_uid, _repo_id):
190 try:
191 repo_init = self._factory.repo_libgit2(wire)
192 with repo_init as repo:
193 pass
194 except pygit2.GitError:
195 path = wire.get('path')
196 tb = traceback.format_exc()
197 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
189 def _assert_correct_path(_context_uid, _repo_id, fast_check):
190 if fast_check:
191 path = safe_str(wire['path'])
192 if pygit2.discover_repository(path):
193 return True
198 194 return False
195 else:
196 try:
197 repo_init = self._factory.repo_libgit2(wire)
198 with repo_init:
199 pass
200 except pygit2.GitError:
201 path = wire.get('path')
202 tb = traceback.format_exc()
203 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
204 return False
205 return True
199 206
200 return True
201 return _assert_correct_path(context_uid, repo_id)
207 return _assert_correct_path(context_uid, repo_id, True)
202 208
203 209 @reraise_safe_exceptions
204 210 def bare(self, wire):
205 211 repo_init = self._factory.repo_libgit2(wire)
206 212 with repo_init as repo:
207 213 return repo.is_bare
208 214
209 215 @reraise_safe_exceptions
210 216 def blob_as_pretty_string(self, wire, sha):
211 217 repo_init = self._factory.repo_libgit2(wire)
212 218 with repo_init as repo:
213 219 blob_obj = repo[sha]
214 220 return BinaryEnvelope(blob_obj.data)
215 221
216 222 @reraise_safe_exceptions
217 223 def blob_raw_length(self, wire, sha):
218 224 cache_on, context_uid, repo_id = self._cache_on(wire)
219 225 region = self._region(wire)
220 226
221 227 @region.conditional_cache_on_arguments(condition=cache_on)
222 228 def _blob_raw_length(_repo_id, _sha):
223 229
224 230 repo_init = self._factory.repo_libgit2(wire)
225 231 with repo_init as repo:
226 232 blob = repo[sha]
227 233 return blob.size
228 234
229 235 return _blob_raw_length(repo_id, sha)
230 236
231 237 def _parse_lfs_pointer(self, raw_content):
232 238 spec_string = b'version https://git-lfs.github.com/spec'
233 239 if raw_content and raw_content.startswith(spec_string):
234 240
235 241 pattern = re.compile(rb"""
236 242 (?:\n)?
237 243 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
238 244 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
239 245 ^size[ ](?P<oid_size>[0-9]+)\n
240 246 (?:\n)?
241 247 """, re.VERBOSE | re.MULTILINE)
242 248 match = pattern.match(raw_content)
243 249 if match:
244 250 return match.groupdict()
245 251
246 252 return {}
247 253
248 254 @reraise_safe_exceptions
249 255 def is_large_file(self, wire, commit_id):
250 256 cache_on, context_uid, repo_id = self._cache_on(wire)
251 257 region = self._region(wire)
252 258
253 259 @region.conditional_cache_on_arguments(condition=cache_on)
254 260 def _is_large_file(_repo_id, _sha):
255 261 repo_init = self._factory.repo_libgit2(wire)
256 262 with repo_init as repo:
257 263 blob = repo[commit_id]
258 264 if blob.is_binary:
259 265 return {}
260 266
261 267 return self._parse_lfs_pointer(blob.data)
262 268
263 269 return _is_large_file(repo_id, commit_id)
264 270
265 271 @reraise_safe_exceptions
266 272 def is_binary(self, wire, tree_id):
267 273 cache_on, context_uid, repo_id = self._cache_on(wire)
268 274 region = self._region(wire)
269 275
270 276 @region.conditional_cache_on_arguments(condition=cache_on)
271 277 def _is_binary(_repo_id, _tree_id):
272 278 repo_init = self._factory.repo_libgit2(wire)
273 279 with repo_init as repo:
274 280 blob_obj = repo[tree_id]
275 281 return blob_obj.is_binary
276 282
277 283 return _is_binary(repo_id, tree_id)
278 284
279 285 @reraise_safe_exceptions
280 286 def md5_hash(self, wire, tree_id):
281 287 cache_on, context_uid, repo_id = self._cache_on(wire)
282 288 region = self._region(wire)
283 289
284 290 @region.conditional_cache_on_arguments(condition=cache_on)
285 291 def _md5_hash(_repo_id, _tree_id):
286 292 return ''
287 293
288 294 return _md5_hash(repo_id, tree_id)
289 295
290 296 @reraise_safe_exceptions
291 297 def in_largefiles_store(self, wire, oid):
292 298 conf = self._wire_to_config(wire)
293 299 repo_init = self._factory.repo_libgit2(wire)
294 300 with repo_init as repo:
295 301 repo_name = repo.path
296 302
297 303 store_location = conf.get('vcs_git_lfs_store_location')
298 304 if store_location:
299 305
300 306 store = LFSOidStore(
301 307 oid=oid, repo=repo_name, store_location=store_location)
302 308 return store.has_oid()
303 309
304 310 return False
305 311
306 312 @reraise_safe_exceptions
307 313 def store_path(self, wire, oid):
308 314 conf = self._wire_to_config(wire)
309 315 repo_init = self._factory.repo_libgit2(wire)
310 316 with repo_init as repo:
311 317 repo_name = repo.path
312 318
313 319 store_location = conf.get('vcs_git_lfs_store_location')
314 320 if store_location:
315 321 store = LFSOidStore(
316 322 oid=oid, repo=repo_name, store_location=store_location)
317 323 return store.oid_path
318 324 raise ValueError('Unable to fetch oid with path {}'.format(oid))
319 325
320 326 @reraise_safe_exceptions
321 327 def bulk_request(self, wire, rev, pre_load):
322 328 cache_on, context_uid, repo_id = self._cache_on(wire)
323 329 region = self._region(wire)
324 330
325 331 @region.conditional_cache_on_arguments(condition=cache_on)
326 332 def _bulk_request(_repo_id, _rev, _pre_load):
327 333 result = {}
328 334 for attr in pre_load:
329 335 try:
330 336 method = self._bulk_methods[attr]
331 337 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
332 338 args = [wire, rev]
333 339 result[attr] = method(*args)
334 340 except KeyError as e:
335 341 raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
336 342 return result
337 343
338 344 return _bulk_request(repo_id, rev, sorted(pre_load))
339 345
340 346 def _build_opener(self, url):
341 347 handlers = []
342 348 url_obj = url_parser(url)
343 349 _, authinfo = url_obj.authinfo()
344 350
345 351 if authinfo:
346 352 # create a password manager
347 353 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
348 354 passmgr.add_password(*authinfo)
349 355
350 356 handlers.extend((httpbasicauthhandler(passmgr),
351 357 httpdigestauthhandler(passmgr)))
352 358
353 359 return urllib.request.build_opener(*handlers)
354 360
355 361 def _type_id_to_name(self, type_id: int):
356 362 return {
357 363 1: 'commit',
358 364 2: 'tree',
359 365 3: 'blob',
360 366 4: 'tag'
361 367 }[type_id]
362 368
363 369 @reraise_safe_exceptions
364 370 def check_url(self, url, config):
365 371 url_obj = url_parser(safe_bytes(url))
366 372 test_uri, _ = url_obj.authinfo()
367 373 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
368 374 url_obj.query = obfuscate_qs(url_obj.query)
369 375 cleaned_uri = str(url_obj)
370 376 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
371 377
372 378 if not test_uri.endswith('info/refs'):
373 379 test_uri = test_uri.rstrip('/') + '/info/refs'
374 380
375 381 o = self._build_opener(url)
376 382 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
377 383
378 384 q = {"service": 'git-upload-pack'}
379 385 qs = '?%s' % urllib.parse.urlencode(q)
380 386 cu = "%s%s" % (test_uri, qs)
381 387 req = urllib.request.Request(cu, None, {})
382 388
383 389 try:
384 390 log.debug("Trying to open URL %s", cleaned_uri)
385 391 resp = o.open(req)
386 392 if resp.code != 200:
387 393 raise exceptions.URLError()('Return Code is not 200')
388 394 except Exception as e:
389 395 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
390 396 # means it cannot be cloned
391 397 raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))
392 398
393 399 # now detect if it's proper git repo
394 400 gitdata = resp.read()
395 401 if b'service=git-upload-pack' in gitdata:
396 402 pass
397 403 elif re.findall(rb'[0-9a-fA-F]{40}\s+refs', gitdata):
398 404 # old style git can return some other format !
399 405 pass
400 406 else:
401 407 raise exceptions.URLError()(
402 408 "url [%s] does not look like a git repository" % (cleaned_uri,))
403 409
404 410 return True
405 411
406 412 @reraise_safe_exceptions
407 413 def clone(self, wire, url, deferred, valid_refs, update_after_clone):
408 414 # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
409 415 remote_refs = self.pull(wire, url, apply_refs=False)
410 416 repo = self._factory.repo(wire)
411 417 if isinstance(valid_refs, list):
412 418 valid_refs = tuple(valid_refs)
413 419
414 420 for k in remote_refs:
415 421 # only parse heads/tags and skip so called deferred tags
416 422 if k.startswith(valid_refs) and not k.endswith(deferred):
417 423 repo[k] = remote_refs[k]
418 424
419 425 if update_after_clone:
420 426 # we want to checkout HEAD
421 427 repo["HEAD"] = remote_refs["HEAD"]
422 428 index.build_index_from_tree(repo.path, repo.index_path(),
423 429 repo.object_store, repo["HEAD"].tree)
424 430
425 431 @reraise_safe_exceptions
426 432 def branch(self, wire, commit_id):
427 433 cache_on, context_uid, repo_id = self._cache_on(wire)
428 434 region = self._region(wire)
429 435
430 436 @region.conditional_cache_on_arguments(condition=cache_on)
431 437 def _branch(_context_uid, _repo_id, _commit_id):
432 438 regex = re.compile('^refs/heads')
433 439
434 440 def filter_with(ref):
435 441 return regex.match(ref[0]) and ref[1] == _commit_id
436 442
437 443 branches = list(filter(filter_with, list(self.get_refs(wire).items())))
438 444 return [x[0].split('refs/heads/')[-1] for x in branches]
439 445
440 446 return _branch(context_uid, repo_id, commit_id)
441 447
442 448 @reraise_safe_exceptions
443 449 def commit_branches(self, wire, commit_id):
444 450 cache_on, context_uid, repo_id = self._cache_on(wire)
445 451 region = self._region(wire)
446 452
447 453 @region.conditional_cache_on_arguments(condition=cache_on)
448 454 def _commit_branches(_context_uid, _repo_id, _commit_id):
449 455 repo_init = self._factory.repo_libgit2(wire)
450 456 with repo_init as repo:
451 457 branches = [x for x in repo.branches.with_commit(_commit_id)]
452 458 return branches
453 459
454 460 return _commit_branches(context_uid, repo_id, commit_id)
455 461
456 462 @reraise_safe_exceptions
457 463 def add_object(self, wire, content):
458 464 repo_init = self._factory.repo_libgit2(wire)
459 465 with repo_init as repo:
460 466 blob = objects.Blob()
461 467 blob.set_raw_string(content)
462 468 repo.object_store.add_object(blob)
463 469 return blob.id
464 470
465 471 # TODO: this is quite complex, check if that can be simplified
466 472 @reraise_safe_exceptions
467 473 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
468 474 # Defines the root tree
469 475 class _Root(object):
470 476 def __repr__(self):
471 477 return 'ROOT TREE'
472 478 ROOT = _Root()
473 479
474 480 repo = self._factory.repo(wire)
475 481 object_store = repo.object_store
476 482
477 483 # Create a tree and populate it with blobs
478 484 if commit_tree:
479 485 commit_tree = safe_bytes(commit_tree)
480 486
481 487 if commit_tree and repo[commit_tree]:
482 488 git_commit = repo[safe_bytes(commit_data['parents'][0])]
483 489 commit_tree = repo[git_commit.tree] # root tree
484 490 else:
485 491 commit_tree = objects.Tree()
486 492
487 493 for node in updated:
488 494 # Compute subdirs if needed
489 495 dirpath, nodename = vcspath.split(node['path'])
490 496 dirnames = list(map(safe_str, dirpath and dirpath.split('/') or []))
491 497 parent = commit_tree
492 498 ancestors = [('', parent)]
493 499
494 500 # Tries to dig for the deepest existing tree
495 501 while dirnames:
496 502 curdir = dirnames.pop(0)
497 503 try:
498 504 dir_id = parent[curdir][1]
499 505 except KeyError:
500 506 # put curdir back into dirnames and stops
501 507 dirnames.insert(0, curdir)
502 508 break
503 509 else:
504 510 # If found, updates parent
505 511 parent = repo[dir_id]
506 512 ancestors.append((curdir, parent))
507 513 # Now parent is deepest existing tree and we need to create
508 514 # subtrees for dirnames (in reverse order)
509 515 # [this only applies for nodes from added]
510 516 new_trees = []
511 517
512 518 blob = objects.Blob.from_string(node['content'])
513 519
514 520 node_path = safe_bytes(node['node_path'])
515 521
516 522 if dirnames:
517 523 # If there are trees which should be created we need to build
518 524 # them now (in reverse order)
519 525 reversed_dirnames = list(reversed(dirnames))
520 526 curtree = objects.Tree()
521 527 curtree[node_path] = node['mode'], blob.id
522 528 new_trees.append(curtree)
523 529 for dirname in reversed_dirnames[:-1]:
524 530 newtree = objects.Tree()
525 531 newtree[dirname] = (DIR_STAT, curtree.id)
526 532 new_trees.append(newtree)
527 533 curtree = newtree
528 534 parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
529 535 else:
530 536 parent.add(name=node_path, mode=node['mode'], hexsha=blob.id)
531 537
532 538 new_trees.append(parent)
533 539 # Update ancestors
534 540 reversed_ancestors = reversed(
535 541 [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
536 542 for parent, tree, path in reversed_ancestors:
537 543 parent[path] = (DIR_STAT, tree.id)
538 544 object_store.add_object(tree)
539 545
540 546 object_store.add_object(blob)
541 547 for tree in new_trees:
542 548 object_store.add_object(tree)
543 549
544 550 for node_path in removed:
545 551 paths = node_path.split('/')
546 552 tree = commit_tree # start with top-level
547 553 trees = [{'tree': tree, 'path': ROOT}]
548 554 # Traverse deep into the forest...
549 555 # resolve final tree by iterating the path.
550 556 # e.g a/b/c.txt will get
551 557 # - root as tree then
552 558 # - 'a' as tree,
553 559 # - 'b' as tree,
554 560 # - stop at c as blob.
555 561 for path in paths:
556 562 try:
557 563 obj = repo[tree[path][1]]
558 564 if isinstance(obj, objects.Tree):
559 565 trees.append({'tree': obj, 'path': path})
560 566 tree = obj
561 567 except KeyError:
562 568 break
563 569 #PROBLEM:
564 570 """
565 571 We're not editing the same reference tree object
566 572 """
567 573 # Cut down the blob and all rotten trees on the way back...
568 574 for path, tree_data in reversed(list(zip(paths, trees))):
569 575 tree = tree_data['tree']
570 576 tree.__delitem__(path)
571 577 # This operation edits the tree, we need to mark new commit back
572 578
573 579 if len(tree) > 0:
574 580 # This tree still has elements - don't remove it or any
575 581 # of its parents
576 582 break
577 583
578 584 object_store.add_object(commit_tree)
579 585
580 586 # Create commit
581 587 commit = objects.Commit()
582 588 commit.tree = commit_tree.id
583 589 bytes_keys = [
584 590 'author',
585 591 'committer',
586 592 'message',
587 593 'encoding',
588 594 'parents'
589 595 ]
590 596
591 597 for k, v in commit_data.items():
592 598 if k in bytes_keys:
593 599 if k == 'parents':
594 600 v = [safe_bytes(x) for x in v]
595 601 else:
596 602 v = safe_bytes(v)
597 603 setattr(commit, k, v)
598 604
599 605 object_store.add_object(commit)
600 606
601 607 self.create_branch(wire, branch, safe_str(commit.id))
602 608
603 609 # dulwich set-ref
604 610 repo.refs[safe_bytes(f'refs/heads/{branch}')] = commit.id
605 611
606 612 return commit.id
607 613
608 614 @reraise_safe_exceptions
609 615 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
610 616 if url != 'default' and '://' not in url:
611 617 client = LocalGitClient(url)
612 618 else:
613 619 url_obj = url_parser(url)
614 620 o = self._build_opener(url)
615 621 url, _ = url_obj.authinfo()
616 622 client = HttpGitClient(base_url=url, opener=o)
617 623 repo = self._factory.repo(wire)
618 624
619 625 determine_wants = repo.object_store.determine_wants_all
620 626 if refs:
621 627 refs = [ascii_bytes(x) for x in refs]
622 628
623 629 def determine_wants_requested(remote_refs):
624 630 determined = []
625 631 for ref_name, ref_hash in remote_refs.items():
626 632 bytes_ref_name = safe_bytes(ref_name)
627 633
628 634 if bytes_ref_name in refs:
629 635 bytes_ref_hash = safe_bytes(ref_hash)
630 636 determined.append(bytes_ref_hash)
631 637 return determined
632 638
633 639 # swap with our custom requested wants
634 640 determine_wants = determine_wants_requested
635 641
636 642 try:
637 643 remote_refs = client.fetch(
638 644 path=url, target=repo, determine_wants=determine_wants)
639 645
640 646 except NotGitRepository as e:
641 647 log.warning(
642 648 'Trying to fetch from "%s" failed, not a Git repository.', url)
643 649 # Exception can contain unicode which we convert
644 650 raise exceptions.AbortException(e)(repr(e))
645 651
646 652 # mikhail: client.fetch() returns all the remote refs, but fetches only
647 653 # refs filtered by `determine_wants` function. We need to filter result
648 654 # as well
649 655 if refs:
650 656 remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}
651 657
652 658 if apply_refs:
653 659 # TODO: johbo: Needs proper test coverage with a git repository
654 660 # that contains a tag object, so that we would end up with
655 661 # a peeled ref at this point.
656 662 for k in remote_refs:
657 663 if k.endswith(PEELED_REF_MARKER):
658 664 log.debug("Skipping peeled reference %s", k)
659 665 continue
660 666 repo[k] = remote_refs[k]
661 667
662 668 if refs and not update_after:
663 669 # mikhail: explicitly set the head to the last ref.
664 670 repo[HEAD_MARKER] = remote_refs[refs[-1]]
665 671
666 672 if update_after:
667 673 # we want to check out HEAD
668 674 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
669 675 index.build_index_from_tree(repo.path, repo.index_path(),
670 676 repo.object_store, repo[HEAD_MARKER].tree)
671 677 return remote_refs
672 678
673 679 @reraise_safe_exceptions
674 680 def sync_fetch(self, wire, url, refs=None, all_refs=False):
675 repo = self._factory.repo(wire)
681 self._factory.repo(wire)
676 682 if refs and not isinstance(refs, (list, tuple)):
677 683 refs = [refs]
678 684
679 685 config = self._wire_to_config(wire)
680 686 # get all remote refs we'll use to fetch later
681 687 cmd = ['ls-remote']
682 688 if not all_refs:
683 689 cmd += ['--heads', '--tags']
684 690 cmd += [url]
685 691 output, __ = self.run_git_command(
686 692 wire, cmd, fail_on_stderr=False,
687 693 _copts=self._remote_conf(config),
688 694 extra_env={'GIT_TERMINAL_PROMPT': '0'})
689 695
690 696 remote_refs = collections.OrderedDict()
691 697 fetch_refs = []
692 698
693 699 for ref_line in output.splitlines():
694 700 sha, ref = ref_line.split(b'\t')
695 701 sha = sha.strip()
696 702 if ref in remote_refs:
697 703 # duplicate, skip
698 704 continue
699 705 if ref.endswith(PEELED_REF_MARKER):
700 706 log.debug("Skipping peeled reference %s", ref)
701 707 continue
702 708 # don't sync HEAD
703 709 if ref in [HEAD_MARKER]:
704 710 continue
705 711
706 712 remote_refs[ref] = sha
707 713
708 714 if refs and sha in refs:
709 715 # we filter fetch using our specified refs
710 716 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
711 717 elif not refs:
712 718 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
713 719 log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
714 720
715 721 if fetch_refs:
716 722 for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
717 723 fetch_refs_chunks = list(chunk)
718 724 log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
719 725 self.run_git_command(
720 726 wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
721 727 fail_on_stderr=False,
722 728 _copts=self._remote_conf(config),
723 729 extra_env={'GIT_TERMINAL_PROMPT': '0'})
724 730
725 731 return remote_refs
726 732
727 733 @reraise_safe_exceptions
728 734 def sync_push(self, wire, url, refs=None):
729 735 if not self.check_url(url, wire):
730 736 return
731 737 config = self._wire_to_config(wire)
732 738 self._factory.repo(wire)
733 739 self.run_git_command(
734 740 wire, ['push', url, '--mirror'], fail_on_stderr=False,
735 741 _copts=self._remote_conf(config),
736 742 extra_env={'GIT_TERMINAL_PROMPT': '0'})
737 743
738 744 @reraise_safe_exceptions
739 745 def get_remote_refs(self, wire, url):
740 746 repo = Repo(url)
741 747 return repo.get_refs()
742 748
743 749 @reraise_safe_exceptions
744 750 def get_description(self, wire):
745 751 repo = self._factory.repo(wire)
746 752 return repo.get_description()
747 753
748 754 @reraise_safe_exceptions
749 755 def get_missing_revs(self, wire, rev1, rev2, path2):
750 756 repo = self._factory.repo(wire)
751 757 LocalGitClient(thin_packs=False).fetch(path2, repo)
752 758
753 759 wire_remote = wire.copy()
754 760 wire_remote['path'] = path2
755 761 repo_remote = self._factory.repo(wire_remote)
756 762 LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)
757 763
758 764 revs = [
759 765 x.commit.id
760 766 for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
761 767 return revs
762 768
763 769 @reraise_safe_exceptions
764 770 def get_object(self, wire, sha, maybe_unreachable=False):
765 771 cache_on, context_uid, repo_id = self._cache_on(wire)
766 772 region = self._region(wire)
767 773
768 774 @region.conditional_cache_on_arguments(condition=cache_on)
769 775 def _get_object(_context_uid, _repo_id, _sha):
770 776 repo_init = self._factory.repo_libgit2(wire)
771 777 with repo_init as repo:
772 778
773 779 missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
774 780 try:
775 781 commit = repo.revparse_single(sha)
776 782 except KeyError:
777 783 # NOTE(marcink): KeyError doesn't give us any meaningful information
778 784 # here, we instead give something more explicit
779 785 e = exceptions.RefNotFoundException('SHA: %s not found', sha)
780 786 raise exceptions.LookupException(e)(missing_commit_err)
781 787 except ValueError as e:
782 788 raise exceptions.LookupException(e)(missing_commit_err)
783 789
784 790 is_tag = False
785 791 if isinstance(commit, pygit2.Tag):
786 792 commit = repo.get(commit.target)
787 793 is_tag = True
788 794
789 795 check_dangling = True
790 796 if is_tag:
791 797 check_dangling = False
792 798
793 799 if check_dangling and maybe_unreachable:
794 800 check_dangling = False
795 801
796 802 # we used a reference and it parsed, which means we don't have a dangling commit
797 803 if sha != commit.hex:
798 804 check_dangling = False
799 805
800 806 if check_dangling:
801 807 # check for dangling commit
802 808 for branch in repo.branches.with_commit(commit.hex):
803 809 if branch:
804 810 break
805 811 else:
806 812 # NOTE(marcink): Empty error doesn't give us any meaningful information
807 813 # here, we instead give something more explicit
808 814 e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
809 815 raise exceptions.LookupException(e)(missing_commit_err)
810 816
811 817 commit_id = commit.hex
812 818 type_id = commit.type
813 819
814 820 return {
815 821 'id': commit_id,
816 822 'type': self._type_id_to_name(type_id),
817 823 'commit_id': commit_id,
818 824 'idx': 0
819 825 }
820 826
821 827 return _get_object(context_uid, repo_id, sha)
822 828
823 829 @reraise_safe_exceptions
824 830 def get_refs(self, wire):
825 831 cache_on, context_uid, repo_id = self._cache_on(wire)
826 832 region = self._region(wire)
827 833
828 834 @region.conditional_cache_on_arguments(condition=cache_on)
829 835 def _get_refs(_context_uid, _repo_id):
830 836
831 837 repo_init = self._factory.repo_libgit2(wire)
832 838 with repo_init as repo:
833 839 regex = re.compile('^refs/(heads|tags)/')
834 840 return {x.name: x.target.hex for x in
835 841 [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}
836 842
837 843 return _get_refs(context_uid, repo_id)
838 844
839 845 @reraise_safe_exceptions
840 846 def get_branch_pointers(self, wire):
841 847 cache_on, context_uid, repo_id = self._cache_on(wire)
842 848 region = self._region(wire)
843 849
844 850 @region.conditional_cache_on_arguments(condition=cache_on)
845 851 def _get_branch_pointers(_context_uid, _repo_id):
846 852
847 853 repo_init = self._factory.repo_libgit2(wire)
848 854 regex = re.compile('^refs/heads')
849 855 with repo_init as repo:
850 856 branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
851 857 return {x.target.hex: x.shorthand for x in branches}
852 858
853 859 return _get_branch_pointers(context_uid, repo_id)
854 860
855 861 @reraise_safe_exceptions
856 862 def head(self, wire, show_exc=True):
857 863 cache_on, context_uid, repo_id = self._cache_on(wire)
858 864 region = self._region(wire)
859 865
860 866 @region.conditional_cache_on_arguments(condition=cache_on)
861 867 def _head(_context_uid, _repo_id, _show_exc):
862 868 repo_init = self._factory.repo_libgit2(wire)
863 869 with repo_init as repo:
864 870 try:
865 871 return repo.head.peel().hex
866 872 except Exception:
867 873 if show_exc:
868 874 raise
869 875 return _head(context_uid, repo_id, show_exc)
870 876
871 877 @reraise_safe_exceptions
872 878 def init(self, wire):
873 879 repo_path = safe_str(wire['path'])
874 880 self.repo = Repo.init(repo_path)
875 881
876 882 @reraise_safe_exceptions
877 883 def init_bare(self, wire):
878 884 repo_path = safe_str(wire['path'])
879 885 self.repo = Repo.init_bare(repo_path)
880 886
881 887 @reraise_safe_exceptions
882 888 def revision(self, wire, rev):
883 889
884 890 cache_on, context_uid, repo_id = self._cache_on(wire)
885 891 region = self._region(wire)
886 892
887 893 @region.conditional_cache_on_arguments(condition=cache_on)
888 894 def _revision(_context_uid, _repo_id, _rev):
889 895 repo_init = self._factory.repo_libgit2(wire)
890 896 with repo_init as repo:
891 897 commit = repo[rev]
892 898 obj_data = {
893 899 'id': commit.id.hex,
894 900 }
895 901 # tree objects themselves don't have a tree_id attribute
896 902 if hasattr(commit, 'tree_id'):
897 903 obj_data['tree'] = commit.tree_id.hex
898 904
899 905 return obj_data
900 906 return _revision(context_uid, repo_id, rev)
901 907
902 908 @reraise_safe_exceptions
903 909 def date(self, wire, commit_id):
904 910 cache_on, context_uid, repo_id = self._cache_on(wire)
905 911 region = self._region(wire)
906 912
907 913 @region.conditional_cache_on_arguments(condition=cache_on)
908 914 def _date(_repo_id, _commit_id):
909 915 repo_init = self._factory.repo_libgit2(wire)
910 916 with repo_init as repo:
911 917 commit = repo[commit_id]
912 918
913 919 if hasattr(commit, 'commit_time'):
914 920 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
915 921 else:
916 922 commit = commit.get_object()
917 923 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
918 924
919 925 # TODO(marcink): check dulwich difference of offset vs timezone
920 926 return [commit_time, commit_time_offset]
921 927 return _date(repo_id, commit_id)
922 928
923 929 @reraise_safe_exceptions
924 930 def author(self, wire, commit_id):
925 931 cache_on, context_uid, repo_id = self._cache_on(wire)
926 932 region = self._region(wire)
927 933
928 934 @region.conditional_cache_on_arguments(condition=cache_on)
929 935 def _author(_repo_id, _commit_id):
930 936 repo_init = self._factory.repo_libgit2(wire)
931 937 with repo_init as repo:
932 938 commit = repo[commit_id]
933 939
934 940 if hasattr(commit, 'author'):
935 941 author = commit.author
936 942 else:
937 943 author = commit.get_object().author
938 944
939 945 if author.email:
940 946 return "{} <{}>".format(author.name, author.email)
941 947
942 948 try:
943 949 return "{}".format(author.name)
944 950 except Exception:
945 951 return "{}".format(safe_str(author.raw_name))
946 952
947 953 return _author(repo_id, commit_id)
948 954
949 955 @reraise_safe_exceptions
950 956 def message(self, wire, commit_id):
951 957 cache_on, context_uid, repo_id = self._cache_on(wire)
952 958 region = self._region(wire)
953 959
954 960 @region.conditional_cache_on_arguments(condition=cache_on)
955 961 def _message(_repo_id, _commit_id):
956 962 repo_init = self._factory.repo_libgit2(wire)
957 963 with repo_init as repo:
958 964 commit = repo[commit_id]
959 965 return commit.message
960 966 return _message(repo_id, commit_id)
961 967
962 968 @reraise_safe_exceptions
963 969 def parents(self, wire, commit_id):
964 970 cache_on, context_uid, repo_id = self._cache_on(wire)
965 971 region = self._region(wire)
966 972
967 973 @region.conditional_cache_on_arguments(condition=cache_on)
968 974 def _parents(_repo_id, _commit_id):
969 975 repo_init = self._factory.repo_libgit2(wire)
970 976 with repo_init as repo:
971 977 commit = repo[commit_id]
972 978 if hasattr(commit, 'parent_ids'):
973 979 parent_ids = commit.parent_ids
974 980 else:
975 981 parent_ids = commit.get_object().parent_ids
976 982
977 983 return [x.hex for x in parent_ids]
978 984 return _parents(repo_id, commit_id)
979 985
980 986 @reraise_safe_exceptions
981 987 def children(self, wire, commit_id):
982 988 cache_on, context_uid, repo_id = self._cache_on(wire)
983 989 region = self._region(wire)
984 990
985 991 head = self.head(wire)
986 992
987 993 @region.conditional_cache_on_arguments(condition=cache_on)
988 994 def _children(_repo_id, _commit_id):
989 995
990 996 output, __ = self.run_git_command(
991 997 wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])
992 998
993 999 child_ids = []
994 1000 pat = re.compile(r'^{}'.format(commit_id))
995 1001 for line in output.splitlines():
996 1002 line = safe_str(line)
997 1003 if pat.match(line):
998 1004 found_ids = line.split(' ')[1:]
999 1005 child_ids.extend(found_ids)
1000 1006 break
1001 1007
1002 1008 return child_ids
1003 1009 return _children(repo_id, commit_id)
1004 1010
1005 1011 @reraise_safe_exceptions
1006 1012 def set_refs(self, wire, key, value):
1007 1013 repo_init = self._factory.repo_libgit2(wire)
1008 1014 with repo_init as repo:
1009 1015 repo.references.create(key, value, force=True)
1010 1016
1011 1017 @reraise_safe_exceptions
1012 1018 def create_branch(self, wire, branch_name, commit_id, force=False):
1013 1019 repo_init = self._factory.repo_libgit2(wire)
1014 1020 with repo_init as repo:
1015 1021 commit = repo[commit_id]
1016 1022
1017 1023 if force:
1018 1024 repo.branches.local.create(branch_name, commit, force=force)
1019 1025 elif not repo.branches.get(branch_name):
1020 1026 # create only if that branch doesn't already exist
1021 1027 repo.branches.local.create(branch_name, commit, force=force)
1022 1028
1023 1029 @reraise_safe_exceptions
1024 1030 def remove_ref(self, wire, key):
1025 1031 repo_init = self._factory.repo_libgit2(wire)
1026 1032 with repo_init as repo:
1027 1033 repo.references.delete(key)
1028 1034
1029 1035 @reraise_safe_exceptions
1030 1036 def tag_remove(self, wire, tag_name):
1031 1037 repo_init = self._factory.repo_libgit2(wire)
1032 1038 with repo_init as repo:
1033 1039 key = 'refs/tags/{}'.format(tag_name)
1034 1040 repo.references.delete(key)
1035 1041
1036 1042 @reraise_safe_exceptions
1037 1043 def tree_changes(self, wire, source_id, target_id):
1038 1044 # TODO(marcink): remove this, it seems to be only used by tests
1039 1045 repo = self._factory.repo(wire)
1040 1046 source = repo[source_id].tree if source_id else None
1041 1047 target = repo[target_id].tree
1042 1048 result = repo.object_store.tree_changes(source, target)
1043 1049 return list(result)
1044 1050
1045 1051 @reraise_safe_exceptions
1046 1052 def tree_and_type_for_path(self, wire, commit_id, path):
1047 1053
1048 1054 cache_on, context_uid, repo_id = self._cache_on(wire)
1049 1055 region = self._region(wire)
1050 1056
1051 1057 @region.conditional_cache_on_arguments(condition=cache_on)
1052 1058 def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
1053 1059 repo_init = self._factory.repo_libgit2(wire)
1054 1060
1055 1061 with repo_init as repo:
1056 1062 commit = repo[commit_id]
1057 1063 try:
1058 1064 tree = commit.tree[path]
1059 1065 except KeyError:
1060 1066 return None, None, None
1061 1067
1062 1068 return tree.id.hex, tree.type_str, tree.filemode
1063 1069 return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1064 1070
1065 1071 @reraise_safe_exceptions
1066 1072 def tree_items(self, wire, tree_id):
1067 1073 cache_on, context_uid, repo_id = self._cache_on(wire)
1068 1074 region = self._region(wire)
1069 1075
1070 1076 @region.conditional_cache_on_arguments(condition=cache_on)
1071 1077 def _tree_items(_repo_id, _tree_id):
1072 1078
1073 1079 repo_init = self._factory.repo_libgit2(wire)
1074 1080 with repo_init as repo:
1075 1081 try:
1076 1082 tree = repo[tree_id]
1077 1083 except KeyError:
1078 1084 raise ObjectMissing('No tree with id: {}'.format(tree_id))
1079 1085
1080 1086 result = []
1081 1087 for item in tree:
1082 1088 item_sha = item.hex
1083 1089 item_mode = item.filemode
1084 1090 item_type = item.type_str
1085 1091
1086 1092 if item_type == 'commit':
1087 1093 # NOTE(marcink): submodules we translate to 'link' for backward compat
1088 1094 item_type = 'link'
1089 1095
1090 1096 result.append((item.name, item_mode, item_sha, item_type))
1091 1097 return result
1092 1098 return _tree_items(repo_id, tree_id)
1093 1099
1094 1100 @reraise_safe_exceptions
1095 1101 def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1096 1102 """
1097 1103 Old version that uses subprocess to call diff
1098 1104 """
1099 1105
1100 1106 flags = [
1101 1107 '-U%s' % context, '--patch',
1102 1108 '--binary',
1103 1109 '--find-renames',
1104 1110 '--no-indent-heuristic',
1105 1111 # '--indent-heuristic',
1106 1112 #'--full-index',
1107 1113 #'--abbrev=40'
1108 1114 ]
1109 1115
1110 1116 if opt_ignorews:
1111 1117 flags.append('--ignore-all-space')
1112 1118
1113 1119 if commit_id_1 == self.EMPTY_COMMIT:
1114 1120 cmd = ['show'] + flags + [commit_id_2]
1115 1121 else:
1116 1122 cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
1117 1123
1118 1124 if file_filter:
1119 1125 cmd.extend(['--', file_filter])
1120 1126
1121 1127 diff, __ = self.run_git_command(wire, cmd)
1122 1128 # If we used 'show' command, strip first few lines (until actual diff
1123 1129 # starts)
1124 1130 if commit_id_1 == self.EMPTY_COMMIT:
1125 1131 lines = diff.splitlines()
1126 1132 x = 0
1127 1133 for line in lines:
1128 1134 if line.startswith(b'diff'):
1129 1135 break
1130 1136 x += 1
1131 1137 # Append a new line just like the 'diff' command does
1132 1138 diff = '\n'.join(lines[x:]) + '\n'
1133 1139 return diff
1134 1140
1135 1141 @reraise_safe_exceptions
1136 1142 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1137 1143 repo_init = self._factory.repo_libgit2(wire)
1138 1144
1139 1145 with repo_init as repo:
1140 1146 swap = True
1141 1147 flags = 0
1142 1148 flags |= pygit2.GIT_DIFF_SHOW_BINARY
1143 1149
1144 1150 if opt_ignorews:
1145 1151 flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE
1146 1152
1147 1153 if commit_id_1 == self.EMPTY_COMMIT:
1148 1154 comm1 = repo[commit_id_2]
1149 1155 diff_obj = comm1.tree.diff_to_tree(
1150 1156 flags=flags, context_lines=context, swap=swap)
1151 1157
1152 1158 else:
1153 1159 comm1 = repo[commit_id_2]
1154 1160 comm2 = repo[commit_id_1]
1155 1161 diff_obj = comm1.tree.diff_to_tree(
1156 1162 comm2.tree, flags=flags, context_lines=context, swap=swap)
1157 1163 similar_flags = 0
1158 1164 similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
1159 1165 diff_obj.find_similar(flags=similar_flags)
1160 1166
1161 1167 if file_filter:
1162 1168 for p in diff_obj:
1163 1169 if p.delta.old_file.path == file_filter:
1164 1170 return BinaryEnvelope(p.data) or BinaryEnvelope(b'')
1165 1171 # no matching path == no diff
1166 1172 return BinaryEnvelope(b'')
1167 1173 return BinaryEnvelope(diff_obj.patch) or BinaryEnvelope(b'')
1168 1174
1169 1175 @reraise_safe_exceptions
1170 1176 def node_history(self, wire, commit_id, path, limit):
1171 1177 cache_on, context_uid, repo_id = self._cache_on(wire)
1172 1178 region = self._region(wire)
1173 1179
1174 1180 @region.conditional_cache_on_arguments(condition=cache_on)
1175 1181 def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
1176 1182 # optimize for n==1, rev-list is much faster for that use-case
1177 1183 if limit == 1:
1178 1184 cmd = ['rev-list', '-1', commit_id, '--', path]
1179 1185 else:
1180 1186 cmd = ['log']
1181 1187 if limit:
1182 1188 cmd.extend(['-n', str(safe_int(limit, 0))])
1183 1189 cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
1184 1190
1185 1191 output, __ = self.run_git_command(wire, cmd)
1186 1192 commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
1187 1193
1188 1194 return [x for x in commit_ids]
1189 1195 return _node_history(context_uid, repo_id, commit_id, path, limit)
1190 1196
1191 1197 @reraise_safe_exceptions
1192 1198 def node_annotate_legacy(self, wire, commit_id, path):
1193 1199 # note: replaced by pygit2 implementation
1194 1200 cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
1195 1201 # -l ==> outputs long shas (and we need all 40 characters)
1196 1202 # --root ==> doesn't put '^' character for boundaries
1197 1203 # -r commit_id ==> blames for the given commit
1198 1204 output, __ = self.run_git_command(wire, cmd)
1199 1205
1200 1206 result = []
1201 1207 for i, blame_line in enumerate(output.splitlines()[:-1]):
1202 1208 line_no = i + 1
1203 1209 blame_commit_id, line = re.split(rb' ', blame_line, 1)
1204 1210 result.append((line_no, blame_commit_id, line))
1205 1211
1206 1212 return result
1207 1213
1208 1214 @reraise_safe_exceptions
1209 1215 def node_annotate(self, wire, commit_id, path):
1210 1216
1211 1217 result_libgit = []
1212 1218 repo_init = self._factory.repo_libgit2(wire)
1213 1219 with repo_init as repo:
1214 1220 commit = repo[commit_id]
1215 1221 blame_obj = repo.blame(path, newest_commit=commit_id)
1216 1222 for i, line in enumerate(commit.tree[path].data.splitlines()):
1217 1223 line_no = i + 1
1218 1224 hunk = blame_obj.for_line(line_no)
1219 1225 blame_commit_id = hunk.final_commit_id.hex
1220 1226
1221 1227 result_libgit.append((line_no, blame_commit_id, line))
1222 1228
1223 1229 return result_libgit
1224 1230
1225 1231 @reraise_safe_exceptions
1226 1232 def update_server_info(self, wire):
1227 1233 repo = self._factory.repo(wire)
1228 1234 update_server_info(repo)
1229 1235
1230 1236 @reraise_safe_exceptions
1231 1237 def get_all_commit_ids(self, wire):
1232 1238
1233 1239 cache_on, context_uid, repo_id = self._cache_on(wire)
1234 1240 region = self._region(wire)
1235 1241
1236 1242 @region.conditional_cache_on_arguments(condition=cache_on)
1237 1243 def _get_all_commit_ids(_context_uid, _repo_id):
1238 1244
1239 1245 cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
1240 1246 try:
1241 1247 output, __ = self.run_git_command(wire, cmd)
1242 1248 return output.splitlines()
1243 1249 except Exception:
1244 1250 # Can be raised for empty repositories
1245 1251 return []
1246 1252
1247 1253 @region.conditional_cache_on_arguments(condition=cache_on)
1248 1254 def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
1249 1255 repo_init = self._factory.repo_libgit2(wire)
1250 1256 from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
1251 1257 results = []
1252 1258 with repo_init as repo:
1253 1259 for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
1254 1260 results.append(commit.id.hex)
1255 1261
1256 1262 return _get_all_commit_ids(context_uid, repo_id)
1257 1263
1258 1264 @reraise_safe_exceptions
1259 1265 def run_git_command(self, wire, cmd, **opts):
1260 1266 path = wire.get('path', None)
1261 1267
1262 1268 if path and os.path.isdir(path):
1263 1269 opts['cwd'] = path
1264 1270
1265 1271 if '_bare' in opts:
1266 1272 _copts = []
1267 1273 del opts['_bare']
1268 1274 else:
1269 _copts = ['-c', 'core.quotepath=false', ]
1275 _copts = ['-c', 'core.quotepath=false',]
1270 1276 safe_call = False
1271 1277 if '_safe' in opts:
1272 1278 # no exc on failure
1273 1279 del opts['_safe']
1274 1280 safe_call = True
1275 1281
1276 1282 if '_copts' in opts:
1277 1283 _copts.extend(opts['_copts'] or [])
1278 1284 del opts['_copts']
1279 1285
1280 1286 gitenv = os.environ.copy()
1281 1287 gitenv.update(opts.pop('extra_env', {}))
1282 1288 # need to clean/unset GIT_DIR from the environment
1283 1289 if 'GIT_DIR' in gitenv:
1284 1290 del gitenv['GIT_DIR']
1285 1291 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
1286 1292 gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'
1287 1293
1288 1294 cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
1289 1295 _opts = {'env': gitenv, 'shell': False}
1290 1296
1291 1297 proc = None
1292 1298 try:
1293 1299 _opts.update(opts)
1294 1300 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
1295 1301
1296 1302 return b''.join(proc), b''.join(proc.stderr)
1297 1303 except OSError as err:
1298 1304 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
1299 1305 tb_err = ("Couldn't run git command (%s).\n"
1300 1306 "Original error was:%s\n"
1301 1307 "Call options:%s\n"
1302 1308 % (cmd, err, _opts))
1303 1309 log.exception(tb_err)
1304 1310 if safe_call:
1305 1311 return '', err
1306 1312 else:
1307 1313 raise exceptions.VcsException()(tb_err)
1308 1314 finally:
1309 1315 if proc:
1310 1316 proc.close()
1311 1317
1312 1318 @reraise_safe_exceptions
1313 1319 def install_hooks(self, wire, force=False):
1314 1320 from vcsserver.hook_utils import install_git_hooks
1315 1321 bare = self.bare(wire)
1316 1322 path = wire['path']
1317 1323 binary_dir = settings.BINARY_DIR
1318 executable = None
1319 1324 if binary_dir:
1320 executable = os.path.join(binary_dir, 'python3')
1325 os.path.join(binary_dir, 'python3')
1321 1326 return install_git_hooks(path, bare, force_create=force)
1322 1327
1323 1328 @reraise_safe_exceptions
1324 1329 def get_hooks_info(self, wire):
1325 1330 from vcsserver.hook_utils import (
1326 1331 get_git_pre_hook_version, get_git_post_hook_version)
1327 1332 bare = self.bare(wire)
1328 1333 path = wire['path']
1329 1334 return {
1330 1335 'pre_version': get_git_pre_hook_version(path, bare),
1331 1336 'post_version': get_git_post_hook_version(path, bare),
1332 1337 }
1333 1338
1334 1339 @reraise_safe_exceptions
1335 1340 def set_head_ref(self, wire, head_name):
1336 1341 log.debug('Setting refs/head to `%s`', head_name)
1337 cmd = ['symbolic-ref', '"HEAD"', '"refs/heads/%s"' % head_name]
1338 output, __ = self.run_git_command(wire, cmd)
1339 return [head_name] + output.splitlines()
1342 repo_init = self._factory.repo_libgit2(wire)
1343 with repo_init as repo:
1344 repo.set_head(f'refs/heads/{head_name}')
1345
1346 return [head_name] + [f'set HEAD to refs/heads/{head_name}']
1340 1347
1341 1348 @reraise_safe_exceptions
1342 1349 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
1343 1350 archive_dir_name, commit_id):
1344 1351
1345 1352 def file_walker(_commit_id, path):
1346 1353 repo_init = self._factory.repo_libgit2(wire)
1347 1354
1348 1355 with repo_init as repo:
1349 1356 commit = repo[commit_id]
1350 1357
1351 1358 if path in ['', '/']:
1352 1359 tree = commit.tree
1353 1360 else:
1354 1361 tree = commit.tree[path.rstrip('/')]
1355 1362 tree_id = tree.id.hex
1356 1363 try:
1357 1364 tree = repo[tree_id]
1358 1365 except KeyError:
1359 1366 raise ObjectMissing('No tree with id: {}'.format(tree_id))
1360 1367
1361 1368 index = LibGit2Index.Index()
1362 1369 index.read_tree(tree)
1363 1370 file_iter = index
1364 1371
1365 for fn in file_iter:
1366 file_path = fn.path
1367 mode = fn.mode
1372 for file_node in file_iter:
1373 file_path = file_node.path
1374 mode = file_node.mode
1368 1375 is_link = stat.S_ISLNK(mode)
1369 1376 if mode == pygit2.GIT_FILEMODE_COMMIT:
1370 1377 log.debug('Skipping path %s as a commit node', file_path)
1371 1378 continue
1372 yield ArchiveNode(file_path, mode, is_link, repo[fn.hex].read_raw)
1379 yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)
1373 1380
1374 1381 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
1375 1382 archive_dir_name, commit_id)
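A side note on the (unchanged) _parse_lfs_pointer helper shown above: its regex targets the standard Git LFS pointer layout. A rough sketch of the input it expects and what it extracts, with a fabricated OID:

    raw_content = (
        b'version https://git-lfs.github.com/spec/v1\n'
        + b'oid sha256:' + b'a' * 64 + b'\n'
        + b'size 12345\n'
    )
    # With the pattern from _parse_lfs_pointer, match.groupdict() yields
    # roughly: {'spec_ver': b'v1', 'oid_hash': b'aaa...a', 'oid_size': b'12345'}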
@@ -1,287 +1,295 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import io
19 19 import more_itertools
20 20
21 21 import dulwich.protocol
22 22 import mock
23 23 import pytest
24 24 import webob
25 25 import webtest
26 26
27 27 from vcsserver import hooks, pygrack
28 28
29 29 from vcsserver.str_utils import ascii_bytes
30 30
31 31
32 32 @pytest.fixture()
33 33 def pygrack_instance(tmpdir):
34 34 """
35 35 Creates a pygrack app instance.
36 36
37 37 Right now, it is not particularly helpful regarding the passed directory.
38 38 It just contains the required folders to pass the signature test.
39 39 """
40 40 for dir_name in ('config', 'head', 'info', 'objects', 'refs'):
41 41 tmpdir.mkdir(dir_name)
42 42
43 43 return pygrack.GitRepository('repo_name', str(tmpdir), 'git', False, {})
44 44
45 45
46 46 @pytest.fixture()
47 47 def pygrack_app(pygrack_instance):
48 48 """
49 49 Creates a pygrack app wrapped in webtest.TestApp.
50 50 """
51 51 return webtest.TestApp(pygrack_instance)
52 52
53 53
54 54 def test_invalid_service_info_refs_returns_403(pygrack_app):
55 55 response = pygrack_app.get('/info/refs?service=git-upload-packs',
56 56 expect_errors=True)
57 57
58 58 assert response.status_int == 403
59 59
60 60
61 61 def test_invalid_endpoint_returns_403(pygrack_app):
62 62 response = pygrack_app.post('/git-upload-packs', expect_errors=True)
63 63
64 64 assert response.status_int == 403
65 65
66 66
67 67 @pytest.mark.parametrize('sideband', [
68 68 'side-band-64k',
69 69 'side-band',
70 70 'side-band no-progress',
71 71 ])
72 72 def test_pre_pull_hook_fails_with_sideband(pygrack_app, sideband):
73 73 request = ''.join([
74 74 '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ',
75 75 'multi_ack %s ofs-delta\n' % sideband,
76 76 '0000',
77 77 '0009done\n',
78 78 ])
79 79 with mock.patch('vcsserver.hooks.git_pre_pull', return_value=hooks.HookResponse(1, 'foo')):
80 80 response = pygrack_app.post(
81 81 '/git-upload-pack', params=request,
82 82 content_type='application/x-git-upload-pack')
83 83
84 84 data = io.BytesIO(response.body)
85 85 proto = dulwich.protocol.Protocol(data.read, None)
86 86 packets = list(proto.read_pkt_seq())
87 87
88 88 expected_packets = [
89 89 b'NAK\n', b'\x02foo', b'\x02Pre pull hook failed: aborting\n',
90 90 b'\x01' + pygrack.GitRepository.EMPTY_PACK,
91 91 ]
92 92 assert packets == expected_packets
93 93
94 94
95 95 def test_pre_pull_hook_fails_no_sideband(pygrack_app):
96 96 request = ''.join([
97 97 '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' +
98 98 'multi_ack ofs-delta\n'
99 99 '0000',
100 100 '0009done\n',
101 101 ])
102 102 with mock.patch('vcsserver.hooks.git_pre_pull',
103 103 return_value=hooks.HookResponse(1, 'foo')):
104 104 response = pygrack_app.post(
105 105 '/git-upload-pack', params=request,
106 106 content_type='application/x-git-upload-pack')
107 107
108 108 assert response.body == pygrack.GitRepository.EMPTY_PACK
109 109
110 110
111 111 def test_pull_has_hook_messages(pygrack_app):
112 112 request = ''.join([
113 113 '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' +
114 114 'multi_ack side-band-64k ofs-delta\n'
115 115 '0000',
116 116 '0009done\n',
117 117 ])
118
119 pre_pull = 'pre_pull_output'
120 post_pull = 'post_pull_output'
121
118 122 with mock.patch('vcsserver.hooks.git_pre_pull',
119 return_value=hooks.HookResponse(0, 'foo')):
123 return_value=hooks.HookResponse(0, pre_pull)):
120 124 with mock.patch('vcsserver.hooks.git_post_pull',
121 return_value=hooks.HookResponse(1, 'bar')):
125 return_value=hooks.HookResponse(1, post_pull)):
122 126 with mock.patch('vcsserver.subprocessio.SubprocessIOChunker',
123 127 return_value=more_itertools.always_iterable([b'0008NAK\n0009subp\n0000'])):
124 128 response = pygrack_app.post(
125 129 '/git-upload-pack', params=request,
126 130 content_type='application/x-git-upload-pack')
127 131
128 132 data = io.BytesIO(response.body)
129 133 proto = dulwich.protocol.Protocol(data.read, None)
130 134 packets = list(proto.read_pkt_seq())
131 135
132 assert packets == [b'NAK\n', b'\x02foo', b'subp\n', b'\x02bar']
136 assert packets == [b'NAK\n',
137 # pre-pull only outputs if IT FAILS, i.e. a non-zero return code
138 #b'\x02pre_pull_output',
139 b'subp\n',
140 b'\x02post_pull_output']
133 141
134 142
135 143 def test_get_want_capabilities(pygrack_instance):
136 144 data = io.BytesIO(
137 145 b'0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' +
138 146 b'multi_ack side-band-64k ofs-delta\n00000009done\n')
139 147
140 148 request = webob.Request({
141 149 'wsgi.input': data,
142 150 'REQUEST_METHOD': 'POST',
143 151 'webob.is_body_seekable': True
144 152 })
145 153
146 154 capabilities = pygrack_instance._get_want_capabilities(request)
147 155
148 156 assert capabilities == frozenset(
149 157 (b'ofs-delta', b'multi_ack', b'side-band-64k'))
150 158 assert data.tell() == 0
151 159
152 160
153 161 @pytest.mark.parametrize('data,capabilities,expected', [
154 162 ('foo', [], []),
155 163 ('', [pygrack.CAPABILITY_SIDE_BAND_64K], []),
156 164 ('', [pygrack.CAPABILITY_SIDE_BAND], []),
157 165 ('foo', [pygrack.CAPABILITY_SIDE_BAND_64K], [b'0008\x02foo']),
158 166 ('foo', [pygrack.CAPABILITY_SIDE_BAND], [b'0008\x02foo']),
159 167 ('f'*1000, [pygrack.CAPABILITY_SIDE_BAND_64K], [b'03ed\x02' + b'f' * 1000]),
160 168 ('f'*1000, [pygrack.CAPABILITY_SIDE_BAND], [b'03e8\x02' + b'f' * 995, b'000a\x02fffff']),
161 169 ('f'*65520, [pygrack.CAPABILITY_SIDE_BAND_64K], [b'fff0\x02' + b'f' * 65515, b'000a\x02fffff']),
162 170 ('f'*65520, [pygrack.CAPABILITY_SIDE_BAND], [b'03e8\x02' + b'f' * 995] * 65 + [b'0352\x02' + b'f' * 845]),
163 171 ], ids=[
164 172 'foo-empty',
165 173 'empty-64k', 'empty',
166 174 'foo-64k', 'foo',
167 175 'f-1000-64k', 'f-1000',
168 176 'f-65520-64k', 'f-65520'])
169 177 def test_get_messages(pygrack_instance, data, capabilities, expected):
170 178 messages = pygrack_instance._get_messages(data, capabilities)
171 179
172 180 assert messages == expected
173 181
174 182
175 183 @pytest.mark.parametrize('response,capabilities,pre_pull_messages,post_pull_messages', [
176 184 # Unexpected response
177 185 ([b'unexpected_response[no_initial_header]'], [pygrack.CAPABILITY_SIDE_BAND_64K], 'foo', 'bar'),
178 186 # No sideband
179 187 ([b'no-sideband'], [], 'foo', 'bar'),
180 188 # No messages
181 189 ([b'no-messages'], [pygrack.CAPABILITY_SIDE_BAND_64K], '', ''),
182 190 ])
183 191 def test_inject_messages_to_response_nothing_to_do(
184 192 pygrack_instance, response, capabilities, pre_pull_messages, post_pull_messages):
185 193
186 194 new_response = pygrack_instance._build_post_pull_response(
187 195 more_itertools.always_iterable(response), capabilities, pre_pull_messages, post_pull_messages)
188 196
189 197 assert list(new_response) == response
190 198
191 199
192 200 @pytest.mark.parametrize('capabilities', [
193 201 [pygrack.CAPABILITY_SIDE_BAND],
194 202 [pygrack.CAPABILITY_SIDE_BAND_64K],
195 203 ])
196 204 def test_inject_messages_to_response_single_element(pygrack_instance, capabilities):
197 205 response = [b'0008NAK\n0009subp\n0000']
198 206 new_response = pygrack_instance._build_post_pull_response(
199 207 more_itertools.always_iterable(response), capabilities, 'foo', 'bar')
200 208
201 209 expected_response = b''.join([
202 210 b'0008NAK\n',
203 211 b'0008\x02foo',
204 212 b'0009subp\n',
205 213 b'0008\x02bar',
206 214 b'0000'])
207 215
208 216 assert b''.join(new_response) == expected_response
209 217
210 218
211 219 @pytest.mark.parametrize('capabilities', [
212 220 [pygrack.CAPABILITY_SIDE_BAND],
213 221 [pygrack.CAPABILITY_SIDE_BAND_64K],
214 222 ])
215 223 def test_inject_messages_to_response_multi_element(pygrack_instance, capabilities):
216 224 response = more_itertools.always_iterable([
217 225 b'0008NAK\n000asubp1\n', b'000asubp2\n', b'000asubp3\n', b'000asubp4\n0000'
218 226 ])
219 227 new_response = pygrack_instance._build_post_pull_response(response, capabilities, 'foo', 'bar')
220 228
221 229 expected_response = b''.join([
222 230 b'0008NAK\n',
223 231 b'0008\x02foo',
224 232 b'000asubp1\n', b'000asubp2\n', b'000asubp3\n', b'000asubp4\n',
225 233 b'0008\x02bar',
226 234 b'0000'
227 235 ])
228 236
229 237 assert b''.join(new_response) == expected_response
230 238
231 239
232 240 def test_build_failed_pre_pull_response_no_sideband(pygrack_instance):
233 241 response = pygrack_instance._build_failed_pre_pull_response([], 'foo')
234 242
235 243 assert response == [pygrack.GitRepository.EMPTY_PACK]
236 244
237 245
238 246 @pytest.mark.parametrize('capabilities', [
239 247 [pygrack.CAPABILITY_SIDE_BAND],
240 248 [pygrack.CAPABILITY_SIDE_BAND_64K],
241 249 [pygrack.CAPABILITY_SIDE_BAND_64K, b'no-progress'],
242 250 ])
243 251 def test_build_failed_pre_pull_response(pygrack_instance, capabilities):
244 252 response = pygrack_instance._build_failed_pre_pull_response(capabilities, 'foo')
245 253
246 254 expected_response = [
247 255 b'0008NAK\n', b'0008\x02foo', b'0024\x02Pre pull hook failed: aborting\n',
248 256 b'%04x\x01%s' % (len(pygrack.GitRepository.EMPTY_PACK) + 5, pygrack.GitRepository.EMPTY_PACK),
249 257 pygrack.GitRepository.FLUSH_PACKET,
250 258 ]
251 259
252 260 assert response == expected_response
253 261
254 262
255 263 def test_inject_messages_to_response_generator(pygrack_instance):
256 264
257 265 def response_generator():
258 266 response = [
259 267 # protocol start
260 268 b'0008NAK\n',
261 269 ]
262 270 response += [ascii_bytes(f'000asubp{x}\n') for x in range(1000)]
263 271 response += [
264 272 # protocol end
265 273 pygrack.GitRepository.FLUSH_PACKET
266 274 ]
267 275 for elem in response:
268 276 yield elem
269 277
270 278 new_response = pygrack_instance._build_post_pull_response(
271 279 response_generator(), [pygrack.CAPABILITY_SIDE_BAND_64K, b'no-progress'], 'PRE_PULL_MSG\n', 'POST_PULL_MSG\n')
272 280
273 281 assert iter(new_response)
274 282
275 283 expected_response = b''.join([
276 284 # start
277 285 b'0008NAK\n0012\x02PRE_PULL_MSG\n',
278 286 ] + [
279 287 # ... rest
280 288 ascii_bytes(f'000asubp{x}\n') for x in range(1000)
281 289 ] + [
282 290 # final message,
283 291 b'0013\x02POST_PULL_MSG\n0000',
284 292
285 293 ])
286 294
287 295 assert b''.join(new_response) == expected_response
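The expected packet strings in these tests follow git's pkt-line framing. A tiny illustrative helper, not part of the change set, showing where length prefixes such as 0008 come from:

    def pkt_line(payload: bytes) -> bytes:
        # The 4-hex-digit length prefix counts itself plus the payload;
        # sideband payloads additionally start with a channel byte
        # (\x01 pack data, \x02 progress/messages). b'0000' is the flush packet.
        return b'%04x' % (len(payload) + 4) + payload

    assert pkt_line(b'NAK\n') == b'0008NAK\n'
    assert pkt_line(b'\x02foo') == b'0008\x02foo'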