##// END OF EJS Templates
git: simplify dangling commit checking to only full SHAs, otherwise we do too many checks in case of references which is slow on large repos
marcink -
r767:406ae170 default
parent child Browse files
Show More
@@ -1,1150 +1,1160 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2019 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import posixpath as vcspath
22 22 import re
23 23 import stat
24 24 import traceback
25 25 import urllib
26 26 import urllib2
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from dulwich import index, objects
33 33 from dulwich.client import HttpGitClient, LocalGitClient
34 34 from dulwich.errors import (
35 35 NotGitRepository, ChecksumMismatch, WrongObjectException,
36 36 MissingCommitError, ObjectMissing, HangupException,
37 37 UnexpectedCommandError)
38 38 from dulwich.repo import Repo as DulwichRepo
39 39 from dulwich.server import update_server_info
40 40
41 41 from vcsserver import exceptions, settings, subprocessio
42 42 from vcsserver.utils import safe_str, safe_int
43 43 from vcsserver.base import RepoFactory, obfuscate_qs
44 44 from vcsserver.hgcompat import (
45 45 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
46 46 from vcsserver.git_lfs.lib import LFSOidStore
47 47 from vcsserver.vcs_base import RemoteBase
48 48
49 49 DIR_STAT = stat.S_IFDIR
50 50 FILE_MODE = stat.S_IFMT
51 51 GIT_LINK = objects.S_IFGITLINK
52 52 PEELED_REF_MARKER = '^{}'
53 53
54 54
55 55 log = logging.getLogger(__name__)
56 56
57 57
def str_to_dulwich(value):
    """
    Decode ``value`` using ``settings.WIRE_ENCODING``.

    Dulwich 0.10.1a requires `unicode` objects to be passed in.
    """
    wire_encoding = settings.WIRE_ENCODING
    return value.decode(wire_encoding)
63 63
64 64
def reraise_safe_exceptions(func):
    """
    Decorator converting Dulwich exceptions to something neutral.

    Object lookup errors are re-raised as ``exceptions.LookupException`` and
    hangup/unexpected-command errors as ``exceptions.VcsException``; in both
    cases the original exception is handed over as ``org_exc``. Every other
    exception propagates unchanged.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            outcome = func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException,
                MissingCommitError, ObjectMissing) as err:
            raise exceptions.LookupException(org_exc=err)(safe_str(err))
        except (HangupException, UnexpectedCommandError) as err:
            raise exceptions.VcsException(org_exc=err)(safe_str(err))
        # NOTE(marcink): because of how dulwich handles some exceptions
        # (KeyError on empty repos), we cannot track and convert all the
        # remaining exception types here; they may come from other handlers,
        # so they are deliberately left to propagate untouched.
        return outcome
    return wrapper
87 87
88 88
class Repo(DulwichRepo):
    """
    Thin wrapper around the dulwich Repo class.

    Dulwich sometimes keeps .idx file descriptors open, which leads to
    "Too many open files" errors. To avoid that, the repository is closed
    when the object is destroyed.
    """
    def __del__(self):
        # `object_store` can be missing when construction failed part-way
        if not hasattr(self, 'object_store'):
            return
        self.close()
100 100
101 101
class Repository(LibGit2Repo):
    """libgit2 repository usable in a ``with`` statement; ``free()`` is called on exit."""

    def __enter__(self):
        # the repository object itself is what the ``with`` block works on
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # always release the repository; nothing is returned, so an
        # exception raised inside the block is not suppressed
        self.free()
109 109
110 110
class GitFactory(RepoFactory):
    """Factory building repository objects for the path stored in ``wire['path']``."""
    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """Build a libgit2 ``Repository`` or, by default, a dulwich ``Repo``."""
        path = wire['path']
        if not use_libgit2:
            # dulwich wants the path decoded first
            return Repo(str_to_dulwich(path))
        return Repository(path)

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """Shortcut for ``repo(wire, use_libgit2=True)``."""
        return self.repo(wire, use_libgit2=True)
129 129
130 130
131 131 class GitRemote(RemoteBase):
132 132
def __init__(self, factory):
    """Remember the repository factory and register the bulk attribute methods."""
    self._factory = factory
    # attribute name -> bound method, looked up by `bulk_request`
    self._bulk_methods = dict([
        ("date", self.date),
        ("author", self.author),
        ("branch", self.branch),
        ("message", self.message),
        ("parents", self.parents),
        ("_commit", self.revision),
    ])
143 143
def _wire_to_config(self, wire):
    """
    Flatten ``wire['config']`` into a dict.

    Each config entry is indexed as ``entry[0]``, ``entry[1]``, ``entry[2]``;
    the first two are joined with ``_`` to form the key and the third is the
    value. An empty dict is returned when ``wire`` has no ``config`` key.
    """
    if 'config' not in wire:
        return {}
    return {
        entry[0] + '_' + entry[1]: entry[2]
        for entry in wire['config']}
148 148
def _remote_conf(self, config):
    """
    Build the list of ``-c`` options passed to git for remote operations.

    ``core.askpass`` is always set; ``http.sslCAinfo`` is added when
    ``config`` has a truthy ``vcs_ssl_dir`` entry.
    """
    git_options = ['-c', 'core.askpass=""']
    cert_location = config.get('vcs_ssl_dir')
    if cert_location:
        git_options += ['-c', 'http.sslCAinfo={}'.format(cert_location)]
    return git_options
157 157
@reraise_safe_exceptions
def discover_git_version(self):
    """Run ``git --version`` and return its output without the ``git version`` prefix."""
    version, _stderr = self.run_git_command(
        {}, ['--version'], _bare=True, _safe=True)
    marker = 'git version'
    if version.startswith(marker):
        version = version[len(marker):]
    return version.strip()
166 166
@reraise_safe_exceptions
def is_empty(self, wire):
    """
    Tell whether the repository is empty.

    A repository whose HEAD has a name is reported as non-empty; otherwise
    libgit2's own ``is_empty`` decides. Any error raised while checking is
    treated as "empty".
    """
    with self._factory.repo_libgit2(wire) as repo:
        try:
            if repo.head.name:
                return False
            # NOTE(marcink): check again using more expensive method
            return repo.is_empty
        except Exception:
            return True
183 183
@reraise_safe_exceptions
def assert_correct_path(self, wire):
    """
    Return True when the repository at ``wire['path']`` can be opened with
    libgit2, False when opening raises ``pygit2.GitError``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _assert_correct_path(_context_uid, _repo_id):
        try:
            # opening (and releasing) the repository is the whole check
            with self._factory.repo_libgit2(wire):
                pass
        except pygit2.GitError:
            log.debug(
                "Invalid Git path `%s`, tb: %s",
                wire.get('path'), traceback.format_exc())
            return False
        return True
    return _assert_correct_path(context_uid, repo_id)
201 201
@reraise_safe_exceptions
def bare(self, wire):
    """Return the ``is_bare`` flag of the libgit2 repository."""
    with self._factory.repo_libgit2(wire) as git_repo:
        return git_repo.is_bare
207 207
@reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
    """Return the ``data`` of the object stored under ``sha``."""
    with self._factory.repo_libgit2(wire) as git_repo:
        return git_repo[sha].data
215 215
@reraise_safe_exceptions
def blob_raw_length(self, wire, sha):
    """Return the ``size`` of the object stored under ``sha`` (conditionally cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _blob_raw_length(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as git_repo:
            return git_repo[sha].size

    return _blob_raw_length(repo_id, sha)
228 228
def _parse_lfs_pointer(self, raw_content):
    """
    Parse a git-lfs pointer file.

    Returns a dict with ``spec_ver``, ``oid_hash`` and ``oid_size`` (all
    strings) when ``raw_content`` is a well-formed pointer, otherwise an
    empty dict.
    """
    spec_string = 'version https://git-lfs.github.com/spec'
    # cheap prefix test first, so the regex only runs on likely pointers
    if not raw_content or not raw_content.startswith(spec_string):
        return {}

    pointer_re = re.compile(r"""
    (?:\n)?
    ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
    ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
    ^size[ ](?P<oid_size>[0-9]+)\n
    (?:\n)?
    """, re.VERBOSE | re.MULTILINE)
    found = pointer_re.match(raw_content)
    return found.groupdict() if found else {}
245 245
@reraise_safe_exceptions
def is_large_file(self, wire, commit_id):
    """
    Return the parsed LFS pointer for the object stored under ``commit_id``.

    Binary objects, and objects that are not LFS pointers, give an empty dict.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _is_large_file(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as git_repo:
            candidate = git_repo[commit_id]
            if not candidate.is_binary:
                return self._parse_lfs_pointer(candidate.data)
            return {}

    return _is_large_file(repo_id, commit_id)
261 261
@reraise_safe_exceptions
def in_largefiles_store(self, wire, oid):
    """
    Tell whether ``oid`` is present in the configured LFS store.

    Returns False when no ``vcs_git_lfs_store_location`` is configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as git_repo:
        repo_name = git_repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        return False

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.has_oid()
277 277
@reraise_safe_exceptions
def store_path(self, wire, oid):
    """
    Return the ``oid_path`` of ``oid`` inside the configured LFS store.

    Raises ``ValueError`` when no ``vcs_git_lfs_store_location`` is configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as git_repo:
        repo_name = git_repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        raise ValueError('Unable to fetch oid with path {}'.format(oid))

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.oid_path
291 291
@reraise_safe_exceptions
def bulk_request(self, wire, rev, pre_load):
    """
    Collect several attributes of ``rev`` in one call.

    Each name in ``pre_load`` is looked up in ``self._bulk_methods`` and the
    method is called with ``(wire, rev)``. Returns a dict mapping attribute
    name to result. A ``KeyError`` raised while handling an attribute is
    reported as ``exceptions.VcsException``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_request(_repo_id, _rev, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                collected[attr] = self._bulk_methods[attr](wire, rev)
            except KeyError as e:
                raise exceptions.VcsException(e)(
                    "Unknown bulk attribute: %s" % attr)
        return collected

    # the sorted list is what gets passed on as the cached call's argument
    return _bulk_request(repo_id, rev, sorted(pre_load))
309 309
def _build_opener(self, url):
    """
    Build a ``urllib2`` opener for ``url``.

    When the url carries auth info, basic and digest auth handlers sharing
    one password manager are installed; otherwise a plain opener is returned.
    """
    _, authinfo = url_parser(url).authinfo()
    if not authinfo:
        return urllib2.build_opener()

    # create a password manager
    passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
    passmgr.add_password(*authinfo)
    auth_handlers = [
        httpbasicauthhandler(passmgr),
        httpdigestauthhandler(passmgr),
    ]
    return urllib2.build_opener(*auth_handlers)
324 324
def _type_id_to_name(self, type_id):
    """
    Map a numeric object type id (1-4) to its name.

    Raises ``KeyError`` for any other id.
    """
    type_names = dict(zip(
        (1, 2, 3, 4),
        (b'commit', b'tree', b'blob', b'tag')))
    return type_names[type_id]
332 332
@reraise_safe_exceptions
def check_url(self, url, config):
    """
    Check that ``url`` answers like a git repository over HTTP.

    Requests ``<url>/info/refs?service=git-upload-pack`` and inspects the
    response body. Returns True on success; raises ``exceptions.URLError``
    when the URL cannot be opened, does not answer with code 200, or the
    body does not look like git. Password and query string are obfuscated
    in everything that is logged or put into error messages.
    """
    url_obj = url_parser(url)
    test_uri, _ = url_obj.authinfo()
    # scrub credentials before the url is rendered for logs/errors
    url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
    url_obj.query = obfuscate_qs(url_obj.query)
    cleaned_uri = str(url_obj)
    log.info("Checking URL for remote cloning/import: %s", cleaned_uri)

    if not test_uri.endswith('info/refs'):
        test_uri = test_uri.rstrip('/') + '/info/refs'

    opener = self._build_opener(url)
    opener.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

    query_string = '?%s' % urllib.urlencode({"service": 'git-upload-pack'})
    request = urllib2.Request("%s%s" % (test_uri, query_string), None, {})

    try:
        log.debug("Trying to open URL %s", cleaned_uri)
        resp = opener.open(request)
        if resp.code != 200:
            raise exceptions.URLError()('Return Code is not 200')
    except Exception as e:
        log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
        # means it cannot be cloned
        raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))

    # now detect if it's proper git repo
    gitdata = resp.read()
    looks_like_git = (
        'service=git-upload-pack' in gitdata
        # old style git can return some other format !
        or re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata))
    if not looks_like_git:
        raise exceptions.URLError()(
            "url [%s] does not look like an git" % (cleaned_uri,))

    return True
375 375
@reraise_safe_exceptions
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
    """
    Pull from ``url`` and store the matching remote refs in the repository.

    Only refs starting with one of ``valid_refs`` and not ending with
    ``deferred`` are stored. With ``update_after_clone`` HEAD is set to the
    remote HEAD and the index is built from its tree.
    """
    # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
    remote_refs = self.pull(wire, url, apply_refs=False)
    repo = self._factory.repo(wire)
    if isinstance(valid_refs, list):
        # str.startswith accepts a tuple of prefixes, not a list
        valid_refs = tuple(valid_refs)

    for ref_name in remote_refs:
        # only parse heads/tags and skip so called deferred tags
        if not ref_name.startswith(valid_refs):
            continue
        if ref_name.endswith(deferred):
            continue
        repo[ref_name] = remote_refs[ref_name]

    if update_after_clone:
        # we want to checkout HEAD
        repo["HEAD"] = remote_refs["HEAD"]
        index.build_index_from_tree(
            repo.path, repo.index_path(), repo.object_store,
            repo["HEAD"].tree)
394 394
@reraise_safe_exceptions
def branch(self, wire, commit_id):
    """Return the names of the ``refs/heads`` refs that point exactly at ``commit_id``."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _branch(_context_uid, _repo_id, _commit_id):
        heads = re.compile('^refs/heads')
        return [
            ref_name.split('refs/heads/')[-1]
            for ref_name, ref_sha in self.get_refs(wire).items()
            if heads.match(ref_name) and ref_sha == _commit_id]

    return _branch(context_uid, repo_id, commit_id)
409 409
@reraise_safe_exceptions
def commit_branches(self, wire, commit_id):
    """Return the list produced by ``repo.branches.with_commit(commit_id)``."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _commit_branches(_context_uid, _repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as git_repo:
            return list(git_repo.branches.with_commit(_commit_id))

    return _commit_branches(context_uid, repo_id, commit_id)
421 421
@reraise_safe_exceptions
def add_object(self, wire, content):
    """
    Store ``content`` as a new blob in the repository.

    :param wire: wire dict identifying the repository.
    :param content: raw blob content.
    :return: id of the created dulwich blob.
    """
    # The blob is a dulwich object and is stored through the dulwich
    # `object_store`, so the dulwich repository has to be used here (the
    # same way `commit` does); the libgit2 repository object has no
    # `object_store` attribute and the call used to fail on it.
    repo = self._factory.repo(wire)
    blob = objects.Blob()
    blob.set_raw_string(content)
    repo.object_store.add_object(blob)
    return blob.id
430 430
# TODO: this is quite complex, check if that can be simplified
@reraise_safe_exceptions
def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
    """
    Create a commit from in-memory changes and point ``branch`` at it.

    :param wire: wire dict identifying the repository.
    :param commit_data: mapping of attribute name to value; each pair is
        set on the new dulwich ``Commit`` with ``setattr``.
    :param branch: branch name; ``refs/heads/<branch>`` is set to the commit.
    :param commit_tree: id of the tree to start from, or a falsy value to
        start from an empty tree.
    :param updated: iterable of node dicts; the keys read here are
        ``path``, ``node_path``, ``mode`` and ``content``.
    :param removed: iterable of ``/``-separated paths to delete from the tree.
    :return: id of the created commit.
    """
    repo = self._factory.repo(wire)
    object_store = repo.object_store

    # Create tree and populates it with blobs
    commit_tree = commit_tree and repo[commit_tree] or objects.Tree()

    for node in updated:
        # Compute subdirs if needed
        dirpath, nodename = vcspath.split(node['path'])
        dirnames = map(safe_str, dirpath and dirpath.split('/') or [])
        parent = commit_tree
        # (name, tree) pairs from the root down to the deepest existing tree
        ancestors = [('', parent)]

        # Tries to dig for the deepest existing tree
        while dirnames:
            curdir = dirnames.pop(0)
            try:
                dir_id = parent[curdir][1]
            except KeyError:
                # put curdir back into dirnames and stops
                dirnames.insert(0, curdir)
                break
            else:
                # If found, updates parent
                parent = repo[dir_id]
                ancestors.append((curdir, parent))
        # Now parent is deepest existing tree and we need to create
        # subtrees for dirnames (in reverse order)
        # [this only applies for nodes from added]
        new_trees = []

        blob = objects.Blob.from_string(node['content'])

        if dirnames:
            # If there are trees which should be created we need to build
            # them now (in reverse order)
            reversed_dirnames = list(reversed(dirnames))
            curtree = objects.Tree()
            curtree[node['node_path']] = node['mode'], blob.id
            new_trees.append(curtree)
            for dirname in reversed_dirnames[:-1]:
                newtree = objects.Tree()
                newtree[dirname] = (DIR_STAT, curtree.id)
                new_trees.append(newtree)
                curtree = newtree
            # hook the topmost new tree into the deepest existing one
            parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
        else:
            parent.add(name=node['node_path'], mode=node['mode'], hexsha=blob.id)

        new_trees.append(parent)
        # Update ancestors
        # walk bottom-up so every parent entry gets the child's new tree id
        reversed_ancestors = reversed(
            [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
        for parent, tree, path in reversed_ancestors:
            parent[path] = (DIR_STAT, tree.id)
            object_store.add_object(tree)

        object_store.add_object(blob)
        for tree in new_trees:
            object_store.add_object(tree)

    for node_path in removed:
        paths = node_path.split('/')
        tree = commit_tree
        trees = [tree]
        # Traverse deep into the forest...
        for path in paths:
            try:
                obj = repo[tree[path][1]]
                if isinstance(obj, objects.Tree):
                    trees.append(obj)
                    tree = obj
            except KeyError:
                break
        # Cut down the blob and all rotten trees on the way back...
        # NOTE(review): reversed() over zip() relies on zip returning a list
        # (Python 2) -- confirm before running this on Python 3
        for path, tree in reversed(zip(paths, trees)):
            del tree[path]
            if tree:
                # This tree still has elements - don't remove it or any
                # of its parents
                break

    object_store.add_object(commit_tree)

    # Create commit
    commit = objects.Commit()
    commit.tree = commit_tree.id
    for k, v in commit_data.iteritems():
        setattr(commit, k, v)
    object_store.add_object(commit)

    self.create_branch(wire, branch, commit.id)

    # dulwich set-ref
    ref = 'refs/heads/%s' % branch
    repo.refs[ref] = commit.id

    return commit.id
532 532
@reraise_safe_exceptions
def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
    """
    Fetch from ``url`` into the repository using a dulwich client.

    :param wire: wire dict identifying the target repository.
    :param url: source; a value other than ``'default'`` without ``://``
        is handled by ``LocalGitClient``, anything else by ``HttpGitClient``.
    :param apply_refs: when true, store the fetched refs in the repository.
    :param refs: optional collection of ref names limiting what is fetched
        and returned; indexed with ``refs[-1]`` when setting HEAD.
    :param update_after: when true, set HEAD to the remote HEAD and build
        the index from its tree.
    :return: dict of remote refs (filtered by ``refs`` when given).
    :raises exceptions.AbortException: when ``url`` is not a Git repository.
    """
    if url != 'default' and '://' not in url:
        client = LocalGitClient(url)
    else:
        url_obj = url_parser(url)
        o = self._build_opener(url)
        # from here on `url` is the first item returned by authinfo()
        url, _ = url_obj.authinfo()
        client = HttpGitClient(base_url=url, opener=o)
    repo = self._factory.repo(wire)

    determine_wants = repo.object_store.determine_wants_all
    if refs:
        def determine_wants_requested(references):
            return [references[r] for r in references if r in refs]
        determine_wants = determine_wants_requested

    try:
        remote_refs = client.fetch(
            path=url, target=repo, determine_wants=determine_wants)
    except NotGitRepository as e:
        log.warning(
            'Trying to fetch from "%s" failed, not a Git repository.', url)
        # Exception can contain unicode which we convert
        raise exceptions.AbortException(e)(repr(e))

    # mikhail: client.fetch() returns all the remote refs, but fetches only
    # refs filtered by `determine_wants` function. We need to filter result
    # as well
    if refs:
        remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}

    if apply_refs:
        # TODO: johbo: Needs proper test coverage with a git repository
        # that contains a tag object, so that we would end up with
        # a peeled ref at this point.
        for k in remote_refs:
            if k.endswith(PEELED_REF_MARKER):
                log.debug("Skipping peeled reference %s", k)
                continue
            repo[k] = remote_refs[k]

        if refs and not update_after:
            # mikhail: explicitly set the head to the last ref.
            repo['HEAD'] = remote_refs[refs[-1]]

    if update_after:
        # we want to checkout HEAD
        repo["HEAD"] = remote_refs["HEAD"]
        index.build_index_from_tree(repo.path, repo.index_path(),
                                    repo.object_store, repo["HEAD"].tree)
    return remote_refs
585 585
@reraise_safe_exceptions
def sync_fetch(self, wire, url, refs=None, all_refs=False):
    """
    Fetch refs from ``url`` using the git command line.

    The remote is listed with ``git ls-remote`` first (heads and tags only
    unless ``all_refs``), then the selected refs are fetched in chunks with
    ``git fetch --force --prune``. When ``refs`` is given, only remote refs
    whose sha is in ``refs`` are fetched. Peeled refs and ``HEAD`` are
    never synced.

    :return: ordered dict of remote ref name to sha, as listed by the remote.
    """
    # kept from the original: the repository is instantiated, the object
    # itself is not used afterwards
    repo = self._factory.repo(wire)
    if refs and not isinstance(refs, (list, tuple)):
        refs = [refs]

    config = self._wire_to_config(wire)

    def run_remote_command(cmd):
        return self.run_git_command(
            wire, cmd, fail_on_stderr=False,
            _copts=self._remote_conf(config),
            extra_env={'GIT_TERMINAL_PROMPT': '0'})

    # get all remote refs we'll use to fetch later
    ls_remote = ['ls-remote']
    if not all_refs:
        ls_remote.extend(['--heads', '--tags'])
    ls_remote.append(url)
    output, __ = run_remote_command(ls_remote)

    remote_refs = collections.OrderedDict()
    fetch_refs = []

    for ref_line in output.splitlines():
        sha, ref = ref_line.split('\t')
        sha = sha.strip()
        if ref in remote_refs:
            # duplicate, skip
            continue
        if ref.endswith(PEELED_REF_MARKER):
            log.debug("Skipping peeled reference %s", ref)
            continue
        # don't sync HEAD
        if ref == 'HEAD':
            continue

        remote_refs[ref] = sha

        # without a filter everything is fetched, otherwise only the refs
        # whose sha was requested
        if not refs or sha in refs:
            fetch_refs.append('{}:{}'.format(ref, ref))
    log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))

    for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
        fetch_refs_chunks = list(chunk)
        log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
        _out, _err = run_remote_command(
            ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks)

    return remote_refs
639 639
@reraise_safe_exceptions
def sync_push(self, wire, url, refs=None):
    """
    Mirror-push the repository to ``url`` with ``git push --mirror``.

    Nothing is pushed when ``check_url`` returns a falsy value. ``refs`` is
    accepted but not used.
    """
    url_ok = self.check_url(url, wire)
    if not url_ok:
        return

    config = self._wire_to_config(wire)
    # instantiated for its side effects only; the result is discarded
    self._factory.repo(wire)
    push_cmd = ['push', url, '--mirror']
    self.run_git_command(
        wire, push_cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})
650 650
@reraise_safe_exceptions
def get_remote_refs(self, wire, url):
    """Open ``url`` as a dulwich ``Repo`` and return its refs; ``wire`` is not used."""
    remote_repo = Repo(url)
    return remote_repo.get_refs()
655 655
@reraise_safe_exceptions
def get_description(self, wire):
    """Return the result of ``get_description()`` on the dulwich repository."""
    return self._factory.repo(wire).get_description()
660 660
@reraise_safe_exceptions
def get_missing_revs(self, wire, rev1, rev2, path2):
    """
    Return commit ids reachable from ``rev2`` but not from ``rev1``.

    Objects are first fetched in both directions between this repository
    and the one at ``path2``; the walk is done on the ``path2`` repository.
    """
    local_repo = self._factory.repo(wire)
    LocalGitClient(thin_packs=False).fetch(path2, local_repo)

    # same wire, only pointing at the other repository
    wire_remote = wire.copy()
    wire_remote['path'] = path2
    repo_remote = self._factory.repo(wire_remote)
    LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)

    walker = repo_remote.get_walker(include=[rev2], exclude=[rev1])
    return [entry.commit.id for entry in walker]
675 675
@reraise_safe_exceptions
def get_object(self, wire, sha):
    """
    Resolve ``sha`` with ``revparse_single`` and describe the found object.

    A tag object is resolved to its target. A dangling-commit check is
    done only when ``sha`` is the full id of the resolved, non-tag object:
    if a reference was used and it parsed, the commit cannot be dangling,
    and skipping the check avoids a slow branch walk on large repositories.

    :return: dict with ``id``, ``type``, ``commit_id`` and ``idx`` keys.
    :raises exceptions.LookupException: when ``sha`` cannot be resolved,
        or resolves to an object that no branch contains.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _get_object(_context_uid, _repo_id, _sha):
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:

            missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
            try:
                commit = repo.revparse_single(sha)
            except (KeyError, ValueError) as e:
                raise exceptions.LookupException(e)(missing_commit_err)

            is_tag = isinstance(commit, pygit2.Tag)
            if is_tag:
                commit = repo.get(commit.target)

            # tags are never checked; a reference that parsed (sha differs
            # from the resolved id) means we're not having a dangling commit
            check_dangling = not is_tag and sha == commit.hex

            if check_dangling:
                # check for dangling commit; any() stops at the first
                # branch found instead of collecting all of them
                if not any(repo.branches.with_commit(commit.hex)):
                    raise exceptions.LookupException(None)(missing_commit_err)

            commit_id = commit.hex
            type_id = commit.type

            return {
                'id': commit_id,
                'type': self._type_id_to_name(type_id),
                'commit_id': commit_id,
                'idx': 0
            }

    return _get_object(context_uid, repo_id, sha)
712 722
@reraise_safe_exceptions
def get_refs(self, wire):
    """Return a dict of ref name to target hex for every ref under ``refs/heads/`` or ``refs/tags/``."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _get_refs(_context_uid, _repo_id):
        with self._factory.repo_libgit2(wire) as git_repo:
            wanted = re.compile('^refs/(heads|tags)/')
            return {
                ref.name: ref.target.hex
                for ref in git_repo.listall_reference_objects()
                if wanted.match(ref.name)}

    return _get_refs(context_uid, repo_id)
726 736
@reraise_safe_exceptions
def get_branch_pointers(self, wire):
    """Return a dict of target hex to shorthand name for every ref starting with ``refs/heads``."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _get_branch_pointers(_context_uid, _repo_id):
        repo_init = self._factory.repo_libgit2(wire)
        heads = re.compile('^refs/heads')
        with repo_init as git_repo:
            return {
                ref.target.hex: ref.shorthand
                for ref in git_repo.listall_reference_objects()
                if heads.match(ref.name)}

    return _get_branch_pointers(context_uid, repo_id)
740 750
@reraise_safe_exceptions
def head(self, wire, show_exc=True):
    """
    Return the hex id of the object HEAD peels to.

    When resolving HEAD fails, the error is re-raised if ``show_exc`` is
    true; otherwise ``None`` is returned.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _head(_context_uid, _repo_id, _show_exc):
        with self._factory.repo_libgit2(wire) as git_repo:
            try:
                return git_repo.head.peel().hex
            except Exception:
                if not show_exc:
                    return None
                raise
    return _head(context_uid, repo_id, show_exc)
754 764
@reraise_safe_exceptions
def init(self, wire):
    """Initialise a dulwich repository at ``wire['path']`` and keep it as ``self.repo``."""
    self.repo = Repo.init(str_to_dulwich(wire['path']))
759 769
@reraise_safe_exceptions
def init_bare(self, wire):
    """Initialise a bare dulwich repository at ``wire['path']`` and keep it as ``self.repo``."""
    self.repo = Repo.init_bare(str_to_dulwich(wire['path']))
764 774
@reraise_safe_exceptions
def revision(self, wire, rev):
    """
    Return ``{'id': <hex>}`` for the object stored under ``rev``, plus a
    ``tree`` entry when the object has a ``tree_id``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _revision(_context_uid, _repo_id, _rev):
        with self._factory.repo_libgit2(wire) as git_repo:
            found = git_repo[rev]
            described = {'id': found.id.hex}
            # tree objects itself don't have tree_id attribute
            if hasattr(found, 'tree_id'):
                described['tree'] = found.tree_id.hex
            return described
    return _revision(context_uid, repo_id, rev)
783 793
@reraise_safe_exceptions
def date(self, wire, commit_id):
    """
    Return ``[commit_time, commit_time_offset]`` for ``commit_id``.

    When the object has no ``commit_time`` itself, the values are taken
    from ``get_object()`` of that object.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _date(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as git_repo:
            commit = git_repo[commit_id]
            if not hasattr(commit, 'commit_time'):
                commit = commit.get_object()

            # TODO(marcink): check dulwich difference of offset vs timezone
            return [commit.commit_time, commit.commit_time_offset]
    return _date(repo_id, commit_id)
802 812
@reraise_safe_exceptions
def author(self, wire, commit_id):
    """
    Return the author of ``commit_id`` as ``name <email>``, or just the
    raw name when the author has no email.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _author(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as git_repo:
            commit = git_repo[commit_id]
            # objects without an `author` are resolved via get_object() first
            holder = commit if hasattr(commit, 'author') else commit.get_object()
            signature = holder.author

            if not signature.email:
                return u"{}".format(signature.raw_name)
            return u"{} <{}>".format(signature.name, signature.email)
    return _author(repo_id, commit_id)
822 832
@reraise_safe_exceptions
def message(self, wire, commit_id):
    """Return the ``message`` of the object stored under ``commit_id``."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _message(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as git_repo:
            return git_repo[commit_id].message
    return _message(repo_id, commit_id)
833 843
@reraise_safe_exceptions
def parents(self, wire, commit_id):
    """
    Return the hex ids of the parents of ``commit_id``.

    When the object has no ``parent_ids`` itself, they are taken from
    ``get_object()`` of that object.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _parents(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as git_repo:
            commit = git_repo[commit_id]
            if not hasattr(commit, 'parent_ids'):
                commit = commit.get_object()
            return [parent.hex for parent in commit.parent_ids]
    return _parents(repo_id, commit_id)
849 859
@reraise_safe_exceptions
def children(self, wire, commit_id):
    """
    Return the ids of the children of ``commit_id``.

    Parses ``git rev-list --all --children``: on each output line starting
    with ``commit_id``, everything after the first space-separated field
    is collected.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _children(_repo_id, _commit_id):
        output, __ = self.run_git_command(
            wire, ['rev-list', '--all', '--children'])

        starts_with_commit = re.compile(r'^%s' % commit_id)
        return [
            child_id
            for line in output.splitlines()
            if starts_with_commit.match(line)
            for child_id in line.split(' ')[1:]]
    return _children(repo_id, commit_id)
867 877
@reraise_safe_exceptions
def set_refs(self, wire, key, value):
    """Create the reference ``key`` pointing at ``value``, overwriting an existing one."""
    with self._factory.repo_libgit2(wire) as git_repo:
        git_repo.references.create(key, value, force=True)
873 883
@reraise_safe_exceptions
def create_branch(self, wire, branch_name, commit_id, force=False):
    """
    Create local branch ``branch_name`` at ``commit_id``.

    Without ``force`` the branch is created only if that branch isn't
    existing yet; with ``force`` it is always (re)created.
    """
    with self._factory.repo_libgit2(wire) as git_repo:
        target = git_repo[commit_id]

        # `or` short-circuits, so the lookup is skipped when forcing
        if force or not git_repo.branches.get(branch_name):
            git_repo.branches.local.create(branch_name, target, force=force)
885 895
886 896 @reraise_safe_exceptions
def remove_ref(self, wire, key):
    """
    Delete the reference named ``key`` from the libgit2 repository
    described by ``wire``.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(key)
891 901
892 902 @reraise_safe_exceptions
def tag_remove(self, wire, tag_name):
    """
    Delete the reference ``refs/tags/<tag_name>`` from the libgit2
    repository described by ``wire``.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete('refs/tags/{}'.format(tag_name))
898 908
899 909 @reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Return, as a list, the result of the object store's ``tree_changes``
    between the trees of ``source_id`` and ``target_id``.

    A falsy ``source_id`` is passed on as a ``None`` source tree.
    """
    # TODO(marcink): remove this seems it's only used by tests
    repo = self._factory.repo(wire)

    source_tree = None
    if source_id:
        source_tree = repo[source_id].tree
    target_tree = repo[target_id].tree

    return list(repo.object_store.tree_changes(source_tree, target_tree))
907 917
908 918 @reraise_safe_exceptions
def tree_and_type_for_path(self, wire, commit_id, path):
    """
    Look up ``path`` in the tree of ``commit_id``.

    Returns a ``(hex id, type, filemode)`` tuple for the entry found, or
    ``(None, None, None)`` when the tree lookup raises ``KeyError``. The
    lookup is wrapped by the region's conditional cache.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            # the commit lookup stays outside the try: only a missing
            # path is mapped to the empty result
            commit = repo[commit_id]
            try:
                entry = commit.tree[path]
            except KeyError:
                return None, None, None

            return entry.id.hex, entry.type, entry.filemode

    return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
925 935
926 936 @reraise_safe_exceptions
def tree_items(self, wire, tree_id):
    """
    List the entries of the tree ``tree_id``.

    Returns a list of ``(name, filemode, hex id, type)`` tuples. Raises
    ``ObjectMissing`` when the repository lookup of ``tree_id`` raises
    ``KeyError``. The lookup is wrapped by the region's conditional cache.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_items(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing('No tree with id: {}'.format(tree_id))

            entries = []
            for entry in tree:
                kind = entry.type
                if kind == 'commit':
                    # NOTE(marcink): submodules we translate to 'link' for backward compat
                    kind = 'link'
                entries.append((entry.name, entry.filemode, entry.hex, kind))
            return entries

    return _tree_items(repo_id, tree_id)
952 962
953 963 @reraise_safe_exceptions
def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Old version that uses subprocess to call diff

    Runs ``git diff`` between the two commits, or ``git show`` on
    ``commit_id_2`` when ``commit_id_1`` equals ``self.EMPTY_COMMIT``.
    ``file_filter`` (when truthy) limits the command to one path,
    ``opt_ignorews`` adds ``--ignore-all-space`` and ``context`` is the
    number passed to ``-U``. Returns the command output as text.
    """
    from_empty = commit_id_1 == self.EMPTY_COMMIT

    options = [
        '-U%s' % context, '--patch',
        '--binary',
        '--find-renames',
        '--no-indent-heuristic',
    ]
    if opt_ignorews:
        options.append('--ignore-all-space')

    if from_empty:
        git_args = ['show'] + options + [commit_id_2]
    else:
        git_args = ['diff'] + options + [commit_id_1, commit_id_2]

    if file_filter:
        git_args += ['--', file_filter]

    text, __ = self.run_git_command(wire, git_args)
    if not from_empty:
        return text

    # 'show' prints commit metadata first; drop everything before the
    # first line starting with 'diff' (everything, if there is none)
    rows = text.splitlines()
    start = next(
        (pos for pos, row in enumerate(rows) if row.startswith('diff')),
        len(rows))
    # Append new line just like 'diff' command do
    return '\n'.join(rows[start:]) + '\n'
993 1003
994 1004 @reraise_safe_exceptions
def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Produce a patch between two commits using libgit2.

    When ``commit_id_1`` equals ``self.EMPTY_COMMIT`` the tree of
    ``commit_id_2`` is diffed against no tree at all; otherwise it is
    diffed against the tree of ``commit_id_1`` and rename detection is
    run on the result. ``opt_ignorews`` adds the ignore-whitespace flag
    and ``context`` is passed as the number of context lines.

    With a truthy ``file_filter`` only the patch whose old file path
    equals it is returned (``''`` when there is none); otherwise the
    whole patch is returned. A missing patch is returned as ``''``.
    """
    with self._factory.repo_libgit2(wire) as repo:
        diff_flags = pygit2.GIT_DIFF_SHOW_BINARY
        if opt_ignorews:
            diff_flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE

        new_commit = repo[commit_id_2]
        if commit_id_1 == self.EMPTY_COMMIT:
            diff_obj = new_commit.tree.diff_to_tree(
                flags=diff_flags, context_lines=context, swap=True)
        else:
            old_commit = repo[commit_id_1]
            diff_obj = new_commit.tree.diff_to_tree(
                old_commit.tree, flags=diff_flags, context_lines=context,
                swap=True)
            diff_obj.find_similar(flags=pygit2.GIT_DIFF_FIND_RENAMES)

        if not file_filter:
            return diff_obj.patch or ''

        for file_patch in diff_obj:
            if file_patch.delta.old_file.path == file_filter:
                return file_patch.patch or ''
        # no matching path == no diff
        return ''
1026 1036
1027 1037 @reraise_safe_exceptions
def node_history(self, wire, commit_id, path, limit):
    """
    Return the ids of commits touching ``path``, starting the walk at
    ``commit_id``.

    ``limit == 1`` uses ``git rev-list -1``; any other value uses
    ``git log`` with ``-n <limit>`` added only when ``limit`` is truthy.
    The ids are every 40 character hex run found in the command output.
    The lookup is wrapped by the region's conditional cache.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
        if limit == 1:
            # optimize for n==1, rev-list is much faster for that use-case
            git_args = ['rev-list', '-1', commit_id, '--', path]
        else:
            git_args = ['log']
            if limit:
                git_args += ['-n', str(safe_int(limit, 0))]
            git_args += ['--pretty=format: %H', '-s', commit_id, '--', path]

        stdout, __ = self.run_git_command(wire, git_args)
        return re.findall(r'[0-9a-fA-F]{40}', stdout)

    return _node_history(context_uid, repo_id, commit_id, path, limit)
1046 1056
1047 1057 @reraise_safe_exceptions
def node_annotate(self, wire, commit_id, path):
    """
    Run ``git blame`` for ``path`` at ``commit_id`` and return a list of
    ``(line number, commit id, rest of blame line)`` tuples.

    Line numbers start at 1. Each blame line is split at its first
    space; the part before it is taken as the commit id.
    """
    # -l ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r commit_id ==> blames for the given commit
    blame_cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
    stdout, __ = self.run_git_command(wire, blame_cmd)

    annotated = []
    # the piece after the final newline is dropped
    for line_no, blame_line in enumerate(stdout.split('\n')[:-1], 1):
        sha, content = blame_line.split(' ', 1)
        annotated.append((line_no, sha, content))
    return annotated
1062 1072
1063 1073 @reraise_safe_exceptions
def update_server_info(self, wire):
    """
    Call the module-level ``update_server_info`` (imported from
    ``dulwich.server``) on the repository described by ``wire``.
    """
    update_server_info(self._factory.repo(wire))
1067 1077
1068 1078 @reraise_safe_exceptions
def get_all_commit_ids(self, wire):
    """
    Return the output lines of
    ``git rev-list --reverse --date-order --branches --tags`` as a list.

    Any exception raised while running the command yields an empty list.
    The lookup is wrapped by the region's conditional cache.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids(_context_uid, _repo_id):
        rev_list = [
            'rev-list', '--reverse', '--date-order', '--branches', '--tags']
        try:
            stdout, __ = self.run_git_command(wire, rev_list)
            return stdout.splitlines()
        except Exception:
            # Can be raised for empty repositories
            return []

    return _get_all_commit_ids(context_uid, repo_id)
1083 1093
1084 1094 @reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
    """
    Run the git executable with the arguments in ``cmd`` and return a
    ``(stdout, stderr)`` tuple of joined output.

    Recognised keys in ``opts`` (all removed before the remaining options
    are handed to ``subprocessio.SubprocessIOChunker``):

    * ``_bare``: when present, the default ``-c core.quotepath=false``
      is not added
    * ``_safe``: when present, an ``EnvironmentError`` is not raised
      further; ``('', err)`` is returned instead
    * ``_copts``: extra options placed between the executable and ``cmd``
    * ``extra_env``: mapping merged into the environment of the call

    ``cwd`` is set to ``wire['path']`` when that is an existing directory.
    Without ``_safe`` a failure to start the command raises the exception
    built by ``exceptions.VcsException()``.
    """
    repo_path = wire.get('path', None)
    if repo_path and os.path.isdir(repo_path):
        opts['cwd'] = repo_path

    if '_bare' in opts:
        opts.pop('_bare')
        config_opts = []
    else:
        config_opts = ['-c', 'core.quotepath=false', ]

    # no exc on failure
    safe_call = '_safe' in opts
    if safe_call:
        opts.pop('_safe')

    if '_copts' in opts:
        config_opts.extend(opts.pop('_copts') or [])

    gitenv = os.environ.copy()
    gitenv.update(opts.pop('extra_env', {}))
    # need to clean fix GIT_DIR !
    gitenv.pop('GIT_DIR', None)
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
    gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

    cmd = [settings.GIT_EXECUTABLE] + config_opts + cmd
    _opts = {'env': gitenv, 'shell': False}

    try:
        _opts.update(opts)
        proc = subprocessio.SubprocessIOChunker(cmd, **_opts)

        return ''.join(proc), ''.join(proc.error)
    except (EnvironmentError, OSError) as err:
        cmd_text = ' '.join(cmd)  # human friendly CMD
        tb_err = ("Couldn't run git command (%s).\n"
                  "Original error was:%s\n"
                  "Call options:%s\n"
                  % (cmd_text, err, _opts))
        log.exception(tb_err)
        if not safe_call:
            raise exceptions.VcsException()(tb_err)
        return '', err
1133 1143
1134 1144 @reraise_safe_exceptions
def install_hooks(self, wire, force=False):
    """
    Install the git hooks into the repository at ``wire['path']`` and
    return whatever ``install_git_hooks`` returns.

    ``force`` is passed on as ``force_create``.
    """
    from vcsserver.hook_utils import install_git_hooks

    is_bare = self.bare(wire)
    repo_path = wire['path']
    return install_git_hooks(repo_path, is_bare, force_create=force)
1140 1150
1141 1151 @reraise_safe_exceptions
def get_hooks_info(self, wire):
    """
    Return a dict with the keys ``pre_version`` and ``post_version``
    holding the results of the git pre/post hook version helpers for the
    repository at ``wire['path']``.
    """
    from vcsserver.hook_utils import (
        get_git_pre_hook_version, get_git_post_hook_version)

    is_bare = self.bare(wire)
    repo_path = wire['path']

    pre_version = get_git_pre_hook_version(repo_path, is_bare)
    post_version = get_git_post_hook_version(repo_path, is_bare)
    return {
        'pre_version': pre_version,
        'post_version': post_version,
    }
General Comments 0
You need to be logged in to leave comments. Login now