##// END OF EJS Templates
git: handle pygit tags in cases they don't have required metadata.
marcink -
r763:d8a843fc default
parent child Browse files
Show More
@@ -1,1126 +1,1144 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2019 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import posixpath as vcspath
22 22 import re
23 23 import stat
24 24 import traceback
25 25 import urllib
26 26 import urllib2
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from dulwich import index, objects
33 33 from dulwich.client import HttpGitClient, LocalGitClient
34 34 from dulwich.errors import (
35 35 NotGitRepository, ChecksumMismatch, WrongObjectException,
36 36 MissingCommitError, ObjectMissing, HangupException,
37 37 UnexpectedCommandError)
38 38 from dulwich.repo import Repo as DulwichRepo
39 39 from dulwich.server import update_server_info
40 40
41 41 from vcsserver import exceptions, settings, subprocessio
42 42 from vcsserver.utils import safe_str, safe_int
43 43 from vcsserver.base import RepoFactory, obfuscate_qs
44 44 from vcsserver.hgcompat import (
45 45 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
46 46 from vcsserver.git_lfs.lib import LFSOidStore
47 47 from vcsserver.vcs_base import RemoteBase
48 48
49 49 DIR_STAT = stat.S_IFDIR
50 50 FILE_MODE = stat.S_IFMT
51 51 GIT_LINK = objects.S_IFGITLINK
52 52 PEELED_REF_MARKER = '^{}'
53 53
54 54
55 55 log = logging.getLogger(__name__)
56 56
57 57
def str_to_dulwich(value):
    """
    Decode `value` using the configured wire encoding.

    Dulwich 0.10.1a requires `unicode` objects to be passed in.
    """
    wire_encoding = settings.WIRE_ENCODING
    return value.decode(wire_encoding)
63 63
64 64
def reraise_safe_exceptions(func):
    """
    Decorator converting Dulwich exceptions to something neutral.

    Object lookup failures are re-raised through
    `exceptions.LookupException`, protocol failures through
    `exceptions.VcsException`; everything else propagates untouched.
    """

    @wraps(func)
    def safe_call(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException,
                MissingCommitError, ObjectMissing) as org_exc:
            make_exc = exceptions.LookupException(org_exc=org_exc)
            raise make_exc(safe_str(org_exc))
        except (HangupException, UnexpectedCommandError) as org_exc:
            make_exc = exceptions.VcsException(org_exc=org_exc)
            raise make_exc(safe_str(org_exc))
        except Exception:
            # NOTE(marcink): because of how dulwich handles some exceptions
            # (KeyError on empty repos), we cannot track this and catch all
            # exceptions; these come from other handlers, so they are
            # deliberately re-raised as they are.
            raise
        return result

    return safe_call
87 87
88 88
class Repo(DulwichRepo):
    """
    A wrapper for dulwich Repo class.

    Since dulwich is sometimes keeping .idx file descriptors open, it leads to
    "Too many open files" error. We need to close all opened file descriptors
    once the repo object is destroyed.
    """

    def __del__(self):
        # Only close when the object store attribute exists on the instance.
        if not hasattr(self, 'object_store'):
            return
        self.close()
100 100
101 101
class Repository(LibGit2Repo):
    """
    libgit2 repository usable as a context manager.

    Leaving the ``with`` block calls `free()` on the repository.
    """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Nothing is returned, so exceptions from the block are not suppressed.
        self.free()
109 109
110 110
class GitFactory(RepoFactory):
    """Factory producing dulwich or libgit2 repository objects for a wire."""

    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """Build the repository object for ``wire['path']``."""
        if not use_libgit2:
            # dulwich expects a decoded path
            return Repo(str_to_dulwich(wire['path']))
        return Repository(wire['path'])

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """Shortcut for `repo` with the libgit2 backend selected."""
        return self.repo(wire, use_libgit2=True)
129 129
130 130
131 131 class GitRemote(RemoteBase):
132 132
def __init__(self, factory):
    """Store the repo factory and register the bulk-request attribute methods."""
    self._factory = factory
    # maps attribute names accepted by `bulk_request` to the bound methods
    # that compute them
    bulk_methods = {}
    bulk_methods["date"] = self.date
    bulk_methods["author"] = self.author
    bulk_methods["branch"] = self.branch
    bulk_methods["message"] = self.message
    bulk_methods["parents"] = self.parents
    bulk_methods["_commit"] = self.revision
    self._bulk_methods = bulk_methods
143 143
144 144 def _wire_to_config(self, wire):
145 145 if 'config' in wire:
146 146 return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']])
147 147 return {}
148 148
149 149 def _remote_conf(self, config):
150 150 params = [
151 151 '-c', 'core.askpass=""',
152 152 ]
153 153 ssl_cert_dir = config.get('vcs_ssl_dir')
154 154 if ssl_cert_dir:
155 155 params.extend(['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)])
156 156 return params
157 157
@reraise_safe_exceptions
def discover_git_version(self):
    """Return the output of ``git --version`` without the 'git version' prefix."""
    stdout, _ = self.run_git_command(
        {}, ['--version'], _bare=True, _safe=True)
    prefix = 'git version'
    version = stdout[len(prefix):] if stdout.startswith(prefix) else stdout
    return version.strip()
166 166
@reraise_safe_exceptions
def is_empty(self, wire):
    """
    Tell whether the repository has no content.

    A resolvable, named HEAD means not empty; otherwise libgit2's own
    `is_empty` decides. Any exception raised while checking is treated as
    "empty".
    """
    with self._factory.repo_libgit2(wire) as repo:
        try:
            if repo.head.name:
                return False

            # NOTE(marcink): check again using more expensive method
            return repo.is_empty
        except Exception:
            return True
183 183
@reraise_safe_exceptions
def assert_correct_path(self, wire):
    """
    Check that ``wire['path']`` can be opened as a Git repository.

    Returns True when libgit2 opens it, False on `pygit2.GitError`.
    The result goes through the conditional cache region.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _assert_correct_path(_context_uid, _repo_id):
        try:
            # opening (and immediately releasing) the repo is the whole check
            with self._factory.repo_libgit2(wire):
                pass
        except pygit2.GitError:
            path = wire.get('path')
            log.debug("Invalid Git path `%s`, tb: %s",
                      path, traceback.format_exc())
            return False
        else:
            return True

    return _assert_correct_path(context_uid, repo_id)
201 201
@reraise_safe_exceptions
def bare(self, wire):
    """Return libgit2's `is_bare` flag for the repository behind `wire`."""
    with self._factory.repo_libgit2(wire) as repo:
        is_bare = repo.is_bare
    return is_bare
207 207
@reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
    """Return the `data` of the object stored under `sha`."""
    with self._factory.repo_libgit2(wire) as repo:
        return repo[sha].data
215 215
@reraise_safe_exceptions
def blob_raw_length(self, wire, sha):
    """Return the `size` of the object stored under `sha` (cached per repo and sha)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _blob_raw_length(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[sha].size

    return _blob_raw_length(repo_id, sha)
228 228
229 229 def _parse_lfs_pointer(self, raw_content):
230 230
231 231 spec_string = 'version https://git-lfs.github.com/spec'
232 232 if raw_content and raw_content.startswith(spec_string):
233 233 pattern = re.compile(r"""
234 234 (?:\n)?
235 235 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
236 236 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
237 237 ^size[ ](?P<oid_size>[0-9]+)\n
238 238 (?:\n)?
239 239 """, re.VERBOSE | re.MULTILINE)
240 240 match = pattern.match(raw_content)
241 241 if match:
242 242 return match.groupdict()
243 243
244 244 return {}
245 245
@reraise_safe_exceptions
def is_large_file(self, wire, commit_id):
    """
    Return parsed LFS pointer data for the object `commit_id`.

    Binary objects and objects that are not LFS pointers give an empty dict.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _is_large_file(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            blob = repo[commit_id]
            if not blob.is_binary:
                return self._parse_lfs_pointer(blob.data)
            return {}

    return _is_large_file(repo_id, commit_id)
261 261
@reraise_safe_exceptions
def in_largefiles_store(self, wire, oid):
    """
    Tell whether `oid` is present in the configured LFS store.

    Returns False when no 'vcs_git_lfs_store_location' is configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        return False

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.has_oid()
277 277
@reraise_safe_exceptions
def store_path(self, wire, oid):
    """
    Return the LFS store path for `oid`.

    Raises ValueError when no 'vcs_git_lfs_store_location' is configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        raise ValueError('Unable to fetch oid with path {}'.format(oid))

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.oid_path
291 291
@reraise_safe_exceptions
def bulk_request(self, wire, rev, pre_load):
    """
    Compute several attributes of `rev` in one call.

    `pre_load` lists attribute names registered in `self._bulk_methods`;
    the result maps each name to the value its method returns. A KeyError
    raised while handling an attribute is re-raised through
    `exceptions.VcsException`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_request(_repo_id, _rev, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                collected[attr] = self._bulk_methods[attr](wire, rev)
            except KeyError as e:
                raise exceptions.VcsException(e)(
                    "Unknown bulk attribute: %s" % attr)
        return collected

    # sorted so the cache arguments do not depend on the requested order
    return _bulk_request(repo_id, rev, sorted(pre_load))
309 309
def _build_opener(self, url):
    """
    Build a urllib2 opener for `url`.

    When the URL carries auth info, basic and digest auth handlers sharing
    one password manager are installed.
    """
    _, authinfo = url_parser(url).authinfo()

    auth_handlers = []
    if authinfo:
        # create a password manager
        passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        passmgr.add_password(*authinfo)
        auth_handlers = [
            httpbasicauthhandler(passmgr),
            httpdigestauthhandler(passmgr),
        ]

    return urllib2.build_opener(*auth_handlers)
324 324
325 325 def _type_id_to_name(self, type_id):
326 326 return {
327 327 1: b'commit',
328 328 2: b'tree',
329 329 3: b'blob',
330 330 4: b'tag'
331 331 }[type_id]
332 332
@reraise_safe_exceptions
def check_url(self, url, config):
    """
    Verify that `url` answers like a Git smart/dumb HTTP remote.

    Requests ``<url>/info/refs?service=git-upload-pack`` and inspects the
    body. Returns True on success; any failure is raised through
    `exceptions.URLError`. Password and query string are obfuscated in
    everything that gets logged or put into error messages.
    """
    url_obj = url_parser(url)
    test_uri, _ = url_obj.authinfo()
    if url_obj.passwd:
        url_obj.passwd = '*****'
    url_obj.query = obfuscate_qs(url_obj.query)
    cleaned_uri = str(url_obj)
    log.info("Checking URL for remote cloning/import: %s", cleaned_uri)

    if not test_uri.endswith('info/refs'):
        test_uri = test_uri.rstrip('/') + '/info/refs'

    opener = self._build_opener(url)
    opener.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

    query_string = '?%s' % urllib.urlencode({"service": 'git-upload-pack'})
    probe_url = "%s%s" % (test_uri, query_string)
    req = urllib2.Request(probe_url, None, {})

    try:
        log.debug("Trying to open URL %s", cleaned_uri)
        resp = opener.open(req)
        if resp.code != 200:
            raise exceptions.URLError()('Return Code is not 200')
    except Exception as e:
        log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
        # means it cannot be cloned
        raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))

    # now detect if it's proper git repo
    gitdata = resp.read()
    # old style git can return some other format, hence the refs fallback
    looks_like_git = (
        'service=git-upload-pack' in gitdata
        or re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata))
    if not looks_like_git:
        raise exceptions.URLError()(
            "url [%s] does not look like an git" % (cleaned_uri,))

    return True
375 375
@reraise_safe_exceptions
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
    """
    Fetch from `url` and store the refs matching `valid_refs`.

    Refs ending with `deferred` are skipped. With `update_after_clone`
    HEAD is set to the remote HEAD and the index is built from its tree.
    """
    # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
    remote_refs = self.pull(wire, url, apply_refs=False)
    repo = self._factory.repo(wire)
    if isinstance(valid_refs, list):
        # str.startswith accepts a tuple of prefixes, not a list
        valid_refs = tuple(valid_refs)

    for ref_name in remote_refs:
        # only parse heads/tags and skip so called deferred tags
        if not ref_name.startswith(valid_refs):
            continue
        if ref_name.endswith(deferred):
            continue
        repo[ref_name] = remote_refs[ref_name]

    if update_after_clone:
        # we want to checkout HEAD
        repo["HEAD"] = remote_refs["HEAD"]
        index.build_index_from_tree(repo.path, repo.index_path(),
                                    repo.object_store, repo["HEAD"].tree)
394 394
@reraise_safe_exceptions
def branch(self, wire, commit_id):
    """Return names of the branches whose head ref points exactly at `commit_id`."""
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _branch(_context_uid, _repo_id, _commit_id):
        heads_re = re.compile('^refs/heads')
        return [
            ref_name.split('refs/heads/')[-1]
            for ref_name, ref_sha in self.get_refs(wire).items()
            if heads_re.match(ref_name) and ref_sha == _commit_id
        ]

    return _branch(context_uid, repo_id, commit_id)
409 409
@reraise_safe_exceptions
def commit_branches(self, wire, commit_id):
    """Return the list produced by libgit2's `branches.with_commit` for `commit_id`."""
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _commit_branches(_context_uid, _repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return list(repo.branches.with_commit(_commit_id))

    return _commit_branches(context_uid, repo_id, commit_id)
421 421
@reraise_safe_exceptions
def add_object(self, wire, content):
    """
    Store `content` as a blob in the repository and return the blob id.

    The blob is a dulwich object, so it is written through the dulwich
    repository's object store (same approach as `commit`). The libgit2
    repository object does not expose an `object_store` attribute.
    """
    repo = self._factory.repo(wire)
    blob = objects.Blob()
    blob.set_raw_string(content)
    repo.object_store.add_object(blob)
    return blob.id
430 430
431 431 # TODO: this is quite complex, check if that can be simplified
@reraise_safe_exceptions
def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
    """
    Create a commit from in-memory changes and point `branch` at it.

    `updated` is an iterable of node dicts (keys used here: 'path',
    'node_path', 'mode', 'content'); `removed` is an iterable of
    '/'-separated paths. `commit_tree` is the id of the tree to start from
    (a fresh tree is used when it is falsy). Every item of `commit_data`
    is set as an attribute on the new commit object.
    Returns the id of the created commit.
    """
    repo = self._factory.repo(wire)
    object_store = repo.object_store

    # Create tree and populates it with blobs
    commit_tree = commit_tree and repo[commit_tree] or objects.Tree()

    for node in updated:
        # Compute subdirs if needed
        dirpath = vcspath.split(node['path'])[0]
        dirnames = map(safe_str, dirpath and dirpath.split('/') or [])
        parent = commit_tree
        ancestors = [('', parent)]

        # Tries to dig for the deepest existing tree; whatever is left in
        # `dirnames` afterwards does not exist yet
        while dirnames:
            curdir = dirnames[0]
            try:
                dir_id = parent[curdir][1]
            except KeyError:
                break
            # If found, consume the name and update parent
            dirnames.pop(0)
            parent = repo[dir_id]
            ancestors.append((curdir, parent))

        # Now parent is deepest existing tree and we need to create
        # subtrees for dirnames (in reverse order)
        # [this only applies for nodes from added]
        new_trees = []

        blob = objects.Blob.from_string(node['content'])

        if not dirnames:
            parent.add(
                name=node['node_path'], mode=node['mode'], hexsha=blob.id)
        else:
            # If there are trees which should be created we need to build
            # them now (in reverse order)
            reversed_dirnames = list(reversed(dirnames))
            curtree = objects.Tree()
            curtree[node['node_path']] = node['mode'], blob.id
            new_trees.append(curtree)
            for dirname in reversed_dirnames[:-1]:
                newtree = objects.Tree()
                newtree[dirname] = (DIR_STAT, curtree.id)
                new_trees.append(newtree)
                curtree = newtree
            parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)

        new_trees.append(parent)
        # Update ancestors, deepest pair first, so every parent records the
        # new id of its child
        ancestor_links = [
            (upper[1], lower[1], lower[0])
            for upper, lower in zip(ancestors, ancestors[1:])]
        for parent, tree, path in reversed(ancestor_links):
            parent[path] = (DIR_STAT, tree.id)
            object_store.add_object(tree)

        object_store.add_object(blob)
        for tree in new_trees:
            object_store.add_object(tree)

    for node_path in removed:
        paths = node_path.split('/')
        tree = commit_tree
        trees = [tree]
        # Traverse deep into the forest...
        for path in paths:
            try:
                obj = repo[tree[path][1]]
                if isinstance(obj, objects.Tree):
                    trees.append(obj)
                    tree = obj
            except KeyError:
                break
        # Cut down the blob and all rotten trees on the way back...
        for path, tree in reversed(zip(paths, trees)):
            del tree[path]
            if tree:
                # This tree still has elements - don't remove it or any
                # of it's parents
                break

    object_store.add_object(commit_tree)

    # Create commit
    commit = objects.Commit()
    commit.tree = commit_tree.id
    for attr_name, attr_value in commit_data.iteritems():
        setattr(commit, attr_name, attr_value)
    object_store.add_object(commit)

    self.create_branch(wire, branch, commit.id)

    # dulwich set-ref
    repo.refs['refs/heads/%s' % branch] = commit.id

    return commit.id
532 532
@reraise_safe_exceptions
def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
    """
    Fetch objects from `url` into the repository behind `wire`.

    A `url` without '://' (other than the literal 'default') is treated as
    a local path, anything else goes through the HTTP client. When `refs`
    is given only those refs are wanted and returned. With `apply_refs`
    the fetched refs are written to the repo; with `update_after` HEAD is
    moved to the remote HEAD and the index is rebuilt.
    Returns the dict of remote refs.
    """
    is_local_path = url != 'default' and '://' not in url
    if is_local_path:
        client = LocalGitClient(url)
    else:
        url_obj = url_parser(url)
        opener = self._build_opener(url)
        # from here on `url` is the address with credentials stripped
        url, _ = url_obj.authinfo()
        client = HttpGitClient(base_url=url, opener=opener)
    repo = self._factory.repo(wire)

    if refs:
        def determine_wants(references):
            return [references[r] for r in references if r in refs]
    else:
        determine_wants = repo.object_store.determine_wants_all

    try:
        remote_refs = client.fetch(
            path=url, target=repo, determine_wants=determine_wants)
    except NotGitRepository as e:
        log.warning(
            'Trying to fetch from "%s" failed, not a Git repository.', url)
        # Exception can contain unicode which we convert
        raise exceptions.AbortException(e)(repr(e))

    # mikhail: client.fetch() returns all the remote refs, but fetches only
    # refs filtered by `determine_wants` function. We need to filter result
    # as well
    if refs:
        remote_refs = {
            name: remote_refs[name] for name in remote_refs if name in refs}

    if apply_refs:
        # TODO: johbo: Needs proper test coverage with a git repository
        # that contains a tag object, so that we would end up with
        # a peeled ref at this point.
        for name in remote_refs:
            if name.endswith(PEELED_REF_MARKER):
                log.debug("Skipping peeled reference %s", name)
                continue
            repo[name] = remote_refs[name]

        if refs and not update_after:
            # mikhail: explicitly set the head to the last ref.
            repo['HEAD'] = remote_refs[refs[-1]]

    if update_after:
        # we want to checkout HEAD
        repo["HEAD"] = remote_refs["HEAD"]
        index.build_index_from_tree(repo.path, repo.index_path(),
                                    repo.object_store, repo["HEAD"].tree)
    return remote_refs
585 585
@reraise_safe_exceptions
def sync_fetch(self, wire, url, refs=None):
    """
    Mirror refs from `url` using the git binary.

    Lists remote refs with ``ls-remote``, skips duplicates, peeled refs and
    HEAD, then force-fetches them in chunks. When `refs` is given it is
    matched against the remote *sha* values and only matching refs are
    fetched. Returns an ordered mapping of ref name to sha for all listed
    refs that were not skipped.
    """
    repo = self._factory.repo(wire)
    if refs and not isinstance(refs, (list, tuple)):
        refs = [refs]
    config = self._wire_to_config(wire)
    no_prompt_env = {'GIT_TERMINAL_PROMPT': '0'}

    # get all remote refs we'll use to fetch later
    output, __ = self.run_git_command(
        wire, ['ls-remote', url], fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    remote_refs = collections.OrderedDict()
    fetch_refs = []

    for ref_line in output.splitlines():
        sha, ref = ref_line.split('\t')
        sha = sha.strip()
        if ref in remote_refs:
            # duplicate, skip
            continue
        if ref.endswith(PEELED_REF_MARKER):
            log.debug("Skipping peeled reference %s", ref)
            continue
        # don't sync HEAD
        if ref == 'HEAD':
            continue

        remote_refs[ref] = sha

        # without a filter everything is fetched, otherwise only refs
        # whose sha was requested
        if not refs or sha in refs:
            fetch_refs.append('{}:{}'.format(ref, ref))

    log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
    if fetch_refs:
        for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
            fetch_refs_chunks = list(chunk)
            log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
            fetch_cmd = ['fetch', url, '--force', '--prune', '--']
            _out, _err = self.run_git_command(
                wire, fetch_cmd + fetch_refs_chunks,
                fail_on_stderr=False,
                _copts=self._remote_conf(config),
                extra_env=no_prompt_env)

    return remote_refs
633 633
@reraise_safe_exceptions
def sync_push(self, wire, url, refs=None):
    """
    Mirror-push the repository to `url` using the git binary.

    Does nothing when `check_url` returns a falsy value. `refs` is accepted
    but not used by this implementation.
    """
    url_ok = self.check_url(url, wire)
    if not url_ok:
        return

    config = self._wire_to_config(wire)
    self._factory.repo(wire)
    push_cmd = ['push', url, '--mirror']
    self.run_git_command(
        wire, push_cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})
644 644
@reraise_safe_exceptions
def get_remote_refs(self, wire, url):
    """Open `url` as a dulwich repository and return its refs."""
    remote_repo = Repo(url)
    return remote_repo.get_refs()
649 649
@reraise_safe_exceptions
def get_description(self, wire):
    """Return the description reported by the dulwich repository."""
    return self._factory.repo(wire).get_description()
654 654
@reraise_safe_exceptions
def get_missing_revs(self, wire, rev1, rev2, path2):
    """
    Return commit ids reachable from `rev2` but not from `rev1`.

    Objects are first exchanged in both directions between the repository
    behind `wire` and the one at `path2`; the walk itself runs on the
    `path2` repository.
    """
    repo = self._factory.repo(wire)
    LocalGitClient(thin_packs=False).fetch(path2, repo)

    wire_remote = wire.copy()
    wire_remote['path'] = path2
    repo_remote = self._factory.repo(wire_remote)
    LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)

    walker = repo_remote.get_walker(include=[rev2], exclude=[rev1])
    return [entry.commit.id for entry in walker]
669 669
@reraise_safe_exceptions
def get_object(self, wire, sha):
    """
    Resolve `sha` (any revparse expression) to basic object info.

    Tag objects are resolved to their target. Non-tag results that are not
    contained in any branch are reported as missing. Lookup failures are
    raised through `exceptions.LookupException`.
    Returns a dict with 'id', 'type', 'commit_id' and 'idx'.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _get_object(_context_uid, _repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
            try:
                commit = repo.revparse_single(sha)
            except (KeyError, ValueError) as e:
                raise exceptions.LookupException(e)(missing_commit_err)

            if isinstance(commit, pygit2.Tag):
                commit = repo.get(commit.target)
            else:
                # check for dangling commit
                if not list(repo.branches.with_commit(commit.hex)):
                    raise exceptions.LookupException(None)(missing_commit_err)

            commit_id = commit.hex
            return {
                'id': commit_id,
                'type': self._type_id_to_name(commit.type),
                'commit_id': commit_id,
                'idx': 0
            }

    return _get_object(context_uid, repo_id, sha)
706 706
@reraise_safe_exceptions
def get_refs(self, wire):
    """Return a mapping of ref name to target hex for all refs under refs/heads/ and refs/tags/."""
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _get_refs(_context_uid, _repo_id):
        with self._factory.repo_libgit2(wire) as repo:
            wanted = re.compile('^refs/(heads|tags)/')
            return {
                ref.name: ref.target.hex
                for ref in repo.listall_reference_objects()
                if wanted.match(ref.name)
            }

    return _get_refs(context_uid, repo_id)
720 720
@reraise_safe_exceptions
def get_branch_pointers(self, wire):
    """Return a mapping of target hex to branch shorthand for refs starting with refs/heads."""
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _get_branch_pointers(_context_uid, _repo_id):
        repo_init = self._factory.repo_libgit2(wire)
        heads_re = re.compile('^refs/heads')
        with repo_init as repo:
            return {
                ref.target.hex: ref.shorthand
                for ref in repo.listall_reference_objects()
                if heads_re.match(ref.name)
            }

    return _get_branch_pointers(context_uid, repo_id)
734 734
@reraise_safe_exceptions
def head(self, wire, show_exc=True):
    """
    Return the hex id of the object HEAD peels to.

    On failure the exception is re-raised when `show_exc` is true,
    otherwise None is returned.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _head(_context_uid, _repo_id, _show_exc):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                return repo.head.peel().hex
            except Exception:
                if not show_exc:
                    return None
                raise

    return _head(context_uid, repo_id, show_exc)
748 748
@reraise_safe_exceptions
def init(self, wire):
    """Initialise a dulwich repository at ``wire['path']`` and keep it on `self.repo`."""
    self.repo = Repo.init(str_to_dulwich(wire['path']))
753 753
@reraise_safe_exceptions
def init_bare(self, wire):
    """Initialise a bare dulwich repository at ``wire['path']`` and keep it on `self.repo`."""
    self.repo = Repo.init_bare(str_to_dulwich(wire['path']))
758 758
@reraise_safe_exceptions
def revision(self, wire, rev):
    """
    Return ``{'id': <hex>}`` for the object `rev`, plus a 'tree' entry
    when the object has a `tree_id` attribute.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _revision(_context_uid, _repo_id, _rev):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[rev]
            obj_data = {'id': git_obj.id.hex}
            # tree objects itself don't have tree_id attribute
            tree_id = getattr(git_obj, 'tree_id', None)
            if hasattr(git_obj, 'tree_id'):
                obj_data['tree'] = tree_id.hex
            return obj_data

    return _revision(context_uid, repo_id, rev)
777 777
@reraise_safe_exceptions
def date(self, wire, commit_id):
    """
    Return ``[commit_time, commit_time_offset]`` for `commit_id`.

    When the looked-up object has no `commit_time` attribute (pygit tag
    objects lack this metadata), the object it points to is used instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _date(_repo_id, _commit_id):
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            commit = repo[commit_id]

            if not hasattr(commit, 'commit_time'):
                # no commit metadata on this object, resolve its target
                commit = commit.get_object()

            # TODO(marcink): check dulwich difference of offset vs timezone
            return [commit.commit_time, commit.commit_time_offset]

    return _date(repo_id, commit_id)
789 796
@reraise_safe_exceptions
def author(self, wire, commit_id):
    """
    Return the author of `commit_id` as a unicode string.

    The format is ``name <email>`` when an email is set, otherwise the raw
    author name. When the looked-up object has no `author` attribute
    (pygit tag objects lack this metadata), the object it points to is
    used instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _author(_repo_id, _commit_id):
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            commit = repo[commit_id]

            if not hasattr(commit, 'author'):
                # no commit metadata on this object, resolve its target
                commit = commit.get_object()
            author = commit.author

            if author.email:
                return u"{} <{}>".format(author.name, author.email)

            return u"{}".format(author.raw_name)

    return _author(repo_id, commit_id)
803 816
@reraise_safe_exceptions
def message(self, wire, commit_id):
    """Return the `message` attribute of the object `commit_id`."""
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _message(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[commit_id].message

    return _message(repo_id, commit_id)
814 827
@reraise_safe_exceptions
def parents(self, wire, commit_id):
    """
    Return the hex ids of the parents of `commit_id`.

    When the looked-up object has no `parent_ids` attribute (pygit tag
    objects lack this metadata), the object it points to is used instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _parents(_repo_id, _commit_id):
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            commit = repo[commit_id]

            if not hasattr(commit, 'parent_ids'):
                # no commit metadata on this object, resolve its target
                commit = commit.get_object()

            return [x.hex for x in commit.parent_ids]

    return _parents(repo_id, commit_id)
825 843
@reraise_safe_exceptions
def children(self, wire, commit_id):
    """
    Return ids of the children of `commit_id`.

    Parses ``git rev-list --all --children``: on every line that starts
    with `commit_id`, the remaining space-separated fields are child ids.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _children(_repo_id, _commit_id):
        output, __ = self.run_git_command(
            wire, ['rev-list', '--all', '--children'])

        starts_with_commit = re.compile(r'^%s' % commit_id)
        child_ids = []
        for line in output.splitlines():
            if not starts_with_commit.match(line):
                continue
            child_ids.extend(line.split(' ')[1:])

        return child_ids

    return _children(repo_id, commit_id)
843 861
@reraise_safe_exceptions
def set_refs(self, wire, key, value):
    """Create the reference `key` pointing at `value`, overwriting an existing one."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.create(key, value, force=True)
849 867
@reraise_safe_exceptions
def create_branch(self, wire, branch_name, commit_id, force=False):
    """
    Create local branch `branch_name` at `commit_id`.

    Without `force` an already existing branch is left untouched.
    """
    with self._factory.repo_libgit2(wire) as repo:
        commit = repo[commit_id]

        # create when forced, or only if that branch isn't existing
        if force or not repo.branches.get(branch_name):
            repo.branches.local.create(branch_name, commit, force=force)
861 879
862 880 @reraise_safe_exceptions
def remove_ref(self, wire, key):
    """Delete the reference named ``key`` from the libgit2 repository."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(key)
867 885
868 886 @reraise_safe_exceptions
def tag_remove(self, wire, tag_name):
    """Delete the reference ``refs/tags/<tag_name>`` from the libgit2 repository."""
    with self._factory.repo_libgit2(wire) as repo:
        tag_ref = 'refs/tags/{}'.format(tag_name)
        repo.references.delete(tag_ref)
874 892
875 893 @reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Return, as a list, what the repository object store reports as tree
    changes between the trees of ``source_id`` and ``target_id``.

    A falsy ``source_id`` is passed on as ``None`` instead of a tree.
    """
    # TODO(marcink): remove this seems it's only used by tests
    repo = self._factory.repo(wire)
    old_tree = repo[source_id].tree if source_id else None
    new_tree = repo[target_id].tree
    return list(repo.object_store.tree_changes(old_tree, new_tree))
883 901
884 902 @reraise_safe_exceptions
def tree_and_type_for_path(self, wire, commit_id, path):
    """
    Look ``path`` up in the tree of ``commit_id``.

    :return: tuple ``(hex id, type, filemode)`` of the entry found, or
        ``(None, None, None)`` when the lookup raises ``KeyError``
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            try:
                entry = commit.tree[path]
            except KeyError:
                # nothing stored under that path
                return None, None, None

            return entry.id.hex, entry.type, entry.filemode

    return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
901 919
902 920 @reraise_safe_exceptions
def tree_items(self, wire, tree_id):
    """
    List the entries of the tree stored under ``tree_id``.

    :return: list of ``(name, filemode, hex id, type)`` tuples, where an
        entry type of ``'commit'`` is reported as ``'link'``
    :raises ObjectMissing: when looking ``tree_id`` up raises ``KeyError``
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_items(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing('No tree with id: {}'.format(tree_id))

            # NOTE(marcink): submodules we translate to 'link' for backward compat
            return [
                (entry.name, entry.filemode, entry.hex,
                 'link' if entry.type == 'commit' else entry.type)
                for entry in tree
            ]

    return _tree_items(repo_id, tree_id)
928 946
929 947 @reraise_safe_exceptions
def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Old version that uses subprocess to call diff

    Runs ``git diff`` between the two commits, or ``git show`` for
    ``commit_id_2`` when ``commit_id_1`` equals ``self.EMPTY_COMMIT``,
    and returns the command output as the diff text.

    :param file_filter: when truthy, appended after ``--`` to restrict
        the command to that path
    :param opt_ignorews: when truthy, adds ``--ignore-all-space``
    :param context: value used for the ``-U`` flag
    """
    from_empty = commit_id_1 == self.EMPTY_COMMIT

    flags = [
        '-U%s' % context, '--patch',
        '--binary',
        '--find-renames',
        '--no-indent-heuristic',
    ]
    if opt_ignorews:
        flags.append('--ignore-all-space')

    if from_empty:
        cmd = ['show'] + flags + [commit_id_2]
    else:
        cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
    if file_filter:
        cmd.extend(['--', file_filter])

    diff, __ = self.run_git_command(wire, cmd)
    if not from_empty:
        return diff

    # 'show' output: drop everything before the first line starting
    # with 'diff'; when there is none, nothing is kept
    lines = diff.splitlines()
    start = next(
        (idx for idx, line in enumerate(lines) if line.startswith('diff')),
        len(lines))
    # Append new line just like 'diff' command do
    return '\n'.join(lines[start:]) + '\n'
969 987
970 988 @reraise_safe_exceptions
971 989 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
# Build a diff with pygit2 and return its patch text ('' when there is none).
#
# - GIT_DIFF_SHOW_BINARY is always set; GIT_DIFF_IGNORE_WHITESPACE is added
#   when `opt_ignorews` is truthy. `context` is passed as `context_lines`.
# - When `commit_id_1` equals `self.EMPTY_COMMIT`, the tree of `commit_id_2`
#   is diffed with `diff_to_tree()` without a second tree; otherwise it is
#   diffed against the tree of `commit_id_1`. Both calls pass swap=True.
# - When `file_filter` is truthy, only the patch of the first entry whose
#   `delta.old_file.path` equals `file_filter` is returned, '' if none match.
# NOTE(review): this view has lost the original indentation, so it cannot be
# told from here whether the rename detection (`find_similar`) runs only in
# the `else` branch or for both cases - confirm against the real file.
972 990 repo_init = self._factory.repo_libgit2(wire)
973 991 with repo_init as repo:
974 992 swap = True
975 993 flags = 0
976 994 flags |= pygit2.GIT_DIFF_SHOW_BINARY
977 995
978 996 if opt_ignorews:
979 997 flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE
980 998
981 999 if commit_id_1 == self.EMPTY_COMMIT:
# note: `comm1` holds the object for commit_id_2 here
982 1000 comm1 = repo[commit_id_2]
983 1001 diff_obj = comm1.tree.diff_to_tree(
984 1002 flags=flags, context_lines=context, swap=swap)
985 1003
986 1004 else:
# note: names are crossed - `comm1` is commit_id_2, `comm2` is commit_id_1
987 1005 comm1 = repo[commit_id_2]
988 1006 comm2 = repo[commit_id_1]
989 1007 diff_obj = comm1.tree.diff_to_tree(
990 1008 comm2.tree, flags=flags, context_lines=context, swap=swap)
991 1009 similar_flags = 0
992 1010 similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
993 1011 diff_obj.find_similar(flags=similar_flags)
994 1012
995 1013 if file_filter:
996 1014 for p in diff_obj:
997 1015 if p.delta.old_file.path == file_filter:
998 1016 return p.patch or ''
999 1017 # no matching path == no diff
1000 1018 return ''
1001 1019 return diff_obj.patch or ''
1002 1020
1003 1021 @reraise_safe_exceptions
def node_history(self, wire, commit_id, path, limit):
    """
    Return the commit ids git reports for ``path``, starting the walk at
    ``commit_id``.

    ``limit == 1`` uses ``git rev-list -1``; any other value uses
    ``git log``, with ``-n <limit>`` added only when ``limit`` is truthy.

    :return: list of every 40 character hex string found in the command
        output, in output order
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
        if limit == 1:
            # optimize for n==1, rev-list is much faster for that use-case
            cmd = ['rev-list', '-1', commit_id, '--', path]
        else:
            count_args = ['-n', str(safe_int(limit, 0))] if limit else []
            cmd = ['log'] + count_args
            cmd += ['--pretty=format: %H', '-s', commit_id, '--', path]

        output, __ = self.run_git_command(wire, cmd)
        return list(re.findall(r'[0-9a-fA-F]{40}', output))

    return _node_history(context_uid, repo_id, commit_id, path, limit)
1022 1040
1023 1041 @reraise_safe_exceptions
def node_annotate(self, wire, commit_id, path):
    """
    Run ``git blame`` for ``path`` at ``commit_id`` and split its output
    into per-line records.

    :return: list of ``(line number, commit id, rest of the blame line)``
        tuples; line numbers start at 1 and every output line is split
        on its first space
    """
    # -l ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r commit_id ==> blames for the given commit
    cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
    output, __ = self.run_git_command(wire, cmd)

    annotated = []
    # the piece after the final '\n' is dropped
    for line_no, blame_line in enumerate(output.split('\n')[:-1], 1):
        blamed_id, content = blame_line.split(' ', 1)
        annotated.append((line_no, blamed_id, content))
    return annotated
1038 1056
1039 1057 @reraise_safe_exceptions
def update_server_info(self, wire):
    """Run dulwich's ``update_server_info`` on the repository for ``wire``."""
    # inside the method body the name resolves to the module-level
    # function imported from dulwich.server, not to this method
    update_server_info(self._factory.repo(wire))
1043 1061
1044 1062 @reraise_safe_exceptions
def get_all_commit_ids(self, wire):
    """
    Return the output lines of
    ``git rev-list --reverse --date-order --branches --tags``.

    Any exception raised while running the command or splitting its
    output results in an empty list instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids(_context_uid, _repo_id):
        rev_list = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
        commit_ids = []
        try:
            output, __ = self.run_git_command(wire, rev_list)
            commit_ids = output.splitlines()
        except Exception:
            # Can be raised for empty repositories
            pass
        return commit_ids

    return _get_all_commit_ids(context_uid, repo_id)
1059 1077
1060 1078 @reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
    """
    Run the configured git executable with ``cmd`` as arguments.

    Recognised keys in ``opts`` (all removed before the remaining
    options are handed to ``SubprocessIOChunker``):

    * ``_bare`` - when present, the default
      ``-c core.quotepath=false`` options are left out
    * ``_safe`` - when present, a failure to run the command is
      returned instead of raised
    * ``_copts`` - extra options placed before ``cmd``
    * ``extra_env`` - mapping merged into the process environment

    When ``wire['path']`` is an existing directory it is used as ``cwd``.

    :return: tuple of the joined chunks yielded by the chunker and the
        joined chunks of its ``error`` attribute (presumably stdout and
        stderr); with ``_safe`` set and an ``EnvironmentError``/``OSError``
        raised, ``('', <the exception>)``
    :raises: the exception built by ``exceptions.VcsException()`` when
        the command cannot be run and ``_safe`` was not given
    """
    repo_path = wire.get('path', None)
    if repo_path and os.path.isdir(repo_path):
        opts['cwd'] = repo_path

    if '_bare' in opts:
        opts.pop('_bare')
        _copts = []
    else:
        _copts = ['-c', 'core.quotepath=false']

    # no exc on failure when '_safe' was passed (whatever its value)
    safe_call = '_safe' in opts
    opts.pop('_safe', None)

    if '_copts' in opts:
        _copts.extend(opts.pop('_copts') or [])

    gitenv = os.environ.copy()
    gitenv.update(opts.pop('extra_env', {}))
    # need to clean fix GIT_DIR !
    gitenv.pop('GIT_DIR', None)
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
    gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

    cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
    _opts = {'env': gitenv, 'shell': False}

    try:
        _opts.update(opts)
        proc = subprocessio.SubprocessIOChunker(cmd, **_opts)

        return ''.join(proc), ''.join(proc.error)
    except (EnvironmentError, OSError) as err:
        readable_cmd = ' '.join(cmd)  # human friendly CMD
        tb_err = ("Couldn't run git command (%s).\n"
                  "Original error was:%s\n"
                  "Call options:%s\n"
                  % (readable_cmd, err, _opts))
        log.exception(tb_err)
        if safe_call:
            return '', err
        raise exceptions.VcsException()(tb_err)
1109 1127
1110 1128 @reraise_safe_exceptions
def install_hooks(self, wire, force=False):
    """
    Install the git hooks for the repository at ``wire['path']`` and
    return whatever ``install_git_hooks`` returns.

    :param force: passed on as ``force_create``
    """
    from vcsserver.hook_utils import install_git_hooks

    is_bare = self.bare(wire)
    return install_git_hooks(wire['path'], is_bare, force_create=force)
1116 1134
1117 1135 @reraise_safe_exceptions
def get_hooks_info(self, wire):
    """
    Return a dict with the ``pre_version`` and ``post_version`` of the
    git hooks found for the repository at ``wire['path']``.
    """
    from vcsserver.hook_utils import (
        get_git_pre_hook_version, get_git_post_hook_version)

    is_bare = self.bare(wire)
    repo_path = wire['path']

    pre_version = get_git_pre_hook_version(repo_path, is_bare)
    post_version = get_git_post_hook_version(repo_path, is_bare)
    return {
        'pre_version': pre_version,
        'post_version': post_version,
    }
General Comments 0
You need to be logged in to leave comments. Login now