##// END OF EJS Templates
git: limit sync-fetch to tags/ and heads/ with default execution arguments
marcink -
r766:7807f9db default
parent child Browse files
Show More
@@ -1,1144 +1,1150 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2019 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import posixpath as vcspath
22 22 import re
23 23 import stat
24 24 import traceback
25 25 import urllib
26 26 import urllib2
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from dulwich import index, objects
33 33 from dulwich.client import HttpGitClient, LocalGitClient
34 34 from dulwich.errors import (
35 35 NotGitRepository, ChecksumMismatch, WrongObjectException,
36 36 MissingCommitError, ObjectMissing, HangupException,
37 37 UnexpectedCommandError)
38 38 from dulwich.repo import Repo as DulwichRepo
39 39 from dulwich.server import update_server_info
40 40
41 41 from vcsserver import exceptions, settings, subprocessio
42 42 from vcsserver.utils import safe_str, safe_int
43 43 from vcsserver.base import RepoFactory, obfuscate_qs
44 44 from vcsserver.hgcompat import (
45 45 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
46 46 from vcsserver.git_lfs.lib import LFSOidStore
47 47 from vcsserver.vcs_base import RemoteBase
48 48
# Directory file-type bits; used below as the mode of sub-tree entries.
DIR_STAT = stat.S_IFDIR
# NOTE(review): stat.S_IFMT is a function (it extracts the file-type bits
# of a mode), not a constant; not referenced in the visible part of the file.
FILE_MODE = stat.S_IFMT
# presumably the tree-entry mode dulwich uses for gitlinks - TODO confirm
GIT_LINK = objects.S_IFGITLINK
# Suffix of "peeled" refs; `pull` and `sync_fetch` skip refs ending with it.
PEELED_REF_MARKER = '^{}'


log = logging.getLogger(__name__)
56 56
57 57
def str_to_dulwich(value):
    """
    Decode ``value`` using ``settings.WIRE_ENCODING``.

    Dulwich 0.10.1a requires `unicode` objects to be passed in.
    """
    wire_encoding = settings.WIRE_ENCODING
    return value.decode(wire_encoding)
63 63
64 64
def reraise_safe_exceptions(func):
    """
    Decorator converting Dulwich exceptions to something neutral.

    Lookup-style Dulwich errors are re-raised as
    ``exceptions.LookupException``, connection/protocol errors as
    ``exceptions.VcsException``; anything else propagates unchanged.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException,
                MissingCommitError, ObjectMissing) as lookup_err:
            raise exceptions.LookupException(
                org_exc=lookup_err)(safe_str(lookup_err))
        except (HangupException, UnexpectedCommandError) as vcs_err:
            raise exceptions.VcsException(
                org_exc=vcs_err)(safe_str(vcs_err))
        # NOTE(marcink): because of how dulwich handles some exceptions
        # (KeyError on empty repos), we cannot track this and catch all
        # exceptions, it's an exceptions from other handlers. Every other
        # exception is therefore left to propagate untouched.
        return result
    return wrapper
87 87
88 88
class Repo(DulwichRepo):
    """
    Thin subclass of the dulwich Repo class.

    Since dulwich is sometimes keeping .idx file descriptors open, it leads to
    "Too many open files" error. We need to close all opened file descriptors
    once the repo object is destroyed.
    """
    def __del__(self):
        # Only close when `object_store` exists on the instance.
        if not hasattr(self, 'object_store'):
            return
        self.close()
100 100
101 101
class Repository(LibGit2Repo):
    """pygit2 repository usable as a context manager; freed on exit."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Runs on both normal and exceptional exit; the return value is
        # None, so exceptions are not suppressed.
        self.free()
109 109
110 110
class GitFactory(RepoFactory):
    """Builds dulwich (`Repo`) or libgit2 (`Repository`) objects for a wire."""
    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        # `create` is accepted but not used here.
        if not use_libgit2:
            return Repo(str_to_dulwich(wire['path']))
        return Repository(wire['path'])

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """Shortcut for ``repo(wire, use_libgit2=True)``."""
        return self.repo(wire, use_libgit2=True)
129 129
130 130
131 131 class GitRemote(RemoteBase):
132 132
def __init__(self, factory):
    """Store the repo factory and the attribute -> method map used by `bulk_request`."""
    self._factory = factory
    bulk_methods = {}
    bulk_methods["date"] = self.date
    bulk_methods["author"] = self.author
    bulk_methods["branch"] = self.branch
    bulk_methods["message"] = self.message
    bulk_methods["parents"] = self.parents
    bulk_methods["_commit"] = self.revision
    self._bulk_methods = bulk_methods
143 143
def _wire_to_config(self, wire):
    """
    Flatten ``wire['config']`` into a dict.

    Each entry is indexed as ``(first, second, value)`` and stored under the
    key ``first + '_' + second``. Returns an empty dict when the wire has no
    ``config`` key.
    """
    if 'config' not in wire:
        return {}
    return {
        entry[0] + '_' + entry[1]: entry[2]
        for entry in wire['config']}
148 148
def _remote_conf(self, config):
    """
    Build the ``-c`` option list passed to git for remote operations.

    Always sets an empty ``core.askpass``; adds ``http.sslCAinfo`` when the
    config carries a truthy ``vcs_ssl_dir`` value.
    """
    ssl_cert_dir = config.get('vcs_ssl_dir')
    params = ['-c', 'core.askpass=""']
    if ssl_cert_dir:
        params += ['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)]
    return params
157 157
@reraise_safe_exceptions
def discover_git_version(self):
    """Return the output of ``git --version`` without the ``git version`` prefix."""
    version_prefix = 'git version'
    version, _ = self.run_git_command(
        {}, ['--version'], _bare=True, _safe=True)
    if version.startswith(version_prefix):
        version = version[len(version_prefix):]
    return version.strip()
166 166
@reraise_safe_exceptions
def is_empty(self, wire):
    """
    Tell whether the repository has no content.

    Returns False as soon as HEAD resolves to a named reference; otherwise
    falls back to libgit2's ``is_empty``. Any error while probing is treated
    as "empty".
    """
    with self._factory.repo_libgit2(wire) as repo:
        try:
            if repo.head.name:
                return False

            # NOTE(marcink): check again using more expensive method
            return repo.is_empty
        except Exception:
            # deliberate best-effort: a failing lookup counts as empty
            return True
183 183
@reraise_safe_exceptions
def assert_correct_path(self, wire):
    """
    Check that the wire path can be opened as a Git repository.

    Returns True when libgit2 opens it, False when it raises
    ``pygit2.GitError``. The result goes through the conditional cache.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _assert_correct_path(_context_uid, _repo_id):
        try:
            # opening is the whole check; nothing to do with the repo
            with self._factory.repo_libgit2(wire):
                pass
        except pygit2.GitError:
            log.debug(
                "Invalid Git path `%s`, tb: %s",
                wire.get('path'), traceback.format_exc())
            return False
        return True

    return _assert_correct_path(context_uid, repo_id)
201 201
@reraise_safe_exceptions
def bare(self, wire):
    """Return the libgit2 ``is_bare`` flag of the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        is_bare = repo.is_bare
        return is_bare
207 207
@reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
    """Return the ``data`` of the object stored under ``sha``."""
    with self._factory.repo_libgit2(wire) as repo:
        return repo[sha].data
215 215
@reraise_safe_exceptions
def blob_raw_length(self, wire, sha):
    """Return the ``size`` of the object stored under ``sha`` (cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _blob_raw_length(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[sha].size

    return _blob_raw_length(repo_id, sha)
228 228
def _parse_lfs_pointer(self, raw_content):
    """
    Parse Git LFS pointer content.

    Returns a dict with ``spec_ver``, ``oid_hash`` and ``oid_size`` (all
    strings) when ``raw_content`` is a pointer file, otherwise an empty dict.
    """
    spec_string = 'version https://git-lfs.github.com/spec'
    if not raw_content or not raw_content.startswith(spec_string):
        return {}

    pointer_re = re.compile(r"""
    (?:\n)?
    ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
    ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
    ^size[ ](?P<oid_size>[0-9]+)\n
    (?:\n)?
    """, re.VERBOSE | re.MULTILINE)
    found = pointer_re.match(raw_content)
    return found.groupdict() if found else {}
245 245
@reraise_safe_exceptions
def is_large_file(self, wire, commit_id):
    """
    Return parsed LFS pointer data for the object ``commit_id``.

    Binary blobs give an empty dict; otherwise the result of
    ``_parse_lfs_pointer`` on the blob data is returned (cached).
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _is_large_file(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            blob = repo[commit_id]
            if not blob.is_binary:
                return self._parse_lfs_pointer(blob.data)
            return {}

    return _is_large_file(repo_id, commit_id)
261 261
@reraise_safe_exceptions
def in_largefiles_store(self, wire, oid):
    """
    Tell whether ``oid`` is present in the configured LFS store.

    Returns False when no ``vcs_git_lfs_store_location`` is configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        return False

    return LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location).has_oid()
277 277
@reraise_safe_exceptions
def store_path(self, wire, oid):
    """
    Return the ``oid_path`` of ``oid`` inside the configured LFS store.

    Raises ValueError when no ``vcs_git_lfs_store_location`` is configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        raise ValueError('Unable to fetch oid with path {}'.format(oid))

    lfs_store = LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.oid_path
291 291
@reraise_safe_exceptions
def bulk_request(self, wire, rev, pre_load):
    """
    Collect several commit attributes in one call.

    For every name in ``pre_load`` the matching method from
    ``self._bulk_methods`` is called with ``(wire, rev)``. A KeyError raised
    during lookup or call is re-raised as ``exceptions.VcsException``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_request(_repo_id, _rev, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                collected[attr] = self._bulk_methods[attr](wire, rev)
            except KeyError as e:
                raise exceptions.VcsException(e)(
                    "Unknown bulk attribute: %s" % attr)
        return collected

    return _bulk_request(repo_id, rev, sorted(pre_load))
309 309
def _build_opener(self, url):
    """
    Build a urllib2 opener for ``url``.

    When the URL carries auth info, basic and digest auth handlers sharing
    one password manager are installed; otherwise a plain opener is built.
    """
    _, authinfo = url_parser(url).authinfo()

    auth_handlers = []
    if authinfo:
        # create a password manager
        password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_manager.add_password(*authinfo)
        auth_handlers.append(httpbasicauthhandler(password_manager))
        auth_handlers.append(httpdigestauthhandler(password_manager))

    return urllib2.build_opener(*auth_handlers)
324 324
def _type_id_to_name(self, type_id):
    """Map a numeric object type id (1-4) to its name; KeyError otherwise."""
    type_names = {
        1: b'commit',
        2: b'tree',
        3: b'blob',
        4: b'tag',
    }
    return type_names[type_id]
332 332
@reraise_safe_exceptions
def check_url(self, url, config):
    """
    Verify that ``url`` answers like a Git smart/dumb HTTP remote.

    Requests ``<url>/info/refs?service=git-upload-pack`` and inspects the
    body. Returns True on success; raises ``exceptions.URLError`` when the
    URL cannot be opened or the response does not look like Git.
    """
    url_obj = url_parser(url)
    test_uri, _ = url_obj.authinfo()
    # never log credentials: mask the password and obfuscate the query
    if url_obj.passwd:
        url_obj.passwd = '*****'
    url_obj.query = obfuscate_qs(url_obj.query)
    cleaned_uri = str(url_obj)
    log.info("Checking URL for remote cloning/import: %s", cleaned_uri)

    if not test_uri.endswith('info/refs'):
        test_uri = test_uri.rstrip('/') + '/info/refs'

    opener = self._build_opener(url)
    opener.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

    query = urllib.urlencode({"service": 'git-upload-pack'})
    request = urllib2.Request("%s?%s" % (test_uri, query), None, {})

    try:
        log.debug("Trying to open URL %s", cleaned_uri)
        resp = opener.open(request)
        if resp.code != 200:
            raise exceptions.URLError()('Return Code is not 200')
    except Exception as e:
        log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
        # means it cannot be cloned
        raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))

    # now detect if it's proper git repo
    gitdata = resp.read()
    # old style git can return some other format, hence the second check
    looks_like_git = (
        'service=git-upload-pack' in gitdata
        or re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata))
    if not looks_like_git:
        raise exceptions.URLError()(
            "url [%s] does not look like an git" % (cleaned_uri,))

    return True
375 375
@reraise_safe_exceptions
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
    """
    Fetch from ``url`` and apply the selected remote refs locally.

    Refs are applied only when their name starts with one of ``valid_refs``
    and does not end with ``deferred``. With ``update_after_clone`` HEAD is
    set from the remote and the index is built from its tree.
    """
    # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
    remote_refs = self.pull(wire, url, apply_refs=False)
    repo = self._factory.repo(wire)
    if isinstance(valid_refs, list):
        # str.startswith accepts a tuple of prefixes, not a list
        valid_refs = tuple(valid_refs)

    for ref_name in remote_refs:
        # only parse heads/tags and skip so called deferred tags
        if not ref_name.startswith(valid_refs):
            continue
        if ref_name.endswith(deferred):
            continue
        repo[ref_name] = remote_refs[ref_name]

    if not update_after_clone:
        return

    # we want to checkout HEAD
    repo["HEAD"] = remote_refs["HEAD"]
    index.build_index_from_tree(
        repo.path, repo.index_path(), repo.object_store, repo["HEAD"].tree)
394 394
@reraise_safe_exceptions
def branch(self, wire, commit_id):
    """Return the names of branches whose head is exactly ``commit_id`` (cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _branch(_context_uid, _repo_id, _commit_id):
        heads_re = re.compile('^refs/heads')
        return [
            ref_name.split('refs/heads/')[-1]
            for ref_name, ref_sha in self.get_refs(wire).items()
            if heads_re.match(ref_name) and ref_sha == _commit_id]

    return _branch(context_uid, repo_id, commit_id)
409 409
@reraise_safe_exceptions
def commit_branches(self, wire, commit_id):
    """Return the list libgit2 reports for ``branches.with_commit(commit_id)`` (cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _commit_branches(_context_uid, _repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return list(repo.branches.with_commit(_commit_id))

    return _commit_branches(context_uid, repo_id, commit_id)
421 421
@reraise_safe_exceptions
def add_object(self, wire, content):
    """
    Store ``content`` as a blob in the repository and return the blob id.

    The blob is a dulwich object, so it must be written through a dulwich
    repository: the libgit2 ``Repository`` has no ``object_store``
    attribute, and going through it raised AttributeError. This now uses the
    dulwich repo from the factory, the same way `commit` does.
    """
    repo = self._factory.repo(wire)
    blob = objects.Blob()
    blob.set_raw_string(content)
    repo.object_store.add_object(blob)
    return blob.id
430 430
# TODO: this is quite complex, check if that can be simplified
@reraise_safe_exceptions
def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
    """
    Create a commit from in-memory changes using dulwich objects.

    :param commit_data: mapping of attribute name -> value, each set on the
        new dulwich ``Commit`` via ``setattr``.
    :param branch: branch name; created via ``create_branch`` if needed and
        then pointed at the new commit.
    :param commit_tree: id of the tree to start from, or a falsy value to
        start from a new empty tree.
    :param updated: iterable of node dicts; the keys read here are ``path``,
        ``node_path``, ``mode`` and ``content``.
    :param removed: iterable of ``/``-separated paths to delete.
    :return: id of the created commit.
    """
    repo = self._factory.repo(wire)
    object_store = repo.object_store

    # Create tree and populates it with blobs
    commit_tree = commit_tree and repo[commit_tree] or objects.Tree()

    for node in updated:
        # Compute subdirs if needed
        dirpath, nodename = vcspath.split(node['path'])
        # relies on Python 2 `map` returning a list (pop/insert below)
        dirnames = map(safe_str, dirpath and dirpath.split('/') or [])
        parent = commit_tree
        ancestors = [('', parent)]

        # Tries to dig for the deepest existing tree
        while dirnames:
            curdir = dirnames.pop(0)
            try:
                dir_id = parent[curdir][1]
            except KeyError:
                # put curdir back into dirnames and stops
                dirnames.insert(0, curdir)
                break
            else:
                # If found, updates parent
                parent = repo[dir_id]
                ancestors.append((curdir, parent))
        # Now parent is deepest existing tree and we need to create
        # subtrees for dirnames (in reverse order)
        # [this only applies for nodes from added]
        new_trees = []

        blob = objects.Blob.from_string(node['content'])

        if dirnames:
            # If there are trees which should be created we need to build
            # them now (in reverse order)
            reversed_dirnames = list(reversed(dirnames))
            curtree = objects.Tree()
            curtree[node['node_path']] = node['mode'], blob.id
            new_trees.append(curtree)
            for dirname in reversed_dirnames[:-1]:
                newtree = objects.Tree()
                newtree[dirname] = (DIR_STAT, curtree.id)
                new_trees.append(newtree)
                curtree = newtree
            # attach the outermost new directory to the existing parent
            parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
        else:
            parent.add(name=node['node_path'], mode=node['mode'], hexsha=blob.id)

        new_trees.append(parent)
        # Update ancestors
        # (parent_tree, child_tree, child_name) triples, deepest first, so
        # each parent entry is rewritten with its child's current id
        reversed_ancestors = reversed(
            [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
        for parent, tree, path in reversed_ancestors:
            parent[path] = (DIR_STAT, tree.id)
            object_store.add_object(tree)

        object_store.add_object(blob)
        for tree in new_trees:
            object_store.add_object(tree)

    for node_path in removed:
        paths = node_path.split('/')
        tree = commit_tree
        trees = [tree]
        # Traverse deep into the forest...
        for path in paths:
            try:
                obj = repo[tree[path][1]]
                if isinstance(obj, objects.Tree):
                    trees.append(obj)
                    tree = obj
            except KeyError:
                break
        # Cut down the blob and all rotten trees on the way back...
        # relies on Python 2 `zip` returning a list (reversed needs a sequence)
        for path, tree in reversed(zip(paths, trees)):
            del tree[path]
            if tree:
                # This tree still has elements - don't remove it or any
                # of it's parents
                break

    object_store.add_object(commit_tree)

    # Create commit
    commit = objects.Commit()
    commit.tree = commit_tree.id
    for k, v in commit_data.iteritems():
        setattr(commit, k, v)
    object_store.add_object(commit)

    self.create_branch(wire, branch, commit.id)

    # dulwich set-ref
    ref = 'refs/heads/%s' % branch
    repo.refs[ref] = commit.id

    return commit.id
532 532
@reraise_safe_exceptions
def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
    """
    Fetch objects from ``url`` into the wire repository using dulwich.

    :param url: a local path (anything other than ``'default'`` that has no
        ``://``) or a URL fetched over HTTP.
    :param apply_refs: when true, write the fetched refs into the local
        repository (peeled refs are skipped).
    :param refs: optional collection of ref names limiting what is fetched
        and returned; indexed with ``refs[-1]`` below, so it is expected to
        be a sequence.
    :param update_after: when true, set HEAD from the remote HEAD and build
        the index from its tree.
    :return: mapping of remote ref name -> sha (filtered by ``refs``).
    :raises exceptions.AbortException: when the remote is not a Git repo.
    """
    if url != 'default' and '://' not in url:
        client = LocalGitClient(url)
    else:
        url_obj = url_parser(url)
        o = self._build_opener(url)
        # authinfo() returns the URL as its first element; credentials are
        # handled by the opener built above
        url, _ = url_obj.authinfo()
        client = HttpGitClient(base_url=url, opener=o)
    repo = self._factory.repo(wire)

    determine_wants = repo.object_store.determine_wants_all
    if refs:
        def determine_wants_requested(references):
            return [references[r] for r in references if r in refs]
        determine_wants = determine_wants_requested

    try:
        remote_refs = client.fetch(
            path=url, target=repo, determine_wants=determine_wants)
    except NotGitRepository as e:
        log.warning(
            'Trying to fetch from "%s" failed, not a Git repository.', url)
        # Exception can contain unicode which we convert
        raise exceptions.AbortException(e)(repr(e))

    # mikhail: client.fetch() returns all the remote refs, but fetches only
    # refs filtered by `determine_wants` function. We need to filter result
    # as well
    if refs:
        remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}

    if apply_refs:
        # TODO: johbo: Needs proper test coverage with a git repository
        # that contains a tag object, so that we would end up with
        # a peeled ref at this point.
        for k in remote_refs:
            if k.endswith(PEELED_REF_MARKER):
                log.debug("Skipping peeled reference %s", k)
                continue
            repo[k] = remote_refs[k]

        # NOTE(review): nesting under `if apply_refs` is reconstructed from
        # flattened source - confirm against the original file
        if refs and not update_after:
            # mikhail: explicitly set the head to the last ref.
            repo['HEAD'] = remote_refs[refs[-1]]

    if update_after:
        # we want to checkout HEAD
        repo["HEAD"] = remote_refs["HEAD"]
        index.build_index_from_tree(repo.path, repo.index_path(),
                                    repo.object_store, repo["HEAD"].tree)
    return remote_refs
585 585
@reraise_safe_exceptions
def sync_fetch(self, wire, url, refs=None, all_refs=False):
    """
    Mirror refs from ``url`` into the wire repository with the git binary.

    Remote refs are listed with ``git ls-remote`` and then fetched with
    ``git fetch --force --prune`` in chunks of 4096 refspecs.

    :param refs: optional sha or list/tuple of shas; when given, only remote
        refs pointing at one of them are fetched.
    :param all_refs: when false (default) ``ls-remote`` is limited to
        ``--heads`` and ``--tags``; when true every remote ref is listed.
    :return: ordered mapping of remote ref name -> sha for every listed ref
        (HEAD, peeled refs and duplicates excluded), whether fetched or not.
    """
    # not used afterwards - presumably opened to validate the path
    repo = self._factory.repo(wire)
    if refs and not isinstance(refs, (list, tuple)):
        refs = [refs]

    config = self._wire_to_config(wire)
    # get all remote refs we'll use to fetch later
    cmd = ['ls-remote']
    if not all_refs:
        cmd += ['--heads', '--tags']
    cmd += [url]
    output, __ = self.run_git_command(
        wire, cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    remote_refs = collections.OrderedDict()
    fetch_refs = []

    for ref_line in output.splitlines():
        sha, ref = ref_line.split('\t')
        sha = sha.strip()
        if ref in remote_refs:
            # duplicate, skip
            continue
        if ref.endswith(PEELED_REF_MARKER):
            log.debug("Skipping peeled reference %s", ref)
            continue
        # don't sync HEAD
        if ref in ['HEAD']:
            continue

        remote_refs[ref] = sha

        if refs and sha in refs:
            # we filter fetch using our specified refs
            fetch_refs.append('{}:{}'.format(ref, ref))
        elif not refs:
            fetch_refs.append('{}:{}'.format(ref, ref))
    log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))

    if fetch_refs:
        # chunked so a single git invocation never gets more than 4096
        # refspec arguments
        for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
            fetch_refs_chunks = list(chunk)
            log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
            _out, _err = self.run_git_command(
                wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
                fail_on_stderr=False,
                _copts=self._remote_conf(config),
                extra_env={'GIT_TERMINAL_PROMPT': '0'})

    return remote_refs
633 639
@reraise_safe_exceptions
def sync_push(self, wire, url, refs=None):
    """
    Mirror-push the wire repository to ``url`` with ``git push --mirror``.

    Returns None without pushing when ``check_url`` is falsy. ``refs`` is
    accepted but not used.
    """
    if not self.check_url(url, wire):
        return
    remote_opts = self._remote_conf(self._wire_to_config(wire))
    # result is discarded - presumably opened to validate the path
    self._factory.repo(wire)
    push_cmd = ['push', url, '--mirror']
    self.run_git_command(
        wire, push_cmd, fail_on_stderr=False,
        _copts=remote_opts,
        extra_env={'GIT_TERMINAL_PROMPT': '0'})
644 650
@reraise_safe_exceptions
def get_remote_refs(self, wire, url):
    """Return ``get_refs()`` of the dulwich repository opened at ``url``; ``wire`` is unused."""
    return Repo(url).get_refs()
649 655
@reraise_safe_exceptions
def get_description(self, wire):
    """Return the description reported by the dulwich repository."""
    return self._factory.repo(wire).get_description()
654 660
@reraise_safe_exceptions
def get_missing_revs(self, wire, rev1, rev2, path2):
    """
    Return commit ids reachable from ``rev2`` but not from ``rev1``.

    Objects are first fetched in both directions between the wire
    repository and the repository at ``path2``; the walk then runs on the
    ``path2`` repository.
    """
    local_repo = self._factory.repo(wire)
    LocalGitClient(thin_packs=False).fetch(path2, local_repo)

    wire_remote = wire.copy()
    wire_remote['path'] = path2
    repo_remote = self._factory.repo(wire_remote)
    LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)

    walker = repo_remote.get_walker(include=[rev2], exclude=[rev1])
    return [entry.commit.id for entry in walker]
669 675
@reraise_safe_exceptions
def get_object(self, wire, sha):
    """
    Resolve ``sha`` with ``revparse_single`` and describe the result.

    Tag objects are dereferenced to their target. Non-tag commits contained
    in no branch are reported as missing. Returns a dict with ``id``,
    ``type``, ``commit_id`` and ``idx``; raises
    ``exceptions.LookupException`` when the lookup fails.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _get_object(_context_uid, _repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
            try:
                commit = repo.revparse_single(sha)
            except (KeyError, ValueError) as e:
                raise exceptions.LookupException(e)(missing_commit_err)

            if isinstance(commit, pygit2.Tag):
                commit = repo.get(commit.target)
            else:
                # check for dangling commit
                containing = list(repo.branches.with_commit(commit.hex))
                if not containing:
                    raise exceptions.LookupException(None)(missing_commit_err)

            commit_id = commit.hex
            return {
                'id': commit_id,
                'type': self._type_id_to_name(commit.type),
                'commit_id': commit_id,
                'idx': 0
            }

    return _get_object(context_uid, repo_id, sha)
706 712
@reraise_safe_exceptions
def get_refs(self, wire):
    """Return ``{ref name: target hex}`` for refs under ``refs/heads/`` and ``refs/tags/`` (cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _get_refs(_context_uid, _repo_id):
        wanted = re.compile('^refs/(heads|tags)/')
        with self._factory.repo_libgit2(wire) as repo:
            return {
                ref.name: ref.target.hex
                for ref in repo.listall_reference_objects()
                if wanted.match(ref.name)}

    return _get_refs(context_uid, repo_id)
720 726
@reraise_safe_exceptions
def get_branch_pointers(self, wire):
    """Return ``{target hex: branch shorthand}`` for refs under ``refs/heads`` (cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _get_branch_pointers(_context_uid, _repo_id):
        repo_init = self._factory.repo_libgit2(wire)
        heads_re = re.compile('^refs/heads')
        with repo_init as repo:
            # several branches on one commit collapse to the last one seen
            return {
                ref.target.hex: ref.shorthand
                for ref in repo.listall_reference_objects()
                if heads_re.match(ref.name)}

    return _get_branch_pointers(context_uid, repo_id)
734 740
@reraise_safe_exceptions
def head(self, wire, show_exc=True):
    """
    Return the hex id HEAD peels to (cached).

    When resolving fails, the error is re-raised if ``show_exc`` is true;
    otherwise None is returned.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _head(_context_uid, _repo_id, _show_exc):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                return repo.head.peel().hex
            except Exception:
                if show_exc:
                    raise
                return None

    return _head(context_uid, repo_id, show_exc)
748 754
@reraise_safe_exceptions
def init(self, wire):
    """Initialise a dulwich repository at the wire path and keep it on ``self.repo``."""
    self.repo = Repo.init(str_to_dulwich(wire['path']))
753 759
@reraise_safe_exceptions
def init_bare(self, wire):
    """Initialise a bare dulwich repository at the wire path and keep it on ``self.repo``."""
    self.repo = Repo.init_bare(str_to_dulwich(wire['path']))
758 764
@reraise_safe_exceptions
def revision(self, wire, rev):
    """
    Return ``{'id': hex}`` for the object ``rev`` (cached).

    A ``tree`` key with the tree hex id is added when the object has a
    ``tree_id`` attribute.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _revision(_context_uid, _repo_id, _rev):
        with self._factory.repo_libgit2(wire) as repo:
            found = repo[rev]
            obj_data = {'id': found.id.hex}
            # tree objects itself don't have tree_id attribute
            if hasattr(found, 'tree_id'):
                obj_data['tree'] = found.tree_id.hex
            return obj_data

    return _revision(context_uid, repo_id, rev)
777 783
@reraise_safe_exceptions
def date(self, wire, commit_id):
    """
    Return ``[commit_time, commit_time_offset]`` for ``commit_id`` (cached).

    Objects without a ``commit_time`` attribute are resolved through
    ``get_object()`` first.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _date(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            if not hasattr(commit, 'commit_time'):
                commit = commit.get_object()

            # TODO(marcink): check dulwich difference of offset vs timezone
            return [commit.commit_time, commit.commit_time_offset]

    return _date(repo_id, commit_id)
796 802
@reraise_safe_exceptions
def author(self, wire, commit_id):
    """
    Return the author of ``commit_id`` as unicode (cached).

    Formatted as ``name <email>`` when an email is present, otherwise the
    raw name. Objects without an ``author`` attribute are resolved through
    ``get_object()`` first.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _author(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            if not hasattr(commit, 'author'):
                commit = commit.get_object()
            signature = commit.author

            if not signature.email:
                return u"{}".format(signature.raw_name)
            return u"{} <{}>".format(signature.name, signature.email)

    return _author(repo_id, commit_id)
816 822
@reraise_safe_exceptions
def message(self, wire, commit_id):
    """Return the ``message`` attribute of the object ``commit_id`` (cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _message(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[commit_id].message

    return _message(repo_id, commit_id)
827 833
@reraise_safe_exceptions
def parents(self, wire, commit_id):
    """
    Return the parent ids of ``commit_id`` as hex strings (cached).

    Objects without a ``parent_ids`` attribute are resolved through
    ``get_object()`` first.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _parents(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            if not hasattr(commit, 'parent_ids'):
                commit = commit.get_object()
            return [parent_id.hex for parent_id in commit.parent_ids]

    return _parents(repo_id, commit_id)
843 849
@reraise_safe_exceptions
def children(self, wire, commit_id):
    """
    Return the ids of the direct children of ``commit_id`` (cached).

    Parses ``git rev-list --all --children``: for each output line that
    starts with ``commit_id``, the remaining space-separated fields are
    collected.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _children(_repo_id, _commit_id):
        output, __ = self.run_git_command(
            wire, ['rev-list', '--all', '--children'])

        starts_with_commit = re.compile(r'^%s' % commit_id)
        return [
            child_id
            for line in output.splitlines()
            if starts_with_commit.match(line)
            for child_id in line.split(' ')[1:]]

    return _children(repo_id, commit_id)
861 867
@reraise_safe_exceptions
def set_refs(self, wire, key, value):
    """Create the reference ``key`` pointing at ``value``, passing ``force=True``."""
    with self._factory.repo_libgit2(wire) as repo:
        references = repo.references
        references.create(key, value, force=True)
867 873
@reraise_safe_exceptions
def create_branch(self, wire, branch_name, commit_id, force=False):
    """
    Create local branch ``branch_name`` at ``commit_id``.

    Without ``force`` the branch is created only if it isn't existing;
    with ``force`` creation is always attempted with ``force`` passed on.
    """
    with self._factory.repo_libgit2(wire) as repo:
        commit = repo[commit_id]

        # `force` short-circuits, so the existence lookup only runs when
        # it is needed
        if force or not repo.branches.get(branch_name):
            repo.branches.local.create(branch_name, commit, force=force)
879 885
880 886 @reraise_safe_exceptions
def remove_ref(self, wire, key):
    """
    Delete the reference named ``key`` from the libgit2 repo.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(key)
885 891
886 892 @reraise_safe_exceptions
def tag_remove(self, wire, tag_name):
    """
    Delete the reference ``refs/tags/<tag_name>`` from the libgit2 repo.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete('refs/tags/{}'.format(tag_name))
892 898
893 899 @reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Return the tree changes between two objects as a list.

    A falsy ``source_id`` means there is no source tree and ``None`` is
    handed to the object store instead.
    """
    # TODO(marcink): remove this seems it's only used by tests
    repo = self._factory.repo(wire)

    if source_id:
        source_tree = repo[source_id].tree
    else:
        source_tree = None
    target_tree = repo[target_id].tree

    return list(repo.object_store.tree_changes(source_tree, target_tree))
901 907
902 908 @reraise_safe_exceptions
def tree_and_type_for_path(self, wire, commit_id, path):
    """
    Look up ``path`` in the tree of ``commit_id``.

    Returns a ``(hex id, type, filemode)`` tuple of the found entry, or
    ``(None, None, None)`` when the lookup raises ``KeyError``.

    The result is wrapped by the region's conditional cache; the condition
    and the cache arguments come from ``self._cache_on(wire)``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            try:
                entry = commit.tree[path]
            except KeyError:
                return None, None, None
            else:
                return entry.id.hex, entry.type, entry.filemode

    return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
919 925
920 926 @reraise_safe_exceptions
def tree_items(self, wire, tree_id):
    """
    List the entries of the tree ``tree_id``.

    Returns a list of ``(name, filemode, hex id, type)`` tuples, one per
    entry. Raises ``ObjectMissing`` when ``tree_id`` is not in the repo.

    The result is wrapped by the region's conditional cache; the condition
    and the repo id used as cache argument come from ``self._cache_on(wire)``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_items(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing('No tree with id: {}'.format(tree_id))

            # NOTE(marcink): submodules we translate to 'link' for backward compat
            return [
                (entry.name, entry.filemode, entry.hex,
                 'link' if entry.type == 'commit' else entry.type)
                for entry in tree
            ]

    return _tree_items(repo_id, tree_id)
946 952
947 953 @reraise_safe_exceptions
def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Old version that uses subprocess to call diff

    When ``commit_id_1`` equals ``self.EMPTY_COMMIT``, ``git show`` of
    ``commit_id_2`` is used and everything before the first line starting
    with ``diff`` is cut off; otherwise ``git diff`` between both commits
    is returned as is. ``file_filter`` limits the command to one path and
    ``opt_ignorews`` adds ``--ignore-all-space``.
    """
    from_empty = commit_id_1 == self.EMPTY_COMMIT

    flags = [
        '-U%s' % context,
        '--patch',
        '--binary',
        '--find-renames',
        '--no-indent-heuristic',
    ]
    if opt_ignorews:
        flags.append('--ignore-all-space')

    if from_empty:
        cmd = ['show'] + flags + [commit_id_2]
    else:
        cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
    if file_filter:
        cmd.extend(['--', file_filter])

    diff, __ = self.run_git_command(wire, cmd)
    if not from_empty:
        return diff

    # 'show' output begins with commit information, keep only what follows
    # from the first 'diff' line on (nothing when there is no such line)
    lines = diff.splitlines()
    start = next(
        (idx for idx, line in enumerate(lines) if line.startswith('diff')),
        len(lines))
    # Append new line just like 'diff' command do
    return '\n'.join(lines[start:]) + '\n'
987 993
988 994 @reraise_safe_exceptions
def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Return the patch text between two commits, computed via libgit2.

    When ``commit_id_1`` equals ``self.EMPTY_COMMIT`` the tree of
    ``commit_id_2`` is diffed without a second tree; otherwise it is diffed
    against the tree of ``commit_id_1`` and rename detection is run on the
    result. With ``file_filter`` set, only the patch whose old file path
    equals the filter is returned. An empty string is returned when there
    is no patch.
    """
    with self._factory.repo_libgit2(wire) as repo:
        diff_flags = pygit2.GIT_DIFF_SHOW_BINARY
        if opt_ignorews:
            diff_flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE
        diff_kwargs = {
            'flags': diff_flags,
            'context_lines': context,
            'swap': True,
        }

        if commit_id_1 == self.EMPTY_COMMIT:
            diff_obj = repo[commit_id_2].tree.diff_to_tree(**diff_kwargs)
        else:
            target = repo[commit_id_2]
            base = repo[commit_id_1]
            diff_obj = target.tree.diff_to_tree(base.tree, **diff_kwargs)
            diff_obj.find_similar(flags=pygit2.GIT_DIFF_FIND_RENAMES)

        if not file_filter:
            return diff_obj.patch or ''

        for patch in diff_obj:
            if patch.delta.old_file.path == file_filter:
                return patch.patch or ''
        # no matching path == no diff
        return ''
1020 1026
1021 1027 @reraise_safe_exceptions
def node_history(self, wire, commit_id, path, limit):
    """
    Return ids of commits reachable from ``commit_id`` that touch ``path``.

    ``limit == 1`` uses ``git rev-list -1``; any other value uses
    ``git log`` with ``-n <limit>`` added only for a truthy ``limit``.
    Every 40 character hex string found in the command output is returned.

    The result is wrapped by the region's conditional cache; the condition
    and the cache arguments come from ``self._cache_on(wire)``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
        if limit == 1:
            # optimize for n==1, rev-list is much faster for that use-case
            cmd = ['rev-list', '-1', commit_id, '--', path]
        else:
            count_args = ['-n', str(safe_int(limit, 0))] if limit else []
            cmd = ['log'] + count_args + [
                '--pretty=format: %H', '-s', commit_id, '--', path]

        output, __ = self.run_git_command(wire, cmd)
        return re.findall(r'[0-9a-fA-F]{40}', output)

    return _node_history(context_uid, repo_id, commit_id, path, limit)
1040 1046
1041 1047 @reraise_safe_exceptions
def node_annotate(self, wire, commit_id, path):
    """
    Return ``git blame`` information for ``path`` at ``commit_id``.

    The result is a list of ``(line_no, commit_id, line)`` tuples with
    ``line_no`` starting at 1; each blame output line is split on its first
    space into the commit id and the remainder.
    """
    # -l ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r commit_id ==> blames for the given commit
    blame_cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
    output, __ = self.run_git_command(wire, blame_cmd)

    annotated = []
    # whatever follows the last '\n' of the output is dropped
    for line_no, blame_line in enumerate(output.split('\n')[:-1], 1):
        blamed_id, content = blame_line.split(' ', 1)
        annotated.append((line_no, blamed_id, content))
    return annotated
1056 1062
1057 1063 @reraise_safe_exceptions
def update_server_info(self, wire):
    """
    Call the module level ``update_server_info`` on the repo of ``wire``.
    """
    update_server_info(self._factory.repo(wire))
1061 1067
1062 1068 @reraise_safe_exceptions
def get_all_commit_ids(self, wire):
    """
    Return the lines printed by
    ``git rev-list --reverse --date-order --branches --tags``.

    Any exception raised while running or reading the command results in
    an empty list.

    The result is wrapped by the region's conditional cache; the condition
    and the cache arguments come from ``self._cache_on(wire)``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)

    @self.region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids(_context_uid, _repo_id):
        rev_list_cmd = [
            'rev-list', '--reverse', '--date-order', '--branches', '--tags']
        try:
            stdout, __ = self.run_git_command(wire, rev_list_cmd)
            return stdout.splitlines()
        except Exception:
            # Can be raised for empty repositories
            return []

    return _get_all_commit_ids(context_uid, repo_id)
1077 1083
1078 1084 @reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
    """
    Run the git executable with ``cmd`` and return ``(stdout, stderr)``.

    Keys of ``opts`` consumed here (everything else is passed on to
    ``subprocessio.SubprocessIOChunker``):

    * ``_bare`` - when present, ``-c core.quotepath=false`` is not added
    * ``_safe`` - when present, an ``EnvironmentError``/``OSError`` does not
      raise, ``('', err)`` is returned with ``err`` being the exception
    * ``_copts`` - extra git options placed before ``cmd``
    * ``extra_env`` - mapping merged into the environment of the process

    ``cwd`` is set to ``wire['path']`` when that is an existing directory.
    """
    repo_path = wire.get('path', None)
    if repo_path and os.path.isdir(repo_path):
        opts['cwd'] = repo_path

    # `_bare` and `_safe` act by mere presence, their value is ignored
    is_bare = '_bare' in opts
    opts.pop('_bare', None)
    safe_call = '_safe' in opts
    opts.pop('_safe', None)  # no exc on failure

    _copts = [] if is_bare else ['-c', 'core.quotepath=false']
    _copts.extend(opts.pop('_copts', None) or [])

    gitenv = os.environ.copy()
    gitenv.update(opts.pop('extra_env', {}))
    # need to clean fix GIT_DIR !
    gitenv.pop('GIT_DIR', None)
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
    gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

    cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
    _opts = {'env': gitenv, 'shell': False}

    try:
        _opts.update(opts)
        proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
        return ''.join(proc), ''.join(proc.error)
    except (EnvironmentError, OSError) as err:
        # the joined command is the human friendly form used in the message
        tb_err = ("Couldn't run git command (%s).\n"
                  "Original error was:%s\n"
                  "Call options:%s\n"
                  % (' '.join(cmd), err, _opts))
        log.exception(tb_err)
        if safe_call:
            return '', err
        raise exceptions.VcsException()(tb_err)
1127 1133
1128 1134 @reraise_safe_exceptions
def install_hooks(self, wire, force=False):
    """
    Install the git hooks for the repo at ``wire['path']``.

    ``force`` is handed to ``install_git_hooks`` as ``force_create``; the
    return value of that call is returned unchanged.
    """
    from vcsserver.hook_utils import install_git_hooks

    is_bare = self.bare(wire)
    repo_path = wire['path']
    return install_git_hooks(repo_path, is_bare, force_create=force)
1134 1140
1135 1141 @reraise_safe_exceptions
def get_hooks_info(self, wire):
    """
    Return the versions of the installed git pre and post hooks.

    The result is a dict with the keys ``pre_version`` and ``post_version``.
    """
    from vcsserver.hook_utils import (
        get_git_pre_hook_version, get_git_post_hook_version)

    is_bare = self.bare(wire)
    repo_path = wire['path']

    pre_version = get_git_pre_hook_version(repo_path, is_bare)
    post_version = get_git_post_hook_version(repo_path, is_bare)
    return {
        'pre_version': pre_version,
        'post_version': post_version,
    }
General Comments 0
You need to be logged in to leave comments. Login now