##// END OF EJS Templates
git: use peeled commit hashes for get_refs so that annotated tags...
dan -
r65:0b6ca72a default
parent child Browse files
Show More
@@ -1,588 +1,575 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2016 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import logging
19 19 import os
20 20 import posixpath as vcspath
21 21 import re
22 22 import stat
23 23 import urllib
24 24 import urllib2
25 25 from functools import wraps
26 26
27 27 from dulwich import index, objects
28 28 from dulwich.client import HttpGitClient, LocalGitClient
29 29 from dulwich.errors import (
30 30 NotGitRepository, ChecksumMismatch, WrongObjectException,
31 31 MissingCommitError, ObjectMissing, HangupException,
32 32 UnexpectedCommandError)
33 33 from dulwich.repo import Repo as DulwichRepo, Tag
34 34 from dulwich.server import update_server_info
35 35
36 36 from vcsserver import exceptions, settings, subprocessio
37 37 from vcsserver.utils import safe_str
38 38 from vcsserver.base import RepoFactory
39 39 from vcsserver.hgcompat import (
40 40 hg_url, httpbasicauthhandler, httpdigestauthhandler)
41 41
42 42
# Mode value written for directory (sub-tree) entries when building trees.
DIR_STAT = stat.S_IFDIR
# NOTE: stat.S_IFMT is a function; FILE_MODE(mode) extracts the file-type
# bits of a mode so they can be compared against GIT_LINK.
FILE_MODE = stat.S_IFMT
# Dulwich's mode constant for gitlink entries (presumably submodules).
GIT_LINK = objects.S_IFGITLINK

log = logging.getLogger(__name__)
48 48
49 49
def reraise_safe_exceptions(func):
    """Decorator that converts Dulwich exceptions to something neutral.

    Checksum, wrong-object, missing-commit and missing-object errors are
    re-raised as ``exceptions.LookupException``; hang-up and
    unexpected-command errors are re-raised as ``exceptions.VcsException``.
    In both cases only the original error's ``message`` is carried over.
    Any other exception propagates untouched.
    """
    @wraps(func)
    def translated(*call_args, **call_kwargs):
        try:
            outcome = func(*call_args, **call_kwargs)
        except (ChecksumMismatch, WrongObjectException, MissingCommitError,
                ObjectMissing) as lookup_error:
            raise exceptions.LookupException(lookup_error.message)
        except (HangupException, UnexpectedCommandError) as protocol_error:
            raise exceptions.VcsException(protocol_error.message)
        else:
            return outcome
    return translated
62 62
63 63
class Repo(DulwichRepo):
    """
    Dulwich ``Repo`` subclass that closes itself when it is destroyed.

    Dulwich sometimes keeps .idx file descriptors open, which leads to a
    "Too many open files" error. All opened file descriptors therefore have
    to be closed once the repo object goes away.

    TODO: mikhail: please check if we need this wrapper after updating dulwich
    to 0.12.0 +
    """
    def __del__(self):
        # Nothing to close when the instance never got an object store
        # (presumably because construction did not complete).
        if not hasattr(self, 'object_store'):
            return
        self.close()
78 78
79 79
class GitFactory(RepoFactory):
    """Repo factory that opens Git repositories through ``Repo``."""

    def _create_repo(self, wire, create):
        """Open the repository located at ``wire['path']``.

        ``create`` is part of the factory interface but is not consulted
        here; the repository is always opened, never initialised.
        """
        return Repo(str_to_dulwich(wire['path']))
85 85
86 86
class GitRemote(object):
    """
    Git operations exposed by the VCS server, implemented on top of Dulwich.

    Most methods take a ``wire`` mapping as first argument; it is handed to
    ``self._factory.repo(wire)`` to obtain the repository to operate on.
    """

    def __init__(self, factory):
        """Store the repo factory and set up the bulk-request dispatch table.

        :param factory: object whose ``repo(wire)`` returns a repository.
        """
        self._factory = factory

        # Maps a bulk attribute name to the method that computes it; the
        # extra arguments per attribute are assembled in ``bulk_request``.
        self._bulk_methods = {
            "author": self.commit_attribute,
            "date": self.get_object_attrs,
            "message": self.commit_attribute,
            "parents": self.commit_attribute,
            "_commit": self.revision,
        }

    def _assign_ref(self, wire, ref, commit_id):
        """Point ``ref`` at ``commit_id`` in the repository."""
        repo = self._factory.repo(wire)
        repo[ref] = commit_id

    @reraise_safe_exceptions
    def add_object(self, wire, content):
        """Store ``content`` as a new blob and return the blob id."""
        repo = self._factory.repo(wire)
        blob = objects.Blob()
        blob.set_raw_string(content)
        repo.object_store.add_object(blob)
        return blob.id

    @reraise_safe_exceptions
    def assert_correct_path(self, wire):
        """Raise ``AbortException`` if the wire path is not a Git repo."""
        try:
            self._factory.repo(wire)
        except NotGitRepository as e:
            # Exception can contain unicode which we convert
            raise exceptions.AbortException(repr(e))

    @reraise_safe_exceptions
    def bare(self, wire):
        """Return the ``bare`` attribute of the repository."""
        repo = self._factory.repo(wire)
        return repo.bare

    @reraise_safe_exceptions
    def blob_as_pretty_string(self, wire, sha):
        """Return ``as_pretty_string()`` of the object stored under ``sha``."""
        repo = self._factory.repo(wire)
        return repo[sha].as_pretty_string()

    @reraise_safe_exceptions
    def blob_raw_length(self, wire, sha):
        """Return ``raw_length()`` of the object stored under ``sha``."""
        repo = self._factory.repo(wire)
        blob = repo[sha]
        return blob.raw_length()

    @reraise_safe_exceptions
    def bulk_request(self, wire, rev, pre_load):
        """Compute several attributes of ``rev`` in one call.

        :param pre_load: iterable of attribute names; each must be a key of
            ``self._bulk_methods``.
        :returns: dict mapping each requested attribute name to its value.
        :raises exceptions.VcsException: for an unknown attribute name.
        """
        result = {}
        for attr in pre_load:
            # NOTE(review): the ``try`` also covers the method call, so a
            # KeyError raised while computing the attribute is reported as
            # "Unknown bulk attribute" too. Left as is because callers may
            # rely on the VcsException.
            try:
                method = self._bulk_methods[attr]
                args = [wire, rev]
                if attr == "date":
                    args.extend(["commit_time", "commit_timezone"])
                elif attr in ["author", "message", "parents"]:
                    args.append(attr)
                result[attr] = method(*args)
            except KeyError:
                raise exceptions.VcsException(
                    "Unknown bulk attribute: %s" % attr)
        return result

    def _build_opener(self, url):
        """Build a urllib2 opener, with auth handlers if ``url`` has any.

        Basic and digest auth handlers are installed only when the parsed
        URL yields authentication info.
        """
        handlers = []
        url_obj = hg_url(url)
        _, authinfo = url_obj.authinfo()

        if authinfo:
            # create a password manager
            passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
            passmgr.add_password(*authinfo)

            handlers.extend((httpbasicauthhandler(passmgr),
                             httpdigestauthhandler(passmgr)))

        return urllib2.build_opener(*handlers)

    @reraise_safe_exceptions
    def check_url(self, url, config):
        """Check that ``url`` answers like a Git repository over HTTP.

        Requests ``<url>/info/refs?service=git-upload-pack`` and inspects
        the response body. ``config`` is accepted but not used.

        :returns: ``True`` when the URL looks like a Git repository.
        :raises urllib2.URLError: if the request fails, the status is not
            200, or the body does not look like Git ref advertisement.
        """
        url_obj = hg_url(url)
        test_uri, _ = url_obj.authinfo()
        # Mask the password so it cannot leak through error messages.
        url_obj.passwd = '*****'
        cleaned_uri = str(url_obj)

        if not test_uri.endswith('info/refs'):
            test_uri = test_uri.rstrip('/') + '/info/refs'

        o = self._build_opener(url)
        o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

        q = {"service": 'git-upload-pack'}
        qs = '?%s' % urllib.urlencode(q)
        cu = "%s%s" % (test_uri, qs)
        req = urllib2.Request(cu, None, {})

        try:
            resp = o.open(req)
            if resp.code != 200:
                raise Exception('Return Code is not 200')
        except Exception as e:
            # means it cannot be cloned
            raise urllib2.URLError("[%s] org_exc: %s" % (cleaned_uri, e))

        # now detect if it's proper git repo; always release the connection
        try:
            gitdata = resp.read()
        finally:
            resp.close()

        if 'service=git-upload-pack' in gitdata:
            pass
        elif re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata):
            # old style git can return some other format !
            pass
        else:
            raise urllib2.URLError(
                "url [%s] does not look like an git" % (cleaned_uri,))

        return True

    @reraise_safe_exceptions
    def clone(self, wire, url, deferred, valid_refs, update_after_clone):
        """Fetch from ``url`` and copy the accepted refs into the repo.

        :param deferred: suffix; remote refs ending with it are skipped.
        :param valid_refs: prefix or tuple/list of prefixes a remote ref
            must start with to be copied.
        :param update_after_clone: when true, set ``HEAD`` to the remote
            ``HEAD`` and build the index from its tree.
        """
        remote_refs = self.fetch(wire, url, apply_refs=False)
        repo = self._factory.repo(wire)
        # str.startswith accepts a tuple of prefixes but not a list.
        if isinstance(valid_refs, list):
            valid_refs = tuple(valid_refs)

        for k in remote_refs:
            # only parse heads/tags and skip so called deferred tags
            if k.startswith(valid_refs) and not k.endswith(deferred):
                repo[k] = remote_refs[k]

        if update_after_clone:
            # we want to checkout HEAD
            repo["HEAD"] = remote_refs["HEAD"]
            index.build_index_from_tree(repo.path, repo.index_path(),
                                        repo.object_store, repo["HEAD"].tree)

    # TODO: this is quite complex, check if that can be simplified
    @reraise_safe_exceptions
    def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
        """Create a commit on ``branch`` from updated and removed nodes.

        :param commit_data: mapping of attribute name to value; every pair
            is set on the new commit object via ``setattr``.
        :param branch: branch name; ``refs/heads/<branch>`` is moved to the
            new commit.
        :param commit_tree: id of the tree to start from, or a false value
            to start from an empty tree.
        :param updated: iterable of node dicts; the keys read here are
            ``path``, ``node_path``, ``mode`` and ``content``.
        :param removed: iterable of '/'-separated paths to delete.
        :returns: id of the new commit.
        """
        repo = self._factory.repo(wire)
        object_store = repo.object_store

        # Create tree and populates it with blobs
        commit_tree = repo[commit_tree] if commit_tree else objects.Tree()

        for node in updated:
            # Compute subdirs if needed
            dirpath = vcspath.dirname(node['path'])
            dirnames = (
                [safe_str(name) for name in dirpath.split('/')]
                if dirpath else [])
            parent = commit_tree
            ancestors = [('', parent)]

            # Tries to dig for the deepest existing tree
            while dirnames:
                curdir = dirnames.pop(0)
                try:
                    dir_id = parent[curdir][1]
                except KeyError:
                    # put curdir back into dirnames and stops
                    dirnames.insert(0, curdir)
                    break
                else:
                    # If found, updates parent
                    parent = repo[dir_id]
                    ancestors.append((curdir, parent))
            # Now parent is deepest existing tree and we need to create
            # subtrees for dirnames (in reverse order)
            # [this only applies for nodes from added]
            new_trees = []

            blob = objects.Blob.from_string(node['content'])

            if dirnames:
                # If there are trees which should be created we need to build
                # them now (in reverse order)
                reversed_dirnames = list(reversed(dirnames))
                curtree = objects.Tree()
                curtree[node['node_path']] = node['mode'], blob.id
                new_trees.append(curtree)
                for dirname in reversed_dirnames[:-1]:
                    newtree = objects.Tree()
                    newtree[dirname] = (DIR_STAT, curtree.id)
                    new_trees.append(newtree)
                    curtree = newtree
                parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
            else:
                parent.add(
                    name=node['node_path'], mode=node['mode'], hexsha=blob.id)

            new_trees.append(parent)
            # Update ancestors: walk from the deepest pair upwards so that
            # every changed tree id is re-linked into the tree above it.
            reversed_ancestors = reversed(
                [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
            for upper_tree, lower_tree, entry_name in reversed_ancestors:
                upper_tree[entry_name] = (DIR_STAT, lower_tree.id)
                object_store.add_object(lower_tree)

            object_store.add_object(blob)
            for new_tree in new_trees:
                object_store.add_object(new_tree)

        for node_path in removed:
            paths = node_path.split('/')
            tree = commit_tree
            trees = [tree]
            # Traverse deep into the forest...
            for path in paths:
                try:
                    obj = repo[tree[path][1]]
                    if isinstance(obj, objects.Tree):
                        trees.append(obj)
                        tree = obj
                except KeyError:
                    break
            # Cut down the blob and all rotten trees on the way back...
            # NOTE(review): sub-trees modified here are neither added to the
            # object store nor re-linked into their parents; only
            # ``commit_tree`` is stored below. Confirm that removals inside
            # a directory that keeps other entries are persisted.
            for path, tree in reversed(list(zip(paths, trees))):
                del tree[path]
                if tree:
                    # This tree still has elements - don't remove it or any
                    # of its parents
                    break

        object_store.add_object(commit_tree)

        # Create commit
        commit = objects.Commit()
        commit.tree = commit_tree.id
        for k, v in commit_data.iteritems():
            setattr(commit, k, v)
        object_store.add_object(commit)

        ref = 'refs/heads/%s' % branch
        repo.refs[ref] = commit.id

        return commit.id

    @reraise_safe_exceptions
    def fetch(self, wire, url, apply_refs=True, refs=None):
        """Fetch objects from ``url`` into the repository.

        A ``url`` other than ``'default'`` that contains no ``://`` is
        fetched with a local client, anything else over HTTP.

        :param apply_refs: when true, write the fetched refs into the repo
            (skipping peeled ``^{}`` entries) and, if ``refs`` is given,
            set ``HEAD`` to the last requested ref.
        :param refs: optional sequence of ref names to restrict the fetch.
        :returns: the (filtered) remote refs when ``apply_refs`` is false,
            otherwise ``None``.
        :raises exceptions.AbortException: if the source is not a Git repo.
        """
        if url != 'default' and '://' not in url:
            client = LocalGitClient(url)
        else:
            url_obj = hg_url(url)
            o = self._build_opener(url)
            url, _ = url_obj.authinfo()
            client = HttpGitClient(base_url=url, opener=o)
        repo = self._factory.repo(wire)

        determine_wants = repo.object_store.determine_wants_all
        if refs:
            def determine_wants_requested(references):
                return [references[r] for r in references if r in refs]
            determine_wants = determine_wants_requested

        try:
            remote_refs = client.fetch(
                path=url, target=repo, determine_wants=determine_wants)
        except NotGitRepository:
            log.warning(
                'Trying to fetch from "%s" failed, not a Git repository.', url)
            raise exceptions.AbortException()

        # mikhail: client.fetch() returns all the remote refs, but fetches only
        # refs filtered by `determine_wants` function. We need to filter result
        # as well
        if refs:
            remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}

        if apply_refs:
            # TODO: johbo: Needs proper test coverage with a git repository
            # that contains a tag object, so that we would end up with
            # a peeled ref at this point.
            PEELED_REF_MARKER = '^{}'
            for k in remote_refs:
                if k.endswith(PEELED_REF_MARKER):
                    log.info("Skipping peeled reference %s", k)
                    continue
                repo[k] = remote_refs[k]

            if refs:
                # mikhail: explicitly set the head to the last ref.
                repo['HEAD'] = remote_refs[refs[-1]]

            # TODO: mikhail: should we return remote_refs here to be
            # consistent?
        else:
            return remote_refs

    @reraise_safe_exceptions
    def get_remote_refs(self, wire, url):
        """Return the refs of the repository opened directly at ``url``.

        ``wire`` is not used; the repo is opened with ``Repo(url)``.
        """
        repo = Repo(url)
        return repo.get_refs()

    @reraise_safe_exceptions
    def get_description(self, wire):
        """Return the repository description."""
        repo = self._factory.repo(wire)
        return repo.get_description()

    @reraise_safe_exceptions
    def get_file_history(self, wire, file_path, commit_id, limit):
        """Return ids of commits touching ``file_path``, from ``commit_id``.

        :param limit: passed to the walker as ``max_entries``.
        """
        repo = self._factory.repo(wire)
        include = [commit_id]
        paths = [file_path]

        walker = repo.get_walker(include, paths=paths, max_entries=limit)
        return [x.commit.id for x in walker]

    @reraise_safe_exceptions
    def get_missing_revs(self, wire, rev1, rev2, path2):
        """Return commit ids reachable from ``rev2`` but not from ``rev1``.

        Objects are first fetched in both directions between the wire repo
        and the repo at ``path2``; the walk itself runs on the latter.
        """
        repo = self._factory.repo(wire)
        LocalGitClient(thin_packs=False).fetch(path2, repo)

        wire_remote = wire.copy()
        wire_remote['path'] = path2
        repo_remote = self._factory.repo(wire_remote)
        LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)

        revs = [
            x.commit.id
            for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
        return revs

    @reraise_safe_exceptions
    def get_object(self, wire, sha):
        """Return ``id``, ``type`` and ``commit_id`` of the object ``sha``.

        ``commit_id`` equals ``id`` except for tag objects, where it is the
        id of the object the tag points at.
        """
        repo = self._factory.repo(wire)
        obj = repo.get_object(sha)
        commit_id = obj.id

        if isinstance(obj, Tag):
            commit_id = obj.object[1]

        return {
            'id': obj.id,
            'type': obj.type_name,
            'commit_id': commit_id
        }

    @reraise_safe_exceptions
    def get_object_attrs(self, wire, sha, *attrs):
        """Return the values of ``attrs`` on object ``sha``, as a list."""
        repo = self._factory.repo(wire)
        obj = repo.get_object(sha)
        return [getattr(obj, a) for a in attrs]

    @reraise_safe_exceptions
    def get_refs(self, wire):
        """Return a dict mapping every ref name to a commit-level sha.

        For refs that have an entry in the refs container's peeled cache
        (annotated tags), the peeled sha is returned instead of the sha the
        ref itself stores.
        """
        repo = self._factory.repo(wire)

        refs_container = repo.refs
        all_refs = refs_container.as_dict()
        # Read the cache only after ``as_dict()``: the original code did
        # the same, presumably because listing the refs is what loads the
        # packed/peeled refs - confirm when upgrading Dulwich, as
        # ``_peeled_refs`` is a private attribute.
        peeled_refs = refs_container._peeled_refs
        return {
            ref: peeled_refs.get(ref, sha) for ref, sha in all_refs.items()}

    @reraise_safe_exceptions
    def get_refs_path(self, wire):
        """Return the ``path`` attribute of the repository's refs."""
        repo = self._factory.repo(wire)
        return repo.refs.path

    @reraise_safe_exceptions
    def head(self, wire):
        """Return ``repo.head()`` for the repository."""
        repo = self._factory.repo(wire)
        return repo.head()

    @reraise_safe_exceptions
    def init(self, wire):
        """Initialise a repository at ``wire['path']``.

        The new repo object is kept on ``self.repo``.
        """
        repo_path = str_to_dulwich(wire['path'])
        self.repo = Repo.init(repo_path)

    @reraise_safe_exceptions
    def init_bare(self, wire):
        """Initialise a bare repository at ``wire['path']``.

        The new repo object is kept on ``self.repo``.
        """
        repo_path = str_to_dulwich(wire['path'])
        self.repo = Repo.init_bare(repo_path)

    @reraise_safe_exceptions
    def revision(self, wire, rev):
        """Return a dict with the ``id`` of ``rev`` and, if any, its tree.

        The ``tree`` key is only present when the object has a ``tree``
        attribute.
        """
        repo = self._factory.repo(wire)
        obj = repo[rev]
        obj_data = {
            'id': obj.id,
        }
        try:
            obj_data['tree'] = obj.tree
        except AttributeError:
            pass
        return obj_data

    @reraise_safe_exceptions
    def commit_attribute(self, wire, rev, attr):
        """Return attribute ``attr`` of the object stored under ``rev``."""
        repo = self._factory.repo(wire)
        obj = repo[rev]
        return getattr(obj, attr)

    @reraise_safe_exceptions
    def set_refs(self, wire, key, value):
        """Set ref ``key`` to ``value``."""
        repo = self._factory.repo(wire)
        repo.refs[key] = value

    @reraise_safe_exceptions
    def remove_ref(self, wire, key):
        """Delete ref ``key``."""
        repo = self._factory.repo(wire)
        del repo.refs[key]

    @reraise_safe_exceptions
    def tree_changes(self, wire, source_id, target_id):
        """Return the list of changes between two commits' trees.

        A false ``source_id`` compares against no source tree (``None``).
        """
        repo = self._factory.repo(wire)
        source = repo[source_id].tree if source_id else None
        target = repo[target_id].tree
        result = repo.object_store.tree_changes(source, target)
        return list(result)

    @reraise_safe_exceptions
    def tree_items(self, wire, tree_id):
        """List the entries of tree ``tree_id``.

        :returns: list of ``(path, mode, sha, type)`` tuples, where
            ``type`` is ``"link"`` for gitlink entries and the stored
            object's ``type_name`` otherwise.
        """
        repo = self._factory.repo(wire)
        tree = repo[tree_id]

        result = []
        for item in tree.iteritems():
            item_sha = item.sha
            item_mode = item.mode

            # Gitlink entries are not looked up in the repo at all.
            if FILE_MODE(item_mode) == GIT_LINK:
                item_type = "link"
            else:
                item_type = repo[item_sha].type_name

            result.append((item.path, item_mode, item_sha, item_type))
        return result

    @reraise_safe_exceptions
    def update_server_info(self, wire):
        """Run Dulwich's ``update_server_info`` on the repository."""
        repo = self._factory.repo(wire)
        update_server_info(repo)

    @reraise_safe_exceptions
    def discover_git_version(self):
        """Return the stdout of ``git --version``.

        Runs in safe mode, so a failure to start git yields ``''``.
        """
        stdout, _ = self.run_git_command(
            {}, ['--version'], _bare=True, _safe=True)
        return stdout

    @reraise_safe_exceptions
    def run_git_command(self, wire, cmd, **opts):
        """Run the git executable with ``cmd`` and return its output.

        :param wire: mapping; if it has a ``path`` that is a directory, the
            command runs with that directory as ``cwd``.
        :param cmd: list of git arguments.
        :param opts: passed on to ``SubprocessIOChunker`` except for:
            ``_bare`` (if present, no default ``-c`` options are added),
            ``_safe`` (if present, start-up errors are returned instead of
            raised) and ``extra_env`` (merged into the environment).
        :returns: ``(stdout, stderr)``; in safe mode a start-up failure
            returns ``('', err)`` with ``err`` being the exception object.
        :raises exceptions.VcsException: on start-up failure outside safe
            mode.
        """
        path = wire.get('path', None)

        if path and os.path.isdir(path):
            opts['cwd'] = path

        # Both flags are triggered by mere presence, whatever their value.
        if '_bare' in opts:
            opts.pop('_bare')
            _copts = []
        else:
            _copts = ['-c', 'core.quotepath=false', ]
        # no exc on failure when ``_safe`` is given
        safe_call = '_safe' in opts
        opts.pop('_safe', None)

        gitenv = os.environ.copy()
        gitenv.update(opts.pop('extra_env', {}))
        # need to clean fix GIT_DIR !
        gitenv.pop('GIT_DIR', None)
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'

        cmd = [settings.GIT_EXECUTABLE] + _copts + cmd

        try:
            _opts = {'env': gitenv, 'shell': False}
            _opts.update(opts)
            p = subprocessio.SubprocessIOChunker(cmd, **_opts)

            return ''.join(p), ''.join(p.error)
        except (EnvironmentError, OSError) as err:
            tb_err = ("Couldn't run git command (%s).\n"
                      "Original error was:%s\n" % (cmd, err))
            log.exception(tb_err)
            if safe_call:
                return '', err
            else:
                raise exceptions.VcsException(tb_err)
582 569
583 570
def str_to_dulwich(value):
    """
    Decode `value` using the configured wire encoding.

    Dulwich 0.10.1a requires `unicode` objects to be passed in.
    """
    wire_encoding = settings.WIRE_ENCODING
    return value.decode(wire_encoding)
General Comments 0
You need to be logged in to leave comments. Login now