##// END OF EJS Templates
exceptions: for dulwich we cannot catch all exceptions because it re-uses some...
marcink -
r172:40d7e43c default
parent child Browse files
Show More
@@ -1,586 +1,589 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2017 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import logging
19 19 import os
20 20 import posixpath as vcspath
21 21 import re
22 22 import stat
23 23 import urllib
24 24 import urllib2
25 25 from functools import wraps
26 26
27 27 from dulwich import index, objects
28 28 from dulwich.client import HttpGitClient, LocalGitClient
29 29 from dulwich.errors import (
30 30 NotGitRepository, ChecksumMismatch, WrongObjectException,
31 31 MissingCommitError, ObjectMissing, HangupException,
32 32 UnexpectedCommandError)
33 33 from dulwich.repo import Repo as DulwichRepo, Tag
34 34 from dulwich.server import update_server_info
35 35
36 36 from vcsserver import exceptions, settings, subprocessio
37 37 from vcsserver.utils import safe_str
38 38 from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original
39 39 from vcsserver.hgcompat import (
40 40 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
41 41
42 42
# File-type bits of a directory; used below as the mode of tree entries that
# point at sub-trees.
DIR_STAT = stat.S_IFDIR
# Callable returning the file-type portion of a mode value.
FILE_MODE = stat.S_IFMT
# File-type value dulwich defines for git links; tree entries of this type
# are reported as "link" by `GitRemote.tree_items`.
GIT_LINK = objects.S_IFGITLINK

# Module-level logger.
log = logging.getLogger(__name__)
48 48
49 49
def reraise_safe_exceptions(func):
    """
    Decorator converting selected Dulwich exceptions to neutral ones.

    Object lookup failures (``ChecksumMismatch``, ``WrongObjectException``,
    ``MissingCommitError``, ``ObjectMissing``) are re-raised as
    ``exceptions.LookupException``; ``HangupException`` and
    ``UnexpectedCommandError`` are re-raised as ``exceptions.VcsException``.
    Every other exception propagates unchanged.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException, MissingCommitError,
                ObjectMissing) as lookup_error:
            raise exceptions.LookupException(lookup_error.message)
        except (HangupException, UnexpectedCommandError) as protocol_error:
            raise exceptions.VcsException(protocol_error.message)
        # NOTE(marcink): because of how dulwich handles some exceptions
        # (KeyError on empty repos), we cannot track this and catch all
        # exceptions here; such exceptions are left for other handlers, so
        # there is deliberately no catch-all clause converting them to
        # `exceptions.UnhandledException`.
        return result
    return wrapper
67 70
68 71
class Repo(DulwichRepo):
    """
    Thin subclass of the dulwich ``Repo`` that closes itself on destruction.

    Dulwich sometimes keeps .idx file descriptors open, which leads to
    "Too many open files" errors. Closing the repository when the object is
    destroyed releases those descriptors.

    TODO: mikhail: please check if we need this wrapper after updating dulwich
    to 0.12.0 +
    """

    def __del__(self):
        # Only close when `object_store` exists on the instance
        # (presumably it is missing when construction failed early -
        # TODO confirm).
        if not hasattr(self, 'object_store'):
            return
        self.close()
83 86
84 87
class GitFactory(RepoFactory):
    """Repository factory that produces `Repo` instances for Git."""

    def _create_repo(self, wire, create):
        """
        Open the repository found at ``wire['path']``.

        The path is converted with `str_to_dulwich` first. The `create`
        argument is accepted but not used by this implementation.
        """
        return Repo(str_to_dulwich(wire['path']))
90 93
91 94
class GitRemote(object):
    """
    Collection of Git repository operations exposed to remote callers.

    Most methods take a ``wire`` mapping and resolve the repository through
    the factory supplied at construction time (``factory.repo(wire)``).
    The operations are implemented on top of dulwich, except for
    `run_git_command`, which executes the configured git binary.
    """

    def __init__(self, factory):
        """
        :param factory: object whose ``repo(wire)`` method returns the
            repository to operate on.
        """
        self._factory = factory

        # Attribute names accepted by `bulk_request`, mapped to the method
        # that computes each of them.
        self._bulk_methods = {
            "author": self.commit_attribute,
            "date": self.get_object_attrs,
            "message": self.commit_attribute,
            "parents": self.commit_attribute,
            "_commit": self.revision,
        }

    def _assign_ref(self, wire, ref, commit_id):
        """Set `ref` to `commit_id` in the repository."""
        repo = self._factory.repo(wire)
        repo[ref] = commit_id

    @reraise_safe_exceptions
    def add_object(self, wire, content):
        """Store `content` as a new blob and return the id of that blob."""
        repo = self._factory.repo(wire)
        blob = objects.Blob()
        blob.set_raw_string(content)
        repo.object_store.add_object(blob)
        return blob.id

    @reraise_safe_exceptions
    def assert_correct_path(self, wire):
        """
        Check that `wire` resolves to a Git repository.

        :raises exceptions.AbortException: when the factory raises
            ``NotGitRepository``.
        """
        try:
            self._factory.repo(wire)
        except NotGitRepository as e:
            # Exception can contain unicode which we convert
            raise exceptions.AbortException(repr(e))

    @reraise_safe_exceptions
    def bare(self, wire):
        """Return the ``bare`` attribute of the repository."""
        repo = self._factory.repo(wire)
        return repo.bare

    @reraise_safe_exceptions
    def blob_as_pretty_string(self, wire, sha):
        """Return ``as_pretty_string()`` of the object stored under `sha`."""
        repo = self._factory.repo(wire)
        return repo[sha].as_pretty_string()

    @reraise_safe_exceptions
    def blob_raw_length(self, wire, sha):
        """Return ``raw_length()`` of the object stored under `sha`."""
        repo = self._factory.repo(wire)
        blob = repo[sha]
        return blob.raw_length()

    @reraise_safe_exceptions
    def bulk_request(self, wire, rev, pre_load):
        """
        Compute several attributes of `rev` in a single call.

        :param pre_load: iterable of attribute names; each one must be a key
            of ``self._bulk_methods``.
        :returns: dict mapping every requested name to its value.
        :raises exceptions.VcsException: for an unknown attribute name.
        """
        result = {}
        for attr in pre_load:
            # NOTE(review): the `try` also covers the method call itself, so
            # a KeyError raised inside the method is reported as an unknown
            # attribute too - confirm this is intended.
            try:
                method = self._bulk_methods[attr]
                args = [wire, rev]
                if attr == "date":
                    # "date" is served by `get_object_attrs`, which takes
                    # the attribute names as extra positional arguments
                    args.extend(["commit_time", "commit_timezone"])
                elif attr in ["author", "message", "parents"]:
                    args.append(attr)
                result[attr] = method(*args)
            except KeyError:
                raise exceptions.VcsException(
                    "Unknown bulk attribute: %s" % attr)
        return result

    def _build_opener(self, url):
        """
        Build a urllib2 opener for `url`.

        Basic and digest authentication handlers are installed when
        ``authinfo()`` of the parsed URL returns authentication data.
        """
        handlers = []
        url_obj = url_parser(url)
        _, authinfo = url_obj.authinfo()

        if authinfo:
            # create a password manager
            passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
            passmgr.add_password(*authinfo)

            handlers.extend((httpbasicauthhandler(passmgr),
                             httpdigestauthhandler(passmgr)))

        return urllib2.build_opener(*handlers)

    @reraise_safe_exceptions
    def check_url(self, url, config):
        """
        Check that `url` looks like a Git repository reachable over HTTP.

        Requests ``<url>/info/refs?service=git-upload-pack`` and inspects the
        body of the answer. The HTTP response is always closed before this
        method returns or raises.

        :param url: URL to check; may carry credentials.
        :param config: not used by this implementation.
        :returns: True when the check passes.
        :raises exceptions.URLError: when the URL cannot be opened, the
            status code is not 200, or the body does not look like Git.
        """
        url_obj = url_parser(url)
        test_uri, _ = url_obj.authinfo()
        # mask the password and obfuscate the query string so the URL can be
        # written to the log
        url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
        url_obj.query = obfuscate_qs(url_obj.query)
        cleaned_uri = str(url_obj)
        log.info("Checking URL for remote cloning/import: %s", cleaned_uri)

        if not test_uri.endswith('info/refs'):
            test_uri = test_uri.rstrip('/') + '/info/refs'

        o = self._build_opener(url)
        o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

        q = {"service": 'git-upload-pack'}
        qs = '?%s' % urllib.urlencode(q)
        cu = "%s%s" % (test_uri, qs)
        req = urllib2.Request(cu, None, {})

        resp = None
        try:
            log.debug("Trying to open URL %s", cleaned_uri)
            resp = o.open(req)
            if resp.code != 200:
                raise exceptions.URLError('Return Code is not 200')
        except Exception as e:
            log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
            # a response may already be open (non-200 status); release it
            if resp is not None:
                resp.close()
            # means it cannot be cloned
            raise exceptions.URLError("[%s] org_exc: %s" % (cleaned_uri, e))

        # now detect if it's proper git repo
        try:
            gitdata = resp.read()
        finally:
            # release the connection whether or not reading succeeded
            resp.close()

        if 'service=git-upload-pack' in gitdata:
            pass
        elif re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata):
            # old style git can return some other format !
            pass
        else:
            raise exceptions.URLError(
                "url [%s] does not look like an git" % (cleaned_uri,))

        return True

    @reraise_safe_exceptions
    def clone(self, wire, url, deferred, valid_refs, update_after_clone):
        """
        Fetch `url` into the repository and set the selected refs.

        :param deferred: suffix passed to ``str.endswith``; refs ending with
            it are skipped.
        :param valid_refs: prefix or collection of prefixes; only refs
            starting with one of them are set.
        :param update_after_clone: when true, HEAD is set from the remote
            HEAD and the index is built from its tree.
        """
        remote_refs = self.fetch(wire, url, apply_refs=False)
        repo = self._factory.repo(wire)
        if isinstance(valid_refs, list):
            # `str.startswith` accepts a tuple of prefixes, not a list
            valid_refs = tuple(valid_refs)

        for k in remote_refs:
            # only parse heads/tags and skip so called deferred tags
            if k.startswith(valid_refs) and not k.endswith(deferred):
                repo[k] = remote_refs[k]

        if update_after_clone:
            # we want to checkout HEAD
            repo["HEAD"] = remote_refs["HEAD"]
            index.build_index_from_tree(repo.path, repo.index_path(),
                                        repo.object_store, repo["HEAD"].tree)

    # TODO: this is quite complex, check if that can be simplified
    @reraise_safe_exceptions
    def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
        """
        Create a commit on `branch` and return its id.

        :param commit_data: dict; every key/value pair is set as an
            attribute on the new commit object.
        :param branch: branch name; ``refs/heads/<branch>`` is pointed at
            the new commit.
        :param commit_tree: id of the tree to start from; a new empty tree
            is used when it is falsy.
        :param updated: iterable of dicts with the keys ``path``,
            ``node_path``, ``mode`` and ``content`` describing files to
            write.
        :param removed: iterable of '/'-separated paths to delete.
        :returns: id of the created commit.
        """
        repo = self._factory.repo(wire)
        object_store = repo.object_store

        # Create tree and populates it with blobs
        commit_tree = commit_tree and repo[commit_tree] or objects.Tree()

        for node in updated:
            # Compute subdirs if needed
            dirpath, nodename = vcspath.split(node['path'])
            dirnames = map(safe_str, dirpath and dirpath.split('/') or [])
            parent = commit_tree
            ancestors = [('', parent)]

            # Tries to dig for the deepest existing tree
            while dirnames:
                curdir = dirnames.pop(0)
                try:
                    dir_id = parent[curdir][1]
                except KeyError:
                    # put curdir back into dirnames and stops
                    dirnames.insert(0, curdir)
                    break
                else:
                    # If found, updates parent
                    parent = repo[dir_id]
                    ancestors.append((curdir, parent))
            # Now parent is deepest existing tree and we need to create
            # subtrees for dirnames (in reverse order)
            # [this only applies for nodes from added]
            new_trees = []

            blob = objects.Blob.from_string(node['content'])

            if dirnames:
                # If there are trees which should be created we need to build
                # them now (in reverse order)
                reversed_dirnames = list(reversed(dirnames))
                curtree = objects.Tree()
                curtree[node['node_path']] = node['mode'], blob.id
                new_trees.append(curtree)
                for dirname in reversed_dirnames[:-1]:
                    newtree = objects.Tree()
                    newtree[dirname] = (DIR_STAT, curtree.id)
                    new_trees.append(newtree)
                    curtree = newtree
                parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
            else:
                parent.add(
                    name=node['node_path'], mode=node['mode'], hexsha=blob.id)

            new_trees.append(parent)
            # Update ancestors
            reversed_ancestors = reversed(
                [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
            for parent, tree, path in reversed_ancestors:
                parent[path] = (DIR_STAT, tree.id)
                object_store.add_object(tree)

            object_store.add_object(blob)
            for tree in new_trees:
                object_store.add_object(tree)

        for node_path in removed:
            paths = node_path.split('/')
            tree = commit_tree
            trees = [tree]
            # Traverse deep into the forest...
            for path in paths:
                try:
                    obj = repo[tree[path][1]]
                    if isinstance(obj, objects.Tree):
                        trees.append(obj)
                        tree = obj
                except KeyError:
                    break
            # Cut down the blob and all rotten trees on the way back...
            for path, tree in reversed(zip(paths, trees)):
                del tree[path]
                if tree:
                    # This tree still has elements - don't remove it or any
                    # of its parents
                    break

        object_store.add_object(commit_tree)

        # Create commit
        commit = objects.Commit()
        commit.tree = commit_tree.id
        for k, v in commit_data.iteritems():
            setattr(commit, k, v)
        object_store.add_object(commit)

        ref = 'refs/heads/%s' % branch
        repo.refs[ref] = commit.id

        return commit.id

    @reraise_safe_exceptions
    def fetch(self, wire, url, apply_refs=True, refs=None):
        """
        Fetch objects from `url` into the repository.

        A `url` other than ``'default'`` that contains no ``://`` is handled
        by ``LocalGitClient``; anything else by ``HttpGitClient``.

        :param apply_refs: when true, the fetched refs are written to the
            repository and nothing is returned.
        :param refs: optional collection of ref names limiting what is
            fetched; when given together with `apply_refs`, HEAD is set to
            the last of them.
        :returns: dict of remote refs when `apply_refs` is false, otherwise
            None.
        :raises exceptions.AbortException: when the remote is not a Git
            repository.
        """
        if url != 'default' and '://' not in url:
            client = LocalGitClient(url)
        else:
            url_obj = url_parser(url)
            o = self._build_opener(url)
            # from here on `url` is the first element returned by
            # `authinfo()`
            url, _ = url_obj.authinfo()
            client = HttpGitClient(base_url=url, opener=o)
        repo = self._factory.repo(wire)

        determine_wants = repo.object_store.determine_wants_all
        if refs:
            def determine_wants_requested(references):
                return [references[r] for r in references if r in refs]
            determine_wants = determine_wants_requested

        try:
            remote_refs = client.fetch(
                path=url, target=repo, determine_wants=determine_wants)
        except NotGitRepository as e:
            log.warning(
                'Trying to fetch from "%s" failed, not a Git repository.', url)
            # Exception can contain unicode which we convert
            raise exceptions.AbortException(repr(e))

        # mikhail: client.fetch() returns all the remote refs, but fetches only
        # refs filtered by `determine_wants` function. We need to filter result
        # as well
        if refs:
            remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}

        if apply_refs:
            # TODO: johbo: Needs proper test coverage with a git repository
            # that contains a tag object, so that we would end up with
            # a peeled ref at this point.
            PEELED_REF_MARKER = '^{}'
            for k in remote_refs:
                if k.endswith(PEELED_REF_MARKER):
                    log.info("Skipping peeled reference %s", k)
                    continue
                repo[k] = remote_refs[k]

            if refs:
                # mikhail: explicitly set the head to the last ref.
                repo['HEAD'] = remote_refs[refs[-1]]

            # TODO: mikhail: should we return remote_refs here to be
            # consistent?
        else:
            return remote_refs

    @reraise_safe_exceptions
    def get_remote_refs(self, wire, url):
        """
        Return the refs of the repository opened directly from `url`.

        `wire` is not used; the repository is opened with ``Repo(url)``.
        """
        repo = Repo(url)
        return repo.get_refs()

    @reraise_safe_exceptions
    def get_description(self, wire):
        """Return ``get_description()`` of the repository."""
        repo = self._factory.repo(wire)
        return repo.get_description()

    @reraise_safe_exceptions
    def get_file_history(self, wire, file_path, commit_id, limit):
        """
        Return ids of commits yielded by a walk limited to `file_path`.

        The walk starts at `commit_id` and yields at most `limit` entries.
        """
        repo = self._factory.repo(wire)
        include = [commit_id]
        paths = [file_path]

        walker = repo.get_walker(include, paths=paths, max_entries=limit)
        return [x.commit.id for x in walker]

    @reraise_safe_exceptions
    def get_missing_revs(self, wire, rev1, rev2, path2):
        """
        Return commit ids from a walk that includes `rev2`, excludes `rev1`.

        Objects are first fetched from `path2` into this repository and
        from this repository into the one at `path2`; the walk is performed
        on the repository at `path2`.
        """
        repo = self._factory.repo(wire)
        LocalGitClient(thin_packs=False).fetch(path2, repo)

        # same wire settings, but pointing at the other repository
        wire_remote = wire.copy()
        wire_remote['path'] = path2
        repo_remote = self._factory.repo(wire_remote)
        LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)

        revs = [
            x.commit.id
            for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
        return revs

    @reraise_safe_exceptions
    def get_object(self, wire, sha):
        """
        Return id, type and commit id of the object stored under `sha`.

        :returns: dict with the keys ``id``, ``type`` and ``commit_id``. For
            ``Tag`` objects ``commit_id`` is taken from ``obj.object[1]``,
            otherwise it equals ``id``.
        """
        repo = self._factory.repo(wire)
        obj = repo.get_object(sha)
        commit_id = obj.id

        if isinstance(obj, Tag):
            commit_id = obj.object[1]

        return {
            'id': obj.id,
            'type': obj.type_name,
            'commit_id': commit_id
        }

    @reraise_safe_exceptions
    def get_object_attrs(self, wire, sha, *attrs):
        """Return a list with the values of `attrs` read from object `sha`."""
        repo = self._factory.repo(wire)
        obj = repo.get_object(sha)
        return [getattr(obj, a) for a in attrs]

    @reraise_safe_exceptions
    def get_refs(self, wire):
        """Return a dict mapping every ref to ``repo.get_peeled(ref)``."""
        repo = self._factory.repo(wire)
        return {ref: repo.get_peeled(ref) for ref in repo.refs.as_dict()}

    @reraise_safe_exceptions
    def get_refs_path(self, wire):
        """Return the ``path`` attribute of the repository's refs."""
        repo = self._factory.repo(wire)
        return repo.refs.path

    @reraise_safe_exceptions
    def head(self, wire):
        """Return ``head()`` of the repository."""
        repo = self._factory.repo(wire)
        return repo.head()

    @reraise_safe_exceptions
    def init(self, wire):
        """
        Initialise a repository at ``wire['path']``.

        The created repository object is stored on ``self.repo``.
        """
        repo_path = str_to_dulwich(wire['path'])
        self.repo = Repo.init(repo_path)

    @reraise_safe_exceptions
    def init_bare(self, wire):
        """
        Initialise a bare repository at ``wire['path']``.

        The created repository object is stored on ``self.repo``.
        """
        repo_path = str_to_dulwich(wire['path'])
        self.repo = Repo.init_bare(repo_path)

    @reraise_safe_exceptions
    def revision(self, wire, rev):
        """
        Return basic data of the object stored under `rev`.

        :returns: dict with the key ``id`` and, when the object has a
            ``tree`` attribute, the key ``tree``.
        """
        repo = self._factory.repo(wire)
        obj = repo[rev]
        obj_data = {
            'id': obj.id,
        }
        try:
            obj_data['tree'] = obj.tree
        except AttributeError:
            # object has no tree attribute; report the id only
            pass
        return obj_data

    @reraise_safe_exceptions
    def commit_attribute(self, wire, rev, attr):
        """Return attribute `attr` of the object stored under `rev`."""
        repo = self._factory.repo(wire)
        obj = repo[rev]
        return getattr(obj, attr)

    @reraise_safe_exceptions
    def set_refs(self, wire, key, value):
        """Set ref `key` to `value`."""
        repo = self._factory.repo(wire)
        repo.refs[key] = value

    @reraise_safe_exceptions
    def remove_ref(self, wire, key):
        """Delete ref `key`."""
        repo = self._factory.repo(wire)
        del repo.refs[key]

    @reraise_safe_exceptions
    def tree_changes(self, wire, source_id, target_id):
        """
        Return the list of changes between the trees of two objects.

        When `source_id` is falsy, None is passed as the source tree.
        """
        repo = self._factory.repo(wire)
        source = repo[source_id].tree if source_id else None
        target = repo[target_id].tree
        result = repo.object_store.tree_changes(source, target)
        return list(result)

    @reraise_safe_exceptions
    def tree_items(self, wire, tree_id):
        """
        List the entries of the tree stored under `tree_id`.

        :returns: list of ``(path, mode, sha, type)`` tuples; ``type`` is
            ``"link"`` for git link entries, otherwise the ``type_name`` of
            the referenced object.
        """
        repo = self._factory.repo(wire)
        tree = repo[tree_id]

        result = []
        for item in tree.iteritems():
            item_sha = item.sha
            item_mode = item.mode

            if FILE_MODE(item_mode) == GIT_LINK:
                # git links are labelled without looking the sha up in
                # this repository
                item_type = "link"
            else:
                item_type = repo[item_sha].type_name

            result.append((item.path, item_mode, item_sha, item_type))
        return result

    @reraise_safe_exceptions
    def update_server_info(self, wire):
        """Run dulwich's ``update_server_info`` on the repository."""
        repo = self._factory.repo(wire)
        # resolves to the module-level function imported from dulwich, not
        # to this method
        update_server_info(repo)

    @reraise_safe_exceptions
    def discover_git_version(self):
        """
        Return the output of ``git --version`` without the leading
        ``git version`` text, stripped of surrounding whitespace.
        """
        stdout, _ = self.run_git_command(
            {}, ['--version'], _bare=True, _safe=True)
        prefix = 'git version'
        if stdout.startswith(prefix):
            stdout = stdout[len(prefix):]
        return stdout.strip()

    @reraise_safe_exceptions
    def run_git_command(self, wire, cmd, **opts):
        """
        Run the configured git executable with the arguments in `cmd`.

        Recognised keys in `opts` (removed before the subprocess is
        started):

        * ``_bare``: when present, ``-c core.quotepath=false`` is not added
          to the command line.
        * ``_safe``: when present, a failure to start the command returns
          ``('', err)`` instead of raising.
        * ``extra_env``: dict merged into the environment of the command.

        Only the presence of ``_bare`` and ``_safe`` is checked, not their
        value. All remaining options are passed to
        ``subprocessio.SubprocessIOChunker``.

        :param wire: mapping; when its ``path`` is an existing directory it
            becomes the working directory of the command.
        :returns: tuple ``(stdout, stderr)``.
        :raises exceptions.VcsException: when the command cannot be run and
            ``_safe`` was not given.
        """
        path = wire.get('path', None)

        if path and os.path.isdir(path):
            opts['cwd'] = path

        if '_bare' in opts:
            _copts = []
            del opts['_bare']
        else:
            _copts = ['-c', 'core.quotepath=false', ]
        safe_call = False
        if '_safe' in opts:
            # no exc on failure
            del opts['_safe']
            safe_call = True

        gitenv = os.environ.copy()
        gitenv.update(opts.pop('extra_env', {}))
        # need to clean fix GIT_DIR !
        if 'GIT_DIR' in gitenv:
            del gitenv['GIT_DIR']
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'

        cmd = [settings.GIT_EXECUTABLE] + _copts + cmd

        try:
            _opts = {'env': gitenv, 'shell': False}
            # caller supplied options win over the defaults above
            _opts.update(opts)
            p = subprocessio.SubprocessIOChunker(cmd, **_opts)

            return ''.join(p), ''.join(p.error)
        except (EnvironmentError, OSError) as err:
            tb_err = ("Couldn't run git command (%s).\n"
                      "Original error was:%s\n" % (cmd, err))
            log.exception(tb_err)
            if safe_call:
                return '', err
            else:
                raise exceptions.VcsException(tb_err)
580 583
581 584
def str_to_dulwich(value):
    """
    Dulwich 0.10.1a requires `unicode` objects to be passed in.

    Byte strings are decoded using ``settings.WIRE_ENCODING``. Any other
    value, in particular one that is already text, is returned unchanged:
    calling ``decode`` on text would first encode it implicitly with the
    ASCII codec on Python 2 and fail for non-ASCII paths.
    """
    if isinstance(value, bytes):
        return value.decode(settings.WIRE_ENCODING)
    return value
General Comments 0
You need to be logged in to leave comments. Login now