##// END OF EJS Templates
git: update check for invalid URL characters to work with Python versions that include an attempt at fixing the very same problem...
Mads Kiilerich -
r8681:f0fbb0fe default
parent child Browse files
Show More
@@ -1,819 +1,827 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 vcs.backends.git.repository
4 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 6 Git repository implementation.
7 7
8 8 :created_on: Apr 8, 2010
9 9 :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
10 10 """
11 11
12 12 import errno
13 13 import logging
14 14 import os
15 15 import re
16 16 import time
17 17 import urllib.error
18 18 import urllib.parse
19 19 import urllib.request
20 20 from collections import OrderedDict
21 21
22 22 import mercurial.util # import url as hg_url
23 23 from dulwich.client import SubprocessGitClient
24 24 from dulwich.config import ConfigFile
25 25 from dulwich.objects import Tag
26 26 from dulwich.repo import NotGitRepository, Repo
27 27 from dulwich.server import update_server_info
28 28
29 29 from kallithea.lib.vcs import subprocessio
30 30 from kallithea.lib.vcs.backends.base import BaseRepository, CollectionGenerator
31 31 from kallithea.lib.vcs.conf import settings
32 32 from kallithea.lib.vcs.exceptions import (BranchDoesNotExistError, ChangesetDoesNotExistError, EmptyRepositoryError, RepositoryError, TagAlreadyExistError,
33 33 TagDoesNotExistError)
34 34 from kallithea.lib.vcs.utils import ascii_bytes, ascii_str, date_fromtimestamp, makedate, safe_bytes, safe_str
35 35 from kallithea.lib.vcs.utils.helpers import get_urllib_request_handlers
36 36 from kallithea.lib.vcs.utils.lazy import LazyProperty
37 37 from kallithea.lib.vcs.utils.paths import abspath, get_user_home
38 38
39 39 from . import changeset, inmemory, workdir
40 40
41 41
# Matches a whole string made of exactly 12 or exactly 40 hexadecimal digits,
# i.e. an abbreviated or a full changeset id.
SHA_PATTERN = re.compile(r'^([0-9a-fA-F]{12}|[0-9a-fA-F]{40})$')

# Logger named after this module.
log = logging.getLogger(__name__)
45 45
46 46
47 47 class GitRepository(BaseRepository):
48 48 """
49 49 Git repository backend.
50 50 """
51 51 DEFAULT_BRANCH_NAME = 'master'
52 52 scm = 'git'
53 53
def __init__(self, repo_path, create=False, src_url=None,
             update_after_clone=False, bare=False, baseui=None):
    """
    Open the Git repository at ``repo_path``, creating or cloning it first
    when requested.

    :param repo_path: location of the repository; stored as absolute path
    :param create: forwarded to ``_get_repo``
    :param src_url: forwarded to ``_get_repo``
    :param update_after_clone: forwarded to ``_get_repo``
    :param bare: forwarded to ``_get_repo``
    :param baseui: accepted for signature compatibility; not used here
    """
    baseui  # unused
    location = abspath(repo_path)
    self.path = location
    backend = self._get_repo(create, src_url, update_after_clone, bare)
    self.repo = backend
    # Trust what the opened repository reports, not the ``bare`` argument.
    self.bare = backend.bare
60 60
@property
def _config_files(self):
    """
    Paths of the configuration files consulted for this repository: the
    repository's own ``config`` first, then ``.gitconfig`` in the user home.
    """
    # A bare repository keeps ``config`` at its top level, otherwise it
    # lives inside the ``.git`` directory.
    if self.bare:
        repo_config = abspath(self.path, 'config')
    else:
        repo_config = abspath(self.path, '.git', 'config')
    user_config = abspath(get_user_home(), '.gitconfig')
    return [repo_config, user_config]
68 68
69 69 @property
70 70 def _repo(self):
71 71 return self.repo
72 72
@property
def head(self):
    """
    Return whatever the underlying repository's ``head()`` returns, or
    ``None`` when that lookup raises ``KeyError``.
    """
    try:
        current = self._repo.head()
    except KeyError:
        current = None
    return current
79 79
80 80 @property
81 81 def _empty(self):
82 82 """
83 83 Checks if repository is empty ie. without any changesets
84 84 """
85 85
86 86 try:
87 87 self.revisions[0]
88 88 except (KeyError, IndexError):
89 89 return True
90 90 return False
91 91
@LazyProperty
def revisions(self):
    """
    List of revision ids in ascending order, as produced by
    ``_get_all_revisions``. Kept as a lazy attribute so that external
    tools can inject shas from a cache instead.
    """
    all_ids = self._get_all_revisions()
    return all_ids
99 99
@classmethod
def _run_git_command(cls, cmd, cwd=None):
    """
    Execute ``cmd`` with the configured git executable and return the
    collected output as a ``(stdout, stderr)`` tuple of bytes.

    :param cmd: git arguments as a list (without the executable itself)
    :param cwd: passed directly to the subprocess
    :raises RepositoryError: if the subprocess could not be run
    """
    # Run with a copy of the environment where GIT_DIR is cleared and the
    # GIT_CONFIG_NOGLOBAL flag is set.
    env = {key: value for key, value in os.environ.items() if key != 'GIT_DIR'}
    env['GIT_CONFIG_NOGLOBAL'] = '1'

    assert isinstance(cmd, list), cmd
    argv = [settings.GIT_EXECUTABLE_PATH, '-c', 'core.quotepath=false']
    argv.extend(cmd)

    try:
        proc = subprocessio.SubprocessIOChunker(argv, cwd=cwd, env=env, shell=False)
    except (EnvironmentError, OSError) as err:
        # output from the failing process is in str(EnvironmentError)
        template = ("Couldn't run git command %s.\n"
                    "Subprocess failed with '%s': %s\n")
        msg = (template % (argv, type(err).__name__, err)).strip()
        log.error(msg)
        raise RepositoryError(msg)

    try:
        out_bytes = b''.join(proc.output)
        err_bytes = b''.join(proc.error)
    finally:
        proc.close()

    # TODO: introduce option to make commands fail if they have any stderr output?
    if not err_bytes:
        log.debug('stderr from %s: None', argv)
    else:
        log.debug('stderr from %s:\n%s', argv, err_bytes)
    return out_bytes, err_bytes
138 138
def run_git_command(self, cmd):
    """
    Run ``cmd`` as a git command and return its stdout converted to str.

    The working directory is the repository path when that directory
    exists, otherwise no working directory is passed.
    Raises RepositoryError if the command could not be run.
    """
    workdir_path = self.path if os.path.isdir(self.path) else None
    out_bytes, _err_bytes = self._run_git_command(cmd, cwd=workdir_path)
    return safe_str(out_bytes)
149 149
150 150 @staticmethod
151 151 def _check_url(url):
152 152 r"""
153 153 Raise URLError if url doesn't seem like a valid safe Git URL. We
154 154 only allow http, https, git, and ssh URLs.
155 155
156 156 For http and https URLs, make a connection and probe to see if it is valid.
157 157
158 158 >>> GitRepository._check_url('git://example.com/my%20fine repo')
159 159
160 160 >>> GitRepository._check_url('http://example.com:65537/repo')
161 161 Traceback (most recent call last):
162 162 ...
163 163 urllib.error.URLError: <urlopen error Error parsing URL: 'http://example.com:65537/repo'>
164 164 >>> GitRepository._check_url('foo')
165 165 Traceback (most recent call last):
166 166 ...
167 167 urllib.error.URLError: <urlopen error Unsupported protocol in URL 'foo'>
168 168 >>> GitRepository._check_url('file:///repo')
169 169 Traceback (most recent call last):
170 170 ...
171 171 urllib.error.URLError: <urlopen error Unsupported protocol in URL 'file:///repo'>
172 172 >>> GitRepository._check_url('git+http://example.com/repo')
173 173 Traceback (most recent call last):
174 174 ...
175 175 urllib.error.URLError: <urlopen error Unsupported protocol in URL 'git+http://example.com/repo'>
176 176 >>> GitRepository._check_url('git://example.com/%09')
177 177 Traceback (most recent call last):
178 178 ...
179 179 urllib.error.URLError: <urlopen error Invalid escape character in path: '%'>
180 180 >>> GitRepository._check_url('git://example.com/%x00')
181 181 Traceback (most recent call last):
182 182 ...
183 183 urllib.error.URLError: <urlopen error Invalid escape character in path: '%'>
184 184 >>> GitRepository._check_url(r'git://example.com/\u0009')
185 185 Traceback (most recent call last):
186 186 ...
187 187 urllib.error.URLError: <urlopen error Invalid escape character in path: '\'>
188 188 >>> GitRepository._check_url(r'git://example.com/\t')
189 189 Traceback (most recent call last):
190 190 ...
191 191 urllib.error.URLError: <urlopen error Invalid escape character in path: '\'>
192 192 >>> GitRepository._check_url('git://example.com/\t')
193 193 Traceback (most recent call last):
194 194 ...
195 urllib.error.URLError: <urlopen error Invalid ...>
196
197 The failure above will be one of, depending on the level of WhatWG support:
195 198 urllib.error.URLError: <urlopen error Invalid whitespace character in path: '\t'>
199 urllib.error.URLError: <urlopen error Invalid url: 'git://example.com/ ' normalizes to 'git://example.com/'>
196 200 """
197 201 try:
198 202 parsed_url = urllib.parse.urlparse(url)
199 203 parsed_url.port # trigger netloc parsing which might raise ValueError
200 204 except ValueError:
201 205 raise urllib.error.URLError("Error parsing URL: %r" % url)
202 206
203 207 # check first if it's not an local url
204 208 if os.path.isabs(url) and os.path.isdir(url):
205 209 return
206 210
211 unparsed_url = urllib.parse.urlunparse(parsed_url)
212 if unparsed_url != url:
213 raise urllib.error.URLError("Invalid url: '%s' normalizes to '%s'" % (url, unparsed_url))
214
207 215 if parsed_url.scheme == 'git':
208 216 # Mitigate problems elsewhere with incorrect handling of encoded paths.
209 217 # Don't trust urllib.parse.unquote but be prepared for more flexible implementations elsewhere.
210 218 # Space is the only allowed whitespace character - directly or % encoded. No other % or \ is allowed.
211 219 for c in parsed_url.path.replace('%20', ' '):
212 220 if c in '%\\':
213 221 raise urllib.error.URLError("Invalid escape character in path: '%s'" % c)
214 222 if c.isspace() and c != ' ':
215 223 raise urllib.error.URLError("Invalid whitespace character in path: %r" % c)
216 224 return
217 225
218 226 if parsed_url.scheme not in ['http', 'https']:
219 227 raise urllib.error.URLError("Unsupported protocol in URL %r" % url)
220 228
221 229 url_obj = mercurial.util.url(safe_bytes(url))
222 230 test_uri, handlers = get_urllib_request_handlers(url_obj)
223 231 if not test_uri.endswith(b'info/refs'):
224 232 test_uri = test_uri.rstrip(b'/') + b'/info/refs'
225 233
226 234 url_obj.passwd = b'*****'
227 235 cleaned_uri = str(url_obj)
228 236
229 237 o = urllib.request.build_opener(*handlers)
230 238 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
231 239
232 240 req = urllib.request.Request(
233 241 "%s?%s" % (
234 242 safe_str(test_uri),
235 243 urllib.parse.urlencode({"service": 'git-upload-pack'})
236 244 ))
237 245
238 246 try:
239 247 resp = o.open(req)
240 248 if resp.code != 200:
241 249 raise Exception('Return Code is not 200')
242 250 except Exception as e:
243 251 # means it cannot be cloned
244 252 raise urllib.error.URLError("[%s] org_exc: %s" % (cleaned_uri, e))
245 253
246 254 # now detect if it's proper git repo
247 255 gitdata = resp.read()
248 256 if b'service=git-upload-pack' not in gitdata:
249 257 raise urllib.error.URLError(
250 258 "url [%s] does not look like an git" % cleaned_uri)
251 259
def _get_repo(self, create, src_url=None, update_after_clone=False,
              bare=False):
    """
    Return a ``Repo`` for ``self.path``: opened as is, freshly initialised
    (``create``), or cloned from ``src_url`` (``create`` and ``src_url``).

    :raises RepositoryError: if ``create`` is set but the path exists, if
        ``src_url`` is given without ``create``, or if opening, creating
        or cloning fails with ``NotGitRepository`` or ``OSError``
    """
    if create and os.path.exists(self.path):
        raise RepositoryError("Location already exist")
    if src_url and not create:
        raise RepositoryError("Create should be set to True if src_url is "
                              "given (clone operation creates repository)")
    try:
        if not create:
            return Repo(self.path)
        if src_url:
            # Validate the source before handing it to ``git clone``.
            GitRepository._check_url(src_url)
            self.clone(src_url, update_after_clone, bare)
            return Repo(self.path)
        os.makedirs(self.path)
        if bare:
            return Repo.init_bare(self.path)
        return Repo.init(self.path)
    except (NotGitRepository, OSError) as err:
        raise RepositoryError(err)
274 282
275 283 def _get_all_revisions(self):
276 284 # we must check if this repo is not empty, since later command
277 285 # fails if it is. And it's cheaper to ask than throw the subprocess
278 286 # errors
279 287 try:
280 288 self._repo.head()
281 289 except KeyError:
282 290 return []
283 291
284 292 rev_filter = settings.GIT_REV_FILTER
285 293 cmd = ['rev-list', rev_filter, '--reverse', '--date-order']
286 294 try:
287 295 so = self.run_git_command(cmd)
288 296 except RepositoryError:
289 297 # Can be raised for empty repositories
290 298 return []
291 299 return so.splitlines()
292 300
293 301 def _get_all_revisions2(self):
294 302 # alternate implementation using dulwich
295 303 includes = [ascii_str(sha) for key, (sha, type_) in self._parsed_refs.items()
296 304 if type_ != b'T']
297 305 return [c.commit.id for c in self._repo.get_walker(include=includes)]
298 306
299 307 def _get_revision(self, revision):
300 308 """
301 309 Given any revision identifier, returns a 40 char string with revision hash.
302 310 """
303 311 if self._empty:
304 312 raise EmptyRepositoryError("There are no changesets yet")
305 313
306 314 if revision in (None, '', 'tip', 'HEAD', 'head', -1):
307 315 revision = -1
308 316
309 317 if isinstance(revision, int):
310 318 try:
311 319 return self.revisions[revision]
312 320 except IndexError:
313 321 msg = "Revision %r does not exist for %s" % (revision, self.name)
314 322 raise ChangesetDoesNotExistError(msg)
315 323
316 324 if isinstance(revision, str):
317 325 if revision.isdigit() and (len(revision) < 12 or len(revision) == revision.count('0')):
318 326 try:
319 327 return self.revisions[int(revision)]
320 328 except IndexError:
321 329 msg = "Revision %r does not exist for %s" % (revision, self)
322 330 raise ChangesetDoesNotExistError(msg)
323 331
324 332 # get by branch/tag name
325 333 _ref_revision = self._parsed_refs.get(safe_bytes(revision))
326 334 if _ref_revision: # and _ref_revision[1] in [b'H', b'RH', b'T']:
327 335 return ascii_str(_ref_revision[0])
328 336
329 337 if revision in self.revisions:
330 338 return revision
331 339
332 340 # maybe it's a tag ? we don't have them in self.revisions
333 341 if revision in self.tags.values():
334 342 return revision
335 343
336 344 if SHA_PATTERN.match(revision):
337 345 msg = "Revision %r does not exist for %s" % (revision, self.name)
338 346 raise ChangesetDoesNotExistError(msg)
339 347
340 348 raise ChangesetDoesNotExistError("Given revision %r not recognized" % revision)
341 349
def get_ref_revision(self, ref_type, ref_name):
    """
    Resolve ``ref_name`` with ``_get_revision`` and return the result.
    ``ref_type`` is accepted but not used.
    """
    resolved = self._get_revision(ref_name)
    return resolved
348 356
349 357 def _get_archives(self, archive_name='tip'):
350 358
351 359 for i in [('zip', '.zip'), ('gz', '.tar.gz'), ('bz2', '.tar.bz2')]:
352 360 yield {"type": i[0], "extension": i[1], "node": archive_name}
353 361
354 362 def _get_url(self, url):
355 363 """
356 364 Returns normalized url. If schema is not given, would fall to
357 365 filesystem (``file:///``) schema.
358 366 """
359 367 if url != 'default' and '://' not in url:
360 368 url = ':///'.join(('file', url))
361 369 return url
362 370
@LazyProperty
def name(self):
    """Last path component of the repository path."""
    _parent, base = os.path.split(self.path)
    return base
366 374
@LazyProperty
def last_change(self):
    """
    Last change made on this repository, built by ``date_fromtimestamp``
    from ``_get_mtime()`` and the second element of ``makedate()``.
    """
    mtime = self._get_mtime()
    second_field = makedate()[1]
    return date_fromtimestamp(mtime, second_field)
373 381
374 382 def _get_mtime(self):
375 383 try:
376 384 return time.mktime(self.get_changeset().date.timetuple())
377 385 except RepositoryError:
378 386 idx_loc = '' if self.bare else '.git'
379 387 # fallback to filesystem
380 388 in_path = os.path.join(self.path, idx_loc, "index")
381 389 he_path = os.path.join(self.path, idx_loc, "HEAD")
382 390 if os.path.exists(in_path):
383 391 return os.stat(in_path).st_mtime
384 392 else:
385 393 return os.stat(he_path).st_mtime
386 394
@LazyProperty
def description(self):
    """
    Repository description as str; ``'unknown'`` when the underlying
    repository returns nothing.
    """
    raw = self._repo.get_description()
    if not raw:
        raw = b'unknown'
    return safe_str(raw)
390 398
@property
def branches(self):
    """
    Ordered mapping of branch name to sha for every parsed ref of type
    ``b'H'``, sorted by name. Empty dict when there are no revisions.
    """
    if not self.revisions:
        return {}
    heads = []
    for key, (sha, type_) in self._parsed_refs.items():
        if type_ == b'H':
            heads.append((safe_str(key), ascii_str(sha)))
    heads.sort(key=lambda pair: pair[0])
    return OrderedDict(heads)
398 406
@LazyProperty
def closed_branches(self):
    """Always an empty dict for this backend."""
    return dict()
402 410
@LazyProperty
def tags(self):
    """Tags of this repository, as computed by ``_get_tags``."""
    current_tags = self._get_tags()
    return current_tags
406 414
407 415 def _get_tags(self):
408 416 if not self.revisions:
409 417 return {}
410 418 _tags = [(safe_str(key), ascii_str(sha))
411 419 for key, (sha, type_) in self._parsed_refs.items() if type_ == b'T']
412 420 return OrderedDict(sorted(_tags, key=(lambda ctx: ctx[0]), reverse=True))
413 421
def tag(self, name, user, revision=None, message=None, date=None,
        **kwargs):
    """
    Create a tag named ``name`` for ``revision`` and return the tagged
    changeset.

    The tag is written as a ``refs/tags/<name>`` entry pointing at the
    changeset's commit id. ``user``, ``date`` and the (possibly defaulted)
    ``message`` are not written anywhere by this method.

    :param name: name for new tag
    :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
    :param revision: changeset id for which new tag would be created
    :param message: message of the tag's commit
    :param date: date of tag's commit

    :raises TagAlreadyExistError: if tag with same name already exists
    """
    if name in self.tags:
        raise TagAlreadyExistError("Tag %s already exists" % name)

    target = self.get_changeset(revision)
    if not message:
        message = "Added tag %s for commit %s" % (name, target.raw_id)

    ref_name = b"refs/tags/%s" % safe_bytes(name)
    self._repo.refs[ref_name] = target._commit.id

    # Recompute the cached ref views so that they include the new tag.
    self._parsed_refs = self._get_parsed_refs()
    self.tags = self._get_tags()
    return target
437 445
def remove_tag(self, name, user, message=None, date=None):
    """
    Remove the tag called ``name`` by deleting its file below
    ``refs/tags`` and recomputing the cached refs and tags.

    :param name: name of the tag to be removed
    :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
    :param message: message of the tag's removal commit
    :param date: date of tag's removal commit

    :raises TagDoesNotExistError: if tag with given name does not exists
    :raises RepositoryError: if removing the file fails with ``OSError``
    """
    if name not in self.tags:
        raise TagDoesNotExistError("Tag %s does not exist" % name)

    # self._repo.refs is a DiskRefsContainer, and .path gives the full absolute path of '.git'
    refs_root = safe_str(self._repo.refs.path)
    tagpath = os.path.join(refs_root, 'refs', 'tags', name)
    try:
        os.remove(tagpath)
        self._parsed_refs = self._get_parsed_refs()
        self.tags = self._get_tags()
    except OSError as e:
        raise RepositoryError(e.strerror)
459 467
@LazyProperty
def bookmarks(self):
    """Bookmarks for this repository; always an empty dict here."""
    return dict()
466 474
@LazyProperty
def _parsed_refs(self):
    """Parsed refs of this repository, as computed by ``_get_parsed_refs``."""
    parsed = self._get_parsed_refs()
    return parsed
470 478
471 479 def _get_parsed_refs(self):
472 480 """Return refs as a dict, like:
473 481 { b'v0.2.0': [b'599ba911aa24d2981225f3966eb659dfae9e9f30', b'T'] }
474 482 """
475 483 _repo = self._repo
476 484 refs = _repo.get_refs()
477 485 keys = [(b'refs/heads/', b'H'),
478 486 (b'refs/remotes/origin/', b'RH'),
479 487 (b'refs/tags/', b'T')]
480 488 _refs = {}
481 489 for ref, sha in refs.items():
482 490 for k, type_ in keys:
483 491 if ref.startswith(k):
484 492 _key = ref[len(k):]
485 493 if type_ == b'T':
486 494 obj = _repo.get_object(sha)
487 495 if isinstance(obj, Tag):
488 496 sha = _repo.get_object(sha).object[1]
489 497 _refs[_key] = [sha, type_]
490 498 break
491 499 return _refs
492 500
493 501 def _heads(self, reverse=False):
494 502 refs = self._repo.get_refs()
495 503 heads = {}
496 504
497 505 for key, val in refs.items():
498 506 for ref_key in [b'refs/heads/', b'refs/remotes/origin/']:
499 507 if key.startswith(ref_key):
500 508 n = key[len(ref_key):]
501 509 if n not in [b'HEAD']:
502 510 heads[n] = val
503 511
504 512 return heads if reverse else dict((y, x) for x, y in heads.items())
505 513
def get_changeset(self, revision=None):
    """
    Return a ``GitChangeset`` for ``revision``, resolved with
    ``_get_revision`` (so ``None`` means the most recent commit).
    A ``GitChangeset`` passed in is handed back unchanged.
    """
    if not isinstance(revision, changeset.GitChangeset):
        resolved = self._get_revision(revision)
        revision = changeset.GitChangeset(repository=self, revision=resolved)
    return revision
514 522
def get_changesets(self, start=None, end=None, start_date=None,
                   end_date=None, branch_name=None, reverse=False, max_revisions=None):
    """
    Return an iterable of ``GitChangeset`` objects from ``start`` to
    ``end`` (both inclusive), in ascending date order unless ``reverse``
    is set.

    :param start: changeset ID, as str; first returned changeset
    :param end: changeset ID, as str; last returned changeset
    :param start_date: if specified, passed to ``git log --since``
    :param end_date: if specified, passed to ``git log --until``
    :param branch_name: if specified, only this branch is logged instead
        of the configured revision filter
    :param reverse: if ``True``, the result is in descending order
    :param max_revisions: if specified, passed to ``git log --max-count``

    :raise BranchDoesNotExistError: If given ``branch_name`` does not
        exist.
    :raise EmptyRepositoryError: If the repository has no changesets.
    :raise ChangesetDoesNotExistError: If changeset for given ``start`` or
        ``end`` could not be found.
    :raise RepositoryError: If ``start`` comes after ``end``.
    """
    if branch_name and branch_name not in self.branches:
        raise BranchDoesNotExistError("Branch '%s' not found"
                                      % branch_name)
    # actually we should check now if it's not an empty repo to not spaw
    # subprocess commands
    if self._empty:
        raise EmptyRepositoryError("There are no changesets yet")

    # %H at format means (full) commit hash, initial hashes are retrieved
    # in ascending date order
    date_format = '%m/%d/%y %H:%M:%S'
    cmd = ['log', '--date-order', '--reverse', '--pretty=format:%H']
    if max_revisions:
        cmd.append('--max-count=%s' % max_revisions)
    if start_date:
        cmd.extend(['--since', start_date.strftime(date_format)])
    if end_date:
        cmd.extend(['--until', end_date.strftime(date_format)])
    cmd.append(branch_name if branch_name else settings.GIT_REV_FILTER)

    revs = self.run_git_command(cmd).splitlines()

    # Default to the whole list; a boundary that is not part of the log
    # output leaves its default untouched.
    first = 0
    last = len(revs)
    if start:
        start_id = self._get_revision(start)
        if start_id in revs:
            first = revs.index(start_id)
    if end is not None:
        end_id = self._get_revision(end)
        if end_id in revs:
            last = revs.index(end_id)

    if None not in [start, end] and first > last:
        raise RepositoryError('start cannot be after end')

    # ``end`` is inclusive.
    selected = revs[first:last + 1]
    if reverse:
        selected.reverse()

    return CollectionGenerator(self, selected)
588 596
def get_diff_changesets(self, org_rev, other_repo, other_rev):
    """
    Returns lists of changesets that can be merged from this repo @org_rev
    to other_repo @other_rev
    ... and the other way
    ... and the ancestors that would be used for merge

    :param org_rev: the revision we want our compare to be made
    :param other_repo: repo object, most likely the fork of org_repo. It has
        all changesets that we need to obtain
    :param other_rev: revision we want out compare to be made on other_repo
    :returns: tuple ``(other_changesets, org_changesets, ancestors)``;
        ``org_changesets`` is always an empty list and ``ancestors`` is
        ``None`` when both revisions are equal
    """
    org_changesets = []
    ancestors = None
    if org_rev == other_rev:
        other_changesets = []
    elif self != other_repo:
        # Fetch both ways so that each repository has the other's objects.
        # The dulwich handles are closed even if fetching, walking or
        # changeset lookup fails.
        gitrepo = Repo(self.path)
        try:
            SubprocessGitClient(thin_packs=False).fetch(other_repo.path, gitrepo)

            gitrepo_remote = Repo(other_repo.path)
            try:
                SubprocessGitClient(thin_packs=False).fetch(self.path, gitrepo_remote)

                revs = [
                    ascii_str(x.commit.id)
                    for x in gitrepo_remote.get_walker(include=[ascii_bytes(other_rev)],
                                                       exclude=[ascii_bytes(org_rev)])
                ]
                other_changesets = [other_repo.get_changeset(rev) for rev in reversed(revs)]
                if other_changesets:
                    ancestors = [other_changesets[0].parents[0].raw_id]
                else:
                    # no changesets from other repo, ancestor is the other_rev
                    ancestors = [other_rev]
            finally:
                gitrepo_remote.close()
        finally:
            gitrepo.close()

    else:
        so = self.run_git_command(
            ['log', '--reverse', '--pretty=format:%H',
             '-s', '%s..%s' % (org_rev, other_rev)]
        )
        other_changesets = [self.get_changeset(cs)
                            for cs in re.findall(r'[0-9a-fA-F]{40}', so)]
        so = self.run_git_command(
            ['merge-base', org_rev, other_rev]
        )
        ancestors = [re.findall(r'[0-9a-fA-F]{40}', so)[0]]

    return other_changesets, org_changesets, ancestors
640 648
def get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
             context=3):
    """
    Return the (git like) diff between ``rev1`` and ``rev2`` as bytes.

    :param rev1: Entry point from which diff is shown. Can be
        ``self.EMPTY_CHANGESET`` - in this case, ``git show`` of ``rev2``
        is used and everything before the first diff is stripped
    :param rev2: Until which revision changes should be shown.
    :param path: if given, the diff is limited to this path
    :param ignore_whitespace: If set to ``True``, would not show whitespace
        changes. Defaults to ``False``.
    :param context: How many lines before/after changed lines should be
        shown. Defaults to ``3``. Values of ``2**31`` and above are set
        to ``2**31-1`` (``2147483647``); negative values are set to ``0``.
    """
    # Git internally uses a signed long int for storing context size, so
    # the requested value is kept one below the number that would
    # overflow. Negative values make no sense and would result in errors.
    overflowed_long_int = 2**31
    if context >= overflowed_long_int:
        context = overflowed_long_int - 1
    elif context < 0:
        context = 0

    flags = ['-U%s' % context, '--full-index', '--binary', '-p', '-M', '--abbrev=40']
    if ignore_whitespace:
        flags += ['-w']

    # Accept changeset objects as well as plain revision identifiers.
    rev1 = getattr(rev1, 'raw_id', rev1)
    rev2 = getattr(rev2, 'raw_id', rev2)

    if rev1 == self.EMPTY_CHANGESET:
        rev2 = self.get_changeset(rev2).raw_id
        cmd = ['show', *flags, rev2]
    else:
        rev1 = self.get_changeset(rev1).raw_id
        rev2 = self.get_changeset(rev2).raw_id
        cmd = ['diff', *flags, rev1, rev2]

    if path:
        cmd.extend(['--', path])

    raw_diff, _stderr = self._run_git_command(cmd, cwd=self.path)
    # If we used 'show' command, strip first few lines (until actual diff
    # starts)
    if rev1 == self.EMPTY_CHANGESET:
        _header, marker, remainder = raw_diff.partition(b'\ndiff ')
        if marker:
            raw_diff = b'diff ' + remainder
    return raw_diff
707 715
@LazyProperty
def in_memory_changeset(self):
    """``GitInMemoryChangeset`` object created for this repository."""
    pending = inmemory.GitInMemoryChangeset(self)
    return pending
714 722
def clone(self, url, update_after_clone=True, bare=False):
    """
    Run ``git clone`` from ``url`` (normalized with ``_get_url``) into
    the repository path.

    :param update_after_clone: If set to ``False``, git won't checkout
        working directory
    :param bare: If set to ``True``, repository would be cloned into
        *bare* git repository (no working directory at all).
    """
    source = self._get_url(url)
    if bare:
        mode = ['--bare']
    elif not update_after_clone:
        mode = ['--no-checkout']
    else:
        mode = []
    # If error occurs run_git_command raises RepositoryError already
    self.run_git_command(['clone', '-q'] + mode + ['--', source, self.path])
733 741
def pull(self, url):
    """
    Run ``git pull --ff-only`` from ``url`` (normalized with ``_get_url``).
    """
    source = self._get_url(url)
    # If error occurs run_git_command raises RepositoryError already
    self.run_git_command(['pull', '--ff-only', source])
742 750
def fetch(self, url):
    """
    Fetch from ``url`` (normalized with ``_get_url``): list the remote
    with ``git ls-remote -h`` and fetch every listed ref onto the ref of
    the same name, using a ``+`` refspec.
    """
    source = self._get_url(url)
    listing = self.run_git_command(['ls-remote', '-h', source])
    refspecs = []
    for line in listing.splitlines():
        _sha, ref = line.split('\t')
        refspecs.append('+%s:%s' % (ref, ref))
    self.run_git_command(['fetch', source, '--'] + refspecs)
754 762
def _update_server_info(self):
    """
    Run ``update_server_info`` on this repository. An ``OSError`` with
    errno ``ENOENT`` or ``EROFS`` is logged and ignored; any other one is
    re-raised.
    """
    try:
        update_server_info(self._repo)
    except OSError as e:
        if e.errno in (errno.ENOENT, errno.EROFS):
            # Workaround for dulwich crashing on for example its own dulwich/tests/data/repos/simple_merge.git/info/refs.lock
            log.error('Ignoring %s running update-server-info: %s', type(e).__name__, e)
        else:
            raise
766 774
@LazyProperty
def workdir(self):
    """``GitWorkdir`` instance created for this repository."""
    working_dir = workdir.GitWorkdir(self)
    return working_dir
773 781
def get_config_value(self, section, name, config_file=None):
    """
    Return the configuration value for ``name`` in [``section``], or
    ``None`` if no readable configuration file provides it.

    The files given in ``config_file`` are tried first, followed by the
    repository's own configuration files. Files that cannot be read or
    parsed, and files without the requested entry, are skipped.

    :param section: Section we want to retrieve value from
    :param name: Name of configuration we want to retrieve
    :param config_file: A path to file which should be used to retrieve
        configuration from (might also be a list of file paths)
    """
    if config_file is None:
        extra_files = []
    elif isinstance(config_file, str):
        extra_files = [config_file]
    else:
        extra_files = config_file

    for path in extra_files + self._config_files:
        try:
            config = ConfigFile.from_path(path)
        except (IOError, OSError, ValueError):
            continue
        try:
            value = config.get(section, name)
        except KeyError:
            continue
        if value is None:
            return None
        return safe_str(value)
    return None
802 810
def get_user_name(self, config_file=None):
    """
    Return the ``name`` entry of the ``user`` configuration section, as
    looked up by ``get_config_value``.

    :param config_file: A path to file which should be used to retrieve
        configuration from (might also be a list of file paths)
    """
    section, key = 'user', 'name'
    return self.get_config_value(section, key, config_file)
811 819
def get_user_email(self, config_file=None):
    """
    Return the ``email`` entry of the ``user`` configuration section, as
    looked up by ``get_config_value``.

    :param config_file: A path to file which should be used to retrieve
        configuration from (might also be a list of file paths)
    """
    section, key = 'user', 'email'
    return self.get_config_value(section, key, config_file)
General Comments 0
You need to be logged in to leave comments. Login now