##// END OF EJS Templates
git: fix handling of submodules that are not in the repo root (Issue #337)...
Thomas De Schampheleire -
r7528:ce5b7896 default
parent child Browse files
Show More
@@ -1,565 +1,566 b''
1 1 import re
2 2 from itertools import chain
3 3 from dulwich import objects
4 4 from dulwich.config import ConfigFile
5 5 from subprocess import Popen, PIPE
6 6 from io import BytesIO
7 7
8 8 from kallithea.lib.vcs.conf import settings
9 9 from kallithea.lib.vcs.backends.base import BaseChangeset, EmptyChangeset
10 10 from kallithea.lib.vcs.exceptions import (
11 11 RepositoryError, ChangesetError, NodeDoesNotExistError, VCSError,
12 12 ChangesetDoesNotExistError, ImproperArchiveTypeError
13 13 )
14 14 from kallithea.lib.vcs.nodes import (
15 15 FileNode, DirNode, NodeKind, RootNode, SubModuleNode,
16 16 ChangedFileNodesGenerator, AddedFileNodesGenerator, RemovedFileNodesGenerator
17 17 )
18 18 from kallithea.lib.vcs.utils import (
19 19 safe_unicode, safe_str, safe_int, date_fromtimestamp
20 20 )
21 21 from kallithea.lib.vcs.utils.lazy import LazyProperty
22 22
23 23
24 24 class GitChangeset(BaseChangeset):
25 25 """
26 26 Represents state of the repository at single revision.
27 27 """
28 28
def __init__(self, repository, revision):
    """
    Bind this changeset to ``revision`` of ``repository``.

    :param repository: repository object; its ``_repo`` object mapping and
        its ``revisions`` list are consulted here.
    :param revision: id of a commit, or of a tag object (which is followed
        to the object it points at).

    :raise RepositoryError: if no object with the given id can be found.
    """
    self._stat_modes = {}
    self.repository = repository
    revision = safe_str(revision)
    try:
        commit = self.repository._repo[revision]
        if isinstance(commit, objects.Tag):
            # tag object: continue with the object the tag points at
            target = commit.object[1]
            revision = safe_str(target)
            commit = self.repository._repo.get_object(target)
    except KeyError:
        raise RepositoryError("Cannot get object with id %s" % revision)

    self.raw_id = self.id = revision
    self.short_id = revision[:12]
    self._commit = commit
    self._tree_id = commit.tree

    # names of the commit attributes the lazy properties read
    self._committer_property = 'committer'
    self._author_property = 'author'
    self._date_property = 'commit_time'
    self._date_tz_property = 'commit_timezone'

    # position of this commit in the repository's revision list
    self.revision = repository.revisions.index(revision)

    # per-changeset caches: path -> node and path -> object id
    self.nodes = {}
    self._paths = {}
53 53
@LazyProperty
def bookmarks(self):
    """Always an empty tuple for this backend."""
    return tuple()
57 57
@LazyProperty
def message(self):
    """Commit message, passed through ``safe_unicode``."""
    raw_message = self._commit.message
    return safe_unicode(raw_message)
61 61
@LazyProperty
def committer(self):
    """Committer attribute of the commit, passed through ``safe_unicode``."""
    raw_committer = getattr(self._commit, self._committer_property)
    return safe_unicode(raw_committer)
65 65
@LazyProperty
def author(self):
    """Author attribute of the commit, passed through ``safe_unicode``."""
    raw_author = getattr(self._commit, self._author_property)
    return safe_unicode(raw_author)
69 69
@LazyProperty
def date(self):
    """
    Commit time and commit timezone of the commit, converted by
    ``date_fromtimestamp``.
    """
    commit = self._commit
    timestamp = getattr(commit, self._date_property)
    tz_offset = getattr(commit, self._date_tz_property)
    return date_fromtimestamp(timestamp, tz_offset)
74 74
@LazyProperty
def _timestamp(self):
    """Raw commit time attribute of the commit, without any conversion."""
    attr_name = self._date_property
    return getattr(self._commit, attr_name)
78 78
@LazyProperty
def status(self):
    """
    Returns a ``(changed, added, removed)`` tuple built from the
    properties of the same names on the current changeset.
    """
    changed = self.changed
    added = self.added
    removed = self.removed
    return changed, added, removed
85 85
@LazyProperty
def tags(self):
    """
    Returns list of names from ``repository.tags`` whose recorded id
    equals this changeset's ``raw_id``.
    """
    return [tname
            for tname, tsha in self.repository.tags.iteritems()
            if tsha == self.raw_id]
93 93
@LazyProperty
def branch(self):
    """
    Returns a single branch name for the changeset, or ``None`` when the
    head lookup has no entry for this changeset's ``raw_id``.

    Note: one name per changeset might not make sense in Git, where
    ``branches`` is a better match for the basic model.
    """
    heads = self.repository._heads(reverse=False)
    ref = heads.get(self.raw_id)
    if not ref:
        return None
    return safe_unicode(ref)
103 103
@LazyProperty
def branches(self):
    """
    Returns list of all keys of ``repository._heads(reverse=True)`` whose
    value equals this changeset's ``raw_id`` (empty list if none match).
    """
    # FIXME: inefficient - scans every head on each call
    heads = self.repository._heads(reverse=True)
    matching = []
    for name in heads:
        if heads[name] == self.raw_id:
            matching.append(name)
    return matching
108 108
109 109 def _fix_path(self, path):
110 110 """
111 111 Paths are stored without trailing slash so we need to get rid off it if
112 112 needed.
113 113 """
114 114 if path.endswith('/'):
115 115 path = path.rstrip('/')
116 116 return path
117 117
def _get_id_for_path(self, path):
    """
    Returns the object id stored for ``path`` in this changeset's tree.

    Every tree visited on the way (the root tree and each intermediate
    directory) has all of its entries recorded in ``self._paths`` and
    ``self._stat_modes``, so later lookups of siblings are cache hits.

    :raise ChangesetError: if an intermediate path component is missing
        or is not a tree.
    :raise NodeDoesNotExistError: if the final path is not found.
    """
    path = safe_str(path)
    if path in self._paths:
        return self._paths[path]

    path = path.strip('/')
    repo = self.repository._repo
    tree = repo[self._tree_id]
    if path == '':
        # the root itself
        self._paths[''] = tree.id
        return tree.id

    def remember(subtree, prefix):
        # cache id and stat mode of every entry of ``subtree``
        for entry, mode, sha in subtree.iteritems():
            full_name = '/'.join((prefix, entry)) if prefix else entry
            self._paths[full_name] = sha
            self._stat_modes[full_name] = mode

    # initially extract things from root dir
    remember(tree, '')

    curdir = ''
    for segment in path.split('/')[:-1]:
        curdir = '/'.join((curdir, segment)) if curdir else segment
        dir_id = None
        for entry, _mode, sha in tree.iteritems():
            if entry == segment:
                dir_id = sha
        if not dir_id:
            raise ChangesetError('%s have not been found' % curdir)
        # descend into the matched entry
        tree = repo[dir_id]
        if not isinstance(tree, objects.Tree):
            raise ChangesetError('%s is not a directory' % curdir)
        # cache all items from the given traversed tree
        remember(tree, curdir)

    if path not in self._paths:
        raise NodeDoesNotExistError("There is no file nor directory "
            "at the given path '%s' at revision %s"
            % (path, safe_str(self.short_id)))
    return self._paths[path]
171 171
def _get_kind(self, path):
    """
    Returns ``NodeKind.FILE`` for a blob, ``NodeKind.DIR`` for a tree and
    ``None`` for any other object type found at ``path``.
    """
    object_id = self._get_id_for_path(path)
    obj = self.repository._repo[object_id]
    if isinstance(obj, objects.Blob):
        return NodeKind.FILE
    if isinstance(obj, objects.Tree):
        return NodeKind.DIR
    return None
178 178
def _get_filectx(self, path):
    """
    Returns ``path`` without trailing slash after checking it is a file.

    :raise ChangesetError: if the object at ``path`` is not a file.
    """
    path = self._fix_path(path)
    if self._get_kind(path) == NodeKind.FILE:
        return path
    raise ChangesetError("File does not exist for revision %s at "
        " '%s'" % (self.raw_id, path))
185 185
186 186 def _get_file_nodes(self):
187 187 return chain(*(t[2] for t in self.walk()))
188 188
@LazyProperty
def parents(self):
    """
    Returns list of parents changesets, in the order the commit lists
    its parents.
    """
    parent_changesets = []
    for parent_id in self._commit.parents:
        parent_changesets.append(self.repository.get_changeset(parent_id))
    return parent_changesets
196 196
@LazyProperty
def children(self):
    """
    Returns list of children changesets.

    Runs ``git rev-list <GIT_REV_FILTER> --children`` and collects the
    ids that follow this changeset's id on the lines starting with it.
    """
    rev_filter = settings.GIT_REV_FILTER
    stdout, _stderr = self.repository.run_git_command(
        ['rev-list', rev_filter, '--children']
    )

    own_line = re.compile(r'^%s' % self.raw_id)
    child_ids = []
    for line in stdout.splitlines():
        if own_line.match(line):
            # everything after the first space-separated field
            child_ids.extend(line.split(' ')[1:])
    return [self.repository.get_changeset(sha) for sha in child_ids]
214 214
def next(self, branch=None):
    """
    Returns the changeset following this one in ``repository.revisions``.
    When ``branch`` is given, later revisions are skipped until one whose
    ``branch`` matches.

    :raise VCSError: if ``branch`` is given and differs from this
        changeset's own branch.
    :raise ChangesetDoesNotExistError: if the revision list is exhausted.
    """
    if branch and self.branch != branch:
        raise VCSError('Branch option used on changeset not belonging '
                       'to that branch')

    candidate = self
    while True:
        try:
            position = candidate.revision + 1
            following_id = candidate.repository.revisions[position]
        except IndexError:
            raise ChangesetDoesNotExistError
        candidate = candidate.repository.get_changeset(following_id)

        if not branch or branch == candidate.branch:
            return candidate
231 231
def prev(self, branch=None):
    """
    Returns the changeset preceding this one in ``repository.revisions``.
    When ``branch`` is given, earlier revisions are skipped until one
    whose ``branch`` matches.

    :raise VCSError: if ``branch`` is given and differs from this
        changeset's own branch.
    :raise ChangesetDoesNotExistError: if there is no earlier revision.
    """
    if branch and self.branch != branch:
        raise VCSError('Branch option used on changeset not belonging '
                       'to that branch')

    candidate = self
    while True:
        try:
            position = candidate.revision - 1
            if position < 0:
                # a negative index would wrap around to the end of the list
                raise IndexError
            preceding_id = candidate.repository.revisions[position]
        except IndexError:
            raise ChangesetDoesNotExistError
        candidate = candidate.repository.get_changeset(preceding_id)

        if not branch or branch == candidate.branch:
            return candidate
250 250
def diff(self, ignore_whitespace=True, context=3):
    """
    Returns the joined output of ``repository.get_diff`` between the
    first parent (or ``repository.EMPTY_CHANGESET`` when there are no
    parents) and this changeset.
    """
    if self.parents:
        rev1 = self.parents[0]
    else:
        rev1 = self.repository.EMPTY_CHANGESET
    chunks = self.repository.get_diff(rev1, self,
                                      ignore_whitespace=ignore_whitespace,
                                      context=context)
    return ''.join(chunks)
257 257
def get_file_mode(self, path):
    """
    Returns stat mode of the file at the given ``path``.

    ``_get_id_for_path`` records stat modes under paths without leading
    or trailing slashes, so ``path`` is normalized the same way before
    the lookup; otherwise e.g. ``'docs/'`` would traverse fine but then
    fail with ``KeyError``.

    :raise KeyError: if no stat mode is recorded for the path (no mode is
        ever recorded for the root path ``''``).
    """
    path = safe_str(path).strip('/')
    # ensure path is traversed
    self._get_id_for_path(path)
    return self._stat_modes[path]
266 266
def get_file_content(self, path):
    """
    Returns content of the file at given ``path``, as produced by the
    stored object's ``as_pretty_string()``.
    """
    object_id = self._get_id_for_path(path)
    return self.repository._repo[object_id].as_pretty_string()
274 274
def get_file_size(self, path):
    """
    Returns size of the file at given ``path``, as reported by the stored
    object's ``raw_length()``.
    """
    object_id = self._get_id_for_path(path)
    return self.repository._repo[object_id].raw_length()
282 282
def get_file_changeset(self, path):
    """
    Returns last commit of the file at the given ``path``: the first
    entry of ``get_file_history(path, limit=1)``.
    """
    history = self.get_file_history(path, limit=1)
    return history[0]
288 288
def get_file_history(self, path, limit=None):
    """
    Returns history of file as reversed list of ``Changeset`` objects for
    which file at given ``path`` has been modified.

    :param path: path of a file; checked with ``_get_filectx`` first.
    :param limit: if not ``None``, passed to ``git log -n`` after
        conversion with ``safe_int(limit, 0)``.

    TODO: This function relies on the external 'git log' command, which
    is generally not good. Should be replaced with an algorithm iterating
    commits.
    """
    self._get_filectx(path)
    cs_id = safe_str(self.id)
    f_path = safe_str(path)

    cmd = ['log']
    if limit is not None:
        cmd += ['-n', str(safe_int(limit, 0))]
    cmd += ['--pretty=format:%H', '-s', cs_id, '--', f_path]

    stdout, _stderr = self.repository.run_git_command(cmd)
    # every 40-digit hex run in the output is taken as a commit id
    shas = re.findall(r'[0-9a-fA-F]{40}', stdout)
    return [self.repository.get_changeset(sha) for sha in shas]
312 312
def get_file_history_2(self, path):
    """
    Returns history of file as reversed list of ``Changeset`` objects for
    which file at given ``path`` has been modified.

    Uses a dulwich ``Walker`` starting at this changeset, restricted to
    ``path`` and created with ``max_entries=1``.
    """
    self._get_filectx(path)
    from dulwich.walk import Walker
    walker = Walker(self.repository._repo.object_store, [self.id],
                    paths=[path], max_entries=1)
    changesets = []
    for entry in walker:
        changesets.append(self.repository.get_changeset(entry.commit.id))
    return changesets
326 326
def get_file_annotate(self, path):
    """
    Returns a generator of four element tuples with
    lineno, sha, changeset lazy loader and line

    The lazy loader is a zero-argument callable. It binds the sha of its
    own line at creation time, so it stays correct even when it is called
    after the generator has moved on to later lines.

    TODO: This function now uses os underlying 'git' command which is
    generally not good. Should be replaced with algorithm iterating
    commits.
    """
    cmd = ['blame', '-l', '--root', '-r', self.id, '--', path]
    # -l     ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r sha ==> blames for the given revision
    so, se = self.repository.run_git_command(cmd)

    # the last element after splitting is dropped (empty when the output
    # ends with a newline)
    for i, blame_line in enumerate(so.split('\n')[:-1]):
        ln_no = i + 1
        sha, line = blame_line.split(' ', 1)
        # sha=sha: bind the current value instead of the loop variable
        yield (ln_no, sha,
               lambda sha=sha: self.repository.get_changeset(sha), line)
346 346
def fill_archive(self, stream=None, kind='tgz', prefix=None,
                 subrepos=False):
    """
    Fills up given stream.

    The archive is produced by ``git archive``; for ``tgz`` and ``tbz2``
    its output is piped through ``gzip -9`` / ``bzip2 -9``. The commands
    are started from argument lists without a shell, so ``prefix`` cannot
    inject shell syntax.

    :param stream: file like object.
    :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
        Default: ``tgz``.
    :param prefix: name of root directory in archive.
        Default is repository name and changeset's short id joined with
        dash.
    :param subrepos: include subrepos in this archive (not used by this
        implementation).

    :raise ImproperArchiveTypeError: If given kind is wrong.
    :raise VcsError: If given stream is None, or the prefix is empty or
        starts with a slash.
    """
    import os  # local import: only needed for os.devnull

    allowed_kinds = settings.ARCHIVE_SPECS.keys()
    if kind not in allowed_kinds:
        raise ImproperArchiveTypeError(
            'Archive kind not supported use one of %s' % (allowed_kinds,))

    if prefix is None:
        prefix = '%s-%s' % (self.repository.name, self.short_id)
    elif prefix.startswith('/'):
        raise VCSError("Prefix cannot start with leading slash")
    elif prefix.strip() == '':
        raise VCSError("Prefix cannot be empty")

    if stream is None:
        raise VCSError('You need to pass in a valid stream for filling'
                       ' with archival data')

    frmt = 'zip' if kind == 'zip' else 'tar'
    archive_cmd = [settings.GIT_EXECUTABLE_PATH, 'archive',
                   '--format=%s' % frmt, '--prefix=%s/' % prefix,
                   self.raw_id]
    compress_cmd = {'tgz': ['gzip', '-9'],
                    'tbz2': ['bzip2', '-9']}.get(kind)

    # stderr goes to the null device: the output was never used, and an
    # undrained stderr pipe could block the child processes.
    with open(os.devnull, 'wb') as devnull:
        archiver = Popen(archive_cmd, stdout=PIPE, stderr=devnull,
                         cwd=self.repository.path)
        source = archiver
        if compress_cmd is not None:
            source = Popen(compress_cmd, stdin=archiver.stdout,
                           stdout=PIPE, stderr=devnull)
            # drop our copy of the read end so the archiver gets SIGPIPE
            # if the compressor exits early
            archiver.stdout.close()

        buffer_size = 1024 * 8
        chunk = source.stdout.read(buffer_size)
        while chunk:
            stream.write(chunk)
            chunk = source.stdout.read(buffer_size)

        # reap the child processes
        source.stdout.close()
        source.wait()
        if source is not archiver:
            archiver.wait()
401 401
def get_nodes(self, path):
    """
    Returns the sorted list of nodes found directly in the directory at
    ``path``: ``DirNode``/``SubModuleNode`` objects for trees and
    gitlinks, ``FileNode`` objects for blobs.

    Submodule URLs are looked up in the ``.gitmodules`` file of the
    repository *root* tree, keyed by the submodule's full path, so
    submodules located in subdirectories are handled as well. The file is
    read and parsed at most once per call, and only if the directory
    actually contains a gitlink entry.

    Created nodes are also stored in ``self.nodes`` (existing entries are
    kept).

    :raise ChangesetError: if ``path`` is not a directory, or an entry is
        neither a tree, a blob nor a gitlink.
    """
    if self._get_kind(path) != NodeKind.DIR:
        raise ChangesetError("Directory does not exist for revision %s at "
            " '%s'" % (self.revision, path))
    path = self._fix_path(path)
    tree = self.repository._repo[self._get_id_for_path(path)]
    dirnodes = []
    filenodes = []
    als = self.repository.alias
    gitmodules = None  # parsed lazily on the first gitlink entry
    for name, stat, sha in tree.iteritems():
        if path != '':
            obj_path = '/'.join((path, name))
        else:
            obj_path = name

        if objects.S_ISGITLINK(stat):
            if gitmodules is None:
                root_tree = self.repository._repo[self._tree_id]
                gitmodules_id = root_tree['.gitmodules'][1]
                gitmodules = ConfigFile.from_file(BytesIO(
                    self.repository._repo.get_object(gitmodules_id).data))
            url = gitmodules.get(('submodule', obj_path), 'url')
            dirnodes.append(SubModuleNode(obj_path, url=url, changeset=sha,
                                          alias=als))
            continue

        obj = self.repository._repo.get_object(sha)
        if obj_path not in self._stat_modes:
            self._stat_modes[obj_path] = stat
        if isinstance(obj, objects.Tree):
            dirnodes.append(DirNode(obj_path, changeset=self))
        elif isinstance(obj, objects.Blob):
            filenodes.append(FileNode(obj_path, changeset=self, mode=stat))
        else:
            raise ChangesetError("Requested object should be Tree "
                                 "or Blob, is %r" % type(obj))

    nodes = dirnodes + filenodes
    for node in nodes:
        if node.path not in self.nodes:
            self.nodes[node.path] = node
    nodes.sort()
    return nodes
440 441
def get_node(self, path):
    """
    Returns the node at ``path``, creating it and storing it in
    ``self.nodes`` on first access.

    Depending on what is stored at the path this is a ``SubModuleNode``
    (gitlink; URL read from the root tree's ``.gitmodules``), a
    ``RootNode`` (empty path), a ``DirNode`` or a ``FileNode``.

    :raise NodeDoesNotExistError: if the path cannot be resolved or the
        object found is neither a tree nor a blob.
    """
    if isinstance(path, unicode):
        path = path.encode('utf-8')
    path = self._fix_path(path)
    if path in self.nodes:
        return self.nodes[path]

    try:
        id_ = self._get_id_for_path(path)
    except ChangesetError:
        raise NodeDoesNotExistError("Cannot find one of parents' "
                                    "directories for a given path: %s" % path)

    mode = self._stat_modes.get(path)
    if mode and objects.S_ISGITLINK(mode):
        root_tree = self.repository._repo[self._tree_id]
        gitmodules_blob = self.repository._repo.get_object(
            root_tree['.gitmodules'][1])
        cf = ConfigFile.from_file(BytesIO(gitmodules_blob.data))
        url = cf.get(('submodule', path), 'url')
        node = SubModuleNode(path, url=url, changeset=id_,
                             alias=self.repository.alias)
    else:
        obj = self.repository._repo.get_object(id_)
        if isinstance(obj, objects.Tree):
            if path == '':
                node = RootNode(changeset=self)
            else:
                node = DirNode(path, changeset=self)
            node._tree = obj
        elif isinstance(obj, objects.Blob):
            node = FileNode(path, changeset=self)
            node._blob = obj
        else:
            raise NodeDoesNotExistError("There is no file nor directory "
                "at the given path '%s' at revision %s"
                % (path, self.short_id))

    # cache node
    self.nodes[path] = node
    return node
478 479
@LazyProperty
def affected_files(self):
    """
    Returns a list with every path that ``_changes_cache`` reports as
    added, modified or deleted for the given changeset.
    """
    added, modified, deleted = self._changes_cache
    touched = added.union(modified)
    touched = touched.union(deleted)
    return list(touched)
486 487
@LazyProperty
def _diff_name_status(self):
    """
    Returns the stripped output of ``git diff --name-status`` against
    each parent, joined with newlines (empty string without parents).
    """
    run = self.repository.run_git_command
    per_parent = []
    for parent in self.parents:
        stdout, _stderr = run(['diff', '--name-status', parent.raw_id,
                               self.raw_id, '--encoding=utf8'])
        per_parent.append(stdout.strip())
    return '\n'.join(per_parent)
496 497
@LazyProperty
def _changes_cache(self):
    """
    Returns an ``(added, modified, deleted)`` tuple of path sets, filled
    from the object store's ``tree_changes`` between the tree of every
    parent and this changeset's tree. Without parents the comparison is
    made against no tree at all (``None``).
    """
    added, modified, deleted = set(), set(), set()
    repo = self.repository._repo
    new_tree = repo[self.raw_id].tree

    for parent in (self.parents or [EmptyChangeset()]):
        if isinstance(parent, EmptyChangeset):
            old_tree = None
        else:
            old_tree = repo[parent.raw_id].tree
        changes = repo.object_store.tree_changes(old_tree, new_tree)
        for (oldpath, newpath), (_omode, _nmode), (_osha, _nsha) in changes:
            if newpath:
                # present on the new side: modified if it also existed before
                (modified if oldpath else added).add(newpath)
            elif oldpath:
                deleted.add(oldpath)
    return added, modified, deleted
521 522
522 523 def _get_paths_for_status(self, status):
523 524 """
524 525 Returns sorted list of paths for given ``status``.
525 526
526 527 :param status: one of: *added*, *modified* or *deleted*
527 528 """
528 529 added, modified, deleted = self._changes_cache
529 530 return sorted({
530 531 'added': list(added),
531 532 'modified': list(modified),
532 533 'deleted': list(deleted)}[status]
533 534 )
534 535
@LazyProperty
def added(self):
    """
    Returns added ``FileNode`` objects: every file node of the changeset
    when it has no parents, otherwise an ``AddedFileNodesGenerator`` over
    the paths with status *added*.
    """
    if self.parents:
        added_paths = list(self._get_paths_for_status('added'))
        return AddedFileNodesGenerator(added_paths, self)
    return list(self._get_file_nodes())
544 545
@LazyProperty
def changed(self):
    """
    Returns modified ``FileNode`` objects: an empty list when the
    changeset has no parents, otherwise a ``ChangedFileNodesGenerator``
    over the paths with status *modified*.
    """
    if self.parents:
        modified_paths = list(self._get_paths_for_status('modified'))
        return ChangedFileNodesGenerator(modified_paths, self)
    return []
554 555
@LazyProperty
def removed(self):
    """
    Returns removed ``FileNode`` objects: an empty list when the
    changeset has no parents, otherwise a ``RemovedFileNodesGenerator``
    over the paths with status *deleted*.
    """
    if self.parents:
        deleted_paths = list(self._get_paths_for_status('deleted'))
        return RemovedFileNodesGenerator(deleted_paths, self)
    return []
564 565
565 566 extra = {}
General Comments 0
You need to be logged in to leave comments. Login now