##// END OF EJS Templates
new dulwich based implementation of added/modified/removed...
marcink -
r2762:ba4fb9c4 beta
parent child Browse files
Show More
@@ -1,485 +1,489 b''
1 1 import re
2 2 from itertools import chain
3 3 from dulwich import objects
4 4 from subprocess import Popen, PIPE
5 5 from rhodecode.lib.vcs.conf import settings
6 6 from rhodecode.lib.vcs.exceptions import RepositoryError
7 7 from rhodecode.lib.vcs.exceptions import ChangesetError
8 8 from rhodecode.lib.vcs.exceptions import NodeDoesNotExistError
9 9 from rhodecode.lib.vcs.exceptions import VCSError
10 10 from rhodecode.lib.vcs.exceptions import ChangesetDoesNotExistError
11 11 from rhodecode.lib.vcs.exceptions import ImproperArchiveTypeError
12 from rhodecode.lib.vcs.backends.base import BaseChangeset
12 from rhodecode.lib.vcs.backends.base import BaseChangeset, EmptyChangeset
13 13 from rhodecode.lib.vcs.nodes import FileNode, DirNode, NodeKind, RootNode, \
14 14 RemovedFileNode, SubModuleNode
15 15 from rhodecode.lib.vcs.utils import safe_unicode
16 16 from rhodecode.lib.vcs.utils import date_fromtimestamp
17 17 from rhodecode.lib.vcs.utils.lazy import LazyProperty
18 18
19 19
class GitChangeset(BaseChangeset):
    """
    Represents state of the repository at single revision.

    Wraps a dulwich commit object. Tree entries, nodes and per-file changes
    are resolved on demand and cached on the instance.
    """

    def __init__(self, repository, revision):
        """
        :param repository: repository this changeset belongs to; its
            ``_repo`` attribute is used to look objects up and its
            ``revisions`` sequence to compute the numeric revision.
        :param revision: id of a commit, or of a tag object pointing at one.

        :raise RepositoryError: if the object cannot be found.
        """
        # path -> stat mode of every tree entry seen so far
        self._stat_modes = {}
        self.repository = repository

        try:
            commit = self.repository._repo.get_object(revision)
            if isinstance(commit, objects.Tag):
                # tag object: switch to the id of the object it points at
                revision = commit.object[1]
                commit = self.repository._repo.get_object(commit.object[1])
        except KeyError:
            raise RepositoryError("Cannot get object with id %s" % revision)
        self.raw_id = revision
        self.id = self.raw_id
        self.short_id = self.raw_id[:12]
        self._commit = commit

        self._tree_id = commit.tree
        # names of the commit attributes used by author/date properties
        self._commiter_property = 'committer'
        self._date_property = 'commit_time'
        self._date_tz_property = 'commit_timezone'
        self.revision = repository.revisions.index(revision)

        self.message = safe_unicode(commit.message)
        #self.branch = None
        self.tags = []
        # path -> node objects already created for this changeset
        self.nodes = {}
        # path -> object id of every tree entry seen so far
        self._paths = {}

    @LazyProperty
    def author(self):
        """
        Returns the commit attribute named by ``_commiter_property``
        (``committer``) converted with ``safe_unicode``.
        """
        return safe_unicode(getattr(self._commit, self._commiter_property))

    @LazyProperty
    def date(self):
        """
        Returns the result of ``date_fromtimestamp`` for the commit time and
        commit timezone of the underlying commit.
        """
        return date_fromtimestamp(getattr(self._commit, self._date_property),
                                  getattr(self._commit, self._date_tz_property))

    @LazyProperty
    def _timestamp(self):
        """
        Returns the raw ``commit_time`` value of the underlying commit.
        """
        return getattr(self._commit, self._date_property)

    @LazyProperty
    def status(self):
        """
        Returns ``(changed, added, removed)`` tuple of node lists for the
        current changeset.
        """
        return self.changed, self.added, self.removed

    @LazyProperty
    def branch(self):
        """
        Returns the value stored for this changeset's ``raw_id`` in
        ``repository._heads(reverse=False)`` as unicode, or ``None`` when
        there is no (non-empty) entry.
        """
        # presumably maps head commit ids to branch names -- verify against
        # the repository implementation
        heads = self.repository._heads(reverse=False)

        ref = heads.get(self.raw_id)
        if ref:
            return safe_unicode(ref)
        return None

    def _fix_path(self, path):
        """
        Paths are stored without trailing slash so we need to get rid of it
        if needed.
        """
        return path.rstrip('/')

    def _cache_tree_entries(self, tree, curdir):
        """
        Remembers object id and stat mode of every entry of ``tree`` under
        its full path (``curdir`` joined with the entry name).
        """
        for item, stat, sha in tree.iteritems():
            if curdir:
                full_path = '/'.join((curdir, item))
            else:
                full_path = item
            self._paths[full_path] = sha
            self._stat_modes[full_path] = stat

    def _get_id_for_path(self, path):
        """
        Returns id of the object stored at ``path``, walking the trees from
        the root and caching every entry met on the way.

        :raise ChangesetError: if one of the parent directories is missing
            or is not a tree.
        :raise NodeDoesNotExistError: if nothing is stored at ``path``.
        """
        if path not in self._paths:
            path = path.strip('/')
            # set root tree
            tree = self.repository._repo[self._tree_id]
            if path == '':
                self._paths[''] = tree.id
                return tree.id
            # every component but the last one has to be a directory
            dirs = path.split('/')[:-1]
            curdir = ''

            # initially extract things from root dir
            self._cache_tree_entries(tree, curdir)

            for dirname in dirs:
                if curdir:
                    curdir = '/'.join((curdir, dirname))
                else:
                    curdir = dirname
                dir_id = None
                for item, stat, sha in tree.iteritems():
                    if dirname == item:
                        dir_id = sha
                        break
                if not dir_id:
                    raise ChangesetError('%s have not been found' % curdir)
                # descend into the matched entry
                tree = self.repository._repo[dir_id]
                if not isinstance(tree, objects.Tree):
                    raise ChangesetError('%s is not a directory' % curdir)

                # cache all items from the given traversed tree
                self._cache_tree_entries(tree, curdir)
            if path not in self._paths:
                raise NodeDoesNotExistError("There is no file nor directory "
                    "at the given path %r at revision %r"
                    % (path, self.short_id))
        return self._paths[path]

    def _get_kind(self, path):
        """
        Returns ``NodeKind.FILE`` for a blob and ``NodeKind.DIR`` for a tree
        stored at ``path``; ``None`` for any other object type.
        """
        obj = self.repository._repo[self._get_id_for_path(path)]
        if isinstance(obj, objects.Blob):
            return NodeKind.FILE
        elif isinstance(obj, objects.Tree):
            return NodeKind.DIR
        return None

    def _get_file_nodes(self):
        """
        Returns iterator chaining the third element of every tuple yielded
        by ``self.walk()``.
        """
        return chain(*(t[2] for t in self.walk()))

    @LazyProperty
    def parents(self):
        """
        Returns list of parents changesets.
        """
        return [self.repository.get_changeset(parent)
                for parent in self._commit.parents]

    def next(self, branch=None):
        """
        Returns the changeset following this one in ``repository.revisions``.

        :param branch: if given, changesets whose ``branch`` differs are
            skipped.

        :raise VCSError: if ``branch`` is given and this changeset does not
            belong to it.
        :raise ChangesetDoesNotExistError: if there is no such changeset.
        """
        if branch and self.branch != branch:
            raise VCSError('Branch option used on changeset not belonging '
                           'to that branch')

        # iterate instead of recursing: a long run of changesets from other
        # branches must not hit the interpreter recursion limit
        changeset = self
        while True:
            try:
                next_ = changeset.revision + 1
                next_rev = changeset.repository.revisions[next_]
            except IndexError:
                raise ChangesetDoesNotExistError
            changeset = changeset.repository.get_changeset(next_rev)

            if not branch or branch == changeset.branch:
                return changeset

    def prev(self, branch=None):
        """
        Returns the changeset preceding this one in
        ``repository.revisions``.

        :param branch: if given, changesets whose ``branch`` differs are
            skipped.

        :raise VCSError: if ``branch`` is given and this changeset does not
            belong to it.
        :raise ChangesetDoesNotExistError: if there is no such changeset.
        """
        if branch and self.branch != branch:
            raise VCSError('Branch option used on changeset not belonging '
                           'to that branch')

        # iterate instead of recursing, see ``next``
        changeset = self
        while True:
            prev_ = changeset.revision - 1
            if prev_ < 0:
                # a negative index would silently wrap around to the end
                raise ChangesetDoesNotExistError
            try:
                prev_rev = changeset.repository.revisions[prev_]
            except IndexError:
                raise ChangesetDoesNotExistError
            changeset = changeset.repository.get_changeset(prev_rev)

            if not branch or branch == changeset.branch:
                return changeset

    def diff(self, ignore_whitespace=True, context=3):
        """
        Returns diff between the first parent (or the repository's
        ``EMPTY_CHANGESET`` when there are no parents) and this changeset,
        joined into a single string.
        """
        if self.parents:
            rev1 = self.parents[0]
        else:
            rev1 = self.repository.EMPTY_CHANGESET
        rev2 = self
        return ''.join(self.repository.get_diff(rev1, rev2,
                                    ignore_whitespace=ignore_whitespace,
                                    context=context))

    def get_file_mode(self, path):
        """
        Returns stat mode of the file at the given ``path``.
        """
        # ensure path is traversed
        self._get_id_for_path(path)
        # modes are cached under slash-stripped paths (see
        # ``_get_id_for_path``), so normalize the same way before the lookup
        return self._stat_modes[path.strip('/')]

    def get_file_content(self, path):
        """
        Returns content of the file at given ``path``.
        """
        blob_id = self._get_id_for_path(path)
        blob = self.repository._repo[blob_id]
        return blob.as_pretty_string()

    def get_file_size(self, path):
        """
        Returns size of the file at given ``path``.
        """
        blob_id = self._get_id_for_path(path)
        blob = self.repository._repo[blob_id]
        return blob.raw_length()

    def get_file_changeset(self, path):
        """
        Returns last commit of the file at the given ``path``.
        """
        node = self.get_node(path)
        return node.history[0]

    def get_file_history(self, path):
        """
        Returns history of file as reversed list of ``Changeset`` objects for
        which file at given ``path`` has been modified.

        TODO: This function now uses the underlying 'git' command and a
        regular expression over its output, which is generally not good.
        Should be replaced with algorithm iterating commits.
        """
        cmd = 'log --pretty="format: %%H" -s -p %s -- "%s"' % (
                  self.id, path
                )
        so, se = self.repository.run_git_command(cmd)
        # every 40 character hex string in the output is taken as a commit id
        ids = re.findall(r'[0-9a-fA-F]{40}', so)
        return [self.repository.get_changeset(sha) for sha in ids]

    def get_file_annotate(self, path):
        """
        Returns a list of three element tuples with lineno,changeset and line

        TODO: This function now uses os underlying 'git' command which is
        generally not good. Should be replaced with algorithm iterating
        commits.
        """
        cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path)
        # -l     ==> outputs long shas (and we need all 40 characters)
        # --root ==> doesn't put '^' character for boundaries
        # -r sha ==> blames for the given revision
        so, se = self.repository.run_git_command(cmd)

        annotate = []
        # the last element of the split is dropped (text after final newline)
        for i, blame_line in enumerate(so.split('\n')[:-1]):
            ln_no = i + 1
            # everything up to the first space is the commit id
            sha, line = blame_line.split(' ', 1)
            annotate.append((ln_no, self.repository.get_changeset(sha), line))
        return annotate

    def fill_archive(self, stream=None, kind='tgz', prefix=None,
                     subrepos=False):
        """
        Fills up given stream.

        :param stream: file like object.
        :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
            Default: ``tgz``.
        :param prefix: name of root directory in archive.
            Default is repository name and changeset's short_id joined with
            dash.
        :param subrepos: include subrepos in this archive (not used by this
            implementation).

        :raise ImproperArchiveTypeError: If given kind is wrong.
        :raise VCSError: If given stream is None, or if prefix is empty or
            starts with a slash.

        """
        # local import keeps the module level imports untouched;
        # ``shlex.quote`` is missing on Python 2 where ``pipes.quote`` is
        # the equivalent
        try:
            from shlex import quote
        except ImportError:
            from pipes import quote

        allowed_kinds = list(settings.ARCHIVE_SPECS.keys())
        if kind not in allowed_kinds:
            raise ImproperArchiveTypeError(
                'Archive kind not supported use one of %s' % (allowed_kinds,))

        if prefix is None:
            prefix = '%s-%s' % (self.repository.name, self.short_id)
        elif prefix.startswith('/'):
            raise VCSError("Prefix cannot start with leading slash")
        elif prefix.strip() == '':
            raise VCSError("Prefix cannot be empty")

        if stream is None:
            raise VCSError('You need to pass in a valid stream for filling'
                           ' with archival data')

        if kind == 'zip':
            frmt = 'zip'
        else:
            frmt = 'tar'
        # the command goes through a shell (needed for the compression
        # pipe), so anything caller-controlled has to be quoted
        cmd = 'git archive --format=%s --prefix=%s %s' % (
            frmt, quote('%s/' % prefix), quote(self.raw_id))
        if kind == 'tgz':
            cmd += ' | gzip -9'
        elif kind == 'tbz2':
            cmd += ' | bzip2 -9'

        popen = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True,
                      cwd=self.repository.path)

        buffer_size = 1024 * 8
        chunk = popen.stdout.read(buffer_size)
        while chunk:
            stream.write(chunk)
            chunk = popen.stdout.read(buffer_size)
        # Make sure all descriptors would be read
        popen.communicate()

    def get_nodes(self, path):
        """
        Returns sorted list of nodes (directories, submodules and files)
        stored directly under the directory at ``path``.

        :raise ChangesetError: if ``path`` is not a directory, or one of its
            entries is neither a tree nor a blog-like blob object.
        """
        if self._get_kind(path) != NodeKind.DIR:
            raise ChangesetError("Directory does not exist for revision %r at "
                " %r" % (self.revision, path))
        path = self._fix_path(path)
        tree_id = self._get_id_for_path(path)
        tree = self.repository._repo[tree_id]
        dirnodes = []
        filenodes = []
        als = self.repository.alias
        for name, stat, sha in tree.iteritems():
            if objects.S_ISGITLINK(stat):
                # gitlink entries become submodule nodes; the referenced
                # object is not looked up in this repository
                dirnodes.append(SubModuleNode(name, url=None, changeset=sha,
                                              alias=als))
                continue

            obj = self.repository._repo.get_object(sha)
            if path != '':
                obj_path = '/'.join((path, name))
            else:
                obj_path = name
            if obj_path not in self._stat_modes:
                self._stat_modes[obj_path] = stat
            if isinstance(obj, objects.Tree):
                dirnodes.append(DirNode(obj_path, changeset=self))
            elif isinstance(obj, objects.Blob):
                filenodes.append(FileNode(obj_path, changeset=self, mode=stat))
            else:
                raise ChangesetError("Requested object should be Tree "
                                     "or Blob, is %r" % type(obj))
        nodes = dirnodes + filenodes
        # cache the freshly created nodes, never replacing existing ones
        for node in nodes:
            if node.path not in self.nodes:
                self.nodes[node.path] = node
        nodes.sort()
        return nodes

    def get_node(self, path):
        """
        Returns (cached) node stored at ``path``: ``RootNode``, ``DirNode``,
        ``FileNode`` or ``SubModuleNode``.

        :raise NodeDoesNotExistError: if nothing can be found at ``path``.
        """
        if isinstance(path, unicode):
            path = path.encode('utf-8')
        path = self._fix_path(path)
        if path not in self.nodes:
            try:
                id_ = self._get_id_for_path(path)
            except ChangesetError:
                raise NodeDoesNotExistError("Cannot find one of parents' "
                    "directories for a given path: %s" % path)

            mode = self._stat_modes.get(path)
            if mode and objects.S_ISGITLINK(mode):
                node = SubModuleNode(path, url=None, changeset=id_,
                                     alias=self.repository.alias)
            else:
                obj = self.repository._repo.get_object(id_)

                if isinstance(obj, objects.Tree):
                    if path == '':
                        node = RootNode(changeset=self)
                    else:
                        node = DirNode(path, changeset=self)
                    node._tree = obj
                elif isinstance(obj, objects.Blob):
                    node = FileNode(path, changeset=self)
                    node._blob = obj
                else:
                    raise NodeDoesNotExistError("There is no file nor directory "
                        "at the given path %r at revision %r"
                        % (path, self.short_id))
            # cache node
            self.nodes[path] = node
        return self.nodes[path]

    @LazyProperty
    def affected_files(self):
        """
        Gets fast accessible list of paths changed by the given changeset
        (union of added, modified and deleted paths).
        """
        added, modified, deleted = self._changes_cache
        return list(added.union(modified).union(deleted))

    @LazyProperty
    def _diff_name_status(self):
        """
        Returns output of ``git diff --name-status`` against every parent,
        joined with newlines.
        """
        output = []
        for parent in self.parents:
            cmd = 'diff --name-status %s %s --encoding=utf8' % (parent.raw_id,
                                                                self.raw_id)
            so, se = self.repository.run_git_command(cmd)
            output.append(so.strip())
        return '\n'.join(output)

    @LazyProperty
    def _changes_cache(self):
        """
        Returns ``(added, modified, deleted)`` tuple of path sets computed
        with the object store's ``tree_changes`` against every parent.

        A changeset without parents is compared against no tree at all
        (``None`` is passed as the source tree).
        """
        added = set()
        modified = set()
        deleted = set()
        _r = self.repository._repo
        new_tree = _r[self.raw_id].tree

        parents = self.parents or [EmptyChangeset()]
        for parent in parents:
            if isinstance(parent, EmptyChangeset):
                oid = None
            else:
                oid = _r[parent.raw_id].tree
            changes = _r.object_store.tree_changes(oid, new_tree)
            # only the (oldpath, newpath) pair is needed; the two remaining
            # pairs of every change are ignored
            for (oldpath, newpath), (_, _), (_, _) in changes:
                if newpath and oldpath:
                    modified.add(newpath)
                elif newpath and not oldpath:
                    added.add(newpath)
                elif not newpath and oldpath:
                    deleted.add(oldpath)
        return added, modified, deleted

    def _get_paths_for_status(self, status):
        """
        Returns sorted list of paths for given ``status``.

        :param status: one of: *added*, *modified* or *deleted*
        """
        added, modified, deleted = self._changes_cache
        paths_by_status = {
            'added': added,
            'modified': modified,
            'deleted': deleted,
        }
        return sorted(paths_by_status[status])

    @LazyProperty
    def added(self):
        """
        Returns list of added ``FileNode`` objects.
        """
        if not self.parents:
            # without parents every file of the changeset counts as added
            return list(self._get_file_nodes())
        return [self.get_node(path)
                for path in self._get_paths_for_status('added')]

    @LazyProperty
    def changed(self):
        """
        Returns list of modified ``FileNode`` objects.
        """
        if not self.parents:
            return []
        return [self.get_node(path)
                for path in self._get_paths_for_status('modified')]

    @LazyProperty
    def removed(self):
        """
        Returns list of removed ``FileNode`` objects.
        """
        if not self.parents:
            return []
        return [RemovedFileNode(path)
                for path in self._get_paths_for_status('deleted')]
General Comments 0
You need to be logged in to leave comments. Login now