##// END OF EJS Templates
merge with beta
marcink -
r2282:1e617a41 merge codereview
parent child Browse files
Show More
@@ -1,460 +1,457 b''
1 1 import re
2 2 from itertools import chain
3 3 from dulwich import objects
4 4 from subprocess import Popen, PIPE
5 5 from rhodecode.lib.vcs.conf import settings
6 6 from rhodecode.lib.vcs.exceptions import RepositoryError
7 7 from rhodecode.lib.vcs.exceptions import ChangesetError
8 8 from rhodecode.lib.vcs.exceptions import NodeDoesNotExistError
9 9 from rhodecode.lib.vcs.exceptions import VCSError
10 10 from rhodecode.lib.vcs.exceptions import ChangesetDoesNotExistError
11 11 from rhodecode.lib.vcs.exceptions import ImproperArchiveTypeError
12 12 from rhodecode.lib.vcs.backends.base import BaseChangeset
13 13 from rhodecode.lib.vcs.nodes import FileNode, DirNode, NodeKind, RootNode, \
14 14 RemovedFileNode, SubModuleNode
15 15 from rhodecode.lib.vcs.utils import safe_unicode
16 16 from rhodecode.lib.vcs.utils import date_fromtimestamp
17 17 from rhodecode.lib.vcs.utils.lazy import LazyProperty
18 18
19 19
class GitChangeset(BaseChangeset):
    """
    Represents state of the repository at single revision.

    The commit object is fetched from ``repository._repo`` on construction.
    Tree entries are resolved on demand and cached per instance: ``_paths``
    maps a path to an object id, ``_stat_modes`` maps a path to its stat mode
    and ``nodes`` maps a path to an already built node object.
    """

    def __init__(self, repository, revision):
        """
        :param repository: repository this changeset belongs to. It has to
          expose ``revisions`` (list containing ``revision``) and ``_repo``
          (object store used for all lookups).
        :param revision: raw id of the commit.

        :raise RepositoryError: if object with given id cannot be fetched.
        """
        self._stat_modes = {}
        self.repository = repository
        self.raw_id = revision
        # numeric revision is the position of the raw id on the revision list
        self.revision = repository.revisions.index(revision)

        self.short_id = self.raw_id[:12]
        self.id = self.raw_id
        try:
            commit = self.repository._repo.get_object(self.raw_id)
        except KeyError:
            raise RepositoryError("Cannot get object with id %s" % self.raw_id)
        self._commit = commit
        self._tree_id = commit.tree

        try:
            self.message = safe_unicode(commit.message)
        except UnicodeDecodeError:
            # fall back to the encoding declared by the commit itself
            self.message = commit.message.decode(commit.encoding or 'utf-8')
        self.tags = []
        self.nodes = {}
        self._paths = {}

    @LazyProperty
    def author(self):
        """
        Returns the ``committer`` field of the commit as unicode.
        """
        return safe_unicode(self._commit.committer)

    @LazyProperty
    def date(self):
        """
        Returns commit date built from commit time and commit timezone.
        """
        return date_fromtimestamp(self._commit.commit_time,
                                  self._commit.commit_timezone)

    @LazyProperty
    def status(self):
        """
        Returns tuple of modified, added and removed files for current
        changeset (in that order).
        """
        return self.changed, self.added, self.removed

    @LazyProperty
    def branch(self):
        """
        Returns name of the head registered for this changeset's raw id or
        ``None`` if ``repository._heads`` has no entry for it.
        """
        heads = self.repository._heads(reverse=False)

        ref = heads.get(self.raw_id)
        if ref:
            return safe_unicode(ref)

    def _fix_path(self, path):
        """
        Paths are stored without trailing slash so we need to get rid off it
        if needed.
        """
        return path.rstrip('/')

    def _cache_tree_entries(self, tree, curdir):
        """
        Remembers id and stat mode of every direct entry of ``tree``, keyed
        by entry's path (``curdir`` joined with entry name).
        """
        for item, stat, obj_id in tree.iteritems():
            if curdir:
                name = '/'.join((curdir, item))
            else:
                name = item
            self._paths[name] = obj_id
            self._stat_modes[name] = stat

    def _get_id_for_path(self, path):
        """
        Returns id of the object stored at the given ``path``.

        Trees are walked from the root down to the parent directory of
        ``path``; all entries of every visited tree are cached on the way.

        :raise ChangesetError: if one of parent directories is missing or is
          not a tree.
        :raise NodeDoesNotExistError: if there is nothing at ``path``.
        """
        if path not in self._paths:
            # cache keys never carry leading/trailing slashes
            path = path.strip('/')
            # set root tree
            tree = self.repository._repo[self._commit.tree]
            if path == '':
                self._paths[''] = tree.id
                return tree.id
            dirs = path.split('/')[:-1]
            curdir = ''

            # initially extract things from root dir
            self._cache_tree_entries(tree, curdir)

            for dirname in dirs:
                if curdir:
                    curdir = '/'.join((curdir, dirname))
                else:
                    curdir = dirname
                dir_id = None
                for item, stat, obj_id in tree.iteritems():
                    if dirname == item:
                        dir_id = obj_id
                        break
                if not dir_id:
                    raise ChangesetError('%s have not been found' % curdir)
                # descend into the found directory
                tree = self.repository._repo[dir_id]
                if not isinstance(tree, objects.Tree):
                    raise ChangesetError('%s is not a directory' % curdir)

                # cache all items from the given traversed tree
                self._cache_tree_entries(tree, curdir)
            if path not in self._paths:
                raise NodeDoesNotExistError("There is no file nor directory "
                    "at the given path %r at revision %r"
                    % (path, self.short_id))
        return self._paths[path]

    def _get_kind(self, path):
        """
        Returns ``NodeKind.FILE`` for blobs and ``NodeKind.DIR`` for trees;
        implicitly returns ``None`` for any other object type.
        """
        obj = self.repository._repo[self._get_id_for_path(path)]
        if isinstance(obj, objects.Blob):
            return NodeKind.FILE
        elif isinstance(obj, objects.Tree):
            return NodeKind.DIR

    def _get_file_nodes(self):
        """
        Returns iterator over third element of every tuple yielded by
        ``self.walk()`` (presumably file nodes - ``walk`` is defined outside
        of this class).
        """
        return chain(*(t[2] for t in self.walk()))

    @LazyProperty
    def parents(self):
        """
        Returns list of parents changesets.
        """
        return [self.repository.get_changeset(parent)
                for parent in self._commit.parents]

    def next(self, branch=None):
        """
        Returns changeset following this one on the repository's revision
        list. If ``branch`` is given, changesets with different branch are
        skipped.

        :raise VCSError: if ``branch`` is given and this changeset does not
          belong to it.
        :raise ChangesetDoesNotExistError: if there is no such changeset.
        """
        if branch and self.branch != branch:
            raise VCSError('Branch option used on changeset not belonging '
                           'to that branch')

        # iterate instead of recursing so long runs of skipped changesets
        # cannot exhaust the recursion limit
        cs = self
        while True:
            try:
                next_rev = cs.repository.revisions[cs.revision + 1]
            except IndexError:
                raise ChangesetDoesNotExistError
            cs = cs.repository.get_changeset(next_rev)
            if not branch or branch == cs.branch:
                return cs

    def prev(self, branch=None):
        """
        Returns changeset preceding this one on the repository's revision
        list. If ``branch`` is given, changesets with different branch are
        skipped.

        :raise VCSError: if ``branch`` is given and this changeset does not
          belong to it.
        :raise ChangesetDoesNotExistError: if there is no such changeset.
        """
        if branch and self.branch != branch:
            raise VCSError('Branch option used on changeset not belonging '
                           'to that branch')

        cs = self
        while True:
            prev_ = cs.revision - 1
            # negative index would silently wrap around to the list's end
            if prev_ < 0:
                raise ChangesetDoesNotExistError
            try:
                prev_rev = cs.repository.revisions[prev_]
            except IndexError:
                raise ChangesetDoesNotExistError
            cs = cs.repository.get_changeset(prev_rev)
            if not branch or branch == cs.branch:
                return cs

    def get_file_mode(self, path):
        """
        Returns stat mode of the file at the given ``path``.

        Stat modes are recorded only for tree entries, so ``KeyError`` is
        raised for the root path.
        """
        # modes are cached under slash-stripped paths, look up the same key
        path = path.strip('/')
        # ensure path is traversed
        self._get_id_for_path(path)
        return self._stat_modes[path]

    def get_file_content(self, path):
        """
        Returns content of the file at given ``path``.
        """
        blob = self.repository._repo[self._get_id_for_path(path)]
        return blob.as_pretty_string()

    def get_file_size(self, path):
        """
        Returns size of the file at given ``path``.
        """
        blob = self.repository._repo[self._get_id_for_path(path)]
        return blob.raw_length()

    def get_file_changeset(self, path):
        """
        Returns last commit of the file at the given ``path``.
        """
        node = self.get_node(path)
        return node.history[0]

    def get_file_history(self, path):
        """
        Returns history of file as reversed list of ``Changeset`` objects for
        which file at given ``path`` has been modified.

        TODO: This function now uses os underlying 'git' command which is
        generally not good. Should be replaced with algorithm iterating
        commits.
        """
        cmd = 'log --pretty="format: %%H" -s -p %s -- "%s"' % (
            self.id, path
        )
        so, se = self.repository.run_git_command(cmd)
        # every 40 characters long hex string in the output is a commit id
        ids = re.findall(r'[0-9a-fA-F]{40}', so)
        return [self.repository.get_changeset(sha) for sha in ids]

    def get_file_annotate(self, path):
        """
        Returns a list of three element tuples with lineno,changeset and line

        TODO: This function now uses os underlying 'git' command which is
        generally not good. Should be replaced with algorithm iterating
        commits.
        """
        cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path)
        # -l ==> outputs long shas (and we need all 40 characters)
        # --root ==> doesn't put '^' character for bounderies
        # -r sha ==> blames for the given revision
        so, se = self.repository.run_git_command(cmd)
        annotate = []
        # whatever follows the last newline is dropped
        for i, blame_line in enumerate(so.split('\n')[:-1]):
            ln_no = i + 1
            # first parenthesised group separates the sha from line content
            sha, line = re.split(r' \(.+?\) ', blame_line, 1)
            annotate.append((ln_no, self.repository.get_changeset(sha), line))
        return annotate

    def fill_archive(self, stream=None, kind='tgz', prefix=None,
                     subrepos=False):
        """
        Fills up given stream.

        :param stream: file like object.
        :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
            Default: ``tgz``.
        :param prefix: name of root directory in archive.
            Default is repository name and changeset's short_id joined with
            dash.
        :param subrepos: include subrepos in this archive (not used by this
            implementation).

        :raise ImproperArchiveTypeError: If given kind is wrong.
        :raise VcsError: If given stream is None, or prefix is empty or
            starts with a slash.

        """
        allowed_kinds = settings.ARCHIVE_SPECS.keys()
        if kind not in allowed_kinds:
            raise ImproperArchiveTypeError('Archive kind not supported use '
                                           'one of %s' % (allowed_kinds,))

        if prefix is None:
            prefix = '%s-%s' % (self.repository.name, self.short_id)
        elif prefix.startswith('/'):
            raise VCSError("Prefix cannot start with leading slash")
        elif prefix.strip() == '':
            raise VCSError("Prefix cannot be empty")

        if kind == 'zip':
            frmt = 'zip'
        else:
            # tgz and tbz2 are plain tar streams piped through a compressor
            frmt = 'tar'
        # NOTE(review): ``prefix`` is interpolated unquoted into a command
        # executed with shell=True - callers must not pass untrusted values
        cmd = 'git archive --format=%s --prefix=%s/ %s' % (frmt, prefix,
                                                           self.raw_id)
        if kind == 'tgz':
            cmd += ' | gzip -9'
        elif kind == 'tbz2':
            cmd += ' | bzip2 -9'

        if stream is None:
            raise VCSError('You need to pass in a valid stream for filling'
                           ' with archival data')
        popen = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True,
                      cwd=self.repository.path)

        buffer_size = 1024 * 8
        chunk = popen.stdout.read(buffer_size)
        while chunk:
            stream.write(chunk)
            chunk = popen.stdout.read(buffer_size)
        # Make sure all descriptors would be read
        popen.communicate()

    def get_nodes(self, path):
        """
        Returns sorted list of nodes (submodules, directories and files)
        stored directly under directory at the given ``path``. Built nodes
        are added to the ``nodes`` cache unless already present there.

        :raise ChangesetError: if ``path`` is not a directory or one of the
          entries is neither a tree nor a blob.
        """
        if self._get_kind(path) != NodeKind.DIR:
            raise ChangesetError("Directory does not exist for revision %r at "
                                 " %r" % (self.revision, path))
        path = self._fix_path(path)
        tree = self.repository._repo[self._get_id_for_path(path)]
        dirnodes = []
        filenodes = []
        als = self.repository.alias
        for name, stat, obj_id in tree.iteritems():
            if objects.S_ISGITLINK(stat):
                # submodule entries are not looked up in the object store
                dirnodes.append(SubModuleNode(name, url=None,
                                              changeset=obj_id, alias=als))
                continue

            obj = self.repository._repo.get_object(obj_id)
            if path != '':
                obj_path = '/'.join((path, name))
            else:
                obj_path = name
            if obj_path not in self._stat_modes:
                self._stat_modes[obj_path] = stat
            if isinstance(obj, objects.Tree):
                dirnodes.append(DirNode(obj_path, changeset=self))
            elif isinstance(obj, objects.Blob):
                filenodes.append(FileNode(obj_path, changeset=self, mode=stat))
            else:
                raise ChangesetError("Requested object should be Tree "
                                     "or Blob, is %r" % type(obj))
        nodes = dirnodes + filenodes
        for node in nodes:
            if node.path not in self.nodes:
                self.nodes[node.path] = node
        nodes.sort()
        return nodes

    def get_node(self, path):
        """
        Returns node (root, directory, file or submodule) for the given
        ``path``. Nodes are built once and served from the ``nodes`` cache
        afterwards.

        :raise NodeDoesNotExistError: if there is no file nor directory at
          the given path.
        """
        if isinstance(path, unicode):
            path = path.encode('utf-8')
        path = self._fix_path(path)
        if path not in self.nodes:
            try:
                id_ = self._get_id_for_path(path)
            except ChangesetError:
                raise NodeDoesNotExistError("Cannot find one of parents' "
                    "directories for a given path: %s" % path)

            als = self.repository.alias
            # mode is missing for paths without recorded stat (e.g. root)
            mode = self._stat_modes.get(path)
            if mode and objects.S_ISGITLINK(mode):
                node = SubModuleNode(path, url=None, changeset=id_, alias=als)
            else:
                obj = self.repository._repo.get_object(id_)

                if isinstance(obj, objects.Tree):
                    if path == '':
                        node = RootNode(changeset=self)
                    else:
                        node = DirNode(path, changeset=self)
                    node._tree = obj
                elif isinstance(obj, objects.Blob):
                    node = FileNode(path, changeset=self)
                    node._blob = obj
                else:
                    raise NodeDoesNotExistError("There is no file nor directory "
                        "at the given path %r at revision %r"
                        % (path, self.short_id))
            # cache node
            self.nodes[path] = node
        return self.nodes[path]

    @LazyProperty
    def affected_files(self):
        """
        Get's a fast accessible file changes for given changeset
        (added nodes followed by changed ones).
        """
        return self.added + self.changed

    @LazyProperty
    def _diff_name_status(self):
        """
        Returns output of ``git diff --name-status`` run against every
        parent of this changeset, joined with newlines.
        """
        output = []
        for parent in self.parents:
            cmd = 'diff --name-status %s %s --encoding=utf8' % (parent.raw_id,
                                                                 self.raw_id)
            so, se = self.repository.run_git_command(cmd)
            output.append(so.strip())
        return '\n'.join(output)

    def _get_paths_for_status(self, status):
        """
        Returns sorted list of paths for given ``status``.

        :param status: one of: *added*, *modified* or *deleted*
        """
        # name-status lines begin with a one letter status code
        char = status[0].upper()
        paths = set(line[1:].strip()
                    for line in self._diff_name_status.splitlines()
                    if line.startswith(char))
        return sorted(paths)

    @LazyProperty
    def added(self):
        """
        Returns list of added ``FileNode`` objects.
        """
        if not self.parents:
            # without parents every file of the changeset counts as added
            return list(self._get_file_nodes())
        return [self.get_node(path)
                for path in self._get_paths_for_status('added')]

    @LazyProperty
    def changed(self):
        """
        Returns list of modified ``FileNode`` objects.
        """
        if not self.parents:
            return []
        return [self.get_node(path)
                for path in self._get_paths_for_status('modified')]

    @LazyProperty
    def removed(self):
        """
        Returns list of removed ``FileNode`` objects.
        """
        if not self.parents:
            return []
        return [RemovedFileNode(path)
                for path in self._get_paths_for_status('deleted')]
General Comments 0
You need to be logged in to leave comments. Login now