##// END OF EJS Templates
safe_unicode never fails, so it does not need to be wrapped in a Unicode exception handler
marcink -
r2283:d932229a codereview
parent child Browse files
Show More
@@ -1,457 +1,455 b''
1 1 import re
2 2 from itertools import chain
3 3 from dulwich import objects
4 4 from subprocess import Popen, PIPE
5 5 from rhodecode.lib.vcs.conf import settings
6 6 from rhodecode.lib.vcs.exceptions import RepositoryError
7 7 from rhodecode.lib.vcs.exceptions import ChangesetError
8 8 from rhodecode.lib.vcs.exceptions import NodeDoesNotExistError
9 9 from rhodecode.lib.vcs.exceptions import VCSError
10 10 from rhodecode.lib.vcs.exceptions import ChangesetDoesNotExistError
11 11 from rhodecode.lib.vcs.exceptions import ImproperArchiveTypeError
12 12 from rhodecode.lib.vcs.backends.base import BaseChangeset
13 13 from rhodecode.lib.vcs.nodes import FileNode, DirNode, NodeKind, RootNode, \
14 14 RemovedFileNode, SubModuleNode
15 15 from rhodecode.lib.vcs.utils import safe_unicode
16 16 from rhodecode.lib.vcs.utils import date_fromtimestamp
17 17 from rhodecode.lib.vcs.utils.lazy import LazyProperty
18 18
19 19
20 20 class GitChangeset(BaseChangeset):
21 21 """
22 22 Represents state of the repository at single revision.
23 23 """
24 24
def __init__(self, repository, revision):
    """
    Build a changeset for ``revision`` of the given git ``repository``.

    :param repository: repository object; its ``revisions`` sequence and
      its ``_repo`` object store are used here.
    :param revision: raw commit id; it is looked up with
      ``repository.revisions.index``, so it has to be present there.

    :raise RepositoryError: if the object store has no object for
      ``revision``.
    """
    self._stat_modes = {}
    self.repository = repository
    self.raw_id = revision
    self.revision = repository.revisions.index(revision)

    self.short_id = self.raw_id[:12]
    self.id = self.raw_id
    try:
        commit = self.repository._repo.get_object(self.raw_id)
    except KeyError:
        raise RepositoryError("Cannot get object with id %s" % self.raw_id)
    self._commit = commit
    self._tree_id = commit.tree

    # The message is converted with safe_unicode directly. The former
    # UnicodeDecodeError fallback is gone: per the change description,
    # safe_unicode never fails, so that handler was dead code.
    self.message = safe_unicode(commit.message)

    self.tags = []
    # cache of already built nodes, keyed by path
    self.nodes = {}
    # cache of object ids, keyed by path
    self._paths = {}
48 46
@LazyProperty
def author(self):
    """
    Returns the author of the commit as unicode.
    """
    # Read the commit's ``author`` field. The committer can be a different
    # person (for example when somebody else's patch is applied), so it
    # must not be reported as the author.
    return safe_unicode(self._commit.author)
52 50
@LazyProperty
def date(self):
    """
    Returns the commit time, converted by ``date_fromtimestamp`` together
    with the commit's timezone value.
    """
    commit = self._commit
    return date_fromtimestamp(commit.commit_time, commit.commit_timezone)
57 55
@LazyProperty
def status(self):
    """
    Returns a ``(changed, added, removed)`` tuple of node lists for the
    current changeset.
    """
    changed, added, removed = self.changed, self.added, self.removed
    return changed, added, removed
64 62
@LazyProperty
def branch(self):
    """
    Returns, as unicode, the ref that ``repository._heads(reverse=False)``
    maps to this changeset's raw id, or ``None`` if there is no such entry.
    """
    ref = self.repository._heads(reverse=False).get(self.raw_id)
    if not ref:
        return None
    return safe_unicode(ref)
73 71
74 72 def _fix_path(self, path):
75 73 """
76 74 Paths are stored without trailing slash so we need to get rid off it if
77 75 needed.
78 76 """
79 77 if path.endswith('/'):
80 78 path = path.rstrip('/')
81 79 return path
82 80
83 81 def _get_id_for_path(self, path):
84 82
85 83 # FIXME: Please, spare a couple of minutes and make those codes cleaner;
86 84 if not path in self._paths:
87 85 path = path.strip('/')
88 86 # set root tree
89 87 tree = self.repository._repo[self._commit.tree]
90 88 if path == '':
91 89 self._paths[''] = tree.id
92 90 return tree.id
93 91 splitted = path.split('/')
94 92 dirs, name = splitted[:-1], splitted[-1]
95 93 curdir = ''
96 94
97 95 # initially extract things from root dir
98 96 for item, stat, id in tree.iteritems():
99 97 if curdir:
100 98 name = '/'.join((curdir, item))
101 99 else:
102 100 name = item
103 101 self._paths[name] = id
104 102 self._stat_modes[name] = stat
105 103
106 104 for dir in dirs:
107 105 if curdir:
108 106 curdir = '/'.join((curdir, dir))
109 107 else:
110 108 curdir = dir
111 109 dir_id = None
112 110 for item, stat, id in tree.iteritems():
113 111 if dir == item:
114 112 dir_id = id
115 113 if dir_id:
116 114 # Update tree
117 115 tree = self.repository._repo[dir_id]
118 116 if not isinstance(tree, objects.Tree):
119 117 raise ChangesetError('%s is not a directory' % curdir)
120 118 else:
121 119 raise ChangesetError('%s have not been found' % curdir)
122 120
123 121 # cache all items from the given traversed tree
124 122 for item, stat, id in tree.iteritems():
125 123 if curdir:
126 124 name = '/'.join((curdir, item))
127 125 else:
128 126 name = item
129 127 self._paths[name] = id
130 128 self._stat_modes[name] = stat
131 129 if not path in self._paths:
132 130 raise NodeDoesNotExistError("There is no file nor directory "
133 131 "at the given path %r at revision %r"
134 132 % (path, self.short_id))
135 133 return self._paths[path]
136 134
def _get_kind(self, path):
    """
    Returns ``NodeKind.FILE`` when the object at ``path`` is a blob,
    ``NodeKind.DIR`` when it is a tree, and ``None`` for anything else.
    """
    obj = self.repository._repo[self._get_id_for_path(path)]
    if isinstance(obj, objects.Blob):
        return NodeKind.FILE
    if isinstance(obj, objects.Tree):
        return NodeKind.DIR
    return None
144 142
145 143 def _get_file_nodes(self):
146 144 return chain(*(t[2] for t in self.walk()))
147 145
@LazyProperty
def parents(self):
    """
    Returns the list of changesets for the parents of this commit.
    """
    parent_changesets = []
    for parent_id in self._commit.parents:
        parent_changesets.append(self.repository.get_changeset(parent_id))
    return parent_changesets
155 153
def next(self, branch=None):
    """
    Returns the changeset following this one in ``repository.revisions``.

    :param branch: if given, changesets whose ``branch`` differs are
      skipped and the first following changeset on ``branch`` is returned.

    :raise VCSError: if ``branch`` is given but this changeset is not on
      that branch.
    :raise ChangesetDoesNotExistError: if there is no following changeset
      (on ``branch``, when given).
    """
    if branch and self.branch != branch:
        raise VCSError('Branch option used on changeset not belonging '
                       'to that branch')

    # Walk forward with a loop, not recursion, so that a long run of
    # changesets on other branches cannot exhaust the recursion limit.
    cs = self
    while True:
        try:
            next_rev = cs.repository.revisions[cs.revision + 1]
        except IndexError:
            raise ChangesetDoesNotExistError
        cs = cs.repository.get_changeset(next_rev)
        if not branch or branch == cs.branch:
            return cs
176 174
def prev(self, branch=None):
    """
    Returns the changeset preceding this one in ``repository.revisions``.

    :param branch: if given, changesets whose ``branch`` differs are
      skipped and the first preceding changeset on ``branch`` is returned.

    :raise VCSError: if ``branch`` is given but this changeset is not on
      that branch.
    :raise ChangesetDoesNotExistError: if there is no preceding changeset
      (on ``branch``, when given).
    """
    if branch and self.branch != branch:
        raise VCSError('Branch option used on changeset not belonging '
                       'to that branch')

    # Walk backwards with a loop, not recursion, so that a long run of
    # changesets on other branches cannot exhaust the recursion limit.
    cs = self
    while True:
        prev_index = cs.revision - 1
        # a negative index would wrap around to the end of the sequence
        if prev_index < 0:
            raise ChangesetDoesNotExistError
        try:
            prev_rev = cs.repository.revisions[prev_index]
        except IndexError:
            raise ChangesetDoesNotExistError
        cs = cs.repository.get_changeset(prev_rev)
        if not branch or branch == cs.branch:
            return cs
199 197
def get_file_mode(self, path):
    """
    Looks up the stat mode recorded for the file at the given ``path``.
    """
    # resolving the id records stat modes of the trees walked on the way
    self._get_id_for_path(path)
    modes = self._stat_modes
    return modes[path]
207 205
def get_file_content(self, path):
    """
    Returns the content of the file at the given ``path``, as produced by
    the blob's ``as_pretty_string``.
    """
    blob_id = self._get_id_for_path(path)
    return self.repository._repo[blob_id].as_pretty_string()
215 213
def get_file_size(self, path):
    """
    Returns the size of the file at the given ``path``, as reported by
    the blob's ``raw_length``.
    """
    blob_id = self._get_id_for_path(path)
    return self.repository._repo[blob_id].raw_length()
223 221
def get_file_changeset(self, path):
    """
    Returns the last commit of the file at the given ``path``, i.e. the
    first entry of the node's ``history``.
    """
    history = self.get_node(path).history
    return history[0]
230 228
def get_file_history(self, path):
    """
    Returns the list of ``Changeset`` objects in which the file at the
    given ``path`` has been modified, in the order ``git log`` prints them.

    TODO: This function shells out to the underlying 'git' command, which
    is generally not good. It should be replaced with an algorithm
    iterating over commits.
    """
    cmd = 'log --pretty="format: %%H" -s -p %s -- "%s"' % (self.id, path)
    stdout, _stderr = self.repository.run_git_command(cmd)
    history = []
    # every 40-character hex run in the output is taken as a commit id
    for sha in re.findall(r'[0-9a-fA-F]{40}', stdout):
        history.append(self.repository.get_changeset(sha))
    return history
246 244
def get_file_annotate(self, path):
    """
    Returns a list of ``(lineno, changeset, line)`` tuples, one per line
    of the blame output for the file at ``path``.

    TODO: This function shells out to the underlying 'git' command, which
    is generally not good. It should be replaced with an algorithm
    iterating over commits.
    """
    # -l     ==> outputs long shas (all 40 characters are needed)
    # --root ==> doesn't put '^' character for boundaries
    # -r sha ==> blames for the given revision
    cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path)
    stdout, _stderr = self.repository.run_git_command(cmd)
    # the piece after the final newline is dropped
    blame_lines = stdout.split('\n')[:-1]
    annotate = []
    for ln_no, blame_line in enumerate(blame_lines, 1):
        # split "<sha> (<blame info>) <content>" on the first " (...) " group
        sha, line = re.split(r' \(.+?\) ', blame_line, 1)
        annotate.append((ln_no, self.repository.get_changeset(sha), line))
    return annotate
266 264
def fill_archive(self, stream=None, kind='tgz', prefix=None,
                 subrepos=False):
    """
    Fills up given stream with an archive of this changeset.

    :param stream: file like object.
    :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
      Default: ``tgz``.
    :param prefix: name of root directory in archive.
      Default is repository name and changeset's short_id joined with dash.
    :param subrepos: include subrepos in this archive (not used by this
      implementation).

    :raise ImproperArchiveTypeError: If given kind is wrong.
    :raise VCSError: If given stream is None, or if prefix is empty or
      starts with a slash.
    """
    allowed_kinds = settings.ARCHIVE_SPECS.keys()
    if kind not in allowed_kinds:
        # The allowed kinds are interpolated into the message; before, the
        # list was passed as a second exception argument and the words
        # "one" and "of" ran together.
        raise ImproperArchiveTypeError(
            'Archive kind not supported use one of %s'
            % (list(allowed_kinds),))

    if prefix is None:
        prefix = '%s-%s' % (self.repository.name, self.short_id)
    elif prefix.startswith('/'):
        raise VCSError("Prefix cannot start with leading slash")
    elif prefix.strip() == '':
        raise VCSError("Prefix cannot be empty")

    if kind == 'zip':
        frmt = 'zip'
    else:
        frmt = 'tar'
    # NOTE(review): the command is run through the shell (needed for the
    # compression pipe) and ``prefix`` is interpolated unquoted; callers
    # must not pass untrusted prefixes until this is quoted.
    cmd = 'git archive --format=%s --prefix=%s/ %s' % (frmt, prefix,
                                                        self.raw_id)
    if kind == 'tgz':
        cmd += ' | gzip -9'
    elif kind == 'tbz2':
        cmd += ' | bzip2 -9'

    if stream is None:
        raise VCSError('You need to pass in a valid stream for filling'
                       ' with archival data')
    popen = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True,
                  cwd=self.repository.path)

    # copy the archive to the stream in fixed-size chunks
    buffer_size = 1024 * 8
    chunk = popen.stdout.read(buffer_size)
    while chunk:
        stream.write(chunk)
        chunk = popen.stdout.read(buffer_size)
    # Make sure all descriptors would be read
    popen.communicate()
320 318
def get_nodes(self, path):
    """
    Returns the sorted list of nodes found directly in the directory at
    ``path``. Git link entries become ``SubModuleNode`` objects, trees
    become ``DirNode`` objects and blobs become ``FileNode`` objects.

    Nodes not yet present in ``self.nodes`` are cached there, and stat
    modes not yet known are recorded in ``self._stat_modes``.

    :raise ChangesetError: if ``path`` is not a directory, or if an entry
      is neither a tree nor a blob.
    """
    if self._get_kind(path) != NodeKind.DIR:
        raise ChangesetError("Directory does not exist for revision %r at "
            " %r" % (self.revision, path))
    path = self._fix_path(path)
    tree = self.repository._repo[self._get_id_for_path(path)]
    alias = self.repository.alias
    dirnodes, filenodes = [], []
    for name, mode, sha in tree.iteritems():
        if objects.S_ISGITLINK(mode):
            # submodules are listed together with the directories
            dirnodes.append(SubModuleNode(name, url=None, changeset=sha,
                                          alias=alias))
            continue

        obj = self.repository._repo.get_object(sha)
        obj_path = '/'.join((path, name)) if path != '' else name
        # keep a stat mode that was recorded earlier
        self._stat_modes.setdefault(obj_path, mode)
        if isinstance(obj, objects.Tree):
            dirnodes.append(DirNode(obj_path, changeset=self))
        elif isinstance(obj, objects.Blob):
            filenodes.append(FileNode(obj_path, changeset=self, mode=mode))
        else:
            raise ChangesetError("Requested object should be Tree "
                                 "or Blob, is %r" % type(obj))

    nodes = dirnodes + filenodes
    for node in nodes:
        # keep a node that was cached earlier
        self.nodes.setdefault(node.path, node)
    nodes.sort()
    return nodes
357 355
def get_node(self, path):
    """
    Returns the node at the given ``path``, building it on first access
    and serving it from the ``self.nodes`` cache afterwards.

    Unicode paths are encoded to utf-8 and trailing slashes are removed
    before the lookup.

    :raise NodeDoesNotExistError: if a parent directory cannot be found,
      or if the object at ``path`` is neither a tree nor a blob.
    """
    if isinstance(path, unicode):
        path = path.encode('utf-8')
    path = self._fix_path(path)
    if path in self.nodes:
        return self.nodes[path]

    try:
        object_id = self._get_id_for_path(path)
    except ChangesetError:
        raise NodeDoesNotExistError("Cannot find one of parents' "
            "directories for a given path: %s" % path)

    alias = self.repository.alias
    mode = self._stat_modes.get(path)
    if mode and objects.S_ISGITLINK(mode):
        node = SubModuleNode(path, url=None, changeset=object_id,
                             alias=alias)
    else:
        obj = self.repository._repo.get_object(object_id)
        if isinstance(obj, objects.Tree):
            if path == '':
                node = RootNode(changeset=self)
            else:
                node = DirNode(path, changeset=self)
            node._tree = obj
        elif isinstance(obj, objects.Blob):
            node = FileNode(path, changeset=self)
            node._blob = obj
        else:
            raise NodeDoesNotExistError("There is no file nor directory "
                "at the given path %r at revision %r"
                % (path, self.short_id))
    # cache node
    self.nodes[path] = node
    return node
392 390
@LazyProperty
def affected_files(self):
    """
    Returns the added nodes followed by the changed nodes of this
    changeset, as one list.
    """
    added_nodes = self.added
    return added_nodes + self.changed
400 398
@LazyProperty
def _diff_name_status(self):
    """
    Returns the stripped ``git diff --name-status`` output against every
    parent of this changeset, joined with newlines.
    """
    chunks = []
    for parent in self.parents:
        cmd = 'diff --name-status %s %s --encoding=utf8' % (parent.raw_id,
                                                            self.raw_id)
        stdout, _stderr = self.repository.run_git_command(cmd)
        chunks.append(stdout.strip())
    return '\n'.join(chunks)
409 407
410 408 def _get_paths_for_status(self, status):
411 409 """
412 410 Returns sorted list of paths for given ``status``.
413 411
414 412 :param status: one of: *added*, *modified* or *deleted*
415 413 """
416 414 paths = set()
417 415 char = status[0].upper()
418 416 for line in self._diff_name_status.splitlines():
419 417 if not line:
420 418 continue
421 419
422 420 if line.startswith(char):
423 421 splitted = line.split(char, 1)
424 422 if not len(splitted) == 2:
425 423 raise VCSError("Couldn't parse diff result:\n%s\n\n and "
426 424 "particularly that line: %s" % (self._diff_name_status,
427 425 line))
428 426 _path = splitted[1].strip()
429 427 paths.add(_path)
430 428 return sorted(paths)
431 429
@LazyProperty
def added(self):
    """
    Returns list of added nodes. For a changeset without parents this is
    every node returned by ``_get_file_nodes``.
    """
    if self.parents:
        added_nodes = []
        for path in self._get_paths_for_status('added'):
            added_nodes.append(self.get_node(path))
        return added_nodes
    return list(self._get_file_nodes())
440 438
@LazyProperty
def changed(self):
    """
    Returns list of modified nodes; always empty for a changeset without
    parents.
    """
    changed_nodes = []
    if self.parents:
        for path in self._get_paths_for_status('modified'):
            changed_nodes.append(self.get_node(path))
    return changed_nodes
449 447
@LazyProperty
def removed(self):
    """
    Returns list of ``RemovedFileNode`` objects for the deleted paths;
    always empty for a changeset without parents.
    """
    removed_nodes = []
    if self.parents:
        for path in self._get_paths_for_status('deleted'):
            removed_nodes.append(RemovedFileNode(path))
    return removed_nodes
General Comments 0
You need to be logged in to leave comments. Login now