##// END OF EJS Templates
better regex for history
marcink -
r2276:8caaa995 beta
parent child Browse files
Show More
@@ -1,460 +1,460
1 1 import re
2 2 from itertools import chain
3 3 from dulwich import objects
4 4 from subprocess import Popen, PIPE
5 5 from rhodecode.lib.vcs.conf import settings
6 6 from rhodecode.lib.vcs.exceptions import RepositoryError
7 7 from rhodecode.lib.vcs.exceptions import ChangesetError
8 8 from rhodecode.lib.vcs.exceptions import NodeDoesNotExistError
9 9 from rhodecode.lib.vcs.exceptions import VCSError
10 10 from rhodecode.lib.vcs.exceptions import ChangesetDoesNotExistError
11 11 from rhodecode.lib.vcs.exceptions import ImproperArchiveTypeError
12 12 from rhodecode.lib.vcs.backends.base import BaseChangeset
13 13 from rhodecode.lib.vcs.nodes import FileNode, DirNode, NodeKind, RootNode, \
14 14 RemovedFileNode, SubModuleNode
15 15 from rhodecode.lib.vcs.utils import safe_unicode
16 16 from rhodecode.lib.vcs.utils import date_fromtimestamp
17 17 from rhodecode.lib.vcs.utils.lazy import LazyProperty
18 18
19 19
class GitChangeset(BaseChangeset):
    """
    Represents state of the repository at single revision.

    The changeset wraps the commit object fetched from the repository and
    lazily caches the tree entries (``_paths`` / ``_stat_modes``) and the
    node objects (``nodes``) it has already resolved.
    """

    def __init__(self, repository, revision):
        """
        :param repository: repository object; must expose ``revisions``
          (a sequence of raw ids) and ``_repo`` (the low level object store).
        :param revision: raw id of the commit; must be present in
          ``repository.revisions``.

        :raise RepositoryError: if the object store has no object with
          the given id.
        """
        # path -> stat mode of every tree entry traversed so far
        self._stat_modes = {}
        self.repository = repository
        self.raw_id = revision
        self.revision = repository.revisions.index(revision)

        self.short_id = self.raw_id[:12]
        self.id = self.raw_id
        try:
            commit = self.repository._repo.get_object(self.raw_id)
        except KeyError:
            raise RepositoryError("Cannot get object with id %s" % self.raw_id)
        self._commit = commit
        self._tree_id = commit.tree

        # The last character of the message is always dropped (it is
        # expected to be the trailing end of line).
        try:
            self.message = safe_unicode(commit.message[:-1])
        except UnicodeDecodeError:
            self.message = commit.message[:-1].decode(commit.encoding
                                                      or 'utf-8')
        self.tags = []
        # path -> node object cache
        self.nodes = {}
        # path -> object id cache
        self._paths = {}

    @LazyProperty
    def author(self):
        """
        Returns the committer of the underlying commit as unicode.
        """
        return safe_unicode(self._commit.committer)

    @LazyProperty
    def date(self):
        """
        Returns commit date built from commit time and commit timezone.
        """
        return date_fromtimestamp(self._commit.commit_time,
                                  self._commit.commit_timezone)

    @LazyProperty
    def status(self):
        """
        Returns tuple of ``(changed, added, removed)`` node lists for
        current changeset.
        """
        return self.changed, self.added, self.removed

    @LazyProperty
    def branch(self):
        """
        Returns name of the head pointing at this changeset or ``None`` if
        no head references it.
        """
        heads = self.repository._heads(reverse=False)
        ref = heads.get(self.raw_id)
        if ref:
            return safe_unicode(ref)
        return None

    def _fix_path(self, path):
        """
        Paths are stored without trailing slash so we need to get rid off it if
        needed.
        """
        return path.rstrip('/')

    def _cache_tree_entries(self, tree, prefix):
        """
        Remembers object id and stat mode of every direct entry of ``tree``.

        :param tree: tree object to read entries from.
        :param prefix: path of the ``tree`` itself; empty string for root.
        """
        for item, stat, item_id in tree.iteritems():
            if prefix:
                entry_path = '/'.join((prefix, item))
            else:
                entry_path = item
            self._paths[entry_path] = item_id
            self._stat_modes[entry_path] = stat

    def _get_id_for_path(self, path):
        """
        Returns id of the object stored at the given ``path``.

        Every tree walked on the way is cached, so sibling lookups do not
        need to traverse again.

        :raise ChangesetError: if one of the parent directories is missing
          or is not a directory.
        :raise NodeDoesNotExistError: if nothing is stored at ``path``.
        """
        # Cache keys never carry leading/trailing slashes, so normalize
        # before the lookup; otherwise 'dir/' would miss the cache each time.
        path = path.strip('/')
        if path in self._paths:
            return self._paths[path]

        # set root tree
        tree = self.repository._repo[self._commit.tree]
        if path == '':
            self._paths[''] = tree.id
            return tree.id

        # initially extract things from root dir
        self._cache_tree_entries(tree, '')

        curdir = ''
        for dirname in path.split('/')[:-1]:
            if curdir:
                curdir = '/'.join((curdir, dirname))
            else:
                curdir = dirname

            dir_id = None
            for item, _stat, item_id in tree.iteritems():
                if item == dirname:
                    dir_id = item_id
                    break
            if not dir_id:
                raise ChangesetError('%s have not been found' % curdir)

            # Update tree
            tree = self.repository._repo[dir_id]
            if not isinstance(tree, objects.Tree):
                raise ChangesetError('%s is not a directory' % curdir)

            # cache all items from the given traversed tree
            self._cache_tree_entries(tree, curdir)

        if path not in self._paths:
            raise NodeDoesNotExistError("There is no file nor directory "
                "at the given path %r at revision %r"
                % (path, self.short_id))
        return self._paths[path]

    def _get_kind(self, path):
        """
        Returns ``NodeKind.FILE`` for blobs and ``NodeKind.DIR`` for trees;
        ``None`` for any other kind of object.
        """
        obj = self.repository._repo[self._get_id_for_path(path)]
        if isinstance(obj, objects.Blob):
            return NodeKind.FILE
        elif isinstance(obj, objects.Tree):
            return NodeKind.DIR
        return None

    def _get_file_nodes(self):
        """
        Returns iterable of all file nodes yielded by ``self.walk()``.
        """
        # third element of every walked item holds the file nodes
        return chain(*(t[2] for t in self.walk()))

    @LazyProperty
    def parents(self):
        """
        Returns list of parents changesets.
        """
        return [self.repository.get_changeset(parent)
                for parent in self._commit.parents]

    def next(self, branch=None):
        """
        Returns changeset at the following revision index. If ``branch`` is
        given, revisions whose branch differs are skipped.

        :raise VCSError: if ``branch`` is given and this changeset does not
          belong to it.
        :raise ChangesetDoesNotExistError: if there is no such changeset.
        """
        if branch and self.branch != branch:
            raise VCSError('Branch option used on changeset not belonging '
                           'to that branch')

        # Iterate instead of recursing: skipping many revisions of other
        # branches must not hit the interpreter recursion limit.
        changeset = self
        while True:
            try:
                next_rev = changeset.repository.revisions[
                    changeset.revision + 1]
            except IndexError:
                raise ChangesetDoesNotExistError
            changeset = changeset.repository.get_changeset(next_rev)
            if not branch or branch == changeset.branch:
                return changeset

    def prev(self, branch=None):
        """
        Returns changeset at the preceding revision index. If ``branch`` is
        given, revisions whose branch differs are skipped.

        :raise VCSError: if ``branch`` is given and this changeset does not
          belong to it.
        :raise ChangesetDoesNotExistError: if there is no such changeset.
        """
        if branch and self.branch != branch:
            raise VCSError('Branch option used on changeset not belonging '
                           'to that branch')

        # Iterative for the same reason as in ``next``.
        changeset = self
        while True:
            prev_index = changeset.revision - 1
            # negative index would silently wrap around to the last revision
            if prev_index < 0:
                raise ChangesetDoesNotExistError
            try:
                prev_rev = changeset.repository.revisions[prev_index]
            except IndexError:
                raise ChangesetDoesNotExistError
            changeset = changeset.repository.get_changeset(prev_rev)
            if not branch or branch == changeset.branch:
                return changeset

    def get_file_mode(self, path):
        """
        Returns stat mode of the file at the given ``path``.

        :raise KeyError: if no stat mode has been recorded for ``path``
          (i.e. for the root directory).
        """
        # ensure path is traversed
        self._get_id_for_path(path)
        # modes are keyed by the slash-stripped path, same as object ids
        return self._stat_modes[path.strip('/')]

    def get_file_content(self, path):
        """
        Returns content of the file at given ``path``.
        """
        blob = self.repository._repo[self._get_id_for_path(path)]
        return blob.as_pretty_string()

    def get_file_size(self, path):
        """
        Returns size of the file at given ``path``.
        """
        blob = self.repository._repo[self._get_id_for_path(path)]
        return blob.raw_length()

    def get_file_changeset(self, path):
        """
        Returns last commit of the file at the given ``path``.
        """
        node = self.get_node(path)
        return node.history[0]

    def get_file_history(self, path):
        """
        Returns history of file as reversed list of ``Changeset`` objects for
        which file at given ``path`` has been modified.

        TODO: This function now uses os underlying 'git' and 'grep' commands
        which is generally not good. Should be replaced with algorithm
        iterating commits.
        """
        # NOTE(review): ``path`` is interpolated into the command string
        # unescaped; confirm how ``run_git_command`` executes it before
        # passing untrusted paths.
        cmd = 'log --pretty="format: %%H" -s -p %s -- "%s"' % (
            self.id, path
        )
        so, se = self.repository.run_git_command(cmd)
        # every 40 characters long hex string of the output is a commit id
        ids = re.findall(r'[0-9a-fA-F]{40}', so)
        return [self.repository.get_changeset(commit_id)
                for commit_id in ids]

    def get_file_annotate(self, path):
        """
        Returns a list of three element tuples with lineno,changeset and line

        TODO: This function now uses os underlying 'git' command which is
        generally not good. Should be replaced with algorithm iterating
        commits.
        """
        # NOTE(review): ``path`` is interpolated unescaped, see
        # ``get_file_history``.
        cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path)
        # -l ==> outputs long shas (and we need all 40 characters)
        # --root ==> doesn't put '^' character for bounderies
        # -r sha ==> blames for the given revision
        so, se = self.repository.run_git_command(cmd)
        annotate = []
        # last element after the split is dropped (text after final newline)
        for idx, blame_line in enumerate(so.split('\n')[:-1]):
            # the parenthesised blame metadata separates sha from the line
            commit_id, line = re.split(r' \(.+?\) ', blame_line, 1)
            annotate.append(
                (idx + 1, self.repository.get_changeset(commit_id), line))
        return annotate

    def fill_archive(self, stream=None, kind='tgz', prefix=None,
                     subrepos=False):
        """
        Fills up given stream.

        :param stream: file like object.
        :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
            Default: ``tgz``.
        :param prefix: name of root directory in archive.
            Default is repository name and changeset's raw_id joined with dash
            (``repo-tip.<KIND>``).
        :param subrepos: include subrepos in this archive.

        :raise ImproperArchiveTypeError: If given kind is wrong.
        :raise VcsError: If given stream is None

        """
        allowed_kinds = settings.ARCHIVE_SPECS.keys()
        if kind not in allowed_kinds:
            # the message has to be interpolated here; passing the kinds as
            # a second exception argument left a raw '%s' in the text
            raise ImproperArchiveTypeError('Archive kind not supported use '
                                           'one of %s' % (allowed_kinds,))

        if prefix is None:
            prefix = '%s-%s' % (self.repository.name, self.short_id)
        elif prefix.startswith('/'):
            raise VCSError("Prefix cannot start with leading slash")
        elif prefix.strip() == '':
            raise VCSError("Prefix cannot be empty")

        if kind == 'zip':
            frmt = 'zip'
        else:
            frmt = 'tar'
        # NOTE(review): ``prefix`` ends up unescaped in a command executed
        # with ``shell=True``; callers must not pass untrusted values.
        cmd = 'git archive --format=%s --prefix=%s/ %s' % (frmt, prefix,
                                                           self.raw_id)
        if kind == 'tgz':
            cmd += ' | gzip -9'
        elif kind == 'tbz2':
            cmd += ' | bzip2 -9'

        if stream is None:
            raise VCSError('You need to pass in a valid stream for filling'
                           ' with archival data')
        popen = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True,
                      cwd=self.repository.path)

        buffer_size = 1024 * 8
        chunk = popen.stdout.read(buffer_size)
        while chunk:
            stream.write(chunk)
            chunk = popen.stdout.read(buffer_size)
        # Make sure all descriptors would be read
        popen.communicate()

    def get_nodes(self, path):
        """
        Returns sorted list of nodes stored directly under directory at the
        given ``path``. Created nodes are added to the node cache.

        :raise ChangesetError: if ``path`` is not a directory or the tree
          contains an object which is neither a tree nor a blob.
        """
        if self._get_kind(path) != NodeKind.DIR:
            raise ChangesetError("Directory does not exist for revision %r at "
                " %r" % (self.revision, path))
        path = self._fix_path(path)
        tree = self.repository._repo[self._get_id_for_path(path)]
        dirnodes = []
        filenodes = []
        als = self.repository.alias
        for name, stat, obj_id in tree.iteritems():
            if objects.S_ISGITLINK(stat):
                # NOTE(review): submodule node gets the bare entry name,
                # not the path joined with ``path`` - confirm it is intended.
                dirnodes.append(SubModuleNode(name, url=None, changeset=obj_id,
                                              alias=als))
                continue

            obj = self.repository._repo.get_object(obj_id)
            if path != '':
                obj_path = '/'.join((path, name))
            else:
                obj_path = name
            if obj_path not in self._stat_modes:
                self._stat_modes[obj_path] = stat
            if isinstance(obj, objects.Tree):
                dirnodes.append(DirNode(obj_path, changeset=self))
            elif isinstance(obj, objects.Blob):
                filenodes.append(FileNode(obj_path, changeset=self, mode=stat))
            else:
                raise ChangesetError("Requested object should be Tree "
                                     "or Blob, is %r" % type(obj))
        nodes = dirnodes + filenodes
        for node in nodes:
            # never replace a node which is already cached
            if node.path not in self.nodes:
                self.nodes[node.path] = node
        nodes.sort()
        return nodes

    def get_node(self, path):
        """
        Returns node (root, directory, file or submodule) stored at the
        given ``path``; nodes are cached per path.

        :raise NodeDoesNotExistError: if ``path`` cannot be resolved or the
          object found there is neither a tree nor a blob.
        """
        if isinstance(path, unicode):
            path = path.encode('utf-8')
        path = self._fix_path(path)
        if path in self.nodes:
            return self.nodes[path]

        try:
            id_ = self._get_id_for_path(path)
        except ChangesetError:
            raise NodeDoesNotExistError("Cannot find one of parents' "
                "directories for a given path: %s" % path)

        mode = self._stat_modes.get(path)
        if mode and objects.S_ISGITLINK(mode):
            node = SubModuleNode(path, url=None, changeset=id_,
                                 alias=self.repository.alias)
        else:
            obj = self.repository._repo.get_object(id_)

            if isinstance(obj, objects.Tree):
                if path == '':
                    node = RootNode(changeset=self)
                else:
                    node = DirNode(path, changeset=self)
                node._tree = obj
            elif isinstance(obj, objects.Blob):
                node = FileNode(path, changeset=self)
                node._blob = obj
            else:
                raise NodeDoesNotExistError("There is no file nor directory "
                    "at the given path %r at revision %r"
                    % (path, self.short_id))
        # cache node
        self.nodes[path] = node
        return node

    @LazyProperty
    def affected_files(self):
        """
        Get's a fast accessible file changes for given changeset
        """
        return self.added + self.changed

    @LazyProperty
    def _diff_name_status(self):
        """
        Returns joined ``git diff --name-status`` outputs computed against
        every parent of this changeset.
        """
        output = []
        for parent in self.parents:
            cmd = 'diff --name-status %s %s --encoding=utf8' % (parent.raw_id,
                                                                self.raw_id)
            so, se = self.repository.run_git_command(cmd)
            output.append(so.strip())
        return '\n'.join(output)

    def _get_paths_for_status(self, status):
        """
        Returns sorted list of paths for given ``status``.

        :param status: one of: *added*, *modified* or *deleted*
        """
        paths = set()
        # name-status lines start with first letter of the status, upper-cased
        char = status[0].upper()
        for line in self._diff_name_status.splitlines():
            if line and line.startswith(char):
                # everything after the status letter is the path
                paths.add(line[len(char):].strip())
        return sorted(paths)

    @LazyProperty
    def added(self):
        """
        Returns list of added ``FileNode`` objects.
        """
        if not self.parents:
            # without parents every file of the changeset counts as added
            return list(self._get_file_nodes())
        return [self.get_node(path)
                for path in self._get_paths_for_status('added')]

    @LazyProperty
    def changed(self):
        """
        Returns list of modified ``FileNode`` objects.
        """
        if not self.parents:
            return []
        return [self.get_node(path)
                for path in self._get_paths_for_status('modified')]

    @LazyProperty
    def removed(self):
        """
        Returns list of removed ``FileNode`` objects.
        """
        if not self.parents:
            return []
        return [RemovedFileNode(path)
                for path in self._get_paths_for_status('deleted')]
General Comments 0
You need to be logged in to leave comments. Login now