##// END OF EJS Templates
git: use rev-list for fetching last commit data in case of a single commit. It's faster than using git log
marcink -
r3459:57dba41e default
parent child Browse files
Show More
@@ -1,530 +1,534 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2014-2019 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 """
22 22 GIT commit module
23 23 """
24 24
25 25 import re
26 26 import stat
27 27 from itertools import chain
28 28 from StringIO import StringIO
29 29
30 30 from zope.cachedescriptors.property import Lazy as LazyProperty
31 31
32 32 from rhodecode.lib.datelib import utcdate_fromtimestamp
33 33 from rhodecode.lib.utils import safe_unicode, safe_str
34 34 from rhodecode.lib.utils2 import safe_int
35 35 from rhodecode.lib.vcs.conf import settings
36 36 from rhodecode.lib.vcs.backends import base
37 37 from rhodecode.lib.vcs.exceptions import CommitError, NodeDoesNotExistError
38 38 from rhodecode.lib.vcs.nodes import (
39 39 FileNode, DirNode, NodeKind, RootNode, SubModuleNode,
40 40 ChangedFileNodesGenerator, AddedFileNodesGenerator,
41 41 RemovedFileNodesGenerator, LargeFileNode)
42 42 from rhodecode.lib.vcs.compat import configparser
43 43
44 44
class GitCommit(base.BaseCommit):
    """
    Represents state of the repository at single commit id.

    Commit attributes are fetched lazily from ``repository._remote`` and
    memoized through ``LazyProperty``. Tree lookups are cached per instance
    in ``_paths`` (path -> ``[object id, object type]``), ``_stat_modes``
    (path -> stat mode) and ``nodes`` (path -> node object).
    """
    # names of the remote commit attributes backing the lazy properties
    _author_property = 'author'
    _committer_property = 'committer'
    _date_property = 'commit_time'
    _date_tz_property = 'commit_timezone'
    _message_property = 'message'
    _parents_property = 'parents'

    # attributes that are never requested through the bulk pre-load call
    _filter_pre_load = [
        # done through a more complex tree walk on parents
        "affected_files",
        # based on repository cached property
        "branch",
        # done through subprocess not remote call
        "children",
        # done through a more complex tree walk on parents
        "status",
        # mercurial specific property not supported here
        "_file_paths",
        # mercurial specific property not supported here
        'obsolete',
        # mercurial specific property not supported here
        'phase',
        # mercurial specific property not supported here
        'hidden'
    ]

    def __init__(self, repository, raw_id, idx, pre_load=None):
        """
        :param repository: repository object this commit belongs to; its
            ``_remote`` attribute is used for all remote calls
        :param raw_id: full commit id, normalized with ``safe_str``
        :param idx: index of the commit, stored as given
        :param pre_load: optional list of attribute names to fetch in a
            single bulk request
        """
        self.repository = repository
        self._remote = repository._remote
        # TODO: johbo: Tweak of raw_id should not be necessary
        self.raw_id = safe_str(raw_id)
        self.idx = idx

        self._set_bulk_properties(pre_load)

        # caches
        self._stat_modes = {}  # stat info for paths
        self._paths = {}  # path processed with parse_tree
        self.nodes = {}
        self._submodules = None

    def _set_bulk_properties(self, pre_load):
        """
        Fetch the attributes named in ``pre_load`` with one bulk request and
        store them directly in the instance ``__dict__``.

        Entries listed in ``_filter_pre_load`` are skipped; when nothing is
        left to load no remote call is made.
        """
        if not pre_load:
            return
        pre_load = [entry for entry in pre_load
                    if entry not in self._filter_pre_load]
        if not pre_load:
            return

        result = self._remote.bulk_request(self.raw_id, pre_load)
        for attr, value in result.items():
            if attr in ["author", "message"]:
                if value:
                    value = safe_unicode(value)
            elif attr == "date":
                value = utcdate_fromtimestamp(*value)
            elif attr == "parents":
                value = self._make_commits(value)
            # NOTE(review): presumably this pre-populates the value that the
            # LazyProperty of the same name would otherwise compute - confirm
            self.__dict__[attr] = value

    @LazyProperty
    def _commit(self):
        """Remote commit object looked up by ``raw_id``."""
        return self._remote[self.raw_id]

    @LazyProperty
    def _tree_id(self):
        """Id of the root tree object referenced by this commit."""
        return self._remote[self._commit['tree']]['id']

    @LazyProperty
    def id(self):
        """Full commit id (same value as ``raw_id``)."""
        return self.raw_id

    @LazyProperty
    def short_id(self):
        """First 12 characters of the commit id."""
        return self.raw_id[:12]

    @LazyProperty
    def message(self):
        """Commit message as unicode."""
        return safe_unicode(
            self._remote.commit_attribute(self.id, self._message_property))

    @LazyProperty
    def committer(self):
        """Committer as unicode."""
        return safe_unicode(
            self._remote.commit_attribute(self.id, self._committer_property))

    @LazyProperty
    def author(self):
        """Author as unicode."""
        return safe_unicode(
            self._remote.commit_attribute(self.id, self._author_property))

    @LazyProperty
    def date(self):
        """
        Commit date built by ``utcdate_fromtimestamp`` from the commit
        timestamp and timezone reported by the remote.
        """
        unix_ts, tz = self._remote.get_object_attrs(
            self.raw_id, self._date_property, self._date_tz_property)
        return utcdate_fromtimestamp(unix_ts, tz)

    @LazyProperty
    def status(self):
        """
        Returns a ``(changed, added, removed)`` tuple of file node
        collections for current commit
        """
        return self.changed, self.added, self.removed

    @LazyProperty
    def tags(self):
        """Returns list of tag names (unicode) pointing at this commit."""
        return [
            safe_unicode(name)
            for name, commit_id in self.repository.tags.iteritems()
            if commit_id == self.raw_id]

    @LazyProperty
    def branch(self):
        """
        Returns name (unicode) of the first branch whose head is this
        commit, or ``None`` when no branch points at it.
        """
        for name, commit_id in self.repository.branches.iteritems():
            if commit_id == self.raw_id:
                return safe_unicode(name)
        return None

    def _get_id_for_path(self, path):
        """
        Returns the cached ``[object id, object type]`` pair for ``path``.

        Walks the tree from the root one directory at a time, caching every
        entry of each traversed tree in ``_paths`` / ``_stat_modes``.

        :raises CommitError: when a parent directory of ``path`` is missing
            or is not a tree
        :raises: the exception built by ``no_node_at_path`` when the final
            path component does not exist
        """
        path = safe_str(path)
        if path in self._paths:
            return self._paths[path]

        tree_id = self._tree_id

        path = path.strip('/')
        if path == '':
            data = [tree_id, "tree"]
            self._paths[''] = data
            return data

        # only the parent directories need walking; the last component is
        # picked up from the cache once its parent tree has been processed
        dirs = path.split('/')[:-1]
        cur_dir = ''

        # initially extract things from root dir
        tree_items = self._remote.tree_items(tree_id)
        self._process_tree_items(tree_items, cur_dir)

        for dir_name in dirs:
            cur_dir = '/'.join((cur_dir, dir_name)) if cur_dir else dir_name

            dir_id = None
            dir_type = None
            for item, _stat, id_, type_ in tree_items:
                if item == dir_name:
                    dir_id, dir_type = id_, type_
                    break

            if not dir_id:
                raise CommitError('%s have not been found' % cur_dir)
            if dir_type != "tree":
                raise CommitError('%s is not a directory' % cur_dir)

            # descend into the matched tree and cache all of its items
            tree_items = self._remote.tree_items(dir_id)
            self._process_tree_items(tree_items, cur_dir)

        if path not in self._paths:
            raise self.no_node_at_path(path)

        return self._paths[path]

    def _process_tree_items(self, items, cur_dir):
        """
        Cache id, type and stat mode of every entry in ``items``.

        :param items: iterable of ``(name, stat, id, type)`` tuples
        :param cur_dir: directory the items live in; ``''`` for the root
        """
        for item, stat_, id_, type_ in items:
            if cur_dir:
                name = '/'.join((cur_dir, item))
            else:
                name = item
            self._paths[name] = [id_, type_]
            self._stat_modes[name] = stat_

    def _get_kind(self, path):
        """
        Returns the ``NodeKind`` for the object at ``path``: ``FILE`` for
        blobs, ``DIR`` for trees, ``SUBMODULE`` for links and ``None`` for
        any other object type.
        """
        _path_id, type_ = self._get_id_for_path(path)
        if type_ == 'blob':
            return NodeKind.FILE
        elif type_ == 'tree':
            return NodeKind.DIR
        elif type_ == 'link':
            # the original compared the builtin ``type`` here, which made
            # this branch unreachable
            return NodeKind.SUBMODULE
        return None

    def _get_filectx(self, path):
        """
        Returns the fixed ``path`` after verifying it points to a file.

        :raises CommitError: when the object at ``path`` is not a file
        """
        path = self._fix_path(path)
        if self._get_kind(path) != NodeKind.FILE:
            raise CommitError(
                "File does not exist for commit %s at '%s'" %
                (self.raw_id, path))
        return path

    def _get_file_nodes(self):
        """Returns an iterator chaining the third element of each ``walk()`` tuple."""
        return chain(*(t[2] for t in self.walk()))

    @LazyProperty
    def parents(self):
        """
        Returns list of parent commits.
        """
        parent_ids = self._remote.commit_attribute(
            self.id, self._parents_property)
        return self._make_commits(parent_ids)

    @LazyProperty
    def children(self):
        """
        Returns list of child commits.

        Runs ``git rev-list --children`` and collects the ids that follow
        this commit's id on the lines starting with it.
        """
        rev_filter = settings.GIT_REV_FILTER
        output, __ = self.repository.run_git_command(
            ['rev-list', '--children'] + rev_filter)

        child_ids = []
        for line in output.splitlines():
            # each matching line is "<commit id> <child id> <child id> ..."
            if line.startswith(self.raw_id):
                child_ids.extend(line.split(' ')[1:])
        return self._make_commits(child_ids)

    def _make_commits(self, commit_ids, pre_load=None):
        """Returns a list of commits fetched from the repository by id."""
        return [
            self.repository.get_commit(commit_id=commit_id, pre_load=pre_load)
            for commit_id in commit_ids]

    def get_file_mode(self, path):
        """
        Returns stat mode of the file at the given `path`.
        """
        path = safe_str(path)
        # ensure path is traversed
        self._get_id_for_path(path)
        return self._stat_modes[path]

    def is_link(self, path):
        """Returns ``True`` when the stat mode at ``path`` is a symlink."""
        return stat.S_ISLNK(self.get_file_mode(path))

    def get_file_content(self, path):
        """
        Returns content of the file at given `path`.
        """
        id_, _ = self._get_id_for_path(path)
        return self._remote.blob_as_pretty_string(id_)

    def get_file_size(self, path):
        """
        Returns size of the file at given `path`.
        """
        id_, _ = self._get_id_for_path(path)
        return self._remote.blob_raw_length(id_)

    def get_path_history(self, path, limit=None, pre_load=None):
        """
        Returns history of file as a list of `GitCommit` objects for
        which file at given `path` has been modified, in the order the
        underlying git command reports them.

        :param path: path of the file; must point to a file in this commit
        :param limit: optional maximum number of commits to return
        :param pre_load: optional attribute names forwarded to
            ``repository.get_commit``
        :raises CommitError: when ``path`` is not a file in this commit

        TODO: This function now uses an underlying 'git' command which works
        quickly but ideally we should replace with an algorithm.
        """
        self._get_filectx(path)
        f_path = safe_str(path)

        # optimize for n==1, rev-list is much faster for that use-case
        if limit == 1:
            cmd = ['rev-list', '-1', self.raw_id, '--', f_path]
        else:
            cmd = ['log']
            if limit:
                cmd.extend(['-n', str(safe_int(limit, 0))])
            cmd.extend(
                ['--pretty=format: %H', '-s', self.raw_id, '--', f_path])

        output, __ = self.repository.run_git_command(cmd)
        # both commands emit full 40 character shas, one per commit
        commit_ids = re.findall(r'[0-9a-fA-F]{40}', output)

        return self._make_commits(commit_ids, pre_load=pre_load)

    def get_file_annotate(self, path, pre_load=None):
        """
        Returns a generator of four element tuples with
        lineno, commit_id, commit lazy loader and line

        TODO: This function now uses os underlying 'git' command which is
        generally not good. Should be replaced with algorithm iterating
        commits.
        """
        cmd = ['blame', '-l', '--root', '-r', self.raw_id, '--', path]
        # -l ==> outputs long shas (and we need all 40 characters)
        # --root ==> doesn't put '^' character for boundaries
        # -r commit_id ==> blames for the given commit
        output, __ = self.repository.run_git_command(cmd)

        # the output ends with a newline, so the last split item is dropped
        for i, blame_line in enumerate(output.split('\n')[:-1]):
            line_no = i + 1
            commit_id, line = blame_line.split(' ', 1)
            yield (
                line_no, commit_id,
                # bind commit_id as a default argument: a plain closure
                # would see whatever value the loop variable holds at the
                # time the loader is finally called
                lambda commit_id=commit_id: self.repository.get_commit(
                    commit_id=commit_id, pre_load=pre_load),
                line)

    def get_nodes(self, path):
        """
        Returns sorted list of nodes (sub-modules, directories and files)
        found directly under the directory at ``path``.

        Newly seen nodes and stat modes are added to the instance caches.

        :raises CommitError: when ``path`` is not a directory or the tree
            contains an object that is not a tree, blob or link
        """
        if self._get_kind(path) != NodeKind.DIR:
            raise CommitError(
                "Directory does not exist for commit %s at "
                " '%s'" % (self.raw_id, path))
        path = self._fix_path(path)
        id_, _ = self._get_id_for_path(path)
        tree_id = self._remote[id_]['id']
        dirnodes = []
        filenodes = []
        alias = self.repository.alias
        for name, stat_, id_, type_ in self._remote.tree_items(tree_id):
            if type_ == 'link':
                url = self._get_submodule_url('/'.join((path, name)))
                dirnodes.append(SubModuleNode(
                    name, url=url, commit=id_, alias=alias))
                continue

            if path != '':
                obj_path = '/'.join((path, name))
            else:
                obj_path = name
            if obj_path not in self._stat_modes:
                self._stat_modes[obj_path] = stat_

            if type_ == 'tree':
                dirnodes.append(DirNode(obj_path, commit=self))
            elif type_ == 'blob':
                filenodes.append(FileNode(obj_path, commit=self, mode=stat_))
            else:
                # interpolate the type; it used to be passed as a stray
                # second exception argument and never reached the message
                raise CommitError(
                    "Requested object should be Tree or Blob, is %s" % type_)

        nodes = dirnodes + filenodes
        for node in nodes:
            if node.path not in self.nodes:
                self.nodes[node.path] = node
        nodes.sort()
        return nodes

    def get_node(self, path, pre_load=None):
        """
        Returns the node at ``path``, creating and caching it on first use.

        :param pre_load: forwarded to ``FileNode`` when a file node is built
        :raises NodeDoesNotExistError: when a parent directory of ``path``
            cannot be resolved
        """
        if isinstance(path, unicode):
            path = path.encode('utf-8')
        path = self._fix_path(path)
        if path not in self.nodes:
            try:
                id_, type_ = self._get_id_for_path(path)
            except CommitError:
                raise NodeDoesNotExistError(
                    "Cannot find one of parents' directories for a given "
                    "path: %s" % path)

            if type_ == 'link':
                url = self._get_submodule_url(path)
                node = SubModuleNode(path, url=url, commit=id_,
                                     alias=self.repository.alias)
            elif type_ == 'tree':
                if path == '':
                    node = RootNode(commit=self)
                else:
                    node = DirNode(path, commit=self)
            elif type_ == 'blob':
                node = FileNode(path, commit=self, pre_load=pre_load)
            else:
                raise self.no_node_at_path(path)

            # cache node
            self.nodes[path] = node
        return self.nodes[path]

    def get_largefile_node(self, path):
        """
        Returns a ``LargeFileNode`` for ``path`` when the remote reports the
        blob as a large file pointer and the referenced object is present
        in the largefiles store; otherwise returns ``None``.
        """
        id_, _ = self._get_id_for_path(path)
        pointer_spec = self._remote.is_large_file(id_)

        if pointer_spec:
            # content of that file regular FileNode is the hash of largefile
            file_id = pointer_spec.get('oid_hash')
            if self._remote.in_largefiles_store(file_id):
                lf_path = self._remote.store_path(file_id)
                return LargeFileNode(lf_path, commit=self, org_path=path)
        return None

    @LazyProperty
    def affected_files(self):
        """
        Gets a fast accessible file changes for given commit
        """
        added, modified, deleted = self._changes_cache
        return list(added.union(modified).union(deleted))

    @LazyProperty
    def _changes_cache(self):
        """
        Returns ``(added, modified, deleted)`` sets of paths, accumulated
        from the tree changes between every parent and this commit.

        A commit without parents is compared against an empty commit.
        """
        added = set()
        modified = set()
        deleted = set()
        _r = self._remote

        parents = self.parents
        if not self.parents:
            parents = [base.EmptyCommit()]
        for parent in parents:
            if isinstance(parent, base.EmptyCommit):
                oid = None
            else:
                oid = parent.raw_id
            changes = _r.tree_changes(oid, self.raw_id)
            for (oldpath, newpath), (_, _), (_, _) in changes:
                if newpath and oldpath:
                    modified.add(newpath)
                elif newpath and not oldpath:
                    added.add(newpath)
                elif not newpath and oldpath:
                    deleted.add(oldpath)
        return added, modified, deleted

    def _get_paths_for_status(self, status):
        """
        Returns sorted list of paths for given ``status``.

        :param status: one of: *added*, *modified* or *deleted*
        """
        added, modified, deleted = self._changes_cache
        return sorted({
            'added': list(added),
            'modified': list(modified),
            'deleted': list(deleted)}[status]
        )

    @LazyProperty
    def added(self):
        """
        Returns list of added ``FileNode`` objects.
        """
        if not self.parents:
            # without parents every file of the commit counts as added
            return list(self._get_file_nodes())
        return AddedFileNodesGenerator(
            [n for n in self._get_paths_for_status('added')], self)

    @LazyProperty
    def changed(self):
        """
        Returns list of modified ``FileNode`` objects.
        """
        if not self.parents:
            return []
        return ChangedFileNodesGenerator(
            [n for n in self._get_paths_for_status('modified')], self)

    @LazyProperty
    def removed(self):
        """
        Returns list of removed ``FileNode`` objects.
        """
        if not self.parents:
            return []
        return RemovedFileNodesGenerator(
            [n for n in self._get_paths_for_status('deleted')], self)

    def _get_submodule_url(self, submodule_path):
        """
        Returns the url configured in ``.gitmodules`` for the sub-module at
        ``submodule_path``, or ``None`` when there is no such entry or the
        commit has no ``.gitmodules`` file.

        The file is parsed once per instance; results are kept in
        ``_submodules``.
        """
        git_modules_path = '.gitmodules'

        if self._submodules is None:
            self._submodules = {}

            try:
                submodules_node = self.get_node(git_modules_path)
            except NodeDoesNotExistError:
                return None

            content = submodules_node.content

            # ConfigParser fails if there are whitespaces
            content = '\n'.join(l.strip() for l in content.split('\n'))

            parser = configparser.ConfigParser()
            parser.readfp(StringIO(content))

            for section in parser.sections():
                path = parser.get(section, 'path')
                url = parser.get(section, 'url')
                if path and url:
                    self._submodules[path.strip('/')] = url

        return self._submodules.get(submodule_path.strip('/'))
General Comments 0
You need to be logged in to leave comments. Login now