##// END OF EJS Templates
git: fix submodule handling for git repositories
super-admin -
r5126:de59bf70 default
parent child Browse files
Show More
@@ -1,488 +1,484 b''
1 1 # Copyright (C) 2014-2023 RhodeCode GmbH
2 2 #
3 3 # This program is free software: you can redistribute it and/or modify
4 4 # it under the terms of the GNU Affero General Public License, version 3
5 5 # (only), as published by the Free Software Foundation.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU Affero General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 #
15 15 # This program is dual-licensed. If you wish to learn more about the
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18
19 19 """
20 20 GIT commit module
21 21 """
22 22
23 import io
23 24 import stat
24 25 import configparser
25 26 from itertools import chain
26 27
27 28 from zope.cachedescriptors.property import Lazy as LazyProperty
28 29
29 30 from rhodecode.lib.datelib import utcdate_fromtimestamp
30 31 from rhodecode.lib.str_utils import safe_bytes, safe_str
31 32 from rhodecode.lib.vcs.backends import base
32 33 from rhodecode.lib.vcs.exceptions import CommitError, NodeDoesNotExistError
33 34 from rhodecode.lib.vcs.nodes import (
34 35 FileNode, DirNode, NodeKind, RootNode, SubModuleNode,
35 36 ChangedFileNodesGenerator, AddedFileNodesGenerator,
36 37 RemovedFileNodesGenerator, LargeFileNode)
37 38
38 39
39 40 class GitCommit(base.BaseCommit):
40 41 """
41 42 Represents state of the repository at single commit id.
42 43 """
43 44
44 45 _filter_pre_load = [
45 46 # done through a more complex tree walk on parents
46 47 "affected_files",
47 48 # done through subprocess not remote call
48 49 "children",
49 50 # done through a more complex tree walk on parents
50 51 "status",
51 52 # mercurial specific property not supported here
52 53 "_file_paths",
53 54 # mercurial specific property not supported here
54 55 'obsolete',
55 56 # mercurial specific property not supported here
56 57 'phase',
57 58 # mercurial specific property not supported here
58 59 'hidden'
59 60 ]
60 61
def __init__(self, repository, raw_id, idx, pre_load=None):
    """
    Bind the commit to its repository and set up the per-commit caches.

    :param repository: repository object; its ``_remote`` is reused for
        every backend call made by this commit
    :param raw_id: commit id, converted with ``safe_str``
    :param idx: index of the commit, stored unchanged
    :param pre_load: optional list of attribute names to fetch in bulk
    """
    self.repository = repository
    self._remote = repository._remote
    self.idx = idx
    # TODO: johbo: Tweak of raw_id should not be necessary
    self.raw_id = safe_str(raw_id)

    self._set_bulk_properties(pre_load)

    # caches
    self.nodes = {}
    self._paths = {}  # path processed with parse_tree
    self._stat_modes = {}  # stat info for paths
    self._submodules = None
75 76
76 77 def _set_bulk_properties(self, pre_load):
77 78
78 79 if not pre_load:
79 80 return
80 81 pre_load = [entry for entry in pre_load
81 82 if entry not in self._filter_pre_load]
82 83 if not pre_load:
83 84 return
84 85
85 86 result = self._remote.bulk_request(self.raw_id, pre_load)
86 87 for attr, value in result.items():
87 88 if attr in ["author", "message"]:
88 89 if value:
89 90 value = safe_str(value)
90 91 elif attr == "date":
91 92 value = utcdate_fromtimestamp(*value)
92 93 elif attr == "parents":
93 94 value = self._make_commits(value)
94 95 elif attr == "branch":
95 96 value = self._set_branch(value)
96 97 self.__dict__[attr] = value
97 98
@LazyProperty
def _commit(self):
    """
    Commit object obtained by indexing the remote with ``raw_id``.
    """
    remote = self._remote
    return remote[self.raw_id]
101 102
@LazyProperty
def _tree_id(self):
    """
    Id of the tree object referenced by this commit's ``'tree'`` entry.
    """
    tree_ref = self._commit['tree']
    tree_obj = self._remote[tree_ref]
    return tree_obj['id']
105 106
@LazyProperty
def id(self):
    """
    Commit id; identical to ``raw_id``.
    """
    commit_id = self.raw_id
    return commit_id
109 110
@LazyProperty
def short_id(self):
    """
    First 12 characters of ``raw_id``.
    """
    abbreviated_length = 12
    return self.raw_id[:abbreviated_length]
113 114
@LazyProperty
def message(self):
    """
    Commit message reported by the remote, converted with ``safe_str``.
    """
    raw_message = self._remote.message(self.id)
    return safe_str(raw_message)
117 118
@LazyProperty
def committer(self):
    """
    Committer of this commit, converted with ``safe_str``.

    NOTE(review): the value is read through ``_remote.author``, so it is
    the same as ``author`` -- confirm whether the remote exposes a
    separate committer call.
    """
    raw_committer = self._remote.author(self.id)
    return safe_str(raw_committer)
121 122
@LazyProperty
def author(self):
    """
    Author reported by the remote, converted with ``safe_str``.
    """
    raw_author = self._remote.author(self.id)
    return safe_str(raw_author)
125 126
@LazyProperty
def date(self):
    """
    Commit date built by ``utcdate_fromtimestamp`` from the
    ``(timestamp, timezone)`` pair reported by the remote.
    """
    timestamp, tz_offset = self._remote.date(self.raw_id)
    return utcdate_fromtimestamp(timestamp, tz_offset)
130 131
@LazyProperty
def status(self):
    """
    Returns a ``(changed, added, removed)`` tuple for the current commit.
    """
    changed = self.changed
    added = self.added
    removed = self.removed
    return changed, added, removed
137 138
@LazyProperty
def tags(self):
    """
    Names of the repository tags whose commit id equals ``raw_id``.
    """
    matching = []
    for tag_name, tagged_id in self.repository.tags.items():
        if tagged_id == self.raw_id:
            matching.append(safe_str(tag_name))
    return matching
144 145
@LazyProperty
def commit_branches(self):
    """
    Names of the repository branches whose commit id equals ``raw_id``.
    """
    return [
        branch_name
        for branch_name, head_id in self.repository.branches.items()
        if head_id == self.raw_id
    ]
152 153
153 154 def _set_branch(self, branches):
154 155 if branches:
155 156 # actually commit can have multiple branches in git
156 157 return safe_str(branches[0])
157 158
@LazyProperty
def branch(self):
    """
    Branch name for this commit, chosen by ``_set_branch`` from the
    branches the remote reports for ``raw_id``.
    """
    return self._set_branch(self._remote.branch(self.raw_id))
162 163
def _get_tree_id_for_path(self, path):
    """
    Resolve ``path`` to its ``[object_id, object_type]`` pair.

    Results are cached in ``self._paths``; for non-root paths the mode
    reported by the remote is also stored in ``self._stat_modes``.

    :param path: path inside the commit; surrounding ``/`` are ignored and
        an empty path means the root tree
    :return: two-element list ``[object_id, object_type]``
    :raises: the exception built by ``self.no_node_at_path`` when the
        remote reports no object for the path
    """
    # Normalise before probing the cache: entries are stored under the
    # stripped form, so "/a/" must hit the entry stored for "a".
    path = safe_str(path).strip('/')
    if path in self._paths:
        return self._paths[path]

    if path == '':
        # the root tree id is only looked up when the root is requested
        data = [self._tree_id, "tree"]
        self._paths[''] = data
        return data

    tree_id, tree_type, tree_mode = \
        self._remote.tree_and_type_for_path(self.raw_id, path)
    if tree_id is None:
        raise self.no_node_at_path(path)

    data = [tree_id, tree_type]
    self._paths[path] = data
    self._stat_modes[path] = tree_mode
    return data
189 190
def _get_kind(self, path):
    """
    Map the object type found at ``path`` to a ``NodeKind`` value.

    :param path: path inside the commit
    :return: ``NodeKind.FILE`` for ``'blob'``, ``NodeKind.DIR`` for
        ``'tree'``, ``NodeKind.SUBMODULE`` for ``'link'`` or ``'commit'``,
        otherwise ``None``
    """
    _tree_id, type_ = self._get_tree_id_for_path(path)
    if type_ == 'blob':
        return NodeKind.FILE
    if type_ == 'tree':
        return NodeKind.DIR
    # get_node() treats both 'link' and 'commit' from the same lookup as a
    # submodule; keep the two classifications in agreement.
    if type_ in ('link', 'commit'):
        return NodeKind.SUBMODULE
    return None
199 200
def _assert_is_path(self, path):
    """
    Normalise ``path`` with ``_fix_path`` and make sure it names a file.

    :return: the normalised path
    :raises CommitError: when the path does not resolve to a file node
    """
    fixed = self._fix_path(path)
    if self._get_kind(fixed) == NodeKind.FILE:
        return fixed
    raise CommitError(f"File does not exist for commit {self.raw_id} at '{fixed}'")
205 206
206 207 def _get_file_nodes(self):
207 208 return chain(*(t[2] for t in self.walk()))
208 209
@LazyProperty
def parents(self):
    """
    Returns list of parent commits.
    """
    return self._make_commits(self._remote.parents(self.id))
216 217
@LazyProperty
def children(self):
    """
    Returns list of child commits.
    """
    child_ids = self._remote.children(self.raw_id)
    return self._make_commits(child_ids)
225 226
226 227 def _make_commits(self, commit_ids):
227 228 def commit_maker(_commit_id):
228 229 return self.repository.get_commit(commit_id=_commit_id)
229 230
230 231 return [commit_maker(commit_id) for commit_id in commit_ids]
231 232
def get_file_mode(self, path: bytes):
    """
    Returns stat mode of the file at the given `path`.

    :raises CommitError: (via ``_assert_is_path``) when `path` is not a file
    """
    file_path = self._assert_is_path(path)

    # ensure path is traversed, which records its mode in _stat_modes
    self._get_tree_id_for_path(file_path)

    mode = self._stat_modes[file_path]
    return mode
242 243
def is_link(self, path):
    """
    Tell whether the stat mode of the file at `path` marks a symbolic link.
    """
    mode = self.get_file_mode(path)
    return stat.S_ISLNK(mode)
245 246
def is_node_binary(self, path):
    """
    Ask the remote whether the object found at `path` is binary.
    """
    object_id, _type = self._get_tree_id_for_path(path)
    return self._remote.is_binary(object_id)
249 250
def node_md5_hash(self, path):
    """
    MD5 hash reported by the remote for the file at `path`.

    :raises CommitError: (via ``_assert_is_path``) when `path` is not a file
    """
    file_path = self._assert_is_path(path)
    digest = self._remote.md5_hash(self.raw_id, file_path)
    return digest
253 254
def get_file_content(self, path):
    """
    Returns content of the file at given `path`.
    """
    object_id, _type = self._get_tree_id_for_path(path)
    content = self._remote.blob_as_pretty_string(object_id)
    return content
260 261
def get_file_content_streamed(self, path):
    """
    Returns the result of the remote's streaming blob call for the file at
    given `path`.
    """
    object_id, _type = self._get_tree_id_for_path(path)
    # the attribute name contains a colon, so it can only be reached
    # through getattr
    streamer = getattr(self._remote, 'stream:blob_as_pretty_string')
    return streamer(object_id)
265 266
def get_file_size(self, path):
    """
    Returns size of the file at given `path`.
    """
    object_id, _type = self._get_tree_id_for_path(path)
    size = self._remote.blob_raw_length(object_id)
    return size
272 273
def get_path_history(self, path, limit=None, pre_load=None):
    """
    Returns the commits in which the file at given `path` has been
    modified, as a list of commit objects in the order reported by the
    remote's ``node_history`` call.

    :param limit: passed through to the remote unchanged
    :param pre_load: passed to ``repository.get_commit`` for every entry
    :raises CommitError: (via ``_assert_is_path``) when `path` is not a file
    """
    file_path = self._assert_is_path(path)
    commit_ids = self._remote.node_history(self.raw_id, file_path, limit)

    history = []
    for one_id in commit_ids:
        history.append(
            self.repository.get_commit(commit_id=one_id, pre_load=pre_load))
    return history
284 285
def get_file_annotate(self, path, pre_load=None):
    """
    Returns a generator of four element tuples with
    lineno, commit_id, commit lazy loader and line

    The loader is a zero-argument callable returning
    ``repository.get_commit(commit_id=..., pre_load=pre_load)`` for the
    commit id of its own line.
    """

    def make_loader(bound_id):
        # Bind the id per line. A lambda referring to the loop variable
        # directly would resolve whatever id the loop holds when the
        # loader is finally called, i.e. the wrong commit once the
        # generator has moved on.
        return lambda: self.repository.get_commit(
            commit_id=bound_id, pre_load=pre_load)

    result = self._remote.node_annotate(self.raw_id, path)

    for ln_no, commit_id, content in result:
        yield ln_no, commit_id, make_loader(commit_id), content
298 299
def get_nodes(self, path, pre_load=None):
    """
    Returns the sorted list of nodes found directly inside the directory
    at `path`.

    Every returned node is also stored in ``self.nodes`` unless an entry
    for its path already exists.

    :param pre_load: passed to every ``FileNode`` created here
    :raises CommitError: when `path` is not a directory, or when the remote
        reports an entry type other than ``'link'``, ``'tree'``, ``'blob'``
    """
    if self._get_kind(path) != NodeKind.DIR:
        raise CommitError(
            f"Directory does not exist for commit {self.raw_id} at '{path}'")
    path = self._fix_path(path)

    tree_id, _ = self._get_tree_id_for_path(path)

    # extracted tree ID gives us our files...
    base_path = safe_str(path)
    sub_dirs = []
    files = []

    for name, stat_, id_, type_ in self._remote.tree_items(tree_id):
        if type_ == 'link':
            url = self._get_submodule_url('/'.join((base_path, name)))
            sub_dirs.append(SubModuleNode(
                name, url=url, commit=id_, alias=self.repository.alias))
            continue

        obj_path = '/'.join((base_path, name)) if base_path != '' else name
        # keep a mode that was recorded earlier for this path
        self._stat_modes.setdefault(obj_path, stat_)

        if type_ == 'tree':
            sub_dirs.append(DirNode(safe_bytes(obj_path), commit=self))
        elif type_ == 'blob':
            files.append(FileNode(
                safe_bytes(obj_path), commit=self, mode=stat_, pre_load=pre_load))
        else:
            raise CommitError(f"Requested object should be Tree or Blob, is {type_}")

    nodes = sub_dirs + files
    for node in nodes:
        self.nodes.setdefault(node.path, node)
    nodes.sort()
    return nodes
340 341
def get_node(self, path, pre_load=None):
    """
    Returns the node at `path`, creating and caching it in ``self.nodes``
    on first request.

    :param pre_load: passed to the ``FileNode`` when `path` is a file
    :raises NodeDoesNotExistError: when the path lookup raises
        ``CommitError``
    :raises: the exception built by ``self.no_node_at_path`` for an
        unrecognised object type
    """
    path = self._fix_path(path)
    if path in self.nodes:
        return self.nodes[path]

    try:
        tree_id, type_ = self._get_tree_id_for_path(path)
    except CommitError:
        raise NodeDoesNotExistError(
            f"Cannot find one of parents' directories for a given path: {path}")

    if type_ in ['link', 'commit']:
        node = SubModuleNode(
            path, url=self._get_submodule_url(path), commit=tree_id,
            alias=self.repository.alias)
    elif type_ == 'tree':
        node = RootNode(commit=self) if path == '' \
            else DirNode(safe_bytes(path), commit=self)
    elif type_ == 'blob':
        node = FileNode(safe_bytes(path), commit=self, pre_load=pre_load)
        self._stat_modes[path] = node.mode
    else:
        raise self.no_node_at_path(path)

    # cache node
    self.nodes[path] = node
    return node
370 371
def get_largefile_node(self, path):
    """
    Returns a ``LargeFileNode`` for `path` when the remote recognises the
    object as a large-file pointer and the referenced file is present in
    the large-files store; otherwise ``None``.
    """
    object_id, _type = self._get_tree_id_for_path(path)
    pointer_spec = self._remote.is_large_file(object_id)
    if not pointer_spec:
        return None

    # content of that file regular FileNode is the hash of largefile
    file_id = pointer_spec.get('oid_hash')
    if not self._remote.in_largefiles_store(file_id):
        return None

    lf_path = self._remote.store_path(file_id)
    return LargeFileNode(safe_bytes(lf_path), commit=self, org_path=path)
381 382
@LazyProperty
def affected_files(self):
    """
    List of every path that is added, modified or deleted by this commit,
    taken from ``_changes_cache``.
    """
    added, modified, deleted = self._changes_cache
    touched = added.union(modified)
    touched = touched.union(deleted)
    return list(touched)
389 390
@LazyProperty
def _changes_cache(self):
    """
    ``(added, modified, deleted)`` sets of paths, merged over the tree
    changes against every parent of this commit.

    A commit without parents is compared against ``None``.
    """
    added, modified, deleted = set(), set(), set()

    parents = self.parents or [base.EmptyCommit()]
    for parent in parents:
        oid = None if isinstance(parent, base.EmptyCommit) else parent.raw_id
        _added, _modified, _deleted = self._remote.tree_changes(oid, self.raw_id)
        added |= set(_added)
        modified |= set(_modified)
        deleted |= set(_deleted)

    return added, modified, deleted
410 411
411 412 def _get_paths_for_status(self, status):
412 413 """
413 414 Returns sorted list of paths for given ``status``.
414 415
415 416 :param status: one of: *added*, *modified* or *deleted*
416 417 """
417 418 added, modified, deleted = self._changes_cache
418 419 return sorted({
419 420 'added': list(added),
420 421 'modified': list(modified),
421 422 'deleted': list(deleted)}[status]
422 423 )
423 424
@LazyProperty
def added(self):
    """
    Returns list of added ``FileNode`` objects.

    For a commit without parents every file node of the commit is
    returned.
    """
    if self.parents:
        return AddedFileNodesGenerator(self.added_paths, self)
    return list(self._get_file_nodes())
432 433
@LazyProperty
def added_paths(self):
    """
    Sorted list of paths added by this commit.
    """
    return list(self._get_paths_for_status('added'))
436 437
@LazyProperty
def changed(self):
    """
    Returns list of modified ``FileNode`` objects.

    A commit without parents has no modified files.
    """
    if self.parents:
        return ChangedFileNodesGenerator(self.changed_paths, self)
    return []
445 446
@LazyProperty
def changed_paths(self):
    """
    Sorted list of paths modified by this commit.
    """
    return list(self._get_paths_for_status('modified'))
449 450
@LazyProperty
def removed(self):
    """
    Returns list of removed ``FileNode`` objects.

    A commit without parents has no removed files.
    """
    if self.parents:
        return RemovedFileNodesGenerator(self.removed_paths, self)
    return []
458 459
@LazyProperty
def removed_paths(self):
    """
    Sorted list of paths deleted by this commit.
    """
    return list(self._get_paths_for_status('deleted'))
462 463
463 464 def _get_submodule_url(self, submodule_path):
464 465 git_modules_path = '.gitmodules'
465 466
466 467 if self._submodules is None:
467 468 self._submodules = {}
468 469
469 470 try:
470 471 submodules_node = self.get_node(git_modules_path)
471 472 except NodeDoesNotExistError:
472 473 return None
473 474
474 # ConfigParser fails if there are whitespaces, also it needs an iterable
475 # file like content
476 def iter_content(_content):
477 yield from _content.splitlines()
478
479 475 parser = configparser.RawConfigParser()
480 parser.read_file(iter_content(submodules_node.content))
476 parser.read_file(io.StringIO(submodules_node.str_content))
481 477
482 478 for section in parser.sections():
483 479 path = parser.get(section, 'path')
484 480 url = parser.get(section, 'url')
485 481 if path and url:
486 482 self._submodules[path.strip('/')] = url
487 483
488 484 return self._submodules.get(submodule_path.strip('/'))
General Comments 0
You need to be logged in to leave comments. Login now