##// END OF EJS Templates
fix: remove rhodecode import added by accident
super-admin -
r1310:9cc7dfc4 default
parent child Browse files
Show More
@@ -1,41 +1,70 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19
# Cached package version; populated lazily by get_version().
__version__ = ''


def get_version():
    """Return the package version, reading the VERSION file on first call.

    The result is memoized in the module-level ``__version__`` so the file
    next to this module is read at most once per process.
    """
    global __version__
    if not __version__:
        version_path = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), "VERSION")
        with open(version_path, "rt") as version_fd:
            __version__ = version_fd.read().strip()
    return __version__
35 35
# link to config for pyramid
CONFIG = {}


class ConfigGet:
    """Typed accessors over the module-level ``CONFIG`` dictionary.

    ``NotGiven`` is a sentinel that distinguishes "caller supplied no
    default" from an explicit default of ``None``.
    """
    NotGiven = object()

    def _get_val_or_missing(self, key, missing):
        """Return ``CONFIG[key]`` if present, otherwise the caller-supplied
        default, or ``None`` when no default was given (like ``CONFIG.get``).

        Fix: the original returned ``None`` when a default WAS supplied and
        leaked the ``NotGiven`` sentinel (into ``safe_str``/``safe_int``/
        ``str2bool``) when it was not — the two branches were swapped.
        Sentinel comparison also uses identity (``is``) now.
        """
        if key not in CONFIG:
            if missing is not self.NotGiven:
                # caller supplied an explicit fallback value - honour it
                return missing
            # we don't get key, we don't get missing value, return nothing similar as config.get(key)
            return None
        return CONFIG[key]

    def get_str(self, key, missing=NotGiven):
        # local import to avoid import cycles at module load time
        from vcsserver.lib.str_utils import safe_str
        val = self._get_val_or_missing(key, missing)
        return safe_str(val)

    def get_int(self, key, missing=NotGiven):
        from vcsserver.lib.str_utils import safe_int
        val = self._get_val_or_missing(key, missing)
        return safe_int(val)

    def get_bool(self, key, missing=NotGiven):
        from vcsserver.lib.type_utils import str2bool
        val = self._get_val_or_missing(key, missing)
        return str2bool(val)
67
39 68 # Populated with the settings dictionary from application init in
40 69 #
41 70 PYRAMID_SETTINGS = {}
@@ -1,1526 +1,1526 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23 import traceback
24 24 import urllib.request
25 25 import urllib.parse
26 26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 40
41 import rhodecode
41 import vcsserver
42 42 from vcsserver import exceptions, settings, subprocessio
43 43 from vcsserver.lib.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str, splitnewlines
44 44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
45 45 from vcsserver.hgcompat import (
46 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 47 from vcsserver.git_lfs.lib import LFSOidStore
48 48 from vcsserver.vcs_base import RemoteBase
49 49
50 50 DIR_STAT = stat.S_IFDIR
51 51 FILE_MODE = stat.S_IFMT
52 52 GIT_LINK = objects.S_IFGITLINK
53 53 PEELED_REF_MARKER = b'^{}'
54 54 HEAD_MARKER = b'HEAD'
55 55
56 56 log = logging.getLogger(__name__)
57 57
58 58
def reraise_safe_exceptions(func):
    """Converts Dulwich exceptions to something neutral.

    Decorator: lookup-type dulwich errors become ``exceptions.LookupException``,
    protocol errors become ``exceptions.VcsException``; everything else is
    re-raised untouched (see note below).
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as e:
            exc = exceptions.LookupException(org_exc=e)
            raise exc(safe_str(e))
        except (HangupException, UnexpectedCommandError) as e:
            exc = exceptions.VcsException(org_exc=e)
            raise exc(safe_str(e))
        except Exception:
            # NOTE(marcink): because of how dulwich handles some exceptions
            # (KeyError on empty repos), we cannot track this and catch all
            # exceptions, it's an exceptions from other handlers
            #if not hasattr(e, '_vcs_kind'):
            #log.exception("Unhandled exception in git remote call")
            #raise_from_original(exceptions.UnhandledException)
            raise
    return wrapper
81 81
82 82
class Repo(DulwichRepo):
    """
    A wrapper for dulwich Repo class.

    Since dulwich is sometimes keeping .idx file descriptors open, it leads to
    "Too many open files" error. We need to close all opened file descriptors
    once the repo object is destroyed.
    """
    def __del__(self):
        # hasattr guard: __init__ may have failed before object_store was
        # assigned, and __del__ must never raise
        if hasattr(self, 'object_store'):
            self.close()
94 94
95 95
class Repository(LibGit2Repo):
    """Context-manager wrapper over pygit2's Repository that frees the
    underlying libgit2 handle on exit instead of waiting for GC."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # release libgit2 resources promptly
        self.free()
103 103
104 104
class GitFactory(RepoFactory):
    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """Instantiate a repository object for ``wire['path']``.

        :param create: currently unused here — NOTE(review): confirm callers
        :param use_libgit2: when True return a pygit2-backed ``Repository``,
            otherwise a dulwich-backed ``Repo``
        """
        if use_libgit2:
            repo = Repository(safe_bytes(wire['path']))
        else:
            # dulwich mode
            repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
            repo = Repo(repo_path)

        log.debug('repository created: got GIT object: %s', repo)
        return repo

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """Shortcut for a pygit2-backed repository instance."""
        return self.repo(wire, use_libgit2=True)
128 128
def create_signature_from_string(author_str, **kwargs):
    """
    Creates a pygit2.Signature object from a string of the format 'Name <email>'.

    :param author_str: String of the format 'Name <email>'
    :return: pygit2.Signature object
    :raises ValueError: when *author_str* does not match the expected format
    """
    parsed = re.match(r'^(.+) <(.+)>$', author_str)
    if parsed is None:
        raise ValueError(f"Invalid format: {author_str}")

    return pygit2.Signature(*parsed.groups(), **kwargs)
142 142
143 143
def get_obfuscated_url(url_obj):
    """Return *url_obj* rendered as a string with its password and query
    string masked, suitable for logging."""
    if url_obj.passwd:
        url_obj.passwd = b'*****'
    url_obj.query = obfuscate_qs(url_obj.query)
    return str(url_obj)
149 149
150 150
151 151 class GitRemote(RemoteBase):
152 152
    def __init__(self, factory):
        # factory: GitFactory used to materialize repo objects per call
        self._factory = factory
        # commit attributes resolvable through bulk_request()
        self._bulk_methods = {
            "date": self.date,
            "author": self.author,
            "branch": self.branch,
            "message": self.message,
            "parents": self.parents,
            "_commit": self.revision,
        }
        # file-node attributes resolvable through bulk_file_request()
        self._bulk_file_methods = {
            "size": self.get_node_size,
            "data": self.get_node_data,
            "flags": self.get_node_flags,
            "is_binary": self.get_node_is_binary,
            "md5": self.md5_hash
        }
170 170
171 171 def _wire_to_config(self, wire):
172 172 if 'config' in wire:
173 173 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
174 174 return {}
175 175
176 176 def _remote_conf(self, config):
177 177 params = [
178 178 '-c', 'core.askpass=""',
179 179 ]
180 180 config_attrs = {
181 181 'vcs_ssl_dir': 'http.sslCAinfo={}',
182 182 'vcs_git_lfs_store_location': 'lfs.storage={}'
183 183 }
184 184 for key, param in config_attrs.items():
185 185 if value := config.get(key):
186 186 params.extend(['-c', param.format(value)])
187 187 return params
188 188
    @reraise_safe_exceptions
    def discover_git_version(self):
        """Return the version string of the git binary, e.g. '2.41.0'."""
        stdout, _ = self.run_git_command(
            {}, ['--version'], _bare=True, _safe=True)
        prefix = b'git version'
        # strip the leading 'git version' label, keep only the number
        if stdout.startswith(prefix):
            stdout = stdout[len(prefix):]
        return safe_str(stdout.strip())
197 197
    @reraise_safe_exceptions
    def is_empty(self, wire):
        """Return True when the repository has no resolvable HEAD/commits."""
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            try:
                has_head = repo.head.name
                if has_head:
                    return False

                # NOTE(marcink): check again using more expensive method
                return repo.is_empty
            except Exception:
                # resolving HEAD raises on an empty/broken repo; treat as empty
                pass

        return True
213 213
    @reraise_safe_exceptions
    def assert_correct_path(self, wire):
        """Return True if ``wire['path']`` points at a valid git repository.

        Result is cached per (context_uid, repo_id) when caching is enabled.
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _assert_correct_path(_context_uid, _repo_id, fast_check):
            if fast_check:
                # cheap probe: let pygit2 discover the repo without opening it
                path = safe_str(wire['path'])
                if pygit2.discover_repository(path):
                    return True
                return False
            else:
                # expensive probe: actually open the repository
                try:
                    repo_init = self._factory.repo_libgit2(wire)
                    with repo_init:
                        pass
                except pygit2.GitError:
                    path = wire.get('path')
                    tb = traceback.format_exc()
                    log.debug("Invalid Git path `%s`, tb: %s", path, tb)
                    return False
                return True

        return _assert_correct_path(context_uid, repo_id, True)
239 239
    @reraise_safe_exceptions
    def bare(self, wire):
        """Return True if the repository is bare (has no working tree)."""
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            return repo.is_bare
245 245
    @reraise_safe_exceptions
    def get_node_data(self, wire, commit_id, path):
        """Return the raw bytes of the blob at *path* in *commit_id*.

        :raises exceptions.LookupException: when *path* is not a blob.
        """
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            commit = repo[commit_id]
            blob_obj = commit.tree[path]

            if blob_obj.type != pygit2.GIT_OBJ_BLOB:
                raise exceptions.LookupException()(
                    f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')

            return BytesEnvelope(blob_obj.data)
258 258
    @reraise_safe_exceptions
    def get_node_size(self, wire, commit_id, path):
        """Return the size in bytes of the blob at *path* in *commit_id*.

        :raises exceptions.LookupException: when *path* is not a blob.
        """
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            commit = repo[commit_id]
            blob_obj = commit.tree[path]

            if blob_obj.type != pygit2.GIT_OBJ_BLOB:
                raise exceptions.LookupException()(
                    f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')

            return blob_obj.size
271 271
    @reraise_safe_exceptions
    def get_node_flags(self, wire, commit_id, path):
        """Return the git filemode of the blob at *path* in *commit_id*.

        :raises exceptions.LookupException: when *path* is not a blob.
        """
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            commit = repo[commit_id]
            blob_obj = commit.tree[path]

            if blob_obj.type != pygit2.GIT_OBJ_BLOB:
                raise exceptions.LookupException()(
                    f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')

            return blob_obj.filemode
284 284
    @reraise_safe_exceptions
    def get_node_is_binary(self, wire, commit_id, path):
        """Return True if the blob at *path* in *commit_id* is binary.

        :raises exceptions.LookupException: when *path* is not a blob.
        """
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            commit = repo[commit_id]
            blob_obj = commit.tree[path]

            if blob_obj.type != pygit2.GIT_OBJ_BLOB:
                raise exceptions.LookupException()(
                    f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')

            return blob_obj.is_binary
297 297
    @reraise_safe_exceptions
    def blob_as_pretty_string(self, wire, sha):
        """Return the raw data of blob *sha* wrapped in a BytesEnvelope."""
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            blob_obj = repo[sha]
            return BytesEnvelope(blob_obj.data)
304 304
    @reraise_safe_exceptions
    def blob_raw_length(self, wire, sha):
        """Return the size in bytes of blob *sha* (cached when enabled)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _blob_raw_length(_repo_id, _sha):

            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                blob = repo[sha]
                return blob.size

        return _blob_raw_length(repo_id, sha)
319 319
320 320 def _parse_lfs_pointer(self, raw_content):
321 321 spec_string = b'version https://git-lfs.github.com/spec'
322 322 if raw_content and raw_content.startswith(spec_string):
323 323
324 324 pattern = re.compile(rb"""
325 325 (?:\n)?
326 326 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
327 327 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
328 328 ^size[ ](?P<oid_size>[0-9]+)\n
329 329 (?:\n)?
330 330 """, re.VERBOSE | re.MULTILINE)
331 331 match = pattern.match(raw_content)
332 332 if match:
333 333 return match.groupdict()
334 334
335 335 return {}
336 336
    @reraise_safe_exceptions
    def is_large_file(self, wire, commit_id):
        """Return parsed LFS pointer fields for blob *commit_id*, or {}."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _is_large_file(_repo_id, _sha):
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                # NOTE(review): reads the closed-over ``commit_id`` rather than
                # the ``_sha`` cache-key argument; they carry the same value here
                blob = repo[commit_id]
                if blob.is_binary:
                    # binary blobs cannot be LFS pointers (pointers are text)
                    return {}

                return self._parse_lfs_pointer(blob.data)

        return _is_large_file(repo_id, commit_id)
353 353
    @reraise_safe_exceptions
    def is_binary(self, wire, tree_id):
        """Return True if object *tree_id* holds binary content (cached)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _is_binary(_repo_id, _tree_id):
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                blob_obj = repo[tree_id]
                return blob_obj.is_binary

        return _is_binary(repo_id, tree_id)
367 367
    @reraise_safe_exceptions
    def md5_hash(self, wire, commit_id, path):
        """Validate that *path* is a blob in *commit_id*; returns ''.

        NOTE(review): always returns an empty string — looks like a
        placeholder for a real md5 implementation; confirm with callers.
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _md5_hash(_repo_id, _commit_id, _path):
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                commit = repo[_commit_id]
                blob_obj = commit.tree[_path]

                if blob_obj.type != pygit2.GIT_OBJ_BLOB:
                    raise exceptions.LookupException()(
                        f'Tree for commit_id:{_commit_id} is not a blob: {blob_obj.type_str}')

                return ''

        return _md5_hash(repo_id, commit_id, path)
387 387
    @reraise_safe_exceptions
    def in_largefiles_store(self, wire, oid):
        """Return True when LFS object *oid* exists in the configured store.

        Returns False when no ``vcs_git_lfs_store_location`` is configured.
        """
        conf = self._wire_to_config(wire)
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            repo_name = repo.path

        store_location = conf.get('vcs_git_lfs_store_location')
        if store_location:

            store = LFSOidStore(
                oid=oid, repo=repo_name, store_location=store_location)
            return store.has_oid()

        return False
403 403
    @reraise_safe_exceptions
    def store_path(self, wire, oid):
        """Return the filesystem path of LFS object *oid* within the store.

        :raises ValueError: when no LFS store location is configured.
        """
        conf = self._wire_to_config(wire)
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            repo_name = repo.path

        store_location = conf.get('vcs_git_lfs_store_location')
        if store_location:
            store = LFSOidStore(
                oid=oid, repo=repo_name, store_location=store_location)
            return store.oid_path
        raise ValueError(f'Unable to fetch oid with path {oid}')
417 417
    @reraise_safe_exceptions
    def bulk_request(self, wire, rev, pre_load):
        """Resolve several commit attributes of *rev* in one cached call.

        :param pre_load: iterable of keys of ``self._bulk_methods``
        :raises exceptions.VcsException: on an unknown attribute name
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _bulk_request(_repo_id, _rev, _pre_load):
            result = {}
            for attr in pre_load:
                try:
                    method = self._bulk_methods[attr]
                    wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
                    args = [wire, rev]
                    result[attr] = method(*args)
                except KeyError as e:
                    raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
            return result

        # sorted() keeps the cache key stable regardless of caller ordering
        return _bulk_request(repo_id, rev, sorted(pre_load))
437 437
    @reraise_safe_exceptions
    def bulk_file_request(self, wire, commit_id, path, pre_load):
        """Resolve several file-node attributes in one cached call.

        :param pre_load: iterable of keys of ``self._bulk_file_methods``
        :raises exceptions.VcsException: on an unknown attribute name
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
            result = {}
            for attr in pre_load:
                try:
                    method = self._bulk_file_methods[attr]
                    wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
                    result[attr] = method(wire, _commit_id, _path)
                except KeyError as e:
                    raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
            return result

        return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
456 456
    def _build_opener(self, url: str):
        """Build a urllib opener for *url*, attaching basic/digest auth
        handlers when the URL embeds credentials."""
        handlers = []
        url_obj = url_parser(safe_bytes(url))
        authinfo = url_obj.authinfo()[1]

        if authinfo:
            # create a password manager
            passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
            passmgr.add_password(*convert_to_str(authinfo))

            handlers.extend((httpbasicauthhandler(passmgr),
                             httpdigestauthhandler(passmgr)))

        return urllib.request.build_opener(*handlers)
471 471
472 472 @reraise_safe_exceptions
473 473 def check_url(self, url, config):
474 474 url_obj = url_parser(safe_bytes(url))
475 475
476 476 test_uri = safe_str(url_obj.authinfo()[0])
477 477 obfuscated_uri = get_obfuscated_url(url_obj)
478 478
479 479 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
480 480
481 481 if not test_uri.endswith('info/refs'):
482 482 test_uri = test_uri.rstrip('/') + '/info/refs'
483 483
484 484 o = self._build_opener(url=url)
485 485 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
486 486
487 487 q = {"service": 'git-upload-pack'}
488 488 qs = f'?{urllib.parse.urlencode(q)}'
489 489 cu = f"{test_uri}{qs}"
490 490
491 491 try:
492 492 req = urllib.request.Request(cu, None, {})
493 493 log.debug("Trying to open URL %s", obfuscated_uri)
494 494 resp = o.open(req)
495 495 if resp.code != 200:
496 496 raise exceptions.URLError()('Return Code is not 200')
497 497 except Exception as e:
498 498 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
499 499 # means it cannot be cloned
500 500 raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")
501 501
502 502 # now detect if it's proper git repo
503 503 gitdata: bytes = resp.read()
504 504
505 505 if b'service=git-upload-pack' in gitdata:
506 506 pass
507 507 elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
508 508 # old style git can return some other format!
509 509 pass
510 510 else:
511 511 e = None
512 512 raise exceptions.URLError(e)(
513 513 f"url [{obfuscated_uri}] does not look like an hg repo org_exc: {e}")
514 514
515 515 return True
516 516
    @reraise_safe_exceptions
    def clone(self, wire, url, deferred, valid_refs, update_after_clone):
        """Minimal clone: pull refs from *url*, keep only refs starting with
        *valid_refs* (skipping *deferred* suffixes), optionally checkout HEAD."""
        # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
        remote_refs = self.pull(wire, url, apply_refs=False)
        repo = self._factory.repo(wire)
        # str.startswith below needs a tuple, not a list
        if isinstance(valid_refs, list):
            valid_refs = tuple(valid_refs)

        for k in remote_refs:
            # only parse heads/tags and skip so called deferred tags
            if k.startswith(valid_refs) and not k.endswith(deferred):
                repo[k] = remote_refs[k]

        if update_after_clone:
            # we want to checkout HEAD
            repo["HEAD"] = remote_refs["HEAD"]
            index.build_index_from_tree(repo.path, repo.index_path(),
                                        repo.object_store, repo["HEAD"].tree)
535 535
    @reraise_safe_exceptions
    def branch(self, wire, commit_id):
        """Return names of local branches whose head equals *commit_id*."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _branch(_context_uid, _repo_id, _commit_id):
            regex = re.compile('^refs/heads')

            def filter_with(ref):
                # ref is a (name, sha) pair from get_refs().items()
                return regex.match(ref[0]) and ref[1] == _commit_id

            branches = list(filter(filter_with, list(self.get_refs(wire).items())))
            # strip the 'refs/heads/' prefix, keep bare branch names
            return [x[0].split('refs/heads/')[-1] for x in branches]

        return _branch(context_uid, repo_id, commit_id)
552 552
553 553 @reraise_safe_exceptions
554 554 def delete_branch(self, wire, branch_name):
555 555 repo_init = self._factory.repo_libgit2(wire)
556 556 with repo_init as repo:
557 557 if branch := repo.lookup_branch(branch_name):
558 558 branch.delete()
559 559
560 560 @reraise_safe_exceptions
561 561 def commit_branches(self, wire, commit_id):
562 562 cache_on, context_uid, repo_id = self._cache_on(wire)
563 563 region = self._region(wire)
564 564
565 565 @region.conditional_cache_on_arguments(condition=cache_on)
566 566 def _commit_branches(_context_uid, _repo_id, _commit_id):
567 567 repo_init = self._factory.repo_libgit2(wire)
568 568 with repo_init as repo:
569 569 branches = [x for x in repo.branches.with_commit(_commit_id)]
570 570 return branches
571 571
572 572 return _commit_branches(context_uid, repo_id, commit_id)
573 573
574 574 @reraise_safe_exceptions
575 575 def add_object(self, wire, content):
576 576 repo_init = self._factory.repo_libgit2(wire)
577 577 with repo_init as repo:
578 578 blob = objects.Blob()
579 579 blob.set_raw_string(content)
580 580 repo.object_store.add_object(blob)
581 581 return blob.id
582 582
583 583 @reraise_safe_exceptions
584 584 def create_commit(self, wire, author, committer, message, branch, new_tree_id,
585 585 date_args: list[int, int] = None,
586 586 parents: list | None = None):
587 587
588 588 repo_init = self._factory.repo_libgit2(wire)
589 589 with repo_init as repo:
590 590
591 591 if date_args:
592 592 current_time, offset = date_args
593 593
594 594 kw = {
595 595 'time': current_time,
596 596 'offset': offset
597 597 }
598 598 author = create_signature_from_string(author, **kw)
599 599 committer = create_signature_from_string(committer, **kw)
600 600
601 601 tree = new_tree_id
602 602 if isinstance(tree, (bytes, str)):
603 603 # validate this tree is in the repo...
604 604 tree = repo[safe_str(tree)].id
605 605
606 606 if parents:
607 607 # run via sha's and validate them in repo
608 608 parents = [repo[c].id for c in parents]
609 609 else:
610 610 parents = []
611 611 # ensure we COMMIT on top of given branch head
612 612 # check if this repo has ANY branches, otherwise it's a new branch case we need to make
613 613 if branch in repo.branches.local:
614 614 parents += [repo.branches[branch].target]
615 615 elif [x for x in repo.branches.local]:
616 616 parents += [repo.head.target]
617 617 #else:
618 618 # in case we want to commit on new branch we create it on top of HEAD
619 619 #repo.branches.local.create(branch, repo.revparse_single('HEAD'))
620 620
621 621 # # Create a new commit
622 622 commit_oid = repo.create_commit(
623 623 f'refs/heads/{branch}', # the name of the reference to update
624 624 author, # the author of the commit
625 625 committer, # the committer of the commit
626 626 message, # the commit message
627 627 tree, # the tree produced by the index
628 628 parents # list of parents for the new commit, usually just one,
629 629 )
630 630
631 631 new_commit_id = safe_str(commit_oid)
632 632
633 633 return new_commit_id
634 634
635 635 @reraise_safe_exceptions
636 636 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
637 637
638 638 def mode2pygit(mode):
639 639 """
640 640 git only supports two filemode 644 and 755
641 641
642 642 0o100755 -> 33261
643 643 0o100644 -> 33188
644 644 """
645 645 return {
646 646 0o100644: pygit2.GIT_FILEMODE_BLOB,
647 647 0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
648 648 0o120000: pygit2.GIT_FILEMODE_LINK
649 649 }.get(mode) or pygit2.GIT_FILEMODE_BLOB
650 650
651 651 repo_init = self._factory.repo_libgit2(wire)
652 652 with repo_init as repo:
653 653 repo_index = repo.index
654 654
655 655 commit_parents = None
656 656 if commit_tree and commit_data['parents']:
657 657 commit_parents = commit_data['parents']
658 658 parent_commit = repo[commit_parents[0]]
659 659 repo_index.read_tree(parent_commit.tree)
660 660
661 661 for pathspec in updated:
662 662 blob_id = repo.create_blob(pathspec['content'])
663 663 ie = pygit2.IndexEntry(pathspec['path'], blob_id, mode2pygit(pathspec['mode']))
664 664 repo_index.add(ie)
665 665
666 666 for pathspec in removed:
667 667 repo_index.remove(pathspec)
668 668
669 669 # Write changes to the index
670 670 repo_index.write()
671 671
672 672 # Create a tree from the updated index
673 673 written_commit_tree = repo_index.write_tree()
674 674
675 675 new_tree_id = written_commit_tree
676 676
677 677 author = commit_data['author']
678 678 committer = commit_data['committer']
679 679 message = commit_data['message']
680 680
681 681 date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]
682 682
683 683 new_commit_id = self.create_commit(wire, author, committer, message, branch,
684 684 new_tree_id, date_args=date_args, parents=commit_parents)
685 685
686 686 # libgit2, ensure the branch is there and exists
687 687 self.create_branch(wire, branch, new_commit_id)
688 688
689 689 # libgit2, set new ref to this created commit
690 690 self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)
691 691
692 692 return new_commit_id
693 693
694 694 @reraise_safe_exceptions
695 695 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
696 696 if url != 'default' and '://' not in url:
697 697 client = LocalGitClient(url)
698 698 else:
699 699 url_obj = url_parser(safe_bytes(url))
700 700 o = self._build_opener(url)
701 701 url = url_obj.authinfo()[0]
702 702 client = HttpGitClient(base_url=url, opener=o)
703 703 repo = self._factory.repo(wire)
704 704
705 705 determine_wants = repo.object_store.determine_wants_all
706 706
707 707 if refs:
708 708 refs: list[bytes] = [ascii_bytes(x) for x in refs]
709 709
710 710 def determine_wants_requested(_remote_refs):
711 711 determined = []
712 712 for ref_name, ref_hash in _remote_refs.items():
713 713 bytes_ref_name = safe_bytes(ref_name)
714 714
715 715 if bytes_ref_name in refs:
716 716 bytes_ref_hash = safe_bytes(ref_hash)
717 717 determined.append(bytes_ref_hash)
718 718 return determined
719 719
720 720 # swap with our custom requested wants
721 721 determine_wants = determine_wants_requested
722 722
723 723 try:
724 724 remote_refs = client.fetch(
725 725 path=url, target=repo, determine_wants=determine_wants)
726 726
727 727 except NotGitRepository as e:
728 728 log.warning(
729 729 'Trying to fetch from "%s" failed, not a Git repository.', url)
730 730 # Exception can contain unicode which we convert
731 731 raise exceptions.AbortException(e)(repr(e))
732 732
733 733 # mikhail: client.fetch() returns all the remote refs, but fetches only
734 734 # refs filtered by `determine_wants` function. We need to filter result
735 735 # as well
736 736 if refs:
737 737 remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}
738 738
739 739 if apply_refs:
740 740 # TODO: johbo: Needs proper test coverage with a git repository
741 741 # that contains a tag object, so that we would end up with
742 742 # a peeled ref at this point.
743 743 for k in remote_refs:
744 744 if k.endswith(PEELED_REF_MARKER):
745 745 log.debug("Skipping peeled reference %s", k)
746 746 continue
747 747 repo[k] = remote_refs[k]
748 748
749 749 if refs and not update_after:
750 750 # update to ref
751 751 # mikhail: explicitly set the head to the last ref.
752 752 update_to_ref = refs[-1]
753 753 if isinstance(update_after, str):
754 754 update_to_ref = update_after
755 755
756 756 repo[HEAD_MARKER] = remote_refs[update_to_ref]
757 757
758 758 if update_after:
759 759 # we want to check out HEAD
760 760 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
761 761 index.build_index_from_tree(repo.path, repo.index_path(),
762 762 repo.object_store, repo[HEAD_MARKER].tree)
763 763
764 764 if isinstance(remote_refs, FetchPackResult):
765 765 return remote_refs.refs
766 766 return remote_refs
767 767
768 768 @reraise_safe_exceptions
769 769 def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs):
770 770 self._factory.repo(wire)
771 771 if refs and not isinstance(refs, (list, tuple)):
772 772 refs = [refs]
773 773
774 774 config = self._wire_to_config(wire)
775 775 # get all remote refs we'll use to fetch later
776 776 cmd = ['ls-remote']
777 777 if not all_refs:
778 778 cmd += ['--heads', '--tags']
779 779 cmd += [url]
780 780 output, __ = self.run_git_command(
781 781 wire, cmd, fail_on_stderr=False,
782 782 _copts=self._remote_conf(config),
783 783 extra_env={'GIT_TERMINAL_PROMPT': '0'})
784 784
785 785 remote_refs = collections.OrderedDict()
786 786 fetch_refs = []
787 787
788 788 for ref_line in output.splitlines():
789 789 sha, ref = ref_line.split(b'\t')
790 790 sha = sha.strip()
791 791 if ref in remote_refs:
792 792 # duplicate, skip
793 793 continue
794 794 if ref.endswith(PEELED_REF_MARKER):
795 795 log.debug("Skipping peeled reference %s", ref)
796 796 continue
797 797 # don't sync HEAD
798 798 if ref in [HEAD_MARKER]:
799 799 continue
800 800
801 801 remote_refs[ref] = sha
802 802
803 803 if refs and sha in refs:
804 804 # we filter fetch using our specified refs
805 805 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
806 806 elif not refs:
807 807 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
808 808 log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
809 809
810 810 if fetch_refs:
811 811 for chunk in more_itertools.chunked(fetch_refs, 128):
812 812 fetch_refs_chunks = list(chunk)
813 813 log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
814 814 self.run_git_command(
815 815 wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
816 816 fail_on_stderr=False,
817 817 _copts=self._remote_conf(config),
818 818 extra_env={'GIT_TERMINAL_PROMPT': '0'})
819 819 if kwargs.get('sync_large_objects'):
820 820 self.run_git_command(
821 821 wire, ['lfs', 'fetch', url, '--all'],
822 822 fail_on_stderr=False,
823 823 _copts=self._remote_conf(config),
824 824 )
825 825
826 826 return remote_refs
827 827
828 828 @reraise_safe_exceptions
829 829 def sync_push(self, wire, url, refs=None, **kwargs):
830 830 if not self.check_url(url, wire):
831 831 return
832 832 config = self._wire_to_config(wire)
833 833 self._factory.repo(wire)
834 834 self.run_git_command(
835 835 wire, ['push', url, '--mirror'], fail_on_stderr=False,
836 836 _copts=self._remote_conf(config),
837 837 extra_env={'GIT_TERMINAL_PROMPT': '0'})
838 838 if kwargs.get('sync_large_objects'):
839 839 self.run_git_command(
840 840 wire, ['lfs', 'push', url, '--all'],
841 841 fail_on_stderr=False,
842 842 _copts=self._remote_conf(config),
843 843 )
844 844
845 845 @reraise_safe_exceptions
846 846 def get_remote_refs(self, wire, url):
847 847 repo = Repo(url)
848 848 return repo.get_refs()
849 849
850 850 @reraise_safe_exceptions
851 851 def get_description(self, wire):
852 852 repo = self._factory.repo(wire)
853 853 return repo.get_description()
854 854
855 855 @reraise_safe_exceptions
856 856 def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
857 857 origin_repo_path = wire['path']
858 858 repo = self._factory.repo(wire)
859 859 # fetch from other_repo_path to our origin repo
860 860 LocalGitClient(thin_packs=False).fetch(other_repo_path, repo)
861 861
862 862 wire_remote = wire.copy()
863 863 wire_remote['path'] = other_repo_path
864 864 repo_remote = self._factory.repo(wire_remote)
865 865
866 866 # fetch from origin_repo_path to our remote repo
867 867 LocalGitClient(thin_packs=False).fetch(origin_repo_path, repo_remote)
868 868
869 869 revs = [
870 870 x.commit.id
871 871 for x in repo_remote.get_walker(include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])]
872 872 return revs
873 873
874 874 @reraise_safe_exceptions
875 875 def get_object(self, wire, sha, maybe_unreachable=False):
876 876 cache_on, context_uid, repo_id = self._cache_on(wire)
877 877 region = self._region(wire)
878 878
879 879 @region.conditional_cache_on_arguments(condition=cache_on)
880 880 def _get_object(_context_uid, _repo_id, _sha):
881 881 repo_init = self._factory.repo_libgit2(wire)
882 882 with repo_init as repo:
883 883
884 884 missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
885 885 try:
886 886 commit = repo.revparse_single(sha)
887 887 except KeyError:
888 888 # NOTE(marcink): KeyError doesn't give us any meaningful information
889 889 # here, we instead give something more explicit
890 890 e = exceptions.RefNotFoundException('SHA: %s not found', sha)
891 891 raise exceptions.LookupException(e)(missing_commit_err)
892 892 except ValueError as e:
893 893 raise exceptions.LookupException(e)(missing_commit_err)
894 894
895 895 is_tag = False
896 896 if isinstance(commit, pygit2.Tag):
897 897 commit = repo.get(commit.target)
898 898 is_tag = True
899 899
900 900 check_dangling = True
901 901 if is_tag:
902 902 check_dangling = False
903 903
904 904 if check_dangling and maybe_unreachable:
905 905 check_dangling = False
906 906
907 907 # we used a reference and it parsed means we're not having a dangling commit
908 908 if sha != commit.hex:
909 909 check_dangling = False
910 910
911 911 if check_dangling:
912 912 # check for dangling commit
913 913 for branch in repo.branches.with_commit(commit.hex):
914 914 if branch:
915 915 break
916 916 else:
917 917 # NOTE(marcink): Empty error doesn't give us any meaningful information
918 918 # here, we instead give something more explicit
919 919 e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
920 920 raise exceptions.LookupException(e)(missing_commit_err)
921 921
922 922 commit_id = commit.hex
923 923 type_str = commit.type_str
924 924
925 925 return {
926 926 'id': commit_id,
927 927 'type': type_str,
928 928 'commit_id': commit_id,
929 929 'idx': 0
930 930 }
931 931
932 932 return _get_object(context_uid, repo_id, sha)
933 933
934 934 @reraise_safe_exceptions
935 935 def get_refs(self, wire):
936 936 cache_on, context_uid, repo_id = self._cache_on(wire)
937 937 region = self._region(wire)
938 938
939 939 @region.conditional_cache_on_arguments(condition=cache_on)
940 940 def _get_refs(_context_uid, _repo_id):
941 941
942 942 repo_init = self._factory.repo_libgit2(wire)
943 943 with repo_init as repo:
944 944 regex = re.compile('^refs/(heads|tags)/')
945 945 return {x.name: x.target.hex for x in
946 946 [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}
947 947
948 948 return _get_refs(context_uid, repo_id)
949 949
950 950 @reraise_safe_exceptions
951 951 def get_branch_pointers(self, wire):
952 952 cache_on, context_uid, repo_id = self._cache_on(wire)
953 953 region = self._region(wire)
954 954
955 955 @region.conditional_cache_on_arguments(condition=cache_on)
956 956 def _get_branch_pointers(_context_uid, _repo_id):
957 957
958 958 repo_init = self._factory.repo_libgit2(wire)
959 959 regex = re.compile('^refs/heads')
960 960 with repo_init as repo:
961 961 branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
962 962 return {x.target.hex: x.shorthand for x in branches}
963 963
964 964 return _get_branch_pointers(context_uid, repo_id)
965 965
966 966 @reraise_safe_exceptions
967 967 def head(self, wire, show_exc=True):
968 968 cache_on, context_uid, repo_id = self._cache_on(wire)
969 969 region = self._region(wire)
970 970
971 971 @region.conditional_cache_on_arguments(condition=cache_on)
972 972 def _head(_context_uid, _repo_id, _show_exc):
973 973 repo_init = self._factory.repo_libgit2(wire)
974 974 with repo_init as repo:
975 975 try:
976 976 return repo.head.peel().hex
977 977 except Exception:
978 978 if show_exc:
979 979 raise
980 980 return _head(context_uid, repo_id, show_exc)
981 981
982 982 @reraise_safe_exceptions
983 983 def init(self, wire):
984 984 repo_path = safe_str(wire['path'])
985 985 os.makedirs(repo_path, mode=0o755)
986 986 pygit2.init_repository(repo_path, bare=False)
987 987
988 988 @reraise_safe_exceptions
989 989 def init_bare(self, wire):
990 990 repo_path = safe_str(wire['path'])
991 991 os.makedirs(repo_path, mode=0o755)
992 992 pygit2.init_repository(repo_path, bare=True)
993 993
994 994 @reraise_safe_exceptions
995 995 def revision(self, wire, rev):
996 996
997 997 cache_on, context_uid, repo_id = self._cache_on(wire)
998 998 region = self._region(wire)
999 999
1000 1000 @region.conditional_cache_on_arguments(condition=cache_on)
1001 1001 def _revision(_context_uid, _repo_id, _rev):
1002 1002 repo_init = self._factory.repo_libgit2(wire)
1003 1003 with repo_init as repo:
1004 1004 commit = repo[rev]
1005 1005 obj_data = {
1006 1006 'id': commit.id.hex,
1007 1007 }
1008 1008 # tree objects itself don't have tree_id attribute
1009 1009 if hasattr(commit, 'tree_id'):
1010 1010 obj_data['tree'] = commit.tree_id.hex
1011 1011
1012 1012 return obj_data
1013 1013 return _revision(context_uid, repo_id, rev)
1014 1014
1015 1015 @reraise_safe_exceptions
1016 1016 def date(self, wire, commit_id):
1017 1017 cache_on, context_uid, repo_id = self._cache_on(wire)
1018 1018 region = self._region(wire)
1019 1019
1020 1020 @region.conditional_cache_on_arguments(condition=cache_on)
1021 1021 def _date(_repo_id, _commit_id):
1022 1022 repo_init = self._factory.repo_libgit2(wire)
1023 1023 with repo_init as repo:
1024 1024 commit = repo[commit_id]
1025 1025
1026 1026 if hasattr(commit, 'commit_time'):
1027 1027 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
1028 1028 else:
1029 1029 commit = commit.get_object()
1030 1030 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
1031 1031
1032 1032 # TODO(marcink): check dulwich difference of offset vs timezone
1033 1033 return [commit_time, commit_time_offset]
1034 1034 return _date(repo_id, commit_id)
1035 1035
1036 1036 @reraise_safe_exceptions
1037 1037 def author(self, wire, commit_id):
1038 1038 cache_on, context_uid, repo_id = self._cache_on(wire)
1039 1039 region = self._region(wire)
1040 1040
1041 1041 @region.conditional_cache_on_arguments(condition=cache_on)
1042 1042 def _author(_repo_id, _commit_id):
1043 1043 repo_init = self._factory.repo_libgit2(wire)
1044 1044 with repo_init as repo:
1045 1045 commit = repo[commit_id]
1046 1046
1047 1047 if hasattr(commit, 'author'):
1048 1048 author = commit.author
1049 1049 else:
1050 1050 author = commit.get_object().author
1051 1051
1052 1052 if author.email:
1053 1053 return f"{author.name} <{author.email}>"
1054 1054
1055 1055 try:
1056 1056 return f"{author.name}"
1057 1057 except Exception:
1058 1058 return f"{safe_str(author.raw_name)}"
1059 1059
1060 1060 return _author(repo_id, commit_id)
1061 1061
1062 1062 @reraise_safe_exceptions
1063 1063 def message(self, wire, commit_id):
1064 1064 cache_on, context_uid, repo_id = self._cache_on(wire)
1065 1065 region = self._region(wire)
1066 1066
1067 1067 @region.conditional_cache_on_arguments(condition=cache_on)
1068 1068 def _message(_repo_id, _commit_id):
1069 1069 repo_init = self._factory.repo_libgit2(wire)
1070 1070 with repo_init as repo:
1071 1071 commit = repo[commit_id]
1072 1072 return commit.message
1073 1073 return _message(repo_id, commit_id)
1074 1074
1075 1075 @reraise_safe_exceptions
1076 1076 def parents(self, wire, commit_id):
1077 1077 cache_on, context_uid, repo_id = self._cache_on(wire)
1078 1078 region = self._region(wire)
1079 1079
1080 1080 @region.conditional_cache_on_arguments(condition=cache_on)
1081 1081 def _parents(_repo_id, _commit_id):
1082 1082 repo_init = self._factory.repo_libgit2(wire)
1083 1083 with repo_init as repo:
1084 1084 commit = repo[commit_id]
1085 1085 if hasattr(commit, 'parent_ids'):
1086 1086 parent_ids = commit.parent_ids
1087 1087 else:
1088 1088 parent_ids = commit.get_object().parent_ids
1089 1089
1090 1090 return [x.hex for x in parent_ids]
1091 1091 return _parents(repo_id, commit_id)
1092 1092
1093 1093 @reraise_safe_exceptions
1094 1094 def children(self, wire, commit_id):
1095 1095 cache_on, context_uid, repo_id = self._cache_on(wire)
1096 1096 region = self._region(wire)
1097 1097
1098 1098 head = self.head(wire)
1099 1099
1100 1100 @region.conditional_cache_on_arguments(condition=cache_on)
1101 1101 def _children(_repo_id, _commit_id):
1102 1102
1103 1103 output, __ = self.run_git_command(
1104 1104 wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])
1105 1105
1106 1106 child_ids = []
1107 1107 pat = re.compile(fr'^{commit_id}')
1108 1108 for line in output.splitlines():
1109 1109 line = safe_str(line)
1110 1110 if pat.match(line):
1111 1111 found_ids = line.split(' ')[1:]
1112 1112 child_ids.extend(found_ids)
1113 1113 break
1114 1114
1115 1115 return child_ids
1116 1116 return _children(repo_id, commit_id)
1117 1117
1118 1118 @reraise_safe_exceptions
1119 1119 def set_refs(self, wire, key, value):
1120 1120 repo_init = self._factory.repo_libgit2(wire)
1121 1121 with repo_init as repo:
1122 1122 repo.references.create(key, value, force=True)
1123 1123
1124 1124 @reraise_safe_exceptions
1125 1125 def update_refs(self, wire, key, value):
1126 1126 repo_init = self._factory.repo_libgit2(wire)
1127 1127 with repo_init as repo:
1128 1128 if key not in repo.references:
1129 1129 raise ValueError(f'Reference {key} not found in the repository')
1130 1130 repo.references.create(key, value, force=True)
1131 1131
1132 1132 @reraise_safe_exceptions
1133 1133 def create_branch(self, wire, branch_name, commit_id, force=False):
1134 1134 repo_init = self._factory.repo_libgit2(wire)
1135 1135 with repo_init as repo:
1136 1136 if commit_id:
1137 1137 commit = repo[commit_id]
1138 1138 else:
1139 1139 # if commit is not given just use the HEAD
1140 1140 commit = repo.head()
1141 1141
1142 1142 if force:
1143 1143 repo.branches.local.create(branch_name, commit, force=force)
1144 1144 elif not repo.branches.get(branch_name):
1145 1145 # create only if that branch isn't existing
1146 1146 repo.branches.local.create(branch_name, commit, force=force)
1147 1147
1148 1148 @reraise_safe_exceptions
1149 1149 def remove_ref(self, wire, key):
1150 1150 repo_init = self._factory.repo_libgit2(wire)
1151 1151 with repo_init as repo:
1152 1152 repo.references.delete(key)
1153 1153
1154 1154 @reraise_safe_exceptions
1155 1155 def tag_remove(self, wire, tag_name):
1156 1156 repo_init = self._factory.repo_libgit2(wire)
1157 1157 with repo_init as repo:
1158 1158 key = f'refs/tags/{tag_name}'
1159 1159 repo.references.delete(key)
1160 1160
1161 1161 @reraise_safe_exceptions
1162 1162 def tree_changes(self, wire, source_id, target_id):
1163 1163 repo = self._factory.repo(wire)
1164 1164 # source can be empty
1165 1165 source_id = safe_bytes(source_id if source_id else b'')
1166 1166 target_id = safe_bytes(target_id)
1167 1167
1168 1168 source = repo[source_id].tree if source_id else None
1169 1169 target = repo[target_id].tree
1170 1170 result = repo.object_store.tree_changes(source, target)
1171 1171
1172 1172 added = set()
1173 1173 modified = set()
1174 1174 deleted = set()
1175 1175 for (old_path, new_path), (_, _), (_, _) in list(result):
1176 1176 if new_path and old_path:
1177 1177 modified.add(new_path)
1178 1178 elif new_path and not old_path:
1179 1179 added.add(new_path)
1180 1180 elif not new_path and old_path:
1181 1181 deleted.add(old_path)
1182 1182
1183 1183 return list(added), list(modified), list(deleted)
1184 1184
1185 1185 @reraise_safe_exceptions
1186 1186 def tree_and_type_for_path(self, wire, commit_id, path):
1187 1187
1188 1188 cache_on, context_uid, repo_id = self._cache_on(wire)
1189 1189 region = self._region(wire)
1190 1190
1191 1191 @region.conditional_cache_on_arguments(condition=cache_on)
1192 1192 def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
1193 1193 repo_init = self._factory.repo_libgit2(wire)
1194 1194
1195 1195 with repo_init as repo:
1196 1196 commit = repo[commit_id]
1197 1197 try:
1198 1198 tree = commit.tree[path]
1199 1199 except KeyError:
1200 1200 return None, None, None
1201 1201
1202 1202 return tree.id.hex, tree.type_str, tree.filemode
1203 1203 return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1204 1204
1205 1205 @reraise_safe_exceptions
1206 1206 def tree_items(self, wire, tree_id):
1207 1207 cache_on, context_uid, repo_id = self._cache_on(wire)
1208 1208 region = self._region(wire)
1209 1209
1210 1210 @region.conditional_cache_on_arguments(condition=cache_on)
1211 1211 def _tree_items(_repo_id, _tree_id):
1212 1212
1213 1213 repo_init = self._factory.repo_libgit2(wire)
1214 1214 with repo_init as repo:
1215 1215 try:
1216 1216 tree = repo[tree_id]
1217 1217 except KeyError:
1218 1218 raise ObjectMissing(f'No tree with id: {tree_id}')
1219 1219
1220 1220 result = []
1221 1221 for item in tree:
1222 1222 item_sha = item.hex
1223 1223 item_mode = item.filemode
1224 1224 item_type = item.type_str
1225 1225
1226 1226 if item_type == 'commit':
1227 1227 # NOTE(marcink): submodules we translate to 'link' for backward compat
1228 1228 item_type = 'link'
1229 1229
1230 1230 result.append((item.name, item_mode, item_sha, item_type))
1231 1231 return result
1232 1232 return _tree_items(repo_id, tree_id)
1233 1233
1234 1234 @reraise_safe_exceptions
1235 1235 def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1236 1236 """
1237 1237 Old version that uses subprocess to call diff
1238 1238 """
1239 1239
1240 1240 flags = [
1241 1241 f'-U{context}', '--patch',
1242 1242 '--binary',
1243 1243 '--find-renames',
1244 1244 '--no-indent-heuristic',
1245 1245 # '--indent-heuristic',
1246 1246 #'--full-index',
1247 1247 #'--abbrev=40'
1248 1248 ]
1249 1249
1250 1250 if opt_ignorews:
1251 1251 flags.append('--ignore-all-space')
1252 1252
1253 1253 if commit_id_1 == self.EMPTY_COMMIT:
1254 1254 cmd = ['show'] + flags + [commit_id_2]
1255 1255 else:
1256 1256 cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
1257 1257
1258 1258 if file_filter:
1259 1259 cmd.extend(['--', file_filter])
1260 1260
1261 1261 diff, __ = self.run_git_command(wire, cmd)
1262 1262 # If we used 'show' command, strip first few lines (until actual diff
1263 1263 # starts)
1264 1264 if commit_id_1 == self.EMPTY_COMMIT:
1265 1265 lines = diff.splitlines()
1266 1266 x = 0
1267 1267 for line in lines:
1268 1268 if line.startswith(b'diff'):
1269 1269 break
1270 1270 x += 1
1271 1271 # Append new line just like 'diff' command do
1272 1272 diff = '\n'.join(lines[x:]) + '\n'
1273 1273 return diff
1274 1274
1275 1275 @reraise_safe_exceptions
1276 1276 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1277 1277 repo_init = self._factory.repo_libgit2(wire)
1278 1278
1279 1279 with repo_init as repo:
1280 1280 swap = True
1281 1281 flags = 0
1282 1282 flags |= pygit2.GIT_DIFF_SHOW_BINARY
1283 1283
1284 1284 if opt_ignorews:
1285 1285 flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE
1286 1286
1287 1287 if commit_id_1 == self.EMPTY_COMMIT:
1288 1288 comm1 = repo[commit_id_2]
1289 1289 diff_obj = comm1.tree.diff_to_tree(
1290 1290 flags=flags, context_lines=context, swap=swap)
1291 1291
1292 1292 else:
1293 1293 comm1 = repo[commit_id_2]
1294 1294 comm2 = repo[commit_id_1]
1295 1295 diff_obj = comm1.tree.diff_to_tree(
1296 1296 comm2.tree, flags=flags, context_lines=context, swap=swap)
1297 1297 similar_flags = 0
1298 1298 similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
1299 1299 diff_obj.find_similar(flags=similar_flags)
1300 1300
1301 1301 if file_filter:
1302 1302 for p in diff_obj:
1303 1303 if p.delta.old_file.path == file_filter:
1304 1304 return BytesEnvelope(p.data) or BytesEnvelope(b'')
1305 1305 # fo matching path == no diff
1306 1306 return BytesEnvelope(b'')
1307 1307
1308 1308 return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')
1309 1309
1310 1310 @reraise_safe_exceptions
1311 1311 def node_history(self, wire, commit_id, path, limit):
1312 1312 cache_on, context_uid, repo_id = self._cache_on(wire)
1313 1313 region = self._region(wire)
1314 1314
1315 1315 @region.conditional_cache_on_arguments(condition=cache_on)
1316 1316 def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
1317 1317 # optimize for n==1, rev-list is much faster for that use-case
1318 1318 if limit == 1:
1319 1319 cmd = ['rev-list', '-1', commit_id, '--', path]
1320 1320 else:
1321 1321 cmd = ['log']
1322 1322 if limit:
1323 1323 cmd.extend(['-n', str(safe_int(limit, 0))])
1324 1324 cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
1325 1325
1326 1326 output, __ = self.run_git_command(wire, cmd)
1327 1327 commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
1328 1328
1329 1329 return [x for x in commit_ids]
1330 1330 return _node_history(context_uid, repo_id, commit_id, path, limit)
1331 1331
1332 1332 @reraise_safe_exceptions
1333 1333 def node_annotate_legacy(self, wire, commit_id, path):
1334 1334 # note: replaced by pygit2 implementation
1335 1335 cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
1336 1336 # -l ==> outputs long shas (and we need all 40 characters)
1337 1337 # --root ==> doesn't put '^' character for boundaries
1338 1338 # -r commit_id ==> blames for the given commit
1339 1339 output, __ = self.run_git_command(wire, cmd)
1340 1340
1341 1341 result = []
1342 1342 for i, blame_line in enumerate(output.splitlines()[:-1]):
1343 1343 line_no = i + 1
1344 1344 blame_commit_id, line = re.split(rb' ', blame_line, 1)
1345 1345 result.append((line_no, blame_commit_id, line))
1346 1346
1347 1347 return result
1348 1348
1349 1349 @reraise_safe_exceptions
1350 1350 def node_annotate(self, wire, commit_id, path):
1351 1351
1352 1352 result_libgit = []
1353 1353 repo_init = self._factory.repo_libgit2(wire)
1354 1354 with repo_init as repo:
1355 1355 commit = repo[commit_id]
1356 1356 blame_obj = repo.blame(path, newest_commit=commit_id)
1357 1357 file_content = commit.tree[path].data
1358 1358 for i, line in enumerate(splitnewlines(file_content)):
1359 1359 line_no = i + 1
1360 1360 hunk = blame_obj.for_line(line_no)
1361 1361 blame_commit_id = hunk.final_commit_id.hex
1362 1362
1363 1363 result_libgit.append((line_no, blame_commit_id, line))
1364 1364
1365 1365 return BinaryEnvelope(result_libgit)
1366 1366
1367 1367 @reraise_safe_exceptions
1368 1368 def update_server_info(self, wire, force=False):
1369 1369 cmd = ['update-server-info']
1370 1370 if force:
1371 1371 cmd += ['--force']
1372 1372 output, __ = self.run_git_command(wire, cmd)
1373 1373 return output.splitlines()
1374 1374
1375 1375 @reraise_safe_exceptions
1376 1376 def get_all_commit_ids(self, wire):
1377 1377
1378 1378 cache_on, context_uid, repo_id = self._cache_on(wire)
1379 1379 region = self._region(wire)
1380 1380
1381 1381 @region.conditional_cache_on_arguments(condition=cache_on)
1382 1382 def _get_all_commit_ids(_context_uid, _repo_id):
1383 1383
1384 1384 cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
1385 1385 try:
1386 1386 output, __ = self.run_git_command(wire, cmd)
1387 1387 return output.splitlines()
1388 1388 except Exception:
1389 1389 # Can be raised for empty repositories
1390 1390 return []
1391 1391
1392 1392 @region.conditional_cache_on_arguments(condition=cache_on)
1393 1393 def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
1394 1394 repo_init = self._factory.repo_libgit2(wire)
1395 1395 from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
1396 1396 results = []
1397 1397 with repo_init as repo:
1398 1398 for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
1399 1399 results.append(commit.id.hex)
1400 1400
1401 1401 return _get_all_commit_ids(context_uid, repo_id)
1402 1402
1403 1403 @reraise_safe_exceptions
1404 1404 def run_git_command(self, wire, cmd, **opts):
1405 1405 path = wire.get('path', None)
1406 debug_mode = rhodecode.ConfigGet().get_bool('debug')
1406 debug_mode = vcsserver.ConfigGet().get_bool('debug')
1407 1407
1408 1408 if path and os.path.isdir(path):
1409 1409 opts['cwd'] = path
1410 1410
1411 1411 if '_bare' in opts:
1412 1412 _copts = []
1413 1413 del opts['_bare']
1414 1414 else:
1415 1415 _copts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']
1416 1416 safe_call = False
1417 1417 if '_safe' in opts:
1418 1418 # no exc on failure
1419 1419 del opts['_safe']
1420 1420 safe_call = True
1421 1421
1422 1422 if '_copts' in opts:
1423 1423 _copts.extend(opts['_copts'] or [])
1424 1424 del opts['_copts']
1425 1425
1426 1426 gitenv = os.environ.copy()
1427 1427 gitenv.update(opts.pop('extra_env', {}))
1428 1428 # need to clean fix GIT_DIR !
1429 1429 if 'GIT_DIR' in gitenv:
1430 1430 del gitenv['GIT_DIR']
1431 1431 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
1432 1432 gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'
1433 1433
1434 1434 cmd = [settings.GIT_EXECUTABLE()] + _copts + cmd
1435 1435 _opts = {'env': gitenv, 'shell': False}
1436 1436
1437 1437 proc = None
1438 1438 try:
1439 1439 _opts.update(opts)
1440 1440 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
1441 1441
1442 1442 return b''.join(proc), b''.join(proc.stderr)
1443 1443 except OSError as err:
1444 1444 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
1445 1445 call_opts = {}
1446 1446 if debug_mode:
1447 1447 call_opts = _opts
1448 1448
1449 1449 tb_err = ("Couldn't run git command ({}).\n"
1450 1450 "Original error was:{}\n"
1451 1451 "Call options:{}\n"
1452 1452 .format(cmd, err, call_opts))
1453 1453 log.exception(tb_err)
1454 1454 if safe_call:
1455 1455 return '', err
1456 1456 else:
1457 1457 raise exceptions.VcsException()(tb_err)
1458 1458 finally:
1459 1459 if proc:
1460 1460 proc.close()
1461 1461
1462 1462 @reraise_safe_exceptions
1463 1463 def install_hooks(self, wire, force=False):
1464 1464 from vcsserver.hook_utils import install_git_hooks
1465 1465 bare = self.bare(wire)
1466 1466 path = wire['path']
1467 1467 binary_dir = settings.BINARY_DIR
1468 1468 if binary_dir:
1469 1469 os.path.join(binary_dir, 'python3')
1470 1470 return install_git_hooks(path, bare, force_create=force)
1471 1471
1472 1472 @reraise_safe_exceptions
1473 1473 def get_hooks_info(self, wire):
1474 1474 from vcsserver.hook_utils import (
1475 1475 get_git_pre_hook_version, get_git_post_hook_version)
1476 1476 bare = self.bare(wire)
1477 1477 path = wire['path']
1478 1478 return {
1479 1479 'pre_version': get_git_pre_hook_version(path, bare),
1480 1480 'post_version': get_git_post_hook_version(path, bare),
1481 1481 }
1482 1482
1483 1483 @reraise_safe_exceptions
1484 1484 def set_head_ref(self, wire, head_name):
1485 1485 log.debug('Setting refs/head to `%s`', head_name)
1486 1486 repo_init = self._factory.repo_libgit2(wire)
1487 1487 with repo_init as repo:
1488 1488 repo.set_head(f'refs/heads/{head_name}')
1489 1489
1490 1490 return [head_name] + [f'set HEAD to refs/heads/{head_name}']
1491 1491
1492 1492 @reraise_safe_exceptions
1493 1493 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1494 1494 archive_dir_name, commit_id, cache_config):
1495 1495
1496 1496 def file_walker(_commit_id, path):
1497 1497 repo_init = self._factory.repo_libgit2(wire)
1498 1498
1499 1499 with repo_init as repo:
1500 1500 commit = repo[commit_id]
1501 1501
1502 1502 if path in ['', '/']:
1503 1503 tree = commit.tree
1504 1504 else:
1505 1505 tree = commit.tree[path.rstrip('/')]
1506 1506 tree_id = tree.id.hex
1507 1507 try:
1508 1508 tree = repo[tree_id]
1509 1509 except KeyError:
1510 1510 raise ObjectMissing(f'No tree with id: {tree_id}')
1511 1511
1512 1512 index = LibGit2Index.Index()
1513 1513 index.read_tree(tree)
1514 1514 file_iter = index
1515 1515
1516 1516 for file_node in file_iter:
1517 1517 file_path = file_node.path
1518 1518 mode = file_node.mode
1519 1519 is_link = stat.S_ISLNK(mode)
1520 1520 if mode == pygit2.GIT_FILEMODE_COMMIT:
1521 1521 log.debug('Skipping path %s as a commit node', file_path)
1522 1522 continue
1523 1523 yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)
1524 1524
1525 1525 return store_archive_in_cache(
1526 1526 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
@@ -1,959 +1,959 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18
19 19 import os
20 20 import subprocess
21 21 from urllib.error import URLError
22 22 import urllib.parse
23 23 import logging
24 24 import posixpath as vcspath
25 25 import io
26 26 import urllib.request
27 27 import urllib.parse
28 28 import urllib.error
29 29 import traceback
30 30
31 31 import svn.client # noqa
32 32 import svn.core # noqa
33 33 import svn.delta # noqa
34 34 import svn.diff # noqa
35 35 import svn.fs # noqa
36 36 import svn.repos # noqa
37 37
38 import rhodecode
38 import vcsserver
39 39 from vcsserver import svn_diff, exceptions, subprocessio, settings
40 40 from vcsserver.base import (
41 41 RepoFactory,
42 42 raise_from_original,
43 43 ArchiveNode,
44 44 store_archive_in_cache,
45 45 BytesEnvelope,
46 46 BinaryEnvelope,
47 47 )
48 48 from vcsserver.exceptions import NoContentException
49 49 from vcsserver.vcs_base import RemoteBase
50 50 from vcsserver.lib.str_utils import safe_str, safe_bytes
51 51 from vcsserver.lib.type_utils import assert_bytes
52 52 from vcsserver.lib.svnremoterepo import svnremoterepo
53 53 from vcsserver.lib.svn_txn_utils import store_txn_id_data
54 54
55 55 log = logging.getLogger(__name__)
56 56
57 57
# Maps legacy `pre-X-compatible` fs-config flag names to the concrete
# repository format version string they correspond to.
svn_compatible_versions_map = {
    'pre-1.4-compatible': '1.3',
    'pre-1.5-compatible': '1.4',
    'pre-1.6-compatible': '1.5',
    'pre-1.8-compatible': '1.7',
    'pre-1.9-compatible': '1.8',
}

# Default `compatible-version` used when creating new repositories.
current_compatible_version = '1.14'
67 67
68 68
def reraise_safe_exceptions(func):
    """Decorator for converting svn exceptions to something neutral."""
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as exc:
            if hasattr(exc, '_vcs_kind'):
                # Already a recognised VCS error — propagate untouched.
                raise
            log.exception("Unhandled exception in svn remote call")
            raise_from_original(exceptions.UnhandledException(exc), exc)
    return wrapper
80 80
81 81
class SubversionFactory(RepoFactory):
    """Creates/opens low-level svn repository objects for a given wire."""

    repo_type = 'svn'

    def _create_repo(self, wire, create, compatible_version):
        # Canonicalize the filesystem path as the svn bindings require.
        path = svn.core.svn_path_canonicalize(wire['path'])
        if create:
            fs_config = {'compatible-version': current_compatible_version}
            if compatible_version:

                # Accept either a legacy flag name or a direct version string.
                compatible_version_string = \
                    svn_compatible_versions_map.get(compatible_version) \
                    or compatible_version
                fs_config['compatible-version'] = compatible_version_string

            log.debug('Create SVN repo with config `%s`', fs_config)
            repo = svn.repos.create(path, "", "", None, fs_config)
        else:
            repo = svn.repos.open(path)

        log.debug('repository created: got SVN object: %s', repo)
        return repo

    def repo(self, wire, create=False, compatible_version=None):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, compatible_version)
109 109
110 110
# Translates svn node kinds into the generic node-type names used by the API.
NODE_TYPE_MAPPING = {
    svn.core.svn_node_file: 'file',
    svn.core.svn_node_dir: 'dir',
}
115 115
116 116
117 117 class SvnRemote(RemoteBase):
118 118
    def __init__(self, factory, hg_factory=None):
        # `hg_factory` is accepted for API parity with other remotes; unused here.
        self._factory = factory

        self._bulk_methods = {
            # NOT supported in SVN ATM...
        }
        # Per-attribute handlers dispatched by `bulk_file_request`.
        self._bulk_file_methods = {
            "size": self.get_file_size,
            "data": self.get_file_content,
            "flags": self.get_node_type,
            "is_binary": self.is_binary,
            "md5": self.md5_hash
        }
    @reraise_safe_exceptions
    def bulk_file_request(self, wire, commit_id, path, pre_load):
        """Fetch several file attributes (``pre_load``) in one cached call."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        # since we use unified API, we need to cast from str to int for SVN
        commit_id = int(commit_id)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
            result = {}
            for attr in pre_load:
                try:
                    method = self._bulk_file_methods[attr]
                    wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
                    result[attr] = method(wire, _commit_id, _path)
                except KeyError as e:
                    raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
            return result

        # pre_load is sorted so the cache key is order-independent.
        return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
154 154
155 155 @reraise_safe_exceptions
156 156 def discover_svn_version(self):
157 157 try:
158 158 import svn.core
159 159 svn_ver = svn.core.SVN_VERSION
160 160 except ImportError:
161 161 svn_ver = None
162 162 return safe_str(svn_ver)
163 163
    @reraise_safe_exceptions
    def is_empty(self, wire):
        """True when the repository has no revisions (youngest rev == 0)."""
        try:
            return self.lookup(wire, -1) == 0
        except Exception:
            # Best-effort: treat unreadable stores as non-empty.
            log.exception("failed to read object_store")
            return False
171 171
    def check_url(self, url, config):
        """Validate that ``url`` points at a reachable svn repository.

        ``config`` is unused here; kept for API parity with other remotes.
        """
        # uuid function gets only valid UUID from proper repo, else
        # throws exception
        username, password, src_url = self.get_url_and_credentials(url)
        try:
            svnremoterepo(safe_bytes(username), safe_bytes(password), safe_bytes(src_url)).svn().uuid
        except Exception:
            tb = traceback.format_exc()
            log.debug("Invalid Subversion url: `%s`, tb: %s", url, tb)
            raise URLError(f'"{url}" is not a valid Subversion source url.')
        return True
184 184
    def is_path_valid_repository(self, wire, path):
        """Cheaply (then cached, authoritatively) check ``path`` is an svn repo."""
        # NOTE(marcink): short circuit the check for SVN repo
        # the repos.open might be expensive to check, but we have one cheap
        # pre-condition that we can use, to check for 'format' file
        if not os.path.isfile(os.path.join(path, 'format')):
            return False

        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _assert_correct_path(_context_uid, _repo_id, fast_check):

            try:
                svn.repos.open(path)
            except svn.core.SubversionException:
                tb = traceback.format_exc()
                log.debug("Invalid Subversion path `%s`, tb: %s", path, tb)
                return False
            return True

        return _assert_correct_path(context_uid, repo_id, True)
207 207
208 208 @reraise_safe_exceptions
209 209 def verify(self, wire,):
210 210 repo_path = wire['path']
211 211 if not self.is_path_valid_repository(wire, repo_path):
212 212 raise Exception(
213 213 f"Path {repo_path} is not a valid Subversion repository.")
214 214
215 215 cmd = ['svnadmin', 'info', repo_path]
216 216 stdout, stderr = subprocessio.run_command(cmd)
217 217 return stdout
218 218
219 219 @reraise_safe_exceptions
220 220 def lookup(self, wire, revision):
221 221 if revision not in [-1, None, 'HEAD']:
222 222 raise NotImplementedError
223 223 repo = self._factory.repo(wire)
224 224 fs_ptr = svn.repos.fs(repo)
225 225 head = svn.fs.youngest_rev(fs_ptr)
226 226 return head
227 227
228 228 @reraise_safe_exceptions
229 229 def lookup_interval(self, wire, start_ts, end_ts):
230 230 repo = self._factory.repo(wire)
231 231 fsobj = svn.repos.fs(repo)
232 232 start_rev = None
233 233 end_rev = None
234 234 if start_ts:
235 235 start_ts_svn = apr_time_t(start_ts)
236 236 start_rev = svn.repos.dated_revision(repo, start_ts_svn) + 1
237 237 else:
238 238 start_rev = 1
239 239 if end_ts:
240 240 end_ts_svn = apr_time_t(end_ts)
241 241 end_rev = svn.repos.dated_revision(repo, end_ts_svn)
242 242 else:
243 243 end_rev = svn.fs.youngest_rev(fsobj)
244 244 return start_rev, end_rev
245 245
    @reraise_safe_exceptions
    def revision_properties(self, wire, revision):
        """Return the (cached) revision property list for ``revision``."""

        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _revision_properties(_repo_id, _revision):
            repo = self._factory.repo(wire)
            fs_ptr = svn.repos.fs(repo)
            return svn.fs.revision_proplist(fs_ptr, revision)
        return _revision_properties(repo_id, revision)
258 258
    def revision_changes(self, wire, revision):
        """Classify the file paths touched by ``revision``.

        Returns ``{'added': [...], 'changed': [...], 'removed': [...]}``.
        Directory nodes are skipped entirely.
        """
        repo = self._factory.repo(wire)
        fsobj = svn.repos.fs(repo)
        rev_root = svn.fs.revision_root(fsobj, revision)

        # Replay the revision into a ChangeCollector to gather per-path actions.
        editor = svn.repos.ChangeCollector(fsobj, rev_root)
        editor_ptr, editor_baton = svn.delta.make_editor(editor)
        base_dir = ""
        send_deltas = False
        svn.repos.replay2(
            rev_root, base_dir, svn.core.SVN_INVALID_REVNUM, send_deltas,
            editor_ptr, editor_baton, None)

        added = []
        changed = []
        removed = []

        # TODO: CHANGE_ACTION_REPLACE: Figure out where it belongs
        for path, change in editor.changes.items():
            # TODO: Decide what to do with directory nodes. Subversion can add
            # empty directories.

            if change.item_kind == svn.core.svn_node_dir:
                continue
            if change.action in [svn.repos.CHANGE_ACTION_ADD]:
                added.append(path)
            elif change.action in [svn.repos.CHANGE_ACTION_MODIFY,
                                   svn.repos.CHANGE_ACTION_REPLACE]:
                changed.append(path)
            elif change.action in [svn.repos.CHANGE_ACTION_DELETE]:
                removed.append(path)
            else:
                raise NotImplementedError(
                    "Action {} not supported on path {}".format(
                        change.action, path))

        changes = {
            'added': added,
            'changed': changed,
            'removed': removed,
        }
        return changes
302 302
    @reraise_safe_exceptions
    def node_history(self, wire, path, revision, limit):
        """Return up to ``limit`` revisions in which ``path`` changed, newest first."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        # NOTE(review): inner name `_assert_correct_path` looks copy-pasted
        # from `is_path_valid_repository`; it computes history, not validity.
        @region.conditional_cache_on_arguments(condition=cache_on)
        def _assert_correct_path(_context_uid, _repo_id, _path, _revision, _limit):
            cross_copies = False
            repo = self._factory.repo(wire)
            fsobj = svn.repos.fs(repo)
            rev_root = svn.fs.revision_root(fsobj, revision)

            history_revisions = []
            history = svn.fs.node_history(rev_root, path)
            # Step to the first *previous* change before collecting.
            history = svn.fs.history_prev(history, cross_copies)
            while history:
                __, node_revision = svn.fs.history_location(history)
                history_revisions.append(node_revision)
                if limit and len(history_revisions) >= limit:
                    break
                history = svn.fs.history_prev(history, cross_copies)
            return history_revisions
        return _assert_correct_path(context_uid, repo_id, path, revision, limit)
326 326
    @reraise_safe_exceptions
    def node_properties(self, wire, path, revision):
        """Return the (cached) svn property dict of ``path`` at ``revision``."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _node_properties(_repo_id, _path, _revision):
            repo = self._factory.repo(wire)
            fsobj = svn.repos.fs(repo)
            rev_root = svn.fs.revision_root(fsobj, revision)
            return svn.fs.node_proplist(rev_root, path)
        return _node_properties(repo_id, path, revision)
339 339
    def file_annotate(self, wire, path, revision):
        """Blame ``path`` at ``revision``; returns [(line_no, revision, line), ...]."""
        # svn.client.blame works on URIs, so build a file:// URI for the path.
        abs_path = 'file://' + urllib.request.pathname2url(
            vcspath.join(wire['path'], path))
        file_uri = svn.core.svn_path_canonicalize(abs_path)

        start_rev = svn_opt_revision_value_t(0)
        peg_rev = svn_opt_revision_value_t(revision)
        end_rev = peg_rev

        annotations = []

        def receiver(line_no, revision, author, date, line, pool):
            # author/date are available but deliberately not collected here.
            annotations.append((line_no, revision, line))

        # TODO: Cannot use blame5, missing typemap function in the swig code
        try:
            svn.client.blame2(
                file_uri, peg_rev, start_rev, end_rev,
                receiver, svn.client.create_context())
        except svn.core.SubversionException as exc:
            log.exception("Error during blame operation.")
            raise Exception(
                f"Blame not supported or file does not exist at path {path}. "
                f"Error {exc}.")

        return BinaryEnvelope(annotations)
366 366
    @reraise_safe_exceptions
    def get_node_type(self, wire, revision=None, path=''):
        """Return 'file', 'dir' or None for ``path`` at ``revision`` (HEAD if None)."""

        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _get_node_type(_repo_id, _revision, _path):
            repo = self._factory.repo(wire)
            fs_ptr = svn.repos.fs(repo)
            if _revision is None:
                _revision = svn.fs.youngest_rev(fs_ptr)
            root = svn.fs.revision_root(fs_ptr, _revision)
            node = svn.fs.check_path(root, path)
            return NODE_TYPE_MAPPING.get(node, None)
        return _get_node_type(repo_id, revision, path)
383 383
    @reraise_safe_exceptions
    def get_nodes(self, wire, revision=None, path=''):
        """List directory entries of ``path`` as ``(name, node_type)`` tuples."""

        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _get_nodes(_repo_id, _path, _revision):
            repo = self._factory.repo(wire)
            fsobj = svn.repos.fs(repo)
            if _revision is None:
                _revision = svn.fs.youngest_rev(fsobj)
            root = svn.fs.revision_root(fsobj, _revision)
            entries = svn.fs.dir_entries(root, path)
            result = []
            for entry_path, entry_info in entries.items():
                result.append(
                    (entry_path, NODE_TYPE_MAPPING.get(entry_info.kind, None)))
            return result
        return _get_nodes(repo_id, path, revision)
404 404
405 405 @reraise_safe_exceptions
406 406 def get_file_content(self, wire, rev=None, path=''):
407 407 repo = self._factory.repo(wire)
408 408 fsobj = svn.repos.fs(repo)
409 409
410 410 if rev is None:
411 411 rev = svn.fs.youngest_rev(fsobj)
412 412
413 413 root = svn.fs.revision_root(fsobj, rev)
414 414 content = svn.core.Stream(svn.fs.file_contents(root, path))
415 415 return BytesEnvelope(content.read())
416 416
417 417 @reraise_safe_exceptions
418 418 def get_file_size(self, wire, revision=None, path=''):
419 419
420 420 cache_on, context_uid, repo_id = self._cache_on(wire)
421 421 region = self._region(wire)
422 422
423 423 @region.conditional_cache_on_arguments(condition=cache_on)
424 424 def _get_file_size(_repo_id, _revision, _path):
425 425 repo = self._factory.repo(wire)
426 426 fsobj = svn.repos.fs(repo)
427 427 if _revision is None:
428 428 _revision = svn.fs.youngest_revision(fsobj)
429 429 root = svn.fs.revision_root(fsobj, _revision)
430 430 size = svn.fs.file_length(root, path)
431 431 return size
432 432 return _get_file_size(repo_id, revision, path)
433 433
    def create_repository(self, wire, compatible_version=None):
        """Create a new svn repository at ``wire['path']``."""
        log.info('Creating Subversion repository in path "%s"', wire['path'])
        self._factory.repo(wire, create=True,
                           compatible_version=compatible_version)
438 438
439 439 def get_url_and_credentials(self, src_url) -> tuple[str, str, str]:
440 440 obj = urllib.parse.urlparse(src_url)
441 441 username = obj.username or ''
442 442 password = obj.password or ''
443 443 return username, password, src_url
444 444
    def import_remote_repository(self, wire, src_url):
        """Mirror a remote svn repository into the local one via
        ``svnrdump dump | svnadmin load``.

        Raises on invalid local path or when either subprocess fails.
        """
        repo_path = wire['path']
        if not self.is_path_valid_repository(wire, repo_path):
            raise Exception(
                f"Path {repo_path} is not a valid Subversion repository.")

        username, password, src_url = self.get_url_and_credentials(src_url)
        rdump_cmd = ['svnrdump', 'dump', '--non-interactive',
                     '--trust-server-cert-failures=unknown-ca']
        if username and password:
            rdump_cmd += ['--username', username, '--password', password]
        rdump_cmd += [src_url]

        # Pipe the dump stream straight into `svnadmin load`.
        rdump = subprocess.Popen(
            rdump_cmd,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        load = subprocess.Popen(
            ['svnadmin', 'load', repo_path], stdin=rdump.stdout)

        # TODO: johbo: This can be a very long operation, might be better
        # to track some kind of status and provide an api to check if the
        # import is done.
        rdump.wait()
        load.wait()

        log.debug('Return process ended with code: %s', rdump.returncode)
        if rdump.returncode != 0:
            errors = rdump.stderr.read()
            log.error('svnrdump dump failed: statuscode %s: message: %s', rdump.returncode, errors)

            reason = 'UNKNOWN'
            if b'svnrdump: E230001:' in errors:
                reason = 'INVALID_CERTIFICATE'

            if reason == 'UNKNOWN':
                reason = f'UNKNOWN:{safe_str(errors)}'

            raise Exception(
                'Failed to dump the remote repository from {}. Reason:{}'.format(
                    src_url, reason))
        if load.returncode != 0:
            raise Exception(
                f'Failed to load the dump of remote repository from {src_url}.')
488 488
    def commit(self, wire, message, author, timestamp, updated, removed):
        """Create a new revision applying ``updated``/``removed`` node dicts.

        Returns the committed revision id. ``timestamp`` (unix time), when
        given, overwrites the svn:date revprop of the new revision.
        """
        message = safe_bytes(message)
        author = safe_bytes(author)

        repo = self._factory.repo(wire)
        fsobj = svn.repos.fs(repo)

        rev = svn.fs.youngest_rev(fsobj)
        txn = svn.repos.fs_begin_txn_for_commit(repo, rev, author, message)
        txn_root = svn.fs.txn_root(txn)

        for node in updated:
            TxnNodeProcessor(node, txn_root).update()
        for node in removed:
            TxnNodeProcessor(node, txn_root).remove()

        # Persist txn metadata so server-side hooks can recognise this commit
        # as internally generated.
        svn_txn_id = safe_str(svn.fs.svn_fs_txn_name(txn))
        full_repo_path = wire['path']
        txn_id_data = {'svn_txn_id': svn_txn_id, 'rc_internal_commit': True}

        store_txn_id_data(full_repo_path, svn_txn_id, txn_id_data)
        commit_id = svn.repos.fs_commit_txn(repo, txn)

        if timestamp:
            apr_time = apr_time_t(timestamp)
            ts_formatted = svn.core.svn_time_to_cstring(apr_time)
            svn.fs.change_rev_prop(fsobj, commit_id, 'svn:date', ts_formatted)

        log.debug('Committed revision "%s" to "%s".', commit_id, wire['path'])
        return commit_id
520 520
521 521 @reraise_safe_exceptions
522 522 def diff(self, wire, rev1, rev2, path1=None, path2=None,
523 523 ignore_whitespace=False, context=3):
524 524
525 525 wire.update(cache=False)
526 526 repo = self._factory.repo(wire)
527 527 diff_creator = SvnDiffer(
528 528 repo, rev1, path1, rev2, path2, ignore_whitespace, context)
529 529 try:
530 530 return BytesEnvelope(diff_creator.generate_diff())
531 531 except svn.core.SubversionException as e:
532 532 log.exception(
533 533 "Error during diff operation operation. "
534 534 "Path might not exist %s, %s", path1, path2)
535 535 return BytesEnvelope(b'')
536 536
    @reraise_safe_exceptions
    def is_large_file(self, wire, path):
        # SVN has no largefiles/LFS concept; present for API parity.
        return False
540 540
    @reraise_safe_exceptions
    def is_binary(self, wire, rev, path):
        """Heuristic binary check: a NUL byte in the content marks it binary."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _is_binary(_repo_id, _rev, _path):
            # NOTE(review): get_file_content returns a BytesEnvelope; the
            # truthiness and `in` checks rely on that wrapper delegating to
            # its bytes payload — confirm against BytesEnvelope's definition.
            raw_bytes = self.get_file_content(wire, rev, path)
            if not raw_bytes:
                return False
            return b'\0' in raw_bytes

        return _is_binary(repo_id, rev, path)
554 554
    @reraise_safe_exceptions
    def md5_hash(self, wire, rev, path):
        # Stub: md5 is not implemented for SVN; always returns ''.
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _md5_hash(_repo_id, _rev, _path):
            return ''

        return _md5_hash(repo_id, rev, path)
565 565
    @reraise_safe_exceptions
    def run_svn_command(self, wire, cmd, **opts):
        """Execute an svn binary command, returning ``(stdout, stderr)`` bytes.

        With ``_safe=True`` an OSError is reported as ``('', error_text)``
        instead of raising.
        """
        path = wire.get('path', None)
        debug_mode = vcsserver.ConfigGet().get_bool('debug')

        if path and os.path.isdir(path):
            opts['cwd'] = path

        safe_call = opts.pop('_safe', False)

        # Run with the current environment plus any caller-provided extras.
        svnenv = os.environ.copy()
        svnenv.update(opts.pop('extra_env', {}))

        _opts = {'env': svnenv, 'shell': False}

        try:
            _opts.update(opts)
            proc = subprocessio.SubprocessIOChunker(cmd, **_opts)

            return b''.join(proc), b''.join(proc.stderr)
        except OSError as err:
            if safe_call:
                return '', safe_str(err).strip()
            else:
                cmd = ' '.join(map(safe_str, cmd))  # human friendly CMD
                call_opts = {}
                if debug_mode:
                    # Only leak full call options (incl. env) in debug mode.
                    call_opts = _opts

                tb_err = ("Couldn't run svn command ({}).\n"
                          "Original error was:{}\n"
                          "Call options:{}\n"
                          .format(cmd, err, call_opts))
                log.exception(tb_err)
                raise exceptions.VcsException()(tb_err)
601 601
602 602 @reraise_safe_exceptions
603 603 def install_hooks(self, wire, force=False):
604 604 from vcsserver.hook_utils import install_svn_hooks
605 605 repo_path = wire['path']
606 606 binary_dir = settings.BINARY_DIR
607 607 executable = None
608 608 if binary_dir:
609 609 executable = os.path.join(binary_dir, 'python3')
610 610 return install_svn_hooks(repo_path, force_create=force)
611 611
612 612 @reraise_safe_exceptions
613 613 def get_hooks_info(self, wire):
614 614 from vcsserver.hook_utils import (
615 615 get_svn_pre_hook_version, get_svn_post_hook_version)
616 616 repo_path = wire['path']
617 617 return {
618 618 'pre_version': get_svn_pre_hook_version(repo_path),
619 619 'post_version': get_svn_post_hook_version(repo_path),
620 620 }
621 621
    @reraise_safe_exceptions
    def set_head_ref(self, wire, head_name):
        # Subversion has no movable HEAD ref; no-op kept for API parity.
        pass
625 625
    @reraise_safe_exceptions
    def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
                     archive_dir_name, commit_id, cache_config):
        """Produce (and cache) an archive of the svn tree at ``commit_id``."""

        def walk_tree(root, root_dir, _commit_id):
            """
            Special recursive svn repo walker
            """
            root_dir = safe_bytes(root_dir)

            filemode_default = 0o100644
            filemode_executable = 0o100755

            file_iter = svn.fs.dir_entries(root, root_dir)
            for f_name in file_iter:
                f_type = NODE_TYPE_MAPPING.get(file_iter[f_name].kind, None)

                if f_type == 'dir':
                    # return only DIR, and then all entries in that dir
                    yield os.path.join(root_dir, f_name), {'mode': filemode_default}, f_type
                    new_root = os.path.join(root_dir, f_name)
                    yield from walk_tree(root, new_root, _commit_id)
                else:

                    f_path = os.path.join(root_dir, f_name).rstrip(b'/')
                    prop_list = svn.fs.node_proplist(root, f_path)

                    # svn:executable / svn:special props drive mode and symlink flag.
                    f_mode = filemode_default
                    if prop_list.get('svn:executable'):
                        f_mode = filemode_executable

                    f_is_link = False
                    if prop_list.get('svn:special'):
                        f_is_link = True

                    data = {
                        'is_link': f_is_link,
                        'mode': f_mode,
                        'content_stream': svn.core.Stream(svn.fs.file_contents(root, f_path)).read
                    }

                    yield f_path, data, f_type

        def file_walker(_commit_id, path):
            # Adapter turning walk_tree output into ArchiveNode entries.
            repo = self._factory.repo(wire)
            root = svn.fs.revision_root(svn.repos.fs(repo), int(commit_id))

            def no_content():
                raise NoContentException()

            for f_name, f_data, f_type in walk_tree(root, path, _commit_id):
                file_path = f_name

                if f_type == 'dir':
                    mode = f_data['mode']
                    yield ArchiveNode(file_path, mode, False, no_content)
                else:
                    mode = f_data['mode']
                    is_link = f_data['is_link']
                    data_stream = f_data['content_stream']
                    yield ArchiveNode(file_path, mode, is_link, data_stream)

        return store_archive_in_cache(
            file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
690 690
691 691
692 692 class SvnDiffer:
693 693 """
694 694 Utility to create diffs based on difflib and the Subversion api
695 695 """
696 696
697 697 binary_content = False
698 698
    def __init__(
            self, repo, src_rev, src_path, tgt_rev, tgt_path,
            ignore_whitespace, context):
        """Prepare revision roots and node kinds for both diff sides."""
        self.repo = repo
        self.ignore_whitespace = ignore_whitespace
        self.context = context

        fsobj = svn.repos.fs(repo)

        self.tgt_rev = tgt_rev
        self.tgt_path = tgt_path or ''
        self.tgt_root = svn.fs.revision_root(fsobj, tgt_rev)
        self.tgt_kind = svn.fs.check_path(self.tgt_root, self.tgt_path)

        self.src_rev = src_rev
        # Source path defaults to the target path (diff of one node over time).
        self.src_path = src_path or self.tgt_path
        self.src_root = svn.fs.revision_root(fsobj, src_rev)
        self.src_kind = svn.fs.check_path(self.src_root, self.src_path)

        self._validate()
719 719
720 720 def _validate(self):
721 721 if (self.tgt_kind != svn.core.svn_node_none and
722 722 self.src_kind != svn.core.svn_node_none and
723 723 self.src_kind != self.tgt_kind):
724 724 # TODO: johbo: proper error handling
725 725 raise Exception(
726 726 "Source and target are not compatible for diff generation. "
727 727 "Source type: %s, target type: %s" %
728 728 (self.src_kind, self.tgt_kind))
729 729
730 730 def generate_diff(self) -> bytes:
731 731 buf = io.BytesIO()
732 732 if self.tgt_kind == svn.core.svn_node_dir:
733 733 self._generate_dir_diff(buf)
734 734 else:
735 735 self._generate_file_diff(buf)
736 736 return buf.getvalue()
737 737
    def _generate_dir_diff(self, buf: io.BytesIO):
        """Diff two directory trees by collecting per-node changes, then
        emitting a node diff for each changed path in sorted order."""
        editor = DiffChangeEditor()
        editor_ptr, editor_baton = svn.delta.make_editor(editor)
        svn.repos.dir_delta2(
            self.src_root,
            self.src_path,
            '',  # src_entry
            self.tgt_root,
            self.tgt_path,
            editor_ptr, editor_baton,
            authorization_callback_allow_all,
            False,  # text_deltas
            svn.core.svn_depth_infinity,  # depth
            False,  # entry_props
            False,  # ignore_ancestry
        )

        for path, __, change in sorted(editor.changes):
            self._generate_node_diff(
                buf, change, path, self.tgt_path, path, self.src_path)
758 758
759 759 def _generate_file_diff(self, buf: io.BytesIO):
760 760 change = None
761 761 if self.src_kind == svn.core.svn_node_none:
762 762 change = "add"
763 763 elif self.tgt_kind == svn.core.svn_node_none:
764 764 change = "delete"
765 765 tgt_base, tgt_path = vcspath.split(self.tgt_path)
766 766 src_base, src_path = vcspath.split(self.src_path)
767 767 self._generate_node_diff(
768 768 buf, change, tgt_path, tgt_base, src_path, src_base)
769 769
    def _generate_node_diff(
            self, buf: io.BytesIO, change, tgt_path, tgt_base, src_path, src_base):
        """Write a git-style unified diff for one node into ``buf``.

        ``change`` is 'add', 'delete' or None (modify). Binary nodes get a
        placeholder header instead of content hunks.
        """
        tgt_path_bytes = safe_bytes(tgt_path)
        tgt_path = safe_str(tgt_path)

        src_path_bytes = safe_bytes(src_path)
        src_path = safe_str(src_path)

        if self.src_rev == self.tgt_rev and tgt_base == src_base:
            # makes consistent behaviour with git/hg to return empty diff if
            # we compare same revisions
            return

        tgt_full_path = vcspath.join(tgt_base, tgt_path)
        src_full_path = vcspath.join(src_base, src_path)

        self.binary_content = False
        mime_type = self._get_mime_type(tgt_full_path)

        if mime_type and not mime_type.startswith(b'text'):
            self.binary_content = True
            buf.write(b"=" * 67 + b'\n')
            buf.write(b"Cannot display: file marked as a binary type.\n")
            buf.write(b"svn:mime-type = %s\n" % mime_type)
            buf.write(b"Index: %b\n" % tgt_path_bytes)
            buf.write(b"=" * 67 + b'\n')
            buf.write(b"diff --git a/%b b/%b\n" % (tgt_path_bytes, tgt_path_bytes))

        if change == 'add':
            # TODO: johbo: SVN is missing a zero here compared to git
            buf.write(b"new file mode 10644\n")

            # TODO(marcink): intro to binary detection of svn patches
            # if self.binary_content:
            #     buf.write(b'GIT binary patch\n')

            buf.write(b"--- /dev/null\t(revision 0)\n")
            src_lines = []
        else:
            if change == 'delete':
                buf.write(b"deleted file mode 10644\n")

            # TODO(marcink): intro to binary detection of svn patches
            # if self.binary_content:
            #     buf.write('GIT binary patch\n')

            buf.write(b"--- a/%b\t(revision %d)\n" % (src_path_bytes, self.src_rev))
            src_lines = self._svn_readlines(self.src_root, src_full_path)

        if change == 'delete':
            buf.write(b"+++ /dev/null\t(revision %d)\n" % self.tgt_rev)
            tgt_lines = []
        else:
            buf.write(b"+++ b/%b\t(revision %d)\n" % (tgt_path_bytes, self.tgt_rev))
            tgt_lines = self._svn_readlines(self.tgt_root, tgt_full_path)

        # we made our diff header, time to generate the diff content into our buffer

        if not self.binary_content:
            udiff = svn_diff.unified_diff(
                src_lines, tgt_lines, context=self.context,
                ignore_blank_lines=self.ignore_whitespace,
                ignore_case=False,
                ignore_space_changes=self.ignore_whitespace)

            buf.writelines(udiff)
837 837
    def _get_mime_type(self, path) -> bytes:
        """Return the node's svn:mime-type prop, checking target then source.

        NOTE(review): node_prop can yield None when the prop is unset, so the
        declared ``-> bytes`` is optimistic — callers guard with truthiness.
        """
        try:
            mime_type = svn.fs.node_prop(
                self.tgt_root, path, svn.core.SVN_PROP_MIME_TYPE)
        except svn.core.SubversionException:
            # Path may only exist on the source side (deletes).
            mime_type = svn.fs.node_prop(
                self.src_root, path, svn.core.SVN_PROP_MIME_TYPE)
        return mime_type
846 846
    def _svn_readlines(self, fs_root, node_path):
        """Return the node's content split into lines (keeping line endings).

        Returns [] for binary diffs and for non-file/non-symlink nodes.
        """
        if self.binary_content:
            return []
        node_kind = svn.fs.check_path(fs_root, node_path)
        if node_kind not in (
                svn.core.svn_node_file, svn.core.svn_node_symlink):
            return []
        content = svn.core.Stream(
            svn.fs.file_contents(fs_root, node_path)).read()

        return content.splitlines(True)
858 858
859 859
class DiffChangeEditor(svn.delta.Editor):
    """
    Records changes between two given revisions

    Collects (path, kind, action) tuples in ``self.changes`` as the delta
    editor callbacks fire; only file-level events are recorded.
    """

    def __init__(self):
        self.changes = []

    def delete_entry(self, path, revision, parent_baton, pool=None):
        self.changes.append((path, None, 'delete'))

    def add_file(
            self, path, parent_baton, copyfrom_path, copyfrom_revision,
            file_pool=None):
        self.changes.append((path, 'file', 'add'))

    def open_file(self, path, parent_baton, base_revision, file_pool=None):
        self.changes.append((path, 'file', 'change'))
878 878
879 879
def authorization_callback_allow_all(root, path, pool):
    """Subversion authz callback that grants read access to every path.

    All arguments are accepted for signature compatibility and ignored.
    """
    return True
882 882
883 883
class TxnNodeProcessor:
    """
    Utility to process the change of one node within a transaction root.

    It encapsulates the knowledge of how to add, update or remove
    a node for a given transaction root. The purpose is to support the method
    `SvnRemote.commit`.
    """

    def __init__(self, node, txn_root):
        # Node paths must already be bytes at this layer.
        assert_bytes(node['path'])

        self.node = node
        self.txn_root = txn_root

    def update(self):
        """Create the node if needed, then apply content and properties."""
        self._ensure_parent_dirs()
        self._add_file_if_node_does_not_exist()
        self._update_file_content()
        self._update_file_properties()

    def remove(self):
        """Delete the node from the transaction root."""
        svn.fs.delete(self.txn_root, self.node['path'])
        # TODO: Clean up directory if empty

    def _ensure_parent_dirs(self):
        # Walk upwards from the node's parent, collecting every directory
        # that is still missing, then create them top-down.
        missing = []
        parent = vcspath.dirname(self.node['path'])
        while not self._svn_path_exists(parent):
            missing.append(parent)
            parent = vcspath.dirname(parent)

        for directory in reversed(missing):
            log.debug('Creating missing directory "%s"', directory)
            svn.fs.make_dir(self.txn_root, directory)

    def _svn_path_exists(self, path):
        # svn_node_none marks a missing node; any other kind exists.
        return svn.fs.check_path(self.txn_root, path) != svn.core.svn_node_none

    def _add_file_if_node_does_not_exist(self):
        node_kind = svn.fs.check_path(self.txn_root, self.node['path'])
        if node_kind == svn.core.svn_node_none:
            svn.fs.make_file(self.txn_root, self.node['path'])

    def _update_file_content(self):
        # Content must be bytes before it is handed to the delta layer.
        assert_bytes(self.node['content'])

        handler, baton = svn.fs.apply_textdelta(
            self.txn_root, self.node['path'], None, None)
        svn.delta.svn_txdelta_send_string(self.node['content'], handler, baton)

    def _update_file_properties(self):
        for prop_name, prop_value in self.node.get('properties', {}).items():
            svn.fs.change_node_prop(
                self.txn_root, self.node['path'],
                safe_bytes(prop_name), safe_bytes(prop_value))
941 941
942 942
def apr_time_t(timestamp):
    """
    Convert a Python timestamp (seconds) into the APR timestamp type
    ``apr_time_t``, which counts microseconds.
    """
    microseconds_per_second = 1E6
    return int(timestamp * microseconds_per_second)
948 948
949 949
def svn_opt_revision_value_t(num):
    """
    Put `num` into a `svn_opt_revision_value_t` structure.

    Returns an ``svn_opt_revision_t`` whose kind addresses the given
    revision number.
    """
    rev_value = svn.core.svn_opt_revision_value_t()
    rev_value.number = num

    rev = svn.core.svn_opt_revision_t()
    rev.kind = svn.core.svn_opt_revision_number
    rev.value = rev_value
    return rev
General Comments 0
You need to be logged in to leave comments. Login now