hooks: added a few python3 related fixes to handle bytes vs str on Mercurial hooks
super-admin
r1108:62b12ad9 python3
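On Python 3, Mercurial hands hook arguments, ui config values and node ids to the hook as bytes, while the RhodeCode hook protocol (msgpack/JSON payloads, message writers) works with str. The diff below converts at the boundary using safe_str and ascii_str from vcsserver.str_utils. A minimal sketch of the conversion pattern, with hypothetical stand-ins for those helpers (their real implementations may differ):

# Hypothetical stand-ins for the vcsserver.str_utils helpers used in this commit.

def safe_str(value, encoding='utf-8') -> str:
    """Decode bytes to str, pass str through unchanged."""
    if isinstance(value, bytes):
        return value.decode(encoding, errors='replace')
    return str(value)

def ascii_str(value) -> str:
    """Decode ASCII-only bytes (e.g. hex commit ids) to str."""
    if isinstance(value, bytes):
        return value.decode('ascii')
    return str(value)

# Values arriving from a py3 Mercurial hook are bytes:
hooktype = safe_str(b'pretxnchangegroup')    # -> 'pretxnchangegroup'
commit_id = ascii_str(b'62b12ad9')           # -> '62b12ad9'
assert isinstance(hooktype, str) and isinstance(commit_id, str)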
@@ -1,738 +1,769 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # RhodeCode VCSServer provides access to different vcs backends via network.
4 4 # Copyright (C) 2014-2020 RhodeCode GmbH
5 5 #
6 6 # This program is free software; you can redistribute it and/or modify
7 7 # it under the terms of the GNU General Public License as published by
8 8 # the Free Software Foundation; either version 3 of the License, or
9 9 # (at your option) any later version.
10 10 #
11 11 # This program is distributed in the hope that it will be useful,
12 12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 14 # GNU General Public License for more details.
15 15 #
16 16 # You should have received a copy of the GNU General Public License
17 17 # along with this program; if not, write to the Free Software Foundation,
18 18 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
20 20 import io
21 21 import os
22 22 import sys
23 23 import logging
24 24 import collections
25 25 import importlib
26 26 import base64
27 27 import msgpack
28 import dataclasses
29 import pygit2
28 30
29 31 from http.client import HTTPConnection
30 32
31 33
32 34 import mercurial.scmutil
33 35 import mercurial.node
34 36
35 37 from vcsserver.lib.rc_json import json
36 38 from vcsserver import exceptions, subprocessio, settings
37 from vcsserver.str_utils import safe_bytes
39 from vcsserver.str_utils import ascii_str, safe_str
40 from vcsserver.remote.git import Repository
38 41
39 42 log = logging.getLogger(__name__)
40 43
41 44
42 45 class HooksHttpClient(object):
43 46 proto = 'msgpack.v1'
44 47 connection = None
45 48
46 49 def __init__(self, hooks_uri):
47 50 self.hooks_uri = hooks_uri
48 51
49 52 def __call__(self, method, extras):
50 53 connection = HTTPConnection(self.hooks_uri)
51 54 # binary msgpack body
52 55 headers, body = self._serialize(method, extras)
53 56 try:
54 57 connection.request('POST', '/', body, headers)
55 58 except Exception as error:
56 59 log.error('Hooks calling Connection failed on %s, org error: %s', connection.__dict__, error)
57 60 raise
58 61 response = connection.getresponse()
59 62 try:
60 63 return msgpack.load(response)
61 64 except Exception:
62 65 response_data = response.read()
63 66 log.exception('Failed to decode hook response json data. '
64 67 'response_code:%s, raw_data:%s',
65 68 response.status, response_data)
66 69 raise
67 70
68 71 @classmethod
69 72 def _serialize(cls, hook_name, extras):
70 73 data = {
71 74 'method': hook_name,
72 75 'extras': extras
73 76 }
74 77 headers = {
75 78 'rc-hooks-protocol': cls.proto
76 79 }
77 80 return headers, msgpack.packb(data)
78 81
79 82
80 83 class HooksDummyClient(object):
81 84 def __init__(self, hooks_module):
82 85 self._hooks_module = importlib.import_module(hooks_module)
83 86
84 87 def __call__(self, hook_name, extras):
85 88 with self._hooks_module.Hooks() as hooks:
86 89 return getattr(hooks, hook_name)(extras)
87 90
88 91
89 92 class HooksShadowRepoClient(object):
90 93
91 94 def __call__(self, hook_name, extras):
92 95 return {'output': '', 'status': 0}
93 96
94 97
95 98 class RemoteMessageWriter(object):
96 99 """Writer base class."""
97 100 def write(self, message):
98 101 raise NotImplementedError()
99 102
100 103
101 104 class HgMessageWriter(RemoteMessageWriter):
102 105 """Writer that knows how to send messages to mercurial clients."""
103 106
104 107 def __init__(self, ui):
105 108 self.ui = ui
106 109
107 def write(self, message):
110 def write(self, message: str):
108 111 # TODO: Check why the quiet flag is set by default.
109 112 old = self.ui.quiet
110 113 self.ui.quiet = False
111 114 self.ui.status(message.encode('utf-8'))
112 115 self.ui.quiet = old
113 116
114 117
115 118 class GitMessageWriter(RemoteMessageWriter):
116 119 """Writer that knows how to send messages to git clients."""
117 120
118 121 def __init__(self, stdout=None):
119 122 self.stdout = stdout or sys.stdout
120 123
121 def write(self, message):
122 self.stdout.write(safe_bytes(message))
124 def write(self, message: str):
125 self.stdout.write(message)
123 126
124 127
125 128 class SvnMessageWriter(RemoteMessageWriter):
126 129 """Writer that knows how to send messages to svn clients."""
127 130
128 131 def __init__(self, stderr=None):
129 132 # SVN needs data sent to stderr for back-to-client messaging
130 133 self.stderr = stderr or sys.stderr
131 134
132 135 def write(self, message):
133 136 self.stderr.write(message.encode('utf-8'))
134 137
135 138
136 139 def _handle_exception(result):
137 140 exception_class = result.get('exception')
138 141 exception_traceback = result.get('exception_traceback')
139 142
140 143 if exception_traceback:
141 144 log.error('Got traceback from remote call:%s', exception_traceback)
142 145
143 146 if exception_class == 'HTTPLockedRC':
144 147 raise exceptions.RepositoryLockedException()(*result['exception_args'])
145 148 elif exception_class == 'HTTPBranchProtected':
146 149 raise exceptions.RepositoryBranchProtectedException()(*result['exception_args'])
147 150 elif exception_class == 'RepositoryError':
148 151 raise exceptions.VcsException()(*result['exception_args'])
149 152 elif exception_class:
150 raise Exception('Got remote exception "%s" with args "%s"' %
151 (exception_class, result['exception_args']))
153 raise Exception(
154 f"""Got remote exception "{exception_class}" with args "{result['exception_args']}" """
155 )
152 156
153 157
154 158 def _get_hooks_client(extras):
155 159 hooks_uri = extras.get('hooks_uri')
156 160 is_shadow_repo = extras.get('is_shadow_repo')
157 161 if hooks_uri:
158 162 return HooksHttpClient(extras['hooks_uri'])
159 163 elif is_shadow_repo:
160 164 return HooksShadowRepoClient()
161 165 else:
162 166 return HooksDummyClient(extras['hooks_module'])
163 167
164 168
165 169 def _call_hook(hook_name, extras, writer):
166 170 hooks_client = _get_hooks_client(extras)
167 171 log.debug('Hooks, using client:%s', hooks_client)
168 172 result = hooks_client(hook_name, extras)
169 173 log.debug('Hooks got result: %s', result)
170
171 174 _handle_exception(result)
172 175 writer.write(result['output'])
173 176
174 177 return result['status']
175 178
176 179
177 180 def _extras_from_ui(ui):
178 181 hook_data = ui.config(b'rhodecode', b'RC_SCM_DATA')
179 182 if not hook_data:
180 183 # maybe it's inside environ ?
181 184 env_hook_data = os.environ.get('RC_SCM_DATA')
182 185 if env_hook_data:
183 186 hook_data = env_hook_data
184 187
185 188 extras = {}
186 189 if hook_data:
187 190 extras = json.loads(hook_data)
188 191 return extras
189 192
190 193
191 194 def _rev_range_hash(repo, node, check_heads=False):
192 195 from vcsserver.hgcompat import get_ctx
193 196
194 197 commits = []
195 198 revs = []
196 199 start = get_ctx(repo, node).rev()
197 200 end = len(repo)
198 201 for rev in range(start, end):
199 202 revs.append(rev)
200 203 ctx = get_ctx(repo, rev)
201 commit_id = mercurial.node.hex(ctx.node())
202 branch = ctx.branch()
204 commit_id = ascii_str(mercurial.node.hex(ctx.node()))
205 branch = safe_str(ctx.branch())
203 206 commits.append((commit_id, branch))
204 207
205 208 parent_heads = []
206 209 if check_heads:
207 210 parent_heads = _check_heads(repo, start, end, revs)
208 211 return commits, parent_heads
209 212
210 213
211 214 def _check_heads(repo, start, end, commits):
212 215 from vcsserver.hgcompat import get_ctx
213 216 changelog = repo.changelog
214 217 parents = set()
215 218
216 219 for new_rev in commits:
217 220 for p in changelog.parentrevs(new_rev):
218 221 if p == mercurial.node.nullrev:
219 222 continue
220 223 if p < start:
221 224 parents.add(p)
222 225
223 226 for p in parents:
224 227 branch = get_ctx(repo, p).branch()
225 228 # The heads descending from that parent, on the same branch
226 parent_heads = set([p])
227 reachable = set([p])
229 parent_heads = {p}
230 reachable = {p}
228 231 for x in range(p + 1, end):
229 232 if get_ctx(repo, x).branch() != branch:
230 233 continue
231 234 for pp in changelog.parentrevs(x):
232 235 if pp in reachable:
233 236 reachable.add(x)
234 237 parent_heads.discard(pp)
235 238 parent_heads.add(x)
236 239 # More than one head? Suggest merging
237 240 if len(parent_heads) > 1:
238 241 return list(parent_heads)
239 242
240 243 return []
241 244
242 245
243 246 def _get_git_env():
244 247 env = {}
245 248 for k, v in os.environ.items():
246 249 if k.startswith('GIT'):
247 250 env[k] = v
248 251
249 252 # serialized version
250 253 return [(k, v) for k, v in env.items()]
251 254
252 255
253 256 def _get_hg_env(old_rev, new_rev, txnid, repo_path):
254 257 env = {}
255 258 for k, v in os.environ.items():
256 259 if k.startswith('HG'):
257 260 env[k] = v
258 261
259 262 env['HG_NODE'] = old_rev
260 263 env['HG_NODE_LAST'] = new_rev
261 264 env['HG_TXNID'] = txnid
262 265 env['HG_PENDING'] = repo_path
263 266
264 267 return [(k, v) for k, v in env.items()]
265 268
266 269
267 270 def repo_size(ui, repo, **kwargs):
268 271 extras = _extras_from_ui(ui)
269 272 return _call_hook('repo_size', extras, HgMessageWriter(ui))
270 273
271 274
272 275 def pre_pull(ui, repo, **kwargs):
273 276 extras = _extras_from_ui(ui)
274 277 return _call_hook('pre_pull', extras, HgMessageWriter(ui))
275 278
276 279
277 280 def pre_pull_ssh(ui, repo, **kwargs):
278 281 extras = _extras_from_ui(ui)
279 282 if extras and extras.get('SSH'):
280 283 return pre_pull(ui, repo, **kwargs)
281 284 return 0
282 285
283 286
284 287 def post_pull(ui, repo, **kwargs):
285 288 extras = _extras_from_ui(ui)
286 289 return _call_hook('post_pull', extras, HgMessageWriter(ui))
287 290
288 291
289 292 def post_pull_ssh(ui, repo, **kwargs):
290 293 extras = _extras_from_ui(ui)
291 294 if extras and extras.get('SSH'):
292 295 return post_pull(ui, repo, **kwargs)
293 296 return 0
294 297
295 298
296 299 def pre_push(ui, repo, node=None, **kwargs):
297 300 """
298 301 Mercurial pre_push hook
299 302 """
300 303 extras = _extras_from_ui(ui)
301 304 detect_force_push = extras.get('detect_force_push')
302 305
303 306 rev_data = []
304 if node and kwargs.get('hooktype') == 'pretxnchangegroup':
307 hook_type: str = safe_str(kwargs.get('hooktype'))
308
309 if node and hook_type == 'pretxnchangegroup':
305 310 branches = collections.defaultdict(list)
306 311 commits, _heads = _rev_range_hash(repo, node, check_heads=detect_force_push)
307 312 for commit_id, branch in commits:
308 313 branches[branch].append(commit_id)
309 314
310 315 for branch, commits in branches.items():
311 old_rev = kwargs.get('node_last') or commits[0]
316 old_rev = ascii_str(kwargs.get('node_last')) or commits[0]
312 317 rev_data.append({
313 318 'total_commits': len(commits),
314 319 'old_rev': old_rev,
315 320 'new_rev': commits[-1],
316 321 'ref': '',
317 322 'type': 'branch',
318 323 'name': branch,
319 324 })
320 325
321 326 for push_ref in rev_data:
322 327 push_ref['multiple_heads'] = _heads
323 328
324 329 repo_path = os.path.join(
325 330 extras.get('repo_store', ''), extras.get('repository', ''))
326 331 push_ref['hg_env'] = _get_hg_env(
327 332 old_rev=push_ref['old_rev'],
328 new_rev=push_ref['new_rev'], txnid=kwargs.get('txnid'),
333 new_rev=push_ref['new_rev'], txnid=ascii_str(kwargs.get('txnid')),
329 334 repo_path=repo_path)
330 335
331 extras['hook_type'] = kwargs.get('hooktype', 'pre_push')
336 extras['hook_type'] = hook_type or 'pre_push'
332 337 extras['commit_ids'] = rev_data
333 338
334 339 return _call_hook('pre_push', extras, HgMessageWriter(ui))
335 340
336 341
337 342 def pre_push_ssh(ui, repo, node=None, **kwargs):
338 343 extras = _extras_from_ui(ui)
339 344 if extras.get('SSH'):
340 345 return pre_push(ui, repo, node, **kwargs)
341 346
342 347 return 0
343 348
344 349
345 350 def pre_push_ssh_auth(ui, repo, node=None, **kwargs):
346 351 """
347 352 Mercurial pre_push hook for SSH
348 353 """
349 354 extras = _extras_from_ui(ui)
350 355 if extras.get('SSH'):
351 356 permission = extras['SSH_PERMISSIONS']
352 357
353 358 if 'repository.write' == permission or 'repository.admin' == permission:
354 359 return 0
355 360
356 361 # non-zero ret code
357 362 return 1
358 363
359 364 return 0
360 365
361 366
362 367 def post_push(ui, repo, node, **kwargs):
363 368 """
364 369 Mercurial post_push hook
365 370 """
366 371 extras = _extras_from_ui(ui)
367 372
368 373 commit_ids = []
369 374 branches = []
370 375 bookmarks = []
371 376 tags = []
377 hook_type: str = safe_str(kwargs.get('hooktype'))
372 378
373 379 commits, _heads = _rev_range_hash(repo, node)
374 380 for commit_id, branch in commits:
375 381 commit_ids.append(commit_id)
376 382 if branch not in branches:
377 383 branches.append(branch)
378 384
379 if hasattr(ui, '_rc_pushkey_branches'):
380 bookmarks = ui._rc_pushkey_branches
385 if hasattr(ui, '_rc_pushkey_bookmarks'):
386 bookmarks = ui._rc_pushkey_bookmarks
381 387
382 extras['hook_type'] = kwargs.get('hooktype', 'post_push')
388 extras['hook_type'] = hook_type or 'post_push'
383 389 extras['commit_ids'] = commit_ids
390
384 391 extras['new_refs'] = {
385 392 'branches': branches,
386 393 'bookmarks': bookmarks,
387 394 'tags': tags
388 395 }
389 396
390 397 return _call_hook('post_push', extras, HgMessageWriter(ui))
391 398
392 399
393 400 def post_push_ssh(ui, repo, node, **kwargs):
394 401 """
395 402 Mercurial post_push hook for SSH
396 403 """
397 404 if _extras_from_ui(ui).get('SSH'):
398 405 return post_push(ui, repo, node, **kwargs)
399 406 return 0
400 407
401 408
402 409 def key_push(ui, repo, **kwargs):
403 410 from vcsserver.hgcompat import get_ctx
404 if kwargs['new'] != '0' and kwargs['namespace'] == 'bookmarks':
411
412 if kwargs['new'] != b'0' and kwargs['namespace'] == b'bookmarks':
405 413 # store new bookmarks in our UI object propagated later to post_push
406 ui._rc_pushkey_branches = get_ctx(repo, kwargs['key']).bookmarks()
414 ui._rc_pushkey_bookmarks = get_ctx(repo, kwargs['key']).bookmarks()
407 415 return
408 416
409 417
410 418 # backward compat
411 419 log_pull_action = post_pull
412 420
413 421 # backward compat
414 422 log_push_action = post_push
415 423
416 424
417 425 def handle_git_pre_receive(unused_repo_path, unused_revs, unused_env):
418 426 """
419 427 Old hook name: keep here for backward compatibility.
420 428
421 429 This is only required when the installed git hooks are not upgraded.
422 430 """
423 431 pass
424 432
425 433
426 434 def handle_git_post_receive(unused_repo_path, unused_revs, unused_env):
427 435 """
428 436 Old hook name: keep here for backward compatibility.
429 437
430 438 This is only required when the installed git hooks are not upgraded.
431 439 """
432 440 pass
433 441
434 442
435 HookResponse = collections.namedtuple('HookResponse', ('status', 'output'))
443 @dataclasses.dataclass
444 class HookResponse:
445 status: int
446 output: str
436 447
437 448
438 def git_pre_pull(extras):
449 def git_pre_pull(extras) -> HookResponse:
439 450 """
440 451 Pre pull hook.
441 452
442 453 :param extras: dictionary containing the keys defined in simplevcs
443 454 :type extras: dict
444 455
445 456 :return: status code of the hook. 0 for success.
446 457 :rtype: int
447 458 """
448 459
449 460 if 'pull' not in extras['hooks']:
450 461 return HookResponse(0, '')
451 462
452 stdout = io.BytesIO()
463 stdout = io.StringIO()
453 464 try:
454 status = _call_hook('pre_pull', extras, GitMessageWriter(stdout))
465 status_code = _call_hook('pre_pull', extras, GitMessageWriter(stdout))
455 466
456 467 except Exception as error:
457 468 log.exception('Failed to call pre_pull hook')
458 status = 128
459 stdout.write(safe_bytes(f'ERROR: {error}\n'))
469 status_code = 128
470 stdout.write(f'ERROR: {error}\n')
460 471
461 return HookResponse(status, stdout.getvalue())
472 return HookResponse(status_code, stdout.getvalue())
462 473
463 474
464 def git_post_pull(extras):
475 def git_post_pull(extras) -> HookResponse:
465 476 """
466 477 Post pull hook.
467 478
468 479 :param extras: dictionary containing the keys defined in simplevcs
469 480 :type extras: dict
470 481
471 482 :return: status code of the hook. 0 for success.
472 483 :rtype: int
473 484 """
474 485 if 'pull' not in extras['hooks']:
475 486 return HookResponse(0, '')
476 487
477 stdout = io.BytesIO()
488 stdout = io.StringIO()
478 489 try:
479 490 status = _call_hook('post_pull', extras, GitMessageWriter(stdout))
480 491 except Exception as error:
481 492 status = 128
482 stdout.write(safe_bytes(f'ERROR: {error}\n'))
493 stdout.write(f'ERROR: {error}\n')
483 494
484 495 return HookResponse(status, stdout.getvalue())
485 496
486 497
487 498 def _parse_git_ref_lines(revision_lines):
488 499 rev_data = []
489 500 for revision_line in revision_lines or []:
490 501 old_rev, new_rev, ref = revision_line.strip().split(' ')
491 502 ref_data = ref.split('/', 2)
492 503 if ref_data[1] in ('tags', 'heads'):
493 504 rev_data.append({
494 505 # NOTE(marcink):
495 506 # we're unable to tell total_commits for git at this point
496 507 # but we set the variable for consistency with GIT
497 508 'total_commits': -1,
498 509 'old_rev': old_rev,
499 510 'new_rev': new_rev,
500 511 'ref': ref,
501 512 'type': ref_data[1],
502 513 'name': ref_data[2],
503 514 })
504 515 return rev_data
505 516
506 517
507 def git_pre_receive(unused_repo_path, revision_lines, env):
518 def git_pre_receive(unused_repo_path, revision_lines, env) -> int:
508 519 """
509 520 Pre push hook.
510 521
511 :param extras: dictionary containing the keys defined in simplevcs
512 :type extras: dict
513
514 522 :return: status code of the hook. 0 for success.
515 :rtype: int
516 523 """
517 524 extras = json.loads(env['RC_SCM_DATA'])
518 525 rev_data = _parse_git_ref_lines(revision_lines)
519 526 if 'push' not in extras['hooks']:
520 527 return 0
521 528 empty_commit_id = '0' * 40
522 529
523 530 detect_force_push = extras.get('detect_force_push')
524 531
525 532 for push_ref in rev_data:
526 533 # store our git-env which holds the temp store
527 534 push_ref['git_env'] = _get_git_env()
528 535 push_ref['pruned_sha'] = ''
529 536 if not detect_force_push:
530 537 # don't check for forced-push when we don't need to
531 538 continue
532 539
533 540 type_ = push_ref['type']
534 541 new_branch = push_ref['old_rev'] == empty_commit_id
535 542 delete_branch = push_ref['new_rev'] == empty_commit_id
536 543 if type_ == 'heads' and not (new_branch or delete_branch):
537 544 old_rev = push_ref['old_rev']
538 545 new_rev = push_ref['new_rev']
539 546 cmd = [settings.GIT_EXECUTABLE, 'rev-list', old_rev, '^{}'.format(new_rev)]
540 547 stdout, stderr = subprocessio.run_command(
541 548 cmd, env=os.environ.copy())
542 549 # means we're having some non-reachable objects, this forced push was used
543 550 if stdout:
544 551 push_ref['pruned_sha'] = stdout.splitlines()
545 552
546 553 extras['hook_type'] = 'pre_receive'
547 554 extras['commit_ids'] = rev_data
548 return _call_hook('pre_push', extras, GitMessageWriter())
555
556 stdout = sys.stdout
557 status_code = _call_hook('pre_push', extras, GitMessageWriter(stdout))
558
559 return status_code
549 560
550 561
551 def git_post_receive(unused_repo_path, revision_lines, env):
562 def git_post_receive(unused_repo_path, revision_lines, env) -> int:
552 563 """
553 564 Post push hook.
554 565
555 :param extras: dictionary containing the keys defined in simplevcs
556 :type extras: dict
557
558 566 :return: status code of the hook. 0 for success.
559 :rtype: int
560 567 """
561 568 extras = json.loads(env['RC_SCM_DATA'])
562 569 if 'push' not in extras['hooks']:
563 570 return 0
564 571
565 572 rev_data = _parse_git_ref_lines(revision_lines)
566 573
567 574 git_revs = []
568 575
569 576 # N.B.(skreft): it is ok to just call git, as git before calling a
570 577 # subcommand sets the PATH environment variable so that it point to the
571 578 # correct version of the git executable.
572 579 empty_commit_id = '0' * 40
573 580 branches = []
574 581 tags = []
575 582 for push_ref in rev_data:
576 583 type_ = push_ref['type']
577 584
578 585 if type_ == 'heads':
586 # starting new branch case
579 587 if push_ref['old_rev'] == empty_commit_id:
580 # starting new branch case
581 if push_ref['name'] not in branches:
582 branches.append(push_ref['name'])
588 push_ref_name = push_ref['name']
583 589
584 # Fix up head revision if needed
585 cmd = [settings.GIT_EXECUTABLE, 'show', 'HEAD']
590 if push_ref_name not in branches:
591 branches.append(push_ref_name)
592
593 need_head_set = ''
594 with Repository(os.getcwd()) as repo:
586 595 try:
587 subprocessio.run_command(cmd, env=os.environ.copy())
588 except Exception:
589 push_ref_name = push_ref['name']
590 cmd = [settings.GIT_EXECUTABLE, 'symbolic-ref', '"HEAD"', f'"refs/heads/{push_ref_name}"']
596 repo.head
597 except pygit2.GitError:
598 need_head_set = f'refs/heads/{push_ref_name}'
599
600 if need_head_set:
601 repo.set_head(need_head_set)
591 602 print(f"Setting default branch to {push_ref_name}")
592 subprocessio.run_command(cmd, env=os.environ.copy())
593 603
594 cmd = [settings.GIT_EXECUTABLE, 'for-each-ref',
595 '--format=%(refname)', 'refs/heads/*']
604 cmd = [settings.GIT_EXECUTABLE, 'for-each-ref', '--format=%(refname)', 'refs/heads/*']
596 605 stdout, stderr = subprocessio.run_command(
597 606 cmd, env=os.environ.copy())
598 heads = stdout
607 heads = safe_str(stdout)
599 608 heads = heads.replace(push_ref['ref'], '')
600 609 heads = ' '.join(head for head
601 610 in heads.splitlines() if head) or '.'
602 611 cmd = [settings.GIT_EXECUTABLE, 'log', '--reverse',
603 612 '--pretty=format:%H', '--', push_ref['new_rev'],
604 613 '--not', heads]
605 614 stdout, stderr = subprocessio.run_command(
606 615 cmd, env=os.environ.copy())
607 git_revs.extend(stdout.splitlines())
616 git_revs.extend(list(map(ascii_str, stdout.splitlines())))
617
618 # delete branch case
608 619 elif push_ref['new_rev'] == empty_commit_id:
609 # delete branch case
610 620 git_revs.append('delete_branch=>%s' % push_ref['name'])
611 621 else:
612 622 if push_ref['name'] not in branches:
613 623 branches.append(push_ref['name'])
614 624
615 625 cmd = [settings.GIT_EXECUTABLE, 'log',
616 626 '{old_rev}..{new_rev}'.format(**push_ref),
617 627 '--reverse', '--pretty=format:%H']
618 628 stdout, stderr = subprocessio.run_command(
619 629 cmd, env=os.environ.copy())
620 git_revs.extend(stdout.splitlines())
630 # we get bytes from stdout, we need str to be consistent
631 log_revs = list(map(ascii_str, stdout.splitlines()))
632 git_revs.extend(log_revs)
633
634 # Pure pygit2 impl. but still 2-3x slower :/
635 # results = []
636 #
637 # with Repository(os.getcwd()) as repo:
638 # repo_new_rev = repo[push_ref['new_rev']]
639 # repo_old_rev = repo[push_ref['old_rev']]
640 # walker = repo.walk(repo_new_rev.id, pygit2.GIT_SORT_TOPOLOGICAL)
641 #
642 # for commit in walker:
643 # if commit.id == repo_old_rev.id:
644 # break
645 # results.append(commit.id.hex)
646 # # reverse the order, can't use GIT_SORT_REVERSE
647 # log_revs = results[::-1]
648
621 649 elif type_ == 'tags':
622 650 if push_ref['name'] not in tags:
623 651 tags.append(push_ref['name'])
624 652 git_revs.append('tag=>%s' % push_ref['name'])
625 653
626 654 extras['hook_type'] = 'post_receive'
627 655 extras['commit_ids'] = git_revs
628 656 extras['new_refs'] = {
629 657 'branches': branches,
630 658 'bookmarks': [],
631 659 'tags': tags,
632 660 }
633 661
662 stdout = sys.stdout
663
634 664 if 'repo_size' in extras['hooks']:
635 665 try:
636 _call_hook('repo_size', extras, GitMessageWriter())
666 _call_hook('repo_size', extras, GitMessageWriter(stdout))
637 667 except Exception:
638 668 pass
639 669
640 return _call_hook('post_push', extras, GitMessageWriter())
670 status_code = _call_hook('post_push', extras, GitMessageWriter(stdout))
671 return status_code
641 672
642 673
643 674 def _get_extras_from_txn_id(path, txn_id):
644 675 extras = {}
645 676 try:
646 677 cmd = [settings.SVNLOOK_EXECUTABLE, 'pget',
647 678 '-t', txn_id,
648 679 '--revprop', path, 'rc-scm-extras']
649 680 stdout, stderr = subprocessio.run_command(
650 681 cmd, env=os.environ.copy())
651 682 extras = json.loads(base64.urlsafe_b64decode(stdout))
652 683 except Exception:
653 684 log.exception('Failed to extract extras info from txn_id')
654 685
655 686 return extras
656 687
657 688
658 689 def _get_extras_from_commit_id(commit_id, path):
659 690 extras = {}
660 691 try:
661 692 cmd = [settings.SVNLOOK_EXECUTABLE, 'pget',
662 693 '-r', commit_id,
663 694 '--revprop', path, 'rc-scm-extras']
664 695 stdout, stderr = subprocessio.run_command(
665 696 cmd, env=os.environ.copy())
666 697 extras = json.loads(base64.urlsafe_b64decode(stdout))
667 698 except Exception:
668 699 log.exception('Failed to extract extras info from commit_id')
669 700
670 701 return extras
671 702
672 703
673 704 def svn_pre_commit(repo_path, commit_data, env):
674 705 path, txn_id = commit_data
675 706 branches = []
676 707 tags = []
677 708
678 709 if env.get('RC_SCM_DATA'):
679 710 extras = json.loads(env['RC_SCM_DATA'])
680 711 else:
681 712 # fallback method to read from TXN-ID stored data
682 713 extras = _get_extras_from_txn_id(path, txn_id)
683 714 if not extras:
684 715 return 0
685 716
686 717 extras['hook_type'] = 'pre_commit'
687 718 extras['commit_ids'] = [txn_id]
688 719 extras['txn_id'] = txn_id
689 720 extras['new_refs'] = {
690 721 'total_commits': 1,
691 722 'branches': branches,
692 723 'bookmarks': [],
693 724 'tags': tags,
694 725 }
695 726
696 727 return _call_hook('pre_push', extras, SvnMessageWriter())
697 728
698 729
699 730 def svn_post_commit(repo_path, commit_data, env):
700 731 """
701 732 commit_data is path, rev, txn_id
702 733 """
703 734 if len(commit_data) == 3:
704 735 path, commit_id, txn_id = commit_data
705 736 elif len(commit_data) == 2:
706 737 log.error('Failed to extract txn_id from commit_data using legacy method. '
707 738 'Some functionality might be limited')
708 739 path, commit_id = commit_data
709 740 txn_id = None
710 741
711 742 branches = []
712 743 tags = []
713 744
714 745 if env.get('RC_SCM_DATA'):
715 746 extras = json.loads(env['RC_SCM_DATA'])
716 747 else:
717 748 # fallback method to read from TXN-ID stored data
718 749 extras = _get_extras_from_commit_id(commit_id, path)
719 750 if not extras:
720 751 return 0
721 752
722 753 extras['hook_type'] = 'post_commit'
723 754 extras['commit_ids'] = [commit_id]
724 755 extras['txn_id'] = txn_id
725 756 extras['new_refs'] = {
726 757 'branches': branches,
727 758 'bookmarks': [],
728 759 'tags': tags,
729 760 'total_commits': 1,
730 761 }
731 762
732 763 if 'repo_size' in extras['hooks']:
733 764 try:
734 765 _call_hook('repo_size', extras, SvnMessageWriter())
735 766 except Exception:
736 767 pass
737 768
738 769 return _call_hook('post_push', extras, SvnMessageWriter())
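The hunk above replaces the HookResponse namedtuple with a dataclass, so callers that unpacked the result as a tuple (as the smart-protocol handler in the next hunk did) must switch to attribute access. A minimal sketch of the difference, assuming only the two-field shape shown above:

import collections
import dataclasses

# old shape: tuple unpacking worked
HookResponseTuple = collections.namedtuple('HookResponse', ('status', 'output'))
status, output = HookResponseTuple(0, 'ok')

# new shape: attribute access only
@dataclasses.dataclass
class HookResponse:
    status: int
    output: str

resp = HookResponse(0, 'ok')
assert (resp.status, resp.output) == (0, 'ok')
# tuple unpacking would now raise: TypeError: cannot unpack non-iterable HookResponse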
@@ -1,413 +1,414 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 """Handles the Git smart protocol."""
19 19
20 20 import os
21 21 import socket
22 22 import logging
23 23
24 24 import dulwich.protocol
25 25 from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K
26 26 from webob import Request, Response, exc
27 27
28 28 from vcsserver.lib.rc_json import json
29 29 from vcsserver import hooks, subprocessio
30 30 from vcsserver.str_utils import ascii_bytes
31 31
32 32
33 33 log = logging.getLogger(__name__)
34 34
35 35
36 36 class FileWrapper(object):
37 37 """File wrapper that ensures how much data is read from it."""
38 38
39 39 def __init__(self, fd, content_length):
40 40 self.fd = fd
41 41 self.content_length = content_length
42 42 self.remain = content_length
43 43
44 44 def read(self, size):
45 45 if size <= self.remain:
46 46 try:
47 47 data = self.fd.read(size)
48 48 except socket.error:
49 49 raise IOError(self)
50 50 self.remain -= size
51 51 elif self.remain:
52 52 data = self.fd.read(self.remain)
53 53 self.remain = 0
54 54 else:
55 55 data = None
56 56 return data
57 57
58 58 def __repr__(self):
59 59 return '<FileWrapper %s len: %s, read: %s>' % (
60 60 self.fd, self.content_length, self.content_length - self.remain
61 61 )
62 62
63 63
64 64 class GitRepository(object):
65 65 """WSGI app for handling Git smart protocol endpoints."""
66 66
67 67 git_folder_signature = frozenset(('config', 'head', 'info', 'objects', 'refs'))
68 68 commands = frozenset(('git-upload-pack', 'git-receive-pack'))
69 69 valid_accepts = frozenset(('application/x-{}-result'.format(c) for c in commands))
70 70
71 71 # The last bytes are the SHA1 of the first 12 bytes.
72 72 EMPTY_PACK = (
73 73 b'PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08' +
74 74 b'\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
75 75 )
76 76 FLUSH_PACKET = b"0000"
77 77
78 78 SIDE_BAND_CAPS = frozenset((CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K))
79 79
80 80 def __init__(self, repo_name, content_path, git_path, update_server_info, extras):
81 81 files = frozenset(f.lower() for f in os.listdir(content_path))
82 82 valid_dir_signature = self.git_folder_signature.issubset(files)
83 83
84 84 if not valid_dir_signature:
85 85 raise OSError('%s missing git signature' % content_path)
86 86
87 87 self.content_path = content_path
88 88 self.repo_name = repo_name
89 89 self.extras = extras
90 90 self.git_path = git_path
91 91 self.update_server_info = update_server_info
92 92
93 93 def _get_fixedpath(self, path):
94 94 """
95 95 Small fix for repo_path
96 96
97 97 :param path:
98 98 """
99 99 path = path.split(self.repo_name, 1)[-1]
100 100 if path.startswith('.git'):
101 101 # for bare repos we still get the .git prefix inside, we skip it
102 102 # here, and remove from the service command
103 103 path = path[4:]
104 104
105 105 return path.strip('/')
106 106
107 107 def inforefs(self, request, unused_environ):
108 108 """
109 109 WSGI Response producer for HTTP GET Git Smart
110 110 HTTP /info/refs request.
111 111 """
112 112
113 113 git_command = request.GET.get('service')
114 114 if git_command not in self.commands:
115 115 log.debug('command %s not allowed', git_command)
116 116 return exc.HTTPForbidden()
117 117
118 118 # please, resist the urge to add '\n' to git capture and increment
119 119 # line count by 1.
120 120 # by git docs: Documentation/technical/http-protocol.txt#L214 \n is
121 121 # a part of protocol.
122 122 # The code in Git client not only does NOT need '\n', but actually
123 123 # blows up if you sprinkle "flush" (0000) as "0001\n".
124 124 # It reads binary, per number of bytes specified.
125 125 # if you do add '\n' as part of data, count it.
126 126 server_advert = '# service=%s\n' % git_command
127 127 packet_len = hex(len(server_advert) + 4)[2:].rjust(4, '0').lower()
128 128 try:
129 129 gitenv = dict(os.environ)
130 130 # forget all configs
131 131 gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
132 132 command = [self.git_path, git_command[4:], '--stateless-rpc',
133 133 '--advertise-refs', self.content_path]
134 134 out = subprocessio.SubprocessIOChunker(
135 135 command,
136 136 env=gitenv,
137 137 starting_values=[ascii_bytes(packet_len + server_advert) + self.FLUSH_PACKET],
138 138 shell=False
139 139 )
140 140 except OSError:
141 141 log.exception('Error processing command')
142 142 raise exc.HTTPExpectationFailed()
143 143
144 144 resp = Response()
145 145 resp.content_type = f'application/x-{git_command}-advertisement'
146 146 resp.charset = None
147 147 resp.app_iter = out
148 148
149 149 return resp
150 150
151 151 def _get_want_capabilities(self, request):
152 152 """Read the capabilities found in the first want line of the request."""
153 153 pos = request.body_file_seekable.tell()
154 154 first_line = request.body_file_seekable.readline()
155 155 request.body_file_seekable.seek(pos)
156 156
157 157 return frozenset(
158 158 dulwich.protocol.extract_want_line_capabilities(first_line)[1])
159 159
160 160 def _build_failed_pre_pull_response(self, capabilities, pre_pull_messages):
161 161 """
162 162 Construct a response with an empty PACK file.
163 163
164 164 We use an empty PACK file, as that would trigger the failure of the pull
165 165 or clone command.
166 166
167 167 We also print in the error output a message explaining why the command
168 168 was aborted.
169 169
170 170 If additionally, the user is accepting messages we send them the output
171 171 of the pre-pull hook.
172 172
173 173 Note that for clients not supporting side-band we just send them the
174 174 emtpy PACK file.
175 175 """
176 176
177 177 if self.SIDE_BAND_CAPS.intersection(capabilities):
178 178 response = []
179 179 proto = dulwich.protocol.Protocol(None, response.append)
180 180 proto.write_pkt_line(dulwich.protocol.NAK_LINE)
181 181
182 182 self._write_sideband_to_proto(proto, ascii_bytes(pre_pull_messages, allow_bytes=True), capabilities)
183 183 # N.B.(skreft): Do not change the sideband channel to 3, as that
184 184 # produces a fatal error in the client:
185 185 # fatal: error in sideband demultiplexer
186 186 proto.write_sideband(
187 187 dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS,
188 188 ascii_bytes('Pre pull hook failed: aborting\n', allow_bytes=True))
189 189 proto.write_sideband(
190 190 dulwich.protocol.SIDE_BAND_CHANNEL_DATA,
191 191 ascii_bytes(self.EMPTY_PACK, allow_bytes=True))
192 192
193 193 # writes b"0000" as default
194 194 proto.write_pkt_line(None)
195 195
196 196 return response
197 197 else:
198 198 return [ascii_bytes(self.EMPTY_PACK, allow_bytes=True)]
199 199
200 200 def _build_post_pull_response(self, response, capabilities, start_message, end_message):
201 201 """
202 202 Given a list response we inject the post-pull messages.
203 203
204 204 We only inject the messages if the client supports sideband, and the
205 205 response has the format:
206 206 0008NAK\n...0000
207 207
208 208 Note that we do not check the no-progress capability as by default, git
209 209 sends it, which effectively would block all messages.
210 210 """
211 211
212 212 if not self.SIDE_BAND_CAPS.intersection(capabilities):
213 213 return response
214 214
215 215 if not start_message and not end_message:
216 216 return response
217 217
218 218 try:
219 219 iter(response)
220 220 # iterator probably will work, we continue
221 221 except TypeError:
222 222 raise TypeError(f'response must be an iterator: got {type(response)}')
223 223 if isinstance(response, (list, tuple)):
224 224 raise TypeError(f'response must be an iterator: got {type(response)}')
225 225
226 226 def injected_response():
227 227
228 228 do_loop = 1
229 229 header_injected = 0
230 230 next_item = None
231 231 has_item = False
232 232 while do_loop:
233 233
234 234 try:
235 235 next_item = next(response)
236 236 except StopIteration:
237 237 do_loop = 0
238 238
239 239 if has_item:
240 240 # last item ! alter it now
241 241 if do_loop == 0 and item.endswith(self.FLUSH_PACKET):
242 242 new_response = [item[:-4]]
243 243 new_response.extend(self._get_messages(end_message, capabilities))
244 244 new_response.append(self.FLUSH_PACKET)
245 245 item = b''.join(new_response)
246 246
247 247 yield item
248 248 has_item = True
249 249 item = next_item
250 250
251 251 # alter item if it's the initial chunk
252 252 if not header_injected and item.startswith(b'0008NAK\n'):
253 253 new_response = [b'0008NAK\n']
254 254 new_response.extend(self._get_messages(start_message, capabilities))
255 255 new_response.append(item[8:])
256 256 item = b''.join(new_response)
257 257 header_injected = 1
258 258
259 259 return injected_response()
260 260
261 261 def _write_sideband_to_proto(self, proto, data, capabilities):
262 262 """
263 263 Write the data to the proto's sideband number 2 == SIDE_BAND_CHANNEL_PROGRESS
264 264
265 265 We do not use dulwich's write_sideband directly as it only supports
266 266 side-band-64k.
267 267 """
268 268 if not data:
269 269 return
270 270
271 271 # N.B.(skreft): The values below are explained in the pack protocol
272 272 # documentation, section Packfile Data.
273 273 # https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
274 274 if CAPABILITY_SIDE_BAND_64K in capabilities:
275 275 chunk_size = 65515
276 276 elif CAPABILITY_SIDE_BAND in capabilities:
277 277 chunk_size = 995
278 278 else:
279 279 return
280 280
281 281 chunker = (data[i:i + chunk_size] for i in range(0, len(data), chunk_size))
282 282
283 283 for chunk in chunker:
284 284 proto.write_sideband(dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, ascii_bytes(chunk, allow_bytes=True))
285 285
286 286 def _get_messages(self, data, capabilities):
287 287 """Return a list with packets for sending data in sideband number 2."""
288 288 response = []
289 289 proto = dulwich.protocol.Protocol(None, response.append)
290 290
291 291 self._write_sideband_to_proto(proto, data, capabilities)
292 292
293 293 return response
294 294
295 295 def backend(self, request, environ):
296 296 """
297 297 WSGI Response producer for HTTP POST Git Smart HTTP requests.
298 298 Reads commands and data from HTTP POST's body.
299 299 returns an iterator obj with contents of git command's
300 300 response to stdout
301 301 """
302 302 # TODO(skreft): think how we could detect an HTTPLockedException, as
303 303 # we probably want to have the same mechanism used by mercurial and
304 304 # simplevcs.
305 305 # For that we would need to parse the output of the command looking for
306 306 # some signs of the HTTPLockedError, parse the data and reraise it in
307 307 # pygrack. However, that would interfere with the streaming.
308 308 #
309 309 # Now the output of a blocked push is:
310 310 # Pushing to http://test_regular:test12@127.0.0.1:5001/vcs_test_git
311 311 # POST git-receive-pack (1047 bytes)
312 312 # remote: ERROR: Repository `vcs_test_git` locked by user `test_admin`. Reason:`lock_auto`
313 313 # To http://test_regular:test12@127.0.0.1:5001/vcs_test_git
314 314 # ! [remote rejected] master -> master (pre-receive hook declined)
315 315 # error: failed to push some refs to 'http://test_regular:test12@127.0.0.1:5001/vcs_test_git'
316 316
317 317 git_command = self._get_fixedpath(request.path_info)
318 318 if git_command not in self.commands:
319 319 log.debug('command %s not allowed', git_command)
320 320 return exc.HTTPForbidden()
321 321
322 322 capabilities = None
323 323 if git_command == 'git-upload-pack':
324 324 capabilities = self._get_want_capabilities(request)
325 325
326 326 if 'CONTENT_LENGTH' in environ:
327 327 inputstream = FileWrapper(request.body_file_seekable,
328 328 request.content_length)
329 329 else:
330 330 inputstream = request.body_file_seekable
331 331
332 332 resp = Response()
333 333 resp.content_type = 'application/x-{}-result'.format(git_command)
334 334 resp.charset = None
335 335
336 336 pre_pull_messages = ''
337 337 # Upload-pack == clone
338 338 if git_command == 'git-upload-pack':
339 status, pre_pull_messages = hooks.git_pre_pull(self.extras)
340 if status != 0:
339 hook_response = hooks.git_pre_pull(self.extras)
340 if hook_response.status != 0:
341 pre_pull_messages = hook_response.output
341 342 resp.app_iter = self._build_failed_pre_pull_response(
342 343 capabilities, pre_pull_messages)
343 344 return resp
344 345
345 346 gitenv = dict(os.environ)
346 347 # forget all configs
347 348 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
348 349 gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
349 350 cmd = [self.git_path, git_command[4:], '--stateless-rpc',
350 351 self.content_path]
351 352 log.debug('handling cmd %s', cmd)
352 353
353 354 out = subprocessio.SubprocessIOChunker(
354 355 cmd,
355 356 input_stream=inputstream,
356 357 env=gitenv,
357 358 cwd=self.content_path,
358 359 shell=False,
359 360 fail_on_stderr=False,
360 361 fail_on_return_code=False
361 362 )
362 363
363 364 if self.update_server_info and git_command == 'git-receive-pack':
364 365 # We need to fully consume the iterator here, as the
365 366 # update-server-info command needs to be run after the push.
366 367 out = list(out)
367 368
368 369 # Updating refs manually after each push.
369 370 # This is required as some clients are exposing Git repos internally
370 371 # with the dumb protocol.
371 372 cmd = [self.git_path, 'update-server-info']
372 373 log.debug('handling cmd %s', cmd)
373 374 output = subprocessio.SubprocessIOChunker(
374 375 cmd,
375 376 input_stream=inputstream,
376 377 env=gitenv,
377 378 cwd=self.content_path,
378 379 shell=False,
379 380 fail_on_stderr=False,
380 381 fail_on_return_code=False
381 382 )
382 383 # Consume all the output so the subprocess finishes
383 384 for _ in output:
384 385 pass
385 386
386 387 # Upload-pack == clone
387 388 if git_command == 'git-upload-pack':
388 unused_status, post_pull_messages = hooks.git_post_pull(self.extras)
389
389 hook_response = hooks.git_post_pull(self.extras)
390 post_pull_messages = hook_response.output
390 391 resp.app_iter = self._build_post_pull_response(out, capabilities, pre_pull_messages, post_pull_messages)
391 392 else:
392 393 resp.app_iter = out
393 394
394 395 return resp
395 396
396 397 def __call__(self, environ, start_response):
397 398 request = Request(environ)
398 399 _path = self._get_fixedpath(request.path_info)
399 400 if _path.startswith('info/refs'):
400 401 app = self.inforefs
401 402 else:
402 403 app = self.backend
403 404
404 405 try:
405 406 resp = app(request, environ)
406 407 except exc.HTTPException as error:
407 408 log.exception('HTTP Error')
408 409 resp = error
409 410 except Exception:
410 411 log.exception('Unknown error')
411 412 resp = exc.HTTPInternalServerError()
412 413
413 414 return resp(environ, start_response)
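Both git_post_receive in the first hunk and the remote backend in the next hunk lean on pygit2: Repository is wrapped as a context manager that frees libgit2 resources on exit, and accessing repo.head raises pygit2.GitError while HEAD is unborn, which is what drives the set_head call after a push to an empty repository. A small illustrative sketch of that pattern, assuming repo_path points at a real bare repository:

import pygit2

class Repository(pygit2.Repository):
    """Context-manager wrapper mirroring the one defined in the next hunk."""
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.free()

def ensure_head(repo_path: str, branch: str = 'master') -> None:
    # repo_path is a placeholder; point it at an existing bare repo to run this
    with Repository(repo_path) as repo:
        try:
            repo.head  # raises GitError while HEAD is unborn (empty repo)
        except pygit2.GitError:
            repo.set_head(f'refs/heads/{branch}')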
@@ -1,1374 +1,1375 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import posixpath as vcspath
22 22 import re
23 23 import stat
24 24 import traceback
25 import urllib.request, urllib.parse, urllib.error
26 import urllib.request, urllib.error, urllib.parse
25 import urllib.request
26 import urllib.parse
27 import urllib.error
27 28 from functools import wraps
28 29
29 30 import more_itertools
30 31 import pygit2
31 32 from pygit2 import Repository as LibGit2Repo
32 33 from pygit2 import index as LibGit2Index
33 34 from dulwich import index, objects
34 35 from dulwich.client import HttpGitClient, LocalGitClient
35 36 from dulwich.errors import (
36 37 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 38 MissingCommitError, ObjectMissing, HangupException,
38 39 UnexpectedCommandError)
39 40 from dulwich.repo import Repo as DulwichRepo
40 41 from dulwich.server import update_server_info
41 42
42 43 from vcsserver import exceptions, settings, subprocessio
43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_str, ascii_bytes
44 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
44 45 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo, BinaryEnvelope
45 46 from vcsserver.hgcompat import (
46 47 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 48 from vcsserver.git_lfs.lib import LFSOidStore
48 49 from vcsserver.vcs_base import RemoteBase
49 50
50 51 DIR_STAT = stat.S_IFDIR
51 52 FILE_MODE = stat.S_IFMT
52 53 GIT_LINK = objects.S_IFGITLINK
53 54 PEELED_REF_MARKER = b'^{}'
54 55 HEAD_MARKER = b'HEAD'
55 56
56 57 log = logging.getLogger(__name__)
57 58
58 59
59 60 def reraise_safe_exceptions(func):
60 61 """Converts Dulwich exceptions to something neutral."""
61 62
62 63 @wraps(func)
63 64 def wrapper(*args, **kwargs):
64 65 try:
65 66 return func(*args, **kwargs)
66 67 except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as e:
67 68 exc = exceptions.LookupException(org_exc=e)
68 69 raise exc(safe_str(e))
69 70 except (HangupException, UnexpectedCommandError) as e:
70 71 exc = exceptions.VcsException(org_exc=e)
71 72 raise exc(safe_str(e))
72 except Exception as e:
73 except Exception:
73 74 # NOTE(marcink): because of how dulwich handles some exceptions
74 75 # (KeyError on empty repos), we cannot track this and catch all
75 76 # exceptions, it's an exceptions from other handlers
76 77 #if not hasattr(e, '_vcs_kind'):
77 78 #log.exception("Unhandled exception in git remote call")
78 79 #raise_from_original(exceptions.UnhandledException)
79 80 raise
80 81 return wrapper
81 82
82 83
83 84 class Repo(DulwichRepo):
84 85 """
85 86 A wrapper for dulwich Repo class.
86 87
87 88 Since dulwich is sometimes keeping .idx file descriptors open, it leads to
88 89 "Too many open files" error. We need to close all opened file descriptors
89 90 once the repo object is destroyed.
90 91 """
91 92 def __del__(self):
92 93 if hasattr(self, 'object_store'):
93 94 self.close()
94 95
95 96
96 97 class Repository(LibGit2Repo):
97 98
98 99 def __enter__(self):
99 100 return self
100 101
101 102 def __exit__(self, exc_type, exc_val, exc_tb):
102 103 self.free()
103 104
104 105
105 106 class GitFactory(RepoFactory):
106 107 repo_type = 'git'
107 108
108 109 def _create_repo(self, wire, create, use_libgit2=False):
109 110 if use_libgit2:
110 return Repository(safe_bytes(wire['path']))
111 repo = Repository(safe_bytes(wire['path']))
111 112 else:
112 113 # dulwich mode
113 114 repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
114 115 repo = Repo(repo_path)
115 116
116 117 log.debug('repository created: got GIT object: %s', repo)
117 118 return repo
118 119
119 120 def repo(self, wire, create=False, use_libgit2=False):
120 121 """
121 122 Get a repository instance for the given path.
122 123 """
123 124 return self._create_repo(wire, create, use_libgit2)
124 125
125 126 def repo_libgit2(self, wire):
126 127 return self.repo(wire, use_libgit2=True)
127 128
128 129
129 130 class GitRemote(RemoteBase):
130 131
131 132 def __init__(self, factory):
132 133 self._factory = factory
133 134 self._bulk_methods = {
134 135 "date": self.date,
135 136 "author": self.author,
136 137 "branch": self.branch,
137 138 "message": self.message,
138 139 "parents": self.parents,
139 140 "_commit": self.revision,
140 141 }
141 142
142 143 def _wire_to_config(self, wire):
143 144 if 'config' in wire:
144 145 return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']])
145 146 return {}
146 147
147 148 def _remote_conf(self, config):
148 149 params = [
149 150 '-c', 'core.askpass=""',
150 151 ]
151 152 ssl_cert_dir = config.get('vcs_ssl_dir')
152 153 if ssl_cert_dir:
153 154 params.extend(['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)])
154 155 return params
155 156
156 157 @reraise_safe_exceptions
157 158 def discover_git_version(self):
158 159 stdout, _ = self.run_git_command(
159 160 {}, ['--version'], _bare=True, _safe=True)
160 161 prefix = b'git version'
161 162 if stdout.startswith(prefix):
162 163 stdout = stdout[len(prefix):]
163 164 return safe_str(stdout.strip())
164 165
165 166 @reraise_safe_exceptions
166 167 def is_empty(self, wire):
167 168 repo_init = self._factory.repo_libgit2(wire)
168 169 with repo_init as repo:
169 170
170 171 try:
171 172 has_head = repo.head.name
172 173 if has_head:
173 174 return False
174 175
175 176 # NOTE(marcink): check again using more expensive method
176 177 return repo.is_empty
177 178 except Exception:
178 179 pass
179 180
180 181 return True
181 182
182 183 @reraise_safe_exceptions
183 184 def assert_correct_path(self, wire):
184 185 cache_on, context_uid, repo_id = self._cache_on(wire)
185 186 region = self._region(wire)
186 187
187 188 @region.conditional_cache_on_arguments(condition=cache_on)
188 189 def _assert_correct_path(_context_uid, _repo_id):
189 190 try:
190 191 repo_init = self._factory.repo_libgit2(wire)
191 192 with repo_init as repo:
192 193 pass
193 194 except pygit2.GitError:
194 195 path = wire.get('path')
195 196 tb = traceback.format_exc()
196 197 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
197 198 return False
198 199
199 200 return True
200 201 return _assert_correct_path(context_uid, repo_id)
201 202
202 203 @reraise_safe_exceptions
203 204 def bare(self, wire):
204 205 repo_init = self._factory.repo_libgit2(wire)
205 206 with repo_init as repo:
206 207 return repo.is_bare
207 208
208 209 @reraise_safe_exceptions
209 210 def blob_as_pretty_string(self, wire, sha):
210 211 repo_init = self._factory.repo_libgit2(wire)
211 212 with repo_init as repo:
212 213 blob_obj = repo[sha]
213 214 return BinaryEnvelope(blob_obj.data)
214 215
215 216 @reraise_safe_exceptions
216 217 def blob_raw_length(self, wire, sha):
217 218 cache_on, context_uid, repo_id = self._cache_on(wire)
218 219 region = self._region(wire)
219 220
220 221 @region.conditional_cache_on_arguments(condition=cache_on)
221 222 def _blob_raw_length(_repo_id, _sha):
222 223
223 224 repo_init = self._factory.repo_libgit2(wire)
224 225 with repo_init as repo:
225 226 blob = repo[sha]
226 227 return blob.size
227 228
228 229 return _blob_raw_length(repo_id, sha)
229 230
230 231 def _parse_lfs_pointer(self, raw_content):
231 232 spec_string = b'version https://git-lfs.github.com/spec'
232 233 if raw_content and raw_content.startswith(spec_string):
233 234
234 235 pattern = re.compile(rb"""
235 236 (?:\n)?
236 237 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
237 238 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
238 239 ^size[ ](?P<oid_size>[0-9]+)\n
239 240 (?:\n)?
240 241 """, re.VERBOSE | re.MULTILINE)
241 242 match = pattern.match(raw_content)
242 243 if match:
243 244 return match.groupdict()
244 245
245 246 return {}
246 247
247 248 @reraise_safe_exceptions
248 249 def is_large_file(self, wire, commit_id):
249 250 cache_on, context_uid, repo_id = self._cache_on(wire)
250 251 region = self._region(wire)
251 252
252 253 @region.conditional_cache_on_arguments(condition=cache_on)
253 254 def _is_large_file(_repo_id, _sha):
254 255 repo_init = self._factory.repo_libgit2(wire)
255 256 with repo_init as repo:
256 257 blob = repo[commit_id]
257 258 if blob.is_binary:
258 259 return {}
259 260
260 261 return self._parse_lfs_pointer(blob.data)
261 262
262 263 return _is_large_file(repo_id, commit_id)
263 264
264 265 @reraise_safe_exceptions
265 266 def is_binary(self, wire, tree_id):
266 267 cache_on, context_uid, repo_id = self._cache_on(wire)
267 268 region = self._region(wire)
268 269
269 270 @region.conditional_cache_on_arguments(condition=cache_on)
270 271 def _is_binary(_repo_id, _tree_id):
271 272 repo_init = self._factory.repo_libgit2(wire)
272 273 with repo_init as repo:
273 274 blob_obj = repo[tree_id]
274 275 return blob_obj.is_binary
275 276
276 277 return _is_binary(repo_id, tree_id)
277 278
278 279 @reraise_safe_exceptions
279 280 def md5_hash(self, wire, tree_id):
280 281 cache_on, context_uid, repo_id = self._cache_on(wire)
281 282 region = self._region(wire)
282 283
283 284 @region.conditional_cache_on_arguments(condition=cache_on)
284 285 def _md5_hash(_repo_id, _tree_id):
285 286 return ''
286 287
287 288 return _md5_hash(repo_id, tree_id)
288 289
289 290 @reraise_safe_exceptions
290 291 def in_largefiles_store(self, wire, oid):
291 292 conf = self._wire_to_config(wire)
292 293 repo_init = self._factory.repo_libgit2(wire)
293 294 with repo_init as repo:
294 295 repo_name = repo.path
295 296
296 297 store_location = conf.get('vcs_git_lfs_store_location')
297 298 if store_location:
298 299
299 300 store = LFSOidStore(
300 301 oid=oid, repo=repo_name, store_location=store_location)
301 302 return store.has_oid()
302 303
303 304 return False
304 305
305 306 @reraise_safe_exceptions
306 307 def store_path(self, wire, oid):
307 308 conf = self._wire_to_config(wire)
308 309 repo_init = self._factory.repo_libgit2(wire)
309 310 with repo_init as repo:
310 311 repo_name = repo.path
311 312
312 313 store_location = conf.get('vcs_git_lfs_store_location')
313 314 if store_location:
314 315 store = LFSOidStore(
315 316 oid=oid, repo=repo_name, store_location=store_location)
316 317 return store.oid_path
317 318 raise ValueError('Unable to fetch oid with path {}'.format(oid))
318 319
319 320 @reraise_safe_exceptions
320 321 def bulk_request(self, wire, rev, pre_load):
321 322 cache_on, context_uid, repo_id = self._cache_on(wire)
322 323 region = self._region(wire)
323 324
324 325 @region.conditional_cache_on_arguments(condition=cache_on)
325 326 def _bulk_request(_repo_id, _rev, _pre_load):
326 327 result = {}
327 328 for attr in pre_load:
328 329 try:
329 330 method = self._bulk_methods[attr]
330 331 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
331 332 args = [wire, rev]
332 333 result[attr] = method(*args)
333 334 except KeyError as e:
334 335 raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
335 336 return result
336 337
337 338 return _bulk_request(repo_id, rev, sorted(pre_load))
338 339
339 340 def _build_opener(self, url):
340 341 handlers = []
341 342 url_obj = url_parser(url)
342 343 _, authinfo = url_obj.authinfo()
343 344
344 345 if authinfo:
345 346 # create a password manager
346 347 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
347 348 passmgr.add_password(*authinfo)
348 349
349 350 handlers.extend((httpbasicauthhandler(passmgr),
350 351 httpdigestauthhandler(passmgr)))
351 352
352 353 return urllib.request.build_opener(*handlers)
353 354
354 355 def _type_id_to_name(self, type_id: int):
355 356 return {
356 357 1: 'commit',
357 358 2: 'tree',
358 359 3: 'blob',
359 360 4: 'tag'
360 361 }[type_id]
361 362
362 363 @reraise_safe_exceptions
363 364 def check_url(self, url, config):
364 365 url_obj = url_parser(safe_bytes(url))
365 366 test_uri, _ = url_obj.authinfo()
366 367 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
367 368 url_obj.query = obfuscate_qs(url_obj.query)
368 369 cleaned_uri = str(url_obj)
369 370 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
370 371
371 372 if not test_uri.endswith('info/refs'):
372 373 test_uri = test_uri.rstrip('/') + '/info/refs'
373 374
374 375 o = self._build_opener(url)
375 376 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
376 377
377 378 q = {"service": 'git-upload-pack'}
378 379 qs = '?%s' % urllib.parse.urlencode(q)
379 380 cu = "%s%s" % (test_uri, qs)
380 381 req = urllib.request.Request(cu, None, {})
381 382
382 383 try:
383 384 log.debug("Trying to open URL %s", cleaned_uri)
384 385 resp = o.open(req)
385 386 if resp.code != 200:
386 387 raise exceptions.URLError()('Return Code is not 200')
387 388 except Exception as e:
388 389 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
389 390 # means it cannot be cloned
390 391 raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))
391 392
392 393 # now detect if it's proper git repo
393 394 gitdata = resp.read()
394 395         if b'service=git-upload-pack' in gitdata:
395 396             pass
396 397         elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
397 398             # old style git can return some other format!
398 399             pass
399 400         else:
400 401             raise exceptions.URLError()(
401 402                 "url [%s] does not look like a git repository" % (cleaned_uri,))
402 403
403 404 return True
404 405
405 406 @reraise_safe_exceptions
406 407 def clone(self, wire, url, deferred, valid_refs, update_after_clone):
407 408 # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
408 409 remote_refs = self.pull(wire, url, apply_refs=False)
409 410 repo = self._factory.repo(wire)
410 411 if isinstance(valid_refs, list):
411 412 valid_refs = tuple(valid_refs)
412 413
413 414 for k in remote_refs:
414 415 # only parse heads/tags and skip so called deferred tags
415 416 if k.startswith(valid_refs) and not k.endswith(deferred):
416 417 repo[k] = remote_refs[k]
417 418
418 419 if update_after_clone:
419 420 # we want to checkout HEAD
420 421 repo["HEAD"] = remote_refs["HEAD"]
421 422 index.build_index_from_tree(repo.path, repo.index_path(),
422 423 repo.object_store, repo["HEAD"].tree)
423 424
424 425 @reraise_safe_exceptions
425 426 def branch(self, wire, commit_id):
426 427 cache_on, context_uid, repo_id = self._cache_on(wire)
427 428 region = self._region(wire)
428 429
429 430 @region.conditional_cache_on_arguments(condition=cache_on)
430 431 def _branch(_context_uid, _repo_id, _commit_id):
431 432 regex = re.compile('^refs/heads')
432 433
433 434 def filter_with(ref):
434 435 return regex.match(ref[0]) and ref[1] == _commit_id
435 436
436 437 branches = list(filter(filter_with, list(self.get_refs(wire).items())))
437 438 return [x[0].split('refs/heads/')[-1] for x in branches]
438 439
439 440 return _branch(context_uid, repo_id, commit_id)
440 441
441 442 @reraise_safe_exceptions
442 443 def commit_branches(self, wire, commit_id):
443 444 cache_on, context_uid, repo_id = self._cache_on(wire)
444 445 region = self._region(wire)
445 446
446 447 @region.conditional_cache_on_arguments(condition=cache_on)
447 448 def _commit_branches(_context_uid, _repo_id, _commit_id):
448 449 repo_init = self._factory.repo_libgit2(wire)
449 450 with repo_init as repo:
450 451 branches = [x for x in repo.branches.with_commit(_commit_id)]
451 452 return branches
452 453
453 454 return _commit_branches(context_uid, repo_id, commit_id)
454 455
455 456 @reraise_safe_exceptions
456 457 def add_object(self, wire, content):
457 458 repo_init = self._factory.repo_libgit2(wire)
458 459 with repo_init as repo:
459 460 blob = objects.Blob()
460 461 blob.set_raw_string(content)
461 462 repo.object_store.add_object(blob)
462 463 return blob.id
463 464
464 465 # TODO: this is quite complex, check if that can be simplified
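    # Rebuilds the dulwich tree hierarchy for every updated node, prunes removed
    # paths back up the tree, then writes a Commit object and points
    # refs/heads/<branch> at it.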
465 466 @reraise_safe_exceptions
466 467 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
467 468 # Defines the root tree
468 469 class _Root(object):
469 470 def __repr__(self):
470 471 return 'ROOT TREE'
471 472 ROOT = _Root()
472 473
473 474 repo = self._factory.repo(wire)
474 475 object_store = repo.object_store
475 476
476 477 # Create tree and populates it with blobs
477 478 if commit_tree:
478 479 commit_tree = safe_bytes(commit_tree)
479 480
480 481 if commit_tree and repo[commit_tree]:
481 482 git_commit = repo[safe_bytes(commit_data['parents'][0])]
482 483 commit_tree = repo[git_commit.tree] # root tree
483 484 else:
484 485 commit_tree = objects.Tree()
485 486
486 487 for node in updated:
487 488 # Compute subdirs if needed
488 489 dirpath, nodename = vcspath.split(node['path'])
489 490 dirnames = list(map(safe_str, dirpath and dirpath.split('/') or []))
490 491 parent = commit_tree
491 492 ancestors = [('', parent)]
492 493
493 494 # Tries to dig for the deepest existing tree
494 495 while dirnames:
495 496 curdir = dirnames.pop(0)
496 497 try:
497 498 dir_id = parent[curdir][1]
498 499 except KeyError:
499 500                     # put curdir back into dirnames and stop
500 501 dirnames.insert(0, curdir)
501 502 break
502 503 else:
503 504 # If found, updates parent
504 505 parent = repo[dir_id]
505 506 ancestors.append((curdir, parent))
506 507 # Now parent is deepest existing tree and we need to create
507 508 # subtrees for dirnames (in reverse order)
508 509 # [this only applies for nodes from added]
509 510 new_trees = []
510 511
511 512 blob = objects.Blob.from_string(node['content'])
512 513
513 514 node_path = safe_bytes(node['node_path'])
514 515
515 516 if dirnames:
516 517 # If there are trees which should be created we need to build
517 518 # them now (in reverse order)
518 519 reversed_dirnames = list(reversed(dirnames))
519 520 curtree = objects.Tree()
520 521 curtree[node_path] = node['mode'], blob.id
521 522 new_trees.append(curtree)
522 523 for dirname in reversed_dirnames[:-1]:
523 524 newtree = objects.Tree()
524 525 newtree[dirname] = (DIR_STAT, curtree.id)
525 526 new_trees.append(newtree)
526 527 curtree = newtree
527 528 parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
528 529 else:
529 530 parent.add(name=node_path, mode=node['mode'], hexsha=blob.id)
530 531
531 532 new_trees.append(parent)
532 533 # Update ancestors
533 534 reversed_ancestors = reversed(
534 535 [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
535 536 for parent, tree, path in reversed_ancestors:
536 537 parent[path] = (DIR_STAT, tree.id)
537 538 object_store.add_object(tree)
538 539
539 540 object_store.add_object(blob)
540 541 for tree in new_trees:
541 542 object_store.add_object(tree)
542 543
543 544 for node_path in removed:
544 545 paths = node_path.split('/')
545 546 tree = commit_tree # start with top-level
546 547 trees = [{'tree': tree, 'path': ROOT}]
547 548 # Traverse deep into the forest...
548 549 # resolve final tree by iterating the path.
549 550 # e.g a/b/c.txt will get
550 551 # - root as tree then
551 552 # - 'a' as tree,
552 553 # - 'b' as tree,
553 554 # - stop at c as blob.
554 555 for path in paths:
555 556 try:
556 557 obj = repo[tree[path][1]]
557 558 if isinstance(obj, objects.Tree):
558 559 trees.append({'tree': obj, 'path': path})
559 560 tree = obj
560 561 except KeyError:
561 562 break
562 563             # PROBLEM:
563 564             # we're not editing the same reference tree object
566 567 # Cut down the blob and all rotten trees on the way back...
567 568 for path, tree_data in reversed(list(zip(paths, trees))):
568 569 tree = tree_data['tree']
569 570 tree.__delitem__(path)
570 571 # This operation edits the tree, we need to mark new commit back
571 572
572 573 if len(tree) > 0:
573 574 # This tree still has elements - don't remove it or any
574 575                     # of its parents
575 576 break
576 577
577 578 object_store.add_object(commit_tree)
578 579
579 580 # Create commit
580 581 commit = objects.Commit()
581 582 commit.tree = commit_tree.id
582 583 bytes_keys = [
583 584 'author',
584 585 'committer',
585 586 'message',
586 587 'encoding',
587 588 'parents'
588 589 ]
589 590
590 591 for k, v in commit_data.items():
591 592 if k in bytes_keys:
592 593 if k == 'parents':
593 594 v = [safe_bytes(x) for x in v]
594 595 else:
595 596 v = safe_bytes(v)
596 597 setattr(commit, k, v)
597 598
598 599 object_store.add_object(commit)
599 600
600 601 self.create_branch(wire, branch, safe_str(commit.id))
601 602
602 603 # dulwich set-ref
603 604 repo.refs[safe_bytes(f'refs/heads/{branch}')] = commit.id
604 605
605 606 return commit.id
606 607
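    # Fetches from `url` with dulwich (LocalGitClient for local paths,
    # HttpGitClient otherwise). `refs` limits what is fetched via a custom
    # determine_wants; `apply_refs` writes the fetched refs into the repo and
    # `update_after` additionally checks out HEAD into the index.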
607 608 @reraise_safe_exceptions
608 609 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
609 610 if url != 'default' and '://' not in url:
610 611 client = LocalGitClient(url)
611 612 else:
612 613 url_obj = url_parser(url)
613 614 o = self._build_opener(url)
614 615 url, _ = url_obj.authinfo()
615 616 client = HttpGitClient(base_url=url, opener=o)
616 617 repo = self._factory.repo(wire)
617 618
618 619 determine_wants = repo.object_store.determine_wants_all
619 620 if refs:
620 621 refs = [ascii_bytes(x) for x in refs]
621 622
622 623 def determine_wants_requested(remote_refs):
623 624 determined = []
624 625 for ref_name, ref_hash in remote_refs.items():
625 626 bytes_ref_name = safe_bytes(ref_name)
626 627
627 628 if bytes_ref_name in refs:
628 629 bytes_ref_hash = safe_bytes(ref_hash)
629 630 determined.append(bytes_ref_hash)
630 631 return determined
631 632
632 633 # swap with our custom requested wants
633 634 determine_wants = determine_wants_requested
634 635
635 636 try:
636 637 remote_refs = client.fetch(
637 638 path=url, target=repo, determine_wants=determine_wants)
638 639
639 640 except NotGitRepository as e:
640 641 log.warning(
641 642 'Trying to fetch from "%s" failed, not a Git repository.', url)
642 643 # Exception can contain unicode which we convert
643 644 raise exceptions.AbortException(e)(repr(e))
644 645
645 646 # mikhail: client.fetch() returns all the remote refs, but fetches only
646 647 # refs filtered by `determine_wants` function. We need to filter result
647 648 # as well
648 649 if refs:
649 650 remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}
650 651
651 652 if apply_refs:
652 653 # TODO: johbo: Needs proper test coverage with a git repository
653 654 # that contains a tag object, so that we would end up with
654 655 # a peeled ref at this point.
655 656 for k in remote_refs:
656 657 if k.endswith(PEELED_REF_MARKER):
657 658 log.debug("Skipping peeled reference %s", k)
658 659 continue
659 660 repo[k] = remote_refs[k]
660 661
661 662 if refs and not update_after:
662 663 # mikhail: explicitly set the head to the last ref.
663 664 repo[HEAD_MARKER] = remote_refs[refs[-1]]
664 665
665 666 if update_after:
666 667 # we want to check out HEAD
667 668 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
668 669 index.build_index_from_tree(repo.path, repo.index_path(),
669 670 repo.object_store, repo[HEAD_MARKER].tree)
670 671 return remote_refs
671 672
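    # Lists remote refs with `git ls-remote` (skipping peeled refs and HEAD),
    # then fetches the selected refspecs in chunks using
    # `git fetch --force --prune`.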
672 673 @reraise_safe_exceptions
673 674 def sync_fetch(self, wire, url, refs=None, all_refs=False):
674 675 repo = self._factory.repo(wire)
675 676 if refs and not isinstance(refs, (list, tuple)):
676 677 refs = [refs]
677 678
678 679 config = self._wire_to_config(wire)
679 680 # get all remote refs we'll use to fetch later
680 681 cmd = ['ls-remote']
681 682 if not all_refs:
682 683 cmd += ['--heads', '--tags']
683 684 cmd += [url]
684 685 output, __ = self.run_git_command(
685 686 wire, cmd, fail_on_stderr=False,
686 687 _copts=self._remote_conf(config),
687 688 extra_env={'GIT_TERMINAL_PROMPT': '0'})
688 689
689 690 remote_refs = collections.OrderedDict()
690 691 fetch_refs = []
691 692
692 693 for ref_line in output.splitlines():
693 694 sha, ref = ref_line.split(b'\t')
694 695 sha = sha.strip()
695 696 if ref in remote_refs:
696 697 # duplicate, skip
697 698 continue
698 699 if ref.endswith(PEELED_REF_MARKER):
699 700 log.debug("Skipping peeled reference %s", ref)
700 701 continue
701 702 # don't sync HEAD
702 703 if ref in [HEAD_MARKER]:
703 704 continue
704 705
705 706 remote_refs[ref] = sha
706 707
707 708 if refs and sha in refs:
708 709 # we filter fetch using our specified refs
709 710 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
710 711 elif not refs:
711 712 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
712 713 log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
713 714
714 715 if fetch_refs:
715 716 for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
716 717 fetch_refs_chunks = list(chunk)
717 718 log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
718 719 self.run_git_command(
719 720 wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
720 721 fail_on_stderr=False,
721 722 _copts=self._remote_conf(config),
722 723 extra_env={'GIT_TERMINAL_PROMPT': '0'})
723 724
724 725 return remote_refs
725 726
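    # Mirrors all local refs to `url` with `git push --mirror`, but only after
    # the remote URL passes check_url().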
726 727 @reraise_safe_exceptions
727 728 def sync_push(self, wire, url, refs=None):
728 729 if not self.check_url(url, wire):
729 730 return
730 731 config = self._wire_to_config(wire)
731 732 self._factory.repo(wire)
732 733 self.run_git_command(
733 734 wire, ['push', url, '--mirror'], fail_on_stderr=False,
734 735 _copts=self._remote_conf(config),
735 736 extra_env={'GIT_TERMINAL_PROMPT': '0'})
736 737
737 738 @reraise_safe_exceptions
738 739 def get_remote_refs(self, wire, url):
739 740 repo = Repo(url)
740 741 return repo.get_refs()
741 742
742 743 @reraise_safe_exceptions
743 744 def get_description(self, wire):
744 745 repo = self._factory.repo(wire)
745 746 return repo.get_description()
746 747
747 748 @reraise_safe_exceptions
748 749 def get_missing_revs(self, wire, rev1, rev2, path2):
749 750 repo = self._factory.repo(wire)
750 751 LocalGitClient(thin_packs=False).fetch(path2, repo)
751 752
752 753 wire_remote = wire.copy()
753 754 wire_remote['path'] = path2
754 755 repo_remote = self._factory.repo(wire_remote)
755 756 LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)
756 757
757 758 revs = [
758 759 x.commit.id
759 760 for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
760 761 return revs
761 762
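    # Resolves `sha` with revparse_single, unwraps annotated tags to their
    # target commit and, unless `maybe_unreachable` is set or a symbolic
    # reference was given, verifies the commit is reachable from at least one
    # branch before returning its id and type.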
762 763 @reraise_safe_exceptions
763 764 def get_object(self, wire, sha, maybe_unreachable=False):
764 765 cache_on, context_uid, repo_id = self._cache_on(wire)
765 766 region = self._region(wire)
766 767
767 768 @region.conditional_cache_on_arguments(condition=cache_on)
768 769 def _get_object(_context_uid, _repo_id, _sha):
769 770 repo_init = self._factory.repo_libgit2(wire)
770 771 with repo_init as repo:
771 772
772 773 missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
773 774 try:
774 775 commit = repo.revparse_single(sha)
775 776 except KeyError:
776 777 # NOTE(marcink): KeyError doesn't give us any meaningful information
777 778 # here, we instead give something more explicit
778 779                     e = exceptions.RefNotFoundException(f'SHA: {sha} not found')
779 780 raise exceptions.LookupException(e)(missing_commit_err)
780 781 except ValueError as e:
781 782 raise exceptions.LookupException(e)(missing_commit_err)
782 783
783 784 is_tag = False
784 785 if isinstance(commit, pygit2.Tag):
785 786 commit = repo.get(commit.target)
786 787 is_tag = True
787 788
788 789 check_dangling = True
789 790 if is_tag:
790 791 check_dangling = False
791 792
792 793 if check_dangling and maybe_unreachable:
793 794 check_dangling = False
794 795
795 796                 # the input was a reference (not a raw sha) and it resolved, so the commit is not dangling
796 797 if sha != commit.hex:
797 798 check_dangling = False
798 799
799 800 if check_dangling:
800 801 # check for dangling commit
801 802 for branch in repo.branches.with_commit(commit.hex):
802 803 if branch:
803 804 break
804 805 else:
805 806 # NOTE(marcink): Empty error doesn't give us any meaningful information
806 807 # here, we instead give something more explicit
807 808                     e = exceptions.RefNotFoundException(f'SHA: {sha} not found in branches')
808 809 raise exceptions.LookupException(e)(missing_commit_err)
809 810
810 811 commit_id = commit.hex
811 812 type_id = commit.type
812 813
813 814 return {
814 815 'id': commit_id,
815 816 'type': self._type_id_to_name(type_id),
816 817 'commit_id': commit_id,
817 818 'idx': 0
818 819 }
819 820
820 821 return _get_object(context_uid, repo_id, sha)
821 822
822 823 @reraise_safe_exceptions
823 824 def get_refs(self, wire):
824 825 cache_on, context_uid, repo_id = self._cache_on(wire)
825 826 region = self._region(wire)
826 827
827 828 @region.conditional_cache_on_arguments(condition=cache_on)
828 829 def _get_refs(_context_uid, _repo_id):
829 830
830 831 repo_init = self._factory.repo_libgit2(wire)
831 832 with repo_init as repo:
832 833 regex = re.compile('^refs/(heads|tags)/')
833 834 return {x.name: x.target.hex for x in
834 835 [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}
835 836
836 837 return _get_refs(context_uid, repo_id)
837 838
838 839 @reraise_safe_exceptions
839 840 def get_branch_pointers(self, wire):
840 841 cache_on, context_uid, repo_id = self._cache_on(wire)
841 842 region = self._region(wire)
842 843
843 844 @region.conditional_cache_on_arguments(condition=cache_on)
844 845 def _get_branch_pointers(_context_uid, _repo_id):
845 846
846 847 repo_init = self._factory.repo_libgit2(wire)
847 848 regex = re.compile('^refs/heads')
848 849 with repo_init as repo:
849 850 branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
850 851 return {x.target.hex: x.shorthand for x in branches}
851 852
852 853 return _get_branch_pointers(context_uid, repo_id)
853 854
854 855 @reraise_safe_exceptions
855 856 def head(self, wire, show_exc=True):
856 857 cache_on, context_uid, repo_id = self._cache_on(wire)
857 858 region = self._region(wire)
858 859
859 860 @region.conditional_cache_on_arguments(condition=cache_on)
860 861 def _head(_context_uid, _repo_id, _show_exc):
861 862 repo_init = self._factory.repo_libgit2(wire)
862 863 with repo_init as repo:
863 864 try:
864 865 return repo.head.peel().hex
865 866 except Exception:
866 867 if show_exc:
867 868 raise
868 869 return _head(context_uid, repo_id, show_exc)
869 870
870 871 @reraise_safe_exceptions
871 872 def init(self, wire):
872 873 repo_path = safe_str(wire['path'])
873 874 self.repo = Repo.init(repo_path)
874 875
875 876 @reraise_safe_exceptions
876 877 def init_bare(self, wire):
877 878 repo_path = safe_str(wire['path'])
878 879 self.repo = Repo.init_bare(repo_path)
879 880
880 881 @reraise_safe_exceptions
881 882 def revision(self, wire, rev):
882 883
883 884 cache_on, context_uid, repo_id = self._cache_on(wire)
884 885 region = self._region(wire)
885 886
886 887 @region.conditional_cache_on_arguments(condition=cache_on)
887 888 def _revision(_context_uid, _repo_id, _rev):
888 889 repo_init = self._factory.repo_libgit2(wire)
889 890 with repo_init as repo:
890 891 commit = repo[rev]
891 892 obj_data = {
892 893 'id': commit.id.hex,
893 894 }
894 895 # tree objects itself don't have tree_id attribute
895 896 if hasattr(commit, 'tree_id'):
896 897 obj_data['tree'] = commit.tree_id.hex
897 898
898 899 return obj_data
899 900 return _revision(context_uid, repo_id, rev)
900 901
901 902 @reraise_safe_exceptions
902 903 def date(self, wire, commit_id):
903 904 cache_on, context_uid, repo_id = self._cache_on(wire)
904 905 region = self._region(wire)
905 906
906 907 @region.conditional_cache_on_arguments(condition=cache_on)
907 908 def _date(_repo_id, _commit_id):
908 909 repo_init = self._factory.repo_libgit2(wire)
909 910 with repo_init as repo:
910 911 commit = repo[commit_id]
911 912
912 913 if hasattr(commit, 'commit_time'):
913 914 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
914 915 else:
915 916 commit = commit.get_object()
916 917 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
917 918
918 919 # TODO(marcink): check dulwich difference of offset vs timezone
919 920 return [commit_time, commit_time_offset]
920 921 return _date(repo_id, commit_id)
921 922
922 923 @reraise_safe_exceptions
923 924 def author(self, wire, commit_id):
924 925 cache_on, context_uid, repo_id = self._cache_on(wire)
925 926 region = self._region(wire)
926 927
927 928 @region.conditional_cache_on_arguments(condition=cache_on)
928 929 def _author(_repo_id, _commit_id):
929 930 repo_init = self._factory.repo_libgit2(wire)
930 931 with repo_init as repo:
931 932 commit = repo[commit_id]
932 933
933 934 if hasattr(commit, 'author'):
934 935 author = commit.author
935 936 else:
936 937 author = commit.get_object().author
937 938
938 939 if author.email:
939 940 return "{} <{}>".format(author.name, author.email)
940 941
941 942 try:
942 943 return "{}".format(author.name)
943 944 except Exception:
944 945 return "{}".format(safe_str(author.raw_name))
945 946
946 947 return _author(repo_id, commit_id)
947 948
948 949 @reraise_safe_exceptions
949 950 def message(self, wire, commit_id):
950 951 cache_on, context_uid, repo_id = self._cache_on(wire)
951 952 region = self._region(wire)
952 953
953 954 @region.conditional_cache_on_arguments(condition=cache_on)
954 955 def _message(_repo_id, _commit_id):
955 956 repo_init = self._factory.repo_libgit2(wire)
956 957 with repo_init as repo:
957 958 commit = repo[commit_id]
958 959 return commit.message
959 960 return _message(repo_id, commit_id)
960 961
961 962 @reraise_safe_exceptions
962 963 def parents(self, wire, commit_id):
963 964 cache_on, context_uid, repo_id = self._cache_on(wire)
964 965 region = self._region(wire)
965 966
966 967 @region.conditional_cache_on_arguments(condition=cache_on)
967 968 def _parents(_repo_id, _commit_id):
968 969 repo_init = self._factory.repo_libgit2(wire)
969 970 with repo_init as repo:
970 971 commit = repo[commit_id]
971 972 if hasattr(commit, 'parent_ids'):
972 973 parent_ids = commit.parent_ids
973 974 else:
974 975 parent_ids = commit.get_object().parent_ids
975 976
976 977 return [x.hex for x in parent_ids]
977 978 return _parents(repo_id, commit_id)
978 979
979 980 @reraise_safe_exceptions
980 981 def children(self, wire, commit_id):
981 982 cache_on, context_uid, repo_id = self._cache_on(wire)
982 983 region = self._region(wire)
983 984
984 985 head = self.head(wire)
985 986
986 987 @region.conditional_cache_on_arguments(condition=cache_on)
987 988 def _children(_repo_id, _commit_id):
988 989
989 990 output, __ = self.run_git_command(
990 991 wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])
991 992
992 993 child_ids = []
993 994 pat = re.compile(r'^{}'.format(commit_id))
994 995 for line in output.splitlines():
995 996 line = safe_str(line)
996 997 if pat.match(line):
997 998 found_ids = line.split(' ')[1:]
998 999 child_ids.extend(found_ids)
999 1000 break
1000 1001
1001 1002 return child_ids
1002 1003 return _children(repo_id, commit_id)
1003 1004
1004 1005 @reraise_safe_exceptions
1005 1006 def set_refs(self, wire, key, value):
1006 1007 repo_init = self._factory.repo_libgit2(wire)
1007 1008 with repo_init as repo:
1008 1009 repo.references.create(key, value, force=True)
1009 1010
1010 1011 @reraise_safe_exceptions
1011 1012 def create_branch(self, wire, branch_name, commit_id, force=False):
1012 1013 repo_init = self._factory.repo_libgit2(wire)
1013 1014 with repo_init as repo:
1014 1015 commit = repo[commit_id]
1015 1016
1016 1017 if force:
1017 1018 repo.branches.local.create(branch_name, commit, force=force)
1018 1019 elif not repo.branches.get(branch_name):
1019 1020 # create only if that branch isn't existing
1020 1021 repo.branches.local.create(branch_name, commit, force=force)
1021 1022
1022 1023 @reraise_safe_exceptions
1023 1024 def remove_ref(self, wire, key):
1024 1025 repo_init = self._factory.repo_libgit2(wire)
1025 1026 with repo_init as repo:
1026 1027 repo.references.delete(key)
1027 1028
1028 1029 @reraise_safe_exceptions
1029 1030 def tag_remove(self, wire, tag_name):
1030 1031 repo_init = self._factory.repo_libgit2(wire)
1031 1032 with repo_init as repo:
1032 1033 key = 'refs/tags/{}'.format(tag_name)
1033 1034 repo.references.delete(key)
1034 1035
1035 1036 @reraise_safe_exceptions
1036 1037 def tree_changes(self, wire, source_id, target_id):
1037 1038 # TODO(marcink): remove this seems it's only used by tests
1038 1039 repo = self._factory.repo(wire)
1039 1040 source = repo[source_id].tree if source_id else None
1040 1041 target = repo[target_id].tree
1041 1042 result = repo.object_store.tree_changes(source, target)
1042 1043 return list(result)
1043 1044
1044 1045 @reraise_safe_exceptions
1045 1046 def tree_and_type_for_path(self, wire, commit_id, path):
1046 1047
1047 1048 cache_on, context_uid, repo_id = self._cache_on(wire)
1048 1049 region = self._region(wire)
1049 1050
1050 1051 @region.conditional_cache_on_arguments(condition=cache_on)
1051 1052 def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
1052 1053 repo_init = self._factory.repo_libgit2(wire)
1053 1054
1054 1055 with repo_init as repo:
1055 1056 commit = repo[commit_id]
1056 1057 try:
1057 1058 tree = commit.tree[path]
1058 1059 except KeyError:
1059 1060 return None, None, None
1060 1061
1061 1062 return tree.id.hex, tree.type_str, tree.filemode
1062 1063 return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1063 1064
1064 1065 @reraise_safe_exceptions
1065 1066 def tree_items(self, wire, tree_id):
1066 1067 cache_on, context_uid, repo_id = self._cache_on(wire)
1067 1068 region = self._region(wire)
1068 1069
1069 1070 @region.conditional_cache_on_arguments(condition=cache_on)
1070 1071 def _tree_items(_repo_id, _tree_id):
1071 1072
1072 1073 repo_init = self._factory.repo_libgit2(wire)
1073 1074 with repo_init as repo:
1074 1075 try:
1075 1076 tree = repo[tree_id]
1076 1077 except KeyError:
1077 1078 raise ObjectMissing('No tree with id: {}'.format(tree_id))
1078 1079
1079 1080 result = []
1080 1081 for item in tree:
1081 1082 item_sha = item.hex
1082 1083 item_mode = item.filemode
1083 1084 item_type = item.type_str
1084 1085
1085 1086 if item_type == 'commit':
1086 1087 # NOTE(marcink): submodules we translate to 'link' for backward compat
1087 1088 item_type = 'link'
1088 1089
1089 1090 result.append((item.name, item_mode, item_sha, item_type))
1090 1091 return result
1091 1092 return _tree_items(repo_id, tree_id)
1092 1093
1093 1094 @reraise_safe_exceptions
1094 1095 def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1095 1096 """
1096 1097 Old version that uses subprocess to call diff
1097 1098 """
1098 1099
1099 1100 flags = [
1100 1101 '-U%s' % context, '--patch',
1101 1102 '--binary',
1102 1103 '--find-renames',
1103 1104 '--no-indent-heuristic',
1104 1105 # '--indent-heuristic',
1105 1106 #'--full-index',
1106 1107 #'--abbrev=40'
1107 1108 ]
1108 1109
1109 1110 if opt_ignorews:
1110 1111 flags.append('--ignore-all-space')
1111 1112
1112 1113 if commit_id_1 == self.EMPTY_COMMIT:
1113 1114 cmd = ['show'] + flags + [commit_id_2]
1114 1115 else:
1115 1116 cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
1116 1117
1117 1118 if file_filter:
1118 1119 cmd.extend(['--', file_filter])
1119 1120
1120 1121 diff, __ = self.run_git_command(wire, cmd)
1121 1122 # If we used 'show' command, strip first few lines (until actual diff
1122 1123 # starts)
1123 1124 if commit_id_1 == self.EMPTY_COMMIT:
1124 1125 lines = diff.splitlines()
1125 1126 x = 0
1126 1127 for line in lines:
1127 1128 if line.startswith(b'diff'):
1128 1129 break
1129 1130 x += 1
1130 1131             # Append a new line, just like the 'diff' command does
1131 1132             diff = b'\n'.join(lines[x:]) + b'\n'
1132 1133 return diff
1133 1134
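    # pygit2 based diff of two commits (or of one commit against the empty tree
    # when EMPTY_COMMIT is given), with rename detection; when `file_filter` is
    # set, only the patch for that path is returned.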
1134 1135 @reraise_safe_exceptions
1135 1136 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1136 1137 repo_init = self._factory.repo_libgit2(wire)
1137 1138
1138 1139 with repo_init as repo:
1139 1140 swap = True
1140 1141 flags = 0
1141 1142 flags |= pygit2.GIT_DIFF_SHOW_BINARY
1142 1143
1143 1144 if opt_ignorews:
1144 1145 flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE
1145 1146
1146 1147 if commit_id_1 == self.EMPTY_COMMIT:
1147 1148 comm1 = repo[commit_id_2]
1148 1149 diff_obj = comm1.tree.diff_to_tree(
1149 1150 flags=flags, context_lines=context, swap=swap)
1150 1151
1151 1152 else:
1152 1153 comm1 = repo[commit_id_2]
1153 1154 comm2 = repo[commit_id_1]
1154 1155 diff_obj = comm1.tree.diff_to_tree(
1155 1156 comm2.tree, flags=flags, context_lines=context, swap=swap)
1156 1157 similar_flags = 0
1157 1158 similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
1158 1159 diff_obj.find_similar(flags=similar_flags)
1159 1160
1160 1161 if file_filter:
1161 1162 for p in diff_obj:
1162 1163 if p.delta.old_file.path == file_filter:
1163 1164 return BinaryEnvelope(p.data) or BinaryEnvelope(b'')
1164 1165                 # no matching path == no diff
1165 1166 return BinaryEnvelope(b'')
1166 1167 return BinaryEnvelope(diff_obj.patch) or BinaryEnvelope(b'')
1167 1168
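    # Commit history for `path`: uses `git rev-list -1` for the common
    # limit == 1 case and `git log --pretty=format: %H` otherwise, returning
    # the raw commit SHAs found in the output.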
1168 1169 @reraise_safe_exceptions
1169 1170 def node_history(self, wire, commit_id, path, limit):
1170 1171 cache_on, context_uid, repo_id = self._cache_on(wire)
1171 1172 region = self._region(wire)
1172 1173
1173 1174 @region.conditional_cache_on_arguments(condition=cache_on)
1174 1175 def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
1175 1176 # optimize for n==1, rev-list is much faster for that use-case
1176 1177 if limit == 1:
1177 1178 cmd = ['rev-list', '-1', commit_id, '--', path]
1178 1179 else:
1179 1180 cmd = ['log']
1180 1181 if limit:
1181 1182 cmd.extend(['-n', str(safe_int(limit, 0))])
1182 1183 cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
1183 1184
1184 1185 output, __ = self.run_git_command(wire, cmd)
1185 1186 commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
1186 1187
1187 1188 return [x for x in commit_ids]
1188 1189 return _node_history(context_uid, repo_id, commit_id, path, limit)
1189 1190
1190 1191 @reraise_safe_exceptions
1191 1192 def node_annotate_legacy(self, wire, commit_id, path):
1192 1193 # note: replaced by pygit2 implementation
1193 1194 cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
1194 1195 # -l ==> outputs long shas (and we need all 40 characters)
1195 1196 # --root ==> doesn't put '^' character for boundaries
1196 1197 # -r commit_id ==> blames for the given commit
1197 1198 output, __ = self.run_git_command(wire, cmd)
1198 1199
1199 1200 result = []
1200 1201 for i, blame_line in enumerate(output.splitlines()[:-1]):
1201 1202 line_no = i + 1
1202 1203 blame_commit_id, line = re.split(rb' ', blame_line, 1)
1203 1204 result.append((line_no, blame_commit_id, line))
1204 1205
1205 1206 return result
1206 1207
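    # pygit2 blame: maps each line of `path` at `commit_id` to the commit that
    # last modified it.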
1207 1208 @reraise_safe_exceptions
1208 1209 def node_annotate(self, wire, commit_id, path):
1209 1210
1210 1211 result_libgit = []
1211 1212 repo_init = self._factory.repo_libgit2(wire)
1212 1213 with repo_init as repo:
1213 1214 commit = repo[commit_id]
1214 1215 blame_obj = repo.blame(path, newest_commit=commit_id)
1215 1216 for i, line in enumerate(commit.tree[path].data.splitlines()):
1216 1217 line_no = i + 1
1217 1218 hunk = blame_obj.for_line(line_no)
1218 1219 blame_commit_id = hunk.final_commit_id.hex
1219 1220
1220 1221 result_libgit.append((line_no, blame_commit_id, line))
1221 1222
1222 1223 return result_libgit
1223 1224
1224 1225 @reraise_safe_exceptions
1225 1226 def update_server_info(self, wire):
1226 1227 repo = self._factory.repo(wire)
1227 1228 update_server_info(repo)
1228 1229
1229 1230 @reraise_safe_exceptions
1230 1231 def get_all_commit_ids(self, wire):
1231 1232
1232 1233 cache_on, context_uid, repo_id = self._cache_on(wire)
1233 1234 region = self._region(wire)
1234 1235
1235 1236 @region.conditional_cache_on_arguments(condition=cache_on)
1236 1237 def _get_all_commit_ids(_context_uid, _repo_id):
1237 1238
1238 1239 cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
1239 1240 try:
1240 1241 output, __ = self.run_git_command(wire, cmd)
1241 1242 return output.splitlines()
1242 1243 except Exception:
1243 1244 # Can be raised for empty repositories
1244 1245 return []
1245 1246
1246 1247 @region.conditional_cache_on_arguments(condition=cache_on)
1247 1248 def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
1248 1249 repo_init = self._factory.repo_libgit2(wire)
1249 1250 from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
1250 1251 results = []
1251 1252 with repo_init as repo:
1252 1253 for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
1253 1254                     results.append(commit.id.hex)
            return results
1254 1255
1255 1256 return _get_all_commit_ids(context_uid, repo_id)
1256 1257
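    # Runs the git binary through SubprocessIOChunker with a sanitized
    # environment (GIT_DIR dropped, global config disabled) and returns
    # (stdout, stderr) as bytes; `_safe` suppresses errors, `_copts` injects
    # extra `-c` options and `_bare` skips the default core.quotepath setting.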
1257 1258 @reraise_safe_exceptions
1258 1259 def run_git_command(self, wire, cmd, **opts):
1259 1260 path = wire.get('path', None)
1260 1261
1261 1262 if path and os.path.isdir(path):
1262 1263 opts['cwd'] = path
1263 1264
1264 1265 if '_bare' in opts:
1265 1266 _copts = []
1266 1267 del opts['_bare']
1267 1268 else:
1268 1269 _copts = ['-c', 'core.quotepath=false', ]
1269 1270 safe_call = False
1270 1271 if '_safe' in opts:
1271 1272 # no exc on failure
1272 1273 del opts['_safe']
1273 1274 safe_call = True
1274 1275
1275 1276 if '_copts' in opts:
1276 1277 _copts.extend(opts['_copts'] or [])
1277 1278 del opts['_copts']
1278 1279
1279 1280 gitenv = os.environ.copy()
1280 1281 gitenv.update(opts.pop('extra_env', {}))
1281 1282         # drop any inherited GIT_DIR so it doesn't override the target repo
1282 1283 if 'GIT_DIR' in gitenv:
1283 1284 del gitenv['GIT_DIR']
1284 1285 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
1285 1286 gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'
1286 1287
1287 1288 cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
1288 1289 _opts = {'env': gitenv, 'shell': False}
1289 1290
1290 1291 proc = None
1291 1292 try:
1292 1293 _opts.update(opts)
1293 1294 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
1294 1295
1295 1296 return b''.join(proc), b''.join(proc.stderr)
1296 1297 except OSError as err:
1297 1298 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
1298 1299 tb_err = ("Couldn't run git command (%s).\n"
1299 1300 "Original error was:%s\n"
1300 1301 "Call options:%s\n"
1301 1302 % (cmd, err, _opts))
1302 1303 log.exception(tb_err)
1303 1304 if safe_call:
1304 1305 return '', err
1305 1306 else:
1306 1307 raise exceptions.VcsException()(tb_err)
1307 1308 finally:
1308 1309 if proc:
1309 1310 proc.close()
1310 1311
1311 1312 @reraise_safe_exceptions
1312 1313 def install_hooks(self, wire, force=False):
1313 1314 from vcsserver.hook_utils import install_git_hooks
1314 1315 bare = self.bare(wire)
1315 1316 path = wire['path']
1316 1317 binary_dir = settings.BINARY_DIR
1317 1318 executable = None
1318 1319 if binary_dir:
1319 1320 executable = os.path.join(binary_dir, 'python3')
1320 1321 return install_git_hooks(path, bare, force_create=force)
1321 1322
1322 1323 @reraise_safe_exceptions
1323 1324 def get_hooks_info(self, wire):
1324 1325 from vcsserver.hook_utils import (
1325 1326 get_git_pre_hook_version, get_git_post_hook_version)
1326 1327 bare = self.bare(wire)
1327 1328 path = wire['path']
1328 1329 return {
1329 1330 'pre_version': get_git_pre_hook_version(path, bare),
1330 1331 'post_version': get_git_post_hook_version(path, bare),
1331 1332 }
1332 1333
1333 1334 @reraise_safe_exceptions
1334 1335 def set_head_ref(self, wire, head_name):
1335 1336 log.debug('Setting refs/head to `%s`', head_name)
1336 1337         cmd = ['symbolic-ref', 'HEAD', 'refs/heads/%s' % head_name]
1337 1338 output, __ = self.run_git_command(wire, cmd)
1338 1339 return [head_name] + output.splitlines()
1339 1340
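    # Streams an archive of `commit_id`: file_walker loads the commit (sub)tree
    # into a libgit2 index and yields ArchiveNode entries, skipping submodule
    # (commit) entries; the shared archive_repo helper writes the archive.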
1340 1341 @reraise_safe_exceptions
1341 1342 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
1342 1343 archive_dir_name, commit_id):
1343 1344
1344 1345 def file_walker(_commit_id, path):
1345 1346 repo_init = self._factory.repo_libgit2(wire)
1346 1347
1347 1348 with repo_init as repo:
1348 1349 commit = repo[commit_id]
1349 1350
1350 1351 if path in ['', '/']:
1351 1352 tree = commit.tree
1352 1353 else:
1353 1354 tree = commit.tree[path.rstrip('/')]
1354 1355 tree_id = tree.id.hex
1355 1356 try:
1356 1357 tree = repo[tree_id]
1357 1358 except KeyError:
1358 1359 raise ObjectMissing('No tree with id: {}'.format(tree_id))
1359 1360
1360 1361 index = LibGit2Index.Index()
1361 1362 index.read_tree(tree)
1362 1363 file_iter = index
1363 1364
1364 1365 for fn in file_iter:
1365 1366 file_path = fn.path
1366 1367 mode = fn.mode
1367 1368 is_link = stat.S_ISLNK(mode)
1368 1369 if mode == pygit2.GIT_FILEMODE_COMMIT:
1369 1370 log.debug('Skipping path %s as a commit node', file_path)
1370 1371 continue
1371 1372 yield ArchiveNode(file_path, mode, is_link, repo[fn.hex].read_raw)
1372 1373
1373 1374 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
1374 1375 archive_dir_name, commit_id)