##// END OF EJS Templates
fix(LFS): fixed LFSOidStorage, enabled LFS fetching on repo fetch, fixed tests and python3 transition related leftovers. Fixes: RCCE-8
ilin.s -
r1194:a8552e75 default
parent child Browse files
Show More
@@ -1,47 +1,47 b''
1 1 # Copyright (C) 2014-2023 RhodeCode GmbH
2 2
3 3 """
4 4 Provides the same API as :mod:`remote_wsgi`.
5 5
6 6 Uses the `EchoApp` instead of real implementations.
7 7 """
8 8
9 9 import logging
10 10
11 11 from .echo_app import EchoApp
12 12 from vcsserver import wsgi_app_caller
13 13
14 14
15 15 log = logging.getLogger(__name__)
16 16
17 17
class GitRemoteWsgi:
    """Echo-backed stand-in for the real git remote WSGI handler."""

    def handle(self, environ, input_data, *args, **kwargs):
        # Build the echo WSGI app and run the request through the caller shim.
        echo_app = create_echo_wsgi_app(*args, **kwargs)
        caller = wsgi_app_caller.WSGIAppCaller(echo_app)
        return caller.handle(environ, input_data)
24 24
25 25
class HgRemoteWsgi:
    """Echo-backed stand-in for the real hg remote WSGI handler."""

    def handle(self, environ, input_data, *args, **kwargs):
        # Same dispatch shape as GitRemoteWsgi: wrap the echo app and delegate.
        echo_app = create_echo_wsgi_app(*args, **kwargs)
        caller = wsgi_app_caller.WSGIAppCaller(echo_app)
        return caller.handle(environ, input_data)
32 32
33 33
def create_echo_wsgi_app(repo_path, repo_name, config):
    """Validate `config` and return an EchoApp WSGI application."""
    log.debug("Creating EchoApp WSGI application")

    _assert_valid_config(config)

    # Remaining items are forwarded to have the extras available
    return EchoApp(repo_path, repo_name, config=config)
41 41
42 42
43 43 def _assert_valid_config(config):
44 44 config = config.copy()
45 45
46 46 # This is what git needs from config at this stage
47 config.pop(b'git_update_server_info')
47 config.pop('git_update_server_info')
@@ -1,175 +1,177 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import shutil
20 20 import logging
21 21 from collections import OrderedDict
22 22
23 23 log = logging.getLogger(__name__)
24 24
25 25
class OidHandler:
    """Builds git-lfs batch-API action payloads (download/upload) for one OID."""

    def __init__(self, store, repo_name, auth, oid, obj_size, obj_data, obj_href,
                 obj_verify_href=None):
        self.current_store = store
        self.repo_name = repo_name
        self.auth = auth
        self.oid = oid
        self.obj_size = obj_size
        self.obj_data = obj_data
        self.obj_href = obj_href
        self.obj_verify_href = obj_verify_href

    def get_store(self, mode=None):
        # `mode` is accepted for interface symmetry but not used here.
        return self.current_store

    def get_auth(self):
        """returns auth header for re-use in upload/download"""
        return " ".join(self.auth)

    def download(self):
        """Build the `download` action (or a 404 error entry) for this oid."""
        store = self.get_store()
        has_errors = None

        if not store.has_oid():
            # error reply back to client that something is wrong with dl
            has_errors = OrderedDict(
                error=OrderedDict(
                    code=404,
                    message=f'object: {store.oid} does not exist in store'
                )
            )

        download_action = OrderedDict(
            href=self.obj_href,
            header=OrderedDict([("Authorization", self.get_auth())])
        )
        response = None if has_errors else OrderedDict(download=download_action)
        return response, has_errors

    def upload(self, skip_existing=True):
        """
        Write upload action for git-lfs server
        """
        store = self.get_store()
        response = None
        has_errors = None

        # verify if we have the OID before, if we do, reply with empty
        if store.has_oid():
            log.debug('LFS: store already has oid %s', store.oid)

            # validate size
            store_size = store.size_oid()
            if store_size != self.obj_size:
                log.warning(
                    'LFS: size mismatch for oid:%s, in store:%s expected: %s',
                    self.oid, store_size, self.obj_size)
            elif skip_existing:
                log.debug('LFS: skipping further action as oid is existing')
                return response, has_errors

        chunked = ("Transfer-Encoding", "chunked")
        upload_action = OrderedDict(
            href=self.obj_href,
            header=OrderedDict([("Authorization", self.get_auth()), chunked])
        )
        if not has_errors:
            response = OrderedDict(upload=upload_action)
            # if specified in handler, return the verification endpoint
            if self.obj_verify_href:
                response['verify'] = OrderedDict(
                    href=self.obj_verify_href,
                    header=OrderedDict([("Authorization", self.get_auth())])
                )
        return response, has_errors

    def exec_operation(self, operation, *args, **kwargs):
        """Dispatch to `download`/`upload` by operation name."""
        handler = getattr(self, operation)
        log.debug('LFS: handling request using %s handler', handler)
        return handler(*args, **kwargs)
114 114
115 115
class LFSOidStore:
    """
    Filesystem-backed store for a single LFS object.

    Objects are sharded by the first two byte-pairs of the oid, mirroring
    git's own loose-object layout: ``<store>/objects/aa/bb/<oid>``.
    """

    def __init__(self, oid, repo, store_location=None):
        self.oid = oid
        self.repo = repo
        defined_store_path = store_location or self.get_default_store()
        self.store_suffix = f"/objects/{oid[:2]}/{oid[2:4]}"
        self.store_path = f"{defined_store_path.rstrip('/')}{self.store_suffix}"
        # uploads land in a .tmp file first, then get renamed into place
        self.tmp_oid_path = os.path.join(self.store_path, oid + '.tmp')
        self.oid_path = os.path.join(self.store_path, oid)
        self.fd = None

    def get_engine(self, mode):
        """
        engine = .get_engine(mode='wb')
        with engine as f:
            f.write('...')
        """

        class StoreEngine:
            """Context manager writing to a tmp file, promoted on success."""

            def __init__(self, mode, store_path, oid_path, tmp_oid_path):
                self.mode = mode
                self.store_path = store_path
                self.oid_path = oid_path
                self.tmp_oid_path = tmp_oid_path
                self.fd = None

            def __enter__(self):
                if not os.path.isdir(self.store_path):
                    os.makedirs(self.store_path)

                # TODO(marcink): maybe write metadata here with size/oid ?
                fd = open(self.tmp_oid_path, self.mode)
                self.fd = fd
                return fd

            def __exit__(self, exc_type, exc_value, traceback):
                # always close the tmp file first
                self.fd.close()
                if exc_type is None:
                    # success: promote the tmp file to its final destination
                    shutil.move(self.tmp_oid_path, self.oid_path)
                else:
                    # BUGFIX: on error do NOT commit the partial tmp file to
                    # the store (previously a truncated object was renamed
                    # into place and then reported by has_oid() as valid);
                    # drop the incomplete tmp file instead
                    if os.path.exists(self.tmp_oid_path):
                        os.remove(self.tmp_oid_path)

        return StoreEngine(
            mode, self.store_path, self.oid_path, self.tmp_oid_path)

    def get_default_store(self):
        """
        Default store, consistent with defaults of Mercurial large files store
        which is /home/username/.cache/largefiles
        """
        user_home = os.path.expanduser("~")
        return os.path.join(user_home, '.cache', 'lfs-store')

    def has_oid(self):
        """Return True when the object file exists in the sharded store path."""
        return os.path.exists(os.path.join(self.store_path, self.oid))

    def size_oid(self):
        """Return the stored object's size in bytes, or -1 when absent."""
        size = -1

        if self.has_oid():
            oid = os.path.join(self.store_path, self.oid)
            size = os.stat(oid).st_size

        return size
@@ -1,273 +1,274 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import pytest
20 20 from webtest.app import TestApp as WebObTestApp
21 21
22 22 from vcsserver.lib.rc_json import json
23 23 from vcsserver.str_utils import safe_bytes
24 24 from vcsserver.git_lfs.app import create_app
25 from vcsserver.git_lfs.lib import LFSOidStore
25 26
26 27
@pytest.fixture(scope='function')
def git_lfs_app(tmpdir):
    """LFS test app served over plain http, storing objects under tmpdir."""
    store_path = str(tmpdir)
    app = WebObTestApp(create_app(
        git_lfs_enabled=True, git_lfs_store_path=store_path,
        git_lfs_http_scheme='http'))
    # expose the store root so tests can seed/inspect objects directly
    app._store = store_path
    return app
34 35
35 36
@pytest.fixture(scope='function')
def git_lfs_https_app(tmpdir):
    """LFS test app configured to emit https hrefs, storing under tmpdir."""
    store_path = str(tmpdir)
    app = WebObTestApp(create_app(
        git_lfs_enabled=True, git_lfs_store_path=store_path,
        git_lfs_http_scheme='https'))
    # expose the store root so tests can seed/inspect objects directly
    app._store = store_path
    return app
43 44
44 45
@pytest.fixture()
def http_auth():
    # Static basic-auth header; the LFS test app echoes it back in actions.
    return {'HTTP_AUTHORIZATION': "Basic XXXXX"}
48 49
49 50
class TestLFSApplication:
    """
    Integration tests for the git-lfs WSGI app endpoints: batch API,
    verify, download and upload.  Object files are seeded/inspected at the
    sharded path computed by LFSOidStore so tests match the real layout.
    """

    def test_app_wrong_path(self, git_lfs_app):
        git_lfs_app.get('/repo/info/lfs/xxx', status=404)

    def test_app_deprecated_endpoint(self, git_lfs_app):
        # the pre-batch (v1) objects API is intentionally unsupported
        response = git_lfs_app.post('/repo/info/lfs/objects', status=501)
        assert response.status_code == 501
        assert json.loads(response.text) == {'message': 'LFS: v1 api not supported'}

    def test_app_lock_verify_api_not_available(self, git_lfs_app):
        response = git_lfs_app.post('/repo/info/lfs/locks/verify', status=501)
        assert response.status_code == 501
        assert json.loads(response.text) == {
            'message': 'GIT LFS locking api not supported'}

    def test_app_lock_api_not_available(self, git_lfs_app):
        response = git_lfs_app.post('/repo/info/lfs/locks', status=501)
        assert response.status_code == 501
        assert json.loads(response.text) == {
            'message': 'GIT LFS locking api not supported'}

    def test_app_batch_api_missing_auth(self, git_lfs_app):
        # batch endpoint requires an Authorization header
        git_lfs_app.post_json(
            '/repo/info/lfs/objects/batch', params={}, status=403)

    def test_app_batch_api_unsupported_operation(self, git_lfs_app, http_auth):
        response = git_lfs_app.post_json(
            '/repo/info/lfs/objects/batch', params={}, status=400,
            extra_environ=http_auth)
        assert json.loads(response.text) == {
            'message': 'unsupported operation mode: `None`'}

    def test_app_batch_api_missing_objects(self, git_lfs_app, http_auth):
        response = git_lfs_app.post_json(
            '/repo/info/lfs/objects/batch', params={'operation': 'download'},
            status=400, extra_environ=http_auth)
        assert json.loads(response.text) == {
            'message': 'missing objects data'}

    def test_app_batch_api_unsupported_data_in_objects(
            self, git_lfs_app, http_auth):
        # objects entries must carry oid+size
        params = {'operation': 'download',
                  'objects': [{}]}
        response = git_lfs_app.post_json(
            '/repo/info/lfs/objects/batch', params=params, status=400,
            extra_environ=http_auth)
        assert json.loads(response.text) == {
            'message': 'unsupported data in objects'}

    def test_app_batch_api_download_missing_object(
            self, git_lfs_app, http_auth):
        params = {'operation': 'download',
                  'objects': [{'oid': '123', 'size': '1024'}]}
        response = git_lfs_app.post_json(
            '/repo/info/lfs/objects/batch', params=params,
            extra_environ=http_auth)

        # per-object error entry, batch request itself still succeeds
        expected_objects = [
            {'authenticated': True,
             'errors': {'error': {
                 'code': 404,
                 'message': 'object: 123 does not exist in store'}},
             'oid': '123',
             'size': '1024'}
        ]
        assert json.loads(response.text) == {
            'objects': expected_objects, 'transfer': 'basic'}

    def test_app_batch_api_download(self, git_lfs_app, http_auth):
        oid = '456'
        # seed the object at the sharded path LFSOidStore computes
        oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
        if not os.path.isdir(os.path.dirname(oid_path)):
            os.makedirs(os.path.dirname(oid_path))
        with open(oid_path, 'wb') as f:
            f.write(safe_bytes('OID_CONTENT'))

        params = {'operation': 'download',
                  'objects': [{'oid': oid, 'size': '1024'}]}
        response = git_lfs_app.post_json(
            '/repo/info/lfs/objects/batch', params=params,
            extra_environ=http_auth)

        expected_objects = [
            {'authenticated': True,
             'actions': {
                 'download': {
                     'header': {'Authorization': 'Basic XXXXX'},
                     'href': 'http://localhost/repo/info/lfs/objects/456'},
             },
             'oid': '456',
             'size': '1024'}
        ]
        assert json.loads(response.text) == {
            'objects': expected_objects, 'transfer': 'basic'}

    def test_app_batch_api_upload(self, git_lfs_app, http_auth):
        params = {'operation': 'upload',
                  'objects': [{'oid': '123', 'size': '1024'}]}
        response = git_lfs_app.post_json(
            '/repo/info/lfs/objects/batch', params=params,
            extra_environ=http_auth)
        # upload action carries chunked encoding + a verify endpoint
        expected_objects = [
            {'authenticated': True,
             'actions': {
                 'upload': {
                     'header': {'Authorization': 'Basic XXXXX',
                                'Transfer-Encoding': 'chunked'},
                     'href': 'http://localhost/repo/info/lfs/objects/123'},
                 'verify': {
                     'header': {'Authorization': 'Basic XXXXX'},
                     'href': 'http://localhost/repo/info/lfs/verify'}
             },
             'oid': '123',
             'size': '1024'}
        ]
        assert json.loads(response.text) == {
            'objects': expected_objects, 'transfer': 'basic'}

    def test_app_batch_api_upload_for_https(self, git_lfs_https_app, http_auth):
        params = {'operation': 'upload',
                  'objects': [{'oid': '123', 'size': '1024'}]}
        response = git_lfs_https_app.post_json(
            '/repo/info/lfs/objects/batch', params=params,
            extra_environ=http_auth)
        # hrefs honor the configured https scheme
        expected_objects = [
            {'authenticated': True,
             'actions': {
                 'upload': {
                     'header': {'Authorization': 'Basic XXXXX',
                                'Transfer-Encoding': 'chunked'},
                     'href': 'https://localhost/repo/info/lfs/objects/123'},
                 'verify': {
                     'header': {'Authorization': 'Basic XXXXX'},
                     'href': 'https://localhost/repo/info/lfs/verify'}
             },
             'oid': '123',
             'size': '1024'}
        ]
        assert json.loads(response.text) == {
            'objects': expected_objects, 'transfer': 'basic'}

    def test_app_verify_api_missing_data(self, git_lfs_app):
        params = {'oid': 'missing'}
        response = git_lfs_app.post_json(
            '/repo/info/lfs/verify', params=params,
            status=400)

        assert json.loads(response.text) == {
            'message': 'missing oid and size in request data'}

    def test_app_verify_api_missing_obj(self, git_lfs_app):
        params = {'oid': 'missing', 'size': '1024'}
        response = git_lfs_app.post_json(
            '/repo/info/lfs/verify', params=params,
            status=404)

        assert json.loads(response.text) == {
            'message': 'oid `missing` does not exists in store'}

    def test_app_verify_api_size_mismatch(self, git_lfs_app):
        oid = 'existing'
        # seed the object at the sharded path LFSOidStore computes
        oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
        if not os.path.isdir(os.path.dirname(oid_path)):
            os.makedirs(os.path.dirname(oid_path))
        with open(oid_path, 'wb') as f:
            f.write(safe_bytes('OID_CONTENT'))

        params = {'oid': oid, 'size': '1024'}
        response = git_lfs_app.post_json(
            '/repo/info/lfs/verify', params=params, status=422)

        assert json.loads(response.text) == {
            'message': 'requested file size mismatch '
                       'store size:11 requested:1024'}

    def test_app_verify_api(self, git_lfs_app):
        oid = 'existing'
        # seed the object at the sharded path LFSOidStore computes
        oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
        if not os.path.isdir(os.path.dirname(oid_path)):
            os.makedirs(os.path.dirname(oid_path))
        with open(oid_path, 'wb') as f:
            f.write(safe_bytes('OID_CONTENT'))

        params = {'oid': oid, 'size': 11}
        response = git_lfs_app.post_json(
            '/repo/info/lfs/verify', params=params)

        assert json.loads(response.text) == {
            'message': {'size': 'ok', 'in_store': 'ok'}}

    def test_app_download_api_oid_not_existing(self, git_lfs_app):
        oid = 'missing'

        response = git_lfs_app.get(
            '/repo/info/lfs/objects/{oid}'.format(oid=oid), status=404)

        assert json.loads(response.text) == {
            'message': 'requested file with oid `missing` not found in store'}

    def test_app_download_api(self, git_lfs_app):
        oid = 'existing'
        # seed the object at the sharded path LFSOidStore computes
        oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
        if not os.path.isdir(os.path.dirname(oid_path)):
            os.makedirs(os.path.dirname(oid_path))
        with open(oid_path, 'wb') as f:
            f.write(safe_bytes('OID_CONTENT'))

        response = git_lfs_app.get(
            '/repo/info/lfs/objects/{oid}'.format(oid=oid))
        assert response

    def test_app_upload(self, git_lfs_app):
        oid = 'uploaded'

        response = git_lfs_app.put(
            '/repo/info/lfs/objects/{oid}'.format(oid=oid), params='CONTENT')

        assert json.loads(response.text) == {'upload': 'ok'}

        # verify that we actually wrote that OID
        oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
        assert os.path.isfile(oid_path)
        assert 'CONTENT' == open(oid_path).read()
@@ -1,1501 +1,1511 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23 import traceback
24 24 import urllib.request
25 25 import urllib.parse
26 26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 40 from dulwich.server import update_server_info
41 41
42 42 import rhodecode
43 43 from vcsserver import exceptions, settings, subprocessio
44 44 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
45 45 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
46 46 from vcsserver.hgcompat import (
47 47 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
48 48 from vcsserver.git_lfs.lib import LFSOidStore
49 49 from vcsserver.vcs_base import RemoteBase
50 50
# File-mode helpers built on stat/dulwich constants.
DIR_STAT = stat.S_IFDIR
FILE_MODE = stat.S_IFMT
GIT_LINK = objects.S_IFGITLINK
# Suffix marking peeled tag refs in git ref listings.
PEELED_REF_MARKER = b'^{}'
HEAD_MARKER = b'HEAD'

log = logging.getLogger(__name__)
58 58
59 59
def reraise_safe_exceptions(func):
    """Converts Dulwich exceptions to something neutral."""

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as e:
            # object/commit lookup failures -> neutral LookupException
            lookup_exc = exceptions.LookupException(org_exc=e)
            raise lookup_exc(safe_str(e))
        except (HangupException, UnexpectedCommandError) as e:
            # protocol-level failures -> neutral VcsException
            vcs_exc = exceptions.VcsException(org_exc=e)
            raise vcs_exc(safe_str(e))
        except Exception:
            # NOTE(marcink): because of how dulwich handles some exceptions
            # (KeyError on empty repos), we cannot track this and catch all
            # exceptions, it's an exceptions from other handlers
            raise
    return wrapper
82 82
83 83
class Repo(DulwichRepo):
    """
    A wrapper for dulwich Repo class.

    Since dulwich is sometimes keeping .idx file descriptors open, it leads to
    "Too many open files" error. We need to close all opened file descriptors
    once the repo object is destroyed.
    """
    def __del__(self):
        # guard: partially-constructed instances may lack object_store
        initialized = hasattr(self, 'object_store')
        if initialized:
            self.close()
95 95
96 96
class Repository(LibGit2Repo):
    """Context-manager variant of pygit2's Repository; frees native resources on exit."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # release libgit2 memory immediately instead of waiting for GC
        self.free()
104 104
105 105
class GitFactory(RepoFactory):
    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """Instantiate a repo object for wire['path'] (libgit2 or dulwich backed)."""
        if use_libgit2:
            repo_obj = Repository(safe_bytes(wire['path']))
        else:
            # dulwich mode
            str_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
            repo_obj = Repo(str_path)

        log.debug('repository created: got GIT object: %s', repo_obj)
        return repo_obj

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """Shortcut: repository backed by libgit2."""
        return self.repo(wire, use_libgit2=True)
128 128
129 129
def create_signature_from_string(author_str, **kwargs):
    """
    Creates a pygit2.Signature object from a string of the format 'Name <email>'.

    :param author_str: String of the format 'Name <email>'
    :return: pygit2.Signature object
    :raises ValueError: when the string does not match 'Name <email>'
    """
    parsed = re.match(r'^(.+) <(.+)>$', author_str)
    if not parsed:
        raise ValueError(f"Invalid format: {author_str}")

    name, email = parsed.group(1), parsed.group(2)
    return pygit2.Signature(name, email, **kwargs)
143 143
144 144
def get_obfuscated_url(url_obj):
    """Return str(url_obj) with the password and query string masked."""
    if url_obj.passwd:
        url_obj.passwd = b'*****'  # hide the credential in logs/messages
    url_obj.query = obfuscate_qs(url_obj.query)
    return str(url_obj)
150 150
151 151
152 152 class GitRemote(RemoteBase):
153 153
154 154 def __init__(self, factory):
155 155 self._factory = factory
156 156 self._bulk_methods = {
157 157 "date": self.date,
158 158 "author": self.author,
159 159 "branch": self.branch,
160 160 "message": self.message,
161 161 "parents": self.parents,
162 162 "_commit": self.revision,
163 163 }
164 164 self._bulk_file_methods = {
165 165 "size": self.get_node_size,
166 166 "data": self.get_node_data,
167 167 "flags": self.get_node_flags,
168 168 "is_binary": self.get_node_is_binary,
169 169 "md5": self.md5_hash
170 170 }
171 171
172 172 def _wire_to_config(self, wire):
173 173 if 'config' in wire:
174 174 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
175 175 return {}
176 176
177 177 def _remote_conf(self, config):
178 178 params = [
179 179 '-c', 'core.askpass=""',
180 180 ]
181 ssl_cert_dir = config.get('vcs_ssl_dir')
182 if ssl_cert_dir:
183 params.extend(['-c', f'http.sslCAinfo={ssl_cert_dir}'])
181 config_attrs = {
182 'vcs_ssl_dir': 'http.sslCAinfo={}',
183 'vcs_git_lfs_store_location': 'lfs.storage={}'
184 }
185 for key, param in config_attrs.items():
186 if value := config.get(key):
187 params.extend(['-c', param.format(value)])
184 188 return params
185 189
186 190 @reraise_safe_exceptions
187 191 def discover_git_version(self):
188 192 stdout, _ = self.run_git_command(
189 193 {}, ['--version'], _bare=True, _safe=True)
190 194 prefix = b'git version'
191 195 if stdout.startswith(prefix):
192 196 stdout = stdout[len(prefix):]
193 197 return safe_str(stdout.strip())
194 198
195 199 @reraise_safe_exceptions
196 200 def is_empty(self, wire):
197 201 repo_init = self._factory.repo_libgit2(wire)
198 202 with repo_init as repo:
199 203
200 204 try:
201 205 has_head = repo.head.name
202 206 if has_head:
203 207 return False
204 208
205 209 # NOTE(marcink): check again using more expensive method
206 210 return repo.is_empty
207 211 except Exception:
208 212 pass
209 213
210 214 return True
211 215
    @reraise_safe_exceptions
    def assert_correct_path(self, wire):
        """
        Check that wire['path'] points at a valid git repository.

        The result is cached per (context_uid, repo_id).  `fast_check` picks
        between a cheap discovery probe and a full repository open.
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _assert_correct_path(_context_uid, _repo_id, fast_check):
            if fast_check:
                # cheap: just look for a git directory layout
                path = safe_str(wire['path'])
                if pygit2.discover_repository(path):
                    return True
                return False
            else:
                # expensive: actually open the repository
                try:
                    repo_init = self._factory.repo_libgit2(wire)
                    with repo_init:
                        pass
                except pygit2.GitError:
                    path = wire.get('path')
                    tb = traceback.format_exc()
                    log.debug("Invalid Git path `%s`, tb: %s", path, tb)
                    return False
                return True

        return _assert_correct_path(context_uid, repo_id, True)
237 241
238 242 @reraise_safe_exceptions
239 243 def bare(self, wire):
240 244 repo_init = self._factory.repo_libgit2(wire)
241 245 with repo_init as repo:
242 246 return repo.is_bare
243 247
244 248 @reraise_safe_exceptions
245 249 def get_node_data(self, wire, commit_id, path):
246 250 repo_init = self._factory.repo_libgit2(wire)
247 251 with repo_init as repo:
248 252 commit = repo[commit_id]
249 253 blob_obj = commit.tree[path]
250 254
251 255 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
252 256 raise exceptions.LookupException()(
253 257 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
254 258
255 259 return BytesEnvelope(blob_obj.data)
256 260
257 261 @reraise_safe_exceptions
258 262 def get_node_size(self, wire, commit_id, path):
259 263 repo_init = self._factory.repo_libgit2(wire)
260 264 with repo_init as repo:
261 265 commit = repo[commit_id]
262 266 blob_obj = commit.tree[path]
263 267
264 268 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
265 269 raise exceptions.LookupException()(
266 270 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
267 271
268 272 return blob_obj.size
269 273
270 274 @reraise_safe_exceptions
271 275 def get_node_flags(self, wire, commit_id, path):
272 276 repo_init = self._factory.repo_libgit2(wire)
273 277 with repo_init as repo:
274 278 commit = repo[commit_id]
275 279 blob_obj = commit.tree[path]
276 280
277 281 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
278 282 raise exceptions.LookupException()(
279 283 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
280 284
281 285 return blob_obj.filemode
282 286
283 287 @reraise_safe_exceptions
284 288 def get_node_is_binary(self, wire, commit_id, path):
285 289 repo_init = self._factory.repo_libgit2(wire)
286 290 with repo_init as repo:
287 291 commit = repo[commit_id]
288 292 blob_obj = commit.tree[path]
289 293
290 294 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
291 295 raise exceptions.LookupException()(
292 296 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
293 297
294 298 return blob_obj.is_binary
295 299
296 300 @reraise_safe_exceptions
297 301 def blob_as_pretty_string(self, wire, sha):
298 302 repo_init = self._factory.repo_libgit2(wire)
299 303 with repo_init as repo:
300 304 blob_obj = repo[sha]
301 305 return BytesEnvelope(blob_obj.data)
302 306
    @reraise_safe_exceptions
    def blob_raw_length(self, wire, sha):
        """Return the size in bytes of blob `sha`; cached per (repo_id, sha)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _blob_raw_length(_repo_id, _sha):
            # `wire`/`sha` are captured from the closure; args only form the cache key
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                blob = repo[sha]
                return blob.size

        return _blob_raw_length(repo_id, sha)
317 321
318 322 def _parse_lfs_pointer(self, raw_content):
319 323 spec_string = b'version https://git-lfs.github.com/spec'
320 324 if raw_content and raw_content.startswith(spec_string):
321 325
322 326 pattern = re.compile(rb"""
323 327 (?:\n)?
324 328 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
325 329 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
326 330 ^size[ ](?P<oid_size>[0-9]+)\n
327 331 (?:\n)?
328 332 """, re.VERBOSE | re.MULTILINE)
329 333 match = pattern.match(raw_content)
330 334 if match:
331 335 return match.groupdict()
332 336
333 337 return {}
334 338
    @reraise_safe_exceptions
    def is_large_file(self, wire, commit_id):
        """
        Return parsed LFS pointer data for the object `commit_id`, or {} when
        the object is binary or not an LFS pointer.  Cached per (repo_id, sha).
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _is_large_file(_repo_id, _sha):
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                # NOTE(review): `commit_id` is used as a direct object id here —
                # presumably a blob sha despite the name; confirm with callers
                blob = repo[commit_id]
                if blob.is_binary:
                    return {}

                return self._parse_lfs_pointer(blob.data)

        return _is_large_file(repo_id, commit_id)
351 355
    @reraise_safe_exceptions
    def is_binary(self, wire, tree_id):
        """Return True when object `tree_id` holds binary data; cached per (repo_id, tree_id)."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _is_binary(_repo_id, _tree_id):
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                blob_obj = repo[tree_id]
                return blob_obj.is_binary

        return _is_binary(repo_id, tree_id)
365 369
    @reraise_safe_exceptions
    def md5_hash(self, wire, commit_id, path):
        """
        Validate that `path` at `commit_id` is a blob.

        NOTE(review): this always returns '' after validating the blob — the
        md5 appears to be computed elsewhere or intentionally disabled;
        confirm before relying on the return value.
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _md5_hash(_repo_id, _commit_id, _path):
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                commit = repo[_commit_id]
                blob_obj = commit.tree[_path]

                if blob_obj.type != pygit2.GIT_OBJ_BLOB:
                    raise exceptions.LookupException()(
                        f'Tree for commit_id:{_commit_id} is not a blob: {blob_obj.type_str}')

                return ''

        return _md5_hash(repo_id, commit_id, path)
385 389
386 390 @reraise_safe_exceptions
387 391 def in_largefiles_store(self, wire, oid):
388 392 conf = self._wire_to_config(wire)
389 393 repo_init = self._factory.repo_libgit2(wire)
390 394 with repo_init as repo:
391 395 repo_name = repo.path
392 396
393 397 store_location = conf.get('vcs_git_lfs_store_location')
394 398 if store_location:
395 399
396 400 store = LFSOidStore(
397 401 oid=oid, repo=repo_name, store_location=store_location)
398 402 return store.has_oid()
399 403
400 404 return False
401 405
402 406 @reraise_safe_exceptions
403 407 def store_path(self, wire, oid):
404 408 conf = self._wire_to_config(wire)
405 409 repo_init = self._factory.repo_libgit2(wire)
406 410 with repo_init as repo:
407 411 repo_name = repo.path
408 412
409 413 store_location = conf.get('vcs_git_lfs_store_location')
410 414 if store_location:
411 415 store = LFSOidStore(
412 416 oid=oid, repo=repo_name, store_location=store_location)
413 417 return store.oid_path
414 418 raise ValueError(f'Unable to fetch oid with path {oid}')
415 419
    @reraise_safe_exceptions
    def bulk_request(self, wire, rev, pre_load):
        """Resolve several commit attributes of ``rev`` in a single call.

        ``pre_load`` lists attribute names dispatched through
        ``self._bulk_methods``; an unknown name raises VcsException.
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _bulk_request(_repo_id, _rev, _pre_load):
            result = {}
            for attr in pre_load:
                try:
                    method = self._bulk_methods[attr]
                    wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
                    args = [wire, rev]
                    result[attr] = method(*args)
                except KeyError as e:
                    raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
            return result

        # sorted() keeps the cache key stable regardless of input order
        return _bulk_request(repo_id, rev, sorted(pre_load))
435 439
    @reraise_safe_exceptions
    def bulk_file_request(self, wire, commit_id, path, pre_load):
        """Resolve several file attributes for ``path`` at ``commit_id``.

        Same dispatch pattern as ``bulk_request`` but driven by
        ``self._bulk_file_methods``; the result dict is wrapped in a
        BinaryEnvelope.
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
            result = {}
            for attr in pre_load:
                try:
                    method = self._bulk_file_methods[attr]
                    wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
                    result[attr] = method(wire, _commit_id, _path)
                except KeyError as e:
                    raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
            return result

        return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
454 458
455 459 def _build_opener(self, url: str):
456 460 handlers = []
457 461 url_obj = url_parser(safe_bytes(url))
458 462 authinfo = url_obj.authinfo()[1]
459 463
460 464 if authinfo:
461 465 # create a password manager
462 466 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
463 467 passmgr.add_password(*authinfo)
464 468
465 469 handlers.extend((httpbasicauthhandler(passmgr),
466 470 httpdigestauthhandler(passmgr)))
467 471
468 472 return urllib.request.build_opener(*handlers)
469 473
    @reraise_safe_exceptions
    def check_url(self, url, config):
        """Verify that ``url`` serves a reachable git smart-HTTP repository.

        Performs a request against ``<url>/info/refs?service=git-upload-pack``
        and checks the payload looks like git protocol data.

        :raises exceptions.URLError: when the URL cannot be opened or the
            response does not look like a git repository.
        :return: True on success.
        """
        url_obj = url_parser(safe_bytes(url))

        test_uri = safe_str(url_obj.authinfo()[0])
        obfuscated_uri = get_obfuscated_url(url_obj)

        log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)

        if not test_uri.endswith('info/refs'):
            test_uri = test_uri.rstrip('/') + '/info/refs'

        o = self._build_opener(test_uri)
        o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

        q = {"service": 'git-upload-pack'}
        qs = f'?{urllib.parse.urlencode(q)}'
        cu = f"{test_uri}{qs}"

        try:
            req = urllib.request.Request(cu, None, {})
            log.debug("Trying to open URL %s", obfuscated_uri)
            resp = o.open(req)
            if resp.code != 200:
                raise exceptions.URLError()('Return Code is not 200')
        except Exception as e:
            log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
            # means it cannot be cloned
            raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")

        # now detect if it's proper git repo
        gitdata: bytes = resp.read()

        if b'service=git-upload-pack' in gitdata:
            pass
        elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
            # old style git can return some other format!
            pass
        else:
            e = None
            # NOTE(review): message says 'hg repo' — likely a copy/paste from
            # the mercurial backend; kept as-is since callers may match on it
            raise exceptions.URLError(e)(
                f"url [{obfuscated_uri}] does not look like an hg repo org_exc: {e}")

        return True
514 518
    @reraise_safe_exceptions
    def clone(self, wire, url, deferred, valid_refs, update_after_clone):
        """Clone by pulling all refs from ``url`` and applying the valid ones.

        :param deferred: ref-name suffix(es) to skip (so called deferred tags).
        :param valid_refs: prefix(es) a ref must start with to be applied.
        :param update_after_clone: when True, check out HEAD afterwards.
        """
        # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
        remote_refs = self.pull(wire, url, apply_refs=False)
        repo = self._factory.repo(wire)
        if isinstance(valid_refs, list):
            valid_refs = tuple(valid_refs)

        for k in remote_refs:
            # only parse heads/tags and skip so called deferred tags
            if k.startswith(valid_refs) and not k.endswith(deferred):
                repo[k] = remote_refs[k]

        if update_after_clone:
            # we want to checkout HEAD
            repo["HEAD"] = remote_refs["HEAD"]
            index.build_index_from_tree(repo.path, repo.index_path(),
                                        repo.object_store, repo["HEAD"].tree)
533 537
    @reraise_safe_exceptions
    def branch(self, wire, commit_id):
        """Return short names of all branches whose head equals ``commit_id``."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _branch(_context_uid, _repo_id, _commit_id):
            regex = re.compile('^refs/heads')

            def filter_with(ref):
                # keep only head refs that point exactly at the given commit
                return regex.match(ref[0]) and ref[1] == _commit_id

            branches = list(filter(filter_with, list(self.get_refs(wire).items())))
            return [x[0].split('refs/heads/')[-1] for x in branches]

        return _branch(context_uid, repo_id, commit_id)
550 554
    @reraise_safe_exceptions
    def commit_branches(self, wire, commit_id):
        """Return names of local branches that contain ``commit_id``."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _commit_branches(_context_uid, _repo_id, _commit_id):
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                branches = [x for x in repo.branches.with_commit(_commit_id)]
                return branches

        return _commit_branches(context_uid, repo_id, commit_id)
564 568
565 569 @reraise_safe_exceptions
566 570 def add_object(self, wire, content):
567 571 repo_init = self._factory.repo_libgit2(wire)
568 572 with repo_init as repo:
569 573 blob = objects.Blob()
570 574 blob.set_raw_string(content)
571 575 repo.object_store.add_object(blob)
572 576 return blob.id
573 577
    @reraise_safe_exceptions
    def create_commit(self, wire, author, committer, message, branch, new_tree_id,
                      date_args: list[int] | None = None,
                      parents: list | None = None):
        """Create a commit on ``branch`` pointing at ``new_tree_id``.

        :param author: author signature string; rebuilt with time/offset
            when ``date_args`` is given.
        :param committer: committer signature string, same handling.
        :param date_args: optional [commit_time, timezone_offset] pair.
        :param parents: optional parent commit ids; when omitted the branch
            head (or repo HEAD for a new branch) is used.
        :return: hex sha of the newly created commit.
        """

        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:

            if date_args:
                current_time, offset = date_args

                kw = {
                    'time': current_time,
                    'offset': offset
                }
                author = create_signature_from_string(author, **kw)
                committer = create_signature_from_string(committer, **kw)

            tree = new_tree_id
            if isinstance(tree, (bytes, str)):
                # validate this tree is in the repo...
                tree = repo[safe_str(tree)].id

            if parents:
                # run via sha's and validate them in repo
                parents = [repo[c].id for c in parents]
            else:
                parents = []
                # ensure we COMMIT on top of given branch head
                # check if this repo has ANY branches, otherwise it's a new branch case we need to make
                if branch in repo.branches.local:
                    parents += [repo.branches[branch].target]
                elif [x for x in repo.branches.local]:
                    parents += [repo.head.target]
                #else:
                # in case we want to commit on new branch we create it on top of HEAD
                #repo.branches.local.create(branch, repo.revparse_single('HEAD'))

            # # Create a new commit
            commit_oid = repo.create_commit(
                f'refs/heads/{branch}',  # the name of the reference to update
                author,  # the author of the commit
                committer,  # the committer of the commit
                message,  # the commit message
                tree,  # the tree produced by the index
                parents  # list of parents for the new commit, usually just one,
            )

            new_commit_id = safe_str(commit_oid)

            return new_commit_id
625 629
    @reraise_safe_exceptions
    def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
        """Build a tree from ``updated``/``removed`` paths and commit it on ``branch``.

        ``commit_data`` carries author/committer/message and timestamps.
        The branch ref is created/updated to point at the new commit, whose
        id is returned.
        """

        def mode2pygit(mode):
            """
            git only supports two filemode 644 and 755

            0o100755 -> 33261
            0o100644 -> 33188
            """
            return {
                0o100644: pygit2.GIT_FILEMODE_BLOB,
                0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
                0o120000: pygit2.GIT_FILEMODE_LINK
            }.get(mode) or pygit2.GIT_FILEMODE_BLOB

        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:
            repo_index = repo.index

            commit_parents = None
            if commit_tree and commit_data['parents']:
                commit_parents = commit_data['parents']
                parent_commit = repo[commit_parents[0]]
                # seed the index from the first parent's tree so untouched
                # files carry over into the new commit
                repo_index.read_tree(parent_commit.tree)

            for pathspec in updated:
                blob_id = repo.create_blob(pathspec['content'])
                ie = pygit2.IndexEntry(pathspec['path'], blob_id, mode2pygit(pathspec['mode']))
                repo_index.add(ie)

            for pathspec in removed:
                repo_index.remove(pathspec)

            # Write changes to the index
            repo_index.write()

            # Create a tree from the updated index
            written_commit_tree = repo_index.write_tree()

            new_tree_id = written_commit_tree

            author = commit_data['author']
            committer = commit_data['committer']
            message = commit_data['message']

            date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]

            new_commit_id = self.create_commit(wire, author, committer, message, branch,
                                               new_tree_id, date_args=date_args, parents=commit_parents)

            # libgit2, ensure the branch is there and exists
            self.create_branch(wire, branch, new_commit_id)

            # libgit2, set new ref to this created commit
            self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)

            return new_commit_id
684 688
    @reraise_safe_exceptions
    def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
        """Fetch from ``url`` into the repo via dulwich.

        :param apply_refs: when True, write the fetched refs into the repo.
        :param refs: optional ref names restricting what is fetched.
        :param update_after: when truthy, also move HEAD after fetching.
        :return: mapping of remote ref name -> sha.
        """
        if url != 'default' and '://' not in url:
            client = LocalGitClient(url)
        else:
            url_obj = url_parser(safe_bytes(url))
            o = self._build_opener(url)
            url = url_obj.authinfo()[0]
            client = HttpGitClient(base_url=url, opener=o)
        repo = self._factory.repo(wire)

        determine_wants = repo.object_store.determine_wants_all

        if refs:
            refs: list[bytes] = [ascii_bytes(x) for x in refs]

            def determine_wants_requested(_remote_refs):
                # keep only the hashes of the explicitly requested refs
                determined = []
                for ref_name, ref_hash in _remote_refs.items():
                    bytes_ref_name = safe_bytes(ref_name)

                    if bytes_ref_name in refs:
                        bytes_ref_hash = safe_bytes(ref_hash)
                        determined.append(bytes_ref_hash)
                return determined

            # swap with our custom requested wants
            determine_wants = determine_wants_requested

        try:
            remote_refs = client.fetch(
                path=url, target=repo, determine_wants=determine_wants)

        except NotGitRepository as e:
            log.warning(
                'Trying to fetch from "%s" failed, not a Git repository.', url)
            # Exception can contain unicode which we convert
            raise exceptions.AbortException(e)(repr(e))

        # mikhail: client.fetch() returns all the remote refs, but fetches only
        # refs filtered by `determine_wants` function. We need to filter result
        # as well
        if refs:
            remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}

        if apply_refs:
            # TODO: johbo: Needs proper test coverage with a git repository
            # that contains a tag object, so that we would end up with
            # a peeled ref at this point.
            for k in remote_refs:
                if k.endswith(PEELED_REF_MARKER):
                    log.debug("Skipping peeled reference %s", k)
                    continue
                repo[k] = remote_refs[k]

            if refs and not update_after:
                # update to ref
                # mikhail: explicitly set the head to the last ref.
                update_to_ref = refs[-1]
                if isinstance(update_after, str):
                    update_to_ref = update_after

                repo[HEAD_MARKER] = remote_refs[update_to_ref]

        if update_after:
            # we want to check out HEAD
            repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
            index.build_index_from_tree(repo.path, repo.index_path(),
                                        repo.object_store, repo[HEAD_MARKER].tree)

        if isinstance(remote_refs, FetchPackResult):
            return remote_refs.refs
        return remote_refs
758 762
759 763 @reraise_safe_exceptions
760 def sync_fetch(self, wire, url, refs=None, all_refs=False):
764 def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs):
761 765 self._factory.repo(wire)
762 766 if refs and not isinstance(refs, (list, tuple)):
763 767 refs = [refs]
764 768
765 769 config = self._wire_to_config(wire)
766 770 # get all remote refs we'll use to fetch later
767 771 cmd = ['ls-remote']
768 772 if not all_refs:
769 773 cmd += ['--heads', '--tags']
770 774 cmd += [url]
771 775 output, __ = self.run_git_command(
772 776 wire, cmd, fail_on_stderr=False,
773 777 _copts=self._remote_conf(config),
774 778 extra_env={'GIT_TERMINAL_PROMPT': '0'})
775 779
776 780 remote_refs = collections.OrderedDict()
777 781 fetch_refs = []
778 782
779 783 for ref_line in output.splitlines():
780 784 sha, ref = ref_line.split(b'\t')
781 785 sha = sha.strip()
782 786 if ref in remote_refs:
783 787 # duplicate, skip
784 788 continue
785 789 if ref.endswith(PEELED_REF_MARKER):
786 790 log.debug("Skipping peeled reference %s", ref)
787 791 continue
788 792 # don't sync HEAD
789 793 if ref in [HEAD_MARKER]:
790 794 continue
791 795
792 796 remote_refs[ref] = sha
793 797
794 798 if refs and sha in refs:
795 799 # we filter fetch using our specified refs
796 800 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
797 801 elif not refs:
798 802 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
799 803 log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
800 804
801 805 if fetch_refs:
802 806 for chunk in more_itertools.chunked(fetch_refs, 128):
803 807 fetch_refs_chunks = list(chunk)
804 808 log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
805 809 self.run_git_command(
806 810 wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
807 811 fail_on_stderr=False,
808 812 _copts=self._remote_conf(config),
809 813 extra_env={'GIT_TERMINAL_PROMPT': '0'})
814 if kwargs.get('sync_large_objects'):
815 self.run_git_command(
816 wire, ['lfs', 'fetch', url, '--all'],
817 fail_on_stderr=False,
818 _copts=self._remote_conf(config),
819 )
810 820
811 821 return remote_refs
812 822
813 823 @reraise_safe_exceptions
814 824 def sync_push(self, wire, url, refs=None):
815 825 if not self.check_url(url, wire):
816 826 return
817 827 config = self._wire_to_config(wire)
818 828 self._factory.repo(wire)
819 829 self.run_git_command(
820 830 wire, ['push', url, '--mirror'], fail_on_stderr=False,
821 831 _copts=self._remote_conf(config),
822 832 extra_env={'GIT_TERMINAL_PROMPT': '0'})
823 833
824 834 @reraise_safe_exceptions
825 835 def get_remote_refs(self, wire, url):
826 836 repo = Repo(url)
827 837 return repo.get_refs()
828 838
829 839 @reraise_safe_exceptions
830 840 def get_description(self, wire):
831 841 repo = self._factory.repo(wire)
832 842 return repo.get_description()
833 843
834 844 @reraise_safe_exceptions
835 845 def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
836 846 origin_repo_path = wire['path']
837 847 repo = self._factory.repo(wire)
838 848 # fetch from other_repo_path to our origin repo
839 849 LocalGitClient(thin_packs=False).fetch(other_repo_path, repo)
840 850
841 851 wire_remote = wire.copy()
842 852 wire_remote['path'] = other_repo_path
843 853 repo_remote = self._factory.repo(wire_remote)
844 854
845 855 # fetch from origin_repo_path to our remote repo
846 856 LocalGitClient(thin_packs=False).fetch(origin_repo_path, repo_remote)
847 857
848 858 revs = [
849 859 x.commit.id
850 860 for x in repo_remote.get_walker(include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])]
851 861 return revs
852 862
    @reraise_safe_exceptions
    def get_object(self, wire, sha, maybe_unreachable=False):
        """Resolve ``sha`` (any revparse-able spec) to commit metadata.

        Tags are peeled to the commit they point at. Unless
        ``maybe_unreachable`` is set, a commit addressed directly by its sha
        must be reachable from some branch or LookupException is raised.

        :return: dict with 'id', 'type', 'commit_id' and 'idx' keys.
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _get_object(_context_uid, _repo_id, _sha):
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:

                missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
                try:
                    commit = repo.revparse_single(sha)
                except KeyError:
                    # NOTE(marcink): KeyError doesn't give us any meaningful information
                    # here, we instead give something more explicit
                    e = exceptions.RefNotFoundException('SHA: %s not found', sha)
                    raise exceptions.LookupException(e)(missing_commit_err)
                except ValueError as e:
                    raise exceptions.LookupException(e)(missing_commit_err)

                is_tag = False
                if isinstance(commit, pygit2.Tag):
                    # peel the tag to the commit it points at
                    commit = repo.get(commit.target)
                    is_tag = True

                check_dangling = True
                if is_tag:
                    check_dangling = False

                if check_dangling and maybe_unreachable:
                    check_dangling = False

                # we used a reference and it parsed means we're not having a dangling commit
                if sha != commit.hex:
                    check_dangling = False

                if check_dangling:
                    # check for dangling commit
                    for branch in repo.branches.with_commit(commit.hex):
                        if branch:
                            break
                    else:
                        # NOTE(marcink): Empty error doesn't give us any meaningful information
                        # here, we instead give something more explicit
                        e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
                        raise exceptions.LookupException(e)(missing_commit_err)

                commit_id = commit.hex
                type_str = commit.type_str

                return {
                    'id': commit_id,
                    'type': type_str,
                    'commit_id': commit_id,
                    'idx': 0
                }

        return _get_object(context_uid, repo_id, sha)
912 922
    @reraise_safe_exceptions
    def get_refs(self, wire):
        """Return mapping of all branch/tag ref names to their target hex shas."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _get_refs(_context_uid, _repo_id):

            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                # only refs under refs/heads/ and refs/tags/ are exposed
                regex = re.compile('^refs/(heads|tags)/')
                return {x.name: x.target.hex for x in
                        [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}

        return _get_refs(context_uid, repo_id)
928 938
    @reraise_safe_exceptions
    def get_branch_pointers(self, wire):
        """Return mapping of branch head hex sha -> branch short name."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _get_branch_pointers(_context_uid, _repo_id):

            repo_init = self._factory.repo_libgit2(wire)
            regex = re.compile('^refs/heads')
            with repo_init as repo:
                branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
                return {x.target.hex: x.shorthand for x in branches}

        return _get_branch_pointers(context_uid, repo_id)
944 954
    @reraise_safe_exceptions
    def head(self, wire, show_exc=True):
        """Return the hex sha the repository HEAD points at.

        When HEAD cannot be resolved the error is re-raised only if
        ``show_exc`` is True; otherwise None is returned implicitly.
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _head(_context_uid, _repo_id, _show_exc):
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                try:
                    return repo.head.peel().hex
                except Exception:
                    if show_exc:
                        raise
        return _head(context_uid, repo_id, show_exc)
960 970
961 971 @reraise_safe_exceptions
962 972 def init(self, wire):
963 973 repo_path = safe_str(wire['path'])
964 974 os.makedirs(repo_path, mode=0o755)
965 975 pygit2.init_repository(repo_path, bare=False)
966 976
967 977 @reraise_safe_exceptions
968 978 def init_bare(self, wire):
969 979 repo_path = safe_str(wire['path'])
970 980 os.makedirs(repo_path, mode=0o755)
971 981 pygit2.init_repository(repo_path, bare=True)
972 982
    @reraise_safe_exceptions
    def revision(self, wire, rev):
        """Return {'id': sha, 'tree': tree_sha (commits only)} for ``rev``."""

        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _revision(_context_uid, _repo_id, _rev):
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                commit = repo[rev]
                obj_data = {
                    'id': commit.id.hex,
                }
                # tree objects itself don't have tree_id attribute
                if hasattr(commit, 'tree_id'):
                    obj_data['tree'] = commit.tree_id.hex

                return obj_data
        return _revision(context_uid, repo_id, rev)
993 1003
    @reraise_safe_exceptions
    def date(self, wire, commit_id):
        """Return [commit_time, commit_time_offset] for ``commit_id``."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _date(_repo_id, _commit_id):
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                commit = repo[commit_id]

                if hasattr(commit, 'commit_time'):
                    commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
                else:
                    # presumably a tag object; resolve to its target first
                    commit = commit.get_object()
                    commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset

                # TODO(marcink): check dulwich difference of offset vs timezone
                return [commit_time, commit_time_offset]
        return _date(repo_id, commit_id)
1014 1024
    @reraise_safe_exceptions
    def author(self, wire, commit_id):
        """Return author as "Name <email>", or name only when email is missing."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _author(_repo_id, _commit_id):
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                commit = repo[commit_id]

                if hasattr(commit, 'author'):
                    author = commit.author
                else:
                    # presumably a tag object; use its target's author
                    author = commit.get_object().author

                if author.email:
                    return f"{author.name} <{author.email}>"

                try:
                    return f"{author.name}"
                except Exception:
                    # name may not be decodable; fall back to the raw bytes
                    return f"{safe_str(author.raw_name)}"

        return _author(repo_id, commit_id)
1040 1050
    @reraise_safe_exceptions
    def message(self, wire, commit_id):
        """Return the full commit message of ``commit_id``."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _message(_repo_id, _commit_id):
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                commit = repo[commit_id]
                return commit.message
        return _message(repo_id, commit_id)
1053 1063
    @reraise_safe_exceptions
    def parents(self, wire, commit_id):
        """Return hex ids of the parent commits of ``commit_id``."""
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _parents(_repo_id, _commit_id):
            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                commit = repo[commit_id]
                if hasattr(commit, 'parent_ids'):
                    parent_ids = commit.parent_ids
                else:
                    # presumably a tag object; resolve to its target commit
                    parent_ids = commit.get_object().parent_ids

                return [x.hex for x in parent_ids]
        return _parents(repo_id, commit_id)
1071 1081
    @reraise_safe_exceptions
    def children(self, wire, commit_id):
        """Return ids of the direct children of ``commit_id``.

        Uses ``git rev-list --children`` and picks the first output line
        starting with ``commit_id``: everything after the first sha on that
        line are its children.
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        head = self.head(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _children(_repo_id, _commit_id):

            output, __ = self.run_git_command(
                wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])

            child_ids = []
            pat = re.compile(fr'^{commit_id}')
            for line in output.splitlines():
                line = safe_str(line)
                if pat.match(line):
                    found_ids = line.split(' ')[1:]
                    child_ids.extend(found_ids)
                    break

            return child_ids
        return _children(repo_id, commit_id)
1096 1106
1097 1107 @reraise_safe_exceptions
1098 1108 def set_refs(self, wire, key, value):
1099 1109 repo_init = self._factory.repo_libgit2(wire)
1100 1110 with repo_init as repo:
1101 1111 repo.references.create(key, value, force=True)
1102 1112
1103 1113 @reraise_safe_exceptions
1104 1114 def update_refs(self, wire, key, value):
1105 1115 repo_init = self._factory.repo_libgit2(wire)
1106 1116 with repo_init as repo:
1107 1117 if key not in repo.references:
1108 1118 raise ValueError(f'Reference {key} not found in the repository')
1109 1119 repo.references.create(key, value, force=True)
1110 1120
1111 1121 @reraise_safe_exceptions
1112 1122 def create_branch(self, wire, branch_name, commit_id, force=False):
1113 1123 repo_init = self._factory.repo_libgit2(wire)
1114 1124 with repo_init as repo:
1115 1125 if commit_id:
1116 1126 commit = repo[commit_id]
1117 1127 else:
1118 1128 # if commit is not given just use the HEAD
1119 1129 commit = repo.head()
1120 1130
1121 1131 if force:
1122 1132 repo.branches.local.create(branch_name, commit, force=force)
1123 1133 elif not repo.branches.get(branch_name):
1124 1134 # create only if that branch isn't existing
1125 1135 repo.branches.local.create(branch_name, commit, force=force)
1126 1136
1127 1137 @reraise_safe_exceptions
1128 1138 def remove_ref(self, wire, key):
1129 1139 repo_init = self._factory.repo_libgit2(wire)
1130 1140 with repo_init as repo:
1131 1141 repo.references.delete(key)
1132 1142
1133 1143 @reraise_safe_exceptions
1134 1144 def tag_remove(self, wire, tag_name):
1135 1145 repo_init = self._factory.repo_libgit2(wire)
1136 1146 with repo_init as repo:
1137 1147 key = f'refs/tags/{tag_name}'
1138 1148 repo.references.delete(key)
1139 1149
1140 1150 @reraise_safe_exceptions
1141 1151 def tree_changes(self, wire, source_id, target_id):
1142 1152 repo = self._factory.repo(wire)
1143 1153 # source can be empty
1144 1154 source_id = safe_bytes(source_id if source_id else b'')
1145 1155 target_id = safe_bytes(target_id)
1146 1156
1147 1157 source = repo[source_id].tree if source_id else None
1148 1158 target = repo[target_id].tree
1149 1159 result = repo.object_store.tree_changes(source, target)
1150 1160
1151 1161 added = set()
1152 1162 modified = set()
1153 1163 deleted = set()
1154 1164 for (old_path, new_path), (_, _), (_, _) in list(result):
1155 1165 if new_path and old_path:
1156 1166 modified.add(new_path)
1157 1167 elif new_path and not old_path:
1158 1168 added.add(new_path)
1159 1169 elif not new_path and old_path:
1160 1170 deleted.add(old_path)
1161 1171
1162 1172 return list(added), list(modified), list(deleted)
1163 1173
    @reraise_safe_exceptions
    def tree_and_type_for_path(self, wire, commit_id, path):
        """Return (tree_id_hex, type_str, filemode) for ``path`` at ``commit_id``.

        Returns (None, None, None) when the path does not exist in the tree.
        """

        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
            repo_init = self._factory.repo_libgit2(wire)

            with repo_init as repo:
                commit = repo[commit_id]
                try:
                    tree = commit.tree[path]
                except KeyError:
                    return None, None, None

                return tree.id.hex, tree.type_str, tree.filemode
        return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1183 1193
    @reraise_safe_exceptions
    def tree_items(self, wire, tree_id):
        """List entries of tree ``tree_id`` as (name, mode, sha, type) tuples.

        :raises ObjectMissing: when no tree with the given id exists.
        """
        cache_on, context_uid, repo_id = self._cache_on(wire)
        region = self._region(wire)

        @region.conditional_cache_on_arguments(condition=cache_on)
        def _tree_items(_repo_id, _tree_id):

            repo_init = self._factory.repo_libgit2(wire)
            with repo_init as repo:
                try:
                    tree = repo[tree_id]
                except KeyError:
                    raise ObjectMissing(f'No tree with id: {tree_id}')

                result = []
                for item in tree:
                    item_sha = item.hex
                    item_mode = item.filemode
                    item_type = item.type_str

                    if item_type == 'commit':
                        # NOTE(marcink): submodules we translate to 'link' for backward compat
                        item_type = 'link'

                    result.append((item.name, item_mode, item_sha, item_type))
                return result
        return _tree_items(repo_id, tree_id)
1212 1222
1213 1223 @reraise_safe_exceptions
1214 1224 def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1215 1225 """
1216 1226 Old version that uses subprocess to call diff
1217 1227 """
1218 1228
1219 1229 flags = [
1220 1230 f'-U{context}', '--patch',
1221 1231 '--binary',
1222 1232 '--find-renames',
1223 1233 '--no-indent-heuristic',
1224 1234 # '--indent-heuristic',
1225 1235 #'--full-index',
1226 1236 #'--abbrev=40'
1227 1237 ]
1228 1238
1229 1239 if opt_ignorews:
1230 1240 flags.append('--ignore-all-space')
1231 1241
1232 1242 if commit_id_1 == self.EMPTY_COMMIT:
1233 1243 cmd = ['show'] + flags + [commit_id_2]
1234 1244 else:
1235 1245 cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
1236 1246
1237 1247 if file_filter:
1238 1248 cmd.extend(['--', file_filter])
1239 1249
1240 1250 diff, __ = self.run_git_command(wire, cmd)
1241 1251 # If we used 'show' command, strip first few lines (until actual diff
1242 1252 # starts)
1243 1253 if commit_id_1 == self.EMPTY_COMMIT:
1244 1254 lines = diff.splitlines()
1245 1255 x = 0
1246 1256 for line in lines:
1247 1257 if line.startswith(b'diff'):
1248 1258 break
1249 1259 x += 1
1250 1260 # Append new line just like 'diff' command do
1251 1261 diff = '\n'.join(lines[x:]) + '\n'
1252 1262 return diff
1253 1263
    @reraise_safe_exceptions
    def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
        """
        Produce a diff between two commits using libgit2 (pygit2).

        Returns a :class:`BytesEnvelope` with the patch text. When
        ``file_filter`` is given, only the patch for that exact old-file path
        is returned (empty envelope if the path is not part of the diff).
        """
        repo_init = self._factory.repo_libgit2(wire)

        with repo_init as repo:
            # swap reverses old/new sides so the diff reads commit_id_1 -> commit_id_2
            swap = True
            flags = 0
            flags |= pygit2.GIT_DIFF_SHOW_BINARY

            if opt_ignorews:
                flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE

            if commit_id_1 == self.EMPTY_COMMIT:
                # diff against the empty tree: everything in commit_id_2 is "added"
                comm1 = repo[commit_id_2]
                diff_obj = comm1.tree.diff_to_tree(
                    flags=flags, context_lines=context, swap=swap)

            else:
                comm1 = repo[commit_id_2]
                comm2 = repo[commit_id_1]
                diff_obj = comm1.tree.diff_to_tree(
                    comm2.tree, flags=flags, context_lines=context, swap=swap)
                # rename detection only makes sense for a two-commit diff
                similar_flags = 0
                similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
                diff_obj.find_similar(flags=similar_flags)

            if file_filter:
                for p in diff_obj:
                    if p.delta.old_file.path == file_filter:
                        return BytesEnvelope(p.data) or BytesEnvelope(b'')
                # no matching path == no diff
                return BytesEnvelope(b'')

            return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')
1288 1298
1289 1299 @reraise_safe_exceptions
1290 1300 def node_history(self, wire, commit_id, path, limit):
1291 1301 cache_on, context_uid, repo_id = self._cache_on(wire)
1292 1302 region = self._region(wire)
1293 1303
1294 1304 @region.conditional_cache_on_arguments(condition=cache_on)
1295 1305 def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
1296 1306 # optimize for n==1, rev-list is much faster for that use-case
1297 1307 if limit == 1:
1298 1308 cmd = ['rev-list', '-1', commit_id, '--', path]
1299 1309 else:
1300 1310 cmd = ['log']
1301 1311 if limit:
1302 1312 cmd.extend(['-n', str(safe_int(limit, 0))])
1303 1313 cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
1304 1314
1305 1315 output, __ = self.run_git_command(wire, cmd)
1306 1316 commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
1307 1317
1308 1318 return [x for x in commit_ids]
1309 1319 return _node_history(context_uid, repo_id, commit_id, path, limit)
1310 1320
1311 1321 @reraise_safe_exceptions
1312 1322 def node_annotate_legacy(self, wire, commit_id, path):
1313 1323 # note: replaced by pygit2 implementation
1314 1324 cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
1315 1325 # -l ==> outputs long shas (and we need all 40 characters)
1316 1326 # --root ==> doesn't put '^' character for boundaries
1317 1327 # -r commit_id ==> blames for the given commit
1318 1328 output, __ = self.run_git_command(wire, cmd)
1319 1329
1320 1330 result = []
1321 1331 for i, blame_line in enumerate(output.splitlines()[:-1]):
1322 1332 line_no = i + 1
1323 1333 blame_commit_id, line = re.split(rb' ', blame_line, 1)
1324 1334 result.append((line_no, blame_commit_id, line))
1325 1335
1326 1336 return result
1327 1337
1328 1338 @reraise_safe_exceptions
1329 1339 def node_annotate(self, wire, commit_id, path):
1330 1340
1331 1341 result_libgit = []
1332 1342 repo_init = self._factory.repo_libgit2(wire)
1333 1343 with repo_init as repo:
1334 1344 commit = repo[commit_id]
1335 1345 blame_obj = repo.blame(path, newest_commit=commit_id)
1336 1346 for i, line in enumerate(commit.tree[path].data.splitlines()):
1337 1347 line_no = i + 1
1338 1348 hunk = blame_obj.for_line(line_no)
1339 1349 blame_commit_id = hunk.final_commit_id.hex
1340 1350
1341 1351 result_libgit.append((line_no, blame_commit_id, line))
1342 1352
1343 1353 return BinaryEnvelope(result_libgit)
1344 1354
    @reraise_safe_exceptions
    def update_server_info(self, wire):
        # Regenerate the auxiliary files needed by "dumb" HTTP transport
        # clients. The bare-name call below resolves to the module-level
        # `update_server_info` helper imported at the top of this file
        # (not to this method) — intentional, if confusing, shadowing.
        repo = self._factory.repo(wire)
        update_server_info(repo)
1349 1359
1350 1360 @reraise_safe_exceptions
1351 1361 def get_all_commit_ids(self, wire):
1352 1362
1353 1363 cache_on, context_uid, repo_id = self._cache_on(wire)
1354 1364 region = self._region(wire)
1355 1365
1356 1366 @region.conditional_cache_on_arguments(condition=cache_on)
1357 1367 def _get_all_commit_ids(_context_uid, _repo_id):
1358 1368
1359 1369 cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
1360 1370 try:
1361 1371 output, __ = self.run_git_command(wire, cmd)
1362 1372 return output.splitlines()
1363 1373 except Exception:
1364 1374 # Can be raised for empty repositories
1365 1375 return []
1366 1376
1367 1377 @region.conditional_cache_on_arguments(condition=cache_on)
1368 1378 def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
1369 1379 repo_init = self._factory.repo_libgit2(wire)
1370 1380 from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
1371 1381 results = []
1372 1382 with repo_init as repo:
1373 1383 for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
1374 1384 results.append(commit.id.hex)
1375 1385
1376 1386 return _get_all_commit_ids(context_uid, repo_id)
1377 1387
    @reraise_safe_exceptions
    def run_git_command(self, wire, cmd, **opts):
        """
        Run a git subcommand for the repository referenced by ``wire`` and
        return ``(stdout_bytes, stderr_bytes)``.

        Recognized pseudo-options in ``opts`` (consumed, not forwarded):
          - ``_bare``:  skip the default ``-c`` config options
          - ``_safe``:  on OSError return ``('', err)`` instead of raising
          - ``_copts``: extra ``git -c`` style options to prepend
          - ``extra_env``: mapping merged into the subprocess environment
        Remaining ``opts`` are passed through to SubprocessIOChunker.

        :raises exceptions.VcsException: when the command cannot be run and
            ``_safe`` was not requested.
        """
        path = wire.get('path', None)
        debug_mode = rhodecode.ConfigGet().get_bool('debug')

        # run from inside the repository when we know where it lives
        if path and os.path.isdir(path):
            opts['cwd'] = path

        if '_bare' in opts:
            _copts = []
            del opts['_bare']
        else:
            _copts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']
        safe_call = False
        if '_safe' in opts:
            # no exc on failure
            del opts['_safe']
            safe_call = True

        if '_copts' in opts:
            _copts.extend(opts['_copts'] or [])
            del opts['_copts']

        gitenv = os.environ.copy()
        gitenv.update(opts.pop('extra_env', {}))
        # need to clean fix GIT_DIR !
        if 'GIT_DIR' in gitenv:
            del gitenv['GIT_DIR']
        # do not let user/system git config leak into server-side operations
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
        gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

        cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
        _opts = {'env': gitenv, 'shell': False}

        proc = None
        try:
            _opts.update(opts)
            proc = subprocessio.SubprocessIOChunker(cmd, **_opts)

            # NOTE(review): joining `proc` drains stdout chunks; `proc.stderr`
            # is presumably an iterable of bytes chunks too — confirm against
            # subprocessio.SubprocessIOChunker.
            return b''.join(proc), b''.join(proc.stderr)
        except OSError as err:
            cmd = ' '.join(map(safe_str, cmd))  # human friendly CMD
            call_opts = {}
            if debug_mode:
                call_opts = _opts

            tb_err = ("Couldn't run git command ({}).\n"
                      "Original error was:{}\n"
                      "Call options:{}\n"
                      .format(cmd, err, call_opts))
            log.exception(tb_err)
            if safe_call:
                # NOTE(review): returns str '' here while the success path
                # returns bytes — callers must tolerate both; verify.
                return '', err
            else:
                # project convention: VcsException() builds an exception factory
                raise exceptions.VcsException()(tb_err)
        finally:
            if proc:
                proc.close()
1436 1446
1437 1447 @reraise_safe_exceptions
1438 1448 def install_hooks(self, wire, force=False):
1439 1449 from vcsserver.hook_utils import install_git_hooks
1440 1450 bare = self.bare(wire)
1441 1451 path = wire['path']
1442 1452 binary_dir = settings.BINARY_DIR
1443 1453 if binary_dir:
1444 1454 os.path.join(binary_dir, 'python3')
1445 1455 return install_git_hooks(path, bare, force_create=force)
1446 1456
1447 1457 @reraise_safe_exceptions
1448 1458 def get_hooks_info(self, wire):
1449 1459 from vcsserver.hook_utils import (
1450 1460 get_git_pre_hook_version, get_git_post_hook_version)
1451 1461 bare = self.bare(wire)
1452 1462 path = wire['path']
1453 1463 return {
1454 1464 'pre_version': get_git_pre_hook_version(path, bare),
1455 1465 'post_version': get_git_post_hook_version(path, bare),
1456 1466 }
1457 1467
1458 1468 @reraise_safe_exceptions
1459 1469 def set_head_ref(self, wire, head_name):
1460 1470 log.debug('Setting refs/head to `%s`', head_name)
1461 1471 repo_init = self._factory.repo_libgit2(wire)
1462 1472 with repo_init as repo:
1463 1473 repo.set_head(f'refs/heads/{head_name}')
1464 1474
1465 1475 return [head_name] + [f'set HEAD to refs/heads/{head_name}']
1466 1476
    @reraise_safe_exceptions
    def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
                     archive_dir_name, commit_id, cache_config):
        """
        Build (and cache) an archive of the repository content at ``commit_id``.

        Delegates the actual packing to :func:`store_archive_in_cache`, feeding
        it ``file_walker`` — a generator of :class:`ArchiveNode` entries for
        every blob under ``path`` in the commit's tree.
        """

        def file_walker(_commit_id, path):
            # NOTE: the closure deliberately uses the outer `commit_id`;
            # the `_commit_id` parameter is ignored.
            repo_init = self._factory.repo_libgit2(wire)

            with repo_init as repo:
                commit = repo[commit_id]

                if path in ['', '/']:
                    tree = commit.tree
                else:
                    tree = commit.tree[path.rstrip('/')]
                tree_id = tree.id.hex
                try:
                    tree = repo[tree_id]
                except KeyError:
                    raise ObjectMissing(f'No tree with id: {tree_id}')

                # flatten the (sub)tree into a walkable index of file entries
                index = LibGit2Index.Index()
                index.read_tree(tree)
                file_iter = index

                for file_node in file_iter:
                    file_path = file_node.path
                    mode = file_node.mode
                    is_link = stat.S_ISLNK(mode)
                    if mode == pygit2.GIT_FILEMODE_COMMIT:
                        # submodule pointers have no content to archive
                        log.debug('Skipping path %s as a commit node', file_path)
                        continue
                    # read_raw is passed as a callable so content is read lazily
                    yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)

        return store_archive_in_cache(
            file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
@@ -1,255 +1,255 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import logging
20 20 import itertools
21 21
22 22 import mercurial
23 23 import mercurial.error
24 24 import mercurial.wireprotoserver
25 25 import mercurial.hgweb.common
26 26 import mercurial.hgweb.hgweb_mod
27 27 import webob.exc
28 28
29 29 from vcsserver import pygrack, exceptions, settings, git_lfs
30 30 from vcsserver.str_utils import ascii_bytes, safe_bytes
31 31
32 32 log = logging.getLogger(__name__)
33 33
34 34
# propagated from mercurial documentation
# hgrc sections that are read and applied onto the base ui object
# (see update_hg_ui_from_hgrc below)
HG_UI_SECTIONS = [
    'alias', 'auth', 'decode/encode', 'defaults', 'diff', 'email', 'extensions',
    'format', 'merge-patterns', 'merge-tools', 'hooks', 'http_proxy', 'smtp',
    'patch', 'paths', 'profiling', 'server', 'trusted', 'ui', 'web',
]
41 41
42 42
class HgWeb(mercurial.hgweb.hgweb_mod.hgweb):
    """Extension of hgweb that simplifies some functions."""

    def _get_view(self, repo):
        """Views are not supported."""
        return repo

    def loadsubweb(self):
        """The result is only used in the templater method which is not used."""
        return None

    def run(self):
        """Unused function so raise an exception if accidentally called."""
        raise NotImplementedError

    def templater(self, req):
        """Function used in an unreachable code path.

        This code is unreachable because we guarantee that the HTTP request,
        corresponds to a Mercurial command. See the is_hg method. So, we are
        never going to get a user-visible url.
        """
        raise NotImplementedError

    def archivelist(self, nodeid):
        """Unused function so raise an exception if accidentally called."""
        raise NotImplementedError

    def __call__(self, environ, start_response):
        """Run the WSGI application.

        This may be called by multiple threads.
        """
        from mercurial.hgweb import request as requestmod
        req = requestmod.parserequestfromenv(environ)
        res = requestmod.wsgiresponse(req, start_response)
        gen = self.run_wsgi(req, res)

        # `first_chunk` starts as a falsy sentinel and is rebound to a
        # generator function only if we manage to pull the first chunk below.
        first_chunk = None

        try:
            # eagerly consume the first chunk so that any error raised while
            # producing it happens here, before we hand the stream back
            data = next(gen)

            def first_chunk():
                yield data
        except StopIteration:
            # empty response body: fall through and return the exhausted gen
            pass

        if first_chunk:
            # re-prepend the consumed chunk to the remaining stream
            return itertools.chain(first_chunk(), gen)
        return gen

    def _runwsgi(self, req, res, repo):
        # reject anything that is not a known wire-protocol command before
        # delegating to the stock hgweb implementation
        cmd = req.qsparams.get(b'cmd', '')
        if not mercurial.wireprotoserver.iscmd(cmd):
            # NOTE(marcink): for unsupported commands, we return bad request
            # internally from HG
            log.warning('cmd: `%s` is not supported by the mercurial wireprotocol v1', cmd)
            from mercurial.hgweb.common import statusmessage
            res.status = statusmessage(mercurial.hgweb.common.HTTP_BAD_REQUEST)
            res.setbodybytes(b'')
            return res.sendresponse()

        return super()._runwsgi(req, res, repo)
108 108
109 109
def sanitize_hg_ui(baseui):
    """Strip the deprecated ``hgsubversion`` extension from a ui object.

    Since python3, hgsubversion is deprecated; old installations might still
    have it enabled in their configuration, so it is explicitly removed from
    every config layer to make sure it won't propagate further.
    """
    if baseui.config(b'extensions', b'hgsubversion') is None:
        return

    for layer in (baseui._ocfg, baseui._tcfg, baseui._ucfg):
        if b'extensions' not in layer:
            continue
        extensions = layer[b'extensions']
        if b'hgsubversion' in extensions:
            del extensions[b'hgsubversion']
120 120
121 121
def make_hg_ui_from_config(repo_config):
    """Create a fresh mercurial ui object populated from ``repo_config``.

    ``repo_config`` is an iterable of ``(section, option, value)`` triples.
    The ui starts from empty config layers so only the provided settings
    (plus a forced quiet mode) apply.
    """
    baseui = mercurial.ui.ui()

    # clean the baseui object
    baseui._ocfg = mercurial.config.config()
    baseui._ucfg = mercurial.config.config()
    baseui._tcfg = mercurial.config.config()

    def _b(value):
        return ascii_bytes(value, allow_bytes=True)

    for section, option, value in repo_config:
        baseui.setconfig(_b(section), _b(option), _b(value))

    # make our hgweb quiet so it doesn't print output
    baseui.setconfig(b'ui', b'quiet', b'true')

    return baseui
140 140
141 141
def update_hg_ui_from_hgrc(baseui, repo_path):
    """Overlay settings from the repository's ``.hg/hgrc`` onto ``baseui``.

    Only sections listed in :data:`HG_UI_SECTIONS` are applied. Missing
    hgrc files are silently skipped (with a debug log).
    """
    hgrc_path = os.path.join(repo_path, '.hg', 'hgrc')

    if not os.path.isfile(hgrc_path):
        log.debug('hgrc file is not present at %s, skipping...', hgrc_path)
        return

    log.debug('reading hgrc from %s', hgrc_path)
    parsed = mercurial.config.config()
    parsed.read(ascii_bytes(hgrc_path))
    for section in HG_UI_SECTIONS:
        for key, value in parsed.items(section):
            log.debug('settings ui from file: [%s] %s=%s', section, key, value)
            baseui.setconfig(
                ascii_bytes(section, allow_bytes=True),
                ascii_bytes(key, allow_bytes=True),
                ascii_bytes(value, allow_bytes=True))
158 158
159 159
def create_hg_wsgi_app(repo_path, repo_name, config):
    """
    Prepares a WSGI application to handle Mercurial requests.

    :param config: is a list of 3-item tuples representing a ConfigObject
        (it is the serialized version of the config object).
    :raises: the project's RequirementException when the repository uses
        features this mercurial cannot handle.
    """
    log.debug("Creating Mercurial WSGI application")

    baseui = make_hg_ui_from_config(config)
    update_hg_ui_from_hgrc(baseui, repo_path)
    sanitize_hg_ui(baseui)

    try:
        return HgWeb(safe_bytes(repo_path), name=safe_bytes(repo_name), baseui=baseui)
    except mercurial.error.RequirementError as e:
        # project convention: RequirementException(e) builds an exception factory
        raise exceptions.RequirementException(e)(e)
177 177
178 178
class GitHandler:
    """
    Handler for Git operations like push/pull etc
    """
    def __init__(self, repo_location, repo_name, git_path, update_server_info,
                 extras):
        # fail early when the repository directory does not exist
        if not os.path.isdir(repo_location):
            raise OSError(repo_location)
        self.content_path = repo_location
        self.repo_name = repo_name
        self.repo_location = repo_location
        self.extras = extras
        self.git_path = git_path
        self.update_server_info = update_server_info

    def __call__(self, environ, start_response):
        """Dispatch a WSGI request to the git smart-HTTP backend.

        Tries the repository root first, then a nested ``.git`` directory;
        falls back to a 404 response if neither can be opened.
        """
        app = webob.exc.HTTPNotFound()
        candidates = (self.content_path, os.path.join(self.content_path, '.git'))

        for content_path in candidates:
            try:
                app = pygrack.GitRepository(
                    self.repo_name, content_path, self.git_path,
                    self.update_server_info, self.extras)
            except OSError:
                continue
            break

        return app(environ, start_response)
209 209
210 210
def create_git_wsgi_app(repo_path, repo_name, config):
    """
    Creates a WSGI application to handle Git requests.

    :param config: is a dictionary holding the extras.
    """
    # 'git_update_server_info' is consumed here; everything left in config
    # is forwarded to the handler as extras
    update_server_info = config.pop('git_update_server_info')

    return GitHandler(
        repo_path, repo_name, settings.GIT_EXECUTABLE, update_server_info, config)
223 223
224 224
class GitLFSHandler:
    """
    Handler for Git LFS operations
    """

    def __init__(self, repo_location, repo_name, git_path, update_server_info,
                 extras):
        # fail early when the repository directory does not exist
        if not os.path.isdir(repo_location):
            raise OSError(repo_location)
        self.content_path = repo_location
        self.repo_name = repo_name
        self.repo_location = repo_location
        self.extras = extras
        self.git_path = git_path
        self.update_server_info = update_server_info

    def get_app(self, git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme):
        """Build and return the LFS WSGI application for this repository."""
        return git_lfs.create_app(
            git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme)
244 244
245 245
def create_git_lfs_wsgi_app(repo_path, repo_name, config):
    """
    Creates a WSGI application to handle Git LFS requests.

    :param config: is a dictionary holding the extras; the LFS-specific keys
        are consumed here and the remainder is passed to the handler.
    """
    git_path = settings.GIT_EXECUTABLE
    # config keys are plain strings since the python3 transition; the stale
    # bytes-key pops (b'git_update_server_info' etc.) left over from the diff
    # are removed — keeping both would pop non-existent keys and raise KeyError.
    update_server_info = config.pop('git_update_server_info')
    git_lfs_enabled = config.pop('git_lfs_enabled')
    git_lfs_store_path = config.pop('git_lfs_store_path')
    git_lfs_http_scheme = config.pop('git_lfs_http_scheme', 'http')
    app = GitLFSHandler(
        repo_path, repo_name, git_path, update_server_info, config)

    return app.get_app(git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme)
General Comments 0
You need to be logged in to leave comments. Login now