merge: Resolved conflicts
r1266:6139e442 merge v5.1.0 stable
@@ -0,0 +1,79 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import logging
20
21 from .backends.fanout_cache import FileSystemFanoutCache
22 from .backends.objectstore_cache import ObjectStoreCache
23
24 from .utils import archive_iterator # noqa
25 from .lock import ArchiveCacheGenerationLock # noqa
26
27 log = logging.getLogger(__name__)
28
29
30 cache_meta = None
31
32
33 def includeme(config):
34 return # vcsserver gets its config from rhodecode on a remote call
35 # init our cache at start
36 settings = config.get_settings()
37 get_archival_cache_store(settings)
38
39
40 def get_archival_config(config):
41
42 final_config = {}
45
46 for k, v in config.items():
47 if k.startswith('archive_cache'):
48 final_config[k] = v
49
50 return final_config
51
52
53 def get_archival_cache_store(config, always_init=False):
54
55 global cache_meta
56 if cache_meta is not None and not always_init:
57 return cache_meta
58
59 config = get_archival_config(config)
60 backend = config['archive_cache.backend.type']
61
62 archive_cache_locking_url = config['archive_cache.locking.url']
63
64 match backend:
65 case 'filesystem':
66 d_cache = FileSystemFanoutCache(
67 locking_url=archive_cache_locking_url,
68 **config
69 )
70 case 'objectstore':
71 d_cache = ObjectStoreCache(
72 locking_url=archive_cache_locking_url,
73 **config
74 )
75 case _:
76 raise ValueError(f'archive_cache.backend.type only supports "filesystem" or "objectstore", got: {backend}')
77
78 cache_meta = d_cache
79 return cache_meta
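
Taken together, a caller initializes the cache once from flat settings. Below is a hedged usage sketch; the import path and every settings value are assumptions (placeholders), and it presumes a reachable Redis instance behind the locking url:

    # hypothetical usage; values are placeholders, import path assumed
    from vcsserver.lib.archive_cache import get_archival_cache_store

    settings = {
        'archive_cache.backend.type': 'filesystem',
        'archive_cache.locking.url': 'redis://redis:6379/1',
        'archive_cache.filesystem.store_dir': '/tmp/archive_cache',
        'archive_cache.filesystem.cache_shards': '8',
        'archive_cache.filesystem.cache_size_gb': '10',
        'archive_cache.filesystem.eviction_policy': 'least-recently-stored',
        'archive_cache.filesystem.retry': 'false',
        'archive_cache.filesystem.retry_backoff': '1',
        'archive_cache.filesystem.retry_attempts': '0',
    }
    d_cache = get_archival_cache_store(settings)  # memoized in module-level cache_meta
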
@@ -0,0 +1,17 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
@@ -0,0 +1,372 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import os
20 import functools
21 import logging
22 import typing
23 import time
24 import zlib
25
26 from ...ext_json import json
27 from ..utils import StatsDB, NOT_GIVEN, ShardFileReader, EVICTION_POLICY, format_size
28 from ..lock import GenerationLock
29
30 log = logging.getLogger(__name__)
31
32
33 class BaseShard:
34 storage_type: str = ''
35 fs = None
36
37 @classmethod
38 def hash(cls, key):
39 """Compute portable hash for `key`.
40
41 :param key: key to hash
42 :return: hash value
43
44 """
45 mask = 0xFFFFFFFF
46 return zlib.adler32(key.encode('utf-8')) & mask # noqa
47
48 def _write_file(self, full_path, read_iterator, mode):
49 raise NotImplementedError
50
51 def _get_keyfile(self, key):
52 raise NotImplementedError
53
54 def random_filename(self):
55 raise NotImplementedError
56
57 def store(self, *args, **kwargs):
58 raise NotImplementedError
59
60 def _store(self, key, value_reader, metadata, mode):
61 (filename, # hash-name
62 full_path # full-path/hash-name
63 ) = self.random_filename()
64
65 key_file, key_file_path = self._get_keyfile(key)
66
67 # STORE METADATA
68 _metadata = {
69 "version": "v1",
70
71 "key_file": key_file, # this is the .key.json file storing meta
72 "key_file_path": key_file_path, # full path to key_file
73 "archive_key": key, # original name we stored archive under, e.g my-archive.zip
74 "archive_filename": filename, # the actual filename we stored that file under
75 "archive_full_path": full_path,
76
77 "store_time": time.time(),
78 "access_count": 0,
79 "access_time": 0,
80
81 "size": 0
82 }
83 if metadata:
84 _metadata.update(metadata)
85
86 read_iterator = iter(functools.partial(value_reader.read, 2**22), b'')
87 size, sha256 = self._write_file(full_path, read_iterator, mode)
88 _metadata['size'] = size
89 _metadata['sha256'] = sha256
90
91 # once the archive is written, create a key file that records the presence of the binary file
92 with self.fs.open(key_file_path, 'wb') as f:
93 f.write(json.dumps(_metadata))
94
95 return key, filename, size, _metadata
96
97 def fetch(self, *args, **kwargs):
98 raise NotImplementedError
99
100 def _fetch(self, key, retry, retry_attempts, retry_backoff,
101 presigned_url_expires: int = 0) -> tuple[ShardFileReader, dict]:
102 if retry is NOT_GIVEN:
103 retry = False
104 if retry_attempts is NOT_GIVEN:
105 retry_attempts = 0
106
107 if retry and retry_attempts > 0:
108 for attempt in range(1, retry_attempts + 1):
109 if key in self:
110 break
111 # key not found yet; wait retry_backoff seconds, then re-check
112 time.sleep(retry_backoff)
113
114 if key not in self:
115 log.error(f'requested key={key} not found in {self} retry={retry}, attempts={retry_attempts}')
116 raise KeyError(key)
117
118 key_file, key_file_path = self._get_keyfile(key)
119 with self.fs.open(key_file_path, 'rb') as f:
120 metadata = json.loads(f.read())
121
122 archive_path = metadata['archive_full_path']
123 if presigned_url_expires and presigned_url_expires > 0:
124 metadata['url'] = self.fs.url(archive_path, expires=presigned_url_expires)
125
126 try:
127 return ShardFileReader(self.fs.open(archive_path, 'rb')), metadata
128 finally:
129 # update usage stats, count and accessed
130 metadata["access_count"] = metadata.get("access_count", 0) + 1
131 metadata["access_time"] = time.time()
132 log.debug('Updated %s with access snapshot, access_count=%s access_time=%s',
133 key_file, metadata['access_count'], metadata['access_time'])
134 with self.fs.open(key_file_path, 'wb') as f:
135 f.write(json.dumps(metadata))
136
137 def remove(self, *args, **kwargs):
138 raise NotImplementedError
139
140 def _remove(self, key):
141 if key not in self:
142 log.error(f'requested key={key} not found in {self}')
143 raise KeyError(key)
144
145 key_file, key_file_path = self._get_keyfile(key)
146 with self.fs.open(key_file_path, 'rb') as f:
147 metadata = json.loads(f.read())
148
149 archive_path = metadata['archive_full_path']
150 self.fs.rm(archive_path)
151 self.fs.rm(key_file_path)
152 return 1
153
154 @property
155 def storage_medium(self):
156 return getattr(self, self.storage_type)
157
158 @property
159 def key_suffix(self):
160 return 'key.json'
161
162 def __contains__(self, key):
163 """Return `True` if `key` matching item is found in cache.
164
165 :param key: key matching item
166 :return: True if key matching item
167
168 """
169 key_file, key_file_path = self._get_keyfile(key)
170 return self.fs.exists(key_file_path)
171
172
173 class BaseCache:
174 _locking_url: str = ''
175 _storage_path: str = ''
176 _config: dict = {}
177 retry = False
178 retry_attempts: int = 0
179 retry_backoff: int | float = 1
180 _shards = tuple()
181 shard_cls = BaseShard
182 # define the presigned url expiration, 0 == disabled
183 presigned_url_expires: int = 0
184
185 def __contains__(self, key):
186 """Return `True` if `key` matching item is found in cache.
187
188 :param key: key matching item
189 :return: True if key matching item
190
191 """
192 return self.has_key(key)
193
194 def __repr__(self):
195 return f'<{self.__class__.__name__}(storage={self._storage_path})>'
196
197 @classmethod
198 def gb_to_bytes(cls, gb):
199 return gb * (1024 ** 3)
200
201 @property
202 def storage_path(self):
203 return self._storage_path
204
205 @classmethod
206 def get_stats_db(cls):
207 return StatsDB()
208
209 def get_conf(self, key, pop=False):
210 if key not in self._config:
211 raise ValueError(f"No configuration key '{key}', please make sure it exists in archive_cache config")
212 val = self._config[key]
213 if pop:
214 del self._config[key]
215 return val
216
217 def _get_shard(self, key) -> shard_cls:
218 index = self._hash(key) % self._shard_count
219 shard = self._shards[index]
220 return shard
221
222 def _get_size(self, shard, archive_path):
223 raise NotImplementedError
224
225 def store(self, key, value_reader, metadata=None):
226 shard = self._get_shard(key)
227 return shard.store(key, value_reader, metadata)
228
229 def fetch(self, key, retry=NOT_GIVEN, retry_attempts=NOT_GIVEN) -> tuple[typing.BinaryIO, dict]:
230 """
231 Return file handle corresponding to `key` from specific shard cache.
232 """
233 if retry is NOT_GIVEN:
234 retry = self.retry
235 if retry_attempts is NOT_GIVEN:
236 retry_attempts = self.retry_attempts
237 retry_backoff = self.retry_backoff
238 presigned_url_expires = self.presigned_url_expires
239
240 shard = self._get_shard(key)
241 return shard.fetch(key, retry=retry,
242 retry_attempts=retry_attempts,
243 retry_backoff=retry_backoff,
244 presigned_url_expires=presigned_url_expires)
245
246 def remove(self, key):
247 shard = self._get_shard(key)
248 return shard.remove(key)
249
250 def has_key(self, archive_key):
251 """Return `True` if `key` matching item is found in cache.
252
253 :param archive_key: key for the item; a unique archive name we store the data under, e.g. my-archive-svn.zip
254 :return: True if key is found
255
256 """
257 shard = self._get_shard(archive_key)
258 return archive_key in shard
259
260 def iter_keys(self):
261 for shard in self._shards:
262 if shard.fs.exists(shard.storage_medium):
263 for path, _dirs, _files in shard.fs.walk(shard.storage_medium):
264 for key_file_path in _files:
265 if key_file_path.endswith(shard.key_suffix):
266 yield shard, key_file_path
267
268 def get_lock(self, lock_key):
269 return GenerationLock(lock_key, self._locking_url)
270
271 def evict(self, policy=None, size_limit=None) -> dict:
272 """
273 Remove old items based on the configured eviction policy and size limit.
274
275
276 Explanation of this algorithm:
277 iterate over each shard, and for each shard iterate over its .key files,
278 reading the metadata they store. This gives us the full list of keys, cached
279 archives, their sizes, creation times, access times, and access counts.
280
281 That data is loaded into an in-memory DB so different sorting strategies can
282 be run easily; summing the sizes is a single SQL SUM query.
283
284 Then a sorting strategy is applied according to the eviction policy, and we
285 iterate over the sorted keys, removing each one until the overall size limit is met.
286 """
287 removal_info = {
288 "removed_items": 0,
289 "removed_size": 0
290 }
291 policy = policy or self._eviction_policy
292 size_limit = size_limit or self._cache_size_limit
293
294 select_policy = EVICTION_POLICY[policy]['evict']
295
296 log.debug('Running eviction policy \'%s\', and checking for size limit: %s',
297 policy, format_size(size_limit))
298
299 if select_policy is None:
300 return removal_info
301
302 db = self.get_stats_db()
303
304 data = []
305 cnt = 1
306
307 for shard, key_file in self.iter_keys():
308 with shard.fs.open(os.path.join(shard.storage_medium, key_file), 'rb') as f:
309 metadata = json.loads(f.read())
310
311 key_file_path = os.path.join(shard.storage_medium, key_file)
312
313 archive_key = metadata['archive_key']
314 archive_path = metadata['archive_full_path']
315
316 size = metadata.get('size')
317 if not size:
318 # size missing from metadata, re-calculate it
319 size = self._get_size(shard, archive_path)
320
321 data.append([
322 cnt,
323 key_file,
324 key_file_path,
325 archive_key,
326 archive_path,
327 metadata.get('store_time', 0),
328 metadata.get('access_time', 0),
329 metadata.get('access_count', 0),
330 size,
331 ])
332 cnt += 1
333
334 # Insert bulk data using executemany
335 db.bulk_insert(data)
336
337 total_size = db.get_total_size()
338 log.debug('Analyzed %s keys, occupying: %s, running eviction to match %s',
339 len(data), format_size(total_size), format_size(size_limit))
340
341 removed_items = 0
342 removed_size = 0
343 for key_file, archive_key, size in db.get_sorted_keys(select_policy):
344 # simulate removal impact BEFORE removal
345 total_size -= size
346
347 if total_size <= size_limit:
348 # total size is now within the limit, stop evicting
349 break
350
351 self.remove(archive_key)
352 removed_items += 1
353 removed_size += size
354 removal_info['removed_items'] = removed_items
355 removal_info['removed_size'] = removed_size
356 log.debug('Removed %s cache archives, and reduced size by: %s',
357 removed_items, format_size(removed_size))
358 return removal_info
359
360 def get_statistics(self):
361 total_files = 0
362 total_size = 0
363 meta = {}
364
365 for shard, key_file in self.iter_keys():
366 json_key = f"{shard.storage_medium}/{key_file}"
367 with shard.fs.open(json_key, 'rb') as f:
368 total_files += 1
369 metadata = json.loads(f.read())
370 total_size += metadata['size']
371
372 return total_files, total_size, meta
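
For orientation, this is roughly the store/fetch round trip the base classes define. A hedged sketch: `d_cache` stands for any configured cache instance and the payload bytes are made up:

    import io

    # illustrative only; d_cache is a configured FileSystemFanoutCache or
    # ObjectStoreCache instance, the bytes stand in for a real archive
    reader = io.BytesIO(b'generated-archive-bytes')
    key, stored_name, size, meta = d_cache.store('my-archive.zip', reader)

    if 'my-archive.zip' in d_cache:                 # checks the shard's key file
        fh, meta = d_cache.fetch('my-archive.zip')  # bumps access_count/access_time
        data = fh.read()
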
@@ -0,0 +1,177 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import codecs
20 import hashlib
21 import logging
22 import os
23 import typing
24
25 import fsspec
26
27 from .base import BaseCache, BaseShard
28 from ..utils import ShardFileReader, NOT_GIVEN
29 from ...type_utils import str2bool
30
31 log = logging.getLogger(__name__)
32
33
34 class FileSystemShard(BaseShard):
35
36 def __init__(self, index, directory, directory_folder, fs, **settings):
37 self._index: int = index
38 self._directory: str = directory
39 self._directory_folder: str = directory_folder
40 self.storage_type: str = 'directory'
41
42 self.fs = fs
43
44 @property
45 def directory(self) -> str:
46 """Cache directory final path."""
47 return os.path.join(self._directory, self._directory_folder)
48
49 def _get_keyfile(self, archive_key) -> tuple[str, str]:
50 key_file: str = f'{archive_key}.{self.key_suffix}'
51 return key_file, os.path.join(self.directory, key_file)
52
53 def _get_writer(self, path, mode):
54 for count in range(1, 11):
55 try:
56 # Another cache may have deleted the directory before
57 # the file could be opened.
58 return self.fs.open(path, mode)
59 except OSError:
60 if count == 10:
61 # Give up after 10 tries to open the file.
62 raise
63 continue
64
65 def _write_file(self, full_path, iterator, mode):
66
67 # ensure dir exists
68 destination, _ = os.path.split(full_path)
69 if not self.fs.exists(destination):
70 self.fs.makedirs(destination)
71
72 writer = self._get_writer(full_path, mode)
73
74 digest = hashlib.sha256()
75 with writer:
76 size = 0
77 for chunk in iterator:
78 size += len(chunk)
79 digest.update(chunk)
80 writer.write(chunk)
81 writer.flush()
82 # Get the file descriptor
83 fd = writer.fileno()
84
85 # Sync the file descriptor to disk, helps with NFS cases...
86 os.fsync(fd)
87 sha256 = digest.hexdigest()
88 log.debug('wrote new archive cache file under %s, sha256: %s', full_path, sha256)
89 return size, sha256
90
91 def store(self, key, value_reader, metadata: dict | None = None):
92 return self._store(key, value_reader, metadata, mode='xb')
93
94 def fetch(self, key, retry=NOT_GIVEN,
95 retry_attempts=NOT_GIVEN, retry_backoff=1, **kwargs) -> tuple[ShardFileReader, dict]:
96 return self._fetch(key, retry, retry_attempts, retry_backoff)
97
98 def remove(self, key):
99 return self._remove(key)
100
101 def random_filename(self):
102 """Return filename and full-path tuple for file storage.
103
104 Filename will be a randomly generated 28 character hexadecimal string
105 with ".archive_cache" suffixed. Two levels of sub-directories will be used to
106 reduce the size of directories. On older filesystems, lookups in
107 directories with many files may be slow.
108 """
109
110 hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')
111
112 archive_name = hex_name[4:] + '.archive_cache'
113 filename = f"{hex_name[:2]}/{hex_name[2:4]}/{archive_name}"
114
115 full_path = os.path.join(self.directory, filename)
116 return archive_name, full_path
117
118 def __repr__(self):
119 return f'{self.__class__.__name__}(index={self._index}, dir={self.directory})'
120
121
122 class FileSystemFanoutCache(BaseCache):
123 shard_name: str = 'shard_{:03d}'
124 shard_cls = FileSystemShard
125
126 def __init__(self, locking_url, **settings):
127 """
128 Initialize file system cache instance.
129
130 :param str locking_url: redis url for a lock
131 :param settings: settings dict
132
133 """
134 self._locking_url = locking_url
135 self._config = settings
136 cache_dir = self.get_conf('archive_cache.filesystem.store_dir')
137 directory = str(cache_dir)
138 directory = os.path.expanduser(directory)
139 directory = os.path.expandvars(directory)
140 self._directory = directory
141 self._storage_path = directory # common path for all from BaseCache
142
143 self._shard_count = int(self.get_conf('archive_cache.filesystem.cache_shards', pop=True))
144 if self._shard_count < 1:
145 raise ValueError('cache_shards must be 1 or more')
146
147 self._eviction_policy = self.get_conf('archive_cache.filesystem.eviction_policy', pop=True)
148 self._cache_size_limit = self.gb_to_bytes(int(self.get_conf('archive_cache.filesystem.cache_size_gb')))
149
150 self.retry = str2bool(self.get_conf('archive_cache.filesystem.retry', pop=True))
151 self.retry_attempts = int(self.get_conf('archive_cache.filesystem.retry_attempts', pop=True))
152 self.retry_backoff = int(self.get_conf('archive_cache.filesystem.retry_backoff', pop=True))
153
154 log.debug('Initializing %s archival cache instance', self)
155 fs = fsspec.filesystem('file')
156 # check if it's ok to write, and create the archive cache main dir
157 # if it doesn't exist; shard sub-directories are created beneath it
161 if not fs.exists(self._directory):
162 fs.makedirs(self._directory, exist_ok=True)
163
164 self._shards = tuple(
165 self.shard_cls(
166 index=num,
167 directory=directory,
168 directory_folder=self.shard_name.format(num),
169 fs=fs,
170 **settings,
171 )
172 for num in range(self._shard_count)
173 )
174 self._hash = self._shards[0].hash
175
176 def _get_size(self, shard, archive_path):
177 return os.stat(archive_path).st_size
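
To make the fanout layout concrete, here is a stand-alone restatement of the naming scheme random_filename() uses; the example path is hypothetical:

    import codecs
    import os

    hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')  # 32 hex chars
    archive_name = hex_name[4:] + '.archive_cache'                   # 28-char filename
    filename = f"{hex_name[:2]}/{hex_name[2:4]}/{archive_name}"
    # e.g. 'a1/b2/c3d4....archive_cache', stored under <store_dir>/shard_000/
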
@@ -0,0 +1,173 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import codecs
20 import hashlib
21 import logging
22 import os
23 import typing
24
25 import fsspec
26
27 from .base import BaseCache, BaseShard
28 from ..utils import ShardFileReader, NOT_GIVEN
29 from ...type_utils import str2bool
30
31 log = logging.getLogger(__name__)
32
33
34 class S3Shard(BaseShard):
35
36 def __init__(self, index, bucket, bucket_folder, fs, **settings):
37 self._index: int = index
38 self._bucket_folder: str = bucket_folder
39 self.storage_type: str = 'bucket'
40 self._bucket_main: str = bucket
41
42 self.fs = fs
43
44 @property
45 def bucket(self) -> str:
46 """Cache bucket final path."""
47 return os.path.join(self._bucket_main, self._bucket_folder)
48
49 def _get_keyfile(self, archive_key) -> tuple[str, str]:
50 key_file: str = f'{archive_key}-{self.key_suffix}'
51 return key_file, os.path.join(self.bucket, key_file)
52
53 def _get_writer(self, path, mode):
54 return self.fs.open(path, 'wb')
55
56 def _write_file(self, full_path, iterator, mode):
57
58 # ensure folder in bucket exists
59 destination = self.bucket
60 if not self.fs.exists(destination):
61 self.fs.mkdir(destination, s3_additional_kwargs={})
62
63 writer = self._get_writer(full_path, mode)
64
65 digest = hashlib.sha256()
66 with writer:
67 size = 0
68 for chunk in iterator:
69 size += len(chunk)
70 digest.update(chunk)
71 writer.write(chunk)
72
73 sha256 = digest.hexdigest()
74 log.debug('wrote new archive cache file under %s, sha256: %s', full_path, sha256)
75 return size, sha256
76
77 def store(self, key, value_reader, metadata: dict | None = None):
78 return self._store(key, value_reader, metadata, mode='wb')
79
80 def fetch(self, key, retry=NOT_GIVEN,
81 retry_attempts=NOT_GIVEN, retry_backoff=1,
82 presigned_url_expires: int = 0) -> tuple[ShardFileReader, dict]:
83 return self._fetch(key, retry, retry_attempts, retry_backoff, presigned_url_expires=presigned_url_expires)
84
85 def remove(self, key):
86 return self._remove(key)
87
88 def random_filename(self):
89 """Return filename and full-path tuple for file storage.
90
91 Filename will be a randomly generated 28 character hexadecimal string
92 with ".archive_cache" suffixed. Two levels of sub-directories will be used to
93 reduce the size of directories. On older filesystems, lookups in
94 directories with many files may be slow.
95 """
96
97 hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')
98
99 archive_name = hex_name[4:] + '.archive_cache'
100 filename = f"{hex_name[:2]}-{hex_name[2:4]}-{archive_name}"
101
102 full_path = os.path.join(self.bucket, filename)
103 return archive_name, full_path
104
105 def __repr__(self):
106 return f'{self.__class__.__name__}(index={self._index}, bucket={self.bucket})'
107
108
109 class ObjectStoreCache(BaseCache):
110 shard_name: str = 'shard-{:03d}'
111 shard_cls = S3Shard
112
113 def __init__(self, locking_url, **settings):
114 """
115 Initialize objectstore cache instance.
116
117 :param str locking_url: redis url for a lock
118 :param settings: settings dict
119
120 """
121 self._locking_url = locking_url
122 self._config = settings
123
124 objectstore_url = self.get_conf('archive_cache.objectstore.url')
125 self._storage_path = objectstore_url # common path for all from BaseCache
126
127 self._shard_count = int(self.get_conf('archive_cache.objectstore.bucket_shards', pop=True))
128 if self._shard_count < 1:
129 raise ValueError('cache_shards must be 1 or more')
130
131 self._bucket = settings.pop('archive_cache.objectstore.bucket')
132 if not self._bucket:
133 raise ValueError('archive_cache.objectstore.bucket needs to have a value')
134
135 self._eviction_policy = self.get_conf('archive_cache.objectstore.eviction_policy', pop=True)
136 self._cache_size_limit = self.gb_to_bytes(int(self.get_conf('archive_cache.objectstore.cache_size_gb')))
137
138 self.retry = str2bool(self.get_conf('archive_cache.objectstore.retry', pop=True))
139 self.retry_attempts = int(self.get_conf('archive_cache.objectstore.retry_attempts', pop=True))
140 self.retry_backoff = int(self.get_conf('archive_cache.objectstore.retry_backoff', pop=True))
141
142 endpoint_url = settings.pop('archive_cache.objectstore.url')
143 key = settings.pop('archive_cache.objectstore.key')
144 secret = settings.pop('archive_cache.objectstore.secret')
145 region = settings.pop('archive_cache.objectstore.region')
146
147 log.debug('Initializing %s archival cache instance', self)
148
149 fs = fsspec.filesystem(
150 's3', anon=False, endpoint_url=endpoint_url, key=key, secret=secret, client_kwargs={'region_name': region}
151 )
152
153 # init main bucket
154 if not fs.exists(self._bucket):
155 fs.mkdir(self._bucket)
156
157 self._shards = tuple(
158 self.shard_cls(
159 index=num,
160 bucket=self._bucket,
161 bucket_folder=self.shard_name.format(num),
162 fs=fs,
163 **settings,
164 )
165 for num in range(self._shard_count)
166 )
167 self._hash = self._shards[0].hash
168
169 def _get_size(self, shard, archive_path):
170 return shard.fs.info(archive_path)['size']
171
172 def set_presigned_url_expiry(self, val: int) -> None:
173 self.presigned_url_expires = val
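
A hedged configuration sketch for the objectstore backend; the endpoint, bucket, and credentials are placeholders, not working values:

    # every value below is a placeholder
    settings = {
        'archive_cache.objectstore.url': 'http://s3-endpoint:9000',
        'archive_cache.objectstore.key': 'placeholder-key',
        'archive_cache.objectstore.secret': 'placeholder-secret',
        'archive_cache.objectstore.region': 'eu-central-1',
        'archive_cache.objectstore.bucket': 'rhodecode-archive-cache',
        'archive_cache.objectstore.bucket_shards': '8',
        'archive_cache.objectstore.cache_size_gb': '10',
        'archive_cache.objectstore.eviction_policy': 'least-recently-stored',
        'archive_cache.objectstore.retry': 'false',
        'archive_cache.objectstore.retry_backoff': '1',
        'archive_cache.objectstore.retry_attempts': '0',
    }
    cache = ObjectStoreCache('redis://redis:6379/1', **settings)
    cache.set_presigned_url_expiry(3600)  # 0 disables presigned urls, N>0 is expiry in seconds
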
@@ -0,0 +1,62 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import redis
20 from .._vendor import redis_lock
21
22
23 class ArchiveCacheGenerationLock(Exception):
24 pass
25
26
27 class GenerationLock:
28 """
29 Locking mechanism that detects if a lock is acquired
30
31 with GenerationLock(lock_key):
32 compute_archive()
33 """
34 lock_timeout = 7200
35
36 def __init__(self, lock_key, url):
37 self.lock_key = lock_key
38 self._create_client(url)
39 self.lock = self.get_lock()
40
41 def _create_client(self, url):
42 connection_pool = redis.ConnectionPool.from_url(url)
43 self.writer_client = redis.StrictRedis(
44 connection_pool=connection_pool
45 )
46 self.reader_client = self.writer_client
47
48 def get_lock(self):
49 return redis_lock.Lock(
50 redis_client=self.writer_client,
51 name=self.lock_key,
52 expire=self.lock_timeout,
53 strict=True
54 )
55
56 def __enter__(self):
57 acquired = self.lock.acquire(blocking=False)
58 if not acquired:
59 raise ArchiveCacheGenerationLock('Failed to create a lock')
60
61 def __exit__(self, exc_type, exc_val, exc_tb):
62 self.lock.release()
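
A brief usage sketch: the acquire is non-blocking, so a second worker generating the same archive gets ArchiveCacheGenerationLock immediately and can back off. The lock key and url below are placeholders, and compute_archive() is the docstring's stand-in for the real work:

    try:
        with GenerationLock('archive-gen:my-archive.zip', 'redis://redis:6379/1'):
            compute_archive()  # placeholder for the actual archive generation
    except ArchiveCacheGenerationLock:
        # another worker holds the lock for this key; retry later or poll
        # the cache for the finished archive
        pass
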
@@ -0,0 +1,134 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import sqlite3
20 import s3fs.core
21
22 NOT_GIVEN = -917
23
24
25 EVICTION_POLICY = {
26 'none': {
27 'evict': None,
28 },
29 'least-recently-stored': {
30 'evict': 'SELECT {fields} FROM archive_cache ORDER BY store_time',
31 },
32 'least-recently-used': {
33 'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_time',
34 },
35 'least-frequently-used': {
36 'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_count',
37 },
38 }
39
40
41 def archive_iterator(_reader, block_size: int = 4096 * 512):
42 # 4096 * 512 = 2MB
43 while 1:
44 data = _reader.read(block_size)
45 if not data:
46 break
47 yield data
48
49
50 def format_size(size):
51 # Convert size in bytes to a human-readable format (e.g., KB, MB, GB)
52 for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
53 if size < 1024:
54 return f"{size:.2f} {unit}"
55 size /= 1024
56
57
58 class StatsDB:
59
60 def __init__(self):
61 self.connection = sqlite3.connect(':memory:')
62 self._init_db()
63
64 def _init_db(self):
65 qry = '''
66 CREATE TABLE IF NOT EXISTS archive_cache (
67 rowid INTEGER PRIMARY KEY,
68 key_file TEXT,
69 key_file_path TEXT,
70 archive_key TEXT,
71 archive_path TEXT,
72 store_time REAL,
73 access_time REAL,
74 access_count INTEGER DEFAULT 0,
75 size INTEGER DEFAULT 0
76 )
77 '''
78
79 self.sql(qry)
80 self.connection.commit()
81
82 @property
83 def sql(self):
84 return self.connection.execute
85
86 def bulk_insert(self, rows):
87 qry = '''
88 INSERT INTO archive_cache (
89 rowid,
90 key_file,
91 key_file_path,
92 archive_key,
93 archive_path,
94 store_time,
95 access_time,
96 access_count,
97 size
98 )
99 VALUES (
100 ?, ?, ?, ?, ?, ?, ?, ?, ?
101 )
102 '''
103 cursor = self.connection.cursor()
104 cursor.executemany(qry, rows)
105 self.connection.commit()
106
107 def get_total_size(self):
108 qry = 'SELECT COALESCE(SUM(size), 0) FROM archive_cache'
109 ((total_size,),) = self.sql(qry).fetchall()
110 return total_size
111
112 def get_sorted_keys(self, select_policy):
113 select_policy_qry = select_policy.format(fields='key_file, archive_key, size')
114 return self.sql(select_policy_qry).fetchall()
115
116
117 class ShardFileReader:
118
119 def __init__(self, file_like_reader):
120 self._file_like_reader = file_like_reader
121
122 def __getattr__(self, item):
123 if isinstance(self._file_like_reader, s3fs.core.S3File):
124 match item:
125 case 'name':
126 # S3 file objects don't support the name attribute we rely on; use full_name instead
127 return self._file_like_reader.full_name
128 case _:
129 return getattr(self._file_like_reader, item)
130 else:
131 return getattr(self._file_like_reader, item)
132
133 def __repr__(self):
134 return f'<{self.__class__.__name__}={self._file_like_reader}>'
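
To show how these helpers fit together, a hedged sketch of what BaseCache.evict() does with StatsDB and EVICTION_POLICY; the rows are made-up values:

    db = StatsDB()
    db.bulk_insert([
        # rowid, key_file, key_file_path, archive_key, archive_path,
        # store_time, access_time, access_count, size
        (1, 'a.zip.key.json', '/cache/a.zip.key.json', 'a.zip', '/cache/a.bin',
         1000.0, 1010.0, 3, 2048),
        (2, 'b.zip.key.json', '/cache/b.zip.key.json', 'b.zip', '/cache/b.bin',
         1005.0, 1001.0, 9, 4096),
    ])
    select_policy = EVICTION_POLICY['least-recently-used']['evict']
    # formats to: SELECT key_file, archive_key, size FROM archive_cache ORDER BY access_time
    for key_file, archive_key, size in db.get_sorted_keys(select_policy):
        print(archive_key, size)  # 'b.zip' comes first: least recently accessed
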
@@ -0,0 +1,111 b''
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
18 import logging
19 import redis
20
21 from ..lib import rc_cache
22 from ..lib.ext_json import json
23
24
25 log = logging.getLogger(__name__)
26
27 redis_client = None
28
29
30 class RedisTxnClient:
31
32 def __init__(self, url):
33 self.url = url
34 self._create_client(url)
35
36 def _create_client(self, url):
37 connection_pool = redis.ConnectionPool.from_url(url)
38 self.writer_client = redis.StrictRedis(
39 connection_pool=connection_pool
40 )
41 self.reader_client = self.writer_client
42
43 def set(self, key, value):
44 self.writer_client.set(key, value)
45
46 def get(self, key):
47 return self.reader_client.get(key)
48
49 def delete(self, key):
50 self.writer_client.delete(key)
51
52
53 def get_redis_client(url=''):
54
55 global redis_client
56 if redis_client is not None:
57 return redis_client
58 if not url:
59 from vcsserver import CONFIG
60 url = CONFIG['vcs.svn.redis_conn']
61 redis_client = RedisTxnClient(url)
62 return redis_client
63
64
65 def get_txn_id_data_key(repo_path, svn_txn_id):
66 log.debug('svn-txn-id: %s, obtaining data path', svn_txn_id)
67 repo_key = rc_cache.utils.compute_key_from_params(repo_path)
68 final_key = f'{repo_key}.{svn_txn_id}.svn_txn_id'
69 log.debug('computed final key: %s', final_key)
70
71 return final_key
72
73
74 def store_txn_id_data(repo_path, svn_txn_id, data_dict):
75 log.debug('svn-txn-id: %s, storing data', svn_txn_id)
76
77 if not svn_txn_id:
78 log.warning('Cannot store txn_id because it is empty')
79 return
80
81 redis_conn = get_redis_client()
82
83 store_key = get_txn_id_data_key(repo_path, svn_txn_id)
84 store_data = json.dumps(data_dict)
85 redis_conn.set(store_key, store_data)
86
87
88 def get_txn_id_from_store(repo_path, svn_txn_id, rm_on_read=False):
89 """
90 Reads txn_id from store and if present returns the data for callback manager
91 """
92 log.debug('svn-txn-id: %s, retrieving data', svn_txn_id)
93 redis_conn = get_redis_client()
94
95 store_key = get_txn_id_data_key(repo_path, svn_txn_id)
96 data = {}
98 raw_data = 'not-set'
99 try:
100 raw_data = redis_conn.get(store_key)
101 if not raw_data:
102 raise ValueError(f'Failed to get txn_id metadata from store: {store_key}')
103 data = json.loads(raw_data)
104 except Exception:
105 log.exception('Failed to get txn_id metadata: %s', raw_data)
106
107 if rm_on_read:
108 log.debug('Cleaning up txn_id at %s', store_key)
109 redis_conn.delete(store_key)
110
111 return data
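
A hedged end-to-end sketch of the txn-id flow; the repo path and txn id are placeholders, and it assumes vcs.svn.redis_conn is configured (or an explicit url is passed to get_redis_client):

    # hypothetical pre-commit / post-commit hook pair
    repo_path = '/repos/my-svn-repo'  # placeholder
    txn_id = '1234-abc'               # placeholder svn transaction id

    store_txn_id_data(repo_path, txn_id, {'username': 'admin'})
    data = get_txn_id_from_store(repo_path, txn_id, rm_on_read=True)
    # data == {'username': 'admin'}; the redis key is deleted after the read
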
@@ -1,5 +1,5 b''
1 1 [bumpversion]
2 current_version = 5.0.3
2 current_version = 5.1.0
3 3 message = release: Bump version {current_version} to {new_version}
4 4
5 5 [bumpversion:file:vcsserver/VERSION]
@@ -1,139 +1,144 b''
1 1 # required for pushd to work..
2 2 SHELL = /bin/bash
3 3
4 4
5 5 # set by: PATH_TO_OUTDATED_PACKAGES=/some/path/outdated_packages.py
6 6 OUTDATED_PACKAGES = ${PATH_TO_OUTDATED_PACKAGES}
7 7
8 8 .PHONY: clean
9 9 ## Cleanup compiled and cache py files
10 10 clean:
11 11 make test-clean
12 12 find . -type f \( -iname '*.c' -o -iname '*.pyc' -o -iname '*.so' -o -iname '*.orig' \) -exec rm '{}' ';'
13 13 find . -type d -name "build" -prune -exec rm -rf '{}' ';'
14 14
15 15
16 16 .PHONY: test
17 17 ## run test-clean and tests
18 18 test:
19 19 make test-clean
20 20 make test-only
21 21
22 22
23 23 .PHONY: test-clean
24 24 ## run test-clean and tests
25 25 test-clean:
26 26 rm -rf coverage.xml htmlcov junit.xml pylint.log result
27 27 find . -type d -name "__pycache__" -prune -exec rm -rf '{}' ';'
28 28 find . -type f \( -iname '.coverage.*' \) -exec rm '{}' ';'
29 29
30 30
31 31 .PHONY: test-only
32 32 ## Run tests only without cleanup
33 33 test-only:
34 34 PYTHONHASHSEED=random \
35 35 py.test -x -vv -r xw -p no:sugar \
36 36 --cov-report=term-missing --cov-report=html \
37 37 --cov=vcsserver vcsserver
38 38
39 39
40 40 .PHONY: ruff-check
41 41 ## run a ruff analysis
42 42 ruff-check:
43 43 ruff check --ignore F401 --ignore I001 --ignore E402 --ignore E501 --ignore F841 --exclude rhodecode/lib/dbmigrate --exclude .eggs --exclude .dev .
44 44
45
46 45 .PHONY: pip-packages
47 46 ## Show outdated packages
48 47 pip-packages:
49 48 python ${OUTDATED_PACKAGES}
50 49
51 50
52 51 .PHONY: build
53 52 ## Build sdist/egg
54 53 build:
55 54 python -m build
56 55
57 56
58 57 .PHONY: dev-sh
59 58 ## make dev-sh
60 59 dev-sh:
61 60 sudo echo "deb [trusted=yes] https://apt.fury.io/rsteube/ /" | sudo tee -a "/etc/apt/sources.list.d/fury.list"
62 61 sudo apt-get update
63 62 sudo apt-get install -y zsh carapace-bin
64 63 rm -rf /home/rhodecode/.oh-my-zsh
65 64 curl https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh | sh
66 echo "source <(carapace _carapace)" > /home/rhodecode/.zsrc
67 PROMPT='%(?.%F{green}√.%F{red}?%?)%f %B%F{240}%1~%f%b %# ' zsh
65 @echo "source <(carapace _carapace)" > /home/rhodecode/.zsrc
66 @echo "${RC_DEV_CMD_HELP}"
67 @PROMPT='%(?.%F{green}√.%F{red}?%?)%f %B%F{240}%1~%f%b %# ' zsh
68
69
70 .PHONY: dev-cleanup
71 ## Cleanup: pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
72 dev-cleanup:
73 pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
74 rm -rf /tmp/*
68 75
69 76
70 77 .PHONY: dev-env
71 78 ## make dev-env based on the requirements files and install develop of packages
72 79 ## Cleanup: pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
73 80 dev-env:
81 sudo -u root chown rhodecode:rhodecode /home/rhodecode/.cache/pip/
74 82 pip install build virtualenv
75 83 pip wheel --wheel-dir=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
76 84 pip install --no-index --find-links=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
77 85 pip install -e .
78 86
79 87
80 88 .PHONY: sh
81 89 ## shortcut for make dev-sh dev-env
82 90 sh:
83 91 make dev-env
84 92 make dev-sh
85 93
86 94
87 .PHONY: dev-srv
88 ## run develop server instance, docker exec -it $(docker ps -q --filter 'name=dev-enterprise-ce') /bin/bash
89 dev-srv:
90 pserve --reload .dev/dev.ini
95 ## Allows changing the number of workers, e.g. make dev-srv workers=2
96 workers?=1
91 97
92
93 .PHONY: dev-srv-g
94 ## run gunicorn multi process workers
95 dev-srv-g:
96 gunicorn --workers=4 --paste .dev/dev.ini --bind=0.0.0.0:10010 --config=.dev/gunicorn_config.py
98 .PHONY: dev-srv
99 ## run gunicorn web server with reloader, use workers=N to set multiworker mode
100 dev-srv:
101 gunicorn --paste=.dev/dev.ini --bind=0.0.0.0:10010 --config=.dev/gunicorn_config.py --reload --workers=$(workers)
97 102
98 103
99 104 # Default command on calling make
100 105 .DEFAULT_GOAL := show-help
101 106
102 107 .PHONY: show-help
103 108 show-help:
104 109 @echo "$$(tput bold)Available rules:$$(tput sgr0)"
105 110 @echo
106 111 @sed -n -e "/^## / { \
107 112 h; \
108 113 s/.*//; \
109 114 :doc" \
110 115 -e "H; \
111 116 n; \
112 117 s/^## //; \
113 118 t doc" \
114 119 -e "s/:.*//; \
115 120 G; \
116 121 s/\\n## /---/; \
117 122 s/\\n/ /g; \
118 123 p; \
119 124 }" ${MAKEFILE_LIST} \
120 125 | LC_ALL='C' sort --ignore-case \
121 126 | awk -F '---' \
122 127 -v ncol=$$(tput cols) \
123 128 -v indent=19 \
124 129 -v col_on="$$(tput setaf 6)" \
125 130 -v col_off="$$(tput sgr0)" \
126 131 '{ \
127 132 printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
128 133 n = split($$2, words, " "); \
129 134 line_length = ncol - indent; \
130 135 for (i = 1; i <= n; i++) { \
131 136 line_length -= length(words[i]) + 1; \
132 137 if (line_length <= 0) { \
133 138 line_length = ncol - indent - length(words[i]) - 1; \
134 139 printf "\n%*s ", -indent, " "; \
135 140 } \
136 141 printf "%s ", words[i]; \
137 142 } \
138 143 printf "\n"; \
139 144 }'
@@ -1,204 +1,191 b''
1 #
2 1
3 2 ; #################################
4 3 ; RHODECODE VCSSERVER CONFIGURATION
5 4 ; #################################
6 5
7 6 [server:main]
8 ; COMMON HOST/IP CONFIG
7 ; COMMON HOST/IP CONFIG, this applies mostly to the develop setup;
8 ; host/port for gunicorn is controlled by gunicorn_conf.py
9 9 host = 0.0.0.0
10 10 port = 10010
11 11
12 ; ##################################################
13 ; WAITRESS WSGI SERVER - Recommended for Development
14 ; ##################################################
15
16 ; use server type
17 use = egg:waitress#main
18
19 ; number of worker threads
20 threads = 5
21
22 ; MAX BODY SIZE 100GB
23 max_request_body_size = 107374182400
24
25 ; Use poll instead of select, fixes file descriptors limits problems.
26 ; May not work on old windows systems.
27 asyncore_use_poll = true
28
29 12
30 13 ; ###########################
31 14 ; GUNICORN APPLICATION SERVER
32 15 ; ###########################
33 16
34 ; run with gunicorn --paste rhodecode.ini
17 ; run with gunicorn --config gunicorn_conf.py --paste vcsserver.ini
35 18
36 19 ; Module to use, this setting shouldn't be changed
37 #use = egg:gunicorn#main
20 use = egg:gunicorn#main
38 21
39 22 [app:main]
40 23 ; The %(here)s variable will be replaced with the absolute path of parent directory
41 24 ; of this file
42 25 ; Each option in the app:main can be overridden by an environment variable
43 26 ;
44 27 ;To override an option:
45 28 ;
46 29 ;RC_<KeyName>
47 30 ;Everything should be uppercase, . and - should be replaced by _.
48 31 ;For example, if you have these configuration settings:
49 32 ;rc_cache.repo_object.backend = foo
50 33 ;can be overridden by
51 34 ;export RC_CACHE_REPO_OBJECT_BACKEND=foo
52 35
53 36 use = egg:rhodecode-vcsserver
54 37
55 38
56 39 ; #############
57 40 ; DEBUG OPTIONS
58 41 ; #############
59 42
60 43 # During development we want to have the debug toolbar enabled
61 44 pyramid.includes =
62 45 pyramid_debugtoolbar
63 46
64 47 debugtoolbar.hosts = 0.0.0.0/0
65 48 debugtoolbar.exclude_prefixes =
66 49 /css
67 50 /fonts
68 51 /images
69 52 /js
70 53
71 54 ; #################
72 55 ; END DEBUG OPTIONS
73 56 ; #################
74 57
75 58 ; Pyramid default locales, we need this to be set
76 59 #pyramid.default_locale_name = en
77 60
78 61 ; default locale used by VCS systems
79 62 #locale = en_US.UTF-8
80 63
81 ; path to binaries for vcsserver, it should be set by the installer
64 ; path to binaries (hg,git,svn) for vcsserver, it should be set by the installer
82 65 ; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
83 ; it can also be a path to nix-build output in case of development
84 core.binary_dir = ""
66 ; or /usr/local/bin/rhodecode_bin/vcs_bin
67 core.binary_dir =
68
69 ; Redis connection settings for svn integrations logic
70 ; This connection string needs to be the same on ce and vcsserver
71 vcs.svn.redis_conn = redis://redis:6379/0
85 72
86 73 ; Custom exception store path, defaults to TMPDIR
87 74 ; This is used to store exception from RhodeCode in shared directory
88 75 #exception_tracker.store_path =
89 76
90 77 ; #############
91 78 ; DOGPILE CACHE
92 79 ; #############
93 80
94 81 ; Default cache dir for caches. Putting this into a ramdisk can boost performance.
95 82 ; eg. /tmpfs/data_ramdisk, however this directory might require large amount of space
96 83 #cache_dir = %(here)s/data
97 84
98 85 ; ***************************************
99 86 ; `repo_object` cache, default file based
100 87 ; ***************************************
101 88
102 89 ; `repo_object` cache settings for vcs methods for repositories
103 90 #rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
104 91
105 92 ; cache auto-expires after N seconds
106 93 ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
107 94 #rc_cache.repo_object.expiration_time = 2592000
108 95
109 96 ; file cache store path. Defaults to `cache_dir =` value or tempdir if both values are not set
110 97 #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache_repo_object.db
111 98
112 99 ; ***********************************************************
113 100 ; `repo_object` cache with redis backend
114 101 ; recommended for larger instances, and for better performance
115 102 ; ***********************************************************
116 103
117 104 ; `repo_object` cache settings for vcs methods for repositories
118 105 #rc_cache.repo_object.backend = dogpile.cache.rc.redis_msgpack
119 106
120 107 ; cache auto-expires after N seconds
121 108 ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
122 109 #rc_cache.repo_object.expiration_time = 2592000
123 110
124 111 ; redis_expiration_time needs to be greater than expiration_time
125 112 #rc_cache.repo_object.arguments.redis_expiration_time = 3592000
126 113
127 114 #rc_cache.repo_object.arguments.host = localhost
128 115 #rc_cache.repo_object.arguments.port = 6379
129 116 #rc_cache.repo_object.arguments.db = 5
130 117 #rc_cache.repo_object.arguments.socket_timeout = 30
131 118 ; more Redis options: https://dogpilecache.sqlalchemy.org/en/latest/api.html#redis-backends
132 119 #rc_cache.repo_object.arguments.distributed_lock = true
133 120
134 121 ; auto-renew lock to prevent stale locks, slower but safer. Use only if problems happen
135 122 #rc_cache.repo_object.arguments.lock_auto_renewal = true
136 123
137 124 ; Statsd client config, this is used to send metrics to statsd
138 125 ; We recommend setting up statsd_exporter and scraping the metrics using Prometheus
139 126 #statsd.enabled = false
140 127 #statsd.statsd_host = 0.0.0.0
141 128 #statsd.statsd_port = 8125
142 129 #statsd.statsd_prefix =
143 130 #statsd.statsd_ipv6 = false
144 131
145 132 ; configure logging automatically at server startup; set to false
146 133 ; to use the custom logging config below.
147 134 ; RC_LOGGING_FORMATTER
148 135 ; RC_LOGGING_LEVEL
149 136 ; env variables can control the settings for logging in case of autoconfigure
150 137
151 138 #logging.autoconfigure = true
152 139
153 140 ; specify your own custom logging config file to configure logging
154 141 #logging.logging_conf_file = /path/to/custom_logging.ini
155 142
156 143 ; #####################
157 144 ; LOGGING CONFIGURATION
158 145 ; #####################
159 146
160 147 [loggers]
161 148 keys = root, vcsserver
162 149
163 150 [handlers]
164 151 keys = console
165 152
166 153 [formatters]
167 154 keys = generic, json
168 155
169 156 ; #######
170 157 ; LOGGERS
171 158 ; #######
172 159 [logger_root]
173 160 level = NOTSET
174 161 handlers = console
175 162
176 163 [logger_vcsserver]
177 164 level = DEBUG
178 165 handlers =
179 166 qualname = vcsserver
180 167 propagate = 1
181 168
182 169 ; ########
183 170 ; HANDLERS
184 171 ; ########
185 172
186 173 [handler_console]
187 174 class = StreamHandler
188 175 args = (sys.stderr, )
189 176 level = DEBUG
190 177 ; To enable JSON formatted logs replace 'generic' with 'json'
191 178 ; This allows sending properly formatted logs to grafana loki or elasticsearch
192 179 formatter = generic
193 180
194 181 ; ##########
195 182 ; FORMATTERS
196 183 ; ##########
197 184
198 185 [formatter_generic]
199 186 format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
200 187 datefmt = %Y-%m-%d %H:%M:%S
201 188
202 189 [formatter_json]
203 190 format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
204 191 class = vcsserver.lib._vendor.jsonlogger.JsonFormatter
@@ -1,167 +1,171 b''
1 #
2 1
3 2 ; #################################
4 3 ; RHODECODE VCSSERVER CONFIGURATION
5 4 ; #################################
6 5
7 6 [server:main]
8 ; COMMON HOST/IP CONFIG
9 host = 127.0.0.1
7 ; COMMON HOST/IP CONFIG, this applies mostly to the develop setup;
8 ; host/port for gunicorn is controlled by gunicorn_conf.py
9 host = 0.0.0.0
10 10 port = 10010
11 11
12 12
13 13 ; ###########################
14 14 ; GUNICORN APPLICATION SERVER
15 15 ; ###########################
16 16
17 ; run with gunicorn --paste rhodecode.ini
17 ; run with gunicorn --config gunicorn_conf.py --paste vcsserver.ini
18 18
19 19 ; Module to use, this setting shouldn't be changed
20 20 use = egg:gunicorn#main
21 21
22 22 [app:main]
23 23 ; The %(here)s variable will be replaced with the absolute path of parent directory
24 24 ; of this file
25 25 ; Each option in the app:main can be overridden by an environment variable
26 26 ;
27 27 ;To override an option:
28 28 ;
29 29 ;RC_<KeyName>
30 30 ;Everything should be uppercase, . and - should be replaced by _.
31 31 ;For example, if you have these configuration settings:
32 32 ;rc_cache.repo_object.backend = foo
33 33 ;can be overridden by
34 34 ;export RC_CACHE_REPO_OBJECT_BACKEND=foo
35 35
36 36 use = egg:rhodecode-vcsserver
37 37
38 38 ; Pyramid default locales, we need this to be set
39 39 #pyramid.default_locale_name = en
40 40
41 41 ; default locale used by VCS systems
42 42 #locale = en_US.UTF-8
43 43
44 ; path to binaries for vcsserver, it should be set by the installer
44 ; path to binaries (hg,git,svn) for vcsserver, it should be set by the installer
45 45 ; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
46 ; it can also be a path to nix-build output in case of development
47 core.binary_dir = ""
46 ; or /usr/local/bin/rhodecode_bin/vcs_bin
47 core.binary_dir =
48
49 ; Redis connection settings for svn integrations logic
50 ; This connection string needs to be the same on ce and vcsserver
51 vcs.svn.redis_conn = redis://redis:6379/0
48 52
49 53 ; Custom exception store path, defaults to TMPDIR
50 54 ; This is used to store exception from RhodeCode in shared directory
51 55 #exception_tracker.store_path =
52 56
53 57 ; #############
54 58 ; DOGPILE CACHE
55 59 ; #############
56 60
57 61 ; Default cache dir for caches. Putting this into a ramdisk can boost performance.
58 62 ; eg. /tmpfs/data_ramdisk, however this directory might require large amount of space
59 63 #cache_dir = %(here)s/data
60 64
61 65 ; ***************************************
62 66 ; `repo_object` cache, default file based
63 67 ; ***************************************
64 68
65 69 ; `repo_object` cache settings for vcs methods for repositories
66 70 #rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
67 71
68 72 ; cache auto-expires after N seconds
69 73 ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
70 74 #rc_cache.repo_object.expiration_time = 2592000
71 75
72 76 ; file cache store path. Defaults to `cache_dir =` value or tempdir if both values are not set
73 77 #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache_repo_object.db
74 78
75 79 ; ***********************************************************
76 80 ; `repo_object` cache with redis backend
77 81 ; recommended for larger instances, and for better performance
78 82 ; ***********************************************************
79 83
80 84 ; `repo_object` cache settings for vcs methods for repositories
81 85 #rc_cache.repo_object.backend = dogpile.cache.rc.redis_msgpack
82 86
83 87 ; cache auto-expires after N seconds
84 88 ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
85 89 #rc_cache.repo_object.expiration_time = 2592000
86 90
87 91 ; redis_expiration_time needs to be greater than expiration_time
88 92 #rc_cache.repo_object.arguments.redis_expiration_time = 3592000
89 93
90 94 #rc_cache.repo_object.arguments.host = localhost
91 95 #rc_cache.repo_object.arguments.port = 6379
92 96 #rc_cache.repo_object.arguments.db = 5
93 97 #rc_cache.repo_object.arguments.socket_timeout = 30
94 98 ; more Redis options: https://dogpilecache.sqlalchemy.org/en/latest/api.html#redis-backends
95 99 #rc_cache.repo_object.arguments.distributed_lock = true
96 100
97 101 ; auto-renew lock to prevent stale locks, slower but safer. Use only if problems happen
98 102 #rc_cache.repo_object.arguments.lock_auto_renewal = true
99 103
100 104 ; Statsd client config, this is used to send metrics to statsd
101 105 ; We recommend setting up statsd_exporter and scraping the metrics using Prometheus
102 106 #statsd.enabled = false
103 107 #statsd.statsd_host = 0.0.0.0
104 108 #statsd.statsd_port = 8125
105 109 #statsd.statsd_prefix =
106 110 #statsd.statsd_ipv6 = false
107 111
108 112 ; configure logging automatically at server startup; set to false
109 113 ; to use the custom logging config below.
110 114 ; RC_LOGGING_FORMATTER
111 115 ; RC_LOGGING_LEVEL
112 116 ; env variables can control the settings for logging in case of autoconfigure
113 117
114 118 #logging.autoconfigure = true
115 119
116 120 ; specify your own custom logging config file to configure logging
117 121 #logging.logging_conf_file = /path/to/custom_logging.ini
118 122
119 123 ; #####################
120 124 ; LOGGING CONFIGURATION
121 125 ; #####################
122 126
123 127 [loggers]
124 128 keys = root, vcsserver
125 129
126 130 [handlers]
127 131 keys = console
128 132
129 133 [formatters]
130 134 keys = generic, json
131 135
132 136 ; #######
133 137 ; LOGGERS
134 138 ; #######
135 139 [logger_root]
136 140 level = NOTSET
137 141 handlers = console
138 142
139 143 [logger_vcsserver]
140 144 level = INFO
141 145 handlers =
142 146 qualname = vcsserver
143 147 propagate = 1
144 148
145 149 ; ########
146 150 ; HANDLERS
147 151 ; ########
148 152
149 153 [handler_console]
150 154 class = StreamHandler
151 155 args = (sys.stderr, )
152 156 level = INFO
153 157 ; To enable JSON formatted logs replace 'generic' with 'json'
154 158 ; This allows sending properly formatted logs to grafana loki or elasticsearch
155 159 formatter = generic
156 160
157 161 ; ##########
158 162 ; FORMATTERS
159 163 ; ##########
160 164
161 165 [formatter_generic]
162 166 format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
163 167 datefmt = %Y-%m-%d %H:%M:%S
164 168
165 169 [formatter_json]
166 170 format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
167 171 class = vcsserver.lib._vendor.jsonlogger.JsonFormatter
@@ -1,77 +1,102 b''
1 1 # deps, generated via pipdeptree --exclude setuptools,wheel,pipdeptree,pip -f | tr '[:upper:]' '[:lower:]'
2 2
3 3 async-timeout==4.0.3
4 4 atomicwrites==1.4.1
5 5 celery==5.3.6
6 6 billiard==4.2.0
7 7 click==8.1.3
8 8 click-didyoumean==0.3.0
9 9 click==8.1.3
10 10 click-plugins==1.1.1
11 11 click==8.1.3
12 12 click-repl==0.2.0
13 13 click==8.1.3
14 14 prompt-toolkit==3.0.38
15 15 wcwidth==0.2.6
16 16 six==1.16.0
17 17 kombu==5.3.5
18 18 amqp==5.2.0
19 19 vine==5.1.0
20 20 vine==5.1.0
21 21 python-dateutil==2.8.2
22 22 six==1.16.0
23 tzdata==2023.4
23 tzdata==2024.1
24 24 vine==5.1.0
25 25 contextlib2==21.6.0
26 cov-core==1.15.0
27 coverage==7.2.3
28 diskcache==5.6.3
29 dogpile.cache==1.3.0
26 dogpile.cache==1.3.3
30 27 decorator==5.1.1
31 28 stevedore==5.1.0
32 29 pbr==5.11.1
33 30 dulwich==0.21.6
34 31 urllib3==1.26.14
32 fsspec==2024.6.0
35 33 gunicorn==21.2.0
36 packaging==23.1
37 hg-evolve==11.0.2
34 packaging==24.0
35 hg-evolve==11.1.3
38 36 importlib-metadata==6.0.0
39 37 zipp==3.15.0
40 mercurial==6.3.3
41 mock==5.0.2
38 mercurial==6.7.4
42 39 more-itertools==9.1.0
43 msgpack==1.0.7
44 orjson==3.9.13
40 msgpack==1.0.8
41 orjson==3.10.3
45 42 psutil==5.9.8
46 43 py==1.11.0
47 44 pygit2==1.13.3
48 45 cffi==1.16.0
49 46 pycparser==2.21
50 47 pygments==2.15.1
51 48 pyparsing==3.1.1
52 49 pyramid==2.0.2
53 50 hupper==1.12
54 51 plaster==1.1.2
55 52 plaster-pastedeploy==1.0.1
56 53 pastedeploy==3.1.0
57 54 plaster==1.1.2
58 55 translationstring==1.4
59 56 venusian==3.0.0
60 57 webob==1.8.7
61 58 zope.deprecation==5.0.0
62 zope.interface==6.1.0
63 redis==5.0.1
59 zope.interface==6.3.0
60 redis==5.0.4
64 61 async-timeout==4.0.3
65 62 repoze.lru==0.7
63 s3fs==2024.6.0
64 aiobotocore==2.13.0
65 aiohttp==3.9.5
66 aiosignal==1.3.1
67 frozenlist==1.4.1
68 attrs==22.2.0
69 frozenlist==1.4.1
70 multidict==6.0.5
71 yarl==1.9.4
72 idna==3.4
73 multidict==6.0.5
74 aioitertools==0.11.0
75 botocore==1.34.106
76 jmespath==1.0.1
77 python-dateutil==2.8.2
78 six==1.16.0
79 urllib3==1.26.14
80 wrapt==1.16.0
81 aiohttp==3.9.5
82 aiosignal==1.3.1
83 frozenlist==1.4.1
84 attrs==22.2.0
85 frozenlist==1.4.1
86 multidict==6.0.5
87 yarl==1.9.4
88 idna==3.4
89 multidict==6.0.5
90 fsspec==2024.6.0
66 91 scandir==1.10.0
67 92 setproctitle==1.3.3
68 93 subvertpy==0.11.0
69 94 waitress==3.0.0
70 95 wcwidth==0.2.6
71 96
72 97
73 98 ## test related requirements
74 99 #-r requirements_test.txt
75 100
76 101 ## uncomment to add the debug libraries
77 102 #-r requirements_debug.txt
@@ -1,45 +1,48 b''
1 1 # test related requirements
2
3 cov-core==1.15.0
4 coverage==7.2.3
5 mock==5.0.2
6 py==1.11.0
7 pytest-cov==4.0.0
8 coverage==7.2.3
9 pytest==7.3.1
10 attrs==22.2.0
2 mock==5.1.0
3 pytest-cov==4.1.0
4 coverage==7.4.3
5 pytest==8.1.1
11 6 iniconfig==2.0.0
12 packaging==23.1
13 pluggy==1.0.0
7 packaging==24.0
8 pluggy==1.4.0
9 pytest-env==1.1.3
10 pytest==8.1.1
11 iniconfig==2.0.0
12 packaging==24.0
13 pluggy==1.4.0
14 14 pytest-profiling==1.7.0
15 15 gprof2dot==2022.7.29
16 pytest==7.3.1
17 attrs==22.2.0
16 pytest==8.1.1
18 17 iniconfig==2.0.0
19 packaging==23.1
20 pluggy==1.0.0
18 packaging==24.0
19 pluggy==1.4.0
21 20 six==1.16.0
22 pytest-runner==6.0.0
23 pytest-sugar==0.9.7
24 packaging==23.1
25 pytest==7.3.1
26 attrs==22.2.0
21 pytest-rerunfailures==13.0
22 packaging==24.0
23 pytest==8.1.1
27 24 iniconfig==2.0.0
28 packaging==23.1
29 pluggy==1.0.0
30 termcolor==2.3.0
31 pytest-timeout==2.1.0
32 pytest==7.3.1
33 attrs==22.2.0
25 packaging==24.0
26 pluggy==1.4.0
27 pytest-runner==6.0.1
28 pytest-sugar==1.0.0
29 packaging==24.0
30 pytest==8.1.1
34 31 iniconfig==2.0.0
35 packaging==23.1
36 pluggy==1.0.0
32 packaging==24.0
33 pluggy==1.4.0
34 termcolor==2.4.0
35 pytest-timeout==2.3.1
36 pytest==8.1.1
37 iniconfig==2.0.0
38 packaging==24.0
39 pluggy==1.4.0
37 40 webtest==3.0.0
38 beautifulsoup4==4.11.2
39 soupsieve==2.4
41 beautifulsoup4==4.12.3
42 soupsieve==2.5
40 43 waitress==3.0.0
41 44 webob==1.8.7
42 45
43 46 # RhodeCode test-data
44 47 rc_testdata @ https://code.rhodecode.com/upstream/rc-testdata-dist/raw/77378e9097f700b4c1b9391b56199fe63566b5c9/rc_testdata-0.11.0.tar.gz#egg=rc_testdata
45 48 rc_testdata==0.11.0
@@ -1,1 +1,1 b''
1 5.0.3 No newline at end of file
1 5.1.0 No newline at end of file
@@ -1,193 +1,187 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17 import os
18 18 import sys
19 19 import tempfile
20 20 import logging
21 21 import urllib.parse
22 22
23 from vcsserver.lib.rc_cache.archive_cache import get_archival_cache_store
23 from vcsserver.lib.archive_cache import get_archival_cache_store
24 24
25 25 from vcsserver import exceptions
26 26 from vcsserver.exceptions import NoContentException
27 27 from vcsserver.hgcompat import archival
28 from vcsserver.str_utils import safe_bytes
28 from vcsserver.lib.str_utils import safe_bytes
29 29 from vcsserver.lib.exc_tracking import format_exc
30 30 log = logging.getLogger(__name__)
31 31
32 32
33 33 class RepoFactory:
34 34 """
35 35 Utility to create instances of repository
36 36
37 37 It provides internal caching of the `repo` object based on
38 38 the :term:`call context`.
39 39 """
40 40 repo_type = None
41 41
42 42 def __init__(self):
43 43 pass
44 44
45 45 def _create_config(self, path, config):
46 46 config = {}
47 47 return config
48 48
49 49 def _create_repo(self, wire, create):
50 50 raise NotImplementedError()
51 51
52 52 def repo(self, wire, create=False):
53 53 raise NotImplementedError()
54 54
55 55
56 56 def obfuscate_qs(query_string):
57 57 if query_string is None:
58 58 return None
59 59
60 60 parsed = []
61 61 for k, v in urllib.parse.parse_qsl(query_string, keep_blank_values=True):
62 62 if k in ['auth_token', 'api_key']:
63 63 v = "*****"
64 64 parsed.append((k, v))
65 65
66 66 return '&'.join('{}{}'.format(
67 67 k, f'={v}' if v else '') for k, v in parsed)
68 68
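A quick usage sketch with hypothetical values, showing which keys get masked and how blank values render:

# sensitive keys are replaced with '*****'; a blank value yields a bare key
assert obfuscate_qs('auth_token=secret123&foo=bar') == 'auth_token=*****&foo=bar'
assert obfuscate_qs('debug=&api_key=xyz') == 'debug&api_key=*****'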
69 69
70 70 def raise_from_original(new_type, org_exc: Exception):
71 71 """
72 72 Raise a new exception type with original args and traceback.
73 73 """
74 74 exc_info = sys.exc_info()
75 75 exc_type, exc_value, exc_traceback = exc_info
76 76 new_exc = new_type(*exc_value.args)
77 77
78 78 # store the original traceback into the new exc
79 79 new_exc._org_exc_tb = format_exc(exc_info)
80 80
81 81 try:
82 82 raise new_exc.with_traceback(exc_traceback)
83 83 finally:
84 84 del exc_traceback
85 85
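A minimal sketch of the intended call pattern, from inside an except block; RemoteError is a hypothetical target type:

class RemoteError(Exception):
    pass

try:
    int('boom')
except ValueError as org:
    # re-raised as RemoteError with the original args; the formatted
    # original traceback is attached as new_exc._org_exc_tb
    raise_from_original(RemoteError, org)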
86 86
87 87 class ArchiveNode:
88 88 def __init__(self, path, mode, is_link, raw_bytes):
89 89 self.path = path
90 90 self.mode = mode
91 91 self.is_link = is_link
92 92 self.raw_bytes = raw_bytes
93 93
94 94
95 95 def store_archive_in_cache(node_walker, archive_key, kind, mtime, archive_at_path, archive_dir_name,
96 96 commit_id, write_metadata=True, extra_metadata=None, cache_config=None):
97 97 """
98 98 Function that stores a generated archive and sends it to a dedicated backend store.
99 99 Here we use diskcache
100 100
101 101 :param node_walker: a generator returning nodes to add to archive
102 102 :param archive_key: key used to store the path
103 103 :param kind: archive kind
104 104 :param mtime: time of creation
105 105 :param archive_at_path: default '/', the path at which the archive was started.
106 106 If this is not '/' it means it's a partial archive
107 107 :param archive_dir_name: name of the inner directory when creating an archive
108 108 :param commit_id: commit sha of revision archive was created at
109 109 :param write_metadata:
110 110 :param extra_metadata:
111 111 :param cache_config:
112 112
113 113 walker should be a file walker, for example,
114 114 def node_walker():
115 115 for file_info in files:
116 116 yield ArchiveNode(fn, mode, is_link, ctx[fn].data)
117 117 """
118 118 extra_metadata = extra_metadata or {}
119 119
120 120 d_cache = get_archival_cache_store(config=cache_config)
121 121
122 122 if archive_key in d_cache:
123 with d_cache as d_cache_reader:
124 reader, tag = d_cache_reader.get(archive_key, read=True, tag=True, retry=True)
125 return reader.name
123 reader, metadata = d_cache.fetch(archive_key)
124 return reader.name
126 125
127 126 archive_tmp_path = safe_bytes(tempfile.mkstemp()[1])
128 127 log.debug('Creating new temp archive in %s', archive_tmp_path)
129 128
130 129 if kind == "tgz":
131 130 archiver = archival.tarit(archive_tmp_path, mtime, b"gz")
132 131 elif kind == "tbz2":
133 132 archiver = archival.tarit(archive_tmp_path, mtime, b"bz2")
134 133 elif kind == 'zip':
135 134 archiver = archival.zipit(archive_tmp_path, mtime)
136 135 else:
137 136 raise exceptions.ArchiveException()(
138 137 f'Remote does not support: "{kind}" archive type.')
139 138
140 139 for f in node_walker(commit_id, archive_at_path):
141 140 f_path = os.path.join(safe_bytes(archive_dir_name), safe_bytes(f.path).lstrip(b'/'))
141
142 142 try:
143 143 archiver.addfile(f_path, f.mode, f.is_link, f.raw_bytes())
144 144 except NoContentException:
145 145 # NOTE(marcink): this is a special case for SVN so we can create "empty"
146 146 # directories which are not supported by archiver
147 147 archiver.addfile(os.path.join(f_path, b'.dir'), f.mode, f.is_link, b'')
148 148
149 metadata = {
150 'commit_id': commit_id,
151 'mtime': mtime,
152 }
153 metadata.update(extra_metadata)
149 154 if write_metadata:
150 metadata = dict([
151 ('commit_id', commit_id),
152 ('mtime', mtime),
153 ])
154 metadata.update(extra_metadata)
155
156 155 meta = [safe_bytes(f"{f_name}:{value}") for f_name, value in metadata.items()]
157 156 f_path = os.path.join(safe_bytes(archive_dir_name), b'.archival.txt')
158 157 archiver.addfile(f_path, 0o644, False, b'\n'.join(meta))
159 158
160 159 archiver.done()
161 160
162 # ensure set & get are atomic
163 with d_cache.transact():
164
165 with open(archive_tmp_path, 'rb') as archive_file:
166 add_result = d_cache.set(archive_key, archive_file, read=True, tag='db-name', retry=True)
167 if not add_result:
168 log.error('Failed to store cache for key=%s', archive_key)
161 with open(archive_tmp_path, 'rb') as archive_file:
162 add_result = d_cache.store(archive_key, archive_file, metadata=metadata)
163 if not add_result:
164 log.error('Failed to store cache for key=%s', archive_key)
169 165
170 os.remove(archive_tmp_path)
166 os.remove(archive_tmp_path)
171 167
172 reader, tag = d_cache.get(archive_key, read=True, tag=True, retry=True)
173 if not reader:
174 raise AssertionError(f'empty reader on key={archive_key} added={add_result}')
168 reader, metadata = d_cache.fetch(archive_key)
175 169
176 return reader.name
170 return reader.name
177 171
178 172
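A minimal usage sketch of the function above, with hypothetical file data; note that the fourth ArchiveNode argument must be a zero-argument callable returning bytes, since the archiver invokes f.raw_bytes():

def node_walker(commit_id, archive_at_path):
    # `files` is a hypothetical iterable of (path, mode, is_link, data_callable)
    for f_path, mode, is_link, data_callable in files:
        yield ArchiveNode(f_path, mode, is_link, data_callable)

archive_path = store_archive_in_cache(
    node_walker, archive_key='repo-abc123.tgz', kind='tgz',
    mtime=1715000000, archive_at_path='/', archive_dir_name='repo-abc123',
    commit_id='abc123', cache_config=app_settings)  # app_settings: the app config dict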
179 173 class BinaryEnvelope:
180 174 def __init__(self, val):
181 175 self.val = val
182 176
183 177
184 178 class BytesEnvelope(bytes):
185 179 def __new__(cls, content):
186 180 if isinstance(content, bytes):
187 181 return super().__new__(cls, content)
188 182 else:
189 183 raise TypeError('BytesEnvelope content= param must be bytes. Use BinaryEnvelope to wrap other types')
190 184
191 185
192 186 class BinaryBytesEnvelope(BytesEnvelope):
193 187 pass
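A short sketch of the envelope contract:

BytesEnvelope(b'payload')        # ok, behaves like plain bytes
BinaryEnvelope({'any': 'obj'})   # ok, arbitrary value kept on .val
BytesEnvelope('text')            # raises TypeError, str is rejected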
@@ -1,185 +1,185 b''
1 1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 2 #
3 3 # This program is free software: you can redistribute it and/or modify
4 4 # it under the terms of the GNU Affero General Public License, version 3
5 5 # (only), as published by the Free Software Foundation.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU Affero General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 #
15 15 # This program is dual-licensed. If you wish to learn more about the
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18
19 19 import os
20 20 import textwrap
21 21 import string
22 22 import functools
23 23 import logging
24 24 import tempfile
25 25 import logging.config
26 26
27 from vcsserver.type_utils import str2bool, aslist
27 from vcsserver.lib.type_utils import str2bool, aslist
28 28
29 29 log = logging.getLogger(__name__)
30 30
31 31
32 32 # skip keys that are set here, so we don't double-process them
33 33 set_keys = {
34 34 '__file__': ''
35 35 }
36 36
37 37
38 38 class SettingsMaker:
39 39
40 40 def __init__(self, app_settings):
41 41 self.settings = app_settings
42 42
43 43 @classmethod
44 44 def _bool_func(cls, input_val):
45 45 if isinstance(input_val, bytes):
46 46 # decode to str
47 47 input_val = input_val.decode('utf8')
48 48 return str2bool(input_val)
49 49
50 50 @classmethod
51 51 def _int_func(cls, input_val):
52 52 return int(input_val)
53 53
54 54 @classmethod
55 55 def _float_func(cls, input_val):
56 56 return float(input_val)
57 57
58 58 @classmethod
59 59 def _list_func(cls, input_val, sep=','):
60 60 return aslist(input_val, sep=sep)
61 61
62 62 @classmethod
63 63 def _string_func(cls, input_val, lower=True):
64 64 if lower:
65 65 input_val = input_val.lower()
66 66 return input_val
67 67
68 68 @classmethod
69 69 def _string_no_quote_func(cls, input_val, lower=True):
70 70 """
71 71 Special case string function that detects if a value is set to an empty quoted string
72 72 e.g.
73 73
74 74 core.binary_dir = ""
75 75 """
76 76
77 77 input_val = cls._string_func(input_val, lower=lower)
78 78 if input_val in ['""', "''"]:
79 79 return ''
80 80 return input_val
81 81
82 82 @classmethod
83 83 def _dir_func(cls, input_val, ensure_dir=False, mode=0o755):
84 84
85 85 # ensure we have our dir created
86 86 if not os.path.isdir(input_val) and ensure_dir:
87 87 os.makedirs(input_val, mode=mode, exist_ok=True)
88 88
89 89 if not os.path.isdir(input_val):
90 90 raise Exception(f'Dir at {input_val} does not exist')
91 91 return input_val
92 92
93 93 @classmethod
94 94 def _file_path_func(cls, input_val, ensure_dir=False, mode=0o755):
95 95 dirname = os.path.dirname(input_val)
96 96 cls._dir_func(dirname, ensure_dir=ensure_dir)
97 97 return input_val
98 98
99 99 @classmethod
100 100 def _key_transformator(cls, key):
101 101 return "{}_{}".format('RC'.upper(), key.upper().replace('.', '_').replace('-', '_'))
102 102
103 103 def maybe_env_key(self, key):
104 104 # maybe we have this KEY in the env; if found, that value takes higher priority.
105 105 transformed_key = self._key_transformator(key)
106 106 envvar_value = os.environ.get(transformed_key)
107 107 if envvar_value:
108 108 log.debug('using `%s` key instead of `%s` key for config', transformed_key, key)
109 109
110 110 return envvar_value
111 111
112 112 def env_expand(self):
113 113 replaced = {}
114 114 for k, v in self.settings.items():
115 115 if k not in set_keys:
116 116 envvar_value = self.maybe_env_key(k)
117 117 if envvar_value:
118 118 replaced[k] = envvar_value
119 119 set_keys[k] = envvar_value
120 120
121 121 # replace ALL updated keys
122 122 self.settings.update(replaced)
123 123
124 124 def enable_logging(self, logging_conf=None, level='INFO', formatter='generic'):
125 125 """
126 126 Helper to enable debug logging on a running instance
127 127 :return:
128 128 """
129 129
130 130 if not str2bool(self.settings.get('logging.autoconfigure')):
131 131 log.info('logging configuration based on main .ini file')
132 132 return
133 133
134 134 if logging_conf is None:
135 135 logging_conf = self.settings.get('logging.logging_conf_file') or ''
136 136
137 137 if not os.path.isfile(logging_conf):
138 138 log.error('Unable to setup logging based on %s, '
139 139 'file does not exist.... specify path using logging.logging_conf_file= config setting. ', logging_conf)
140 140 return
141 141
142 142 with open(logging_conf, 'rt') as f:
143 143 ini_template = textwrap.dedent(f.read())
144 144 ini_template = string.Template(ini_template).safe_substitute(
145 145 RC_LOGGING_LEVEL=os.environ.get('RC_LOGGING_LEVEL', '') or level,
146 146 RC_LOGGING_FORMATTER=os.environ.get('RC_LOGGING_FORMATTER', '') or formatter
147 147 )
148 148
149 149 with tempfile.NamedTemporaryFile(prefix='rc_logging_', suffix='.ini', delete=False) as f:
150 150 log.info('Saved Temporary LOGGING config at %s', f.name)
151 151 f.write(ini_template)
152 152
153 153 logging.config.fileConfig(f.name)
154 154 os.remove(f.name)
155 155
156 156 def make_setting(self, key, default, lower=False, default_when_empty=False, parser=None):
157 157 input_val = self.settings.get(key, default)
158 158
159 159 if default_when_empty and not input_val:
160 160 # use default value when value is set in the config but it is empty
161 161 input_val = default
162 162
163 163 parser_func = {
164 164 'bool': self._bool_func,
165 165 'int': self._int_func,
166 166 'float': self._float_func,
167 167 'list': self._list_func,
168 168 'list:newline': functools.partial(self._list_func, sep='\n'),
169 169 'list:spacesep': functools.partial(self._list_func, sep=' '),
170 170 'string': functools.partial(self._string_func, lower=lower),
171 171 'string:noquote': functools.partial(self._string_no_quote_func, lower=lower),
172 172 'dir': self._dir_func,
173 173 'dir:ensured': functools.partial(self._dir_func, ensure_dir=True),
174 174 'file': self._file_path_func,
175 175 'file:ensured': functools.partial(self._file_path_func, ensure_dir=True),
176 176 None: lambda i: i
177 177 }[parser]
178 178
179 179 envvar_value = self.maybe_env_key(key)
180 180 if envvar_value:
181 181 input_val = envvar_value
182 182 set_keys[key] = input_val
183 183
184 184 self.settings[key] = parser_func(input_val)
185 185 return self.settings[key]
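A minimal usage sketch with hypothetical settings; an RC_-prefixed env variable derived by _key_transformator (e.g. RC_STATSD_ENABLED) would take priority over the ini value:

settings = {'statsd.enabled': 'false', 'core.binary_dir': '""'}
maker = SettingsMaker(settings)
maker.make_setting('statsd.enabled', default='false', parser='bool')         # -> False
maker.make_setting('core.binary_dir', default='', parser='string:noquote')   # '""' -> ''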
@@ -1,296 +1,296 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import re
19 19 import logging
20 20
21 21 from pyramid.config import Configurator
22 22 from pyramid.response import Response, FileIter
23 23 from pyramid.httpexceptions import (
24 24 HTTPBadRequest, HTTPNotImplemented, HTTPNotFound, HTTPForbidden,
25 25 HTTPUnprocessableEntity)
26 26
27 from vcsserver.lib.rc_json import json
27 from vcsserver.lib.ext_json import json
28 28 from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
29 29 from vcsserver.git_lfs.utils import safe_result, get_cython_compat_decorator
30 from vcsserver.str_utils import safe_int
30 from vcsserver.lib.str_utils import safe_int
31 31
32 32 log = logging.getLogger(__name__)
33 33
34 34
35 35 GIT_LFS_CONTENT_TYPE = 'application/vnd.git-lfs' # +json ?
36 36 GIT_LFS_PROTO_PAT = re.compile(r'^/(.+)/(info/lfs/(.+))')
37 37
38 38
39 39 def write_response_error(http_exception, text=None):
40 40 content_type = GIT_LFS_CONTENT_TYPE + '+json'
41 41 _exception = http_exception(content_type=content_type)
42 42 _exception.content_type = content_type
43 43 if text:
44 44 _exception.body = json.dumps({'message': text})
45 45 log.debug('LFS: writing response of type %s to client with text:%s',
46 46 http_exception, text)
47 47 return _exception
48 48
49 49
50 50 class AuthHeaderRequired:
51 51 """
52 52 Decorator to check if request has proper auth-header
53 53 """
54 54
55 55 def __call__(self, func):
56 56 return get_cython_compat_decorator(self.__wrapper, func)
57 57
58 58 def __wrapper(self, func, *fargs, **fkwargs):
59 59 request = fargs[1]
60 60 auth = request.authorization
61 61 if not auth:
62 62 return write_response_error(HTTPForbidden)
63 63 return func(*fargs[1:], **fkwargs)
64 64
65 65
66 66 # views
67 67
68 68 def lfs_objects(request):
69 69 # indicate not supported, V1 API
70 70 log.warning('LFS: v1 api not supported, reporting it back to client')
71 71 return write_response_error(HTTPNotImplemented, 'LFS: v1 api not supported')
72 72
73 73
74 74 @AuthHeaderRequired()
75 75 def lfs_objects_batch(request):
76 76 """
77 77 The client sends the following information to the Batch endpoint to transfer some objects:
78 78
79 79 operation - Should be download or upload.
80 80 transfers - An optional Array of String identifiers for transfer
81 81 adapters that the client has configured. If omitted, the basic
82 82 transfer adapter MUST be assumed by the server.
83 83 objects - An Array of objects to download.
84 84 oid - String OID of the LFS object.
85 85 size - Integer byte size of the LFS object. Must be at least zero.
86 86 """
87 87 request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
88 88 auth = request.authorization
89 89 repo = request.matchdict.get('repo')
90 90 data = request.json
91 91 operation = data.get('operation')
92 92 http_scheme = request.registry.git_lfs_http_scheme
93 93
94 94 if operation not in ('download', 'upload'):
95 95 log.debug('LFS: unsupported operation:%s', operation)
96 96 return write_response_error(
97 97 HTTPBadRequest, f'unsupported operation mode: `{operation}`')
98 98
99 99 if 'objects' not in data:
100 100 log.debug('LFS: missing objects data')
101 101 return write_response_error(
102 102 HTTPBadRequest, 'missing objects data')
103 103
104 104 log.debug('LFS: handling operation of type: %s', operation)
105 105
106 106 objects = []
107 107 for o in data['objects']:
108 108 try:
109 109 oid = o['oid']
110 110 obj_size = o['size']
111 111 except KeyError:
112 112 log.exception('LFS, failed to extract data')
113 113 return write_response_error(
114 114 HTTPBadRequest, 'unsupported data in objects')
115 115
116 116 obj_data = {'oid': oid}
117 117 if http_scheme == 'http':
118 118 # Note(marcink): when using http, we might have a custom port,
119 119 # so we skip setting the scheme; url dispatch then won't generate a port in the URL.
120 120 # We need this for development.
121 121 http_scheme = None
122 122
123 123 obj_href = request.route_url('lfs_objects_oid', repo=repo, oid=oid,
124 124 _scheme=http_scheme)
125 125 obj_verify_href = request.route_url('lfs_objects_verify', repo=repo,
126 126 _scheme=http_scheme)
127 127 store = LFSOidStore(
128 128 oid, repo, store_location=request.registry.git_lfs_store_path)
129 129 handler = OidHandler(
130 130 store, repo, auth, oid, obj_size, obj_data,
131 131 obj_href, obj_verify_href)
132 132
133 133 # this verifies also OIDs
134 134 actions, errors = handler.exec_operation(operation)
135 135 if errors:
136 136 log.warning('LFS: got following errors: %s', errors)
137 137 obj_data['errors'] = errors
138 138
139 139 if actions:
140 140 obj_data['actions'] = actions
141 141
142 142 obj_data['size'] = obj_size
143 143 obj_data['authenticated'] = True
144 144 objects.append(obj_data)
145 145
146 146 result = {'objects': objects, 'transfer': 'basic'}
147 147 log.debug('LFS Response %s', safe_result(result))
148 148
149 149 return result
150 150
151 151
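For illustration, a batch exchange looks roughly like this (oid/size values are hypothetical; compare the tests later in this diff):

request_payload = {
    'operation': 'download',
    'objects': [{'oid': '456', 'size': '1024'}],
}
# for an oid present in the store, the response carries a download action:
# {'transfer': 'basic',
#  'objects': [{'oid': '456', 'size': '1024', 'authenticated': True,
#               'actions': {'download': {
#                   'header': {'Authorization': 'Basic ...'},
#                   'href': 'http://localhost/repo/info/lfs/objects/456'}}}]}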
152 152 def lfs_objects_oid_upload(request):
153 153 request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
154 154 repo = request.matchdict.get('repo')
155 155 oid = request.matchdict.get('oid')
156 156 store = LFSOidStore(
157 157 oid, repo, store_location=request.registry.git_lfs_store_path)
158 158 engine = store.get_engine(mode='wb')
159 159 log.debug('LFS: starting chunked write of LFS oid: %s to storage', oid)
160 160
161 161 body = request.environ['wsgi.input']
162 162
163 163 with engine as f:
164 164 blksize = 64 * 1024 # 64kb
165 165 while True:
166 166 # read in chunks as the stream comes in from Gunicorn;
167 167 # this relies on Gunicorn-specific behaviour
168 168 # and might work differently on waitress
169 169 chunk = body.read(blksize)
170 170 if not chunk:
171 171 break
172 172 f.write(chunk)
173 173
174 174 return {'upload': 'ok'}
175 175
176 176
177 177 def lfs_objects_oid_download(request):
178 178 repo = request.matchdict.get('repo')
179 179 oid = request.matchdict.get('oid')
180 180
181 181 store = LFSOidStore(
182 182 oid, repo, store_location=request.registry.git_lfs_store_path)
183 183 if not store.has_oid():
184 184 log.debug('LFS: oid %s does not exist in store', oid)
185 185 return write_response_error(
186 186 HTTPNotFound, f'requested file with oid `{oid}` not found in store')
187 187
188 188 # TODO(marcink): support range header ?
189 189 # Range: bytes=0-, `bytes=(\d+)\-.*`
190 190
191 191 f = open(store.oid_path, 'rb')
192 192 response = Response(
193 193 content_type='application/octet-stream', app_iter=FileIter(f))
194 194 response.headers.add('X-RC-LFS-Response-Oid', str(oid))
195 195 return response
196 196
197 197
198 198 def lfs_objects_verify(request):
199 199 request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
200 200 repo = request.matchdict.get('repo')
201 201
202 202 data = request.json
203 203 oid = data.get('oid')
204 204 size = safe_int(data.get('size'))
205 205
206 206 if not (oid and size):
207 207 return write_response_error(
208 208 HTTPBadRequest, 'missing oid and size in request data')
209 209
210 210 store = LFSOidStore(
211 211 oid, repo, store_location=request.registry.git_lfs_store_path)
212 212 if not store.has_oid():
213 213 log.debug('LFS: oid %s does not exist in store', oid)
214 214 return write_response_error(
215 215 HTTPNotFound, f'oid `{oid}` does not exist in store')
216 216
217 217 store_size = store.size_oid()
218 218 if store_size != size:
219 219 msg = 'requested file size mismatch store size:{} requested:{}'.format(
220 220 store_size, size)
221 221 return write_response_error(
222 222 HTTPUnprocessableEntity, msg)
223 223
224 224 return {'message': {'size': 'ok', 'in_store': 'ok'}}
225 225
226 226
227 227 def lfs_objects_lock(request):
228 228 return write_response_error(
229 229 HTTPNotImplemented, 'GIT LFS locking api not supported')
230 230
231 231
232 232 def not_found(request):
233 233 return write_response_error(
234 234 HTTPNotFound, 'request path not found')
235 235
236 236
237 237 def lfs_disabled(request):
238 238 return write_response_error(
239 239 HTTPNotImplemented, 'GIT LFS disabled for this repo')
240 240
241 241
242 242 def git_lfs_app(config):
243 243
244 244 # v1 API deprecation endpoint
245 245 config.add_route('lfs_objects',
246 246 '/{repo:.*?[^/]}/info/lfs/objects')
247 247 config.add_view(lfs_objects, route_name='lfs_objects',
248 248 request_method='POST', renderer='json')
249 249
250 250 # locking API
251 251 config.add_route('lfs_objects_lock',
252 252 '/{repo:.*?[^/]}/info/lfs/locks')
253 253 config.add_view(lfs_objects_lock, route_name='lfs_objects_lock',
254 254 request_method=('POST', 'GET'), renderer='json')
255 255
256 256 config.add_route('lfs_objects_lock_verify',
257 257 '/{repo:.*?[^/]}/info/lfs/locks/verify')
258 258 config.add_view(lfs_objects_lock, route_name='lfs_objects_lock_verify',
259 259 request_method=('POST', 'GET'), renderer='json')
260 260
261 261 # batch API
262 262 config.add_route('lfs_objects_batch',
263 263 '/{repo:.*?[^/]}/info/lfs/objects/batch')
264 264 config.add_view(lfs_objects_batch, route_name='lfs_objects_batch',
265 265 request_method='POST', renderer='json')
266 266
267 267 # oid upload/download API
268 268 config.add_route('lfs_objects_oid',
269 269 '/{repo:.*?[^/]}/info/lfs/objects/{oid}')
270 270 config.add_view(lfs_objects_oid_upload, route_name='lfs_objects_oid',
271 271 request_method='PUT', renderer='json')
272 272 config.add_view(lfs_objects_oid_download, route_name='lfs_objects_oid',
273 273 request_method='GET', renderer='json')
274 274
275 275 # verification API
276 276 config.add_route('lfs_objects_verify',
277 277 '/{repo:.*?[^/]}/info/lfs/verify')
278 278 config.add_view(lfs_objects_verify, route_name='lfs_objects_verify',
279 279 request_method='POST', renderer='json')
280 280
281 281 # not found handler for API
282 282 config.add_notfound_view(not_found, renderer='json')
283 283
284 284
285 285 def create_app(git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme):
286 286 config = Configurator()
287 287 if git_lfs_enabled:
288 288 config.include(git_lfs_app)
289 289 config.registry.git_lfs_store_path = git_lfs_store_path
290 290 config.registry.git_lfs_http_scheme = git_lfs_http_scheme
291 291 else:
292 292 # not found handler for API, reporting disabled LFS support
293 293 config.add_notfound_view(lfs_disabled, renderer='json')
294 294
295 295 app = config.make_wsgi_app()
296 296 return app
@@ -1,274 +1,274 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import pytest
20 20 from webtest.app import TestApp as WebObTestApp
21 21
22 from vcsserver.lib.rc_json import json
23 from vcsserver.str_utils import safe_bytes
22 from vcsserver.lib.ext_json import json
23 from vcsserver.lib.str_utils import safe_bytes
24 24 from vcsserver.git_lfs.app import create_app
25 25 from vcsserver.git_lfs.lib import LFSOidStore
26 26
27 27
28 28 @pytest.fixture(scope='function')
29 29 def git_lfs_app(tmpdir):
30 30 custom_app = WebObTestApp(create_app(
31 31 git_lfs_enabled=True, git_lfs_store_path=str(tmpdir),
32 32 git_lfs_http_scheme='http'))
33 33 custom_app._store = str(tmpdir)
34 34 return custom_app
35 35
36 36
37 37 @pytest.fixture(scope='function')
38 38 def git_lfs_https_app(tmpdir):
39 39 custom_app = WebObTestApp(create_app(
40 40 git_lfs_enabled=True, git_lfs_store_path=str(tmpdir),
41 41 git_lfs_http_scheme='https'))
42 42 custom_app._store = str(tmpdir)
43 43 return custom_app
44 44
45 45
46 46 @pytest.fixture()
47 47 def http_auth():
48 48 return {'HTTP_AUTHORIZATION': "Basic XXXXX"}
49 49
50 50
51 51 class TestLFSApplication:
52 52
53 53 def test_app_wrong_path(self, git_lfs_app):
54 54 git_lfs_app.get('/repo/info/lfs/xxx', status=404)
55 55
56 56 def test_app_deprecated_endpoint(self, git_lfs_app):
57 57 response = git_lfs_app.post('/repo/info/lfs/objects', status=501)
58 58 assert response.status_code == 501
59 59 assert json.loads(response.text) == {'message': 'LFS: v1 api not supported'}
60 60
61 61 def test_app_lock_verify_api_not_available(self, git_lfs_app):
62 62 response = git_lfs_app.post('/repo/info/lfs/locks/verify', status=501)
63 63 assert response.status_code == 501
64 64 assert json.loads(response.text) == {
65 65 'message': 'GIT LFS locking api not supported'}
66 66
67 67 def test_app_lock_api_not_available(self, git_lfs_app):
68 68 response = git_lfs_app.post('/repo/info/lfs/locks', status=501)
69 69 assert response.status_code == 501
70 70 assert json.loads(response.text) == {
71 71 'message': 'GIT LFS locking api not supported'}
72 72
73 73 def test_app_batch_api_missing_auth(self, git_lfs_app):
74 74 git_lfs_app.post_json(
75 75 '/repo/info/lfs/objects/batch', params={}, status=403)
76 76
77 77 def test_app_batch_api_unsupported_operation(self, git_lfs_app, http_auth):
78 78 response = git_lfs_app.post_json(
79 79 '/repo/info/lfs/objects/batch', params={}, status=400,
80 80 extra_environ=http_auth)
81 81 assert json.loads(response.text) == {
82 82 'message': 'unsupported operation mode: `None`'}
83 83
84 84 def test_app_batch_api_missing_objects(self, git_lfs_app, http_auth):
85 85 response = git_lfs_app.post_json(
86 86 '/repo/info/lfs/objects/batch', params={'operation': 'download'},
87 87 status=400, extra_environ=http_auth)
88 88 assert json.loads(response.text) == {
89 89 'message': 'missing objects data'}
90 90
91 91 def test_app_batch_api_unsupported_data_in_objects(
92 92 self, git_lfs_app, http_auth):
93 93 params = {'operation': 'download',
94 94 'objects': [{}]}
95 95 response = git_lfs_app.post_json(
96 96 '/repo/info/lfs/objects/batch', params=params, status=400,
97 97 extra_environ=http_auth)
98 98 assert json.loads(response.text) == {
99 99 'message': 'unsupported data in objects'}
100 100
101 101 def test_app_batch_api_download_missing_object(
102 102 self, git_lfs_app, http_auth):
103 103 params = {'operation': 'download',
104 104 'objects': [{'oid': '123', 'size': '1024'}]}
105 105 response = git_lfs_app.post_json(
106 106 '/repo/info/lfs/objects/batch', params=params,
107 107 extra_environ=http_auth)
108 108
109 109 expected_objects = [
110 110 {'authenticated': True,
111 111 'errors': {'error': {
112 112 'code': 404,
113 113 'message': 'object: 123 does not exist in store'}},
114 114 'oid': '123',
115 115 'size': '1024'}
116 116 ]
117 117 assert json.loads(response.text) == {
118 118 'objects': expected_objects, 'transfer': 'basic'}
119 119
120 120 def test_app_batch_api_download(self, git_lfs_app, http_auth):
121 121 oid = '456'
122 122 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
123 123 if not os.path.isdir(os.path.dirname(oid_path)):
124 124 os.makedirs(os.path.dirname(oid_path))
125 125 with open(oid_path, 'wb') as f:
126 126 f.write(safe_bytes('OID_CONTENT'))
127 127
128 128 params = {'operation': 'download',
129 129 'objects': [{'oid': oid, 'size': '1024'}]}
130 130 response = git_lfs_app.post_json(
131 131 '/repo/info/lfs/objects/batch', params=params,
132 132 extra_environ=http_auth)
133 133
134 134 expected_objects = [
135 135 {'authenticated': True,
136 136 'actions': {
137 137 'download': {
138 138 'header': {'Authorization': 'Basic XXXXX'},
139 139 'href': 'http://localhost/repo/info/lfs/objects/456'},
140 140 },
141 141 'oid': '456',
142 142 'size': '1024'}
143 143 ]
144 144 assert json.loads(response.text) == {
145 145 'objects': expected_objects, 'transfer': 'basic'}
146 146
147 147 def test_app_batch_api_upload(self, git_lfs_app, http_auth):
148 148 params = {'operation': 'upload',
149 149 'objects': [{'oid': '123', 'size': '1024'}]}
150 150 response = git_lfs_app.post_json(
151 151 '/repo/info/lfs/objects/batch', params=params,
152 152 extra_environ=http_auth)
153 153 expected_objects = [
154 154 {'authenticated': True,
155 155 'actions': {
156 156 'upload': {
157 157 'header': {'Authorization': 'Basic XXXXX',
158 158 'Transfer-Encoding': 'chunked'},
159 159 'href': 'http://localhost/repo/info/lfs/objects/123'},
160 160 'verify': {
161 161 'header': {'Authorization': 'Basic XXXXX'},
162 162 'href': 'http://localhost/repo/info/lfs/verify'}
163 163 },
164 164 'oid': '123',
165 165 'size': '1024'}
166 166 ]
167 167 assert json.loads(response.text) == {
168 168 'objects': expected_objects, 'transfer': 'basic'}
169 169
170 170 def test_app_batch_api_upload_for_https(self, git_lfs_https_app, http_auth):
171 171 params = {'operation': 'upload',
172 172 'objects': [{'oid': '123', 'size': '1024'}]}
173 173 response = git_lfs_https_app.post_json(
174 174 '/repo/info/lfs/objects/batch', params=params,
175 175 extra_environ=http_auth)
176 176 expected_objects = [
177 177 {'authenticated': True,
178 178 'actions': {
179 179 'upload': {
180 180 'header': {'Authorization': 'Basic XXXXX',
181 181 'Transfer-Encoding': 'chunked'},
182 182 'href': 'https://localhost/repo/info/lfs/objects/123'},
183 183 'verify': {
184 184 'header': {'Authorization': 'Basic XXXXX'},
185 185 'href': 'https://localhost/repo/info/lfs/verify'}
186 186 },
187 187 'oid': '123',
188 188 'size': '1024'}
189 189 ]
190 190 assert json.loads(response.text) == {
191 191 'objects': expected_objects, 'transfer': 'basic'}
192 192
193 193 def test_app_verify_api_missing_data(self, git_lfs_app):
194 194 params = {'oid': 'missing'}
195 195 response = git_lfs_app.post_json(
196 196 '/repo/info/lfs/verify', params=params,
197 197 status=400)
198 198
199 199 assert json.loads(response.text) == {
200 200 'message': 'missing oid and size in request data'}
201 201
202 202 def test_app_verify_api_missing_obj(self, git_lfs_app):
203 203 params = {'oid': 'missing', 'size': '1024'}
204 204 response = git_lfs_app.post_json(
205 205 '/repo/info/lfs/verify', params=params,
206 206 status=404)
207 207
208 208 assert json.loads(response.text) == {
209 209 'message': 'oid `missing` does not exist in store'}
210 210
211 211 def test_app_verify_api_size_mismatch(self, git_lfs_app):
212 212 oid = 'existing'
213 213 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
214 214 if not os.path.isdir(os.path.dirname(oid_path)):
215 215 os.makedirs(os.path.dirname(oid_path))
216 216 with open(oid_path, 'wb') as f:
217 217 f.write(safe_bytes('OID_CONTENT'))
218 218
219 219 params = {'oid': oid, 'size': '1024'}
220 220 response = git_lfs_app.post_json(
221 221 '/repo/info/lfs/verify', params=params, status=422)
222 222
223 223 assert json.loads(response.text) == {
224 224 'message': 'requested file size mismatch '
225 225 'store size:11 requested:1024'}
226 226
227 227 def test_app_verify_api(self, git_lfs_app):
228 228 oid = 'existing'
229 229 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
230 230 if not os.path.isdir(os.path.dirname(oid_path)):
231 231 os.makedirs(os.path.dirname(oid_path))
232 232 with open(oid_path, 'wb') as f:
233 233 f.write(safe_bytes('OID_CONTENT'))
234 234
235 235 params = {'oid': oid, 'size': 11}
236 236 response = git_lfs_app.post_json(
237 237 '/repo/info/lfs/verify', params=params)
238 238
239 239 assert json.loads(response.text) == {
240 240 'message': {'size': 'ok', 'in_store': 'ok'}}
241 241
242 242 def test_app_download_api_oid_not_existing(self, git_lfs_app):
243 243 oid = 'missing'
244 244
245 245 response = git_lfs_app.get(
246 246 '/repo/info/lfs/objects/{oid}'.format(oid=oid), status=404)
247 247
248 248 assert json.loads(response.text) == {
249 249 'message': 'requested file with oid `missing` not found in store'}
250 250
251 251 def test_app_download_api(self, git_lfs_app):
252 252 oid = 'existing'
253 253 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
254 254 if not os.path.isdir(os.path.dirname(oid_path)):
255 255 os.makedirs(os.path.dirname(oid_path))
256 256 with open(oid_path, 'wb') as f:
257 257 f.write(safe_bytes('OID_CONTENT'))
258 258
259 259 response = git_lfs_app.get(
260 260 '/repo/info/lfs/objects/{oid}'.format(oid=oid))
261 261 assert response
262 262
263 263 def test_app_upload(self, git_lfs_app):
264 264 oid = 'uploaded'
265 265
266 266 response = git_lfs_app.put(
267 267 '/repo/info/lfs/objects/{oid}'.format(oid=oid), params='CONTENT')
268 268
269 269 assert json.loads(response.text) == {'upload': 'ok'}
270 270
271 271 # verify that we actually wrote that OID
272 272 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
273 273 assert os.path.isfile(oid_path)
274 274 assert 'CONTENT' == open(oid_path).read()
@@ -1,142 +1,142 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import pytest
20 from vcsserver.str_utils import safe_bytes
20 from vcsserver.lib.str_utils import safe_bytes
21 21 from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
22 22
23 23
24 24 @pytest.fixture()
25 25 def lfs_store(tmpdir):
26 26 repo = 'test'
27 27 oid = '123456789'
28 28 store = LFSOidStore(oid=oid, repo=repo, store_location=str(tmpdir))
29 29 return store
30 30
31 31
32 32 @pytest.fixture()
33 33 def oid_handler(lfs_store):
34 34 store = lfs_store
35 35 repo = store.repo
36 36 oid = store.oid
37 37
38 38 oid_handler = OidHandler(
39 39 store=store, repo_name=repo, auth=('basic', 'xxxx'),
40 40 oid=oid,
41 41 obj_size='1024', obj_data={}, obj_href='http://localhost/handle_oid',
42 42 obj_verify_href='http://localhost/verify')
43 43 return oid_handler
44 44
45 45
46 46 class TestOidHandler:
47 47
48 48 @pytest.mark.parametrize('exec_action', [
49 49 'download',
50 50 'upload',
51 51 ])
52 52 def test_exec_action(self, exec_action, oid_handler):
53 53 handler = oid_handler.exec_operation(exec_action)
54 54 assert handler
55 55
56 56 def test_exec_action_undefined(self, oid_handler):
57 57 with pytest.raises(AttributeError):
58 58 oid_handler.exec_operation('wrong')
59 59
60 60 def test_download_oid_not_existing(self, oid_handler):
61 61 response, has_errors = oid_handler.exec_operation('download')
62 62
63 63 assert response is None
64 64 assert has_errors['error'] == {
65 65 'code': 404,
66 66 'message': 'object: 123456789 does not exist in store'}
67 67
68 68 def test_download_oid(self, oid_handler):
69 69 store = oid_handler.get_store()
70 70 if not os.path.isdir(os.path.dirname(store.oid_path)):
71 71 os.makedirs(os.path.dirname(store.oid_path))
72 72
73 73 with open(store.oid_path, 'wb') as f:
74 74 f.write(safe_bytes('CONTENT'))
75 75
76 76 response, has_errors = oid_handler.exec_operation('download')
77 77
78 78 assert has_errors is None
79 79 assert response['download'] == {
80 80 'header': {'Authorization': 'basic xxxx'},
81 81 'href': 'http://localhost/handle_oid'
82 82 }
83 83
84 84 def test_upload_oid_that_exists(self, oid_handler):
85 85 store = oid_handler.get_store()
86 86 if not os.path.isdir(os.path.dirname(store.oid_path)):
87 87 os.makedirs(os.path.dirname(store.oid_path))
88 88
89 89 with open(store.oid_path, 'wb') as f:
90 90 f.write(safe_bytes('CONTENT'))
91 91 oid_handler.obj_size = 7
92 92 response, has_errors = oid_handler.exec_operation('upload')
93 93 assert has_errors is None
94 94 assert response is None
95 95
96 96 def test_upload_oid_that_exists_but_has_wrong_size(self, oid_handler):
97 97 store = oid_handler.get_store()
98 98 if not os.path.isdir(os.path.dirname(store.oid_path)):
99 99 os.makedirs(os.path.dirname(store.oid_path))
100 100
101 101 with open(store.oid_path, 'wb') as f:
102 102 f.write(safe_bytes('CONTENT'))
103 103
104 104 oid_handler.obj_size = 10240
105 105 response, has_errors = oid_handler.exec_operation('upload')
106 106 assert has_errors is None
107 107 assert response['upload'] == {
108 108 'header': {'Authorization': 'basic xxxx',
109 109 'Transfer-Encoding': 'chunked'},
110 110 'href': 'http://localhost/handle_oid',
111 111 }
112 112
113 113 def test_upload_oid(self, oid_handler):
114 114 response, has_errors = oid_handler.exec_operation('upload')
115 115 assert has_errors is None
116 116 assert response['upload'] == {
117 117 'header': {'Authorization': 'basic xxxx',
118 118 'Transfer-Encoding': 'chunked'},
119 119 'href': 'http://localhost/handle_oid'
120 120 }
121 121
122 122
123 123 class TestLFSStore:
124 124 def test_write_oid(self, lfs_store):
125 125 oid_location = lfs_store.oid_path
126 126
127 127 assert not os.path.isfile(oid_location)
128 128
129 129 engine = lfs_store.get_engine(mode='wb')
130 130 with engine as f:
131 131 f.write(safe_bytes('CONTENT'))
132 132
133 133 assert os.path.isfile(oid_location)
134 134
135 135 def test_detect_has_oid(self, lfs_store):
136 136
137 137 assert lfs_store.has_oid() is False
138 138 engine = lfs_store.get_engine(mode='wb')
139 139 with engine as f:
140 140 f.write(safe_bytes('CONTENT'))
141 141
142 142 assert lfs_store.has_oid() is True
@@ -1,92 +1,92 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 """
19 19 Mercurial libs compatibility
20 20 """
21 21
22 22 import mercurial
23 23 from mercurial import demandimport
24 24
25 25 # patch demandimport due to a bug in mercurial that always triggers
26 26 # demandimport.enable()
27 from vcsserver.str_utils import safe_bytes
27 from vcsserver.lib.str_utils import safe_bytes
28 28
29 29 demandimport.enable = lambda *args, **kwargs: 1
30 30
31 31 from mercurial import ui
32 32 from mercurial import patch
33 33 from mercurial import config
34 34 from mercurial import extensions
35 35 from mercurial import scmutil
36 36 from mercurial import archival
37 37 from mercurial import discovery
38 38 from mercurial import unionrepo
39 39 from mercurial import localrepo
40 40 from mercurial import merge as hg_merge
41 41 from mercurial import subrepo
42 42 from mercurial import subrepoutil
43 43 from mercurial import tags as hg_tag
44 44 from mercurial import util as hgutil
45 45 from mercurial.commands import clone, pull
46 46 from mercurial.node import nullid
47 47 from mercurial.context import memctx, memfilectx
48 48 from mercurial.error import (
49 49 LookupError, RepoError, RepoLookupError, Abort, InterventionRequired,
50 50 RequirementError, ProgrammingError)
51 51 from mercurial.hgweb import hgweb_mod
52 52 from mercurial.localrepo import instance
53 53 from mercurial.match import match, alwaysmatcher, patternmatcher
54 54 from mercurial.mdiff import diffopts
55 55 from mercurial.node import bin, hex
56 56 from mercurial.encoding import tolocal
57 57 from mercurial.discovery import findcommonoutgoing
58 58 from mercurial.hg import peer
59 from mercurial.httppeer import makepeer
59 from mercurial.httppeer import make_peer
60 60 from mercurial.utils.urlutil import url as hg_url
61 61 from mercurial.scmutil import revrange, revsymbol
62 62 from mercurial.node import nullrev
63 63 from mercurial import exchange
64 64 from hgext import largefiles
65 65
66 66 # those auth handlers are patched for a python 2.6.5 bug causing
67 67 # infinite looping when given invalid resources
68 68 from mercurial.url import httpbasicauthhandler, httpdigestauthhandler
69 69
70 70 # hg strip is in core now
71 71 from mercurial import strip as hgext_strip
72 72
73 73
74 74 def get_ctx(repo, ref):
75 75 if not isinstance(ref, int):
76 76 ref = safe_bytes(ref)
77 77
78 78 try:
79 79 ctx = repo[ref]
80 80 return ctx
81 81 except (ProgrammingError, TypeError):
82 82 # we're unable to find the rev using a regular lookup; we fall back
83 83 # to the slower, but backward-compatible revsymbol usage
84 84 pass
85 85 except (LookupError, RepoLookupError):
86 86 # Similar case as above but only for refs that are not numeric
87 87 if isinstance(ref, int):
88 88 raise
89 89
90 90 ctx = revsymbol(repo, ref)
91 91
92 92 return ctx
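A short usage sketch; repo is a hypothetical mercurial repository object:

ctx = get_ctx(repo, 0)       # integer revs go through the regular lookup
ctx = get_ctx(repo, 'tip')   # symbolic refs may fall back to revsymbol()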
@@ -1,230 +1,238 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18
19 19 import re
20 20 import os
21 21 import sys
22 22 import datetime
23 23 import logging
24 24 import pkg_resources
25 25
26 26 import vcsserver
27 27 import vcsserver.settings
28 from vcsserver.str_utils import safe_bytes
28 from vcsserver.lib.str_utils import safe_bytes
29 29
30 30 log = logging.getLogger(__name__)
31 31
32 32 HOOKS_DIR_MODE = 0o755
33 33 HOOKS_FILE_MODE = 0o755
34 34
35 35
36 36 def set_permissions_if_needed(path_to_check, perms: int):
37 37 # Get current permissions
38 38 current_permissions = os.stat(path_to_check).st_mode & 0o777 # Extract permission bits
39 39
40 40 # Check if current permissions are lower than required
41 41 if current_permissions < int(perms):
42 42 # Change the permissions if they are lower than required
43 43 os.chmod(path_to_check, perms)
44 44
45 45
46 46 def get_git_hooks_path(repo_path, bare):
47 47 hooks_path = os.path.join(repo_path, 'hooks')
48 48 if not bare:
49 49 hooks_path = os.path.join(repo_path, '.git', 'hooks')
50 50
51 51 return hooks_path
52 52
53 53
54 54 def install_git_hooks(repo_path, bare, executable=None, force_create=False):
55 55 """
56 56 Creates a RhodeCode hook inside a git repository
57 57
58 58 :param repo_path: path to repository
59 59 :param bare: defines if repository is considered a bare git repo
60 60 :param executable: binary executable to put in the hooks
61 61 :param force_create: Creates even if the same name hook exists
62 62 """
63 63 executable = executable or sys.executable
64 64 hooks_path = get_git_hooks_path(repo_path, bare)
65 65
66 66 # we always call this to ensure the dir exists and has a proper mode
67 67 if not os.path.exists(hooks_path):
68 68 # If it doesn't exist, create a new directory with the specified mode
69 69 os.makedirs(hooks_path, mode=HOOKS_DIR_MODE, exist_ok=True)
70 70 # If it exists, change the directory's mode to the specified mode
71 71 set_permissions_if_needed(hooks_path, perms=HOOKS_DIR_MODE)
72 72
73 73 tmpl_post = pkg_resources.resource_string(
74 74 'vcsserver', '/'.join(
75 75 ('hook_utils', 'hook_templates', 'git_post_receive.py.tmpl')))
76 76 tmpl_pre = pkg_resources.resource_string(
77 77 'vcsserver', '/'.join(
78 78 ('hook_utils', 'hook_templates', 'git_pre_receive.py.tmpl')))
79 79
80 80 path = '' # not used for now
81 81 timestamp = datetime.datetime.utcnow().isoformat()
82 82
83 83 for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]:
84 84 log.debug('Installing git hook in repo %s', repo_path)
85 85 _hook_file = os.path.join(hooks_path, f'{h_type}-receive')
86 86 _rhodecode_hook = check_rhodecode_hook(_hook_file)
87 87
88 88 if _rhodecode_hook or force_create:
89 89 log.debug('writing git %s hook file at %s !', h_type, _hook_file)
90 env_expand = str([
91 ('RC_INI_FILE', vcsserver.CONFIG['__file__']),
92 ('RC_CORE_BINARY_DIR', vcsserver.settings.BINARY_DIR),
93 ('RC_GIT_EXECUTABLE', vcsserver.settings.GIT_EXECUTABLE()),
94 ('RC_SVN_EXECUTABLE', vcsserver.settings.SVN_EXECUTABLE()),
95 ('RC_SVNLOOK_EXECUTABLE', vcsserver.settings.SVNLOOK_EXECUTABLE()),
96 ])
90 97 try:
91 98 with open(_hook_file, 'wb') as f:
99 template = template.replace(b'_OS_EXPAND_', safe_bytes(env_expand))
92 100 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
93 101 template = template.replace(b'_DATE_', safe_bytes(timestamp))
94 102 template = template.replace(b'_ENV_', safe_bytes(executable))
95 103 template = template.replace(b'_PATH_', safe_bytes(path))
96 104 f.write(template)
97 105 set_permissions_if_needed(_hook_file, perms=HOOKS_FILE_MODE)
98 106 except OSError:
99 107 log.exception('error writing hook file %s', _hook_file)
100 108 else:
101 109 log.debug('skipping writing hook file')
102 110
103 111 return True
104 112
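A minimal usage sketch with a hypothetical repository path; force_create also overwrites hooks that were not written by RhodeCode:

# writes pre-receive and post-receive hooks into /srv/repos/project.git/hooks
install_git_hooks('/srv/repos/project.git', bare=True, force_create=True)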
105 113
106 114 def get_svn_hooks_path(repo_path):
107 115 hooks_path = os.path.join(repo_path, 'hooks')
108 116
109 117 return hooks_path
110 118
111 119
112 120 def install_svn_hooks(repo_path, executable=None, force_create=False):
113 121 """
114 122 Creates RhodeCode hooks inside a svn repository
115 123
116 124 :param repo_path: path to repository
117 125 :param executable: binary executable to put in the hooks
118 126 :param force_create: Create even if same name hook exists
119 127 """
120 128 executable = executable or sys.executable
121 129 hooks_path = get_svn_hooks_path(repo_path)
122 130 if not os.path.isdir(hooks_path):
123 131 os.makedirs(hooks_path, mode=0o777, exist_ok=True)
124 132
125 133 tmpl_post = pkg_resources.resource_string(
126 134 'vcsserver', '/'.join(
127 135 ('hook_utils', 'hook_templates', 'svn_post_commit_hook.py.tmpl')))
128 136 tmpl_pre = pkg_resources.resource_string(
129 137 'vcsserver', '/'.join(
130 138 ('hook_utils', 'hook_templates', 'svn_pre_commit_hook.py.tmpl')))
131 139
132 140 path = '' # not used for now
133 141 timestamp = datetime.datetime.utcnow().isoformat()
134 142
135 143 for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]:
136 144 log.debug('Installing svn hook in repo %s', repo_path)
137 145 _hook_file = os.path.join(hooks_path, f'{h_type}-commit')
138 146 _rhodecode_hook = check_rhodecode_hook(_hook_file)
139 147
140 148 if _rhodecode_hook or force_create:
141 149 log.debug('writing svn %s hook file at %s !', h_type, _hook_file)
142 150
143 151 env_expand = str([
152 ('RC_INI_FILE', vcsserver.CONFIG['__file__']),
144 153 ('RC_CORE_BINARY_DIR', vcsserver.settings.BINARY_DIR),
145 154 ('RC_GIT_EXECUTABLE', vcsserver.settings.GIT_EXECUTABLE()),
146 155 ('RC_SVN_EXECUTABLE', vcsserver.settings.SVN_EXECUTABLE()),
147 156 ('RC_SVNLOOK_EXECUTABLE', vcsserver.settings.SVNLOOK_EXECUTABLE()),
148
149 157 ])
150 158 try:
151 159 with open(_hook_file, 'wb') as f:
160 template = template.replace(b'_OS_EXPAND_', safe_bytes(env_expand))
152 161 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
153 162 template = template.replace(b'_DATE_', safe_bytes(timestamp))
154 template = template.replace(b'_OS_EXPAND_', safe_bytes(env_expand))
155 163 template = template.replace(b'_ENV_', safe_bytes(executable))
156 164 template = template.replace(b'_PATH_', safe_bytes(path))
157 165
158 166 f.write(template)
159 167 os.chmod(_hook_file, 0o755)
160 168 except OSError:
161 169 log.exception('error writing hook file %s', _hook_file)
162 170 else:
163 171 log.debug('skipping writing hook file')
164 172
165 173 return True
166 174
167 175
168 176 def get_version_from_hook(hook_path):
169 177 version = b''
170 178 hook_content = read_hook_content(hook_path)
171 179 matches = re.search(rb'RC_HOOK_VER\s*=\s*(.*)', hook_content)
172 180 if matches:
173 181 try:
174 182 version = matches.groups()[0]
175 183 log.debug('got version %s from hooks.', version)
176 184 except Exception:
177 185 log.exception("Exception while reading the hook version.")
178 186 return version.replace(b"'", b"")
179 187
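A sketch of the parsing with a hypothetical hook body; the hook templates later in this diff embed a RC_HOOK_VER = '_TMPL_' line that this regex recovers (re is already imported at the top of this module):

content = b"#!/usr/bin/env python\nRC_HOOK_VER = '5.1.0'\n"
match = re.search(rb'RC_HOOK_VER\s*=\s*(.*)', content)
assert match.groups()[0].replace(b"'", b"") == b'5.1.0'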
180 188
181 189 def check_rhodecode_hook(hook_path):
182 190 """
183 191 Check if the hook was created by RhodeCode
184 192 """
185 193 if not os.path.exists(hook_path):
186 194 return True
187 195
188 196 log.debug('hook exists, checking if it is from RhodeCode')
189 197
190 198 version = get_version_from_hook(hook_path)
191 199 if version:
192 200 return True
193 201
194 202 return False
195 203
196 204
197 205 def read_hook_content(hook_path) -> bytes:
198 206 content = b''
199 207 if os.path.isfile(hook_path):
200 208 with open(hook_path, 'rb') as f:
201 209 content = f.read()
202 210 return content
203 211
204 212
205 213 def get_git_pre_hook_version(repo_path, bare):
206 214 hooks_path = get_git_hooks_path(repo_path, bare)
207 215 _hook_file = os.path.join(hooks_path, 'pre-receive')
208 216 version = get_version_from_hook(_hook_file)
209 217 return version
210 218
211 219
212 220 def get_git_post_hook_version(repo_path, bare):
213 221 hooks_path = get_git_hooks_path(repo_path, bare)
214 222 _hook_file = os.path.join(hooks_path, 'post-receive')
215 223 version = get_version_from_hook(_hook_file)
216 224 return version
217 225
218 226
219 227 def get_svn_pre_hook_version(repo_path):
220 228 hooks_path = get_svn_hooks_path(repo_path)
221 229 _hook_file = os.path.join(hooks_path, 'pre-commit')
222 230 version = get_version_from_hook(_hook_file)
223 231 return version
224 232
225 233
226 234 def get_svn_post_hook_version(repo_path):
227 235 hooks_path = get_svn_hooks_path(repo_path)
228 236 _hook_file = os.path.join(hooks_path, 'post-commit')
229 237 version = get_version_from_hook(_hook_file)
230 238 return version
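These per-hook version getters are what an installer would consult to decide whether a repository's hooks need refreshing; a hedged sketch (the "any mismatch" policy below is an assumption, not this module's behaviour):

    def svn_hooks_outdated(repo_path, current_version: bytes) -> bool:
        # compare the version baked into both installed svn hooks against
        # the running vcsserver version; any mismatch suggests a re-install
        installed = (
            get_svn_pre_hook_version(repo_path),
            get_svn_post_hook_version(repo_path),
        )
        return any(v != current_version for v in installed)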
@@ -1,51 +1,59 b''
1 1 #!_ENV_
2
2 3 import os
3 4 import sys
4 5 path_adjust = [_PATH_]
5 6
6 7 if path_adjust:
7 8 sys.path = path_adjust
8 9
10 # special trick to pass in some information from rc to hooks
11 # mod_dav strips ALL env vars and we can't even access things like PATH
12 for env_k, env_v in _OS_EXPAND_:
13 os.environ[env_k] = env_v
14
9 15 try:
10 16 from vcsserver import hooks
11 17 except ImportError:
12 18 if os.environ.get('RC_DEBUG_GIT_HOOK'):
13 19 import traceback
14 20 print(traceback.format_exc())
15 21 hooks = None
16 22
17 23
18 24 # TIMESTAMP: _DATE_
19 25 RC_HOOK_VER = '_TMPL_'
20 26
21 27
22 28 def main():
23 29 if hooks is None:
24 30 # exit with success if we cannot import vcsserver.hooks !!
25 31 # this allows simply push to this repo even without rhodecode
26 32 sys.exit(0)
27 33
28 34 if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_GIT_HOOKS'):
29 35 sys.exit(0)
30 36
31 37 repo_path = os.getcwd()
32 38 push_data = sys.stdin.readlines()
33 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
39
34 40 # os.environ is modified here by a subprocess call that
35 41 # runs git and later git executes this hook.
36 42 # Environ gets some additional info from rhodecode system
37 43 # like IP or username from basic-auth
44
45 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
38 46 try:
39 47 result = hooks.git_post_receive(repo_path, push_data, os.environ)
40 48 sys.exit(result)
41 49 except Exception as error:
42 50 # TODO: johbo: Improve handling of this special case
43 51 if not getattr(error, '_vcs_kind', None) == 'repo_locked':
44 52 raise
45 53 print(f'ERROR: {error}')
46 54 sys.exit(1)
47 55 sys.exit(0)
48 56
49 57
50 58 if __name__ == '__main__':
51 59 main()
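At install time the _OS_EXPAND_ placeholder above is replaced with the stringified env_expand list built by the installer, so the generated hook contains a literal list it can iterate. An illustrative rendering (all paths are made up):

    import os

    _OS_EXPAND_RENDERED = [
        ('RC_INI_FILE', '/etc/rhodecode/vcsserver.ini'),
        ('RC_CORE_BINARY_DIR', '/usr/local/bin/rhodecode_bin/vcs_bin'),
        ('RC_GIT_EXECUTABLE', '/usr/local/bin/git'),
    ]
    # restores the env vars that mod_dav strips before the hook runs
    for env_k, env_v in _OS_EXPAND_RENDERED:
        os.environ[env_k] = env_v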
@@ -1,51 +1,59 b''
1 1 #!_ENV_
2
2 3 import os
3 4 import sys
4 5 path_adjust = [_PATH_]
5 6
6 7 if path_adjust:
7 8 sys.path = path_adjust
8 9
10 # special trick to pass in some information from rc to hooks
11 # mod_dav strips ALL env vars and we can't even access things like PATH
12 for env_k, env_v in _OS_EXPAND_:
13 os.environ[env_k] = env_v
14
9 15 try:
10 16 from vcsserver import hooks
11 17 except ImportError:
12 18 if os.environ.get('RC_DEBUG_GIT_HOOK'):
13 19 import traceback
14 20 print(traceback.format_exc())
15 21 hooks = None
16 22
17 23
18 24 # TIMESTAMP: _DATE_
19 25 RC_HOOK_VER = '_TMPL_'
20 26
21 27
22 28 def main():
23 29 if hooks is None:
24 30 # exit with success if we cannot import vcsserver.hooks !!
25 31 # this allows simply push to this repo even without rhodecode
26 32 sys.exit(0)
27 33
28 34 if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_GIT_HOOKS'):
29 35 sys.exit(0)
30 36
31 37 repo_path = os.getcwd()
32 38 push_data = sys.stdin.readlines()
33 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
39
34 40 # os.environ is modified here by a subprocess call that
35 41 # runs git and later git executes this hook.
36 42 # Environ gets some additional info from rhodecode system
37 43 # like IP or username from basic-auth
44
45 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
38 46 try:
39 47 result = hooks.git_pre_receive(repo_path, push_data, os.environ)
40 48 sys.exit(result)
41 49 except Exception as error:
42 50 # TODO: johbo: Improve handling of this special case
43 51 if not getattr(error, '_vcs_kind', None) == 'repo_locked':
44 52 raise
45 53 print(f'ERROR: {error}')
46 54 sys.exit(1)
47 55 sys.exit(0)
48 56
49 57
50 58 if __name__ == '__main__':
51 59 main()
@@ -1,54 +1,59 b''
1 1 #!_ENV_
2 2
3 3 import os
4 4 import sys
5 5 path_adjust = [_PATH_]
6 6
7 7 if path_adjust:
8 8 sys.path = path_adjust
9 9
10 # special trick to pass in some information from rc to hooks
11 # mod_dav strips ALL env vars and we can't even access things like PATH
12 for env_k, env_v in _OS_EXPAND_:
13 os.environ[env_k] = env_v
14
10 15 try:
11 16 from vcsserver import hooks
12 17 except ImportError:
13 18 if os.environ.get('RC_DEBUG_SVN_HOOK'):
14 19 import traceback
15 20 print(traceback.format_exc())
16 21 hooks = None
17 22
18 23
19 24 # TIMESTAMP: _DATE_
20 25 RC_HOOK_VER = '_TMPL_'
21 26
22 27
23 28 # special trick to pass in some information from rc to hooks
24 29 # mod_dav strips ALL env vars and we can't even access things like PATH
25 30 for env_k, env_v in _OS_EXPAND_:
26 31 os.environ[env_k] = env_v
27 32
28 33 def main():
29 34 if hooks is None:
30 35 # exit with success if we cannot import vcsserver.hooks !!
31 36 # this allows simply push to this repo even without rhodecode
32 37 sys.exit(0)
33 38
34 39 if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_SVN_HOOKS'):
35 40 sys.exit(0)
36 repo_path = os.getcwd()
41 cwd_repo_path = os.getcwd()
37 42 push_data = sys.argv[1:]
38 43
39 44 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
40 45
41 46 try:
42 result = hooks.svn_post_commit(repo_path, push_data, os.environ)
47 result = hooks.svn_post_commit(cwd_repo_path, push_data, os.environ)
43 48 sys.exit(result)
44 49 except Exception as error:
45 50 # TODO: johbo: Improve handling of this special case
46 51 if not getattr(error, '_vcs_kind', None) == 'repo_locked':
47 52 raise
48 53 print(f'ERROR: {error}')
49 54 sys.exit(1)
50 55 sys.exit(0)
51 56
52 57
53 58 if __name__ == '__main__':
54 59 main()
@@ -1,58 +1,62 b''
1 1 #!_ENV_
2 2
3 3 import os
4 4 import sys
5 5 path_adjust = [_PATH_]
6 6
7 7 if path_adjust:
8 8 sys.path = path_adjust
9 9
10 # special trick to pass in some information from rc to hooks
11 # mod_dav strips ALL env vars and we can't even access things like PATH
12 for env_k, env_v in _OS_EXPAND_:
13 os.environ[env_k] = env_v
14
10 15 try:
11 16 from vcsserver import hooks
12 17 except ImportError:
13 18 if os.environ.get('RC_DEBUG_SVN_HOOK'):
14 19 import traceback
15 20 print(traceback.format_exc())
16 21 hooks = None
17 22
18 23
19 24 # TIMESTAMP: _DATE_
20 25 RC_HOOK_VER = '_TMPL_'
21 26
22 27
23 28 # special trick to pass in some information from rc to hooks
24 29 # mod_dav strips ALL env vars and we can't even access things like PATH
25 30 for env_k, env_v in _OS_EXPAND_:
26 31 os.environ[env_k] = env_v
27 32
28 33 def main():
29 34 if os.environ.get('SSH_READ_ONLY') == '1':
30 35 sys.stderr.write('Only read-only access is allowed')
31 36 sys.exit(1)
32 37
33 38 if hooks is None:
34 39 # exit with success if we cannot import vcsserver.hooks !!
35 40 # this allows simply push to this repo even without rhodecode
36 41 sys.exit(0)
37 42
38 43 if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_SVN_HOOKS'):
39 44 sys.exit(0)
40 repo_path = os.getcwd()
45 cwd_repo_path = os.getcwd()
41 46 push_data = sys.argv[1:]
42 47
43 48 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
44
45 49 try:
46 result = hooks.svn_pre_commit(repo_path, push_data, os.environ)
50 result = hooks.svn_pre_commit(cwd_repo_path, push_data, os.environ)
47 51 sys.exit(result)
48 52 except Exception as error:
49 53 # TODO: johbo: Improve handling of this special case
50 54 if not getattr(error, '_vcs_kind', None) == 'repo_locked':
51 55 raise
52 56 print(f'ERROR: {error}')
53 57 sys.exit(1)
54 58 sys.exit(0)
55 59
56 60
57 61 if __name__ == '__main__':
58 62 main()
@@ -1,826 +1,822 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import io
19 19 import os
20 20 import sys
21 21 import logging
22 22 import collections
23 23 import base64
24 24 import msgpack
25 25 import dataclasses
26 26 import pygit2
27 27
28 28 import http.client
29 29 from celery import Celery
30 30
31 31 import mercurial.scmutil
32 32 import mercurial.node
33 33
34 from vcsserver.lib.rc_json import json
35 34 from vcsserver import exceptions, subprocessio, settings
36 from vcsserver.str_utils import ascii_str, safe_str
35 from vcsserver.lib.ext_json import json
36 from vcsserver.lib.str_utils import ascii_str, safe_str
37 from vcsserver.lib.svn_txn_utils import get_txn_id_from_store
37 38 from vcsserver.remote.git_remote import Repository
38 39
39 40 celery_app = Celery('__vcsserver__')
40 41 log = logging.getLogger(__name__)
41 42
42 43
43 44 class HooksHttpClient:
44 45 proto = 'msgpack.v1'
45 46 connection = None
46 47
47 48 def __init__(self, hooks_uri):
48 49 self.hooks_uri = hooks_uri
49 50
50 51 def __repr__(self):
51 52 return f'{self.__class__}(hook_uri={self.hooks_uri}, proto={self.proto})'
52 53
53 54 def __call__(self, method, extras):
54 55 connection = http.client.HTTPConnection(self.hooks_uri)
55 56 # binary msgpack body
56 57 headers, body = self._serialize(method, extras)
57 58 log.debug('Doing a new hooks call using HTTPConnection to %s', self.hooks_uri)
58 59
59 60 try:
60 61 try:
61 62 connection.request('POST', '/', body, headers)
62 63 except Exception as error:
 63 64 log.error('Hooks-call connection failed on %s, org error: %s', connection.__dict__, error)
64 65 raise
65 66
66 67 response = connection.getresponse()
67 68 try:
68 69 return msgpack.load(response)
69 70 except Exception:
70 71 response_data = response.read()
 71 72 log.exception('Failed to decode hook response data. '
72 73 'response_code:%s, raw_data:%s',
73 74 response.status, response_data)
74 75 raise
75 76 finally:
76 77 connection.close()
77 78
78 79 @classmethod
79 80 def _serialize(cls, hook_name, extras):
80 81 data = {
81 82 'method': hook_name,
82 83 'extras': extras
83 84 }
84 85 headers = {
85 86 "rc-hooks-protocol": cls.proto,
86 87 "Connection": "keep-alive"
87 88 }
88 89 return headers, msgpack.packb(data)
89 90
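A hedged usage sketch of the HTTP transport: each hook call is one msgpack-encoded POST, and the decoded response dict carries at least 'status' and 'output'. Host, port and extras content below are illustrative.

    client = HooksHttpClient('127.0.0.1:9900')
    extras = {'repository': 'some/repo', 'username': 'admin'}
    result = client('pre_push', extras)   # msgpack-decoded dict from the server
    status, output = result['status'], result['output']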
90 91
91 92 class HooksCeleryClient:
92 93 TASK_TIMEOUT = 60 # time in seconds
93 94
94 95 def __init__(self, queue, backend):
95 96 celery_app.config_from_object({
96 97 'broker_url': queue, 'result_backend': backend,
97 98 'broker_connection_retry_on_startup': True,
98 'task_serializer': 'msgpack',
99 'task_serializer': 'json',
99 100 'accept_content': ['json', 'msgpack'],
100 'result_serializer': 'msgpack',
101 'result_serializer': 'json',
101 102 'result_accept_content': ['json', 'msgpack']
102 103 })
103 104 self.celery_app = celery_app
104 105
105 106 def __call__(self, method, extras):
106 107 inquired_task = self.celery_app.signature(
107 108 f'rhodecode.lib.celerylib.tasks.{method}'
108 109 )
109 110 return inquired_task.delay(extras).get(timeout=self.TASK_TIMEOUT)
110 111
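The celery transport resolves each hook name to a task called rhodecode.lib.celerylib.tasks.<method>, which must be registered on the RhodeCode side; a sketch with illustrative broker/backend URLs:

    celery_client = HooksCeleryClient(
        'redis://localhost:6379/0',   # broker, illustrative
        'redis://localhost:6379/1',   # result backend, illustrative
    )
    extras = {'repository': 'some/repo'}          # illustrative payload
    result = celery_client('repo_size', extras)   # blocks up to TASK_TIMEOUT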
111 112
112 113 class HooksShadowRepoClient:
113 114
114 115 def __call__(self, hook_name, extras):
115 116 return {'output': '', 'status': 0}
116 117
117 118
118 119 class RemoteMessageWriter:
119 120 """Writer base class."""
120 121 def write(self, message):
121 122 raise NotImplementedError()
122 123
123 124
124 125 class HgMessageWriter(RemoteMessageWriter):
125 126 """Writer that knows how to send messages to mercurial clients."""
126 127
127 128 def __init__(self, ui):
128 129 self.ui = ui
129 130
130 131 def write(self, message: str):
131 132 # TODO: Check why the quiet flag is set by default.
132 133 old = self.ui.quiet
133 134 self.ui.quiet = False
134 135 self.ui.status(message.encode('utf-8'))
135 136 self.ui.quiet = old
136 137
137 138
138 139 class GitMessageWriter(RemoteMessageWriter):
139 140 """Writer that knows how to send messages to git clients."""
140 141
141 142 def __init__(self, stdout=None):
142 143 self.stdout = stdout or sys.stdout
143 144
144 145 def write(self, message: str):
145 146 self.stdout.write(message)
146 147
147 148
148 149 class SvnMessageWriter(RemoteMessageWriter):
149 150 """Writer that knows how to send messages to svn clients."""
150 151
151 152 def __init__(self, stderr=None):
152 153 # SVN needs data sent to stderr for back-to-client messaging
153 154 self.stderr = stderr or sys.stderr
154 155
155 156 def write(self, message):
156 157 self.stderr.write(message)
157 158
158 159
159 160 def _handle_exception(result):
160 161 exception_class = result.get('exception')
161 162 exception_traceback = result.get('exception_traceback')
162 163 log.debug('Handling hook-call exception: %s', exception_class)
163 164
164 165 if exception_traceback:
165 166 log.error('Got traceback from remote call:%s', exception_traceback)
166 167
167 168 if exception_class == 'HTTPLockedRC':
168 169 raise exceptions.RepositoryLockedException()(*result['exception_args'])
169 170 elif exception_class == 'HTTPBranchProtected':
170 171 raise exceptions.RepositoryBranchProtectedException()(*result['exception_args'])
171 172 elif exception_class == 'RepositoryError':
172 173 raise exceptions.VcsException()(*result['exception_args'])
173 174 elif exception_class:
174 175 raise Exception(
175 176 f"""Got remote exception "{exception_class}" with args "{result['exception_args']}" """
176 177 )
177 178
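For reference, an illustrative failing payload as consumed by _handle_exception(); the message text is made up. A result shaped like this is re-raised as exceptions.RepositoryLockedException()(*exception_args).

    result = {
        'status': 1,
        'output': '',
        'exception': 'HTTPLockedRC',            # class name as a plain string
        'exception_args': ['repository locked'],
        'exception_traceback': '',
    }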
178 179
179 180 def _get_hooks_client(extras):
180 181 hooks_uri = extras.get('hooks_uri')
181 182 task_queue = extras.get('task_queue')
182 183 task_backend = extras.get('task_backend')
183 184 is_shadow_repo = extras.get('is_shadow_repo')
184 185
185 186 if hooks_uri:
186 187 return HooksHttpClient(hooks_uri)
187 188 elif task_queue and task_backend:
188 189 return HooksCeleryClient(task_queue, task_backend)
189 190 elif is_shadow_repo:
190 191 return HooksShadowRepoClient()
191 192 else:
192 193 raise Exception("Hooks client not found!")
193 194
194 195
195 196 def _call_hook(hook_name, extras, writer):
196 197 hooks_client = _get_hooks_client(extras)
197 198 log.debug('Hooks, using client:%s', hooks_client)
198 199 result = hooks_client(hook_name, extras)
199 200 log.debug('Hooks got result: %s', result)
200 201 _handle_exception(result)
201 202 writer.write(result['output'])
202 203
203 204 return result['status']
204 205
205 206
206 207 def _extras_from_ui(ui):
207 208 hook_data = ui.config(b'rhodecode', b'RC_SCM_DATA')
208 209 if not hook_data:
209 210 # maybe it's inside environ ?
210 211 env_hook_data = os.environ.get('RC_SCM_DATA')
211 212 if env_hook_data:
212 213 hook_data = env_hook_data
213 214
214 215 extras = {}
215 216 if hook_data:
216 217 extras = json.loads(hook_data)
217 218 return extras
218 219
219 220
220 221 def _rev_range_hash(repo, node, check_heads=False):
221 222 from vcsserver.hgcompat import get_ctx
222 223
223 224 commits = []
224 225 revs = []
225 226 start = get_ctx(repo, node).rev()
226 227 end = len(repo)
227 228 for rev in range(start, end):
228 229 revs.append(rev)
229 230 ctx = get_ctx(repo, rev)
230 231 commit_id = ascii_str(mercurial.node.hex(ctx.node()))
231 232 branch = safe_str(ctx.branch())
232 233 commits.append((commit_id, branch))
233 234
234 235 parent_heads = []
235 236 if check_heads:
236 237 parent_heads = _check_heads(repo, start, end, revs)
237 238 return commits, parent_heads
238 239
239 240
240 241 def _check_heads(repo, start, end, commits):
241 242 from vcsserver.hgcompat import get_ctx
242 243 changelog = repo.changelog
243 244 parents = set()
244 245
245 246 for new_rev in commits:
246 247 for p in changelog.parentrevs(new_rev):
247 248 if p == mercurial.node.nullrev:
248 249 continue
249 250 if p < start:
250 251 parents.add(p)
251 252
252 253 for p in parents:
253 254 branch = get_ctx(repo, p).branch()
254 255 # The heads descending from that parent, on the same branch
255 256 parent_heads = {p}
256 257 reachable = {p}
257 258 for x in range(p + 1, end):
258 259 if get_ctx(repo, x).branch() != branch:
259 260 continue
260 261 for pp in changelog.parentrevs(x):
261 262 if pp in reachable:
262 263 reachable.add(x)
263 264 parent_heads.discard(pp)
264 265 parent_heads.add(x)
265 266 # More than one head? Suggest merging
266 267 if len(parent_heads) > 1:
267 268 return list(parent_heads)
268 269
269 270 return []
270 271
271 272
272 273 def _get_git_env():
273 274 env = {}
274 275 for k, v in os.environ.items():
275 276 if k.startswith('GIT'):
276 277 env[k] = v
277 278
278 279 # serialized version
279 280 return [(k, v) for k, v in env.items()]
280 281
281 282
282 283 def _get_hg_env(old_rev, new_rev, txnid, repo_path):
283 284 env = {}
284 285 for k, v in os.environ.items():
285 286 if k.startswith('HG'):
286 287 env[k] = v
287 288
288 289 env['HG_NODE'] = old_rev
289 290 env['HG_NODE_LAST'] = new_rev
290 291 env['HG_TXNID'] = txnid
291 292 env['HG_PENDING'] = repo_path
292 293
293 294 return [(k, v) for k, v in env.items()]
294 295
295 296
297 def _get_ini_settings(ini_file):
298 from vcsserver.http_main import sanitize_settings_and_apply_defaults
299 from vcsserver.lib.config_utils import get_app_config_lightweight, configure_and_store_settings
300
301 global_config = {'__file__': ini_file}
302 ini_settings = get_app_config_lightweight(ini_file)
303 sanitize_settings_and_apply_defaults(global_config, ini_settings)
304 configure_and_store_settings(global_config, ini_settings)
305
306 return ini_settings
307
308
296 309 def _fix_hooks_executables(ini_path=''):
297 310 """
 298 311 This is a trick to set proper settings.EXECUTABLE paths for certain execution patterns,
 299 312 especially for subversion, where hooks strip the entire env and calling just the 'svn'
 300 313 command will most likely fail because svn is not on PATH
301 314 """
302 from vcsserver.http_main import sanitize_settings_and_apply_defaults
303 from vcsserver.lib.config_utils import get_app_config_lightweight
304
315 # set defaults, in case we can't read from ini_file
305 316 core_binary_dir = settings.BINARY_DIR or '/usr/local/bin/rhodecode_bin/vcs_bin'
306 317 if ini_path:
307
308 ini_settings = get_app_config_lightweight(ini_path)
309 ini_settings = sanitize_settings_and_apply_defaults({'__file__': ini_path}, ini_settings)
318 ini_settings = _get_ini_settings(ini_path)
310 319 core_binary_dir = ini_settings['core.binary_dir']
311 320
312 321 settings.BINARY_DIR = core_binary_dir
313 322
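A hedged sketch of the usual call site: the generated hooks export RC_INI_FILE (see the templates above), which lets the binaries be resolved even with a stripped environment.

    import os

    ini_file = os.environ.get('RC_INI_FILE', '')
    _fix_hooks_executables(ini_file)              # falls back to defaults if empty
    svnlook_bin = settings.SVNLOOK_EXECUTABLE()   # resolvable via core.binary_dir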
314 323
315 324 def repo_size(ui, repo, **kwargs):
316 325 extras = _extras_from_ui(ui)
317 326 return _call_hook('repo_size', extras, HgMessageWriter(ui))
318 327
319 328
320 329 def pre_pull(ui, repo, **kwargs):
321 330 extras = _extras_from_ui(ui)
322 331 return _call_hook('pre_pull', extras, HgMessageWriter(ui))
323 332
324 333
325 334 def pre_pull_ssh(ui, repo, **kwargs):
326 335 extras = _extras_from_ui(ui)
327 336 if extras and extras.get('SSH'):
328 337 return pre_pull(ui, repo, **kwargs)
329 338 return 0
330 339
331 340
332 341 def post_pull(ui, repo, **kwargs):
333 342 extras = _extras_from_ui(ui)
334 343 return _call_hook('post_pull', extras, HgMessageWriter(ui))
335 344
336 345
337 346 def post_pull_ssh(ui, repo, **kwargs):
338 347 extras = _extras_from_ui(ui)
339 348 if extras and extras.get('SSH'):
340 349 return post_pull(ui, repo, **kwargs)
341 350 return 0
342 351
343 352
344 353 def pre_push(ui, repo, node=None, **kwargs):
345 354 """
346 355 Mercurial pre_push hook
347 356 """
348 357 extras = _extras_from_ui(ui)
349 358 detect_force_push = extras.get('detect_force_push')
350 359
351 360 rev_data = []
352 361 hook_type: str = safe_str(kwargs.get('hooktype'))
353 362
354 363 if node and hook_type == 'pretxnchangegroup':
355 364 branches = collections.defaultdict(list)
356 365 commits, _heads = _rev_range_hash(repo, node, check_heads=detect_force_push)
357 366 for commit_id, branch in commits:
358 367 branches[branch].append(commit_id)
359 368
360 369 for branch, commits in branches.items():
361 370 old_rev = ascii_str(kwargs.get('node_last')) or commits[0]
362 371 rev_data.append({
363 372 'total_commits': len(commits),
364 373 'old_rev': old_rev,
365 374 'new_rev': commits[-1],
366 375 'ref': '',
367 376 'type': 'branch',
368 377 'name': branch,
369 378 })
370 379
371 380 for push_ref in rev_data:
372 381 push_ref['multiple_heads'] = _heads
373 382
374 383 repo_path = os.path.join(
375 384 extras.get('repo_store', ''), extras.get('repository', ''))
376 385 push_ref['hg_env'] = _get_hg_env(
377 386 old_rev=push_ref['old_rev'],
378 387 new_rev=push_ref['new_rev'], txnid=ascii_str(kwargs.get('txnid')),
379 388 repo_path=repo_path)
380 389
381 390 extras['hook_type'] = hook_type or 'pre_push'
382 391 extras['commit_ids'] = rev_data
383 392
384 393 return _call_hook('pre_push', extras, HgMessageWriter(ui))
385 394
386 395
387 396 def pre_push_ssh(ui, repo, node=None, **kwargs):
388 397 extras = _extras_from_ui(ui)
389 398 if extras.get('SSH'):
390 399 return pre_push(ui, repo, node, **kwargs)
391 400
392 401 return 0
393 402
394 403
395 404 def pre_push_ssh_auth(ui, repo, node=None, **kwargs):
396 405 """
397 406 Mercurial pre_push hook for SSH
398 407 """
399 408 extras = _extras_from_ui(ui)
400 409 if extras.get('SSH'):
401 410 permission = extras['SSH_PERMISSIONS']
402 411
403 412 if 'repository.write' == permission or 'repository.admin' == permission:
404 413 return 0
405 414
406 415 # non-zero ret code
407 416 return 1
408 417
409 418 return 0
410 419
411 420
412 421 def post_push(ui, repo, node, **kwargs):
413 422 """
414 423 Mercurial post_push hook
415 424 """
416 425 extras = _extras_from_ui(ui)
417 426
418 427 commit_ids = []
419 428 branches = []
420 429 bookmarks = []
421 430 tags = []
422 431 hook_type: str = safe_str(kwargs.get('hooktype'))
423 432
424 433 commits, _heads = _rev_range_hash(repo, node)
425 434 for commit_id, branch in commits:
426 435 commit_ids.append(commit_id)
427 436 if branch not in branches:
428 437 branches.append(branch)
429 438
430 439 if hasattr(ui, '_rc_pushkey_bookmarks'):
431 440 bookmarks = ui._rc_pushkey_bookmarks
432 441
433 442 extras['hook_type'] = hook_type or 'post_push'
434 443 extras['commit_ids'] = commit_ids
435 444
436 445 extras['new_refs'] = {
437 446 'branches': branches,
438 447 'bookmarks': bookmarks,
439 448 'tags': tags
440 449 }
441 450
442 451 return _call_hook('post_push', extras, HgMessageWriter(ui))
443 452
444 453
445 454 def post_push_ssh(ui, repo, node, **kwargs):
446 455 """
447 456 Mercurial post_push hook for SSH
448 457 """
449 458 if _extras_from_ui(ui).get('SSH'):
450 459 return post_push(ui, repo, node, **kwargs)
451 460 return 0
452 461
453 462
454 463 def key_push(ui, repo, **kwargs):
455 464 from vcsserver.hgcompat import get_ctx
456 465
457 466 if kwargs['new'] != b'0' and kwargs['namespace'] == b'bookmarks':
458 467 # store new bookmarks in our UI object propagated later to post_push
459 468 ui._rc_pushkey_bookmarks = get_ctx(repo, kwargs['key']).bookmarks()
460 469 return
461 470
462 471
463 472 # backward compat
464 473 log_pull_action = post_pull
465 474
466 475 # backward compat
467 476 log_push_action = post_push
468 477
469 478
470 479 def handle_git_pre_receive(unused_repo_path, unused_revs, unused_env):
471 480 """
472 481 Old hook name: keep here for backward compatibility.
473 482
474 483 This is only required when the installed git hooks are not upgraded.
475 484 """
476 485 pass
477 486
478 487
479 488 def handle_git_post_receive(unused_repo_path, unused_revs, unused_env):
480 489 """
481 490 Old hook name: keep here for backward compatibility.
482 491
483 492 This is only required when the installed git hooks are not upgraded.
484 493 """
485 494 pass
486 495
487 496
488 497 @dataclasses.dataclass
489 498 class HookResponse:
490 499 status: int
491 500 output: str
492 501
493 502
494 503 def git_pre_pull(extras) -> HookResponse:
495 504 """
496 505 Pre pull hook.
497 506
498 507 :param extras: dictionary containing the keys defined in simplevcs
499 508 :type extras: dict
500 509
 501 510 :return: HookResponse with the hook status code (0 for success) and its output.
 502 511 :rtype: HookResponse
503 512 """
504 513
505 514 if 'pull' not in extras['hooks']:
506 515 return HookResponse(0, '')
507 516
508 517 stdout = io.StringIO()
509 518 try:
510 519 status_code = _call_hook('pre_pull', extras, GitMessageWriter(stdout))
511 520
512 521 except Exception as error:
513 522 log.exception('Failed to call pre_pull hook')
514 523 status_code = 128
515 524 stdout.write(f'ERROR: {error}\n')
516 525
517 526 return HookResponse(status_code, stdout.getvalue())
518 527
519 528
520 529 def git_post_pull(extras) -> HookResponse:
521 530 """
522 531 Post pull hook.
523 532
524 533 :param extras: dictionary containing the keys defined in simplevcs
525 534 :type extras: dict
526 535
 527 536 :return: HookResponse with the hook status code (0 for success) and its output.
 528 537 :rtype: HookResponse
529 538 """
530 539 if 'pull' not in extras['hooks']:
531 540 return HookResponse(0, '')
532 541
533 542 stdout = io.StringIO()
534 543 try:
535 544 status = _call_hook('post_pull', extras, GitMessageWriter(stdout))
536 545 except Exception as error:
537 546 status = 128
538 547 stdout.write(f'ERROR: {error}\n')
539 548
540 549 return HookResponse(status, stdout.getvalue())
541 550
542 551
543 552 def _parse_git_ref_lines(revision_lines):
544 553 rev_data = []
545 554 for revision_line in revision_lines or []:
546 555 old_rev, new_rev, ref = revision_line.strip().split(' ')
547 556 ref_data = ref.split('/', 2)
548 557 if ref_data[1] in ('tags', 'heads'):
549 558 rev_data.append({
550 559 # NOTE(marcink):
 551 560 # we're unable to tell total_commits for git at this point
 552 561 # but we set the variable for consistency with the other backends
553 562 'total_commits': -1,
554 563 'old_rev': old_rev,
555 564 'new_rev': new_rev,
556 565 'ref': ref,
557 566 'type': ref_data[1],
558 567 'name': ref_data[2],
559 568 })
560 569 return rev_data
561 570
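An illustrative input/output pair for the parser above; git feeds receive hooks lines in the '<old-sha> <new-sha> <ref>' form (the SHAs here are made up):

    lines = ['aaa111 bbb222 refs/heads/main']
    parsed = _parse_git_ref_lines(lines)
    # parsed == [{'total_commits': -1, 'old_rev': 'aaa111', 'new_rev': 'bbb222',
    #             'ref': 'refs/heads/main', 'type': 'heads', 'name': 'main'}]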
562 571
563 572 def git_pre_receive(unused_repo_path, revision_lines, env) -> int:
564 573 """
565 574 Pre push hook.
566 575
567 576 :return: status code of the hook. 0 for success.
568 577 """
569 578 extras = json.loads(env['RC_SCM_DATA'])
570 579 rev_data = _parse_git_ref_lines(revision_lines)
571 580 if 'push' not in extras['hooks']:
572 581 return 0
573 _fix_hooks_executables()
582 _fix_hooks_executables(env.get('RC_INI_FILE'))
574 583
575 584 empty_commit_id = '0' * 40
576 585
577 586 detect_force_push = extras.get('detect_force_push')
578 587
579 588 for push_ref in rev_data:
580 589 # store our git-env which holds the temp store
581 590 push_ref['git_env'] = _get_git_env()
582 591 push_ref['pruned_sha'] = ''
583 592 if not detect_force_push:
584 593 # don't check for forced-push when we don't need to
585 594 continue
586 595
587 596 type_ = push_ref['type']
588 597 new_branch = push_ref['old_rev'] == empty_commit_id
589 598 delete_branch = push_ref['new_rev'] == empty_commit_id
590 599 if type_ == 'heads' and not (new_branch or delete_branch):
591 600 old_rev = push_ref['old_rev']
592 601 new_rev = push_ref['new_rev']
593 602 cmd = [settings.GIT_EXECUTABLE(), 'rev-list', old_rev, f'^{new_rev}']
594 603 stdout, stderr = subprocessio.run_command(
595 604 cmd, env=os.environ.copy())
 596 605 # non-empty output means there are unreachable objects, i.e. a forced push was used
597 606 if stdout:
598 607 push_ref['pruned_sha'] = stdout.splitlines()
599 608
600 609 extras['hook_type'] = 'pre_receive'
601 610 extras['commit_ids'] = rev_data
602 611
603 612 stdout = sys.stdout
604 613 status_code = _call_hook('pre_push', extras, GitMessageWriter(stdout))
605 614
606 615 return status_code
607 616
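A standalone sketch of the forced-push probe used above: commits reachable from the old tip but not from the new tip can only exist when history was rewritten. The SHAs are placeholders.

    old_rev, new_rev = 'OLD_SHA', 'NEW_SHA'
    cmd = [settings.GIT_EXECUTABLE(), 'rev-list', old_rev, f'^{new_rev}']
    stdout, stderr = subprocessio.run_command(cmd, env=os.environ.copy())
    forced_push = bool(stdout)   # any output -> unreachable commits -> force push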
608 617
609 618 def git_post_receive(unused_repo_path, revision_lines, env) -> int:
610 619 """
611 620 Post push hook.
612 621
613 622 :return: status code of the hook. 0 for success.
614 623 """
615 624 extras = json.loads(env['RC_SCM_DATA'])
616 625 if 'push' not in extras['hooks']:
617 626 return 0
618 627
619 _fix_hooks_executables()
628 _fix_hooks_executables(env.get('RC_INI_FILE'))
620 629
621 630 rev_data = _parse_git_ref_lines(revision_lines)
622 631
623 632 git_revs = []
624 633
625 634 # N.B.(skreft): it is ok to just call git, as git before calling a
 626 635 # subcommand sets the PATH environment variable so that it points to the
627 636 # correct version of the git executable.
628 637 empty_commit_id = '0' * 40
629 638 branches = []
630 639 tags = []
631 640 for push_ref in rev_data:
632 641 type_ = push_ref['type']
633 642
634 643 if type_ == 'heads':
635 644 # starting new branch case
636 645 if push_ref['old_rev'] == empty_commit_id:
637 646 push_ref_name = push_ref['name']
638 647
639 648 if push_ref_name not in branches:
640 649 branches.append(push_ref_name)
641 650
642 651 need_head_set = ''
643 652 with Repository(os.getcwd()) as repo:
644 653 try:
645 654 repo.head
646 655 except pygit2.GitError:
647 656 need_head_set = f'refs/heads/{push_ref_name}'
648 657
649 658 if need_head_set:
650 659 repo.set_head(need_head_set)
651 660 print(f"Setting default branch to {push_ref_name}")
652 661
653 662 cmd = [settings.GIT_EXECUTABLE(), 'for-each-ref', '--format=%(refname)', 'refs/heads/*']
654 663 stdout, stderr = subprocessio.run_command(
655 664 cmd, env=os.environ.copy())
656 665 heads = safe_str(stdout)
657 666 heads = heads.replace(push_ref['ref'], '')
658 667 heads = ' '.join(head for head
659 668 in heads.splitlines() if head) or '.'
660 669 cmd = [settings.GIT_EXECUTABLE(), 'log', '--reverse',
661 670 '--pretty=format:%H', '--', push_ref['new_rev'],
662 671 '--not', heads]
663 672 stdout, stderr = subprocessio.run_command(
664 673 cmd, env=os.environ.copy())
665 674 git_revs.extend(list(map(ascii_str, stdout.splitlines())))
666 675
667 676 # delete branch case
668 677 elif push_ref['new_rev'] == empty_commit_id:
669 678 git_revs.append(f'delete_branch=>{push_ref["name"]}')
670 679 else:
671 680 if push_ref['name'] not in branches:
672 681 branches.append(push_ref['name'])
673 682
674 683 cmd = [settings.GIT_EXECUTABLE(), 'log',
675 684 f'{push_ref["old_rev"]}..{push_ref["new_rev"]}',
676 685 '--reverse', '--pretty=format:%H']
677 686 stdout, stderr = subprocessio.run_command(
678 687 cmd, env=os.environ.copy())
679 688 # we get bytes from stdout, we need str to be consistent
680 689 log_revs = list(map(ascii_str, stdout.splitlines()))
681 690 git_revs.extend(log_revs)
682 691
683 692 # Pure pygit2 impl. but still 2-3x slower :/
684 693 # results = []
685 694 #
686 695 # with Repository(os.getcwd()) as repo:
687 696 # repo_new_rev = repo[push_ref['new_rev']]
688 697 # repo_old_rev = repo[push_ref['old_rev']]
689 698 # walker = repo.walk(repo_new_rev.id, pygit2.GIT_SORT_TOPOLOGICAL)
690 699 #
691 700 # for commit in walker:
692 701 # if commit.id == repo_old_rev.id:
693 702 # break
694 703 # results.append(commit.id.hex)
695 704 # # reverse the order, can't use GIT_SORT_REVERSE
696 705 # log_revs = results[::-1]
697 706
698 707 elif type_ == 'tags':
699 708 if push_ref['name'] not in tags:
700 709 tags.append(push_ref['name'])
701 710 git_revs.append(f'tag=>{push_ref["name"]}')
702 711
703 712 extras['hook_type'] = 'post_receive'
704 713 extras['commit_ids'] = git_revs
705 714 extras['new_refs'] = {
706 715 'branches': branches,
707 716 'bookmarks': [],
708 717 'tags': tags,
709 718 }
710 719
711 720 stdout = sys.stdout
712 721
713 722 if 'repo_size' in extras['hooks']:
714 723 try:
715 724 _call_hook('repo_size', extras, GitMessageWriter(stdout))
716 725 except Exception:
717 726 pass
718 727
719 728 status_code = _call_hook('post_push', extras, GitMessageWriter(stdout))
720 729 return status_code
721 730
722 731
723 def _get_extras_from_txn_id(path, txn_id):
724 _fix_hooks_executables()
725
726 extras = {}
727 try:
728 cmd = [settings.SVNLOOK_EXECUTABLE(), 'pget',
729 '-t', txn_id,
730 '--revprop', path, 'rc-scm-extras']
731 stdout, stderr = subprocessio.run_command(
732 cmd, env=os.environ.copy())
733 extras = json.loads(base64.urlsafe_b64decode(stdout))
734 except Exception:
735 log.exception('Failed to extract extras info from txn_id')
736
737 return extras
738
739
740 def _get_extras_from_commit_id(commit_id, path):
741 _fix_hooks_executables()
742
743 extras = {}
744 try:
745 cmd = [settings.SVNLOOK_EXECUTABLE(), 'pget',
746 '-r', commit_id,
747 '--revprop', path, 'rc-scm-extras']
748 stdout, stderr = subprocessio.run_command(
749 cmd, env=os.environ.copy())
750 extras = json.loads(base64.urlsafe_b64decode(stdout))
751 except Exception:
752 log.exception('Failed to extract extras info from commit_id')
753
732 def get_extras_from_txn_id(repo_path, txn_id):
733 extras = get_txn_id_from_store(repo_path, txn_id)
754 734 return extras
755 735
756 736
757 737 def svn_pre_commit(repo_path, commit_data, env):
758 738
759 739 path, txn_id = commit_data
760 740 branches = []
761 741 tags = []
762 742
763 743 if env.get('RC_SCM_DATA'):
764 744 extras = json.loads(env['RC_SCM_DATA'])
765 745 else:
746 ini_path = env.get('RC_INI_FILE')
747 if ini_path:
748 _get_ini_settings(ini_path)
766 749 # fallback method to read from TXN-ID stored data
767 extras = _get_extras_from_txn_id(path, txn_id)
768 if not extras:
769 return 0
750 extras = get_extras_from_txn_id(path, txn_id)
751
752 if not extras:
 753 raise ValueError('SVN-PRE-COMMIT: Failed to extract the context data (extras) needed for hook execution')
754
755 if extras.get('rc_internal_commit'):
756 # special marker for internal commit, we don't call hooks client
757 return 0
770 758
771 759 extras['hook_type'] = 'pre_commit'
772 760 extras['commit_ids'] = [txn_id]
773 761 extras['txn_id'] = txn_id
774 762 extras['new_refs'] = {
775 763 'total_commits': 1,
776 764 'branches': branches,
777 765 'bookmarks': [],
778 766 'tags': tags,
779 767 }
780 768
781 769 return _call_hook('pre_push', extras, SvnMessageWriter())
782 770
783 771
784 772 def svn_post_commit(repo_path, commit_data, env):
785 773 """
786 774 commit_data is path, rev, txn_id
787 775 """
788 776
789 777 if len(commit_data) == 3:
790 778 path, commit_id, txn_id = commit_data
791 779 elif len(commit_data) == 2:
792 780 log.error('Failed to extract txn_id from commit_data using legacy method. '
793 781 'Some functionality might be limited')
794 782 path, commit_id = commit_data
795 783 txn_id = None
796 784 else:
797 785 return 0
798 786
799 787 branches = []
800 788 tags = []
801 789
802 790 if env.get('RC_SCM_DATA'):
803 791 extras = json.loads(env['RC_SCM_DATA'])
804 792 else:
793 ini_path = env.get('RC_INI_FILE')
794 if ini_path:
795 _get_ini_settings(ini_path)
805 796 # fallback method to read from TXN-ID stored data
806 extras = _get_extras_from_commit_id(commit_id, path)
807 if not extras:
808 return 0
797 extras = get_extras_from_txn_id(path, txn_id)
798
799 if not extras and txn_id:
 800 raise ValueError('SVN-POST-COMMIT: Failed to extract the context data (extras) needed for hook execution')
801
802 if extras.get('rc_internal_commit'):
803 # special marker for internal commit, we don't call hooks client
804 return 0
809 805
810 806 extras['hook_type'] = 'post_commit'
811 807 extras['commit_ids'] = [commit_id]
812 808 extras['txn_id'] = txn_id
813 809 extras['new_refs'] = {
814 810 'branches': branches,
815 811 'bookmarks': [],
816 812 'tags': tags,
817 813 'total_commits': 1,
818 814 }
819 815
820 816 if 'repo_size' in extras['hooks']:
821 817 try:
822 818 _call_hook('repo_size', extras, SvnMessageWriter())
823 819 except Exception:
824 820 pass
825 821
826 822 return _call_hook('post_push', extras, SvnMessageWriter())
@@ -1,774 +1,763 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import io
19 19 import os
20 20 import platform
21 21 import sys
22 22 import locale
23 23 import logging
24 24 import uuid
25 25 import time
26 26 import wsgiref.util
27 27 import tempfile
28 28 import psutil
29 29
30 30 from itertools import chain
31 31
32 32 import msgpack
33 33 import configparser
34 34
35 35 from pyramid.config import Configurator
36 36 from pyramid.wsgi import wsgiapp
37 37 from pyramid.response import Response
38 38
39 39 from vcsserver.base import BytesEnvelope, BinaryEnvelope
40 from vcsserver.lib.rc_json import json
40
41 41 from vcsserver.config.settings_maker import SettingsMaker
42 from vcsserver.str_utils import safe_int
43 from vcsserver.lib.statsd_client import StatsdClient
42
44 43 from vcsserver.tweens.request_wrapper import get_headers_call_context
45 44
46 import vcsserver
47 from vcsserver import remote_wsgi, scm_app, settings, hgpatches
45 from vcsserver import remote_wsgi, scm_app, hgpatches
46 from vcsserver.server import VcsServer
48 47 from vcsserver.git_lfs.app import GIT_LFS_CONTENT_TYPE, GIT_LFS_PROTO_PAT
49 48 from vcsserver.echo_stub import remote_wsgi as remote_wsgi_stub
50 49 from vcsserver.echo_stub.echo_app import EchoApp
51 50 from vcsserver.exceptions import HTTPRepoLocked, HTTPRepoBranchProtected
52 51 from vcsserver.lib.exc_tracking import store_exception, format_exc
53 from vcsserver.server import VcsServer
52 from vcsserver.lib.str_utils import safe_int
53 from vcsserver.lib.statsd_client import StatsdClient
54 from vcsserver.lib.ext_json import json
55 from vcsserver.lib.config_utils import configure_and_store_settings
56
54 57
55 58 strict_vcs = True
56 59
57 60 git_import_err = None
58 61 try:
59 62 from vcsserver.remote.git_remote import GitFactory, GitRemote
60 63 except ImportError as e:
61 64 GitFactory = None
62 65 GitRemote = None
63 66 git_import_err = e
64 67 if strict_vcs:
65 68 raise
66 69
67 70
68 71 hg_import_err = None
69 72 try:
70 73 from vcsserver.remote.hg_remote import MercurialFactory, HgRemote
71 74 except ImportError as e:
72 75 MercurialFactory = None
73 76 HgRemote = None
74 77 hg_import_err = e
75 78 if strict_vcs:
76 79 raise
77 80
78 81
79 82 svn_import_err = None
80 83 try:
81 84 from vcsserver.remote.svn_remote import SubversionFactory, SvnRemote
82 85 except ImportError as e:
83 86 SubversionFactory = None
84 87 SvnRemote = None
85 88 svn_import_err = e
86 89 if strict_vcs:
87 90 raise
88 91
89 92 log = logging.getLogger(__name__)
90 93
91 94 # due to Mercurial/glibc2.27 problems we need to detect if locale settings are
 92 95 # causing problems and "fix" them in case they do, falling back to LC_ALL=C
93 96
94 97 try:
95 98 locale.setlocale(locale.LC_ALL, '')
96 99 except locale.Error as e:
97 log.error(
98 'LOCALE ERROR: failed to set LC_ALL, fallback to LC_ALL=C, org error: %s', e)
100 log.error('LOCALE ERROR: failed to set LC_ALL, fallback to LC_ALL=C, org error: %s', e)
99 101 os.environ['LC_ALL'] = 'C'
100 102
101 103
102 104 def _is_request_chunked(environ):
103 105 stream = environ.get('HTTP_TRANSFER_ENCODING', '') == 'chunked'
104 106 return stream
105 107
106 108
107 109 def log_max_fd():
108 110 try:
109 111 maxfd = psutil.Process().rlimit(psutil.RLIMIT_NOFILE)[1]
110 112 log.info('Max file descriptors value: %s', maxfd)
111 113 except Exception:
112 114 pass
113 115
114 116
115 117 class VCS:
116 118 def __init__(self, locale_conf=None, cache_config=None):
117 119 self.locale = locale_conf
118 120 self.cache_config = cache_config
119 121 self._configure_locale()
120 122
121 123 log_max_fd()
122 124
123 125 if GitFactory and GitRemote:
124 126 git_factory = GitFactory()
125 127 self._git_remote = GitRemote(git_factory)
126 128 else:
127 129 log.error("Git client import failed: %s", git_import_err)
128 130
129 131 if MercurialFactory and HgRemote:
130 132 hg_factory = MercurialFactory()
131 133 self._hg_remote = HgRemote(hg_factory)
132 134 else:
133 135 log.error("Mercurial client import failed: %s", hg_import_err)
134 136
135 137 if SubversionFactory and SvnRemote:
136 138 svn_factory = SubversionFactory()
137 139
138 140 # hg factory is used for svn url validation
139 141 hg_factory = MercurialFactory()
140 142 self._svn_remote = SvnRemote(svn_factory, hg_factory=hg_factory)
141 143 else:
142 144 log.error("Subversion client import failed: %s", svn_import_err)
143 145
144 146 self._vcsserver = VcsServer()
145 147
146 148 def _configure_locale(self):
147 149 if self.locale:
 148 150 log.info('Setting locale `LC_ALL` to %s', self.locale)
149 151 else:
150 152 log.info('Configuring locale subsystem based on environment variables')
151 153 try:
152 154 # If self.locale is the empty string, then the locale
153 155 # module will use the environment variables. See the
154 156 # documentation of the package `locale`.
155 157 locale.setlocale(locale.LC_ALL, self.locale)
156 158
157 159 language_code, encoding = locale.getlocale()
158 160 log.info(
159 161 'Locale set to language code "%s" with encoding "%s".',
160 162 language_code, encoding)
161 163 except locale.Error:
162 164 log.exception('Cannot set locale, not configuring the locale system')
163 165
164 166
165 167 class WsgiProxy:
166 168 def __init__(self, wsgi):
167 169 self.wsgi = wsgi
168 170
169 171 def __call__(self, environ, start_response):
170 172 input_data = environ['wsgi.input'].read()
171 173 input_data = msgpack.unpackb(input_data)
172 174
173 175 error = None
174 176 try:
175 177 data, status, headers = self.wsgi.handle(
176 178 input_data['environment'], input_data['input_data'],
177 179 *input_data['args'], **input_data['kwargs'])
178 180 except Exception as e:
179 181 data, status, headers = [], None, None
180 182 error = {
181 183 'message': str(e),
182 184 '_vcs_kind': getattr(e, '_vcs_kind', None)
183 185 }
184 186
185 187 start_response(200, {})
186 188 return self._iterator(error, status, headers, data)
187 189
188 190 def _iterator(self, error, status, headers, data):
189 191 initial_data = [
190 192 error,
191 193 status,
192 194 headers,
193 195 ]
194 196
195 197 for d in chain(initial_data, data):
196 198 yield msgpack.packb(d)
197 199
198 200
199 201 def not_found(request):
200 202 return {'status': '404 NOT FOUND'}
201 203
202 204
203 205 class VCSViewPredicate:
204 206 def __init__(self, val, config):
205 207 self.remotes = val
206 208
207 209 def text(self):
208 210 return f'vcs view method = {list(self.remotes.keys())}'
209 211
210 212 phash = text
211 213
212 214 def __call__(self, context, request):
213 215 """
 214 216 View predicate that returns true if the given backend is supported by
 215 217 the defined remotes.
216 218 """
217 219 backend = request.matchdict.get('backend')
218 220 return backend in self.remotes
219 221
220 222
221 223 class HTTPApplication:
222 224 ALLOWED_EXCEPTIONS = ('KeyError', 'URLError')
223 225
224 226 remote_wsgi = remote_wsgi
225 227 _use_echo_app = False
226 228
227 229 def __init__(self, settings=None, global_config=None):
228 230
229 231 self.config = Configurator(settings=settings)
230 232 # Init our statsd at very start
231 233 self.config.registry.statsd = StatsdClient.statsd
232 234 self.config.registry.vcs_call_context = {}
233 235
234 236 self.global_config = global_config
235 237 self.config.include('vcsserver.lib.rc_cache')
236 self.config.include('vcsserver.lib.rc_cache.archive_cache')
238 self.config.include('vcsserver.lib.archive_cache')
237 239
238 240 settings_locale = settings.get('locale', '') or 'en_US.UTF-8'
239 241 vcs = VCS(locale_conf=settings_locale, cache_config=settings)
240 242 self._remotes = {
241 243 'hg': vcs._hg_remote,
242 244 'git': vcs._git_remote,
243 245 'svn': vcs._svn_remote,
244 246 'server': vcs._vcsserver,
245 247 }
246 248 if settings.get('dev.use_echo_app', 'false').lower() == 'true':
247 249 self._use_echo_app = True
248 250 log.warning("Using EchoApp for VCS operations.")
249 251 self.remote_wsgi = remote_wsgi_stub
250 252
251 self._configure_settings(global_config, settings)
253 configure_and_store_settings(global_config, settings)
252 254
253 255 self._configure()
254 256
255 def _configure_settings(self, global_config, app_settings):
256 """
257 Configure the settings module.
258 """
259 settings_merged = global_config.copy()
260 settings_merged.update(app_settings)
261
262 binary_dir = app_settings['core.binary_dir']
263
264 settings.BINARY_DIR = binary_dir
265
266 # Store the settings to make them available to other modules.
267 vcsserver.PYRAMID_SETTINGS = settings_merged
268 vcsserver.CONFIG = settings_merged
269
270 257 def _configure(self):
271 258 self.config.add_renderer(name='msgpack', factory=self._msgpack_renderer_factory)
272 259
273 260 self.config.add_route('service', '/_service')
274 261 self.config.add_route('status', '/status')
275 262 self.config.add_route('hg_proxy', '/proxy/hg')
276 263 self.config.add_route('git_proxy', '/proxy/git')
277 264
278 265 # rpc methods
279 266 self.config.add_route('vcs', '/{backend}')
280 267
281 268 # streaming rpc remote methods
282 269 self.config.add_route('vcs_stream', '/{backend}/stream')
283 270
284 271 # vcs operations clone/push as streaming
285 272 self.config.add_route('stream_git', '/stream/git/*repo_name')
286 273 self.config.add_route('stream_hg', '/stream/hg/*repo_name')
287 274
288 275 self.config.add_view(self.status_view, route_name='status', renderer='json')
289 276 self.config.add_view(self.service_view, route_name='service', renderer='msgpack')
290 277
291 278 self.config.add_view(self.hg_proxy(), route_name='hg_proxy')
292 279 self.config.add_view(self.git_proxy(), route_name='git_proxy')
293 280 self.config.add_view(self.vcs_view, route_name='vcs', renderer='msgpack',
294 281 vcs_view=self._remotes)
295 282 self.config.add_view(self.vcs_stream_view, route_name='vcs_stream',
296 283 vcs_view=self._remotes)
297 284
298 285 self.config.add_view(self.hg_stream(), route_name='stream_hg')
299 286 self.config.add_view(self.git_stream(), route_name='stream_git')
300 287
301 288 self.config.add_view_predicate('vcs_view', VCSViewPredicate)
302 289
303 290 self.config.add_notfound_view(not_found, renderer='json')
304 291
305 292 self.config.add_view(self.handle_vcs_exception, context=Exception)
306 293
307 294 self.config.add_tween(
308 295 'vcsserver.tweens.request_wrapper.RequestWrapperTween',
309 296 )
310 297 self.config.add_request_method(
311 298 'vcsserver.lib.request_counter.get_request_counter',
312 299 'request_count')
313 300
314 301 def wsgi_app(self):
315 302 return self.config.make_wsgi_app()
316 303
317 304 def _vcs_view_params(self, request):
318 305 remote = self._remotes[request.matchdict['backend']]
319 306 payload = msgpack.unpackb(request.body, use_list=True)
320 307
321 308 method = payload.get('method')
322 309 params = payload['params']
323 310 wire = params.get('wire')
324 311 args = params.get('args')
325 312 kwargs = params.get('kwargs')
326 313 context_uid = None
327 314
328 315 request.registry.vcs_call_context = {
329 316 'method': method,
330 317 'repo_name': payload.get('_repo_name'),
331 318 }
332 319
333 320 if wire:
334 321 try:
335 322 wire['context'] = context_uid = uuid.UUID(wire['context'])
336 323 except KeyError:
337 324 pass
338 325 args.insert(0, wire)
339 326 repo_state_uid = wire.get('repo_state_uid') if wire else None
340 327
 341 328 # NOTE(marcink): trading complexity for a slight performance gain
 342 329 if log.isEnabledFor(logging.DEBUG):
 343 330 # also we SKIP printing out any of those methods' args since they may be excessive
344 331 just_args_methods = {
345 332 'commitctx': ('content', 'removed', 'updated'),
346 333 'commit': ('content', 'removed', 'updated')
347 334 }
348 335 if method in just_args_methods:
349 336 skip_args = just_args_methods[method]
350 337 call_args = ''
351 338 call_kwargs = {}
352 339 for k in kwargs:
353 340 if k in skip_args:
354 341 # replace our skip key with dummy
355 342 call_kwargs[k] = f'RemovedParam({k})'
356 343 else:
357 344 call_kwargs[k] = kwargs[k]
358 345 else:
359 346 call_args = args[1:]
360 347 call_kwargs = kwargs
361 348
362 349 log.debug('Method requested:`%s` with args:%s kwargs:%s context_uid: %s, repo_state_uid:%s',
363 350 method, call_args, call_kwargs, context_uid, repo_state_uid)
364 351
365 352 statsd = request.registry.statsd
366 353 if statsd:
367 354 statsd.incr(
368 355 'vcsserver_method_total', tags=[
369 356 f"method:{method}",
370 357 ])
371 358 return payload, remote, method, args, kwargs
372 359
373 360 def vcs_view(self, request):
374 361
375 362 payload, remote, method, args, kwargs = self._vcs_view_params(request)
376 363 payload_id = payload.get('id')
377 364
378 365 try:
379 366 resp = getattr(remote, method)(*args, **kwargs)
380 367 except Exception as e:
381 368 exc_info = list(sys.exc_info())
382 369 exc_type, exc_value, exc_traceback = exc_info
383 370
384 371 org_exc = getattr(e, '_org_exc', None)
385 372 org_exc_name = None
386 373 org_exc_tb = ''
387 374 if org_exc:
388 375 org_exc_name = org_exc.__class__.__name__
389 376 org_exc_tb = getattr(e, '_org_exc_tb', '')
390 377 # replace our "faked" exception with our org
391 378 exc_info[0] = org_exc.__class__
392 379 exc_info[1] = org_exc
393 380
394 381 should_store_exc = True
395 382 if org_exc:
396 383 def get_exc_fqn(_exc_obj):
397 384 module_name = getattr(org_exc.__class__, '__module__', 'UNKNOWN')
398 385 return module_name + '.' + org_exc_name
399 386
400 387 exc_fqn = get_exc_fqn(org_exc)
401 388
402 389 if exc_fqn in ['mercurial.error.RepoLookupError',
403 390 'vcsserver.exceptions.RefNotFoundException']:
404 391 should_store_exc = False
405 392
406 393 if should_store_exc:
407 394 store_exception(id(exc_info), exc_info, request_path=request.path)
408 395
409 396 tb_info = format_exc(exc_info)
410 397
411 398 type_ = e.__class__.__name__
412 399 if type_ not in self.ALLOWED_EXCEPTIONS:
413 400 type_ = None
414 401
415 402 resp = {
416 403 'id': payload_id,
417 404 'error': {
418 405 'message': str(e),
419 406 'traceback': tb_info,
420 407 'org_exc': org_exc_name,
421 408 'org_exc_tb': org_exc_tb,
422 409 'type': type_
423 410 }
424 411 }
425 412
426 413 try:
427 414 resp['error']['_vcs_kind'] = getattr(e, '_vcs_kind', None)
428 415 except AttributeError:
429 416 pass
430 417 else:
431 418 resp = {
432 419 'id': payload_id,
433 420 'result': resp
434 421 }
435 422 log.debug('Serving data for method %s', method)
436 423 return resp
437 424
438 425 def vcs_stream_view(self, request):
439 426 payload, remote, method, args, kwargs = self._vcs_view_params(request)
440 427 # this method has a stream: marker we remove it here
441 428 method = method.split('stream:')[-1]
442 429 chunk_size = safe_int(payload.get('chunk_size')) or 4096
443 430
444 431 resp = getattr(remote, method)(*args, **kwargs)
445 432
446 433 def get_chunked_data(method_resp):
447 434 stream = io.BytesIO(method_resp)
448 435 while 1:
449 436 chunk = stream.read(chunk_size)
450 437 if not chunk:
451 438 break
452 439 yield chunk
453 440
454 441 response = Response(app_iter=get_chunked_data(resp))
455 442 response.content_type = 'application/octet-stream'
456 443
457 444 return response
458 445
459 446 def status_view(self, request):
460 447 import vcsserver
461 448 _platform_id = platform.uname()[1] or 'instance'
462 449
463 450 return {
464 451 "status": "OK",
465 452 "vcsserver_version": vcsserver.get_version(),
466 453 "platform": _platform_id,
467 454 "pid": os.getpid(),
468 455 }
469 456
470 457 def service_view(self, request):
471 458 import vcsserver
472 459
473 460 payload = msgpack.unpackb(request.body, use_list=True)
474 461 server_config, app_config = {}, {}
475 462
476 463 try:
477 464 path = self.global_config['__file__']
478 465 config = configparser.RawConfigParser()
479 466
480 467 config.read(path)
481 468
482 469 if config.has_section('server:main'):
483 470 server_config = dict(config.items('server:main'))
484 471 if config.has_section('app:main'):
485 472 app_config = dict(config.items('app:main'))
486 473
487 474 except Exception:
488 475 log.exception('Failed to read .ini file for display')
489 476
490 477 environ = list(os.environ.items())
491 478
492 479 resp = {
493 480 'id': payload.get('id'),
494 481 'result': dict(
495 482 version=vcsserver.get_version(),
496 483 config=server_config,
497 484 app_config=app_config,
498 485 environ=environ,
499 486 payload=payload,
500 487 )
501 488 }
502 489 return resp
503 490
504 491 def _msgpack_renderer_factory(self, info):
505 492
506 493 def _render(value, system):
507 494 bin_type = False
508 495 res = value.get('result')
509 496 if isinstance(res, BytesEnvelope):
510 497 log.debug('Result is wrapped in BytesEnvelope type')
511 498 bin_type = True
512 499 elif isinstance(res, BinaryEnvelope):
513 500 log.debug('Result is wrapped in BinaryEnvelope type')
514 501 value['result'] = res.val
515 502 bin_type = True
516 503
517 504 request = system.get('request')
518 505 if request is not None:
519 506 response = request.response
520 507 ct = response.content_type
521 508 if ct == response.default_content_type:
522 509 response.content_type = 'application/x-msgpack'
523 510 if bin_type:
524 511 response.content_type = 'application/x-msgpack-bin'
525 512
526 513 return msgpack.packb(value, use_bin_type=bin_type)
527 514 return _render
528 515
529 516 def set_env_from_config(self, environ, config):
530 517 dict_conf = {}
531 518 try:
532 519 for elem in config:
533 520 if elem[0] == 'rhodecode':
534 521 dict_conf = json.loads(elem[2])
535 522 break
536 523 except Exception:
537 524 log.exception('Failed to fetch SCM CONFIG')
538 525 return
539 526
540 527 username = dict_conf.get('username')
541 528 if username:
542 529 environ['REMOTE_USER'] = username
543 530 # mercurial specific, some extension api rely on this
544 531 environ['HGUSER'] = username
545 532
546 533 ip = dict_conf.get('ip')
547 534 if ip:
548 535 environ['REMOTE_HOST'] = ip
549 536
550 537 if _is_request_chunked(environ):
551 538 # set the compatibility flag for webob
552 539 environ['wsgi.input_terminated'] = True
553 540
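An illustrative shape for the repo config triples scanned above: (section, key, value) tuples where the 'rhodecode' section's value is the JSON blob carrying username/ip (values made up):

    config = [
        ('rhodecode', 'RC_SCM_DATA', '{"username": "admin", "ip": "10.0.0.1"}'),
    ]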
554 541 def hg_proxy(self):
555 542 @wsgiapp
556 543 def _hg_proxy(environ, start_response):
557 544 app = WsgiProxy(self.remote_wsgi.HgRemoteWsgi())
558 545 return app(environ, start_response)
559 546 return _hg_proxy
560 547
561 548 def git_proxy(self):
562 549 @wsgiapp
563 550 def _git_proxy(environ, start_response):
564 551 app = WsgiProxy(self.remote_wsgi.GitRemoteWsgi())
565 552 return app(environ, start_response)
566 553 return _git_proxy
567 554
568 555 def hg_stream(self):
569 556 if self._use_echo_app:
570 557 @wsgiapp
571 558 def _hg_stream(environ, start_response):
572 559 app = EchoApp('fake_path', 'fake_name', None)
573 560 return app(environ, start_response)
574 561 return _hg_stream
575 562 else:
576 563 @wsgiapp
577 564 def _hg_stream(environ, start_response):
578 565 log.debug('http-app: handling hg stream')
579 566 call_context = get_headers_call_context(environ)
580 567
581 568 repo_path = call_context['repo_path']
582 569 repo_name = call_context['repo_name']
583 570 config = call_context['repo_config']
584 571
585 572 app = scm_app.create_hg_wsgi_app(
586 573 repo_path, repo_name, config)
587 574
588 575 # Consistent path information for hgweb
589 576 environ['PATH_INFO'] = call_context['path_info']
590 577 environ['REPO_NAME'] = repo_name
591 578 self.set_env_from_config(environ, config)
592 579
593 580 log.debug('http-app: starting app handler '
594 581 'with %s and process request', app)
595 582 return app(environ, ResponseFilter(start_response))
596 583 return _hg_stream
597 584
598 585 def git_stream(self):
599 586 if self._use_echo_app:
600 587 @wsgiapp
601 588 def _git_stream(environ, start_response):
602 589 app = EchoApp('fake_path', 'fake_name', None)
603 590 return app(environ, start_response)
604 591 return _git_stream
605 592 else:
606 593 @wsgiapp
607 594 def _git_stream(environ, start_response):
608 595 log.debug('http-app: handling git stream')
609 596
610 597 call_context = get_headers_call_context(environ)
611 598
612 599 repo_path = call_context['repo_path']
613 600 repo_name = call_context['repo_name']
614 601 config = call_context['repo_config']
615 602
616 603 environ['PATH_INFO'] = call_context['path_info']
617 604 self.set_env_from_config(environ, config)
618 605
619 606 content_type = environ.get('CONTENT_TYPE', '')
620 607
621 608 path = environ['PATH_INFO']
622 609 is_lfs_request = GIT_LFS_CONTENT_TYPE in content_type
623 610 log.debug(
624 611 'LFS: Detecting if request `%s` is LFS server path based '
625 612 'on content type:`%s`, is_lfs:%s',
626 613 path, content_type, is_lfs_request)
627 614
628 615 if not is_lfs_request:
629 616 # fallback detection by path
630 617 if GIT_LFS_PROTO_PAT.match(path):
631 618 is_lfs_request = True
632 619 log.debug(
633 620 'LFS: fallback detection by path of: `%s`, is_lfs:%s',
634 621 path, is_lfs_request)
635 622
636 623 if is_lfs_request:
637 624 app = scm_app.create_git_lfs_wsgi_app(
638 625 repo_path, repo_name, config)
639 626 else:
640 627 app = scm_app.create_git_wsgi_app(
641 628 repo_path, repo_name, config)
642 629
643 630 log.debug('http-app: starting app handler '
644 631 'with %s, processing request', app)
645 632
646 633 return app(environ, start_response)
647 634
648 635 return _git_stream
649 636
650 637 def handle_vcs_exception(self, exception, request):
651 638 _vcs_kind = getattr(exception, '_vcs_kind', '')
652 639
653 640 if _vcs_kind == 'repo_locked':
654 641 headers_call_context = get_headers_call_context(request.environ)
655 642 status_code = safe_int(headers_call_context['locked_status_code'])
656 643
657 644 return HTTPRepoLocked(
658 645 title=str(exception), status_code=status_code, headers=[('X-Rc-Locked', '1')])
659 646
660 647 elif _vcs_kind == 'repo_branch_protected':
661 648 # Get custom repo-branch-protected status code if present.
662 649 return HTTPRepoBranchProtected(
663 650 title=str(exception), headers=[('X-Rc-Branch-Protection', '1')])
664 651
665 652 exc_info = request.exc_info
666 653 store_exception(id(exc_info), exc_info)
667 654
668 655 traceback_info = 'unavailable'
669 656 if request.exc_info:
670 657 traceback_info = format_exc(request.exc_info)
671 658
672 659 log.error(
673 660 'error occurred handling this request for path: %s, \n%s',
674 661 request.path, traceback_info)
675 662
676 663 statsd = request.registry.statsd
677 664 if statsd:
678 665 exc_type = f"{exception.__class__.__module__}.{exception.__class__.__name__}"
679 666 statsd.incr('vcsserver_exception_total',
680 667 tags=[f"type:{exc_type}"])
681 668 raise exception
682 669
683 670
684 671 class ResponseFilter:
685 672
686 673 def __init__(self, start_response):
687 674 self._start_response = start_response
688 675
689 676 def __call__(self, status, response_headers, exc_info=None):
690 677 headers = tuple(
691 678 (h, v) for h, v in response_headers
692 679 if not wsgiref.util.is_hop_by_hop(h))
693 680 return self._start_response(status, headers, exc_info)
694 681
695 682
696 683 def sanitize_settings_and_apply_defaults(global_config, settings):
697 684 _global_settings_maker = SettingsMaker(global_config)
698 685 settings_maker = SettingsMaker(settings)
699 686
700 687 settings_maker.make_setting('logging.autoconfigure', False, parser='bool')
701 688
702 689 logging_conf = os.path.join(os.path.dirname(global_config.get('__file__')), 'logging.ini')
703 690 settings_maker.enable_logging(logging_conf)
704 691
705 692 # Default includes; a user may change these
706 693 pyramid_includes = settings_maker.make_setting('pyramid.includes', [], parser='list:newline')
707 694 log.debug("Using the following pyramid.includes: %s", pyramid_includes)
708 695
709 696 settings_maker.make_setting('__file__', global_config.get('__file__'))
710 697
711 698 settings_maker.make_setting('pyramid.default_locale_name', 'en')
712 699 settings_maker.make_setting('locale', 'en_US.UTF-8')
713 700
714 701 settings_maker.make_setting(
715 702 'core.binary_dir', '/usr/local/bin/rhodecode_bin/vcs_bin',
716 703 default_when_empty=True, parser='string:noquote')
717 704
705 settings_maker.make_setting('vcs.svn.redis_conn', 'redis://redis:6379/0')
706
718 707 temp_store = tempfile.gettempdir()
719 708 default_cache_dir = os.path.join(temp_store, 'rc_cache')
720 709 # save the default cache dir and use it for all backends later.
721 710 default_cache_dir = settings_maker.make_setting(
722 711 'cache_dir',
723 712 default=default_cache_dir, default_when_empty=True,
724 713 parser='dir:ensured')
725 714
726 715 # exception store cache
727 716 settings_maker.make_setting(
728 717 'exception_tracker.store_path',
729 718 default=os.path.join(default_cache_dir, 'exc_store'), default_when_empty=True,
730 719 parser='dir:ensured'
731 720 )
732 721
733 722 # repo_object cache defaults
734 723 settings_maker.make_setting(
735 724 'rc_cache.repo_object.backend',
736 725 default='dogpile.cache.rc.file_namespace',
737 726 parser='string')
738 727 settings_maker.make_setting(
739 728 'rc_cache.repo_object.expiration_time',
740 729 default=30 * 24 * 60 * 60, # 30 days
741 730 parser='int')
742 731 settings_maker.make_setting(
743 732 'rc_cache.repo_object.arguments.filename',
744 733 default=os.path.join(default_cache_dir, 'vcsserver_cache_repo_object.db'),
745 734 parser='string')
746 735
747 736 # statsd
748 737 settings_maker.make_setting('statsd.enabled', False, parser='bool')
749 738 settings_maker.make_setting('statsd.statsd_host', 'statsd-exporter', parser='string')
750 739 settings_maker.make_setting('statsd.statsd_port', 9125, parser='int')
751 740 settings_maker.make_setting('statsd.statsd_prefix', '')
752 741 settings_maker.make_setting('statsd.statsd_ipv6', False, parser='bool')
753 742
754 743 settings_maker.env_expand()
755 744
756 745
757 746 def main(global_config, **settings):
758 747 start_time = time.time()
759 748 log.info('Pyramid app config starting')
760 749
761 750 if MercurialFactory:
762 751 hgpatches.patch_largefiles_capabilities()
763 752 hgpatches.patch_subrepo_type_mapping()
764 753
765 754 # Fill in and sanitize the defaults & do ENV expansion
766 755 sanitize_settings_and_apply_defaults(global_config, settings)
767 756
768 757 # init and bootstrap StatsdClient
769 758 StatsdClient.setup(settings)
770 759
771 760 pyramid_app = HTTPApplication(settings=settings, global_config=global_config).wsgi_app()
772 761 total_time = time.time() - start_time
773 762 log.info('Pyramid app created and configured in %.2fs', total_time)
774 763 return pyramid_app
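# A hedged client-side sketch of decoding the msgpack renderer output above;
# the payload below is illustrative, not a real vcsserver response.
import msgpack

body = msgpack.packb({'id': 1, 'result': b'raw-bytes'}, use_bin_type=True)
# an `application/x-msgpack-bin` content type signals use_bin_type=True
decoded = msgpack.unpackb(body, raw=False)
assert decoded['result'] == b'raw-bytes'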
@@ -1,243 +1,237 b''
1 1 '''
2 2 This library is provided to allow standard python logging
3 3 to output log data as JSON formatted strings
4 4 '''
5 5 import logging
6 import json
7 6 import re
8 7 from datetime import date, datetime, time, tzinfo, timedelta
9 8 import traceback
10 9 import importlib
11 10
12 11 from inspect import istraceback
13 12
14 13 from collections import OrderedDict
15 14
16
17 def _inject_req_id(record, *args, **kwargs):
18 return record
19
20
21 ExceptionAwareFormatter = logging.Formatter
22
15 from ...logging_formatter import _inject_req_id, ExceptionAwareFormatter
16 from ...ext_json import sjson as json
23 17
24 18 ZERO = timedelta(0)
25 19 HOUR = timedelta(hours=1)
26 20
27 21
28 22 class UTC(tzinfo):
29 23 """UTC"""
30 24
31 25 def utcoffset(self, dt):
32 26 return ZERO
33 27
34 28 def tzname(self, dt):
35 29 return "UTC"
36 30
37 31 def dst(self, dt):
38 32 return ZERO
39 33
40 34 utc = UTC()
41 35
42 36
43 37 # skip natural LogRecord attributes
44 38 # http://docs.python.org/library/logging.html#logrecord-attributes
45 39 RESERVED_ATTRS = (
46 40 'args', 'asctime', 'created', 'exc_info', 'exc_text', 'filename',
47 41 'funcName', 'levelname', 'levelno', 'lineno', 'module',
48 42 'msecs', 'message', 'msg', 'name', 'pathname', 'process',
49 43 'processName', 'relativeCreated', 'stack_info', 'thread', 'threadName')
50 44
51 45
52 46 def merge_record_extra(record, target, reserved):
53 47 """
54 48 Merges extra attributes from LogRecord object into target dictionary
55 49
56 50 :param record: logging.LogRecord
57 51 :param target: dict to update
58 52 :param reserved: dict or list with reserved keys to skip
59 53 """
60 54 for key, value in record.__dict__.items():
61 55 # this allows having numeric keys
62 56 if (key not in reserved
63 57 and not (hasattr(key, "startswith")
64 58 and key.startswith('_'))):
65 59 target[key] = value
66 60 return target
67 61
68 62
69 63 class JsonEncoder(json.JSONEncoder):
70 64 """
71 65 A custom encoder extending the default JSONEncoder
72 66 """
73 67
74 68 def default(self, obj):
75 69 if isinstance(obj, (date, datetime, time)):
76 70 return self.format_datetime_obj(obj)
77 71
78 72 elif istraceback(obj):
79 73 return ''.join(traceback.format_tb(obj)).strip()
80 74
81 75 elif type(obj) == Exception \
82 76 or isinstance(obj, Exception) \
83 77 or type(obj) == type:
84 78 return str(obj)
85 79
86 80 try:
87 81 return super().default(obj)
88 82
89 83 except TypeError:
90 84 try:
91 85 return str(obj)
92 86
93 87 except Exception:
94 88 return None
95 89
96 90 def format_datetime_obj(self, obj):
97 91 return obj.isoformat()
98 92
99 93
100 94 class JsonFormatter(ExceptionAwareFormatter):
101 95 """
102 96 A custom formatter to format logging records as json strings.
103 97 Extra values will be formatted with str() if not supported by
104 98 the json default encoder
105 99 """
106 100
107 101 def __init__(self, *args, **kwargs):
108 102 """
109 103 :param json_default: a function for encoding non-standard objects
110 104 as outlined in http://docs.python.org/2/library/json.html
111 105 :param json_encoder: optional custom encoder
112 106 :param json_serializer: a :meth:`json.dumps`-compatible callable
113 107 that will be used to serialize the log record.
114 108 :param json_indent: an optional :meth:`json.dumps`-compatible numeric value
115 109 that will be used to customize the indent of the output json.
116 110 :param prefix: an optional string prefix added at the beginning of
117 111 the formatted string
118 112 :param json_indent: indent parameter for json.dumps
119 113 :param json_ensure_ascii: ensure_ascii parameter for json.dumps
120 114 :param reserved_attrs: an optional list of fields that will be skipped when
121 115 outputting json log record. Defaults to all log record attributes:
122 116 http://docs.python.org/library/logging.html#logrecord-attributes
123 117 :param timestamp: an optional string/boolean field to add a timestamp when
124 118 outputting the json log record. If string is passed, timestamp will be added
125 119 to log record using string as key. If True boolean is passed, timestamp key
126 120 will be "timestamp". Defaults to False/off.
127 121 """
128 122 self.json_default = self._str_to_fn(kwargs.pop("json_default", None))
129 123 self.json_encoder = self._str_to_fn(kwargs.pop("json_encoder", None))
130 124 self.json_serializer = self._str_to_fn(kwargs.pop("json_serializer", json.dumps))
131 125 self.json_indent = kwargs.pop("json_indent", None)
132 126 self.json_ensure_ascii = kwargs.pop("json_ensure_ascii", True)
133 127 self.prefix = kwargs.pop("prefix", "")
134 128 reserved_attrs = kwargs.pop("reserved_attrs", RESERVED_ATTRS)
135 129 self.reserved_attrs = dict(list(zip(reserved_attrs, reserved_attrs)))
136 130 self.timestamp = kwargs.pop("timestamp", True)
137 131
138 132 # super(JsonFormatter, self).__init__(*args, **kwargs)
139 133 logging.Formatter.__init__(self, *args, **kwargs)
140 134 if not self.json_encoder and not self.json_default:
141 135 self.json_encoder = JsonEncoder
142 136
143 137 self._required_fields = self.parse()
144 138 self._skip_fields = dict(list(zip(self._required_fields,
145 139 self._required_fields)))
146 140 self._skip_fields.update(self.reserved_attrs)
147 141
148 142 def _str_to_fn(self, fn_as_str):
149 143 """
150 144 If the argument is not a string, return whatever was passed in.
151 145 Parses a string such as package.module.function, imports the module
152 146 and returns the function.
153 147
154 148 :param fn_as_str: The string to parse. If not a string, return it.
155 149 """
156 150 if not isinstance(fn_as_str, str):
157 151 return fn_as_str
158 152
159 153 path, _, function = fn_as_str.rpartition('.')
160 154 module = importlib.import_module(path)
161 155 return getattr(module, function)
162 156
163 157 def parse(self):
164 158 """
165 159 Parses format string looking for substitutions
166 160
167 161 This method is responsible for returning a list of fields (as strings)
168 162 to include in all log messages.
169 163 """
170 164 standard_formatters = re.compile(r'\((.+?)\)', re.IGNORECASE)
171 165 return standard_formatters.findall(self._fmt)
172 166
173 167 def add_fields(self, log_record, record, message_dict):
174 168 """
175 169 Override this method to implement custom logic for adding fields.
176 170 """
177 171 for field in self._required_fields:
178 172 log_record[field] = record.__dict__.get(field)
179 173 log_record.update(message_dict)
180 174 merge_record_extra(record, log_record, reserved=self._skip_fields)
181 175
182 176 if self.timestamp:
183 177 key = self.timestamp if type(self.timestamp) == str else 'timestamp'
184 178 log_record[key] = datetime.fromtimestamp(record.created, tz=utc)
185 179
186 180 def process_log_record(self, log_record):
187 181 """
188 182 Override this method to implement custom logic
189 183 on the possibly ordered dictionary.
190 184 """
191 185 return log_record
192 186
193 187 def jsonify_log_record(self, log_record):
194 188 """Returns a json string of the log record."""
195 189 return self.json_serializer(log_record,
196 190 default=self.json_default,
197 191 cls=self.json_encoder,
198 192 indent=self.json_indent,
199 193 ensure_ascii=self.json_ensure_ascii)
200 194
201 195 def serialize_log_record(self, log_record):
202 196 """Returns the final representation of the log record."""
203 197 return "{}{}".format(self.prefix, self.jsonify_log_record(log_record))
204 198
205 199 def format(self, record):
206 200 """Formats a log record and serializes to json"""
207 201 message_dict = {}
208 202 # FIXME: logging.LogRecord.msg and logging.LogRecord.message in typeshed
209 203 # are always type of str. We shouldn't need to override that.
210 204 if isinstance(record.msg, dict):
211 205 message_dict = record.msg
212 206 record.message = None
213 207 else:
214 208 record.message = record.getMessage()
215 209 # only format time if needed
216 210 if "asctime" in self._required_fields:
217 211 record.asctime = self.formatTime(record, self.datefmt)
218 212
219 213 # Display formatted exception, but allow overriding it in the
220 214 # user-supplied dict.
221 215 if record.exc_info and not message_dict.get('exc_info'):
222 216 message_dict['exc_info'] = self.formatException(record.exc_info)
223 217 if not message_dict.get('exc_info') and record.exc_text:
224 218 message_dict['exc_info'] = record.exc_text
225 219 # Display formatted record of stack frames
226 220 # default format is a string returned from :func:`traceback.print_stack`
227 221 try:
228 222 if record.stack_info and not message_dict.get('stack_info'):
229 223 message_dict['stack_info'] = self.formatStack(record.stack_info)
230 224 except AttributeError:
231 225 # Python2.7 doesn't have stack_info.
232 226 pass
233 227
234 228 try:
235 229 log_record = OrderedDict()
236 230 except NameError:
237 231 log_record = {}
238 232
239 233 _inject_req_id(record, with_prefix=False)
240 234 self.add_fields(log_record, record, message_dict)
241 235 log_record = self.process_log_record(log_record)
242 236
243 237 return self.serialize_log_record(log_record)
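# A minimal wiring sketch for JsonFormatter above; the format string picks
# the fields that become JSON keys, and the logger name is illustrative.
handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter('%(levelname)s %(name)s %(message)s'))
logger = logging.getLogger('vcsserver.demo')
logger.addHandler(handler)
logger.error('boom', extra={'request_id': 'req-1'})  # extras merge into the JSON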
@@ -1,394 +1,402 b''
1 1
2 2 import threading
3 3 import weakref
4 4 from base64 import b64encode
5 5 from logging import getLogger
6 6 from os import urandom
7 7 from typing import Union
8 8
9 9 from redis import StrictRedis
10 10
11 11 __version__ = '4.0.0'
12 12
13 13 loggers = {
14 14 k: getLogger("vcsserver." + ".".join((__name__, k)))
15 15 for k in [
16 16 "acquire",
17 17 "refresh.thread.start",
18 18 "refresh.thread.stop",
19 19 "refresh.thread.exit",
20 20 "refresh.start",
21 21 "refresh.shutdown",
22 22 "refresh.exit",
23 23 "release",
24 24 ]
25 25 }
26 26
27 27 text_type = str
28 28 binary_type = bytes
29 29
30 30
31 31 # Check if the id matches. If not, return an error code.
32 32 UNLOCK_SCRIPT = b"""
33 33 if redis.call("get", KEYS[1]) ~= ARGV[1] then
34 34 return 1
35 35 else
36 36 redis.call("del", KEYS[2])
37 37 redis.call("lpush", KEYS[2], 1)
38 38 redis.call("pexpire", KEYS[2], ARGV[2])
39 39 redis.call("del", KEYS[1])
40 40 return 0
41 41 end
42 42 """
43 43
44 44 # Covers both cases: the key doesn't exist, or it doesn't equal the lock's id
45 45 EXTEND_SCRIPT = b"""
46 46 if redis.call("get", KEYS[1]) ~= ARGV[1] then
47 47 return 1
48 48 elseif redis.call("ttl", KEYS[1]) < 0 then
49 49 return 2
50 50 else
51 51 redis.call("expire", KEYS[1], ARGV[2])
52 52 return 0
53 53 end
54 54 """
55 55
56 56 RESET_SCRIPT = b"""
57 57 redis.call('del', KEYS[2])
58 58 redis.call('lpush', KEYS[2], 1)
59 59 redis.call('pexpire', KEYS[2], ARGV[2])
60 60 return redis.call('del', KEYS[1])
61 61 """
62 62
63 63 RESET_ALL_SCRIPT = b"""
64 64 local locks = redis.call('keys', 'lock:*')
65 65 local signal
66 66 for _, lock in pairs(locks) do
67 67 signal = 'lock-signal:' .. string.sub(lock, 6)
68 68 redis.call('del', signal)
69 69 redis.call('lpush', signal, 1)
70 70 redis.call('expire', signal, 1)
71 71 redis.call('del', lock)
72 72 end
73 73 return #locks
74 74 """
75 75
76 76
77 77 class AlreadyAcquired(RuntimeError):
78 78 pass
79 79
80 80
81 81 class NotAcquired(RuntimeError):
82 82 pass
83 83
84 84
85 85 class AlreadyStarted(RuntimeError):
86 86 pass
87 87
88 88
89 89 class TimeoutNotUsable(RuntimeError):
90 90 pass
91 91
92 92
93 93 class InvalidTimeout(RuntimeError):
94 94 pass
95 95
96 96
97 97 class TimeoutTooLarge(RuntimeError):
98 98 pass
99 99
100 100
101 101 class NotExpirable(RuntimeError):
102 102 pass
103 103
104 104
105 105 class Lock:
106 106 """
107 107 A Lock context manager implemented via redis SETNX/BLPOP.
108 108 """
109 109
110 110 unlock_script = None
111 111 extend_script = None
112 112 reset_script = None
113 113 reset_all_script = None
114 blocking = None
114 115
115 116 _lock_renewal_interval: float
116 117 _lock_renewal_thread: Union[threading.Thread, None]
117 118
118 def __init__(self, redis_client, name, expire=None, id=None, auto_renewal=False, strict=True, signal_expire=1000):
119 def __init__(self, redis_client, name, expire=None, id=None, auto_renewal=False, strict=True, signal_expire=1000, blocking=True):
119 120 """
120 121 :param redis_client:
121 122 An instance of :class:`~StrictRedis`.
122 123 :param name:
123 124 The name (redis key) the lock should have.
124 125 :param expire:
125 126 The lock expiry time in seconds. If left at the default (None)
126 127 the lock will not expire.
127 128 :param id:
128 129 The ID (redis value) the lock should have. A random value is
129 130 generated when left at the default.
130 131
131 132 Note that if you specify this then the lock is marked as "held". Acquires
132 133 won't be possible.
133 134 :param auto_renewal:
134 135 If set to ``True``, Lock will automatically renew the lock so that it
135 136 doesn't expire for as long as the lock is held (acquire() called
136 137 or running in a context manager).
137 138
138 139 Implementation note: Renewal will happen using a daemon thread with
139 140 an interval of ``expire*2/3``. If wishing to use a different renewal
140 141 time, subclass Lock, call ``super().__init__()`` then set
141 142 ``self._lock_renewal_interval`` to your desired interval.
142 143 :param strict:
143 144 If set ``True`` then the ``redis_client`` needs to be an instance of ``redis.StrictRedis``.
144 145 :param signal_expire:
145 146 Advanced option to override signal list expiration in milliseconds. Increase it for very slow clients. Default: ``1000``.
147 :param blocking:
148 Boolean value specifying whether lock should be blocking or not.
149 Used in `__enter__` method.
146 150 """
147 151 if strict and not isinstance(redis_client, StrictRedis):
148 152 raise ValueError("redis_client must be instance of StrictRedis. "
149 153 "Use strict=False if you know what you're doing.")
150 154 if auto_renewal and expire is None:
151 155 raise ValueError("Expire may not be None when auto_renewal is set")
152 156
153 157 self._client = redis_client
154 158
155 159 if expire:
156 160 expire = int(expire)
157 161 if expire < 0:
158 162 raise ValueError("A negative expire is not acceptable.")
159 163 else:
160 164 expire = None
161 165 self._expire = expire
162 166
163 167 self._signal_expire = signal_expire
164 168 if id is None:
165 169 self._id = b64encode(urandom(18)).decode('ascii')
166 170 elif isinstance(id, binary_type):
167 171 try:
168 172 self._id = id.decode('ascii')
169 173 except UnicodeDecodeError:
170 174 self._id = b64encode(id).decode('ascii')
171 175 elif isinstance(id, text_type):
172 176 self._id = id
173 177 else:
174 178 raise TypeError(f"Incorrect type for `id`. Must be bytes/str not {type(id)}.")
175 179 self._name = 'lock:' + name
176 180 self._signal = 'lock-signal:' + name
177 181 self._lock_renewal_interval = (float(expire) * 2 / 3
178 182 if auto_renewal
179 183 else None)
180 184 self._lock_renewal_thread = None
181 185
186 self.blocking = blocking
187
182 188 self.register_scripts(redis_client)
183 189
184 190 @classmethod
185 191 def register_scripts(cls, redis_client):
186 192 global reset_all_script
187 193 if reset_all_script is None:
188 194 cls.unlock_script = redis_client.register_script(UNLOCK_SCRIPT)
189 195 cls.extend_script = redis_client.register_script(EXTEND_SCRIPT)
190 196 cls.reset_script = redis_client.register_script(RESET_SCRIPT)
191 197 cls.reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
192 198 reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
193 199
194 200 @property
195 201 def _held(self):
196 202 return self.id == self.get_owner_id()
197 203
198 204 def reset(self):
199 205 """
200 206 Forcibly deletes the lock. Use this with care.
201 207 """
202 208 self.reset_script(client=self._client, keys=(self._name, self._signal), args=(self.id, self._signal_expire))
203 209
204 210 @property
205 211 def id(self):
206 212 return self._id
207 213
208 214 def get_owner_id(self):
209 215 owner_id = self._client.get(self._name)
210 216 if isinstance(owner_id, binary_type):
211 217 owner_id = owner_id.decode('ascii', 'replace')
212 218 return owner_id
213 219
214 220 def acquire(self, blocking=True, timeout=None):
215 221 """
216 222 :param blocking:
217 223 Boolean value specifying whether lock should be blocking or not.
218 224 :param timeout:
219 225 An integer value specifying the maximum number of seconds to block.
220 226 """
221 227 logger = loggers["acquire"]
222 228
223 229 logger.debug("Getting blocking: %s acquire on %r ...", blocking, self._name)
224 230
225 231 if self._held:
226 232 owner_id = self.get_owner_id()
227 233 raise AlreadyAcquired("Already acquired from this Lock instance. Lock id: {}".format(owner_id))
228 234
229 235 if not blocking and timeout is not None:
230 236 raise TimeoutNotUsable("Timeout cannot be used if blocking=False")
231 237
232 238 if timeout:
233 239 timeout = int(timeout)
234 240 if timeout < 0:
235 241 raise InvalidTimeout(f"Timeout ({timeout}) cannot be less than or equal to 0")
236 242
237 243 if self._expire and not self._lock_renewal_interval and timeout > self._expire:
238 244 raise TimeoutTooLarge(f"Timeout ({timeout}) cannot be greater than expire ({self._expire})")
239 245
240 246 busy = True
241 247 blpop_timeout = timeout or self._expire or 0
242 248 timed_out = False
243 249 while busy:
244 250 busy = not self._client.set(self._name, self._id, nx=True, ex=self._expire)
245 251 if busy:
246 252 if timed_out:
247 253 return False
248 254 elif blocking:
249 255 timed_out = not self._client.blpop(self._signal, blpop_timeout) and timeout
250 256 else:
251 257 logger.warning("Failed to acquire Lock(%r).", self._name)
252 258 return False
253 259
254 260 logger.debug("Acquired Lock(%r).", self._name)
255 261 if self._lock_renewal_interval is not None:
256 262 self._start_lock_renewer()
257 263 return True
258 264
259 265 def extend(self, expire=None):
260 266 """
261 267 Extends expiration time of the lock.
262 268
263 269 :param expire:
264 270 New expiration time. If ``None`` - `expire` provided during
265 271 lock initialization will be taken.
266 272 """
267 273 if expire:
268 274 expire = int(expire)
269 275 if expire < 0:
270 276 raise ValueError("A negative expire is not acceptable.")
271 277 elif self._expire is not None:
272 278 expire = self._expire
273 279 else:
274 280 raise TypeError(
275 281 "To extend a lock 'expire' must be provided as an "
276 282 "argument to extend() method or at initialization time."
277 283 )
278 284
279 285 error = self.extend_script(client=self._client, keys=(self._name, self._signal), args=(self._id, expire))
280 286 if error == 1:
281 287 raise NotAcquired(f"Lock {self._name} is not acquired or it already expired.")
282 288 elif error == 2:
283 289 raise NotExpirable(f"Lock {self._name} has no assigned expiration time")
284 290 elif error:
285 291 raise RuntimeError(f"Unsupported error code {error} from EXTEND script")
286 292
287 293 @staticmethod
288 294 def _lock_renewer(name, lockref, interval, stop):
289 295 """
290 296 Renew the lock key in redis every `interval` seconds for as long
291 297 as the `stop` event is not set.
292 298 """
293 299 while not stop.wait(timeout=interval):
294 300 loggers["refresh.thread.start"].debug("Refreshing Lock(%r).", name)
295 301 lock: "Lock" = lockref()
296 302 if lock is None:
297 303 loggers["refresh.thread.stop"].debug(
298 304 "Stopping loop because Lock(%r) was garbage collected.", name
299 305 )
300 306 break
301 307 lock.extend(expire=lock._expire)
302 308 del lock
303 309 loggers["refresh.thread.exit"].debug("Exiting renewal thread for Lock(%r).", name)
304 310
305 311 def _start_lock_renewer(self):
306 312 """
307 313 Starts the lock refresher thread.
308 314 """
309 315 if self._lock_renewal_thread is not None:
310 316 raise AlreadyStarted("Lock refresh thread already started")
311 317
312 318 loggers["refresh.start"].debug(
313 319 "Starting renewal thread for Lock(%r). Refresh interval: %s seconds.",
314 320 self._name, self._lock_renewal_interval
315 321 )
316 322 self._lock_renewal_stop = threading.Event()
317 323 self._lock_renewal_thread = threading.Thread(
318 324 group=None,
319 325 target=self._lock_renewer,
320 326 kwargs={
321 327 'name': self._name,
322 328 'lockref': weakref.ref(self),
323 329 'interval': self._lock_renewal_interval,
324 330 'stop': self._lock_renewal_stop,
325 331 },
326 332 )
327 333 self._lock_renewal_thread.daemon = True
328 334 self._lock_renewal_thread.start()
329 335
330 336 def _stop_lock_renewer(self):
331 337 """
332 338 Stop the lock renewer.
333 339
334 340 This signals the renewal thread and waits for its exit.
335 341 """
336 342 if self._lock_renewal_thread is None or not self._lock_renewal_thread.is_alive():
337 343 return
338 344 loggers["refresh.shutdown"].debug("Signaling renewal thread for Lock(%r) to exit.", self._name)
339 345 self._lock_renewal_stop.set()
340 346 self._lock_renewal_thread.join()
341 347 self._lock_renewal_thread = None
342 348 loggers["refresh.exit"].debug("Renewal thread for Lock(%r) exited.", self._name)
343 349
344 350 def __enter__(self):
345 acquired = self.acquire(blocking=True)
351 acquired = self.acquire(blocking=self.blocking)
346 352 if not acquired:
347 raise AssertionError(f"Lock({self._name}) wasn't acquired, but blocking=True was used!")
353 if self.blocking:
354 raise AssertionError(f"Lock({self._name}) wasn't acquired, but blocking=True was used!")
355 raise NotAcquired(f"Lock({self._name}) is not acquired or it already expired.")
348 356 return self
349 357
350 358 def __exit__(self, exc_type=None, exc_value=None, traceback=None):
351 359 self.release()
352 360
353 361 def release(self):
354 362 """Releases the lock, that was acquired with the same object.
355 363
356 364 .. note::
357 365
358 366 If you want to release a lock that you acquired in a different place you have two choices:
359 367
360 368 * Use ``Lock("name", id=id_from_other_place).release()``
361 369 * Use ``Lock("name").reset()``
362 370 """
363 371 if self._lock_renewal_thread is not None:
364 372 self._stop_lock_renewer()
365 373 loggers["release"].debug("Releasing Lock(%r).", self._name)
366 374 error = self.unlock_script(client=self._client, keys=(self._name, self._signal), args=(self._id, self._signal_expire))
367 375 if error == 1:
368 376 raise NotAcquired(f"Lock({self._name}) is not acquired or it already expired.")
369 377 elif error:
370 378 raise RuntimeError(f"Unsupported error code {error} from UNLOCK script.")
371 379
372 380 def locked(self):
373 381 """
374 382 Return true if the lock is acquired.
375 383
376 384 Checks that lock with same name already exists. This method returns true, even if
377 385 lock have another id.
378 386 """
379 387 return self._client.exists(self._name) == 1
380 388
381 389
382 390 reset_all_script = None
383 391
384 392
385 393 def reset_all(redis_client):
386 394 """
387 395 Forcibly deletes all locks if any remain (e.g. left over after a crash). Use this with care.
388 396
389 397 :param redis_client:
390 398 An instance of :class:`~StrictRedis`.
391 399 """
392 400 Lock.register_scripts(redis_client)
393 401
394 402 reset_all_script(client=redis_client) # noqa
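# A minimal usage sketch for Lock, assuming a Redis server reachable on
# localhost:6379 (connection details and lock name are illustrative).
client = StrictRedis(host='localhost', port=6379)
lock = Lock(client, 'repo-archive', expire=60, auto_renewal=True, blocking=False)
try:
    with lock:  # __enter__ uses self.blocking, raising instead of waiting
        pass    # critical section; the renewal thread keeps the key alive
except NotAcquired:
    pass        # another process holds 'lock:repo-archive'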
@@ -1,50 +1,51 b''
1 1 import logging
2 2
3 3 from .stream import TCPStatsClient, UnixSocketStatsClient # noqa
4 4 from .udp import StatsClient # noqa
5 5
6 6 HOST = 'localhost'
7 7 PORT = 8125
8 8 IPV6 = False
9 9 PREFIX = None
10 10 MAXUDPSIZE = 512
11 11
12 12 log = logging.getLogger('rhodecode.statsd')
13 13
14 14
15 15 def statsd_config(config, prefix='statsd.'):
16 16 _config = {}
17 for key in config.keys():
17 for key in list(config.keys()):
18 18 if key.startswith(prefix):
19 19 _config[key[len(prefix):]] = config[key]
20 20 return _config
21 21
22 22
23 23 def client_from_config(configuration, prefix='statsd.', **kwargs):
24 24 from pyramid.settings import asbool
25 25
26 26 _config = statsd_config(configuration, prefix)
27 statsd_flag = _config.get('enabled')
27 28 statsd_enabled = asbool(_config.pop('enabled', False))
28 29 if not statsd_enabled:
29 log.debug('statsd client not enabled by statsd.enabled = flag, skipping...')
30 log.debug('statsd client not enabled by statsd.enabled = %s flag, skipping...', statsd_flag)
30 31 return
31 32
32 33 host = _config.pop('statsd_host', HOST)
33 34 port = _config.pop('statsd_port', PORT)
34 35 prefix = _config.pop('statsd_prefix', PREFIX)
35 36 maxudpsize = _config.pop('statsd_maxudpsize', MAXUDPSIZE)
36 37 ipv6 = asbool(_config.pop('statsd_ipv6', IPV6))
37 38 log.debug('configured statsd client %s:%s', host, port)
38 39
39 40 try:
40 41 client = StatsClient(
41 42 host=host, port=port, prefix=prefix, maxudpsize=maxudpsize, ipv6=ipv6)
42 43 except Exception:
43 44 log.exception('StatsD is enabled, but failed to connect to statsd server, fallback: disable statsd')
44 45 client = None
45 46
46 47 return client
47 48
48 49
49 50 def get_statsd_client(request):
50 51 return client_from_config(request.registry.settings)
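# A hedged sketch of building a client from flat settings; the keys mirror
# the `statsd.` prefix handled above, and the values are illustrative.
settings = {
    'statsd.enabled': 'true',
    'statsd.statsd_host': '127.0.0.1',
    'statsd.statsd_port': 8125,
    'statsd.statsd_prefix': 'vcsserver',
}
client = client_from_config(settings)
if client:
    client.incr('vcsserver_startup_total')  # metric name is illustrative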
@@ -1,40 +1,58 b''
1 1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 2 #
3 3 # This program is free software: you can redistribute it and/or modify
4 4 # it under the terms of the GNU Affero General Public License, version 3
5 5 # (only), as published by the Free Software Foundation.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU Affero General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 #
15 15 # This program is dual-licensed. If you wish to learn more about the
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18 import os
19 import vcsserver
20 import vcsserver.settings
19 21
20 22
21 23 def get_config(ini_path, **kwargs):
22 24 import configparser
23 25 parser = configparser.ConfigParser(**kwargs)
24 26 parser.read(ini_path)
25 27 return parser
26 28
27 29
28 30 def get_app_config_lightweight(ini_path):
29 31 parser = get_config(ini_path)
30 32 parser.set('app:main', 'here', os.getcwd())
31 33 parser.set('app:main', '__file__', ini_path)
32 34 return dict(parser.items('app:main'))
33 35
34 36
35 37 def get_app_config(ini_path):
36 38 """
37 39 This loads the app context and provides a heavyweight initialization of config
38 40 """
39 41 from paste.deploy.loadwsgi import appconfig
40 42 return appconfig(f'config:{ini_path}', relative_to=os.getcwd())
43
44
45 def configure_and_store_settings(global_config, app_settings):
46 """
47 Configure the settings module.
48 """
49 settings_merged = global_config.copy()
50 settings_merged.update(app_settings)
51
52 binary_dir = app_settings['core.binary_dir']
53
54 vcsserver.settings.BINARY_DIR = binary_dir
55
56 # Store the settings to make them available to other modules.
57 vcsserver.PYRAMID_SETTINGS = settings_merged
58 vcsserver.CONFIG = settings_merged
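# A hedged usage sketch of the helpers above; the ini path is illustrative
# and assumes the file defines core.binary_dir in its [app:main] section.
ini_path = '/etc/rhodecode/vcsserver.ini'
app_settings = get_app_config_lightweight(ini_path)
configure_and_store_settings({'__file__': ini_path}, app_settings)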
@@ -1,2 +1,14 b''
1 # use orjson by default
2 import orjson as json
1 import json as stdlib_json
2
3 try:
4 # we keep simplejson for its extra dump functionality (e.g. indent),
5 # which orjson doesn't support
6 import simplejson as sjson
7 except ImportError:
8 sjson = stdlib_json
9
10 try:
11 import orjson
12 import orjson as json
13 except ImportError:
14 json = stdlib_json
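# Usage sketch: callers import the selected backend through this module.
# Note that orjson.dumps returns bytes while the stdlib returns str, so
# downstream code should not assume one or the other when orjson is optional.
payload = json.dumps({'ok': True})            # bytes with orjson, str otherwise
pretty = sjson.dumps({'ok': True}, indent=2)  # simplejson keeps indent support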
@@ -1,53 +1,63 b''
1 1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 2 #
3 3 # This program is free software: you can redistribute it and/or modify
4 4 # it under the terms of the GNU Affero General Public License, version 3
5 5 # (only), as published by the Free Software Foundation.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU Affero General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 #
15 15 # This program is dual-licensed. If you wish to learn more about the
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18
19 19 import sys
20 20 import logging
21 21
22 22
23 23 BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = list(range(30, 38))
24 24
25 25 # Sequences
26 26 RESET_SEQ = "\033[0m"
27 27 COLOR_SEQ = "\033[0;%dm"
28 28 BOLD_SEQ = "\033[1m"
29 29
30 30 COLORS = {
31 31 'CRITICAL': MAGENTA,
32 32 'ERROR': RED,
33 33 'WARNING': CYAN,
34 34 'INFO': GREEN,
35 35 'DEBUG': BLUE,
36 36 'SQL': YELLOW
37 37 }
38 38
39 39
40 def _inject_req_id(record, *args, **kwargs):
41 return record
42
43
44 class ExceptionAwareFormatter(logging.Formatter):
45 pass
46
47
40 48 class ColorFormatter(logging.Formatter):
41 49
42 50 def format(self, record):
43 51 """
44 Change record's levelname to use with COLORS enum
52 Changes record's levelname to use with COLORS enum
45 53 """
46 54 def_record = super().format(record)
47 55
48 56 levelname = record.levelname
49 57 start = COLOR_SEQ % (COLORS[levelname])
50 58 end = RESET_SEQ
51 59
52 60 colored_record = ''.join([start, def_record, end])
53 61 return colored_record
62
63
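# A minimal wiring sketch for ColorFormatter; the format string is illustrative.
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(ColorFormatter('%(asctime)s %(levelname)s %(message)s'))
root = logging.getLogger()
root.addHandler(handler)
root.setLevel(logging.DEBUG)
root.info('colored output')  # wrapped in the GREEN escape sequence for INFO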
@@ -1,63 +1,63 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18
19 19 import logging
20 20
21 21 from repoze.lru import LRUCache
22 22
23 from vcsserver.str_utils import safe_str
23 from vcsserver.lib.str_utils import safe_str
24 24
25 25 log = logging.getLogger(__name__)
26 26
27 27
28 28 class LRUDict(LRUCache):
29 29 """
30 30 Wrapper to provide partial dict access
31 31 """
32 32
33 33 def __setitem__(self, key, value):
34 34 return self.put(key, value)
35 35
36 36 def __getitem__(self, key):
37 37 return self.get(key)
38 38
39 39 def __contains__(self, key):
40 40 return bool(self.get(key))
41 41
42 42 def __delitem__(self, key):
43 43 del self.data[key]
44 44
45 45 def keys(self):
46 46 return list(self.data.keys())
47 47
48 48
49 49 class LRUDictDebug(LRUDict):
50 50 """
51 51 Wrapper to provide some debug options
52 52 """
53 53 def _report_keys(self):
54 54 elems_cnt = f'{len(list(self.keys()))}/{self.size}'
55 55 # trick for pformat to print it more nicely
56 56 fmt = '\n'
57 57 for cnt, elem in enumerate(self.keys()):
58 58 fmt += f'{cnt+1} - {safe_str(elem)}\n'
59 59 log.debug('current LRU keys (%s):%s', elems_cnt, fmt)
60 60
61 61 def __getitem__(self, key):
62 62 self._report_keys()
63 63 return self.get(key)
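# Usage sketch: LRUDict is a bounded dict backed by repoze.lru.
cache = LRUDict(2)   # keep at most 2 entries
cache['a'] = 1
cache['b'] = 2
cache['c'] = 3       # evicts the least recently used entry
assert cache['a'] is None   # misses return None instead of raising
# note: `key in cache` goes through get(), so falsy values also read as absent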
@@ -1,303 +1,303 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 #import errno
19 19 import fcntl
20 20 import functools
21 21 import logging
22 22 import os
23 23 import pickle
24 24 #import time
25 25
26 26 #import gevent
27 27 import msgpack
28 28 import redis
29 29
30 30 flock_org = fcntl.flock
31 31 from typing import Union
32 32
33 33 from dogpile.cache.api import Deserializer, Serializer
34 34 from dogpile.cache.backends import file as file_backend
35 35 from dogpile.cache.backends import memory as memory_backend
36 36 from dogpile.cache.backends import redis as redis_backend
37 37 from dogpile.cache.backends.file import FileLock
38 38 from dogpile.cache.util import memoized_property
39 39
40 40 from vcsserver.lib.memory_lru_dict import LRUDict, LRUDictDebug
41 from vcsserver.str_utils import safe_bytes, safe_str
42 from vcsserver.type_utils import str2bool
41 from vcsserver.lib.str_utils import safe_bytes, safe_str
42 from vcsserver.lib.type_utils import str2bool
43 43
44 44 _default_max_size = 1024
45 45
46 46 log = logging.getLogger(__name__)
47 47
48 48
49 49 class LRUMemoryBackend(memory_backend.MemoryBackend):
50 50 key_prefix = 'lru_mem_backend'
51 51 pickle_values = False
52 52
53 53 def __init__(self, arguments):
54 54 self.max_size = arguments.pop('max_size', _default_max_size)
55 55
56 56 LRUDictClass = LRUDict
57 57 if arguments.pop('log_key_count', None):
58 58 LRUDictClass = LRUDictDebug
59 59
60 60 arguments['cache_dict'] = LRUDictClass(self.max_size)
61 61 super().__init__(arguments)
62 62
63 63 def __repr__(self):
64 64 return f'{self.__class__}(maxsize=`{self.max_size}`)'
65 65
66 66 def __str__(self):
67 67 return self.__repr__()
68 68
69 69 def delete(self, key):
70 70 try:
71 71 del self._cache[key]
72 72 except KeyError:
73 73 # we don't care if the key isn't there at deletion
74 74 pass
75 75
76 76 def list_keys(self, prefix):
77 77 return list(self._cache.keys())
78 78
79 79 def delete_multi(self, keys):
80 80 for key in keys:
81 81 self.delete(key)
82 82
83 83 def delete_multi_by_prefix(self, prefix):
84 84 cache_keys = self.list_keys(prefix=prefix)
85 85 num_affected_keys = len(cache_keys)
86 86 if num_affected_keys:
87 87 self.delete_multi(cache_keys)
88 88 return num_affected_keys
89 89
90 90
91 91 class PickleSerializer:
92 92 serializer: None | Serializer = staticmethod( # type: ignore
93 93 functools.partial(pickle.dumps, protocol=pickle.HIGHEST_PROTOCOL)
94 94 )
95 95 deserializer: None | Deserializer = staticmethod( # type: ignore
96 96 functools.partial(pickle.loads)
97 97 )
98 98
99 99
100 100 class MsgPackSerializer:
101 101 serializer: None | Serializer = staticmethod( # type: ignore
102 102 msgpack.packb
103 103 )
104 104 deserializer: None | Deserializer = staticmethod( # type: ignore
105 105 functools.partial(msgpack.unpackb, use_list=False)
106 106 )
107 107
108 108
109 109 class CustomLockFactory(FileLock):
110 110
111 111 pass
112 112
113 113
114 114 class FileNamespaceBackend(PickleSerializer, file_backend.DBMBackend):
115 115 key_prefix = 'file_backend'
116 116
117 117 def __init__(self, arguments):
118 118 arguments['lock_factory'] = CustomLockFactory
119 119 db_file = arguments.get('filename')
120 120
121 121 log.debug('initializing cache-backend=%s db in %s', self.__class__.__name__, db_file)
122 122 db_file_dir = os.path.dirname(db_file)
123 123 if not os.path.isdir(db_file_dir):
124 124 os.makedirs(db_file_dir)
125 125
126 126 try:
127 127 super().__init__(arguments)
128 128 except Exception:
129 129 log.exception('Failed to initialize db at: %s', db_file)
130 130 raise
131 131
132 132 def __repr__(self):
133 133 return f'{self.__class__}(file=`{self.filename}`)'
134 134
135 135 def __str__(self):
136 136 return self.__repr__()
137 137
138 138 def _get_keys_pattern(self, prefix: bytes = b''):
139 139 return b'%b:%b' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
140 140
141 141 def list_keys(self, prefix: bytes = b''):
142 142 prefix = self._get_keys_pattern(prefix)
143 143
144 144 def cond(dbm_key: bytes):
145 145 if not prefix:
146 146 return True
147 147
148 148 if dbm_key.startswith(prefix):
149 149 return True
150 150 return False
151 151
152 152 with self._dbm_file(True) as dbm:
153 153 try:
154 154 return list(filter(cond, dbm.keys()))
155 155 except Exception:
156 156 log.error('Failed to fetch DBM keys from DB: %s', self.get_store())
157 157 raise
158 158
159 159 def delete_multi_by_prefix(self, prefix):
160 160 cache_keys = self.list_keys(prefix=prefix)
161 161 num_affected_keys = len(cache_keys)
162 162 if num_affected_keys:
163 163 self.delete_multi(cache_keys)
164 164 return num_affected_keys
165 165
166 166 def get_store(self):
167 167 return self.filename
168 168
169 169
170 170 class BaseRedisBackend(redis_backend.RedisBackend):
171 171 key_prefix = ''
172 172
173 173 def __init__(self, arguments):
174 174 self.db_conn = arguments.get('host', '') or arguments.get('url', '') or 'redis-host'
175 175 super().__init__(arguments)
176 176
177 177 self._lock_timeout = self.lock_timeout
178 178 self._lock_auto_renewal = str2bool(arguments.pop("lock_auto_renewal", True))
179 179
180 180 if self._lock_auto_renewal and not self._lock_timeout:
181 181 # set default timeout for auto_renewal
182 182 self._lock_timeout = 30
183 183
184 184 def __repr__(self):
185 185 return f'{self.__class__}(conn=`{self.db_conn}`)'
186 186
187 187 def __str__(self):
188 188 return self.__repr__()
189 189
190 190 def _create_client(self):
191 191 args = {}
192 192
193 193 if self.url is not None:
194 194 args.update(url=self.url)
195 195
196 196 else:
197 197 args.update(
198 198 host=self.host, password=self.password,
199 199 port=self.port, db=self.db
200 200 )
201 201
202 202 connection_pool = redis.ConnectionPool(**args)
203 203 self.writer_client = redis.StrictRedis(
204 204 connection_pool=connection_pool
205 205 )
206 206 self.reader_client = self.writer_client
207 207
208 208 def _get_keys_pattern(self, prefix: bytes = b''):
209 209 return b'%b:%b*' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
210 210
211 211 def list_keys(self, prefix: bytes = b''):
212 212 prefix = self._get_keys_pattern(prefix)
213 213 return self.reader_client.keys(prefix)
214 214
215 215 def delete_multi_by_prefix(self, prefix, use_lua=False):
216 216 if use_lua:
217 217 # highly efficient Lua script to delete ALL keys by prefix...
218 218 lua = """local keys = redis.call('keys', ARGV[1])
219 219 for i=1,#keys,5000 do
220 220 redis.call('del', unpack(keys, i, math.min(i+(5000-1), #keys)))
221 221 end
222 222 return #keys"""
223 223 num_affected_keys = self.writer_client.eval(
224 224 lua,
225 225 0,
226 226 f"{prefix}*")
227 227 else:
228 228 cache_keys = self.list_keys(prefix=prefix)
229 229 num_affected_keys = len(cache_keys)
230 230 if num_affected_keys:
231 231 self.delete_multi(cache_keys)
232 232 return num_affected_keys
233 233
234 234 def get_store(self):
235 235 return self.reader_client.connection_pool
236 236
237 237 def get_mutex(self, key):
238 238 if self.distributed_lock:
239 239 lock_key = f'_lock_{safe_str(key)}'
240 240 return get_mutex_lock(
241 241 self.writer_client, lock_key,
242 242 self._lock_timeout,
243 243 auto_renewal=self._lock_auto_renewal
244 244 )
245 245 else:
246 246 return None
247 247
248 248
249 249 class RedisPickleBackend(PickleSerializer, BaseRedisBackend):
250 250 key_prefix = 'redis_pickle_backend'
251 251 pass
252 252
253 253
254 254 class RedisMsgPackBackend(MsgPackSerializer, BaseRedisBackend):
255 255 key_prefix = 'redis_msgpack_backend'
256 256 pass
257 257
258 258
259 259 def get_mutex_lock(client, lock_key, lock_timeout, auto_renewal=False):
260 260 from vcsserver.lib._vendor import redis_lock
261 261
262 262 class _RedisLockWrapper:
263 263 """LockWrapper for redis_lock"""
264 264
265 265 @classmethod
266 266 def get_lock(cls):
267 267 return redis_lock.Lock(
268 268 redis_client=client,
269 269 name=lock_key,
270 270 expire=lock_timeout,
271 271 auto_renewal=auto_renewal,
272 272 strict=True,
273 273 )
274 274
275 275 def __repr__(self):
276 276 return f"{self.__class__.__name__}:{lock_key}"
277 277
278 278 def __str__(self):
279 279 return f"{self.__class__.__name__}:{lock_key}"
280 280
281 281 def __init__(self):
282 282 self.lock = self.get_lock()
283 283 self.lock_key = lock_key
284 284
285 285 def acquire(self, wait=True):
286 286 log.debug('Trying to acquire Redis lock for key %s', self.lock_key)
287 287 try:
288 288 acquired = self.lock.acquire(wait)
289 289 log.debug('Got lock for key %s, %s', self.lock_key, acquired)
290 290 return acquired
291 291 except redis_lock.AlreadyAcquired:
292 292 return False
293 293 except redis_lock.AlreadyStarted:
294 294 # refresh thread exists, but it also means we acquired the lock
295 295 return True
296 296
297 297 def release(self):
298 298 try:
299 299 self.lock.release()
300 300 except redis_lock.NotAcquired:
301 301 pass
302 302
303 303 return _RedisLockWrapper()
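# A minimal sketch of the wrapper above, assuming a local Redis instance;
# the lock key is illustrative.
client = redis.StrictRedis(host='localhost', port=6379)
mutex = get_mutex_lock(client, 'repo_cache_key', lock_timeout=30, auto_renewal=True)
if mutex.acquire(wait=False):
    try:
        pass  # guarded work
    finally:
        mutex.release()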
@@ -1,245 +1,245 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import functools
19 19 import logging
20 20 import os
21 21 import threading
22 22 import time
23 23
24 24 import decorator
25 25 from dogpile.cache import CacheRegion
26 26
27 27
28 28 from vcsserver.utils import sha1
29 from vcsserver.str_utils import safe_bytes
30 from vcsserver.type_utils import str2bool # noqa :required by imports from .utils
29 from vcsserver.lib.str_utils import safe_bytes
30 from vcsserver.lib.type_utils import str2bool # noqa :required by imports from .utils
31 31
32 32 from . import region_meta
33 33
34 34 log = logging.getLogger(__name__)
35 35
36 36
37 37 class RhodeCodeCacheRegion(CacheRegion):
38 38
39 39 def __repr__(self):
40 40 return f'`{self.__class__.__name__}(name={self.name}, backend={self.backend.__class__})`'
41 41
42 42 def conditional_cache_on_arguments(
43 43 self, namespace=None,
44 44 expiration_time=None,
45 45 should_cache_fn=None,
46 46 to_str=str,
47 47 function_key_generator=None,
48 48 condition=True):
49 49 """
50 50 Custom conditional decorator that will not touch any dogpile internals if
51 51 the condition isn't met. This works a bit differently from should_cache_fn,
52 52 and it's faster in cases where we never want to compute cached values.
53 53 """
54 54 expiration_time_is_callable = callable(expiration_time)
55 55 if not namespace:
56 56 namespace = getattr(self, '_default_namespace', None)
57 57
58 58 if function_key_generator is None:
59 59 function_key_generator = self.function_key_generator
60 60
61 61 def get_or_create_for_user_func(func_key_generator, user_func, *arg, **kw):
62 62
63 63 if not condition:
64 64 log.debug('Calling un-cached method:%s', user_func.__name__)
65 65 start = time.time()
66 66 result = user_func(*arg, **kw)
67 67 total = time.time() - start
68 68 log.debug('un-cached method:%s took %.4fs', user_func.__name__, total)
69 69 return result
70 70
71 71 key = func_key_generator(*arg, **kw)
72 72
73 73 timeout = expiration_time() if expiration_time_is_callable \
74 74 else expiration_time
75 75
76 76 log.debug('Calling cached method:`%s`', user_func.__name__)
77 77 return self.get_or_create(key, user_func, timeout, should_cache_fn, (arg, kw))
78 78
79 79 def cache_decorator(user_func):
80 80 if to_str is str:
81 81 # backwards compatible
82 82 key_generator = function_key_generator(namespace, user_func)
83 83 else:
84 84 key_generator = function_key_generator(namespace, user_func, to_str=to_str)
85 85
86 86 def refresh(*arg, **kw):
87 87 """
88 88 Like invalidate, but regenerates the value instead
89 89 """
90 90 key = key_generator(*arg, **kw)
91 91 value = user_func(*arg, **kw)
92 92 self.set(key, value)
93 93 return value
94 94
95 95 def invalidate(*arg, **kw):
96 96 key = key_generator(*arg, **kw)
97 97 self.delete(key)
98 98
99 99 def set_(value, *arg, **kw):
100 100 key = key_generator(*arg, **kw)
101 101 self.set(key, value)
102 102
103 103 def get(*arg, **kw):
104 104 key = key_generator(*arg, **kw)
105 105 return self.get(key)
106 106
107 107 user_func.set = set_
108 108 user_func.invalidate = invalidate
109 109 user_func.get = get
110 110 user_func.refresh = refresh
111 111 user_func.key_generator = key_generator
112 112 user_func.original = user_func
113 113
114 114 # Use `decorate` to preserve the signature of :param:`user_func`.
115 115 return decorator.decorate(user_func, functools.partial(
116 116 get_or_create_for_user_func, key_generator))
117 117
118 118 return cache_decorator
119 119
120 120
121 121 def make_region(*arg, **kw):
122 122 return RhodeCodeCacheRegion(*arg, **kw)
123 123
124 124
125 125 def get_default_cache_settings(settings, prefixes=None):
126 126 prefixes = prefixes or []
127 127 cache_settings = {}
128 128 for key in settings.keys():
129 129 for prefix in prefixes:
130 130 if key.startswith(prefix):
131 131 name = key.split(prefix)[1].strip()
132 132 val = settings[key]
133 133 if isinstance(val, str):
134 134 val = val.strip()
135 135 cache_settings[name] = val
136 136 return cache_settings
137 137
138 138
139 139 def compute_key_from_params(*args):
140 140 """
141 141 Helper to compute key from given params to be used in cache manager
142 142 """
143 143 return sha1(safe_bytes("_".join(map(str, args))))
144 144
145 145
146 146 def custom_key_generator(backend, namespace, fn):
147 147 func_name = fn.__name__
148 148
149 149 def generate_key(*args):
150 150 backend_pref = getattr(backend, 'key_prefix', None) or 'backend_prefix'
151 151 namespace_pref = namespace or 'default_namespace'
152 152 arg_key = compute_key_from_params(*args)
153 153 final_key = f"{backend_pref}:{namespace_pref}:{func_name}_{arg_key}"
154 154
155 155 return final_key
156 156
157 157 return generate_key
158 158
159 159
160 160 def backend_key_generator(backend):
161 161 """
162 162 Special wrapper that also sends over the backend to the key generator
163 163 """
164 164 def wrapper(namespace, fn):
165 165 return custom_key_generator(backend, namespace, fn)
166 166 return wrapper
167 167
168 168
169 169 def get_or_create_region(region_name, region_namespace: str = None, use_async_runner=False):
170 170 from .backends import FileNamespaceBackend
171 171 from . import async_creation_runner
172 172
173 173 region_obj = region_meta.dogpile_cache_regions.get(region_name)
174 174 if not region_obj:
175 175 reg_keys = list(region_meta.dogpile_cache_regions.keys())
176 176 raise OSError(f'Region `{region_name}` not in configured regions: {reg_keys}.')
177 177
178 178 region_uid_name = f'{region_name}:{region_namespace}'
179 179
180 180 # Special case for ONLY the FileNamespaceBackend backend. We register one-file-per-region
181 181 if isinstance(region_obj.actual_backend, FileNamespaceBackend):
182 182 if not region_namespace:
183 183 raise ValueError(f'{FileNamespaceBackend} requires the region_namespace param to be specified')
184 184
185 185 region_exist = region_meta.dogpile_cache_regions.get(region_namespace)
186 186 if region_exist:
187 187 log.debug('Using already configured region: %s', region_namespace)
188 188 return region_exist
189 189
190 190 expiration_time = region_obj.expiration_time
191 191
192 192 cache_dir = region_meta.dogpile_config_defaults['cache_dir']
193 193 namespace_cache_dir = cache_dir
194 194
195 195 # we default the namespace_cache_dir to our default cache dir.
196 196 # however, if this backend is configured with filename= param, we prioritize that
197 197 # so all caches within that particular region, even namespaced ones, end up in the same path
198 198 if region_obj.actual_backend.filename:
199 199 namespace_cache_dir = os.path.dirname(region_obj.actual_backend.filename)
200 200
201 201 if not os.path.isdir(namespace_cache_dir):
202 202 os.makedirs(namespace_cache_dir)
203 203 new_region = make_region(
204 204 name=region_uid_name,
205 205 function_key_generator=backend_key_generator(region_obj.actual_backend)
206 206 )
207 207
208 208 namespace_filename = os.path.join(
209 209 namespace_cache_dir, f"{region_name}_{region_namespace}.cache_db")
210 210 # special type that allows 1db per namespace
211 211 new_region.configure(
212 212 backend='dogpile.cache.rc.file_namespace',
213 213 expiration_time=expiration_time,
214 214 arguments={"filename": namespace_filename}
215 215 )
216 216
217 217 # create and save in region caches
218 218 log.debug('configuring new region: %s', region_uid_name)
219 219 region_obj = region_meta.dogpile_cache_regions[region_namespace] = new_region
220 220
221 221 region_obj._default_namespace = region_namespace
222 222 if use_async_runner:
223 223 region_obj.async_creation_runner = async_creation_runner
224 224 return region_obj
225 225
226 226
227 227 def clear_cache_namespace(cache_region: str | RhodeCodeCacheRegion, cache_namespace_uid: str, method: str) -> int:
228 228 from . import CLEAR_DELETE, CLEAR_INVALIDATE
229 229
230 230 if not isinstance(cache_region, RhodeCodeCacheRegion):
231 231 cache_region = get_or_create_region(cache_region, cache_namespace_uid)
232 232 log.debug('clearing cache region: %s [prefix:%s] with method=%s',
233 233 cache_region, cache_namespace_uid, method)
234 234
235 235 num_affected_keys = 0
236 236
237 237 if method == CLEAR_INVALIDATE:
238 238 # NOTE: The CacheRegion.invalidate() method’s default mode of
239 239 # operation is to set a timestamp local to this CacheRegion in this Python process only.
240 240 # It does not impact other Python processes or regions as the timestamp is only stored locally in memory.
241 241 cache_region.invalidate(hard=True)
242 242
243 243 if method == CLEAR_DELETE:
244 244 num_affected_keys = cache_region.backend.delete_multi_by_prefix(prefix=cache_namespace_uid)
245 245 return num_affected_keys
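A hedged usage sketch of the two helpers above; the region name 'cache_repo' and the namespace are illustrative and assume the region was registered in region_meta.dogpile_cache_regions at configuration time:

    from . import CLEAR_DELETE  # same import clear_cache_namespace uses internally

    region = get_or_create_region('cache_repo', region_namespace='repo_1')

    @region.conditional_cache_on_arguments(condition=True)
    def expensive(namespace, arg):
        return arg * 2  # computed once per (namespace, arg), then cached

    expensive('repo_1', 21)
    # later, drop every key stored under the namespace:
    clear_cache_namespace('cache_repo', 'repo_1', method=CLEAR_DELETE)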
@@ -1,158 +1,158 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import typing
19 19 import base64
20 20 import logging
21 21
22 22
23 23 log = logging.getLogger(__name__)
24 24
25 25
26 26 def safe_int(val, default=None) -> int:
27 27 """
28 28 Returns int(val). If val is not convertible to an int, the default
29 29 is returned instead.
30 30
31 31 :param val:
32 32 :param default:
33 33 """
34 34
35 35 try:
36 36 val = int(val)
37 37 except (ValueError, TypeError):
38 38 val = default
39 39
40 40 return val
41 41
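A few doctest-style illustrations of the fallback behaviour:

    safe_int('42')       # -> 42
    safe_int('4.2')      # -> None, since int('4.2') raises ValueError
    safe_int(None, 0)    # -> 0, the TypeError falls back to the default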
42 42
43 43 def base64_to_str(text) -> str:
44 44 return safe_str(base64.encodebytes(safe_bytes(text))).strip()
45 45
46 46
47 47 def get_default_encodings() -> list[str]:
48 48 return ['utf8']
49 49
50 50
51 51 def safe_str(str_, to_encoding=None) -> str:
52 52 """
53 53 safe str function. Does a few tricks to turn str_ into a string
54 54
55 55 :param str_: value to turn into str
56 56 :param to_encoding: decode bytes using this encoding (utf8 by default)
57 57 """
58 58 if isinstance(str_, str):
59 59 return str_
60 60
61 61 # if it's not bytes, cast to str directly
62 62 if not isinstance(str_, bytes):
63 63 return str(str_)
64 64
65 65 to_encoding = to_encoding or get_default_encodings()
66 66 if not isinstance(to_encoding, (list, tuple)):
67 67 to_encoding = [to_encoding]
68 68
69 69 for enc in to_encoding:
70 70 try:
71 71 return str(str_, enc)
72 72 except UnicodeDecodeError:
73 73 pass
74 74
75 75 return str(str_, to_encoding[0], 'replace')
76 76
77 77
78 78 def safe_bytes(str_, from_encoding=None) -> bytes:
79 79 """
80 80 safe bytes function. Does a few tricks to turn str_ into bytes:
81 81
82 82 :param str_: string to encode
83 83 :param from_encoding: encode using this encoding (utf8 by default)
84 84 """
85 85 if isinstance(str_, bytes):
86 86 return str_
87 87
88 88 if not isinstance(str_, str):
89 89 raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')
90 90
91 91 from_encoding = from_encoding or get_default_encodings()
92 92 if not isinstance(from_encoding, (list, tuple)):
93 93 from_encoding = [from_encoding]
94 94
95 95 for enc in from_encoding:
96 96 try:
97 97 return str_.encode(enc)
98 98 except UnicodeEncodeError:  # str.encode() raises encode, not decode, errors
99 99 pass
100 100
101 101 return str_.encode(from_encoding[0], 'replace')
102 102
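Illustrative round-trips for the two helpers above under the default utf8 encoding list:

    safe_str(b'caf\xc3\xa9')   # -> 'café' (utf8 decode)
    safe_str(42)               # -> '42' (non-bytes fall back to str())
    safe_bytes('café')         # -> b'caf\xc3\xa9'
    safe_bytes(b'raw')         # -> b'raw' (bytes pass through unchanged)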
103 103
104 104 def ascii_bytes(str_, allow_bytes=False) -> bytes:
105 105 """
106 106 Simple conversion from str to bytes, with the assumption that str_ is pure ASCII.
107 107 Fails with UnicodeError on invalid input.
108 108 This should be used where encoding and "safe" ambiguity should be avoided.
109 109 Use it where strings have already been encoded in other ways but are still
110 110 unicode strings - for example hex, base64, json, urlencoded values, or
111 111 known identifiers.
112 112 """
113 113 if allow_bytes and isinstance(str_, bytes):
114 114 return str_
115 115
116 116 if not isinstance(str_, str):
117 117 raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')
118 118 return str_.encode('ascii')
119 119
120 120
121 121 def ascii_str(str_) -> str:
122 122 """
123 123 Simple conversion from bytes to str, with the assumption that str_ is pure ASCII.
124 124 Fails with UnicodeError on invalid input.
125 125 This should be used where encoding and "safe" ambiguity should be avoided.
126 126 Use it where bytes are known to be ASCII and a unicode string is wanted
127 127 without caring about encoding - for example hex, base64, urlencoded
128 128 values, or known identifiers.
129 129 """
130 130
131 131 if not isinstance(str_, bytes):
132 132 raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')
133 133 return str_.decode('ascii')
134 134
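For contrast with the 'safe' helpers, these strict variants raise instead of replacing:

    ascii_bytes('deadbeef')    # -> b'deadbeef'
    ascii_str(b'deadbeef')     # -> 'deadbeef'
    ascii_bytes('café')        # raises UnicodeEncodeError
    ascii_str(b'\xc3\xa9')     # raises UnicodeDecodeError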
135 135
136 136 def convert_to_str(data):
137 137 if isinstance(data, bytes):
138 138 return safe_str(data)
139 139 elif isinstance(data, tuple):
140 140 return tuple(convert_to_str(item) for item in data)
141 141 elif isinstance(data, list):
142 142 return list(convert_to_str(item) for item in data)
143 143 else:
144 144 return data
145 145
146 146
147 147 def splitnewlines(text: bytes):
148 148 """
149 149 like splitlines, but only split on newlines.
150 150 """
151 151
152 152 lines = [_l + b'\n' for _l in text.split(b'\n')]
153 153 if lines:
154 154 if lines[-1] == b'\n':
155 155 lines.pop()
156 156 else:
157 157 lines[-1] = lines[-1][:-1]
158 return lines
\ No newline at end of file
158 return lines
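Illustrative edge cases showing how splitnewlines differs from bytes.splitlines:

    splitnewlines(b'a\nb\n')   # -> [b'a\n', b'b\n']  terminators kept, no empty tail
    splitnewlines(b'a\nb')     # -> [b'a\n', b'b']    partial last line preserved
    splitnewlines(b'')         # -> []
    splitnewlines(b'a\r\nb')   # -> [b'a\r\n', b'b']  \r is not treated as a separator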
@@ -1,160 +1,160 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import tempfile
20 20
21 21 from svn import client
22 22 from svn import core
23 23 from svn import ra
24 24
25 25 from mercurial import error
26 26
27 from vcsserver.str_utils import safe_bytes
27 from vcsserver.lib.str_utils import safe_bytes
28 28
29 29 core.svn_config_ensure(None)
30 30 svn_config = core.svn_config_get_config(None)
31 31
32 32
33 33 class RaCallbacks(ra.Callbacks):
34 34 @staticmethod
35 35 def open_tmp_file(pool): # pragma: no cover
36 36 (fd, fn) = tempfile.mkstemp()
37 37 os.close(fd)
38 38 return fn
39 39
40 40 @staticmethod
41 41 def get_client_string(pool):
42 42 return b'RhodeCode-subversion-url-checker'
43 43
44 44
45 45 class SubversionException(Exception):
46 46 pass
47 47
48 48
49 49 class SubversionConnectionException(SubversionException):
50 50 """Exception raised when a generic error occurs when connecting to a repository."""
51 51
52 52
53 53 def normalize_url(url):
54 54 if not url:
55 55 return url
56 56 if url.startswith(b'svn+http://') or url.startswith(b'svn+https://'):
57 57 url = url[4:]
58 58 url = url.rstrip(b'/')
59 59 return url
60 60
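Illustrative inputs and outputs for normalize_url; the host is hypothetical:

    normalize_url(b'svn+https://svn.example.com/repo/')  # -> b'https://svn.example.com/repo'
    normalize_url(b'https://svn.example.com/repo')       # -> unchanged
    normalize_url(b'')                                   # -> b'' (falsy passthrough)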
61 61
62 62 def _create_auth_baton(pool):
63 63 """Create a Subversion authentication baton. """
64 64 # Give the client context baton a suite of authentication
64 64 # providers.
66 66 platform_specific = [
67 67 'svn_auth_get_gnome_keyring_simple_provider',
68 68 'svn_auth_get_gnome_keyring_ssl_client_cert_pw_provider',
69 69 'svn_auth_get_keychain_simple_provider',
70 70 'svn_auth_get_keychain_ssl_client_cert_pw_provider',
71 71 'svn_auth_get_kwallet_simple_provider',
72 72 'svn_auth_get_kwallet_ssl_client_cert_pw_provider',
73 73 'svn_auth_get_ssl_client_cert_file_provider',
74 74 'svn_auth_get_windows_simple_provider',
75 75 'svn_auth_get_windows_ssl_server_trust_provider',
76 76 ]
77 77
78 78 providers = []
79 79
80 80 for p in platform_specific:
81 81 if getattr(core, p, None) is not None:
82 82 try:
83 83 providers.append(getattr(core, p)())
84 84 except RuntimeError:
85 85 pass
86 86
87 87 providers += [
88 88 client.get_simple_provider(),
89 89 client.get_username_provider(),
90 90 client.get_ssl_client_cert_file_provider(),
91 91 client.get_ssl_client_cert_pw_file_provider(),
92 92 client.get_ssl_server_trust_file_provider(),
93 93 ]
94 94
95 95 return core.svn_auth_open(providers, pool)
96 96
97 97
98 98 class SubversionRepo:
99 99 """Wrapper for a Subversion repository.
100 100
101 101 It uses the SWIG Python bindings, see above for requirements.
102 102 """
103 103 def __init__(self, svn_url: bytes = b'', username: bytes = b'', password: bytes = b''):
104 104
105 105 self.username = username
106 106 self.password = password
107 107 self.svn_url = core.svn_path_canonicalize(svn_url)
108 108
109 109 self.auth_baton_pool = core.Pool()
110 110 self.auth_baton = _create_auth_baton(self.auth_baton_pool)
111 111 # self.init_ra_and_client() assumes that a pool already exists
112 112 self.pool = core.Pool()
113 113
114 114 self.ra = self.init_ra_and_client()
115 115 self.uuid = ra.get_uuid(self.ra, self.pool)
116 116
117 117 def init_ra_and_client(self):
118 118 """Initializes the RA and client layers, because sometimes getting
119 119 unified diffs runs the remote server out of open files.
120 120 """
121 121
122 122 if self.username:
123 123 core.svn_auth_set_parameter(self.auth_baton,
124 124 core.SVN_AUTH_PARAM_DEFAULT_USERNAME,
125 125 self.username)
126 126 if self.password:
127 127 core.svn_auth_set_parameter(self.auth_baton,
128 128 core.SVN_AUTH_PARAM_DEFAULT_PASSWORD,
129 129 self.password)
130 130
131 131 callbacks = RaCallbacks()
132 132 callbacks.auth_baton = self.auth_baton
133 133
134 134 try:
135 135 return ra.open2(self.svn_url, callbacks, svn_config, self.pool)
136 136 except SubversionException as e:
137 137 # e.child contains a detailed error message
138 138 msglist = []
139 139 svn_exc = e
140 140 while svn_exc:
141 141 if svn_exc.args[0]:
142 142 msglist.append(svn_exc.args[0])
143 143 svn_exc = svn_exc.child
144 144 msg = '\n'.join(msglist)
145 145 raise SubversionConnectionException(msg)
146 146
147 147
148 148 class svnremoterepo:
149 149 """ the dumb wrapper for actual Subversion repositories """
150 150
151 151 def __init__(self, username: bytes = b'', password: bytes = b'', svn_url: bytes = b''):
152 152 self.username = username or b''
153 153 self.password = password or b''
154 154 self.path = normalize_url(svn_url)
155 155
156 156 def svn(self):
157 157 try:
158 158 return SubversionRepo(self.path, self.username, self.password)
159 159 except SubversionConnectionException as e:
160 160 raise error.Abort(safe_bytes(e))
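A hedged usage sketch of the wrapper above; the URL and credentials are purely illustrative:

    remote = svnremoterepo(
        username=b'reader',
        password=b'secret',
        svn_url=b'svn+https://svn.example.com/repo',  # 'svn+' prefix gets stripped
    )
    repo = remote.svn()   # opens the RA session; raises error.Abort on failure
    print(repo.uuid)      # repository UUID obtained via ra.get_uuid()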
1 NO CONTENT: file renamed from vcsserver/type_utils.py to vcsserver/lib/type_utils.py
@@ -1,417 +1,417 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 """Handles the Git smart protocol."""
19 19
20 20 import os
21 21 import socket
22 22 import logging
23 23
24 24 import dulwich.protocol
25 25 from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K
26 26 from webob import Request, Response, exc
27 27
28 from vcsserver.lib.rc_json import json
28 from vcsserver.lib.ext_json import json
29 29 from vcsserver import hooks, subprocessio
30 from vcsserver.str_utils import ascii_bytes
30 from vcsserver.lib.str_utils import ascii_bytes
31 31
32 32
33 33 log = logging.getLogger(__name__)
34 34
35 35
36 36 class FileWrapper:
37 37 """File wrapper that ensures how much data is read from it."""
38 38
39 39 def __init__(self, fd, content_length):
40 40 self.fd = fd
41 41 self.content_length = content_length
42 42 self.remain = content_length
43 43
44 44 def read(self, size):
45 45 if size <= self.remain:
46 46 try:
47 47 data = self.fd.read(size)
48 48 except socket.error:
49 49 raise IOError(self)
50 50 self.remain -= size
51 51 elif self.remain:
52 52 data = self.fd.read(self.remain)
53 53 self.remain = 0
54 54 else:
55 55 data = None
56 56 return data
57 57
58 58 def __repr__(self):
59 59 return '<FileWrapper {} len: {}, read: {}>'.format(
60 60 self.fd, self.content_length, self.content_length - self.remain
61 61 )
62 62
63 63
64 64 class GitRepository:
65 65 """WSGI app for handling Git smart protocol endpoints."""
66 66
67 67 git_folder_signature = frozenset(('config', 'head', 'info', 'objects', 'refs'))
68 68 commands = frozenset(('git-upload-pack', 'git-receive-pack'))
69 69 valid_accepts = frozenset(f'application/x-{c}-result' for c in commands)
70 70
71 71 # The last bytes are the SHA1 of the first 12 bytes.
72 72 EMPTY_PACK = (
73 73 b'PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08' +
74 74 b'\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
75 75 )
76 76 FLUSH_PACKET = b"0000"
77 77
78 78 SIDE_BAND_CAPS = frozenset((CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K))
79 79
80 80 def __init__(self, repo_name, content_path, git_path, update_server_info, extras):
81 81 files = frozenset(f.lower() for f in os.listdir(content_path))
82 82 valid_dir_signature = self.git_folder_signature.issubset(files)
83 83
84 84 if not valid_dir_signature:
85 85 raise OSError(f'{content_path} missing git signature')
86 86
87 87 self.content_path = content_path
88 88 self.repo_name = repo_name
89 89 self.extras = extras
90 90 self.git_path = git_path
91 91 self.update_server_info = update_server_info
92 92
93 93 def _get_fixedpath(self, path):
94 94 """
95 95 Small fix for repo_path
96 96
97 97 :param path:
98 98 """
99 99 path = path.split(self.repo_name, 1)[-1]
100 100 if path.startswith('.git'):
101 101 # for bare repos we still get the .git prefix inside, we skip it
102 102 # here, and remove from the service command
103 103 path = path[4:]
104 104
105 105 return path.strip('/')
106 106
107 107 def inforefs(self, request, unused_environ):
108 108 """
109 109 WSGI Response producer for HTTP GET Git Smart
110 110 HTTP /info/refs request.
111 111 """
112 112
113 113 git_command = request.GET.get('service')
114 114 if git_command not in self.commands:
115 115 log.debug('command %s not allowed', git_command)
116 116 return exc.HTTPForbidden()
117 117
118 118 # please, resist the urge to add '\n' to git capture and increment
119 119 # line count by 1.
120 120 # by git docs: Documentation/technical/http-protocol.txt#L214 \n is
121 121 # a part of protocol.
122 122 # The code in the Git client not only does NOT need '\n', but actually
123 123 # blows up if you sprinkle "flush" (0000) as "0001\n".
124 124 # It reads binary, per number of bytes specified.
125 125 # if you do add '\n' as part of data, count it.
126 126 server_advert = f'# service={git_command}\n'
127 127 packet_len = hex(len(server_advert) + 4)[2:].rjust(4, '0').lower()
128 128 try:
129 129 gitenv = dict(os.environ)
130 130 # forget all configs
131 131 gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
132 132 command = [self.git_path, git_command[4:], '--stateless-rpc',
133 133 '--advertise-refs', self.content_path]
134 134 out = subprocessio.SubprocessIOChunker(
135 135 command,
136 136 env=gitenv,
137 137 starting_values=[ascii_bytes(packet_len + server_advert) + self.FLUSH_PACKET],
138 138 shell=False
139 139 )
140 140 except OSError:
141 141 log.exception('Error processing command')
142 142 raise exc.HTTPExpectationFailed()
143 143
144 144 resp = Response()
145 145 resp.content_type = f'application/x-{git_command}-advertisement'
146 146 resp.charset = None
147 147 resp.app_iter = out
148 148
149 149 return resp
150 150
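To make the length prefix concrete, a small worked example of the pkt-line framing computed in inforefs for git-upload-pack:

    server_advert = '# service=git-upload-pack\n'   # 26 bytes
    packet_len = hex(len(server_advert) + 4)[2:].rjust(4, '0').lower()
    # 26 + 4 = 30 -> '001e' (the 4 hex length chars count themselves)
    first_pkt = packet_len + server_advert          # '001e# service=git-upload-pack\n'
    # the FLUSH_PACKET b'0000' then separates the advert from the refs that follow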
151 151 def _get_want_capabilities(self, request):
152 152 """Read the capabilities found in the first want line of the request."""
153 153 pos = request.body_file_seekable.tell()
154 154 first_line = request.body_file_seekable.readline()
155 155 request.body_file_seekable.seek(pos)
156 156
157 157 return frozenset(
158 158 dulwich.protocol.extract_want_line_capabilities(first_line)[1])
159 159
160 160 def _build_failed_pre_pull_response(self, capabilities, pre_pull_messages):
161 161 """
162 162 Construct a response with an empty PACK file.
163 163
164 164 We use an empty PACK file, as that would trigger the failure of the pull
165 165 or clone command.
166 166
167 167 We also print in the error output a message explaining why the command
168 168 was aborted.
169 169
170 170 If, additionally, the client accepts messages, we send it the output
171 171 of the pre-pull hook.
172 172
173 173 Note that for clients not supporting side-band we just send them the
174 174 empty PACK file.
175 175 """
176 176
177 177 if self.SIDE_BAND_CAPS.intersection(capabilities):
178 178 response = []
179 179 proto = dulwich.protocol.Protocol(None, response.append)
180 180 proto.write_pkt_line(dulwich.protocol.NAK_LINE)
181 181
182 182 self._write_sideband_to_proto(proto, ascii_bytes(pre_pull_messages, allow_bytes=True), capabilities)
183 183 # N.B.(skreft): Do not change the sideband channel to 3, as that
184 184 # produces a fatal error in the client:
185 185 # fatal: error in sideband demultiplexer
186 186 proto.write_sideband(
187 187 dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS,
188 188 ascii_bytes('Pre pull hook failed: aborting\n', allow_bytes=True))
189 189 proto.write_sideband(
190 190 dulwich.protocol.SIDE_BAND_CHANNEL_DATA,
191 191 ascii_bytes(self.EMPTY_PACK, allow_bytes=True))
192 192
193 193 # writes b"0000" as default
194 194 proto.write_pkt_line(None)
195 195
196 196 return response
197 197 else:
198 198 return [ascii_bytes(self.EMPTY_PACK, allow_bytes=True)]
199 199
200 200 def _build_post_pull_response(self, response, capabilities, start_message, end_message):
201 201 """
202 202 Given a list response we inject the post-pull messages.
203 203
204 204 We only inject the messages if the client supports sideband, and the
205 205 response has the format:
206 206 0008NAK\n...0000
207 207
208 208 Note that we do not check the no-progress capability: git sends it by
209 209 default, which would effectively block all messages.
210 210 """
211 211
212 212 if not self.SIDE_BAND_CAPS.intersection(capabilities):
213 213 return response
214 214
215 215 if not start_message and not end_message:
216 216 return response
217 217
218 218 try:
219 219 iter(response)
220 220 # iterator probably will work, we continue
221 221 except TypeError:
222 222 raise TypeError(f'response must be an iterator: got {type(response)}')
223 223 if isinstance(response, (list, tuple)):
224 224 raise TypeError(f'response must be an iterator: got {type(response)}')
225 225
226 226 def injected_response():
227 227
228 228 do_loop = 1
229 229 header_injected = 0
230 230 next_item = None
231 231 has_item = False
232 232 item = b''
233 233
234 234 while do_loop:
235 235
236 236 try:
237 237 next_item = next(response)
238 238 except StopIteration:
239 239 do_loop = 0
240 240
241 241 if has_item:
242 242 # last item ! alter it now
243 243 if do_loop == 0 and item.endswith(self.FLUSH_PACKET):
244 244 new_response = [item[:-4]]
245 245 new_response.extend(self._get_messages(end_message, capabilities))
246 246 new_response.append(self.FLUSH_PACKET)
247 247 item = b''.join(new_response)
248 248
249 249 yield item
250 250
251 251 has_item = True
252 252 item = next_item
253 253
254 254 # alter item if it's the initial chunk
255 255 if not header_injected and item.startswith(b'0008NAK\n'):
256 256 new_response = [b'0008NAK\n']
257 257 new_response.extend(self._get_messages(start_message, capabilities))
258 258 new_response.append(item[8:])
259 259 item = b''.join(new_response)
260 260 header_injected = 1
261 261
262 262 return injected_response()
263 263
264 264 def _write_sideband_to_proto(self, proto, data, capabilities):
265 265 """
266 266 Write the data to the proto's sideband number 2 == SIDE_BAND_CHANNEL_PROGRESS
267 267
268 268 We do not use dulwich's write_sideband directly as it only supports
269 269 side-band-64k.
270 270 """
271 271 if not data:
272 272 return
273 273
274 274 # N.B.(skreft): The values below are explained in the pack protocol
275 275 # documentation, section Packfile Data.
276 276 # https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
277 277 if CAPABILITY_SIDE_BAND_64K in capabilities:
278 278 chunk_size = 65515
279 279 elif CAPABILITY_SIDE_BAND in capabilities:
280 280 chunk_size = 995
281 281 else:
282 282 return
283 283
284 284 chunker = (data[i:i + chunk_size] for i in range(0, len(data), chunk_size))
285 285
286 286 for chunk in chunker:
287 287 proto.write_sideband(dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, ascii_bytes(chunk, allow_bytes=True))
288 288
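Why 65515 and 995: a pkt-line carries at most 65520 (side-band-64k) or 1000 (side-band) bytes including the 4 length chars and the 1 band byte, which leaves exactly those payload sizes. An illustrative chunking of a 100 kB message:

    data = b'x' * 100_000
    chunk_size = 65515  # side-band-64k payload limit
    chunks = [data[i:i + chunk_size] for i in range(0, len(data), chunk_size)]
    # -> two chunks of 65515 and 34485 bytes, each written to band 2 (progress)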
289 289 def _get_messages(self, data, capabilities):
290 290 """Return a list with packets for sending data in sideband number 2."""
291 291 response = []
292 292 proto = dulwich.protocol.Protocol(None, response.append)
293 293
294 294 self._write_sideband_to_proto(proto, data, capabilities)
295 295
296 296 return response
297 297
298 298 def backend(self, request, environ):
299 299 """
300 300 WSGI Response producer for HTTP POST Git Smart HTTP requests.
301 301 Reads commands and data from HTTP POST's body.
302 302 Returns an iterator object with the contents of the git
303 303 command's stdout response.
304 304 """
305 305 # TODO(skreft): think how we could detect an HTTPLockedException, as
306 306 # we probably want to have the same mechanism used by mercurial and
307 307 # simplevcs.
308 308 # For that we would need to parse the output of the command looking for
309 309 # some signs of the HTTPLockedError, parse the data and reraise it in
310 310 # pygrack. However, that would interfere with the streaming.
311 311 #
312 312 # Now the output of a blocked push is:
313 313 # Pushing to http://test_regular:test12@127.0.0.1:5001/vcs_test_git
314 314 # POST git-receive-pack (1047 bytes)
315 315 # remote: ERROR: Repository `vcs_test_git` locked by user `test_admin`. Reason:`lock_auto`
316 316 # To http://test_regular:test12@127.0.0.1:5001/vcs_test_git
317 317 # ! [remote rejected] master -> master (pre-receive hook declined)
318 318 # error: failed to push some refs to 'http://test_regular:test12@127.0.0.1:5001/vcs_test_git'
319 319
320 320 git_command = self._get_fixedpath(request.path_info)
321 321 if git_command not in self.commands:
322 322 log.debug('command %s not allowed', git_command)
323 323 return exc.HTTPForbidden()
324 324
325 325 capabilities = None
326 326 if git_command == 'git-upload-pack':
327 327 capabilities = self._get_want_capabilities(request)
328 328
329 329 if 'CONTENT_LENGTH' in environ:
330 330 inputstream = FileWrapper(request.body_file_seekable,
331 331 request.content_length)
332 332 else:
333 333 inputstream = request.body_file_seekable
334 334
335 335 resp = Response()
336 336 resp.content_type = f'application/x-{git_command}-result'
337 337 resp.charset = None
338 338
339 339 pre_pull_messages = ''
340 340 # Upload-pack == clone
341 341 if git_command == 'git-upload-pack':
342 342 hook_response = hooks.git_pre_pull(self.extras)
343 343 if hook_response.status != 0:
344 344 pre_pull_messages = hook_response.output
345 345 resp.app_iter = self._build_failed_pre_pull_response(
346 346 capabilities, pre_pull_messages)
347 347 return resp
348 348
349 349 gitenv = dict(os.environ)
350 350 # forget all configs
351 351 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
352 352 gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
353 353 cmd = [self.git_path, git_command[4:], '--stateless-rpc',
354 354 self.content_path]
355 355 log.debug('handling cmd %s', cmd)
356 356
357 357 out = subprocessio.SubprocessIOChunker(
358 358 cmd,
359 359 input_stream=inputstream,
360 360 env=gitenv,
361 361 cwd=self.content_path,
362 362 shell=False,
363 363 fail_on_stderr=False,
364 364 fail_on_return_code=False
365 365 )
366 366
367 367 if self.update_server_info and git_command == 'git-receive-pack':
368 368 # We need to fully consume the iterator here, as the
369 369 # update-server-info command needs to be run after the push.
370 370 out = list(out)
371 371
372 372 # Updating refs manually after each push.
373 373 # This is required as some clients are exposing Git repos internally
374 374 # with the dumb protocol.
375 375 cmd = [self.git_path, 'update-server-info']
376 376 log.debug('handling cmd %s', cmd)
377 377 output = subprocessio.SubprocessIOChunker(
378 378 cmd,
379 379 input_stream=inputstream,
380 380 env=gitenv,
381 381 cwd=self.content_path,
382 382 shell=False,
383 383 fail_on_stderr=False,
384 384 fail_on_return_code=False
385 385 )
386 386 # Consume all the output so the subprocess finishes
387 387 for _ in output:
388 388 pass
389 389
390 390 # Upload-pack == clone
391 391 if git_command == 'git-upload-pack':
392 392 hook_response = hooks.git_post_pull(self.extras)
393 393 post_pull_messages = hook_response.output
394 394 resp.app_iter = self._build_post_pull_response(out, capabilities, pre_pull_messages, post_pull_messages)
395 395 else:
396 396 resp.app_iter = out
397 397
398 398 return resp
399 399
400 400 def __call__(self, environ, start_response):
401 401 request = Request(environ)
402 402 _path = self._get_fixedpath(request.path_info)
403 403 if _path.startswith('info/refs'):
404 404 app = self.inforefs
405 405 else:
406 406 app = self.backend
407 407
408 408 try:
409 409 resp = app(request, environ)
410 410 except exc.HTTPException as error:
411 411 log.exception('HTTP Error')
412 412 resp = error
413 413 except Exception:
414 414 log.exception('Unknown error')
415 415 resp = exc.HTTPInternalServerError()
416 416
417 417 return resp(environ, start_response)
@@ -1,1519 +1,1526 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23 import traceback
24 24 import urllib.request
25 25 import urllib.parse
26 26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 40
41 41 import rhodecode
42 42 from vcsserver import exceptions, settings, subprocessio
43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str, splitnewlines
43 from vcsserver.lib.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str, splitnewlines
44 44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
45 45 from vcsserver.hgcompat import (
46 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 47 from vcsserver.git_lfs.lib import LFSOidStore
48 48 from vcsserver.vcs_base import RemoteBase
49 49
50 50 DIR_STAT = stat.S_IFDIR
51 51 FILE_MODE = stat.S_IFMT
52 52 GIT_LINK = objects.S_IFGITLINK
53 53 PEELED_REF_MARKER = b'^{}'
54 54 HEAD_MARKER = b'HEAD'
55 55
56 56 log = logging.getLogger(__name__)
57 57
58 58
59 59 def reraise_safe_exceptions(func):
60 60 """Converts Dulwich exceptions to something neutral."""
61 61
62 62 @wraps(func)
63 63 def wrapper(*args, **kwargs):
64 64 try:
65 65 return func(*args, **kwargs)
66 66 except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as e:
67 67 exc = exceptions.LookupException(org_exc=e)
68 68 raise exc(safe_str(e))
69 69 except (HangupException, UnexpectedCommandError) as e:
70 70 exc = exceptions.VcsException(org_exc=e)
71 71 raise exc(safe_str(e))
72 72 except Exception:
73 73 # NOTE(marcink): because of how dulwich handles some exceptions
74 74 # (KeyError on empty repos), we cannot track this and catch all
75 75 # exceptions; they may be exceptions from other handlers
76 76 #if not hasattr(e, '_vcs_kind'):
77 77 #log.exception("Unhandled exception in git remote call")
78 78 #raise_from_original(exceptions.UnhandledException)
79 79 raise
80 80 return wrapper
81 81
82 82
83 83 class Repo(DulwichRepo):
84 84 """
85 85 A wrapper for dulwich Repo class.
86 86
87 87 Since dulwich sometimes keeps .idx file descriptors open, this can lead to
88 88 a "Too many open files" error. We need to close all opened file descriptors
89 89 once the repo object is destroyed.
90 90 """
91 91 def __del__(self):
92 92 if hasattr(self, 'object_store'):
93 93 self.close()
94 94
95 95
96 96 class Repository(LibGit2Repo):
97 97
98 98 def __enter__(self):
99 99 return self
100 100
101 101 def __exit__(self, exc_type, exc_val, exc_tb):
102 102 self.free()
103 103
104 104
105 105 class GitFactory(RepoFactory):
106 106 repo_type = 'git'
107 107
108 108 def _create_repo(self, wire, create, use_libgit2=False):
109 109 if use_libgit2:
110 110 repo = Repository(safe_bytes(wire['path']))
111 111 else:
112 112 # dulwich mode
113 113 repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
114 114 repo = Repo(repo_path)
115 115
116 116 log.debug('repository created: got GIT object: %s', repo)
117 117 return repo
118 118
119 119 def repo(self, wire, create=False, use_libgit2=False):
120 120 """
121 121 Get a repository instance for the given path.
122 122 """
123 123 return self._create_repo(wire, create, use_libgit2)
124 124
125 125 def repo_libgit2(self, wire):
126 126 return self.repo(wire, use_libgit2=True)
127 127
128 128
129 129 def create_signature_from_string(author_str, **kwargs):
130 130 """
131 131 Creates a pygit2.Signature object from a string of the format 'Name <email>'.
132 132
133 133 :param author_str: String of the format 'Name <email>'
134 134 :return: pygit2.Signature object
135 135 """
136 136 match = re.match(r'^(.+) <(.+)>$', author_str)
137 137 if match is None:
138 138 raise ValueError(f"Invalid format: {author_str}")
139 139
140 140 name, email = match.groups()
141 141 return pygit2.Signature(name, email, **kwargs)
142 142
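Hypothetical usage of the parser above; the author string and timestamp are illustrative:

    sig = create_signature_from_string(
        'Jane Doe <jane@example.com>', time=1700000000, offset=120)
    assert (sig.name, sig.email) == ('Jane Doe', 'jane@example.com')
    create_signature_from_string('no-email-here')  # raises ValueError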
143 143
144 144 def get_obfuscated_url(url_obj):
145 145 url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd
146 146 url_obj.query = obfuscate_qs(url_obj.query)
147 147 obfuscated_uri = str(url_obj)
148 148 return obfuscated_uri
149 149
150 150
151 151 class GitRemote(RemoteBase):
152 152
153 153 def __init__(self, factory):
154 154 self._factory = factory
155 155 self._bulk_methods = {
156 156 "date": self.date,
157 157 "author": self.author,
158 158 "branch": self.branch,
159 159 "message": self.message,
160 160 "parents": self.parents,
161 161 "_commit": self.revision,
162 162 }
163 163 self._bulk_file_methods = {
164 164 "size": self.get_node_size,
165 165 "data": self.get_node_data,
166 166 "flags": self.get_node_flags,
167 167 "is_binary": self.get_node_is_binary,
168 168 "md5": self.md5_hash
169 169 }
170 170
171 171 def _wire_to_config(self, wire):
172 172 if 'config' in wire:
173 173 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
174 174 return {}
175 175
176 176 def _remote_conf(self, config):
177 177 params = [
178 178 '-c', 'core.askpass=""',
179 179 ]
180 180 config_attrs = {
181 181 'vcs_ssl_dir': 'http.sslCAinfo={}',
182 182 'vcs_git_lfs_store_location': 'lfs.storage={}'
183 183 }
184 184 for key, param in config_attrs.items():
185 185 if value := config.get(key):
186 186 params.extend(['-c', param.format(value)])
187 187 return params
188 188
189 189 @reraise_safe_exceptions
190 190 def discover_git_version(self):
191 191 stdout, _ = self.run_git_command(
192 192 {}, ['--version'], _bare=True, _safe=True)
193 193 prefix = b'git version'
194 194 if stdout.startswith(prefix):
195 195 stdout = stdout[len(prefix):]
196 196 return safe_str(stdout.strip())
197 197
198 198 @reraise_safe_exceptions
199 199 def is_empty(self, wire):
200 200 repo_init = self._factory.repo_libgit2(wire)
201 201 with repo_init as repo:
202 202 try:
203 203 has_head = repo.head.name
204 204 if has_head:
205 205 return False
206 206
207 207 # NOTE(marcink): check again using more expensive method
208 208 return repo.is_empty
209 209 except Exception:
210 210 pass
211 211
212 212 return True
213 213
214 214 @reraise_safe_exceptions
215 215 def assert_correct_path(self, wire):
216 216 cache_on, context_uid, repo_id = self._cache_on(wire)
217 217 region = self._region(wire)
218 218
219 219 @region.conditional_cache_on_arguments(condition=cache_on)
220 220 def _assert_correct_path(_context_uid, _repo_id, fast_check):
221 221 if fast_check:
222 222 path = safe_str(wire['path'])
223 223 if pygit2.discover_repository(path):
224 224 return True
225 225 return False
226 226 else:
227 227 try:
228 228 repo_init = self._factory.repo_libgit2(wire)
229 229 with repo_init:
230 230 pass
231 231 except pygit2.GitError:
232 232 path = wire.get('path')
233 233 tb = traceback.format_exc()
234 234 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
235 235 return False
236 236 return True
237 237
238 238 return _assert_correct_path(context_uid, repo_id, True)
239 239
240 240 @reraise_safe_exceptions
241 241 def bare(self, wire):
242 242 repo_init = self._factory.repo_libgit2(wire)
243 243 with repo_init as repo:
244 244 return repo.is_bare
245 245
246 246 @reraise_safe_exceptions
247 247 def get_node_data(self, wire, commit_id, path):
248 248 repo_init = self._factory.repo_libgit2(wire)
249 249 with repo_init as repo:
250 250 commit = repo[commit_id]
251 251 blob_obj = commit.tree[path]
252 252
253 253 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
254 254 raise exceptions.LookupException()(
255 255 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
256 256
257 257 return BytesEnvelope(blob_obj.data)
258 258
259 259 @reraise_safe_exceptions
260 260 def get_node_size(self, wire, commit_id, path):
261 261 repo_init = self._factory.repo_libgit2(wire)
262 262 with repo_init as repo:
263 263 commit = repo[commit_id]
264 264 blob_obj = commit.tree[path]
265 265
266 266 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
267 267 raise exceptions.LookupException()(
268 268 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
269 269
270 270 return blob_obj.size
271 271
272 272 @reraise_safe_exceptions
273 273 def get_node_flags(self, wire, commit_id, path):
274 274 repo_init = self._factory.repo_libgit2(wire)
275 275 with repo_init as repo:
276 276 commit = repo[commit_id]
277 277 blob_obj = commit.tree[path]
278 278
279 279 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
280 280 raise exceptions.LookupException()(
281 281 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
282 282
283 283 return blob_obj.filemode
284 284
285 285 @reraise_safe_exceptions
286 286 def get_node_is_binary(self, wire, commit_id, path):
287 287 repo_init = self._factory.repo_libgit2(wire)
288 288 with repo_init as repo:
289 289 commit = repo[commit_id]
290 290 blob_obj = commit.tree[path]
291 291
292 292 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
293 293 raise exceptions.LookupException()(
294 294 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
295 295
296 296 return blob_obj.is_binary
297 297
298 298 @reraise_safe_exceptions
299 299 def blob_as_pretty_string(self, wire, sha):
300 300 repo_init = self._factory.repo_libgit2(wire)
301 301 with repo_init as repo:
302 302 blob_obj = repo[sha]
303 303 return BytesEnvelope(blob_obj.data)
304 304
305 305 @reraise_safe_exceptions
306 306 def blob_raw_length(self, wire, sha):
307 307 cache_on, context_uid, repo_id = self._cache_on(wire)
308 308 region = self._region(wire)
309 309
310 310 @region.conditional_cache_on_arguments(condition=cache_on)
311 311 def _blob_raw_length(_repo_id, _sha):
312 312
313 313 repo_init = self._factory.repo_libgit2(wire)
314 314 with repo_init as repo:
315 315 blob = repo[sha]
316 316 return blob.size
317 317
318 318 return _blob_raw_length(repo_id, sha)
319 319
320 320 def _parse_lfs_pointer(self, raw_content):
321 321 spec_string = b'version https://git-lfs.github.com/spec'
322 322 if raw_content and raw_content.startswith(spec_string):
323 323
324 324 pattern = re.compile(rb"""
325 325 (?:\n)?
326 326 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
327 327 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
328 328 ^size[ ](?P<oid_size>[0-9]+)\n
329 329 (?:\n)?
330 330 """, re.VERBOSE | re.MULTILINE)
331 331 match = pattern.match(raw_content)
332 332 if match:
333 333 return match.groupdict()
334 334
335 335 return {}
336 336
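An illustrative well-formed pointer accepted by the pattern above; the oid is a dummy 64-hex-char value:

    pointer = (
        b'version https://git-lfs.github.com/spec/v1\n'
        b'oid sha256:' + b'a' * 64 + b'\n'
        b'size 12345\n'
    )
    # _parse_lfs_pointer(pointer)
    # -> {'spec_ver': b'v1', 'oid_hash': b'a' * 64, 'oid_size': b'12345'}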
337 337 @reraise_safe_exceptions
338 338 def is_large_file(self, wire, commit_id):
339 339 cache_on, context_uid, repo_id = self._cache_on(wire)
340 340 region = self._region(wire)
341 341
342 342 @region.conditional_cache_on_arguments(condition=cache_on)
343 343 def _is_large_file(_repo_id, _sha):
344 344 repo_init = self._factory.repo_libgit2(wire)
345 345 with repo_init as repo:
346 346 blob = repo[commit_id]
347 347 if blob.is_binary:
348 348 return {}
349 349
350 350 return self._parse_lfs_pointer(blob.data)
351 351
352 352 return _is_large_file(repo_id, commit_id)
353 353
354 354 @reraise_safe_exceptions
355 355 def is_binary(self, wire, tree_id):
356 356 cache_on, context_uid, repo_id = self._cache_on(wire)
357 357 region = self._region(wire)
358 358
359 359 @region.conditional_cache_on_arguments(condition=cache_on)
360 360 def _is_binary(_repo_id, _tree_id):
361 361 repo_init = self._factory.repo_libgit2(wire)
362 362 with repo_init as repo:
363 363 blob_obj = repo[tree_id]
364 364 return blob_obj.is_binary
365 365
366 366 return _is_binary(repo_id, tree_id)
367 367
368 368 @reraise_safe_exceptions
369 369 def md5_hash(self, wire, commit_id, path):
370 370 cache_on, context_uid, repo_id = self._cache_on(wire)
371 371 region = self._region(wire)
372 372
373 373 @region.conditional_cache_on_arguments(condition=cache_on)
374 374 def _md5_hash(_repo_id, _commit_id, _path):
375 375 repo_init = self._factory.repo_libgit2(wire)
376 376 with repo_init as repo:
377 377 commit = repo[_commit_id]
378 378 blob_obj = commit.tree[_path]
379 379
380 380 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
381 381 raise exceptions.LookupException()(
382 382 f'Tree for commit_id:{_commit_id} is not a blob: {blob_obj.type_str}')
383 383
384 384 return ''
385 385
386 386 return _md5_hash(repo_id, commit_id, path)
387 387
388 388 @reraise_safe_exceptions
389 389 def in_largefiles_store(self, wire, oid):
390 390 conf = self._wire_to_config(wire)
391 391 repo_init = self._factory.repo_libgit2(wire)
392 392 with repo_init as repo:
393 393 repo_name = repo.path
394 394
395 395 store_location = conf.get('vcs_git_lfs_store_location')
396 396 if store_location:
397 397
398 398 store = LFSOidStore(
399 399 oid=oid, repo=repo_name, store_location=store_location)
400 400 return store.has_oid()
401 401
402 402 return False
403 403
404 404 @reraise_safe_exceptions
405 405 def store_path(self, wire, oid):
406 406 conf = self._wire_to_config(wire)
407 407 repo_init = self._factory.repo_libgit2(wire)
408 408 with repo_init as repo:
409 409 repo_name = repo.path
410 410
411 411 store_location = conf.get('vcs_git_lfs_store_location')
412 412 if store_location:
413 413 store = LFSOidStore(
414 414 oid=oid, repo=repo_name, store_location=store_location)
415 415 return store.oid_path
416 416 raise ValueError(f'Unable to fetch oid with path {oid}')
417 417
418 418 @reraise_safe_exceptions
419 419 def bulk_request(self, wire, rev, pre_load):
420 420 cache_on, context_uid, repo_id = self._cache_on(wire)
421 421 region = self._region(wire)
422 422
423 423 @region.conditional_cache_on_arguments(condition=cache_on)
424 424 def _bulk_request(_repo_id, _rev, _pre_load):
425 425 result = {}
426 426 for attr in pre_load:
427 427 try:
428 428 method = self._bulk_methods[attr]
429 429 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
430 430 args = [wire, rev]
431 431 result[attr] = method(*args)
432 432 except KeyError as e:
433 433 raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
434 434 return result
435 435
436 436 return _bulk_request(repo_id, rev, sorted(pre_load))
437 437
438 438 @reraise_safe_exceptions
439 439 def bulk_file_request(self, wire, commit_id, path, pre_load):
440 440 cache_on, context_uid, repo_id = self._cache_on(wire)
441 441 region = self._region(wire)
442 442
443 443 @region.conditional_cache_on_arguments(condition=cache_on)
444 444 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
445 445 result = {}
446 446 for attr in pre_load:
447 447 try:
448 448 method = self._bulk_file_methods[attr]
449 449 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
450 450 result[attr] = method(wire, _commit_id, _path)
451 451 except KeyError as e:
452 452 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
453 453 return result
454 454
455 455 return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
456 456
457 457 def _build_opener(self, url: str):
458 458 handlers = []
459 459 url_obj = url_parser(safe_bytes(url))
460 460 authinfo = url_obj.authinfo()[1]
461 461
462 462 if authinfo:
463 463 # create a password manager
464 464 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
465 465 passmgr.add_password(*convert_to_str(authinfo))
466 466
467 467 handlers.extend((httpbasicauthhandler(passmgr),
468 468 httpdigestauthhandler(passmgr)))
469 469
470 470 return urllib.request.build_opener(*handlers)
471 471
472 472 @reraise_safe_exceptions
473 473 def check_url(self, url, config):
474 474 url_obj = url_parser(safe_bytes(url))
475 475
476 476 test_uri = safe_str(url_obj.authinfo()[0])
477 477 obfuscated_uri = get_obfuscated_url(url_obj)
478 478
479 479 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
480 480
481 481 if not test_uri.endswith('info/refs'):
482 482 test_uri = test_uri.rstrip('/') + '/info/refs'
483 483
484 484 o = self._build_opener(url=url)
485 485 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
486 486
487 487 q = {"service": 'git-upload-pack'}
488 488 qs = f'?{urllib.parse.urlencode(q)}'
489 489 cu = f"{test_uri}{qs}"
490 490
491 491 try:
492 492 req = urllib.request.Request(cu, None, {})
493 493 log.debug("Trying to open URL %s", obfuscated_uri)
494 494 resp = o.open(req)
495 495 if resp.code != 200:
496 496 raise exceptions.URLError()('Return Code is not 200')
497 497 except Exception as e:
498 498 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
499 499 # means it cannot be cloned
500 500 raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")
501 501
502 502 # now detect if it's proper git repo
503 503 gitdata: bytes = resp.read()
504 504
505 505 if b'service=git-upload-pack' in gitdata:
506 506 pass
507 507 elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
508 508 # old style git can return some other format!
509 509 pass
510 510 else:
511 511 e = None
512 512 raise exceptions.URLError(e)(
513 513 f"url [{obfuscated_uri}] does not look like an hg repo org_exc: {e}")
514 514
515 515 return True
516 516
517 517 @reraise_safe_exceptions
518 518 def clone(self, wire, url, deferred, valid_refs, update_after_clone):
519 519 # TODO(marcink): deprecate this method. Last I checked we don't use it anymore
520 520 remote_refs = self.pull(wire, url, apply_refs=False)
521 521 repo = self._factory.repo(wire)
522 522 if isinstance(valid_refs, list):
523 523 valid_refs = tuple(valid_refs)
524 524
525 525 for k in remote_refs:
526 526 # only parse heads/tags and skip so called deferred tags
527 527 if k.startswith(valid_refs) and not k.endswith(deferred):
528 528 repo[k] = remote_refs[k]
529 529
530 530 if update_after_clone:
531 531 # we want to checkout HEAD
532 532 repo["HEAD"] = remote_refs["HEAD"]
533 533 index.build_index_from_tree(repo.path, repo.index_path(),
534 534 repo.object_store, repo["HEAD"].tree)
535 535
536 536 @reraise_safe_exceptions
537 537 def branch(self, wire, commit_id):
538 538 cache_on, context_uid, repo_id = self._cache_on(wire)
539 539 region = self._region(wire)
540 540
541 541 @region.conditional_cache_on_arguments(condition=cache_on)
542 542 def _branch(_context_uid, _repo_id, _commit_id):
543 543 regex = re.compile('^refs/heads')
544 544
545 545 def filter_with(ref):
546 546 return regex.match(ref[0]) and ref[1] == _commit_id
547 547
548 548 branches = list(filter(filter_with, list(self.get_refs(wire).items())))
549 549 return [x[0].split('refs/heads/')[-1] for x in branches]
550 550
551 551 return _branch(context_uid, repo_id, commit_id)
552 552
553 553 @reraise_safe_exceptions
554 def delete_branch(self, wire, branch_name):
555 repo_init = self._factory.repo_libgit2(wire)
556 with repo_init as repo:
557 if branch := repo.lookup_branch(branch_name):
558 branch.delete()
559
560 @reraise_safe_exceptions
554 561 def commit_branches(self, wire, commit_id):
555 562 cache_on, context_uid, repo_id = self._cache_on(wire)
556 563 region = self._region(wire)
557 564
558 565 @region.conditional_cache_on_arguments(condition=cache_on)
559 566 def _commit_branches(_context_uid, _repo_id, _commit_id):
560 567 repo_init = self._factory.repo_libgit2(wire)
561 568 with repo_init as repo:
562 569 branches = [x for x in repo.branches.with_commit(_commit_id)]
563 570 return branches
564 571
565 572 return _commit_branches(context_uid, repo_id, commit_id)
566 573
567 574 @reraise_safe_exceptions
568 575 def add_object(self, wire, content):
569 576 repo_init = self._factory.repo_libgit2(wire)
570 577 with repo_init as repo:
571 578 blob = objects.Blob()
572 579 blob.set_raw_string(content)
573 580 repo.object_store.add_object(blob)
574 581 return blob.id
575 582
576 583 @reraise_safe_exceptions
577 584 def create_commit(self, wire, author, committer, message, branch, new_tree_id,
578 585 date_args: list[int, int] = None,
579 586 parents: list | None = None):
580 587
581 588 repo_init = self._factory.repo_libgit2(wire)
582 589 with repo_init as repo:
583 590
584 591 if date_args:
585 592 current_time, offset = date_args
586 593
587 594 kw = {
588 595 'time': current_time,
589 596 'offset': offset
590 597 }
591 598 author = create_signature_from_string(author, **kw)
592 599 committer = create_signature_from_string(committer, **kw)
593 600
594 601 tree = new_tree_id
595 602 if isinstance(tree, (bytes, str)):
596 603 # validate this tree is in the repo...
597 604 tree = repo[safe_str(tree)].id
598 605
599 606 if parents:
600 607 # run via sha's and validate them in repo
601 608 parents = [repo[c].id for c in parents]
602 609 else:
603 610 parents = []
604 611 # ensure we COMMIT on top of given branch head
605 612 # check if this repo has ANY branches; otherwise it's a new-branch case we need to handle
606 613 if branch in repo.branches.local:
607 614 parents += [repo.branches[branch].target]
608 615 elif [x for x in repo.branches.local]:
609 616 parents += [repo.head.target]
610 617 #else:
611 618 # in case we want to commit on new branch we create it on top of HEAD
612 619 #repo.branches.local.create(branch, repo.revparse_single('HEAD'))
613 620
614 621 # Create a new commit
615 622 commit_oid = repo.create_commit(
616 623 f'refs/heads/{branch}', # the name of the reference to update
617 624 author, # the author of the commit
618 625 committer, # the committer of the commit
619 626 message, # the commit message
620 627 tree, # the tree produced by the index
621 628 parents # list of parents for the new commit, usually just one,
622 629 )
623 630
624 631 new_commit_id = safe_str(commit_oid)
625 632
626 633 return new_commit_id
627 634
628 635 @reraise_safe_exceptions
629 636 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
630 637
631 638 def mode2pygit(mode):
632 639 """
633 640 git regular files only support two filemodes, 644 and 755 (symlinks map to GIT_FILEMODE_LINK)
634 641
635 642 0o100755 -> 33261
636 643 0o100644 -> 33188
637 644 """
638 645 return {
639 646 0o100644: pygit2.GIT_FILEMODE_BLOB,
640 647 0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
641 648 0o120000: pygit2.GIT_FILEMODE_LINK
642 649 }.get(mode) or pygit2.GIT_FILEMODE_BLOB
643 650
644 651 repo_init = self._factory.repo_libgit2(wire)
645 652 with repo_init as repo:
646 653 repo_index = repo.index
647 654
648 655 commit_parents = None
649 656 if commit_tree and commit_data['parents']:
650 657 commit_parents = commit_data['parents']
651 658 parent_commit = repo[commit_parents[0]]
652 659 repo_index.read_tree(parent_commit.tree)
653 660
654 661 for pathspec in updated:
655 662 blob_id = repo.create_blob(pathspec['content'])
656 663 ie = pygit2.IndexEntry(pathspec['path'], blob_id, mode2pygit(pathspec['mode']))
657 664 repo_index.add(ie)
658 665
659 666 for pathspec in removed:
660 667 repo_index.remove(pathspec)
661 668
662 669 # Write changes to the index
663 670 repo_index.write()
664 671
665 672 # Create a tree from the updated index
666 673 written_commit_tree = repo_index.write_tree()
667 674
668 675 new_tree_id = written_commit_tree
669 676
670 677 author = commit_data['author']
671 678 committer = commit_data['committer']
672 679 message = commit_data['message']
673 680
674 681 date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]
675 682
676 683 new_commit_id = self.create_commit(wire, author, committer, message, branch,
677 684 new_tree_id, date_args=date_args, parents=commit_parents)
678 685
679 686 # libgit2, ensure the branch is there and exists
680 687 self.create_branch(wire, branch, new_commit_id)
681 688
682 689 # libgit2, set new ref to this created commit
683 690 self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)
684 691
685 692 return new_commit_id
686 693
687 694 @reraise_safe_exceptions
688 695 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
689 696 if url != 'default' and '://' not in url:
690 697 client = LocalGitClient(url)
691 698 else:
692 699 url_obj = url_parser(safe_bytes(url))
693 700 o = self._build_opener(url)
694 701 url = url_obj.authinfo()[0]
695 702 client = HttpGitClient(base_url=url, opener=o)
696 703 repo = self._factory.repo(wire)
697 704
698 705 determine_wants = repo.object_store.determine_wants_all
699 706
700 707 if refs:
701 708 refs: list[bytes] = [ascii_bytes(x) for x in refs]
702 709
703 710 def determine_wants_requested(_remote_refs):
704 711 determined = []
705 712 for ref_name, ref_hash in _remote_refs.items():
706 713 bytes_ref_name = safe_bytes(ref_name)
707 714
708 715 if bytes_ref_name in refs:
709 716 bytes_ref_hash = safe_bytes(ref_hash)
710 717 determined.append(bytes_ref_hash)
711 718 return determined
712 719
713 720 # swap with our custom requested wants
714 721 determine_wants = determine_wants_requested
715 722
716 723 try:
717 724 remote_refs = client.fetch(
718 725 path=url, target=repo, determine_wants=determine_wants)
719 726
720 727 except NotGitRepository as e:
721 728 log.warning(
722 729 'Trying to fetch from "%s" failed, not a Git repository.', url)
723 730 # Exception can contain unicode which we convert
724 731 raise exceptions.AbortException(e)(repr(e))
725 732
726 733 # mikhail: client.fetch() returns all the remote refs, but fetches only
727 734 # refs filtered by `determine_wants` function. We need to filter result
728 735 # as well
729 736 if refs:
730 737 remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}
731 738
732 739 if apply_refs:
733 740 # TODO: johbo: Needs proper test coverage with a git repository
734 741 # that contains a tag object, so that we would end up with
735 742 # a peeled ref at this point.
736 743 for k in remote_refs:
737 744 if k.endswith(PEELED_REF_MARKER):
738 745 log.debug("Skipping peeled reference %s", k)
739 746 continue
740 747 repo[k] = remote_refs[k]
741 748
742 749 if refs and not update_after:
743 750 # update to ref
744 751 # mikhail: explicitly set the head to the last ref.
745 752 update_to_ref = refs[-1]
746 753 if isinstance(update_after, str):
747 754 update_to_ref = update_after
748 755
749 756 repo[HEAD_MARKER] = remote_refs[update_to_ref]
750 757
751 758 if update_after:
752 759 # we want to check out HEAD
753 760 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
754 761 index.build_index_from_tree(repo.path, repo.index_path(),
755 762 repo.object_store, repo[HEAD_MARKER].tree)
756 763
757 764 if isinstance(remote_refs, FetchPackResult):
758 765 return remote_refs.refs
759 766 return remote_refs
760 767
761 768 @reraise_safe_exceptions
762 769 def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs):
763 770 self._factory.repo(wire)
764 771 if refs and not isinstance(refs, (list, tuple)):
765 772 refs = [refs]
766 773
767 774 config = self._wire_to_config(wire)
768 775 # get all remote refs we'll use to fetch later
769 776 cmd = ['ls-remote']
770 777 if not all_refs:
771 778 cmd += ['--heads', '--tags']
772 779 cmd += [url]
773 780 output, __ = self.run_git_command(
774 781 wire, cmd, fail_on_stderr=False,
775 782 _copts=self._remote_conf(config),
776 783 extra_env={'GIT_TERMINAL_PROMPT': '0'})
777 784
778 785 remote_refs = collections.OrderedDict()
779 786 fetch_refs = []
780 787
781 788 for ref_line in output.splitlines():
782 789 sha, ref = ref_line.split(b'\t')
783 790 sha = sha.strip()
784 791 if ref in remote_refs:
785 792 # duplicate, skip
786 793 continue
787 794 if ref.endswith(PEELED_REF_MARKER):
788 795 log.debug("Skipping peeled reference %s", ref)
789 796 continue
790 797 # don't sync HEAD
791 798 if ref in [HEAD_MARKER]:
792 799 continue
793 800
794 801 remote_refs[ref] = sha
795 802
796 803 if refs and sha in refs:
797 804 # we filter fetch using our specified refs
798 805 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
799 806 elif not refs:
800 807 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
801 808 log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
802 809
803 810 if fetch_refs:
804 811 for chunk in more_itertools.chunked(fetch_refs, 128):
805 812 fetch_refs_chunks = list(chunk)
806 813 log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
807 814 self.run_git_command(
808 815 wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
809 816 fail_on_stderr=False,
810 817 _copts=self._remote_conf(config),
811 818 extra_env={'GIT_TERMINAL_PROMPT': '0'})
812 819 if kwargs.get('sync_large_objects'):
813 820 self.run_git_command(
814 821 wire, ['lfs', 'fetch', url, '--all'],
815 822 fail_on_stderr=False,
816 823 _copts=self._remote_conf(config),
817 824 )
818 825
819 826 return remote_refs
820 827
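# Illustrative sketch (not part of this changeset) of the batching pattern used
# in sync_fetch above: more_itertools.chunked caps each `git fetch` invocation
# at 128 refspecs so the argument list stays within OS limits. The refspec
# names below are hypothetical.
import more_itertools

fetch_refs = [f'refs/heads/b{i}:refs/heads/b{i}' for i in range(300)]
for chunk in more_itertools.chunked(fetch_refs, 128):
    fetch_refs_chunks = list(chunk)  # batches of 128, 128, then 44
    print(f'git fetch <url> --force --prune -- ... ({len(fetch_refs_chunks)} refspecs)')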
821 828 @reraise_safe_exceptions
822 829 def sync_push(self, wire, url, refs=None, **kwargs):
823 830 if not self.check_url(url, wire):
824 831 return
825 832 config = self._wire_to_config(wire)
826 833 self._factory.repo(wire)
827 834 self.run_git_command(
828 835 wire, ['push', url, '--mirror'], fail_on_stderr=False,
829 836 _copts=self._remote_conf(config),
830 837 extra_env={'GIT_TERMINAL_PROMPT': '0'})
831 838 if kwargs.get('sync_large_objects'):
832 839 self.run_git_command(
833 840 wire, ['lfs', 'push', url, '--all'],
834 841 fail_on_stderr=False,
835 842 _copts=self._remote_conf(config),
836 843 )
837 844
838 845 @reraise_safe_exceptions
839 846 def get_remote_refs(self, wire, url):
840 847 repo = Repo(url)
841 848 return repo.get_refs()
842 849
843 850 @reraise_safe_exceptions
844 851 def get_description(self, wire):
845 852 repo = self._factory.repo(wire)
846 853 return repo.get_description()
847 854
848 855 @reraise_safe_exceptions
849 856 def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
850 857 origin_repo_path = wire['path']
851 858 repo = self._factory.repo(wire)
852 859 # fetch from other_repo_path to our origin repo
853 860 LocalGitClient(thin_packs=False).fetch(other_repo_path, repo)
854 861
855 862 wire_remote = wire.copy()
856 863 wire_remote['path'] = other_repo_path
857 864 repo_remote = self._factory.repo(wire_remote)
858 865
859 866 # fetch from origin_repo_path to our remote repo
860 867 LocalGitClient(thin_packs=False).fetch(origin_repo_path, repo_remote)
861 868
862 869 revs = [
863 870 x.commit.id
864 871 for x in repo_remote.get_walker(include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])]
865 872 return revs
866 873
867 874 @reraise_safe_exceptions
868 875 def get_object(self, wire, sha, maybe_unreachable=False):
869 876 cache_on, context_uid, repo_id = self._cache_on(wire)
870 877 region = self._region(wire)
871 878
872 879 @region.conditional_cache_on_arguments(condition=cache_on)
873 880 def _get_object(_context_uid, _repo_id, _sha):
874 881 repo_init = self._factory.repo_libgit2(wire)
875 882 with repo_init as repo:
876 883
877 884 missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
878 885 try:
879 886 commit = repo.revparse_single(sha)
880 887 except KeyError:
881 888 # NOTE(marcink): KeyError doesn't give us any meaningful information
882 889 # here, so we raise something more explicit instead
883 890 e = exceptions.RefNotFoundException('SHA: %s not found', sha)
884 891 raise exceptions.LookupException(e)(missing_commit_err)
885 892 except ValueError as e:
886 893 raise exceptions.LookupException(e)(missing_commit_err)
887 894
888 895 is_tag = False
889 896 if isinstance(commit, pygit2.Tag):
890 897 commit = repo.get(commit.target)
891 898 is_tag = True
892 899
893 900 check_dangling = True
894 901 if is_tag:
895 902 check_dangling = False
896 903
897 904 if check_dangling and maybe_unreachable:
898 905 check_dangling = False
899 906
900 907 # if we looked up a reference and it resolved, we're not dealing with a dangling commit
901 908 if sha != commit.hex:
902 909 check_dangling = False
903 910
904 911 if check_dangling:
905 912 # check for dangling commit
906 913 for branch in repo.branches.with_commit(commit.hex):
907 914 if branch:
908 915 break
909 916 else:
910 917 # NOTE(marcink): an empty error doesn't give us any meaningful information
911 918 # here, so we raise something more explicit instead
912 919 e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
913 920 raise exceptions.LookupException(e)(missing_commit_err)
914 921
915 922 commit_id = commit.hex
916 923 type_str = commit.type_str
917 924
918 925 return {
919 926 'id': commit_id,
920 927 'type': type_str,
921 928 'commit_id': commit_id,
922 929 'idx': 0
923 930 }
924 931
925 932 return _get_object(context_uid, repo_id, sha)
926 933
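# Illustrative sketch of the for/else dangling-commit check in _get_object:
# if no local branch contains the commit, the else clause fires and the
# lookup is rejected. The repository path and revision here are hypothetical.
import pygit2

repo = pygit2.Repository('/path/to/repo.git')
commit = repo.revparse_single('HEAD')
for branch in repo.branches.with_commit(commit.hex):
    if branch:
        break  # commit is reachable from at least one branch
else:
    raise LookupError(f'SHA: {commit.hex} not found in branches')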
927 934 @reraise_safe_exceptions
928 935 def get_refs(self, wire):
929 936 cache_on, context_uid, repo_id = self._cache_on(wire)
930 937 region = self._region(wire)
931 938
932 939 @region.conditional_cache_on_arguments(condition=cache_on)
933 940 def _get_refs(_context_uid, _repo_id):
934 941
935 942 repo_init = self._factory.repo_libgit2(wire)
936 943 with repo_init as repo:
937 944 regex = re.compile('^refs/(heads|tags)/')
938 945 return {x.name: x.target.hex for x in
939 946 [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}
940 947
941 948 return _get_refs(context_uid, repo_id)
942 949
943 950 @reraise_safe_exceptions
944 951 def get_branch_pointers(self, wire):
945 952 cache_on, context_uid, repo_id = self._cache_on(wire)
946 953 region = self._region(wire)
947 954
948 955 @region.conditional_cache_on_arguments(condition=cache_on)
949 956 def _get_branch_pointers(_context_uid, _repo_id):
950 957
951 958 repo_init = self._factory.repo_libgit2(wire)
952 959 regex = re.compile('^refs/heads')
953 960 with repo_init as repo:
954 961 branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
955 962 return {x.target.hex: x.shorthand for x in branches}
956 963
957 964 return _get_branch_pointers(context_uid, repo_id)
958 965
959 966 @reraise_safe_exceptions
960 967 def head(self, wire, show_exc=True):
961 968 cache_on, context_uid, repo_id = self._cache_on(wire)
962 969 region = self._region(wire)
963 970
964 971 @region.conditional_cache_on_arguments(condition=cache_on)
965 972 def _head(_context_uid, _repo_id, _show_exc):
966 973 repo_init = self._factory.repo_libgit2(wire)
967 974 with repo_init as repo:
968 975 try:
969 976 return repo.head.peel().hex
970 977 except Exception:
971 978 if show_exc:
972 979 raise
973 980 return _head(context_uid, repo_id, show_exc)
974 981
975 982 @reraise_safe_exceptions
976 983 def init(self, wire):
977 984 repo_path = safe_str(wire['path'])
978 985 os.makedirs(repo_path, mode=0o755)
979 986 pygit2.init_repository(repo_path, bare=False)
980 987
981 988 @reraise_safe_exceptions
982 989 def init_bare(self, wire):
983 990 repo_path = safe_str(wire['path'])
984 991 os.makedirs(repo_path, mode=0o755)
985 992 pygit2.init_repository(repo_path, bare=True)
986 993
987 994 @reraise_safe_exceptions
988 995 def revision(self, wire, rev):
989 996
990 997 cache_on, context_uid, repo_id = self._cache_on(wire)
991 998 region = self._region(wire)
992 999
993 1000 @region.conditional_cache_on_arguments(condition=cache_on)
994 1001 def _revision(_context_uid, _repo_id, _rev):
995 1002 repo_init = self._factory.repo_libgit2(wire)
996 1003 with repo_init as repo:
997 1004 commit = repo[rev]
998 1005 obj_data = {
999 1006 'id': commit.id.hex,
1000 1007 }
1001 1008 # tree objects themselves don't have a tree_id attribute
1002 1009 if hasattr(commit, 'tree_id'):
1003 1010 obj_data['tree'] = commit.tree_id.hex
1004 1011
1005 1012 return obj_data
1006 1013 return _revision(context_uid, repo_id, rev)
1007 1014
1008 1015 @reraise_safe_exceptions
1009 1016 def date(self, wire, commit_id):
1010 1017 cache_on, context_uid, repo_id = self._cache_on(wire)
1011 1018 region = self._region(wire)
1012 1019
1013 1020 @region.conditional_cache_on_arguments(condition=cache_on)
1014 1021 def _date(_repo_id, _commit_id):
1015 1022 repo_init = self._factory.repo_libgit2(wire)
1016 1023 with repo_init as repo:
1017 1024 commit = repo[commit_id]
1018 1025
1019 1026 if hasattr(commit, 'commit_time'):
1020 1027 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
1021 1028 else:
1022 1029 commit = commit.get_object()
1023 1030 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
1024 1031
1025 1032 # TODO(marcink): check dulwich difference of offset vs timezone
1026 1033 return [commit_time, commit_time_offset]
1027 1034 return _date(repo_id, commit_id)
1028 1035
1029 1036 @reraise_safe_exceptions
1030 1037 def author(self, wire, commit_id):
1031 1038 cache_on, context_uid, repo_id = self._cache_on(wire)
1032 1039 region = self._region(wire)
1033 1040
1034 1041 @region.conditional_cache_on_arguments(condition=cache_on)
1035 1042 def _author(_repo_id, _commit_id):
1036 1043 repo_init = self._factory.repo_libgit2(wire)
1037 1044 with repo_init as repo:
1038 1045 commit = repo[commit_id]
1039 1046
1040 1047 if hasattr(commit, 'author'):
1041 1048 author = commit.author
1042 1049 else:
1043 1050 author = commit.get_object().author
1044 1051
1045 1052 if author.email:
1046 1053 return f"{author.name} <{author.email}>"
1047 1054
1048 1055 try:
1049 1056 return f"{author.name}"
1050 1057 except Exception:
1051 1058 return f"{safe_str(author.raw_name)}"
1052 1059
1053 1060 return _author(repo_id, commit_id)
1054 1061
1055 1062 @reraise_safe_exceptions
1056 1063 def message(self, wire, commit_id):
1057 1064 cache_on, context_uid, repo_id = self._cache_on(wire)
1058 1065 region = self._region(wire)
1059 1066
1060 1067 @region.conditional_cache_on_arguments(condition=cache_on)
1061 1068 def _message(_repo_id, _commit_id):
1062 1069 repo_init = self._factory.repo_libgit2(wire)
1063 1070 with repo_init as repo:
1064 1071 commit = repo[commit_id]
1065 1072 return commit.message
1066 1073 return _message(repo_id, commit_id)
1067 1074
1068 1075 @reraise_safe_exceptions
1069 1076 def parents(self, wire, commit_id):
1070 1077 cache_on, context_uid, repo_id = self._cache_on(wire)
1071 1078 region = self._region(wire)
1072 1079
1073 1080 @region.conditional_cache_on_arguments(condition=cache_on)
1074 1081 def _parents(_repo_id, _commit_id):
1075 1082 repo_init = self._factory.repo_libgit2(wire)
1076 1083 with repo_init as repo:
1077 1084 commit = repo[commit_id]
1078 1085 if hasattr(commit, 'parent_ids'):
1079 1086 parent_ids = commit.parent_ids
1080 1087 else:
1081 1088 parent_ids = commit.get_object().parent_ids
1082 1089
1083 1090 return [x.hex for x in parent_ids]
1084 1091 return _parents(repo_id, commit_id)
1085 1092
1086 1093 @reraise_safe_exceptions
1087 1094 def children(self, wire, commit_id):
1088 1095 cache_on, context_uid, repo_id = self._cache_on(wire)
1089 1096 region = self._region(wire)
1090 1097
1091 1098 head = self.head(wire)
1092 1099
1093 1100 @region.conditional_cache_on_arguments(condition=cache_on)
1094 1101 def _children(_repo_id, _commit_id):
1095 1102
1096 1103 output, __ = self.run_git_command(
1097 1104 wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])
1098 1105
1099 1106 child_ids = []
1100 1107 pat = re.compile(fr'^{commit_id}')
1101 1108 for line in output.splitlines():
1102 1109 line = safe_str(line)
1103 1110 if pat.match(line):
1104 1111 found_ids = line.split(' ')[1:]
1105 1112 child_ids.extend(found_ids)
1106 1113 break
1107 1114
1108 1115 return child_ids
1109 1116 return _children(repo_id, commit_id)
1110 1117
1111 1118 @reraise_safe_exceptions
1112 1119 def set_refs(self, wire, key, value):
1113 1120 repo_init = self._factory.repo_libgit2(wire)
1114 1121 with repo_init as repo:
1115 1122 repo.references.create(key, value, force=True)
1116 1123
1117 1124 @reraise_safe_exceptions
1118 1125 def update_refs(self, wire, key, value):
1119 1126 repo_init = self._factory.repo_libgit2(wire)
1120 1127 with repo_init as repo:
1121 1128 if key not in repo.references:
1122 1129 raise ValueError(f'Reference {key} not found in the repository')
1123 1130 repo.references.create(key, value, force=True)
1124 1131
1125 1132 @reraise_safe_exceptions
1126 1133 def create_branch(self, wire, branch_name, commit_id, force=False):
1127 1134 repo_init = self._factory.repo_libgit2(wire)
1128 1135 with repo_init as repo:
1129 1136 if commit_id:
1130 1137 commit = repo[commit_id]
1131 1138 else:
1132 1139 # if commit is not given just use the HEAD
1133 1140 commit = repo.head()
1134 1141
1135 1142 if force:
1136 1143 repo.branches.local.create(branch_name, commit, force=force)
1137 1144 elif not repo.branches.get(branch_name):
1138 1145 # create only if that branch doesn't already exist
1139 1146 repo.branches.local.create(branch_name, commit, force=force)
1140 1147
1141 1148 @reraise_safe_exceptions
1142 1149 def remove_ref(self, wire, key):
1143 1150 repo_init = self._factory.repo_libgit2(wire)
1144 1151 with repo_init as repo:
1145 1152 repo.references.delete(key)
1146 1153
1147 1154 @reraise_safe_exceptions
1148 1155 def tag_remove(self, wire, tag_name):
1149 1156 repo_init = self._factory.repo_libgit2(wire)
1150 1157 with repo_init as repo:
1151 1158 key = f'refs/tags/{tag_name}'
1152 1159 repo.references.delete(key)
1153 1160
1154 1161 @reraise_safe_exceptions
1155 1162 def tree_changes(self, wire, source_id, target_id):
1156 1163 repo = self._factory.repo(wire)
1157 1164 # source can be empty
1158 1165 source_id = safe_bytes(source_id if source_id else b'')
1159 1166 target_id = safe_bytes(target_id)
1160 1167
1161 1168 source = repo[source_id].tree if source_id else None
1162 1169 target = repo[target_id].tree
1163 1170 result = repo.object_store.tree_changes(source, target)
1164 1171
1165 1172 added = set()
1166 1173 modified = set()
1167 1174 deleted = set()
1168 1175 for (old_path, new_path), (_, _), (_, _) in list(result):
1169 1176 if new_path and old_path:
1170 1177 modified.add(new_path)
1171 1178 elif new_path and not old_path:
1172 1179 added.add(new_path)
1173 1180 elif not new_path and old_path:
1174 1181 deleted.add(old_path)
1175 1182
1176 1183 return list(added), list(modified), list(deleted)
1177 1184
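# Minimal runnable sketch of the classification in tree_changes above, using
# hypothetical change tuples shaped like dulwich's object_store.tree_changes
# output: ((old_path, new_path), (old_mode, new_mode), (old_sha, new_sha)).
changes = [
    ((None, b'new.txt'), (None, 0o100644), (None, b'1' * 40)),              # added
    ((b'a.txt', b'a.txt'), (0o100644, 0o100644), (b'2' * 40, b'3' * 40)),   # modified
    ((b'gone.txt', None), (0o100644, None), (b'4' * 40, None)),             # deleted
]
added, modified, deleted = set(), set(), set()
for (old_path, new_path), (_, _), (_, _) in changes:
    if new_path and old_path:
        modified.add(new_path)
    elif new_path:
        added.add(new_path)
    elif old_path:
        deleted.add(old_path)
assert (added, modified, deleted) == ({b'new.txt'}, {b'a.txt'}, {b'gone.txt'})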
1178 1185 @reraise_safe_exceptions
1179 1186 def tree_and_type_for_path(self, wire, commit_id, path):
1180 1187
1181 1188 cache_on, context_uid, repo_id = self._cache_on(wire)
1182 1189 region = self._region(wire)
1183 1190
1184 1191 @region.conditional_cache_on_arguments(condition=cache_on)
1185 1192 def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
1186 1193 repo_init = self._factory.repo_libgit2(wire)
1187 1194
1188 1195 with repo_init as repo:
1189 1196 commit = repo[commit_id]
1190 1197 try:
1191 1198 tree = commit.tree[path]
1192 1199 except KeyError:
1193 1200 return None, None, None
1194 1201
1195 1202 return tree.id.hex, tree.type_str, tree.filemode
1196 1203 return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1197 1204
1198 1205 @reraise_safe_exceptions
1199 1206 def tree_items(self, wire, tree_id):
1200 1207 cache_on, context_uid, repo_id = self._cache_on(wire)
1201 1208 region = self._region(wire)
1202 1209
1203 1210 @region.conditional_cache_on_arguments(condition=cache_on)
1204 1211 def _tree_items(_repo_id, _tree_id):
1205 1212
1206 1213 repo_init = self._factory.repo_libgit2(wire)
1207 1214 with repo_init as repo:
1208 1215 try:
1209 1216 tree = repo[tree_id]
1210 1217 except KeyError:
1211 1218 raise ObjectMissing(f'No tree with id: {tree_id}')
1212 1219
1213 1220 result = []
1214 1221 for item in tree:
1215 1222 item_sha = item.hex
1216 1223 item_mode = item.filemode
1217 1224 item_type = item.type_str
1218 1225
1219 1226 if item_type == 'commit':
1220 1227 # NOTE(marcink): we translate submodules to 'link' for backward compat
1221 1228 item_type = 'link'
1222 1229
1223 1230 result.append((item.name, item_mode, item_sha, item_type))
1224 1231 return result
1225 1232 return _tree_items(repo_id, tree_id)
1226 1233
1227 1234 @reraise_safe_exceptions
1228 1235 def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1229 1236 """
1230 1237 Old version that uses subprocess to call diff
1231 1238 """
1232 1239
1233 1240 flags = [
1234 1241 f'-U{context}', '--patch',
1235 1242 '--binary',
1236 1243 '--find-renames',
1237 1244 '--no-indent-heuristic',
1238 1245 # '--indent-heuristic',
1239 1246 #'--full-index',
1240 1247 #'--abbrev=40'
1241 1248 ]
1242 1249
1243 1250 if opt_ignorews:
1244 1251 flags.append('--ignore-all-space')
1245 1252
1246 1253 if commit_id_1 == self.EMPTY_COMMIT:
1247 1254 cmd = ['show'] + flags + [commit_id_2]
1248 1255 else:
1249 1256 cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
1250 1257
1251 1258 if file_filter:
1252 1259 cmd.extend(['--', file_filter])
1253 1260
1254 1261 diff, __ = self.run_git_command(wire, cmd)
1255 1262 # If we used the 'show' command, strip the leading metadata lines
1256 1263 # (until the actual diff starts)
1257 1264 if commit_id_1 == self.EMPTY_COMMIT:
1258 1265 lines = diff.splitlines()
1259 1266 x = 0
1260 1267 for line in lines:
1261 1268 if line.startswith(b'diff'):
1262 1269 break
1263 1270 x += 1
1264 1271 # Append a trailing newline, just like the 'diff' command does
1265 1272 diff = b'\n'.join(lines[x:]) + b'\n'
1266 1273 return diff
1267 1274
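# Runnable sketch of the header-stripping step in diff_2: `git show` output
# begins with commit metadata, which is dropped up to the first line starting
# with b'diff'. The sample output below is hypothetical.
show_output = b'commit deadbeef\nAuthor: A <a@example.com>\n\ndiff --git a/f b/f\n--- a/f\n+++ b/f\n'
lines = show_output.splitlines()
x = 0
for line in lines:
    if line.startswith(b'diff'):
        break
    x += 1
diff = b'\n'.join(lines[x:]) + b'\n'
assert diff.startswith(b'diff --git')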
1268 1275 @reraise_safe_exceptions
1269 1276 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1270 1277 repo_init = self._factory.repo_libgit2(wire)
1271 1278
1272 1279 with repo_init as repo:
1273 1280 swap = True
1274 1281 flags = 0
1275 1282 flags |= pygit2.GIT_DIFF_SHOW_BINARY
1276 1283
1277 1284 if opt_ignorews:
1278 1285 flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE
1279 1286
1280 1287 if commit_id_1 == self.EMPTY_COMMIT:
1281 1288 comm1 = repo[commit_id_2]
1282 1289 diff_obj = comm1.tree.diff_to_tree(
1283 1290 flags=flags, context_lines=context, swap=swap)
1284 1291
1285 1292 else:
1286 1293 comm1 = repo[commit_id_2]
1287 1294 comm2 = repo[commit_id_1]
1288 1295 diff_obj = comm1.tree.diff_to_tree(
1289 1296 comm2.tree, flags=flags, context_lines=context, swap=swap)
1290 1297 similar_flags = 0
1291 1298 similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
1292 1299 diff_obj.find_similar(flags=similar_flags)
1293 1300
1294 1301 if file_filter:
1295 1302 for p in diff_obj:
1296 1303 if p.delta.old_file.path == file_filter:
1297 1304 return BytesEnvelope(p.data) or BytesEnvelope(b'')
1298 1305 # no matching path == no diff
1299 1306 return BytesEnvelope(b'')
1300 1307
1301 1308 return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')
1302 1309
1303 1310 @reraise_safe_exceptions
1304 1311 def node_history(self, wire, commit_id, path, limit):
1305 1312 cache_on, context_uid, repo_id = self._cache_on(wire)
1306 1313 region = self._region(wire)
1307 1314
1308 1315 @region.conditional_cache_on_arguments(condition=cache_on)
1309 1316 def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
1310 1317 # optimize for n==1, rev-list is much faster for that use-case
1311 1318 if limit == 1:
1312 1319 cmd = ['rev-list', '-1', commit_id, '--', path]
1313 1320 else:
1314 1321 cmd = ['log']
1315 1322 if limit:
1316 1323 cmd.extend(['-n', str(safe_int(limit, 0))])
1317 1324 cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
1318 1325
1319 1326 output, __ = self.run_git_command(wire, cmd)
1320 1327 commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
1321 1328
1322 1329 return [x for x in commit_ids]
1323 1330 return _node_history(context_uid, repo_id, commit_id, path, limit)
1324 1331
1325 1332 @reraise_safe_exceptions
1326 1333 def node_annotate_legacy(self, wire, commit_id, path):
1327 1334 # note: replaced by pygit2 implementation
1328 1335 cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
1329 1336 # -l ==> outputs long shas (and we need all 40 characters)
1330 1337 # --root ==> doesn't put '^' character for boundaries
1331 1338 # -r commit_id ==> blames for the given commit
1332 1339 output, __ = self.run_git_command(wire, cmd)
1333 1340
1334 1341 result = []
1335 1342 for i, blame_line in enumerate(output.splitlines()[:-1]):
1336 1343 line_no = i + 1
1337 1344 blame_commit_id, line = re.split(rb' ', blame_line, 1)
1338 1345 result.append((line_no, blame_commit_id, line))
1339 1346
1340 1347 return result
1341 1348
1342 1349 @reraise_safe_exceptions
1343 1350 def node_annotate(self, wire, commit_id, path):
1344 1351
1345 1352 result_libgit = []
1346 1353 repo_init = self._factory.repo_libgit2(wire)
1347 1354 with repo_init as repo:
1348 1355 commit = repo[commit_id]
1349 1356 blame_obj = repo.blame(path, newest_commit=commit_id)
1350 1357 file_content = commit.tree[path].data
1351 1358 for i, line in enumerate(splitnewlines(file_content)):
1352 1359 line_no = i + 1
1353 1360 hunk = blame_obj.for_line(line_no)
1354 1361 blame_commit_id = hunk.final_commit_id.hex
1355 1362
1356 1363 result_libgit.append((line_no, blame_commit_id, line))
1357 1364
1358 1365 return BinaryEnvelope(result_libgit)
1359 1366
1360 1367 @reraise_safe_exceptions
1361 1368 def update_server_info(self, wire, force=False):
1362 1369 cmd = ['update-server-info']
1363 1370 if force:
1364 1371 cmd += ['--force']
1365 1372 output, __ = self.run_git_command(wire, cmd)
1366 1373 return output.splitlines()
1367 1374
1368 1375 @reraise_safe_exceptions
1369 1376 def get_all_commit_ids(self, wire):
1370 1377
1371 1378 cache_on, context_uid, repo_id = self._cache_on(wire)
1372 1379 region = self._region(wire)
1373 1380
1374 1381 @region.conditional_cache_on_arguments(condition=cache_on)
1375 1382 def _get_all_commit_ids(_context_uid, _repo_id):
1376 1383
1377 1384 cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
1378 1385 try:
1379 1386 output, __ = self.run_git_command(wire, cmd)
1380 1387 return output.splitlines()
1381 1388 except Exception:
1382 1389 # Can be raised for empty repositories
1383 1390 return []
1384 1391
1385 1392 @region.conditional_cache_on_arguments(condition=cache_on)
1386 1393 def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
1387 1394 repo_init = self._factory.repo_libgit2(wire)
1388 1395 from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
1389 1396 results = []
1390 1397 with repo_init as repo:
1391 1398 for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
1392 1399 results.append(commit.id.hex)
1393 1400 return results
1394 1401 return _get_all_commit_ids(context_uid, repo_id)
1395 1402
1396 1403 @reraise_safe_exceptions
1397 1404 def run_git_command(self, wire, cmd, **opts):
1398 1405 path = wire.get('path', None)
1399 1406 debug_mode = rhodecode.ConfigGet().get_bool('debug')
1400 1407
1401 1408 if path and os.path.isdir(path):
1402 1409 opts['cwd'] = path
1403 1410
1404 1411 if '_bare' in opts:
1405 1412 _copts = []
1406 1413 del opts['_bare']
1407 1414 else:
1408 1415 _copts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']
1409 1416 safe_call = False
1410 1417 if '_safe' in opts:
1411 1418 # no exc on failure
1412 1419 del opts['_safe']
1413 1420 safe_call = True
1414 1421
1415 1422 if '_copts' in opts:
1416 1423 _copts.extend(opts['_copts'] or [])
1417 1424 del opts['_copts']
1418 1425
1419 1426 gitenv = os.environ.copy()
1420 1427 gitenv.update(opts.pop('extra_env', {}))
1421 1428 # we need to remove GIT_DIR from the environment
1422 1429 if 'GIT_DIR' in gitenv:
1423 1430 del gitenv['GIT_DIR']
1424 1431 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
1425 1432 gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'
1426 1433
1427 1434 cmd = [settings.GIT_EXECUTABLE()] + _copts + cmd
1428 1435 _opts = {'env': gitenv, 'shell': False}
1429 1436
1430 1437 proc = None
1431 1438 try:
1432 1439 _opts.update(opts)
1433 1440 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
1434 1441
1435 1442 return b''.join(proc), b''.join(proc.stderr)
1436 1443 except OSError as err:
1437 1444 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
1438 1445 call_opts = {}
1439 1446 if debug_mode:
1440 1447 call_opts = _opts
1441 1448
1442 1449 tb_err = ("Couldn't run git command ({}).\n"
1443 1450 "Original error was:{}\n"
1444 1451 "Call options:{}\n"
1445 1452 .format(cmd, err, call_opts))
1446 1453 log.exception(tb_err)
1447 1454 if safe_call:
1448 1455 return '', err
1449 1456 else:
1450 1457 raise exceptions.VcsException()(tb_err)
1451 1458 finally:
1452 1459 if proc:
1453 1460 proc.close()
1454 1461
1455 1462 @reraise_safe_exceptions
1456 1463 def install_hooks(self, wire, force=False):
1457 1464 from vcsserver.hook_utils import install_git_hooks
1458 1465 bare = self.bare(wire)
1459 1466 path = wire['path']
1460 1467 binary_dir = settings.BINARY_DIR
1461 1468 if binary_dir:
1462 1469 os.path.join(binary_dir, 'python3')
1463 1470 return install_git_hooks(path, bare, force_create=force)
1464 1471
1465 1472 @reraise_safe_exceptions
1466 1473 def get_hooks_info(self, wire):
1467 1474 from vcsserver.hook_utils import (
1468 1475 get_git_pre_hook_version, get_git_post_hook_version)
1469 1476 bare = self.bare(wire)
1470 1477 path = wire['path']
1471 1478 return {
1472 1479 'pre_version': get_git_pre_hook_version(path, bare),
1473 1480 'post_version': get_git_post_hook_version(path, bare),
1474 1481 }
1475 1482
1476 1483 @reraise_safe_exceptions
1477 1484 def set_head_ref(self, wire, head_name):
1478 1485 log.debug('Setting refs/heads to `%s`', head_name)
1479 1486 repo_init = self._factory.repo_libgit2(wire)
1480 1487 with repo_init as repo:
1481 1488 repo.set_head(f'refs/heads/{head_name}')
1482 1489
1483 1490 return [head_name] + [f'set HEAD to refs/heads/{head_name}']
1484 1491
1485 1492 @reraise_safe_exceptions
1486 1493 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1487 1494 archive_dir_name, commit_id, cache_config):
1488 1495
1489 1496 def file_walker(_commit_id, path):
1490 1497 repo_init = self._factory.repo_libgit2(wire)
1491 1498
1492 1499 with repo_init as repo:
1493 1500 commit = repo[commit_id]
1494 1501
1495 1502 if path in ['', '/']:
1496 1503 tree = commit.tree
1497 1504 else:
1498 1505 tree = commit.tree[path.rstrip('/')]
1499 1506 tree_id = tree.id.hex
1500 1507 try:
1501 1508 tree = repo[tree_id]
1502 1509 except KeyError:
1503 1510 raise ObjectMissing(f'No tree with id: {tree_id}')
1504 1511
1505 1512 index = LibGit2Index.Index()
1506 1513 index.read_tree(tree)
1507 1514 file_iter = index
1508 1515
1509 1516 for file_node in file_iter:
1510 1517 file_path = file_node.path
1511 1518 mode = file_node.mode
1512 1519 is_link = stat.S_ISLNK(mode)
1513 1520 if mode == pygit2.GIT_FILEMODE_COMMIT:
1514 1521 log.debug('Skipping path %s as a commit node', file_path)
1515 1522 continue
1516 1523 yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)
1517 1524
1518 1525 return store_archive_in_cache(
1519 1526 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
@@ -1,1213 +1,1217 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import binascii
19 19 import io
20 20 import logging
21 21 import stat
22 22 import sys
23 23 import urllib.request
24 24 import urllib.parse
25 25 import hashlib
26 26
27 27 from hgext import largefiles, rebase
28 28
29 29 from mercurial import commands
30 30 from mercurial import unionrepo
31 31 from mercurial import verify
32 32 from mercurial import repair
33 33 from mercurial.error import AmbiguousPrefixLookupError
34 from mercurial.utils.urlutil import path as hg_path
34 35
35 36 import vcsserver
36 37 from vcsserver import exceptions
37 38 from vcsserver.base import (
38 39 RepoFactory,
39 40 obfuscate_qs,
40 41 raise_from_original,
41 42 store_archive_in_cache,
42 43 ArchiveNode,
43 44 BytesEnvelope,
44 45 BinaryEnvelope,
45 46 )
46 47 from vcsserver.hgcompat import (
47 48 archival,
48 49 bin,
49 50 clone,
50 51 config as hgconfig,
51 52 diffopts,
52 53 hex,
53 54 get_ctx,
54 55 hg_url as url_parser,
55 56 httpbasicauthhandler,
56 57 httpdigestauthhandler,
57 makepeer,
58 make_peer,
58 59 instance,
59 60 match,
60 61 memctx,
61 62 exchange,
62 63 memfilectx,
63 64 nullrev,
64 65 hg_merge,
65 66 patch,
66 67 peer,
67 68 revrange,
68 69 ui,
69 70 hg_tag,
70 71 Abort,
71 72 LookupError,
72 73 RepoError,
73 74 RepoLookupError,
74 75 InterventionRequired,
75 76 RequirementError,
76 77 alwaysmatcher,
77 78 patternmatcher,
78 79 hgext_strip,
79 80 )
80 from vcsserver.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes, convert_to_str
81 from vcsserver.lib.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes, convert_to_str
81 82 from vcsserver.vcs_base import RemoteBase
82 83 from vcsserver.config import hooks as hooks_config
83 84 from vcsserver.lib.exc_tracking import format_exc
84 85
85 86 log = logging.getLogger(__name__)
86 87
87 88
88 def make_ui_from_config(repo_config):
89 def make_ui_from_config(repo_config, interactive=True):
89 90
90 91 class LoggingUI(ui.ui):
91 92
92 93 def status(self, *msg, **opts):
93 94 str_msg = map(safe_str, msg)
94 95 log.info(' '.join(str_msg).rstrip('\n'))
95 96 #super(LoggingUI, self).status(*msg, **opts)
96 97
97 98 def warn(self, *msg, **opts):
98 99 str_msg = map(safe_str, msg)
99 100 log.warning('ui_logger:'+' '.join(str_msg).rstrip('\n'))
100 101 #super(LoggingUI, self).warn(*msg, **opts)
101 102
102 103 def error(self, *msg, **opts):
103 104 str_msg = map(safe_str, msg)
104 105 log.error('ui_logger:'+' '.join(str_msg).rstrip('\n'))
105 106 #super(LoggingUI, self).error(*msg, **opts)
106 107
107 108 def note(self, *msg, **opts):
108 109 str_msg = map(safe_str, msg)
109 110 log.info('ui_logger:'+' '.join(str_msg).rstrip('\n'))
110 111 #super(LoggingUI, self).note(*msg, **opts)
111 112
112 113 def debug(self, *msg, **opts):
113 114 str_msg = map(safe_str, msg)
114 115 log.debug('ui_logger:'+' '.join(str_msg).rstrip('\n'))
115 116 #super(LoggingUI, self).debug(*msg, **opts)
116 117
117 118 baseui = LoggingUI()
118 119
119 120 # clean the baseui object
120 121 baseui._ocfg = hgconfig.config()
121 122 baseui._ucfg = hgconfig.config()
122 123 baseui._tcfg = hgconfig.config()
123 124
124 125 for section, option, value in repo_config:
125 126 baseui.setconfig(ascii_bytes(section), ascii_bytes(option), ascii_bytes(value))
126 127
127 128 # make our hgweb quiet so it doesn't print output
128 129 baseui.setconfig(b'ui', b'quiet', b'true')
129 130
130 131 baseui.setconfig(b'ui', b'paginate', b'never')
131 132 # for better error reporting from Mercurial
132 133 baseui.setconfig(b'ui', b'message-output', b'stderr')
133 134
134 135 # force mercurial to only use 1 thread, otherwise it may try to set a
135 136 # signal in a non-main thread, thus generating a ValueError.
136 137 baseui.setconfig(b'worker', b'numcpus', 1)
137 138
138 139 # If there is no config for the largefiles extension, we explicitly disable
139 140 # it here. This overrides settings from repositories hgrc file. Recent
140 141 # mercurial versions enable largefiles in hgrc on clone from largefile
141 142 # repo.
142 143 if not baseui.hasconfig(b'extensions', b'largefiles'):
143 144 log.debug('Explicitly disable largefiles extension for repo.')
144 145 baseui.setconfig(b'extensions', b'largefiles', b'!')
145 146
147 baseui.setconfig(b'ui', b'interactive', b'true' if interactive else b'false')
146 148 return baseui
147 149
148 150
149 151 def reraise_safe_exceptions(func):
150 152 """Decorator for converting mercurial exceptions to something neutral."""
151 153
152 154 def wrapper(*args, **kwargs):
153 155 try:
154 156 return func(*args, **kwargs)
155 157 except (Abort, InterventionRequired) as e:
156 158 raise_from_original(exceptions.AbortException(e), e)
157 159 except RepoLookupError as e:
158 160 raise_from_original(exceptions.LookupException(e), e)
159 161 except RequirementError as e:
160 162 raise_from_original(exceptions.RequirementException(e), e)
161 163 except RepoError as e:
162 164 raise_from_original(exceptions.VcsException(e), e)
163 165 except LookupError as e:
164 166 raise_from_original(exceptions.LookupException(e), e)
165 167 except Exception as e:
166 168 if not hasattr(e, '_vcs_kind'):
167 169 log.exception("Unhandled exception in hg remote call")
168 170 raise_from_original(exceptions.UnhandledException(e), e)
169 171
170 172 raise
171 173 return wrapper
172 174
173 175
174 176 class MercurialFactory(RepoFactory):
175 177 repo_type = 'hg'
176 178
177 179 def _create_config(self, config, hooks=True):
178 180 if not hooks:
179 181
180 182 hooks_to_clean = {
181 183
182 184 hooks_config.HOOK_REPO_SIZE,
183 185 hooks_config.HOOK_PRE_PULL,
184 186 hooks_config.HOOK_PULL,
185 187
186 188 hooks_config.HOOK_PRE_PUSH,
187 189 # TODO: what about PRETXT, this was disabled in pre 5.0.0
188 190 hooks_config.HOOK_PRETX_PUSH,
189 191
190 192 }
191 193 new_config = []
192 194 for section, option, value in config:
193 195 if section == 'hooks' and option in hooks_to_clean:
194 196 continue
195 197 new_config.append((section, option, value))
196 198 config = new_config
197 199
198 200 baseui = make_ui_from_config(config)
199 201 return baseui
200 202
201 203 def _create_repo(self, wire, create):
202 204 baseui = self._create_config(wire["config"])
203 205 repo = instance(baseui, safe_bytes(wire["path"]), create)
204 206 log.debug('repository created: got HG object: %s', repo)
205 207 return repo
206 208
207 209 def repo(self, wire, create=False):
208 210 """
209 211 Get a repository instance for the given path.
210 212 """
211 213 return self._create_repo(wire, create)
212 214
213 215
214 216 def patch_ui_message_output(baseui):
215 217 baseui.setconfig(b'ui', b'quiet', b'false')
216 218 output = io.BytesIO()
217 219
218 220 def write(data, **unused_kwargs):
219 221 output.write(data)
220 222
221 223 baseui.status = write
222 224 baseui.write = write
223 225 baseui.warn = write
224 226 baseui.debug = write
225 227
226 228 return baseui, output
227 229
228 230
229 231 def get_obfuscated_url(url_obj):
230 232 url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd
231 233 url_obj.query = obfuscate_qs(url_obj.query)
232 234 obfuscated_uri = str(url_obj)
233 235 return obfuscated_uri
234 236
235 237
236 238 def normalize_url_for_hg(url: str):
237 239 _proto = None
238 240
239 241 if '+' in url[:url.find('://')]:
240 242 _proto = url[0:url.find('+')]
241 243 url = url[url.find('+') + 1:]
242 244 return url, _proto
243 245
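# Usage sketch for normalize_url_for_hg: a transport prefix such as `svn+`
# is split off and returned separately, while plain URLs pass through.
assert normalize_url_for_hg('svn+http://host/repo') == ('http://host/repo', 'svn')
assert normalize_url_for_hg('http://host/repo') == ('http://host/repo', None)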
244 246
245 247 class HgRemote(RemoteBase):
246 248
247 249 def __init__(self, factory):
248 250 self._factory = factory
249 251 self._bulk_methods = {
250 252 "affected_files": self.ctx_files,
251 253 "author": self.ctx_user,
252 254 "branch": self.ctx_branch,
253 255 "children": self.ctx_children,
254 256 "date": self.ctx_date,
255 257 "message": self.ctx_description,
256 258 "parents": self.ctx_parents,
257 259 "status": self.ctx_status,
258 260 "obsolete": self.ctx_obsolete,
259 261 "phase": self.ctx_phase,
260 262 "hidden": self.ctx_hidden,
261 263 "_file_paths": self.ctx_list,
262 264 }
263 265 self._bulk_file_methods = {
264 266 "size": self.fctx_size,
265 267 "data": self.fctx_node_data,
266 268 "flags": self.fctx_flags,
267 269 "is_binary": self.is_binary,
268 270 "md5": self.md5_hash,
269 271 }
270 272
271 273 def _get_ctx(self, repo, ref):
272 274 return get_ctx(repo, ref)
273 275
274 276 @reraise_safe_exceptions
275 277 def discover_hg_version(self):
276 278 from mercurial import util
277 279 return safe_str(util.version())
278 280
279 281 @reraise_safe_exceptions
280 282 def is_empty(self, wire):
281 283 repo = self._factory.repo(wire)
282 284
283 285 try:
284 286 return len(repo) == 0
285 287 except Exception:
286 288 log.exception("failed to read object_store")
287 289 return False
288 290
289 291 @reraise_safe_exceptions
290 292 def bookmarks(self, wire):
291 293 cache_on, context_uid, repo_id = self._cache_on(wire)
292 294 region = self._region(wire)
293 295
294 296 @region.conditional_cache_on_arguments(condition=cache_on)
295 297 def _bookmarks(_context_uid, _repo_id):
296 298 repo = self._factory.repo(wire)
297 299 return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo._bookmarks.items()}
298 300
299 301 return _bookmarks(context_uid, repo_id)
300 302
301 303 @reraise_safe_exceptions
302 304 def branches(self, wire, normal, closed):
303 305 cache_on, context_uid, repo_id = self._cache_on(wire)
304 306 region = self._region(wire)
305 307
306 308 @region.conditional_cache_on_arguments(condition=cache_on)
307 309 def _branches(_context_uid, _repo_id, _normal, _closed):
308 310 repo = self._factory.repo(wire)
309 311 iter_branches = repo.branchmap().iterbranches()
310 312 bt = {}
311 313 for branch_name, _heads, tip_node, is_closed in iter_branches:
312 314 if normal and not is_closed:
313 315 bt[safe_str(branch_name)] = ascii_str(hex(tip_node))
314 316 if closed and is_closed:
315 317 bt[safe_str(branch_name)] = ascii_str(hex(tip_node))
316 318
317 319 return bt
318 320
319 321 return _branches(context_uid, repo_id, normal, closed)
320 322
321 323 @reraise_safe_exceptions
322 324 def bulk_request(self, wire, commit_id, pre_load):
323 325 cache_on, context_uid, repo_id = self._cache_on(wire)
324 326 region = self._region(wire)
325 327
326 328 @region.conditional_cache_on_arguments(condition=cache_on)
327 329 def _bulk_request(_repo_id, _commit_id, _pre_load):
328 330 result = {}
329 331 for attr in pre_load:
330 332 try:
331 333 method = self._bulk_methods[attr]
332 334 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
333 335 result[attr] = method(wire, commit_id)
334 336 except KeyError as e:
335 337 raise exceptions.VcsException(e)(
336 338 f'Unknown bulk attribute: "{attr}"')
337 339 return result
338 340
339 341 return _bulk_request(repo_id, commit_id, sorted(pre_load))
340 342
341 343 @reraise_safe_exceptions
342 344 def ctx_branch(self, wire, commit_id):
343 345 cache_on, context_uid, repo_id = self._cache_on(wire)
344 346 region = self._region(wire)
345 347
346 348 @region.conditional_cache_on_arguments(condition=cache_on)
347 349 def _ctx_branch(_repo_id, _commit_id):
348 350 repo = self._factory.repo(wire)
349 351 ctx = self._get_ctx(repo, commit_id)
350 352 return ctx.branch()
351 353 return _ctx_branch(repo_id, commit_id)
352 354
353 355 @reraise_safe_exceptions
354 356 def ctx_date(self, wire, commit_id):
355 357 cache_on, context_uid, repo_id = self._cache_on(wire)
356 358 region = self._region(wire)
357 359
358 360 @region.conditional_cache_on_arguments(condition=cache_on)
359 361 def _ctx_date(_repo_id, _commit_id):
360 362 repo = self._factory.repo(wire)
361 363 ctx = self._get_ctx(repo, commit_id)
362 364 return ctx.date()
363 365 return _ctx_date(repo_id, commit_id)
364 366
365 367 @reraise_safe_exceptions
366 368 def ctx_description(self, wire, revision):
367 369 repo = self._factory.repo(wire)
368 370 ctx = self._get_ctx(repo, revision)
369 371 return ctx.description()
370 372
371 373 @reraise_safe_exceptions
372 374 def ctx_files(self, wire, commit_id):
373 375 cache_on, context_uid, repo_id = self._cache_on(wire)
374 376 region = self._region(wire)
375 377
376 378 @region.conditional_cache_on_arguments(condition=cache_on)
377 379 def _ctx_files(_repo_id, _commit_id):
378 380 repo = self._factory.repo(wire)
379 381 ctx = self._get_ctx(repo, commit_id)
380 382 return ctx.files()
381 383
382 384 return _ctx_files(repo_id, commit_id)
383 385
384 386 @reraise_safe_exceptions
385 387 def ctx_list(self, path, revision):
386 388 repo = self._factory.repo(path)
387 389 ctx = self._get_ctx(repo, revision)
388 390 return list(ctx)
389 391
390 392 @reraise_safe_exceptions
391 393 def ctx_parents(self, wire, commit_id):
392 394 cache_on, context_uid, repo_id = self._cache_on(wire)
393 395 region = self._region(wire)
394 396
395 397 @region.conditional_cache_on_arguments(condition=cache_on)
396 398 def _ctx_parents(_repo_id, _commit_id):
397 399 repo = self._factory.repo(wire)
398 400 ctx = self._get_ctx(repo, commit_id)
399 401 return [parent.hex() for parent in ctx.parents()
400 402 if not (parent.hidden() or parent.obsolete())]
401 403
402 404 return _ctx_parents(repo_id, commit_id)
403 405
404 406 @reraise_safe_exceptions
405 407 def ctx_children(self, wire, commit_id):
406 408 cache_on, context_uid, repo_id = self._cache_on(wire)
407 409 region = self._region(wire)
408 410
409 411 @region.conditional_cache_on_arguments(condition=cache_on)
410 412 def _ctx_children(_repo_id, _commit_id):
411 413 repo = self._factory.repo(wire)
412 414 ctx = self._get_ctx(repo, commit_id)
413 415 return [child.hex() for child in ctx.children()
414 416 if not (child.hidden() or child.obsolete())]
415 417
416 418 return _ctx_children(repo_id, commit_id)
417 419
418 420 @reraise_safe_exceptions
419 421 def ctx_phase(self, wire, commit_id):
420 422 cache_on, context_uid, repo_id = self._cache_on(wire)
421 423 region = self._region(wire)
422 424
423 425 @region.conditional_cache_on_arguments(condition=cache_on)
424 426 def _ctx_phase(_context_uid, _repo_id, _commit_id):
425 427 repo = self._factory.repo(wire)
426 428 ctx = self._get_ctx(repo, commit_id)
427 429 # public=0, draft=1, secret=2
428 430 return ctx.phase()
429 431 return _ctx_phase(context_uid, repo_id, commit_id)
430 432
431 433 @reraise_safe_exceptions
432 434 def ctx_obsolete(self, wire, commit_id):
433 435 cache_on, context_uid, repo_id = self._cache_on(wire)
434 436 region = self._region(wire)
435 437
436 438 @region.conditional_cache_on_arguments(condition=cache_on)
437 439 def _ctx_obsolete(_context_uid, _repo_id, _commit_id):
438 440 repo = self._factory.repo(wire)
439 441 ctx = self._get_ctx(repo, commit_id)
440 442 return ctx.obsolete()
441 443 return _ctx_obsolete(context_uid, repo_id, commit_id)
442 444
443 445 @reraise_safe_exceptions
444 446 def ctx_hidden(self, wire, commit_id):
445 447 cache_on, context_uid, repo_id = self._cache_on(wire)
446 448 region = self._region(wire)
447 449
448 450 @region.conditional_cache_on_arguments(condition=cache_on)
449 451 def _ctx_hidden(_context_uid, _repo_id, _commit_id):
450 452 repo = self._factory.repo(wire)
451 453 ctx = self._get_ctx(repo, commit_id)
452 454 return ctx.hidden()
453 455 return _ctx_hidden(context_uid, repo_id, commit_id)
454 456
455 457 @reraise_safe_exceptions
456 458 def ctx_substate(self, wire, revision):
457 459 repo = self._factory.repo(wire)
458 460 ctx = self._get_ctx(repo, revision)
459 461 return ctx.substate
460 462
461 463 @reraise_safe_exceptions
462 464 def ctx_status(self, wire, revision):
463 465 repo = self._factory.repo(wire)
464 466 ctx = self._get_ctx(repo, revision)
465 467 status = repo[ctx.p1().node()].status(other=ctx.node())
466 468 # the status object (an odd, custom named tuple in mercurial) is not
467 469 # correctly serializable; we make it a list, as the underlying
468 470 # API expects this to be a list
469 471 return list(status)
470 472
471 473 @reraise_safe_exceptions
472 474 def ctx_user(self, wire, revision):
473 475 repo = self._factory.repo(wire)
474 476 ctx = self._get_ctx(repo, revision)
475 477 return ctx.user()
476 478
477 479 @reraise_safe_exceptions
478 480 def check_url(self, url, config):
479 481 url, _proto = normalize_url_for_hg(url)
480 482 url_obj = url_parser(safe_bytes(url))
481 483
482 484 test_uri = safe_str(url_obj.authinfo()[0])
483 485 authinfo = url_obj.authinfo()[1]
484 486 obfuscated_uri = get_obfuscated_url(url_obj)
485 487 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
486 488
487 489 handlers = []
488 490 if authinfo:
489 491 # create a password manager
490 492 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
491 493 passmgr.add_password(*convert_to_str(authinfo))
492 494
493 495 handlers.extend((httpbasicauthhandler(passmgr),
494 496 httpdigestauthhandler(passmgr)))
495 497
496 498 o = urllib.request.build_opener(*handlers)
497 499 o.addheaders = [('Content-Type', 'application/mercurial-0.1'),
498 500 ('Accept', 'application/mercurial-0.1')]
499 501
500 502 q = {"cmd": 'between'}
501 503 q.update({'pairs': "{}-{}".format('0' * 40, '0' * 40)})
502 504 qs = f'?{urllib.parse.urlencode(q)}'
503 505 cu = f"{test_uri}{qs}"
504 506
505 507 try:
506 508 req = urllib.request.Request(cu, None, {})
507 509 log.debug("Trying to open URL %s", obfuscated_uri)
508 510 resp = o.open(req)
509 511 if resp.code != 200:
510 512 raise exceptions.URLError()('Return Code is not 200')
511 513 except Exception as e:
512 514 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
513 515 # means it cannot be cloned
514 516 raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")
515 517
516 518 # now check if it's a proper hg repo, but don't do it for svn
517 519 try:
518 520 if _proto == 'svn':
519 521 pass
520 522 else:
521 523 # check for pure hg repos
522 524 log.debug(
523 525 "Verifying if URL is a Mercurial repository: %s", obfuscated_uri)
524 ui = make_ui_from_config(config)
525 peer_checker = makepeer(ui, safe_bytes(url))
526 # Create repo path with custom mercurial path object
527 ui = make_ui_from_config(config, interactive=False)
528 repo_path = hg_path(ui=ui, rawloc=safe_bytes(url))
529 peer_checker = make_peer(ui, repo_path, False)
526 530 peer_checker.lookup(b'tip')
527 531 except Exception as e:
528 532 log.warning("URL is not a valid Mercurial repository: %s",
529 533 obfuscated_uri)
530 534 raise exceptions.URLError(e)(
531 535 f"url [{obfuscated_uri}] does not look like an hg repo org_exc: {e}")
532 536
533 537 log.info("URL is a valid Mercurial repository: %s", obfuscated_uri)
534 538 return True
535 539
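# Sketch of the capability-probe URL that check_url builds: a Mercurial
# `between` command with a pair of null revisions (the host is hypothetical).
import urllib.parse

q = {'cmd': 'between', 'pairs': '{}-{}'.format('0' * 40, '0' * 40)}
probe_url = f"https://host/repo?{urllib.parse.urlencode(q)}"
# a server answering 200 here speaks the Mercurial wire protocol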
536 540 @reraise_safe_exceptions
537 541 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_git, opt_ignorews, context):
538 542 repo = self._factory.repo(wire)
539 543
540 544 if file_filter:
541 545 # unpack the file-filter
542 546 repo_path, node_path = file_filter
543 547 match_filter = match(safe_bytes(repo_path), b'', [safe_bytes(node_path)])
544 548 else:
545 549 match_filter = file_filter
546 550 opts = diffopts(git=opt_git, ignorews=opt_ignorews, context=context, showfunc=1)
547 551
548 552 try:
549 553 diff_iter = patch.diff(
550 554 repo, node1=commit_id_1, node2=commit_id_2, match=match_filter, opts=opts)
551 555 return BytesEnvelope(b"".join(diff_iter))
552 556 except RepoLookupError as e:
553 557 raise exceptions.LookupException(e)()
554 558
555 559 @reraise_safe_exceptions
556 560 def node_history(self, wire, revision, path, limit):
557 561 cache_on, context_uid, repo_id = self._cache_on(wire)
558 562 region = self._region(wire)
559 563
560 564 @region.conditional_cache_on_arguments(condition=cache_on)
561 565 def _node_history(_context_uid, _repo_id, _revision, _path, _limit):
562 566 repo = self._factory.repo(wire)
563 567
564 568 ctx = self._get_ctx(repo, revision)
565 569 fctx = ctx.filectx(safe_bytes(path))
566 570
567 571 def history_iter():
568 572 limit_rev = fctx.rev()
569 573
570 574 for fctx_candidate in reversed(list(fctx.filelog())):
571 575 f_obj = fctx.filectx(fctx_candidate)
572 576
573 577 # NOTE: this can be problematic - if the only history nodes are hidden or obsolete, the resulting history is empty
574 578 _ctx = f_obj.changectx()
575 579 if _ctx.hidden() or _ctx.obsolete():
576 580 continue
577 581
578 582 if limit_rev >= f_obj.rev():
579 583 yield f_obj
580 584
581 585 history = []
582 586 for cnt, obj in enumerate(history_iter()):
583 587 if limit and cnt >= limit:
584 588 break
585 589 history.append(hex(obj.node()))
586 590
587 591 return [x for x in history]
588 592 return _node_history(context_uid, repo_id, revision, path, limit)
589 593
590 594 @reraise_safe_exceptions
591 595 def node_history_until(self, wire, revision, path, limit):
592 596 cache_on, context_uid, repo_id = self._cache_on(wire)
593 597 region = self._region(wire)
594 598
595 599 @region.conditional_cache_on_arguments(condition=cache_on)
596 600 def _node_history_until(_context_uid, _repo_id):
597 601 repo = self._factory.repo(wire)
598 602 ctx = self._get_ctx(repo, revision)
599 603 fctx = ctx.filectx(safe_bytes(path))
600 604
601 605 file_log = list(fctx.filelog())
602 606 if limit:
603 607 # Limit to the last n items
604 608 file_log = file_log[-limit:]
605 609
606 610 return [hex(fctx.filectx(cs).node()) for cs in reversed(file_log)]
607 611 return _node_history_until(context_uid, repo_id, revision, path, limit)
608 612
609 613 @reraise_safe_exceptions
610 614 def bulk_file_request(self, wire, commit_id, path, pre_load):
611 615 cache_on, context_uid, repo_id = self._cache_on(wire)
612 616 region = self._region(wire)
613 617
614 618 @region.conditional_cache_on_arguments(condition=cache_on)
615 619 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
616 620 result = {}
617 621 for attr in pre_load:
618 622 try:
619 623 method = self._bulk_file_methods[attr]
620 624 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
621 625 result[attr] = method(wire, _commit_id, _path)
622 626 except KeyError as e:
623 627 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
624 628 return result
625 629
626 630 return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
627 631
628 632 @reraise_safe_exceptions
629 633 def fctx_annotate(self, wire, revision, path):
630 634 repo = self._factory.repo(wire)
631 635 ctx = self._get_ctx(repo, revision)
632 636 fctx = ctx.filectx(safe_bytes(path))
633 637
634 638 result = []
635 639 for i, annotate_obj in enumerate(fctx.annotate(), 1):
636 640 ln_no = i
637 641 sha = hex(annotate_obj.fctx.node())
638 642 content = annotate_obj.text
639 643 result.append((ln_no, ascii_str(sha), content))
640 644 return BinaryEnvelope(result)
641 645
642 646 @reraise_safe_exceptions
643 647 def fctx_node_data(self, wire, revision, path):
644 648 repo = self._factory.repo(wire)
645 649 ctx = self._get_ctx(repo, revision)
646 650 fctx = ctx.filectx(safe_bytes(path))
647 651 return BytesEnvelope(fctx.data())
648 652
649 653 @reraise_safe_exceptions
650 654 def fctx_flags(self, wire, commit_id, path):
651 655 cache_on, context_uid, repo_id = self._cache_on(wire)
652 656 region = self._region(wire)
653 657
654 658 @region.conditional_cache_on_arguments(condition=cache_on)
655 659 def _fctx_flags(_repo_id, _commit_id, _path):
656 660 repo = self._factory.repo(wire)
657 661 ctx = self._get_ctx(repo, commit_id)
658 662 fctx = ctx.filectx(safe_bytes(path))
659 663 return fctx.flags()
660 664
661 665 return _fctx_flags(repo_id, commit_id, path)
662 666
663 667 @reraise_safe_exceptions
664 668 def fctx_size(self, wire, commit_id, path):
665 669 cache_on, context_uid, repo_id = self._cache_on(wire)
666 670 region = self._region(wire)
667 671
668 672 @region.conditional_cache_on_arguments(condition=cache_on)
669 673 def _fctx_size(_repo_id, _revision, _path):
670 674 repo = self._factory.repo(wire)
671 675 ctx = self._get_ctx(repo, commit_id)
672 676 fctx = ctx.filectx(safe_bytes(path))
673 677 return fctx.size()
674 678 return _fctx_size(repo_id, commit_id, path)
675 679
676 680 @reraise_safe_exceptions
677 681 def get_all_commit_ids(self, wire, name):
678 682 cache_on, context_uid, repo_id = self._cache_on(wire)
679 683 region = self._region(wire)
680 684
681 685 @region.conditional_cache_on_arguments(condition=cache_on)
682 686 def _get_all_commit_ids(_context_uid, _repo_id, _name):
683 687 repo = self._factory.repo(wire)
684 688 revs = [ascii_str(repo[x].hex()) for x in repo.filtered(b'visible').changelog.revs()]
685 689 return revs
686 690 return _get_all_commit_ids(context_uid, repo_id, name)
687 691
688 692 @reraise_safe_exceptions
689 693 def get_config_value(self, wire, section, name, untrusted=False):
690 694 repo = self._factory.repo(wire)
691 695 return repo.ui.config(ascii_bytes(section), ascii_bytes(name), untrusted=untrusted)
692 696
693 697 @reraise_safe_exceptions
694 698 def is_large_file(self, wire, commit_id, path):
695 699 cache_on, context_uid, repo_id = self._cache_on(wire)
696 700 region = self._region(wire)
697 701
698 702 @region.conditional_cache_on_arguments(condition=cache_on)
699 703 def _is_large_file(_context_uid, _repo_id, _commit_id, _path):
700 704 return largefiles.lfutil.isstandin(safe_bytes(path))
701 705
702 706 return _is_large_file(context_uid, repo_id, commit_id, path)
703 707
704 708 @reraise_safe_exceptions
705 709 def is_binary(self, wire, revision, path):
706 710 cache_on, context_uid, repo_id = self._cache_on(wire)
707 711 region = self._region(wire)
708 712
709 713 @region.conditional_cache_on_arguments(condition=cache_on)
710 714 def _is_binary(_repo_id, _sha, _path):
711 715 repo = self._factory.repo(wire)
712 716 ctx = self._get_ctx(repo, revision)
713 717 fctx = ctx.filectx(safe_bytes(path))
714 718 return fctx.isbinary()
715 719
716 720 return _is_binary(repo_id, revision, path)
717 721
718 722 @reraise_safe_exceptions
719 723 def md5_hash(self, wire, revision, path):
720 724 cache_on, context_uid, repo_id = self._cache_on(wire)
721 725 region = self._region(wire)
722 726
723 727 @region.conditional_cache_on_arguments(condition=cache_on)
724 728 def _md5_hash(_repo_id, _sha, _path):
725 729 repo = self._factory.repo(wire)
726 730 ctx = self._get_ctx(repo, revision)
727 731 fctx = ctx.filectx(safe_bytes(path))
728 732 return hashlib.md5(fctx.data()).hexdigest()
729 733
730 734 return _md5_hash(repo_id, revision, path)
731 735
732 736 @reraise_safe_exceptions
733 737 def in_largefiles_store(self, wire, sha):
734 738 repo = self._factory.repo(wire)
735 739 return largefiles.lfutil.instore(repo, sha)
736 740
737 741 @reraise_safe_exceptions
738 742 def in_user_cache(self, wire, sha):
739 743 repo = self._factory.repo(wire)
740 744 return largefiles.lfutil.inusercache(repo.ui, sha)
741 745
742 746 @reraise_safe_exceptions
743 747 def store_path(self, wire, sha):
744 748 repo = self._factory.repo(wire)
745 749 return largefiles.lfutil.storepath(repo, sha)
746 750
747 751 @reraise_safe_exceptions
748 752 def link(self, wire, sha, path):
749 753 repo = self._factory.repo(wire)
750 754 largefiles.lfutil.link(
751 755 largefiles.lfutil.usercachepath(repo.ui, sha), path)
752 756
753 757 @reraise_safe_exceptions
754 758 def localrepository(self, wire, create=False):
755 759 self._factory.repo(wire, create=create)
756 760
757 761 @reraise_safe_exceptions
758 762 def lookup(self, wire, revision, both):
759 763 cache_on, context_uid, repo_id = self._cache_on(wire)
760 764 region = self._region(wire)
761 765
762 766 @region.conditional_cache_on_arguments(condition=cache_on)
763 767 def _lookup(_context_uid, _repo_id, _revision, _both):
764 768 repo = self._factory.repo(wire)
765 769 rev = _revision
766 770 if isinstance(rev, int):
767 771 # NOTE(marcink):
768 772 # since Mercurial doesn't support negative indexes properly
769 773 # we need to shift accordingly by one to get the proper index, e.g.
770 774 # repo[-1] => repo[-2]
771 775 # repo[0] => repo[-1]
772 776 if rev <= 0:
773 777 rev = rev + -1
774 778 try:
775 779 ctx = self._get_ctx(repo, rev)
776 780 except AmbiguousPrefixLookupError:
777 781 e = RepoLookupError(rev)
778 782 e._org_exc_tb = format_exc(sys.exc_info())
779 783 raise exceptions.LookupException(e)(rev)
780 784 except (TypeError, RepoLookupError, binascii.Error) as e:
781 785 e._org_exc_tb = format_exc(sys.exc_info())
782 786 raise exceptions.LookupException(e)(rev)
783 787 except LookupError as e:
784 788 e._org_exc_tb = format_exc(sys.exc_info())
785 789 raise exceptions.LookupException(e)(e.name)
786 790
787 791 if not both:
788 792 return ctx.hex()
789 793
790 794 ctx = repo[ctx.hex()]
791 795 return ctx.hex(), ctx.rev()
792 796
793 797 return _lookup(context_uid, repo_id, revision, both)
794 798
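# Worked sketch of the index shift in _lookup: for rev <= 0 the revision is
# shifted down by one, so Python-style indexes map onto Mercurial's revs.
def _shift(rev):
    return rev + -1 if rev <= 0 else rev

assert _shift(0) == -1   # repo[0]  is resolved as repo[-1]
assert _shift(-1) == -2  # repo[-1] is resolved as repo[-2]
assert _shift(7) == 7    # positive revisions pass through unchanged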
795 799 @reraise_safe_exceptions
796 800 def sync_push(self, wire, url):
797 801 if not self.check_url(url, wire['config']):
798 802 return
799 803
800 804 repo = self._factory.repo(wire)
801 805
802 806 # Disable any prompts for this repo
803 repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
807 repo.ui.setconfig(b'ui', b'interactive', b'false', b'-y')
804 808
805 809 bookmarks = list(dict(repo._bookmarks).keys())
806 810 remote = peer(repo, {}, safe_bytes(url))
807 811 # Disable any prompts for this remote
808 remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
812 remote.ui.setconfig(b'ui', b'interactive', b'false', b'-y')
809 813
810 814 return exchange.push(
811 815 repo, remote, newbranch=True, bookmarks=bookmarks).cgresult
812 816
813 817 @reraise_safe_exceptions
814 818 def revision(self, wire, rev):
815 819 repo = self._factory.repo(wire)
816 820 ctx = self._get_ctx(repo, rev)
817 821 return ctx.rev()
818 822
819 823 @reraise_safe_exceptions
820 824 def rev_range(self, wire, commit_filter):
821 825 cache_on, context_uid, repo_id = self._cache_on(wire)
822 826 region = self._region(wire)
823 827
824 828 @region.conditional_cache_on_arguments(condition=cache_on)
825 829 def _rev_range(_context_uid, _repo_id, _filter):
826 830 repo = self._factory.repo(wire)
827 831 revisions = [
828 832 ascii_str(repo[rev].hex())
829 833 for rev in revrange(repo, list(map(ascii_bytes, commit_filter)))
830 834 ]
831 835 return revisions
832 836
833 837 return _rev_range(context_uid, repo_id, sorted(commit_filter))
834 838
835 839 @reraise_safe_exceptions
836 840 def rev_range_hash(self, wire, node):
837 841 repo = self._factory.repo(wire)
838 842
839 843 def get_revs(repo, rev_opt):
840 844 if rev_opt:
841 845 revs = revrange(repo, rev_opt)
842 846 if len(revs) == 0:
843 847 return (nullrev, nullrev)
844 848 return max(revs), min(revs)
845 849 else:
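# no revision option given: cover the whole repo, from rev 0 up to tip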
846 850 return len(repo) - 1, 0
847 851
848 852 stop, start = get_revs(repo, [node + ':'])
849 853 revs = [ascii_str(repo[r].hex()) for r in range(start, stop + 1)]
850 854 return revs
851 855
852 856 @reraise_safe_exceptions
853 857 def revs_from_revspec(self, wire, rev_spec, *args, **kwargs):
854 858 org_path = safe_bytes(wire["path"])
855 859 other_path = safe_bytes(kwargs.pop('other_path', ''))
856 860
857 861 # case when we want to compare two independent repositories
858 862 if other_path and other_path != wire["path"]:
859 863 baseui = self._factory._create_config(wire["config"])
860 864 repo = unionrepo.makeunionrepository(baseui, other_path, org_path)
861 865 else:
862 866 repo = self._factory.repo(wire)
863 867 return list(repo.revs(rev_spec, *args))
864 868
865 869 @reraise_safe_exceptions
866 870 def verify(self, wire):
867 871 repo = self._factory.repo(wire)
868 872 baseui = self._factory._create_config(wire['config'])
869 873
870 874 baseui, output = patch_ui_message_output(baseui)
871 875
872 876 repo.ui = baseui
873 877 verify.verify(repo)
874 878 return output.getvalue()
875 879
876 880 @reraise_safe_exceptions
877 881 def hg_update_cache(self, wire):
878 882 repo = self._factory.repo(wire)
879 883 baseui = self._factory._create_config(wire['config'])
880 884 baseui, output = patch_ui_message_output(baseui)
881 885
882 886 repo.ui = baseui
883 887 with repo.wlock(), repo.lock():
884 888 repo.updatecaches(full=True)
885 889
886 890 return output.getvalue()
887 891
888 892 @reraise_safe_exceptions
889 893 def hg_rebuild_fn_cache(self, wire):
890 894 repo = self._factory.repo(wire)
891 895 baseui = self._factory._create_config(wire['config'])
892 896 baseui, output = patch_ui_message_output(baseui)
893 897
894 898 repo.ui = baseui
895 899
896 900 repair.rebuildfncache(baseui, repo)
897 901
898 902 return output.getvalue()
899 903
900 904 @reraise_safe_exceptions
901 905 def tags(self, wire):
902 906 cache_on, context_uid, repo_id = self._cache_on(wire)
903 907 region = self._region(wire)
904 908
905 909 @region.conditional_cache_on_arguments(condition=cache_on)
906 910 def _tags(_context_uid, _repo_id):
907 911 repo = self._factory.repo(wire)
908 912 return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo.tags().items()}
909 913
910 914 return _tags(context_uid, repo_id)
911 915
912 916 @reraise_safe_exceptions
913 917 def update(self, wire, node='', clean=False):
914 918 repo = self._factory.repo(wire)
915 919 baseui = self._factory._create_config(wire['config'])
916 920 node = safe_bytes(node)
917 921
918 922 commands.update(baseui, repo, node=node, clean=clean)
919 923
920 924 @reraise_safe_exceptions
921 925 def identify(self, wire):
922 926 repo = self._factory.repo(wire)
923 927 baseui = self._factory._create_config(wire['config'])
924 928 output = io.BytesIO()
925 929 baseui.write = output.write
926 930 # This is required to get a full node id
927 931 baseui.debugflag = True
928 932 commands.identify(baseui, repo, id=True)
929 933
930 934 return output.getvalue()
931 935
932 936 @reraise_safe_exceptions
933 937 def heads(self, wire, branch=None):
934 938 repo = self._factory.repo(wire)
935 939 baseui = self._factory._create_config(wire['config'])
936 940 output = io.BytesIO()
937 941
938 942 def write(data, **unused_kwargs):
939 943 output.write(data)
940 944
941 945 baseui.write = write
942 946 if branch:
943 947 args = [safe_bytes(branch)]
944 948 else:
945 949 args = []
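# the b'{node} ' template makes hg print space-separated full node ids,
# e.g. b'<40-hex-node> <40-hex-node> '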
946 950 commands.heads(baseui, repo, template=b'{node} ', *args)
947 951
948 952 return output.getvalue()
949 953
950 954 @reraise_safe_exceptions
951 955 def ancestor(self, wire, revision1, revision2):
952 956 repo = self._factory.repo(wire)
953 957 changelog = repo.changelog
954 958 lookup = repo.lookup
955 959 a = changelog.ancestor(lookup(safe_bytes(revision1)), lookup(safe_bytes(revision2)))
956 960 return hex(a)
957 961
958 962 @reraise_safe_exceptions
959 963 def clone(self, wire, source, dest, update_after_clone=False, hooks=True):
960 964 baseui = self._factory._create_config(wire["config"], hooks=hooks)
961 965 clone(baseui, safe_bytes(source), safe_bytes(dest), noupdate=not update_after_clone)
962 966
963 967 @reraise_safe_exceptions
964 968 def commitctx(self, wire, message, parents, commit_time, commit_timezone, user, files, extra, removed, updated):
965 969
966 970 repo = self._factory.repo(wire)
967 971 baseui = self._factory._create_config(wire['config'])
968 972 publishing = baseui.configbool(b'phases', b'publish')
969 973
970 974 def _filectxfn(_repo, ctx, path: bytes):
971 975 """
972 976 Marks given path as added/changed/removed in a given _repo. This is
973 977 for internal mercurial commit function.
974 978 """
975 979
976 980 # check if this path is removed
977 981 if safe_str(path) in removed:
978 982 # returning None is a way to mark node for removal
979 983 return None
980 984
981 985 # check if this path is added
982 986 for node in updated:
983 987 if safe_bytes(node['path']) == path:
984 988 return memfilectx(
985 989 _repo,
986 990 changectx=ctx,
987 991 path=safe_bytes(node['path']),
988 992 data=safe_bytes(node['content']),
989 993 islink=False,
990 994 isexec=bool(node['mode'] & stat.S_IXUSR),
991 995 copysource=False)
992 996 abort_exc = exceptions.AbortException()
993 997 raise abort_exc(f"Given path hasn't been marked as added, changed or removed ({path})")
994 998
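# honour the repo's phases.publish setting: publishing repos get public commits, non-publishing ones get drafts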
995 999 if publishing:
996 1000 new_commit_phase = b'public'
997 1001 else:
998 1002 new_commit_phase = b'draft'
999 1003 with repo.ui.configoverride({(b'phases', b'new-commit'): new_commit_phase}):
1000 1004 kwargs = {safe_bytes(k): safe_bytes(v) for k, v in extra.items()}
1001 1005 commit_ctx = memctx(
1002 1006 repo=repo,
1003 1007 parents=parents,
1004 1008 text=safe_bytes(message),
1005 1009 files=[safe_bytes(x) for x in files],
1006 1010 filectxfn=_filectxfn,
1007 1011 user=safe_bytes(user),
1008 1012 date=(commit_time, commit_timezone),
1009 1013 extra=kwargs)
1010 1014
1011 1015 n = repo.commitctx(commit_ctx)
1012 1016 new_id = hex(n)
1013 1017
1014 1018 return new_id
1015 1019
1016 1020 @reraise_safe_exceptions
1017 1021 def pull(self, wire, url, commit_ids=None):
1018 1022 repo = self._factory.repo(wire)
1019 1023 # Disable any prompts for this repo
1020 repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
1024 repo.ui.setconfig(b'ui', b'interactive', b'false', b'-y')
1021 1025
1022 1026 remote = peer(repo, {}, safe_bytes(url))
1023 1027 # Disable any prompts for this remote
1024 remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
1028 remote.ui.setconfig(b'ui', b'interactive', b'false', b'-y')
1025 1029
1026 1030 if commit_ids:
1027 1031 commit_ids = [bin(commit_id) for commit_id in commit_ids]
1028 1032
1029 1033 return exchange.pull(
1030 1034 repo, remote, heads=commit_ids, force=None).cgresult
1031 1035
1032 1036 @reraise_safe_exceptions
1033 1037 def pull_cmd(self, wire, source, bookmark='', branch='', revision='', hooks=True):
1034 1038 repo = self._factory.repo(wire)
1035 1039 baseui = self._factory._create_config(wire['config'], hooks=hooks)
1036 1040
1037 1041 source = safe_bytes(source)
1038 1042
1039 1043 # Mercurial internally has a lot of logic that checks ONLY whether
1040 1044 # an option is defined, so we pass options only when they are defined
1041 opts = {}
1045 opts = {"remote_hidden": False}
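# remote_hidden=False matches Mercurial's --remote-hidden pull option, so hidden changesets are never requested from the remote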
1042 1046
1043 1047 if bookmark:
1044 1048 opts['bookmark'] = [safe_bytes(x) for x in bookmark] \
1045 1049 if isinstance(bookmark, list) else safe_bytes(bookmark)
1046 1050
1047 1051 if branch:
1048 1052 opts['branch'] = [safe_bytes(x) for x in branch] \
1049 1053 if isinstance(branch, list) else safe_bytes(branch)
1050 1054
1051 1055 if revision:
1052 1056 opts['rev'] = [safe_bytes(x) for x in revision] \
1053 1057 if isinstance(revision, list) else safe_bytes(revision)
1054 1058
1055 1059 commands.pull(baseui, repo, source, **opts)
1056 1060
1057 1061 @reraise_safe_exceptions
1058 1062 def push(self, wire, revisions, dest_path, hooks: bool = True, push_branches: bool = False):
1059 1063 repo = self._factory.repo(wire)
1060 1064 baseui = self._factory._create_config(wire['config'], hooks=hooks)
1061 1065
1062 1066 revisions = [safe_bytes(x) for x in revisions] \
1063 1067 if isinstance(revisions, list) else safe_bytes(revisions)
1064 1068
1065 1069 commands.push(baseui, repo, safe_bytes(dest_path),
1066 1070 rev=revisions,
1067 1071 new_branch=push_branches)
1068 1072
1069 1073 @reraise_safe_exceptions
1070 1074 def strip(self, wire, revision, update, backup):
1071 1075 repo = self._factory.repo(wire)
1072 1076 ctx = self._get_ctx(repo, revision)
1073 1077 hgext_strip.strip(
1074 1078 repo.baseui, repo, ctx.node(), update=update, backup=backup)
1075 1079
1076 1080 @reraise_safe_exceptions
1077 1081 def get_unresolved_files(self, wire):
1078 1082 repo = self._factory.repo(wire)
1079 1083
1080 1084 log.debug('Calculating unresolved files for repo: %s', repo)
1081 1085 output = io.BytesIO()
1082 1086
1083 1087 def write(data, **unused_kwargs):
1084 1088 output.write(data)
1085 1089
1086 1090 baseui = self._factory._create_config(wire['config'])
1087 1091 baseui.write = write
1088 1092
1089 1093 commands.resolve(baseui, repo, list=True)
1090 1094 unresolved = output.getvalue().splitlines(False)
1091 1095 return unresolved
1092 1096
1093 1097 @reraise_safe_exceptions
1094 1098 def merge(self, wire, revision):
1095 1099 repo = self._factory.repo(wire)
1096 1100 baseui = self._factory._create_config(wire['config'])
1097 1101 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
1098 1102
1099 1103 # If sub repositories are used, Mercurial prompts the user in
1100 1104 # case of merge conflicts or different sub repository sources. By
1101 1105 # setting the interactive flag to `False`, Mercurial doesn't prompt the
1102 1106 # user but instead uses a default value.
1103 repo.ui.setconfig(b'ui', b'interactive', False)
1107 repo.ui.setconfig(b'ui', b'interactive', b'false')
1104 1108 commands.merge(baseui, repo, rev=safe_bytes(revision))
1105 1109
1106 1110 @reraise_safe_exceptions
1107 1111 def merge_state(self, wire):
1108 1112 repo = self._factory.repo(wire)
1109 1113 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
1110 1114
1111 1115 # If sub repositories are used, Mercurial prompts the user in
1112 1116 # case of merge conflicts or different sub repository sources. By
1113 1117 # setting the interactive flag to `False`, Mercurial doesn't prompt the
1114 1118 # user but instead uses a default value.
1115 repo.ui.setconfig(b'ui', b'interactive', False)
1119 repo.ui.setconfig(b'ui', b'interactive', b'false')
1116 1120 ms = hg_merge.mergestate(repo)
1117 1121 return list(ms.unresolved())
1118 1122
1119 1123 @reraise_safe_exceptions
1120 1124 def commit(self, wire, message, username, close_branch=False):
1121 1125 repo = self._factory.repo(wire)
1122 1126 baseui = self._factory._create_config(wire['config'])
1123 1127 repo.ui.setconfig(b'ui', b'username', safe_bytes(username))
1124 1128 commands.commit(baseui, repo, message=safe_bytes(message), close_branch=close_branch)
1125 1129
1126 1130 @reraise_safe_exceptions
1127 1131 def rebase(self, wire, source='', dest='', abort=False):
1128 1132
1129 1133 repo = self._factory.repo(wire)
1130 1134 baseui = self._factory._create_config(wire['config'])
1131 1135 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
1132 1136 # If sub repositories are used, Mercurial prompts the user in
1133 1137 # case of merge conflicts or different sub repository sources. By
1134 1138 # setting the interactive flag to `False`, Mercurial doesn't prompt the
1135 1139 # user but instead uses a default value.
1136 repo.ui.setconfig(b'ui', b'interactive', False)
1140 repo.ui.setconfig(b'ui', b'interactive', b'false')
1137 1141
1138 1142 rebase_kws = dict(
1139 1143 keep=not abort,
1140 1144 abort=abort
1141 1145 )
1142 1146
1143 1147 if source:
1144 1148 source = repo[source]
1145 1149 rebase_kws['base'] = [source.hex()]
1146 1150 if dest:
1147 1151 dest = repo[dest]
1148 1152 rebase_kws['dest'] = dest.hex()
1149 1153
1150 1154 rebase.rebase(baseui, repo, **rebase_kws)
1151 1155
1152 1156 @reraise_safe_exceptions
1153 1157 def tag(self, wire, name, revision, message, local, user, tag_time, tag_timezone):
1154 1158 repo = self._factory.repo(wire)
1155 1159 ctx = self._get_ctx(repo, revision)
1156 1160 node = ctx.node()
1157 1161
1158 1162 date = (tag_time, tag_timezone)
1159 1163 try:
1160 1164 hg_tag.tag(repo, safe_bytes(name), node, safe_bytes(message), local, safe_bytes(user), date)
1161 1165 except Abort as e:
1162 1166 log.exception("Tag operation aborted")
1163 1167 # The exception can contain unicode, which we convert via repr
1164 1168 raise exceptions.AbortException(e)(repr(e))
1165 1169
1166 1170 @reraise_safe_exceptions
1167 1171 def bookmark(self, wire, bookmark, revision=''):
1168 1172 repo = self._factory.repo(wire)
1169 1173 baseui = self._factory._create_config(wire['config'])
1170 1174 revision = revision or ''
1171 1175 commands.bookmark(baseui, repo, safe_bytes(bookmark), rev=safe_bytes(revision), force=True)
1172 1176
1173 1177 @reraise_safe_exceptions
1174 1178 def install_hooks(self, wire, force=False):
1175 1179 # we don't need any special hooks for Mercurial
1176 1180 pass
1177 1181
1178 1182 @reraise_safe_exceptions
1179 1183 def get_hooks_info(self, wire):
1180 1184 return {
1181 1185 'pre_version': vcsserver.get_version(),
1182 1186 'post_version': vcsserver.get_version(),
1183 1187 }
1184 1188
1185 1189 @reraise_safe_exceptions
1186 1190 def set_head_ref(self, wire, head_name):
1187 1191 pass
1188 1192
1189 1193 @reraise_safe_exceptions
1190 1194 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1191 1195 archive_dir_name, commit_id, cache_config):
1192 1196
1193 1197 def file_walker(_commit_id, path):
1194 1198 repo = self._factory.repo(wire)
1195 1199 ctx = repo[_commit_id]
1196 1200 is_root = path in ['', '/']
1197 1201 if is_root:
1198 1202 matcher = alwaysmatcher(badfn=None)
1199 1203 else:
1200 1204 matcher = patternmatcher('', [(b'glob', safe_bytes(path)+b'/**', b'')], badfn=None)
1201 1205 file_iter = ctx.manifest().walk(matcher)
1202 1206
1203 1207 for fn in file_iter:
1204 1208 file_path = fn
1205 1209 flags = ctx.flags(fn)
1206 1210 mode = 0o755 if b'x' in flags else 0o644
1207 1211 is_link = b'l' in flags
1208 1212
1209 1213 yield ArchiveNode(file_path, mode, is_link, ctx[fn].data)
1210 1214
1211 1215 return store_archive_in_cache(
1212 1216 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
1213 1217
@@ -1,954 +1,959 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18
19 19 import os
20 20 import subprocess
21 21 from urllib.error import URLError
22 22 import urllib.parse
23 23 import logging
24 24 import posixpath as vcspath
25 25 import io
26 26 import urllib.request
27 27 import urllib.parse
28 28 import urllib.error
29 29 import traceback
30 30
31
32 31 import svn.client # noqa
33 32 import svn.core # noqa
34 33 import svn.delta # noqa
35 34 import svn.diff # noqa
36 35 import svn.fs # noqa
37 36 import svn.repos # noqa
38 37
39 38 import rhodecode
40 39 from vcsserver import svn_diff, exceptions, subprocessio, settings
41 40 from vcsserver.base import (
42 41 RepoFactory,
43 42 raise_from_original,
44 43 ArchiveNode,
45 44 store_archive_in_cache,
46 45 BytesEnvelope,
47 46 BinaryEnvelope,
48 47 )
49 48 from vcsserver.exceptions import NoContentException
50 from vcsserver.str_utils import safe_str, safe_bytes
51 from vcsserver.type_utils import assert_bytes
52 49 from vcsserver.vcs_base import RemoteBase
50 from vcsserver.lib.str_utils import safe_str, safe_bytes
51 from vcsserver.lib.type_utils import assert_bytes
53 52 from vcsserver.lib.svnremoterepo import svnremoterepo
53 from vcsserver.lib.svn_txn_utils import store_txn_id_data
54 54
55 55 log = logging.getLogger(__name__)
56 56
57 57
58 58 svn_compatible_versions_map = {
59 59 'pre-1.4-compatible': '1.3',
60 60 'pre-1.5-compatible': '1.4',
61 61 'pre-1.6-compatible': '1.5',
62 62 'pre-1.8-compatible': '1.7',
63 63 'pre-1.9-compatible': '1.8',
64 64 }
65 65
66 66 current_compatible_version = '1.14'
67 67
68 68
69 69 def reraise_safe_exceptions(func):
70 70 """Decorator for converting svn exceptions to something neutral."""
71 71 def wrapper(*args, **kwargs):
72 72 try:
73 73 return func(*args, **kwargs)
74 74 except Exception as e:
75 75 if not hasattr(e, '_vcs_kind'):
76 76 log.exception("Unhandled exception in svn remote call")
77 77 raise_from_original(exceptions.UnhandledException(e), e)
78 78 raise
79 79 return wrapper
80 80
81 81
82 82 class SubversionFactory(RepoFactory):
83 83 repo_type = 'svn'
84 84
85 85 def _create_repo(self, wire, create, compatible_version):
86 86 path = svn.core.svn_path_canonicalize(wire['path'])
87 87 if create:
88 88 fs_config = {'compatible-version': current_compatible_version}
89 89 if compatible_version:
90 90
91 91 compatible_version_string = \
92 92 svn_compatible_versions_map.get(compatible_version) \
93 93 or compatible_version
94 94 fs_config['compatible-version'] = compatible_version_string
95 95
96 96 log.debug('Create SVN repo with config `%s`', fs_config)
97 97 repo = svn.repos.create(path, "", "", None, fs_config)
98 98 else:
99 99 repo = svn.repos.open(path)
100 100
101 101 log.debug('repository created: got SVN object: %s', repo)
102 102 return repo
103 103
104 104 def repo(self, wire, create=False, compatible_version=None):
105 105 """
106 106 Get a repository instance for the given path.
107 107 """
108 108 return self._create_repo(wire, create, compatible_version)
109 109
110 110
111 111 NODE_TYPE_MAPPING = {
112 112 svn.core.svn_node_file: 'file',
113 113 svn.core.svn_node_dir: 'dir',
114 114 }
115 115
116 116
117 117 class SvnRemote(RemoteBase):
118 118
119 119 def __init__(self, factory, hg_factory=None):
120 120 self._factory = factory
121 121
122 122 self._bulk_methods = {
123 123 # NOT supported in SVN ATM...
124 124 }
125 125 self._bulk_file_methods = {
126 126 "size": self.get_file_size,
127 127 "data": self.get_file_content,
128 128 "flags": self.get_node_type,
129 129 "is_binary": self.is_binary,
130 130 "md5": self.md5_hash
131 131 }
132 132
133 133 @reraise_safe_exceptions
134 134 def bulk_file_request(self, wire, commit_id, path, pre_load):
135 135 cache_on, context_uid, repo_id = self._cache_on(wire)
136 136 region = self._region(wire)
137 137
138 138 # since we use a unified API, we need to cast from str to int for SVN
139 139 commit_id = int(commit_id)
140 140
141 141 @region.conditional_cache_on_arguments(condition=cache_on)
142 142 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
143 143 result = {}
144 144 for attr in pre_load:
145 145 try:
146 146 method = self._bulk_file_methods[attr]
147 147 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
148 148 result[attr] = method(wire, _commit_id, _path)
149 149 except KeyError as e:
150 150 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
151 151 return result
152 152
153 153 return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
154 154
155 155 @reraise_safe_exceptions
156 156 def discover_svn_version(self):
157 157 try:
158 158 import svn.core
159 159 svn_ver = svn.core.SVN_VERSION
160 160 except ImportError:
161 161 svn_ver = None
162 162 return safe_str(svn_ver)
163 163
164 164 @reraise_safe_exceptions
165 165 def is_empty(self, wire):
166 166 try:
167 167 return self.lookup(wire, -1) == 0
168 168 except Exception:
169 169 log.exception("failed to read object_store")
170 170 return False
171 171
172 172 def check_url(self, url, config):
173 173
174 174 # the uuid function returns a valid UUID only for a proper repo,
175 175 # otherwise it raises an exception
176 176 username, password, src_url = self.get_url_and_credentials(url)
177 177 try:
178 178 svnremoterepo(safe_bytes(username), safe_bytes(password), safe_bytes(src_url)).svn().uuid
179 179 except Exception:
180 180 tb = traceback.format_exc()
181 181 log.debug("Invalid Subversion url: `%s`, tb: %s", url, tb)
182 182 raise URLError(f'"{url}" is not a valid Subversion source url.')
183 183 return True
184 184
185 185 def is_path_valid_repository(self, wire, path):
186 186 # NOTE(marcink): short circuit the check for SVN repo;
187 187 # repos.open might be expensive, but we have one cheap
188 188 # pre-condition we can use: checking for the 'format' file
189 189 if not os.path.isfile(os.path.join(path, 'format')):
190 190 return False
191 191
192 192 cache_on, context_uid, repo_id = self._cache_on(wire)
193 193 region = self._region(wire)
194 194
195 195 @region.conditional_cache_on_arguments(condition=cache_on)
196 196 def _assert_correct_path(_context_uid, _repo_id, fast_check):
197 197
198 198 try:
199 199 svn.repos.open(path)
200 200 except svn.core.SubversionException:
201 201 tb = traceback.format_exc()
202 202 log.debug("Invalid Subversion path `%s`, tb: %s", path, tb)
203 203 return False
204 204 return True
205 205
206 206 return _assert_correct_path(context_uid, repo_id, True)
207 207
208 208 @reraise_safe_exceptions
209 209 def verify(self, wire):
210 210 repo_path = wire['path']
211 211 if not self.is_path_valid_repository(wire, repo_path):
212 212 raise Exception(
213 213 f"Path {repo_path} is not a valid Subversion repository.")
214 214
215 215 cmd = ['svnadmin', 'info', repo_path]
216 216 stdout, stderr = subprocessio.run_command(cmd)
217 217 return stdout
218 218
219 219 @reraise_safe_exceptions
220 220 def lookup(self, wire, revision):
221 221 if revision not in [-1, None, 'HEAD']:
222 222 raise NotImplementedError
223 223 repo = self._factory.repo(wire)
224 224 fs_ptr = svn.repos.fs(repo)
225 225 head = svn.fs.youngest_rev(fs_ptr)
226 226 return head
227 227
228 228 @reraise_safe_exceptions
229 229 def lookup_interval(self, wire, start_ts, end_ts):
230 230 repo = self._factory.repo(wire)
231 231 fsobj = svn.repos.fs(repo)
232 232 start_rev = None
233 233 end_rev = None
234 234 if start_ts:
235 235 start_ts_svn = apr_time_t(start_ts)
236 236 start_rev = svn.repos.dated_revision(repo, start_ts_svn) + 1
237 237 else:
238 238 start_rev = 1
239 239 if end_ts:
240 240 end_ts_svn = apr_time_t(end_ts)
241 241 end_rev = svn.repos.dated_revision(repo, end_ts_svn)
242 242 else:
243 243 end_rev = svn.fs.youngest_rev(fsobj)
244 244 return start_rev, end_rev
245 245
246 246 @reraise_safe_exceptions
247 247 def revision_properties(self, wire, revision):
248 248
249 249 cache_on, context_uid, repo_id = self._cache_on(wire)
250 250 region = self._region(wire)
251 251
252 252 @region.conditional_cache_on_arguments(condition=cache_on)
253 253 def _revision_properties(_repo_id, _revision):
254 254 repo = self._factory.repo(wire)
255 255 fs_ptr = svn.repos.fs(repo)
256 256 return svn.fs.revision_proplist(fs_ptr, revision)
257 257 return _revision_properties(repo_id, revision)
258 258
259 259 def revision_changes(self, wire, revision):
260 260
261 261 repo = self._factory.repo(wire)
262 262 fsobj = svn.repos.fs(repo)
263 263 rev_root = svn.fs.revision_root(fsobj, revision)
264 264
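# replay the revision through a ChangeCollector editor to record per-path change actions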
265 265 editor = svn.repos.ChangeCollector(fsobj, rev_root)
266 266 editor_ptr, editor_baton = svn.delta.make_editor(editor)
267 267 base_dir = ""
268 268 send_deltas = False
269 269 svn.repos.replay2(
270 270 rev_root, base_dir, svn.core.SVN_INVALID_REVNUM, send_deltas,
271 271 editor_ptr, editor_baton, None)
272 272
273 273 added = []
274 274 changed = []
275 275 removed = []
276 276
277 277 # TODO: CHANGE_ACTION_REPLACE: Figure out where it belongs
278 278 for path, change in editor.changes.items():
279 279 # TODO: Decide what to do with directory nodes. Subversion can add
280 280 # empty directories.
281 281
282 282 if change.item_kind == svn.core.svn_node_dir:
283 283 continue
284 284 if change.action in [svn.repos.CHANGE_ACTION_ADD]:
285 285 added.append(path)
286 286 elif change.action in [svn.repos.CHANGE_ACTION_MODIFY,
287 287 svn.repos.CHANGE_ACTION_REPLACE]:
288 288 changed.append(path)
289 289 elif change.action in [svn.repos.CHANGE_ACTION_DELETE]:
290 290 removed.append(path)
291 291 else:
292 292 raise NotImplementedError(
293 293 "Action {} not supported on path {}".format(
294 294 change.action, path))
295 295
296 296 changes = {
297 297 'added': added,
298 298 'changed': changed,
299 299 'removed': removed,
300 300 }
301 301 return changes
302 302
303 303 @reraise_safe_exceptions
304 304 def node_history(self, wire, path, revision, limit):
305 305 cache_on, context_uid, repo_id = self._cache_on(wire)
306 306 region = self._region(wire)
307 307
308 308 @region.conditional_cache_on_arguments(condition=cache_on)
309 309 def _node_history(_context_uid, _repo_id, _path, _revision, _limit):
310 310 cross_copies = False
311 311 repo = self._factory.repo(wire)
312 312 fsobj = svn.repos.fs(repo)
313 313 rev_root = svn.fs.revision_root(fsobj, revision)
314 314
315 315 history_revisions = []
316 316 history = svn.fs.node_history(rev_root, path)
317 317 history = svn.fs.history_prev(history, cross_copies)
318 318 while history:
319 319 __, node_revision = svn.fs.history_location(history)
320 320 history_revisions.append(node_revision)
321 321 if limit and len(history_revisions) >= limit:
322 322 break
323 323 history = svn.fs.history_prev(history, cross_copies)
324 324 return history_revisions
325 325 return _node_history(context_uid, repo_id, path, revision, limit)
326 326
327 327 @reraise_safe_exceptions
328 328 def node_properties(self, wire, path, revision):
329 329 cache_on, context_uid, repo_id = self._cache_on(wire)
330 330 region = self._region(wire)
331 331
332 332 @region.conditional_cache_on_arguments(condition=cache_on)
333 333 def _node_properties(_repo_id, _path, _revision):
334 334 repo = self._factory.repo(wire)
335 335 fsobj = svn.repos.fs(repo)
336 336 rev_root = svn.fs.revision_root(fsobj, revision)
337 337 return svn.fs.node_proplist(rev_root, path)
338 338 return _node_properties(repo_id, path, revision)
339 339
340 340 def file_annotate(self, wire, path, revision):
341 341 abs_path = 'file://' + urllib.request.pathname2url(
342 342 vcspath.join(wire['path'], path))
343 343 file_uri = svn.core.svn_path_canonicalize(abs_path)
344 344
345 345 start_rev = svn_opt_revision_value_t(0)
346 346 peg_rev = svn_opt_revision_value_t(revision)
347 347 end_rev = peg_rev
348 348
349 349 annotations = []
350 350
351 351 def receiver(line_no, revision, author, date, line, pool):
352 352 annotations.append((line_no, revision, line))
353 353
354 354 # TODO: Cannot use blame5, missing typemap function in the swig code
355 355 try:
356 356 svn.client.blame2(
357 357 file_uri, peg_rev, start_rev, end_rev,
358 358 receiver, svn.client.create_context())
359 359 except svn.core.SubversionException as exc:
360 360 log.exception("Error during blame operation.")
361 361 raise Exception(
362 362 f"Blame not supported or file does not exist at path {path}. "
363 363 f"Error {exc}.")
364 364
365 365 return BinaryEnvelope(annotations)
366 366
367 367 @reraise_safe_exceptions
368 368 def get_node_type(self, wire, revision=None, path=''):
369 369
370 370 cache_on, context_uid, repo_id = self._cache_on(wire)
371 371 region = self._region(wire)
372 372
373 373 @region.conditional_cache_on_arguments(condition=cache_on)
374 374 def _get_node_type(_repo_id, _revision, _path):
375 375 repo = self._factory.repo(wire)
376 376 fs_ptr = svn.repos.fs(repo)
377 377 if _revision is None:
378 378 _revision = svn.fs.youngest_rev(fs_ptr)
379 379 root = svn.fs.revision_root(fs_ptr, _revision)
380 380 node = svn.fs.check_path(root, path)
381 381 return NODE_TYPE_MAPPING.get(node, None)
382 382 return _get_node_type(repo_id, revision, path)
383 383
384 384 @reraise_safe_exceptions
385 385 def get_nodes(self, wire, revision=None, path=''):
386 386
387 387 cache_on, context_uid, repo_id = self._cache_on(wire)
388 388 region = self._region(wire)
389 389
390 390 @region.conditional_cache_on_arguments(condition=cache_on)
391 391 def _get_nodes(_repo_id, _path, _revision):
392 392 repo = self._factory.repo(wire)
393 393 fsobj = svn.repos.fs(repo)
394 394 if _revision is None:
395 395 _revision = svn.fs.youngest_rev(fsobj)
396 396 root = svn.fs.revision_root(fsobj, _revision)
397 397 entries = svn.fs.dir_entries(root, path)
398 398 result = []
399 399 for entry_path, entry_info in entries.items():
400 400 result.append(
401 401 (entry_path, NODE_TYPE_MAPPING.get(entry_info.kind, None)))
402 402 return result
403 403 return _get_nodes(repo_id, path, revision)
404 404
405 405 @reraise_safe_exceptions
406 406 def get_file_content(self, wire, rev=None, path=''):
407 407 repo = self._factory.repo(wire)
408 408 fsobj = svn.repos.fs(repo)
409 409
410 410 if rev is None:
411 411 rev = svn.fs.youngest_rev(fsobj)
412 412
413 413 root = svn.fs.revision_root(fsobj, rev)
414 414 content = svn.core.Stream(svn.fs.file_contents(root, path))
415 415 return BytesEnvelope(content.read())
416 416
417 417 @reraise_safe_exceptions
418 418 def get_file_size(self, wire, revision=None, path=''):
419 419
420 420 cache_on, context_uid, repo_id = self._cache_on(wire)
421 421 region = self._region(wire)
422 422
423 423 @region.conditional_cache_on_arguments(condition=cache_on)
424 424 def _get_file_size(_repo_id, _revision, _path):
425 425 repo = self._factory.repo(wire)
426 426 fsobj = svn.repos.fs(repo)
427 427 if _revision is None:
428 428 _revision = svn.fs.youngest_rev(fsobj)
429 429 root = svn.fs.revision_root(fsobj, _revision)
430 430 size = svn.fs.file_length(root, path)
431 431 return size
432 432 return _get_file_size(repo_id, revision, path)
433 433
434 434 def create_repository(self, wire, compatible_version=None):
435 435 log.info('Creating Subversion repository in path "%s"', wire['path'])
436 436 self._factory.repo(wire, create=True,
437 437 compatible_version=compatible_version)
438 438
439 439 def get_url_and_credentials(self, src_url) -> tuple[str, str, str]:
440 440 obj = urllib.parse.urlparse(src_url)
441 441 username = obj.username or ''
442 442 password = obj.password or ''
443 443 return username, password, src_url
444 444
445 445 def import_remote_repository(self, wire, src_url):
446 446 repo_path = wire['path']
447 447 if not self.is_path_valid_repository(wire, repo_path):
448 448 raise Exception(
449 449 f"Path {repo_path} is not a valid Subversion repository.")
450 450
451 451 username, password, src_url = self.get_url_and_credentials(src_url)
452 452 rdump_cmd = ['svnrdump', 'dump', '--non-interactive',
453 453 '--trust-server-cert-failures=unknown-ca']
454 454 if username and password:
455 455 rdump_cmd += ['--username', username, '--password', password]
456 456 rdump_cmd += [src_url]
457 457
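# stream the svnrdump output straight into svnadmin load through a pipe; no intermediate dump file is written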
458 458 rdump = subprocess.Popen(
459 459 rdump_cmd,
460 460 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
461 461 load = subprocess.Popen(
462 462 ['svnadmin', 'load', repo_path], stdin=rdump.stdout)
463 463
464 464 # TODO: johbo: This can be a very long operation, might be better
465 465 # to track some kind of status and provide an api to check if the
466 466 # import is done.
467 467 rdump.wait()
468 468 load.wait()
469 469
470 470 log.debug('Return process ended with code: %s', rdump.returncode)
471 471 if rdump.returncode != 0:
472 472 errors = rdump.stderr.read()
473 473 log.error('svnrdump dump failed: statuscode %s: message: %s', rdump.returncode, errors)
474 474
475 475 reason = 'UNKNOWN'
476 476 if b'svnrdump: E230001:' in errors:
477 477 reason = 'INVALID_CERTIFICATE'
478 478
479 479 if reason == 'UNKNOWN':
480 480 reason = f'UNKNOWN:{safe_str(errors)}'
481 481
482 482 raise Exception(
483 483 'Failed to dump the remote repository from {}. Reason:{}'.format(
484 484 src_url, reason))
485 485 if load.returncode != 0:
486 486 raise Exception(
487 487 f'Failed to load the dump of remote repository from {src_url}.')
488 488
489 489 def commit(self, wire, message, author, timestamp, updated, removed):
490 490
491 491 message = safe_bytes(message)
492 492 author = safe_bytes(author)
493 493
494 494 repo = self._factory.repo(wire)
495 495 fsobj = svn.repos.fs(repo)
496 496
497 497 rev = svn.fs.youngest_rev(fsobj)
498 498 txn = svn.repos.fs_begin_txn_for_commit(repo, rev, author, message)
499 499 txn_root = svn.fs.txn_root(txn)
500 500
501 501 for node in updated:
502 502 TxnNodeProcessor(node, txn_root).update()
503 503 for node in removed:
504 504 TxnNodeProcessor(node, txn_root).remove()
505 505
506 svn_txn_id = safe_str(svn.fs.svn_fs_txn_name(txn))
507 full_repo_path = wire['path']
508 txn_id_data = {'svn_txn_id': svn_txn_id, 'rc_internal_commit': True}
509
510 store_txn_id_data(full_repo_path, svn_txn_id, txn_id_data)
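# the stored txn id lets later hooks recognise this transaction as an internal RhodeCode commit (see the rc_internal_commit flag)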
506 511 commit_id = svn.repos.fs_commit_txn(repo, txn)
507 512
508 513 if timestamp:
509 514 apr_time = apr_time_t(timestamp)
510 515 ts_formatted = svn.core.svn_time_to_cstring(apr_time)
511 516 svn.fs.change_rev_prop(fsobj, commit_id, 'svn:date', ts_formatted)
512 517
513 518 log.debug('Committed revision "%s" to "%s".', commit_id, wire['path'])
514 519 return commit_id
515 520
516 521 @reraise_safe_exceptions
517 522 def diff(self, wire, rev1, rev2, path1=None, path2=None,
518 523 ignore_whitespace=False, context=3):
519 524
520 525 wire.update(cache=False)
521 526 repo = self._factory.repo(wire)
522 527 diff_creator = SvnDiffer(
523 528 repo, rev1, path1, rev2, path2, ignore_whitespace, context)
524 529 try:
525 530 return BytesEnvelope(diff_creator.generate_diff())
526 531 except svn.core.SubversionException as e:
527 532 log.exception(
528 533 "Error during diff operation operation. "
529 534 "Path might not exist %s, %s", path1, path2)
530 535 return BytesEnvelope(b'')
531 536
532 537 @reraise_safe_exceptions
533 538 def is_large_file(self, wire, path):
534 539 return False
535 540
536 541 @reraise_safe_exceptions
537 542 def is_binary(self, wire, rev, path):
538 543 cache_on, context_uid, repo_id = self._cache_on(wire)
539 544 region = self._region(wire)
540 545
541 546 @region.conditional_cache_on_arguments(condition=cache_on)
542 547 def _is_binary(_repo_id, _rev, _path):
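# heuristic: any NUL byte in the file content marks it as binary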
543 548 raw_bytes = self.get_file_content(wire, rev, path)
544 549 if not raw_bytes:
545 550 return False
546 551 return b'\0' in raw_bytes
547 552
548 553 return _is_binary(repo_id, rev, path)
549 554
550 555 @reraise_safe_exceptions
551 556 def md5_hash(self, wire, rev, path):
552 557 cache_on, context_uid, repo_id = self._cache_on(wire)
553 558 region = self._region(wire)
554 559
555 560 @region.conditional_cache_on_arguments(condition=cache_on)
556 561 def _md5_hash(_repo_id, _rev, _path):
557 562 return ''
558 563
559 564 return _md5_hash(repo_id, rev, path)
560 565
561 566 @reraise_safe_exceptions
562 567 def run_svn_command(self, wire, cmd, **opts):
563 568 path = wire.get('path', None)
564 569 debug_mode = rhodecode.ConfigGet().get_bool('debug')
565 570
566 571 if path and os.path.isdir(path):
567 572 opts['cwd'] = path
568 573
569 574 safe_call = opts.pop('_safe', False)
570 575
571 576 svnenv = os.environ.copy()
572 577 svnenv.update(opts.pop('extra_env', {}))
573 578
574 579 _opts = {'env': svnenv, 'shell': False}
575 580
576 581 try:
577 582 _opts.update(opts)
578 583 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
579 584
580 585 return b''.join(proc), b''.join(proc.stderr)
581 586 except OSError as err:
582 587 if safe_call:
583 588 return '', safe_str(err).strip()
584 589 else:
585 590 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
586 591 call_opts = {}
587 592 if debug_mode:
588 593 call_opts = _opts
589 594
590 595 tb_err = ("Couldn't run svn command ({}).\n"
591 596 "Original error was:{}\n"
592 597 "Call options:{}\n"
593 598 .format(cmd, err, call_opts))
594 599 log.exception(tb_err)
595 600 raise exceptions.VcsException()(tb_err)
596 601
597 602 @reraise_safe_exceptions
598 603 def install_hooks(self, wire, force=False):
599 604 from vcsserver.hook_utils import install_svn_hooks
600 605 repo_path = wire['path']
601 606 binary_dir = settings.BINARY_DIR
602 607 executable = None
603 608 if binary_dir:
604 609 executable = os.path.join(binary_dir, 'python3')
605 610 return install_svn_hooks(repo_path, force_create=force)
606 611
607 612 @reraise_safe_exceptions
608 613 def get_hooks_info(self, wire):
609 614 from vcsserver.hook_utils import (
610 615 get_svn_pre_hook_version, get_svn_post_hook_version)
611 616 repo_path = wire['path']
612 617 return {
613 618 'pre_version': get_svn_pre_hook_version(repo_path),
614 619 'post_version': get_svn_post_hook_version(repo_path),
615 620 }
616 621
617 622 @reraise_safe_exceptions
618 623 def set_head_ref(self, wire, head_name):
619 624 pass
620 625
621 626 @reraise_safe_exceptions
622 627 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
623 628 archive_dir_name, commit_id, cache_config):
624 629
625 630 def walk_tree(root, root_dir, _commit_id):
626 631 """
627 632 Special recursive svn repo walker
628 633 """
629 634 root_dir = safe_bytes(root_dir)
630 635
631 636 filemode_default = 0o100644
632 637 filemode_executable = 0o100755
633 638
634 639 file_iter = svn.fs.dir_entries(root, root_dir)
635 640 for f_name in file_iter:
636 641 f_type = NODE_TYPE_MAPPING.get(file_iter[f_name].kind, None)
637 642
638 643 if f_type == 'dir':
639 644 # return only DIR, and then all entries in that dir
640 645 yield os.path.join(root_dir, f_name), {'mode': filemode_default}, f_type
641 646 new_root = os.path.join(root_dir, f_name)
642 647 yield from walk_tree(root, new_root, _commit_id)
643 648 else:
644 649
645 650 f_path = os.path.join(root_dir, f_name).rstrip(b'/')
646 651 prop_list = svn.fs.node_proplist(root, f_path)
647 652
648 653 f_mode = filemode_default
649 654 if prop_list.get('svn:executable'):
650 655 f_mode = filemode_executable
651 656
652 657 f_is_link = False
653 658 if prop_list.get('svn:special'):
654 659 f_is_link = True
655 660
656 661 data = {
657 662 'is_link': f_is_link,
658 663 'mode': f_mode,
659 664 'content_stream': svn.core.Stream(svn.fs.file_contents(root, f_path)).read
660 665 }
661 666
662 667 yield f_path, data, f_type
663 668
664 669 def file_walker(_commit_id, path):
665 670 repo = self._factory.repo(wire)
666 671 root = svn.fs.revision_root(svn.repos.fs(repo), int(commit_id))
667 672
668 673 def no_content():
669 674 raise NoContentException()
670 675
671 676 for f_name, f_data, f_type in walk_tree(root, path, _commit_id):
672 677 file_path = f_name
673 678
674 679 if f_type == 'dir':
675 680 mode = f_data['mode']
676 681 yield ArchiveNode(file_path, mode, False, no_content)
677 682 else:
678 683 mode = f_data['mode']
679 684 is_link = f_data['is_link']
680 685 data_stream = f_data['content_stream']
681 686 yield ArchiveNode(file_path, mode, is_link, data_stream)
682 687
683 688 return store_archive_in_cache(
684 689 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
685 690
686 691
687 692 class SvnDiffer:
688 693 """
689 694 Utility to create diffs based on difflib and the Subversion api
690 695 """
691 696
692 697 binary_content = False
693 698
694 699 def __init__(
695 700 self, repo, src_rev, src_path, tgt_rev, tgt_path,
696 701 ignore_whitespace, context):
697 702 self.repo = repo
698 703 self.ignore_whitespace = ignore_whitespace
699 704 self.context = context
700 705
701 706 fsobj = svn.repos.fs(repo)
702 707
703 708 self.tgt_rev = tgt_rev
704 709 self.tgt_path = tgt_path or ''
705 710 self.tgt_root = svn.fs.revision_root(fsobj, tgt_rev)
706 711 self.tgt_kind = svn.fs.check_path(self.tgt_root, self.tgt_path)
707 712
708 713 self.src_rev = src_rev
709 714 self.src_path = src_path or self.tgt_path
710 715 self.src_root = svn.fs.revision_root(fsobj, src_rev)
711 716 self.src_kind = svn.fs.check_path(self.src_root, self.src_path)
712 717
713 718 self._validate()
714 719
715 720 def _validate(self):
716 721 if (self.tgt_kind != svn.core.svn_node_none and
717 722 self.src_kind != svn.core.svn_node_none and
718 723 self.src_kind != self.tgt_kind):
719 724 # TODO: johbo: proper error handling
720 725 raise Exception(
721 726 "Source and target are not compatible for diff generation. "
722 727 "Source type: %s, target type: %s" %
723 728 (self.src_kind, self.tgt_kind))
724 729
725 730 def generate_diff(self) -> bytes:
726 731 buf = io.BytesIO()
727 732 if self.tgt_kind == svn.core.svn_node_dir:
728 733 self._generate_dir_diff(buf)
729 734 else:
730 735 self._generate_file_diff(buf)
731 736 return buf.getvalue()
732 737
733 738 def _generate_dir_diff(self, buf: io.BytesIO):
734 739 editor = DiffChangeEditor()
735 740 editor_ptr, editor_baton = svn.delta.make_editor(editor)
736 741 svn.repos.dir_delta2(
737 742 self.src_root,
738 743 self.src_path,
739 744 '', # src_entry
740 745 self.tgt_root,
741 746 self.tgt_path,
742 747 editor_ptr, editor_baton,
743 748 authorization_callback_allow_all,
744 749 False, # text_deltas
745 750 svn.core.svn_depth_infinity, # depth
746 751 False, # entry_props
747 752 False, # ignore_ancestry
748 753 )
749 754
750 755 for path, __, change in sorted(editor.changes):
751 756 self._generate_node_diff(
752 757 buf, change, path, self.tgt_path, path, self.src_path)
753 758
754 759 def _generate_file_diff(self, buf: io.BytesIO):
755 760 change = None
756 761 if self.src_kind == svn.core.svn_node_none:
757 762 change = "add"
758 763 elif self.tgt_kind == svn.core.svn_node_none:
759 764 change = "delete"
760 765 tgt_base, tgt_path = vcspath.split(self.tgt_path)
761 766 src_base, src_path = vcspath.split(self.src_path)
762 767 self._generate_node_diff(
763 768 buf, change, tgt_path, tgt_base, src_path, src_base)
764 769
765 770 def _generate_node_diff(
766 771 self, buf: io.BytesIO, change, tgt_path, tgt_base, src_path, src_base):
767 772
768 773 tgt_path_bytes = safe_bytes(tgt_path)
769 774 tgt_path = safe_str(tgt_path)
770 775
771 776 src_path_bytes = safe_bytes(src_path)
772 777 src_path = safe_str(src_path)
773 778
774 779 if self.src_rev == self.tgt_rev and tgt_base == src_base:
775 780 # for consistent behaviour with git/hg, return an empty diff if
776 781 # we compare the same revisions
777 782 return
778 783
779 784 tgt_full_path = vcspath.join(tgt_base, tgt_path)
780 785 src_full_path = vcspath.join(src_base, src_path)
781 786
782 787 self.binary_content = False
783 788 mime_type = self._get_mime_type(tgt_full_path)
784 789
785 790 if mime_type and not mime_type.startswith(b'text'):
786 791 self.binary_content = True
787 792 buf.write(b"=" * 67 + b'\n')
788 793 buf.write(b"Cannot display: file marked as a binary type.\n")
789 794 buf.write(b"svn:mime-type = %s\n" % mime_type)
790 795 buf.write(b"Index: %b\n" % tgt_path_bytes)
791 796 buf.write(b"=" * 67 + b'\n')
792 797 buf.write(b"diff --git a/%b b/%b\n" % (tgt_path_bytes, tgt_path_bytes))
793 798
794 799 if change == 'add':
795 800 # TODO: johbo: SVN is missing a zero here compared to git
796 801 buf.write(b"new file mode 10644\n")
797 802
798 803 # TODO(marcink): intro to binary detection of svn patches
799 804 # if self.binary_content:
800 805 # buf.write(b'GIT binary patch\n')
801 806
802 807 buf.write(b"--- /dev/null\t(revision 0)\n")
803 808 src_lines = []
804 809 else:
805 810 if change == 'delete':
806 811 buf.write(b"deleted file mode 10644\n")
807 812
808 813 # TODO(marcink): intro to binary detection of svn patches
809 814 # if self.binary_content:
810 815 # buf.write('GIT binary patch\n')
811 816
812 817 buf.write(b"--- a/%b\t(revision %d)\n" % (src_path_bytes, self.src_rev))
813 818 src_lines = self._svn_readlines(self.src_root, src_full_path)
814 819
815 820 if change == 'delete':
816 821 buf.write(b"+++ /dev/null\t(revision %d)\n" % self.tgt_rev)
817 822 tgt_lines = []
818 823 else:
819 824 buf.write(b"+++ b/%b\t(revision %d)\n" % (tgt_path_bytes, self.tgt_rev))
820 825 tgt_lines = self._svn_readlines(self.tgt_root, tgt_full_path)
821 826
822 827 # we made our diff header, time to generate the diff content into our buffer
823 828
824 829 if not self.binary_content:
825 830 udiff = svn_diff.unified_diff(
826 831 src_lines, tgt_lines, context=self.context,
827 832 ignore_blank_lines=self.ignore_whitespace,
828 833 ignore_case=False,
829 834 ignore_space_changes=self.ignore_whitespace)
830 835
831 836 buf.writelines(udiff)
832 837
833 838 def _get_mime_type(self, path) -> bytes:
834 839 try:
835 840 mime_type = svn.fs.node_prop(
836 841 self.tgt_root, path, svn.core.SVN_PROP_MIME_TYPE)
837 842 except svn.core.SubversionException:
838 843 mime_type = svn.fs.node_prop(
839 844 self.src_root, path, svn.core.SVN_PROP_MIME_TYPE)
840 845 return mime_type
841 846
842 847 def _svn_readlines(self, fs_root, node_path):
843 848 if self.binary_content:
844 849 return []
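# only regular files and symlinks carry line content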
845 850 node_kind = svn.fs.check_path(fs_root, node_path)
846 851 if node_kind not in (
847 852 svn.core.svn_node_file, svn.core.svn_node_symlink):
848 853 return []
849 854 content = svn.core.Stream(
850 855 svn.fs.file_contents(fs_root, node_path)).read()
851 856
852 857 return content.splitlines(True)
853 858
854 859
855 860 class DiffChangeEditor(svn.delta.Editor):
856 861 """
857 862 Records changes between two given revisions
858 863 """
859 864
860 865 def __init__(self):
861 866 self.changes = []
862 867
863 868 def delete_entry(self, path, revision, parent_baton, pool=None):
864 869 self.changes.append((path, None, 'delete'))
865 870
866 871 def add_file(
867 872 self, path, parent_baton, copyfrom_path, copyfrom_revision,
868 873 file_pool=None):
869 874 self.changes.append((path, 'file', 'add'))
870 875
871 876 def open_file(self, path, parent_baton, base_revision, file_pool=None):
872 877 self.changes.append((path, 'file', 'change'))
873 878
874 879
875 880 def authorization_callback_allow_all(root, path, pool):
876 881 return True
877 882
878 883
879 884 class TxnNodeProcessor:
880 885 """
881 886 Utility to process the change of one node within a transaction root.
882 887
883 888 It encapsulates the knowledge of how to add, update or remove
884 889 a node for a given transaction root. The purpose is to support the method
885 890 `SvnRemote.commit`.
886 891 """
887 892
888 893 def __init__(self, node, txn_root):
889 894 assert_bytes(node['path'])
890 895
891 896 self.node = node
892 897 self.txn_root = txn_root
893 898
894 899 def update(self):
895 900 self._ensure_parent_dirs()
896 901 self._add_file_if_node_does_not_exist()
897 902 self._update_file_content()
898 903 self._update_file_properties()
899 904
900 905 def remove(self):
901 906 svn.fs.delete(self.txn_root, self.node['path'])
902 907 # TODO: Clean up directory if empty
903 908
904 909 def _ensure_parent_dirs(self):
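# collect missing ancestor directories bottom-up, then create them top-down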
905 910 curdir = vcspath.dirname(self.node['path'])
906 911 dirs_to_create = []
907 912 while not self._svn_path_exists(curdir):
908 913 dirs_to_create.append(curdir)
909 914 curdir = vcspath.dirname(curdir)
910 915
911 916 for curdir in reversed(dirs_to_create):
912 917 log.debug('Creating missing directory "%s"', curdir)
913 918 svn.fs.make_dir(self.txn_root, curdir)
914 919
915 920 def _svn_path_exists(self, path):
916 921 path_status = svn.fs.check_path(self.txn_root, path)
917 922 return path_status != svn.core.svn_node_none
918 923
919 924 def _add_file_if_node_does_not_exist(self):
920 925 kind = svn.fs.check_path(self.txn_root, self.node['path'])
921 926 if kind == svn.core.svn_node_none:
922 927 svn.fs.make_file(self.txn_root, self.node['path'])
923 928
924 929 def _update_file_content(self):
925 930 assert_bytes(self.node['content'])
926 931
927 932 handler, baton = svn.fs.apply_textdelta(
928 933 self.txn_root, self.node['path'], None, None)
929 934 svn.delta.svn_txdelta_send_string(self.node['content'], handler, baton)
930 935
931 936 def _update_file_properties(self):
932 937 properties = self.node.get('properties', {})
933 938 for key, value in properties.items():
934 939 svn.fs.change_node_prop(
935 940 self.txn_root, self.node['path'], safe_bytes(key), safe_bytes(value))
936 941
937 942
938 943 def apr_time_t(timestamp):
939 944 """
940 945 Convert a Python timestamp into APR timestamp type apr_time_t
941 946 """
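# apr_time_t counts microseconds since the epoch, hence the 1e6 scaling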
942 947 return int(timestamp * 1E6)
943 948
944 949
945 950 def svn_opt_revision_value_t(num):
946 951 """
947 952 Put `num` into a `svn_opt_revision_value_t` structure.
948 953 """
949 954 value = svn.core.svn_opt_revision_value_t()
950 955 value.number = num
951 956 revision = svn.core.svn_opt_revision_t()
952 957 revision.kind = svn.core.svn_opt_revision_number
953 958 revision.value = value
954 959 return revision
@@ -1,255 +1,258 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import logging
20 20 import itertools
21 21
22 22 import mercurial
23 23 import mercurial.error
24 24 import mercurial.wireprotoserver
25 25 import mercurial.hgweb.common
26 26 import mercurial.hgweb.hgweb_mod
27 27 import webob.exc
28 28
29 29 from vcsserver import pygrack, exceptions, settings, git_lfs
30 from vcsserver.str_utils import ascii_bytes, safe_bytes
30 from vcsserver.lib.str_utils import ascii_bytes, safe_bytes
31 31
32 32 log = logging.getLogger(__name__)
33 33
34 34
35 35 # propagated from mercurial documentation
36 36 HG_UI_SECTIONS = [
37 37 'alias', 'auth', 'decode/encode', 'defaults', 'diff', 'email', 'extensions',
38 38 'format', 'merge-patterns', 'merge-tools', 'hooks', 'http_proxy', 'smtp',
39 39 'patch', 'paths', 'profiling', 'server', 'trusted', 'ui', 'web',
40 40 ]
41 41
42 42
43 43 class HgWeb(mercurial.hgweb.hgweb_mod.hgweb):
44 44 """Extension of hgweb that simplifies some functions."""
45 45
46 46 def _get_view(self, repo):
47 47 """Views are not supported."""
48 48 return repo
49 49
50 50 def loadsubweb(self):
51 51 """The result is only used in the templater method which is not used."""
52 52 return None
53 53
54 54 def run(self):
55 55 """Unused function so raise an exception if accidentally called."""
56 56 raise NotImplementedError
57 57
58 58 def templater(self, req):
59 59 """Function used in an unreachable code path.
60 60
61 61 This code is unreachable because we guarantee that the HTTP request
62 62 corresponds to a Mercurial command. See the is_hg method. So we are
63 63 never going to get a user-visible url.
64 64 """
65 65 raise NotImplementedError
66 66
67 67 def archivelist(self, nodeid):
68 68 """Unused function so raise an exception if accidentally called."""
69 69 raise NotImplementedError
70 70
71 71 def __call__(self, environ, start_response):
72 72 """Run the WSGI application.
73 73
74 74 This may be called by multiple threads.
75 75 """
76 76 from mercurial.hgweb import request as requestmod
77 77 req = requestmod.parserequestfromenv(environ)
78 78 res = requestmod.wsgiresponse(req, start_response)
79 79 gen = self.run_wsgi(req, res)
80 80
81 81 first_chunk = None
82 82
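# eagerly pull the first chunk so errors from run_wsgi surface here; it is re-chained with the remaining generator below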
83 83 try:
84 84 data = next(gen)
85 85
86 86 def first_chunk():
87 87 yield data
88 88 except StopIteration:
89 89 pass
90 90
91 91 if first_chunk:
92 92 return itertools.chain(first_chunk(), gen)
93 93 return gen
94 94
95 95 def _runwsgi(self, req, res, repo):
96 96
97 97 cmd = req.qsparams.get(b'cmd', '')
98 98 if not mercurial.wireprotoserver.iscmd(cmd):
99 99 # NOTE(marcink): for unsupported commands, we return bad request
100 100 # internally from HG
101 101 log.warning('cmd: `%s` is not supported by the mercurial wireprotocol v1', cmd)
102 102 from mercurial.hgweb.common import statusmessage
103 103 res.status = statusmessage(mercurial.hgweb.common.HTTP_BAD_REQUEST)
104 104 res.setbodybytes(b'')
105 105 return res.sendresponse()
106 106
107 107 return super()._runwsgi(req, res, repo)
108 108
109 109
110 110 def sanitize_hg_ui(baseui):
111 111 # NOTE(marcink): since python3, hgsubversion is deprecated.
112 112 # Old installations might still have it enabled, so we
113 113 # explicitly remove it here to make sure it won't propagate further
114 114
115 115 if baseui.config(b'extensions', b'hgsubversion') is not None:
116 116 for cfg in (baseui._ocfg, baseui._tcfg, baseui._ucfg):
117 117 if b'extensions' in cfg:
118 118 if b'hgsubversion' in cfg[b'extensions']:
119 119 del cfg[b'extensions'][b'hgsubversion']
120 120
121 121
122 122 def make_hg_ui_from_config(repo_config):
123 123 baseui = mercurial.ui.ui()
124 124
125 125 # clean the baseui object
126 126 baseui._ocfg = mercurial.config.config()
127 127 baseui._ucfg = mercurial.config.config()
128 128 baseui._tcfg = mercurial.config.config()
129 129
130 130 for section, option, value in repo_config:
131 131 baseui.setconfig(
132 132 ascii_bytes(section, allow_bytes=True),
133 133 ascii_bytes(option, allow_bytes=True),
134 134 ascii_bytes(value, allow_bytes=True))
135 135
136 136 # make our hgweb quiet so it doesn't print output
137 137 baseui.setconfig(b'ui', b'quiet', b'true')
138 138
139 # use POST requests with args instead of GET with headers - fixes issues with big repos with lots of branches
140 baseui.setconfig(b'experimental', b'httppostargs', b'true')
141
139 142 return baseui
140 143
141 144
142 145 def update_hg_ui_from_hgrc(baseui, repo_path):
143 146 path = os.path.join(repo_path, '.hg', 'hgrc')
144 147
145 148 if not os.path.isfile(path):
146 149 log.debug('hgrc file is not present at %s, skipping...', path)
147 150 return
148 151 log.debug('reading hgrc from %s', path)
149 152 cfg = mercurial.config.config()
150 153 cfg.read(ascii_bytes(path))
151 154 for section in HG_UI_SECTIONS:
152 155 for k, v in cfg.items(section):
153 156 log.debug('setting ui from file: [%s] %s=%s', section, k, v)
154 157 baseui.setconfig(
155 158 ascii_bytes(section, allow_bytes=True),
156 159 ascii_bytes(k, allow_bytes=True),
157 160 ascii_bytes(v, allow_bytes=True))
158 161
159 162
160 163 def create_hg_wsgi_app(repo_path, repo_name, config):
161 164 """
162 165 Prepares a WSGI application to handle Mercurial requests.
163 166
164 167 :param config: is a list of 3-item tuples representing a ConfigObject
165 168 (it is the serialized version of the config object).
166 169 """
167 170 log.debug("Creating Mercurial WSGI application")
168 171
169 172 baseui = make_hg_ui_from_config(config)
170 173 update_hg_ui_from_hgrc(baseui, repo_path)
171 174 sanitize_hg_ui(baseui)
172 175
173 176 try:
174 177 return HgWeb(safe_bytes(repo_path), name=safe_bytes(repo_name), baseui=baseui)
175 178 except mercurial.error.RequirementError as e:
176 179 raise exceptions.RequirementException(e)(e)
177 180
178 181
179 182 class GitHandler:
180 183 """
181 184 Handler for Git operations such as push/pull.
182 185 """
183 186 def __init__(self, repo_location, repo_name, git_path, update_server_info,
184 187 extras):
185 188 if not os.path.isdir(repo_location):
186 189 raise OSError(repo_location)
187 190 self.content_path = repo_location
188 191 self.repo_name = repo_name
189 192 self.repo_location = repo_location
190 193 self.extras = extras
191 194 self.git_path = git_path
192 195 self.update_server_info = update_server_info
193 196
194 197 def __call__(self, environ, start_response):
195 198 app = webob.exc.HTTPNotFound()
196 199 candidate_paths = (
197 200 self.content_path, os.path.join(self.content_path, '.git'))
198 201
199 202 for content_path in candidate_paths:
200 203 try:
201 204 app = pygrack.GitRepository(
202 205 self.repo_name, content_path, self.git_path,
203 206 self.update_server_info, self.extras)
204 207 break
205 208 except OSError:
206 209 continue
207 210
208 211 return app(environ, start_response)
209 212
210 213
211 214 def create_git_wsgi_app(repo_path, repo_name, config):
212 215 """
213 216 Creates a WSGI application to handle Git requests.
214 217
215 218 :param config: is a dictionary holding the extras.
216 219 """
217 220 git_path = settings.GIT_EXECUTABLE()
218 221 update_server_info = config.pop('git_update_server_info')
219 222 app = GitHandler(
220 223 repo_path, repo_name, git_path, update_server_info, config)
221 224
222 225 return app
223 226
224 227
225 228 class GitLFSHandler:
226 229 """
227 230 Handler for Git LFS operations
228 231 """
229 232
230 233 def __init__(self, repo_location, repo_name, git_path, update_server_info,
231 234 extras):
232 235 if not os.path.isdir(repo_location):
233 236 raise OSError(repo_location)
234 237 self.content_path = repo_location
235 238 self.repo_name = repo_name
236 239 self.repo_location = repo_location
237 240 self.extras = extras
238 241 self.git_path = git_path
239 242 self.update_server_info = update_server_info
240 243
241 244 def get_app(self, git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme):
242 245 app = git_lfs.create_app(git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme)
243 246 return app
244 247
245 248
246 249 def create_git_lfs_wsgi_app(repo_path, repo_name, config):
247 250 git_path = settings.GIT_EXECUTABLE()
248 251 update_server_info = config.pop('git_update_server_info')
249 252 git_lfs_enabled = config.pop('git_lfs_enabled')
250 253 git_lfs_store_path = config.pop('git_lfs_store_path')
251 254 git_lfs_http_scheme = config.pop('git_lfs_http_scheme', 'http')
252 255 app = GitLFSHandler(
253 256 repo_path, repo_name, git_path, update_server_info, config)
254 257
255 258 return app.get_app(git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme)
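
A sketch of the extras dict this factory expects; the keys match the pops above, while the paths and values are illustrative assumptions:

    config = {
        'git_update_server_info': False,           # popped, passed to GitLFSHandler
        'git_lfs_enabled': True,                   # popped, enables the LFS app
        'git_lfs_store_path': '/var/opt/lfs_store',
        'git_lfs_http_scheme': 'https',            # optional, defaults to 'http'
    }
    app = create_git_lfs_wsgi_app('/srv/repos/myrepo', 'myrepo', config)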
@@ -1,563 +1,563 b''
1 1 """
2 2 Module provides a class that wraps communication over subprocess.Popen
3 3 input, output, and error streams into a meaningful, non-blocking, concurrent
4 4 stream processor exposing the output data as an iterator fit to be a
5 5 return value passed by a WSGI application to a WSGI server per PEP 3333.
6 6
7 7 Copyright (c) 2011 Daniel Dotsenko <dotsa[at]hotmail.com>
8 8
9 9 This file is part of git_http_backend.py Project.
10 10
11 11 git_http_backend.py Project is free software: you can redistribute it and/or
12 12 modify it under the terms of the GNU Lesser General Public License as
13 13 published by the Free Software Foundation, either version 2.1 of the License,
14 14 or (at your option) any later version.
15 15
16 16 git_http_backend.py Project is distributed in the hope that it will be useful,
17 17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 19 GNU Lesser General Public License for more details.
20 20
21 21 You should have received a copy of the GNU Lesser General Public License
22 22 along with git_http_backend.py Project.
23 23 If not, see <http://www.gnu.org/licenses/>.
24 24 """
25 25 import os
26 26 import collections
27 27 import logging
28 28 import subprocess
29 29 import threading
30 30
31 from vcsserver.str_utils import safe_str
31 from vcsserver.lib.str_utils import safe_str
32 32
33 33 log = logging.getLogger(__name__)
34 34
35 35
36 36 class StreamFeeder(threading.Thread):
37 37 """
38 38 Writing into a pipe-like object normally blocks once the buffer is full.
39 39 This thread feeds data from a file-like source into a pipe
40 40 without blocking the main thread.
41 41 We close the input pipe once the end of the source stream is reached.
42 42 """
43 43
44 44 def __init__(self, source):
45 45 super().__init__()
46 46 self.daemon = True
47 47 filelike = False
48 48 self.bytes = b''
49 49 if type(source) in (str, bytes, bytearray): # string-like
50 50 self.bytes = bytes(source)
51 51 else: # can be either file pointer or file-like
52 52 if isinstance(source, int): # file descriptor it is
53 53 # converting file descriptor (int) stdin into file-like
54 54 source = os.fdopen(source, 'rb', 16384)
55 55 # let's see if source is file-like by now
56 56 filelike = hasattr(source, 'read')
57 57 if not filelike and not self.bytes:
58 58 raise TypeError("StreamFeeder's source object must be a readable "
59 59 "file-like, a file descriptor, or a string-like.")
60 60 self.source = source
61 61 self.readiface, self.writeiface = os.pipe()
62 62
63 63 def run(self):
64 64 writer = self.writeiface
65 65 try:
66 66 if self.bytes:
67 67 os.write(writer, self.bytes)
68 68 else:
69 69 s = self.source
70 70
71 71 while 1:
72 72 _bytes = s.read(4096)
73 73 if not _bytes:
74 74 break
75 75 os.write(writer, _bytes)
76 76
77 77 finally:
78 78 os.close(writer)
79 79
80 80 @property
81 81 def output(self):
82 82 return self.readiface
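
A minimal usage sketch, assuming a POSIX system with `cat` on PATH: feed an in-memory payload to a subprocess through the pipe exposed by `output`:

    import subprocess

    feeder = StreamFeeder(b'hello\n')
    feeder.start()
    proc = subprocess.Popen(['cat'], stdin=feeder.output, stdout=subprocess.PIPE)
    out, _ = proc.communicate()
    assert out == b'hello\n'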
83 83
84 84
85 85 class InputStreamChunker(threading.Thread):
86 86 def __init__(self, source, target, buffer_size, chunk_size):
87 87
88 88 super().__init__()
89 89
90 90 self.daemon = True # die die die.
91 91
92 92 self.source = source
93 93 self.target = target
94 94 self.chunk_count_max = int(buffer_size / chunk_size) + 1
95 95 self.chunk_size = chunk_size
96 96
97 97 self.data_added = threading.Event()
98 98 self.data_added.clear()
99 99
100 100 self.keep_reading = threading.Event()
101 101 self.keep_reading.set()
102 102
103 103 self.EOF = threading.Event()
104 104 self.EOF.clear()
105 105
106 106 self.go = threading.Event()
107 107 self.go.set()
108 108
109 109 def stop(self):
110 110 self.go.clear()
111 111 self.EOF.set()
112 112 try:
113 113 # this is not proper, but is done to force the reader thread to let
114 114 # go of the input because, if successful, .close() will send EOF
115 115 # down the pipe.
116 116 self.source.close()
117 117 except Exception:
118 118 pass
119 119
120 120 def run(self):
121 121 s = self.source
122 122 t = self.target
123 123 cs = self.chunk_size
124 124 chunk_count_max = self.chunk_count_max
125 125 keep_reading = self.keep_reading
126 126 da = self.data_added
127 127 go = self.go
128 128
129 129 try:
130 130 b = s.read(cs)
131 131 except ValueError:
132 132 b = ''
133 133
134 134 timeout_input = 20
135 135 while b and go.is_set():
136 136 if len(t) > chunk_count_max:
137 137 keep_reading.clear()
138 138 keep_reading.wait(timeout_input)
139 139 if len(t) > chunk_count_max + timeout_input:
140 140 log.error("Timed out while waiting for input from subprocess.")
141 141 os._exit(-1) # this will cause the worker to recycle itself
142 142
143 143 t.append(b)
144 144 da.set()
145 145
146 146 try:
147 147 b = s.read(cs)
148 148 except ValueError: # probably "I/O operation on closed file"
149 149 b = ''
150 150
151 151 self.EOF.set()
152 152 da.set() # for cases when done but there was no input.
153 153
154 154
155 155 class BufferedGenerator:
156 156 """
157 157 Class behaves as a non-blocking, buffered pipe reader.
158 158 It reads chunks of data (through a thread)
159 159 from a blocking pipe and appends them to a deque of chunks.
160 160 Reading is halted in the thread when the maximum number of chunks is buffered.
161 161 The .next() may operate in blocking or non-blocking fashion by yielding
162 162 '' if no data is ready
163 163 to be sent, or by not returning until there is some data to send.
164 164 When we get EOF from the underlying source pipe, we raise a marker so that
165 165 StopIteration is raised after the last chunk of data is yielded.
166 166 """
167 167
168 168 def __init__(self, name, source, buffer_size=65536, chunk_size=4096,
169 169 starting_values=None, bottomless=False):
170 170 starting_values = starting_values or []
171 171 self.name = name
172 172 self.buffer_size = buffer_size
173 173 self.chunk_size = chunk_size
174 174
175 175 if bottomless:
176 176 maxlen = int(buffer_size / chunk_size)
177 177 else:
178 178 maxlen = None
179 179
180 180 self.data_queue = collections.deque(starting_values, maxlen)
181 181 self.worker = InputStreamChunker(source, self.data_queue, buffer_size, chunk_size)
182 182 if starting_values:
183 183 self.worker.data_added.set()
184 184 self.worker.start()
185 185
186 186 ####################
187 187 # Generator's methods
188 188 ####################
189 189 def __str__(self):
190 190 return f'BufferedGenerator(name={self.name} chunk: {self.chunk_size} on buffer: {self.buffer_size})'
191 191
192 192 def __iter__(self):
193 193 return self
194 194
195 195 def __next__(self):
196 196
197 197 while not self.length and not self.worker.EOF.is_set():
198 198 self.worker.data_added.clear()
199 199 self.worker.data_added.wait(0.2)
200 200
201 201 if self.length:
202 202 self.worker.keep_reading.set()
203 203 return bytes(self.data_queue.popleft())
204 204 elif self.worker.EOF.is_set():
205 205 raise StopIteration
206 206
207 207 def throw(self, exc_type, value=None, traceback=None):
208 208 if not self.worker.EOF.is_set():
209 209 raise exc_type(value)
210 210
211 211 def start(self):
212 212 self.worker.start()
213 213
214 214 def stop(self):
215 215 self.worker.stop()
216 216
217 217 def close(self):
218 218 try:
219 219 self.worker.stop()
220 220 self.throw(GeneratorExit)
221 221 except (GeneratorExit, StopIteration):
222 222 pass
223 223
224 224 ####################
225 225 # Threaded reader's infrastructure.
226 226 ####################
227 227 @property
228 228 def input(self):
229 229 return self.worker.source # the pipe-like source the worker thread reads from
230 230
231 231 @property
232 232 def data_added_event(self):
233 233 return self.worker.data_added
234 234
235 235 @property
236 236 def data_added(self):
237 237 return self.worker.data_added.is_set()
238 238
239 239 @property
240 240 def reading_paused(self):
241 241 return not self.worker.keep_reading.is_set()
242 242
243 243 @property
244 244 def done_reading_event(self):
245 245 """
246 246 Done_reading does not mean that the iterator's buffer is empty.
247 247 Iterator might have done reading from underlying source, but the read
248 248 chunks might still be available for serving through .next() method.
249 249
250 250 :returns: An Event class instance.
251 251 """
252 252 return self.worker.EOF
253 253
254 254 @property
255 255 def done_reading(self):
256 256 """
257 257 Done_reading does not mean that the iterator's buffer is empty.
258 258 Iterator might have done reading from underlying source, but the read
259 259 chunks might still be available for serving through .next() method.
260 260
261 261 :returns: A bool value.
262 262 """
263 263 return self.worker.EOF.is_set()
264 264
265 265 @property
266 266 def length(self):
267 267 """
268 268 returns int.
269 269
270 270 This is the length of the queue of chunks, not the length of
271 271 the combined contents in those chunks.
272 272
273 273 __len__() cannot be meaningfully implemented because this
274 274 reader just streams through bottomless content and
275 275 can only know the length of what it has already seen.
276 276
277 277 Per PEP 3333, if __len__() returns a value, the WSGI server
278 278 sets the response's length to that. In order not to
279 279 confuse WSGI PEP 3333 servers, we will not implement __len__
280 280 at all.
281 281 """
282 282 return len(self.data_queue)
283 283
284 284 def prepend(self, x):
285 285 self.data_queue.appendleft(x)
286 286
287 287 def append(self, x):
288 288 self.data_queue.append(x)
289 289
290 290 def extend(self, o):
291 291 self.data_queue.extend(o)
292 292
293 293 def __getitem__(self, i):
294 294 return self.data_queue[i]
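
A minimal sketch of reading a pipe through BufferedGenerator; closing the write end delivers EOF so the worker thread finishes:

    import os

    read_fd, write_fd = os.pipe()
    os.write(write_fd, b'chunk-1')
    os.close(write_fd)  # EOF for the reader thread
    buffered = BufferedGenerator('example', os.fdopen(read_fd, 'rb'))
    assert b''.join(buffered) == b'chunk-1'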
295 295
296 296
297 297 class SubprocessIOChunker:
298 298 """
299 299 Processor class wrapping handling of subprocess IO.
300 300
301 301 .. important::
302 302
303 303 Watch out for the method `__del__` on this class. If this object
304 304 is deleted, it will kill the subprocess, so avoid returning
305 305 the `output` attribute or using it as in the following
306 306 example::
307 307
308 308 # `args` expected to run a program that produces a lot of output
309 309 output = ''.join(SubprocessIOChunker(
310 310 args, shell=False, inputstream=inputstream, env=environ).output)
311 311
312 312 # `output` will not contain all the data, because the __del__ method
313 313 # has already killed the subprocess in this case before all output
314 314 # has been consumed.
315 315
316 316
317 317
318 318 In a way, this is a "communicate()" replacement with a twist.
319 319
320 320 - We are multithreaded. Writing in and reading out/err each happen in separate threads.
321 321 - We support concurrent (in and out) stream processing.
322 322 - The output is not a stream. It's a queue of read (bytes, not str)
323 323 chunks. The object behaves as an iterable: you can "for chunk in obj:" it.
324 324 - We are non-blocking in more respects than communicate()
325 325 (reading from subprocess out pauses when the internal buffer is full, but
326 326 does not block the parent calling code. On the flip side, reading from a
327 327 slow-yielding subprocess may block the iteration until data shows up. This
328 328 does not block the inpipe reading occurring in a parallel thread.)
329 329
330 330 The purpose of the object is to allow us to wrap subprocess interactions into
331 331 an iterable that can be passed to a WSGI server as the application's return
332 332 value. Because of stream-processing-ability, WSGI does not have to read ALL
333 333 of the subprocess's output and buffer it, before handing it to WSGI server for
334 334 HTTP response. Instead, the class initializer reads just a bit of the stream
335 335 to figure out if an error occurred or is likely to occur, and if not, hands
336 336 further iteration over the subprocess output to the server for completion of
337 337 the HTTP response.
338 338
339 339 A real or perceived subprocess error is trapped and raised as one of the
340 340 OSError family of exceptions.
341 341
342 342 Example usage:
343 343 # try:
344 344 # answer = SubprocessIOChunker(
345 345 # cmd,
346 346 # input,
347 347 # buffer_size = 65536,
348 348 # chunk_size = 4096
349 349 # )
350 350 # except OSError as e:
351 351 # print(str(e))
352 352 # raise e
353 353 #
354 354 # return answer
355 355
356 356
357 357 """
358 358
359 359 # TODO: johbo: This is used to make sure that the open end of the PIPE
360 360 # is closed in the end. It would be way better to wrap this into an
361 361 # object, so that it is closed automatically once it is consumed or
362 362 # something similar.
363 363 _close_input_fd = None
364 364
365 365 _closed = False
366 366 _stdout = None
367 367 _stderr = None
368 368
369 369 def __init__(self, cmd, input_stream=None, buffer_size=65536,
370 370 chunk_size=4096, starting_values=None, fail_on_stderr=True,
371 371 fail_on_return_code=True, **kwargs):
372 372 """
373 373 Initializes SubprocessIOChunker
374 374
375 375 :param cmd: A Subprocess.Popen style "cmd". Can be string or array of strings
376 376 :param input_stream: (Default: None) A file-like, string, or file pointer.
377 377 :param buffer_size: (Default: 65536) A size of total buffer per stream in bytes.
378 378 :param chunk_size: (Default: 4096) A max size of a chunk. Actual chunk may be smaller.
379 379 :param starting_values: (Default: []) An array of strings to put in front of the output queue.
380 380 :param fail_on_stderr: (Default: True) Whether to raise an exception in
381 381 case something is written to stderr.
382 382 :param fail_on_return_code: (Default: True) Whether to raise an
383 383 exception if the return code is not 0.
384 384 """
385 385
386 386 kwargs['shell'] = kwargs.get('shell', True)
387 387
388 388 starting_values = starting_values or []
389 389 if input_stream:
390 390 input_streamer = StreamFeeder(input_stream)
391 391 input_streamer.start()
392 392 input_stream = input_streamer.output
393 393 self._close_input_fd = input_stream
394 394
395 395 self._fail_on_stderr = fail_on_stderr
396 396 self._fail_on_return_code = fail_on_return_code
397 397 self.cmd = cmd
398 398
399 399 _p = subprocess.Popen(cmd, bufsize=-1, stdin=input_stream, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
400 400 **kwargs)
401 401 self.process = _p
402 402
403 403 bg_out = BufferedGenerator('stdout', _p.stdout, buffer_size, chunk_size, starting_values)
404 404 bg_err = BufferedGenerator('stderr', _p.stderr, 10240, 1, bottomless=True)
405 405
406 406 while not bg_out.done_reading and not bg_out.reading_paused and not bg_err.length:
407 407 # doing this until we reach either end of file, or end of buffer.
408 408 bg_out.data_added_event.wait(0.2)
409 409 bg_out.data_added_event.clear()
410 410
411 411 # at this point it's still ambiguous whether we are done reading or the buffer is just full.
412 412 # Either way, if there is an error (a non-zero code returned by the ended process,
413 413 # or implied by the presence of output on stderr), we error out.
414 414 # Else, we are happy.
415 415 return_code = _p.poll()
416 416 ret_code_ok = return_code in [None, 0]
417 417 ret_code_fail = return_code is not None and return_code != 0
418 418 if (
419 419 (ret_code_fail and fail_on_return_code) or
420 420 (ret_code_ok and fail_on_stderr and bg_err.length)
421 421 ):
422 422
423 423 try:
424 424 _p.terminate()
425 425 except Exception:
426 426 pass
427 427
428 428 bg_out.stop()
429 429 out = b''.join(bg_out)
430 430 self._stdout = out
431 431
432 432 bg_err.stop()
433 433 err = b''.join(bg_err)
434 434 self._stderr = err
435 435
436 436 # code from https://github.com/schacon/grack/pull/7
437 437 if err.strip() == b'fatal: The remote end hung up unexpectedly' and out.startswith(b'0034shallow '):
438 438 bg_out = iter([out])
439 439 _p = None
440 440 elif err and fail_on_stderr:
441 441 text_err = err.decode()
442 442 raise OSError(
443 443 f"Subprocess exited due to an error:\n{text_err}")
444 444
445 445 if ret_code_fail and fail_on_return_code:
446 446 text_err = err.decode()
447 447 if not err:
448 448 # we may get an empty stderr; try stdout instead,
449 449 # as in many cases git reports errors on stdout too
450 450 text_err = out.decode()
451 451 raise OSError(
452 452 f"Subprocess exited with non 0 ret code:{return_code}: stderr:{text_err}")
453 453
454 454 self.stdout = bg_out
455 455 self.stderr = bg_err
456 456 self.inputstream = input_stream
457 457
458 458 def __str__(self):
459 459 proc = getattr(self, 'process', 'NO_PROCESS')
460 460 return f'SubprocessIOChunker: {proc}'
461 461
462 462 def __iter__(self):
463 463 return self
464 464
465 465 def __next__(self):
466 466 # Note: mikhail: We need to be sure that we are checking the return
467 467 # code after the stdout stream is closed. Some processes, e.g. git,
468 468 # do some magic in between closing stdout and terminating the
469 469 # process and, as a result, we do not get a return code on "slow"
470 470 # systems.
471 471 result = None
472 472 stop_iteration = None
473 473 try:
474 474 result = next(self.stdout)
475 475 except StopIteration as e:
476 476 stop_iteration = e
477 477
478 478 if self.process:
479 479 return_code = self.process.poll()
480 480 ret_code_fail = return_code is not None and return_code != 0
481 481 if ret_code_fail and self._fail_on_return_code:
482 482 self.stop_streams()
483 483 err = self.get_stderr()
484 484 raise OSError(
485 485 f"Subprocess exited (exit_code:{return_code}) due to an error during iteration:\n{err}")
486 486
487 487 if stop_iteration:
488 488 raise stop_iteration
489 489 return result
490 490
491 491 def throw(self, exc_type, value=None, traceback=None):
492 492 if self.stdout.length or not self.stdout.done_reading:
493 493 raise exc_type(value)
494 494
495 495 def close(self):
496 496 if self._closed:
497 497 return
498 498
499 499 try:
500 500 self.process.terminate()
501 501 except Exception:
502 502 pass
503 503 if self._close_input_fd:
504 504 os.close(self._close_input_fd)
505 505 try:
506 506 self.stdout.close()
507 507 except Exception:
508 508 pass
509 509 try:
510 510 self.stderr.close()
511 511 except Exception:
512 512 pass
513 513 try:
514 514 os.close(self.inputstream)
515 515 except Exception:
516 516 pass
517 517
518 518 self._closed = True
519 519
520 520 def stop_streams(self):
521 521 getattr(self.stdout, 'stop', lambda: None)()
522 522 getattr(self.stderr, 'stop', lambda: None)()
523 523
524 524 def get_stdout(self):
525 525 if self._stdout:
526 526 return self._stdout
527 527 else:
528 528 return b''.join(self.stdout)
529 529
530 530 def get_stderr(self):
531 531 if self._stderr:
532 532 return self._stderr
533 533 else:
534 534 return b''.join(self.stderr)
535 535
536 536
537 537 def run_command(arguments, env=None):
538 538 """
539 539 Run the specified command and return the stdout.
540 540
541 541 :param arguments: sequence of program arguments (including the program name)
542 542 :type arguments: list[str]
543 543 """
544 544
545 545 cmd = arguments
546 546 log.debug('Running subprocessio command %s', cmd)
547 547 proc = None
548 548 try:
549 549 _opts = {'shell': False, 'fail_on_stderr': False}
550 550 if env:
551 551 _opts.update({'env': env})
552 552 proc = SubprocessIOChunker(cmd, **_opts)
553 553 return b''.join(proc), b''.join(proc.stderr)
554 554 except OSError as err:
555 555 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
556 556 tb_err = ("Couldn't run subprocessio command (%s).\n"
557 557 "Original error was:%s\n" % (cmd, err))
558 558 log.exception(tb_err)
559 559 raise Exception(tb_err)
560 560 finally:
561 561 if proc:
562 562 proc.close()
563 563
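
Typical usage of run_command, assuming `git` is available on PATH; with shell=False the arguments are passed as a list:

    stdout, stderr = run_command(['git', '--version'])
    assert stdout.startswith(b'git version')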
@@ -1,257 +1,257 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import threading
19 19 import msgpack
20 20
21 21 from http.server import BaseHTTPRequestHandler
22 22 from socketserver import TCPServer
23 23
24 24 import mercurial.ui
25 25 import mock
26 26 import pytest
27 27
28 28 from vcsserver.hooks import HooksHttpClient
29 from vcsserver.lib.rc_json import json
29 from vcsserver.lib.ext_json import json
30 30 from vcsserver import hooks
31 31
32 32
33 33 def get_hg_ui(extras=None):
34 34 """Create a Config object with a valid RC_SCM_DATA entry."""
35 35 extras = extras or {}
36 36 required_extras = {
37 37 'username': '',
38 38 'repository': '',
39 39 'locked_by': '',
40 40 'scm': '',
41 41 'make_lock': '',
42 42 'action': '',
43 43 'ip': '',
44 44 'hooks_uri': 'fake_hooks_uri',
45 45 }
46 46 required_extras.update(extras)
47 47 hg_ui = mercurial.ui.ui()
48 48 hg_ui.setconfig(b'rhodecode', b'RC_SCM_DATA', json.dumps(required_extras))
49 49
50 50 return hg_ui
51 51
52 52
53 53 def test_git_pre_receive_is_disabled():
54 54 extras = {'hooks': ['pull']}
55 55 response = hooks.git_pre_receive(None, None,
56 56 {'RC_SCM_DATA': json.dumps(extras)})
57 57
58 58 assert response == 0
59 59
60 60
61 61 def test_git_post_receive_is_disabled():
62 62 extras = {'hooks': ['pull']}
63 63 response = hooks.git_post_receive(None, '',
64 64 {'RC_SCM_DATA': json.dumps(extras)})
65 65
66 66 assert response == 0
67 67
68 68
69 69 def test_git_post_receive_calls_repo_size():
70 70 extras = {'hooks': ['push', 'repo_size']}
71 71
72 72 with mock.patch.object(hooks, '_call_hook') as call_hook_mock:
73 73 hooks.git_post_receive(
74 74 None, '', {'RC_SCM_DATA': json.dumps(extras)})
75 75 extras.update({'commit_ids': [], 'hook_type': 'post_receive',
76 76 'new_refs': {'bookmarks': [], 'branches': [], 'tags': []}})
77 77 expected_calls = [
78 78 mock.call('repo_size', extras, mock.ANY),
79 79 mock.call('post_push', extras, mock.ANY),
80 80 ]
81 81 assert call_hook_mock.call_args_list == expected_calls
82 82
83 83
84 84 def test_git_post_receive_does_not_call_disabled_repo_size():
85 85 extras = {'hooks': ['push']}
86 86
87 87 with mock.patch.object(hooks, '_call_hook') as call_hook_mock:
88 88 hooks.git_post_receive(
89 89 None, '', {'RC_SCM_DATA': json.dumps(extras)})
90 90 extras.update({'commit_ids': [], 'hook_type': 'post_receive',
91 91 'new_refs': {'bookmarks': [], 'branches': [], 'tags': []}})
92 92 expected_calls = [
93 93 mock.call('post_push', extras, mock.ANY)
94 94 ]
95 95 assert call_hook_mock.call_args_list == expected_calls
96 96
97 97
98 98 def test_repo_size_exception_does_not_affect_git_post_receive():
99 99 extras = {'hooks': ['push', 'repo_size']}
100 100 status = 0
101 101
102 102 def side_effect(name, *args, **kwargs):
103 103 if name == 'repo_size':
104 104 raise Exception('Fake exception')
105 105 else:
106 106 return status
107 107
108 108 with mock.patch.object(hooks, '_call_hook') as call_hook_mock:
109 109 call_hook_mock.side_effect = side_effect
110 110 result = hooks.git_post_receive(
111 111 None, '', {'RC_SCM_DATA': json.dumps(extras)})
112 112 assert result == status
113 113
114 114
115 115 def test_git_pre_pull_is_disabled():
116 116 assert hooks.git_pre_pull({'hooks': ['push']}) == hooks.HookResponse(0, '')
117 117
118 118
119 119 def test_git_post_pull_is_disabled():
120 120 assert (
121 121 hooks.git_post_pull({'hooks': ['push']}) == hooks.HookResponse(0, ''))
122 122
123 123
124 124 class TestGetHooksClient:
125 125
126 126 def test_returns_http_client_when_protocol_matches(self):
127 127 hooks_uri = 'localhost:8000'
128 128 result = hooks._get_hooks_client({
129 129 'hooks_uri': hooks_uri,
130 130 'hooks_protocol': 'http'
131 131 })
132 132 assert isinstance(result, hooks.HooksHttpClient)
133 133 assert result.hooks_uri == hooks_uri
134 134
135 135 def test_return_celery_client_when_queue_and_backend_provided(self):
136 136 task_queue = 'redis://task_queue:0'
137 137 task_backend = task_queue
138 138 result = hooks._get_hooks_client({
139 139 'task_queue': task_queue,
140 140 'task_backend': task_backend
141 141 })
142 142 assert isinstance(result, hooks.HooksCeleryClient)
143 143
144 144
145 145 class TestHooksHttpClient:
146 146 def test_init_sets_hooks_uri(self):
147 147 uri = 'localhost:3000'
148 148 client = hooks.HooksHttpClient(uri)
149 149 assert client.hooks_uri == uri
150 150
151 151 def test_serialize_returns_serialized_string(self):
152 152 client = hooks.HooksHttpClient('localhost:3000')
153 153 hook_name = 'test'
154 154 extras = {
155 155 'first': 1,
156 156 'second': 'two'
157 157 }
158 158 hooks_proto, result = client._serialize(hook_name, extras)
159 159 expected_result = msgpack.packb({
160 160 'method': hook_name,
161 161 'extras': extras,
162 162 })
163 163 assert hooks_proto == {'rc-hooks-protocol': 'msgpack.v1', 'Connection': 'keep-alive'}
164 164 assert result == expected_result
165 165
166 166 def test_call_queries_http_server(self, http_mirror):
167 167 client = hooks.HooksHttpClient(http_mirror.uri)
168 168 hook_name = 'test'
169 169 extras = {
170 170 'first': 1,
171 171 'second': 'two'
172 172 }
173 173 result = client(hook_name, extras)
174 174 expected_result = msgpack.unpackb(msgpack.packb({
175 175 'method': hook_name,
176 176 'extras': extras
177 177 }), raw=False)
178 178 assert result == expected_result
179 179
180 180
181 181 @pytest.fixture
182 182 def http_mirror(request):
183 183 server = MirrorHttpServer()
184 184 request.addfinalizer(server.stop)
185 185 return server
186 186
187 187
188 188 class MirrorHttpHandler(BaseHTTPRequestHandler):
189 189
190 190 def do_POST(self):
191 191 length = int(self.headers['Content-Length'])
192 192 body = self.rfile.read(length)
193 193 self.send_response(200)
194 194 self.end_headers()
195 195 self.wfile.write(body)
196 196
197 197
198 198 class MirrorHttpServer:
199 199 ip_address = '127.0.0.1'
200 200 port = 0
201 201
202 202 def __init__(self):
203 203 self._daemon = TCPServer((self.ip_address, 0), MirrorHttpHandler)
204 204 _, self.port = self._daemon.server_address
205 205 self._thread = threading.Thread(target=self._daemon.serve_forever)
206 206 self._thread.daemon = True
207 207 self._thread.start()
208 208
209 209 def stop(self):
210 210 self._daemon.shutdown()
211 211 self._thread.join()
212 212 self._daemon = None
213 213 self._thread = None
214 214
215 215 @property
216 216 def uri(self):
217 217 return '{}:{}'.format(self.ip_address, self.port)
218 218
219 219
220 220 def test_hooks_http_client_init():
221 221 hooks_uri = 'http://localhost:8000'
222 222 client = HooksHttpClient(hooks_uri)
223 223 assert client.hooks_uri == hooks_uri
224 224
225 225
226 226 def test_hooks_http_client_call():
227 227 hooks_uri = 'http://localhost:8000'
228 228
229 229 method = 'test_method'
230 230 extras = {'key': 'value'}
231 231
232 232 with \
233 233 mock.patch('http.client.HTTPConnection') as mock_connection,\
234 234 mock.patch('msgpack.load') as mock_load:
235 235
236 236 client = HooksHttpClient(hooks_uri)
237 237
238 238 mock_load.return_value = {'result': 'success'}
239 239 response = mock.MagicMock()
240 240 response.status = 200
241 241 mock_connection.request.side_effect = None
242 242 mock_connection.getresponse.return_value = response
243 243
244 244 result = client(method, extras)
245 245
246 246 mock_connection.assert_called_with(hooks_uri)
247 247 mock_connection.return_value.request.assert_called_once()
248 248 assert result == {'result': 'success'}
249 249
250 250
251 251 def test_hooks_http_client_serialize():
252 252 method = 'test_method'
253 253 extras = {'key': 'value'}
254 254 headers, body = HooksHttpClient._serialize(method, extras)
255 255
256 256 assert headers == {'rc-hooks-protocol': HooksHttpClient.proto, 'Connection': 'keep-alive'}
257 257 assert msgpack.unpackb(body) == {'method': method, 'extras': extras}
@@ -1,289 +1,289 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import sys
20 20 import stat
21 21 import pytest
22 22 import vcsserver
23 23 import tempfile
24 24 from vcsserver import hook_utils
25 25 from vcsserver.hook_utils import set_permissions_if_needed, HOOKS_DIR_MODE, HOOKS_FILE_MODE
26 26 from vcsserver.tests.fixture import no_newline_id_generator
27 from vcsserver.str_utils import safe_bytes
27 from vcsserver.lib.str_utils import safe_bytes
28 28 from vcsserver.utils import AttributeDict
29 29
30 30
31 31 class TestCheckRhodecodeHook:
32 32
33 33 def test_returns_false_when_hook_file_is_wrong_found(self, tmpdir):
34 34 hook = os.path.join(str(tmpdir), 'fake_hook_file.py')
35 35 with open(hook, 'wb') as f:
36 36 f.write(b'dummy test')
37 37 result = hook_utils.check_rhodecode_hook(hook)
38 38 assert result is False
39 39
40 40 def test_returns_true_when_no_hook_file_found(self, tmpdir):
41 41 hook = os.path.join(str(tmpdir), 'fake_hook_file_not_existing.py')
42 42 result = hook_utils.check_rhodecode_hook(hook)
43 43 assert result
44 44
45 45 @pytest.mark.parametrize("file_content, expected_result", [
46 46 ("RC_HOOK_VER = '3.3.3'\n", True),
47 47 ("RC_HOOK = '3.3.3'\n", False),
48 48 ], ids=no_newline_id_generator)
49 49 def test_signatures(self, file_content, expected_result, tmpdir):
50 50 hook = os.path.join(str(tmpdir), 'fake_hook_file_1.py')
51 51 with open(hook, 'wb') as f:
52 52 f.write(safe_bytes(file_content))
53 53
54 54 result = hook_utils.check_rhodecode_hook(hook)
55 55
56 56 assert result is expected_result
57 57
58 58
59 59 class BaseInstallHooks:
60 60 HOOK_FILES = ()
61 61
62 62 def _check_hook_file_dir_mode(self, file_path):
63 63 dir_path = os.path.dirname(file_path)
64 64 assert os.path.exists(dir_path), f'dir {dir_path} missing'
65 65 stat_info = os.stat(dir_path)
66 66
67 67 file_mode = stat.S_IMODE(stat_info.st_mode)
68 68 expected_mode = int(HOOKS_DIR_MODE)
69 69 assert expected_mode == file_mode, f'expected mode: {oct(expected_mode)} got: {oct(file_mode)} for {dir_path}'
70 70
71 71 def _check_hook_file_mode(self, file_path):
72 72 assert os.path.exists(file_path), f'path {file_path} missing'
73 73 stat_info = os.stat(file_path)
74 74
75 75 file_mode = stat.S_IMODE(stat_info.st_mode)
76 76 expected_mode = int(HOOKS_FILE_MODE)
77 77 assert expected_mode == file_mode, f'expected mode: {oct(expected_mode)} got: {oct(file_mode)} for {file_path}'
78 78
79 79 def _check_hook_file_content(self, file_path, executable):
80 80 executable = executable or sys.executable
81 81 with open(file_path, 'rt') as hook_file:
82 82 content = hook_file.read()
83 83
84 84 expected_env = '#!{}'.format(executable)
85 85 expected_rc_version = "\nRC_HOOK_VER = '{}'\n".format(vcsserver.get_version())
86 86 assert content.strip().startswith(expected_env)
87 87 assert expected_rc_version in content
88 88
89 89 def _create_fake_hook(self, file_path, content):
90 90 with open(file_path, 'w') as hook_file:
91 91 hook_file.write(content)
92 92
93 93 def create_dummy_repo(self, repo_type):
94 94 tmpdir = tempfile.mkdtemp()
95 95 repo = AttributeDict()
96 96 if repo_type == 'git':
97 97 repo.path = os.path.join(tmpdir, 'test_git_hooks_installation_repo')
98 98 os.makedirs(repo.path)
99 99 os.makedirs(os.path.join(repo.path, 'hooks'))
100 100 repo.bare = True
101 101
102 102 elif repo_type == 'svn':
103 103 repo.path = os.path.join(tmpdir, 'test_svn_hooks_installation_repo')
104 104 os.makedirs(repo.path)
105 105 os.makedirs(os.path.join(repo.path, 'hooks'))
106 106
107 107 return repo
108 108
109 109 def check_hooks(self, repo_path, repo_bare=True):
110 110 for file_name in self.HOOK_FILES:
111 111 if repo_bare:
112 112 file_path = os.path.join(repo_path, 'hooks', file_name)
113 113 else:
114 114 file_path = os.path.join(repo_path, '.git', 'hooks', file_name)
115 115
116 116 self._check_hook_file_dir_mode(file_path)
117 117 self._check_hook_file_mode(file_path)
118 118 self._check_hook_file_content(file_path, sys.executable)
119 119
120 120
121 121 class TestInstallGitHooks(BaseInstallHooks):
122 122 HOOK_FILES = ('pre-receive', 'post-receive')
123 123
124 124 def test_hooks_are_installed(self):
125 125 repo = self.create_dummy_repo('git')
126 126 result = hook_utils.install_git_hooks(repo.path, repo.bare)
127 127 assert result
128 128 self.check_hooks(repo.path, repo.bare)
129 129
130 130 def test_hooks_are_replaced(self):
131 131 repo = self.create_dummy_repo('git')
132 132 hooks_path = os.path.join(repo.path, 'hooks')
133 133 for file_path in [os.path.join(hooks_path, f) for f in self.HOOK_FILES]:
134 134 self._create_fake_hook(
135 135 file_path, content="RC_HOOK_VER = 'abcde'\n")
136 136
137 137 result = hook_utils.install_git_hooks(repo.path, repo.bare)
138 138 assert result
139 139 self.check_hooks(repo.path, repo.bare)
140 140
141 141 def test_non_rc_hooks_are_not_replaced(self):
142 142 repo = self.create_dummy_repo('git')
143 143 hooks_path = os.path.join(repo.path, 'hooks')
144 144 non_rc_content = 'echo "non rc hook"\n'
145 145 for file_path in [os.path.join(hooks_path, f) for f in self.HOOK_FILES]:
146 146 self._create_fake_hook(
147 147 file_path, content=non_rc_content)
148 148
149 149 result = hook_utils.install_git_hooks(repo.path, repo.bare)
150 150 assert result
151 151
152 152 for file_path in [os.path.join(hooks_path, f) for f in self.HOOK_FILES]:
153 153 with open(file_path, 'rt') as hook_file:
154 154 content = hook_file.read()
155 155 assert content == non_rc_content
156 156
157 157 def test_non_rc_hooks_are_replaced_with_force_flag(self):
158 158 repo = self.create_dummy_repo('git')
159 159 hooks_path = os.path.join(repo.path, 'hooks')
160 160 non_rc_content = 'echo "non rc hook"\n'
161 161 for file_path in [os.path.join(hooks_path, f) for f in self.HOOK_FILES]:
162 162 self._create_fake_hook(
163 163 file_path, content=non_rc_content)
164 164
165 165 result = hook_utils.install_git_hooks(
166 166 repo.path, repo.bare, force_create=True)
167 167 assert result
168 168 self.check_hooks(repo.path, repo.bare)
169 169
170 170
171 171 class TestInstallSvnHooks(BaseInstallHooks):
172 172 HOOK_FILES = ('pre-commit', 'post-commit')
173 173
174 174 def test_hooks_are_installed(self):
175 175 repo = self.create_dummy_repo('svn')
176 176 result = hook_utils.install_svn_hooks(repo.path)
177 177 assert result
178 178 self.check_hooks(repo.path)
179 179
180 180 def test_hooks_are_replaced(self):
181 181 repo = self.create_dummy_repo('svn')
182 182 hooks_path = os.path.join(repo.path, 'hooks')
183 183 for file_path in [os.path.join(hooks_path, f) for f in self.HOOK_FILES]:
184 184 self._create_fake_hook(
185 185 file_path, content="RC_HOOK_VER = 'abcde'\n")
186 186
187 187 result = hook_utils.install_svn_hooks(repo.path)
188 188 assert result
189 189 self.check_hooks(repo.path)
190 190
191 191 def test_non_rc_hooks_are_not_replaced(self):
192 192 repo = self.create_dummy_repo('svn')
193 193 hooks_path = os.path.join(repo.path, 'hooks')
194 194 non_rc_content = 'echo "non rc hook"\n'
195 195 for file_path in [os.path.join(hooks_path, f) for f in self.HOOK_FILES]:
196 196 self._create_fake_hook(
197 197 file_path, content=non_rc_content)
198 198
199 199 result = hook_utils.install_svn_hooks(repo.path)
200 200 assert result
201 201
202 202 for file_path in [os.path.join(hooks_path, f) for f in self.HOOK_FILES]:
203 203 with open(file_path, 'rt') as hook_file:
204 204 content = hook_file.read()
205 205 assert content == non_rc_content
206 206
207 207 def test_non_rc_hooks_are_replaced_with_force_flag(self):
208 208 repo = self.create_dummy_repo('svn')
209 209 hooks_path = os.path.join(repo.path, 'hooks')
210 210 non_rc_content = 'echo "non rc hook"\n'
211 211 for file_path in [os.path.join(hooks_path, f) for f in self.HOOK_FILES]:
212 212 self._create_fake_hook(
213 213 file_path, content=non_rc_content)
214 214
215 215 result = hook_utils.install_svn_hooks(
216 216 repo.path, force_create=True)
217 217 assert result
218 218 self.check_hooks(repo.path)
219 219
220 220
221 221 def create_test_file(filename):
222 222 """Utility function to create a test file."""
223 223 with open(filename, 'w') as f:
224 224 f.write("Test file")
225 225
226 226
227 227 def remove_test_file(filename):
228 228 """Utility function to remove a test file."""
229 229 if os.path.exists(filename):
230 230 os.remove(filename)
231 231
232 232
233 233 @pytest.fixture
234 234 def test_file():
235 235 filename = 'test_file.txt'
236 236 create_test_file(filename)
237 237 yield filename
238 238 remove_test_file(filename)
239 239
240 240
241 241 def test_increase_permissions(test_file):
242 242 # Set initial lower permissions
243 243 initial_perms = 0o644
244 244 os.chmod(test_file, initial_perms)
245 245
246 246 # Set higher permissions
247 247 new_perms = 0o666
248 248 set_permissions_if_needed(test_file, new_perms)
249 249
250 250 # Check if permissions were updated
251 251 assert (os.stat(test_file).st_mode & 0o777) == new_perms
252 252
253 253
254 254 def test_no_permission_change_needed(test_file):
255 255 # Set initial permissions
256 256 initial_perms = 0o666
257 257 os.chmod(test_file, initial_perms)
258 258
259 259 # Attempt to set the same permissions
260 260 set_permissions_if_needed(test_file, initial_perms)
261 261
262 262 # Check if permissions were unchanged
263 263 assert (os.stat(test_file).st_mode & 0o777) == initial_perms
264 264
265 265
266 266 def test_no_permission_reduction(test_file):
267 267 # Set initial higher permissions
268 268 initial_perms = 0o666
269 269 os.chmod(test_file, initial_perms)
270 270
271 271 # Attempt to set lower permissions
272 272 lower_perms = 0o644
273 273 set_permissions_if_needed(test_file, lower_perms)
274 274
275 275 # Check if permissions were not reduced
276 276 assert (os.stat(test_file).st_mode & 0o777) == initial_perms
277 277
278 278
279 279 def test_no_permission_reduction_when_on_777(test_file):
280 280 # Set initial higher permissions
281 281 initial_perms = 0o777
282 282 os.chmod(test_file, initial_perms)
283 283
284 284 # Attempt to set lower permissions
285 285 lower_perms = 0o755
286 286 set_permissions_if_needed(test_file, lower_perms)
287 287
288 288 # Check if permissions were not reduced
289 289 assert (os.stat(test_file).st_mode & 0o777) == initial_perms
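
The tests above pin down the semantics of set_permissions_if_needed: requested permission bits are added, but existing bits are never dropped. A minimal sketch with those semantics (the real implementation lives in vcsserver.hook_utils and may differ):

    import os
    import stat

    def set_permissions_if_needed_sketch(path_to_file, perms):
        # union of current and requested bits; never lowers permissions
        current = stat.S_IMODE(os.stat(path_to_file).st_mode)
        target = current | perms
        if target != current:
            os.chmod(path_to_file, target)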
@@ -1,295 +1,295 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import io
19 19 import more_itertools
20 20
21 21 import dulwich.protocol
22 22 import mock
23 23 import pytest
24 24 import webob
25 25 import webtest
26 26
27 27 from vcsserver import hooks, pygrack
28 28
29 from vcsserver.str_utils import ascii_bytes
29 from vcsserver.lib.str_utils import ascii_bytes
30 30
31 31
32 32 @pytest.fixture()
33 33 def pygrack_instance(tmpdir):
34 34 """
35 35 Creates a pygrack app instance.
36 36
37 37 Right now, it does not do much with the passed directory.
38 38 It just contains the required folders to pass the signature test.
39 39 """
40 40 for dir_name in ('config', 'head', 'info', 'objects', 'refs'):
41 41 tmpdir.mkdir(dir_name)
42 42
43 43 return pygrack.GitRepository('repo_name', str(tmpdir), 'git', False, {})
44 44
45 45
46 46 @pytest.fixture()
47 47 def pygrack_app(pygrack_instance):
48 48 """
49 49 Creates a pygrack app wrapped in webtest.TestApp.
50 50 """
51 51 return webtest.TestApp(pygrack_instance)
52 52
53 53
54 54 def test_invalid_service_info_refs_returns_403(pygrack_app):
55 55 response = pygrack_app.get('/info/refs?service=git-upload-packs',
56 56 expect_errors=True)
57 57
58 58 assert response.status_int == 403
59 59
60 60
61 61 def test_invalid_endpoint_returns_403(pygrack_app):
62 62 response = pygrack_app.post('/git-upload-packs', expect_errors=True)
63 63
64 64 assert response.status_int == 403
65 65
66 66
67 67 @pytest.mark.parametrize('sideband', [
68 68 'side-band-64k',
69 69 'side-band',
70 70 'side-band no-progress',
71 71 ])
72 72 def test_pre_pull_hook_fails_with_sideband(pygrack_app, sideband):
73 73 request = ''.join([
74 74 '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ',
75 75 f'multi_ack {sideband} ofs-delta\n',
76 76 '0000',
77 77 '0009done\n',
78 78 ])
79 79 with mock.patch('vcsserver.hooks.git_pre_pull', return_value=hooks.HookResponse(1, 'foo')):
80 80 response = pygrack_app.post(
81 81 '/git-upload-pack', params=request,
82 82 content_type='application/x-git-upload-pack')
83 83
84 84 data = io.BytesIO(response.body)
85 85 proto = dulwich.protocol.Protocol(data.read, None)
86 86 packets = list(proto.read_pkt_seq())
87 87
88 88 expected_packets = [
89 89 b'NAK\n', b'\x02foo', b'\x02Pre pull hook failed: aborting\n',
90 90 b'\x01' + pygrack.GitRepository.EMPTY_PACK,
91 91 ]
92 92 assert packets == expected_packets
93 93
94 94
95 95 def test_pre_pull_hook_fails_no_sideband(pygrack_app):
96 96 request = ''.join([
97 97 '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' +
98 98 'multi_ack ofs-delta\n'
99 99 '0000',
100 100 '0009done\n',
101 101 ])
102 102 with mock.patch('vcsserver.hooks.git_pre_pull',
103 103 return_value=hooks.HookResponse(1, 'foo')):
104 104 response = pygrack_app.post(
105 105 '/git-upload-pack', params=request,
106 106 content_type='application/x-git-upload-pack')
107 107
108 108 assert response.body == pygrack.GitRepository.EMPTY_PACK
109 109
110 110
111 111 def test_pull_has_hook_messages(pygrack_app):
112 112 request = ''.join([
113 113 '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' +
114 114 'multi_ack side-band-64k ofs-delta\n'
115 115 '0000',
116 116 '0009done\n',
117 117 ])
118 118
119 119 pre_pull = 'pre_pull_output'
120 120 post_pull = 'post_pull_output'
121 121
122 122 with mock.patch('vcsserver.hooks.git_pre_pull',
123 123 return_value=hooks.HookResponse(0, pre_pull)):
124 124 with mock.patch('vcsserver.hooks.git_post_pull',
125 125 return_value=hooks.HookResponse(1, post_pull)):
126 126 with mock.patch('vcsserver.subprocessio.SubprocessIOChunker',
127 127 return_value=more_itertools.always_iterable([b'0008NAK\n0009subp\n0000'])):
128 128 response = pygrack_app.post(
129 129 '/git-upload-pack', params=request,
130 130 content_type='application/x-git-upload-pack')
131 131
132 132 data = io.BytesIO(response.body)
133 133 proto = dulwich.protocol.Protocol(data.read, None)
134 134 packets = list(proto.read_pkt_seq())
135 135
136 136 assert packets == [b'NAK\n',
137 137 # pre-pull only outputs if IT FAILS as in != 0 ret code
138 138 #b'\x02pre_pull_output',
139 139 b'subp\n',
140 140 b'\x02post_pull_output']
141 141
142 142
143 143 def test_get_want_capabilities(pygrack_instance):
144 144 data = io.BytesIO(
145 145 b'0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' +
146 146 b'multi_ack side-band-64k ofs-delta\n00000009done\n')
147 147
148 148 request = webob.Request({
149 149 'wsgi.input': data,
150 150 'REQUEST_METHOD': 'POST',
151 151 'webob.is_body_seekable': True
152 152 })
153 153
154 154 capabilities = pygrack_instance._get_want_capabilities(request)
155 155
156 156 assert capabilities == frozenset(
157 157 (b'ofs-delta', b'multi_ack', b'side-band-64k'))
158 158 assert data.tell() == 0
159 159
160 160
161 161 @pytest.mark.parametrize('data,capabilities,expected', [
162 162 ('foo', [], []),
163 163 ('', [pygrack.CAPABILITY_SIDE_BAND_64K], []),
164 164 ('', [pygrack.CAPABILITY_SIDE_BAND], []),
165 165 ('foo', [pygrack.CAPABILITY_SIDE_BAND_64K], [b'0008\x02foo']),
166 166 ('foo', [pygrack.CAPABILITY_SIDE_BAND], [b'0008\x02foo']),
167 167 ('f'*1000, [pygrack.CAPABILITY_SIDE_BAND_64K], [b'03ed\x02' + b'f' * 1000]),
168 168 ('f'*1000, [pygrack.CAPABILITY_SIDE_BAND], [b'03e8\x02' + b'f' * 995, b'000a\x02fffff']),
169 169 ('f'*65520, [pygrack.CAPABILITY_SIDE_BAND_64K], [b'fff0\x02' + b'f' * 65515, b'000a\x02fffff']),
170 170 ('f'*65520, [pygrack.CAPABILITY_SIDE_BAND], [b'03e8\x02' + b'f' * 995] * 65 + [b'0352\x02' + b'f' * 845]),
171 171 ], ids=[
172 172 'foo-empty',
173 173 'empty-64k', 'empty',
174 174 'foo-64k', 'foo',
175 175 'f-1000-64k', 'f-1000',
176 176 'f-65520-64k', 'f-65520'])
177 177 def test_get_messages(pygrack_instance, data, capabilities, expected):
178 178 messages = pygrack_instance._get_messages(data, capabilities)
179 179
180 180 assert messages == expected
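
The expected values above follow git's pkt-line framing: a 4-hex-digit length that includes the 4-byte header itself, then a sideband channel byte (\x02 is the progress channel) and the payload. Reproducing the '0008\x02foo' case:

    payload = b'\x02' + b'foo'
    pkt = ('%04x' % (len(payload) + 4)).encode() + payload
    assert pkt == b'0008\x02foo'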
181 181
182 182
183 183 @pytest.mark.parametrize('response,capabilities,pre_pull_messages,post_pull_messages', [
184 184 # Unexpected response
185 185 ([b'unexpected_response[no_initial_header]'], [pygrack.CAPABILITY_SIDE_BAND_64K], 'foo', 'bar'),
186 186 # No sideband
187 187 ([b'no-sideband'], [], 'foo', 'bar'),
188 188 # No messages
189 189 ([b'no-messages'], [pygrack.CAPABILITY_SIDE_BAND_64K], '', ''),
190 190 ])
191 191 def test_inject_messages_to_response_nothing_to_do(
192 192 pygrack_instance, response, capabilities, pre_pull_messages, post_pull_messages):
193 193
194 194 new_response = pygrack_instance._build_post_pull_response(
195 195 more_itertools.always_iterable(response), capabilities, pre_pull_messages, post_pull_messages)
196 196
197 197 assert list(new_response) == response
198 198
199 199
200 200 @pytest.mark.parametrize('capabilities', [
201 201 [pygrack.CAPABILITY_SIDE_BAND],
202 202 [pygrack.CAPABILITY_SIDE_BAND_64K],
203 203 ])
204 204 def test_inject_messages_to_response_single_element(pygrack_instance, capabilities):
205 205 response = [b'0008NAK\n0009subp\n0000']
206 206 new_response = pygrack_instance._build_post_pull_response(
207 207 more_itertools.always_iterable(response), capabilities, 'foo', 'bar')
208 208
209 209 expected_response = b''.join([
210 210 b'0008NAK\n',
211 211 b'0008\x02foo',
212 212 b'0009subp\n',
213 213 b'0008\x02bar',
214 214 b'0000'])
215 215
216 216 assert b''.join(new_response) == expected_response
217 217
218 218
219 219 @pytest.mark.parametrize('capabilities', [
220 220 [pygrack.CAPABILITY_SIDE_BAND],
221 221 [pygrack.CAPABILITY_SIDE_BAND_64K],
222 222 ])
223 223 def test_inject_messages_to_response_multi_element(pygrack_instance, capabilities):
224 224 response = more_itertools.always_iterable([
225 225 b'0008NAK\n000asubp1\n', b'000asubp2\n', b'000asubp3\n', b'000asubp4\n0000'
226 226 ])
227 227 new_response = pygrack_instance._build_post_pull_response(response, capabilities, 'foo', 'bar')
228 228
229 229 expected_response = b''.join([
230 230 b'0008NAK\n',
231 231 b'0008\x02foo',
232 232 b'000asubp1\n', b'000asubp2\n', b'000asubp3\n', b'000asubp4\n',
233 233 b'0008\x02bar',
234 234 b'0000'
235 235 ])
236 236
237 237 assert b''.join(new_response) == expected_response
238 238
239 239
240 240 def test_build_failed_pre_pull_response_no_sideband(pygrack_instance):
241 241 response = pygrack_instance._build_failed_pre_pull_response([], 'foo')
242 242
243 243 assert response == [pygrack.GitRepository.EMPTY_PACK]
244 244
245 245
246 246 @pytest.mark.parametrize('capabilities', [
247 247 [pygrack.CAPABILITY_SIDE_BAND],
248 248 [pygrack.CAPABILITY_SIDE_BAND_64K],
249 249 [pygrack.CAPABILITY_SIDE_BAND_64K, b'no-progress'],
250 250 ])
251 251 def test_build_failed_pre_pull_response(pygrack_instance, capabilities):
252 252 response = pygrack_instance._build_failed_pre_pull_response(capabilities, 'foo')
253 253
254 254 expected_response = [
255 255 b'0008NAK\n', b'0008\x02foo', b'0024\x02Pre pull hook failed: aborting\n',
256 256 b'%04x\x01%s' % (len(pygrack.GitRepository.EMPTY_PACK) + 5, pygrack.GitRepository.EMPTY_PACK),
257 257 pygrack.GitRepository.FLUSH_PACKET,
258 258 ]
259 259
260 260 assert response == expected_response
261 261
262 262
263 263 def test_inject_messages_to_response_generator(pygrack_instance):
264 264
265 265 def response_generator():
266 266 response = [
267 267 # protocol start
268 268 b'0008NAK\n',
269 269 ]
270 270 response += [ascii_bytes(f'000asubp{x}\n') for x in range(1000)]
271 271 response += [
272 272 # protocol end
273 273 pygrack.GitRepository.FLUSH_PACKET
274 274 ]
275 275 for elem in response:
276 276 yield elem
277 277
278 278 new_response = pygrack_instance._build_post_pull_response(
279 279 response_generator(), [pygrack.CAPABILITY_SIDE_BAND_64K, b'no-progress'], 'PRE_PULL_MSG\n', 'POST_PULL_MSG\n')
280 280
281 281 assert iter(new_response)
282 282
283 283 expected_response = b''.join([
284 284 # start
285 285 b'0008NAK\n0012\x02PRE_PULL_MSG\n',
286 286 ] + [
287 287 # ... rest
288 288 ascii_bytes(f'000asubp{x}\n') for x in range(1000)
289 289 ] + [
290 290 # final message,
291 291 b'0013\x02POST_PULL_MSG\n0000',
292 292
293 293 ])
294 294
295 295 assert b''.join(new_response) == expected_response
@@ -1,87 +1,87 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19
20 20 import mercurial.hg
21 21 import mercurial.ui
22 22 import mercurial.error
23 23 import mock
24 24 import pytest
25 25 import webtest
26 26
27 27 from vcsserver import scm_app
28 from vcsserver.str_utils import ascii_bytes
28 from vcsserver.lib.str_utils import ascii_bytes
29 29
30 30
31 31 def test_hg_does_not_accept_invalid_cmd(tmpdir):
32 32 repo = mercurial.hg.repository(mercurial.ui.ui(), ascii_bytes(str(tmpdir)), create=True)
33 33 app = webtest.TestApp(scm_app.HgWeb(repo))
34 34
35 35 response = app.get('/repo?cmd=invalidcmd', expect_errors=True)
36 36
37 37 assert response.status_int == 400
38 38
39 39
40 40 def test_create_hg_wsgi_app_requirement_error(tmpdir):
41 41 repo = mercurial.hg.repository(mercurial.ui.ui(), ascii_bytes(str(tmpdir)), create=True)
42 42 config = (
43 43 ('paths', 'default', ''),
44 44 )
45 45 with mock.patch('vcsserver.scm_app.HgWeb') as hgweb_mock:
46 46 hgweb_mock.side_effect = mercurial.error.RequirementError()
47 47 with pytest.raises(Exception):
48 48 scm_app.create_hg_wsgi_app(str(tmpdir), repo, config)
49 49
50 50
51 51 def test_git_returns_not_found(tmpdir):
52 52 app = webtest.TestApp(
53 53 scm_app.GitHandler(str(tmpdir), 'repo_name', 'git', False, {}))
54 54
55 55 response = app.get('/repo_name/inforefs?service=git-upload-pack',
56 56 expect_errors=True)
57 57
58 58 assert response.status_int == 404
59 59
60 60
61 61 def test_git(tmpdir):
62 62 for dir_name in ('config', 'head', 'info', 'objects', 'refs'):
63 63 tmpdir.mkdir(dir_name)
64 64
65 65 app = webtest.TestApp(
66 66 scm_app.GitHandler(str(tmpdir), 'repo_name', 'git', False, {}))
67 67
68 68 # We set service to git-upload-packs to trigger a 403
69 69 response = app.get('/repo_name/inforefs?service=git-upload-packs',
70 70 expect_errors=True)
71 71
72 72 assert response.status_int == 403
73 73
74 74
75 75 def test_git_fallbacks_to_git_folder(tmpdir):
76 76 tmpdir.mkdir('.git')
77 77 for dir_name in ('config', 'head', 'info', 'objects', 'refs'):
78 78 tmpdir.mkdir(os.path.join('.git', dir_name))
79 79
80 80 app = webtest.TestApp(
81 81 scm_app.GitHandler(str(tmpdir), 'repo_name', 'git', False, {}))
82 82
83 83 # We set service to git-upload-packs to trigger a 403
84 84 response = app.get('/repo_name/inforefs?service=git-upload-packs',
85 85 expect_errors=True)
86 86
87 87 assert response.status_int == 403
@@ -1,155 +1,155 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import io
19 19 import os
20 20 import sys
21 21
22 22 import pytest
23 23
24 24 from vcsserver import subprocessio
25 from vcsserver.str_utils import ascii_bytes
25 from vcsserver.lib.str_utils import ascii_bytes
26 26
27 27
28 28 class FileLikeObj: # pragma: no cover
29 29
30 30 def __init__(self, data: bytes, size):
31 31 chunks = size // len(data)
32 32
33 33 self.stream = self._get_stream(data, chunks)
34 34
35 35 def _get_stream(self, data, chunks):
36 36 for x in range(chunks):
37 37 yield data
38 38
39 39 def read(self, n):
40 40
41 41 buffer_stream = b''
42 42 for chunk in self.stream:
43 43 buffer_stream += chunk
44 44 if len(buffer_stream) >= n:
45 45 break
46 46
47 47 # self.stream = self.bytes[n:]
48 48 return buffer_stream
49 49
50 50
51 51 @pytest.fixture(scope='module')
52 52 def environ():
53 53 """Delete coverage variables, as they make the tests fail."""
54 54 env = dict(os.environ)
55 55 for key in list(env.keys()):
56 56 if key.startswith('COV_CORE_'):
57 57 del env[key]
58 58
59 59 return env
60 60
61 61
62 62 def _get_python_args(script):
63 63 return [sys.executable, '-c', 'import sys; import time; import shutil; ' + script]
64 64
65 65
66 66 def test_raise_exception_on_non_zero_return_code(environ):
67 67 call_args = _get_python_args('raise ValueError("fail")')
68 68 with pytest.raises(OSError):
69 69 b''.join(subprocessio.SubprocessIOChunker(call_args, shell=False, env=environ))
70 70
71 71
72 72 def test_does_not_fail_on_non_zero_return_code(environ):
73 73 call_args = _get_python_args('sys.stdout.write("hello"); sys.exit(1)')
74 74 proc = subprocessio.SubprocessIOChunker(call_args, shell=False, fail_on_return_code=False, env=environ)
75 75 output = b''.join(proc)
76 76
77 77 assert output == b'hello'
78 78
79 79
80 80 def test_raise_exception_on_stderr(environ):
81 81 call_args = _get_python_args('sys.stderr.write("WRITE_TO_STDERR"); time.sleep(1);')
82 82
83 83 with pytest.raises(OSError) as excinfo:
84 84 b''.join(subprocessio.SubprocessIOChunker(call_args, shell=False, env=environ))
85 85
86 86 assert 'exited due to an error:\nWRITE_TO_STDERR' in str(excinfo.value)
87 87
88 88
89 89 def test_does_not_fail_on_stderr(environ):
90 90 call_args = _get_python_args('sys.stderr.write("WRITE_TO_STDERR"); sys.stderr.flush(); time.sleep(2);')
91 91 proc = subprocessio.SubprocessIOChunker(call_args, shell=False, fail_on_stderr=False, env=environ)
92 92 output = b''.join(proc)
93 93
94 94 assert output == b''
95 95
96 96
97 97 @pytest.mark.parametrize('size', [
98 98 1,
99 99 10 ** 5
100 100 ])
101 101 def test_output_with_no_input(size, environ):
102 102 call_args = _get_python_args(f'sys.stdout.write("X" * {size});')
103 103 proc = subprocessio.SubprocessIOChunker(call_args, shell=False, env=environ)
104 104 output = b''.join(proc)
105 105
106 106 assert output == ascii_bytes("X" * size)
107 107
108 108
109 109 @pytest.mark.parametrize('size', [
110 110 1,
111 111 10 ** 5
112 112 ])
113 113 def test_output_with_no_input_does_not_fail(size, environ):
114 114
115 115 call_args = _get_python_args(f'sys.stdout.write("X" * {size}); sys.exit(1)')
116 116 proc = subprocessio.SubprocessIOChunker(call_args, shell=False, fail_on_return_code=False, env=environ)
117 117 output = b''.join(proc)
118 118
119 119 assert output == ascii_bytes("X" * size)
120 120
121 121
122 122 @pytest.mark.parametrize('size', [
123 123 1,
124 124 10 ** 5
125 125 ])
126 126 def test_output_with_input(size, environ):
127 127 data_len = size
128 128 inputstream = FileLikeObj(b'X', size)
129 129
130 130 # This acts like the cat command.
131 131 call_args = _get_python_args('shutil.copyfileobj(sys.stdin, sys.stdout)')
132 132 # note: in this test we explicitly don't assign the chunker to a variable and let it stream directly
133 133 output = b''.join(
134 134 subprocessio.SubprocessIOChunker(call_args, shell=False, input_stream=inputstream, env=environ)
135 135 )
136 136
137 137 assert len(output) == data_len
138 138
139 139
140 140 @pytest.mark.parametrize('size', [
141 141 1,
142 142 10 ** 5
143 143 ])
144 144 def test_output_with_input_skipping_iterator(size, environ):
145 145 data_len = size
146 146 inputstream = FileLikeObj(b'X', size)
147 147
148 148 # This acts like the cat command.
149 149 call_args = _get_python_args('shutil.copyfileobj(sys.stdin, sys.stdout)')
150 150
151 151 # Note: assigning the chunker makes sure that it is not deleted too early
152 152 proc = subprocessio.SubprocessIOChunker(call_args, shell=False, input_stream=inputstream, env=environ)
153 153 output = b''.join(proc.stdout)
154 154
155 155 assert len(output) == data_len
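
Taken together, the tests above pin down the SubprocessIOChunker contract: it is iterable, yields bytes, and raises OSError on a non-zero exit code or on stderr output unless fail_on_return_code / fail_on_stderr disable that. A minimal usage sketch, using only the constructor arguments these tests exercise:

import os
import sys

from vcsserver import subprocessio

args = [sys.executable, '-c', 'import sys; sys.stdout.write("chunked output")']
chunker = subprocessio.SubprocessIOChunker(
    args,
    shell=False,               # args is an argv list, not a shell string
    fail_on_return_code=True,  # raise OSError on a non-zero exit code
    fail_on_stderr=True,       # raise OSError if the child writes to stderr
    env=dict(os.environ),
)
for chunk in chunker:          # chunks are bytes and can be streamed as-is
    sys.stdout.buffer.write(chunk)
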
@@ -1,103 +1,103 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import io
19 19 import mock
20 20 import pytest
21 21 import sys
22 22
23 from vcsserver.str_utils import ascii_bytes
23 from vcsserver.lib.str_utils import ascii_bytes
24 24
25 25
26 26 class MockPopen:
27 27 def __init__(self, stderr):
28 28 self.stdout = io.BytesIO(b'')
29 29 self.stderr = io.BytesIO(stderr)
30 30 self.returncode = 1
31 31
32 32 def wait(self):
33 33 pass
34 34
35 35
36 36 INVALID_CERTIFICATE_STDERR = '\n'.join([
37 37 'svnrdump: E230001: Unable to connect to a repository at URL url',
38 38 'svnrdump: E230001: Server SSL certificate verification failed: issuer is not trusted',
39 39 ])
40 40
41 41
42 42 @pytest.mark.parametrize('stderr,expected_reason', [
43 43 (INVALID_CERTIFICATE_STDERR, 'INVALID_CERTIFICATE'),
44 44 ('svnrdump: E123456', 'UNKNOWN:svnrdump: E123456'),
45 45 ], ids=['invalid-cert-stderr', 'svnrdump-err-123456'])
46 46 @pytest.mark.xfail(sys.platform == "cygwin",
47 47 reason="SVN not packaged for Cygwin")
48 48 def test_import_remote_repository_certificate_error(stderr, expected_reason):
49 49 from vcsserver.remote import svn_remote
50 50 factory = mock.Mock()
51 51 factory.repo = mock.Mock(return_value=mock.Mock())
52 52
53 53 remote = svn_remote.SvnRemote(factory)
54 54 remote.is_path_valid_repository = lambda wire, path: True
55 55
56 56 with mock.patch('subprocess.Popen',
57 57 return_value=MockPopen(ascii_bytes(stderr))):
58 58 with pytest.raises(Exception) as excinfo:
59 59 remote.import_remote_repository({'path': 'path'}, 'url')
60 60
61 61 expected_error_args = 'Failed to dump the remote repository from url. Reason:{}'.format(expected_reason)
62 62
63 63 assert excinfo.value.args[0] == expected_error_args
64 64
65 65
66 66 def test_svn_libraries_can_be_imported():
67 67 import svn.client # noqa
68 68 assert svn.client is not None
69 69
70 70
71 71 @pytest.mark.parametrize('example_url, parts', [
72 72 ('http://server.com', ('', '', 'http://server.com')),
73 73 ('http://user@server.com', ('user', '', 'http://user@server.com')),
74 74 ('http://user:pass@server.com', ('user', 'pass', 'http://user:pass@server.com')),
75 75 ('<script>', ('', '', '<script>')),
76 76 ('http://', ('', '', 'http://')),
77 77 ])
78 78 def test_username_password_extraction_from_url(example_url, parts):
79 79 from vcsserver.remote import svn_remote
80 80
81 81 factory = mock.Mock()
82 82 factory.repo = mock.Mock(return_value=mock.Mock())
83 83
84 84 remote = svn_remote.SvnRemote(factory)
85 85 remote.is_path_valid_repository = lambda wire, path: True
86 86
87 87 assert remote.get_url_and_credentials(example_url) == parts
88 88
89 89
90 90 @pytest.mark.parametrize('call_url', [
91 91 b'https://svn.code.sf.net/p/svnbook/source/trunk/',
92 92 b'https://marcink@svn.code.sf.net/p/svnbook/source/trunk/',
93 93 b'https://marcink:qweqwe@svn.code.sf.net/p/svnbook/source/trunk/',
94 94 ])
95 95 def test_check_url(call_url):
96 96 from vcsserver.remote import svn_remote
97 97 factory = mock.Mock()
98 98 factory.repo = mock.Mock(return_value=mock.Mock())
99 99
100 100 remote = svn_remote.SvnRemote(factory)
101 101 remote.is_path_valid_repository = lambda wire, path: True
102 102 assert remote.check_url(call_url, {'dummy': 'config'})
103 103
@@ -1,69 +1,69 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import pytest
19 from vcsserver.str_utils import ascii_bytes, ascii_str, convert_to_str
19 from vcsserver.lib.str_utils import ascii_bytes, ascii_str, convert_to_str
20 20
21 21
22 22 @pytest.mark.parametrize('given, expected', [
23 23 ('a', b'a'),
24 24 ('a', b'a'),
25 25 ])
26 26 def test_ascii_bytes(given, expected):
27 27 assert ascii_bytes(given) == expected
28 28
29 29
30 30 @pytest.mark.parametrize('given', [
31 31 'Γ₯',
32 32 'Γ₯'.encode('utf8')
33 33 ])
34 34 def test_ascii_bytes_raises(given):
35 35 with pytest.raises(ValueError):
36 36 ascii_bytes(given)
37 37
38 38
39 39 @pytest.mark.parametrize('given, expected', [
40 40 (b'a', 'a'),
41 41 ])
42 42 def test_ascii_str(given, expected):
43 43 assert ascii_str(given) == expected
44 44
45 45
46 46 @pytest.mark.parametrize('given', [
47 47 'a',
48 48 'Γ₯'.encode('utf8'),
49 49 'Γ₯'
50 50 ])
51 51 def test_ascii_str_raises(given):
52 52 with pytest.raises(ValueError):
53 53 ascii_str(given)
54 54
55 55
56 56 @pytest.mark.parametrize('given, expected', [
57 57 ('a', 'a'),
58 58 (b'a', 'a'),
59 59 # tuple
60 60 (('a', b'b', b'c'), ('a', 'b', 'c')),
61 61 # nested tuple
62 62 (('a', b'b', (b'd', b'e')), ('a', 'b', ('d', 'e'))),
63 63 # list
64 64 (['a', b'b', b'c'], ['a', 'b', 'c']),
65 65 # mixed
66 66 (['a', b'b', b'c', (b'b1', b'b2')], ['a', 'b', 'c', ('b1', 'b2')])
67 67 ])
68 68 def test_convert_to_str(given, expected):
69 69 assert convert_to_str(given) == expected
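
These tests fix the contract of the ascii helpers: ascii_bytes accepts str only (bytes only with allow_bytes=True, as the wsgi_app_caller module later in this diff uses) and rejects anything outside ASCII. Note that str.encode('ascii') raises UnicodeEncodeError, a ValueError subclass, which is why pytest.raises(ValueError) matches for 'å'. A sketch consistent with these tests; the real vcsserver.lib.str_utils implementation may differ, and ascii_bytes_sketch is a hypothetical name:

def ascii_bytes_sketch(value, allow_bytes: bool = False) -> bytes:
    # Reject bytes input unless explicitly allowed, as the tests require.
    if isinstance(value, bytes):
        if allow_bytes:
            return value
        raise ValueError(f'bytes input is not allowed: {value!r}')
    # UnicodeEncodeError is a ValueError subclass, so non-ASCII input
    # such as 'å' satisfies pytest.raises(ValueError) above.
    return value.encode('ascii')
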
@@ -1,98 +1,98 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import wsgiref.simple_server
19 19 import wsgiref.validate
20 20
21 21 from vcsserver import wsgi_app_caller
22 from vcsserver.str_utils import ascii_bytes, safe_str
22 from vcsserver.lib.str_utils import ascii_bytes, safe_str
23 23
24 24
25 25 @wsgiref.validate.validator
26 26 def demo_app(environ, start_response):
27 27 """WSGI app used for testing."""
28 28
29 29 input_data = safe_str(environ['wsgi.input'].read(1024))
30 30
31 31 data = [
32 32 'Hello World!\n',
33 33 f'input_data={input_data}\n',
34 34 ]
35 35 for key, value in sorted(environ.items()):
36 36 data.append(f'{key}={value}\n')
37 37
38 38 write = start_response("200 OK", [('Content-Type', 'text/plain')])
39 39 write(b'Old school write method\n')
40 40 write(b'***********************\n')
41 41 return list(map(ascii_bytes, data))
42 42
43 43
44 44 BASE_ENVIRON = {
45 45 'REQUEST_METHOD': 'GET',
46 46 'SERVER_NAME': 'localhost',
47 47 'SERVER_PORT': '80',
48 48 'SCRIPT_NAME': '',
49 49 'PATH_INFO': '/',
50 50 'QUERY_STRING': '',
51 51 'foo.var': 'bla',
52 52 }
53 53
54 54
55 55 def test_complete_environ():
56 56 environ = dict(BASE_ENVIRON)
57 57 data = b"data"
58 58 wsgi_app_caller._complete_environ(environ, data)
59 59 wsgiref.validate.check_environ(environ)
60 60
61 61 assert data == environ['wsgi.input'].read(1024)
62 62
63 63
64 64 def test_start_response():
65 65 start_response = wsgi_app_caller._StartResponse()
66 66 status = '200 OK'
67 67 headers = [('Content-Type', 'text/plain')]
68 68 start_response(status, headers)
69 69
70 70 assert status == start_response.status
71 71 assert headers == start_response.headers
72 72
73 73
74 74 def test_start_response_with_error():
75 75 start_response = wsgi_app_caller._StartResponse()
76 76 status = '500 Internal Server Error'
77 77 headers = [('Content-Type', 'text/plain')]
78 78 start_response(status, headers, (None, None, None))
79 79
80 80 assert status == start_response.status
81 81 assert headers == start_response.headers
82 82
83 83
84 84 def test_wsgi_app_caller():
85 85 environ = dict(BASE_ENVIRON)
86 86 input_data = 'some text'
87 87
88 88 caller = wsgi_app_caller.WSGIAppCaller(demo_app)
89 89 responses, status, headers = caller.handle(environ, input_data)
90 90 response = b''.join(responses)
91 91
92 92 assert status == '200 OK'
93 93 assert headers == [('Content-Type', 'text/plain')]
94 94 assert response.startswith(b'Old school write method\n***********************\n')
95 95 assert b'Hello World!\n' in response
96 96 assert b'foo.var=bla\n' in response
97 97
98 98 assert ascii_bytes(f'input_data={input_data}\n') in response
@@ -1,123 +1,123 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17 import base64
18 18 import logging
19 19 import time
20 20
21 21 import msgpack
22 22
23 23 import vcsserver
24 from vcsserver.str_utils import safe_str
24 from vcsserver.lib.str_utils import safe_str
25 25
26 26 log = logging.getLogger(__name__)
27 27
28 28
29 29 def get_access_path(environ):
30 30 path = environ.get('PATH_INFO')
31 31 return path
32 32
33 33
34 34 def get_user_agent(environ):
35 35 return environ.get('HTTP_USER_AGENT')
36 36
37 37
38 38 def get_call_context(request) -> dict:
39 39 cc = {}
40 40 registry = request.registry
41 41 if hasattr(registry, 'vcs_call_context'):
42 42 cc.update({
43 43 'X-RC-Method': registry.vcs_call_context.get('method'),
44 44 'X-RC-Repo-Name': registry.vcs_call_context.get('repo_name')
45 45 })
46 46
47 47 return cc
48 48
49 49
50 50 def get_headers_call_context(environ, strict=True):
51 51 if 'HTTP_X_RC_VCS_STREAM_CALL_CONTEXT' in environ:
52 52 packed_cc = base64.b64decode(environ['HTTP_X_RC_VCS_STREAM_CALL_CONTEXT'])
53 53 return msgpack.unpackb(packed_cc)
54 54 elif strict:
55 55 raise ValueError('Expected header HTTP_X_RC_VCS_STREAM_CALL_CONTEXT not found')
56 56
57 57
58 58 class RequestWrapperTween:
59 59 def __init__(self, handler, registry):
60 60 self.handler = handler
61 61 self.registry = registry
62 62
63 63 # one-time configuration code goes here
64 64
65 65 def __call__(self, request):
66 66 start = time.time()
67 67 log.debug('Starting request time measurement')
68 68 response = None
69 69
70 70 try:
71 71 response = self.handler(request)
72 72 finally:
73 73 ua = get_user_agent(request.environ)
74 74 call_context = get_call_context(request)
75 75 vcs_method = call_context.get('X-RC-Method', '_NO_VCS_METHOD')
76 76 repo_name = call_context.get('X-RC-Repo-Name', '')
77 77
78 78 count = request.request_count()
79 79 _ver_ = vcsserver.get_version()
80 80 _path = safe_str(get_access_path(request.environ))
81 81
82 82 ip = '127.0.0.1'
83 83 match_route = request.matched_route.name if request.matched_route else "NOT_FOUND"
84 84 resp_code = getattr(response, 'status_code', 'UNDEFINED')
85 85
86 86 _view_path = f"{repo_name}@{_path}/{vcs_method}"
87 87
88 88 total = time.time() - start
89 89
90 90 log.info(
91 91 'Req[%4s] IP: %s %s Request to %s time: %.4fs [%s], VCSServer %s',
92 92 count, ip, request.environ.get('REQUEST_METHOD'),
93 93 _view_path, total, ua, _ver_,
94 94 extra={"time": total, "ver": _ver_, "code": resp_code,
95 95 "path": _path, "view_name": match_route, "user_agent": ua,
96 96 "vcs_method": vcs_method, "repo_name": repo_name}
97 97 )
98 98
99 99 statsd = request.registry.statsd
100 100 if statsd:
101 101 match_route = request.matched_route.name if request.matched_route else _path
102 102 elapsed_time_ms = round(1000.0 * total) # use ms only
103 103 statsd.timing(
104 104 "vcsserver_req_timing.histogram", elapsed_time_ms,
105 105 tags=[
106 106 f"view_name:{match_route}",
107 107 f"code:{resp_code}"
108 108 ],
109 109 use_decimals=False
110 110 )
111 111 statsd.incr(
112 112 "vcsserver_req_total", tags=[
113 113 f"view_name:{match_route}",
114 114 f"code:{resp_code}"
115 115 ])
116 116
117 117 return response
118 118
119 119
120 120 def includeme(config):
121 121 config.add_tween(
122 122 'vcsserver.tweens.request_wrapper.RequestWrapperTween',
123 123 )
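
The call-context plumbing in this tween is symmetric: get_headers_call_context() expects the HTTP_X_RC_VCS_STREAM_CALL_CONTEXT header to carry a base64-encoded msgpack mapping. A sketch of how a caller could produce such a header; the payload keys mirror the 'method' and 'repo_name' fields that get_call_context() reads and are an assumption here:

import base64
import msgpack

call_context = {'method': 'pull', 'repo_name': 'some-repo'}  # assumed key names
packed_cc = msgpack.packb(call_context)
environ = {'HTTP_X_RC_VCS_STREAM_CALL_CONTEXT': base64.b64encode(packed_cc)}

# get_headers_call_context(environ) reverses this: base64-decode, then
# msgpack.unpackb(); with strict=True it raises ValueError when the
# header is missing.
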
@@ -1,116 +1,116 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 """Extract the responses of a WSGI app."""
19 19
20 20 __all__ = ('WSGIAppCaller',)
21 21
22 22 import io
23 23 import logging
24 24 import os
25 25
26 from vcsserver.str_utils import ascii_bytes
26 from vcsserver.lib.str_utils import ascii_bytes
27 27
28 28 log = logging.getLogger(__name__)
29 29
30 30 DEV_NULL = open(os.devnull)
31 31
32 32
33 33 def _complete_environ(environ, input_data: bytes):
34 34 """Update the missing wsgi.* variables of a WSGI environment.
35 35
36 36 :param environ: WSGI environment to update
37 37 :type environ: dict
38 38 :param input_data: data to be read by the app
39 39 :type input_data: bytes
40 40 """
41 41 environ.update({
42 42 'wsgi.version': (1, 0),
43 43 'wsgi.url_scheme': 'http',
44 44 'wsgi.multithread': True,
45 45 'wsgi.multiprocess': True,
46 46 'wsgi.run_once': False,
47 47 'wsgi.input': io.BytesIO(input_data),
48 48 'wsgi.errors': DEV_NULL,
49 49 })
50 50
51 51
52 52 # pylint: disable=too-few-public-methods
53 53 class _StartResponse:
54 54 """Save the arguments of a start_response call."""
55 55
56 56 __slots__ = ['status', 'headers', 'content']
57 57
58 58 def __init__(self):
59 59 self.status = None
60 60 self.headers = None
61 61 self.content = []
62 62
63 63 def __call__(self, status, headers, exc_info=None):
64 64 # TODO(skreft): do something meaningful with the exc_info
65 65 exc_info = None # avoid dangling circular reference
66 66 self.status = status
67 67 self.headers = headers
68 68
69 69 return self.write
70 70
71 71 def write(self, content):
72 72 """Write method returned when this object is called.
73 73
74 74 All the data written is then available in self.content.
75 75 """
76 76 self.content.append(content)
77 77
78 78
79 79 class WSGIAppCaller:
80 80 """Calls a WSGI app."""
81 81
82 82 def __init__(self, app):
83 83 """
84 84 :param app: WSGI app to call
85 85 """
86 86 self.app = app
87 87
88 88 def handle(self, environ, input_data):
89 89 """Process a request with the WSGI app.
90 90
91 91 The returned data of the app is fully consumed into a list.
92 92
93 93 :param environ: WSGI environment to update
94 94 :type environ: dict
95 95 :param input_data: data to be read by the app
96 96 :type input_data: str/bytes
97 97
98 98 :returns: a tuple with the contents, status and headers
99 99 :rtype: (list<bytes>, str, list<(str, str)>)
100 100 """
101 101 _complete_environ(environ, ascii_bytes(input_data, allow_bytes=True))
102 102 start_response = _StartResponse()
103 103 log.debug("Calling wrapped WSGI application")
104 104 responses = self.app(environ, start_response)
105 105 responses_list = list(responses)
106 106 existing_responses = start_response.content
107 107 if existing_responses:
108 108 log.debug("Adding returned response to response written via write()")
109 109 existing_responses.extend(responses_list)
110 110 responses_list = existing_responses
111 111 if hasattr(responses, 'close'):
112 112 log.debug("Closing iterator from WSGI application")
113 113 responses.close()
114 114
115 115 log.debug("Handling of WSGI request done, returning response")
116 116 return responses_list, start_response.status, start_response.headers
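
As a usage note, handle() merges output produced via the legacy write() callable with the iterable the app returns, in that order, which is exactly what the demo_app test earlier in this diff asserts. A minimal round trip, using only names defined in this module:

from vcsserver import wsgi_app_caller

def tiny_app(environ, start_response):
    write = start_response('200 OK', [('Content-Type', 'text/plain')])
    write(b'written first\n')       # old-school write() output
    return [b'returned second\n']   # iterable output, appended afterwards

environ = {
    'REQUEST_METHOD': 'GET', 'SERVER_NAME': 'localhost', 'SERVER_PORT': '80',
    'SCRIPT_NAME': '', 'PATH_INFO': '/', 'QUERY_STRING': '',
}
responses, status, headers = wsgi_app_caller.WSGIAppCaller(tiny_app).handle(environ, b'')
assert b''.join(responses) == b'written first\nreturned second\n'
assert status == '200 OK'
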
1 NO CONTENT: file was removed