##// END OF EJS Templates
vcs-support: bulk of changes for python3
super-admin -
r5075:d1c4b80b default
parent child Browse files
Show More
@@ -1,418 +1,432 b''
1 1
2 2
3 3 # Copyright (C) 2016-2020 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 """
22 22 Client for the VCSServer implemented based on HTTP.
23 23 """
24 24
25 25 import copy
26 26 import logging
27 27 import threading
28 28 import time
29 import urllib.request, urllib.error, urllib.parse
29 import urllib.request
30 import urllib.error
31 import urllib.parse
30 32 import urllib.parse
31 33 import uuid
32 34 import traceback
33 35
34 36 import pycurl
35 37 import msgpack
36 38 import requests
37 39 from requests.packages.urllib3.util.retry import Retry
38 40
39 41 import rhodecode
40 42 from rhodecode.lib import rc_cache
41 43 from rhodecode.lib.rc_cache.utils import compute_key_from_params
42 44 from rhodecode.lib.system_info import get_cert_path
43 45 from rhodecode.lib.vcs import exceptions, CurlSession
44 46 from rhodecode.lib.utils2 import str2bool
45 47
46 48 log = logging.getLogger(__name__)
47 49
48 50
# TODO: mikhail: Keep it in sync with vcsserver's
# HTTPApplication.ALLOWED_EXCEPTIONS
# Maps the exception type name found in a remote error payload onto the
# class that is instantiated and raised on the client side.
EXCEPTIONS_MAP = {
    exc_class.__name__: exc_class
    for exc_class in (KeyError, urllib.error.URLError)
}
55 57
56 58
def _remote_call(url, payload, exceptions_map, session, retries=3):
    """
    POST a msgpack-encoded ``payload`` to ``url`` and return the decoded result.

    :param url: endpoint the request is sent to.
    :param payload: msgpack-serializable request body.
    :param exceptions_map: maps the ``type`` name found in a remote error onto
        the exception class raised locally; unknown names fall back to
        ``Exception``.
    :param session: object exposing ``post(url, data=...)``.
    :param retries: number of attempts made when pycurl reports
        ``E_RECV_ERROR`` (connection reset by peer).
    :returns: the ``result`` entry of the decoded response.
    :raises exceptions.HttpVCSCommunicationError: on pycurl errors, on
        "Failed to connect" errors, when all attempts are used up, or when
        the HTTP status code is >= 400.
    :raises Exception: the exception described by the ``error`` entry of the
        response, annotated with the remote details.
    """

    for attempt in range(retries):
        try:
            response = session.post(url, data=msgpack.packb(payload))
            break
        except pycurl.error as e:
            error_code, error_message = e.args
            if error_code == pycurl.E_RECV_ERROR:
                log.warning(f'Received a "Connection reset by peer" error. '
                            f'Retrying... ({attempt + 1}/{retries})')
                continue  # Retry if connection reset error.
            msg = '{}. \npycurl traceback: {}'.format(e, traceback.format_exc())
            raise exceptions.HttpVCSCommunicationError(msg)
        except Exception as e:
            # Python 3 exceptions carry no `.message`; fall back to str(e) so
            # the check below is not dead code.
            message = str(getattr(e, 'message', '') or e)
            if 'Failed to connect' in message:
                # gevent doesn't return proper pycurl errors
                raise exceptions.HttpVCSCommunicationError(e)
            else:
                raise
    else:
        # Loop ended without `break`: every attempt was reset (or retries < 1).
        # Without this, `response` would be unbound below.
        raise exceptions.HttpVCSCommunicationError(
            f'Call to {url} failed: no response received '
            f'after {retries} attempt(s)')

    if response.status_code >= 400:
        content_type = response.content_type
        log.error('Call to %s returned non 200 HTTP code: %s [%s]',
                  url, response.status_code, content_type)
        raise exceptions.HttpVCSCommunicationError(repr(response.content))

    try:
        response = msgpack.unpackb(response.content)
    except Exception:
        log.exception('Failed to decode response from msgpack')
        raise

    error = response.get('error')
    if error:
        type_ = error.get('type', 'Exception')
        exc = exceptions_map.get(type_, Exception)
        exc = exc(error.get('message'))
        try:
            exc._vcs_kind = error['_vcs_kind']
        except KeyError:
            pass

        # All three keys are optional; the first missing one stops the
        # assignment of the remaining attributes.
        try:
            exc._vcs_server_traceback = error['traceback']
            exc._vcs_server_org_exc_name = error['org_exc']
            exc._vcs_server_org_exc_tb = error['org_exc_tb']
        except KeyError:
            pass

        exc.add_note(attach_exc_details(error))
        raise exc  # raising the org exception from vcsserver
    return response.get('result')
108 113
109 114
def attach_exc_details(error):
    """
    Build the multi-line note attached to exceptions re-raised from a remote
    error payload.

    :param error: mapping describing the remote error; the optional keys
        ``_vcs_kind``, ``org_exc`` and ``traceback`` are reported (missing
        keys are rendered as ``None``).
    :returns: the note as a single string.
    """
    return (
        '-- EXC NOTE -- :\n'
        f'vcs_kind: {error.get("_vcs_kind")}\n'
        # report the original exception name, not the vcs kind a second time
        f'org_exc: {error.get("org_exc")}\n'
        f'tb: {error.get("traceback")}\n'
        '-- END EXC NOTE --'
    )
122
123
def _streaming_remote_call(url, payload, exceptions_map, session, chunk_size):
    """
    POST ``payload`` to ``url`` and return an iterator over the response body.

    :param url: endpoint the request is sent to.
    :param payload: msgpack-serializable mapping; its ``method`` and
        ``_repo_name`` entries are also sent as the ``X-RC-Method`` and
        ``X-RC-Repo-Name`` request headers.
    :param exceptions_map: not used by this function; kept so the signature
        parallels ``_remote_call``.
    :param session: object exposing ``post(url, data=..., headers=...)``.
    :param chunk_size: forwarded to ``response.iter_content``.
    :returns: ``response.iter_content(chunk_size=chunk_size)``.
    :raises exceptions.HttpVCSCommunicationError: on pycurl errors, on
        "Failed to connect" errors, or when the HTTP status code is >= 400.
    """
    try:
        headers = {
            'X-RC-Method': payload.get('method'),
            'X-RC-Repo-Name': payload.get('_repo_name')
        }
        response = session.post(url, data=msgpack.packb(payload), headers=headers)
    except pycurl.error as e:
        msg = '{}. \npycurl traceback: {}'.format(e, traceback.format_exc())
        raise exceptions.HttpVCSCommunicationError(msg)
    except Exception as e:
        # Python 3 exceptions carry no `.message`; fall back to str(e) so
        # the check below is not dead code.
        message = str(getattr(e, 'message', '') or e)
        if 'Failed to connect' in message:
            # gevent doesn't return proper pycurl errors
            raise exceptions.HttpVCSCommunicationError(e)
        else:
            raise

    if response.status_code >= 400:
        log.error('Call to %s returned non 200 HTTP code: %s',
                  url, response.status_code)
        raise exceptions.HttpVCSCommunicationError(repr(response.content))

    return response.iter_content(chunk_size=chunk_size)
135 149
136 150
class ServiceConnection(object):
    """
    Thin proxy around one service endpoint.

    Any attribute that is not found on the instance is turned into a callable
    which performs the remote call of that name.
    """

    def __init__(self, server_and_port, backend_endpoint, session_factory):
        base_url = 'http://%s' % server_and_port
        self.url = urllib.parse.urljoin(base_url, backend_endpoint)
        self._session_factory = session_factory

    def __getattr__(self, name):
        # Only reached for names that normal attribute lookup did not find.
        def remote_method(*args, **kwargs):
            return self._call(name, *args, **kwargs)

        return remote_method

    @exceptions.map_vcs_exceptions
    def _call(self, name, *args, **kwargs):
        """Send method ``name`` with its arguments to ``self.url``."""
        params = {'args': args, 'kwargs': kwargs}
        payload = {
            'id': str(uuid.uuid4()),
            'method': name,
            'params': params
        }
        session = self._session_factory()
        return _remote_call(self.url, payload, EXCEPTIONS_MAP, session)
156 170
157 171
class RemoteVCSMaker(object):
    """
    Callable factory producing :class:`RemoteRepo` objects for one backend.

    Attributes that are not found on the instance become callables which
    perform the remote call of that name, tagged with ``backend_type``.
    """

    def __init__(self, server_and_port, backend_endpoint, backend_type, session_factory):
        base_url = 'http://%s' % server_and_port
        self.url = urllib.parse.urljoin(base_url, backend_endpoint)
        self.stream_url = urllib.parse.urljoin(base_url, backend_endpoint+'/stream')

        self._session_factory = session_factory
        self.backend_type = backend_type

    @classmethod
    def init_cache_region(cls, repo_id):
        """Return ``(region, namespace)`` of the `cache_repo` cache for ``repo_id``."""
        namespace = 'repo.{}'.format(repo_id)
        return rc_cache.get_or_create_region('cache_repo', namespace), namespace

    def __call__(self, path, repo_id, config, with_wire=None):
        log.debug('%s RepoMaker call on %s', self.backend_type.upper(), path)
        return RemoteRepo(path, repo_id, config, self, with_wire=with_wire)

    def __getattr__(self, name):
        # Only reached for names that normal attribute lookup did not find.
        def remote_method(*args, **kwargs):
            return self._call(name, *args, **kwargs)

        return remote_method

    @exceptions.map_vcs_exceptions
    def _call(self, func_name, *args, **kwargs):
        """Send ``func_name`` with its arguments to ``self.url``."""
        params = {'args': args, 'kwargs': kwargs}
        payload = {
            'id': str(uuid.uuid4()),
            'method': func_name,
            'backend': self.backend_type,
            'params': params
        }
        session = self._session_factory()
        return _remote_call(self.url, payload, EXCEPTIONS_MAP, session)
192 206
193 207
class RemoteRepo(object):
    """
    Client-side handle for a single repository.

    Every attribute that is not found on the instance is turned into a
    callable performing the remote call of that name; names starting with
    ``stream:`` go to the streaming endpoint. Each call carries a copy of the
    ``wire`` dict built in ``__init__``.
    """
    CHUNK_SIZE = 16384

    # Class-level default for the flag set in __init__. Without it, reading
    # the flag on an instance created while DEBUG logging is disabled falls
    # through to __getattr__ and yields a (truthy) remote-call closure, so
    # the log.debug calls would never be skipped.
    _call_with_logging = False

    def __init__(self, path, repo_id, config, remote_maker, with_wire=None):
        """
        :param path: repository path, sent as ``wire['path']``.
        :param repo_id: repository id; a sanitized form is used for caching.
        :param config: object offering ``get(...)`` and ``serialize()``.
        :param remote_maker: provides ``url``, ``stream_url``,
            ``_session_factory`` and ``init_cache_region``.
        :param with_wire: optional dict merged on top of the default wire.
        """
        self.url = remote_maker.url
        self.stream_url = remote_maker.stream_url
        self._session = remote_maker._session_factory()

        cache_repo_id = self._repo_id_sanitizer(repo_id)
        _repo_name = self._get_repo_name(config, path)
        self._cache_region, self._cache_namespace = \
            remote_maker.init_cache_region(cache_repo_id)

        with_wire = with_wire or {}

        repo_state_uid = with_wire.get('repo_state_uid') or 'state'

        self._wire = {
            "_repo_name": _repo_name,
            "path": path,  # repo path
            "repo_id": repo_id,
            "cache_repo_id": cache_repo_id,
            "config": config,
            "repo_state_uid": repo_state_uid,
            "context": self._create_vcs_cache_context(path, repo_state_uid)
        }

        if with_wire:
            self._wire.update(with_wire)

        # NOTE(johbo): Trading complexity for performance. Avoiding the call to
        # log.debug brings a few percent gain even if it is not active.
        if log.isEnabledFor(logging.DEBUG):
            self._call_with_logging = True

        self.cert_dir = get_cert_path(rhodecode.CONFIG.get('__file__'))

    def _get_repo_name(self, config, path):
        """Return the part of ``path`` after the configured repo store."""
        repo_store = config.get('paths', '/')
        return path.split(repo_store)[-1].lstrip('/')

    def _repo_id_sanitizer(self, repo_id):
        """
        Replace ``/`` with ``__``, ``-`` with ``_`` and every non-ASCII
        character with ``_<codepoint>_``.
        """
        pathless = repo_id.replace('/', '__').replace('-', '_')
        return ''.join(char if ord(char) < 128 else '_{}_'.format(ord(char)) for char in pathless)

    def __getattr__(self, name):
        # Only reached for names that normal attribute lookup did not find.

        if name.startswith('stream:'):
            def repo_remote_attr(*args, **kwargs):
                return self._call_stream(name, *args, **kwargs)
        else:
            def repo_remote_attr(*args, **kwargs):
                return self._call(name, *args, **kwargs)

        return repo_remote_attr

    def _base_call(self, name, *args, **kwargs):
        """Return ``(context_uid, payload)`` for calling method ``name``."""
        # TODO: oliver: This is currently necessary pre-call since the
        # config object is being changed for hooking scenarios
        wire = copy.deepcopy(self._wire)
        wire["config"] = wire["config"].serialize()
        wire["config"].append(('vcs', 'ssl_dir', self.cert_dir))

        payload = {
            'id': str(uuid.uuid4()),
            'method': name,
            "_repo_name": wire['_repo_name'],
            'params': {'wire': wire, 'args': args, 'kwargs': kwargs}
        }

        context_uid = wire.get('context')
        return context_uid, payload

    def get_local_cache(self, name, args):
        """
        Decide whether the result of method ``name`` may be cached locally.

        :returns: ``(cache_on, cache_key)``; ``cache_key`` is ``''`` when
            caching is off.
        """
        cache_on = False
        cache_key = ''
        local_cache_on = rhodecode.ConfigGet().get_bool('vcs.methods.cache')

        cache_methods = [
            'branches', 'tags', 'bookmarks',
            'is_large_file', 'is_binary',
            'fctx_size', 'stream:fctx_node_data', 'blob_raw_length',
            'node_history',
            'revision', 'tree_items',
            'ctx_list', 'ctx_branch', 'ctx_description',
            'bulk_request',
            'assert_correct_path'
        ]

        if local_cache_on and name in cache_methods:
            cache_on = True
            repo_state_uid = self._wire['repo_state_uid']
            call_args = list(args)
            cache_key = compute_key_from_params(repo_state_uid, name, *call_args)

        return cache_on, cache_key

    @exceptions.map_vcs_exceptions
    def _call(self, name, *args, **kwargs):
        """Perform the (possibly locally cached) remote call of ``name``."""
        context_uid, payload = self._base_call(name, *args, **kwargs)
        url = self.url

        start = time.time()
        cache_on, cache_key = self.get_local_cache(name, args)

        @self._cache_region.conditional_cache_on_arguments(
            namespace=self._cache_namespace, condition=cache_on and cache_key)
        def remote_call(_cache_key):
            if self._call_with_logging:
                args_repr = f'ARG: {str(args):.512}|KW: {str(kwargs):.512}'
                log.debug('Calling %s@%s with args:%r. wire_context: %s cache_on: %s',
                          url, name, args_repr, context_uid, cache_on)
            return _remote_call(url, payload, EXCEPTIONS_MAP, self._session)

        result = remote_call(cache_key)
        if self._call_with_logging:
            log.debug('Call %s@%s took: %.4fs. wire_context: %s',
                      url, name, time.time()-start, context_uid)
        return result

    @exceptions.map_vcs_exceptions
    def _call_stream(self, name, *args, **kwargs):
        """Perform the streaming remote call of ``name``; never cached."""
        context_uid, payload = self._base_call(name, *args, **kwargs)
        payload['chunk_size'] = self.CHUNK_SIZE
        url = self.stream_url

        start = time.time()
        cache_on, cache_key = self.get_local_cache(name, args)

        # Cache is a problem because this is a stream
        def streaming_remote_call(_cache_key):
            if self._call_with_logging:
                args_repr = f'ARG: {str(args):.512}|KW: {str(kwargs):.512}'
                log.debug('Calling %s@%s with args:%r. wire_context: %s cache_on: %s',
                          url, name, args_repr, context_uid, cache_on)
            return _streaming_remote_call(url, payload, EXCEPTIONS_MAP, self._session, self.CHUNK_SIZE)

        result = streaming_remote_call(cache_key)
        if self._call_with_logging:
            log.debug('Call %s@%s took: %.4fs. wire_context: %s',
                      url, name, time.time()-start, context_uid)
        return result

    def __getitem__(self, key):
        return self.revision(key)

    def _create_vcs_cache_context(self, *args):
        """
        Creates a unique string which is passed to the VCSServer on every
        remote call. It is used as cache key in the VCSServer.
        """
        hash_key = '-'.join(map(str, args))
        return str(uuid.uuid5(uuid.NAMESPACE_URL, hash_key))

    def invalidate_vcs_cache(self):
        """
        This invalidates the context which is sent to the VCSServer on every
        call to a remote method. It forces the VCSServer to create a fresh
        repository instance on the next call to a remote method.
        """
        self._wire['context'] = str(uuid.uuid4())
355 369
356 370
class VcsHttpProxy(object):
    """
    Forwards a request to ``base_url`` through a ``requests`` session and
    exposes the msgpack-framed answer as ``(iterator, status, headers)``.
    """

    CHUNK_SIZE = 16384

    def __init__(self, server_and_port, backend_endpoint):
        self.base_url = urllib.parse.urljoin('http://%s' % server_and_port, backend_endpoint)

        retry_policy = Retry(total=5, connect=None, read=None, redirect=None)
        self.session = requests.Session()
        self.session.mount(
            'http://', requests.adapters.HTTPAdapter(max_retries=retry_policy))

    def handle(self, environment, input_data, *args, **kwargs):
        """POST the packed request and return the parsed response triple."""
        packed = msgpack.packb({
            'environment': environment,
            'input_data': input_data,
            'args': args,
            'kwargs': kwargs
        })
        return self._get_result(
            self.session.post(self.base_url, packed, stream=True))

    def _deserialize_and_raise(self, error):
        """Raise a plain ``Exception`` built from the ``error`` mapping."""
        exception = Exception(error['message'])
        if '_vcs_kind' in error:
            exception._vcs_kind = error['_vcs_kind']
        raise exception

    def _iterate(self, result):
        """Yield every msgpack object decoded from the response body."""
        unpacker = msgpack.Unpacker()
        for raw in result.iter_content(chunk_size=self.CHUNK_SIZE):
            unpacker.feed(raw)
            yield from unpacker

    def _get_result(self, result):
        """
        Consume the first three decoded objects: error, status, headers.

        A truthy error object is raised; otherwise the remaining iterator is
        returned together with status and headers.
        """
        stream = self._iterate(result)
        error = next(stream)
        if error:
            self._deserialize_and_raise(error)

        status = next(stream)
        headers = next(stream)
        return stream, status, headers
405 419
406 420
class ThreadlocalSessionFactory(object):
    """
    Creates one CurlSession per thread on demand.
    """

    def __init__(self):
        self._thread_local = threading.local()

    def __call__(self):
        local = self._thread_local
        try:
            return local.curl_session
        except AttributeError:
            # first call in this thread: create the session and keep it
            local.curl_session = CurlSession()
            return local.curl_session
@@ -1,76 +1,76 b''
1 1
2 2
3 3 # Copyright (C) 2014-2020 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 """
22 22 Internal settings for vcs-lib
23 23 """
24 24
25 # list of default encoding used in safe_unicode/safe_str methods
25 # list of default encodings used by the safe_str methods
DEFAULT_ENCODINGS = ['utf8']


# Compatibility version when creating SVN repositories. None means newest.
# Other available options are: pre-1.4-compatible, pre-1.5-compatible,
# pre-1.6-compatible, pre-1.8-compatible
SVN_COMPATIBLE_VERSION = None

ALIASES = ['hg', 'git', 'svn']

# backend alias -> dotted path string of its repository class
BACKENDS = dict(
    hg='rhodecode.lib.vcs.backends.hg.MercurialRepository',
    git='rhodecode.lib.vcs.backends.git.GitRepository',
    svn='rhodecode.lib.vcs.backends.svn.SubversionRepository',
)


# (archive kind, mime type, file extension); one entry per extension
ARCHIVE_SPECS = [
    (kind, mime_type, extension)
    for kind, mime_type, extensions in (
        ('tbz2', 'application/x-bzip2', ('.tbz2', '.tar.bz2')),
        ('tgz', 'application/x-gzip', ('.tgz', '.tar.gz')),
        ('zip', 'application/zip', ('.zip',)),
    )
    for extension in extensions
]

HOOKS_PROTOCOL = None
HOOKS_DIRECT_CALLS = False
HOOKS_HOST = '127.0.0.1'


MERGE_MESSAGE_TMPL = (
    'Merge pull request !{pr_id} from {source_repo} {source_ref_name}'
    '\n\n {pr_title}')
MERGE_DRY_RUN_MESSAGE = 'dry_run_merge_message_from_rhodecode'
MERGE_DRY_RUN_USER = 'Dry-Run User'
MERGE_DRY_RUN_EMAIL = 'dry-run-merge@rhodecode.com'
64 64
65 65
def available_aliases():
    """
    Return a copy of ``ALIASES`` that is guaranteed to contain ``'hg'``.

    Mercurial is required for the system to work, so in case vcs.backends does
    not include it, we make sure it will be available internally
    TODO: anderson: refactor vcs.backends so it won't be necessary, VCS server
    should be responsible to dictate available backends.
    """
    if 'hg' in ALIASES:
        return ALIASES[:]
    return ALIASES[:] + ['hg']
@@ -1,233 +1,234 b''
1 1
2 2
3 3 # Copyright (C) 2014-2020 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 """
22 22 Custom vcs exceptions module.
23 23 """
24 24 import logging
25 25 import functools
26 import urllib.request, urllib.error, urllib.parse
26 import urllib.error
27 import urllib.parse
27 28 import rhodecode
28 29
29 30 log = logging.getLogger(__name__)
30 31
31 32
# NOTE(review): the one-line summaries on the marker classes below are derived
# from the class names and the hierarchy only; confirm against the call sites
# that raise them.

class VCSCommunicationError(Exception):
    """Base class for errors while communicating with the VCSServer."""


class HttpVCSCommunicationError(VCSCommunicationError):
    """Communication error on the HTTP transport."""


class VCSError(Exception):
    """Base class of the vcs layer errors."""


class RepositoryError(VCSError):
    """Base class for repository related errors."""


class RepositoryRequirementError(RepositoryError):
    """Repository error about a requirement."""


class UnresolvedFilesInRepo(RepositoryError):
    """Repository error about unresolved files."""


class VCSBackendNotSupportedError(VCSError):
    """
    Exception raised when VCSServer does not support requested backend
    """


class EmptyRepositoryError(RepositoryError):
    """Repository error about an empty repository."""


class TagAlreadyExistError(RepositoryError):
    """Repository error about a tag that already exists."""


class TagDoesNotExistError(RepositoryError):
    """Repository error about a tag that does not exist."""


class BranchAlreadyExistError(RepositoryError):
    """Repository error about a branch that already exists."""


class BranchDoesNotExistError(RepositoryError):
    """Repository error about a branch that does not exist."""


class CommitError(RepositoryError):
    """
    Exceptions related to an existing commit
    """


class CommitDoesNotExistError(CommitError):
    """Commit error about a commit that does not exist."""


class CommittingError(RepositoryError):
    """
    Exceptions happening while creating a new commit
    """


class NothingChangedError(CommittingError):
    """Committing error about nothing having changed."""


class NodeError(VCSError):
    """Base class for node related errors."""


class RemovedFileNodeError(NodeError):
    """Node error about a removed file node."""


class NodeAlreadyExistsError(CommittingError):
    """Committing error about a node that already exists."""


class NodeAlreadyChangedError(CommittingError):
    """Committing error about a node that is already changed."""


class NodeDoesNotExistError(CommittingError):
    """Committing error about a node that does not exist."""


class NodeNotChangedError(CommittingError):
    """Committing error about a node that is not changed."""


class NodeAlreadyAddedError(CommittingError):
    """Committing error about a node that is already added."""


class NodeAlreadyRemovedError(CommittingError):
    """Committing error about a node that is already removed."""


class SubrepoMergeError(RepositoryError):
    """
    This happens if we try to merge a repository which contains subrepos and
    the subrepos cannot be merged. The subrepos are not merged itself but
    their references in the root repo are merged.
    """


class ImproperArchiveTypeError(VCSError):
    """VCS error about an improper archive type."""


class CommandError(VCSError):
    """VCS error about a command."""


class UnhandledException(VCSError):
    """
    Signals that something unexpected went wrong.

    This usually means we have a programming error on the side of the VCSServer
    and should inspect the logfile of the VCSServer to find more details.
    """
157 158
158 159
# Maps the `_vcs_kind` marker found on an exception onto the class that
# `map_vcs_exceptions` raises in its place.
_EXCEPTION_MAP = dict(
    abort=RepositoryError,
    archive=ImproperArchiveTypeError,
    error=RepositoryError,
    lookup=CommitDoesNotExistError,
    repo_locked=RepositoryError,
    requirement=RepositoryRequirementError,
    unhandled=UnhandledException,
    # TODO: johbo: Define our own exception for this and stop abusing
    # urllib's exception class.
    url_error=urllib.error.URLError,
    subrepo_merge_error=SubrepoMergeError,
)
172 173
173 174
def map_vcs_exceptions(func):
    """
    Utility to decorate functions so that plain exceptions are translated.

    The translation is based on `_EXCEPTION_MAP` which maps a `str` indicating
    the error type into an exception class representing this error inside
    of the vcs layer.

    Exceptions that carry no `_vcs_kind` attribute, or a kind that is not
    present in `_EXCEPTION_MAP`, are re-raised unchanged.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            debug = rhodecode.ConfigGet().get_bool('debug')

            # The error middleware adds information if it finds
            # __traceback_info__ in a frame object. This way the remote
            # traceback information is made available in error reports.

            remote_tb = getattr(e, '_vcs_server_traceback', None)
            org_remote_tb = getattr(e, '_vcs_server_org_exc_tb', '')
            __traceback_info__ = None
            if remote_tb:
                if isinstance(remote_tb, str):
                    remote_tb = [remote_tb]
                __traceback_info__ = (
                    'Found VCSServer remote traceback information:\n'
                    '{}\n'
                    '+++ BEG SOURCE EXCEPTION +++\n\n'
                    '{}\n'
                    '+++ END SOURCE EXCEPTION +++\n'
                    ''.format('\n'.join(remote_tb), org_remote_tb)
                )

                # Avoid that remote_tb also appears in the frame
                del remote_tb

            # Special vcs errors had an attribute "_vcs_kind" which is used
            # to translate them to the proper exception class in the vcs
            # client layer.
            kind = getattr(e, '_vcs_kind', None)
            exc_name = getattr(e, '_vcs_server_org_exc_name', None)

            if not kind:
                raise

            exc_class = _EXCEPTION_MAP.get(kind)
            if exc_class is None:
                # An unknown kind must not hide the real error behind a
                # KeyError raised from this handler.
                log.warning('Unknown _vcs_kind %r, re-raising original exception', kind)
                raise

            if any(e.args):
                # replace the first argument with a prefix exc name
                exc_args = ['{}:{}'.format(exc_name, e.args[0])] + list(e.args[1:])
            else:
                exc_args = [__traceback_info__ or '{}: UnhandledException'.format(exc_name)]

            if debug or (__traceback_info__ and kind not in ['unhandled', 'lookup']):
                # for other than unhandled errors also log the traceback
                # can be useful for debugging
                log.error(__traceback_info__)

            raise exc_class(*exc_args)
    return wrapper
@@ -1,876 +1,963 b''
1 1
2 2
3 3 # Copyright (C) 2014-2020 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 """
22 22 Module holding everything related to vcs nodes, with vcs2 architecture.
23 23 """
24
24 import functools
25 25 import os
26 26 import stat
27 27
28 28 from zope.cachedescriptors.property import Lazy as LazyProperty
29 29
30 import rhodecode
31 30 from rhodecode.config.conf import LANGUAGES_EXTENSIONS_MAP
32 from rhodecode.lib.utils import safe_unicode, safe_str
33 from rhodecode.lib.utils2 import md5
31 from rhodecode.lib.str_utils import safe_str, safe_bytes
32 from rhodecode.lib.hash_utils import md5
34 33 from rhodecode.lib.vcs import path as vcspath
35 34 from rhodecode.lib.vcs.backends.base import EmptyCommit, FILEMODE_DEFAULT
36 35 from rhodecode.lib.vcs.conf.mtypes import get_mimetypes_db
37 36 from rhodecode.lib.vcs.exceptions import NodeError, RemovedFileNodeError
38 37
39 38 LARGEFILE_PREFIX = '.hglf'
40 39
41 40
42 41 class NodeKind:
43 42 SUBMODULE = -1
44 43 DIR = 1
45 44 FILE = 2
46 45 LARGEFILE = 3
47 46
48 47
49 48 class NodeState:
50 49 ADDED = 'added'
51 50 CHANGED = 'changed'
52 51 NOT_CHANGED = 'not changed'
53 52 REMOVED = 'removed'
54 53
54 #TODO: not sure if that should be bytes or str ?
55 # most probably bytes because content should be bytes and we check it
56 BIN_BYTE_MARKER = b'\0'
57
55 58
56 59 class NodeGeneratorBase(object):
57 60 """
58 61 Base class for removed added and changed filenodes, it's a lazy generator
59 62 class that will create filenodes only on iteration or call
60 63
61 64 The len method doesn't need to create filenodes at all
62 65 """
63 66
64 67 def __init__(self, current_paths, cs):
65 68 self.cs = cs
66 69 self.current_paths = current_paths
67 70
68 71 def __call__(self):
69 72 return [n for n in self]
70 73
71 def __getslice__(self, i, j):
72 for p in self.current_paths[i:j]:
74 def __getitem__(self, key):
75 if isinstance(key, slice):
76 for p in self.current_paths[key.start:key.stop]:
73 77 yield self.cs.get_node(p)
74 78
75 79 def __len__(self):
76 80 return len(self.current_paths)
77 81
78 82 def __iter__(self):
79 83 for p in self.current_paths:
80 84 yield self.cs.get_node(p)
81 85
82 86
83 87 class AddedFileNodesGenerator(NodeGeneratorBase):
84 88 """
85 89 Class holding added files for current commit
86 90 """
87 91
88 92
89 93 class ChangedFileNodesGenerator(NodeGeneratorBase):
90 94 """
91 95 Class holding changed files for current commit
92 96 """
93 97
94 98
95 99 class RemovedFileNodesGenerator(NodeGeneratorBase):
96 100 """
97 101 Class holding removed files for current commit
98 102 """
99 103 def __iter__(self):
100 104 for p in self.current_paths:
101 yield RemovedFileNode(path=p)
105 yield RemovedFileNode(path=safe_bytes(p))
102 106
103 def __getslice__(self, i, j):
104 for p in self.current_paths[i:j]:
105 yield RemovedFileNode(path=p)
107 def __getitem__(self, key):
108 if isinstance(key, slice):
109 for p in self.current_paths[key.start:key.stop]:
110 yield RemovedFileNode(path=safe_bytes(p))
106 111
107 112
113 @functools.total_ordering
108 114 class Node(object):
109 115 """
110 116 Simplest class representing file or directory on repository. SCM backends
111 117 should use ``FileNode`` and ``DirNode`` subclasses rather than ``Node``
112 118 directly.
113 119
114 120 Node's ``path`` cannot start with slash as we operate on *relative* paths
115 121 only. Moreover, every single node is identified by the ``path`` attribute,
116 122 so it cannot end with slash, too. Otherwise, path could lead to mistakes.
117 123 """
118 RTLO_MARKER = "\u202E" # RTLO marker allows swapping text, and certain
124 # RTLO marker allows swapping text, and certain
119 125 # security attacks could be used with this
126 RTLO_MARKER = "\u202E"
127
120 128 commit = None
121 129
122 def __init__(self, path, kind):
130 def __init__(self, path: bytes, kind):
123 131 self._validate_path(path) # can throw exception if path is invalid
124 self.path = safe_str(path.rstrip('/')) # we store paths as str
125 if path == '' and kind != NodeKind.DIR:
132
133 self.bytes_path = path.rstrip(b'/') # store for __repr__
134 self.path = safe_str(self.bytes_path) # we store paths as str
135
136 if self.bytes_path == b'' and kind != NodeKind.DIR:
126 137 raise NodeError("Only DirNode and its subclasses may be "
127 138 "initialized with empty path")
128 139 self.kind = kind
129 140
130 141 if self.is_root() and not self.is_dir():
131 142 raise NodeError("Root node cannot be FILE kind")
132 143
133 def _validate_path(self, path):
134 if path.startswith('/'):
144 def __eq__(self, other):
145 if type(self) is not type(other):
146 return False
147 for attr in ['name', 'path', 'kind']:
148 if getattr(self, attr) != getattr(other, attr):
149 return False
150 if self.is_file():
151 # FileNode compare, we need to fallback to content compare
152 return None
153 else:
154 # For DirNode's check without entering each dir
155 self_nodes_paths = list(sorted(n.path for n in self.nodes))
156 other_nodes_paths = list(sorted(n.path for n in self.nodes))
157 if self_nodes_paths != other_nodes_paths:
158 return False
159 return True
160
161 def __lt__(self, other):
162 if self.kind < other.kind:
163 return True
164 if self.kind > other.kind:
165 return False
166 if self.path < other.path:
167 return True
168 if self.path > other.path:
169 return False
170
171 # def __cmp__(self, other):
172 # """
173 # Comparator using name of the node, needed for quick list sorting.
174 # """
175 #
176 # kind_cmp = cmp(self.kind, other.kind)
177 # if kind_cmp:
178 # if isinstance(self, SubModuleNode):
179 # # we make submodules equal to dirnode for "sorting" purposes
180 # return NodeKind.DIR
181 # return kind_cmp
182 # return cmp(self.name, other.name)
183
184 def __repr__(self):
185 maybe_path = getattr(self, 'path', 'UNKNOWN_PATH')
186 return f'<{self.__class__.__name__} {maybe_path!r}>'
187
188 def __str__(self):
189 return self.name
190
191 def _validate_path(self, path: bytes):
192 self._assert_bytes(path)
193
194 if path.startswith(b'/'):
135 195 raise NodeError(
136 "Cannot initialize Node objects with slash at "
137 "the beginning as only relative paths are supported. "
138 "Got %s" % (path,))
196 f"Cannot initialize Node objects with slash at "
197 f"the beginning as only relative paths are supported. "
198 f"Got {path}")
199
200 def _assert_bytes(self, value):
201 if not isinstance(value, bytes):
202 raise TypeError(f"Bytes required as input, got {type(value)} of {value}.")
139 203
140 204 @LazyProperty
141 205 def parent(self):
142 206 parent_path = self.get_parent_path()
143 207 if parent_path:
144 208 if self.commit:
145 209 return self.commit.get_node(parent_path)
146 210 return DirNode(parent_path)
147 211 return None
148 212
149 213 @LazyProperty
150 def unicode_path(self):
151 return safe_unicode(self.path)
214 def str_path(self) -> str:
215 return safe_str(self.path)
152 216
153 217 @LazyProperty
154 218 def has_rtlo(self):
155 219 """Detects if a path has right-to-left-override marker"""
156 return self.RTLO_MARKER in self.unicode_path
157
158 @LazyProperty
159 def unicode_path_safe(self):
160 """
161 Special SAFE representation of path without the right-to-left-override.
162 This should be only used for "showing" the file, cannot be used for any
163 urls etc.
164 """
165 return safe_unicode(self.path).replace(self.RTLO_MARKER, '')
220 return self.RTLO_MARKER in self.str_path
166 221
167 222 @LazyProperty
168 223 def dir_path(self):
169 224 """
170 225 Returns name of the directory from full path of this vcs node. Empty
171 226 string is returned if there's no directory in the path
172 227 """
173 228 _parts = self.path.rstrip('/').rsplit('/', 1)
174 229 if len(_parts) == 2:
175 return safe_unicode(_parts[0])
230 return _parts[0]
176 231 return ''
177 232
178 233 @LazyProperty
179 234 def name(self):
180 235 """
181 236 Returns the name of the node, i.e. only the last
182 237 part of its path.
183 238 """
184 return safe_unicode(self.path.rstrip('/').split('/')[-1])
239 return self.path.rstrip('/').split('/')[-1]
185 240
186 241 @property
187 242 def kind(self):
188 243 return self._kind
189 244
190 245 @kind.setter
191 246 def kind(self, kind):
192 247 if hasattr(self, '_kind'):
193 248 raise NodeError("Cannot change node's kind")
194 249 else:
195 250 self._kind = kind
196 251 # Post setter check (path's trailing slash)
197 252 if self.path.endswith('/'):
198 253 raise NodeError("Node's path cannot end with slash")
199 254
200 def __cmp__(self, other):
201 """
202 Comparator using name of the node, needed for quick list sorting.
203 """
204
205 kind_cmp = cmp(self.kind, other.kind)
206 if kind_cmp:
207 if isinstance(self, SubModuleNode):
208 # we make submodules equal to dirnode for "sorting" purposes
209 return NodeKind.DIR
210 return kind_cmp
211 return cmp(self.name, other.name)
212
213 def __eq__(self, other):
214 for attr in ['name', 'path', 'kind']:
215 if getattr(self, attr) != getattr(other, attr):
216 return False
217 if self.is_file():
218 if self.content != other.content:
219 return False
220 else:
221 # For DirNode's check without entering each dir
222 self_nodes_paths = list(sorted(n.path for n in self.nodes))
223 other_nodes_paths = list(sorted(n.path for n in self.nodes))
224 if self_nodes_paths != other_nodes_paths:
225 return False
226 return True
227
228 def __ne__(self, other):
229 return not self.__eq__(other)
230
231 def __repr__(self):
232 return '<%s %r>' % (self.__class__.__name__, self.path)
233
234 def __str__(self):
235 return self.__repr__()
236
237 def __unicode__(self):
238 return self.name
239
240 def get_parent_path(self):
255 def get_parent_path(self) -> bytes:
241 256 """
242 257 Returns node's parent path or empty string if node is root.
243 258 """
244 259 if self.is_root():
245 return ''
246 return vcspath.dirname(self.path.rstrip('/')) + '/'
260 return b''
261 str_path = vcspath.dirname(self.path.rstrip('/')) + '/'
262
263 return safe_bytes(str_path)
247 264
248 265 def is_file(self):
249 266 """
250 267 Returns ``True`` if node's kind is ``NodeKind.FILE``, ``False``
251 268 otherwise.
252 269 """
253 270 return self.kind == NodeKind.FILE
254 271
255 272 def is_dir(self):
256 273 """
257 274 Returns ``True`` if node's kind is ``NodeKind.DIR``, ``False``
258 275 otherwise.
259 276 """
260 277 return self.kind == NodeKind.DIR
261 278
262 279 def is_root(self):
263 280 """
264 281 Returns ``True`` if node is a root node and ``False`` otherwise.
265 282 """
266 283 return self.kind == NodeKind.DIR and self.path == ''
267 284
268 285 def is_submodule(self):
269 286 """
270 287 Returns ``True`` if node's kind is ``NodeKind.SUBMODULE``, ``False``
271 288 otherwise.
272 289 """
273 290 return self.kind == NodeKind.SUBMODULE
274 291
275 292 def is_largefile(self):
276 293 """
277 294 Returns ``True`` if node's kind is ``NodeKind.LARGEFILE``, ``False``
278 295 otherwise
279 296 """
280 297 return self.kind == NodeKind.LARGEFILE
281 298
282 299 def is_link(self):
283 300 if self.commit:
284 301 return self.commit.is_link(self.path)
285 302 return False
286 303
287 304 @LazyProperty
288 305 def added(self):
289 306 return self.state is NodeState.ADDED
290 307
291 308 @LazyProperty
292 309 def changed(self):
293 310 return self.state is NodeState.CHANGED
294 311
295 312 @LazyProperty
296 313 def not_changed(self):
297 314 return self.state is NodeState.NOT_CHANGED
298 315
299 316 @LazyProperty
300 317 def removed(self):
301 318 return self.state is NodeState.REMOVED
302 319
303 320
304 321 class FileNode(Node):
305 322 """
306 323 Class representing file nodes.
307 324
308 325 :attribute: path: path to the node, relative to repository's root
309 326 :attribute: content: if given arbitrary sets content of the file
310 327 :attribute: commit: if given, content is lazily fetched from it the first time it is accessed
311 328 :attribute: mode: stat mode for a node. Default is `FILEMODE_DEFAULT`.
312 329 """
313 330 _filter_pre_load = []
314 331
315 def __init__(self, path, content=None, commit=None, mode=None, pre_load=None):
332 def __init__(self, path: bytes, content: bytes | None = None, commit=None, mode=None, pre_load=None):
316 333 """
317 334 Only one of ``content`` and ``commit`` may be given. Passing both
318 335 would raise ``NodeError`` exception.
319 336
320 337 :param path: relative path to the node
321 338 :param content: content may be passed to constructor
322 339 :param commit: if given, will use it to lazily fetch content
323 340 :param mode: ST_MODE (i.e. 0100644)
324 341 """
325 342 if content and commit:
326 343 raise NodeError("Cannot use both content and commit")
327 super(FileNode, self).__init__(path, kind=NodeKind.FILE)
344
345 super().__init__(path, kind=NodeKind.FILE)
346
328 347 self.commit = commit
348 if content and not isinstance(content, bytes):
349 # File content is one thing that inherently must be bytes
350 # we support passing str too, and convert the content
351 content = safe_bytes(content)
329 352 self._content = content
330 353 self._mode = mode or FILEMODE_DEFAULT
331 354
332 355 self._set_bulk_properties(pre_load)
333 356
357 def __eq__(self, other):
358 eq = super(FileNode, self).__eq__(other)
359 if eq is not None:
360 return eq
361 return self.content == other.content
362
363 def __hash__(self):
364 raw_id = getattr(self.commit, 'raw_id', '')
365 return hash((self.path, raw_id))
366
367 def __lt__(self, other):
368 lt = super(FileNode, self).__lt__(other)
369 if lt is not None:
370 return lt
371 return self.content < other.content
372
373 def __repr__(self):
374 short_id = getattr(self.commit, 'short_id', '')
375 return f'<{self.__class__.__name__} path={self.path!r}, short_id={short_id}>'
376
334 377 def _set_bulk_properties(self, pre_load):
335 378 if not pre_load:
336 379 return
337 380 pre_load = [entry for entry in pre_load
338 381 if entry not in self._filter_pre_load]
339 382 if not pre_load:
340 383 return
341 384
342 for attr_name in pre_load:
343 result = getattr(self, attr_name)
344 if callable(result):
345 result = result()
346 self.__dict__[attr_name] = result
385 remote = self.commit.get_remote()
386 result = remote.bulk_file_request(self.commit.raw_id, self.path, pre_load)
387
388 for attr, value in result.items():
389 if attr == "flags":
390 self.__dict__['mode'] = safe_str(value)
391 elif attr == "size":
392 self.__dict__['size'] = value
393 elif attr == "data":
394 self.__dict__['_content'] = value
395 elif attr == "is_binary":
396 self.__dict__['is_binary'] = value
397 elif attr == "md5":
398 self.__dict__['md5'] = value
399 else:
400 raise ValueError(f'Unsupported attr in bulk_property: {attr}')
347 401
348 402 @LazyProperty
349 403 def mode(self):
350 404 """
351 405 Returns lazily mode of the FileNode. If `commit` is not set, would
352 406 use value given at initialization or `FILEMODE_DEFAULT` (default).
353 407 """
354 408 if self.commit:
355 409 mode = self.commit.get_file_mode(self.path)
356 410 else:
357 411 mode = self._mode
358 412 return mode
359 413
360 414 @LazyProperty
361 def raw_bytes(self):
415 def raw_bytes(self) -> bytes:
362 416 """
363 417 Returns lazily the raw bytes of the FileNode.
364 418 """
365 419 if self.commit:
366 420 if self._content is None:
367 421 self._content = self.commit.get_file_content(self.path)
368 422 content = self._content
369 423 else:
370 424 content = self._content
371 425 return content
372 426
427 def content_uncached(self):
428 """
429 Returns content of the FileNode, read from the commit on every call when a commit is set.
430 """
431 if self.commit:
432 content = self.commit.get_file_content(self.path)
433 else:
434 content = self._content
435 return content
436
373 437 def stream_bytes(self):
374 438 """
375 439 Returns an iterator that will stream the content of the file directly from
376 440 vcsserver without loading it to memory.
377 441 """
378 442 if self.commit:
379 443 return self.commit.get_file_content_streamed(self.path)
380 444 raise NodeError("Cannot retrieve stream_bytes without related commit attribute")
381 445
382 @LazyProperty
383 def md5(self):
384 """
385 Returns md5 of the file node.
386 """
387 return md5(self.raw_bytes)
388
389 446 def metadata_uncached(self):
390 447 """
391 448 Returns a tuple of (is_binary, md5, size, content) for the file node, without any cache usage.
392 449 """
393 450
394 451 content = self.content_uncached()
395 452
396 is_binary = content and '\0' in content
453 is_binary = bool(content and BIN_BYTE_MARKER in content)
397 454 size = 0
398 455 if content:
399 456 size = len(content)
400 457
401 458 return is_binary, md5(content), size, content
402 459
403 def content_uncached(self):
404 """
405 Returns lazily content of the FileNode. If possible, would try to
406 decode content from UTF-8.
460 @LazyProperty
461 def content(self) -> bytes:
407 462 """
408 if self.commit:
409 content = self.commit.get_file_content(self.path)
410 else:
411 content = self._content
463 Returns lazily content of the FileNode.
464 """
465 content = self.raw_bytes
466 if content and not isinstance(content, bytes):
467 raise ValueError(f'Content is of type {type(content)} instead of bytes')
412 468 return content
413 469
414 470 @LazyProperty
415 def content(self):
416 """
417 Returns lazily content of the FileNode. If possible, would try to
418 decode content from UTF-8.
419 """
420 content = self.raw_bytes
421
422 if self.is_binary:
423 return content
424 return safe_unicode(content)
471 def str_content(self) -> str:
472 return safe_str(self.raw_bytes)
425 473
426 474 @LazyProperty
427 475 def size(self):
428 476 if self.commit:
429 477 return self.commit.get_file_size(self.path)
430 478 raise NodeError(
431 479 "Cannot retrieve size of the file without related "
432 480 "commit attribute")
433 481
434 482 @LazyProperty
435 483 def message(self):
436 484 if self.commit:
437 485 return self.last_commit.message
438 486 raise NodeError(
439 487 "Cannot retrieve message of the file without related "
440 488 "commit attribute")
441 489
442 490 @LazyProperty
443 491 def last_commit(self):
444 492 if self.commit:
445 493 pre_load = ["author", "date", "message", "parents"]
446 494 return self.commit.get_path_commit(self.path, pre_load=pre_load)
447 495 raise NodeError(
448 496 "Cannot retrieve last commit of the file without "
449 497 "related commit attribute")
450 498
451 499 def get_mimetype(self):
452 500 """
453 501 Mimetype is calculated based on the file's content. If ``_mimetype``
454 502 attribute is available, it will be returned (backends which store
455 503 mimetypes or can easily recognize them, should set this private
456 504 attribute to indicate that type should *NOT* be calculated).
457 505 """
458 506
459 507 if hasattr(self, '_mimetype'):
460 if (isinstance(self._mimetype, (tuple, list,)) and
508 if (isinstance(self._mimetype, (tuple, list)) and
461 509 len(self._mimetype) == 2):
462 510 return self._mimetype
463 511 else:
464 512 raise NodeError('given _mimetype attribute must be an 2 '
465 513 'element list or tuple')
466 514
467 515 db = get_mimetypes_db()
468 516 mtype, encoding = db.guess_type(self.name)
469 517
470 518 if mtype is None:
471 519 if not self.is_largefile() and self.is_binary:
472 520 mtype = 'application/octet-stream'
473 521 encoding = None
474 522 else:
475 523 mtype = 'text/plain'
476 524 encoding = None
477 525
478 526 # try with pygments
479 527 try:
480 528 from pygments.lexers import get_lexer_for_filename
481 529 mt = get_lexer_for_filename(self.name).mimetypes
482 530 except Exception:
483 531 mt = None
484 532
485 533 if mt:
486 534 mtype = mt[0]
487 535
488 536 return mtype, encoding
489 537
490 538 @LazyProperty
491 539 def mimetype(self):
492 540 """
493 541 Wrapper around full mimetype info. It returns only type of fetched
494 542 mimetype without the encoding part. use get_mimetype function to fetch
495 543 full set of (type,encoding)
496 544 """
497 545 return self.get_mimetype()[0]
498 546
499 547 @LazyProperty
500 548 def mimetype_main(self):
501 549 return self.mimetype.split('/')[0]
502 550
503 551 @classmethod
504 552 def get_lexer(cls, filename, content=None):
505 553 from pygments import lexers
506 554
507 555 extension = filename.split('.')[-1]
508 556 lexer = None
509 557
510 558 try:
511 559 lexer = lexers.guess_lexer_for_filename(
512 560 filename, content, stripnl=False)
513 561 except lexers.ClassNotFound:
514 lexer = None
562 pass
515 563
516 564 # try our EXTENSION_MAP
517 565 if not lexer:
518 566 try:
519 567 lexer_class = LANGUAGES_EXTENSIONS_MAP.get(extension)
520 568 if lexer_class:
521 569 lexer = lexers.get_lexer_by_name(lexer_class[0])
522 570 except lexers.ClassNotFound:
523 lexer = None
571 pass
524 572
525 573 if not lexer:
526 574 lexer = lexers.TextLexer(stripnl=False)
527 575
528 576 return lexer
529 577
530 578 @LazyProperty
531 579 def lexer(self):
532 580 """
533 581 Returns pygment's lexer class. Would try to guess lexer taking file's
534 582 content, name and mimetype.
535 583 """
536 return self.get_lexer(self.name, self.content)
584 # TODO: this is more proper, but super heavy on investigating the type based on the content
585 #self.get_lexer(self.name, self.content)
586
587 return self.get_lexer(self.name)
537 588
538 589 @LazyProperty
539 590 def lexer_alias(self):
540 591 """
541 592 Returns first alias of the lexer guessed for this file.
542 593 """
543 594 return self.lexer.aliases[0]
544 595
545 596 @LazyProperty
546 597 def history(self):
547 598 """
548 599 Returns a list of commit for this file in which the file was changed
549 600 """
550 601 if self.commit is None:
551 602 raise NodeError('Unable to get commit for this FileNode')
552 603 return self.commit.get_path_history(self.path)
553 604
554 605 @LazyProperty
555 606 def annotate(self):
556 607 """
557 608 Returns a list of three element tuples with lineno, commit and line
558 609 """
559 610 if self.commit is None:
560 611 raise NodeError('Unable to get commit for this FileNode')
561 612 pre_load = ["author", "date", "message", "parents"]
562 613 return self.commit.get_file_annotate(self.path, pre_load=pre_load)
563 614
564 615 @LazyProperty
565 616 def state(self):
566 617 if not self.commit:
567 618 raise NodeError(
568 619 "Cannot check state of the node if it's not "
569 620 "linked with commit")
570 621 elif self.path in (node.path for node in self.commit.added):
571 622 return NodeState.ADDED
572 623 elif self.path in (node.path for node in self.commit.changed):
573 624 return NodeState.CHANGED
574 625 else:
575 626 return NodeState.NOT_CHANGED
576 627
577 628 @LazyProperty
578 629 def is_binary(self):
579 630 """
580 631 Returns True if file has binary content.
581 632 """
582 633 if self.commit:
583 634 return self.commit.is_node_binary(self.path)
584 635 else:
585 636 raw_bytes = self._content
586 return raw_bytes and '\0' in raw_bytes
637 return bool(raw_bytes and BIN_BYTE_MARKER in raw_bytes)
638
639 @LazyProperty
640 def md5(self):
641 """
642 Returns md5 of the file node.
643 """
644
645 if self.commit:
646 return self.commit.node_md5_hash(self.path)
647 else:
648 raw_bytes = self._content
649 # TODO: this sucks, we're computing md5 on potentially super big stream data...
650 return md5(raw_bytes)
587 651
588 652 @LazyProperty
589 653 def extension(self):
590 654 """Returns filenode extension"""
591 655 return self.name.split('.')[-1]
592 656
593 657 @property
594 658 def is_executable(self):
595 659 """
596 660 Returns ``True`` if file has executable flag turned on.
597 661 """
598 662 return bool(self.mode & stat.S_IXUSR)
599 663
600 664 def get_largefile_node(self):
601 665 """
602 666 Try to return a Mercurial FileNode from this node. It does internal
603 667 checks inside largefile store, if that file exist there it will
604 668 create special instance of LargeFileNode which can get content from
605 669 LF store.
606 670 """
607 671 if self.commit:
608 672 return self.commit.get_largefile_node(self.path)
609 673
610 def count_lines(self, content, count_empty=False):
674 def count_lines(self, content: str | bytes, count_empty=False):
675 if isinstance(content, str):
676 newline_marker = '\n'
677 elif isinstance(content, bytes):
678 newline_marker = b'\n'
679 else:
680 raise ValueError('content must be bytes or str got {type(content)} instead')
611 681
612 682 if count_empty:
613 683 all_lines = 0
614 684 empty_lines = 0
615 685 for line in content.splitlines(True):
616 if line == '\n':
686 if line == newline_marker:
617 687 empty_lines += 1
618 688 all_lines += 1
619 689
620 690 return all_lines, all_lines - empty_lines
621 691 else:
622 692 # fast method
623 empty_lines = all_lines = content.count('\n')
693 empty_lines = all_lines = content.count(newline_marker)
624 694 if all_lines == 0 and content:
625 695 # one-line without a newline
626 696 empty_lines = all_lines = 1
627 697
628 698 return all_lines, empty_lines
629 699
630 700 def lines(self, count_empty=False):
631 701 all_lines, empty_lines = 0, 0
632 702
633 703 if not self.is_binary:
634 704 content = self.content
635 705 all_lines, empty_lines = self.count_lines(content, count_empty=count_empty)
636 706 return all_lines, empty_lines
637 707
638 def __repr__(self):
639 return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
640 getattr(self.commit, 'short_id', ''))
641
642 708
643 709 class RemovedFileNode(FileNode):
644 710 """
645 711 Dummy FileNode class - trying to access any public attribute except path,
646 712 name, kind or state (or methods/attributes checking those two) would raise
647 713 RemovedFileNodeError.
648 714 """
649 715 ALLOWED_ATTRIBUTES = [
650 716 'name', 'path', 'state', 'is_root', 'is_file', 'is_dir', 'kind',
651 'added', 'changed', 'not_changed', 'removed'
717 'added', 'changed', 'not_changed', 'removed', 'bytes_path'
652 718 ]
653 719
654 720 def __init__(self, path):
655 721 """
656 722 :param path: relative path to the node
657 723 """
658 super(RemovedFileNode, self).__init__(path=path)
724 super().__init__(path=path)
659 725
660 726 def __getattribute__(self, attr):
661 727 if attr.startswith('_') or attr in RemovedFileNode.ALLOWED_ATTRIBUTES:
662 return super(RemovedFileNode, self).__getattribute__(attr)
663 raise RemovedFileNodeError(
664 "Cannot access attribute %s on RemovedFileNode" % attr)
728 return super().__getattribute__(attr)
729 raise RemovedFileNodeError(f"Cannot access attribute {attr} on RemovedFileNode. Not in allowed attributes")
665 730
666 731 @LazyProperty
667 732 def state(self):
668 733 return NodeState.REMOVED
669 734
670 735
671 736 class DirNode(Node):
672 737 """
673 738 DirNode stores list of files and directories within this node.
674 739 Nodes may be used standalone but within repository context they
675 740 lazily fetch data within same repository's commit.
676 741 """
677 742
678 def __init__(self, path, nodes=(), commit=None):
743 def __init__(self, path, nodes=(), commit=None, default_pre_load=None):
679 744 """
680 745 Only one of ``nodes`` and ``commit`` may be given. Passing both
681 746 would raise ``NodeError`` exception.
682 747
683 748 :param path: relative path to the node
684 749 :param nodes: content may be passed to constructor
685 750 :param commit: if given, will use it to lazily fetch content
686 751 """
687 752 if nodes and commit:
688 753 raise NodeError("Cannot use both nodes and commit")
689 754 super(DirNode, self).__init__(path, NodeKind.DIR)
690 755 self.commit = commit
691 756 self._nodes = nodes
757 self.default_pre_load = default_pre_load or ['is_binary', 'size']
758
759 def __iter__(self):
760 for node in self.nodes:
761 yield node
762
763 def __eq__(self, other):
764 eq = super(DirNode, self).__eq__(other)
765 if eq is not None:
766 return eq
767 # check without entering each dir
768 self_nodes_paths = list(sorted(n.path for n in self.nodes))
769 other_nodes_paths = list(sorted(n.path for n in self.nodes))
770 return self_nodes_paths == other_nodes_paths
771
772 def __lt__(self, other):
773 lt = super(DirNode, self).__lt__(other)
774 if lt is not None:
775 return lt
776 # check without entering each dir
777 self_nodes_paths = list(sorted(n.path for n in self.nodes))
778 other_nodes_paths = list(sorted(n.path for n in self.nodes))
779 return self_nodes_paths < other_nodes_paths
692 780
693 781 @LazyProperty
694 782 def content(self):
695 raise NodeError(
696 "%s represents a dir and has no `content` attribute" % self)
783 raise NodeError(f"{self} represents a dir and has no `content` attribute")
697 784
698 785 @LazyProperty
699 786 def nodes(self):
700 787 if self.commit:
701 nodes = self.commit.get_nodes(self.path)
788 nodes = self.commit.get_nodes(self.path, pre_load=self.default_pre_load)
702 789 else:
703 790 nodes = self._nodes
704 791 self._nodes_dict = dict((node.path, node) for node in nodes)
705 792 return sorted(nodes)
706 793
707 794 @LazyProperty
708 795 def files(self):
709 796 return sorted((node for node in self.nodes if node.is_file()))
710 797
711 798 @LazyProperty
712 799 def dirs(self):
713 800 return sorted((node for node in self.nodes if node.is_dir()))
714 801
715 def __iter__(self):
716 for node in self.nodes:
717 yield node
718
719 802 def get_node(self, path):
720 803 """
721 804 Returns node from within this particular ``DirNode``, so it is not
722 805 allowed to fetch, i.e. node located at 'docs/api/index.rst' from node
723 806 'docs'. In order to access deeper nodes one must fetch nodes between
724 807 them first - this would work::
725 808
726 809 docs = root.get_node('docs')
727 810 docs.get_node('api').get_node('index.rst')
728 811
729 812 :param: path - relative to the current node
730 813
731 814 .. note::
732 815 To access lazily (as in example above) node have to be initialized
733 816 with related commit object - without it node is out of
734 817 context and may know nothing about anything else than nearest
735 818 (located at same level) nodes.
736 819 """
737 820 try:
738 821 path = path.rstrip('/')
739 822 if path == '':
740 823 raise NodeError("Cannot retrieve node without path")
741 824 self.nodes # access nodes first in order to set _nodes_dict
742 825 paths = path.split('/')
743 826 if len(paths) == 1:
744 827 if not self.is_root():
745 828 path = '/'.join((self.path, paths[0]))
746 829 else:
747 830 path = paths[0]
748 831 return self._nodes_dict[path]
749 832 elif len(paths) > 1:
750 833 if self.commit is None:
751 834 raise NodeError("Cannot access deeper nodes without commit")
752 835 else:
753 836 path1, path2 = paths[0], '/'.join(paths[1:])
754 837 return self.get_node(path1).get_node(path2)
755 838 else:
756 839 raise KeyError
757 840 except KeyError:
758 raise NodeError("Node does not exist at %s" % path)
841 raise NodeError(f"Node does not exist at {path}")
759 842
760 843 @LazyProperty
761 844 def state(self):
762 845 raise NodeError("Cannot access state of DirNode")
763 846
764 847 @LazyProperty
765 848 def size(self):
766 849 size = 0
767 850 for root, dirs, files in self.commit.walk(self.path):
768 851 for f in files:
769 852 size += f.size
770 853
771 854 return size
772 855
773 856 @LazyProperty
774 857 def last_commit(self):
775 858 if self.commit:
776 859 pre_load = ["author", "date", "message", "parents"]
777 860 return self.commit.get_path_commit(self.path, pre_load=pre_load)
778 861 raise NodeError(
779 862 "Cannot retrieve last commit of the file without "
780 863 "related commit attribute")
781 864
782 865 def __repr__(self):
783 return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
784 getattr(self.commit, 'short_id', ''))
866 short_id = getattr(self.commit, 'short_id', '')
867 return f'<{self.__class__.__name__} {self.path!r} @ {short_id}>'
785 868
786 869
787 870 class RootNode(DirNode):
788 871 """
789 872 DirNode being the root node of the repository.
790 873 """
791 874
792 875 def __init__(self, nodes=(), commit=None):
793 super(RootNode, self).__init__(path='', nodes=nodes, commit=commit)
876 super(RootNode, self).__init__(path=b'', nodes=nodes, commit=commit)
794 877
795 878 def __repr__(self):
796 return '<%s>' % self.__class__.__name__
879 return f'<{self.__class__.__name__}>'
797 880
798 881
799 882 class SubModuleNode(Node):
800 883 """
801 884 represents a SubModule of Git or SubRepo of Mercurial
802 885 """
803 886 is_binary = False
804 887 size = 0
805 888
806 889 def __init__(self, name, url=None, commit=None, alias=None):
807 890 self.path = name
808 891 self.kind = NodeKind.SUBMODULE
809 892 self.alias = alias
810 893
811 894 # we have to use EmptyCommit here since this can point to svn/git/hg
812 895 # submodules we cannot get from repository
813 896 self.commit = EmptyCommit(str(commit), alias=alias)
814 897 self.url = url or self._extract_submodule_url()
815 898
816 899 def __repr__(self):
817 return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
818 getattr(self.commit, 'short_id', ''))
900 short_id = getattr(self.commit, 'short_id', '')
901 return f'<{self.__class__.__name__} {self.path!r} @ {short_id}>'
819 902
820 903 def _extract_submodule_url(self):
821 904 # TODO: find a way to parse git's submodule file and extract the
822 905 # linking URL
823 906 return self.path
824 907
825 908 @LazyProperty
826 909 def name(self):
827 910 """
828 911 Returns the name of the node; if it is a path,
829 912 only the last part is returned.
830 913 """
831 org = safe_unicode(self.path.rstrip('/').split('/')[-1])
832 return '%s @ %s' % (org, self.commit.short_id)
914 org = safe_str(self.path.rstrip('/').split('/')[-1])
915 return f'{org} @ {self.commit.short_id}'
833 916
834 917
835 918 class LargeFileNode(FileNode):
836 919
837 920 def __init__(self, path, url=None, commit=None, alias=None, org_path=None):
838 self.path = path
839 self.org_path = org_path
921 self._validate_path(path) # can throw exception if path is invalid
922 self.org_path = org_path # as stored in VCS as LF pointer
923
924 self.bytes_path = path.rstrip(b'/') # store for __repr__
925 self.path = safe_str(self.bytes_path) # we store paths as str
926
840 927 self.kind = NodeKind.LARGEFILE
841 928 self.alias = alias
842 self._content = ''
929 self._content = b''
843 930
844 def _validate_path(self, path):
931 def _validate_path(self, path: bytes):
845 932 """
846 we override check since the LargeFileNode path is system absolute
933 we override check since the LargeFileNode path is system absolute, but we check for bytes only
847 934 """
848 pass
935 self._assert_bytes(path)
849 936
850 937 def __repr__(self):
851 return '<%s %r>' % (self.__class__.__name__, self.path)
938 return f'<{self.__class__.__name__} {self.org_path} -> {self.path!r}>'
852 939
853 940 @LazyProperty
854 941 def size(self):
855 942 return os.stat(self.path).st_size
856 943
857 944 @LazyProperty
858 945 def raw_bytes(self):
859 946 with open(self.path, 'rb') as f:
860 947 content = f.read()
861 948 return content
862 949
863 950 @LazyProperty
864 951 def name(self):
865 952 """
866 953 Overwrites name to be the org lf path
867 954 """
868 955 return self.org_path
869 956
870 957 def stream_bytes(self):
871 958 with open(self.path, 'rb') as stream:
872 959 while True:
873 960 data = stream.read(16 * 1024)
874 961 if not data:
875 962 break
876 963 yield data
@@ -1,161 +1,161 b''
1 1
2 2
3 3 # Copyright (C) 2014-2020 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 """
22 22 Utilities aimed to help achieve mostly basic tasks.
23 23 """
24 24
25 25
26 26
27 27
28 28 import re
29 29 import os
30 30 import time
31 31 import datetime
32 32 import logging
33 33
34 34 from rhodecode.lib.vcs.conf import settings
35 35 from rhodecode.lib.vcs.exceptions import VCSError, VCSBackendNotSupportedError
36 36
37 37
38 38 log = logging.getLogger(__name__)
39 39
40 40
41 41 def get_scm(path):
42 42 """
43 43 Returns one of the aliases from ``ALIASES`` (in the same order of precedence as
44 44 the shortcuts given in ``ALIASES``) and the working dir path for the given
45 45 argument. If no scm-specific directory is found or more than one scm is
46 46 found at that directory, ``VCSError`` is raised.
47 47 """
48 48 if not os.path.isdir(path):
49 49 raise VCSError("Given path %s is not a directory" % path)
50 50
51 51 found_scms = [(scm, path) for scm in get_scms_for_path(path)]
52 52
53 53 if len(found_scms) > 1:
54 54 found = ', '.join((x[0] for x in found_scms))
55 55 raise VCSError(
56 56 'More than one [%s] scm found at given path %s' % (found, path))
57 57
58 if len(found_scms) is 0:
58 if len(found_scms) == 0:
59 59 raise VCSError('No scm found at given path %s' % path)
60 60
61 61 return found_scms[0]
62 62
63 63
64 64 def get_scm_backend(backend_type):
65 65 from rhodecode.lib.vcs.backends import get_backend
66 66 return get_backend(backend_type)
67 67
68 68
69 69 def get_scms_for_path(path):
70 70 """
71 71 Returns all scm's found at the given path. If no scm is recognized
72 72 - empty list is returned.
73 73
74 74 :param path: path to directory which should be checked. May be callable.
75 75
76 76 :raises VCSError: if given ``path`` is not a directory
77 77 """
78 78 from rhodecode.lib.vcs.backends import get_backend
79 79 if hasattr(path, '__call__'):
80 80 path = path()
81 81 if not os.path.isdir(path):
82 82 raise VCSError("Given path %r is not a directory" % path)
83 83
84 84 result = []
85 85 for key in settings.available_aliases():
86 86 try:
87 87 backend = get_backend(key)
88 88 except VCSBackendNotSupportedError:
89 89 log.warning('VCSBackendNotSupportedError: %s not supported', key)
90 90 continue
91 91 if backend.is_valid_repository(path):
92 92 result.append(key)
93 93 return result
94 94
95 95
96 96 def parse_datetime(text):
97 97 """
98 98 Parses given text and returns ``datetime.datetime`` instance or raises
99 99 ``ValueError``.
100 100
101 101 :param text: string of desired date/datetime or something more verbose,
102 102 like *yesterday*, *2weeks 3days*, etc.
103 103 """
104 104 if not text:
105 105 raise ValueError('Wrong date: "%s"' % text)
106 106
107 107 if isinstance(text, datetime.datetime):
108 108 return text
109 109
110 110 # we limit the format to not include microseconds, e.g. 2017-10-17t17:48:23.XXXX
111 111 text = text.strip().lower()[:19]
112 112
113 113 input_formats = (
114 114 '%Y-%m-%d %H:%M:%S',
115 115 '%Y-%m-%dt%H:%M:%S',
116 116 '%Y-%m-%d %H:%M',
117 117 '%Y-%m-%dt%H:%M',
118 118 '%Y-%m-%d',
119 119 '%m/%d/%Y %H:%M:%S',
120 120 '%m/%d/%Yt%H:%M:%S',
121 121 '%m/%d/%Y %H:%M',
122 122 '%m/%d/%Yt%H:%M',
123 123 '%m/%d/%Y',
124 124 '%m/%d/%y %H:%M:%S',
125 125 '%m/%d/%yt%H:%M:%S',
126 126 '%m/%d/%y %H:%M',
127 127 '%m/%d/%yt%H:%M',
128 128 '%m/%d/%y',
129 129 )
130 130 for format_def in input_formats:
131 131 try:
132 132 return datetime.datetime(*time.strptime(text, format_def)[:6])
133 133 except ValueError:
134 134 pass
135 135
136 136 # Try descriptive texts
137 137 if text == 'tomorrow':
138 138 future = datetime.datetime.now() + datetime.timedelta(days=1)
139 139 args = future.timetuple()[:3] + (23, 59, 59)
140 140 return datetime.datetime(*args)
141 141 elif text == 'today':
142 142 return datetime.datetime(*datetime.datetime.today().timetuple()[:3])
143 143 elif text == 'now':
144 144 return datetime.datetime.now()
145 145 elif text == 'yesterday':
146 146 past = datetime.datetime.now() - datetime.timedelta(days=1)
147 147 return datetime.datetime(*past.timetuple()[:3])
148 148 else:
149 149 days = 0
150 150 matched = re.match(
151 151 r'^((?P<weeks>\d+) ?w(eeks?)?)? ?((?P<days>\d+) ?d(ays?)?)?$', text)
152 152 if matched:
153 153 groupdict = matched.groupdict()
154 154 if groupdict['days']:
155 155 days += int(matched.groupdict()['days'])
156 156 if groupdict['weeks']:
157 157 days += int(matched.groupdict()['weeks']) * 7
158 158 past = datetime.datetime.now() - datetime.timedelta(days=days)
159 159 return datetime.datetime(*past.timetuple()[:3])
160 160
161 161 raise ValueError('Wrong date: "%s"' % text)
General Comments 0
You need to be logged in to leave comments. Login now