##// END OF EJS Templates
vcs-support: bulk of changes for python3
super-admin -
r5075:d1c4b80b default
parent child Browse files
Show More
@@ -1,418 +1,432 b''
1
1
2
2
3 # Copyright (C) 2016-2020 RhodeCode GmbH
3 # Copyright (C) 2016-2020 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 """
21 """
22 Client for the VCSServer implemented based on HTTP.
22 Client for the VCSServer implemented based on HTTP.
23 """
23 """
24
24
25 import copy
25 import copy
26 import logging
26 import logging
27 import threading
27 import threading
28 import time
28 import time
29 import urllib.request, urllib.error, urllib.parse
29 import urllib.request
30 import urllib.error
31 import urllib.parse
30 import urllib.parse
32 import urllib.parse
31 import uuid
33 import uuid
32 import traceback
34 import traceback
33
35
34 import pycurl
36 import pycurl
35 import msgpack
37 import msgpack
36 import requests
38 import requests
37 from requests.packages.urllib3.util.retry import Retry
39 from requests.packages.urllib3.util.retry import Retry
38
40
39 import rhodecode
41 import rhodecode
40 from rhodecode.lib import rc_cache
42 from rhodecode.lib import rc_cache
41 from rhodecode.lib.rc_cache.utils import compute_key_from_params
43 from rhodecode.lib.rc_cache.utils import compute_key_from_params
42 from rhodecode.lib.system_info import get_cert_path
44 from rhodecode.lib.system_info import get_cert_path
43 from rhodecode.lib.vcs import exceptions, CurlSession
45 from rhodecode.lib.vcs import exceptions, CurlSession
44 from rhodecode.lib.utils2 import str2bool
46 from rhodecode.lib.utils2 import str2bool
45
47
46 log = logging.getLogger(__name__)
48 log = logging.getLogger(__name__)
47
49
48
50
49 # TODO: mikhail: Keep it in sync with vcsserver's
51 # TODO: mikhail: Keep it in sync with vcsserver's
50 # HTTPApplication.ALLOWED_EXCEPTIONS
52 # HTTPApplication.ALLOWED_EXCEPTIONS
51 EXCEPTIONS_MAP = {
53 EXCEPTIONS_MAP = {
52 'KeyError': KeyError,
54 'KeyError': KeyError,
53 'URLError': urllib.error.URLError,
55 'URLError': urllib.error.URLError,
54 }
56 }
55
57
56
58
def _remote_call(url, payload, exceptions_map, session, retries=3):
    """
    POST a msgpack-encoded `payload` to `url` and return the call's result.

    :param url: endpoint the payload is posted to.
    :param payload: msgpack-serializable dict describing the remote call.
    :param exceptions_map: maps the error ``type`` name reported in the
        response to the exception class raised locally; unknown names fall
        back to ``Exception``.
    :param session: object exposing ``post(url, data=...)``.
    :param retries: number of attempts made when the connection is reset
        (``pycurl.E_RECV_ERROR``).
    :returns: the ``result`` entry of the decoded response.
    :raises exceptions.HttpVCSCommunicationError: on transport errors, on
        HTTP status >= 400, or when no attempt produced a response.
    """
    response = None
    for attempt in range(retries):
        try:
            response = session.post(url, data=msgpack.packb(payload))
            break
        except pycurl.error as e:
            # do not assume a strict (code, message) tuple; only the code
            # is needed to decide whether to retry
            error_code = e.args[0] if e.args else None
            if error_code == pycurl.E_RECV_ERROR:
                log.warning('Received a "Connection reset by peer" error. '
                            'Retrying... (%s/%s)', attempt + 1, retries)
                continue  # Retry if connection reset error.
            msg = '{}. \npycurl traceback: {}'.format(e, traceback.format_exc())
            raise exceptions.HttpVCSCommunicationError(msg)
        except Exception as e:
            # python3 exceptions have no `.message`; fall back to str(e) so
            # the check below can still match
            message = str(getattr(e, 'message', '') or e)
            if 'Failed to connect' in message:
                # gevent doesn't return proper pycurl errors
                raise exceptions.HttpVCSCommunicationError(e)
            else:
                raise

    if response is None:
        # every attempt ended in a connection reset (or retries < 1);
        # without this guard the code below fails on an unbound `response`
        raise exceptions.HttpVCSCommunicationError(
            'No response from {} after {} attempt(s)'.format(url, retries))

    if response.status_code >= 400:
        # content type is only used for the log line, never fail on it
        content_type = getattr(response, 'content_type', None)
        log.error('Call to %s returned non 200 HTTP code: %s [%s]',
                  url, response.status_code, content_type)
        raise exceptions.HttpVCSCommunicationError(repr(response.content))

    try:
        response = msgpack.unpackb(response.content)
    except Exception:
        log.exception('Failed to decode response from msgpack')
        raise

    error = response.get('error')
    if error:
        type_ = error.get('type', 'Exception')
        exc = exceptions_map.get(type_, Exception)
        exc = exc(error.get('message'))
        try:
            exc._vcs_kind = error['_vcs_kind']
        except KeyError:
            pass

        try:
            # assigned in order; a missing key stops at that key and keeps
            # the attributes already set
            exc._vcs_server_traceback = error['traceback']
            exc._vcs_server_org_exc_name = error['org_exc']
            exc._vcs_server_org_exc_tb = error['org_exc_tb']
        except KeyError:
            pass

        exc.add_note(attach_exc_details(error))
        raise exc  # raising the org exception from vcsserver
    return response.get('result')
108
113
109
114
def attach_exc_details(error):
    """
    Build a human readable note out of the error dict sent by the vcsserver.

    :param error: dict with the optional keys ``_vcs_kind``, ``org_exc``
        and ``traceback``; missing keys are rendered as ``None``.
    :returns: multi-line string wrapped in EXC NOTE markers.
    """
    lines = [
        '-- EXC NOTE -- :',
        f'vcs_kind: {error.get("_vcs_kind")}',
        # report the original exception name, not the vcs kind again
        f'org_exc: {error.get("org_exc")}',
        f'tb: {error.get("traceback")}',
        '-- END EXC NOTE --',
    ]
    return '\n'.join(lines)
122
123
def _streaming_remote_call(url, payload, exceptions_map, session, chunk_size):
    """
    POST a msgpack-encoded `payload` to `url` and stream back the response.

    :param url: stream endpoint the payload is posted to.
    :param payload: msgpack-serializable dict; its ``method`` and
        ``_repo_name`` entries are also sent as ``X-RC-*`` headers.
    :param exceptions_map: accepted for signature parity with
        `_remote_call`; not used here.
    :param session: object exposing ``post(url, data=..., headers=...)``.
    :param chunk_size: size passed to ``response.iter_content``.
    :returns: iterator over the response content chunks.
    :raises exceptions.HttpVCSCommunicationError: on transport errors or
        HTTP status >= 400.
    """
    headers = {
        'X-RC-Method': payload.get('method'),
        'X-RC-Repo-Name': payload.get('_repo_name')
    }
    try:
        response = session.post(url, data=msgpack.packb(payload), headers=headers)
    except pycurl.error as e:
        msg = '{}. \npycurl traceback: {}'.format(e, traceback.format_exc())
        raise exceptions.HttpVCSCommunicationError(msg)
    except Exception as e:
        # python3 exceptions have no `.message`; fall back to str(e) so
        # the check below can still match
        message = str(getattr(e, 'message', '') or e)
        if 'Failed to connect' in message:
            # gevent doesn't return proper pycurl errors
            raise exceptions.HttpVCSCommunicationError(e)
        else:
            raise

    if response.status_code >= 400:
        log.error('Call to %s returned non 200 HTTP code: %s',
                  url, response.status_code)
        raise exceptions.HttpVCSCommunicationError(repr(response.content))

    return response.iter_content(chunk_size=chunk_size)
135
149
136
150
class ServiceConnection(object):
    """
    Proxy which forwards every unknown attribute access as a remote call
    to the service endpoint.
    """

    def __init__(self, server_and_port, backend_endpoint, session_factory):
        base_url = 'http://%s' % server_and_port
        self.url = urllib.parse.urljoin(base_url, backend_endpoint)
        self._session_factory = session_factory

    def __getattr__(self, name):
        # only reached for names not found by the regular lookup; the
        # returned callable sends `name` as the remote method
        def remote_method(*args, **kwargs):
            return self._call(name, *args, **kwargs)
        return remote_method

    @exceptions.map_vcs_exceptions
    def _call(self, name, *args, **kwargs):
        """
        Execute remote method `name` with the given arguments.
        """
        call_params = {'args': args, 'kwargs': kwargs}
        payload = {
            'id': str(uuid.uuid4()),
            'method': name,
            'params': call_params
        }
        session = self._session_factory()
        return _remote_call(self.url, payload, EXCEPTIONS_MAP, session)
156
170
157
171
class RemoteVCSMaker(object):
    """
    Factory of `RemoteRepo` instances for a single backend type. Unknown
    attributes are forwarded as remote calls to the backend endpoint.
    """

    def __init__(self, server_and_port, backend_endpoint, backend_type, session_factory):
        base_url = 'http://%s' % server_and_port
        self.url = urllib.parse.urljoin(base_url, backend_endpoint)
        self.stream_url = urllib.parse.urljoin(base_url, backend_endpoint+'/stream')

        self.backend_type = backend_type
        self._session_factory = session_factory

    @classmethod
    def init_cache_region(cls, repo_id):
        """
        Return a ``(region, namespace)`` tuple of the `cache_repo` region
        for the given `repo_id`.
        """
        namespace = 'repo.{}'.format(repo_id)
        return rc_cache.get_or_create_region('cache_repo', namespace), namespace

    def __call__(self, path, repo_id, config, with_wire=None):
        log.debug('%s RepoMaker call on %s', self.backend_type.upper(), path)
        return RemoteRepo(path, repo_id, config, self, with_wire=with_wire)

    def __getattr__(self, name):
        # only reached for names not found by the regular lookup
        def remote_method(*args, **kwargs):
            return self._call(name, *args, **kwargs)
        return remote_method

    @exceptions.map_vcs_exceptions
    def _call(self, func_name, *args, **kwargs):
        """
        Execute remote function `func_name` on this maker's backend.
        """
        call_params = {'args': args, 'kwargs': kwargs}
        payload = {
            'id': str(uuid.uuid4()),
            'method': func_name,
            'backend': self.backend_type,
            'params': call_params
        }
        session = self._session_factory()
        return _remote_call(self.url, payload, EXCEPTIONS_MAP, session)
192
206
193
207
class RemoteRepo(object):
    """
    Client side handle of a repository living on the VCSServer. Unknown
    attributes are turned into remote calls; names starting with
    ``stream:`` are sent to the streaming endpoint.
    """
    CHUNK_SIZE = 16384

    # Class level default. Without it, a lookup of `_call_with_logging` on
    # an instance created while DEBUG is disabled falls through to
    # `__getattr__`, which returns a (truthy) function and silently turns
    # the logging code path on.
    _call_with_logging = False

    def __init__(self, path, repo_id, config, remote_maker, with_wire=None):
        """
        :param path: repository path, sent to the server as ``wire['path']``.
        :param repo_id: repository id; a sanitized form is used for the
            cache namespace.
        :param config: config object; must support ``get('paths', '/')``
            and ``serialize()``.
        :param remote_maker: provides ``url``, ``stream_url``,
            ``_session_factory`` and ``init_cache_region``.
        :param with_wire: optional dict merged over the default wire data.
        """
        self.url = remote_maker.url
        self.stream_url = remote_maker.stream_url
        self._session = remote_maker._session_factory()

        cache_repo_id = self._repo_id_sanitizer(repo_id)
        _repo_name = self._get_repo_name(config, path)
        self._cache_region, self._cache_namespace = \
            remote_maker.init_cache_region(cache_repo_id)

        with_wire = with_wire or {}

        repo_state_uid = with_wire.get('repo_state_uid') or 'state'

        self._wire = {
            "_repo_name": _repo_name,
            "path": path,  # repo path
            "repo_id": repo_id,
            "cache_repo_id": cache_repo_id,
            "config": config,
            "repo_state_uid": repo_state_uid,
            "context": self._create_vcs_cache_context(path, repo_state_uid)
        }

        if with_wire:
            self._wire.update(with_wire)

        # NOTE(johbo): Trading complexity for performance. Avoiding the call to
        # log.debug brings a few percent gain even if it is not active.
        if log.isEnabledFor(logging.DEBUG):
            self._call_with_logging = True

        self.cert_dir = get_cert_path(rhodecode.CONFIG.get('__file__'))

    def _get_repo_name(self, config, path):
        """
        Return `path` relative to the configured repo store.
        """
        repo_store = config.get('paths', '/')
        return path.split(repo_store)[-1].lstrip('/')

    def _repo_id_sanitizer(self, repo_id):
        """
        Replace ``/`` with ``__``, ``-`` with ``_`` and every non-ascii
        character with ``_<ordinal>_``.
        """
        pathless = repo_id.replace('/', '__').replace('-', '_')
        return ''.join(char if ord(char) < 128 else '_{}_'.format(ord(char)) for char in pathless)

    def __getattr__(self, name):
        # only reached for names not found by the regular lookup

        if name.startswith('stream:'):
            def repo_remote_attr(*args, **kwargs):
                return self._call_stream(name, *args, **kwargs)
        else:
            def repo_remote_attr(*args, **kwargs):
                return self._call(name, *args, **kwargs)

        return repo_remote_attr

    def _base_call(self, name, *args, **kwargs):
        """
        Build the payload for remote method `name`.

        :returns: tuple of ``(context_uid, payload)``.
        """
        # TODO: oliver: This is currently necessary pre-call since the
        # config object is being changed for hooking scenarios
        wire = copy.deepcopy(self._wire)
        wire["config"] = wire["config"].serialize()
        wire["config"].append(('vcs', 'ssl_dir', self.cert_dir))

        payload = {
            'id': str(uuid.uuid4()),
            'method': name,
            "_repo_name": wire['_repo_name'],
            'params': {'wire': wire, 'args': args, 'kwargs': kwargs}
        }

        context_uid = wire.get('context')
        return context_uid, payload

    def get_local_cache(self, name, args):
        """
        Decide if the result of method `name` can be cached locally.

        :returns: tuple of ``(cache_on, cache_key)``; ``cache_key`` is an
            empty string when caching is off.
        """
        cache_on = False
        cache_key = ''
        local_cache_on = rhodecode.ConfigGet().get_bool('vcs.methods.cache')

        cache_methods = [
            'branches', 'tags', 'bookmarks',
            'is_large_file', 'is_binary',
            'fctx_size', 'stream:fctx_node_data', 'blob_raw_length',
            'node_history',
            'revision', 'tree_items',
            'ctx_list', 'ctx_branch', 'ctx_description',
            'bulk_request',
            'assert_correct_path'
        ]

        if local_cache_on and name in cache_methods:
            cache_on = True
            repo_state_uid = self._wire['repo_state_uid']
            call_args = list(args)
            cache_key = compute_key_from_params(repo_state_uid, name, *call_args)

        return cache_on, cache_key

    @exceptions.map_vcs_exceptions
    def _call(self, name, *args, **kwargs):
        """
        Execute remote method `name`, going through the local cache region
        when `get_local_cache` enables it for this method.
        """
        context_uid, payload = self._base_call(name, *args, **kwargs)
        url = self.url

        start = time.time()
        cache_on, cache_key = self.get_local_cache(name, args)

        @self._cache_region.conditional_cache_on_arguments(
            namespace=self._cache_namespace, condition=cache_on and cache_key)
        def remote_call(_cache_key):
            if self._call_with_logging:
                args_repr = f'ARG: {str(args):.512}|KW: {str(kwargs):.512}'
                log.debug('Calling %s@%s with args:%r. wire_context: %s cache_on: %s',
                          url, name, args_repr, context_uid, cache_on)
            return _remote_call(url, payload, EXCEPTIONS_MAP, self._session)

        result = remote_call(cache_key)
        if self._call_with_logging:
            log.debug('Call %s@%s took: %.4fs. wire_context: %s',
                      url, name, time.time()-start, context_uid)
        return result

    @exceptions.map_vcs_exceptions
    def _call_stream(self, name, *args, **kwargs):
        """
        Execute remote method `name` against the stream endpoint and return
        an iterator over the response chunks. The result is never cached.
        """
        context_uid, payload = self._base_call(name, *args, **kwargs)
        payload['chunk_size'] = self.CHUNK_SIZE
        url = self.stream_url

        start = time.time()
        # cache_on is only used for the debug log line below
        cache_on, cache_key = self.get_local_cache(name, args)

        # Cache is a problem because this is a stream
        def streaming_remote_call(_cache_key):
            if self._call_with_logging:
                args_repr = f'ARG: {str(args):.512}|KW: {str(kwargs):.512}'
                log.debug('Calling %s@%s with args:%r. wire_context: %s cache_on: %s',
                          url, name, args_repr, context_uid, cache_on)
            return _streaming_remote_call(url, payload, EXCEPTIONS_MAP, self._session, self.CHUNK_SIZE)

        result = streaming_remote_call(cache_key)
        if self._call_with_logging:
            log.debug('Call %s@%s took: %.4fs. wire_context: %s',
                      url, name, time.time()-start, context_uid)
        return result

    def __getitem__(self, key):
        return self.revision(key)

    def _create_vcs_cache_context(self, *args):
        """
        Creates a unique string which is passed to the VCSServer on every
        remote call. It is used as cache key in the VCSServer.
        """
        hash_key = '-'.join(map(str, args))
        return str(uuid.uuid5(uuid.NAMESPACE_URL, hash_key))

    def invalidate_vcs_cache(self):
        """
        This invalidates the context which is sent to the VCSServer on every
        call to a remote method. It forces the VCSServer to create a fresh
        repository instance on the next call to a remote method.
        """
        self._wire['context'] = str(uuid.uuid4())
355
369
356
370
class VcsHttpProxy(object):
    """
    Posts msgpack-encoded requests to the backend endpoint through a
    `requests` session and decodes the streamed msgpack response.
    """

    CHUNK_SIZE = 16384

    def __init__(self, server_and_port, backend_endpoint):
        retry_policy = Retry(total=5, connect=None, read=None, redirect=None)
        http_adapter = requests.adapters.HTTPAdapter(max_retries=retry_policy)

        self.base_url = urllib.parse.urljoin('http://%s' % server_and_port, backend_endpoint)
        self.session = requests.Session()
        self.session.mount('http://', http_adapter)

    def handle(self, environment, input_data, *args, **kwargs):
        """
        Send the request data to the backend.

        :returns: tuple of ``(iterator, status, headers)``.
        """
        packed = msgpack.packb({
            'environment': environment,
            'input_data': input_data,
            'args': args,
            'kwargs': kwargs
        })
        response = self.session.post(self.base_url, packed, stream=True)
        return self._get_result(response)

    def _deserialize_and_raise(self, error):
        """
        Raise an `Exception` built from the `error` dict, carrying over
        ``_vcs_kind`` when present.
        """
        exception = Exception(error['message'])
        if '_vcs_kind' in error:
            exception._vcs_kind = error['_vcs_kind']
        raise exception

    def _iterate(self, result):
        """
        Yield every msgpack object decoded from the streamed response.
        """
        unpacker = msgpack.Unpacker()
        for raw_chunk in result.iter_content(chunk_size=self.CHUNK_SIZE):
            unpacker.feed(raw_chunk)
            yield from unpacker

    def _get_result(self, result):
        """
        Read the leading error, status and headers objects off the stream;
        the returned iterator yields whatever follows them.
        """
        stream = self._iterate(result)

        # first object is the error marker, raise if it is set
        error = next(stream)
        if error:
            self._deserialize_and_raise(error)

        status = next(stream)
        headers = next(stream)

        return stream, status, headers
405
419
406
420
class ThreadlocalSessionFactory(object):
    """
    Callable returning a `CurlSession` bound to the calling thread; the
    session is created on first use and reused afterwards.
    """

    def __init__(self):
        self._thread_local = threading.local()

    def __call__(self):
        local = self._thread_local
        try:
            return local.curl_session
        except AttributeError:
            # first call from this thread
            local.curl_session = CurlSession()
            return local.curl_session
@@ -1,76 +1,76 b''
1
1
2
2
3 # Copyright (C) 2014-2020 RhodeCode GmbH
3 # Copyright (C) 2014-2020 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 """
21 """
22 Internal settings for vcs-lib
22 Internal settings for vcs-lib
23 """
23 """
24
24
25 # list of default encoding used in safe_unicode/safe_str methods
25 # list of default encoding used in safe_str methods
26 DEFAULT_ENCODINGS = ['utf8']
26 DEFAULT_ENCODINGS = ['utf8']
27
27
28
28
29 # Compatibility version when creating SVN repositories. None means newest.
29 # Compatibility version when creating SVN repositories. None means newest.
30 # Other available options are: pre-1.4-compatible, pre-1.5-compatible,
30 # Other available options are: pre-1.4-compatible, pre-1.5-compatible,
31 # pre-1.6-compatible, pre-1.8-compatible
31 # pre-1.6-compatible, pre-1.8-compatible
32 SVN_COMPATIBLE_VERSION = None
32 SVN_COMPATIBLE_VERSION = None
33
33
34 ALIASES = ['hg', 'git', 'svn']
34 ALIASES = ['hg', 'git', 'svn']
35
35
36 BACKENDS = {
36 BACKENDS = {
37 'hg': 'rhodecode.lib.vcs.backends.hg.MercurialRepository',
37 'hg': 'rhodecode.lib.vcs.backends.hg.MercurialRepository',
38 'git': 'rhodecode.lib.vcs.backends.git.GitRepository',
38 'git': 'rhodecode.lib.vcs.backends.git.GitRepository',
39 'svn': 'rhodecode.lib.vcs.backends.svn.SubversionRepository',
39 'svn': 'rhodecode.lib.vcs.backends.svn.SubversionRepository',
40 }
40 }
41
41
42
42
43 ARCHIVE_SPECS = [
43 ARCHIVE_SPECS = [
44 ('tbz2', 'application/x-bzip2', '.tbz2'),
44 ('tbz2', 'application/x-bzip2', '.tbz2'),
45 ('tbz2', 'application/x-bzip2', '.tar.bz2'),
45 ('tbz2', 'application/x-bzip2', '.tar.bz2'),
46
46
47 ('tgz', 'application/x-gzip', '.tgz'),
47 ('tgz', 'application/x-gzip', '.tgz'),
48 ('tgz', 'application/x-gzip', '.tar.gz'),
48 ('tgz', 'application/x-gzip', '.tar.gz'),
49
49
50 ('zip', 'application/zip', '.zip'),
50 ('zip', 'application/zip', '.zip'),
51 ]
51 ]
52
52
53 HOOKS_PROTOCOL = None
53 HOOKS_PROTOCOL = None
54 HOOKS_DIRECT_CALLS = False
54 HOOKS_DIRECT_CALLS = False
55 HOOKS_HOST = '127.0.0.1'
55 HOOKS_HOST = '127.0.0.1'
56
56
57
57
58 MERGE_MESSAGE_TMPL = (
58 MERGE_MESSAGE_TMPL = (
59 u'Merge pull request !{pr_id} from {source_repo} {source_ref_name}\n\n '
59 u'Merge pull request !{pr_id} from {source_repo} {source_ref_name}\n\n '
60 u'{pr_title}')
60 u'{pr_title}')
61 MERGE_DRY_RUN_MESSAGE = 'dry_run_merge_message_from_rhodecode'
61 MERGE_DRY_RUN_MESSAGE = 'dry_run_merge_message_from_rhodecode'
62 MERGE_DRY_RUN_USER = 'Dry-Run User'
62 MERGE_DRY_RUN_USER = 'Dry-Run User'
63 MERGE_DRY_RUN_EMAIL = 'dry-run-merge@rhodecode.com'
63 MERGE_DRY_RUN_EMAIL = 'dry-run-merge@rhodecode.com'
64
64
65
65
def available_aliases():
    """
    Return a copy of `ALIASES` which always contains `hg`.

    Mercurial is required for the system to work, so in case vcs.backends does
    not include it, we make sure it will be available internally
    TODO: anderson: refactor vcs.backends so it won't be necessary, VCS server
    should be responsible to dictate available backends.
    """
    aliases = list(ALIASES)
    if 'hg' in aliases:
        return aliases
    aliases.append('hg')
    return aliases
@@ -1,233 +1,234 b''
1
1
2
2
3 # Copyright (C) 2014-2020 RhodeCode GmbH
3 # Copyright (C) 2014-2020 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 """
21 """
22 Custom vcs exceptions module.
22 Custom vcs exceptions module.
23 """
23 """
24 import logging
24 import logging
25 import functools
25 import functools
26 import urllib.request, urllib.error, urllib.parse
26 import urllib.error
27 import urllib.parse
27 import rhodecode
28 import rhodecode
28
29
29 log = logging.getLogger(__name__)
30 log = logging.getLogger(__name__)
30
31
31
32
class VCSCommunicationError(Exception):
    pass


class HttpVCSCommunicationError(VCSCommunicationError):
    pass


class VCSError(Exception):
    pass


class RepositoryError(VCSError):
    pass


class RepositoryRequirementError(RepositoryError):
    pass


class UnresolvedFilesInRepo(RepositoryError):
    pass


class VCSBackendNotSupportedError(VCSError):
    """
    Exception raised when VCSServer does not support requested backend
    """


class EmptyRepositoryError(RepositoryError):
    pass


class TagAlreadyExistError(RepositoryError):
    pass


class TagDoesNotExistError(RepositoryError):
    pass


class BranchAlreadyExistError(RepositoryError):
    pass


class BranchDoesNotExistError(RepositoryError):
    pass


class CommitError(RepositoryError):
    """
    Exceptions related to an existing commit
    """


class CommitDoesNotExistError(CommitError):
    pass


class CommittingError(RepositoryError):
    """
    Exceptions happening while creating a new commit
    """


class NothingChangedError(CommittingError):
    pass


class NodeError(VCSError):
    pass


class RemovedFileNodeError(NodeError):
    pass


# NOTE: the Node*Error classes below derive from CommittingError,
# not from NodeError.
class NodeAlreadyExistsError(CommittingError):
    pass


class NodeAlreadyChangedError(CommittingError):
    pass


class NodeDoesNotExistError(CommittingError):
    pass


class NodeNotChangedError(CommittingError):
    pass


class NodeAlreadyAddedError(CommittingError):
    pass


class NodeAlreadyRemovedError(CommittingError):
    pass


class SubrepoMergeError(RepositoryError):
    """
    This happens if we try to merge a repository which contains subrepos and
    the subrepos cannot be merged. The subrepos themselves are not merged,
    but their references in the root repo are.
    """


class ImproperArchiveTypeError(VCSError):
    pass


class CommandError(VCSError):
    pass


class UnhandledException(VCSError):
    """
    Signals that something unexpected went wrong.

    This usually means we have a programming error on the side of the VCSServer
    and should inspect the logfile of the VCSServer to find more details.
    """
157
158
158
159
# Maps an error-kind string to the exception class raised for it
# inside the vcs layer.
_EXCEPTION_MAP = {
    'abort': RepositoryError,
    'archive': ImproperArchiveTypeError,
    'error': RepositoryError,
    'lookup': CommitDoesNotExistError,
    'repo_locked': RepositoryError,
    'requirement': RepositoryRequirementError,
    'unhandled': UnhandledException,
    # TODO: johbo: Define our own exception for this and stop abusing
    # urllib's exception class.
    'url_error': urllib.error.URLError,
    'subrepo_merge_error': SubrepoMergeError,
}
172
173
173
174
def map_vcs_exceptions(func):
    """
    Utility to decorate functions so that plain exceptions are translated.

    The translation is based on `_EXCEPTION_MAP` which maps a `str` indicating
    the error type into an exception class representing this error inside
    of the vcs layer. Exceptions without a `_vcs_kind` attribute are
    re-raised unchanged.

    :param func: callable to wrap
    :return: wrapped callable with the same metadata as `func`
    :raises KeyError: if the exception's `_vcs_kind` is not a key of
        `_EXCEPTION_MAP`
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # The error middleware adds information if it finds
            # __traceback_info__ in a frame object. This way the remote
            # traceback information is made available in error reports.
            # This must be set up before any raise below, including the
            # plain re-raise.
            remote_tb = getattr(e, '_vcs_server_traceback', None)
            org_remote_tb = getattr(e, '_vcs_server_org_exc_tb', '')
            __traceback_info__ = None
            if remote_tb:
                if isinstance(remote_tb, str):
                    remote_tb = [remote_tb]
                __traceback_info__ = (
                    'Found VCSServer remote traceback information:\n'
                    '{}\n'
                    '+++ BEG SOURCE EXCEPTION +++\n\n'
                    '{}\n'
                    '+++ END SOURCE EXCEPTION +++\n'
                    ''.format('\n'.join(remote_tb), org_remote_tb)
                )

                # Avoid that remote_tb also appears in the frame
                del remote_tb

            # Special vcs errors had an attribute "_vcs_kind" which is used
            # to translate them to the proper exception class in the vcs
            # client layer.
            kind = getattr(e, '_vcs_kind', None)
            if not kind:
                raise

            exc_name = getattr(e, '_vcs_server_org_exc_name', None)
            if any(e.args):
                org_args = list(e.args)
                # replace the first argument with a prefix exc name
                exc_args = ['{}:{}'.format(exc_name, org_args[0])] + org_args[1:]
            else:
                exc_args = [__traceback_info__ or '{}: UnhandledException'.format(exc_name)]

            # The config is only needed on the translation path, so it is
            # looked up here rather than for every exception.
            debug = rhodecode.ConfigGet().get_bool('debug')
            if debug or (__traceback_info__ and kind not in ['unhandled', 'lookup']):
                # for other than unhandled errors also log the traceback
                # can be useful for debugging
                log.error(__traceback_info__)

            raise _EXCEPTION_MAP[kind](*exc_args)
    return wrapper
@@ -1,876 +1,963 b''
1
1
2
2
3 # Copyright (C) 2014-2020 RhodeCode GmbH
3 # Copyright (C) 2014-2020 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 """
21 """
22 Module holding everything related to vcs nodes, with vcs2 architecture.
22 Module holding everything related to vcs nodes, with vcs2 architecture.
23 """
23 """
24
24 import functools
25 import os
25 import os
26 import stat
26 import stat
27
27
28 from zope.cachedescriptors.property import Lazy as LazyProperty
28 from zope.cachedescriptors.property import Lazy as LazyProperty
29
29
30 import rhodecode
31 from rhodecode.config.conf import LANGUAGES_EXTENSIONS_MAP
30 from rhodecode.config.conf import LANGUAGES_EXTENSIONS_MAP
32 from rhodecode.lib.utils import safe_unicode, safe_str
31 from rhodecode.lib.str_utils import safe_str, safe_bytes
33 from rhodecode.lib.utils2 import md5
32 from rhodecode.lib.hash_utils import md5
34 from rhodecode.lib.vcs import path as vcspath
33 from rhodecode.lib.vcs import path as vcspath
35 from rhodecode.lib.vcs.backends.base import EmptyCommit, FILEMODE_DEFAULT
34 from rhodecode.lib.vcs.backends.base import EmptyCommit, FILEMODE_DEFAULT
36 from rhodecode.lib.vcs.conf.mtypes import get_mimetypes_db
35 from rhodecode.lib.vcs.conf.mtypes import get_mimetypes_db
37 from rhodecode.lib.vcs.exceptions import NodeError, RemovedFileNodeError
36 from rhodecode.lib.vcs.exceptions import NodeError, RemovedFileNodeError
38
37
39 LARGEFILE_PREFIX = '.hglf'
38 LARGEFILE_PREFIX = '.hglf'
40
39
41
40
class NodeKind:
    """Integer constants identifying the kind of a node."""
    SUBMODULE = -1
    DIR = 1
    FILE = 2
    LARGEFILE = 3
47
46
48
47
class NodeState:
    """String constants describing the change state of a node."""
    ADDED = 'added'
    CHANGED = 'changed'
    NOT_CHANGED = 'not changed'
    REMOVED = 'removed'
54
53
# TODO: not sure if that should be bytes or str?
# Most probably bytes, because content should be bytes and we check it.
BIN_BYTE_MARKER = b'\0'
57
55
58
class NodeGeneratorBase(object):
    """
    Base class for removed, added and changed filenodes. It is a lazy
    generator class that creates filenodes only on iteration or call.

    The len method doesn't need to create filenodes at all.
    """

    def __init__(self, current_paths, cs):
        """
        :param current_paths: sequence of paths to create nodes for
        :param cs: object whose ``get_node(path)`` creates the node
        """
        self.cs = cs
        self.current_paths = current_paths

    def __call__(self):
        """Return all nodes as a list."""
        return list(self)

    def __getitem__(self, key):
        """
        Return a lazy iterator of nodes for a slice, or a single node
        for an index.

        Previously this method was itself a generator function, so a
        non-slice key silently produced an empty generator and the slice
        step was ignored.
        """
        if isinstance(key, slice):
            get_node = self.cs.get_node
            return (get_node(p) for p in self.current_paths[key])
        return self.cs.get_node(self.current_paths[key])

    def __len__(self):
        return len(self.current_paths)

    def __iter__(self):
        for p in self.current_paths:
            yield self.cs.get_node(p)
85
82
86
class AddedFileNodesGenerator(NodeGeneratorBase):
    """
    Class holding added files for current commit
    """
87
91
88
92
class ChangedFileNodesGenerator(NodeGeneratorBase):
    """
    Class holding changed files for current commit
    """
93
97
94
98
class RemovedFileNodesGenerator(NodeGeneratorBase):
    """
    Class holding removed files for current commit
    """
    def __iter__(self):
        for p in self.current_paths:
            yield RemovedFileNode(path=safe_bytes(p))

    def __getitem__(self, key):
        """
        Return a lazy iterator of ``RemovedFileNode`` objects for a slice,
        or a single ``RemovedFileNode`` for an index.

        Previously this method was itself a generator function, so a
        non-slice key silently produced an empty generator and the slice
        step was ignored.
        """
        if isinstance(key, slice):
            return (RemovedFileNode(path=safe_bytes(p))
                    for p in self.current_paths[key])
        return RemovedFileNode(path=safe_bytes(self.current_paths[key]))
106
111
107
112
@functools.total_ordering
class Node(object):
    """
    Simplest class representing file or directory on repository. SCM backends
    should use ``FileNode`` and ``DirNode`` subclasses rather than ``Node``
    directly.

    Node's ``path`` cannot start with slash as we operate on *relative* paths
    only. Moreover, every single node is identified by the ``path`` attribute,
    so it cannot end with slash, too. Otherwise, path could lead to mistakes.

    NOTE: this class defines ``__eq__`` but no ``__hash__``, so instances are
    unhashable unless a subclass defines ``__hash__`` itself.
    """
    # RTLO marker allows swapping text, and certain
    # security attacks could be used with this
    RTLO_MARKER = "\u202E"

    commit = None

    def __init__(self, path: bytes, kind):
        """
        :param path: relative path of the node, as bytes
        :param kind: one of the ``NodeKind`` constants
        :raises TypeError: if ``path`` is not bytes
        :raises NodeError: if ``path`` starts with a slash, or is empty for
            a non-directory kind
        """
        self._validate_path(path)  # can throw exception if path is invalid

        self.bytes_path = path.rstrip(b'/')  # store for __repr__
        self.path = safe_str(self.bytes_path)  # we store paths as str

        if self.bytes_path == b'' and kind != NodeKind.DIR:
            raise NodeError("Only DirNode and its subclasses may be "
                            "initialized with empty path")
        self.kind = kind

        if self.is_root() and not self.is_dir():
            raise NodeError("Root node cannot be FILE kind")

    def __eq__(self, other):
        """
        Compare by type, ``name``, ``path`` and ``kind``.

        Returns ``None`` for file nodes whose basic attributes match, which
        tells a subclass ``__eq__`` to fall back to a content comparison.
        Other nodes are compared by the sorted paths of their child nodes,
        without entering each directory.
        """
        if type(self) is not type(other):
            return False
        for attr in ['name', 'path', 'kind']:
            if getattr(self, attr) != getattr(other, attr):
                return False
        if self.is_file():
            # FileNode compare, we need to fallback to content compare
            return None

        # For DirNode's check without entering each dir.
        # The other side's paths must come from ``other.nodes``; taking
        # both lists from ``self.nodes`` made this check always pass.
        self_nodes_paths = sorted(n.path for n in self.nodes)
        other_nodes_paths = sorted(n.path for n in other.nodes)
        return self_nodes_paths == other_nodes_paths

    def __lt__(self, other):
        """
        Order by ``kind`` first, then by ``path``.

        Returns ``None`` when both are equal, which tells a subclass
        ``__lt__`` to fall back to a content comparison.

        NOTE: the Python 2 ``__cmp__`` this replaces had special handling for
        ``SubModuleNode`` when kinds differed, and compared ``name`` rather
        than ``path``; neither is replicated here.
        """
        if not isinstance(other, Node):
            return NotImplemented
        if self.kind != other.kind:
            return self.kind < other.kind
        if self.path != other.path:
            return self.path < other.path
        return None

    def __repr__(self):
        # ``path`` may be missing if __init__ failed before setting it
        maybe_path = getattr(self, 'path', 'UNKNOWN_PATH')
        return f'<{self.__class__.__name__} {maybe_path!r}>'

    def __str__(self):
        return self.name

    def _validate_path(self, path: bytes):
        """
        :raises TypeError: if ``path`` is not bytes
        :raises NodeError: if ``path`` starts with a slash
        """
        self._assert_bytes(path)

        if path.startswith(b'/'):
            raise NodeError(
                "Cannot initialize Node objects with slash at "
                "the beginning as only relative paths are supported. "
                f"Got {path}")

    def _assert_bytes(self, value):
        if not isinstance(value, bytes):
            raise TypeError(f"Bytes required as input, got {type(value)} of {value}.")

    @LazyProperty
    def parent(self):
        """
        Parent node, or ``None`` when the parent path is empty. The parent is
        fetched from ``commit`` when one is set, otherwise a ``DirNode`` is
        created for the parent path.
        """
        parent_path = self.get_parent_path()
        if parent_path:
            if self.commit:
                return self.commit.get_node(parent_path)
            return DirNode(parent_path)
        return None

    @LazyProperty
    def str_path(self) -> str:
        return safe_str(self.path)

    @LazyProperty
    def has_rtlo(self):
        """Detects if a path has right-to-left-override marker"""
        return self.RTLO_MARKER in self.str_path

    @LazyProperty
    def dir_path(self):
        """
        Returns name of the directory from full path of this vcs node. Empty
        string is returned if there's no directory in the path
        """
        _parts = self.path.rstrip('/').rsplit('/', 1)
        if len(_parts) == 2:
            return _parts[0]
        return ''

    @LazyProperty
    def name(self):
        """
        Returns the name of the node, i.e. only the last part of its path.
        """
        return self.path.rstrip('/').split('/')[-1]

    @property
    def kind(self):
        return self._kind

    @kind.setter
    def kind(self, kind):
        # kind may be assigned only once
        if hasattr(self, '_kind'):
            raise NodeError("Cannot change node's kind")
        else:
            self._kind = kind
            # Post setter check (path's trailing slash)
            if self.path.endswith('/'):
                raise NodeError("Node's path cannot end with slash")

    def get_parent_path(self) -> bytes:
        """
        Returns node's parent path as bytes, or empty bytes if node is root.
        """
        if self.is_root():
            return b''
        str_path = vcspath.dirname(self.path.rstrip('/')) + '/'

        return safe_bytes(str_path)

    def is_file(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.FILE``, ``False``
        otherwise.
        """
        return self.kind == NodeKind.FILE

    def is_dir(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.DIR``, ``False``
        otherwise.
        """
        return self.kind == NodeKind.DIR

    def is_root(self):
        """
        Returns ``True`` if node is a root node and ``False`` otherwise.
        """
        return self.kind == NodeKind.DIR and self.path == ''

    def is_submodule(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.SUBMODULE``, ``False``
        otherwise.
        """
        return self.kind == NodeKind.SUBMODULE

    def is_largefile(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.LARGEFILE``, ``False``
        otherwise
        """
        return self.kind == NodeKind.LARGEFILE

    def is_link(self):
        """Ask the commit whether this path is a link; ``False`` without a commit."""
        if self.commit:
            return self.commit.is_link(self.path)
        return False

    # The state flags compare by value; identity (``is``) on strings depends
    # on both sides being the very same object.
    @LazyProperty
    def added(self):
        return self.state == NodeState.ADDED

    @LazyProperty
    def changed(self):
        return self.state == NodeState.CHANGED

    @LazyProperty
    def not_changed(self):
        return self.state == NodeState.NOT_CHANGED

    @LazyProperty
    def removed(self):
        return self.state == NodeState.REMOVED
302
319
303
320
304 class FileNode(Node):
321 class FileNode(Node):
305 """
322 """
306 Class representing file nodes.
323 Class representing file nodes.
307
324
308 :attribute: path: path to the node, relative to repository's root
325 :attribute: path: path to the node, relative to repository's root
309 :attribute: content: if given arbitrary sets content of the file
326 :attribute: content: if given arbitrary sets content of the file
310 :attribute: commit: if given, first time content is accessed, callback
327 :attribute: commit: if given, first time content is accessed, callback
311 :attribute: mode: stat mode for a node. Default is `FILEMODE_DEFAULT`.
328 :attribute: mode: stat mode for a node. Default is `FILEMODE_DEFAULT`.
312 """
329 """
313 _filter_pre_load = []
330 _filter_pre_load = []
314
331
315 def __init__(self, path, content=None, commit=None, mode=None, pre_load=None):
332 def __init__(self, path: bytes, content: bytes | None = None, commit=None, mode=None, pre_load=None):
316 """
333 """
317 Only one of ``content`` and ``commit`` may be given. Passing both
334 Only one of ``content`` and ``commit`` may be given. Passing both
318 would raise ``NodeError`` exception.
335 would raise ``NodeError`` exception.
319
336
320 :param path: relative path to the node
337 :param path: relative path to the node
321 :param content: content may be passed to constructor
338 :param content: content may be passed to constructor
322 :param commit: if given, will use it to lazily fetch content
339 :param commit: if given, will use it to lazily fetch content
323 :param mode: ST_MODE (i.e. 0100644)
340 :param mode: ST_MODE (i.e. 0100644)
324 """
341 """
325 if content and commit:
342 if content and commit:
326 raise NodeError("Cannot use both content and commit")
343 raise NodeError("Cannot use both content and commit")
327 super(FileNode, self).__init__(path, kind=NodeKind.FILE)
344
345 super().__init__(path, kind=NodeKind.FILE)
346
328 self.commit = commit
347 self.commit = commit
348 if content and not isinstance(content, bytes):
349 # File content is one thing that inherently must be bytes
350 # we support passing str too, and convert the content
351 content = safe_bytes(content)
329 self._content = content
352 self._content = content
330 self._mode = mode or FILEMODE_DEFAULT
353 self._mode = mode or FILEMODE_DEFAULT
331
354
332 self._set_bulk_properties(pre_load)
355 self._set_bulk_properties(pre_load)
333
356
357 def __eq__(self, other):
358 eq = super(FileNode, self).__eq__(other)
359 if eq is not None:
360 return eq
361 return self.content == other.content
362
363 def __hash__(self):
364 raw_id = getattr(self.commit, 'raw_id', '')
365 return hash((self.path, raw_id))
366
367 def __lt__(self, other):
368 lt = super(FileNode, self).__lt__(other)
369 if lt is not None:
370 return lt
371 return self.content < other.content
372
373 def __repr__(self):
374 short_id = getattr(self.commit, 'short_id', '')
375 return f'<{self.__class__.__name__} path={self.path!r}, short_id={short_id}>'
376
334 def _set_bulk_properties(self, pre_load):
377 def _set_bulk_properties(self, pre_load):
335 if not pre_load:
378 if not pre_load:
336 return
379 return
337 pre_load = [entry for entry in pre_load
380 pre_load = [entry for entry in pre_load
338 if entry not in self._filter_pre_load]
381 if entry not in self._filter_pre_load]
339 if not pre_load:
382 if not pre_load:
340 return
383 return
341
384
342 for attr_name in pre_load:
385 remote = self.commit.get_remote()
343 result = getattr(self, attr_name)
386 result = remote.bulk_file_request(self.commit.raw_id, self.path, pre_load)
344 if callable(result):
387
345 result = result()
388 for attr, value in result.items():
346 self.__dict__[attr_name] = result
389 if attr == "flags":
390 self.__dict__['mode'] = safe_str(value)
391 elif attr == "size":
392 self.__dict__['size'] = value
393 elif attr == "data":
394 self.__dict__['_content'] = value
395 elif attr == "is_binary":
396 self.__dict__['is_binary'] = value
397 elif attr == "md5":
398 self.__dict__['md5'] = value
399 else:
400 raise ValueError(f'Unsupported attr in bulk_property: {attr}')
347
401
348 @LazyProperty
402 @LazyProperty
349 def mode(self):
403 def mode(self):
350 """
404 """
351 Returns lazily mode of the FileNode. If `commit` is not set, would
405 Returns lazily mode of the FileNode. If `commit` is not set, would
352 use value given at initialization or `FILEMODE_DEFAULT` (default).
406 use value given at initialization or `FILEMODE_DEFAULT` (default).
353 """
407 """
354 if self.commit:
408 if self.commit:
355 mode = self.commit.get_file_mode(self.path)
409 mode = self.commit.get_file_mode(self.path)
356 else:
410 else:
357 mode = self._mode
411 mode = self._mode
358 return mode
412 return mode
359
413
360 @LazyProperty
414 @LazyProperty
361 def raw_bytes(self):
415 def raw_bytes(self) -> bytes:
362 """
416 """
363 Returns lazily the raw bytes of the FileNode.
417 Returns lazily the raw bytes of the FileNode.
364 """
418 """
365 if self.commit:
419 if self.commit:
366 if self._content is None:
420 if self._content is None:
367 self._content = self.commit.get_file_content(self.path)
421 self._content = self.commit.get_file_content(self.path)
368 content = self._content
422 content = self._content
369 else:
423 else:
370 content = self._content
424 content = self._content
371 return content
425 return content
372
426
427 def content_uncached(self):
428 """
429 Returns lazily content of the FileNode.
430 """
431 if self.commit:
432 content = self.commit.get_file_content(self.path)
433 else:
434 content = self._content
435 return content
436
373 def stream_bytes(self):
437 def stream_bytes(self):
374 """
438 """
375 Returns an iterator that will stream the content of the file directly from
439 Returns an iterator that will stream the content of the file directly from
376 vcsserver without loading it to memory.
440 vcsserver without loading it to memory.
377 """
441 """
378 if self.commit:
442 if self.commit:
379 return self.commit.get_file_content_streamed(self.path)
443 return self.commit.get_file_content_streamed(self.path)
380 raise NodeError("Cannot retrieve stream_bytes without related commit attribute")
444 raise NodeError("Cannot retrieve stream_bytes without related commit attribute")
381
445
382 @LazyProperty
383 def md5(self):
384 """
385 Returns md5 of the file node.
386 """
387 return md5(self.raw_bytes)
388
389 def metadata_uncached(self):
446 def metadata_uncached(self):
390 """
447 """
391 Returns md5, binary flag of the file node, without any cache usage.
448 Returns md5, binary flag of the file node, without any cache usage.
392 """
449 """
393
450
394 content = self.content_uncached()
451 content = self.content_uncached()
395
452
396 is_binary = content and '\0' in content
453 is_binary = bool(content and BIN_BYTE_MARKER in content)
397 size = 0
454 size = 0
398 if content:
455 if content:
399 size = len(content)
456 size = len(content)
400
457
401 return is_binary, md5(content), size, content
458 return is_binary, md5(content), size, content
402
459
403 def content_uncached(self):
460 @LazyProperty
404 """
461 def content(self) -> bytes:
405 Returns lazily content of the FileNode. If possible, would try to
406 decode content from UTF-8.
407 """
462 """
408 if self.commit:
463 Returns lazily content of the FileNode.
409 content = self.commit.get_file_content(self.path)
464 """
410 else:
465 content = self.raw_bytes
411 content = self._content
466 if content and not isinstance(content, bytes):
467 raise ValueError(f'Content is of type {type(content)} instead of bytes')
412 return content
468 return content
413
469
414 @LazyProperty
470 @LazyProperty
415 def content(self):
471 def str_content(self) -> str:
416 """
472 return safe_str(self.raw_bytes)
417 Returns lazily content of the FileNode. If possible, would try to
418 decode content from UTF-8.
419 """
420 content = self.raw_bytes
421
422 if self.is_binary:
423 return content
424 return safe_unicode(content)
425
473
426 @LazyProperty
474 @LazyProperty
427 def size(self):
475 def size(self):
428 if self.commit:
476 if self.commit:
429 return self.commit.get_file_size(self.path)
477 return self.commit.get_file_size(self.path)
430 raise NodeError(
478 raise NodeError(
431 "Cannot retrieve size of the file without related "
479 "Cannot retrieve size of the file without related "
432 "commit attribute")
480 "commit attribute")
433
481
434 @LazyProperty
482 @LazyProperty
435 def message(self):
483 def message(self):
436 if self.commit:
484 if self.commit:
437 return self.last_commit.message
485 return self.last_commit.message
438 raise NodeError(
486 raise NodeError(
439 "Cannot retrieve message of the file without related "
487 "Cannot retrieve message of the file without related "
440 "commit attribute")
488 "commit attribute")
441
489
442 @LazyProperty
490 @LazyProperty
443 def last_commit(self):
491 def last_commit(self):
444 if self.commit:
492 if self.commit:
445 pre_load = ["author", "date", "message", "parents"]
493 pre_load = ["author", "date", "message", "parents"]
446 return self.commit.get_path_commit(self.path, pre_load=pre_load)
494 return self.commit.get_path_commit(self.path, pre_load=pre_load)
447 raise NodeError(
495 raise NodeError(
448 "Cannot retrieve last commit of the file without "
496 "Cannot retrieve last commit of the file without "
449 "related commit attribute")
497 "related commit attribute")
450
498
451 def get_mimetype(self):
499 def get_mimetype(self):
452 """
500 """
453 Mimetype is calculated based on the file's content. If ``_mimetype``
501 Mimetype is calculated based on the file's content. If ``_mimetype``
454 attribute is available, it will be returned (backends which store
502 attribute is available, it will be returned (backends which store
455 mimetypes or can easily recognize them, should set this private
503 mimetypes or can easily recognize them, should set this private
456 attribute to indicate that type should *NOT* be calculated).
504 attribute to indicate that type should *NOT* be calculated).
457 """
505 """
458
506
459 if hasattr(self, '_mimetype'):
507 if hasattr(self, '_mimetype'):
460 if (isinstance(self._mimetype, (tuple, list,)) and
508 if (isinstance(self._mimetype, (tuple, list)) and
461 len(self._mimetype) == 2):
509 len(self._mimetype) == 2):
462 return self._mimetype
510 return self._mimetype
463 else:
511 else:
464 raise NodeError('given _mimetype attribute must be an 2 '
512 raise NodeError('given _mimetype attribute must be an 2 '
465 'element list or tuple')
513 'element list or tuple')
466
514
467 db = get_mimetypes_db()
515 db = get_mimetypes_db()
468 mtype, encoding = db.guess_type(self.name)
516 mtype, encoding = db.guess_type(self.name)
469
517
470 if mtype is None:
518 if mtype is None:
471 if not self.is_largefile() and self.is_binary:
519 if not self.is_largefile() and self.is_binary:
472 mtype = 'application/octet-stream'
520 mtype = 'application/octet-stream'
473 encoding = None
521 encoding = None
474 else:
522 else:
475 mtype = 'text/plain'
523 mtype = 'text/plain'
476 encoding = None
524 encoding = None
477
525
478 # try with pygments
526 # try with pygments
479 try:
527 try:
480 from pygments.lexers import get_lexer_for_filename
528 from pygments.lexers import get_lexer_for_filename
481 mt = get_lexer_for_filename(self.name).mimetypes
529 mt = get_lexer_for_filename(self.name).mimetypes
482 except Exception:
530 except Exception:
483 mt = None
531 mt = None
484
532
485 if mt:
533 if mt:
486 mtype = mt[0]
534 mtype = mt[0]
487
535
488 return mtype, encoding
536 return mtype, encoding
489
537
490 @LazyProperty
538 @LazyProperty
491 def mimetype(self):
539 def mimetype(self):
492 """
540 """
493 Wrapper around full mimetype info. It returns only type of fetched
541 Wrapper around full mimetype info. It returns only type of fetched
494 mimetype without the encoding part. use get_mimetype function to fetch
542 mimetype without the encoding part. use get_mimetype function to fetch
495 full set of (type,encoding)
543 full set of (type,encoding)
496 """
544 """
497 return self.get_mimetype()[0]
545 return self.get_mimetype()[0]
498
546
499 @LazyProperty
547 @LazyProperty
500 def mimetype_main(self):
548 def mimetype_main(self):
501 return self.mimetype.split('/')[0]
549 return self.mimetype.split('/')[0]
502
550
503 @classmethod
551 @classmethod
504 def get_lexer(cls, filename, content=None):
552 def get_lexer(cls, filename, content=None):
505 from pygments import lexers
553 from pygments import lexers
506
554
507 extension = filename.split('.')[-1]
555 extension = filename.split('.')[-1]
508 lexer = None
556 lexer = None
509
557
510 try:
558 try:
511 lexer = lexers.guess_lexer_for_filename(
559 lexer = lexers.guess_lexer_for_filename(
512 filename, content, stripnl=False)
560 filename, content, stripnl=False)
513 except lexers.ClassNotFound:
561 except lexers.ClassNotFound:
514 lexer = None
562 pass
515
563
516 # try our EXTENSION_MAP
564 # try our EXTENSION_MAP
517 if not lexer:
565 if not lexer:
518 try:
566 try:
519 lexer_class = LANGUAGES_EXTENSIONS_MAP.get(extension)
567 lexer_class = LANGUAGES_EXTENSIONS_MAP.get(extension)
520 if lexer_class:
568 if lexer_class:
521 lexer = lexers.get_lexer_by_name(lexer_class[0])
569 lexer = lexers.get_lexer_by_name(lexer_class[0])
522 except lexers.ClassNotFound:
570 except lexers.ClassNotFound:
523 lexer = None
571 pass
524
572
525 if not lexer:
573 if not lexer:
526 lexer = lexers.TextLexer(stripnl=False)
574 lexer = lexers.TextLexer(stripnl=False)
527
575
528 return lexer
576 return lexer
529
577
530 @LazyProperty
578 @LazyProperty
531 def lexer(self):
579 def lexer(self):
532 """
580 """
533 Returns pygment's lexer class. Would try to guess lexer taking file's
581 Returns pygment's lexer class. Would try to guess lexer taking file's
534 content, name and mimetype.
582 content, name and mimetype.
535 """
583 """
536 return self.get_lexer(self.name, self.content)
584 # TODO: this is more proper, but super heavy on investigating the type based on the content
585 #self.get_lexer(self.name, self.content)
586
587 return self.get_lexer(self.name)
537
588
538 @LazyProperty
589 @LazyProperty
539 def lexer_alias(self):
590 def lexer_alias(self):
540 """
591 """
541 Returns first alias of the lexer guessed for this file.
592 Returns first alias of the lexer guessed for this file.
542 """
593 """
543 return self.lexer.aliases[0]
594 return self.lexer.aliases[0]
544
595
545 @LazyProperty
596 @LazyProperty
546 def history(self):
597 def history(self):
547 """
598 """
548 Returns a list of commit for this file in which the file was changed
599 Returns a list of commit for this file in which the file was changed
549 """
600 """
550 if self.commit is None:
601 if self.commit is None:
551 raise NodeError('Unable to get commit for this FileNode')
602 raise NodeError('Unable to get commit for this FileNode')
552 return self.commit.get_path_history(self.path)
603 return self.commit.get_path_history(self.path)
553
604
554 @LazyProperty
605 @LazyProperty
555 def annotate(self):
606 def annotate(self):
556 """
607 """
557 Returns a list of three element tuples with lineno, commit and line
608 Returns a list of three element tuples with lineno, commit and line
558 """
609 """
559 if self.commit is None:
610 if self.commit is None:
560 raise NodeError('Unable to get commit for this FileNode')
611 raise NodeError('Unable to get commit for this FileNode')
561 pre_load = ["author", "date", "message", "parents"]
612 pre_load = ["author", "date", "message", "parents"]
562 return self.commit.get_file_annotate(self.path, pre_load=pre_load)
613 return self.commit.get_file_annotate(self.path, pre_load=pre_load)
563
614
564 @LazyProperty
615 @LazyProperty
565 def state(self):
616 def state(self):
566 if not self.commit:
617 if not self.commit:
567 raise NodeError(
618 raise NodeError(
568 "Cannot check state of the node if it's not "
619 "Cannot check state of the node if it's not "
569 "linked with commit")
620 "linked with commit")
570 elif self.path in (node.path for node in self.commit.added):
621 elif self.path in (node.path for node in self.commit.added):
571 return NodeState.ADDED
622 return NodeState.ADDED
572 elif self.path in (node.path for node in self.commit.changed):
623 elif self.path in (node.path for node in self.commit.changed):
573 return NodeState.CHANGED
624 return NodeState.CHANGED
574 else:
625 else:
575 return NodeState.NOT_CHANGED
626 return NodeState.NOT_CHANGED
576
627
577 @LazyProperty
628 @LazyProperty
578 def is_binary(self):
629 def is_binary(self):
579 """
630 """
580 Returns True if file has binary content.
631 Returns True if file has binary content.
581 """
632 """
582 if self.commit:
633 if self.commit:
583 return self.commit.is_node_binary(self.path)
634 return self.commit.is_node_binary(self.path)
584 else:
635 else:
585 raw_bytes = self._content
636 raw_bytes = self._content
586 return raw_bytes and '\0' in raw_bytes
637 return bool(raw_bytes and BIN_BYTE_MARKER in raw_bytes)
638
639 @LazyProperty
640 def md5(self):
641 """
642 Returns md5 of the file node.
643 """
644
645 if self.commit:
646 return self.commit.node_md5_hash(self.path)
647 else:
648 raw_bytes = self._content
649 # TODO: this sucks, we're computing md5 on potentially super big stream data...
650 return md5(raw_bytes)
587
651
588 @LazyProperty
652 @LazyProperty
589 def extension(self):
653 def extension(self):
590 """Returns filenode extension"""
654 """Returns filenode extension"""
591 return self.name.split('.')[-1]
655 return self.name.split('.')[-1]
592
656
593 @property
657 @property
594 def is_executable(self):
658 def is_executable(self):
595 """
659 """
596 Returns ``True`` if file has executable flag turned on.
660 Returns ``True`` if file has executable flag turned on.
597 """
661 """
598 return bool(self.mode & stat.S_IXUSR)
662 return bool(self.mode & stat.S_IXUSR)
599
663
600 def get_largefile_node(self):
664 def get_largefile_node(self):
601 """
665 """
602 Try to return a Mercurial FileNode from this node. It does internal
666 Try to return a Mercurial FileNode from this node. It does internal
603 checks inside largefile store, if that file exist there it will
667 checks inside largefile store, if that file exist there it will
604 create special instance of LargeFileNode which can get content from
668 create special instance of LargeFileNode which can get content from
605 LF store.
669 LF store.
606 """
670 """
607 if self.commit:
671 if self.commit:
608 return self.commit.get_largefile_node(self.path)
672 return self.commit.get_largefile_node(self.path)
609
673
610 def count_lines(self, content, count_empty=False):
674 def count_lines(self, content: str | bytes, count_empty=False):
675 if isinstance(content, str):
676 newline_marker = '\n'
677 elif isinstance(content, bytes):
678 newline_marker = b'\n'
679 else:
680 raise ValueError('content must be bytes or str got {type(content)} instead')
611
681
612 if count_empty:
682 if count_empty:
613 all_lines = 0
683 all_lines = 0
614 empty_lines = 0
684 empty_lines = 0
615 for line in content.splitlines(True):
685 for line in content.splitlines(True):
616 if line == '\n':
686 if line == newline_marker:
617 empty_lines += 1
687 empty_lines += 1
618 all_lines += 1
688 all_lines += 1
619
689
620 return all_lines, all_lines - empty_lines
690 return all_lines, all_lines - empty_lines
621 else:
691 else:
622 # fast method
692 # fast method
623 empty_lines = all_lines = content.count('\n')
693 empty_lines = all_lines = content.count(newline_marker)
624 if all_lines == 0 and content:
694 if all_lines == 0 and content:
625 # one-line without a newline
695 # one-line without a newline
626 empty_lines = all_lines = 1
696 empty_lines = all_lines = 1
627
697
628 return all_lines, empty_lines
698 return all_lines, empty_lines
629
699
630 def lines(self, count_empty=False):
700 def lines(self, count_empty=False):
631 all_lines, empty_lines = 0, 0
701 all_lines, empty_lines = 0, 0
632
702
633 if not self.is_binary:
703 if not self.is_binary:
634 content = self.content
704 content = self.content
635 all_lines, empty_lines = self.count_lines(content, count_empty=count_empty)
705 all_lines, empty_lines = self.count_lines(content, count_empty=count_empty)
636 return all_lines, empty_lines
706 return all_lines, empty_lines
637
707
638 def __repr__(self):
639 return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
640 getattr(self.commit, 'short_id', ''))
641
642
708
643 class RemovedFileNode(FileNode):
709 class RemovedFileNode(FileNode):
644 """
710 """
645 Dummy FileNode class - trying to access any public attribute except path,
711 Dummy FileNode class - trying to access any public attribute except path,
646 name, kind or state (or methods/attributes checking those two) would raise
712 name, kind or state (or methods/attributes checking those two) would raise
647 RemovedFileNodeError.
713 RemovedFileNodeError.
648 """
714 """
649 ALLOWED_ATTRIBUTES = [
715 ALLOWED_ATTRIBUTES = [
650 'name', 'path', 'state', 'is_root', 'is_file', 'is_dir', 'kind',
716 'name', 'path', 'state', 'is_root', 'is_file', 'is_dir', 'kind',
651 'added', 'changed', 'not_changed', 'removed'
717 'added', 'changed', 'not_changed', 'removed', 'bytes_path'
652 ]
718 ]
653
719
654 def __init__(self, path):
720 def __init__(self, path):
655 """
721 """
656 :param path: relative path to the node
722 :param path: relative path to the node
657 """
723 """
658 super(RemovedFileNode, self).__init__(path=path)
724 super().__init__(path=path)
659
725
660 def __getattribute__(self, attr):
726 def __getattribute__(self, attr):
661 if attr.startswith('_') or attr in RemovedFileNode.ALLOWED_ATTRIBUTES:
727 if attr.startswith('_') or attr in RemovedFileNode.ALLOWED_ATTRIBUTES:
662 return super(RemovedFileNode, self).__getattribute__(attr)
728 return super().__getattribute__(attr)
663 raise RemovedFileNodeError(
729 raise RemovedFileNodeError(f"Cannot access attribute {attr} on RemovedFileNode. Not in allowed attributes")
664 "Cannot access attribute %s on RemovedFileNode" % attr)
665
730
666 @LazyProperty
731 @LazyProperty
667 def state(self):
732 def state(self):
668 return NodeState.REMOVED
733 return NodeState.REMOVED
669
734
670
735
671 class DirNode(Node):
736 class DirNode(Node):
672 """
737 """
673 DirNode stores list of files and directories within this node.
738 DirNode stores list of files and directories within this node.
674 Nodes may be used standalone but within repository context they
739 Nodes may be used standalone but within repository context they
675 lazily fetch data within same repository's commit.
740 lazily fetch data within same repository's commit.
676 """
741 """
677
742
678 def __init__(self, path, nodes=(), commit=None):
743 def __init__(self, path, nodes=(), commit=None, default_pre_load=None):
679 """
744 """
680 Only one of ``nodes`` and ``commit`` may be given. Passing both
745 Only one of ``nodes`` and ``commit`` may be given. Passing both
681 would raise ``NodeError`` exception.
746 would raise ``NodeError`` exception.
682
747
683 :param path: relative path to the node
748 :param path: relative path to the node
684 :param nodes: content may be passed to constructor
749 :param nodes: content may be passed to constructor
685 :param commit: if given, will use it to lazily fetch content
750 :param commit: if given, will use it to lazily fetch content
686 """
751 """
687 if nodes and commit:
752 if nodes and commit:
688 raise NodeError("Cannot use both nodes and commit")
753 raise NodeError("Cannot use both nodes and commit")
689 super(DirNode, self).__init__(path, NodeKind.DIR)
754 super(DirNode, self).__init__(path, NodeKind.DIR)
690 self.commit = commit
755 self.commit = commit
691 self._nodes = nodes
756 self._nodes = nodes
757 self.default_pre_load = default_pre_load or ['is_binary', 'size']
758
759 def __iter__(self):
760 for node in self.nodes:
761 yield node
762
763 def __eq__(self, other):
764 eq = super(DirNode, self).__eq__(other)
765 if eq is not None:
766 return eq
767 # check without entering each dir
768 self_nodes_paths = list(sorted(n.path for n in self.nodes))
769 other_nodes_paths = list(sorted(n.path for n in self.nodes))
770 return self_nodes_paths == other_nodes_paths
771
772 def __lt__(self, other):
773 lt = super(DirNode, self).__lt__(other)
774 if lt is not None:
775 return lt
776 # check without entering each dir
777 self_nodes_paths = list(sorted(n.path for n in self.nodes))
778 other_nodes_paths = list(sorted(n.path for n in self.nodes))
779 return self_nodes_paths < other_nodes_paths
692
780
693 @LazyProperty
781 @LazyProperty
694 def content(self):
782 def content(self):
695 raise NodeError(
783 raise NodeError(f"{self} represents a dir and has no `content` attribute")
696 "%s represents a dir and has no `content` attribute" % self)
697
784
698 @LazyProperty
785 @LazyProperty
699 def nodes(self):
786 def nodes(self):
700 if self.commit:
787 if self.commit:
701 nodes = self.commit.get_nodes(self.path)
788 nodes = self.commit.get_nodes(self.path, pre_load=self.default_pre_load)
702 else:
789 else:
703 nodes = self._nodes
790 nodes = self._nodes
704 self._nodes_dict = dict((node.path, node) for node in nodes)
791 self._nodes_dict = dict((node.path, node) for node in nodes)
705 return sorted(nodes)
792 return sorted(nodes)
706
793
707 @LazyProperty
794 @LazyProperty
708 def files(self):
795 def files(self):
709 return sorted((node for node in self.nodes if node.is_file()))
796 return sorted((node for node in self.nodes if node.is_file()))
710
797
711 @LazyProperty
798 @LazyProperty
712 def dirs(self):
799 def dirs(self):
713 return sorted((node for node in self.nodes if node.is_dir()))
800 return sorted((node for node in self.nodes if node.is_dir()))
714
801
715 def __iter__(self):
716 for node in self.nodes:
717 yield node
718
719 def get_node(self, path):
802 def get_node(self, path):
720 """
803 """
721 Returns node from within this particular ``DirNode``, so it is now
804 Returns node from within this particular ``DirNode``, so it is now
722 allowed to fetch, i.e. node located at 'docs/api/index.rst' from node
805 allowed to fetch, i.e. node located at 'docs/api/index.rst' from node
723 'docs'. In order to access deeper nodes one must fetch nodes between
806 'docs'. In order to access deeper nodes one must fetch nodes between
724 them first - this would work::
807 them first - this would work::
725
808
726 docs = root.get_node('docs')
809 docs = root.get_node('docs')
727 docs.get_node('api').get_node('index.rst')
810 docs.get_node('api').get_node('index.rst')
728
811
729 :param: path - relative to the current node
812 :param: path - relative to the current node
730
813
731 .. note::
814 .. note::
732 To access lazily (as in example above) node have to be initialized
815 To access lazily (as in example above) node have to be initialized
733 with related commit object - without it node is out of
816 with related commit object - without it node is out of
734 context and may know nothing about anything else than nearest
817 context and may know nothing about anything else than nearest
735 (located at same level) nodes.
818 (located at same level) nodes.
736 """
819 """
737 try:
820 try:
738 path = path.rstrip('/')
821 path = path.rstrip('/')
739 if path == '':
822 if path == '':
740 raise NodeError("Cannot retrieve node without path")
823 raise NodeError("Cannot retrieve node without path")
741 self.nodes # access nodes first in order to set _nodes_dict
824 self.nodes # access nodes first in order to set _nodes_dict
742 paths = path.split('/')
825 paths = path.split('/')
743 if len(paths) == 1:
826 if len(paths) == 1:
744 if not self.is_root():
827 if not self.is_root():
745 path = '/'.join((self.path, paths[0]))
828 path = '/'.join((self.path, paths[0]))
746 else:
829 else:
747 path = paths[0]
830 path = paths[0]
748 return self._nodes_dict[path]
831 return self._nodes_dict[path]
749 elif len(paths) > 1:
832 elif len(paths) > 1:
750 if self.commit is None:
833 if self.commit is None:
751 raise NodeError("Cannot access deeper nodes without commit")
834 raise NodeError("Cannot access deeper nodes without commit")
752 else:
835 else:
753 path1, path2 = paths[0], '/'.join(paths[1:])
836 path1, path2 = paths[0], '/'.join(paths[1:])
754 return self.get_node(path1).get_node(path2)
837 return self.get_node(path1).get_node(path2)
755 else:
838 else:
756 raise KeyError
839 raise KeyError
757 except KeyError:
840 except KeyError:
758 raise NodeError("Node does not exist at %s" % path)
841 raise NodeError(f"Node does not exist at {path}")
759
842
760 @LazyProperty
843 @LazyProperty
761 def state(self):
844 def state(self):
762 raise NodeError("Cannot access state of DirNode")
845 raise NodeError("Cannot access state of DirNode")
763
846
764 @LazyProperty
847 @LazyProperty
765 def size(self):
848 def size(self):
766 size = 0
849 size = 0
767 for root, dirs, files in self.commit.walk(self.path):
850 for root, dirs, files in self.commit.walk(self.path):
768 for f in files:
851 for f in files:
769 size += f.size
852 size += f.size
770
853
771 return size
854 return size
772
855
773 @LazyProperty
856 @LazyProperty
774 def last_commit(self):
857 def last_commit(self):
775 if self.commit:
858 if self.commit:
776 pre_load = ["author", "date", "message", "parents"]
859 pre_load = ["author", "date", "message", "parents"]
777 return self.commit.get_path_commit(self.path, pre_load=pre_load)
860 return self.commit.get_path_commit(self.path, pre_load=pre_load)
778 raise NodeError(
861 raise NodeError(
779 "Cannot retrieve last commit of the file without "
862 "Cannot retrieve last commit of the file without "
780 "related commit attribute")
863 "related commit attribute")
781
864
782 def __repr__(self):
865 def __repr__(self):
783 return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
866 short_id = getattr(self.commit, 'short_id', '')
784 getattr(self.commit, 'short_id', ''))
867 return f'<{self.__class__.__name__} {self.path!r} @ {short_id}>'
785
868
786
869
787 class RootNode(DirNode):
870 class RootNode(DirNode):
788 """
871 """
789 DirNode being the root node of the repository.
872 DirNode being the root node of the repository.
790 """
873 """
791
874
792 def __init__(self, nodes=(), commit=None):
875 def __init__(self, nodes=(), commit=None):
793 super(RootNode, self).__init__(path='', nodes=nodes, commit=commit)
876 super(RootNode, self).__init__(path=b'', nodes=nodes, commit=commit)
794
877
795 def __repr__(self):
878 def __repr__(self):
796 return '<%s>' % self.__class__.__name__
879 return f'<{self.__class__.__name__}>'
797
880
798
881
799 class SubModuleNode(Node):
882 class SubModuleNode(Node):
800 """
883 """
801 represents a SubModule of Git or SubRepo of Mercurial
884 represents a SubModule of Git or SubRepo of Mercurial
802 """
885 """
803 is_binary = False
886 is_binary = False
804 size = 0
887 size = 0
805
888
806 def __init__(self, name, url=None, commit=None, alias=None):
889 def __init__(self, name, url=None, commit=None, alias=None):
807 self.path = name
890 self.path = name
808 self.kind = NodeKind.SUBMODULE
891 self.kind = NodeKind.SUBMODULE
809 self.alias = alias
892 self.alias = alias
810
893
811 # we have to use EmptyCommit here since this can point to svn/git/hg
894 # we have to use EmptyCommit here since this can point to svn/git/hg
812 # submodules we cannot get from repository
895 # submodules we cannot get from repository
813 self.commit = EmptyCommit(str(commit), alias=alias)
896 self.commit = EmptyCommit(str(commit), alias=alias)
814 self.url = url or self._extract_submodule_url()
897 self.url = url or self._extract_submodule_url()
815
898
816 def __repr__(self):
899 def __repr__(self):
817 return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
900 short_id = getattr(self.commit, 'short_id', '')
818 getattr(self.commit, 'short_id', ''))
901 return f'<{self.__class__.__name__} {self.path!r} @ {short_id}>'
819
902
820 def _extract_submodule_url(self):
903 def _extract_submodule_url(self):
821 # TODO: find a way to parse gits submodule file and extract the
904 # TODO: find a way to parse gits submodule file and extract the
822 # linking URL
905 # linking URL
823 return self.path
906 return self.path
824
907
825 @LazyProperty
908 @LazyProperty
826 def name(self):
909 def name(self):
827 """
910 """
828 Returns name of the node so if its path
911 Returns name of the node so if its path
829 then only last part is returned.
912 then only last part is returned.
830 """
913 """
831 org = safe_unicode(self.path.rstrip('/').split('/')[-1])
914 org = safe_str(self.path.rstrip('/').split('/')[-1])
832 return '%s @ %s' % (org, self.commit.short_id)
915 return f'{org} @ {self.commit.short_id}'
833
916
834
917
835 class LargeFileNode(FileNode):
918 class LargeFileNode(FileNode):
836
919
837 def __init__(self, path, url=None, commit=None, alias=None, org_path=None):
920 def __init__(self, path, url=None, commit=None, alias=None, org_path=None):
838 self.path = path
921 self._validate_path(path) # can throw exception if path is invalid
839 self.org_path = org_path
922 self.org_path = org_path # as stored in VCS as LF pointer
923
924 self.bytes_path = path.rstrip(b'/') # store for __repr__
925 self.path = safe_str(self.bytes_path) # we store paths as str
926
840 self.kind = NodeKind.LARGEFILE
927 self.kind = NodeKind.LARGEFILE
841 self.alias = alias
928 self.alias = alias
842 self._content = ''
929 self._content = b''
843
930
844 def _validate_path(self, path):
931 def _validate_path(self, path: bytes):
845 """
932 """
846 we override check since the LargeFileNode path is system absolute
933 we override check since the LargeFileNode path is system absolute, but we check for bytes only
847 """
934 """
848 pass
935 self._assert_bytes(path)
849
936
850 def __repr__(self):
937 def __repr__(self):
851 return '<%s %r>' % (self.__class__.__name__, self.path)
938 return f'<{self.__class__.__name__} {self.org_path} -> {self.path!r}>'
852
939
853 @LazyProperty
940 @LazyProperty
854 def size(self):
941 def size(self):
855 return os.stat(self.path).st_size
942 return os.stat(self.path).st_size
856
943
857 @LazyProperty
944 @LazyProperty
858 def raw_bytes(self):
945 def raw_bytes(self):
859 with open(self.path, 'rb') as f:
946 with open(self.path, 'rb') as f:
860 content = f.read()
947 content = f.read()
861 return content
948 return content
862
949
863 @LazyProperty
950 @LazyProperty
864 def name(self):
951 def name(self):
865 """
952 """
866 Overwrites name to be the org lf path
953 Overwrites name to be the org lf path
867 """
954 """
868 return self.org_path
955 return self.org_path
869
956
870 def stream_bytes(self):
957 def stream_bytes(self):
871 with open(self.path, 'rb') as stream:
958 with open(self.path, 'rb') as stream:
872 while True:
959 while True:
873 data = stream.read(16 * 1024)
960 data = stream.read(16 * 1024)
874 if not data:
961 if not data:
875 break
962 break
876 yield data
963 yield data
@@ -1,161 +1,161 b''
1
1
2
2
3 # Copyright (C) 2014-2020 RhodeCode GmbH
3 # Copyright (C) 2014-2020 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 """
21 """
22 Utilities aimed to help achieve mostly basic tasks.
22 Utilities aimed to help achieve mostly basic tasks.
23 """
23 """
24
24
25
25
26
26
27
27
28 import re
28 import re
29 import os
29 import os
30 import time
30 import time
31 import datetime
31 import datetime
32 import logging
32 import logging
33
33
34 from rhodecode.lib.vcs.conf import settings
34 from rhodecode.lib.vcs.conf import settings
35 from rhodecode.lib.vcs.exceptions import VCSError, VCSBackendNotSupportedError
35 from rhodecode.lib.vcs.exceptions import VCSError, VCSBackendNotSupportedError
36
36
37
37
38 log = logging.getLogger(__name__)
38 log = logging.getLogger(__name__)
39
39
40
40
def get_scm(path):
    """
    Returns one of alias from ``ALIASES`` (in order of precedence same as
    shortcuts given in ``ALIASES``) and working dir path for the given
    argument. If no scm-specific directory is found or more than one scm is
    found at that directory, ``VCSError`` is raised.

    :param path: directory to inspect.
    :returns: ``(alias, path)`` tuple for the single scm found at ``path``.
    :raises VCSError: if ``path`` is not a directory, or if zero or more
        than one scm is detected there.
    """
    if not os.path.isdir(path):
        raise VCSError("Given path %s is not a directory" % path)

    found_scms = [(scm, path) for scm in get_scms_for_path(path)]

    if len(found_scms) > 1:
        found = ', '.join(x[0] for x in found_scms)
        raise VCSError(
            'More than one [%s] scm found at given path %s' % (found, path))

    # truthiness check instead of comparing len() against the literal 0
    if not found_scms:
        raise VCSError('No scm found at given path %s' % path)

    return found_scms[0]
62
62
63
63
def get_scm_backend(backend_type):
    """
    Return whatever ``rhodecode.lib.vcs.backends.get_backend`` yields for
    ``backend_type``.
    """
    # imported at call time rather than at module import time
    from rhodecode.lib.vcs.backends import get_backend
    return get_backend(backend_type)
67
67
68
68
def get_scms_for_path(path):
    """
    Returns all scm's found at the given path. If no scm is recognized
    - empty list is returned.

    :param path: path to directory which should be checked. May be callable.

    :returns: list of backend aliases whose backend reports ``path`` as a
        valid repository.
    :raises VCSError: if given ``path`` is not a directory
    """
    from rhodecode.lib.vcs.backends import get_backend
    if callable(path):
        # a callable is invoked to obtain the actual path
        path = path()
    if not os.path.isdir(path):
        raise VCSError("Given path %r is not a directory" % path)

    result = []
    for key in settings.available_aliases():
        try:
            backend = get_backend(key)
        except VCSBackendNotSupportedError:
            # an unsupported backend is skipped, not treated as fatal
            log.warning('VCSBackendNotSupportedError: %s not supported', key)
            continue
        if backend.is_valid_repository(path):
            result.append(key)
    return result
94
94
95
95
def parse_datetime(text):
    """
    Parses given text and returns ``datetime.datetime`` instance or raises
    ``ValueError``.

    :param text: string of desired date/datetime or something more verbose,
        like *yesterday*, *2weeks 3days*, etc. A ``datetime.datetime``
        instance is returned unchanged.
    :returns: ``datetime.datetime`` instance.
    :raises ValueError: if ``text`` is empty, blank, or matches none of the
        supported formats.
    """
    if not text:
        raise ValueError('Wrong date: "%s"' % text)

    if isinstance(text, datetime.datetime):
        return text

    # we limit the format to not include microseconds
    # e.g. 2017-10-17t17:48:23.XXXX is cut down to its first 19 characters
    text = text.strip().lower()[:19]
    if not text:
        # whitespace-only input would otherwise match the all-optional
        # weeks/days pattern below and be returned as "0 days ago"
        raise ValueError('Wrong date: "%s"' % text)

    input_formats = (
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%dt%H:%M:%S',
        '%Y-%m-%d %H:%M',
        '%Y-%m-%dt%H:%M',
        '%Y-%m-%d',
        '%m/%d/%Y %H:%M:%S',
        '%m/%d/%Yt%H:%M:%S',
        '%m/%d/%Y %H:%M',
        '%m/%d/%Yt%H:%M',
        '%m/%d/%Y',
        '%m/%d/%y %H:%M:%S',
        '%m/%d/%yt%H:%M:%S',
        '%m/%d/%y %H:%M',
        '%m/%d/%yt%H:%M',
        '%m/%d/%y',
    )
    for format_def in input_formats:
        try:
            return datetime.datetime(*time.strptime(text, format_def)[:6])
        except ValueError:
            # not this format, try the next one
            pass

    # Try descriptive texts
    if text == 'tomorrow':
        future = datetime.datetime.now() + datetime.timedelta(days=1)
        # tomorrow resolves to the last second of that day
        args = future.timetuple()[:3] + (23, 59, 59)
        return datetime.datetime(*args)
    elif text == 'today':
        return datetime.datetime(*datetime.datetime.today().timetuple()[:3])
    elif text == 'now':
        return datetime.datetime.now()
    elif text == 'yesterday':
        past = datetime.datetime.now() - datetime.timedelta(days=1)
        return datetime.datetime(*past.timetuple()[:3])
    else:
        # relative offsets into the past such as "2weeks 3days", "1w", "5 d"
        matched = re.match(
            r'^((?P<weeks>\d+) ?w(eeks?)?)? ?((?P<days>\d+) ?d(ays?)?)?$', text)
        if matched:
            groupdict = matched.groupdict()
            days = 0
            if groupdict['days']:
                days += int(groupdict['days'])
            if groupdict['weeks']:
                days += int(groupdict['weeks']) * 7
            past = datetime.datetime.now() - datetime.timedelta(days=days)
            return datetime.datetime(*past.timetuple()[:3])

    raise ValueError('Wrong date: "%s"' % text)
General Comments 0
You need to be logged in to leave comments. Login now