##// END OF EJS Templates
vcs-support: bulk of changes for python3
super-admin -
r5075:d1c4b80b default
parent child Browse files
Show More
@@ -26,7 +26,9 b' import copy'
26 import logging
26 import logging
27 import threading
27 import threading
28 import time
28 import time
29 import urllib.request, urllib.error, urllib.parse
29 import urllib.request
30 import urllib.error
31 import urllib.parse
30 import urllib.parse
32 import urllib.parse
31 import uuid
33 import uuid
32 import traceback
34 import traceback
@@ -59,6 +61,7 b' def _remote_call(url, payload, exception'
59 for attempt in range(retries):
61 for attempt in range(retries):
60 try:
62 try:
61 response = session.post(url, data=msgpack.packb(payload))
63 response = session.post(url, data=msgpack.packb(payload))
64 break
62 except pycurl.error as e:
65 except pycurl.error as e:
63 error_code, error_message = e.args
66 error_code, error_message = e.args
64 if error_code == pycurl.E_RECV_ERROR:
67 if error_code == pycurl.E_RECV_ERROR:
@@ -76,12 +79,13 b' def _remote_call(url, payload, exception'
76 raise
79 raise
77
80
78 if response.status_code >= 400:
81 if response.status_code >= 400:
79 log.error('Call to %s returned non 200 HTTP code: %s',
82 content_type = response.content_type
80 url, response.status_code)
83 log.error('Call to %s returned non 200 HTTP code: %s [%s]',
84 url, response.status_code, content_type)
81 raise exceptions.HttpVCSCommunicationError(repr(response.content))
85 raise exceptions.HttpVCSCommunicationError(repr(response.content))
82
86
83 try:
87 try:
84 response = msgpack.unpackb(response.content, raw=False)
88 response = msgpack.unpackb(response.content)
85 except Exception:
89 except Exception:
86 log.exception('Failed to decode response from msgpack')
90 log.exception('Failed to decode response from msgpack')
87 raise
91 raise
@@ -103,10 +107,20 b' def _remote_call(url, payload, exception'
103 except KeyError:
107 except KeyError:
104 pass
108 pass
105
109
106 raise exc
110 exc.add_note(attach_exc_details(error))
111 raise exc # raising the org exception from vcsserver
107 return response.get('result')
112 return response.get('result')
108
113
109
114
115 def attach_exc_details(error):
116 note = '-- EXC NOTE -- :\n'
117 note += f'vcs_kind: {error.get("_vcs_kind")}\n'
118 note += f'org_exc: {error.get("_vcs_kind")}\n'
119 note += f'tb: {error.get("traceback")}\n'
120 note += '-- END EXC NOTE --'
121 return note
122
123
110 def _streaming_remote_call(url, payload, exceptions_map, session, chunk_size):
124 def _streaming_remote_call(url, payload, exceptions_map, session, chunk_size):
111 try:
125 try:
112 headers = {
126 headers = {
@@ -166,7 +180,7 b' class RemoteVCSMaker(object):'
166
180
167 @classmethod
181 @classmethod
168 def init_cache_region(cls, repo_id):
182 def init_cache_region(cls, repo_id):
169 cache_namespace_uid = 'cache_repo.{}'.format(repo_id)
183 cache_namespace_uid = 'repo.{}'.format(repo_id)
170 region = rc_cache.get_or_create_region('cache_repo', cache_namespace_uid)
184 region = rc_cache.get_or_create_region('cache_repo', cache_namespace_uid)
171 return region, cache_namespace_uid
185 return region, cache_namespace_uid
172
186
@@ -267,7 +281,7 b' class RemoteRepo(object):'
267 def get_local_cache(self, name, args):
281 def get_local_cache(self, name, args):
268 cache_on = False
282 cache_on = False
269 cache_key = ''
283 cache_key = ''
270 local_cache_on = str2bool(rhodecode.CONFIG.get('vcs.methods.cache'))
284 local_cache_on = rhodecode.ConfigGet().get_bool('vcs.methods.cache')
271
285
272 cache_methods = [
286 cache_methods = [
273 'branches', 'tags', 'bookmarks',
287 'branches', 'tags', 'bookmarks',
@@ -300,7 +314,7 b' class RemoteRepo(object):'
300 namespace=self._cache_namespace, condition=cache_on and cache_key)
314 namespace=self._cache_namespace, condition=cache_on and cache_key)
301 def remote_call(_cache_key):
315 def remote_call(_cache_key):
302 if self._call_with_logging:
316 if self._call_with_logging:
303 args_repr = f'ARG: {str(args):.256}|KW: {str(kwargs):.256}'
317 args_repr = f'ARG: {str(args):.512}|KW: {str(kwargs):.512}'
304 log.debug('Calling %s@%s with args:%r. wire_context: %s cache_on: %s',
318 log.debug('Calling %s@%s with args:%r. wire_context: %s cache_on: %s',
305 url, name, args_repr, context_uid, cache_on)
319 url, name, args_repr, context_uid, cache_on)
306 return _remote_call(url, payload, EXCEPTIONS_MAP, self._session)
320 return _remote_call(url, payload, EXCEPTIONS_MAP, self._session)
@@ -323,7 +337,7 b' class RemoteRepo(object):'
323 # Cache is a problem because this is a stream
337 # Cache is a problem because this is a stream
324 def streaming_remote_call(_cache_key):
338 def streaming_remote_call(_cache_key):
325 if self._call_with_logging:
339 if self._call_with_logging:
326 args_repr = f'ARG: {str(args):.256}|KW: {str(kwargs):.256}'
340 args_repr = f'ARG: {str(args):.512}|KW: {str(kwargs):.512}'
327 log.debug('Calling %s@%s with args:%r. wire_context: %s cache_on: %s',
341 log.debug('Calling %s@%s with args:%r. wire_context: %s cache_on: %s',
328 url, name, args_repr, context_uid, cache_on)
342 url, name, args_repr, context_uid, cache_on)
329 return _streaming_remote_call(url, payload, EXCEPTIONS_MAP, self._session, self.CHUNK_SIZE)
343 return _streaming_remote_call(url, payload, EXCEPTIONS_MAP, self._session, self.CHUNK_SIZE)
@@ -22,7 +22,7 b''
22 Internal settings for vcs-lib
22 Internal settings for vcs-lib
23 """
23 """
24
24
25 # list of default encoding used in safe_unicode/safe_str methods
25 # list of default encoding used in safe_str methods
26 DEFAULT_ENCODINGS = ['utf8']
26 DEFAULT_ENCODINGS = ['utf8']
27
27
28
28
@@ -23,7 +23,8 b' Custom vcs exceptions module.'
23 """
23 """
24 import logging
24 import logging
25 import functools
25 import functools
26 import urllib.request, urllib.error, urllib.parse
26 import urllib.error
27 import urllib.parse
27 import rhodecode
28 import rhodecode
28
29
29 log = logging.getLogger(__name__)
30 log = logging.getLogger(__name__)
@@ -185,12 +186,12 b' def map_vcs_exceptions(func):'
185 try:
186 try:
186 return func(*args, **kwargs)
187 return func(*args, **kwargs)
187 except Exception as e:
188 except Exception as e:
188 from rhodecode.lib.utils2 import str2bool
189 debug = rhodecode.ConfigGet().get_bool('debug')
189 debug = str2bool(rhodecode.CONFIG.get('debug'))
190
190
191 # The error middleware adds information if it finds
191 # The error middleware adds information if it finds
192 # __traceback_info__ in a frame object. This way the remote
192 # __traceback_info__ in a frame object. This way the remote
193 # traceback information is made available in error reports.
193 # traceback information is made available in error reports.
194
194 remote_tb = getattr(e, '_vcs_server_traceback', None)
195 remote_tb = getattr(e, '_vcs_server_traceback', None)
195 org_remote_tb = getattr(e, '_vcs_server_org_exc_tb', '')
196 org_remote_tb = getattr(e, '_vcs_server_org_exc_tb', '')
196 __traceback_info__ = None
197 __traceback_info__ = None
@@ -21,16 +21,15 b''
21 """
21 """
22 Module holding everything related to vcs nodes, with vcs2 architecture.
22 Module holding everything related to vcs nodes, with vcs2 architecture.
23 """
23 """
24
24 import functools
25 import os
25 import os
26 import stat
26 import stat
27
27
28 from zope.cachedescriptors.property import Lazy as LazyProperty
28 from zope.cachedescriptors.property import Lazy as LazyProperty
29
29
30 import rhodecode
31 from rhodecode.config.conf import LANGUAGES_EXTENSIONS_MAP
30 from rhodecode.config.conf import LANGUAGES_EXTENSIONS_MAP
32 from rhodecode.lib.utils import safe_unicode, safe_str
31 from rhodecode.lib.str_utils import safe_str, safe_bytes
33 from rhodecode.lib.utils2 import md5
32 from rhodecode.lib.hash_utils import md5
34 from rhodecode.lib.vcs import path as vcspath
33 from rhodecode.lib.vcs import path as vcspath
35 from rhodecode.lib.vcs.backends.base import EmptyCommit, FILEMODE_DEFAULT
34 from rhodecode.lib.vcs.backends.base import EmptyCommit, FILEMODE_DEFAULT
36 from rhodecode.lib.vcs.conf.mtypes import get_mimetypes_db
35 from rhodecode.lib.vcs.conf.mtypes import get_mimetypes_db
@@ -52,6 +51,10 b' class NodeState:'
52 NOT_CHANGED = 'not changed'
51 NOT_CHANGED = 'not changed'
53 REMOVED = 'removed'
52 REMOVED = 'removed'
54
53
54 #TODO: not sure if that should be bytes or str ?
55 # most probably bytes because content should be bytes and we check it
56 BIN_BYTE_MARKER = b'\0'
57
55
58
56 class NodeGeneratorBase(object):
59 class NodeGeneratorBase(object):
57 """
60 """
@@ -68,8 +71,9 b' class NodeGeneratorBase(object):'
68 def __call__(self):
71 def __call__(self):
69 return [n for n in self]
72 return [n for n in self]
70
73
71 def __getslice__(self, i, j):
74 def __getitem__(self, key):
72 for p in self.current_paths[i:j]:
75 if isinstance(key, slice):
76 for p in self.current_paths[key.start:key.stop]:
73 yield self.cs.get_node(p)
77 yield self.cs.get_node(p)
74
78
75 def __len__(self):
79 def __len__(self):
@@ -98,13 +102,15 b' class RemovedFileNodesGenerator(NodeGene'
98 """
102 """
99 def __iter__(self):
103 def __iter__(self):
100 for p in self.current_paths:
104 for p in self.current_paths:
101 yield RemovedFileNode(path=p)
105 yield RemovedFileNode(path=safe_bytes(p))
102
106
103 def __getslice__(self, i, j):
107 def __getitem__(self, key):
104 for p in self.current_paths[i:j]:
108 if isinstance(key, slice):
105 yield RemovedFileNode(path=p)
109 for p in self.current_paths[key.start:key.stop]:
110 yield RemovedFileNode(path=safe_bytes(p))
106
111
107
112
113 @functools.total_ordering
108 class Node(object):
114 class Node(object):
109 """
115 """
110 Simplest class representing file or directory on repository. SCM backends
116 Simplest class representing file or directory on repository. SCM backends
@@ -115,14 +121,19 b' class Node(object):'
115 only. Moreover, every single node is identified by the ``path`` attribute,
121 only. Moreover, every single node is identified by the ``path`` attribute,
116 so it cannot end with slash, too. Otherwise, path could lead to mistakes.
122 so it cannot end with slash, too. Otherwise, path could lead to mistakes.
117 """
123 """
118 RTLO_MARKER = "\u202E" # RTLO marker allows swapping text, and certain
124 # RTLO marker allows swapping text, and certain
119 # security attacks could be used with this
125 # security attacks could be used with this
126 RTLO_MARKER = "\u202E"
127
120 commit = None
128 commit = None
121
129
122 def __init__(self, path, kind):
130 def __init__(self, path: bytes, kind):
123 self._validate_path(path) # can throw exception if path is invalid
131 self._validate_path(path) # can throw exception if path is invalid
124 self.path = safe_str(path.rstrip('/')) # we store paths as str
132
125 if path == '' and kind != NodeKind.DIR:
133 self.bytes_path = path.rstrip(b'/') # store for __repr__
134 self.path = safe_str(self.bytes_path) # we store paths as str
135
136 if self.bytes_path == b'' and kind != NodeKind.DIR:
126 raise NodeError("Only DirNode and its subclasses may be "
137 raise NodeError("Only DirNode and its subclasses may be "
127 "initialized with empty path")
138 "initialized with empty path")
128 self.kind = kind
139 self.kind = kind
@@ -130,12 +141,65 b' class Node(object):'
130 if self.is_root() and not self.is_dir():
141 if self.is_root() and not self.is_dir():
131 raise NodeError("Root node cannot be FILE kind")
142 raise NodeError("Root node cannot be FILE kind")
132
143
133 def _validate_path(self, path):
144 def __eq__(self, other):
134 if path.startswith('/'):
145 if type(self) is not type(other):
146 return False
147 for attr in ['name', 'path', 'kind']:
148 if getattr(self, attr) != getattr(other, attr):
149 return False
150 if self.is_file():
151 # FileNode compare, we need to fallback to content compare
152 return None
153 else:
154 # For DirNode's check without entering each dir
155 self_nodes_paths = list(sorted(n.path for n in self.nodes))
156 other_nodes_paths = list(sorted(n.path for n in self.nodes))
157 if self_nodes_paths != other_nodes_paths:
158 return False
159 return True
160
161 def __lt__(self, other):
162 if self.kind < other.kind:
163 return True
164 if self.kind > other.kind:
165 return False
166 if self.path < other.path:
167 return True
168 if self.path > other.path:
169 return False
170
171 # def __cmp__(self, other):
172 # """
173 # Comparator using name of the node, needed for quick list sorting.
174 # """
175 #
176 # kind_cmp = cmp(self.kind, other.kind)
177 # if kind_cmp:
178 # if isinstance(self, SubModuleNode):
179 # # we make submodules equal to dirnode for "sorting" purposes
180 # return NodeKind.DIR
181 # return kind_cmp
182 # return cmp(self.name, other.name)
183
184 def __repr__(self):
185 maybe_path = getattr(self, 'path', 'UNKNOWN_PATH')
186 return f'<{self.__class__.__name__} {maybe_path!r}>'
187
188 def __str__(self):
189 return self.name
190
191 def _validate_path(self, path: bytes):
192 self._assert_bytes(path)
193
194 if path.startswith(b'/'):
135 raise NodeError(
195 raise NodeError(
136 "Cannot initialize Node objects with slash at "
196 f"Cannot initialize Node objects with slash at "
137 "the beginning as only relative paths are supported. "
197 f"the beginning as only relative paths are supported. "
138 "Got %s" % (path,))
198 f"Got {path}")
199
200 def _assert_bytes(self, value):
201 if not isinstance(value, bytes):
202 raise TypeError(f"Bytes required as input, got {type(value)} of {value}.")
139
203
140 @LazyProperty
204 @LazyProperty
141 def parent(self):
205 def parent(self):
@@ -147,22 +211,13 b' class Node(object):'
147 return None
211 return None
148
212
149 @LazyProperty
213 @LazyProperty
150 def unicode_path(self):
214 def str_path(self) -> str:
151 return safe_unicode(self.path)
215 return safe_str(self.path)
152
216
153 @LazyProperty
217 @LazyProperty
154 def has_rtlo(self):
218 def has_rtlo(self):
155 """Detects if a path has right-to-left-override marker"""
219 """Detects if a path has right-to-left-override marker"""
156 return self.RTLO_MARKER in self.unicode_path
220 return self.RTLO_MARKER in self.str_path
157
158 @LazyProperty
159 def unicode_path_safe(self):
160 """
161 Special SAFE representation of path without the right-to-left-override.
162 This should be only used for "showing" the file, cannot be used for any
163 urls etc.
164 """
165 return safe_unicode(self.path).replace(self.RTLO_MARKER, '')
166
221
167 @LazyProperty
222 @LazyProperty
168 def dir_path(self):
223 def dir_path(self):
@@ -172,7 +227,7 b' class Node(object):'
172 """
227 """
173 _parts = self.path.rstrip('/').rsplit('/', 1)
228 _parts = self.path.rstrip('/').rsplit('/', 1)
174 if len(_parts) == 2:
229 if len(_parts) == 2:
175 return safe_unicode(_parts[0])
230 return _parts[0]
176 return ''
231 return ''
177
232
178 @LazyProperty
233 @LazyProperty
@@ -181,7 +236,7 b' class Node(object):'
181 Returns name of the node so if its path
236 Returns name of the node so if its path
182 then only last part is returned.
237 then only last part is returned.
183 """
238 """
184 return safe_unicode(self.path.rstrip('/').split('/')[-1])
239 return self.path.rstrip('/').split('/')[-1]
185
240
186 @property
241 @property
187 def kind(self):
242 def kind(self):
@@ -197,53 +252,15 b' class Node(object):'
197 if self.path.endswith('/'):
252 if self.path.endswith('/'):
198 raise NodeError("Node's path cannot end with slash")
253 raise NodeError("Node's path cannot end with slash")
199
254
200 def __cmp__(self, other):
255 def get_parent_path(self) -> bytes:
201 """
202 Comparator using name of the node, needed for quick list sorting.
203 """
204
205 kind_cmp = cmp(self.kind, other.kind)
206 if kind_cmp:
207 if isinstance(self, SubModuleNode):
208 # we make submodules equal to dirnode for "sorting" purposes
209 return NodeKind.DIR
210 return kind_cmp
211 return cmp(self.name, other.name)
212
213 def __eq__(self, other):
214 for attr in ['name', 'path', 'kind']:
215 if getattr(self, attr) != getattr(other, attr):
216 return False
217 if self.is_file():
218 if self.content != other.content:
219 return False
220 else:
221 # For DirNode's check without entering each dir
222 self_nodes_paths = list(sorted(n.path for n in self.nodes))
223 other_nodes_paths = list(sorted(n.path for n in self.nodes))
224 if self_nodes_paths != other_nodes_paths:
225 return False
226 return True
227
228 def __ne__(self, other):
229 return not self.__eq__(other)
230
231 def __repr__(self):
232 return '<%s %r>' % (self.__class__.__name__, self.path)
233
234 def __str__(self):
235 return self.__repr__()
236
237 def __unicode__(self):
238 return self.name
239
240 def get_parent_path(self):
241 """
256 """
242 Returns node's parent path or empty string if node is root.
257 Returns node's parent path or empty string if node is root.
243 """
258 """
244 if self.is_root():
259 if self.is_root():
245 return ''
260 return b''
246 return vcspath.dirname(self.path.rstrip('/')) + '/'
261 str_path = vcspath.dirname(self.path.rstrip('/')) + '/'
262
263 return safe_bytes(str_path)
247
264
248 def is_file(self):
265 def is_file(self):
249 """
266 """
@@ -312,7 +329,7 b' class FileNode(Node):'
312 """
329 """
313 _filter_pre_load = []
330 _filter_pre_load = []
314
331
315 def __init__(self, path, content=None, commit=None, mode=None, pre_load=None):
332 def __init__(self, path: bytes, content: bytes | None = None, commit=None, mode=None, pre_load=None):
316 """
333 """
317 Only one of ``content`` and ``commit`` may be given. Passing both
334 Only one of ``content`` and ``commit`` may be given. Passing both
318 would raise ``NodeError`` exception.
335 would raise ``NodeError`` exception.
@@ -324,13 +341,39 b' class FileNode(Node):'
324 """
341 """
325 if content and commit:
342 if content and commit:
326 raise NodeError("Cannot use both content and commit")
343 raise NodeError("Cannot use both content and commit")
327 super(FileNode, self).__init__(path, kind=NodeKind.FILE)
344
345 super().__init__(path, kind=NodeKind.FILE)
346
328 self.commit = commit
347 self.commit = commit
348 if content and not isinstance(content, bytes):
349 # File content is one thing that inherently must be bytes
350 # we support passing str too, and convert the content
351 content = safe_bytes(content)
329 self._content = content
352 self._content = content
330 self._mode = mode or FILEMODE_DEFAULT
353 self._mode = mode or FILEMODE_DEFAULT
331
354
332 self._set_bulk_properties(pre_load)
355 self._set_bulk_properties(pre_load)
333
356
357 def __eq__(self, other):
358 eq = super(FileNode, self).__eq__(other)
359 if eq is not None:
360 return eq
361 return self.content == other.content
362
363 def __hash__(self):
364 raw_id = getattr(self.commit, 'raw_id', '')
365 return hash((self.path, raw_id))
366
367 def __lt__(self, other):
368 lt = super(FileNode, self).__lt__(other)
369 if lt is not None:
370 return lt
371 return self.content < other.content
372
373 def __repr__(self):
374 short_id = getattr(self.commit, 'short_id', '')
375 return f'<{self.__class__.__name__} path={self.path!r}, short_id={short_id}>'
376
334 def _set_bulk_properties(self, pre_load):
377 def _set_bulk_properties(self, pre_load):
335 if not pre_load:
378 if not pre_load:
336 return
379 return
@@ -339,11 +382,22 b' class FileNode(Node):'
339 if not pre_load:
382 if not pre_load:
340 return
383 return
341
384
342 for attr_name in pre_load:
385 remote = self.commit.get_remote()
343 result = getattr(self, attr_name)
386 result = remote.bulk_file_request(self.commit.raw_id, self.path, pre_load)
344 if callable(result):
387
345 result = result()
388 for attr, value in result.items():
346 self.__dict__[attr_name] = result
389 if attr == "flags":
390 self.__dict__['mode'] = safe_str(value)
391 elif attr == "size":
392 self.__dict__['size'] = value
393 elif attr == "data":
394 self.__dict__['_content'] = value
395 elif attr == "is_binary":
396 self.__dict__['is_binary'] = value
397 elif attr == "md5":
398 self.__dict__['md5'] = value
399 else:
400 raise ValueError(f'Unsupported attr in bulk_property: {attr}')
347
401
348 @LazyProperty
402 @LazyProperty
349 def mode(self):
403 def mode(self):
@@ -358,7 +412,7 b' class FileNode(Node):'
358 return mode
412 return mode
359
413
360 @LazyProperty
414 @LazyProperty
361 def raw_bytes(self):
415 def raw_bytes(self) -> bytes:
362 """
416 """
363 Returns lazily the raw bytes of the FileNode.
417 Returns lazily the raw bytes of the FileNode.
364 """
418 """
@@ -370,6 +424,16 b' class FileNode(Node):'
370 content = self._content
424 content = self._content
371 return content
425 return content
372
426
427 def content_uncached(self):
428 """
429 Returns lazily content of the FileNode.
430 """
431 if self.commit:
432 content = self.commit.get_file_content(self.path)
433 else:
434 content = self._content
435 return content
436
373 def stream_bytes(self):
437 def stream_bytes(self):
374 """
438 """
375 Returns an iterator that will stream the content of the file directly from
439 Returns an iterator that will stream the content of the file directly from
@@ -379,13 +443,6 b' class FileNode(Node):'
379 return self.commit.get_file_content_streamed(self.path)
443 return self.commit.get_file_content_streamed(self.path)
380 raise NodeError("Cannot retrieve stream_bytes without related commit attribute")
444 raise NodeError("Cannot retrieve stream_bytes without related commit attribute")
381
445
382 @LazyProperty
383 def md5(self):
384 """
385 Returns md5 of the file node.
386 """
387 return md5(self.raw_bytes)
388
389 def metadata_uncached(self):
446 def metadata_uncached(self):
390 """
447 """
391 Returns md5, binary flag of the file node, without any cache usage.
448 Returns md5, binary flag of the file node, without any cache usage.
@@ -393,35 +450,26 b' class FileNode(Node):'
393
450
394 content = self.content_uncached()
451 content = self.content_uncached()
395
452
396 is_binary = content and '\0' in content
453 is_binary = bool(content and BIN_BYTE_MARKER in content)
397 size = 0
454 size = 0
398 if content:
455 if content:
399 size = len(content)
456 size = len(content)
400
457
401 return is_binary, md5(content), size, content
458 return is_binary, md5(content), size, content
402
459
403 def content_uncached(self):
460 @LazyProperty
404 """
461 def content(self) -> bytes:
405 Returns lazily content of the FileNode. If possible, would try to
406 decode content from UTF-8.
407 """
462 """
408 if self.commit:
463 Returns lazily content of the FileNode.
409 content = self.commit.get_file_content(self.path)
464 """
410 else:
465 content = self.raw_bytes
411 content = self._content
466 if content and not isinstance(content, bytes):
467 raise ValueError(f'Content is of type {type(content)} instead of bytes')
412 return content
468 return content
413
469
414 @LazyProperty
470 @LazyProperty
415 def content(self):
471 def str_content(self) -> str:
416 """
472 return safe_str(self.raw_bytes)
417 Returns lazily content of the FileNode. If possible, would try to
418 decode content from UTF-8.
419 """
420 content = self.raw_bytes
421
422 if self.is_binary:
423 return content
424 return safe_unicode(content)
425
473
426 @LazyProperty
474 @LazyProperty
427 def size(self):
475 def size(self):
@@ -457,7 +505,7 b' class FileNode(Node):'
457 """
505 """
458
506
459 if hasattr(self, '_mimetype'):
507 if hasattr(self, '_mimetype'):
460 if (isinstance(self._mimetype, (tuple, list,)) and
508 if (isinstance(self._mimetype, (tuple, list)) and
461 len(self._mimetype) == 2):
509 len(self._mimetype) == 2):
462 return self._mimetype
510 return self._mimetype
463 else:
511 else:
@@ -511,7 +559,7 b' class FileNode(Node):'
511 lexer = lexers.guess_lexer_for_filename(
559 lexer = lexers.guess_lexer_for_filename(
512 filename, content, stripnl=False)
560 filename, content, stripnl=False)
513 except lexers.ClassNotFound:
561 except lexers.ClassNotFound:
514 lexer = None
562 pass
515
563
516 # try our EXTENSION_MAP
564 # try our EXTENSION_MAP
517 if not lexer:
565 if not lexer:
@@ -520,7 +568,7 b' class FileNode(Node):'
520 if lexer_class:
568 if lexer_class:
521 lexer = lexers.get_lexer_by_name(lexer_class[0])
569 lexer = lexers.get_lexer_by_name(lexer_class[0])
522 except lexers.ClassNotFound:
570 except lexers.ClassNotFound:
523 lexer = None
571 pass
524
572
525 if not lexer:
573 if not lexer:
526 lexer = lexers.TextLexer(stripnl=False)
574 lexer = lexers.TextLexer(stripnl=False)
@@ -533,7 +581,10 b' class FileNode(Node):'
533 Returns pygment's lexer class. Would try to guess lexer taking file's
581 Returns pygment's lexer class. Would try to guess lexer taking file's
534 content, name and mimetype.
582 content, name and mimetype.
535 """
583 """
536 return self.get_lexer(self.name, self.content)
584 # TODO: this is more proper, but super heavy on investigating the type based on the content
585 #self.get_lexer(self.name, self.content)
586
587 return self.get_lexer(self.name)
537
588
538 @LazyProperty
589 @LazyProperty
539 def lexer_alias(self):
590 def lexer_alias(self):
@@ -583,7 +634,20 b' class FileNode(Node):'
583 return self.commit.is_node_binary(self.path)
634 return self.commit.is_node_binary(self.path)
584 else:
635 else:
585 raw_bytes = self._content
636 raw_bytes = self._content
586 return raw_bytes and '\0' in raw_bytes
637 return bool(raw_bytes and BIN_BYTE_MARKER in raw_bytes)
638
639 @LazyProperty
640 def md5(self):
641 """
642 Returns md5 of the file node.
643 """
644
645 if self.commit:
646 return self.commit.node_md5_hash(self.path)
647 else:
648 raw_bytes = self._content
649 # TODO: this sucks, we're computing md5 on potentially super big stream data...
650 return md5(raw_bytes)
587
651
588 @LazyProperty
652 @LazyProperty
589 def extension(self):
653 def extension(self):
@@ -607,20 +671,26 b' class FileNode(Node):'
607 if self.commit:
671 if self.commit:
608 return self.commit.get_largefile_node(self.path)
672 return self.commit.get_largefile_node(self.path)
609
673
610 def count_lines(self, content, count_empty=False):
674 def count_lines(self, content: str | bytes, count_empty=False):
675 if isinstance(content, str):
676 newline_marker = '\n'
677 elif isinstance(content, bytes):
678 newline_marker = b'\n'
679 else:
680 raise ValueError('content must be bytes or str got {type(content)} instead')
611
681
612 if count_empty:
682 if count_empty:
613 all_lines = 0
683 all_lines = 0
614 empty_lines = 0
684 empty_lines = 0
615 for line in content.splitlines(True):
685 for line in content.splitlines(True):
616 if line == '\n':
686 if line == newline_marker:
617 empty_lines += 1
687 empty_lines += 1
618 all_lines += 1
688 all_lines += 1
619
689
620 return all_lines, all_lines - empty_lines
690 return all_lines, all_lines - empty_lines
621 else:
691 else:
622 # fast method
692 # fast method
623 empty_lines = all_lines = content.count('\n')
693 empty_lines = all_lines = content.count(newline_marker)
624 if all_lines == 0 and content:
694 if all_lines == 0 and content:
625 # one-line without a newline
695 # one-line without a newline
626 empty_lines = all_lines = 1
696 empty_lines = all_lines = 1
@@ -635,10 +705,6 b' class FileNode(Node):'
635 all_lines, empty_lines = self.count_lines(content, count_empty=count_empty)
705 all_lines, empty_lines = self.count_lines(content, count_empty=count_empty)
636 return all_lines, empty_lines
706 return all_lines, empty_lines
637
707
638 def __repr__(self):
639 return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
640 getattr(self.commit, 'short_id', ''))
641
642
708
643 class RemovedFileNode(FileNode):
709 class RemovedFileNode(FileNode):
644 """
710 """
@@ -648,20 +714,19 b' class RemovedFileNode(FileNode):'
648 """
714 """
649 ALLOWED_ATTRIBUTES = [
715 ALLOWED_ATTRIBUTES = [
650 'name', 'path', 'state', 'is_root', 'is_file', 'is_dir', 'kind',
716 'name', 'path', 'state', 'is_root', 'is_file', 'is_dir', 'kind',
651 'added', 'changed', 'not_changed', 'removed'
717 'added', 'changed', 'not_changed', 'removed', 'bytes_path'
652 ]
718 ]
653
719
654 def __init__(self, path):
720 def __init__(self, path):
655 """
721 """
656 :param path: relative path to the node
722 :param path: relative path to the node
657 """
723 """
658 super(RemovedFileNode, self).__init__(path=path)
724 super().__init__(path=path)
659
725
660 def __getattribute__(self, attr):
726 def __getattribute__(self, attr):
661 if attr.startswith('_') or attr in RemovedFileNode.ALLOWED_ATTRIBUTES:
727 if attr.startswith('_') or attr in RemovedFileNode.ALLOWED_ATTRIBUTES:
662 return super(RemovedFileNode, self).__getattribute__(attr)
728 return super().__getattribute__(attr)
663 raise RemovedFileNodeError(
729 raise RemovedFileNodeError(f"Cannot access attribute {attr} on RemovedFileNode. Not in allowed attributes")
664 "Cannot access attribute %s on RemovedFileNode" % attr)
665
730
666 @LazyProperty
731 @LazyProperty
667 def state(self):
732 def state(self):
@@ -675,7 +740,7 b' class DirNode(Node):'
675 lazily fetch data within same repository's commit.
740 lazily fetch data within same repository's commit.
676 """
741 """
677
742
678 def __init__(self, path, nodes=(), commit=None):
743 def __init__(self, path, nodes=(), commit=None, default_pre_load=None):
679 """
744 """
680 Only one of ``nodes`` and ``commit`` may be given. Passing both
745 Only one of ``nodes`` and ``commit`` may be given. Passing both
681 would raise ``NodeError`` exception.
746 would raise ``NodeError`` exception.
@@ -689,16 +754,38 b' class DirNode(Node):'
689 super(DirNode, self).__init__(path, NodeKind.DIR)
754 super(DirNode, self).__init__(path, NodeKind.DIR)
690 self.commit = commit
755 self.commit = commit
691 self._nodes = nodes
756 self._nodes = nodes
757 self.default_pre_load = default_pre_load or ['is_binary', 'size']
758
759 def __iter__(self):
760 for node in self.nodes:
761 yield node
762
763 def __eq__(self, other):
764 eq = super(DirNode, self).__eq__(other)
765 if eq is not None:
766 return eq
767 # check without entering each dir
768 self_nodes_paths = list(sorted(n.path for n in self.nodes))
769 other_nodes_paths = list(sorted(n.path for n in self.nodes))
770 return self_nodes_paths == other_nodes_paths
771
772 def __lt__(self, other):
773 lt = super(DirNode, self).__lt__(other)
774 if lt is not None:
775 return lt
776 # check without entering each dir
777 self_nodes_paths = list(sorted(n.path for n in self.nodes))
778 other_nodes_paths = list(sorted(n.path for n in self.nodes))
779 return self_nodes_paths < other_nodes_paths
692
780
693 @LazyProperty
781 @LazyProperty
694 def content(self):
782 def content(self):
695 raise NodeError(
783 raise NodeError(f"{self} represents a dir and has no `content` attribute")
696 "%s represents a dir and has no `content` attribute" % self)
697
784
698 @LazyProperty
785 @LazyProperty
699 def nodes(self):
786 def nodes(self):
700 if self.commit:
787 if self.commit:
701 nodes = self.commit.get_nodes(self.path)
788 nodes = self.commit.get_nodes(self.path, pre_load=self.default_pre_load)
702 else:
789 else:
703 nodes = self._nodes
790 nodes = self._nodes
704 self._nodes_dict = dict((node.path, node) for node in nodes)
791 self._nodes_dict = dict((node.path, node) for node in nodes)
@@ -712,10 +799,6 b' class DirNode(Node):'
712 def dirs(self):
799 def dirs(self):
713 return sorted((node for node in self.nodes if node.is_dir()))
800 return sorted((node for node in self.nodes if node.is_dir()))
714
801
715 def __iter__(self):
716 for node in self.nodes:
717 yield node
718
719 def get_node(self, path):
802 def get_node(self, path):
720 """
803 """
721 Returns node from within this particular ``DirNode``, so it is now
804 Returns node from within this particular ``DirNode``, so it is now
@@ -755,7 +838,7 b' class DirNode(Node):'
755 else:
838 else:
756 raise KeyError
839 raise KeyError
757 except KeyError:
840 except KeyError:
758 raise NodeError("Node does not exist at %s" % path)
841 raise NodeError(f"Node does not exist at {path}")
759
842
760 @LazyProperty
843 @LazyProperty
761 def state(self):
844 def state(self):
@@ -780,8 +863,8 b' class DirNode(Node):'
780 "related commit attribute")
863 "related commit attribute")
781
864
782 def __repr__(self):
865 def __repr__(self):
783 return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
866 short_id = getattr(self.commit, 'short_id', '')
784 getattr(self.commit, 'short_id', ''))
867 return f'<{self.__class__.__name__} {self.path!r} @ {short_id}>'
785
868
786
869
787 class RootNode(DirNode):
870 class RootNode(DirNode):
@@ -790,10 +873,10 b' class RootNode(DirNode):'
790 """
873 """
791
874
792 def __init__(self, nodes=(), commit=None):
875 def __init__(self, nodes=(), commit=None):
793 super(RootNode, self).__init__(path='', nodes=nodes, commit=commit)
876 super(RootNode, self).__init__(path=b'', nodes=nodes, commit=commit)
794
877
795 def __repr__(self):
878 def __repr__(self):
796 return '<%s>' % self.__class__.__name__
879 return f'<{self.__class__.__name__}>'
797
880
798
881
799 class SubModuleNode(Node):
882 class SubModuleNode(Node):
@@ -814,8 +897,8 b' class SubModuleNode(Node):'
814 self.url = url or self._extract_submodule_url()
897 self.url = url or self._extract_submodule_url()
815
898
816 def __repr__(self):
899 def __repr__(self):
817 return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
900 short_id = getattr(self.commit, 'short_id', '')
818 getattr(self.commit, 'short_id', ''))
901 return f'<{self.__class__.__name__} {self.path!r} @ {short_id}>'
819
902
820 def _extract_submodule_url(self):
903 def _extract_submodule_url(self):
821 # TODO: find a way to parse gits submodule file and extract the
904 # TODO: find a way to parse gits submodule file and extract the
@@ -828,27 +911,31 b' class SubModuleNode(Node):'
828 Returns name of the node so if its path
911 Returns name of the node so if its path
829 then only last part is returned.
912 then only last part is returned.
830 """
913 """
831 org = safe_unicode(self.path.rstrip('/').split('/')[-1])
914 org = safe_str(self.path.rstrip('/').split('/')[-1])
832 return '%s @ %s' % (org, self.commit.short_id)
915 return f'{org} @ {self.commit.short_id}'
833
916
834
917
835 class LargeFileNode(FileNode):
918 class LargeFileNode(FileNode):
836
919
837 def __init__(self, path, url=None, commit=None, alias=None, org_path=None):
920 def __init__(self, path, url=None, commit=None, alias=None, org_path=None):
838 self.path = path
921 self._validate_path(path) # can throw exception if path is invalid
839 self.org_path = org_path
922 self.org_path = org_path # as stored in VCS as LF pointer
923
924 self.bytes_path = path.rstrip(b'/') # store for __repr__
925 self.path = safe_str(self.bytes_path) # we store paths as str
926
840 self.kind = NodeKind.LARGEFILE
927 self.kind = NodeKind.LARGEFILE
841 self.alias = alias
928 self.alias = alias
842 self._content = ''
929 self._content = b''
843
930
844 def _validate_path(self, path):
931 def _validate_path(self, path: bytes):
845 """
932 """
846 we override check since the LargeFileNode path is system absolute
933 we override check since the LargeFileNode path is system absolute, but we check for bytes only
847 """
934 """
848 pass
935 self._assert_bytes(path)
849
936
850 def __repr__(self):
937 def __repr__(self):
851 return '<%s %r>' % (self.__class__.__name__, self.path)
938 return f'<{self.__class__.__name__} {self.org_path} -> {self.path!r}>'
852
939
853 @LazyProperty
940 @LazyProperty
854 def size(self):
941 def size(self):
@@ -55,7 +55,7 b' def get_scm(path):'
55 raise VCSError(
55 raise VCSError(
56 'More than one [%s] scm found at given path %s' % (found, path))
56 'More than one [%s] scm found at given path %s' % (found, path))
57
57
58 if len(found_scms) is 0:
58 if len(found_scms) == 0:
59 raise VCSError('No scm found at given path %s' % path)
59 raise VCSError('No scm found at given path %s' % path)
60
60
61 return found_scms[0]
61 return found_scms[0]
General Comments 0
You need to be logged in to leave comments. Login now