|
@@
-21,16
+21,15
b''
|
|
21
|
"""
|
|
21
|
"""
|
|
22
|
Module holding everything related to vcs nodes, with vcs2 architecture.
|
|
22
|
Module holding everything related to vcs nodes, with vcs2 architecture.
|
|
23
|
"""
|
|
23
|
"""
|
|
24
|
|
|
24
|
import functools
|
|
25
|
import os
|
|
25
|
import os
|
|
26
|
import stat
|
|
26
|
import stat
|
|
27
|
|
|
27
|
|
|
28
|
from zope.cachedescriptors.property import Lazy as LazyProperty
|
|
28
|
from zope.cachedescriptors.property import Lazy as LazyProperty
|
|
29
|
|
|
29
|
|
|
30
|
import rhodecode
|
|
|
|
|
31
|
from rhodecode.config.conf import LANGUAGES_EXTENSIONS_MAP
|
|
30
|
from rhodecode.config.conf import LANGUAGES_EXTENSIONS_MAP
|
|
32
|
from rhodecode.lib.utils import safe_unicode, safe_str
|
|
31
|
from rhodecode.lib.str_utils import safe_str, safe_bytes
|
|
33
|
from rhodecode.lib.utils2 import md5
|
|
32
|
from rhodecode.lib.hash_utils import md5
|
|
34
|
from rhodecode.lib.vcs import path as vcspath
|
|
33
|
from rhodecode.lib.vcs import path as vcspath
|
|
35
|
from rhodecode.lib.vcs.backends.base import EmptyCommit, FILEMODE_DEFAULT
|
|
34
|
from rhodecode.lib.vcs.backends.base import EmptyCommit, FILEMODE_DEFAULT
|
|
36
|
from rhodecode.lib.vcs.conf.mtypes import get_mimetypes_db
|
|
35
|
from rhodecode.lib.vcs.conf.mtypes import get_mimetypes_db
|
|
@@
-52,6
+51,10
b' class NodeState:'
|
|
52
|
NOT_CHANGED = 'not changed'
|
|
51
|
NOT_CHANGED = 'not changed'
|
|
53
|
REMOVED = 'removed'
|
|
52
|
REMOVED = 'removed'
|
|
54
|
|
|
53
|
|
|
|
|
|
54
|
#TODO: not sure if that should be bytes or str ?
|
|
|
|
|
55
|
# most probably bytes because content should be bytes and we check it
|
|
|
|
|
56
|
BIN_BYTE_MARKER = b'\0'
|
|
|
|
|
57
|
|
|
55
|
|
|
58
|
|
|
56
|
class NodeGeneratorBase(object):
|
|
59
|
class NodeGeneratorBase(object):
|
|
57
|
"""
|
|
60
|
"""
|
|
@@
-68,8
+71,9
b' class NodeGeneratorBase(object):'
|
|
68
|
def __call__(self):
|
|
71
|
def __call__(self):
|
|
69
|
return [n for n in self]
|
|
72
|
return [n for n in self]
|
|
70
|
|
|
73
|
|
|
71
|
def __getslice__(self, i, j):
|
|
74
|
def __getitem__(self, key):
|
|
72
|
for p in self.current_paths[i:j]:
|
|
75
|
if isinstance(key, slice):
|
|
|
|
|
76
|
for p in self.current_paths[key.start:key.stop]:
|
|
73
|
yield self.cs.get_node(p)
|
|
77
|
yield self.cs.get_node(p)
|
|
74
|
|
|
78
|
|
|
75
|
def __len__(self):
|
|
79
|
def __len__(self):
|
|
@@
-98,13
+102,15
b' class RemovedFileNodesGenerator(NodeGene'
|
|
98
|
"""
|
|
102
|
"""
|
|
99
|
def __iter__(self):
|
|
103
|
def __iter__(self):
|
|
100
|
for p in self.current_paths:
|
|
104
|
for p in self.current_paths:
|
|
101
|
yield RemovedFileNode(path=p)
|
|
105
|
yield RemovedFileNode(path=safe_bytes(p))
|
|
102
|
|
|
106
|
|
|
103
|
def __getslice__(self, i, j):
|
|
107
|
def __getitem__(self, key):
|
|
104
|
for p in self.current_paths[i:j]:
|
|
108
|
if isinstance(key, slice):
|
|
105
|
yield RemovedFileNode(path=p)
|
|
109
|
for p in self.current_paths[key.start:key.stop]:
|
|
|
|
|
110
|
yield RemovedFileNode(path=safe_bytes(p))
|
|
106
|
|
|
111
|
|
|
107
|
|
|
112
|
|
|
|
|
|
113
|
@functools.total_ordering
|
|
108
|
class Node(object):
|
|
114
|
class Node(object):
|
|
109
|
"""
|
|
115
|
"""
|
|
110
|
Simplest class representing file or directory on repository. SCM backends
|
|
116
|
Simplest class representing file or directory on repository. SCM backends
|
|
@@
-115,14
+121,19
b' class Node(object):'
|
|
115
|
only. Moreover, every single node is identified by the ``path`` attribute,
|
|
121
|
only. Moreover, every single node is identified by the ``path`` attribute,
|
|
116
|
so it cannot end with slash, too. Otherwise, path could lead to mistakes.
|
|
122
|
so it cannot end with slash, too. Otherwise, path could lead to mistakes.
|
|
117
|
"""
|
|
123
|
"""
|
|
118
|
RTLO_MARKER = "\u202E" # RTLO marker allows swapping text, and certain
|
|
124
|
# RTLO marker allows swapping text, and certain
|
|
119
|
# security attacks could be used with this
|
|
125
|
# security attacks could be used with this
|
|
|
|
|
126
|
RTLO_MARKER = "\u202E"
|
|
|
|
|
127
|
|
|
120
|
commit = None
|
|
128
|
commit = None
|
|
121
|
|
|
129
|
|
|
122
|
def __init__(self, path, kind):
|
|
130
|
def __init__(self, path: bytes, kind):
|
|
123
|
self._validate_path(path) # can throw exception if path is invalid
|
|
131
|
self._validate_path(path) # can throw exception if path is invalid
|
|
124
|
self.path = safe_str(path.rstrip('/')) # we store paths as str
|
|
132
|
|
|
125
|
if path == '' and kind != NodeKind.DIR:
|
|
133
|
self.bytes_path = path.rstrip(b'/') # store for __repr__
|
|
|
|
|
134
|
self.path = safe_str(self.bytes_path) # we store paths as str
|
|
|
|
|
135
|
|
|
|
|
|
136
|
if self.bytes_path == b'' and kind != NodeKind.DIR:
|
|
126
|
raise NodeError("Only DirNode and its subclasses may be "
|
|
137
|
raise NodeError("Only DirNode and its subclasses may be "
|
|
127
|
"initialized with empty path")
|
|
138
|
"initialized with empty path")
|
|
128
|
self.kind = kind
|
|
139
|
self.kind = kind
|
|
@@
-130,12
+141,65
b' class Node(object):'
|
|
130
|
if self.is_root() and not self.is_dir():
|
|
141
|
if self.is_root() and not self.is_dir():
|
|
131
|
raise NodeError("Root node cannot be FILE kind")
|
|
142
|
raise NodeError("Root node cannot be FILE kind")
|
|
132
|
|
|
143
|
|
|
133
|
def _validate_path(self, path):
|
|
144
|
def __eq__(self, other):
|
|
134
|
if path.startswith('/'):
|
|
145
|
if type(self) is not type(other):
|
|
|
|
|
146
|
return False
|
|
|
|
|
147
|
for attr in ['name', 'path', 'kind']:
|
|
|
|
|
148
|
if getattr(self, attr) != getattr(other, attr):
|
|
|
|
|
149
|
return False
|
|
|
|
|
150
|
if self.is_file():
|
|
|
|
|
151
|
# FileNode compare, we need to fallback to content compare
|
|
|
|
|
152
|
return None
|
|
|
|
|
153
|
else:
|
|
|
|
|
154
|
# For DirNode's check without entering each dir
|
|
|
|
|
155
|
self_nodes_paths = list(sorted(n.path for n in self.nodes))
|
|
|
|
|
156
|
other_nodes_paths = list(sorted(n.path for n in self.nodes))
|
|
|
|
|
157
|
if self_nodes_paths != other_nodes_paths:
|
|
|
|
|
158
|
return False
|
|
|
|
|
159
|
return True
|
|
|
|
|
160
|
|
|
|
|
|
161
|
def __lt__(self, other):
|
|
|
|
|
162
|
if self.kind < other.kind:
|
|
|
|
|
163
|
return True
|
|
|
|
|
164
|
if self.kind > other.kind:
|
|
|
|
|
165
|
return False
|
|
|
|
|
166
|
if self.path < other.path:
|
|
|
|
|
167
|
return True
|
|
|
|
|
168
|
if self.path > other.path:
|
|
|
|
|
169
|
return False
|
|
|
|
|
170
|
|
|
|
|
|
171
|
# def __cmp__(self, other):
|
|
|
|
|
172
|
# """
|
|
|
|
|
173
|
# Comparator using name of the node, needed for quick list sorting.
|
|
|
|
|
174
|
# """
|
|
|
|
|
175
|
#
|
|
|
|
|
176
|
# kind_cmp = cmp(self.kind, other.kind)
|
|
|
|
|
177
|
# if kind_cmp:
|
|
|
|
|
178
|
# if isinstance(self, SubModuleNode):
|
|
|
|
|
179
|
# # we make submodules equal to dirnode for "sorting" purposes
|
|
|
|
|
180
|
# return NodeKind.DIR
|
|
|
|
|
181
|
# return kind_cmp
|
|
|
|
|
182
|
# return cmp(self.name, other.name)
|
|
|
|
|
183
|
|
|
|
|
|
184
|
def __repr__(self):
|
|
|
|
|
185
|
maybe_path = getattr(self, 'path', 'UNKNOWN_PATH')
|
|
|
|
|
186
|
return f'<{self.__class__.__name__} {maybe_path!r}>'
|
|
|
|
|
187
|
|
|
|
|
|
188
|
def __str__(self):
|
|
|
|
|
189
|
return self.name
|
|
|
|
|
190
|
|
|
|
|
|
191
|
def _validate_path(self, path: bytes):
|
|
|
|
|
192
|
self._assert_bytes(path)
|
|
|
|
|
193
|
|
|
|
|
|
194
|
if path.startswith(b'/'):
|
|
135
|
raise NodeError(
|
|
195
|
raise NodeError(
|
|
136
|
"Cannot initialize Node objects with slash at "
|
|
196
|
f"Cannot initialize Node objects with slash at "
|
|
137
|
"the beginning as only relative paths are supported. "
|
|
197
|
f"the beginning as only relative paths are supported. "
|
|
138
|
"Got %s" % (path,))
|
|
198
|
f"Got {path}")
|
|
|
|
|
199
|
|
|
|
|
|
200
|
def _assert_bytes(self, value):
|
|
|
|
|
201
|
if not isinstance(value, bytes):
|
|
|
|
|
202
|
raise TypeError(f"Bytes required as input, got {type(value)} of {value}.")
|
|
139
|
|
|
203
|
|
|
140
|
@LazyProperty
|
|
204
|
@LazyProperty
|
|
141
|
def parent(self):
|
|
205
|
def parent(self):
|
|
@@
-147,22
+211,13
b' class Node(object):'
|
|
147
|
return None
|
|
211
|
return None
|
|
148
|
|
|
212
|
|
|
149
|
@LazyProperty
|
|
213
|
@LazyProperty
|
|
150
|
def unicode_path(self):
|
|
214
|
def str_path(self) -> str:
|
|
151
|
return safe_unicode(self.path)
|
|
215
|
return safe_str(self.path)
|
|
152
|
|
|
216
|
|
|
153
|
@LazyProperty
|
|
217
|
@LazyProperty
|
|
154
|
def has_rtlo(self):
|
|
218
|
def has_rtlo(self):
|
|
155
|
"""Detects if a path has right-to-left-override marker"""
|
|
219
|
"""Detects if a path has right-to-left-override marker"""
|
|
156
|
return self.RTLO_MARKER in self.unicode_path
|
|
220
|
return self.RTLO_MARKER in self.str_path
|
|
157
|
|
|
|
|
|
158
|
@LazyProperty
|
|
|
|
|
159
|
def unicode_path_safe(self):
|
|
|
|
|
160
|
"""
|
|
|
|
|
161
|
Special SAFE representation of path without the right-to-left-override.
|
|
|
|
|
162
|
This should be only used for "showing" the file, cannot be used for any
|
|
|
|
|
163
|
urls etc.
|
|
|
|
|
164
|
"""
|
|
|
|
|
165
|
return safe_unicode(self.path).replace(self.RTLO_MARKER, '')
|
|
|
|
|
166
|
|
|
221
|
|
|
167
|
@LazyProperty
|
|
222
|
@LazyProperty
|
|
168
|
def dir_path(self):
|
|
223
|
def dir_path(self):
|
|
@@
-172,7
+227,7
b' class Node(object):'
|
|
172
|
"""
|
|
227
|
"""
|
|
173
|
_parts = self.path.rstrip('/').rsplit('/', 1)
|
|
228
|
_parts = self.path.rstrip('/').rsplit('/', 1)
|
|
174
|
if len(_parts) == 2:
|
|
229
|
if len(_parts) == 2:
|
|
175
|
return safe_unicode(_parts[0])
|
|
230
|
return _parts[0]
|
|
176
|
return ''
|
|
231
|
return ''
|
|
177
|
|
|
232
|
|
|
178
|
@LazyProperty
|
|
233
|
@LazyProperty
|
|
@@
-181,7
+236,7
b' class Node(object):'
|
|
181
|
Returns name of the node so if its path
|
|
236
|
Returns name of the node so if its path
|
|
182
|
then only last part is returned.
|
|
237
|
then only last part is returned.
|
|
183
|
"""
|
|
238
|
"""
|
|
184
|
return safe_unicode(self.path.rstrip('/').split('/')[-1])
|
|
239
|
return self.path.rstrip('/').split('/')[-1]
|
|
185
|
|
|
240
|
|
|
186
|
@property
|
|
241
|
@property
|
|
187
|
def kind(self):
|
|
242
|
def kind(self):
|
|
@@
-197,53
+252,15
b' class Node(object):'
|
|
197
|
if self.path.endswith('/'):
|
|
252
|
if self.path.endswith('/'):
|
|
198
|
raise NodeError("Node's path cannot end with slash")
|
|
253
|
raise NodeError("Node's path cannot end with slash")
|
|
199
|
|
|
254
|
|
|
200
|
def __cmp__(self, other):
|
|
255
|
def get_parent_path(self) -> bytes:
|
|
201
|
"""
|
|
|
|
|
202
|
Comparator using name of the node, needed for quick list sorting.
|
|
|
|
|
203
|
"""
|
|
|
|
|
204
|
|
|
|
|
|
205
|
kind_cmp = cmp(self.kind, other.kind)
|
|
|
|
|
206
|
if kind_cmp:
|
|
|
|
|
207
|
if isinstance(self, SubModuleNode):
|
|
|
|
|
208
|
# we make submodules equal to dirnode for "sorting" purposes
|
|
|
|
|
209
|
return NodeKind.DIR
|
|
|
|
|
210
|
return kind_cmp
|
|
|
|
|
211
|
return cmp(self.name, other.name)
|
|
|
|
|
212
|
|
|
|
|
|
213
|
def __eq__(self, other):
|
|
|
|
|
214
|
for attr in ['name', 'path', 'kind']:
|
|
|
|
|
215
|
if getattr(self, attr) != getattr(other, attr):
|
|
|
|
|
216
|
return False
|
|
|
|
|
217
|
if self.is_file():
|
|
|
|
|
218
|
if self.content != other.content:
|
|
|
|
|
219
|
return False
|
|
|
|
|
220
|
else:
|
|
|
|
|
221
|
# For DirNode's check without entering each dir
|
|
|
|
|
222
|
self_nodes_paths = list(sorted(n.path for n in self.nodes))
|
|
|
|
|
223
|
other_nodes_paths = list(sorted(n.path for n in self.nodes))
|
|
|
|
|
224
|
if self_nodes_paths != other_nodes_paths:
|
|
|
|
|
225
|
return False
|
|
|
|
|
226
|
return True
|
|
|
|
|
227
|
|
|
|
|
|
228
|
def __ne__(self, other):
|
|
|
|
|
229
|
return not self.__eq__(other)
|
|
|
|
|
230
|
|
|
|
|
|
231
|
def __repr__(self):
|
|
|
|
|
232
|
return '<%s %r>' % (self.__class__.__name__, self.path)
|
|
|
|
|
233
|
|
|
|
|
|
234
|
def __str__(self):
|
|
|
|
|
235
|
return self.__repr__()
|
|
|
|
|
236
|
|
|
|
|
|
237
|
def __unicode__(self):
|
|
|
|
|
238
|
return self.name
|
|
|
|
|
239
|
|
|
|
|
|
240
|
def get_parent_path(self):
|
|
|
|
|
241
|
"""
|
|
256
|
"""
|
|
242
|
Returns node's parent path or empty string if node is root.
|
|
257
|
Returns node's parent path or empty string if node is root.
|
|
243
|
"""
|
|
258
|
"""
|
|
244
|
if self.is_root():
|
|
259
|
if self.is_root():
|
|
245
|
return ''
|
|
260
|
return b''
|
|
246
|
return vcspath.dirname(self.path.rstrip('/')) + '/'
|
|
261
|
str_path = vcspath.dirname(self.path.rstrip('/')) + '/'
|
|
|
|
|
262
|
|
|
|
|
|
263
|
return safe_bytes(str_path)
|
|
247
|
|
|
264
|
|
|
248
|
def is_file(self):
|
|
265
|
def is_file(self):
|
|
249
|
"""
|
|
266
|
"""
|
|
@@
-312,7
+329,7
b' class FileNode(Node):'
|
|
312
|
"""
|
|
329
|
"""
|
|
313
|
_filter_pre_load = []
|
|
330
|
_filter_pre_load = []
|
|
314
|
|
|
331
|
|
|
315
|
def __init__(self, path, content=None, commit=None, mode=None, pre_load=None):
|
|
332
|
def __init__(self, path: bytes, content: bytes | None = None, commit=None, mode=None, pre_load=None):
|
|
316
|
"""
|
|
333
|
"""
|
|
317
|
Only one of ``content`` and ``commit`` may be given. Passing both
|
|
334
|
Only one of ``content`` and ``commit`` may be given. Passing both
|
|
318
|
would raise ``NodeError`` exception.
|
|
335
|
would raise ``NodeError`` exception.
|
|
@@
-324,13
+341,39
b' class FileNode(Node):'
|
|
324
|
"""
|
|
341
|
"""
|
|
325
|
if content and commit:
|
|
342
|
if content and commit:
|
|
326
|
raise NodeError("Cannot use both content and commit")
|
|
343
|
raise NodeError("Cannot use both content and commit")
|
|
327
|
super(FileNode, self).__init__(path, kind=NodeKind.FILE)
|
|
344
|
|
|
|
|
|
345
|
super().__init__(path, kind=NodeKind.FILE)
|
|
|
|
|
346
|
|
|
328
|
self.commit = commit
|
|
347
|
self.commit = commit
|
|
|
|
|
348
|
if content and not isinstance(content, bytes):
|
|
|
|
|
349
|
# File content is one thing that inherently must be bytes
|
|
|
|
|
350
|
# we support passing str too, and convert the content
|
|
|
|
|
351
|
content = safe_bytes(content)
|
|
329
|
self._content = content
|
|
352
|
self._content = content
|
|
330
|
self._mode = mode or FILEMODE_DEFAULT
|
|
353
|
self._mode = mode or FILEMODE_DEFAULT
|
|
331
|
|
|
354
|
|
|
332
|
self._set_bulk_properties(pre_load)
|
|
355
|
self._set_bulk_properties(pre_load)
|
|
333
|
|
|
356
|
|
|
|
|
|
357
|
def __eq__(self, other):
|
|
|
|
|
358
|
eq = super(FileNode, self).__eq__(other)
|
|
|
|
|
359
|
if eq is not None:
|
|
|
|
|
360
|
return eq
|
|
|
|
|
361
|
return self.content == other.content
|
|
|
|
|
362
|
|
|
|
|
|
363
|
def __hash__(self):
|
|
|
|
|
364
|
raw_id = getattr(self.commit, 'raw_id', '')
|
|
|
|
|
365
|
return hash((self.path, raw_id))
|
|
|
|
|
366
|
|
|
|
|
|
367
|
def __lt__(self, other):
|
|
|
|
|
368
|
lt = super(FileNode, self).__lt__(other)
|
|
|
|
|
369
|
if lt is not None:
|
|
|
|
|
370
|
return lt
|
|
|
|
|
371
|
return self.content < other.content
|
|
|
|
|
372
|
|
|
|
|
|
373
|
def __repr__(self):
|
|
|
|
|
374
|
short_id = getattr(self.commit, 'short_id', '')
|
|
|
|
|
375
|
return f'<{self.__class__.__name__} path={self.path!r}, short_id={short_id}>'
|
|
|
|
|
376
|
|
|
334
|
def _set_bulk_properties(self, pre_load):
|
|
377
|
def _set_bulk_properties(self, pre_load):
|
|
335
|
if not pre_load:
|
|
378
|
if not pre_load:
|
|
336
|
return
|
|
379
|
return
|
|
@@
-339,11
+382,22
b' class FileNode(Node):'
|
|
339
|
if not pre_load:
|
|
382
|
if not pre_load:
|
|
340
|
return
|
|
383
|
return
|
|
341
|
|
|
384
|
|
|
342
|
for attr_name in pre_load:
|
|
385
|
remote = self.commit.get_remote()
|
|
343
|
result = getattr(self, attr_name)
|
|
386
|
result = remote.bulk_file_request(self.commit.raw_id, self.path, pre_load)
|
|
344
|
if callable(result):
|
|
387
|
|
|
345
|
result = result()
|
|
388
|
for attr, value in result.items():
|
|
346
|
self.__dict__[attr_name] = result
|
|
389
|
if attr == "flags":
|
|
|
|
|
390
|
self.__dict__['mode'] = safe_str(value)
|
|
|
|
|
391
|
elif attr == "size":
|
|
|
|
|
392
|
self.__dict__['size'] = value
|
|
|
|
|
393
|
elif attr == "data":
|
|
|
|
|
394
|
self.__dict__['_content'] = value
|
|
|
|
|
395
|
elif attr == "is_binary":
|
|
|
|
|
396
|
self.__dict__['is_binary'] = value
|
|
|
|
|
397
|
elif attr == "md5":
|
|
|
|
|
398
|
self.__dict__['md5'] = value
|
|
|
|
|
399
|
else:
|
|
|
|
|
400
|
raise ValueError(f'Unsupported attr in bulk_property: {attr}')
|
|
347
|
|
|
401
|
|
|
348
|
@LazyProperty
|
|
402
|
@LazyProperty
|
|
349
|
def mode(self):
|
|
403
|
def mode(self):
|
|
@@
-358,7
+412,7
b' class FileNode(Node):'
|
|
358
|
return mode
|
|
412
|
return mode
|
|
359
|
|
|
413
|
|
|
360
|
@LazyProperty
|
|
414
|
@LazyProperty
|
|
361
|
def raw_bytes(self):
|
|
415
|
def raw_bytes(self) -> bytes:
|
|
362
|
"""
|
|
416
|
"""
|
|
363
|
Returns lazily the raw bytes of the FileNode.
|
|
417
|
Returns lazily the raw bytes of the FileNode.
|
|
364
|
"""
|
|
418
|
"""
|
|
@@
-370,6
+424,16
b' class FileNode(Node):'
|
|
370
|
content = self._content
|
|
424
|
content = self._content
|
|
371
|
return content
|
|
425
|
return content
|
|
372
|
|
|
426
|
|
|
|
|
|
427
|
def content_uncached(self):
|
|
|
|
|
428
|
"""
|
|
|
|
|
429
|
Returns lazily content of the FileNode.
|
|
|
|
|
430
|
"""
|
|
|
|
|
431
|
if self.commit:
|
|
|
|
|
432
|
content = self.commit.get_file_content(self.path)
|
|
|
|
|
433
|
else:
|
|
|
|
|
434
|
content = self._content
|
|
|
|
|
435
|
return content
|
|
|
|
|
436
|
|
|
373
|
def stream_bytes(self):
|
|
437
|
def stream_bytes(self):
|
|
374
|
"""
|
|
438
|
"""
|
|
375
|
Returns an iterator that will stream the content of the file directly from
|
|
439
|
Returns an iterator that will stream the content of the file directly from
|
|
@@
-379,13
+443,6
b' class FileNode(Node):'
|
|
379
|
return self.commit.get_file_content_streamed(self.path)
|
|
443
|
return self.commit.get_file_content_streamed(self.path)
|
|
380
|
raise NodeError("Cannot retrieve stream_bytes without related commit attribute")
|
|
444
|
raise NodeError("Cannot retrieve stream_bytes without related commit attribute")
|
|
381
|
|
|
445
|
|
|
382
|
@LazyProperty
|
|
|
|
|
383
|
def md5(self):
|
|
|
|
|
384
|
"""
|
|
|
|
|
385
|
Returns md5 of the file node.
|
|
|
|
|
386
|
"""
|
|
|
|
|
387
|
return md5(self.raw_bytes)
|
|
|
|
|
388
|
|
|
|
|
|
389
|
def metadata_uncached(self):
|
|
446
|
def metadata_uncached(self):
|
|
390
|
"""
|
|
447
|
"""
|
|
391
|
Returns md5, binary flag of the file node, without any cache usage.
|
|
448
|
Returns md5, binary flag of the file node, without any cache usage.
|
|
@@
-393,35
+450,26
b' class FileNode(Node):'
|
|
393
|
|
|
450
|
|
|
394
|
content = self.content_uncached()
|
|
451
|
content = self.content_uncached()
|
|
395
|
|
|
452
|
|
|
396
|
is_binary = content and '\0' in content
|
|
453
|
is_binary = bool(content and BIN_BYTE_MARKER in content)
|
|
397
|
size = 0
|
|
454
|
size = 0
|
|
398
|
if content:
|
|
455
|
if content:
|
|
399
|
size = len(content)
|
|
456
|
size = len(content)
|
|
400
|
|
|
457
|
|
|
401
|
return is_binary, md5(content), size, content
|
|
458
|
return is_binary, md5(content), size, content
|
|
402
|
|
|
459
|
|
|
403
|
def content_uncached(self):
|
|
460
|
@LazyProperty
|
|
404
|
"""
|
|
461
|
def content(self) -> bytes:
|
|
405
|
Returns lazily content of the FileNode. If possible, would try to
|
|
|
|
|
406
|
decode content from UTF-8.
|
|
|
|
|
407
|
"""
|
|
462
|
"""
|
|
408
|
if self.commit:
|
|
463
|
Returns lazily content of the FileNode.
|
|
409
|
content = self.commit.get_file_content(self.path)
|
|
464
|
"""
|
|
410
|
else:
|
|
465
|
content = self.raw_bytes
|
|
411
|
content = self._content
|
|
466
|
if content and not isinstance(content, bytes):
|
|
|
|
|
467
|
raise ValueError(f'Content is of type {type(content)} instead of bytes')
|
|
412
|
return content
|
|
468
|
return content
|
|
413
|
|
|
469
|
|
|
414
|
@LazyProperty
|
|
470
|
@LazyProperty
|
|
415
|
def content(self):
|
|
471
|
def str_content(self) -> str:
|
|
416
|
"""
|
|
472
|
return safe_str(self.raw_bytes)
|
|
417
|
Returns lazily content of the FileNode. If possible, would try to
|
|
|
|
|
418
|
decode content from UTF-8.
|
|
|
|
|
419
|
"""
|
|
|
|
|
420
|
content = self.raw_bytes
|
|
|
|
|
421
|
|
|
|
|
|
422
|
if self.is_binary:
|
|
|
|
|
423
|
return content
|
|
|
|
|
424
|
return safe_unicode(content)
|
|
|
|
|
425
|
|
|
473
|
|
|
426
|
@LazyProperty
|
|
474
|
@LazyProperty
|
|
427
|
def size(self):
|
|
475
|
def size(self):
|
|
@@
-457,7
+505,7
b' class FileNode(Node):'
|
|
457
|
"""
|
|
505
|
"""
|
|
458
|
|
|
506
|
|
|
459
|
if hasattr(self, '_mimetype'):
|
|
507
|
if hasattr(self, '_mimetype'):
|
|
460
|
if (isinstance(self._mimetype, (tuple, list,)) and
|
|
508
|
if (isinstance(self._mimetype, (tuple, list)) and
|
|
461
|
len(self._mimetype) == 2):
|
|
509
|
len(self._mimetype) == 2):
|
|
462
|
return self._mimetype
|
|
510
|
return self._mimetype
|
|
463
|
else:
|
|
511
|
else:
|
|
@@
-511,7
+559,7
b' class FileNode(Node):'
|
|
511
|
lexer = lexers.guess_lexer_for_filename(
|
|
559
|
lexer = lexers.guess_lexer_for_filename(
|
|
512
|
filename, content, stripnl=False)
|
|
560
|
filename, content, stripnl=False)
|
|
513
|
except lexers.ClassNotFound:
|
|
561
|
except lexers.ClassNotFound:
|
|
514
|
lexer = None
|
|
562
|
pass
|
|
515
|
|
|
563
|
|
|
516
|
# try our EXTENSION_MAP
|
|
564
|
# try our EXTENSION_MAP
|
|
517
|
if not lexer:
|
|
565
|
if not lexer:
|
|
@@
-520,7
+568,7
b' class FileNode(Node):'
|
|
520
|
if lexer_class:
|
|
568
|
if lexer_class:
|
|
521
|
lexer = lexers.get_lexer_by_name(lexer_class[0])
|
|
569
|
lexer = lexers.get_lexer_by_name(lexer_class[0])
|
|
522
|
except lexers.ClassNotFound:
|
|
570
|
except lexers.ClassNotFound:
|
|
523
|
lexer = None
|
|
571
|
pass
|
|
524
|
|
|
572
|
|
|
525
|
if not lexer:
|
|
573
|
if not lexer:
|
|
526
|
lexer = lexers.TextLexer(stripnl=False)
|
|
574
|
lexer = lexers.TextLexer(stripnl=False)
|
|
@@
-533,7
+581,10
b' class FileNode(Node):'
|
|
533
|
Returns pygment's lexer class. Would try to guess lexer taking file's
|
|
581
|
Returns pygment's lexer class. Would try to guess lexer taking file's
|
|
534
|
content, name and mimetype.
|
|
582
|
content, name and mimetype.
|
|
535
|
"""
|
|
583
|
"""
|
|
536
|
return self.get_lexer(self.name, self.content)
|
|
584
|
# TODO: this is more proper, but super heavy on investigating the type based on the content
|
|
|
|
|
585
|
#self.get_lexer(self.name, self.content)
|
|
|
|
|
586
|
|
|
|
|
|
587
|
return self.get_lexer(self.name)
|
|
537
|
|
|
588
|
|
|
538
|
@LazyProperty
|
|
589
|
@LazyProperty
|
|
539
|
def lexer_alias(self):
|
|
590
|
def lexer_alias(self):
|
|
@@
-583,7
+634,20
b' class FileNode(Node):'
|
|
583
|
return self.commit.is_node_binary(self.path)
|
|
634
|
return self.commit.is_node_binary(self.path)
|
|
584
|
else:
|
|
635
|
else:
|
|
585
|
raw_bytes = self._content
|
|
636
|
raw_bytes = self._content
|
|
586
|
return raw_bytes and '\0' in raw_bytes
|
|
637
|
return bool(raw_bytes and BIN_BYTE_MARKER in raw_bytes)
|
|
|
|
|
638
|
|
|
|
|
|
639
|
@LazyProperty
|
|
|
|
|
640
|
def md5(self):
|
|
|
|
|
641
|
"""
|
|
|
|
|
642
|
Returns md5 of the file node.
|
|
|
|
|
643
|
"""
|
|
|
|
|
644
|
|
|
|
|
|
645
|
if self.commit:
|
|
|
|
|
646
|
return self.commit.node_md5_hash(self.path)
|
|
|
|
|
647
|
else:
|
|
|
|
|
648
|
raw_bytes = self._content
|
|
|
|
|
649
|
# TODO: this sucks, we're computing md5 on potentially super big stream data...
|
|
|
|
|
650
|
return md5(raw_bytes)
|
|
587
|
|
|
651
|
|
|
588
|
@LazyProperty
|
|
652
|
@LazyProperty
|
|
589
|
def extension(self):
|
|
653
|
def extension(self):
|
|
@@
-607,20
+671,26
b' class FileNode(Node):'
|
|
607
|
if self.commit:
|
|
671
|
if self.commit:
|
|
608
|
return self.commit.get_largefile_node(self.path)
|
|
672
|
return self.commit.get_largefile_node(self.path)
|
|
609
|
|
|
673
|
|
|
610
|
def count_lines(self, content, count_empty=False):
|
|
674
|
def count_lines(self, content: str | bytes, count_empty=False):
|
|
|
|
|
675
|
if isinstance(content, str):
|
|
|
|
|
676
|
newline_marker = '\n'
|
|
|
|
|
677
|
elif isinstance(content, bytes):
|
|
|
|
|
678
|
newline_marker = b'\n'
|
|
|
|
|
679
|
else:
|
|
|
|
|
680
|
raise ValueError('content must be bytes or str got {type(content)} instead')
|
|
611
|
|
|
681
|
|
|
612
|
if count_empty:
|
|
682
|
if count_empty:
|
|
613
|
all_lines = 0
|
|
683
|
all_lines = 0
|
|
614
|
empty_lines = 0
|
|
684
|
empty_lines = 0
|
|
615
|
for line in content.splitlines(True):
|
|
685
|
for line in content.splitlines(True):
|
|
616
|
if line == '\n':
|
|
686
|
if line == newline_marker:
|
|
617
|
empty_lines += 1
|
|
687
|
empty_lines += 1
|
|
618
|
all_lines += 1
|
|
688
|
all_lines += 1
|
|
619
|
|
|
689
|
|
|
620
|
return all_lines, all_lines - empty_lines
|
|
690
|
return all_lines, all_lines - empty_lines
|
|
621
|
else:
|
|
691
|
else:
|
|
622
|
# fast method
|
|
692
|
# fast method
|
|
623
|
empty_lines = all_lines = content.count('\n')
|
|
693
|
empty_lines = all_lines = content.count(newline_marker)
|
|
624
|
if all_lines == 0 and content:
|
|
694
|
if all_lines == 0 and content:
|
|
625
|
# one-line without a newline
|
|
695
|
# one-line without a newline
|
|
626
|
empty_lines = all_lines = 1
|
|
696
|
empty_lines = all_lines = 1
|
|
@@
-635,10
+705,6
b' class FileNode(Node):'
|
|
635
|
all_lines, empty_lines = self.count_lines(content, count_empty=count_empty)
|
|
705
|
all_lines, empty_lines = self.count_lines(content, count_empty=count_empty)
|
|
636
|
return all_lines, empty_lines
|
|
706
|
return all_lines, empty_lines
|
|
637
|
|
|
707
|
|
|
638
|
def __repr__(self):
|
|
|
|
|
639
|
return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
|
|
|
|
|
640
|
getattr(self.commit, 'short_id', ''))
|
|
|
|
|
641
|
|
|
|
|
|
642
|
|
|
708
|
|
|
643
|
class RemovedFileNode(FileNode):
|
|
709
|
class RemovedFileNode(FileNode):
|
|
644
|
"""
|
|
710
|
"""
|
|
@@
-648,20
+714,19
b' class RemovedFileNode(FileNode):'
|
|
648
|
"""
|
|
714
|
"""
|
|
649
|
ALLOWED_ATTRIBUTES = [
|
|
715
|
ALLOWED_ATTRIBUTES = [
|
|
650
|
'name', 'path', 'state', 'is_root', 'is_file', 'is_dir', 'kind',
|
|
716
|
'name', 'path', 'state', 'is_root', 'is_file', 'is_dir', 'kind',
|
|
651
|
'added', 'changed', 'not_changed', 'removed'
|
|
717
|
'added', 'changed', 'not_changed', 'removed', 'bytes_path'
|
|
652
|
]
|
|
718
|
]
|
|
653
|
|
|
719
|
|
|
654
|
def __init__(self, path):
|
|
720
|
def __init__(self, path):
|
|
655
|
"""
|
|
721
|
"""
|
|
656
|
:param path: relative path to the node
|
|
722
|
:param path: relative path to the node
|
|
657
|
"""
|
|
723
|
"""
|
|
658
|
super(RemovedFileNode, self).__init__(path=path)
|
|
724
|
super().__init__(path=path)
|
|
659
|
|
|
725
|
|
|
660
|
def __getattribute__(self, attr):
|
|
726
|
def __getattribute__(self, attr):
|
|
661
|
if attr.startswith('_') or attr in RemovedFileNode.ALLOWED_ATTRIBUTES:
|
|
727
|
if attr.startswith('_') or attr in RemovedFileNode.ALLOWED_ATTRIBUTES:
|
|
662
|
return super(RemovedFileNode, self).__getattribute__(attr)
|
|
728
|
return super().__getattribute__(attr)
|
|
663
|
raise RemovedFileNodeError(
|
|
729
|
raise RemovedFileNodeError(f"Cannot access attribute {attr} on RemovedFileNode. Not in allowed attributes")
|
|
664
|
"Cannot access attribute %s on RemovedFileNode" % attr)
|
|
|
|
|
665
|
|
|
730
|
|
|
666
|
@LazyProperty
|
|
731
|
@LazyProperty
|
|
667
|
def state(self):
|
|
732
|
def state(self):
|
|
@@
-675,7
+740,7
b' class DirNode(Node):'
|
|
675
|
lazily fetch data within same repository's commit.
|
|
740
|
lazily fetch data within same repository's commit.
|
|
676
|
"""
|
|
741
|
"""
|
|
677
|
|
|
742
|
|
|
678
|
def __init__(self, path, nodes=(), commit=None):
|
|
743
|
def __init__(self, path, nodes=(), commit=None, default_pre_load=None):
|
|
679
|
"""
|
|
744
|
"""
|
|
680
|
Only one of ``nodes`` and ``commit`` may be given. Passing both
|
|
745
|
Only one of ``nodes`` and ``commit`` may be given. Passing both
|
|
681
|
would raise ``NodeError`` exception.
|
|
746
|
would raise ``NodeError`` exception.
|
|
@@
-689,16
+754,38
b' class DirNode(Node):'
|
|
689
|
super(DirNode, self).__init__(path, NodeKind.DIR)
|
|
754
|
super(DirNode, self).__init__(path, NodeKind.DIR)
|
|
690
|
self.commit = commit
|
|
755
|
self.commit = commit
|
|
691
|
self._nodes = nodes
|
|
756
|
self._nodes = nodes
|
|
|
|
|
757
|
self.default_pre_load = default_pre_load or ['is_binary', 'size']
|
|
|
|
|
758
|
|
|
|
|
|
759
|
def __iter__(self):
|
|
|
|
|
760
|
for node in self.nodes:
|
|
|
|
|
761
|
yield node
|
|
|
|
|
762
|
|
|
|
|
|
763
|
def __eq__(self, other):
|
|
|
|
|
764
|
eq = super(DirNode, self).__eq__(other)
|
|
|
|
|
765
|
if eq is not None:
|
|
|
|
|
766
|
return eq
|
|
|
|
|
767
|
# check without entering each dir
|
|
|
|
|
768
|
self_nodes_paths = list(sorted(n.path for n in self.nodes))
|
|
|
|
|
769
|
other_nodes_paths = list(sorted(n.path for n in self.nodes))
|
|
|
|
|
770
|
return self_nodes_paths == other_nodes_paths
|
|
|
|
|
771
|
|
|
|
|
|
772
|
def __lt__(self, other):
|
|
|
|
|
773
|
lt = super(DirNode, self).__lt__(other)
|
|
|
|
|
774
|
if lt is not None:
|
|
|
|
|
775
|
return lt
|
|
|
|
|
776
|
# check without entering each dir
|
|
|
|
|
777
|
self_nodes_paths = list(sorted(n.path for n in self.nodes))
|
|
|
|
|
778
|
other_nodes_paths = list(sorted(n.path for n in self.nodes))
|
|
|
|
|
779
|
return self_nodes_paths < other_nodes_paths
|
|
692
|
|
|
780
|
|
|
693
|
@LazyProperty
|
|
781
|
@LazyProperty
|
|
694
|
def content(self):
|
|
782
|
def content(self):
|
|
695
|
raise NodeError(
|
|
783
|
raise NodeError(f"{self} represents a dir and has no `content` attribute")
|
|
696
|
"%s represents a dir and has no `content` attribute" % self)
|
|
|
|
|
697
|
|
|
784
|
|
|
698
|
@LazyProperty
|
|
785
|
@LazyProperty
|
|
699
|
def nodes(self):
|
|
786
|
def nodes(self):
|
|
700
|
if self.commit:
|
|
787
|
if self.commit:
|
|
701
|
nodes = self.commit.get_nodes(self.path)
|
|
788
|
nodes = self.commit.get_nodes(self.path, pre_load=self.default_pre_load)
|
|
702
|
else:
|
|
789
|
else:
|
|
703
|
nodes = self._nodes
|
|
790
|
nodes = self._nodes
|
|
704
|
self._nodes_dict = dict((node.path, node) for node in nodes)
|
|
791
|
self._nodes_dict = dict((node.path, node) for node in nodes)
|
|
@@
-712,10
+799,6
b' class DirNode(Node):'
|
|
712
|
def dirs(self):
|
|
799
|
def dirs(self):
|
|
713
|
return sorted((node for node in self.nodes if node.is_dir()))
|
|
800
|
return sorted((node for node in self.nodes if node.is_dir()))
|
|
714
|
|
|
801
|
|
|
715
|
def __iter__(self):
|
|
|
|
|
716
|
for node in self.nodes:
|
|
|
|
|
717
|
yield node
|
|
|
|
|
718
|
|
|
|
|
|
719
|
def get_node(self, path):
|
|
802
|
def get_node(self, path):
|
|
720
|
"""
|
|
803
|
"""
|
|
721
|
Returns node from within this particular ``DirNode``, so it is now
|
|
804
|
Returns node from within this particular ``DirNode``, so it is now
|
|
@@
-755,7
+838,7
b' class DirNode(Node):'
|
|
755
|
else:
|
|
838
|
else:
|
|
756
|
raise KeyError
|
|
839
|
raise KeyError
|
|
757
|
except KeyError:
|
|
840
|
except KeyError:
|
|
758
|
raise NodeError("Node does not exist at %s" % path)
|
|
841
|
raise NodeError(f"Node does not exist at {path}")
|
|
759
|
|
|
842
|
|
|
760
|
@LazyProperty
|
|
843
|
@LazyProperty
|
|
761
|
def state(self):
|
|
844
|
def state(self):
|
|
@@
-780,8
+863,8
b' class DirNode(Node):'
|
|
780
|
"related commit attribute")
|
|
863
|
"related commit attribute")
|
|
781
|
|
|
864
|
|
|
782
|
def __repr__(self):
|
|
865
|
def __repr__(self):
|
|
783
|
return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
|
|
866
|
short_id = getattr(self.commit, 'short_id', '')
|
|
784
|
getattr(self.commit, 'short_id', ''))
|
|
867
|
return f'<{self.__class__.__name__} {self.path!r} @ {short_id}>'
|
|
785
|
|
|
868
|
|
|
786
|
|
|
869
|
|
|
787
|
class RootNode(DirNode):
|
|
870
|
class RootNode(DirNode):
|
|
@@
-790,10
+873,10
b' class RootNode(DirNode):'
|
|
790
|
"""
|
|
873
|
"""
|
|
791
|
|
|
874
|
|
|
792
|
def __init__(self, nodes=(), commit=None):
|
|
875
|
def __init__(self, nodes=(), commit=None):
|
|
793
|
super(RootNode, self).__init__(path='', nodes=nodes, commit=commit)
|
|
876
|
super(RootNode, self).__init__(path=b'', nodes=nodes, commit=commit)
|
|
794
|
|
|
877
|
|
|
795
|
def __repr__(self):
|
|
878
|
def __repr__(self):
|
|
796
|
return '<%s>' % self.__class__.__name__
|
|
879
|
return f'<{self.__class__.__name__}>'
|
|
797
|
|
|
880
|
|
|
798
|
|
|
881
|
|
|
799
|
class SubModuleNode(Node):
|
|
882
|
class SubModuleNode(Node):
|
|
@@
-814,8
+897,8
b' class SubModuleNode(Node):'
|
|
814
|
self.url = url or self._extract_submodule_url()
|
|
897
|
self.url = url or self._extract_submodule_url()
|
|
815
|
|
|
898
|
|
|
816
|
def __repr__(self):
|
|
899
|
def __repr__(self):
|
|
817
|
return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
|
|
900
|
short_id = getattr(self.commit, 'short_id', '')
|
|
818
|
getattr(self.commit, 'short_id', ''))
|
|
901
|
return f'<{self.__class__.__name__} {self.path!r} @ {short_id}>'
|
|
819
|
|
|
902
|
|
|
820
|
def _extract_submodule_url(self):
|
|
903
|
def _extract_submodule_url(self):
|
|
821
|
# TODO: find a way to parse gits submodule file and extract the
|
|
904
|
# TODO: find a way to parse gits submodule file and extract the
|
|
@@
-828,27
+911,31
b' class SubModuleNode(Node):'
|
|
828
|
Returns name of the node so if its path
|
|
911
|
Returns name of the node so if its path
|
|
829
|
then only last part is returned.
|
|
912
|
then only last part is returned.
|
|
830
|
"""
|
|
913
|
"""
|
|
831
|
org = safe_unicode(self.path.rstrip('/').split('/')[-1])
|
|
914
|
org = safe_str(self.path.rstrip('/').split('/')[-1])
|
|
832
|
return '%s @ %s' % (org, self.commit.short_id)
|
|
915
|
return f'{org} @ {self.commit.short_id}'
|
|
833
|
|
|
916
|
|
|
834
|
|
|
917
|
|
|
835
|
class LargeFileNode(FileNode):
|
|
918
|
class LargeFileNode(FileNode):
|
|
836
|
|
|
919
|
|
|
837
|
def __init__(self, path, url=None, commit=None, alias=None, org_path=None):
|
|
920
|
def __init__(self, path, url=None, commit=None, alias=None, org_path=None):
|
|
838
|
self.path = path
|
|
921
|
self._validate_path(path) # can throw exception if path is invalid
|
|
839
|
self.org_path = org_path
|
|
922
|
self.org_path = org_path # as stored in VCS as LF pointer
|
|
|
|
|
923
|
|
|
|
|
|
924
|
self.bytes_path = path.rstrip(b'/') # store for __repr__
|
|
|
|
|
925
|
self.path = safe_str(self.bytes_path) # we store paths as str
|
|
|
|
|
926
|
|
|
840
|
self.kind = NodeKind.LARGEFILE
|
|
927
|
self.kind = NodeKind.LARGEFILE
|
|
841
|
self.alias = alias
|
|
928
|
self.alias = alias
|
|
842
|
self._content = ''
|
|
929
|
self._content = b''
|
|
843
|
|
|
930
|
|
|
844
|
def _validate_path(self, path):
|
|
931
|
def _validate_path(self, path: bytes):
|
|
845
|
"""
|
|
932
|
"""
|
|
846
|
we override check since the LargeFileNode path is system absolute
|
|
933
|
we override check since the LargeFileNode path is system absolute, but we check for bytes only
|
|
847
|
"""
|
|
934
|
"""
|
|
848
|
pass
|
|
935
|
self._assert_bytes(path)
|
|
849
|
|
|
936
|
|
|
850
|
def __repr__(self):
|
|
937
|
def __repr__(self):
|
|
851
|
return '<%s %r>' % (self.__class__.__name__, self.path)
|
|
938
|
return f'<{self.__class__.__name__} {self.org_path} -> {self.path!r}>'
|
|
852
|
|
|
939
|
|
|
853
|
@LazyProperty
|
|
940
|
@LazyProperty
|
|
854
|
def size(self):
|
|
941
|
def size(self):
|