##// END OF EJS Templates
nodes: fetching largefiles content is commit independent because the pointer is...
marcink -
r1633:00bda173 stable
parent child Browse files
Show More
@@ -1,800 +1,797 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2014-2017 RhodeCode GmbH
3 # Copyright (C) 2014-2017 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 """
21 """
22 Module holding everything related to vcs nodes, with vcs2 architecture.
22 Module holding everything related to vcs nodes, with vcs2 architecture.
23 """
23 """
24
24
25 import os
25 import os
26 import stat
26 import stat
27
27
28 from zope.cachedescriptors.property import Lazy as LazyProperty
28 from zope.cachedescriptors.property import Lazy as LazyProperty
29
29
30 from rhodecode.config.conf import LANGUAGES_EXTENSIONS_MAP
30 from rhodecode.config.conf import LANGUAGES_EXTENSIONS_MAP
31 from rhodecode.lib.utils import safe_unicode, safe_str
31 from rhodecode.lib.utils import safe_unicode, safe_str
32 from rhodecode.lib.utils2 import md5
32 from rhodecode.lib.utils2 import md5
33 from rhodecode.lib.vcs import path as vcspath
33 from rhodecode.lib.vcs import path as vcspath
34 from rhodecode.lib.vcs.backends.base import EmptyCommit, FILEMODE_DEFAULT
34 from rhodecode.lib.vcs.backends.base import EmptyCommit, FILEMODE_DEFAULT
35 from rhodecode.lib.vcs.conf.mtypes import get_mimetypes_db
35 from rhodecode.lib.vcs.conf.mtypes import get_mimetypes_db
36 from rhodecode.lib.vcs.exceptions import NodeError, RemovedFileNodeError
36 from rhodecode.lib.vcs.exceptions import NodeError, RemovedFileNodeError
37
37
# Name prefix associated with Mercurial largefiles entries.
# NOTE(review): presumably the directory holding largefile pointers - confirm
# against the code that consumes this constant.
LARGEFILE_PREFIX = '.hglf'
39
39
40
40
class NodeKind:
    """
    Integer constants identifying what kind of entry a node represents.
    """
    SUBMODULE = -1  # compared against in ``Node.is_submodule``
    DIR = 1         # compared against in ``Node.is_dir`` / ``Node.is_root``
    FILE = 2        # compared against in ``Node.is_file``
    LARGEFILE = 3   # compared against in ``Node.is_largefile``
46
46
47
47
class NodeState:
    """
    Unicode labels describing the state of a node within a commit.
    """
    ADDED = u'added'
    CHANGED = u'changed'
    NOT_CHANGED = u'not changed'
    REMOVED = u'removed'
53
53
54
54
class NodeGeneratorBase(object):
    """
    Lazy container shared by the added, changed and removed file node
    generators. Only the paths are stored; node objects are created through
    ``cs.get_node`` when the container is iterated, sliced or called.

    ``len()`` works on the stored paths and never creates a node.
    """

    def __init__(self, current_paths, cs):
        """
        :param current_paths: sequence of node paths to wrap
        :param cs: object exposing ``get_node(path)``, used to build nodes
        """
        self.cs = cs
        self.current_paths = current_paths

    def __call__(self):
        """
        Returns a list with every node, in path order.
        """
        return list(self)

    def __getslice__(self, i, j):
        """
        Yields nodes for the paths in ``current_paths[i:j]``.
        """
        for node_path in self.current_paths[i:j]:
            yield self.cs.get_node(node_path)

    def __len__(self):
        """
        Returns the number of stored paths.
        """
        return len(self.current_paths)

    def __iter__(self):
        """
        Yields a node for each stored path.
        """
        for node_path in self.current_paths:
            yield self.cs.get_node(node_path)
80
80
81
81
class AddedFileNodesGenerator(NodeGeneratorBase):
    """
    Lazy collection of the files added in the current commit.
    """
86
86
87
87
class ChangedFileNodesGenerator(NodeGeneratorBase):
    """
    Lazy collection of the files changed in the current commit.
    """
92
92
93
93
class RemovedFileNodesGenerator(NodeGeneratorBase):
    """
    Lazy collection of the files removed in the current commit.

    Nodes are built directly as ``RemovedFileNode`` instances from the stored
    paths instead of being looked up through the commit.
    """

    def __iter__(self):
        """
        Yields a ``RemovedFileNode`` for each stored path.
        """
        for removed_path in self.current_paths:
            yield RemovedFileNode(path=removed_path)

    def __getslice__(self, i, j):
        """
        Yields ``RemovedFileNode`` objects for ``current_paths[i:j]``.
        """
        for removed_path in self.current_paths[i:j]:
            yield RemovedFileNode(path=removed_path)
105
105
106
106
class Node(object):
    """
    Simplest class representing a file or a directory in a repository. SCM
    backends should use ``FileNode`` and ``DirNode`` subclasses rather than
    ``Node`` directly.

    Node's ``path`` cannot start with a slash as we operate on *relative*
    paths only. Moreover, every single node is identified by its ``path``
    attribute, so it cannot end with a slash either; trailing slashes are
    stripped on initialization.
    """

    # commit this node is bound to, if any
    commit = None

    def __init__(self, path, kind):
        """
        :param path: path of the node, relative to the repository root
        :param kind: one of the ``NodeKind`` values
        :raises NodeError: if ``path`` starts with a slash, or if it is
            empty while ``kind`` is not ``NodeKind.DIR``
        """
        self._validate_path(path)  # can throw exception if path is invalid
        self.path = safe_str(path.rstrip('/'))  # we store paths as str
        if path == '' and kind != NodeKind.DIR:
            raise NodeError("Only DirNode and its subclasses may be "
                            "initialized with empty path")
        self.kind = kind

        if self.is_root() and not self.is_dir():
            raise NodeError("Root node cannot be FILE kind")

    def _validate_path(self, path):
        """
        Raises ``NodeError`` if ``path`` starts with a slash.
        """
        if path.startswith('/'):
            raise NodeError(
                "Cannot initialize Node objects with slash at "
                "the beginning as only relative paths are supported. "
                "Got %s" % (path,))

    @LazyProperty
    def parent(self):
        """
        Returns the parent node, or ``None`` when the parent path is empty.
        The node is fetched from ``commit`` when one is set; otherwise a
        bare ``DirNode`` is created for the parent path.
        """
        parent_path = self.get_parent_path()
        if not parent_path:
            return None
        if self.commit:
            return self.commit.get_node(parent_path)
        return DirNode(parent_path)

    @LazyProperty
    def unicode_path(self):
        """
        Returns ``path`` converted with ``safe_unicode``.
        """
        return safe_unicode(self.path)

    @LazyProperty
    def dir_path(self):
        """
        Returns name of the directory from full path of this vcs node. Empty
        string is returned if there's no directory in the path
        """
        _parts = self.path.rstrip('/').rsplit('/', 1)
        if len(_parts) == 2:
            return safe_unicode(_parts[0])
        return u''

    @LazyProperty
    def name(self):
        """
        Returns the name of the node, i.e. only the last segment of its
        path.
        """
        return safe_unicode(self.path.rstrip('/').split('/')[-1])

    @property
    def kind(self):
        """
        Kind of the node (a ``NodeKind`` value). It can be assigned only
        once; a second assignment raises ``NodeError``.
        """
        return self._kind

    @kind.setter
    def kind(self, kind):
        if hasattr(self, '_kind'):
            raise NodeError("Cannot change node's kind")
        self._kind = kind
        # Post setter check (path's trailing slash)
        if self.path.endswith('/'):
            raise NodeError("Node's path cannot end with slash")

    def __cmp__(self, other):
        """
        Comparator ordering nodes by kind first and by name second, needed
        for quick list sorting.
        """
        kind_cmp = cmp(self.kind, other.kind)
        if kind_cmp:
            return kind_cmp
        return cmp(self.name, other.name)

    def __eq__(self, other):
        """
        Two nodes are equal when their ``name``, ``path`` and ``kind`` match
        and, for file nodes, their ``content`` matches. Any other kind is
        compared by the sorted paths of the direct children (``nodes``).
        """
        for attr in ['name', 'path', 'kind']:
            if getattr(self, attr) != getattr(other, attr):
                return False
        if self.is_file():
            if self.content != other.content:
                return False
        else:
            # For DirNode's check without entering each dir
            self_nodes_paths = sorted(n.path for n in self.nodes)
            # must read the children of ``other``; reading ``self.nodes``
            # here would make the check always pass
            other_nodes_paths = sorted(n.path for n in other.nodes)
            if self_nodes_paths != other_nodes_paths:
                return False
        return True

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.path)

    def __str__(self):
        return self.__repr__()

    def __unicode__(self):
        return self.name

    def get_parent_path(self):
        """
        Returns node's parent path (with a trailing slash) or empty string
        if node is root.
        """
        if self.is_root():
            return ''
        return vcspath.dirname(self.path.rstrip('/')) + '/'

    def is_file(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.FILE``, ``False``
        otherwise.
        """
        return self.kind == NodeKind.FILE

    def is_dir(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.DIR``, ``False``
        otherwise.
        """
        return self.kind == NodeKind.DIR

    def is_root(self):
        """
        Returns ``True`` if node is a root node (a directory with an empty
        path) and ``False`` otherwise.
        """
        return self.kind == NodeKind.DIR and self.path == ''

    def is_submodule(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.SUBMODULE``, ``False``
        otherwise.
        """
        return self.kind == NodeKind.SUBMODULE

    def is_largefile(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.LARGEFILE``, ``False``
        otherwise
        """
        return self.kind == NodeKind.LARGEFILE

    def is_link(self):
        """
        Returns the result of ``commit.is_link`` for this path, or ``False``
        when the node is not bound to a commit.
        """
        if self.commit:
            return self.commit.is_link(self.path)
        return False

    # The state helpers below compare by value rather than identity so they
    # also work when ``state`` yields an equal string that is not the very
    # same object as the ``NodeState`` constant.

    @LazyProperty
    def added(self):
        """``True`` if the node's state is ``NodeState.ADDED``."""
        return self.state == NodeState.ADDED

    @LazyProperty
    def changed(self):
        """``True`` if the node's state is ``NodeState.CHANGED``."""
        return self.state == NodeState.CHANGED

    @LazyProperty
    def not_changed(self):
        """``True`` if the node's state is ``NodeState.NOT_CHANGED``."""
        return self.state == NodeState.NOT_CHANGED

    @LazyProperty
    def removed(self):
        """``True`` if the node's state is ``NodeState.REMOVED``."""
        return self.state == NodeState.REMOVED
282
282
283
283
class FileNode(Node):
    """
    Class representing file nodes.

    :attribute: path: path to the node, relative to repository's root
    :attribute: content: if given arbitrary sets content of the file
    :attribute: commit: if given, content is fetched from it lazily, the
        first time it is accessed
    :attribute: mode: stat mode for a node. Default is `FILEMODE_DEFAULT`.
    """
    # attribute names that must never be eagerly loaded via ``pre_load``
    _filter_pre_load = []

    def __init__(self, path, content=None, commit=None, mode=None, pre_load=None):
        """
        Only one of ``content`` and ``commit`` may be given. Passing both
        would raise ``NodeError`` exception.

        :param path: relative path to the node
        :param content: content may be passed to constructor
        :param commit: if given, will use it to lazily fetch content
        :param mode: ST_MODE (i.e. 0100644)
        :param pre_load: optional list of attribute names to compute right
            away; names listed in ``_filter_pre_load`` are skipped
        """
        if content and commit:
            raise NodeError("Cannot use both content and commit")
        super(FileNode, self).__init__(path, kind=NodeKind.FILE)
        self.commit = commit
        self._content = content
        self._mode = mode or FILEMODE_DEFAULT

        self._set_bulk_properties(pre_load)

    def _set_bulk_properties(self, pre_load):
        """
        Eagerly resolves the attributes named in ``pre_load`` and stores the
        results in the instance ``__dict__``. Callable attributes are called
        (without arguments) and their result is stored instead.
        """
        if not pre_load:
            return
        pre_load = [entry for entry in pre_load
                    if entry not in self._filter_pre_load]
        if not pre_load:
            return

        for attr_name in pre_load:
            result = getattr(self, attr_name)
            if callable(result):
                result = result()
            self.__dict__[attr_name] = result

    @LazyProperty
    def mode(self):
        """
        Returns lazily mode of the FileNode. If `commit` is not set, would
        use value given at initialization or `FILEMODE_DEFAULT` (default).
        """
        if self.commit:
            return self.commit.get_file_mode(self.path)
        return self._mode

    @LazyProperty
    def raw_bytes(self):
        """
        Returns lazily the raw bytes of the FileNode. When a commit is set
        and no content is stored yet, the content is fetched from the commit
        and kept in ``_content``.
        """
        if self.commit and self._content is None:
            self._content = self.commit.get_file_content(self.path)
        return self._content

    @LazyProperty
    def md5(self):
        """
        Returns md5 of the file node.
        """
        return md5(self.raw_bytes)

    @LazyProperty
    def content(self):
        """
        Returns lazily content of the FileNode. Binary content is returned
        as is; anything else is passed through ``safe_unicode``.
        """
        content = self.raw_bytes

        if self.is_binary:
            return content
        return safe_unicode(content)

    @LazyProperty
    def size(self):
        """
        Returns the file size reported by the commit.

        :raises NodeError: if the node is not bound to a commit
        """
        if self.commit:
            return self.commit.get_file_size(self.path)
        raise NodeError(
            "Cannot retrieve size of the file without related "
            "commit attribute")

    @LazyProperty
    def message(self):
        """
        Returns the message of ``last_commit``.

        :raises NodeError: if the node is not bound to a commit
        """
        if self.commit:
            return self.last_commit.message
        raise NodeError(
            "Cannot retrieve message of the file without related "
            "commit attribute")

    @LazyProperty
    def last_commit(self):
        """
        Returns the result of ``commit.get_file_commit`` for this path, with
        author, date and message pre-loaded.

        :raises NodeError: if the node is not bound to a commit
        """
        if self.commit:
            pre_load = ["author", "date", "message"]
            return self.commit.get_file_commit(self.path, pre_load=pre_load)
        raise NodeError(
            "Cannot retrieve last commit of the file without "
            "related commit attribute")

    def get_mimetype(self):
        """
        Returns a ``(mimetype, encoding)`` pair. If ``_mimetype`` attribute
        is available, it will be returned (backends which store mimetypes or
        can easily recognize them, should set this private attribute to
        indicate that type should *NOT* be calculated). Otherwise the type
        is guessed from the file name, falling back on the binary check and
        on pygments when the name is not recognized.

        :raises NodeError: if ``_mimetype`` is set but is not a two element
            list or tuple
        """

        if hasattr(self, '_mimetype'):
            if (isinstance(self._mimetype, (tuple, list,)) and
                    len(self._mimetype) == 2):
                return self._mimetype
            else:
                raise NodeError('given _mimetype attribute must be an 2 '
                                'element list or tuple')

        db = get_mimetypes_db()
        mtype, encoding = db.guess_type(self.name)

        if mtype is None:
            # name not recognized: pick a generic type from the content
            if self.is_binary:
                mtype = 'application/octet-stream'
            else:
                mtype = 'text/plain'
            encoding = None

            # try with pygments
            try:
                from pygments.lexers import get_lexer_for_filename
                mt = get_lexer_for_filename(self.name).mimetypes
            except Exception:
                # best effort only, keep the generic type chosen above
                mt = None

            if mt:
                mtype = mt[0]

        return mtype, encoding

    @LazyProperty
    def mimetype(self):
        """
        Wrapper around full mimetype info. It returns only type of fetched
        mimetype without the encoding part. use get_mimetype function to fetch
        full set of (type,encoding)
        """
        return self.get_mimetype()[0]

    @LazyProperty
    def mimetype_main(self):
        """
        Returns the part of ``mimetype`` before the first slash.
        """
        return self.mimetype.split('/')[0]

    @classmethod
    def get_lexer(cls, filename, content=None):
        """
        Returns a pygments lexer for ``filename``. Tries, in order: a guess
        from file name and content, the ``LANGUAGES_EXTENSIONS_MAP`` entry
        for the file extension, and finally a plain ``TextLexer``.
        """
        from pygments import lexers

        extension = filename.split('.')[-1]
        lexer = None

        try:
            lexer = lexers.guess_lexer_for_filename(
                filename, content, stripnl=False)
        except lexers.ClassNotFound:
            lexer = None

        # try our EXTENSION_MAP
        if not lexer:
            try:
                lexer_class = LANGUAGES_EXTENSIONS_MAP.get(extension)
                if lexer_class:
                    lexer = lexers.get_lexer_by_name(lexer_class[0])
            except lexers.ClassNotFound:
                lexer = None

        if not lexer:
            lexer = lexers.TextLexer(stripnl=False)

        return lexer

    @LazyProperty
    def lexer(self):
        """
        Returns the pygments lexer obtained from ``get_lexer`` for this
        file's name and content.
        """
        return self.get_lexer(self.name, self.content)

    @LazyProperty
    def lexer_alias(self):
        """
        Returns first alias of the lexer guessed for this file.
        """
        return self.lexer.aliases[0]

    @LazyProperty
    def history(self):
        """
        Returns a list of commit for this file in which the file was changed

        :raises NodeError: if the node is not bound to a commit
        """
        if self.commit is None:
            raise NodeError('Unable to get commit for this FileNode')
        return self.commit.get_file_history(self.path)

    @LazyProperty
    def annotate(self):
        """
        Returns a list of three element tuples with lineno, commit and line

        :raises NodeError: if the node is not bound to a commit
        """
        if self.commit is None:
            raise NodeError('Unable to get commit for this FileNode')
        pre_load = ["author", "date", "message"]
        return self.commit.get_file_annotate(self.path, pre_load=pre_load)

    @LazyProperty
    def state(self):
        """
        Returns ``NodeState.ADDED`` or ``NodeState.CHANGED`` when the path is
        among the commit's added or changed nodes, ``NodeState.NOT_CHANGED``
        otherwise.

        :raises NodeError: if the node is not bound to a commit
        """
        if not self.commit:
            raise NodeError(
                "Cannot check state of the node if it's not "
                "linked with commit")
        elif self.path in (node.path for node in self.commit.added):
            return NodeState.ADDED
        elif self.path in (node.path for node in self.commit.changed):
            return NodeState.CHANGED
        else:
            return NodeState.NOT_CHANGED

    @LazyProperty
    def is_binary(self):
        """
        Returns True if file has binary content (it contains a NUL byte),
        False otherwise, including when there is no content at all.
        """
        raw = self.raw_bytes
        # bool() so that empty or missing content yields False instead of
        # leaking '' or None to the caller
        return bool(raw and '\0' in raw)

    @LazyProperty
    def extension(self):
        """Returns filenode extension"""
        return self.name.split('.')[-1]

    @property
    def is_executable(self):
        """
        Returns ``True`` if file has executable flag turned on.
        """
        return bool(self.mode & stat.S_IXUSR)

    def get_largefile_node(self):
        """
        Try to return a Mercurial FileNode from this node. It does internal
        checks inside largefile store, if that file exist there it will
        create special instance of LargeFileNode which can get content from
        LF store.

        Returns ``None`` when the node is not bound to a commit.
        """
        if self.commit:
            return self.commit.get_largefile_node(self.path)

    def lines(self, count_empty=False):
        """
        Returns a two element tuple with line statistics of the file.

        With ``count_empty`` the tuple is ``(all lines, non-empty lines)``,
        where a line counts as empty when it consists of a single newline.
        Without it both elements hold the number of newline characters, or
        ``1`` for non-empty content without any newline. Binary files always
        give ``(0, 0)``.
        """
        all_lines, empty_lines = 0, 0

        if not self.is_binary:
            content = self.content
            if count_empty:
                for line in content.splitlines(True):
                    if line == '\n':
                        empty_lines += 1
                    all_lines += 1

                return all_lines, all_lines - empty_lines
            else:
                # fast method
                empty_lines = all_lines = content.count('\n')
                if all_lines == 0 and content:
                    # one-line without a newline
                    empty_lines = all_lines = 1

        return all_lines, empty_lines

    def __repr__(self):
        return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
                                 getattr(self.commit, 'short_id', ''))
579
579
580
580
class RemovedFileNode(FileNode):
    """
    Placeholder FileNode for a file that has been removed.

    Only the attributes named in ``ALLOWED_ATTRIBUTES`` and names starting
    with an underscore can be read; reading anything else raises
    ``RemovedFileNodeError``.
    """
    ALLOWED_ATTRIBUTES = [
        'name', 'path', 'state', 'is_root', 'is_file', 'is_dir', 'kind',
        'added', 'changed', 'not_changed', 'removed'
    ]

    def __init__(self, path):
        """
        :param path: relative path to the node
        """
        super(RemovedFileNode, self).__init__(path=path)

    def __getattribute__(self, attr):
        """
        Gate attribute access: underscore-prefixed and whitelisted names go
        through the normal lookup, everything else is rejected.
        """
        is_private = attr.startswith('_')
        if not is_private and attr not in RemovedFileNode.ALLOWED_ATTRIBUTES:
            raise RemovedFileNodeError(
                "Cannot access attribute %s on RemovedFileNode" % attr)
        return super(RemovedFileNode, self).__getattribute__(attr)

    @LazyProperty
    def state(self):
        """State of a removed node is always ``NodeState.REMOVED``."""
        return NodeState.REMOVED
607
607
608
608
class DirNode(Node):
    """
    DirNode stores the list of files and directories within this node.
    Nodes may be used standalone, but within repository context they
    lazily fetch data from the same repository's commit.
    """

    def __init__(self, path, nodes=(), commit=None):
        """
        Only one of ``nodes`` and ``commit`` may be given. Passing both
        raises ``NodeError``.

        :param path: relative path to the node
        :param nodes: content may be passed to the constructor
        :param commit: if given, it is used to lazily fetch content
        """
        if nodes and commit:
            raise NodeError("Cannot use both nodes and commit")
        super(DirNode, self).__init__(path, NodeKind.DIR)
        self.commit = commit
        self._nodes = nodes

    @LazyProperty
    def content(self):
        """Directories have no content; always raises ``NodeError``."""
        raise NodeError(
            "%s represents a dir and has no `content` attribute" % self)

    @LazyProperty
    def nodes(self):
        """
        Sorted list of the direct children. They come from
        ``commit.get_nodes(path)`` when a commit is attached, otherwise
        from the nodes passed to the constructor. As a side effect the
        ``_nodes_dict`` path -> node mapping is (re)built.
        """
        if self.commit:
            children = self.commit.get_nodes(self.path)
        else:
            children = self._nodes
        self._nodes_dict = {child.path: child for child in children}
        return sorted(children)

    @LazyProperty
    def files(self):
        """Sorted list of the children for which ``is_file()`` is true."""
        return sorted([child for child in self.nodes if child.is_file()])

    @LazyProperty
    def dirs(self):
        """Sorted list of the children for which ``is_dir()`` is true."""
        return sorted([child for child in self.nodes if child.is_dir()])

    def __iter__(self):
        """Iterate over the children in ``nodes`` order."""
        for child in self.nodes:
            yield child

    def get_node(self, path):
        """
        Returns a node from within this particular ``DirNode``. A single
        lookup only reaches direct children, so it is not allowed to
        fetch, e.g., the node located at 'docs/api/index.rst' straight
        from the children of node 'docs'. Deeper nodes are reached by
        going through the nodes in between - this would work::

           docs = root.get_node('docs')
           docs.get_node('api').get_node('index.rst')

        A multi-segment ``path`` is resolved that way, one segment at a
        time, and requires the node to have a commit.

        :param path: path relative to the current node
        :raises NodeError: for an empty path, for a multi-segment path
            on a node without commit, or when no such node exists

        .. note::
           To access nodes lazily (as in the example above) the node has
           to be initialized with a related commit object - without it
           the node is out of context and may know nothing about anything
           other than the nearest (located at the same level) nodes.
        """
        try:
            path = path.rstrip('/')
            if path == '':
                raise NodeError("Cannot retrieve node without path")
            self.nodes  # access nodes first in order to set _nodes_dict
            segments = path.split('/')
            if len(segments) > 1:
                if self.commit is None:
                    raise NodeError(
                        "Cannot access deeper nodes without commit")
                head = segments[0]
                remainder = '/'.join(segments[1:])
                return self.get_node(head).get_node(remainder)

            # single segment: children are keyed by their full path
            if self.is_root():
                path = segments[0]
            else:
                path = '/'.join((self.path, segments[0]))
            return self._nodes_dict[path]
        except KeyError:
            raise NodeError("Node does not exist at %s" % path)

    @LazyProperty
    def state(self):
        """Directories carry no state; always raises ``NodeError``."""
        raise NodeError("Cannot access state of DirNode")

    @LazyProperty
    def size(self):
        """
        Sum of ``size`` over every file yielded by
        ``commit.walk(path)``.
        """
        return sum(
            file_node.size
            for _root, _dirs, file_nodes in self.commit.walk(self.path)
            for file_node in file_nodes)

    def __repr__(self):
        short_id = getattr(self.commit, 'short_id', '')
        return '<%s %r @ %s>' % (
            self.__class__.__name__, self.path, short_id)
715
715
716
716
class RootNode(DirNode):
    """
    The ``DirNode`` at the top of the repository tree; its path is the
    empty string.
    """

    def __init__(self, nodes=(), commit=None):
        """
        :param nodes: content may be passed to the constructor
        :param commit: if given, it is used to lazily fetch content
        """
        # the root is always addressed by the empty path
        super(RootNode, self).__init__('', nodes, commit)

    def __repr__(self):
        cls_name = self.__class__.__name__
        return '<%s>' % cls_name
727
727
728
728
class SubModuleNode(Node):
    """
    Represents a SubModule of Git or a SubRepo of Mercurial.
    """
    is_binary = False
    size = 0

    def __init__(self, name, url=None, commit=None, alias=None):
        """
        :param name: path of the submodule; stored as ``path``
        :param url: linking URL; when falsy, ``_extract_submodule_url()``
            supplies the value instead
        :param commit: commit reference; its ``str()`` form is wrapped in
            an ``EmptyCommit``
        :param alias: backend alias, stored and passed on to
            ``EmptyCommit``
        """
        self.path = name
        self.kind = NodeKind.SUBMODULE
        self.alias = alias

        # we have to use EmptyCommit here since this can point to svn/git/hg
        # submodules we cannot get from repository
        self.commit = EmptyCommit(str(commit), alias=alias)
        extracted_url = url or self._extract_submodule_url()
        self.url = extracted_url

    def __repr__(self):
        short_id = getattr(self.commit, 'short_id', '')
        return '<%s %r @ %s>' % (
            self.__class__.__name__, self.path, short_id)

    def _extract_submodule_url(self):
        # TODO: find a way to parse gits submodule file and extract the
        # linking URL
        # until then the node's own path doubles as the URL
        return self.path

    @LazyProperty
    def name(self):
        """
        Display name: the last segment of the path followed by
        `` @ `` and the commit's ``short_id``.
        """
        trimmed = self.path.rstrip('/')
        last_segment = trimmed.split('/')[-1]
        org = safe_unicode(last_segment)
        return u'%s @ %s' % (org, self.commit.short_id)
763
763
764
764
class LargeFileNode(FileNode):
    """
    FileNode whose data lives in a file on the local filesystem.

    ``path`` is used directly with ``os.stat`` and ``open``, while
    ``org_path`` is what the node reports as its ``name``.
    """

    def __init__(self, path, url=None, commit=None, alias=None, org_path=None):
        """
        :param path: filesystem path of the stored file
        :param url: accepted for signature compatibility; not stored
        :param commit: accepted for signature compatibility; not stored
        :param alias: backend alias, stored as ``alias``
        :param org_path: original path of the largefile, reported as ``name``
        """
        self.path = path
        self.org_path = org_path
        self.kind = NodeKind.LARGEFILE
        self.alias = alias

    def _validate_path(self, path):
        """
        we override check since the LargeFileNode path is system absolute
        """
        pass

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.path)

    @LazyProperty
    def size(self):
        """Size in bytes of the file at ``path``, as reported by ``os.stat``."""
        return os.stat(self.path).st_size

    @LazyProperty
    def raw_bytes(self):
        """
        Raw content of the stored file, read in binary mode.

        Reading is commit independent: the constructor stores neither a
        commit nor a ``_content`` attribute, so the data is always taken
        from the file at ``path``.
        """
        with open(self.path, 'rb') as f:
            return f.read()

    @LazyProperty
    def name(self):
        """
        Overwrites name to be the org lf path
        """
        return self.org_path
General Comments 0
You need to be logged in to leave comments. Login now