##// END OF EJS Templates
lazy load pygments lib so we can use VCS without it installed
marcink -
r3876:c5dd289c beta
parent child Browse files
Show More
@@ -1,618 +1,616 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 vcs.nodes
4 4 ~~~~~~~~~
5 5
6 6 Module holding everything related to vcs nodes.
7 7
8 8 :created_on: Apr 8, 2010
9 9 :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
10 10 """
11 11 import os
12 12 import stat
13 13 import posixpath
14 14 import mimetypes
15 15
16 from pygments import lexers
17
18 16 from rhodecode.lib.vcs.backends.base import EmptyChangeset
19 17 from rhodecode.lib.vcs.exceptions import NodeError, RemovedFileNodeError
20 18 from rhodecode.lib.vcs.utils.lazy import LazyProperty
21 19 from rhodecode.lib.vcs.utils import safe_unicode
22 20
23 21
class NodeKind:
    """
    Integer constants telling what kind of object a node represents.
    """
    SUBMODULE, DIR, FILE = -1, 1, 2
28 26
29 27
class NodeState:
    """
    Unicode labels describing the state of a node within a changeset.
    """
    ADDED, CHANGED = u'added', u'changed'
    NOT_CHANGED, REMOVED = u'not changed', u'removed'
35 33
36 34
class NodeGeneratorBase(object):
    """
    Common base of the added, changed and removed filenode generators.

    Only the paths are stored; nodes are requested from the changeset one by
    one while iterating, slicing or calling the instance. ``len()`` is
    answered from the stored paths alone and creates no nodes.
    """

    def __init__(self, current_paths, cs):
        """
        :param current_paths: sequence of node paths
        :param cs: object whose ``get_node(path)`` builds the actual nodes
        """
        self.current_paths = current_paths
        self.cs = cs

    def __len__(self):
        return len(self.current_paths)

    def __iter__(self):
        for node_path in self.current_paths:
            yield self.cs.get_node(node_path)

    def __getslice__(self, i, j):
        # Python 2 slicing hook: yields nodes for ``current_paths[i:j]`` only
        selected = self.current_paths[i:j]
        for node_path in selected:
            yield self.cs.get_node(node_path)

    def __call__(self):
        # calling the generator materializes every node into a list
        return list(self)
62 60
63 61
class AddedFileNodesGenerator(NodeGeneratorBase):
    """
    Generator of the files added in the current changeset; all behaviour is
    inherited from ``NodeGeneratorBase``.
    """
69 67
70 68
class ChangedFileNodesGenerator(NodeGeneratorBase):
    """
    Generator of the files changed in the current changeset; all behaviour is
    inherited from ``NodeGeneratorBase``.
    """
76 74
77 75
class RemovedFileNodesGenerator(NodeGeneratorBase):
    """
    Generator of the files removed in the current changeset.

    Nodes are built directly as ``RemovedFileNode`` objects from the stored
    paths instead of being requested from the changeset.
    """

    def __getslice__(self, i, j):
        selected = self.current_paths[i:j]
        for removed_path in selected:
            yield RemovedFileNode(path=removed_path)

    def __iter__(self):
        for removed_path in self.current_paths:
            yield RemovedFileNode(path=removed_path)
89 87
90 88
class Node(object):
    """
    Simplest class representing file or directory on repository.  SCM backends
    should use ``FileNode`` and ``DirNode`` subclasses rather than ``Node``
    directly.

    Node's ``path`` cannot start with slash as we operate on *relative* paths
    only. Moreover, every single node is identified by the ``path`` attribute,
    so it cannot end with slash, too. Otherwise, path could lead to mistakes.
    """

    def __init__(self, path, kind):
        """
        :param path: relative path to the node; trailing slashes are stripped
        :param kind: one of the ``NodeKind`` constants
        :raises NodeError: if ``path`` starts with a slash, if ``path`` is
          empty for a kind other than ``NodeKind.DIR``
        """
        if path.startswith('/'):
            raise NodeError("Cannot initialize Node objects with slash at "
                            "the beginning as only relative paths are supported")
        self.path = path.rstrip('/')
        if path == '' and kind != NodeKind.DIR:
            raise NodeError("Only DirNode and its subclasses may be "
                            "initialized with empty path")
        self.kind = kind
        if self.is_root() and not self.is_dir():
            raise NodeError("Root node cannot be FILE kind")

    @LazyProperty
    def parent(self):
        """
        Returns the parent node or ``None`` for the root node. The parent is
        fetched from ``changeset`` when one is set, otherwise a standalone
        ``DirNode`` is created.
        """
        parent_path = self.get_parent_path()
        if parent_path:
            if self.changeset:
                return self.changeset.get_node(parent_path)
            # get_parent_path() gives '/' for top-level nodes; DirNode rejects
            # a leading slash, so strip it and build the root ('') instead
            return DirNode(parent_path.rstrip('/'))
        return None

    @LazyProperty
    def unicode_path(self):
        return safe_unicode(self.path)

    @LazyProperty
    def name(self):
        """
        Returns name of the node so if its path
        then only last part is returned.
        """
        return safe_unicode(self.path.rstrip('/').split('/')[-1])

    def _get_kind(self):
        return self._kind

    def _set_kind(self, kind):
        # kind is write-once: a second assignment is an error
        if hasattr(self, '_kind'):
            raise NodeError("Cannot change node's kind")
        else:
            self._kind = kind
            # Post setter check (path's trailing slash)
            if self.path.endswith('/'):
                raise NodeError("Node's path cannot end with slash")

    kind = property(_get_kind, _set_kind)

    def __cmp__(self, other):
        """
        Comparator using kind first and then name of the node, needed for
        quick list sorting.
        """
        kind_cmp = cmp(self.kind, other.kind)
        if kind_cmp:
            return kind_cmp
        return cmp(self.name, other.name)

    def __eq__(self, other):
        """
        Nodes are equal when name, path and kind match and, additionally,
        file nodes have the same content and directory nodes list the same
        child paths.
        """
        for attr in ['name', 'path', 'kind']:
            if getattr(self, attr) != getattr(other, attr):
                return False
        if self.is_file():
            if self.content != other.content:
                return False
        elif self.is_dir():
            # For DirNode's check without entering each dir
            self_nodes_paths = sorted(n.path for n in self.nodes)
            other_nodes_paths = sorted(n.path for n in other.nodes)
            if self_nodes_paths != other_nodes_paths:
                return False
        return True

    def __ne__(self, other):
        return not self.__eq__(other)

    # Original (misspelled) name of ``__ne__``; kept so that any explicit
    # caller keeps working.
    __nq__ = __ne__

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.path)

    def __str__(self):
        return self.__repr__()

    def __unicode__(self):
        return self.name

    def get_parent_path(self):
        """
        Returns node's parent path (always ending with a slash) or empty
        string if node is root.
        """
        if self.is_root():
            return ''
        return posixpath.dirname(self.path.rstrip('/')) + '/'

    def is_file(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.FILE``, ``False``
        otherwise.
        """
        return self.kind == NodeKind.FILE

    def is_dir(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.DIR``, ``False``
        otherwise.
        """
        return self.kind == NodeKind.DIR

    def is_root(self):
        """
        Returns ``True`` if node is a root node and ``False`` otherwise.
        """
        return self.kind == NodeKind.DIR and self.path == ''

    def is_submodule(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.SUBMODULE``, ``False``
        otherwise.
        """
        return self.kind == NodeKind.SUBMODULE

    # The predicates below rely on a ``state`` attribute provided by
    # subclasses; values are compared by equality, not identity.

    @LazyProperty
    def added(self):
        return self.state == NodeState.ADDED

    @LazyProperty
    def changed(self):
        return self.state == NodeState.CHANGED

    @LazyProperty
    def not_changed(self):
        return self.state == NodeState.NOT_CHANGED

    @LazyProperty
    def removed(self):
        return self.state == NodeState.REMOVED
237 235
238 236 class FileNode(Node):
239 237 """
240 238 Class representing file nodes.
241 239
242 240 :attribute: path: path to the node, relative to repostiory's root
243 241 :attribute: content: if given arbitrary sets content of the file
244 242 :attribute: changeset: if given, first time content is accessed, callback
245 243 :attribute: mode: octal stat mode for a node. Default is 0100644.
246 244 """
247 245
248 246 def __init__(self, path, content=None, changeset=None, mode=None):
249 247 """
250 248 Only one of ``content`` and ``changeset`` may be given. Passing both
251 249 would raise ``NodeError`` exception.
252 250
253 251 :param path: relative path to the node
254 252 :param content: content may be passed to constructor
255 253 :param changeset: if given, will use it to lazily fetch content
256 254 :param mode: octal representation of ST_MODE (i.e. 0100644)
257 255 """
258 256
259 257 if content and changeset:
260 258 raise NodeError("Cannot use both content and changeset")
261 259 super(FileNode, self).__init__(path, kind=NodeKind.FILE)
262 260 self.changeset = changeset
263 261 self._content = content
264 262 self._mode = mode or 0100644
265 263
266 264 @LazyProperty
267 265 def mode(self):
268 266 """
269 267 Returns lazily mode of the FileNode. If ``changeset`` is not set, would
270 268 use value given at initialization or 0100644 (default).
271 269 """
272 270 if self.changeset:
273 271 mode = self.changeset.get_file_mode(self.path)
274 272 else:
275 273 mode = self._mode
276 274 return mode
277 275
278 276 def _get_content(self):
279 277 if self.changeset:
280 278 content = self.changeset.get_file_content(self.path)
281 279 else:
282 280 content = self._content
283 281 return content
284 282
285 283 @property
286 284 def content(self):
287 285 """
288 286 Returns lazily content of the FileNode. If possible, would try to
289 287 decode content from UTF-8.
290 288 """
291 289 content = self._get_content()
292 290
293 291 if bool(content and '\0' in content):
294 292 return content
295 293 return safe_unicode(content)
296 294
297 295 @LazyProperty
298 296 def size(self):
299 297 if self.changeset:
300 298 return self.changeset.get_file_size(self.path)
301 299 raise NodeError("Cannot retrieve size of the file without related "
302 300 "changeset attribute")
303 301
304 302 @LazyProperty
305 303 def message(self):
306 304 if self.changeset:
307 305 return self.last_changeset.message
308 306 raise NodeError("Cannot retrieve message of the file without related "
309 307 "changeset attribute")
310 308
311 309 @LazyProperty
312 310 def last_changeset(self):
313 311 if self.changeset:
314 312 return self.changeset.get_file_changeset(self.path)
315 313 raise NodeError("Cannot retrieve last changeset of the file without "
316 314 "related changeset attribute")
317 315
318 316 def get_mimetype(self):
319 317 """
320 318 Mimetype is calculated based on the file's content. If ``_mimetype``
321 319 attribute is available, it will be returned (backends which store
322 320 mimetypes or can easily recognize them, should set this private
323 321 attribute to indicate that type should *NOT* be calculated).
324 322 """
325 323 if hasattr(self, '_mimetype'):
326 324 if (isinstance(self._mimetype, (tuple, list,)) and
327 325 len(self._mimetype) == 2):
328 326 return self._mimetype
329 327 else:
330 328 raise NodeError('given _mimetype attribute must be an 2 '
331 329 'element list or tuple')
332 330
333 331 mtype, encoding = mimetypes.guess_type(self.name)
334 332
335 333 if mtype is None:
336 334 if self.is_binary:
337 335 mtype = 'application/octet-stream'
338 336 encoding = None
339 337 else:
340 338 mtype = 'text/plain'
341 339 encoding = None
342 340 return mtype, encoding
343 341
344 342 @LazyProperty
345 343 def mimetype(self):
346 344 """
347 345 Wrapper around full mimetype info. It returns only type of fetched
348 346 mimetype without the encoding part. use get_mimetype function to fetch
349 347 full set of (type,encoding)
350 348 """
351 349 return self.get_mimetype()[0]
352 350
353 351 @LazyProperty
354 352 def mimetype_main(self):
355 353 return self.mimetype.split('/')[0]
356 354
357 355 @LazyProperty
358 356 def lexer(self):
359 357 """
360 358 Returns pygment's lexer class. Would try to guess lexer taking file's
361 359 content, name and mimetype.
362 360 """
363
361 from pygments import lexers
364 362 try:
365 363 lexer = lexers.guess_lexer_for_filename(self.name, self.content, stripnl=False)
366 364 except lexers.ClassNotFound:
367 365 lexer = lexers.TextLexer(stripnl=False)
368 366 # returns first alias
369 367 return lexer
370 368
371 369 @LazyProperty
372 370 def lexer_alias(self):
373 371 """
374 372 Returns first alias of the lexer guessed for this file.
375 373 """
376 374 return self.lexer.aliases[0]
377 375
378 376 @LazyProperty
379 377 def history(self):
380 378 """
381 379 Returns a list of changeset for this file in which the file was changed
382 380 """
383 381 if self.changeset is None:
384 382 raise NodeError('Unable to get changeset for this FileNode')
385 383 return self.changeset.get_file_history(self.path)
386 384
387 385 @LazyProperty
388 386 def annotate(self):
389 387 """
390 388 Returns a list of three element tuples with lineno,changeset and line
391 389 """
392 390 if self.changeset is None:
393 391 raise NodeError('Unable to get changeset for this FileNode')
394 392 return self.changeset.get_file_annotate(self.path)
395 393
396 394 @LazyProperty
397 395 def state(self):
398 396 if not self.changeset:
399 397 raise NodeError("Cannot check state of the node if it's not "
400 398 "linked with changeset")
401 399 elif self.path in (node.path for node in self.changeset.added):
402 400 return NodeState.ADDED
403 401 elif self.path in (node.path for node in self.changeset.changed):
404 402 return NodeState.CHANGED
405 403 else:
406 404 return NodeState.NOT_CHANGED
407 405
408 406 @property
409 407 def is_binary(self):
410 408 """
411 409 Returns True if file has binary content.
412 410 """
413 411 _bin = '\0' in self._get_content()
414 412 return _bin
415 413
416 414 @LazyProperty
417 415 def extension(self):
418 416 """Returns filenode extension"""
419 417 return self.name.split('.')[-1]
420 418
421 419 @property
422 420 def is_executable(self):
423 421 """
424 422 Returns ``True`` if file has executable flag turned on.
425 423 """
426 424 return bool(self.mode & stat.S_IXUSR)
427 425
428 426 def __repr__(self):
429 427 return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
430 428 getattr(self.changeset, 'short_id', ''))
431 429
432 430
class RemovedFileNode(FileNode):
    """
    Placeholder FileNode for a removed file. Only names starting with an
    underscore and the ones listed in ``ALLOWED_ATTRIBUTES`` may be read;
    any other attribute access raises ``RemovedFileNodeError``.
    """
    ALLOWED_ATTRIBUTES = [
        'name', 'path', 'state', 'is_root', 'is_file', 'is_dir', 'kind',
        'added', 'changed', 'not_changed', 'removed'
    ]

    def __init__(self, path):
        """
        :param path: relative path to the node
        """
        super(RemovedFileNode, self).__init__(path=path)

    def __getattribute__(self, attr):
        permitted = (attr.startswith('_') or
                     attr in RemovedFileNode.ALLOWED_ATTRIBUTES)
        if not permitted:
            raise RemovedFileNodeError("Cannot access attribute %s on "
                                       "RemovedFileNode" % attr)
        return super(RemovedFileNode, self).__getattribute__(attr)

    @LazyProperty
    def state(self):
        # a removed node is always in the REMOVED state
        return NodeState.REMOVED
459 457
460 458
class DirNode(Node):
    """
    DirNode stores list of files and directories within this node.
    Nodes may be used standalone but within repository context they
    lazily fetch data within same repository's changeset.
    """

    def __init__(self, path, nodes=(), changeset=None):
        """
        Only one of ``nodes`` and ``changeset`` may be given. Passing both
        would raise ``NodeError`` exception.

        :param path: relative path to the node
        :param nodes: content may be passed to constructor
        :param changeset: if given, will use it to lazily fetch content
        """
        if nodes and changeset:
            raise NodeError("Cannot use both nodes and changeset")
        super(DirNode, self).__init__(path, NodeKind.DIR)
        self.changeset = changeset
        self._nodes = nodes

    @LazyProperty
    def content(self):
        raise NodeError("%s represents a dir and has no ``content`` attribute"
                        % self)

    @LazyProperty
    def nodes(self):
        """
        Returns sorted list of nodes located directly in this directory,
        taken from the changeset when there is one and from the ``nodes``
        given to the constructor otherwise.
        """
        if self.changeset:
            nodes = self.changeset.get_nodes(self.path)
        else:
            nodes = self._nodes
        # path -> node mapping used by ``get_node``
        self._nodes_dict = dict((node.path, node) for node in nodes)
        return sorted(nodes)

    @LazyProperty
    def files(self):
        return sorted((node for node in self.nodes if node.is_file()))

    @LazyProperty
    def dirs(self):
        return sorted((node for node in self.nodes if node.is_dir()))

    def __iter__(self):
        for node in self.nodes:
            yield node

    def get_node(self, path):
        """
        Returns node from within this particular ``DirNode``, so it is not
        allowed to fetch, i.e. node located at 'docs/api/index.rst' from node
        'docs'. In order to access deeper nodes one must fetch nodes between
        them first - this would work::

           docs = root.get_node('docs')
           docs.get_node('api').get_node('index.rst')

        :param: path - relative to the current node
        :raises NodeError: if ``path`` is empty, if the node does not exist
          or if a deeper node is requested without a changeset

        .. note::
           To access lazily (as in example above) node have to be initialized
           with related changeset object - without it node is out of
           context and may know nothing about anything else than nearest
           (located at same level) nodes.
        """
        try:
            path = path.rstrip('/')
            if path == '':
                raise NodeError("Cannot retrieve node without path")
            self.nodes  # access nodes first in order to set _nodes_dict
            paths = path.split('/')
            if len(paths) == 1:
                if not self.is_root():
                    path = '/'.join((self.path, paths[0]))
                else:
                    path = paths[0]
                return self._nodes_dict[path]
            # split() never returns an empty list, so from here on the path
            # has more than one segment
            if self.changeset is None:
                raise NodeError("Cannot access deeper "
                                "nodes without changeset")
            path1, path2 = paths[0], '/'.join(paths[1:])
            return self.get_node(path1).get_node(path2)
        except KeyError:
            raise NodeError("Node does not exist at %s" % path)

    @LazyProperty
    def state(self):
        raise NodeError("Cannot access state of DirNode")

    @LazyProperty
    def size(self):
        """
        Returns sum of sizes of all files found by walking this directory
        in the changeset.

        :raises NodeError: if node has no changeset
        """
        if self.changeset is None:
            raise NodeError("Cannot retrieve size of the directory without "
                            "related changeset attribute")
        size = 0
        for root, dirs, files in self.changeset.walk(self.path):
            for f in files:
                size += f.size

        return size

    def __repr__(self):
        return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
                                 getattr(self.changeset, 'short_id', ''))
568 566
569 567
class RootNode(DirNode):
    """
    DirNode with an empty path, i.e. the root node of the repository.
    """

    def __init__(self, nodes=(), changeset=None):
        """
        :param nodes: passed through to ``DirNode``
        :param changeset: passed through to ``DirNode``
        """
        parent_init = super(RootNode, self).__init__
        parent_init(path='', nodes=nodes, changeset=changeset)

    def __repr__(self):
        class_name = self.__class__.__name__
        return '<%s>' % class_name
581 579
582 580
class SubModuleNode(Node):
    """
    Node standing for a SubModule of Git or a SubRepo of Mercurial.
    """
    size = 0
    is_binary = False

    def __init__(self, name, url=None, changeset=None, alias=None):
        """
        :param name: stored as the node's ``path``
        :param url: submodule url; when empty it is derived from the path
        :param changeset: converted with ``str()`` and wrapped into an
          ``EmptyChangeset``
        :param alias: backend alias, i.e. ``'git'`` or ``'hg'``
        """
        # path has to be set before kind - the kind setter inspects it
        self.path = name
        self.kind = NodeKind.SUBMODULE
        self.alias = alias
        # we have to use emptyChangeset here since this can point to svn/git/hg
        # submodules we cannot get from repository
        self.changeset = EmptyChangeset(str(changeset), alias=alias)
        self.url = url or self._extract_submodule_url()

    def __repr__(self):
        short_id = getattr(self.changeset, 'short_id', '')
        return '<%s %r @ %s>' % (self.__class__.__name__, self.path, short_id)

    def _extract_submodule_url(self):
        # for both supported aliases the path itself is used as the url
        if self.alias in ('git', 'hg'):
            #TODO: find a way to parse gits submodule file and extract the
            # linking URL
            return self.path
        return None

    @LazyProperty
    def name(self):
        """
        Returns last part of the node's path followed by ``@`` and the
        short id of its changeset.
        """
        last_part = self.path.rstrip('/').rsplit('/', 1)[-1]
        org = safe_unicode(last_part)
        return u'%s @ %s' % (org, self.changeset.short_id)
General Comments 0
You need to be logged in to leave comments. Login now