##// END OF EJS Templates
Small change to the is_binary function logic so that it always skips the unicode conversion when performing this simple check.
marcink -
r3002:dec78aee beta
parent child Browse files
Show More
@@ -1,613 +1,617 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 vcs.nodes
4 4 ~~~~~~~~~
5 5
6 6 Module holding everything related to vcs nodes.
7 7
8 8 :created_on: Apr 8, 2010
9 9 :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
10 10 """
11 11 import os
12 12 import stat
13 13 import posixpath
14 14 import mimetypes
15 15
16 16 from pygments import lexers
17 17
18 18 from rhodecode.lib.vcs.utils.lazy import LazyProperty
19 19 from rhodecode.lib.vcs.utils import safe_unicode
20 20 from rhodecode.lib.vcs.exceptions import NodeError
21 21 from rhodecode.lib.vcs.exceptions import RemovedFileNodeError
22 22 from rhodecode.lib.vcs.backends.base import EmptyChangeset
23 23
24 24
class NodeKind:
    """
    Integer constants identifying what kind of entry a node represents.
    """
    # nested repository (git submodule / mercurial subrepo)
    SUBMODULE = -1
    # directory
    DIR = 1
    # regular file
    FILE = 2
29 29
30 30
class NodeState:
    """
    Unicode labels describing the status of a node within a changeset.
    """
    # node appears for the first time
    ADDED = u'added'
    # node exists and was modified
    CHANGED = u'changed'
    # node exists and was left untouched
    NOT_CHANGED = u'not changed'
    # node was deleted
    REMOVED = u'removed'
36 36
37 37
class NodeGeneratorBase(object):
    """
    Common base of the added / changed / removed file node generators.

    Only the paths are stored; nodes are fetched from the changeset when
    the generator is iterated, sliced or called, so ``len()`` never has to
    build a single node.
    """

    def __init__(self, current_paths, cs):
        """
        :param current_paths: sequence of node paths covered by the generator
        :param cs: changeset object providing ``get_node(path)``
        """
        self.current_paths = current_paths
        self.cs = cs

    def __len__(self):
        """Number of stored paths (no node is created)."""
        return len(self.current_paths)

    def __iter__(self):
        """Yield the node of every stored path, fetched on demand."""
        for path in self.current_paths:
            yield self.cs.get_node(path)

    def __getslice__(self, i, j):
        """Yield the nodes of the paths found in ``current_paths[i:j]``."""
        for path in self.current_paths[i:j]:
            yield self.cs.get_node(path)

    def __call__(self):
        """Return all nodes as a list."""
        return [node for node in self]
63 63
64 64
class AddedFileNodesGenerator(NodeGeneratorBase):
    """
    Lazy generator of the file nodes added in the current changeset.
    """
70 70
71 71
class ChangedFileNodesGenerator(NodeGeneratorBase):
    """
    Lazy generator of the file nodes changed in the current changeset.
    """
77 77
78 78
class RemovedFileNodesGenerator(NodeGeneratorBase):
    """
    Lazy generator of the files removed in the current changeset.

    Instead of asking the changeset for a node, every path is wrapped in
    a ``RemovedFileNode``.
    """

    def __getslice__(self, i, j):
        """Yield a ``RemovedFileNode`` for each path in ``current_paths[i:j]``."""
        for removed_path in self.current_paths[i:j]:
            yield RemovedFileNode(path=removed_path)

    def __iter__(self):
        """Yield a ``RemovedFileNode`` for each stored path."""
        for removed_path in self.current_paths:
            yield RemovedFileNode(path=removed_path)
90 90
91 91
class Node(object):
    """
    Simplest class representing file or directory on repository. SCM backends
    should use ``FileNode`` and ``DirNode`` subclasses rather than ``Node``
    directly.

    Node's ``path`` cannot start with slash as we operate on *relative* paths
    only. Moreover, every single node is identified by the ``path`` attribute,
    so it cannot end with slash, too. Otherwise, path could lead to mistakes.
    """

    def __init__(self, path, kind):
        """
        :param path: path relative to the repository root; trailing slashes
            are stripped before it is stored
        :param kind: one of the ``NodeKind`` constants
        :raises NodeError: if ``path`` starts with a slash, or if it is empty
            and ``kind`` is not ``NodeKind.DIR``
        """
        if path.startswith('/'):
            raise NodeError("Cannot initialize Node objects with slash at "
                            "the beginning as only relative paths are supported")
        self.path = path.rstrip('/')
        if path == '' and kind != NodeKind.DIR:
            raise NodeError("Only DirNode and its subclasses may be "
                            "initialized with empty path")
        self.kind = kind
        if self.is_root() and not self.is_dir():
            raise NodeError("Root node cannot be FILE kind")

    @LazyProperty
    def parent(self):
        """
        Parent node: fetched from the changeset when one is attached,
        otherwise a standalone ``DirNode``. ``None`` when the parent path is
        empty (root node).
        """
        parent_path = self.get_parent_path()
        if parent_path:
            if self.changeset:
                return self.changeset.get_node(parent_path)
            return DirNode(parent_path)
        return None

    @LazyProperty
    def unicode_path(self):
        """Node's path passed through ``safe_unicode``."""
        return safe_unicode(self.path)

    @LazyProperty
    def name(self):
        """
        Returns name of the node so if its path
        then only last part is returned.
        """
        return safe_unicode(self.path.rstrip('/').split('/')[-1])

    def _get_kind(self):
        return self._kind

    def _set_kind(self, kind):
        # kind is write-once: a second assignment is rejected
        if hasattr(self, '_kind'):
            raise NodeError("Cannot change node's kind")
        else:
            self._kind = kind
            # Post setter check (path's trailing slash)
            if self.path.endswith('/'):
                raise NodeError("Node's path cannot end with slash")

    kind = property(_get_kind, _set_kind)

    def __cmp__(self, other):
        """
        Comparator using kind and then name of the node, needed for quick
        list sorting.
        """
        kind_cmp = cmp(self.kind, other.kind)
        if kind_cmp:
            return kind_cmp
        return cmp(self.name, other.name)

    def __eq__(self, other):
        """
        Nodes are equal when name, path and kind match and, additionally,
        file nodes have the same content and directory nodes hold the same
        set of child paths. Anything that is not a ``Node`` is unequal.
        """
        if not isinstance(other, Node):
            return False
        for attr in ('name', 'path', 'kind'):
            if getattr(self, attr) != getattr(other, attr):
                return False
        if self.is_file():
            if self.content != other.content:
                return False
        elif self.is_dir():
            # For DirNode's check without entering each dir
            self_nodes_paths = sorted(n.path for n in self.nodes)
            other_nodes_paths = sorted(n.path for n in other.nodes)
            if self_nodes_paths != other_nodes_paths:
                return False
        # other kinds (e.g. submodules) have neither content nor nodes, so
        # the attribute comparison above is all there is to check
        return True

    def __ne__(self, other):
        return not self.__eq__(other)

    # former (misspelled) name of ``__ne__``, kept for compatibility
    __nq__ = __ne__

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.path)

    def __str__(self):
        return self.__repr__()

    def __unicode__(self):
        return self.name

    def get_parent_path(self):
        """
        Returns node's parent path or empty string if node is root.
        """
        if self.is_root():
            return ''
        return posixpath.dirname(self.path.rstrip('/')) + '/'

    def is_file(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.FILE``, ``False``
        otherwise.
        """
        return self.kind == NodeKind.FILE

    def is_dir(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.DIR``, ``False``
        otherwise.
        """
        return self.kind == NodeKind.DIR

    def is_root(self):
        """
        Returns ``True`` if node is a root node and ``False`` otherwise.
        """
        return self.kind == NodeKind.DIR and self.path == ''

    def is_submodule(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.SUBMODULE``, ``False``
        otherwise.
        """
        return self.kind == NodeKind.SUBMODULE

    # The flags below compare by value rather than identity, so an equal
    # state string that is not the very same object is still recognised.

    @LazyProperty
    def added(self):
        return self.state == NodeState.ADDED

    @LazyProperty
    def changed(self):
        return self.state == NodeState.CHANGED

    @LazyProperty
    def not_changed(self):
        return self.state == NodeState.NOT_CHANGED

    @LazyProperty
    def removed(self):
        return self.state == NodeState.REMOVED
237 237
238 238
class FileNode(Node):
    """
    Class representing file nodes.

    :attribute: path: path to the node, relative to repository's root
    :attribute: content: if given arbitrary sets content of the file
    :attribute: changeset: if given, content, mode, size and history are
        fetched from it on access
    :attribute: mode: octal stat mode for a node. Default is 0100644.
    """

    # Regular file with rw-r--r-- permissions (octal 0100644). Built from
    # ``stat`` flags so no version-specific octal literal is required.
    # Underscore-prefixed so that ``RemovedFileNode`` may still read it.
    _DEFAULT_MODE = (stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR |
                     stat.S_IRGRP | stat.S_IROTH)

    def __init__(self, path, content=None, changeset=None, mode=None):
        """
        Only one of ``content`` and ``changeset`` may be given. Passing both
        would raise ``NodeError`` exception.

        :param path: relative path to the node
        :param content: content may be passed to constructor
        :param changeset: if given, will use it to lazily fetch content
        :param mode: octal representation of ST_MODE (i.e. 0100644)
        """

        if content and changeset:
            raise NodeError("Cannot use both content and changeset")
        super(FileNode, self).__init__(path, kind=NodeKind.FILE)
        self.changeset = changeset
        self._content = content
        self._mode = mode or FileNode._DEFAULT_MODE

    @LazyProperty
    def mode(self):
        """
        Returns lazily mode of the FileNode. If ``changeset`` is not set, would
        use value given at initialization or 0100644 (default).
        """
        if self.changeset:
            mode = self.changeset.get_file_mode(self.path)
        else:
            mode = self._mode
        return mode

    def _get_content(self):
        """
        Returns the content exactly as stored: taken from the changeset when
        one is attached, otherwise the value given at initialization. No
        unicode conversion is performed.
        """
        if self.changeset:
            content = self.changeset.get_file_content(self.path)
        else:
            content = self._content
        return content

    @property
    def content(self):
        """
        Returns lazily content of the FileNode. Content holding a NUL
        character is returned untouched, anything else is passed through
        ``safe_unicode``.
        """
        content = self._get_content()

        if bool(content and '\0' in content):
            return content
        return safe_unicode(content)

    @LazyProperty
    def size(self):
        """
        Size of the file as reported by the changeset.

        :raises NodeError: if the node has no changeset
        """
        if self.changeset:
            return self.changeset.get_file_size(self.path)
        raise NodeError("Cannot retrieve size of the file without related "
                        "changeset attribute")

    @LazyProperty
    def message(self):
        """
        Message of the last changeset of this file.

        :raises NodeError: if the node has no changeset
        """
        if self.changeset:
            return self.last_changeset.message
        raise NodeError("Cannot retrieve message of the file without related "
                        "changeset attribute")

    @LazyProperty
    def last_changeset(self):
        """
        Changeset returned by ``changeset.get_file_changeset`` for this path.

        :raises NodeError: if the node has no changeset
        """
        if self.changeset:
            return self.changeset.get_file_changeset(self.path)
        raise NodeError("Cannot retrieve last changeset of the file without "
                        "related changeset attribute")

    def get_mimetype(self):
        """
        Mimetype is guessed from the file's name; when that fails, the
        content decides between binary and plain text. If ``_mimetype``
        attribute is available, it will be returned (backends which store
        mimetypes or can easily recognize them, should set this private
        attribute to indicate that type should *NOT* be calculated).

        :returns: ``(type, encoding)`` pair
        :raises NodeError: if ``_mimetype`` is set but is not a two element
            list or tuple
        """
        if hasattr(self, '_mimetype'):
            if (isinstance(self._mimetype, (tuple, list,)) and
                len(self._mimetype) == 2):
                return self._mimetype
            else:
                raise NodeError('given _mimetype attribute must be an 2 '
                                'element list or tuple')

        mtype, encoding = mimetypes.guess_type(self.name)

        if mtype is None:
            if self.is_binary:
                mtype = 'application/octet-stream'
                encoding = None
            else:
                mtype = 'text/plain'
                encoding = None
        return mtype, encoding

    @LazyProperty
    def mimetype(self):
        """
        Wrapper around full mimetype info. It returns only type of fetched
        mimetype without the encoding part. use get_mimetype function to fetch
        full set of (type,encoding)
        """
        return self.get_mimetype()[0]

    @LazyProperty
    def mimetype_main(self):
        """Part of the mimetype before the slash."""
        return self.mimetype.split('/')[0]

    @LazyProperty
    def lexer(self):
        """
        Returns pygments' lexer guessed from the file's name and content,
        falling back to ``TextLexer`` when no lexer matches.
        """
        try:
            lexer = lexers.guess_lexer_for_filename(self.name, self.content)
        except lexers.ClassNotFound:
            lexer = lexers.TextLexer()
        return lexer

    @LazyProperty
    def lexer_alias(self):
        """
        Returns first alias of the lexer guessed for this file.
        """
        return self.lexer.aliases[0]

    @LazyProperty
    def history(self):
        """
        Returns a list of changeset for this file in which the file was changed
        """
        if self.changeset is None:
            raise NodeError('Unable to get changeset for this FileNode')
        return self.changeset.get_file_history(self.path)

    @LazyProperty
    def annotate(self):
        """
        Returns a list of three element tuples with lineno,changeset and line
        """
        if self.changeset is None:
            raise NodeError('Unable to get changeset for this FileNode')
        return self.changeset.get_file_annotate(self.path)

    @LazyProperty
    def state(self):
        """
        ``NodeState`` of the file within its changeset.

        :raises NodeError: if the node has no changeset
        """
        if not self.changeset:
            raise NodeError("Cannot check state of the node if it's not "
                            "linked with changeset")
        elif self.path in (node.path for node in self.changeset.added):
            return NodeState.ADDED
        elif self.path in (node.path for node in self.changeset.changed):
            return NodeState.CHANGED
        else:
            return NodeState.NOT_CHANGED

    @property
    def is_binary(self):
        """
        Returns True if file has binary content, i.e. its raw content holds
        a NUL character. The raw content is inspected, so no unicode
        conversion takes place; missing or empty content is not binary.
        """
        content = self._get_content()
        return bool(content and '\0' in content)

    @LazyProperty
    def extension(self):
        """Returns filenode extension"""
        return self.name.split('.')[-1]

    def is_executable(self):
        """
        Returns ``True`` if file has executable flag turned on.
        """
        return bool(self.mode & stat.S_IXUSR)

    def __repr__(self):
        return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
                                 getattr(self.changeset, 'short_id', ''))
426 430
427 431
class RemovedFileNode(FileNode):
    """
    Placeholder ``FileNode`` for a removed file. Only underscore-prefixed
    names and the names listed in ``ALLOWED_ATTRIBUTES`` may be read; any
    other attribute access raises ``RemovedFileNodeError``.
    """
    ALLOWED_ATTRIBUTES = [
        'name', 'path', 'state', 'is_root', 'is_file', 'is_dir', 'kind',
        'added', 'changed', 'not_changed', 'removed'
    ]

    def __init__(self, path):
        """
        :param path: relative path to the node
        """
        super(RemovedFileNode, self).__init__(path=path)

    def __getattribute__(self, attr):
        permitted = (attr.startswith('_') or
                     attr in RemovedFileNode.ALLOWED_ATTRIBUTES)
        if not permitted:
            raise RemovedFileNodeError("Cannot access attribute %s on "
                                       "RemovedFileNode" % attr)
        return super(RemovedFileNode, self).__getattribute__(attr)

    @LazyProperty
    def state(self):
        """Always ``NodeState.REMOVED``."""
        return NodeState.REMOVED
454 458
455 459
class DirNode(Node):
    """
    DirNode stores list of files and directories within this node.
    Nodes may be used standalone but within repository context they
    lazily fetch data within same repository's changeset.
    """

    def __init__(self, path, nodes=(), changeset=None):
        """
        Only one of ``nodes`` and ``changeset`` may be given. Passing both
        would raise ``NodeError`` exception.

        :param path: relative path to the node
        :param nodes: content may be passed to constructor
        :param changeset: if given, will use it to lazily fetch content
        """
        if nodes and changeset:
            raise NodeError("Cannot use both nodes and changeset")
        super(DirNode, self).__init__(path, NodeKind.DIR)
        self.changeset = changeset
        self._nodes = nodes

    @LazyProperty
    def content(self):
        """Directories have no content; accessing it raises ``NodeError``."""
        raise NodeError("%s represents a dir and has no ``content`` attribute"
                        % self)

    @LazyProperty
    def nodes(self):
        """
        Sorted list of the nodes directly inside this directory, taken from
        the changeset when one is attached, otherwise from the nodes given
        at initialization. As a side effect fills ``_nodes_dict``, a
        path -> node mapping used by ``get_node``.
        """
        if self.changeset:
            nodes = self.changeset.get_nodes(self.path)
        else:
            nodes = self._nodes
        self._nodes_dict = dict((node.path, node) for node in nodes)
        return sorted(nodes)

    @LazyProperty
    def files(self):
        """Sorted list of the file nodes found in ``nodes``."""
        return sorted((node for node in self.nodes if node.is_file()))

    @LazyProperty
    def dirs(self):
        """Sorted list of the directory nodes found in ``nodes``."""
        return sorted((node for node in self.nodes if node.is_dir()))

    def __iter__(self):
        for node in self.nodes:
            yield node

    def get_node(self, path):
        """
        Returns node from within this particular ``DirNode``, so it is not
        allowed to fetch, i.e. node located at 'docs/api/index.rst' from node
        'docs'. In order to access deeper nodes one must fetch nodes between
        them first - this would work::

           docs = root.get_node('docs')
           docs.get_node('api').get_node('index.rst')

        :param: path - relative to the current node
        :raises NodeError: if ``path`` is empty, if no node exists at
            ``path``, or if a deeper path is requested on a node without
            changeset

        .. note::
           To access lazily (as in example above) node have to be initialized
           with related changeset object - without it node is out of
           context and may know nothing about anything else than nearest
           (located at same level) nodes.
        """
        try:
            path = path.rstrip('/')
            if path == '':
                raise NodeError("Cannot retrieve node without path")
            self.nodes  # access nodes first in order to set _nodes_dict
            paths = path.split('/')
            if len(paths) == 1:
                # _nodes_dict is keyed by full paths, so prefix own path
                if not self.is_root():
                    path = '/'.join((self.path, paths[0]))
                else:
                    path = paths[0]
                return self._nodes_dict[path]
            elif len(paths) > 1:
                if self.changeset is None:
                    raise NodeError("Cannot access deeper "
                                    "nodes without changeset")
                else:
                    # descend one level at a time
                    path1, path2 = paths[0], '/'.join(paths[1:])
                    return self.get_node(path1).get_node(path2)
            else:
                raise KeyError
        except KeyError:
            raise NodeError("Node does not exist at %s" % path)

    @LazyProperty
    def state(self):
        """Directories have no state; accessing it raises ``NodeError``."""
        raise NodeError("Cannot access state of DirNode")

    @LazyProperty
    def size(self):
        """
        Sum of the sizes of all files yielded by walking this directory in
        the changeset.

        :raises NodeError: if the node has no changeset
        """
        if self.changeset is None:
            # fail with the module's own error instead of an AttributeError
            raise NodeError("Cannot retrieve size of the directory without "
                            "related changeset attribute")
        size = 0
        for root, dirs, files in self.changeset.walk(self.path):
            for f in files:
                size += f.size

        return size

    def __repr__(self):
        return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
                                 getattr(self.changeset, 'short_id', ''))
563 567
564 568
class RootNode(DirNode):
    """
    The repository's root directory: a ``DirNode`` whose path is empty.
    """

    def __init__(self, nodes=(), changeset=None):
        """
        :param nodes: nodes handed over to ``DirNode``
        :param changeset: changeset handed over to ``DirNode``
        """
        super(RootNode, self).__init__(
            path='', nodes=nodes, changeset=changeset)

    def __repr__(self):
        cls_name = self.__class__.__name__
        return '<%s>' % cls_name
576 580
577 581
class SubModuleNode(Node):
    """
    Node standing for a Git SubModule or a Mercurial SubRepo.
    """
    # class-level defaults: never binary, size always 0
    is_binary = False
    size = 0

    def __init__(self, name, url=None, changeset=None, alias=None):
        """
        :param name: stored as the node's path
        :param url: submodule url; when empty it is derived by
            ``_extract_submodule_url``
        :param changeset: value turned into a string and wrapped in an
            ``EmptyChangeset``
        :param alias: backend alias, e.g. 'git' or 'hg'
        """
        # path has to be set before kind, the kind setter inspects it
        self.path = name
        self.kind = NodeKind.SUBMODULE
        self.alias = alias
        # we have to use emptyChangeset here since this can point to svn/git/hg
        # submodules we cannot get from repository
        self.changeset = EmptyChangeset(str(changeset), alias=alias)
        self.url = url or self._extract_submodule_url()

    def __repr__(self):
        short_id = getattr(self.changeset, 'short_id', '')
        return '<%s %r @ %s>' % (self.__class__.__name__, self.path, short_id)

    def _extract_submodule_url(self):
        """
        Returns the node's path for the 'git' and 'hg' aliases and ``None``
        for any other alias.
        """
        if self.alias == 'git':
            #TODO: find a way to parse gits submodule file and extract the
            # linking URL
            return self.path
        elif self.alias == 'hg':
            return self.path
        return None

    @LazyProperty
    def name(self):
        """
        Last part of the node's path followed by `` @ `` and the short id
        of the changeset.
        """
        last_part = self.path.rstrip('/').split('/')[-1]
        org = safe_unicode(last_part)
        return u'%s @ %s' % (org, self.changeset.short_id)
General Comments 0
You need to be logged in to leave comments. Login now