##// END OF EJS Templates
nodes: expose line counts in node information. This would be used in full text search
marcink -
r3962:605faead default
parent child Browse files
Show More
@@ -1,870 +1,875 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2014-2019 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 """
22 22 Module holding everything related to vcs nodes, with vcs2 architecture.
23 23 """
24 24
25 25 import os
26 26 import stat
27 27
28 28 from zope.cachedescriptors.property import Lazy as LazyProperty
29 29
30 30 import rhodecode
31 31 from rhodecode.config.conf import LANGUAGES_EXTENSIONS_MAP
32 32 from rhodecode.lib.utils import safe_unicode, safe_str
33 33 from rhodecode.lib.utils2 import md5
34 34 from rhodecode.lib.vcs import path as vcspath
35 35 from rhodecode.lib.vcs.backends.base import EmptyCommit, FILEMODE_DEFAULT
36 36 from rhodecode.lib.vcs.conf.mtypes import get_mimetypes_db
37 37 from rhodecode.lib.vcs.exceptions import NodeError, RemovedFileNodeError
38 38
39 39 LARGEFILE_PREFIX = '.hglf'
40 40
41 41
class NodeKind:
    """Integer constants describing what kind of entry a node represents."""
    SUBMODULE = -1
    DIR = 1
    FILE = 2
    LARGEFILE = 3
47 47
48 48
class NodeState:
    """String constants describing a node's state relative to a commit."""
    ADDED = u'added'
    CHANGED = u'changed'
    NOT_CHANGED = u'not changed'
    REMOVED = u'removed'
54 54
55 55
class NodeGeneratorBase(object):
    """
    Lazy generator base class for added/changed/removed filenodes.

    File nodes are materialized only on iteration, slicing or call;
    ``len()`` is answered straight from the path list without creating
    any nodes at all.
    """

    def __init__(self, current_paths, cs):
        self.cs = cs
        self.current_paths = current_paths

    def __call__(self):
        # Materialize every node eagerly.
        return list(self)

    def __getslice__(self, i, j):
        # Python 2 slicing protocol; yields nodes lazily for the slice.
        for node_path in self.current_paths[i:j]:
            yield self.cs.get_node(node_path)

    def __len__(self):
        return len(self.current_paths)

    def __iter__(self):
        for node_path in self.current_paths:
            yield self.cs.get_node(node_path)
81 81
82 82
class AddedFileNodesGenerator(NodeGeneratorBase):
    """Lazy collection of file nodes added in the current commit."""
88 88
class ChangedFileNodesGenerator(NodeGeneratorBase):
    """Lazy collection of file nodes changed in the current commit."""
93 93
94 94
class RemovedFileNodesGenerator(NodeGeneratorBase):
    """
    Lazy collection of file nodes removed in the current commit.

    Removed paths cannot be fetched from the commit any more, so plain
    ``RemovedFileNode`` placeholders are yielded instead of real nodes.
    """

    def __iter__(self):
        for node_path in self.current_paths:
            yield RemovedFileNode(path=node_path)

    def __getslice__(self, i, j):
        # Python 2 slicing protocol; mirrors __iter__ for a sub-range.
        for node_path in self.current_paths[i:j]:
            yield RemovedFileNode(path=node_path)
106 106
107 107
108 108 class Node(object):
109 109 """
110 110 Simplest class representing file or directory on repository. SCM backends
111 111 should use ``FileNode`` and ``DirNode`` subclasses rather than ``Node``
112 112 directly.
113 113
114 114 Node's ``path`` cannot start with slash as we operate on *relative* paths
115 115 only. Moreover, every single node is identified by the ``path`` attribute,
116 116 so it cannot end with slash, too. Otherwise, path could lead to mistakes.
117 117 """
118 118 RTLO_MARKER = u"\u202E" # RTLO marker allows swapping text, and certain
119 119 # security attacks could be used with this
120 120 commit = None
121 121
122 122 def __init__(self, path, kind):
123 123 self._validate_path(path) # can throw exception if path is invalid
124 124 self.path = safe_str(path.rstrip('/')) # we store paths as str
125 125 if path == '' and kind != NodeKind.DIR:
126 126 raise NodeError("Only DirNode and its subclasses may be "
127 127 "initialized with empty path")
128 128 self.kind = kind
129 129
130 130 if self.is_root() and not self.is_dir():
131 131 raise NodeError("Root node cannot be FILE kind")
132 132
133 133 def _validate_path(self, path):
134 134 if path.startswith('/'):
135 135 raise NodeError(
136 136 "Cannot initialize Node objects with slash at "
137 137 "the beginning as only relative paths are supported. "
138 138 "Got %s" % (path,))
139 139
140 140 @LazyProperty
141 141 def parent(self):
142 142 parent_path = self.get_parent_path()
143 143 if parent_path:
144 144 if self.commit:
145 145 return self.commit.get_node(parent_path)
146 146 return DirNode(parent_path)
147 147 return None
148 148
149 149 @LazyProperty
150 150 def unicode_path(self):
151 151 return safe_unicode(self.path)
152 152
153 153 @LazyProperty
154 154 def has_rtlo(self):
155 155 """Detects if a path has right-to-left-override marker"""
156 156 return self.RTLO_MARKER in self.unicode_path
157 157
158 158 @LazyProperty
159 159 def unicode_path_safe(self):
160 160 """
161 161 Special SAFE representation of path without the right-to-left-override.
162 162 This should be only used for "showing" the file, cannot be used for any
163 163 urls etc.
164 164 """
165 165 return safe_unicode(self.path).replace(self.RTLO_MARKER, '')
166 166
167 167 @LazyProperty
168 168 def dir_path(self):
169 169 """
170 170 Returns name of the directory from full path of this vcs node. Empty
171 171 string is returned if there's no directory in the path
172 172 """
173 173 _parts = self.path.rstrip('/').rsplit('/', 1)
174 174 if len(_parts) == 2:
175 175 return safe_unicode(_parts[0])
176 176 return u''
177 177
178 178 @LazyProperty
179 179 def name(self):
180 180 """
181 181 Returns name of the node so if its path
182 182 then only last part is returned.
183 183 """
184 184 return safe_unicode(self.path.rstrip('/').split('/')[-1])
185 185
186 186 @property
187 187 def kind(self):
188 188 return self._kind
189 189
190 190 @kind.setter
191 191 def kind(self, kind):
192 192 if hasattr(self, '_kind'):
193 193 raise NodeError("Cannot change node's kind")
194 194 else:
195 195 self._kind = kind
196 196 # Post setter check (path's trailing slash)
197 197 if self.path.endswith('/'):
198 198 raise NodeError("Node's path cannot end with slash")
199 199
200 200 def __cmp__(self, other):
201 201 """
202 202 Comparator using name of the node, needed for quick list sorting.
203 203 """
204 204
205 205 kind_cmp = cmp(self.kind, other.kind)
206 206 if kind_cmp:
207 207 if isinstance(self, SubModuleNode):
208 208 # we make submodules equal to dirnode for "sorting" purposes
209 209 return NodeKind.DIR
210 210 return kind_cmp
211 211 return cmp(self.name, other.name)
212 212
def __eq__(self, other):
    """
    Structural equality for nodes.

    Two nodes are equal when their ``name``, ``path`` and ``kind`` match
    and, for files, their content is identical; for directories, the
    sorted child paths must match (children are compared by path only,
    without descending into them).
    """
    for attr in ['name', 'path', 'kind']:
        if getattr(self, attr) != getattr(other, attr):
            return False
    if self.is_file():
        if self.content != other.content:
            return False
    else:
        # For DirNode's check without entering each dir
        self_nodes_paths = list(sorted(n.path for n in self.nodes))
        # BUG FIX: original built this list from self.nodes as well,
        # which made any two directories with equal name/path/kind
        # compare equal regardless of their actual children.
        other_nodes_paths = list(sorted(n.path for n in other.nodes))
        if self_nodes_paths != other_nodes_paths:
            return False
    return True

def __ne__(self, other):
    """Inverse of ``__eq__`` (Python 2 does not derive it automatically)."""
    return not self.__eq__(other)
230 230
231 231 def __repr__(self):
232 232 return '<%s %r>' % (self.__class__.__name__, self.path)
233 233
234 234 def __str__(self):
235 235 return self.__repr__()
236 236
237 237 def __unicode__(self):
238 238 return self.name
239 239
240 240 def get_parent_path(self):
241 241 """
242 242 Returns node's parent path or empty string if node is root.
243 243 """
244 244 if self.is_root():
245 245 return ''
246 246 return vcspath.dirname(self.path.rstrip('/')) + '/'
247 247
248 248 def is_file(self):
249 249 """
250 250 Returns ``True`` if node's kind is ``NodeKind.FILE``, ``False``
251 251 otherwise.
252 252 """
253 253 return self.kind == NodeKind.FILE
254 254
255 255 def is_dir(self):
256 256 """
257 257 Returns ``True`` if node's kind is ``NodeKind.DIR``, ``False``
258 258 otherwise.
259 259 """
260 260 return self.kind == NodeKind.DIR
261 261
262 262 def is_root(self):
263 263 """
264 264 Returns ``True`` if node is a root node and ``False`` otherwise.
265 265 """
266 266 return self.kind == NodeKind.DIR and self.path == ''
267 267
268 268 def is_submodule(self):
269 269 """
270 270 Returns ``True`` if node's kind is ``NodeKind.SUBMODULE``, ``False``
271 271 otherwise.
272 272 """
273 273 return self.kind == NodeKind.SUBMODULE
274 274
275 275 def is_largefile(self):
276 276 """
277 277 Returns ``True`` if node's kind is ``NodeKind.LARGEFILE``, ``False``
278 278 otherwise
279 279 """
280 280 return self.kind == NodeKind.LARGEFILE
281 281
282 282 def is_link(self):
283 283 if self.commit:
284 284 return self.commit.is_link(self.path)
285 285 return False
286 286
287 287 @LazyProperty
288 288 def added(self):
289 289 return self.state is NodeState.ADDED
290 290
291 291 @LazyProperty
292 292 def changed(self):
293 293 return self.state is NodeState.CHANGED
294 294
295 295 @LazyProperty
296 296 def not_changed(self):
297 297 return self.state is NodeState.NOT_CHANGED
298 298
299 299 @LazyProperty
300 300 def removed(self):
301 301 return self.state is NodeState.REMOVED
302 302
303 303
304 304 class FileNode(Node):
305 305 """
306 306 Class representing file nodes.
307 307
308 308 :attribute: path: path to the node, relative to repository's root
309 309 :attribute: content: if given arbitrary sets content of the file
310 310 :attribute: commit: if given, first time content is accessed, callback
311 311 :attribute: mode: stat mode for a node. Default is `FILEMODE_DEFAULT`.
312 312 """
313 313 _filter_pre_load = []
314 314
315 315 def __init__(self, path, content=None, commit=None, mode=None, pre_load=None):
316 316 """
317 317 Only one of ``content`` and ``commit`` may be given. Passing both
318 318 would raise ``NodeError`` exception.
319 319
320 320 :param path: relative path to the node
321 321 :param content: content may be passed to constructor
322 322 :param commit: if given, will use it to lazily fetch content
323 323 :param mode: ST_MODE (i.e. 0100644)
324 324 """
325 325 if content and commit:
326 326 raise NodeError("Cannot use both content and commit")
327 327 super(FileNode, self).__init__(path, kind=NodeKind.FILE)
328 328 self.commit = commit
329 329 self._content = content
330 330 self._mode = mode or FILEMODE_DEFAULT
331 331
332 332 self._set_bulk_properties(pre_load)
333 333
334 334 def _set_bulk_properties(self, pre_load):
335 335 if not pre_load:
336 336 return
337 337 pre_load = [entry for entry in pre_load
338 338 if entry not in self._filter_pre_load]
339 339 if not pre_load:
340 340 return
341 341
342 342 for attr_name in pre_load:
343 343 result = getattr(self, attr_name)
344 344 if callable(result):
345 345 result = result()
346 346 self.__dict__[attr_name] = result
347 347
348 348 @LazyProperty
349 349 def mode(self):
350 350 """
351 351 Returns lazily mode of the FileNode. If `commit` is not set, would
352 352 use value given at initialization or `FILEMODE_DEFAULT` (default).
353 353 """
354 354 if self.commit:
355 355 mode = self.commit.get_file_mode(self.path)
356 356 else:
357 357 mode = self._mode
358 358 return mode
359 359
360 360 @LazyProperty
361 361 def raw_bytes(self):
362 362 """
363 363 Returns lazily the raw bytes of the FileNode.
364 364 """
365 365 if self.commit:
366 366 if self._content is None:
367 367 self._content = self.commit.get_file_content(self.path)
368 368 content = self._content
369 369 else:
370 370 content = self._content
371 371 return content
372 372
373 373 def stream_bytes(self):
374 374 """
375 375 Returns an iterator that will stream the content of the file directly from
376 376 vcsserver without loading it to memory.
377 377 """
378 378 if self.commit:
379 379 return self.commit.get_file_content_streamed(self.path)
380 380 raise NodeError("Cannot retrieve stream_bytes without related commit attribute")
381 381
382 382 @LazyProperty
383 383 def md5(self):
384 384 """
385 385 Returns md5 of the file node.
386 386 """
387 387 return md5(self.raw_bytes)
388 388
def metadata_uncached(self):
    """
    Compute file metadata without any cache usage.

    :return: 4-tuple ``(is_binary, md5, size, content)`` where
        ``is_binary`` is truthy when the raw content contains a NUL byte,
        ``md5`` is the digest of the content, ``size`` the byte length
        (0 for empty/missing content) and ``content`` the raw content
        itself.
    """
    content = self.content_uncached()

    # NUL byte heuristic for binary detection; note this is truthy/falsy,
    # not strictly bool (empty content yields the content itself).
    is_binary = content and '\0' in content
    size = 0
    if content:
        size = len(content)

    return is_binary, md5(content), size, content

def content_uncached(self):
    """
    Fetch the raw content of the FileNode without any cache usage.

    Reads from the related commit when one is set, otherwise falls back
    to the content given at construction time. No decoding is performed;
    the value is returned exactly as stored/fetched.
    """
    if self.commit:
        content = self.commit.get_file_content(self.path)
    else:
        content = self._content
    return content
413 413
414 414 @LazyProperty
415 415 def content(self):
416 416 """
417 417 Returns lazily content of the FileNode. If possible, would try to
418 418 decode content from UTF-8.
419 419 """
420 420 content = self.raw_bytes
421 421
422 422 if self.is_binary:
423 423 return content
424 424 return safe_unicode(content)
425 425
426 426 @LazyProperty
427 427 def size(self):
428 428 if self.commit:
429 429 return self.commit.get_file_size(self.path)
430 430 raise NodeError(
431 431 "Cannot retrieve size of the file without related "
432 432 "commit attribute")
433 433
434 434 @LazyProperty
435 435 def message(self):
436 436 if self.commit:
437 437 return self.last_commit.message
438 438 raise NodeError(
439 439 "Cannot retrieve message of the file without related "
440 440 "commit attribute")
441 441
442 442 @LazyProperty
443 443 def last_commit(self):
444 444 if self.commit:
445 445 pre_load = ["author", "date", "message", "parents"]
446 446 return self.commit.get_path_commit(self.path, pre_load=pre_load)
447 447 raise NodeError(
448 448 "Cannot retrieve last commit of the file without "
449 449 "related commit attribute")
450 450
451 451 def get_mimetype(self):
452 452 """
453 453 Mimetype is calculated based on the file's content. If ``_mimetype``
454 454 attribute is available, it will be returned (backends which store
455 455 mimetypes or can easily recognize them, should set this private
456 456 attribute to indicate that type should *NOT* be calculated).
457 457 """
458 458
459 459 if hasattr(self, '_mimetype'):
460 460 if (isinstance(self._mimetype, (tuple, list,)) and
461 461 len(self._mimetype) == 2):
462 462 return self._mimetype
463 463 else:
464 464 raise NodeError('given _mimetype attribute must be an 2 '
465 465 'element list or tuple')
466 466
467 467 db = get_mimetypes_db()
468 468 mtype, encoding = db.guess_type(self.name)
469 469
470 470 if mtype is None:
471 471 if self.is_binary:
472 472 mtype = 'application/octet-stream'
473 473 encoding = None
474 474 else:
475 475 mtype = 'text/plain'
476 476 encoding = None
477 477
478 478 # try with pygments
479 479 try:
480 480 from pygments.lexers import get_lexer_for_filename
481 481 mt = get_lexer_for_filename(self.name).mimetypes
482 482 except Exception:
483 483 mt = None
484 484
485 485 if mt:
486 486 mtype = mt[0]
487 487
488 488 return mtype, encoding
489 489
490 490 @LazyProperty
491 491 def mimetype(self):
492 492 """
493 493 Wrapper around full mimetype info. It returns only type of fetched
494 494 mimetype without the encoding part. use get_mimetype function to fetch
495 495 full set of (type,encoding)
496 496 """
497 497 return self.get_mimetype()[0]
498 498
499 499 @LazyProperty
500 500 def mimetype_main(self):
501 501 return self.mimetype.split('/')[0]
502 502
503 503 @classmethod
504 504 def get_lexer(cls, filename, content=None):
505 505 from pygments import lexers
506 506
507 507 extension = filename.split('.')[-1]
508 508 lexer = None
509 509
510 510 try:
511 511 lexer = lexers.guess_lexer_for_filename(
512 512 filename, content, stripnl=False)
513 513 except lexers.ClassNotFound:
514 514 lexer = None
515 515
516 516 # try our EXTENSION_MAP
517 517 if not lexer:
518 518 try:
519 519 lexer_class = LANGUAGES_EXTENSIONS_MAP.get(extension)
520 520 if lexer_class:
521 521 lexer = lexers.get_lexer_by_name(lexer_class[0])
522 522 except lexers.ClassNotFound:
523 523 lexer = None
524 524
525 525 if not lexer:
526 526 lexer = lexers.TextLexer(stripnl=False)
527 527
528 528 return lexer
529 529
530 530 @LazyProperty
531 531 def lexer(self):
532 532 """
533 533 Returns pygment's lexer class. Would try to guess lexer taking file's
534 534 content, name and mimetype.
535 535 """
536 536 return self.get_lexer(self.name, self.content)
537 537
538 538 @LazyProperty
539 539 def lexer_alias(self):
540 540 """
541 541 Returns first alias of the lexer guessed for this file.
542 542 """
543 543 return self.lexer.aliases[0]
544 544
545 545 @LazyProperty
546 546 def history(self):
547 547 """
548 548 Returns a list of commit for this file in which the file was changed
549 549 """
550 550 if self.commit is None:
551 551 raise NodeError('Unable to get commit for this FileNode')
552 552 return self.commit.get_path_history(self.path)
553 553
554 554 @LazyProperty
555 555 def annotate(self):
556 556 """
557 557 Returns a list of three element tuples with lineno, commit and line
558 558 """
559 559 if self.commit is None:
560 560 raise NodeError('Unable to get commit for this FileNode')
561 561 pre_load = ["author", "date", "message", "parents"]
562 562 return self.commit.get_file_annotate(self.path, pre_load=pre_load)
563 563
564 564 @LazyProperty
565 565 def state(self):
566 566 if not self.commit:
567 567 raise NodeError(
568 568 "Cannot check state of the node if it's not "
569 569 "linked with commit")
570 570 elif self.path in (node.path for node in self.commit.added):
571 571 return NodeState.ADDED
572 572 elif self.path in (node.path for node in self.commit.changed):
573 573 return NodeState.CHANGED
574 574 else:
575 575 return NodeState.NOT_CHANGED
576 576
577 577 @LazyProperty
578 578 def is_binary(self):
579 579 """
580 580 Returns True if file has binary content.
581 581 """
582 582 if self.commit:
583 583 return self.commit.is_node_binary(self.path)
584 584 else:
585 585 raw_bytes = self._content
586 586 return raw_bytes and '\0' in raw_bytes
587 587
588 588 @LazyProperty
589 589 def extension(self):
590 590 """Returns filenode extension"""
591 591 return self.name.split('.')[-1]
592 592
593 593 @property
594 594 def is_executable(self):
595 595 """
596 596 Returns ``True`` if file has executable flag turned on.
597 597 """
598 598 return bool(self.mode & stat.S_IXUSR)
599 599
600 600 def get_largefile_node(self):
601 601 """
602 602 Try to return a Mercurial FileNode from this node. It does internal
603 603 checks inside largefile store, if that file exist there it will
604 604 create special instance of LargeFileNode which can get content from
605 605 LF store.
606 606 """
607 607 if self.commit:
608 608 return self.commit.get_largefile_node(self.path)
609 609
def count_lines(self, content, count_empty=False):
    """
    Count lines in ``content``.

    :param content: text to inspect.
    :param count_empty: when True, walk every line and additionally
        count empty ones (a line consisting of just ``'\\n'``).
    :return: 2-tuple of line counts. With ``count_empty=True`` this is
        ``(all_lines, all_lines - empty_lines)`` — i.e. the second item
        is the number of NON-empty lines.
        With ``count_empty=False`` (fast path) only newlines are counted
        and both items are equal: ``(all_lines, all_lines)``.
        NOTE(review): the second element means different things in the
        two modes; callers unpacking it as ``empty_lines`` should be
        aware — preserved as-is for backward compatibility.
    """
    if count_empty:
        all_lines = 0
        empty_lines = 0
        for line in content.splitlines(True):
            if line == '\n':
                empty_lines += 1
            all_lines += 1

        return all_lines, all_lines - empty_lines
    else:
        # fast method: just count newline characters
        empty_lines = all_lines = content.count('\n')
        if all_lines == 0 and content:
            # one-line without a newline
            empty_lines = all_lines = 1

        return all_lines, empty_lines

def lines(self, count_empty=False):
    """
    Return the line counts of this file node as computed by
    ``count_lines``. Binary files report ``(0, 0)``.
    """
    all_lines, empty_lines = 0, 0

    if not self.is_binary:
        content = self.content
        all_lines, empty_lines = self.count_lines(content, count_empty=count_empty)
    return all_lines, empty_lines
632 637
633 638 def __repr__(self):
634 639 return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
635 640 getattr(self.commit, 'short_id', ''))
636 641
637 642
638 643 class RemovedFileNode(FileNode):
639 644 """
640 645 Dummy FileNode class - trying to access any public attribute except path,
641 646 name, kind or state (or methods/attributes checking those two) would raise
642 647 RemovedFileNodeError.
643 648 """
644 649 ALLOWED_ATTRIBUTES = [
645 650 'name', 'path', 'state', 'is_root', 'is_file', 'is_dir', 'kind',
646 651 'added', 'changed', 'not_changed', 'removed'
647 652 ]
648 653
649 654 def __init__(self, path):
650 655 """
651 656 :param path: relative path to the node
652 657 """
653 658 super(RemovedFileNode, self).__init__(path=path)
654 659
655 660 def __getattribute__(self, attr):
656 661 if attr.startswith('_') or attr in RemovedFileNode.ALLOWED_ATTRIBUTES:
657 662 return super(RemovedFileNode, self).__getattribute__(attr)
658 663 raise RemovedFileNodeError(
659 664 "Cannot access attribute %s on RemovedFileNode" % attr)
660 665
661 666 @LazyProperty
662 667 def state(self):
663 668 return NodeState.REMOVED
664 669
665 670
666 671 class DirNode(Node):
667 672 """
668 673 DirNode stores list of files and directories within this node.
669 674 Nodes may be used standalone but within repository context they
670 675 lazily fetch data within same repository's commit.
671 676 """
672 677
673 678 def __init__(self, path, nodes=(), commit=None):
674 679 """
675 680 Only one of ``nodes`` and ``commit`` may be given. Passing both
676 681 would raise ``NodeError`` exception.
677 682
678 683 :param path: relative path to the node
679 684 :param nodes: content may be passed to constructor
680 685 :param commit: if given, will use it to lazily fetch content
681 686 """
682 687 if nodes and commit:
683 688 raise NodeError("Cannot use both nodes and commit")
684 689 super(DirNode, self).__init__(path, NodeKind.DIR)
685 690 self.commit = commit
686 691 self._nodes = nodes
687 692
688 693 @LazyProperty
689 694 def content(self):
690 695 raise NodeError(
691 696 "%s represents a dir and has no `content` attribute" % self)
692 697
693 698 @LazyProperty
694 699 def nodes(self):
695 700 if self.commit:
696 701 nodes = self.commit.get_nodes(self.path)
697 702 else:
698 703 nodes = self._nodes
699 704 self._nodes_dict = dict((node.path, node) for node in nodes)
700 705 return sorted(nodes)
701 706
702 707 @LazyProperty
703 708 def files(self):
704 709 return sorted((node for node in self.nodes if node.is_file()))
705 710
706 711 @LazyProperty
707 712 def dirs(self):
708 713 return sorted((node for node in self.nodes if node.is_dir()))
709 714
710 715 def __iter__(self):
711 716 for node in self.nodes:
712 717 yield node
713 718
714 719 def get_node(self, path):
715 720 """
716 721 Returns node from within this particular ``DirNode``, so it is now
717 722 allowed to fetch, i.e. node located at 'docs/api/index.rst' from node
718 723 'docs'. In order to access deeper nodes one must fetch nodes between
719 724 them first - this would work::
720 725
721 726 docs = root.get_node('docs')
722 727 docs.get_node('api').get_node('index.rst')
723 728
724 729 :param: path - relative to the current node
725 730
726 731 .. note::
727 732 To access lazily (as in example above) node have to be initialized
728 733 with related commit object - without it node is out of
729 734 context and may know nothing about anything else than nearest
730 735 (located at same level) nodes.
731 736 """
732 737 try:
733 738 path = path.rstrip('/')
734 739 if path == '':
735 740 raise NodeError("Cannot retrieve node without path")
736 741 self.nodes # access nodes first in order to set _nodes_dict
737 742 paths = path.split('/')
738 743 if len(paths) == 1:
739 744 if not self.is_root():
740 745 path = '/'.join((self.path, paths[0]))
741 746 else:
742 747 path = paths[0]
743 748 return self._nodes_dict[path]
744 749 elif len(paths) > 1:
745 750 if self.commit is None:
746 751 raise NodeError("Cannot access deeper nodes without commit")
747 752 else:
748 753 path1, path2 = paths[0], '/'.join(paths[1:])
749 754 return self.get_node(path1).get_node(path2)
750 755 else:
751 756 raise KeyError
752 757 except KeyError:
753 758 raise NodeError("Node does not exist at %s" % path)
754 759
755 760 @LazyProperty
756 761 def state(self):
757 762 raise NodeError("Cannot access state of DirNode")
758 763
759 764 @LazyProperty
760 765 def size(self):
761 766 size = 0
762 767 for root, dirs, files in self.commit.walk(self.path):
763 768 for f in files:
764 769 size += f.size
765 770
766 771 return size
767 772
768 773 @LazyProperty
769 774 def last_commit(self):
770 775 if self.commit:
771 776 pre_load = ["author", "date", "message", "parents"]
772 777 return self.commit.get_path_commit(self.path, pre_load=pre_load)
773 778 raise NodeError(
774 779 "Cannot retrieve last commit of the file without "
775 780 "related commit attribute")
776 781
777 782 def __repr__(self):
778 783 return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
779 784 getattr(self.commit, 'short_id', ''))
780 785
781 786
class RootNode(DirNode):
    """DirNode anchored at the repository root (the empty path)."""

    def __init__(self, nodes=(), commit=None):
        super(RootNode, self).__init__(path='', nodes=nodes, commit=commit)

    def __repr__(self):
        return '<%s>' % self.__class__.__name__
792 797
793 798
794 799 class SubModuleNode(Node):
795 800 """
796 801 represents a SubModule of Git or SubRepo of Mercurial
797 802 """
798 803 is_binary = False
799 804 size = 0
800 805
801 806 def __init__(self, name, url=None, commit=None, alias=None):
802 807 self.path = name
803 808 self.kind = NodeKind.SUBMODULE
804 809 self.alias = alias
805 810
806 811 # we have to use EmptyCommit here since this can point to svn/git/hg
807 812 # submodules we cannot get from repository
808 813 self.commit = EmptyCommit(str(commit), alias=alias)
809 814 self.url = url or self._extract_submodule_url()
810 815
811 816 def __repr__(self):
812 817 return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
813 818 getattr(self.commit, 'short_id', ''))
814 819
815 820 def _extract_submodule_url(self):
816 821 # TODO: find a way to parse gits submodule file and extract the
817 822 # linking URL
818 823 return self.path
819 824
820 825 @LazyProperty
821 826 def name(self):
822 827 """
823 828 Returns name of the node so if its path
824 829 then only last part is returned.
825 830 """
826 831 org = safe_unicode(self.path.rstrip('/').split('/')[-1])
827 832 return u'%s @ %s' % (org, self.commit.short_id)
828 833
829 834
class LargeFileNode(FileNode):
    """
    FileNode variant whose ``path`` points straight into the largefile
    store on disk (system absolute) rather than into the repository tree.
    """

    def __init__(self, path, url=None, commit=None, alias=None, org_path=None):
        self.path = path
        self.org_path = org_path
        self.kind = NodeKind.LARGEFILE
        self.alias = alias

    def _validate_path(self, path):
        """
        No-op override: the LargeFileNode path is system absolute, so the
        relative-path check inherited from Node does not apply.
        """
        pass

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.path)

    @LazyProperty
    def size(self):
        # Size comes from the file on disk, not from a commit.
        return os.stat(self.path).st_size

    @LazyProperty
    def raw_bytes(self):
        with open(self.path, 'rb') as f:
            return f.read()

    @LazyProperty
    def name(self):
        """Overwrites name to be the org lf path"""
        return self.org_path

    def stream_bytes(self):
        # Stream the store file in 16KB chunks until EOF (empty read).
        with open(self.path, 'rb') as stream:
            for data in iter(lambda: stream.read(16 * 1024), b''):
                yield data
@@ -1,1008 +1,1014 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2010-2019 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 """
22 22 Scm model for RhodeCode
23 23 """
24 24
25 25 import os.path
26 26 import traceback
27 27 import logging
28 28 import cStringIO
29 29
30 30 from sqlalchemy import func
31 31 from zope.cachedescriptors.property import Lazy as LazyProperty
32 32
33 33 import rhodecode
34 34 from rhodecode.lib.vcs import get_backend
35 35 from rhodecode.lib.vcs.exceptions import RepositoryError, NodeNotChangedError
36 36 from rhodecode.lib.vcs.nodes import FileNode
37 37 from rhodecode.lib.vcs.backends.base import EmptyCommit
38 38 from rhodecode.lib import helpers as h, rc_cache
39 39 from rhodecode.lib.auth import (
40 40 HasRepoPermissionAny, HasRepoGroupPermissionAny,
41 41 HasUserGroupPermissionAny)
42 42 from rhodecode.lib.exceptions import NonRelativePathError, IMCCommitError
43 43 from rhodecode.lib import hooks_utils
44 44 from rhodecode.lib.utils import (
45 45 get_filesystem_repos, make_db_config)
46 46 from rhodecode.lib.utils2 import (safe_str, safe_unicode)
47 47 from rhodecode.lib.system_info import get_system_info
48 48 from rhodecode.model import BaseModel
49 49 from rhodecode.model.db import (
50 50 Repository, CacheKey, UserFollowing, UserLog, User, RepoGroup,
51 51 PullRequest)
52 52 from rhodecode.model.settings import VcsSettingsModel
53 53 from rhodecode.model.validation_schema.validators import url_validator, InvalidCloneUrl
54 54
55 55 log = logging.getLogger(__name__)
56 56
57 57
class UserTemp(object):
    """Lightweight stand-in object carrying only a user id."""

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        cls_name = self.__class__.__name__
        return "<%s('id:%s')>" % (cls_name, self.user_id)
64 64
65 65
class RepoTemp(object):
    """Lightweight stand-in object carrying only a repository id."""

    def __init__(self, repo_id):
        self.repo_id = repo_id

    def __repr__(self):
        cls_name = self.__class__.__name__
        return "<%s('id:%s')>" % (cls_name, self.repo_id)
72 72
73 73
class SimpleCachedRepoList(object):
    """Iterate db repositories as plain dicts, without SCM initialisation.

    Permission filtering happens lazily during iteration; entries the
    current user may not access are silently dropped.
    """

    def __init__(self, db_repo_list, repos_path, order_by=None, perm_set=None):
        self.db_repo_list = db_repo_list
        self.repos_path = repos_path
        self.order_by = order_by
        self.reversed = (order_by or '').startswith('-')
        self.perm_set = perm_set or [
            'repository.read', 'repository.write', 'repository.admin']

    def __len__(self):
        return len(self.db_repo_list)

    def __repr__(self):
        return '<%s (%s)>' % (self.__class__.__name__, len(self))

    def __iter__(self):
        for db_repo in self.db_repo_list:
            # permission check happens per item, at iteration time
            allowed = HasRepoPermissionAny(*self.perm_set)(
                db_repo.repo_name, 'SimpleCachedRepoList check')
            if not allowed:
                continue

            yield {
                'name': db_repo.repo_name,
                'dbrepo': db_repo.get_dict(),
                'dbrepo_fork': db_repo.fork.get_dict() if db_repo.fork else {},
            }
109 109
110 110
111 111 class _PermCheckIterator(object):
112 112
113 113 def __init__(
114 114 self, obj_list, obj_attr, perm_set, perm_checker,
115 115 extra_kwargs=None):
116 116 """
117 117 Creates iterator from given list of objects, additionally
118 118 checking permission for them from perm_set var
119 119
120 120 :param obj_list: list of db objects
121 121 :param obj_attr: attribute of object to pass into perm_checker
122 122 :param perm_set: list of permissions to check
123 123 :param perm_checker: callable to check permissions against
124 124 """
125 125 self.obj_list = obj_list
126 126 self.obj_attr = obj_attr
127 127 self.perm_set = perm_set
128 128 self.perm_checker = perm_checker
129 129 self.extra_kwargs = extra_kwargs or {}
130 130
131 131 def __len__(self):
132 132 return len(self.obj_list)
133 133
134 134 def __repr__(self):
135 135 return '<%s (%s)>' % (self.__class__.__name__, self.__len__())
136 136
137 137 def __iter__(self):
138 138 checker = self.perm_checker(*self.perm_set)
139 139 for db_obj in self.obj_list:
140 140 # check permission at this level
141 141 name = getattr(db_obj, self.obj_attr, None)
142 142 if not checker(name, self.__class__.__name__, **self.extra_kwargs):
143 143 continue
144 144
145 145 yield db_obj
146 146
147 147
class RepoList(_PermCheckIterator):
    """Permission-filtered iterator over repository db objects."""

    def __init__(self, db_repo_list, perm_set=None, extra_kwargs=None):
        perm_set = perm_set or [
            'repository.read', 'repository.write', 'repository.admin']

        super(RepoList, self).__init__(
            obj_list=db_repo_list,
            obj_attr='repo_name',
            perm_set=perm_set,
            perm_checker=HasRepoPermissionAny,
            extra_kwargs=extra_kwargs)
160 160
161 161
class RepoGroupList(_PermCheckIterator):
    """Permission-filtered iterator over repository-group db objects."""

    def __init__(self, db_repo_group_list, perm_set=None, extra_kwargs=None):
        perm_set = perm_set or ['group.read', 'group.write', 'group.admin']

        super(RepoGroupList, self).__init__(
            obj_list=db_repo_group_list,
            obj_attr='group_name',
            perm_set=perm_set,
            perm_checker=HasRepoGroupPermissionAny,
            extra_kwargs=extra_kwargs)
173 173
174 174
class UserGroupList(_PermCheckIterator):
    """Permission-filtered iterator over user-group db objects."""

    def __init__(self, db_user_group_list, perm_set=None, extra_kwargs=None):
        perm_set = perm_set or [
            'usergroup.read', 'usergroup.write', 'usergroup.admin']

        super(UserGroupList, self).__init__(
            obj_list=db_user_group_list,
            obj_attr='users_group_name',
            perm_set=perm_set,
            perm_checker=HasUserGroupPermissionAny,
            extra_kwargs=extra_kwargs)
186 186
187 187
188 188 class ScmModel(BaseModel):
189 189 """
190 190 Generic Scm Model
191 191 """
192 192
193 193 @LazyProperty
194 194 def repos_path(self):
195 195 """
196 196 Gets the repositories root path from database
197 197 """
198 198
199 199 settings_model = VcsSettingsModel(sa=self.sa)
200 200 return settings_model.get_repos_location()
201 201
    def repo_scan(self, repos_path=None):
        """
        Listing of repositories in given path. This path should not be a
        repository itself. Return a dictionary of repository objects

        :param repos_path: path to directory containing repositories;
            defaults to the configured repositories root
        :returns: dict mapping normalized repo name -> vcs backend instance
        """

        if repos_path is None:
            repos_path = self.repos_path

        log.info('scanning for repositories in %s', repos_path)

        config = make_db_config()
        config.set('extensions', 'largefiles', '')
        repos = {}

        # each item is (name, (backend_alias, repo_path)) as yielded by
        # get_filesystem_repos
        for name, path in get_filesystem_repos(repos_path, recursive=True):
            # name need to be decomposed and put back together using the /
            # since this is internal storage separator for rhodecode
            name = Repository.normalize_repo_name(name)

            try:
                if name in repos:
                    raise RepositoryError('Duplicate repository name %s '
                                          'found in %s' % (name, path))
                elif path[0] in rhodecode.BACKENDS:
                    backend = get_backend(path[0])
                    repos[name] = backend(path[1], config=config,
                                          with_wire={"cache": False})
            except OSError:
                # unreadable/broken repositories are skipped instead of
                # aborting the whole scan
                continue
        log.debug('found %s paths with repositories', len(repos))
        return repos
236 236
237 237 def get_repos(self, all_repos=None, sort_key=None):
238 238 """
239 239 Get all repositories from db and for each repo create it's
240 240 backend instance and fill that backed with information from database
241 241
242 242 :param all_repos: list of repository names as strings
243 243 give specific repositories list, good for filtering
244 244
245 245 :param sort_key: initial sorting of repositories
246 246 """
247 247 if all_repos is None:
248 248 all_repos = self.sa.query(Repository)\
249 249 .filter(Repository.group_id == None)\
250 250 .order_by(func.lower(Repository.repo_name)).all()
251 251 repo_iter = SimpleCachedRepoList(
252 252 all_repos, repos_path=self.repos_path, order_by=sort_key)
253 253 return repo_iter
254 254
255 255 def get_repo_groups(self, all_groups=None):
256 256 if all_groups is None:
257 257 all_groups = RepoGroup.query()\
258 258 .filter(RepoGroup.group_parent_id == None).all()
259 259 return [x for x in RepoGroupList(all_groups)]
260 260
    def mark_for_invalidation(self, repo_name, delete=False):
        """
        Mark caches of this repo invalid in the database. `delete` flag
        removes the cache entries

        :param repo_name: the repo_name for which caches should be marked
            invalid, or deleted
        :param delete: delete the entry keys instead of setting bool
            flag on them, and also purge caches used by the dogpile
        """
        repo = Repository.get_by_repo_name(repo_name)

        # silently no-op when the repository does not exist in the database
        if repo:
            invalidation_namespace = CacheKey.REPO_INVALIDATION_NAMESPACE.format(
                repo_id=repo.repo_id)
            CacheKey.set_invalidate(invalidation_namespace, delete=delete)

            repo_id = repo.repo_id
            config = repo._config
            # keep the largefiles extension setting consistent with repo_scan()
            config.set('extensions', 'largefiles', '')
            # refresh the cached commit metadata immediately so readers do not
            # observe stale data between invalidation and the next access
            repo.update_commit_cache(config=config, cs_cache=None)
            if delete:
                cache_namespace_uid = 'cache_repo.{}'.format(repo_id)
                rc_cache.clear_cache_namespace('cache_repo', cache_namespace_uid)
285 285
286 286 def toggle_following_repo(self, follow_repo_id, user_id):
287 287
288 288 f = self.sa.query(UserFollowing)\
289 289 .filter(UserFollowing.follows_repo_id == follow_repo_id)\
290 290 .filter(UserFollowing.user_id == user_id).scalar()
291 291
292 292 if f is not None:
293 293 try:
294 294 self.sa.delete(f)
295 295 return
296 296 except Exception:
297 297 log.error(traceback.format_exc())
298 298 raise
299 299
300 300 try:
301 301 f = UserFollowing()
302 302 f.user_id = user_id
303 303 f.follows_repo_id = follow_repo_id
304 304 self.sa.add(f)
305 305 except Exception:
306 306 log.error(traceback.format_exc())
307 307 raise
308 308
309 309 def toggle_following_user(self, follow_user_id, user_id):
310 310 f = self.sa.query(UserFollowing)\
311 311 .filter(UserFollowing.follows_user_id == follow_user_id)\
312 312 .filter(UserFollowing.user_id == user_id).scalar()
313 313
314 314 if f is not None:
315 315 try:
316 316 self.sa.delete(f)
317 317 return
318 318 except Exception:
319 319 log.error(traceback.format_exc())
320 320 raise
321 321
322 322 try:
323 323 f = UserFollowing()
324 324 f.user_id = user_id
325 325 f.follows_user_id = follow_user_id
326 326 self.sa.add(f)
327 327 except Exception:
328 328 log.error(traceback.format_exc())
329 329 raise
330 330
331 331 def is_following_repo(self, repo_name, user_id, cache=False):
332 332 r = self.sa.query(Repository)\
333 333 .filter(Repository.repo_name == repo_name).scalar()
334 334
335 335 f = self.sa.query(UserFollowing)\
336 336 .filter(UserFollowing.follows_repository == r)\
337 337 .filter(UserFollowing.user_id == user_id).scalar()
338 338
339 339 return f is not None
340 340
341 341 def is_following_user(self, username, user_id, cache=False):
342 342 u = User.get_by_username(username)
343 343
344 344 f = self.sa.query(UserFollowing)\
345 345 .filter(UserFollowing.follows_user == u)\
346 346 .filter(UserFollowing.user_id == user_id).scalar()
347 347
348 348 return f is not None
349 349
350 350 def get_followers(self, repo):
351 351 repo = self._get_repo(repo)
352 352
353 353 return self.sa.query(UserFollowing)\
354 354 .filter(UserFollowing.follows_repository == repo).count()
355 355
356 356 def get_forks(self, repo):
357 357 repo = self._get_repo(repo)
358 358 return self.sa.query(Repository)\
359 359 .filter(Repository.fork == repo).count()
360 360
361 361 def get_pull_requests(self, repo):
362 362 repo = self._get_repo(repo)
363 363 return self.sa.query(PullRequest)\
364 364 .filter(PullRequest.target_repo == repo)\
365 365 .filter(PullRequest.status != PullRequest.STATUS_CLOSED).count()
366 366
367 367 def mark_as_fork(self, repo, fork, user):
368 368 repo = self._get_repo(repo)
369 369 fork = self._get_repo(fork)
370 370 if fork and repo.repo_id == fork.repo_id:
371 371 raise Exception("Cannot set repository as fork of itself")
372 372
373 373 if fork and repo.repo_type != fork.repo_type:
374 374 raise RepositoryError(
375 375 "Cannot set repository as fork of repository with other type")
376 376
377 377 repo.fork = fork
378 378 self.sa.add(repo)
379 379 return repo
380 380
    def pull_changes(self, repo, username, remote_uri=None, validate_uri=True):
        """
        Fetch changes into `repo` from its configured (or given) remote.

        :param repo: repository name, id or db object
        :param username: user performing the pull (kept for the interface;
            not used directly here)
        :param remote_uri: optional explicit source; defaults to the stored
            clone uri
        :param validate_uri: validate the remote url before fetching
        :raises InvalidCloneUrl: when validation of the remote url fails
        """
        dbrepo = self._get_repo(repo)
        remote_uri = remote_uri or dbrepo.clone_uri
        if not remote_uri:
            raise Exception("This repository doesn't have a clone uri")

        repo = dbrepo.scm_instance(cache=False)
        # NOTE(review): hooks section is cleared, presumably so repo hooks
        # do not fire for this internal sync operation — confirm
        repo.config.clear_section('hooks')

        try:
            # NOTE(marcink): add extra validation so we skip invalid urls
            # this is due this tasks can be executed via scheduler without
            # proper validation of remote_uri
            if validate_uri:
                config = make_db_config(clear_session=False)
                url_validator(remote_uri, dbrepo.repo_type, config)
        except InvalidCloneUrl:
            raise

        repo_name = dbrepo.repo_name
        try:
            # TODO: we need to make sure those operations call proper hooks !
            repo.fetch(remote_uri)

            self.mark_for_invalidation(repo_name)
        except Exception:
            log.error(traceback.format_exc())
            raise
409 409
410 410 def push_changes(self, repo, username, remote_uri=None, validate_uri=True):
411 411 dbrepo = self._get_repo(repo)
412 412 remote_uri = remote_uri or dbrepo.push_uri
413 413 if not remote_uri:
414 414 raise Exception("This repository doesn't have a clone uri")
415 415
416 416 repo = dbrepo.scm_instance(cache=False)
417 417 repo.config.clear_section('hooks')
418 418
419 419 try:
420 420 # NOTE(marcink): add extra validation so we skip invalid urls
421 421 # this is due this tasks can be executed via scheduler without
422 422 # proper validation of remote_uri
423 423 if validate_uri:
424 424 config = make_db_config(clear_session=False)
425 425 url_validator(remote_uri, dbrepo.repo_type, config)
426 426 except InvalidCloneUrl:
427 427 raise
428 428
429 429 try:
430 430 repo.push(remote_uri)
431 431 except Exception:
432 432 log.error(traceback.format_exc())
433 433 raise
434 434
    def commit_change(self, repo, repo_name, commit, user, author, message,
                      content, f_path):
        """
        Commits changes

        :param repo: SCM instance
        :param repo_name: name of the repository (used for cache invalidation
            and the post-push hook)
        :param commit: parent commit the change is based on
        :param user: RhodeCode User object or user_id, the committer
        :param author: author of the commit
        :param message: commit message
        :param content: new content of the file at `f_path`
        :param f_path: path of the file to change
        :raises IMCCommitError: when the in-memory commit fails
        :returns: the newly created tip commit
        """
        user = self._get_user(user)

        # decoding here will force that we have proper encoded values
        # in any other case this will throw exceptions and deny commit
        content = safe_str(content)
        path = safe_str(f_path)
        # message and author needs to be unicode
        # proper backend should then translate that into required type
        message = safe_unicode(message)
        author = safe_unicode(author)
        imc = repo.in_memory_commit
        # keep the original file mode of the changed file
        imc.change(FileNode(path, content, mode=commit.get_file_mode(f_path)))
        try:
            # TODO: handle pre-push action !
            tip = imc.commit(
                message=message, author=author, parents=[commit],
                branch=commit.branch)
        except Exception as e:
            log.error(traceback.format_exc())
            raise IMCCommitError(str(e))
        finally:
            # always clear caches, if commit fails we want fresh object also
            self.mark_for_invalidation(repo_name)

        # We trigger the post-push action
        hooks_utils.trigger_post_push_hook(
            username=user.username, action='push_local', hook_type='post_push',
            repo_name=repo_name, repo_alias=repo.alias, commit_ids=[tip.raw_id])
        return tip
472 472
473 473 def _sanitize_path(self, f_path):
474 474 if f_path.startswith('/') or f_path.startswith('./') or '../' in f_path:
475 475 raise NonRelativePathError('%s is not an relative path' % f_path)
476 476 if f_path:
477 477 f_path = os.path.normpath(f_path)
478 478 return f_path
479 479
480 480 def get_dirnode_metadata(self, request, commit, dir_node):
481 481 if not dir_node.is_dir():
482 482 return []
483 483
484 484 data = []
485 485 for node in dir_node:
486 486 if not node.is_file():
487 487 # we skip file-nodes
488 488 continue
489 489
490 490 last_commit = node.last_commit
491 491 last_commit_date = last_commit.date
492 492 data.append({
493 493 'name': node.name,
494 494 'size': h.format_byte_size_binary(node.size),
495 495 'modified_at': h.format_date(last_commit_date),
496 496 'modified_ts': last_commit_date.isoformat(),
497 497 'revision': last_commit.revision,
498 498 'short_id': last_commit.short_id,
499 499 'message': h.escape(last_commit.message),
500 500 'author': h.escape(last_commit.author),
501 501 'user_profile': h.gravatar_with_user(
502 502 request, last_commit.author),
503 503 })
504 504
505 505 return data
506 506
    def get_nodes(self, repo_name, commit_id, root_path='/', flat=True,
                  extended_info=False, content=False, max_file_bytes=None):
        """
        recursive walk in root dir and return a set of all path in that dir
        based on repository walk function

        :param repo_name: name of repository
        :param commit_id: commit id for which to list nodes
        :param root_path: root path to list
        :param flat: return as a list, if False returns a dict with description
        :param extended_info: show additional info such as md5, binary, size etc
        :param content: add nodes content to the return data
        :param max_file_bytes: will not return file contents over this limit

        :returns: tuple ``(dirs, files)``
        """
        _files = list()
        _dirs = list()
        try:
            _repo = self._get_repo(repo_name)
            commit = _repo.scm_instance().get_commit(commit_id=commit_id)
            root_path = root_path.lstrip('/')
            for __, dirs, files in commit.walk(root_path):

                for f in files:
                    _content = None
                    # in flat mode each entry is just the unicode path
                    _data = f_name = f.unicode_path

                    if not flat:
                        _data = {
                            "name": h.escape(f_name),
                            "type": "file",
                        }
                        if extended_info:
                            _data.update({
                                "md5": f.md5,
                                "binary": f.is_binary,
                                "size": f.size,
                                "extension": f.extension,
                                "mimetype": f.mimetype,
                                # only the total line count is exposed here
                                "lines": f.lines()[0]
                            })

                        if content:
                            # content is attached only for non-binary files
                            # below the optional size limit
                            over_size_limit = (max_file_bytes is not None
                                               and f.size > max_file_bytes)
                            full_content = None
                            if not f.is_binary and not over_size_limit:
                                full_content = safe_str(f.content)

                            _data.update({
                                "content": full_content,
                            })
                    _files.append(_data)

                for d in dirs:
                    _data = d_name = d.unicode_path
                    if not flat:
                        _data = {
                            "name": h.escape(d_name),
                            "type": "dir",
                        }
                    # NOTE(review): unlike the files branch above, these two
                    # updates are not guarded by `not flat`; flat=True with
                    # extended_info/content would call .update() on a unicode
                    # string — confirm callers never combine those flags
                    if extended_info:
                        _data.update({
                            "md5": None,
                            "binary": None,
                            "size": None,
                            "extension": None,
                        })
                    if content:
                        _data.update({
                            "content": None
                        })
                    _dirs.append(_data)
        except RepositoryError:
            log.exception("Exception in get_nodes")
            raise

        return _dirs, _files
585 585
586 586 def get_quick_filter_nodes(self, repo_name, commit_id, root_path='/'):
587 587 """
588 588 Generate files for quick filter in files view
589 589 """
590 590
591 591 _files = list()
592 592 _dirs = list()
593 593 try:
594 594 _repo = self._get_repo(repo_name)
595 595 commit = _repo.scm_instance().get_commit(commit_id=commit_id)
596 596 root_path = root_path.lstrip('/')
597 597 for __, dirs, files in commit.walk(root_path):
598 598
599 599 for f in files:
600 600
601 601 _data = {
602 602 "name": h.escape(f.unicode_path),
603 603 "type": "file",
604 604 }
605 605
606 606 _files.append(_data)
607 607
608 608 for d in dirs:
609 609
610 610 _data = {
611 611 "name": h.escape(d.unicode_path),
612 612 "type": "dir",
613 613 }
614 614
615 615 _dirs.append(_data)
616 616 except RepositoryError:
617 617 log.exception("Exception in get_quick_filter_nodes")
618 618 raise
619 619
620 620 return _dirs, _files
621 621
    def get_node(self, repo_name, commit_id, file_path,
                 extended_info=False, content=False, max_file_bytes=None, cache=True):
        """
        retrieve single node from commit

        :param repo_name: name of repository
        :param commit_id: commit id the node is read from
        :param file_path: path of the file node
        :param extended_info: include extension/mimetype details
        :param content: include the file content and its total line count
        :param max_file_bytes: content is omitted for files over this size
        :param cache: read node metadata/content through the cache layer
        :raises RepositoryError: when the path points to a directory
        :returns: dict with the node information
        """
        try:

            _repo = self._get_repo(repo_name)
            commit = _repo.scm_instance().get_commit(commit_id=commit_id)

            file_node = commit.get_node(file_path)
            if file_node.is_dir():
                raise RepositoryError('The given path is a directory')

            _content = None
            f_name = file_node.unicode_path

            file_data = {
                "name": h.escape(f_name),
                "type": "file",
            }

            if extended_info:
                file_data.update({
                    "extension": file_node.extension,
                    "mimetype": file_node.mimetype,
                })

            if cache:
                md5 = file_node.md5
                is_binary = file_node.is_binary
                size = file_node.size
            else:
                # single uncached call also yields the raw content, reused
                # below in the `content and not cache` branch
                is_binary, md5, size, _content = file_node.metadata_uncached()

            file_data.update({
                "md5": md5,
                "binary": is_binary,
                "size": size,
            })

            if content and cache:
                # get content + cache
                size = file_node.size
                over_size_limit = (max_file_bytes is not None and size > max_file_bytes)
                full_content = None
                all_lines = 0
                if not file_node.is_binary and not over_size_limit:
                    full_content = safe_unicode(file_node.content)
                    # line counts are exposed for full text search indexing
                    all_lines, empty_lines = file_node.count_lines(full_content)

                file_data.update({
                    "content": full_content,
                    "lines": all_lines
                })
            elif content:
                # get content *without* cache
                if _content is None:
                    is_binary, md5, size, _content = file_node.metadata_uncached()

                over_size_limit = (max_file_bytes is not None and size > max_file_bytes)
                full_content = None
                all_lines = 0
                if not is_binary and not over_size_limit:
                    full_content = safe_unicode(_content)
                    all_lines, empty_lines = file_node.count_lines(full_content)

                file_data.update({
                    "content": full_content,
                    "lines": all_lines
                })

        except RepositoryError:
            log.exception("Exception in get_node")
            raise

        return file_data
693 699
694 700 def get_fts_data(self, repo_name, commit_id, root_path='/'):
695 701 """
696 702 Fetch node tree for usage in full text search
697 703 """
698 704
699 705 tree_info = list()
700 706
701 707 try:
702 708 _repo = self._get_repo(repo_name)
703 709 commit = _repo.scm_instance().get_commit(commit_id=commit_id)
704 710 root_path = root_path.lstrip('/')
705 711 for __, dirs, files in commit.walk(root_path):
706 712
707 713 for f in files:
708 714 is_binary, md5, size, _content = f.metadata_uncached()
709 715 _data = {
710 716 "name": f.unicode_path,
711 717 "md5": md5,
712 718 "extension": f.extension,
713 719 "binary": is_binary,
714 720 "size": size
715 721 }
716 722
717 723 tree_info.append(_data)
718 724
719 725 except RepositoryError:
720 726 log.exception("Exception in get_nodes")
721 727 raise
722 728
723 729 return tree_info
724 730
725 731 def create_nodes(self, user, repo, message, nodes, parent_commit=None,
726 732 author=None, trigger_push_hook=True):
727 733 """
728 734 Commits given multiple nodes into repo
729 735
730 736 :param user: RhodeCode User object or user_id, the commiter
731 737 :param repo: RhodeCode Repository object
732 738 :param message: commit message
733 739 :param nodes: mapping {filename:{'content':content},...}
734 740 :param parent_commit: parent commit, can be empty than it's
735 741 initial commit
736 742 :param author: author of commit, cna be different that commiter
737 743 only for git
738 744 :param trigger_push_hook: trigger push hooks
739 745
740 746 :returns: new commited commit
741 747 """
742 748
743 749 user = self._get_user(user)
744 750 scm_instance = repo.scm_instance(cache=False)
745 751
746 752 processed_nodes = []
747 753 for f_path in nodes:
748 754 f_path = self._sanitize_path(f_path)
749 755 content = nodes[f_path]['content']
750 756 f_path = safe_str(f_path)
751 757 # decoding here will force that we have proper encoded values
752 758 # in any other case this will throw exceptions and deny commit
753 759 if isinstance(content, (basestring,)):
754 760 content = safe_str(content)
755 761 elif isinstance(content, (file, cStringIO.OutputType,)):
756 762 content = content.read()
757 763 else:
758 764 raise Exception('Content is of unrecognized type %s' % (
759 765 type(content)
760 766 ))
761 767 processed_nodes.append((f_path, content))
762 768
763 769 message = safe_unicode(message)
764 770 commiter = user.full_contact
765 771 author = safe_unicode(author) if author else commiter
766 772
767 773 imc = scm_instance.in_memory_commit
768 774
769 775 if not parent_commit:
770 776 parent_commit = EmptyCommit(alias=scm_instance.alias)
771 777
772 778 if isinstance(parent_commit, EmptyCommit):
773 779 # EmptyCommit means we we're editing empty repository
774 780 parents = None
775 781 else:
776 782 parents = [parent_commit]
777 783 # add multiple nodes
778 784 for path, content in processed_nodes:
779 785 imc.add(FileNode(path, content=content))
780 786 # TODO: handle pre push scenario
781 787 tip = imc.commit(message=message,
782 788 author=author,
783 789 parents=parents,
784 790 branch=parent_commit.branch)
785 791
786 792 self.mark_for_invalidation(repo.repo_name)
787 793 if trigger_push_hook:
788 794 hooks_utils.trigger_post_push_hook(
789 795 username=user.username, action='push_local',
790 796 repo_name=repo.repo_name, repo_alias=scm_instance.alias,
791 797 hook_type='post_push',
792 798 commit_ids=[tip.raw_id])
793 799 return tip
794 800
    def update_nodes(self, user, repo, message, nodes, parent_commit=None,
                     author=None, trigger_push_hook=True):
        """
        Commits modifications of multiple nodes into `repo`.

        :param user: RhodeCode User object or user_id, the commiter
        :param repo: RhodeCode Repository object
        :param message: commit message
        :param nodes: mapping ``{old_filename: {'filename': new_name,
            'content': content, 'op': 'add'|'del'|'mod', 'mode': mode}}``
        :param parent_commit: parent commit; empty means initial commit
        :param author: author of commit, can differ from commiter (git only)
        :param trigger_push_hook: trigger push hooks
        :raises NodeNotChangedError: re-raised unchanged from imc.commit
        :raises IMCCommitError: for any other commit failure
        :returns: the new tip commit
        """
        user = self._get_user(user)
        scm_instance = repo.scm_instance(cache=False)

        message = safe_unicode(message)
        commiter = user.full_contact
        author = safe_unicode(author) if author else commiter

        imc = scm_instance.in_memory_commit

        if not parent_commit:
            parent_commit = EmptyCommit(alias=scm_instance.alias)

        if isinstance(parent_commit, EmptyCommit):
            # EmptyCommit means we we're editing empty repository
            parents = None
        else:
            parents = [parent_commit]

        # add multiple nodes
        for _filename, data in nodes.items():
            # new filename, can be renamed from the old one, also sanitaze
            # the path for any hack around relative paths like ../../ etc.
            filename = self._sanitize_path(data['filename'])
            old_filename = self._sanitize_path(_filename)
            content = data['content']
            file_mode = data.get('mode')
            filenode = FileNode(old_filename, content=content, mode=file_mode)
            op = data['op']
            if op == 'add':
                imc.add(filenode)
            elif op == 'del':
                imc.remove(filenode)
            elif op == 'mod':
                if filename != old_filename:
                    # TODO: handle renames more efficient, needs vcs lib changes
                    # a rename is modeled as remove(old) + add(new)
                    imc.remove(filenode)
                    imc.add(FileNode(filename, content=content, mode=file_mode))
                else:
                    imc.change(filenode)

        try:
            # TODO: handle pre push scenario commit changes
            tip = imc.commit(message=message,
                             author=author,
                             parents=parents,
                             branch=parent_commit.branch)
        except NodeNotChangedError:
            raise
        except Exception as e:
            log.exception("Unexpected exception during call to imc.commit")
            raise IMCCommitError(str(e))
        finally:
            # always clear caches, if commit fails we want fresh object also
            self.mark_for_invalidation(repo.repo_name)

        if trigger_push_hook:
            hooks_utils.trigger_post_push_hook(
                username=user.username, action='push_local', hook_type='post_push',
                repo_name=repo.repo_name, repo_alias=scm_instance.alias,
                commit_ids=[tip.raw_id])

        return tip
859 865
860 866 def delete_nodes(self, user, repo, message, nodes, parent_commit=None,
861 867 author=None, trigger_push_hook=True):
862 868 """
863 869 Deletes given multiple nodes into `repo`
864 870
865 871 :param user: RhodeCode User object or user_id, the committer
866 872 :param repo: RhodeCode Repository object
867 873 :param message: commit message
868 874 :param nodes: mapping {filename:{'content':content},...}
869 875 :param parent_commit: parent commit, can be empty than it's initial
870 876 commit
871 877 :param author: author of commit, cna be different that commiter only
872 878 for git
873 879 :param trigger_push_hook: trigger push hooks
874 880
875 881 :returns: new commit after deletion
876 882 """
877 883
878 884 user = self._get_user(user)
879 885 scm_instance = repo.scm_instance(cache=False)
880 886
881 887 processed_nodes = []
882 888 for f_path in nodes:
883 889 f_path = self._sanitize_path(f_path)
884 890 # content can be empty but for compatabilty it allows same dicts
885 891 # structure as add_nodes
886 892 content = nodes[f_path].get('content')
887 893 processed_nodes.append((f_path, content))
888 894
889 895 message = safe_unicode(message)
890 896 commiter = user.full_contact
891 897 author = safe_unicode(author) if author else commiter
892 898
893 899 imc = scm_instance.in_memory_commit
894 900
895 901 if not parent_commit:
896 902 parent_commit = EmptyCommit(alias=scm_instance.alias)
897 903
898 904 if isinstance(parent_commit, EmptyCommit):
899 905 # EmptyCommit means we we're editing empty repository
900 906 parents = None
901 907 else:
902 908 parents = [parent_commit]
903 909 # add multiple nodes
904 910 for path, content in processed_nodes:
905 911 imc.remove(FileNode(path, content=content))
906 912
907 913 # TODO: handle pre push scenario
908 914 tip = imc.commit(message=message,
909 915 author=author,
910 916 parents=parents,
911 917 branch=parent_commit.branch)
912 918
913 919 self.mark_for_invalidation(repo.repo_name)
914 920 if trigger_push_hook:
915 921 hooks_utils.trigger_post_push_hook(
916 922 username=user.username, action='push_local', hook_type='post_push',
917 923 repo_name=repo.repo_name, repo_alias=scm_instance.alias,
918 924 commit_ids=[tip.raw_id])
919 925 return tip
920 926
921 927 def strip(self, repo, commit_id, branch):
922 928 scm_instance = repo.scm_instance(cache=False)
923 929 scm_instance.config.clear_section('hooks')
924 930 scm_instance.strip(commit_id, branch)
925 931 self.mark_for_invalidation(repo.repo_name)
926 932
927 933 def get_unread_journal(self):
928 934 return self.sa.query(UserLog).count()
929 935
930 936 @classmethod
931 937 def backend_landing_ref(cls, repo_type):
932 938 """
933 939 Return a default landing ref based on a repository type.
934 940 """
935 941
936 942 landing_ref = {
937 943 'hg': ('branch:default', 'default'),
938 944 'git': ('branch:master', 'master'),
939 945 'svn': ('rev:tip', 'latest tip'),
940 946 'default': ('rev:tip', 'latest tip'),
941 947 }
942 948
943 949 return landing_ref.get(repo_type) or landing_ref['default']
944 950
945 951 def get_repo_landing_revs(self, translator, repo=None):
946 952 """
947 953 Generates select option with tags branches and bookmarks (for hg only)
948 954 grouped by type
949 955
950 956 :param repo:
951 957 """
952 958 _ = translator
953 959 repo = self._get_repo(repo)
954 960
955 961 if repo:
956 962 repo_type = repo.repo_type
957 963 else:
958 964 repo_type = 'default'
959 965
960 966 default_landing_ref, landing_ref_lbl = self.backend_landing_ref(repo_type)
961 967
962 968 default_ref_options = [
963 969 [default_landing_ref, landing_ref_lbl]
964 970 ]
965 971 default_choices = [
966 972 default_landing_ref
967 973 ]
968 974
969 975 if not repo:
970 976 return default_choices, default_ref_options
971 977
972 978 repo = repo.scm_instance()
973 979
974 980 ref_options = [('rev:tip', 'latest tip')]
975 981 choices = ['rev:tip']
976 982
977 983 # branches
978 984 branch_group = [(u'branch:%s' % safe_unicode(b), safe_unicode(b)) for b in repo.branches]
979 985 if not branch_group:
980 986 # new repo, or without maybe a branch?
981 987 branch_group = default_ref_options
982 988
983 989 branches_group = (branch_group, _("Branches"))
984 990 ref_options.append(branches_group)
985 991 choices.extend([x[0] for x in branches_group[0]])
986 992
987 993 # bookmarks for HG
988 994 if repo.alias == 'hg':
989 995 bookmarks_group = (
990 996 [(u'book:%s' % safe_unicode(b), safe_unicode(b))
991 997 for b in repo.bookmarks],
992 998 _("Bookmarks"))
993 999 ref_options.append(bookmarks_group)
994 1000 choices.extend([x[0] for x in bookmarks_group[0]])
995 1001
996 1002 # tags
997 1003 tags_group = (
998 1004 [(u'tag:%s' % safe_unicode(t), safe_unicode(t))
999 1005 for t in repo.tags],
1000 1006 _("Tags"))
1001 1007 ref_options.append(tags_group)
1002 1008 choices.extend([x[0] for x in tags_group[0]])
1003 1009
1004 1010 return choices, ref_options
1005 1011
1006 1012 def get_server_info(self, environ=None):
1007 1013 server_info = get_system_info(environ)
1008 1014 return server_info
General Comments 0
You need to be logged in to leave comments. Login now