Extended commit search schema with date of commit
marcink
r2693:66c778b8 beta
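This commit adds a stored NUMERIC `date` field (the commit's Unix timestamp) to the changeset search schema, and a `_timestamp` property to the git and hg changeset backends that feeds it. A minimal sketch of the kind of query this enables, assuming an existing changeset index at `index_location`; the `commits_since` helper and its arguments are illustrative, not part of this change:

import calendar

from whoosh.index import open_dir
from whoosh.query import And, NumericRange, Term
from whoosh.qparser import QueryParser

from rhodecode.lib.indexers import CHGSETS_SCHEMA, CHGSET_IDX_NAME


def commits_since(index_location, repo_name, text, since_dt):
    """Find commits in repo_name matching text, committed after since_dt."""
    since_ts = calendar.timegm(since_dt.utctimetuple())
    idx = open_dir(index_location, indexname=CHGSET_IDX_NAME)
    qp = QueryParser('message', schema=CHGSETS_SCHEMA)
    q = And([qp.parse(text),
             Term('repository', repo_name),
             # the new NUMERIC `date` field makes range filters possible
             NumericRange('date', since_ts, None)])
    with idx.searcher() as searcher:
        return [hit.fields() for hit in searcher.search(q)]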
@@ -1,264 +1,265 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.lib.indexers.__init__
4 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 6 Whoosh indexing module for RhodeCode
7 7
8 8 :created_on: Aug 17, 2010
9 9 :author: marcink
10 10 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
11 11 :license: GPLv3, see COPYING for more details.
12 12 """
13 13 # This program is free software: you can redistribute it and/or modify
14 14 # it under the terms of the GNU General Public License as published by
15 15 # the Free Software Foundation, either version 3 of the License, or
16 16 # (at your option) any later version.
17 17 #
18 18 # This program is distributed in the hope that it will be useful,
19 19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 21 # GNU General Public License for more details.
22 22 #
23 23 # You should have received a copy of the GNU General Public License
24 24 # along with this program. If not, see <http://www.gnu.org/licenses/>.
25 25 import os
26 26 import sys
27 27 import traceback
28 28 import logging
29 29 from os.path import dirname as dn, join as jn
30 30
31 31 #to get the rhodecode import
32 32 sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
33 33
34 34 from string import strip
35 35 from shutil import rmtree
36 36
37 37 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
38 38 from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType
39 39 from whoosh.index import create_in, open_dir
40 40 from whoosh.formats import Characters
41 41 from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter
42 42
43 43 from webhelpers.html.builder import escape, literal
44 44 from sqlalchemy import engine_from_config
45 45
46 46 from rhodecode.model import init_model
47 47 from rhodecode.model.scm import ScmModel
48 48 from rhodecode.model.repo import RepoModel
49 49 from rhodecode.config.environment import load_environment
50 50 from rhodecode.lib.utils2 import LazyProperty
51 51 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache,\
52 52 load_rcextensions
53 53
54 54 log = logging.getLogger(__name__)
55 55
56 56 # CUSTOM ANALYZER wordsplit + lowercase filter
57 57 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
58 58
59 59 #INDEX SCHEMA DEFINITION
60 60 SCHEMA = Schema(
61 61 fileid=ID(unique=True),
62 62 owner=TEXT(),
63 63 repository=TEXT(stored=True),
64 64 path=TEXT(stored=True),
65 65 content=FieldType(format=Characters(), analyzer=ANALYZER,
66 66 scorable=True, stored=True),
67 67 modtime=STORED(),
68 68 extension=TEXT(stored=True)
69 69 )
70 70
71 71 IDX_NAME = 'HG_INDEX'
72 72 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
73 73 FRAGMENTER = ContextFragmenter(200)
74 74
75 75 CHGSETS_SCHEMA = Schema(
76 76 raw_id=ID(unique=True, stored=True),
77 date=NUMERIC(stored=True),
77 78 last=BOOLEAN(),
78 79 owner=TEXT(),
79 80 repository=ID(unique=True, stored=True),
80 81 author=TEXT(stored=True),
81 82 message=FieldType(format=Characters(), analyzer=ANALYZER,
82 83 scorable=True, stored=True),
83 84 parents=TEXT(),
84 85 added=TEXT(),
85 86 removed=TEXT(),
86 87 changed=TEXT(),
87 88 )
88 89
89 90 CHGSET_IDX_NAME = 'CHGSET_INDEX'
90 91
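A hedged sketch of the query-string side of the field added above: Whoosh's default parser includes a range plugin, so against this schema a `date` term can be range-constrained directly in the query text, assuming epoch-second values as indexed in `daemon.py` below:

from whoosh.qparser import QueryParser

qp = QueryParser('message', schema=CHGSETS_SCHEMA)
# commits mentioning "fix" committed during 2012, as epoch seconds
q = qp.parse(u'fix AND date:[1325376000 TO 1356998399]')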
91 92 class MakeIndex(BasePasterCommand):
92 93
93 94 max_args = 1
94 95 min_args = 1
95 96
96 97 usage = "CONFIG_FILE"
97 98 summary = "Creates index for full text search given configuration file"
98 99 group_name = "RhodeCode"
99 100 takes_config_file = -1
100 101 parser = Command.standard_parser(verbose=True)
101 102
102 103 def command(self):
103 104 logging.config.fileConfig(self.path_to_ini_file)
104 105 from pylons import config
105 106 add_cache(config)
106 107 engine = engine_from_config(config, 'sqlalchemy.db1.')
107 108 init_model(engine)
108 109 index_location = config['index_dir']
109 110 repo_location = self.options.repo_location \
110 111 if self.options.repo_location else RepoModel().repos_path
111 112 repo_list = map(strip, self.options.repo_list.split(',')) \
112 113 if self.options.repo_list else None
113 114 repo_update_list = map(strip, self.options.repo_update_list.split(',')) \
114 115 if self.options.repo_update_list else None
115 116 load_rcextensions(config['here'])
116 117 #======================================================================
117 118 # WHOOSH DAEMON
118 119 #======================================================================
119 120 from rhodecode.lib.pidlock import LockHeld, DaemonLock
120 121 from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon
121 122 try:
122 123 l = DaemonLock(file_=jn(dn(dn(index_location)), 'make_index.lock'))
123 124 WhooshIndexingDaemon(index_location=index_location,
124 125 repo_location=repo_location,
125 126 repo_list=repo_list,
126 127 repo_update_list=repo_update_list)\
127 128 .run(full_index=self.options.full_index)
128 129 l.release()
129 130 except LockHeld:
130 131 sys.exit(1)
131 132
132 133 def update_parser(self):
133 134 self.parser.add_option('--repo-location',
134 135 action='store',
135 136 dest='repo_location',
136 137 help="Specifies repositories location to index OPTIONAL",
137 138 )
138 139 self.parser.add_option('--index-only',
139 140 action='store',
140 141 dest='repo_list',
141 142 help="Specifies a comma separated list of repositores "
142 143 "to build index on. If not given all repositories "
143 144 "are scanned for indexing. OPTIONAL",
144 145 )
145 146 self.parser.add_option('--update-only',
146 147 action='store',
147 148 dest='repo_update_list',
148 149 help="Specifies a comma separated list of repositores "
149 150 "to re-build index on. OPTIONAL",
150 151 )
151 152 self.parser.add_option('-f',
152 153 action='store_true',
153 154 dest='full_index',
154 155 help="Specifies that index should be made full i.e"
155 156 " destroy old and build from scratch",
156 157 default=False)
157 158
158 159
159 160 class WhooshResultWrapper(object):
160 161 def __init__(self, search_type, searcher, matcher, highlight_items,
161 162 repo_location):
162 163 self.search_type = search_type
163 164 self.searcher = searcher
164 165 self.matcher = matcher
165 166 self.highlight_items = highlight_items
166 167 self.fragment_size = 200
167 168 self.repo_location = repo_location
168 169
169 170 @LazyProperty
170 171 def doc_ids(self):
171 172 docs_id = []
172 173 while self.matcher.is_active():
173 174 docnum = self.matcher.id()
174 175 chunks = [offsets for offsets in self.get_chunks()]
175 176 docs_id.append([docnum, chunks])
176 177 self.matcher.next()
177 178 return docs_id
178 179
179 180 def __str__(self):
180 181 return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))
181 182
182 183 def __repr__(self):
183 184 return self.__str__()
184 185
185 186 def __len__(self):
186 187 return len(self.doc_ids)
187 188
188 189 def __iter__(self):
189 190 """
190 191 Allows iteration over results and lazily generates content.
191 192
192 193 *Requires* implementation of ``__getitem__`` method.
193 194 """
194 195 for docid in self.doc_ids:
195 196 yield self.get_full_content(docid)
196 197
197 198 def __getitem__(self, key):
198 199 """
199 200 Slicing of resultWrapper
200 201 """
201 202 i, j = key.start, key.stop
202 203
203 204 slices = []
204 205 for docid in self.doc_ids[i:j]:
205 206 slices.append(self.get_full_content(docid))
206 207 return slices
207 208
208 209 def get_full_content(self, docid):
209 210 res = self.searcher.stored_fields(docid[0])
210 211 log.debug('result: %s' % res)
211 212 if self.search_type == 'content':
212 213 full_repo_path = jn(self.repo_location, res['repository'])
213 214 f_path = res['path'].split(full_repo_path)[-1]
214 215 f_path = f_path.lstrip(os.sep)
215 216 content_short = self.get_short_content(res, docid[1])
216 217 res.update({'content_short': content_short,
217 218 'content_short_hl': self.highlight(content_short),
218 219 'f_path': f_path
219 220 })
220 221 elif self.search_type == 'message':
221 222 res.update({'message_hl': self.highlight(res['message'])})
222 223
223 224 log.debug('result: %s' % res)
224 225
225 226 return res
226 227
227 228 def get_short_content(self, res, chunks):
228 229
229 230 return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks])
230 231
231 232 def get_chunks(self):
232 233 """
233 234 Chunk the content without overlapping chunks, so the same
234 235 close occurrences are not highlighted twice.
239 240 """
240 241 memory = [(0, 0)]
241 242 if self.matcher.supports('positions'):
242 243 for span in self.matcher.spans():
243 244 start = span.startchar or 0
244 245 end = span.endchar or 0
245 246 start_offseted = max(0, start - self.fragment_size)
246 247 end_offseted = end + self.fragment_size
247 248
248 249 if start_offseted < memory[-1][1]:
249 250 start_offseted = memory[-1][1]
250 251 memory.append((start_offseted, end_offseted,))
251 252 yield (start_offseted, end_offseted,)
252 253
253 254 def highlight(self, content, top=5):
254 255 if self.search_type not in ['content', 'message']:
255 256 return ''
256 257 hl = highlight(
257 258 text=content,
258 259 terms=self.highlight_items,
259 260 analyzer=ANALYZER,
260 261 fragmenter=FRAGMENTER,
261 262 formatter=FORMATTER,
262 263 top=top
263 264 )
264 265 return hl
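One more illustrative use of the stored `date` field, on the result side: ordering changeset hits by commit date. A sketch under the assumption that the installed Whoosh version can sort on this NUMERIC field via its field cache (newer Whoosh releases prefer fields declared sortable); the index path and query text are placeholders:

from whoosh.index import open_dir
from whoosh.qparser import QueryParser

from rhodecode.lib.indexers import CHGSETS_SCHEMA, CHGSET_IDX_NAME

idx = open_dir('/path/to/index', indexname=CHGSET_IDX_NAME)
qp = QueryParser('message', schema=CHGSETS_SCHEMA)
with idx.searcher() as searcher:
    # newest commits first; NUMERIC fields compare numerically
    for hit in searcher.search(qp.parse(u'fix'),
                               sortedby='date', reverse=True):
        print hit['raw_id'], hit['date']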
@@ -1,412 +1,413 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.lib.indexers.daemon
4 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 6 A daemon will read from task table and run tasks
7 7
8 8 :created_on: Jan 26, 2010
9 9 :author: marcink
10 10 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
11 11 :license: GPLv3, see COPYING for more details.
12 12 """
13 13 # This program is free software: you can redistribute it and/or modify
14 14 # it under the terms of the GNU General Public License as published by
15 15 # the Free Software Foundation, either version 3 of the License, or
16 16 # (at your option) any later version.
17 17 #
18 18 # This program is distributed in the hope that it will be useful,
19 19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 21 # GNU General Public License for more details.
22 22 #
23 23 # You should have received a copy of the GNU General Public License
24 24 # along with this program. If not, see <http://www.gnu.org/licenses/>.
25 25 from __future__ import with_statement
26 26
27 27 import os
28 28 import sys
29 29 import logging
30 30 import traceback
31 31
32 32 from shutil import rmtree
33 33 from time import mktime
34 34
35 35 from os.path import dirname as dn
36 36 from os.path import join as jn
37 37
38 38 #to get the rhodecode import
39 39 project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
40 40 sys.path.append(project_path)
41 41
42 42 from rhodecode.config.conf import INDEX_EXTENSIONS
43 43 from rhodecode.model.scm import ScmModel
44 44 from rhodecode.lib.utils2 import safe_unicode
45 45 from rhodecode.lib.indexers import SCHEMA, IDX_NAME, CHGSETS_SCHEMA, \
46 46 CHGSET_IDX_NAME
47 47
48 48 from rhodecode.lib.vcs.exceptions import ChangesetError, RepositoryError, \
49 49 NodeDoesNotExistError
50 50
51 51 from whoosh.index import create_in, open_dir, exists_in
52 52 from whoosh.query import *
53 53 from whoosh.qparser import QueryParser
54 54
55 55 log = logging.getLogger('whoosh_indexer')
56 56
57 57
58 58 class WhooshIndexingDaemon(object):
59 59 """
60 60 Daemon for atomic indexing jobs
61 61 """
62 62
63 63 def __init__(self, indexname=IDX_NAME, index_location=None,
64 64 repo_location=None, sa=None, repo_list=None,
65 65 repo_update_list=None):
66 66 self.indexname = indexname
67 67
68 68 self.index_location = index_location
69 69 if not index_location:
70 70 raise Exception('You have to provide index location')
71 71
72 72 self.repo_location = repo_location
73 73 if not repo_location:
74 74 raise Exception('You have to provide repositories location')
75 75
76 76 self.repo_paths = ScmModel(sa).repo_scan(self.repo_location)
77 77
78 78 #filter repo list
79 79 if repo_list:
80 80 self.filtered_repo_paths = {}
81 81 for repo_name, repo in self.repo_paths.items():
82 82 if repo_name in repo_list:
83 83 self.filtered_repo_paths[repo_name] = repo
84 84
85 85 self.repo_paths = self.filtered_repo_paths
86 86
87 87 #filter update repo list
88 88 self.filtered_repo_update_paths = {}
89 89 if repo_update_list:
90 90 self.filtered_repo_update_paths = {}
91 91 for repo_name, repo in self.repo_paths.items():
92 92 if repo_name in repo_update_list:
93 93 self.filtered_repo_update_paths[repo_name] = repo
94 94 self.repo_paths = self.filtered_repo_update_paths
95 95
96 96 self.initial = True
97 97 if not os.path.isdir(self.index_location):
98 98 os.makedirs(self.index_location)
99 99 log.info('Cannot run incremental index since it does not'
100 100 ' yet exist; running full build')
101 101 elif not exists_in(self.index_location, IDX_NAME):
102 102 log.info('Running full index build as the file content'
103 103 ' index does not exist')
104 104 elif not exists_in(self.index_location, CHGSET_IDX_NAME):
105 105 log.info('Running full index build as the changeset'
106 106 ' index does not exist')
107 107 else:
108 108 self.initial = False
109 109
110 110 def get_paths(self, repo):
111 111 """
112 112 Recursively walk the root dir and return a set of all paths in it,
113 113 based on the repository walk function
114 114 """
115 115 index_paths_ = set()
116 116 try:
117 117 tip = repo.get_changeset('tip')
118 118 for _topnode, _dirs, files in tip.walk('/'):
119 119 for f in files:
120 120 index_paths_.add(jn(repo.path, f.path))
121 121
122 122 except RepositoryError:
123 123 log.debug(traceback.format_exc())
125 125 return index_paths_
126 126
127 127 def get_node(self, repo, path):
128 128 n_path = path[len(repo.path) + 1:]
129 129 node = repo.get_changeset().get_node(n_path)
130 130 return node
131 131
132 132 def get_node_mtime(self, node):
133 133 return mktime(node.last_changeset.date.timetuple())
134 134
135 135 def add_doc(self, writer, path, repo, repo_name):
136 136 """
137 137 Add a doc to the writer; this function itself fetches the data
138 138 from the vcs backend instance
139 139 """
140 140
141 141 node = self.get_node(repo, path)
142 142 indexed = indexed_w_content = 0
143 143 # we just index the content of chosen files, and skip binary files
144 144 if node.extension in INDEX_EXTENSIONS and not node.is_binary:
145 145 u_content = node.content
146 146 if not isinstance(u_content, unicode):
147 147 log.warning(' >> %s Could not get this content as unicode,'
148 148 ' replacing with empty content' % path)
149 149 u_content = u''
150 150 else:
151 151 log.debug(' >> %s [WITH CONTENT]' % path)
152 152 indexed_w_content += 1
153 153
154 154 else:
155 155 log.debug(' >> %s' % path)
156 156 # just index the file name without its content
157 157 u_content = u''
158 158 indexed += 1
159 159
160 160 p = safe_unicode(path)
161 161 writer.add_document(
162 162 fileid=p,
163 163 owner=unicode(repo.contact),
164 164 repository=safe_unicode(repo_name),
165 165 path=p,
166 166 content=u_content,
167 167 modtime=self.get_node_mtime(node),
168 168 extension=node.extension
169 169 )
170 170 return indexed, indexed_w_content
171 171
172 172 def index_changesets(self, writer, repo_name, repo, start_rev=None):
173 173 """
174 174 Add all changesets in the vcs repo starting at start_rev
175 175 to the index writer
176 176
177 177 :param writer: the whoosh index writer to add to
178 178 :param repo_name: name of the repository from whence the
179 179 changeset originates including the repository group
180 180 :param repo: the vcs repository instance to index changesets for,
181 181 the presumption is the repo has changesets to index
182 182 :param start_rev: the full sha id to start indexing from;
183 183 if start_rev is None then index from the first changeset in
184 184 the repo
185 185 """
186 186
187 187 if start_rev is None:
188 188 start_rev = repo[0].raw_id
189 189
190 190 log.debug('indexing changesets in %s starting at rev: %s' %
191 191 (repo_name, start_rev))
192 192
193 193 indexed = 0
194 194 for cs in repo.get_changesets(start=start_rev):
195 195 log.debug(' >> %s' % cs)
196 196 writer.add_document(
197 197 raw_id=unicode(cs.raw_id),
198 198 owner=unicode(repo.contact),
199 date=cs._timestamp,
199 200 repository=safe_unicode(repo_name),
200 201 author=cs.author,
201 202 message=cs.message,
202 203 last=cs.last,
203 204 added=u' '.join([node.path for node in cs.added]).lower(),
204 205 removed=u' '.join([node.path for node in cs.removed]).lower(),
205 206 changed=u' '.join([node.path for node in cs.changed]).lower(),
206 207 parents=u' '.join([p.raw_id for p in cs.parents]),
207 208 )
208 209 indexed += 1
209 210
210 211 log.debug('indexed %d changesets for repo %s' % (indexed, repo_name))
211 212 return indexed
212 213
213 214 def index_files(self, file_idx_writer, repo_name, repo):
214 215 """
215 216 Index files for given repo_name
216 217
217 218 :param file_idx_writer: the whoosh index writer to add to
218 219 :param repo_name: name of the repository we're indexing
219 220 :param repo: instance of vcs repo
220 221 """
221 222 i_cnt = iwc_cnt = 0
222 223 log.debug('building index for [%s]' % repo.path)
223 224 for idx_path in self.get_paths(repo):
224 225 i, iwc = self.add_doc(file_idx_writer, idx_path, repo, repo_name)
225 226 i_cnt += i
226 227 iwc_cnt += iwc
227 228
228 229 log.debug('added %s files %s with content for repo %s' %
229 230 (i_cnt + iwc_cnt, iwc_cnt, repo.path))
230 231 return i_cnt, iwc_cnt
231 232
232 233 def update_changeset_index(self):
233 234 idx = open_dir(self.index_location, indexname=CHGSET_IDX_NAME)
234 235
235 236 with idx.searcher() as searcher:
236 237 writer = idx.writer()
237 238 writer_is_dirty = False
238 239 try:
239 240 indexed_total = 0
240 241 for repo_name, repo in self.repo_paths.items():
241 242 # skip indexing if there aren't any revs in the repo
242 243 num_of_revs = len(repo)
243 244 if num_of_revs < 1:
244 245 continue
245 246
246 247 qp = QueryParser('repository', schema=CHGSETS_SCHEMA)
247 248 q = qp.parse(u"last:t AND %s" % repo_name)
248 249
249 250 results = searcher.search(q)
250 251
251 252 # default to scanning the entire repo
252 253 last_rev = 0
253 254 start_id = None
254 255
255 256 if len(results) > 0:
256 257 # assuming that there is only one result, if not this
257 258 # may require a full re-index.
258 259 start_id = results[0]['raw_id']
259 260 last_rev = repo.get_changeset(revision=start_id).revision
260 261
261 262 # there are new changesets to index or a new repo to index
262 263 if last_rev == 0 or num_of_revs > last_rev + 1:
263 264 # delete the docs in the index for the previous
264 265 # last changeset(s)
265 266 for hit in results:
266 267 q = qp.parse(u"last:t AND %s AND raw_id:%s" %
267 268 (repo_name, hit['raw_id']))
268 269 writer.delete_by_query(q)
269 270
270 271 # index from the previous last changeset + all new ones
271 272 indexed_total += self.index_changesets(writer,
272 273 repo_name, repo, start_id)
273 274 writer_is_dirty = True
274 275 log.debug('indexed %s changesets for repo %s' % (
275 276 indexed_total, repo_name)
276 277 )
277 278 finally:
278 279 if writer_is_dirty:
279 280 log.debug('>> COMMITTING CHANGES TO CHANGESET INDEX <<')
280 281 writer.commit(merge=True)
281 282 log.debug('>> COMMITTED CHANGES TO CHANGESET INDEX<<')
282 283 else:
283 284 writer.cancel()
284 285 log.debug('>> NOTHING TO COMMIT<<')
285 286
286 287 def update_file_index(self):
287 288 log.debug((u'STARTING INCREMENTAL INDEXING UPDATE FOR EXTENSIONS %s '
288 289 'AND REPOS %s') % (INDEX_EXTENSIONS, self.repo_paths.keys()))
289 290
290 291 idx = open_dir(self.index_location, indexname=self.indexname)
291 292 # The set of all paths in the index
292 293 indexed_paths = set()
293 294 # The set of all paths we need to re-index
294 295 to_index = set()
295 296
296 297 writer = idx.writer()
297 298 writer_is_dirty = False
298 299 try:
299 300 with idx.reader() as reader:
300 301
301 302 # Loop over the stored fields in the index
302 303 for fields in reader.all_stored_fields():
303 304 indexed_path = fields['path']
304 305 indexed_repo_path = fields['repository']
305 306 indexed_paths.add(indexed_path)
306 307
307 308 if not indexed_repo_path in self.filtered_repo_update_paths:
308 309 continue
309 310
310 311 repo = self.repo_paths[indexed_repo_path]
311 312
312 313 try:
313 314 node = self.get_node(repo, indexed_path)
314 315 # Check if this file was changed since it was indexed
315 316 indexed_time = fields['modtime']
316 317 mtime = self.get_node_mtime(node)
317 318 if mtime > indexed_time:
318 319 # The file has changed, delete it and add it to
319 320 # the list of files to reindex
320 321 log.debug(
321 322 'adding to reindex list %s mtime: %s vs %s' % (
322 323 indexed_path, mtime, indexed_time)
323 324 )
324 325 writer.delete_by_term('fileid', indexed_path)
325 326 writer_is_dirty = True
326 327
327 328 to_index.add(indexed_path)
328 329 except (ChangesetError, NodeDoesNotExistError):
329 330 # This file was deleted since it was indexed
330 331 log.debug('removing from index %s' % indexed_path)
331 332 writer.delete_by_term('path', indexed_path)
332 333 writer_is_dirty = True
333 334
334 335 # Loop over the files in the filesystem
335 336 # Assume we have a function that gathers the filenames of the
336 337 # documents to be indexed
337 338 ri_cnt_total = 0 # indexed
338 339 riwc_cnt_total = 0 # indexed with content
339 340 for repo_name, repo in self.repo_paths.items():
340 341 # skip indexing if there aren't any revisions
341 342 if len(repo) < 1:
342 343 continue
343 344 ri_cnt = 0 # indexed
344 345 riwc_cnt = 0 # indexed with content
345 346 for path in self.get_paths(repo):
346 347 path = safe_unicode(path)
347 348 if path in to_index or path not in indexed_paths:
348 349
349 350 # This is either a file that's changed, or a new file
350 351 # that wasn't indexed before. So index it!
351 352 i, iwc = self.add_doc(writer, path, repo, repo_name)
352 353 writer_is_dirty = True
353 354 log.debug('re-indexing %s' % path)
354 355 ri_cnt += i
355 356 ri_cnt_total += 1
356 357 riwc_cnt += iwc
357 358 riwc_cnt_total += iwc
358 359 log.debug('added %s files %s with content for repo %s' % (
359 360 ri_cnt + riwc_cnt, riwc_cnt, repo.path)
360 361 )
361 362 log.debug('indexed %s files in total and %s with content' % (
362 363 ri_cnt_total, riwc_cnt_total)
363 364 )
364 365 finally:
365 366 if writer_is_dirty:
366 367 log.debug('>> COMMITTING CHANGES <<')
367 368 writer.commit(merge=True)
368 369 log.debug('>>> FINISHED REBUILDING INDEX <<<')
369 370 else:
370 371 log.debug('>> NOTHING TO COMMIT<<')
371 372 writer.cancel()
372 373
373 374 def build_indexes(self):
374 375 if os.path.exists(self.index_location):
375 376 log.debug('removing previous index')
376 377 rmtree(self.index_location)
377 378
378 379 if not os.path.exists(self.index_location):
379 380 os.mkdir(self.index_location)
380 381
381 382 chgset_idx = create_in(self.index_location, CHGSETS_SCHEMA,
382 383 indexname=CHGSET_IDX_NAME)
383 384 chgset_idx_writer = chgset_idx.writer()
384 385
385 386 file_idx = create_in(self.index_location, SCHEMA, indexname=IDX_NAME)
386 387 file_idx_writer = file_idx.writer()
387 388 log.debug('BUILDING INDEX FOR EXTENSIONS %s '
388 389 'AND REPOS %s' % (INDEX_EXTENSIONS, self.repo_paths.keys()))
389 390
390 391 for repo_name, repo in self.repo_paths.items():
391 392 # skip indexing if there aren't any revisions
392 393 if len(repo) < 1:
393 394 continue
394 395
395 396 self.index_files(file_idx_writer, repo_name, repo)
396 397 self.index_changesets(chgset_idx_writer, repo_name, repo)
397 398
398 399 log.debug('>> COMMITTING CHANGES <<')
399 400 file_idx_writer.commit(merge=True)
400 401 chgset_idx_writer.commit(merge=True)
401 402 log.debug('>>> FINISHED BUILDING INDEX <<<')
402 403
403 404 def update_indexes(self):
404 405 self.update_file_index()
405 406 self.update_changeset_index()
406 407
407 408 def run(self, full_index=False):
408 409 """Run daemon"""
409 410 if full_index or self.initial:
410 411 self.build_indexes()
411 412 else:
412 413 self.update_indexes()
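For context, a minimal sketch of driving the daemon directly, which the sys.path bootstrap at the top of this module is meant to allow; the paths are placeholders and the Pylons config/database are assumed to be initialized first, as `MakeIndex.command` does. A full rebuild also repopulates the changeset index, so commits indexed before this change gain the new `date` field:

from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon

daemon = WhooshIndexingDaemon(index_location='/srv/rhodecode/data/index',
                              repo_location='/srv/repos')
# full_index=True destroys and rebuilds both the file and changeset indexes
daemon.run(full_index=True)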
@@ -1,468 +1,472 b''
1 1 import re
2 2 from itertools import chain
3 3 from dulwich import objects
4 4 from subprocess import Popen, PIPE
5 5 from rhodecode.lib.vcs.conf import settings
6 6 from rhodecode.lib.vcs.exceptions import RepositoryError
7 7 from rhodecode.lib.vcs.exceptions import ChangesetError
8 8 from rhodecode.lib.vcs.exceptions import NodeDoesNotExistError
9 9 from rhodecode.lib.vcs.exceptions import VCSError
10 10 from rhodecode.lib.vcs.exceptions import ChangesetDoesNotExistError
11 11 from rhodecode.lib.vcs.exceptions import ImproperArchiveTypeError
12 12 from rhodecode.lib.vcs.backends.base import BaseChangeset
13 13 from rhodecode.lib.vcs.nodes import FileNode, DirNode, NodeKind, RootNode, \
14 14 RemovedFileNode, SubModuleNode
15 15 from rhodecode.lib.vcs.utils import safe_unicode
16 16 from rhodecode.lib.vcs.utils import date_fromtimestamp
17 17 from rhodecode.lib.vcs.utils.lazy import LazyProperty
18 18
19 19
20 20 class GitChangeset(BaseChangeset):
21 21 """
22 22 Represents state of the repository at single revision.
23 23 """
24 24
25 25 def __init__(self, repository, revision):
26 26 self._stat_modes = {}
27 27 self.repository = repository
28 28
29 29 try:
30 30 commit = self.repository._repo.get_object(revision)
31 31 if isinstance(commit, objects.Tag):
32 32 revision = commit.object[1]
33 33 commit = self.repository._repo.get_object(commit.object[1])
34 34 except KeyError:
35 35 raise RepositoryError("Cannot get object with id %s" % revision)
36 36 self.raw_id = revision
37 37 self.id = self.raw_id
38 38 self.short_id = self.raw_id[:12]
39 39 self._commit = commit
40 40
41 41 self._tree_id = commit.tree
42 42 self._commiter_property = 'committer'
43 43 self._date_property = 'commit_time'
44 44 self._date_tz_property = 'commit_timezone'
45 45 self.revision = repository.revisions.index(revision)
46 46
47 47 self.message = safe_unicode(commit.message)
48 48 #self.branch = None
49 49 self.tags = []
50 50 self.nodes = {}
51 51 self._paths = {}
52 52
53 53 @LazyProperty
54 54 def author(self):
55 55 return safe_unicode(getattr(self._commit, self._commiter_property))
56 56
57 57 @LazyProperty
58 58 def date(self):
59 59 return date_fromtimestamp(getattr(self._commit, self._date_property),
60 60 getattr(self._commit, self._date_tz_property))
61 61
62 62 @LazyProperty
63 def _timestamp(self):
64 return getattr(self._commit, self._date_property)
65
66 @LazyProperty
63 67 def status(self):
64 68 """
65 69 Returns modified, added, removed, deleted files for current changeset
66 70 """
67 71 return self.changed, self.added, self.removed
68 72
69 73 @LazyProperty
70 74 def branch(self):
71 75
72 76 heads = self.repository._heads(reverse=False)
73 77
74 78 ref = heads.get(self.raw_id)
75 79 if ref:
76 80 return safe_unicode(ref)
77 81
78 82 def _fix_path(self, path):
79 83 """
80 84 Paths are stored without a trailing slash, so we need to get rid of
81 85 it if needed.
82 86 """
83 87 if path.endswith('/'):
84 88 path = path.rstrip('/')
85 89 return path
86 90
87 91 def _get_id_for_path(self, path):
88 92
89 93 # FIXME: Please spare a couple of minutes and make this code cleaner;
90 94 if not path in self._paths:
91 95 path = path.strip('/')
92 96 # set root tree
93 97 tree = self.repository._repo[self._tree_id]
94 98 if path == '':
95 99 self._paths[''] = tree.id
96 100 return tree.id
97 101 splitted = path.split('/')
98 102 dirs, name = splitted[:-1], splitted[-1]
99 103 curdir = ''
100 104
101 105 # initially extract things from root dir
102 106 for item, stat, id in tree.iteritems():
103 107 if curdir:
104 108 name = '/'.join((curdir, item))
105 109 else:
106 110 name = item
107 111 self._paths[name] = id
108 112 self._stat_modes[name] = stat
109 113
110 114 for dir in dirs:
111 115 if curdir:
112 116 curdir = '/'.join((curdir, dir))
113 117 else:
114 118 curdir = dir
115 119 dir_id = None
116 120 for item, stat, id in tree.iteritems():
117 121 if dir == item:
118 122 dir_id = id
119 123 if dir_id:
120 124 # Update tree
121 125 tree = self.repository._repo[dir_id]
122 126 if not isinstance(tree, objects.Tree):
123 127 raise ChangesetError('%s is not a directory' % curdir)
124 128 else:
125 129 raise ChangesetError('%s has not been found' % curdir)
126 130
127 131 # cache all items from the given traversed tree
128 132 for item, stat, id in tree.iteritems():
129 133 if curdir:
130 134 name = '/'.join((curdir, item))
131 135 else:
132 136 name = item
133 137 self._paths[name] = id
134 138 self._stat_modes[name] = stat
135 139 if not path in self._paths:
136 140 raise NodeDoesNotExistError("There is no file nor directory "
137 141 "at the given path %r at revision %r"
138 142 % (path, self.short_id))
139 143 return self._paths[path]
140 144
141 145 def _get_kind(self, path):
142 146 obj = self.repository._repo[self._get_id_for_path(path)]
143 147 if isinstance(obj, objects.Blob):
144 148 return NodeKind.FILE
145 149 elif isinstance(obj, objects.Tree):
146 150 return NodeKind.DIR
147 151
148 152 def _get_file_nodes(self):
149 153 return chain(*(t[2] for t in self.walk()))
150 154
151 155 @LazyProperty
152 156 def parents(self):
153 157 """
154 158 Returns list of parents changesets.
155 159 """
156 160 return [self.repository.get_changeset(parent)
157 161 for parent in self._commit.parents]
158 162
159 163 def next(self, branch=None):
160 164
161 165 if branch and self.branch != branch:
162 166 raise VCSError('Branch option used on changeset not belonging '
163 167 'to that branch')
164 168
165 169 def _next(changeset, branch):
166 170 try:
167 171 next_ = changeset.revision + 1
168 172 next_rev = changeset.repository.revisions[next_]
169 173 except IndexError:
170 174 raise ChangesetDoesNotExistError
171 175 cs = changeset.repository.get_changeset(next_rev)
172 176
173 177 if branch and branch != cs.branch:
174 178 return _next(cs, branch)
175 179
176 180 return cs
177 181
178 182 return _next(self, branch)
179 183
180 184 def prev(self, branch=None):
181 185 if branch and self.branch != branch:
182 186 raise VCSError('Branch option used on changeset not belonging '
183 187 'to that branch')
184 188
185 189 def _prev(changeset, branch):
186 190 try:
187 191 prev_ = changeset.revision - 1
188 192 if prev_ < 0:
189 193 raise IndexError
190 194 prev_rev = changeset.repository.revisions[prev_]
191 195 except IndexError:
192 196 raise ChangesetDoesNotExistError
193 197
194 198 cs = changeset.repository.get_changeset(prev_rev)
195 199
196 200 if branch and branch != cs.branch:
197 201 return _prev(cs, branch)
198 202
199 203 return cs
200 204
201 205 return _prev(self, branch)
202 206
203 207 def diff(self, ignore_whitespace=True, context=3):
204 208 rev1 = self.parents[0] if self.parents else self.repository.EMPTY_CHANGESET
205 209 rev2 = self
206 210 return ''.join(self.repository.get_diff(rev1, rev2,
207 211 ignore_whitespace=ignore_whitespace,
208 212 context=context))
209 213
210 214 def get_file_mode(self, path):
211 215 """
212 216 Returns stat mode of the file at the given ``path``.
213 217 """
214 218 # ensure path is traversed
215 219 self._get_id_for_path(path)
216 220 return self._stat_modes[path]
217 221
218 222 def get_file_content(self, path):
219 223 """
220 224 Returns content of the file at given ``path``.
221 225 """
222 226 id = self._get_id_for_path(path)
223 227 blob = self.repository._repo[id]
224 228 return blob.as_pretty_string()
225 229
226 230 def get_file_size(self, path):
227 231 """
228 232 Returns size of the file at given ``path``.
229 233 """
230 234 id = self._get_id_for_path(path)
231 235 blob = self.repository._repo[id]
232 236 return blob.raw_length()
233 237
234 238 def get_file_changeset(self, path):
235 239 """
236 240 Returns last commit of the file at the given ``path``.
237 241 """
238 242 node = self.get_node(path)
239 243 return node.history[0]
240 244
241 245 def get_file_history(self, path):
242 246 """
243 247 Returns the history of the file as a reversed list of ``Changeset``
244 248 objects in which the file at the given ``path`` was modified.
245 249
246 250 TODO: This function now uses the underlying OS 'git' and 'grep'
247 251 commands, which is generally not good. It should be replaced with
248 252 an algorithm iterating over commits.
249 253 """
250 254 cmd = 'log --pretty="format: %%H" -s -p %s -- "%s"' % (
251 255 self.id, path
252 256 )
253 257 so, se = self.repository.run_git_command(cmd)
254 258 ids = re.findall(r'[0-9a-fA-F]{40}', so)
255 259 return [self.repository.get_changeset(id) for id in ids]
256 260
257 261 def get_file_annotate(self, path):
258 262 """
259 263 Returns a list of three-element tuples with lineno, changeset and line
260 264
261 265 TODO: This function now uses the underlying OS 'git' command, which
262 266 is generally not good. It should be replaced with an algorithm
263 267 iterating over commits.
264 268 """
265 269 cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path)
266 270 # -l ==> outputs long shas (and we need all 40 characters)
267 271 # --root ==> doesn't put '^' character for boundaries
268 272 # -r sha ==> blames for the given revision
269 273 so, se = self.repository.run_git_command(cmd)
270 274
271 275 annotate = []
272 276 for i, blame_line in enumerate(so.split('\n')[:-1]):
273 277 ln_no = i + 1
274 278 id, line = re.split(r' ', blame_line, 1)
275 279 annotate.append((ln_no, self.repository.get_changeset(id), line))
276 280 return annotate
277 281
278 282 def fill_archive(self, stream=None, kind='tgz', prefix=None,
279 283 subrepos=False):
280 284 """
281 285 Fills up given stream.
282 286
283 287 :param stream: file like object.
284 288 :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
285 289 Default: ``tgz``.
286 290 :param prefix: name of root directory in archive.
287 291 Default is repository name and changeset's raw_id joined with dash
288 292 (``repo-tip.<KIND>``).
289 293 :param subrepos: include subrepos in this archive.
290 294
291 295 :raise ImproperArchiveTypeError: If given kind is wrong.
292 296 :raise VCSError: If given stream is None
293 297
294 298 """
295 299 allowed_kinds = settings.ARCHIVE_SPECS.keys()
296 300 if kind not in allowed_kinds:
297 301 raise ImproperArchiveTypeError('Archive kind not supported, use '
298 302 'one of %s', allowed_kinds)
299 303
300 304 if prefix is None:
301 305 prefix = '%s-%s' % (self.repository.name, self.short_id)
302 306 elif prefix.startswith('/'):
303 307 raise VCSError("Prefix cannot start with leading slash")
304 308 elif prefix.strip() == '':
305 309 raise VCSError("Prefix cannot be empty")
306 310
307 311 if kind == 'zip':
308 312 frmt = 'zip'
309 313 else:
310 314 frmt = 'tar'
311 315 cmd = 'git archive --format=%s --prefix=%s/ %s' % (frmt, prefix,
312 316 self.raw_id)
313 317 if kind == 'tgz':
314 318 cmd += ' | gzip -9'
315 319 elif kind == 'tbz2':
316 320 cmd += ' | bzip2 -9'
317 321
318 322 if stream is None:
319 323 raise VCSError('You need to pass in a valid stream for filling'
320 324 ' with archival data')
321 325 popen = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True,
322 326 cwd=self.repository.path)
323 327
324 328 buffer_size = 1024 * 8
325 329 chunk = popen.stdout.read(buffer_size)
326 330 while chunk:
327 331 stream.write(chunk)
328 332 chunk = popen.stdout.read(buffer_size)
329 333 # Make sure all descriptors are read
330 334 popen.communicate()
331 335
332 336 def get_nodes(self, path):
333 337 if self._get_kind(path) != NodeKind.DIR:
334 338 raise ChangesetError("Directory does not exist for revision %r at "
335 339 " %r" % (self.revision, path))
336 340 path = self._fix_path(path)
337 341 id = self._get_id_for_path(path)
338 342 tree = self.repository._repo[id]
339 343 dirnodes = []
340 344 filenodes = []
341 345 als = self.repository.alias
342 346 for name, stat, id in tree.iteritems():
343 347 if objects.S_ISGITLINK(stat):
344 348 dirnodes.append(SubModuleNode(name, url=None, changeset=id,
345 349 alias=als))
346 350 continue
347 351
348 352 obj = self.repository._repo.get_object(id)
349 353 if path != '':
350 354 obj_path = '/'.join((path, name))
351 355 else:
352 356 obj_path = name
353 357 if obj_path not in self._stat_modes:
354 358 self._stat_modes[obj_path] = stat
355 359 if isinstance(obj, objects.Tree):
356 360 dirnodes.append(DirNode(obj_path, changeset=self))
357 361 elif isinstance(obj, objects.Blob):
358 362 filenodes.append(FileNode(obj_path, changeset=self, mode=stat))
359 363 else:
360 364 raise ChangesetError("Requested object should be Tree "
361 365 "or Blob, is %r" % type(obj))
362 366 nodes = dirnodes + filenodes
363 367 for node in nodes:
364 368 if not node.path in self.nodes:
365 369 self.nodes[node.path] = node
366 370 nodes.sort()
367 371 return nodes
368 372
369 373 def get_node(self, path):
370 374 if isinstance(path, unicode):
371 375 path = path.encode('utf-8')
372 376 path = self._fix_path(path)
373 377 if not path in self.nodes:
374 378 try:
375 379 id_ = self._get_id_for_path(path)
376 380 except ChangesetError:
377 381 raise NodeDoesNotExistError("Cannot find one of parents' "
378 382 "directories for a given path: %s" % path)
379 383
380 384 _GL = lambda m: m and objects.S_ISGITLINK(m)
381 385 if _GL(self._stat_modes.get(path)):
382 386 node = SubModuleNode(path, url=None, changeset=id_,
383 387 alias=self.repository.alias)
384 388 else:
385 389 obj = self.repository._repo.get_object(id_)
386 390
387 391 if isinstance(obj, objects.Tree):
388 392 if path == '':
389 393 node = RootNode(changeset=self)
390 394 else:
391 395 node = DirNode(path, changeset=self)
392 396 node._tree = obj
393 397 elif isinstance(obj, objects.Blob):
394 398 node = FileNode(path, changeset=self)
395 399 node._blob = obj
396 400 else:
397 401 raise NodeDoesNotExistError("There is no file nor directory "
398 402 "at the given path %r at revision %r"
399 403 % (path, self.short_id))
400 404 # cache node
401 405 self.nodes[path] = node
402 406 return self.nodes[path]
403 407
404 408 @LazyProperty
405 409 def affected_files(self):
406 410 """
407 411 Gets fast accessible file changes for the given changeset
408 412 """
409 413
410 414 return self.added + self.changed
411 415
412 416 @LazyProperty
413 417 def _diff_name_status(self):
414 418 output = []
415 419 for parent in self.parents:
416 420 cmd = 'diff --name-status %s %s --encoding=utf8' % (parent.raw_id, self.raw_id)
417 421 so, se = self.repository.run_git_command(cmd)
418 422 output.append(so.strip())
419 423 return '\n'.join(output)
420 424
421 425 def _get_paths_for_status(self, status):
422 426 """
423 427 Returns sorted list of paths for given ``status``.
424 428
425 429 :param status: one of: *added*, *modified* or *deleted*
426 430 """
427 431 paths = set()
428 432 char = status[0].upper()
429 433 for line in self._diff_name_status.splitlines():
430 434 if not line:
431 435 continue
432 436
433 437 if line.startswith(char):
434 438 splitted = line.split(char, 1)
435 439 if not len(splitted) == 2:
436 440 raise VCSError("Couldn't parse diff result:\n%s\n\n and "
437 441 "particularly that line: %s" % (self._diff_name_status,
438 442 line))
439 443 _path = splitted[1].strip()
440 444 paths.add(_path)
441 445 return sorted(paths)
442 446
443 447 @LazyProperty
444 448 def added(self):
445 449 """
446 450 Returns list of added ``FileNode`` objects.
447 451 """
448 452 if not self.parents:
449 453 return list(self._get_file_nodes())
450 454 return [self.get_node(path) for path in self._get_paths_for_status('added')]
451 455
452 456 @LazyProperty
453 457 def changed(self):
454 458 """
455 459 Returns list of modified ``FileNode`` objects.
456 460 """
457 461 if not self.parents:
458 462 return []
459 463 return [self.get_node(path) for path in self._get_paths_for_status('modified')]
460 464
461 465 @LazyProperty
462 466 def removed(self):
463 467 """
464 468 Returns list of removed ``FileNode`` objects.
465 469 """
466 470 if not self.parents:
467 471 return []
468 472 return [RemovedFileNode(path) for path in self._get_paths_for_status('deleted')]
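A small sketch of what the git `_timestamp` property above relies on: dulwich commits expose the commit time as raw epoch seconds plus a timezone offset, and the raw epoch value is what ends up in the schema's NUMERIC `date` field (`cs` here is an assumed `GitChangeset` instance):

epoch = cs._commit.commit_time      # raw epoch seconds from dulwich
assert cs._timestamp == epoch       # the value indexed as `date`
# cs.date, by contrast, is a datetime built from the same value plus
# cs._commit.commit_timezone via date_fromtimestamp()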
@@ -1,361 +1,365 b''
1 1 import os
2 2 import posixpath
3 3
4 4 from rhodecode.lib.vcs.backends.base import BaseChangeset
5 5 from rhodecode.lib.vcs.conf import settings
6 6 from rhodecode.lib.vcs.exceptions import ChangesetDoesNotExistError, \
7 7 ChangesetError, ImproperArchiveTypeError, NodeDoesNotExistError, VCSError
8 8 from rhodecode.lib.vcs.nodes import AddedFileNodesGenerator, \
9 9 ChangedFileNodesGenerator, DirNode, FileNode, NodeKind, \
10 10 RemovedFileNodesGenerator, RootNode, SubModuleNode
11 11
12 12 from rhodecode.lib.vcs.utils import safe_str, safe_unicode, date_fromtimestamp
13 13 from rhodecode.lib.vcs.utils.lazy import LazyProperty
14 14 from rhodecode.lib.vcs.utils.paths import get_dirs_for_path
15 15 from rhodecode.lib.vcs.utils.hgcompat import archival, hex
16 16
17 17
18 18 class MercurialChangeset(BaseChangeset):
19 19 """
20 20 Represents state of the repository at the single revision.
21 21 """
22 22
23 23 def __init__(self, repository, revision):
24 24 self.repository = repository
25 25 self.raw_id = revision
26 26 self._ctx = repository._repo[revision]
27 27 self.revision = self._ctx._rev
28 28 self.nodes = {}
29 29
30 30 @LazyProperty
31 31 def tags(self):
32 32 return map(safe_unicode, self._ctx.tags())
33 33
34 34 @LazyProperty
35 35 def branch(self):
36 36 return safe_unicode(self._ctx.branch())
37 37
38 38 @LazyProperty
39 39 def bookmarks(self):
40 40 return map(safe_unicode, self._ctx.bookmarks())
41 41
42 42 @LazyProperty
43 43 def message(self):
44 44 return safe_unicode(self._ctx.description())
45 45
46 46 @LazyProperty
47 47 def author(self):
48 48 return safe_unicode(self._ctx.user())
49 49
50 50 @LazyProperty
51 51 def date(self):
52 52 return date_fromtimestamp(*self._ctx.date())
53 53
54 54 @LazyProperty
55 def _timestamp(self):
56 return self._ctx.date()[0]
57
58 @LazyProperty
55 59 def status(self):
56 60 """
57 61 Returns modified, added, removed, deleted files for current changeset
58 62 """
59 63 return self.repository._repo.status(self._ctx.p1().node(),
60 64 self._ctx.node())
61 65
62 66 @LazyProperty
63 67 def _file_paths(self):
64 68 return list(self._ctx)
65 69
66 70 @LazyProperty
67 71 def _dir_paths(self):
68 72 p = list(set(get_dirs_for_path(*self._file_paths)))
69 73 p.insert(0, '')
70 74 return p
71 75
72 76 @LazyProperty
73 77 def _paths(self):
74 78 return self._dir_paths + self._file_paths
75 79
76 80 @LazyProperty
77 81 def id(self):
78 82 if self.last:
79 83 return u'tip'
80 84 return self.short_id
81 85
82 86 @LazyProperty
83 87 def short_id(self):
84 88 return self.raw_id[:12]
85 89
86 90 @LazyProperty
87 91 def parents(self):
88 92 """
89 93 Returns list of parents changesets.
90 94 """
91 95 return [self.repository.get_changeset(parent.rev())
92 96 for parent in self._ctx.parents() if parent.rev() >= 0]
93 97
94 98 def next(self, branch=None):
95 99
96 100 if branch and self.branch != branch:
97 101 raise VCSError('Branch option used on changeset not belonging '
98 102 'to that branch')
99 103
100 104 def _next(changeset, branch):
101 105 try:
102 106 next_ = changeset.revision + 1
103 107 next_rev = changeset.repository.revisions[next_]
104 108 except IndexError:
105 109 raise ChangesetDoesNotExistError
106 110 cs = changeset.repository.get_changeset(next_rev)
107 111
108 112 if branch and branch != cs.branch:
109 113 return _next(cs, branch)
110 114
111 115 return cs
112 116
113 117 return _next(self, branch)
114 118
115 119 def prev(self, branch=None):
116 120 if branch and self.branch != branch:
117 121 raise VCSError('Branch option used on changeset not belonging '
118 122 'to that branch')
119 123
120 124 def _prev(changeset, branch):
121 125 try:
122 126 prev_ = changeset.revision - 1
123 127 if prev_ < 0:
124 128 raise IndexError
125 129 prev_rev = changeset.repository.revisions[prev_]
126 130 except IndexError:
127 131 raise ChangesetDoesNotExistError
128 132
129 133 cs = changeset.repository.get_changeset(prev_rev)
130 134
131 135 if branch and branch != cs.branch:
132 136 return _prev(cs, branch)
133 137
134 138 return cs
135 139
136 140 return _prev(self, branch)
137 141
138 142 def diff(self, ignore_whitespace=True, context=3):
139 143 return ''.join(self._ctx.diff(git=True,
140 144 ignore_whitespace=ignore_whitespace,
141 145 context=context))
142 146
143 147 def _fix_path(self, path):
144 148 """
145 149 Paths are stored without a trailing slash, so we need to get rid of
146 150 it if needed. Also, Mercurial keeps filenodes as str, so we need to
147 151 convert from unicode to str
148 152 """
149 153 if path.endswith('/'):
150 154 path = path.rstrip('/')
151 155
152 156 return safe_str(path)
153 157
154 158 def _get_kind(self, path):
155 159 path = self._fix_path(path)
156 160 if path in self._file_paths:
157 161 return NodeKind.FILE
158 162 elif path in self._dir_paths:
159 163 return NodeKind.DIR
160 164 else:
161 165 raise ChangesetError("Node does not exist at the given path %r"
162 166 % (path))
163 167
164 168 def _get_filectx(self, path):
165 169 path = self._fix_path(path)
166 170 if self._get_kind(path) != NodeKind.FILE:
167 171 raise ChangesetError("File does not exist for revision %r at "
168 172 " %r" % (self.revision, path))
169 173 return self._ctx.filectx(path)
170 174
171 175 def _extract_submodules(self):
172 176 """
173 177 returns a dictionary with submodule information from substate file
174 178 of hg repository
175 179 """
176 180 return self._ctx.substate
177 181
178 182 def get_file_mode(self, path):
179 183 """
180 184 Returns stat mode of the file at the given ``path``.
181 185 """
182 186 fctx = self._get_filectx(path)
183 187 if 'x' in fctx.flags():
184 188 return 0100755
185 189 else:
186 190 return 0100644
187 191
188 192 def get_file_content(self, path):
189 193 """
190 194 Returns content of the file at given ``path``.
191 195 """
192 196 fctx = self._get_filectx(path)
193 197 return fctx.data()
194 198
195 199 def get_file_size(self, path):
196 200 """
197 201 Returns size of the file at given ``path``.
198 202 """
199 203 fctx = self._get_filectx(path)
200 204 return fctx.size()
201 205
202 206 def get_file_changeset(self, path):
203 207 """
204 208 Returns last commit of the file at the given ``path``.
205 209 """
206 210 node = self.get_node(path)
207 211 return node.history[0]
208 212
209 213 def get_file_history(self, path):
210 214 """
211 215 Returns the history of the file as a reversed list of ``Changeset``
212 216 objects in which the file at the given ``path`` was modified.
213 217 """
214 218 fctx = self._get_filectx(path)
215 219 nodes = [fctx.filectx(x).node() for x in fctx.filelog()]
216 220 changesets = [self.repository.get_changeset(hex(node))
217 221 for node in reversed(nodes)]
218 222 return changesets
219 223
220 224 def get_file_annotate(self, path):
221 225 """
222 226 Returns a list of three-element tuples with lineno, changeset and line
223 227 """
224 228 fctx = self._get_filectx(path)
225 229 annotate = []
226 230 for i, annotate_data in enumerate(fctx.annotate()):
227 231 ln_no = i + 1
228 232 annotate.append((ln_no, self.repository\
229 233 .get_changeset(hex(annotate_data[0].node())),
230 234 annotate_data[1],))
231 235
232 236 return annotate
233 237
234 238 def fill_archive(self, stream=None, kind='tgz', prefix=None,
235 239 subrepos=False):
236 240 """
237 241 Fills up given stream.
238 242
239 243 :param stream: file like object.
240 244 :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
241 245 Default: ``tgz``.
242 246 :param prefix: name of root directory in archive.
243 247 Default is repository name and changeset's raw_id joined with dash
244 248 (``repo-tip.<KIND>``).
245 249 :param subrepos: include subrepos in this archive.
246 250
247 251 :raise ImproperArchiveTypeError: If given kind is wrong.
248 252 :raise VCSError: If given stream is None
249 253 """
250 254
251 255 allowed_kinds = settings.ARCHIVE_SPECS.keys()
252 256 if kind not in allowed_kinds:
253 257 raise ImproperArchiveTypeError('Archive kind not supported, use '
254 258 'one of %s', allowed_kinds)
255 259
256 260 if stream is None:
257 261 raise VCSError('You need to pass in a valid stream for filling'
258 262 ' with archival data')
259 263
260 264 if prefix is None:
261 265 prefix = '%s-%s' % (self.repository.name, self.short_id)
262 266 elif prefix.startswith('/'):
263 267 raise VCSError("Prefix cannot start with leading slash")
264 268 elif prefix.strip() == '':
265 269 raise VCSError("Prefix cannot be empty")
266 270
267 271 archival.archive(self.repository._repo, stream, self.raw_id,
268 272 kind, prefix=prefix, subrepos=subrepos)
269 273
270 274 if stream.closed and hasattr(stream, 'name'):
271 275 stream = open(stream.name, 'rb')
272 276 elif hasattr(stream, 'mode') and 'r' not in stream.mode:
273 277 stream = open(stream.name, 'rb')
274 278 else:
275 279 stream.seek(0)
276 280
277 281 def get_nodes(self, path):
278 282 """
279 283 Returns a combined list of ``DirNode`` and ``FileNode`` objects
280 284 representing the state of the changeset at the given ``path``. If the node
281 285 at the given ``path`` is not an instance of ``DirNode``, a ``ChangesetError`` is raised.
282 286 """
283 287
284 288 if self._get_kind(path) != NodeKind.DIR:
285 289 raise ChangesetError("Directory does not exist for revision %r at "
286 290 " %r" % (self.revision, path))
287 291 path = self._fix_path(path)
288 292
289 293 filenodes = [FileNode(f, changeset=self) for f in self._file_paths
290 294 if os.path.dirname(f) == path]
291 295 dirs = path == '' and '' or [d for d in self._dir_paths
292 296 if d and posixpath.dirname(d) == path]
293 297 dirnodes = [DirNode(d, changeset=self) for d in dirs
294 298 if os.path.dirname(d) == path]
295 299
296 300 als = self.repository.alias
297 301 for k, vals in self._extract_submodules().iteritems():
298 302 #vals = url,rev,type
299 303 loc = vals[0]
300 304 cs = vals[1]
301 305 dirnodes.append(SubModuleNode(k, url=loc, changeset=cs,
302 306 alias=als))
303 307 nodes = dirnodes + filenodes
304 308 # cache nodes
305 309 for node in nodes:
306 310 self.nodes[node.path] = node
307 311 nodes.sort()
308 312
309 313 return nodes
310 314
311 315 def get_node(self, path):
312 316 """
313 317 Returns a ``Node`` object for the given ``path``. If there is no node
314 318 at the given ``path``, a ``ChangesetError`` is raised.
315 319 """
316 320
317 321 path = self._fix_path(path)
318 322
319 323 if not path in self.nodes:
320 324 if path in self._file_paths:
321 325 node = FileNode(path, changeset=self)
322 326 elif path in self._dir_paths:
323 327 if path == '':
324 328 node = RootNode(changeset=self)
325 329 else:
326 330 node = DirNode(path, changeset=self)
327 331 else:
328 332 raise NodeDoesNotExistError("There is no file nor directory "
329 333 "at the given path: %r at revision %r"
330 334 % (path, self.short_id))
331 335 # cache node
332 336 self.nodes[path] = node
333 337 return self.nodes[path]
334 338
335 339 @LazyProperty
336 340 def affected_files(self):
337 341 """
338 342 Gets fast accessible file changes for the given changeset
339 343 """
340 344 return self._ctx.files()
341 345
342 346 @property
343 347 def added(self):
344 348 """
345 349 Returns list of added ``FileNode`` objects.
346 350 """
347 351 return AddedFileNodesGenerator([n for n in self.status[1]], self)
348 352
349 353 @property
350 354 def changed(self):
351 355 """
352 356 Returns list of modified ``FileNode`` objects.
353 357 """
354 358 return ChangedFileNodesGenerator([n for n in self.status[0]], self)
355 359
356 360 @property
357 361 def removed(self):
358 362 """
359 363 Returns list of removed ``FileNode`` objects.
360 364 """
361 365 return RemovedFileNodesGenerator([n for n in self.status[2]], self)
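And the Mercurial analogue: a changectx's `date()` returns a `(unixtime, tzoffset)` pair, so the new `_timestamp` keeps element 0 as the epoch value indexed into the `date` field (`cs` here is an assumed `MercurialChangeset` instance):

unixtime, tzoffset = cs._ctx.date()
assert cs._timestamp == unixtime    # the value indexed as `date`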