##// END OF EJS Templates
manifest: add dirs() to manifestdict...
Drew Gottlieb -
r24322:f263814c default
parent child Browse files
Show More
@@ -1,395 +1,403 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 import mdiff, parsers, error, revlog, util
9 import mdiff, parsers, error, revlog, util, scmutil
10 10 import array, struct
11 11
12 propertycache = util.propertycache
12 13
13 14 class _lazymanifest(dict):
14 15 """This is the pure implementation of lazymanifest.
15 16
16 17 It has not been optimized *at all* and is not lazy.
17 18 """
18 19
19 20 def __init__(self, data):
20 21 # This init method does a little bit of excessive-looking
21 22 # precondition checking. This is so that the behavior of this
22 23 # class exactly matches its C counterpart to try and help
23 24 # prevent surprise breakage for anyone that develops against
24 25 # the pure version.
25 26 if data and data[-1] != '\n':
26 27 raise ValueError('Manifest did not end in a newline.')
27 28 dict.__init__(self)
28 29 prev = None
29 30 for l in data.splitlines():
30 31 if prev is not None and prev > l:
31 32 raise ValueError('Manifest lines not in sorted order.')
32 33 prev = l
33 34 f, n = l.split('\0')
34 35 if len(n) > 40:
35 36 self[f] = revlog.bin(n[:40]), n[40:]
36 37 else:
37 38 self[f] = revlog.bin(n), ''
38 39
39 40 def __setitem__(self, k, v):
40 41 node, flag = v
41 42 assert node is not None
42 43 if len(node) > 21:
43 44 node = node[:21] # match c implementation behavior
44 45 dict.__setitem__(self, k, (node, flag))
45 46
46 47 def __iter__(self):
47 48 return iter(sorted(dict.keys(self)))
48 49
49 50 def iterkeys(self):
50 51 return iter(sorted(dict.keys(self)))
51 52
52 53 def iterentries(self):
53 54 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
54 55
55 56 def copy(self):
56 57 c = _lazymanifest('')
57 58 c.update(self)
58 59 return c
59 60
60 61 def diff(self, m2, clean=False):
61 62 '''Finds changes between the current manifest and m2.'''
62 63 diff = {}
63 64
64 65 for fn, e1 in self.iteritems():
65 66 if fn not in m2:
66 67 diff[fn] = e1, (None, '')
67 68 else:
68 69 e2 = m2[fn]
69 70 if e1 != e2:
70 71 diff[fn] = e1, e2
71 72 elif clean:
72 73 diff[fn] = None
73 74
74 75 for fn, e2 in m2.iteritems():
75 76 if fn not in self:
76 77 diff[fn] = (None, ''), e2
77 78
78 79 return diff
79 80
80 81 def filtercopy(self, filterfn):
81 82 c = _lazymanifest('')
82 83 for f, n, fl in self.iterentries():
83 84 if filterfn(f):
84 85 c[f] = n, fl
85 86 return c
86 87
87 88 def text(self):
88 89 """Get the full data of this manifest as a bytestring."""
89 90 fl = sorted(self.iterentries())
90 91
91 92 _hex = revlog.hex
92 93 # if this is changed to support newlines in filenames,
93 94 # be sure to check the templates/ dir again (especially *-raw.tmpl)
94 95 return ''.join("%s\0%s%s\n" % (
95 96 f, _hex(n[:20]), flag) for f, n, flag in fl)
96 97
97 98 try:
98 99 _lazymanifest = parsers.lazymanifest
99 100 except AttributeError:
100 101 pass
101 102
102 103 class manifestdict(object):
103 104 def __init__(self, data=''):
104 105 self._lm = _lazymanifest(data)
105 106
106 107 def __getitem__(self, key):
107 108 return self._lm[key][0]
108 109
109 110 def find(self, key):
110 111 return self._lm[key]
111 112
112 113 def __len__(self):
113 114 return len(self._lm)
114 115
115 116 def __setitem__(self, key, node):
116 117 self._lm[key] = node, self.flags(key, '')
117 118
118 119 def __contains__(self, key):
119 120 return key in self._lm
120 121
121 122 def __delitem__(self, key):
122 123 del self._lm[key]
123 124
124 125 def __iter__(self):
125 126 return self._lm.__iter__()
126 127
127 128 def iterkeys(self):
128 129 return self._lm.iterkeys()
129 130
130 131 def keys(self):
131 132 return list(self.iterkeys())
132 133
133 134 def intersectfiles(self, files):
134 135 '''make a new lazymanifest with the intersection of self with files
135 136
136 137 The algorithm assumes that files is much smaller than self.'''
137 138 ret = manifestdict()
138 139 lm = self._lm
139 140 for fn in files:
140 141 if fn in lm:
141 142 ret._lm[fn] = self._lm[fn]
142 143 return ret
143 144
144 145 def filesnotin(self, m2):
145 146 '''Set of files in this manifest that are not in the other'''
146 147 files = set(self)
147 148 files.difference_update(m2)
148 149 return files
149 150
151 @propertycache
152 def _dirs(self):
153 return scmutil.dirs(self)
154
155 def dirs(self):
156 return self._dirs
157
150 158 def matches(self, match):
151 159 '''generate a new manifest filtered by the match argument'''
152 160 if match.always():
153 161 return self.copy()
154 162
155 163 files = match.files()
156 164 if (match.matchfn == match.exact or
157 165 (not match.anypats() and util.all(fn in self for fn in files))):
158 166 return self.intersectfiles(files)
159 167
160 168 lm = manifestdict('')
161 169 lm._lm = self._lm.filtercopy(match)
162 170 return lm
163 171
164 172 def diff(self, m2, clean=False):
165 173 '''Finds changes between the current manifest and m2.
166 174
167 175 Args:
168 176 m2: the manifest to which this manifest should be compared.
169 177 clean: if true, include files unchanged between these manifests
170 178 with a None value in the returned dictionary.
171 179
172 180 The result is returned as a dict with filename as key and
173 181 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
174 182 nodeid in the current/other manifest and fl1/fl2 is the flag
175 183 in the current/other manifest. Where the file does not exist,
176 184 the nodeid will be None and the flags will be the empty
177 185 string.
178 186 '''
179 187 return self._lm.diff(m2._lm, clean)
180 188
181 189 def setflag(self, key, flag):
182 190 self._lm[key] = self[key], flag
183 191
184 192 def get(self, key, default=None):
185 193 try:
186 194 return self._lm[key][0]
187 195 except KeyError:
188 196 return default
189 197
190 198 def flags(self, key, default=''):
191 199 try:
192 200 return self._lm[key][1]
193 201 except KeyError:
194 202 return default
195 203
196 204 def copy(self):
197 205 c = manifestdict('')
198 206 c._lm = self._lm.copy()
199 207 return c
200 208
201 209 def iteritems(self):
202 210 return (x[:2] for x in self._lm.iterentries())
203 211
204 212 def text(self):
205 213 return self._lm.text()
206 214
207 215 def fastdelta(self, base, changes):
208 216 """Given a base manifest text as an array.array and a list of changes
209 217 relative to that text, compute a delta that can be used by revlog.
210 218 """
211 219 delta = []
212 220 dstart = None
213 221 dend = None
214 222 dline = [""]
215 223 start = 0
216 224 # zero copy representation of base as a buffer
217 225 addbuf = util.buffer(base)
218 226
219 227 # start with a readonly loop that finds the offset of
220 228 # each line and creates the deltas
221 229 for f, todelete in changes:
222 230 # bs will either be the index of the item or the insert point
223 231 start, end = _msearch(addbuf, f, start)
224 232 if not todelete:
225 233 h, fl = self._lm[f]
226 234 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
227 235 else:
228 236 if start == end:
229 237 # item we want to delete was not found, error out
230 238 raise AssertionError(
231 239 _("failed to remove %s from manifest") % f)
232 240 l = ""
233 241 if dstart is not None and dstart <= start and dend >= start:
234 242 if dend < end:
235 243 dend = end
236 244 if l:
237 245 dline.append(l)
238 246 else:
239 247 if dstart is not None:
240 248 delta.append([dstart, dend, "".join(dline)])
241 249 dstart = start
242 250 dend = end
243 251 dline = [l]
244 252
245 253 if dstart is not None:
246 254 delta.append([dstart, dend, "".join(dline)])
247 255 # apply the delta to the base, and get a delta for addrevision
248 256 deltatext, arraytext = _addlistdelta(base, delta)
249 257 return arraytext, deltatext
250 258
251 259 def _msearch(m, s, lo=0, hi=None):
252 260 '''return a tuple (start, end) that says where to find s within m.
253 261
254 262 If the string is found m[start:end] are the line containing
255 263 that string. If start == end the string was not found and
256 264 they indicate the proper sorted insertion point.
257 265
258 266 m should be a buffer or a string
259 267 s is a string'''
260 268 def advance(i, c):
261 269 while i < lenm and m[i] != c:
262 270 i += 1
263 271 return i
264 272 if not s:
265 273 return (lo, lo)
266 274 lenm = len(m)
267 275 if not hi:
268 276 hi = lenm
269 277 while lo < hi:
270 278 mid = (lo + hi) // 2
271 279 start = mid
272 280 while start > 0 and m[start - 1] != '\n':
273 281 start -= 1
274 282 end = advance(start, '\0')
275 283 if m[start:end] < s:
276 284 # we know that after the null there are 40 bytes of sha1
277 285 # this translates to the bisect lo = mid + 1
278 286 lo = advance(end + 40, '\n') + 1
279 287 else:
280 288 # this translates to the bisect hi = mid
281 289 hi = start
282 290 end = advance(lo, '\0')
283 291 found = m[lo:end]
284 292 if s == found:
285 293 # we know that after the null there are 40 bytes of sha1
286 294 end = advance(end + 40, '\n')
287 295 return (lo, end + 1)
288 296 else:
289 297 return (lo, lo)
290 298
291 299 def _checkforbidden(l):
292 300 """Check filenames for illegal characters."""
293 301 for f in l:
294 302 if '\n' in f or '\r' in f:
295 303 raise error.RevlogError(
296 304 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
297 305
298 306
299 307 # apply the changes collected during the bisect loop to our addlist
300 308 # return a delta suitable for addrevision
301 309 def _addlistdelta(addlist, x):
302 310 # for large addlist arrays, building a new array is cheaper
303 311 # than repeatedly modifying the existing one
304 312 currentposition = 0
305 313 newaddlist = array.array('c')
306 314
307 315 for start, end, content in x:
308 316 newaddlist += addlist[currentposition:start]
309 317 if content:
310 318 newaddlist += array.array('c', content)
311 319
312 320 currentposition = end
313 321
314 322 newaddlist += addlist[currentposition:]
315 323
316 324 deltatext = "".join(struct.pack(">lll", start, end, len(content))
317 325 + content for start, end, content in x)
318 326 return deltatext, newaddlist
319 327
320 328 class manifest(revlog.revlog):
321 329 def __init__(self, opener):
322 330 # During normal operations, we expect to deal with not more than four
323 331 # revs at a time (such as during commit --amend). When rebasing large
324 332 # stacks of commits, the number can go up, hence the config knob below.
325 333 cachesize = 4
326 334 opts = getattr(opener, 'options', None)
327 335 if opts is not None:
328 336 cachesize = opts.get('manifestcachesize', cachesize)
329 337 self._mancache = util.lrucachedict(cachesize)
330 338 revlog.revlog.__init__(self, opener, "00manifest.i")
331 339
332 340 def readdelta(self, node):
333 341 r = self.rev(node)
334 342 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
335 343 return manifestdict(d)
336 344
337 345 def readfast(self, node):
338 346 '''use the faster of readdelta or read'''
339 347 r = self.rev(node)
340 348 deltaparent = self.deltaparent(r)
341 349 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
342 350 return self.readdelta(node)
343 351 return self.read(node)
344 352
345 353 def read(self, node):
346 354 if node == revlog.nullid:
347 355 return manifestdict() # don't upset local cache
348 356 if node in self._mancache:
349 357 return self._mancache[node][0]
350 358 text = self.revision(node)
351 359 arraytext = array.array('c', text)
352 360 m = manifestdict(text)
353 361 self._mancache[node] = (m, arraytext)
354 362 return m
355 363
356 364 def find(self, node, f):
357 365 '''look up entry for a single file efficiently.
358 366 return (node, flags) pair if found, (None, None) if not.'''
359 367 m = self.read(node)
360 368 try:
361 369 return m.find(f)
362 370 except KeyError:
363 371 return None, None
364 372
365 373 def add(self, m, transaction, link, p1, p2, added, removed):
366 374 if p1 in self._mancache:
367 375 # If our first parent is in the manifest cache, we can
368 376 # compute a delta here using properties we know about the
369 377 # manifest up-front, which may save time later for the
370 378 # revlog layer.
371 379
372 380 _checkforbidden(added)
373 381 # combine the changed lists into one list for sorting
374 382 work = [(x, False) for x in added]
375 383 work.extend((x, True) for x in removed)
376 384 # this could use heapq.merge() (from Python 2.6+) or equivalent
377 385 # since the lists are already sorted
378 386 work.sort()
379 387
380 388 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
381 389 cachedelta = self.rev(p1), deltatext
382 390 text = util.buffer(arraytext)
383 391 else:
384 392 # The first parent manifest isn't already loaded, so we'll
385 393 # just encode a fulltext of the manifest and pass that
386 394 # through to the revlog layer, and let it handle the delta
387 395 # process.
388 396 text = m.text()
389 397 arraytext = array.array('c', text)
390 398 cachedelta = None
391 399
392 400 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
393 401 self._mancache[n] = (m, arraytext)
394 402
395 403 return n
General Comments 0
You need to be logged in to leave comments. Login now