##// END OF EJS Templates
treemanifest: create treemanifest class...
Martin von Zweigbergk -
r24401:e6e023d5 default
parent child Browse files
Show More
@@ -1,406 +1,628 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util, scmutil
10 10 import array, struct
11 11
12 12 propertycache = util.propertycache
13 13
14 14 class _lazymanifest(dict):
15 15 """This is the pure implementation of lazymanifest.
16 16
17 17 It has not been optimized *at all* and is not lazy.
18 18 """
19 19
20 20 def __init__(self, data):
21 21 # This init method does a little bit of excessive-looking
22 22 # precondition checking. This is so that the behavior of this
23 23 # class exactly matches its C counterpart to try and help
24 24 # prevent surprise breakage for anyone that develops against
25 25 # the pure version.
26 26 if data and data[-1] != '\n':
27 27 raise ValueError('Manifest did not end in a newline.')
28 28 dict.__init__(self)
29 29 prev = None
30 30 for l in data.splitlines():
31 31 if prev is not None and prev > l:
32 32 raise ValueError('Manifest lines not in sorted order.')
33 33 prev = l
34 34 f, n = l.split('\0')
35 35 if len(n) > 40:
36 36 self[f] = revlog.bin(n[:40]), n[40:]
37 37 else:
38 38 self[f] = revlog.bin(n), ''
39 39
40 40 def __setitem__(self, k, v):
41 41 node, flag = v
42 42 assert node is not None
43 43 if len(node) > 21:
44 44 node = node[:21] # match c implementation behavior
45 45 dict.__setitem__(self, k, (node, flag))
46 46
47 47 def __iter__(self):
48 48 return iter(sorted(dict.keys(self)))
49 49
50 50 def iterkeys(self):
51 51 return iter(sorted(dict.keys(self)))
52 52
53 53 def iterentries(self):
54 54 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
55 55
56 56 def copy(self):
57 57 c = _lazymanifest('')
58 58 c.update(self)
59 59 return c
60 60
61 61 def diff(self, m2, clean=False):
62 62 '''Finds changes between the current manifest and m2.'''
63 63 diff = {}
64 64
65 65 for fn, e1 in self.iteritems():
66 66 if fn not in m2:
67 67 diff[fn] = e1, (None, '')
68 68 else:
69 69 e2 = m2[fn]
70 70 if e1 != e2:
71 71 diff[fn] = e1, e2
72 72 elif clean:
73 73 diff[fn] = None
74 74
75 75 for fn, e2 in m2.iteritems():
76 76 if fn not in self:
77 77 diff[fn] = (None, ''), e2
78 78
79 79 return diff
80 80
81 81 def filtercopy(self, filterfn):
82 82 c = _lazymanifest('')
83 83 for f, n, fl in self.iterentries():
84 84 if filterfn(f):
85 85 c[f] = n, fl
86 86 return c
87 87
88 88 def text(self):
89 89 """Get the full data of this manifest as a bytestring."""
90 90 fl = sorted(self.iterentries())
91 91
92 92 _hex = revlog.hex
93 93 # if this is changed to support newlines in filenames,
94 94 # be sure to check the templates/ dir again (especially *-raw.tmpl)
95 95 return ''.join("%s\0%s%s\n" % (
96 96 f, _hex(n[:20]), flag) for f, n, flag in fl)
97 97
# Use the lazymanifest from the parsers module when it provides one;
# otherwise keep the pure implementation defined above.
_lazymanifest = getattr(parsers, 'lazymanifest', _lazymanifest)
102 102
class manifestdict(object):
    """Manifest backed by a ``_lazymanifest`` held in ``self._lm``.

    Item access maps a filename to its node. The flag stored next to each
    node is reached through flags(), setflag() and find().
    """

    def __init__(self, data=''):
        self._lm = _lazymanifest(data)

    def __getitem__(self, key):
        """Return the node recorded for key."""
        entry = self._lm[key]
        return entry[0]

    def find(self, key):
        """Return the underlying entry stored for key."""
        return self._lm[key]

    def __len__(self):
        return len(self._lm)

    def __setitem__(self, key, node):
        """Record node for key, keeping whatever flag key already has."""
        currentflag = self.flags(key, '')
        self._lm[key] = node, currentflag

    def __contains__(self, key):
        return key in self._lm

    def __delitem__(self, key):
        del self._lm[key]

    def __iter__(self):
        return iter(self._lm)

    def iterkeys(self):
        return self._lm.iterkeys()

    def keys(self):
        return list(self.iterkeys())

    def intersectfiles(self, files):
        '''make a new lazymanifest with the intersection of self with files

        The algorithm assumes that files is much smaller than self.'''
        result = manifestdict()
        source = self._lm
        target = result._lm
        for fname in files:
            if fname in source:
                target[fname] = source[fname]
        return result

    def filesnotin(self, m2):
        '''Set of files in this manifest that are not in the other'''
        return set(self).difference(m2)

    @propertycache
    def _dirs(self):
        return scmutil.dirs(self)

    def dirs(self):
        """Return the scmutil.dirs object built from this manifest."""
        return self._dirs

    def hasdir(self, dir):
        """Report whether dir is a member of dirs()."""
        return dir in self._dirs

    def matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        files = match.files()
        # With a short explicit file list it is enough to look those files
        # up directly instead of running the matcher over every entry.
        if len(files) < 100:
            if match.matchfn == match.exact:
                return self.intersectfiles(files)
            if not match.anypats() and util.all(fn in self for fn in files):
                return self.intersectfiles(files)

        filtered = manifestdict('')
        filtered._lm = self._lm.filtercopy(match)
        return filtered

    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        '''
        return self._lm.diff(m2._lm, clean)

    def setflag(self, key, flag):
        """Replace the flag of key, keeping its current node."""
        self._lm[key] = self[key], flag

    def get(self, key, default=None):
        """Return the node for key, or default when key is missing."""
        try:
            entry = self._lm[key]
        except KeyError:
            return default
        return entry[0]

    def flags(self, key, default=''):
        """Return the flag for key, or default when key is missing."""
        try:
            entry = self._lm[key]
        except KeyError:
            return default
        return entry[1]

    def copy(self):
        """Return a manifestdict wrapping a copy of the underlying data."""
        duplicate = manifestdict('')
        duplicate._lm = self._lm.copy()
        return duplicate

    def iteritems(self):
        """Iterate over the first two fields of every underlying entry."""
        return (entry[:2] for entry in self._lm.iterentries())

    def text(self):
        return self._lm.text()

    def fastdelta(self, base, changes):
        """Given a base manifest text as an array.array and a list of changes
        relative to that text, compute a delta that can be used by revlog.

        changes is a sequence of (filename, todelete) pairs. The return
        value is (arraytext, deltatext) as produced by _addlistdelta().
        Raises AssertionError when a file to delete is not found in base.
        """
        hunks = []
        hunkstart = None
        hunkend = None
        hunklines = [""]
        pos = 0
        # zero copy representation of base as a buffer
        basebuf = util.buffer(base)

        # start with a readonly loop that finds the offset of
        # each line and creates the deltas
        for fname, removing in changes:
            # pos will either be the index of the item or the insert point
            pos, lineend = _msearch(basebuf, fname, pos)
            if removing:
                if pos == lineend:
                    # item we want to delete was not found, error out
                    raise AssertionError(
                            _("failed to remove %s from manifest") % fname)
                newline = ""
            else:
                node, flag = self._lm[fname]
                newline = "%s\0%s%s\n" % (fname, revlog.hex(node), flag)

            if hunkstart is not None and hunkstart <= pos <= hunkend:
                # this change starts inside the pending hunk: grow the hunk
                if hunkend < lineend:
                    hunkend = lineend
                if newline:
                    hunklines.append(newline)
                continue

            # otherwise flush the pending hunk, if any, and start a new one
            if hunkstart is not None:
                hunks.append([hunkstart, hunkend, "".join(hunklines)])
            hunkstart = pos
            hunkend = lineend
            hunklines = [newline]

        if hunkstart is not None:
            hunks.append([hunkstart, hunkend, "".join(hunklines)])
        # apply the delta to the base, and get a delta for addrevision
        deltatext, arraytext = _addlistdelta(base, hunks)
        return arraytext, deltatext
261 261
262 262 def _msearch(m, s, lo=0, hi=None):
263 263 '''return a tuple (start, end) that says where to find s within m.
264 264
265 265 If the string is found m[start:end] are the line containing
266 266 that string. If start == end the string was not found and
267 267 they indicate the proper sorted insertion point.
268 268
269 269 m should be a buffer or a string
270 270 s is a string'''
271 271 def advance(i, c):
272 272 while i < lenm and m[i] != c:
273 273 i += 1
274 274 return i
275 275 if not s:
276 276 return (lo, lo)
277 277 lenm = len(m)
278 278 if not hi:
279 279 hi = lenm
280 280 while lo < hi:
281 281 mid = (lo + hi) // 2
282 282 start = mid
283 283 while start > 0 and m[start - 1] != '\n':
284 284 start -= 1
285 285 end = advance(start, '\0')
286 286 if m[start:end] < s:
287 287 # we know that after the null there are 40 bytes of sha1
288 288 # this translates to the bisect lo = mid + 1
289 289 lo = advance(end + 40, '\n') + 1
290 290 else:
291 291 # this translates to the bisect hi = mid
292 292 hi = start
293 293 end = advance(lo, '\0')
294 294 found = m[lo:end]
295 295 if s == found:
296 296 # we know that after the null there are 40 bytes of sha1
297 297 end = advance(end + 40, '\n')
298 298 return (lo, end + 1)
299 299 else:
300 300 return (lo, lo)
301 301
302 302 def _checkforbidden(l):
303 303 """Check filenames for illegal characters."""
304 304 for f in l:
305 305 if '\n' in f or '\r' in f:
306 306 raise error.RevlogError(
307 307 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
308 308
309 309
310 310 # apply the changes collected during the bisect loop to our addlist
311 311 # return a delta suitable for addrevision
312 312 def _addlistdelta(addlist, x):
313 313 # for large addlist arrays, building a new array is cheaper
314 314 # than repeatedly modifying the existing one
315 315 currentposition = 0
316 316 newaddlist = array.array('c')
317 317
318 318 for start, end, content in x:
319 319 newaddlist += addlist[currentposition:start]
320 320 if content:
321 321 newaddlist += array.array('c', content)
322 322
323 323 currentposition = end
324 324
325 325 newaddlist += addlist[currentposition:]
326 326
327 327 deltatext = "".join(struct.pack(">lll", start, end, len(content))
328 328 + content for start, end, content in x)
329 329 return deltatext, newaddlist
330 330
331 def _splittopdir(f):
332 if '/' in f:
333 dir, subpath = f.split('/', 1)
334 return dir + '/', subpath
335 else:
336 return '', f
337
class treemanifest(object):
    """Manifest stored as a tree of per-directory manifests.

    ``_files`` maps the names of files directly at this level to their
    node and ``_flags`` maps them to their flag, when one was set.
    ``_dirs`` maps each immediate subdirectory name, with its trailing
    '/', to a nested treemanifest.
    """

    def __init__(self, text=''):
        self._dirs = {}
        # Using _lazymanifest here is a little slower than plain old dicts
        self._files = {}
        self._flags = {}
        for fname, node, flag in _lazymanifest(text).iterentries():
            self[fname] = node
            if flag:
                self.setflag(fname, flag)

    def __len__(self):
        """Count the files at this level and in every subdirectory."""
        return len(self._files) + sum(len(sub) for sub in self._dirs.values())

    def iteritems(self):
        """Yield (full path, node) pairs, ordered by name at each level."""
        entries = list(self._dirs.items())
        entries.extend(self._files.items())
        entries.sort()
        for name, value in entries:
            if name in self._files:
                yield name, value
                continue
            # a directory: prefix everything below it with its name
            for subname, node in value.iteritems():
                yield name + subname, node

    def iterkeys(self):
        """Yield the full paths, ordered by name at each level."""
        names = list(self._dirs)
        names.extend(self._files)
        names.sort()
        for name in names:
            if name in self._files:
                yield name
                continue
            for subname in self._dirs[name].iterkeys():
                yield name + subname

    def keys(self):
        return list(self.iterkeys())

    def __iter__(self):
        return self.iterkeys()

    def __contains__(self, f):
        if f is None:
            return False
        dir, subpath = _splittopdir(f)
        if not dir:
            return f in self._files
        if dir not in self._dirs:
            return False
        return subpath in self._dirs[dir]

    def get(self, f, default=None):
        """Return the node for path f, or default when f is not present."""
        dir, subpath = _splittopdir(f)
        if not dir:
            return self._files.get(f, default)
        if dir not in self._dirs:
            return default
        return self._dirs[dir].get(subpath, default)

    def __getitem__(self, f):
        dir, subpath = _splittopdir(f)
        if not dir:
            return self._files[f]
        return self._dirs[dir][subpath]

    def flags(self, f):
        """Return the flag recorded for path f, or '' when there is none."""
        dir, subpath = _splittopdir(f)
        if dir:
            if dir not in self._dirs:
                return ''
            return self._dirs[dir].flags(subpath)
        if f in self._dirs:
            return ''
        return self._flags.get(f, '')

    def find(self, f):
        """Return the (node, flag) pair for path f.

        Raises KeyError when f is not present.
        """
        dir, subpath = _splittopdir(f)
        if not dir:
            return self._files[f], self._flags.get(f, '')
        return self._dirs[dir].find(subpath)

    def __delitem__(self, f):
        dir, subpath = _splittopdir(f)
        if dir:
            sub = self._dirs[dir]
            del sub[subpath]
            # If the directory is now empty, remove it
            if not (sub._dirs or sub._files):
                del self._dirs[dir]
        else:
            del self._files[f]
            self._flags.pop(f, None)

    def __setitem__(self, f, n):
        assert n is not None
        dir, subpath = _splittopdir(f)
        if not dir:
            self._files[f] = n
            return
        # create the intermediate directory on first use
        if dir not in self._dirs:
            self._dirs[dir] = treemanifest()
        self._dirs[dir][subpath] = n

    def setflag(self, f, flags):
        """Set the flags (symlink, executable) for path f."""
        dir, subpath = _splittopdir(f)
        if not dir:
            self._flags[f] = flags
            return
        if dir not in self._dirs:
            self._dirs[dir] = treemanifest()
        self._dirs[dir].setflag(subpath, flags)

    def copy(self):
        """Return a copy whose nested directories are copied as well."""
        duplicate = treemanifest()
        for name, sub in self._dirs.items():
            duplicate._dirs[name] = sub.copy()
        duplicate._files = dict(self._files)
        duplicate._flags = dict(self._flags)
        return duplicate

    def intersectfiles(self, files):
        '''make a new treemanifest with the intersection of self with files

        The algorithm assumes that files is much smaller than self.'''
        result = treemanifest()
        for fname in files:
            if fname not in self:
                continue
            result[fname] = self[fname]
            flag = self.flags(fname)
            if flag:
                result.setflag(fname, flag)
        return result

    def filesnotin(self, m2):
        '''Set of files in this manifest that are not in the other'''
        return set(self.iterkeys()).difference(m2.iterkeys())

    @propertycache
    def _alldirs(self):
        return scmutil.dirs(self)

    def dirs(self):
        """Return the scmutil.dirs object built from this manifest."""
        return self._alldirs

    def hasdir(self, dir):
        """Report whether dir is a member of dirs()."""
        return dir in self._alldirs

    def matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        files = match.files()
        # an explicit list of files can be looked up directly
        if match.matchfn == match.exact:
            return self.intersectfiles(files)
        if not match.anypats() and util.all(fn in self for fn in files):
            return self.intersectfiles(files)

        # otherwise copy everything and drop what the matcher rejects
        filtered = self.copy()
        for fname in filtered.keys():
            if not match(fname):
                del filtered[fname]
        return filtered

    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        '''
        changes = {}

        for fname, node1 in self.iteritems():
            flag1 = self.flags(fname)
            node2 = m2.get(fname, None)
            flag2 = m2.flags(fname)
            if node2 is None:
                flag2 = ''
            if node1 != node2 or flag1 != flag2:
                changes[fname] = ((node1, flag1), (node2, flag2))
            elif clean:
                changes[fname] = None

        # files that only exist in the other manifest
        for fname, node2 in m2.iteritems():
            if fname not in self:
                changes[fname] = ((None, ''), (node2, m2.flags(fname)))

        return changes

    def text(self):
        """Get the full data of this manifest as a bytestring."""
        names = self.keys()
        _checkforbidden(names)

        tohex = revlog.hex
        getflags = self.flags
        # if this is changed to support newlines in filenames,
        # be sure to check the templates/ dir again (especially *-raw.tmpl)
        lines = ["%s\0%s%s\n" % (fname, tohex(self[fname]), getflags(fname))
                 for fname in names]
        return ''.join(lines)
552
class manifest(revlog.revlog):
    """Revlog of manifests, stored in "00manifest.i".

    ``_mancache`` maps a manifest node to a pair: the parsed manifest
    and its text as a character array.
    """

    def __init__(self, opener):
        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob below.
        cachesize = 4
        options = getattr(opener, 'options', None)
        if options is not None:
            cachesize = options.get('manifestcachesize', cachesize)
        self._mancache = util.lrucachedict(cachesize)
        revlog.revlog.__init__(self, opener, "00manifest.i")

    def readdelta(self, node):
        """Return a manifestdict parsed from node's delta against its
        delta parent."""
        rev = self.rev(node)
        delta = self.revdiff(self.deltaparent(rev), rev)
        return manifestdict(mdiff.patchtext(delta))

    def readfast(self, node):
        '''use the faster of readdelta or read'''
        rev = self.rev(node)
        deltaparent = self.deltaparent(rev)
        # readdelta is only used when the delta parent is a real parent
        if (deltaparent == revlog.nullrev
            or deltaparent not in self.parentrevs(rev)):
            return self.read(node)
        return self.readdelta(node)

    def read(self, node):
        """Return the manifest for node, caching it on first read."""
        if node == revlog.nullid:
            return manifestdict() # don't upset local cache
        if node in self._mancache:
            cached = self._mancache[node]
            return cached[0]
        text = self.revision(node)
        arraytext = array.array('c', text)
        parsed = manifestdict(text)
        self._mancache[node] = (parsed, arraytext)
        return parsed

    def find(self, node, f):
        '''look up entry for a single file efficiently.
        return (node, flags) pair if found, (None, None) if not.'''
        mf = self.read(node)
        try:
            return mf.find(f)
        except KeyError:
            return None, None

    def add(self, m, transaction, link, p1, p2, added, removed):
        """Store manifest m as a new revision and return its node.

        added and removed list the filenames that changed relative to p1.
        They are only used when p1 is in the manifest cache.
        """
        if p1 not in self._mancache:
            # The first parent manifest isn't already loaded, so we'll
            # just encode a fulltext of the manifest and pass that
            # through to the revlog layer, and let it handle the delta
            # process.
            text = m.text()
            arraytext = array.array('c', text)
            cachedelta = None
        else:
            # If our first parent is in the manifest cache, we can
            # compute a delta here using properties we know about the
            # manifest up-front, which may save time later for the
            # revlog layer.

            _checkforbidden(added)
            # combine the changed lists into one list for sorting
            # this could use heapq.merge() (from Python 2.6+) or equivalent
            # since the lists are already sorted
            work = sorted([(x, False) for x in added] +
                          [(x, True) for x in removed])

            basetext = self._mancache[p1][1]
            arraytext, deltatext = m.fastdelta(basetext, work)
            cachedelta = self.rev(p1), deltatext
            text = util.buffer(arraytext)

        newnode = self.addrevision(text, transaction, link, p1, p2,
                                   cachedelta)
        self._mancache[newnode] = (m, arraytext)

        return newnode
General Comments 0
You need to be logged in to leave comments. Login now