##// END OF EJS Templates
treemanifest: add treemanifest._isempty()...
Drew Gottlieb -
r24551:4fdf5eac default
parent child Browse files
Show More
@@ -1,676 +1,680
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util, scmutil
10 10 import array, struct
11 11
12 12 propertycache = util.propertycache
13 13
14 14 def _parse(data):
15 15 """Generates (path, node, flags) tuples from a manifest text"""
16 16 # This method does a little bit of excessive-looking
17 17 # precondition checking. This is so that the behavior of this
18 18 # class exactly matches its C counterpart to try and help
19 19 # prevent surprise breakage for anyone that develops against
20 20 # the pure version.
21 21 if data and data[-1] != '\n':
22 22 raise ValueError('Manifest did not end in a newline.')
23 23 prev = None
24 24 for l in data.splitlines():
25 25 if prev is not None and prev > l:
26 26 raise ValueError('Manifest lines not in sorted order.')
27 27 prev = l
28 28 f, n = l.split('\0')
29 29 if len(n) > 40:
30 30 yield f, revlog.bin(n[:40]), n[40:]
31 31 else:
32 32 yield f, revlog.bin(n), ''
33 33
def _text(it):
    """Build a manifest text from an iterator of (path, node, flags) tuples.

    Every tuple becomes one ``path NUL hexnode flags NEWLINE`` line.  All
    paths are run through _checkforbidden() before the text is returned.
    """
    tohex = revlog.hex
    names = []
    chunks = []
    for path, node, flags in it:
        names.append(path)
        # if this is changed to support newlines in filenames,
        # be sure to check the templates/ dir again (especially *-raw.tmpl)
        chunks.append("%s\0%s%s\n" % (path, tohex(node), flags))

    _checkforbidden(names)
    return ''.join(chunks)
48 48
class _lazymanifest(dict):
    """Pure-Python implementation of lazymanifest.

    It is a dict mapping path -> (node, flags).  It has not been
    optimized *at all* and, despite the name, is not lazy.
    """

    def __init__(self, data):
        """Populate the mapping from the manifest text ``data``."""
        dict.__init__(self)
        for path, node, flags in _parse(data):
            self[path] = node, flags

    def __setitem__(self, k, v):
        """Store a (node, flag) pair, keeping at most 21 bytes of node."""
        node, flag = v
        assert node is not None
        # match c implementation behavior: never keep more than 21 bytes
        dict.__setitem__(self, k, (node[:21], flag))

    def __iter__(self):
        """Iterate over the paths in sorted order."""
        paths = dict.keys(self)
        return iter(sorted(paths))

    def iterkeys(self):
        """Iterate over the paths in sorted order."""
        paths = dict.keys(self)
        return iter(sorted(paths))

    def iterentries(self):
        """Iterate over (path, node, flags) tuples in sorted order."""
        ordered = sorted(self.iteritems())
        return ((path, entry[0], entry[1]) for path, entry in ordered)

    def copy(self):
        """Return a new _lazymanifest holding the same entries."""
        dup = _lazymanifest('')
        dup.update(self)
        return dup

    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.

        Returns a dict keyed by path.  Values are (entry1, entry2) pairs
        where a missing side is (None, ''); when clean is true,
        unchanged paths are included with the value None.
        '''
        changes = {}

        for fn, mine in self.iteritems():
            if fn in m2:
                theirs = m2[fn]
                if mine != theirs:
                    changes[fn] = mine, theirs
                elif clean:
                    changes[fn] = None
            else:
                changes[fn] = mine, (None, '')

        for fn, theirs in m2.iteritems():
            if fn not in self:
                changes[fn] = (None, ''), theirs

        return changes

    def filtercopy(self, filterfn):
        """Return a new _lazymanifest with the entries whose path
        satisfies filterfn."""
        kept = _lazymanifest('')
        for path, node, flags in self.iterentries():
            if filterfn(path):
                kept[path] = node, flags
        return kept

    def text(self):
        """Get the full data of this manifest as a bytestring."""
        entries = self.iterentries()
        return _text(entries)
111 111
# Use parsers.lazymanifest when the parsers module provides it (presumably
# the C implementation the pure class mirrors); if the attribute is
# missing, the pure-Python _lazymanifest class defined above stays in use.
try:
    _lazymanifest = parsers.lazymanifest
except AttributeError:
    pass
116 116
class manifestdict(object):
    """Flat manifest mapping file paths to nodes (plus flags).

    All storage is delegated to a _lazymanifest instance kept in
    self._lm, whose values are (node, flags) pairs.
    """

    def __init__(self, data=''):
        """Build the manifest from the manifest text ``data``."""
        self._lm = _lazymanifest(data)

    def __getitem__(self, key):
        """Return the node stored for key (KeyError if absent)."""
        entry = self._lm[key]
        return entry[0]

    def find(self, key):
        """Return the (node, flags) entry stored for key."""
        return self._lm[key]

    def __len__(self):
        return len(self._lm)

    def __setitem__(self, key, node):
        """Set the node for key, keeping any flags already recorded."""
        currentflags = self.flags(key, '')
        self._lm[key] = node, currentflags

    def __contains__(self, key):
        return key in self._lm

    def __delitem__(self, key):
        del self._lm[key]

    def __iter__(self):
        return self._lm.__iter__()

    def iterkeys(self):
        return self._lm.iterkeys()

    def keys(self):
        """Return the paths as a list."""
        return list(self.iterkeys())

    def _intersectfiles(self, files):
        '''make a new lazymanifest with the intersection of self with files

        The algorithm assumes that files is much smaller than self.'''
        subset = manifestdict()
        source = self._lm
        for fn in files:
            if fn in source:
                subset._lm[fn] = self._lm[fn]
        return subset

    def filesnotin(self, m2):
        '''Set of files in this manifest that are not in the other'''
        mine = set(self)
        return mine.difference(m2)

    @propertycache
    def _dirs(self):
        return scmutil.dirs(self)

    def dirs(self):
        """Return the cached scmutil.dirs object for this manifest."""
        return self._dirs

    def hasdir(self, dir):
        return dir in self._dirs

    def matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        files = match.files()
        # With a short explicit file list it is cheaper to look the files
        # up one by one than to filter the whole manifest.
        if len(files) < 100:
            if match.isexact():
                return self._intersectfiles(files)
            if not match.anypats() and util.all(fn in self for fn in files):
                return self._intersectfiles(files)

        filtered = manifestdict('')
        filtered._lm = self._lm.filtercopy(match)
        return filtered

    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        '''
        return self._lm.diff(m2._lm, clean)

    def setflag(self, key, flag):
        """Replace the flags of key, keeping its node (KeyError if absent)."""
        node = self[key]
        self._lm[key] = node, flag

    def get(self, key, default=None):
        """Return the node for key, or default when key is absent."""
        try:
            entry = self._lm[key]
        except KeyError:
            return default
        return entry[0]

    def flags(self, key, default=''):
        """Return the flags for key, or default when key is absent."""
        try:
            entry = self._lm[key]
        except KeyError:
            return default
        return entry[1]

    def copy(self):
        """Return a manifestdict wrapping a copy of the underlying data."""
        dup = manifestdict('')
        dup._lm = self._lm.copy()
        return dup

    def iteritems(self):
        """Iterate over (path, node) pairs."""
        entries = self._lm.iterentries()
        return (entry[:2] for entry in entries)

    def text(self):
        return self._lm.text()

    def fastdelta(self, base, changes):
        """Given a base manifest text as an array.array and a list of changes
        relative to that text, compute a delta that can be used by revlog.

        ``changes`` is a sequence of (path, todelete) pairs.  Returns
        (arraytext, deltatext) as produced by _addlistdelta().
        """
        hunks = []
        hunkstart = None
        hunkend = None
        pieces = [""]
        start = 0
        # zero copy representation of base as a buffer
        addbuf = util.buffer(base)

        # start with a readonly loop that finds the offset of
        # each line and creates the deltas
        for f, todelete in changes:
            # start will either be the index of the item or the insert point
            start, end = _msearch(addbuf, f, start)
            if todelete:
                if start == end:
                    # item we want to delete was not found, error out
                    raise AssertionError(
                            _("failed to remove %s from manifest") % f)
                line = ""
            else:
                node, fl = self._lm[f]
                line = "%s\0%s%s\n" % (f, revlog.hex(node), fl)

            if hunkstart is not None and hunkstart <= start <= hunkend:
                # this change touches the pending hunk: grow it
                if hunkend < end:
                    hunkend = end
                if line:
                    pieces.append(line)
            else:
                # flush the pending hunk (if any) and open a new one
                if hunkstart is not None:
                    hunks.append([hunkstart, hunkend, "".join(pieces)])
                hunkstart = start
                hunkend = end
                pieces = [line]

        if hunkstart is not None:
            hunks.append([hunkstart, hunkend, "".join(pieces)])
        # apply the delta to the base, and get a delta for addrevision
        deltatext, arraytext = _addlistdelta(base, hunks)
        return arraytext, deltatext
275 275
276 276 def _msearch(m, s, lo=0, hi=None):
277 277 '''return a tuple (start, end) that says where to find s within m.
278 278
279 279 If the string is found m[start:end] are the line containing
280 280 that string. If start == end the string was not found and
281 281 they indicate the proper sorted insertion point.
282 282
283 283 m should be a buffer or a string
284 284 s is a string'''
285 285 def advance(i, c):
286 286 while i < lenm and m[i] != c:
287 287 i += 1
288 288 return i
289 289 if not s:
290 290 return (lo, lo)
291 291 lenm = len(m)
292 292 if not hi:
293 293 hi = lenm
294 294 while lo < hi:
295 295 mid = (lo + hi) // 2
296 296 start = mid
297 297 while start > 0 and m[start - 1] != '\n':
298 298 start -= 1
299 299 end = advance(start, '\0')
300 300 if m[start:end] < s:
301 301 # we know that after the null there are 40 bytes of sha1
302 302 # this translates to the bisect lo = mid + 1
303 303 lo = advance(end + 40, '\n') + 1
304 304 else:
305 305 # this translates to the bisect hi = mid
306 306 hi = start
307 307 end = advance(lo, '\0')
308 308 found = m[lo:end]
309 309 if s == found:
310 310 # we know that after the null there are 40 bytes of sha1
311 311 end = advance(end + 40, '\n')
312 312 return (lo, end + 1)
313 313 else:
314 314 return (lo, lo)
315 315
316 316 def _checkforbidden(l):
317 317 """Check filenames for illegal characters."""
318 318 for f in l:
319 319 if '\n' in f or '\r' in f:
320 320 raise error.RevlogError(
321 321 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
322 322
323 323
# apply the changes collected during the bisect loop to our addlist
# return a delta suitable for addrevision
def _addlistdelta(addlist, x):
    """Apply the hunks in ``x`` to ``addlist`` and encode them as a delta.

    ``x`` is a sequence of (start, end, content) hunks.  Returns
    (deltatext, newaddlist): the hunks packed as three big-endian longs
    (start, end, content length) each followed by the content, and a
    new character array with every hunk applied.
    """
    # for large addlist arrays, building a new array is cheaper
    # than repeatedly modifying the existing one
    rebuilt = array.array('c')
    consumed = 0

    for start, end, content in x:
        # copy the untouched stretch before this hunk, then the new text
        rebuilt += addlist[consumed:start]
        if content:
            rebuilt += array.array('c', content)
        consumed = end

    rebuilt += addlist[consumed:]

    packed = [struct.pack(">lll", start, end, len(content)) + content
              for start, end, content in x]
    return "".join(packed), rebuilt
344 344
345 345 def _splittopdir(f):
346 346 if '/' in f:
347 347 dir, subpath = f.split('/', 1)
348 348 return dir + '/', subpath
349 349 else:
350 350 return '', f
351 351
class treemanifest(object):
    """Manifest stored as a tree with one treemanifest node per directory.

    Each node keeps three dicts:
      _dirs:  'name/' -> treemanifest for that subdirectory
      _files: file name (no slash) -> node
      _flags: file name (no slash) -> flags, only for flagged files
    _dir is the full path of this node's directory ('' for the root).
    """

    def __init__(self, dir='', text=''):
        """Create the node for directory ``dir`` and load manifest ``text``."""
        self._dir = dir
        self._dirs = {}
        # Using _lazymanifest here is a little slower than plain old dicts
        self._files = {}
        self._flags = {}
        for f, n, fl in _parse(text):
            self[f] = n
            if fl:
                self.setflag(f, fl)

    def _subpath(self, path):
        """Return ``path`` prefixed with this node's directory."""
        return self._dir + path

    def __len__(self):
        """Return the number of files in this node and all subtrees."""
        size = len(self._files)
        for m in self._dirs.values():
            size += m.__len__()
        return size

    def _isempty(self):
        """Return True when neither this node nor any subtree has files."""
        return (not self._files and (not self._dirs or
                util.all(m._isempty() for m in self._dirs.values())))

    def __str__(self):
        return '<treemanifest dir=%s>' % self._dir

    def iteritems(self):
        """Yield (full path, node) pairs in sorted order."""
        for p, n in sorted(self._dirs.items() + self._files.items()):
            if p in self._files:
                yield self._subpath(p), n
            else:
                # n is a subtree here; it already yields full paths
                for f, sn in n.iteritems():
                    yield f, sn

    def iterkeys(self):
        """Yield the full path of every file in sorted order."""
        for p in sorted(self._dirs.keys() + self._files.keys()):
            if p in self._files:
                yield self._subpath(p)
            else:
                for f in self._dirs[p].iterkeys():
                    yield f

    def keys(self):
        """Return the full paths of all files as a list."""
        return list(self.iterkeys())

    def __iter__(self):
        return self.iterkeys()

    def __contains__(self, f):
        """Return True if file ``f`` (relative to this node) exists."""
        if f is None:
            return False
        dir, subpath = _splittopdir(f)
        if dir:
            if dir not in self._dirs:
                return False
            return self._dirs[dir].__contains__(subpath)
        else:
            return f in self._files

    def get(self, f, default=None):
        """Return the node of ``f``, or ``default`` when it is absent."""
        dir, subpath = _splittopdir(f)
        if dir:
            if dir not in self._dirs:
                return default
            return self._dirs[dir].get(subpath, default)
        else:
            return self._files.get(f, default)

    def __getitem__(self, f):
        """Return the node of ``f`` (KeyError when it is absent)."""
        dir, subpath = _splittopdir(f)
        if dir:
            return self._dirs[dir].__getitem__(subpath)
        else:
            return self._files[f]

    def flags(self, f, default=''):
        """Return the flags of ``f``, or ``default`` when none are recorded.

        ``default`` mirrors manifestdict.flags(key, default='') so both
        manifest classes accept the same calls; omitting it keeps the
        previous behaviour of returning ''.
        """
        dir, subpath = _splittopdir(f)
        if dir:
            if dir not in self._dirs:
                return default
            return self._dirs[dir].flags(subpath, default)
        else:
            if f in self._dirs:
                return default
            return self._flags.get(f, default)

    def find(self, f):
        """Return the (node, flags) pair of ``f`` (KeyError when absent)."""
        dir, subpath = _splittopdir(f)
        if dir:
            return self._dirs[dir].find(subpath)
        else:
            return self._files[f], self._flags.get(f, '')

    def __delitem__(self, f):
        """Remove ``f``, pruning its directory node if that becomes empty."""
        dir, subpath = _splittopdir(f)
        if dir:
            self._dirs[dir].__delitem__(subpath)
            # If the directory is now empty, remove it
            if self._dirs[dir]._isempty():
                del self._dirs[dir]
        else:
            del self._files[f]
            if f in self._flags:
                del self._flags[f]

    def __setitem__(self, f, n):
        """Set the node of ``f``, creating directory nodes as needed."""
        assert n is not None
        dir, subpath = _splittopdir(f)
        if dir:
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(self._subpath(dir))
            self._dirs[dir].__setitem__(subpath, n)
        else:
            self._files[f] = n[:21] # to match manifestdict's behavior

    def setflag(self, f, flags):
        """Set the flags (symlink, executable) for path f."""
        dir, subpath = _splittopdir(f)
        if dir:
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(self._subpath(dir))
            self._dirs[dir].setflag(subpath, flags)
        else:
            self._flags[f] = flags

    def copy(self):
        """Return a copy of this tree; subtrees are copied recursively."""
        copy = treemanifest(self._dir)
        for d in self._dirs:
            copy._dirs[d] = self._dirs[d].copy()
        copy._files = dict.copy(self._files)
        copy._flags = dict.copy(self._flags)
        return copy

    def filesnotin(self, m2):
        '''Set of files in this manifest that are not in the other'''
        files = set()
        def _filesnotin(t1, t2):
            for d, m1 in t1._dirs.iteritems():
                if d in t2._dirs:
                    m2 = t2._dirs[d]
                    _filesnotin(m1, m2)
                else:
                    # whole directory is missing from the other side
                    files.update(m1.iterkeys())

            for fn in t1._files.iterkeys():
                if fn not in t2._files:
                    files.add(t1._subpath(fn))

        _filesnotin(self, m2)
        return files

    @propertycache
    def _alldirs(self):
        return scmutil.dirs(self)

    def dirs(self):
        """Return the cached scmutil.dirs object for this manifest."""
        return self._alldirs

    def hasdir(self, dir):
        """Return True if directory ``dir`` (no trailing slash) has a node."""
        topdir, subdir = _splittopdir(dir)
        if topdir:
            if topdir in self._dirs:
                return self._dirs[topdir].hasdir(subdir)
            return False
        return (dir + '/') in self._dirs

    def matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        m = self.copy()
        # keys() returns a list, so deleting while looping is safe
        for fn in m.keys():
            if not match(fn):
                del m[fn]
        return m

    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        '''
        result = {}
        # stands in for a directory that exists on only one side
        emptytree = treemanifest()
        def _diff(t1, t2):
            for d, m1 in t1._dirs.iteritems():
                m2 = t2._dirs.get(d, emptytree)
                _diff(m1, m2)

            for d, m2 in t2._dirs.iteritems():
                if d not in t1._dirs:
                    _diff(emptytree, m2)

            for fn, n1 in t1._files.iteritems():
                fl1 = t1._flags.get(fn, '')
                n2 = t2._files.get(fn, None)
                fl2 = t2._flags.get(fn, '')
                if n1 != n2 or fl1 != fl2:
                    result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
                elif clean:
                    result[t1._subpath(fn)] = None

            for fn, n2 in t2._files.iteritems():
                if fn not in t1._files:
                    fl2 = t2._flags.get(fn, '')
                    result[t2._subpath(fn)] = ((None, ''), (n2, fl2))

        _diff(self, m2)
        return result

    def text(self):
        """Get the full data of this manifest as a bytestring."""
        flags = self.flags
        return _text((f, self[f], flags(f)) for f in self.keys())
574 578
class manifest(revlog.revlog):
    """Revlog holding manifests, stored in "00manifest.i".

    Recently used manifests are kept in self._mancache, mapping a node
    to a (manifest object, array.array of its text) pair.
    """

    def __init__(self, opener):
        """Open the manifest revlog, reading settings from opener.options
        when the opener has that attribute."""
        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob below.
        cachesize = 4
        usetree = False
        usev2 = False
        opts = getattr(opener, 'options', None)
        if opts is not None:
            cachesize = opts.get('manifestcachesize', cachesize)
            usetree = opts.get('usetreemanifest', usetree)
            usev2 = opts.get('usemanifestv2', usev2)
        self._mancache = util.lrucachedict(cachesize)
        revlog.revlog.__init__(self, opener, "00manifest.i")
        self._usetreemanifest = usetree
        self._usemanifestv2 = usev2

    def _newmanifest(self, data=''):
        """Build a treemanifest or manifestdict from ``data``, depending
        on the usetreemanifest setting."""
        if not self._usetreemanifest:
            return manifestdict(data)
        return treemanifest('', data)

    def _slowreaddelta(self, node):
        """Return the entries of ``node`` that differ from its delta
        parent, computed by diffing the two full manifests."""
        parentrev = self.deltaparent(self.rev(node))
        old = self.read(self.node(parentrev))
        new = self.read(node)
        changed = self._newmanifest()
        for f, pair in old.diff(new).iteritems():
            (n0, fl0), (n1, fl1) = pair
            if n1:
                changed[f] = n1
                if fl1:
                    changed.setflag(f, fl1)
        return changed

    def readdelta(self, node):
        """Return a manifest built from the delta between ``node`` and
        its delta parent."""
        if self._usemanifestv2:
            return self._slowreaddelta(node)
        rev = self.rev(node)
        rawdelta = self.revdiff(self.deltaparent(rev), rev)
        return self._newmanifest(mdiff.patchtext(rawdelta))

    def readfast(self, node):
        '''use the faster of readdelta or read'''
        rev = self.rev(node)
        base = self.deltaparent(rev)
        if base == revlog.nullrev or base not in self.parentrevs(rev):
            return self.read(node)
        return self.readdelta(node)

    def read(self, node):
        """Return the manifest for ``node``, going through the cache."""
        if node == revlog.nullid:
            return self._newmanifest() # don't upset local cache
        if node in self._mancache:
            cached = self._mancache[node]
            return cached[0]
        text = self.revision(node)
        arraytext = array.array('c', text)
        loaded = self._newmanifest(text)
        self._mancache[node] = (loaded, arraytext)
        return loaded

    def find(self, node, f):
        '''look up entry for a single file efficiently.
        return (node, flags) pair if found, (None, None) if not.'''
        mf = self.read(node)
        try:
            return mf.find(f)
        except KeyError:
            return None, None

    def add(self, m, transaction, link, p1, p2, added, removed):
        """Store manifest ``m`` as a new revision and return its node.

        ``added`` and ``removed`` list the files changed relative to p1;
        they are only used when a delta against the cached p1 text can
        be computed directly.
        """
        candelta = (p1 in self._mancache and not self._usetreemanifest
                    and not self._usemanifestv2)
        if not candelta:
            # The first parent manifest isn't already loaded, so we'll
            # just encode a fulltext of the manifest and pass that
            # through to the revlog layer, and let it handle the delta
            # process.
            text = m.text()
            arraytext = array.array('c', text)
            cachedelta = None
        else:
            # If our first parent is in the manifest cache, we can
            # compute a delta here using properties we know about the
            # manifest up-front, which may save time later for the
            # revlog layer.

            _checkforbidden(added)
            # combine the changed lists into one list for sorting
            work = [(fn, False) for fn in added]
            work.extend((fn, True) for fn in removed)
            # this could use heapq.merge() (from Python 2.6+) or equivalent
            # since the lists are already sorted
            work.sort()

            basetext = self._mancache[p1][1]
            arraytext, deltatext = m.fastdelta(basetext, work)
            cachedelta = self.rev(p1), deltatext
            text = util.buffer(arraytext)

        n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
        self._mancache[n] = (m, arraytext)

        return n
General Comments 0
You need to be logged in to leave comments. Login now