##// END OF EJS Templates
treemanifest: lazily load manifests...
Martin von Zweigbergk -
r25222:0de132d5 default
parent child Browse files
Show More
@@ -1,975 +1,1022
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util
10 10 import array, struct
11 11 import os
12 12
13 13 propertycache = util.propertycache
14 14
15 15 def _parsev1(data):
16 16 # This method does a little bit of excessive-looking
17 17 # precondition checking. This is so that the behavior of this
18 18 # class exactly matches its C counterpart to try and help
19 19 # prevent surprise breakage for anyone that develops against
20 20 # the pure version.
21 21 if data and data[-1] != '\n':
22 22 raise ValueError('Manifest did not end in a newline.')
23 23 prev = None
24 24 for l in data.splitlines():
25 25 if prev is not None and prev > l:
26 26 raise ValueError('Manifest lines not in sorted order.')
27 27 prev = l
28 28 f, n = l.split('\0')
29 29 if len(n) > 40:
30 30 yield f, revlog.bin(n[:40]), n[40:]
31 31 else:
32 32 yield f, revlog.bin(n), ''
33 33
34 34 def _parsev2(data):
35 35 metadataend = data.find('\n')
36 36 # Just ignore metadata for now
37 37 pos = metadataend + 1
38 38 prevf = ''
39 39 while pos < len(data):
40 40 end = data.find('\n', pos + 1) # +1 to skip stem length byte
41 41 if end == -1:
42 42 raise ValueError('Manifest ended with incomplete file entry.')
43 43 stemlen = ord(data[pos])
44 44 items = data[pos + 1:end].split('\0')
45 45 f = prevf[:stemlen] + items[0]
46 46 if prevf > f:
47 47 raise ValueError('Manifest entries not in sorted order.')
48 48 fl = items[1]
49 49 # Just ignore metadata (items[2:] for now)
50 50 n = data[end + 1:end + 21]
51 51 yield f, n, fl
52 52 pos = end + 22
53 53 prevf = f
54 54
55 55 def _parse(data):
56 56 """Generates (path, node, flags) tuples from a manifest text"""
57 57 if data.startswith('\0'):
58 58 return iter(_parsev2(data))
59 59 else:
60 60 return iter(_parsev1(data))
61 61
62 62 def _text(it, usemanifestv2):
63 63 """Given an iterator over (path, node, flags) tuples, returns a manifest
64 64 text"""
65 65 if usemanifestv2:
66 66 return _textv2(it)
67 67 else:
68 68 return _textv1(it)
69 69
70 70 def _textv1(it):
71 71 files = []
72 72 lines = []
73 73 _hex = revlog.hex
74 74 for f, n, fl in it:
75 75 files.append(f)
76 76 # if this is changed to support newlines in filenames,
77 77 # be sure to check the templates/ dir again (especially *-raw.tmpl)
78 78 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
79 79
80 80 _checkforbidden(files)
81 81 return ''.join(lines)
82 82
83 83 def _textv2(it):
84 84 files = []
85 85 lines = ['\0\n']
86 86 prevf = ''
87 87 for f, n, fl in it:
88 88 files.append(f)
89 89 stem = os.path.commonprefix([prevf, f])
90 90 stemlen = min(len(stem), 255)
91 91 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
92 92 prevf = f
93 93 _checkforbidden(files)
94 94 return ''.join(lines)
95 95
96 96 class _lazymanifest(dict):
97 97 """This is the pure implementation of lazymanifest.
98 98
99 99 It has not been optimized *at all* and is not lazy.
100 100 """
101 101
102 102 def __init__(self, data):
103 103 dict.__init__(self)
104 104 for f, n, fl in _parse(data):
105 105 self[f] = n, fl
106 106
107 107 def __setitem__(self, k, v):
108 108 node, flag = v
109 109 assert node is not None
110 110 if len(node) > 21:
111 111 node = node[:21] # match c implementation behavior
112 112 dict.__setitem__(self, k, (node, flag))
113 113
114 114 def __iter__(self):
115 115 return iter(sorted(dict.keys(self)))
116 116
117 117 def iterkeys(self):
118 118 return iter(sorted(dict.keys(self)))
119 119
120 120 def iterentries(self):
121 121 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
122 122
123 123 def copy(self):
124 124 c = _lazymanifest('')
125 125 c.update(self)
126 126 return c
127 127
128 128 def diff(self, m2, clean=False):
129 129 '''Finds changes between the current manifest and m2.'''
130 130 diff = {}
131 131
132 132 for fn, e1 in self.iteritems():
133 133 if fn not in m2:
134 134 diff[fn] = e1, (None, '')
135 135 else:
136 136 e2 = m2[fn]
137 137 if e1 != e2:
138 138 diff[fn] = e1, e2
139 139 elif clean:
140 140 diff[fn] = None
141 141
142 142 for fn, e2 in m2.iteritems():
143 143 if fn not in self:
144 144 diff[fn] = (None, ''), e2
145 145
146 146 return diff
147 147
148 148 def filtercopy(self, filterfn):
149 149 c = _lazymanifest('')
150 150 for f, n, fl in self.iterentries():
151 151 if filterfn(f):
152 152 c[f] = n, fl
153 153 return c
154 154
155 155 def text(self):
156 156 """Get the full data of this manifest as a bytestring."""
157 157 return _textv1(self.iterentries())
158 158
159 159 try:
160 160 _lazymanifest = parsers.lazymanifest
161 161 except AttributeError:
162 162 pass
163 163
164 164 class manifestdict(object):
165 165 def __init__(self, data=''):
166 166 if data.startswith('\0'):
167 167 #_lazymanifest can not parse v2
168 168 self._lm = _lazymanifest('')
169 169 for f, n, fl in _parsev2(data):
170 170 self._lm[f] = n, fl
171 171 else:
172 172 self._lm = _lazymanifest(data)
173 173
174 174 def __getitem__(self, key):
175 175 return self._lm[key][0]
176 176
177 177 def find(self, key):
178 178 return self._lm[key]
179 179
180 180 def __len__(self):
181 181 return len(self._lm)
182 182
183 183 def __setitem__(self, key, node):
184 184 self._lm[key] = node, self.flags(key, '')
185 185
186 186 def __contains__(self, key):
187 187 return key in self._lm
188 188
189 189 def __delitem__(self, key):
190 190 del self._lm[key]
191 191
192 192 def __iter__(self):
193 193 return self._lm.__iter__()
194 194
195 195 def iterkeys(self):
196 196 return self._lm.iterkeys()
197 197
198 198 def keys(self):
199 199 return list(self.iterkeys())
200 200
201 201 def filesnotin(self, m2):
202 202 '''Set of files in this manifest that are not in the other'''
203 203 files = set(self)
204 204 files.difference_update(m2)
205 205 return files
206 206
207 207 @propertycache
208 208 def _dirs(self):
209 209 return util.dirs(self)
210 210
211 211 def dirs(self):
212 212 return self._dirs
213 213
214 214 def hasdir(self, dir):
215 215 return dir in self._dirs
216 216
217 217 def _filesfastpath(self, match):
218 218 '''Checks whether we can correctly and quickly iterate over matcher
219 219 files instead of over manifest files.'''
220 220 files = match.files()
221 221 return (len(files) < 100 and (match.isexact() or
222 222 (not match.anypats() and all(fn in self for fn in files))))
223 223
224 224 def walk(self, match):
225 225 '''Generates matching file names.
226 226
227 227 Equivalent to manifest.matches(match).iterkeys(), but without creating
228 228 an entirely new manifest.
229 229
230 230 It also reports nonexistent files by marking them bad with match.bad().
231 231 '''
232 232 if match.always():
233 233 for f in iter(self):
234 234 yield f
235 235 return
236 236
237 237 fset = set(match.files())
238 238
239 239 # avoid the entire walk if we're only looking for specific files
240 240 if self._filesfastpath(match):
241 241 for fn in sorted(fset):
242 242 yield fn
243 243 return
244 244
245 245 for fn in self:
246 246 if fn in fset:
247 247 # specified pattern is the exact name
248 248 fset.remove(fn)
249 249 if match(fn):
250 250 yield fn
251 251
252 252 # for dirstate.walk, files=['.'] means "walk the whole tree".
253 253 # follow that here, too
254 254 fset.discard('.')
255 255
256 256 for fn in sorted(fset):
257 257 if not self.hasdir(fn):
258 258 match.bad(fn, None)
259 259
260 260 def matches(self, match):
261 261 '''generate a new manifest filtered by the match argument'''
262 262 if match.always():
263 263 return self.copy()
264 264
265 265 if self._filesfastpath(match):
266 266 m = manifestdict()
267 267 lm = self._lm
268 268 for fn in match.files():
269 269 if fn in lm:
270 270 m._lm[fn] = lm[fn]
271 271 return m
272 272
273 273 m = manifestdict()
274 274 m._lm = self._lm.filtercopy(match)
275 275 return m
276 276
277 277 def diff(self, m2, clean=False):
278 278 '''Finds changes between the current manifest and m2.
279 279
280 280 Args:
281 281 m2: the manifest to which this manifest should be compared.
282 282 clean: if true, include files unchanged between these manifests
283 283 with a None value in the returned dictionary.
284 284
285 285 The result is returned as a dict with filename as key and
286 286 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
287 287 nodeid in the current/other manifest and fl1/fl2 is the flag
288 288 in the current/other manifest. Where the file does not exist,
289 289 the nodeid will be None and the flags will be the empty
290 290 string.
291 291 '''
292 292 return self._lm.diff(m2._lm, clean)
293 293
294 294 def setflag(self, key, flag):
295 295 self._lm[key] = self[key], flag
296 296
297 297 def get(self, key, default=None):
298 298 try:
299 299 return self._lm[key][0]
300 300 except KeyError:
301 301 return default
302 302
303 303 def flags(self, key, default=''):
304 304 try:
305 305 return self._lm[key][1]
306 306 except KeyError:
307 307 return default
308 308
309 309 def copy(self):
310 310 c = manifestdict()
311 311 c._lm = self._lm.copy()
312 312 return c
313 313
314 314 def iteritems(self):
315 315 return (x[:2] for x in self._lm.iterentries())
316 316
317 317 def text(self, usemanifestv2=False):
318 318 if usemanifestv2:
319 319 return _textv2(self._lm.iterentries())
320 320 else:
321 321 # use (probably) native version for v1
322 322 return self._lm.text()
323 323
324 324 def fastdelta(self, base, changes):
325 325 """Given a base manifest text as an array.array and a list of changes
326 326 relative to that text, compute a delta that can be used by revlog.
327 327 """
328 328 delta = []
329 329 dstart = None
330 330 dend = None
331 331 dline = [""]
332 332 start = 0
333 333 # zero copy representation of base as a buffer
334 334 addbuf = util.buffer(base)
335 335
336 336 # start with a readonly loop that finds the offset of
337 337 # each line and creates the deltas
338 338 for f, todelete in changes:
339 339 # bs will either be the index of the item or the insert point
340 340 start, end = _msearch(addbuf, f, start)
341 341 if not todelete:
342 342 h, fl = self._lm[f]
343 343 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
344 344 else:
345 345 if start == end:
346 346 # item we want to delete was not found, error out
347 347 raise AssertionError(
348 348 _("failed to remove %s from manifest") % f)
349 349 l = ""
350 350 if dstart is not None and dstart <= start and dend >= start:
351 351 if dend < end:
352 352 dend = end
353 353 if l:
354 354 dline.append(l)
355 355 else:
356 356 if dstart is not None:
357 357 delta.append([dstart, dend, "".join(dline)])
358 358 dstart = start
359 359 dend = end
360 360 dline = [l]
361 361
362 362 if dstart is not None:
363 363 delta.append([dstart, dend, "".join(dline)])
364 364 # apply the delta to the base, and get a delta for addrevision
365 365 deltatext, arraytext = _addlistdelta(base, delta)
366 366 return arraytext, deltatext
367 367
368 368 def _msearch(m, s, lo=0, hi=None):
369 369 '''return a tuple (start, end) that says where to find s within m.
370 370
371 371 If the string is found m[start:end] are the line containing
372 372 that string. If start == end the string was not found and
373 373 they indicate the proper sorted insertion point.
374 374
375 375 m should be a buffer or a string
376 376 s is a string'''
377 377 def advance(i, c):
378 378 while i < lenm and m[i] != c:
379 379 i += 1
380 380 return i
381 381 if not s:
382 382 return (lo, lo)
383 383 lenm = len(m)
384 384 if not hi:
385 385 hi = lenm
386 386 while lo < hi:
387 387 mid = (lo + hi) // 2
388 388 start = mid
389 389 while start > 0 and m[start - 1] != '\n':
390 390 start -= 1
391 391 end = advance(start, '\0')
392 392 if m[start:end] < s:
393 393 # we know that after the null there are 40 bytes of sha1
394 394 # this translates to the bisect lo = mid + 1
395 395 lo = advance(end + 40, '\n') + 1
396 396 else:
397 397 # this translates to the bisect hi = mid
398 398 hi = start
399 399 end = advance(lo, '\0')
400 400 found = m[lo:end]
401 401 if s == found:
402 402 # we know that after the null there are 40 bytes of sha1
403 403 end = advance(end + 40, '\n')
404 404 return (lo, end + 1)
405 405 else:
406 406 return (lo, lo)
407 407
408 408 def _checkforbidden(l):
409 409 """Check filenames for illegal characters."""
410 410 for f in l:
411 411 if '\n' in f or '\r' in f:
412 412 raise error.RevlogError(
413 413 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
414 414
415 415
416 416 # apply the changes collected during the bisect loop to our addlist
417 417 # return a delta suitable for addrevision
418 418 def _addlistdelta(addlist, x):
419 419 # for large addlist arrays, building a new array is cheaper
420 420 # than repeatedly modifying the existing one
421 421 currentposition = 0
422 422 newaddlist = array.array('c')
423 423
424 424 for start, end, content in x:
425 425 newaddlist += addlist[currentposition:start]
426 426 if content:
427 427 newaddlist += array.array('c', content)
428 428
429 429 currentposition = end
430 430
431 431 newaddlist += addlist[currentposition:]
432 432
433 433 deltatext = "".join(struct.pack(">lll", start, end, len(content))
434 434 + content for start, end, content in x)
435 435 return deltatext, newaddlist
436 436
437 437 def _splittopdir(f):
438 438 if '/' in f:
439 439 dir, subpath = f.split('/', 1)
440 440 return dir + '/', subpath
441 441 else:
442 442 return '', f
443 443
444 _noop = lambda: None
445
444 446 class treemanifest(object):
445 447 def __init__(self, dir='', text=''):
446 448 self._dir = dir
447 449 self._node = revlog.nullid
450 self._load = _noop
448 451 self._dirty = False
449 452 self._dirs = {}
450 453 # Using _lazymanifest here is a little slower than plain old dicts
451 454 self._files = {}
452 455 self._flags = {}
453 456 if text:
454 457 def readsubtree(subdir, subm):
455 458 raise AssertionError('treemanifest constructor only accepts '
456 459 'flat manifests')
457 460 self.parse(text, readsubtree)
458 461 self._dirty = True # Mark flat manifest dirty after parsing
459 462
460 463 def _subpath(self, path):
461 464 return self._dir + path
462 465
463 466 def __len__(self):
467 self._load()
464 468 size = len(self._files)
465 469 for m in self._dirs.values():
466 470 size += m.__len__()
467 471 return size
468 472
469 473 def _isempty(self):
474 self._load() # for consistency; already loaded by all callers
470 475 return (not self._files and (not self._dirs or
471 476 all(m._isempty() for m in self._dirs.values())))
472 477
473 478 def __str__(self):
474 return ('<treemanifest dir=%s, node=%s, dirty=%s>' %
475 (self._dir, revlog.hex(self._node), self._dirty))
479 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s>' %
480 (self._dir, revlog.hex(self._node),
481 bool(self._load is _noop),
482 self._dirty))
476 483
477 484 def dir(self):
478 485 '''The directory that this tree manifest represents, including a
479 486 trailing '/'. Empty string for the repo root directory.'''
480 487 return self._dir
481 488
482 489 def node(self):
483 490 '''This node of this instance. nullid for unsaved instances. Should
484 491 be updated when the instance is read or written from a revlog.
485 492 '''
486 493 assert not self._dirty
487 494 return self._node
488 495
489 496 def setnode(self, node):
490 497 self._node = node
491 498 self._dirty = False
492 499
493 500 def iteritems(self):
501 self._load()
494 502 for p, n in sorted(self._dirs.items() + self._files.items()):
495 503 if p in self._files:
496 504 yield self._subpath(p), n
497 505 else:
498 506 for f, sn in n.iteritems():
499 507 yield f, sn
500 508
501 509 def iterkeys(self):
510 self._load()
502 511 for p in sorted(self._dirs.keys() + self._files.keys()):
503 512 if p in self._files:
504 513 yield self._subpath(p)
505 514 else:
506 515 for f in self._dirs[p].iterkeys():
507 516 yield f
508 517
509 518 def keys(self):
510 519 return list(self.iterkeys())
511 520
512 521 def __iter__(self):
513 522 return self.iterkeys()
514 523
515 524 def __contains__(self, f):
516 525 if f is None:
517 526 return False
527 self._load()
518 528 dir, subpath = _splittopdir(f)
519 529 if dir:
520 530 if dir not in self._dirs:
521 531 return False
522 532 return self._dirs[dir].__contains__(subpath)
523 533 else:
524 534 return f in self._files
525 535
526 536 def get(self, f, default=None):
537 self._load()
527 538 dir, subpath = _splittopdir(f)
528 539 if dir:
529 540 if dir not in self._dirs:
530 541 return default
531 542 return self._dirs[dir].get(subpath, default)
532 543 else:
533 544 return self._files.get(f, default)
534 545
535 546 def __getitem__(self, f):
547 self._load()
536 548 dir, subpath = _splittopdir(f)
537 549 if dir:
538 550 return self._dirs[dir].__getitem__(subpath)
539 551 else:
540 552 return self._files[f]
541 553
542 554 def flags(self, f):
555 self._load()
543 556 dir, subpath = _splittopdir(f)
544 557 if dir:
545 558 if dir not in self._dirs:
546 559 return ''
547 560 return self._dirs[dir].flags(subpath)
548 561 else:
549 562 if f in self._dirs:
550 563 return ''
551 564 return self._flags.get(f, '')
552 565
553 566 def find(self, f):
567 self._load()
554 568 dir, subpath = _splittopdir(f)
555 569 if dir:
556 570 return self._dirs[dir].find(subpath)
557 571 else:
558 572 return self._files[f], self._flags.get(f, '')
559 573
560 574 def __delitem__(self, f):
575 self._load()
561 576 dir, subpath = _splittopdir(f)
562 577 if dir:
563 578 self._dirs[dir].__delitem__(subpath)
564 579 # If the directory is now empty, remove it
565 580 if self._dirs[dir]._isempty():
566 581 del self._dirs[dir]
567 582 else:
568 583 del self._files[f]
569 584 if f in self._flags:
570 585 del self._flags[f]
571 586 self._dirty = True
572 587
573 588 def __setitem__(self, f, n):
574 589 assert n is not None
590 self._load()
575 591 dir, subpath = _splittopdir(f)
576 592 if dir:
577 593 if dir not in self._dirs:
578 594 self._dirs[dir] = treemanifest(self._subpath(dir))
579 595 self._dirs[dir].__setitem__(subpath, n)
580 596 else:
581 597 self._files[f] = n[:21] # to match manifestdict's behavior
582 598 self._dirty = True
583 599
584 600 def setflag(self, f, flags):
585 601 """Set the flags (symlink, executable) for path f."""
586 602 assert 'd' not in flags
603 self._load()
587 604 dir, subpath = _splittopdir(f)
588 605 if dir:
589 606 if dir not in self._dirs:
590 607 self._dirs[dir] = treemanifest(self._subpath(dir))
591 608 self._dirs[dir].setflag(subpath, flags)
592 609 else:
593 610 self._flags[f] = flags
594 611 self._dirty = True
595 612
596 613 def copy(self):
597 614 copy = treemanifest(self._dir)
598 615 copy._node = self._node
599 616 copy._dirty = self._dirty
600 for d in self._dirs:
601 copy._dirs[d] = self._dirs[d].copy()
602 copy._files = dict.copy(self._files)
603 copy._flags = dict.copy(self._flags)
617 def _load():
618 self._load()
619 for d in self._dirs:
620 copy._dirs[d] = self._dirs[d].copy()
621 copy._files = dict.copy(self._files)
622 copy._flags = dict.copy(self._flags)
623 copy._load = _noop
624 copy._load = _load
625 if self._load == _noop:
626 # Chaining _load if it's _noop is functionally correct, but the
627 # chain may end up excessively long (stack overflow), and
628 # will prevent garbage collection of 'self'.
629 copy._load()
604 630 return copy
605 631
606 632 def filesnotin(self, m2):
607 633 '''Set of files in this manifest that are not in the other'''
608 634 files = set()
609 635 def _filesnotin(t1, t2):
610 636 if t1._node == t2._node and not t1._dirty and not t2._dirty:
611 637 return
638 t1._load()
639 t2._load()
612 640 for d, m1 in t1._dirs.iteritems():
613 641 if d in t2._dirs:
614 642 m2 = t2._dirs[d]
615 643 _filesnotin(m1, m2)
616 644 else:
617 645 files.update(m1.iterkeys())
618 646
619 647 for fn in t1._files.iterkeys():
620 648 if fn not in t2._files:
621 649 files.add(t1._subpath(fn))
622 650
623 651 _filesnotin(self, m2)
624 652 return files
625 653
626 654 @propertycache
627 655 def _alldirs(self):
628 656 return util.dirs(self)
629 657
630 658 def dirs(self):
631 659 return self._alldirs
632 660
633 661 def hasdir(self, dir):
662 self._load()
634 663 topdir, subdir = _splittopdir(dir)
635 664 if topdir:
636 665 if topdir in self._dirs:
637 666 return self._dirs[topdir].hasdir(subdir)
638 667 return False
639 668 return (dir + '/') in self._dirs
640 669
641 670 def walk(self, match):
642 671 '''Generates matching file names.
643 672
644 673 Equivalent to manifest.matches(match).iterkeys(), but without creating
645 674 an entirely new manifest.
646 675
647 676 It also reports nonexistent files by marking them bad with match.bad().
648 677 '''
649 678 if match.always():
650 679 for f in iter(self):
651 680 yield f
652 681 return
653 682
654 683 fset = set(match.files())
655 684
656 685 for fn in self._walk(match):
657 686 if fn in fset:
658 687 # specified pattern is the exact name
659 688 fset.remove(fn)
660 689 yield fn
661 690
662 691 # for dirstate.walk, files=['.'] means "walk the whole tree".
663 692 # follow that here, too
664 693 fset.discard('.')
665 694
666 695 for fn in sorted(fset):
667 696 if not self.hasdir(fn):
668 697 match.bad(fn, None)
669 698
670 699 def _walk(self, match):
671 700 '''Recursively generates matching file names for walk().'''
672 701 if not match.visitdir(self._dir[:-1] or '.'):
673 702 return
674 703
675 704 # yield this dir's files and walk its submanifests
705 self._load()
676 706 for p in sorted(self._dirs.keys() + self._files.keys()):
677 707 if p in self._files:
678 708 fullp = self._subpath(p)
679 709 if match(fullp):
680 710 yield fullp
681 711 else:
682 712 for f in self._dirs[p]._walk(match):
683 713 yield f
684 714
685 715 def matches(self, match):
686 716 '''generate a new manifest filtered by the match argument'''
687 717 if match.always():
688 718 return self.copy()
689 719
690 720 return self._matches(match)
691 721
692 722 def _matches(self, match):
693 723 '''recursively generate a new manifest filtered by the match argument.
694 724 '''
695 725 ret = treemanifest(self._dir)
696 726
697 727 if not match.visitdir(self._dir[:-1] or '.'):
698 728 return ret
699 729
730 self._load()
700 731 for fn in self._files:
701 732 fullp = self._subpath(fn)
702 733 if not match(fullp):
703 734 continue
704 735 ret._files[fn] = self._files[fn]
705 736 if fn in self._flags:
706 737 ret._flags[fn] = self._flags[fn]
707 738
708 739 for dir, subm in self._dirs.iteritems():
709 740 m = subm._matches(match)
710 741 if not m._isempty():
711 742 ret._dirs[dir] = m
712 743
713 744 if not ret._isempty():
714 745 ret._dirty = True
715 746 return ret
716 747
717 748 def diff(self, m2, clean=False):
718 749 '''Finds changes between the current manifest and m2.
719 750
720 751 Args:
721 752 m2: the manifest to which this manifest should be compared.
722 753 clean: if true, include files unchanged between these manifests
723 754 with a None value in the returned dictionary.
724 755
725 756 The result is returned as a dict with filename as key and
726 757 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
727 758 nodeid in the current/other manifest and fl1/fl2 is the flag
728 759 in the current/other manifest. Where the file does not exist,
729 760 the nodeid will be None and the flags will be the empty
730 761 string.
731 762 '''
732 763 result = {}
733 764 emptytree = treemanifest()
734 765 def _diff(t1, t2):
735 766 if t1._node == t2._node and not t1._dirty and not t2._dirty:
736 767 return
768 t1._load()
769 t2._load()
737 770 for d, m1 in t1._dirs.iteritems():
738 771 m2 = t2._dirs.get(d, emptytree)
739 772 _diff(m1, m2)
740 773
741 774 for d, m2 in t2._dirs.iteritems():
742 775 if d not in t1._dirs:
743 776 _diff(emptytree, m2)
744 777
745 778 for fn, n1 in t1._files.iteritems():
746 779 fl1 = t1._flags.get(fn, '')
747 780 n2 = t2._files.get(fn, None)
748 781 fl2 = t2._flags.get(fn, '')
749 782 if n1 != n2 or fl1 != fl2:
750 783 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
751 784 elif clean:
752 785 result[t1._subpath(fn)] = None
753 786
754 787 for fn, n2 in t2._files.iteritems():
755 788 if fn not in t1._files:
756 789 fl2 = t2._flags.get(fn, '')
757 790 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
758 791
759 792 _diff(self, m2)
760 793 return result
761 794
762 795 def unmodifiedsince(self, m2):
763 796 return not self._dirty and not m2._dirty and self._node == m2._node
764 797
765 798 def parse(self, text, readsubtree):
766 799 for f, n, fl in _parse(text):
767 800 if fl == 'd':
768 801 f = f + '/'
769 802 self._dirs[f] = readsubtree(self._subpath(f), n)
770 803 elif '/' in f:
771 804 # This is a flat manifest, so use __setitem__ and setflag rather
772 805 # than assigning directly to _files and _flags, so we can
773 806 # assign a path in a subdirectory, and to mark dirty (compared
774 807 # to nullid).
775 808 self[f] = n
776 809 if fl:
777 810 self.setflag(f, fl)
778 811 else:
779 812 # Assigning to _files and _flags avoids marking as dirty,
780 813 # and should be a little faster.
781 814 self._files[f] = n
782 815 if fl:
783 816 self._flags[f] = fl
784 817
785 818 def text(self, usemanifestv2=False):
786 819 """Get the full data of this manifest as a bytestring."""
820 self._load()
787 821 flags = self.flags
788 822 return _text(((f, self[f], flags(f)) for f in self.keys()),
789 823 usemanifestv2)
790 824
791 825 def dirtext(self, usemanifestv2=False):
792 826 """Get the full data of this directory as a bytestring. Make sure that
793 827 any submanifests have been written first, so their nodeids are correct.
794 828 """
829 self._load()
795 830 flags = self.flags
796 831 dirs = [(d[:-1], self._dirs[d]._node, 'd') for d in self._dirs]
797 832 files = [(f, self._files[f], flags(f)) for f in self._files]
798 833 return _text(sorted(dirs + files), usemanifestv2)
799 834
835 def read(self, gettext, readsubtree):
836 def _load():
837 # Mark as loaded already here, so __setitem__ and setflag() don't
838 # cause infinite loops when they try to load.
839 self._load = _noop
840 self.parse(gettext(), readsubtree)
841 self._dirty = False
842 self._load = _load
843
800 844 def writesubtrees(self, m1, m2, writesubtree):
845 self._load() # for consistency; should never have any effect here
801 846 emptytree = treemanifest()
802 847 for d, subm in self._dirs.iteritems():
803 848 subp1 = m1._dirs.get(d, emptytree)._node
804 849 subp2 = m2._dirs.get(d, emptytree)._node
805 850 if subp1 == revlog.nullid:
806 851 subp1, subp2 = subp2, subp1
807 852 writesubtree(subm, subp1, subp2)
808 853
809 854 class manifest(revlog.revlog):
810 855 def __init__(self, opener, dir='', dirlogcache=None):
811 856 '''The 'dir' and 'dirlogcache' arguments are for internal use by
812 857 manifest.manifest only. External users should create a root manifest
813 858 log with manifest.manifest(opener) and call dirlog() on it.
814 859 '''
815 860 # During normal operations, we expect to deal with not more than four
816 861 # revs at a time (such as during commit --amend). When rebasing large
817 862 # stacks of commits, the number can go up, hence the config knob below.
818 863 cachesize = 4
819 864 usetreemanifest = False
820 865 usemanifestv2 = False
821 866 opts = getattr(opener, 'options', None)
822 867 if opts is not None:
823 868 cachesize = opts.get('manifestcachesize', cachesize)
824 869 usetreemanifest = opts.get('treemanifest', usetreemanifest)
825 870 usemanifestv2 = opts.get('manifestv2', usemanifestv2)
826 871 self._mancache = util.lrucachedict(cachesize)
827 872 self._treeinmem = usetreemanifest
828 873 self._treeondisk = usetreemanifest
829 874 self._usemanifestv2 = usemanifestv2
830 875 indexfile = "00manifest.i"
831 876 if dir:
832 877 assert self._treeondisk
833 878 if not dir.endswith('/'):
834 879 dir = dir + '/'
835 880 indexfile = "meta/" + dir + "00manifest.i"
836 881 revlog.revlog.__init__(self, opener, indexfile)
837 882 self._dir = dir
838 883 # The dirlogcache is kept on the root manifest log
839 884 if dir:
840 885 self._dirlogcache = dirlogcache
841 886 else:
842 887 self._dirlogcache = {'': self}
843 888
844 889 def _newmanifest(self, data=''):
845 890 if self._treeinmem:
846 891 return treemanifest(self._dir, data)
847 892 return manifestdict(data)
848 893
849 894 def dirlog(self, dir):
850 895 assert self._treeondisk
851 896 if dir not in self._dirlogcache:
852 897 self._dirlogcache[dir] = manifest(self.opener, dir,
853 898 self._dirlogcache)
854 899 return self._dirlogcache[dir]
855 900
856 901 def _slowreaddelta(self, node):
857 902 r0 = self.deltaparent(self.rev(node))
858 903 m0 = self.read(self.node(r0))
859 904 m1 = self.read(node)
860 905 md = self._newmanifest()
861 906 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
862 907 if n1:
863 908 md[f] = n1
864 909 if fl1:
865 910 md.setflag(f, fl1)
866 911 return md
867 912
868 913 def readdelta(self, node):
869 914 if self._usemanifestv2 or self._treeondisk:
870 915 return self._slowreaddelta(node)
871 916 r = self.rev(node)
872 917 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
873 918 return self._newmanifest(d)
874 919
875 920 def readfast(self, node):
876 921 '''use the faster of readdelta or read
877 922
878 923 This will return a manifest which is either only the files
879 924 added/modified relative to p1, or all files in the
880 925 manifest. Which one is returned depends on the codepath used
881 926 to retrieve the data.
882 927 '''
883 928 r = self.rev(node)
884 929 deltaparent = self.deltaparent(r)
885 930 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
886 931 return self.readdelta(node)
887 932 return self.read(node)
888 933
889 934 def read(self, node):
890 935 if node == revlog.nullid:
891 936 return self._newmanifest() # don't upset local cache
892 937 if node in self._mancache:
893 938 return self._mancache[node][0]
894 text = self.revision(node)
895 939 if self._treeondisk:
940 def gettext():
941 return self.revision(node)
896 942 def readsubtree(dir, subm):
897 943 return self.dirlog(dir).read(subm)
898 944 m = self._newmanifest()
899 m.parse(text, readsubtree)
945 m.read(gettext, readsubtree)
900 946 m.setnode(node)
901 947 arraytext = None
902 948 else:
949 text = self.revision(node)
903 950 m = self._newmanifest(text)
904 951 arraytext = array.array('c', text)
905 952 self._mancache[node] = (m, arraytext)
906 953 return m
907 954
908 955 def find(self, node, f):
909 956 '''look up entry for a single file efficiently.
910 957 return (node, flags) pair if found, (None, None) if not.'''
911 958 m = self.read(node)
912 959 try:
913 960 return m.find(f)
914 961 except KeyError:
915 962 return None, None
916 963
917 964 def add(self, m, transaction, link, p1, p2, added, removed):
918 965 if (p1 in self._mancache and not self._treeinmem
919 966 and not self._usemanifestv2):
920 967 # If our first parent is in the manifest cache, we can
921 968 # compute a delta here using properties we know about the
922 969 # manifest up-front, which may save time later for the
923 970 # revlog layer.
924 971
925 972 _checkforbidden(added)
926 973 # combine the changed lists into one list for sorting
927 974 work = [(x, False) for x in added]
928 975 work.extend((x, True) for x in removed)
929 976 # this could use heapq.merge() (from Python 2.6+) or equivalent
930 977 # since the lists are already sorted
931 978 work.sort()
932 979
933 980 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
934 981 cachedelta = self.rev(p1), deltatext
935 982 text = util.buffer(arraytext)
936 983 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
937 984 else:
938 985 # The first parent manifest isn't already loaded, so we'll
939 986 # just encode a fulltext of the manifest and pass that
940 987 # through to the revlog layer, and let it handle the delta
941 988 # process.
942 989 if self._treeondisk:
943 990 m1 = self.read(p1)
944 991 m2 = self.read(p2)
945 992 n = self._addtree(m, transaction, link, m1, m2)
946 993 arraytext = None
947 994 else:
948 995 text = m.text(self._usemanifestv2)
949 996 n = self.addrevision(text, transaction, link, p1, p2)
950 997 arraytext = array.array('c', text)
951 998
952 999 self._mancache[n] = (m, arraytext)
953 1000
954 1001 return n
955 1002
956 1003 def _addtree(self, m, transaction, link, m1, m2):
957 1004 # If the manifest is unchanged compared to one parent,
958 1005 # don't write a new revision
959 1006 if m.unmodifiedsince(m1) or m.unmodifiedsince(m2):
960 1007 return m.node()
961 1008 def writesubtree(subm, subp1, subp2):
962 1009 sublog = self.dirlog(subm.dir())
963 1010 sublog.add(subm, transaction, link, subp1, subp2, None, None)
964 1011 m.writesubtrees(m1, m2, writesubtree)
965 1012 text = m.dirtext(self._usemanifestv2)
966 1013 # Double-check whether contents are unchanged to one parent
967 1014 if text == m1.dirtext(self._usemanifestv2):
968 1015 n = m1.node()
969 1016 elif text == m2.dirtext(self._usemanifestv2):
970 1017 n = m2.node()
971 1018 else:
972 1019 n = self.addrevision(text, transaction, link, m1.node(), m2.node())
973 1020 # Save nodeid so parent manifest can calculate its nodeid
974 1021 m.setnode(n)
975 1022 return n
@@ -1,278 +1,282
1 1
2 2 Set up repo
3 3
4 4 $ hg --config experimental.treemanifest=True init repo
5 5 $ cd repo
6 6
7 7 Requirements get set on init
8 8
9 9 $ grep treemanifest .hg/requires
10 10 treemanifest
11 11
12 12 Without directories, looks like any other repo
13 13
14 14 $ echo 0 > a
15 15 $ echo 0 > b
16 16 $ hg ci -Aqm initial
17 17 $ hg debugdata -m 0
18 18 a\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
19 19 b\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
20 20
21 21 Submanifest is stored in separate revlog
22 22
23 23 $ mkdir dir1
24 24 $ echo 1 > dir1/a
25 25 $ echo 1 > dir1/b
26 26 $ echo 1 > e
27 27 $ hg ci -Aqm 'add dir1'
28 28 $ hg debugdata -m 1
29 29 a\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
30 30 b\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
31 31 dir1\x008b3ffd73f901e83304c83d33132c8e774ceac44ed (esc)
32 32 e\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc)
33 33 $ hg debugdata --dir dir1 0
34 34 a\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc)
35 35 b\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc)
36 36
37 37 Can add nested directories
38 38
39 39 $ mkdir dir1/dir1
40 40 $ echo 2 > dir1/dir1/a
41 41 $ echo 2 > dir1/dir1/b
42 42 $ mkdir dir1/dir2
43 43 $ echo 2 > dir1/dir2/a
44 44 $ echo 2 > dir1/dir2/b
45 45 $ hg ci -Aqm 'add dir1/dir1'
46 46 $ hg files -r .
47 47 a
48 48 b
49 49 dir1/a (glob)
50 50 dir1/b (glob)
51 51 dir1/dir1/a (glob)
52 52 dir1/dir1/b (glob)
53 53 dir1/dir2/a (glob)
54 54 dir1/dir2/b (glob)
55 55 e
56 56
57 57 Revision is not created for unchanged directory
58 58
59 59 $ mkdir dir2
60 60 $ echo 3 > dir2/a
61 61 $ hg add dir2
62 62 adding dir2/a (glob)
63 63 $ hg debugindex --dir dir1 > before
64 64 $ hg ci -qm 'add dir2'
65 65 $ hg debugindex --dir dir1 > after
66 66 $ diff before after
67 67 $ rm before after
68 68
69 69 Removing directory does not create an revlog entry
70 70
71 71 $ hg rm dir1/dir1
72 72 removing dir1/dir1/a (glob)
73 73 removing dir1/dir1/b (glob)
74 74 $ hg debugindex --dir dir1/dir1 > before
75 75 $ hg ci -qm 'remove dir1/dir1'
76 76 $ hg debugindex --dir dir1/dir1 > after
77 77 $ diff before after
78 78 $ rm before after
79 79
80 80 Check that hg files (calls treemanifest.walk()) works
81 without loading all directory revlogs
81 82
82 83 $ hg co 'desc("add dir2")'
83 84 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
85 $ mv .hg/store/meta/dir2 .hg/store/meta/dir2-backup
84 86 $ hg files -r . dir1
85 87 dir1/a (glob)
86 88 dir1/b (glob)
87 89 dir1/dir1/a (glob)
88 90 dir1/dir1/b (glob)
89 91 dir1/dir2/a (glob)
90 92 dir1/dir2/b (glob)
91 93
92 94 Check that status between revisions works (calls treemanifest.matches())
95 without loading all directory revlogs
93 96
94 97 $ hg status --rev 'desc("add dir1")' --rev . dir1
95 98 A dir1/dir1/a
96 99 A dir1/dir1/b
97 100 A dir1/dir2/a
98 101 A dir1/dir2/b
102 $ mv .hg/store/meta/dir2-backup .hg/store/meta/dir2
99 103
100 104 Merge creates 2-parent revision of directory revlog
101 105
102 106 $ echo 5 > dir1/a
103 107 $ hg ci -Aqm 'modify dir1/a'
104 108 $ hg co '.^'
105 109 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
106 110 $ echo 6 > dir1/b
107 111 $ hg ci -Aqm 'modify dir1/b'
108 112 $ hg merge 'desc("modify dir1/a")'
109 113 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
110 114 (branch merge, don't forget to commit)
111 115 $ hg ci -m 'conflict-free merge involving dir1/'
112 116 $ cat dir1/a
113 117 5
114 118 $ cat dir1/b
115 119 6
116 120 $ hg debugindex --dir dir1
117 121 rev offset length base linkrev nodeid p1 p2
118 122 0 0 54 0 1 8b3ffd73f901 000000000000 000000000000
119 123 1 54 68 0 2 b66d046c644f 8b3ffd73f901 000000000000
120 124 2 122 12 0 4 b87265673c8a b66d046c644f 000000000000
121 125 3 134 95 0 5 aa5d3adcec72 b66d046c644f 000000000000
122 126 4 229 81 0 6 e29b066b91ad b66d046c644f 000000000000
123 127 5 310 107 5 7 a120ce2b83f5 e29b066b91ad aa5d3adcec72
124 128
125 129 Merge keeping directory from parent 1 does not create revlog entry. (Note that
126 130 dir1's manifest does change, but only because dir1/a's filelog changes.)
127 131
128 132 $ hg co 'desc("add dir2")'
129 133 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
130 134 $ echo 8 > dir2/a
131 135 $ hg ci -m 'modify dir2/a'
132 136 created new head
133 137
134 138 $ hg debugindex --dir dir2 > before
135 139 $ hg merge 'desc("modify dir1/a")'
136 140 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
137 141 (branch merge, don't forget to commit)
138 142 $ hg revert -r 'desc("modify dir2/a")' .
139 143 reverting dir1/a (glob)
140 144 $ hg ci -m 'merge, keeping parent 1'
141 145 $ hg debugindex --dir dir2 > after
142 146 $ diff before after
143 147 $ rm before after
144 148
145 149 Merge keeping directory from parent 2 does not create revlog entry. (Note that
146 150 dir2's manifest does change, but only because dir2/a's filelog changes.)
147 151
148 152 $ hg co 'desc("modify dir2/a")'
149 153 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
150 154 $ hg debugindex --dir dir1 > before
151 155 $ hg merge 'desc("modify dir1/a")'
152 156 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
153 157 (branch merge, don't forget to commit)
154 158 $ hg revert -r 'desc("modify dir1/a")' .
155 159 reverting dir2/a (glob)
156 160 $ hg ci -m 'merge, keeping parent 2'
157 161 created new head
158 162 $ hg debugindex --dir dir1 > after
159 163 $ diff before after
160 164 $ rm before after
161 165
162 166 Create flat source repo for tests with mixed flat/tree manifests
163 167
164 168 $ cd ..
165 169 $ hg init repo-flat
166 170 $ cd repo-flat
167 171
168 172 Create a few commits with flat manifest
169 173
170 174 $ echo 0 > a
171 175 $ echo 0 > b
172 176 $ echo 0 > e
173 177 $ for d in dir1 dir1/dir1 dir1/dir2 dir2
174 178 > do
175 179 > mkdir $d
176 180 > echo 0 > $d/a
177 181 > echo 0 > $d/b
178 182 > done
179 183 $ hg ci -Aqm initial
180 184
181 185 $ echo 1 > a
182 186 $ echo 1 > dir1/a
183 187 $ echo 1 > dir1/dir1/a
184 188 $ hg ci -Aqm 'modify on branch 1'
185 189
186 190 $ hg co 0
187 191 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
188 192 $ echo 2 > b
189 193 $ echo 2 > dir1/b
190 194 $ echo 2 > dir1/dir1/b
191 195 $ hg ci -Aqm 'modify on branch 2'
192 196
193 197 $ hg merge 1
194 198 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
195 199 (branch merge, don't forget to commit)
196 200 $ hg ci -m 'merge of flat manifests to new flat manifest'
197 201
198 202 Create clone with tree manifests enabled
199 203
200 204 $ cd ..
201 205 $ hg clone --pull --config experimental.treemanifest=1 repo-flat repo-mixed
202 206 requesting all changes
203 207 adding changesets
204 208 adding manifests
205 209 adding file changes
206 210 added 4 changesets with 17 changes to 11 files
207 211 updating to branch default
208 212 11 files updated, 0 files merged, 0 files removed, 0 files unresolved
209 213 $ cd repo-mixed
210 214 $ test -f .hg/store/meta
211 215 [1]
212 216 $ grep treemanifest .hg/requires
213 217 treemanifest
214 218
215 219 Commit should store revlog per directory
216 220
217 221 $ hg co 1
218 222 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
219 223 $ echo 3 > a
220 224 $ echo 3 > dir1/a
221 225 $ echo 3 > dir1/dir1/a
222 226 $ hg ci -m 'first tree'
223 227 created new head
224 228 $ find .hg/store/meta | sort
225 229 .hg/store/meta
226 230 .hg/store/meta/dir1
227 231 .hg/store/meta/dir1/00manifest.i
228 232 .hg/store/meta/dir1/dir1
229 233 .hg/store/meta/dir1/dir1/00manifest.i
230 234 .hg/store/meta/dir1/dir2
231 235 .hg/store/meta/dir1/dir2/00manifest.i
232 236 .hg/store/meta/dir2
233 237 .hg/store/meta/dir2/00manifest.i
234 238
235 239 Merge of two trees
236 240
237 241 $ hg co 2
238 242 6 files updated, 0 files merged, 0 files removed, 0 files unresolved
239 243 $ hg merge 1
240 244 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
241 245 (branch merge, don't forget to commit)
242 246 $ hg ci -m 'merge of flat manifests to new tree manifest'
243 247 created new head
244 248 $ hg diff -r 3
245 249
246 250 Parent of tree root manifest should be flat manifest, and two for merge
247 251
248 252 $ hg debugindex -m
249 253 rev offset length base linkrev nodeid p1 p2
250 254 0 0 80 0 0 40536115ed9e 000000000000 000000000000
251 255 1 80 83 0 1 f3376063c255 40536115ed9e 000000000000
252 256 2 163 103 0 2 5d9b9da231a2 40536115ed9e 000000000000
253 257 3 266 83 0 3 d17d663cbd8a 5d9b9da231a2 f3376063c255
254 258 4 349 132 4 4 c05a51345f86 f3376063c255 000000000000
255 259 5 481 110 4 5 82594b1f557d 5d9b9da231a2 f3376063c255
256 260
257 261
258 262 Status across flat/tree boundary should work
259 263
260 264 $ hg status --rev '.^' --rev .
261 265 M a
262 266 M dir1/a
263 267 M dir1/dir1/a
264 268
265 269
266 270 Turning off treemanifest config has no effect
267 271
268 272 $ hg debugindex .hg/store/meta/dir1/00manifest.i
269 273 rev offset length base linkrev nodeid p1 p2
270 274 0 0 125 0 4 63c9c0557d24 000000000000 000000000000
271 275 1 125 109 0 5 23d12a1f6e0e 000000000000 000000000000
272 276 $ echo 2 > dir1/a
273 277 $ hg --config experimental.treemanifest=False ci -qm 'modify dir1/a'
274 278 $ hg debugindex .hg/store/meta/dir1/00manifest.i
275 279 rev offset length base linkrev nodeid p1 p2
276 280 0 0 125 0 4 63c9c0557d24 000000000000 000000000000
277 281 1 125 109 0 5 23d12a1f6e0e 000000000000 000000000000
278 282 2 234 55 0 6 3cb2d87b4250 23d12a1f6e0e 000000000000
General Comments 0
You need to be logged in to leave comments. Login now