##// END OF EJS Templates
manifest: move treeinmem onto manifestlog...
Durham Goode -
r29959:483003c2 default
parent child Browse files
Show More
@@ -1,1296 +1,1296
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import heapq
12 12 import os
13 13 import struct
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 error,
18 18 mdiff,
19 19 parsers,
20 20 revlog,
21 21 util,
22 22 )
23 23
24 24 propertycache = util.propertycache
25 25
26 26 def _parsev1(data):
27 27 # This method does a little bit of excessive-looking
28 28 # precondition checking. This is so that the behavior of this
29 29 # class exactly matches its C counterpart to try and help
30 30 # prevent surprise breakage for anyone that develops against
31 31 # the pure version.
32 32 if data and data[-1] != '\n':
33 33 raise ValueError('Manifest did not end in a newline.')
34 34 prev = None
35 35 for l in data.splitlines():
36 36 if prev is not None and prev > l:
37 37 raise ValueError('Manifest lines not in sorted order.')
38 38 prev = l
39 39 f, n = l.split('\0')
40 40 if len(n) > 40:
41 41 yield f, revlog.bin(n[:40]), n[40:]
42 42 else:
43 43 yield f, revlog.bin(n), ''
44 44
45 45 def _parsev2(data):
46 46 metadataend = data.find('\n')
47 47 # Just ignore metadata for now
48 48 pos = metadataend + 1
49 49 prevf = ''
50 50 while pos < len(data):
51 51 end = data.find('\n', pos + 1) # +1 to skip stem length byte
52 52 if end == -1:
53 53 raise ValueError('Manifest ended with incomplete file entry.')
54 54 stemlen = ord(data[pos])
55 55 items = data[pos + 1:end].split('\0')
56 56 f = prevf[:stemlen] + items[0]
57 57 if prevf > f:
58 58 raise ValueError('Manifest entries not in sorted order.')
59 59 fl = items[1]
60 60 # Just ignore metadata (items[2:] for now)
61 61 n = data[end + 1:end + 21]
62 62 yield f, n, fl
63 63 pos = end + 22
64 64 prevf = f
65 65
66 66 def _parse(data):
67 67 """Generates (path, node, flags) tuples from a manifest text"""
68 68 if data.startswith('\0'):
69 69 return iter(_parsev2(data))
70 70 else:
71 71 return iter(_parsev1(data))
72 72
73 73 def _text(it, usemanifestv2):
74 74 """Given an iterator over (path, node, flags) tuples, returns a manifest
75 75 text"""
76 76 if usemanifestv2:
77 77 return _textv2(it)
78 78 else:
79 79 return _textv1(it)
80 80
81 81 def _textv1(it):
82 82 files = []
83 83 lines = []
84 84 _hex = revlog.hex
85 85 for f, n, fl in it:
86 86 files.append(f)
87 87 # if this is changed to support newlines in filenames,
88 88 # be sure to check the templates/ dir again (especially *-raw.tmpl)
89 89 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
90 90
91 91 _checkforbidden(files)
92 92 return ''.join(lines)
93 93
94 94 def _textv2(it):
95 95 files = []
96 96 lines = ['\0\n']
97 97 prevf = ''
98 98 for f, n, fl in it:
99 99 files.append(f)
100 100 stem = os.path.commonprefix([prevf, f])
101 101 stemlen = min(len(stem), 255)
102 102 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
103 103 prevf = f
104 104 _checkforbidden(files)
105 105 return ''.join(lines)
106 106
107 107 class _lazymanifest(dict):
108 108 """This is the pure implementation of lazymanifest.
109 109
110 110 It has not been optimized *at all* and is not lazy.
111 111 """
112 112
113 113 def __init__(self, data):
114 114 dict.__init__(self)
115 115 for f, n, fl in _parse(data):
116 116 self[f] = n, fl
117 117
118 118 def __setitem__(self, k, v):
119 119 node, flag = v
120 120 assert node is not None
121 121 if len(node) > 21:
122 122 node = node[:21] # match c implementation behavior
123 123 dict.__setitem__(self, k, (node, flag))
124 124
125 125 def __iter__(self):
126 126 return iter(sorted(dict.keys(self)))
127 127
128 128 def iterkeys(self):
129 129 return iter(sorted(dict.keys(self)))
130 130
131 131 def iterentries(self):
132 132 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
133 133
134 134 def copy(self):
135 135 c = _lazymanifest('')
136 136 c.update(self)
137 137 return c
138 138
139 139 def diff(self, m2, clean=False):
140 140 '''Finds changes between the current manifest and m2.'''
141 141 diff = {}
142 142
143 143 for fn, e1 in self.iteritems():
144 144 if fn not in m2:
145 145 diff[fn] = e1, (None, '')
146 146 else:
147 147 e2 = m2[fn]
148 148 if e1 != e2:
149 149 diff[fn] = e1, e2
150 150 elif clean:
151 151 diff[fn] = None
152 152
153 153 for fn, e2 in m2.iteritems():
154 154 if fn not in self:
155 155 diff[fn] = (None, ''), e2
156 156
157 157 return diff
158 158
159 159 def filtercopy(self, filterfn):
160 160 c = _lazymanifest('')
161 161 for f, n, fl in self.iterentries():
162 162 if filterfn(f):
163 163 c[f] = n, fl
164 164 return c
165 165
166 166 def text(self):
167 167 """Get the full data of this manifest as a bytestring."""
168 168 return _textv1(self.iterentries())
169 169
170 170 try:
171 171 _lazymanifest = parsers.lazymanifest
172 172 except AttributeError:
173 173 pass
174 174
175 175 class manifestdict(object):
176 176 def __init__(self, data=''):
177 177 if data.startswith('\0'):
178 178 #_lazymanifest can not parse v2
179 179 self._lm = _lazymanifest('')
180 180 for f, n, fl in _parsev2(data):
181 181 self._lm[f] = n, fl
182 182 else:
183 183 self._lm = _lazymanifest(data)
184 184
185 185 def __getitem__(self, key):
186 186 return self._lm[key][0]
187 187
188 188 def find(self, key):
189 189 return self._lm[key]
190 190
191 191 def __len__(self):
192 192 return len(self._lm)
193 193
194 194 def __setitem__(self, key, node):
195 195 self._lm[key] = node, self.flags(key, '')
196 196
197 197 def __contains__(self, key):
198 198 return key in self._lm
199 199
200 200 def __delitem__(self, key):
201 201 del self._lm[key]
202 202
203 203 def __iter__(self):
204 204 return self._lm.__iter__()
205 205
206 206 def iterkeys(self):
207 207 return self._lm.iterkeys()
208 208
209 209 def keys(self):
210 210 return list(self.iterkeys())
211 211
212 212 def filesnotin(self, m2):
213 213 '''Set of files in this manifest that are not in the other'''
214 214 diff = self.diff(m2)
215 215 files = set(filepath
216 216 for filepath, hashflags in diff.iteritems()
217 217 if hashflags[1][0] is None)
218 218 return files
219 219
220 220 @propertycache
221 221 def _dirs(self):
222 222 return util.dirs(self)
223 223
224 224 def dirs(self):
225 225 return self._dirs
226 226
227 227 def hasdir(self, dir):
228 228 return dir in self._dirs
229 229
230 230 def _filesfastpath(self, match):
231 231 '''Checks whether we can correctly and quickly iterate over matcher
232 232 files instead of over manifest files.'''
233 233 files = match.files()
234 234 return (len(files) < 100 and (match.isexact() or
235 235 (match.prefix() and all(fn in self for fn in files))))
236 236
237 237 def walk(self, match):
238 238 '''Generates matching file names.
239 239
240 240 Equivalent to manifest.matches(match).iterkeys(), but without creating
241 241 an entirely new manifest.
242 242
243 243 It also reports nonexistent files by marking them bad with match.bad().
244 244 '''
245 245 if match.always():
246 246 for f in iter(self):
247 247 yield f
248 248 return
249 249
250 250 fset = set(match.files())
251 251
252 252 # avoid the entire walk if we're only looking for specific files
253 253 if self._filesfastpath(match):
254 254 for fn in sorted(fset):
255 255 yield fn
256 256 return
257 257
258 258 for fn in self:
259 259 if fn in fset:
260 260 # specified pattern is the exact name
261 261 fset.remove(fn)
262 262 if match(fn):
263 263 yield fn
264 264
265 265 # for dirstate.walk, files=['.'] means "walk the whole tree".
266 266 # follow that here, too
267 267 fset.discard('.')
268 268
269 269 for fn in sorted(fset):
270 270 if not self.hasdir(fn):
271 271 match.bad(fn, None)
272 272
273 273 def matches(self, match):
274 274 '''generate a new manifest filtered by the match argument'''
275 275 if match.always():
276 276 return self.copy()
277 277
278 278 if self._filesfastpath(match):
279 279 m = manifestdict()
280 280 lm = self._lm
281 281 for fn in match.files():
282 282 if fn in lm:
283 283 m._lm[fn] = lm[fn]
284 284 return m
285 285
286 286 m = manifestdict()
287 287 m._lm = self._lm.filtercopy(match)
288 288 return m
289 289
290 290 def diff(self, m2, clean=False):
291 291 '''Finds changes between the current manifest and m2.
292 292
293 293 Args:
294 294 m2: the manifest to which this manifest should be compared.
295 295 clean: if true, include files unchanged between these manifests
296 296 with a None value in the returned dictionary.
297 297
298 298 The result is returned as a dict with filename as key and
299 299 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
300 300 nodeid in the current/other manifest and fl1/fl2 is the flag
301 301 in the current/other manifest. Where the file does not exist,
302 302 the nodeid will be None and the flags will be the empty
303 303 string.
304 304 '''
305 305 return self._lm.diff(m2._lm, clean)
306 306
307 307 def setflag(self, key, flag):
308 308 self._lm[key] = self[key], flag
309 309
310 310 def get(self, key, default=None):
311 311 try:
312 312 return self._lm[key][0]
313 313 except KeyError:
314 314 return default
315 315
316 316 def flags(self, key, default=''):
317 317 try:
318 318 return self._lm[key][1]
319 319 except KeyError:
320 320 return default
321 321
322 322 def copy(self):
323 323 c = manifestdict()
324 324 c._lm = self._lm.copy()
325 325 return c
326 326
327 327 def iteritems(self):
328 328 return (x[:2] for x in self._lm.iterentries())
329 329
330 330 def iterentries(self):
331 331 return self._lm.iterentries()
332 332
333 333 def text(self, usemanifestv2=False):
334 334 if usemanifestv2:
335 335 return _textv2(self._lm.iterentries())
336 336 else:
337 337 # use (probably) native version for v1
338 338 return self._lm.text()
339 339
340 340 def fastdelta(self, base, changes):
341 341 """Given a base manifest text as an array.array and a list of changes
342 342 relative to that text, compute a delta that can be used by revlog.
343 343 """
344 344 delta = []
345 345 dstart = None
346 346 dend = None
347 347 dline = [""]
348 348 start = 0
349 349 # zero copy representation of base as a buffer
350 350 addbuf = util.buffer(base)
351 351
352 352 changes = list(changes)
353 353 if len(changes) < 1000:
354 354 # start with a readonly loop that finds the offset of
355 355 # each line and creates the deltas
356 356 for f, todelete in changes:
357 357 # bs will either be the index of the item or the insert point
358 358 start, end = _msearch(addbuf, f, start)
359 359 if not todelete:
360 360 h, fl = self._lm[f]
361 361 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
362 362 else:
363 363 if start == end:
364 364 # item we want to delete was not found, error out
365 365 raise AssertionError(
366 366 _("failed to remove %s from manifest") % f)
367 367 l = ""
368 368 if dstart is not None and dstart <= start and dend >= start:
369 369 if dend < end:
370 370 dend = end
371 371 if l:
372 372 dline.append(l)
373 373 else:
374 374 if dstart is not None:
375 375 delta.append([dstart, dend, "".join(dline)])
376 376 dstart = start
377 377 dend = end
378 378 dline = [l]
379 379
380 380 if dstart is not None:
381 381 delta.append([dstart, dend, "".join(dline)])
382 382 # apply the delta to the base, and get a delta for addrevision
383 383 deltatext, arraytext = _addlistdelta(base, delta)
384 384 else:
385 385 # For large changes, it's much cheaper to just build the text and
386 386 # diff it.
387 387 arraytext = array.array('c', self.text())
388 388 deltatext = mdiff.textdiff(base, arraytext)
389 389
390 390 return arraytext, deltatext
391 391
392 392 def _msearch(m, s, lo=0, hi=None):
393 393 '''return a tuple (start, end) that says where to find s within m.
394 394
395 395 If the string is found m[start:end] are the line containing
396 396 that string. If start == end the string was not found and
397 397 they indicate the proper sorted insertion point.
398 398
399 399 m should be a buffer or a string
400 400 s is a string'''
401 401 def advance(i, c):
402 402 while i < lenm and m[i] != c:
403 403 i += 1
404 404 return i
405 405 if not s:
406 406 return (lo, lo)
407 407 lenm = len(m)
408 408 if not hi:
409 409 hi = lenm
410 410 while lo < hi:
411 411 mid = (lo + hi) // 2
412 412 start = mid
413 413 while start > 0 and m[start - 1] != '\n':
414 414 start -= 1
415 415 end = advance(start, '\0')
416 416 if m[start:end] < s:
417 417 # we know that after the null there are 40 bytes of sha1
418 418 # this translates to the bisect lo = mid + 1
419 419 lo = advance(end + 40, '\n') + 1
420 420 else:
421 421 # this translates to the bisect hi = mid
422 422 hi = start
423 423 end = advance(lo, '\0')
424 424 found = m[lo:end]
425 425 if s == found:
426 426 # we know that after the null there are 40 bytes of sha1
427 427 end = advance(end + 40, '\n')
428 428 return (lo, end + 1)
429 429 else:
430 430 return (lo, lo)
431 431
432 432 def _checkforbidden(l):
433 433 """Check filenames for illegal characters."""
434 434 for f in l:
435 435 if '\n' in f or '\r' in f:
436 436 raise error.RevlogError(
437 437 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
438 438
439 439
440 440 # apply the changes collected during the bisect loop to our addlist
441 441 # return a delta suitable for addrevision
442 442 def _addlistdelta(addlist, x):
443 443 # for large addlist arrays, building a new array is cheaper
444 444 # than repeatedly modifying the existing one
445 445 currentposition = 0
446 446 newaddlist = array.array('c')
447 447
448 448 for start, end, content in x:
449 449 newaddlist += addlist[currentposition:start]
450 450 if content:
451 451 newaddlist += array.array('c', content)
452 452
453 453 currentposition = end
454 454
455 455 newaddlist += addlist[currentposition:]
456 456
457 457 deltatext = "".join(struct.pack(">lll", start, end, len(content))
458 458 + content for start, end, content in x)
459 459 return deltatext, newaddlist
460 460
461 461 def _splittopdir(f):
462 462 if '/' in f:
463 463 dir, subpath = f.split('/', 1)
464 464 return dir + '/', subpath
465 465 else:
466 466 return '', f
467 467
468 468 _noop = lambda s: None
469 469
470 470 class treemanifest(object):
471 471 def __init__(self, dir='', text=''):
472 472 self._dir = dir
473 473 self._node = revlog.nullid
474 474 self._loadfunc = _noop
475 475 self._copyfunc = _noop
476 476 self._dirty = False
477 477 self._dirs = {}
478 478 # Using _lazymanifest here is a little slower than plain old dicts
479 479 self._files = {}
480 480 self._flags = {}
481 481 if text:
482 482 def readsubtree(subdir, subm):
483 483 raise AssertionError('treemanifest constructor only accepts '
484 484 'flat manifests')
485 485 self.parse(text, readsubtree)
486 486 self._dirty = True # Mark flat manifest dirty after parsing
487 487
488 488 def _subpath(self, path):
489 489 return self._dir + path
490 490
491 491 def __len__(self):
492 492 self._load()
493 493 size = len(self._files)
494 494 for m in self._dirs.values():
495 495 size += m.__len__()
496 496 return size
497 497
498 498 def _isempty(self):
499 499 self._load() # for consistency; already loaded by all callers
500 500 return (not self._files and (not self._dirs or
501 501 all(m._isempty() for m in self._dirs.values())))
502 502
503 503 def __repr__(self):
504 504 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
505 505 (self._dir, revlog.hex(self._node),
506 506 bool(self._loadfunc is _noop),
507 507 self._dirty, id(self)))
508 508
509 509 def dir(self):
510 510 '''The directory that this tree manifest represents, including a
511 511 trailing '/'. Empty string for the repo root directory.'''
512 512 return self._dir
513 513
514 514 def node(self):
515 515 '''This node of this instance. nullid for unsaved instances. Should
516 516 be updated when the instance is read or written from a revlog.
517 517 '''
518 518 assert not self._dirty
519 519 return self._node
520 520
521 521 def setnode(self, node):
522 522 self._node = node
523 523 self._dirty = False
524 524
525 525 def iterentries(self):
526 526 self._load()
527 527 for p, n in sorted(self._dirs.items() + self._files.items()):
528 528 if p in self._files:
529 529 yield self._subpath(p), n, self._flags.get(p, '')
530 530 else:
531 531 for x in n.iterentries():
532 532 yield x
533 533
534 534 def iteritems(self):
535 535 self._load()
536 536 for p, n in sorted(self._dirs.items() + self._files.items()):
537 537 if p in self._files:
538 538 yield self._subpath(p), n
539 539 else:
540 540 for f, sn in n.iteritems():
541 541 yield f, sn
542 542
543 543 def iterkeys(self):
544 544 self._load()
545 545 for p in sorted(self._dirs.keys() + self._files.keys()):
546 546 if p in self._files:
547 547 yield self._subpath(p)
548 548 else:
549 549 for f in self._dirs[p].iterkeys():
550 550 yield f
551 551
552 552 def keys(self):
553 553 return list(self.iterkeys())
554 554
555 555 def __iter__(self):
556 556 return self.iterkeys()
557 557
558 558 def __contains__(self, f):
559 559 if f is None:
560 560 return False
561 561 self._load()
562 562 dir, subpath = _splittopdir(f)
563 563 if dir:
564 564 if dir not in self._dirs:
565 565 return False
566 566 return self._dirs[dir].__contains__(subpath)
567 567 else:
568 568 return f in self._files
569 569
570 570 def get(self, f, default=None):
571 571 self._load()
572 572 dir, subpath = _splittopdir(f)
573 573 if dir:
574 574 if dir not in self._dirs:
575 575 return default
576 576 return self._dirs[dir].get(subpath, default)
577 577 else:
578 578 return self._files.get(f, default)
579 579
580 580 def __getitem__(self, f):
581 581 self._load()
582 582 dir, subpath = _splittopdir(f)
583 583 if dir:
584 584 return self._dirs[dir].__getitem__(subpath)
585 585 else:
586 586 return self._files[f]
587 587
588 588 def flags(self, f):
589 589 self._load()
590 590 dir, subpath = _splittopdir(f)
591 591 if dir:
592 592 if dir not in self._dirs:
593 593 return ''
594 594 return self._dirs[dir].flags(subpath)
595 595 else:
596 596 if f in self._dirs:
597 597 return ''
598 598 return self._flags.get(f, '')
599 599
600 600 def find(self, f):
601 601 self._load()
602 602 dir, subpath = _splittopdir(f)
603 603 if dir:
604 604 return self._dirs[dir].find(subpath)
605 605 else:
606 606 return self._files[f], self._flags.get(f, '')
607 607
608 608 def __delitem__(self, f):
609 609 self._load()
610 610 dir, subpath = _splittopdir(f)
611 611 if dir:
612 612 self._dirs[dir].__delitem__(subpath)
613 613 # If the directory is now empty, remove it
614 614 if self._dirs[dir]._isempty():
615 615 del self._dirs[dir]
616 616 else:
617 617 del self._files[f]
618 618 if f in self._flags:
619 619 del self._flags[f]
620 620 self._dirty = True
621 621
622 622 def __setitem__(self, f, n):
623 623 assert n is not None
624 624 self._load()
625 625 dir, subpath = _splittopdir(f)
626 626 if dir:
627 627 if dir not in self._dirs:
628 628 self._dirs[dir] = treemanifest(self._subpath(dir))
629 629 self._dirs[dir].__setitem__(subpath, n)
630 630 else:
631 631 self._files[f] = n[:21] # to match manifestdict's behavior
632 632 self._dirty = True
633 633
634 634 def _load(self):
635 635 if self._loadfunc is not _noop:
636 636 lf, self._loadfunc = self._loadfunc, _noop
637 637 lf(self)
638 638 elif self._copyfunc is not _noop:
639 639 cf, self._copyfunc = self._copyfunc, _noop
640 640 cf(self)
641 641
642 642 def setflag(self, f, flags):
643 643 """Set the flags (symlink, executable) for path f."""
644 644 self._load()
645 645 dir, subpath = _splittopdir(f)
646 646 if dir:
647 647 if dir not in self._dirs:
648 648 self._dirs[dir] = treemanifest(self._subpath(dir))
649 649 self._dirs[dir].setflag(subpath, flags)
650 650 else:
651 651 self._flags[f] = flags
652 652 self._dirty = True
653 653
654 654 def copy(self):
655 655 copy = treemanifest(self._dir)
656 656 copy._node = self._node
657 657 copy._dirty = self._dirty
658 658 if self._copyfunc is _noop:
659 659 def _copyfunc(s):
660 660 self._load()
661 661 for d in self._dirs:
662 662 s._dirs[d] = self._dirs[d].copy()
663 663 s._files = dict.copy(self._files)
664 664 s._flags = dict.copy(self._flags)
665 665 if self._loadfunc is _noop:
666 666 _copyfunc(copy)
667 667 else:
668 668 copy._copyfunc = _copyfunc
669 669 else:
670 670 copy._copyfunc = self._copyfunc
671 671 return copy
672 672
673 673 def filesnotin(self, m2):
674 674 '''Set of files in this manifest that are not in the other'''
675 675 files = set()
676 676 def _filesnotin(t1, t2):
677 677 if t1._node == t2._node and not t1._dirty and not t2._dirty:
678 678 return
679 679 t1._load()
680 680 t2._load()
681 681 for d, m1 in t1._dirs.iteritems():
682 682 if d in t2._dirs:
683 683 m2 = t2._dirs[d]
684 684 _filesnotin(m1, m2)
685 685 else:
686 686 files.update(m1.iterkeys())
687 687
688 688 for fn in t1._files.iterkeys():
689 689 if fn not in t2._files:
690 690 files.add(t1._subpath(fn))
691 691
692 692 _filesnotin(self, m2)
693 693 return files
694 694
695 695 @propertycache
696 696 def _alldirs(self):
697 697 return util.dirs(self)
698 698
699 699 def dirs(self):
700 700 return self._alldirs
701 701
702 702 def hasdir(self, dir):
703 703 self._load()
704 704 topdir, subdir = _splittopdir(dir)
705 705 if topdir:
706 706 if topdir in self._dirs:
707 707 return self._dirs[topdir].hasdir(subdir)
708 708 return False
709 709 return (dir + '/') in self._dirs
710 710
711 711 def walk(self, match):
712 712 '''Generates matching file names.
713 713
714 714 Equivalent to manifest.matches(match).iterkeys(), but without creating
715 715 an entirely new manifest.
716 716
717 717 It also reports nonexistent files by marking them bad with match.bad().
718 718 '''
719 719 if match.always():
720 720 for f in iter(self):
721 721 yield f
722 722 return
723 723
724 724 fset = set(match.files())
725 725
726 726 for fn in self._walk(match):
727 727 if fn in fset:
728 728 # specified pattern is the exact name
729 729 fset.remove(fn)
730 730 yield fn
731 731
732 732 # for dirstate.walk, files=['.'] means "walk the whole tree".
733 733 # follow that here, too
734 734 fset.discard('.')
735 735
736 736 for fn in sorted(fset):
737 737 if not self.hasdir(fn):
738 738 match.bad(fn, None)
739 739
740 740 def _walk(self, match):
741 741 '''Recursively generates matching file names for walk().'''
742 742 if not match.visitdir(self._dir[:-1] or '.'):
743 743 return
744 744
745 745 # yield this dir's files and walk its submanifests
746 746 self._load()
747 747 for p in sorted(self._dirs.keys() + self._files.keys()):
748 748 if p in self._files:
749 749 fullp = self._subpath(p)
750 750 if match(fullp):
751 751 yield fullp
752 752 else:
753 753 for f in self._dirs[p]._walk(match):
754 754 yield f
755 755
756 756 def matches(self, match):
757 757 '''generate a new manifest filtered by the match argument'''
758 758 if match.always():
759 759 return self.copy()
760 760
761 761 return self._matches(match)
762 762
763 763 def _matches(self, match):
764 764 '''recursively generate a new manifest filtered by the match argument.
765 765 '''
766 766
767 767 visit = match.visitdir(self._dir[:-1] or '.')
768 768 if visit == 'all':
769 769 return self.copy()
770 770 ret = treemanifest(self._dir)
771 771 if not visit:
772 772 return ret
773 773
774 774 self._load()
775 775 for fn in self._files:
776 776 fullp = self._subpath(fn)
777 777 if not match(fullp):
778 778 continue
779 779 ret._files[fn] = self._files[fn]
780 780 if fn in self._flags:
781 781 ret._flags[fn] = self._flags[fn]
782 782
783 783 for dir, subm in self._dirs.iteritems():
784 784 m = subm._matches(match)
785 785 if not m._isempty():
786 786 ret._dirs[dir] = m
787 787
788 788 if not ret._isempty():
789 789 ret._dirty = True
790 790 return ret
791 791
792 792 def diff(self, m2, clean=False):
793 793 '''Finds changes between the current manifest and m2.
794 794
795 795 Args:
796 796 m2: the manifest to which this manifest should be compared.
797 797 clean: if true, include files unchanged between these manifests
798 798 with a None value in the returned dictionary.
799 799
800 800 The result is returned as a dict with filename as key and
801 801 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
802 802 nodeid in the current/other manifest and fl1/fl2 is the flag
803 803 in the current/other manifest. Where the file does not exist,
804 804 the nodeid will be None and the flags will be the empty
805 805 string.
806 806 '''
807 807 result = {}
808 808 emptytree = treemanifest()
809 809 def _diff(t1, t2):
810 810 if t1._node == t2._node and not t1._dirty and not t2._dirty:
811 811 return
812 812 t1._load()
813 813 t2._load()
814 814 for d, m1 in t1._dirs.iteritems():
815 815 m2 = t2._dirs.get(d, emptytree)
816 816 _diff(m1, m2)
817 817
818 818 for d, m2 in t2._dirs.iteritems():
819 819 if d not in t1._dirs:
820 820 _diff(emptytree, m2)
821 821
822 822 for fn, n1 in t1._files.iteritems():
823 823 fl1 = t1._flags.get(fn, '')
824 824 n2 = t2._files.get(fn, None)
825 825 fl2 = t2._flags.get(fn, '')
826 826 if n1 != n2 or fl1 != fl2:
827 827 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
828 828 elif clean:
829 829 result[t1._subpath(fn)] = None
830 830
831 831 for fn, n2 in t2._files.iteritems():
832 832 if fn not in t1._files:
833 833 fl2 = t2._flags.get(fn, '')
834 834 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
835 835
836 836 _diff(self, m2)
837 837 return result
838 838
839 839 def unmodifiedsince(self, m2):
840 840 return not self._dirty and not m2._dirty and self._node == m2._node
841 841
842 842 def parse(self, text, readsubtree):
843 843 for f, n, fl in _parse(text):
844 844 if fl == 't':
845 845 f = f + '/'
846 846 self._dirs[f] = readsubtree(self._subpath(f), n)
847 847 elif '/' in f:
848 848 # This is a flat manifest, so use __setitem__ and setflag rather
849 849 # than assigning directly to _files and _flags, so we can
850 850 # assign a path in a subdirectory, and to mark dirty (compared
851 851 # to nullid).
852 852 self[f] = n
853 853 if fl:
854 854 self.setflag(f, fl)
855 855 else:
856 856 # Assigning to _files and _flags avoids marking as dirty,
857 857 # and should be a little faster.
858 858 self._files[f] = n
859 859 if fl:
860 860 self._flags[f] = fl
861 861
862 862 def text(self, usemanifestv2=False):
863 863 """Get the full data of this manifest as a bytestring."""
864 864 self._load()
865 865 return _text(self.iterentries(), usemanifestv2)
866 866
867 867 def dirtext(self, usemanifestv2=False):
868 868 """Get the full data of this directory as a bytestring. Make sure that
869 869 any submanifests have been written first, so their nodeids are correct.
870 870 """
871 871 self._load()
872 872 flags = self.flags
873 873 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
874 874 files = [(f, self._files[f], flags(f)) for f in self._files]
875 875 return _text(sorted(dirs + files), usemanifestv2)
876 876
877 877 def read(self, gettext, readsubtree):
878 878 def _load_for_read(s):
879 879 s.parse(gettext(), readsubtree)
880 880 s._dirty = False
881 881 self._loadfunc = _load_for_read
882 882
883 883 def writesubtrees(self, m1, m2, writesubtree):
884 884 self._load() # for consistency; should never have any effect here
885 885 m1._load()
886 886 m2._load()
887 887 emptytree = treemanifest()
888 888 for d, subm in self._dirs.iteritems():
889 889 subp1 = m1._dirs.get(d, emptytree)._node
890 890 subp2 = m2._dirs.get(d, emptytree)._node
891 891 if subp1 == revlog.nullid:
892 892 subp1, subp2 = subp2, subp1
893 893 writesubtree(subm, subp1, subp2)
894 894
895 895 class manifestrevlog(revlog.revlog):
896 896 '''A revlog that stores manifest texts. This is responsible for caching the
897 897 full-text manifest contents.
898 898 '''
899 899 def __init__(self, opener, dir='', dirlogcache=None):
900 900 # During normal operations, we expect to deal with not more than four
901 901 # revs at a time (such as during commit --amend). When rebasing large
902 902 # stacks of commits, the number can go up, hence the config knob below.
903 903 cachesize = 4
904 904 usetreemanifest = False
905 905 usemanifestv2 = False
906 906 opts = getattr(opener, 'options', None)
907 907 if opts is not None:
908 908 cachesize = opts.get('manifestcachesize', cachesize)
909 909 usetreemanifest = opts.get('treemanifest', usetreemanifest)
910 910 usemanifestv2 = opts.get('manifestv2', usemanifestv2)
911 911
912 912 self._treeondisk = usetreemanifest
913 913 self._usemanifestv2 = usemanifestv2
914 914
915 915 self._fulltextcache = util.lrucachedict(cachesize)
916 916
917 917 indexfile = "00manifest.i"
918 918 if dir:
919 919 assert self._treeondisk, 'opts is %r' % opts
920 920 if not dir.endswith('/'):
921 921 dir = dir + '/'
922 922 indexfile = "meta/" + dir + "00manifest.i"
923 923 self._dir = dir
924 924 # The dirlogcache is kept on the root manifest log
925 925 if dir:
926 926 self._dirlogcache = dirlogcache
927 927 else:
928 928 self._dirlogcache = {'': self}
929 929
930 930 super(manifestrevlog, self).__init__(opener, indexfile)
931 931
932 932 @property
933 933 def fulltextcache(self):
934 934 return self._fulltextcache
935 935
936 936 def clearcaches(self):
937 937 super(manifestrevlog, self).clearcaches()
938 938 self._fulltextcache.clear()
939 939 self._dirlogcache = {'': self}
940 940
941 941 def dirlog(self, dir):
942 942 if dir:
943 943 assert self._treeondisk
944 944 if dir not in self._dirlogcache:
945 945 self._dirlogcache[dir] = manifestrevlog(self.opener, dir,
946 946 self._dirlogcache)
947 947 return self._dirlogcache[dir]
948 948
949 949 class manifestlog(object):
950 950 """A collection class representing the collection of manifest snapshots
951 951 referenced by commits in the repository.
952 952
953 953 In this situation, 'manifest' refers to the abstract concept of a snapshot
954 954 of the list of files in the given commit. Consumers of the output of this
955 955 class do not care about the implementation details of the actual manifests
956 956 they receive (i.e. tree or flat or lazily loaded, etc)."""
957 957 def __init__(self, opener, repo):
958 958 self._repo = repo
959 959
960 usetreemanifest = False
961
962 opts = getattr(opener, 'options', None)
963 if opts is not None:
964 usetreemanifest = opts.get('treemanifest', usetreemanifest)
965 self._treeinmem = usetreemanifest
966
960 967 # We'll separate this into it's own cache once oldmanifest is no longer
961 968 # used
962 969 self._mancache = repo.manifest._mancache
963 970
964 971 @property
965 972 def _revlog(self):
966 973 return self._repo.manifest
967 974
968 @property
969 def _oldmanifest(self):
970 # _revlog is the same as _oldmanifest right now, but we eventually want
971 # to delete _oldmanifest while still allowing manifestlog to access the
972 # revlog specific apis.
973 return self._repo.manifest
974
975 975 def __getitem__(self, node):
976 976 """Retrieves the manifest instance for the given node. Throws a KeyError
977 977 if not found.
978 978 """
979 979 if node in self._mancache:
980 980 cachemf = self._mancache[node]
981 981 # The old manifest may put non-ctx manifests in the cache, so skip
982 982 # those since they don't implement the full api.
983 983 if (isinstance(cachemf, manifestctx) or
984 984 isinstance(cachemf, treemanifestctx)):
985 985 return cachemf
986 986
987 if self._oldmanifest._treeinmem:
987 if self._treeinmem:
988 988 m = treemanifestctx(self._revlog, '', node)
989 989 else:
990 990 m = manifestctx(self._revlog, node)
991 991 if node != revlog.nullid:
992 992 self._mancache[node] = m
993 993 return m
994 994
995 995 class manifestctx(object):
996 996 """A class representing a single revision of a manifest, including its
997 997 contents, its parent revs, and its linkrev.
998 998 """
999 999 def __init__(self, revlog, node):
1000 1000 self._revlog = revlog
1001 1001 self._data = None
1002 1002
1003 1003 self._node = node
1004 1004
1005 1005 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1006 1006 # but let's add it later when something needs it and we can load it
1007 1007 # lazily.
1008 1008 #self.p1, self.p2 = revlog.parents(node)
1009 1009 #rev = revlog.rev(node)
1010 1010 #self.linkrev = revlog.linkrev(rev)
1011 1011
1012 1012 def node(self):
1013 1013 return self._node
1014 1014
1015 1015 def read(self):
1016 1016 if not self._data:
1017 1017 if self._node == revlog.nullid:
1018 1018 self._data = manifestdict()
1019 1019 else:
1020 1020 text = self._revlog.revision(self._node)
1021 1021 arraytext = array.array('c', text)
1022 1022 self._revlog._fulltextcache[self._node] = arraytext
1023 1023 self._data = manifestdict(text)
1024 1024 return self._data
1025 1025
1026 1026 def readfast(self):
1027 1027 rl = self._revlog
1028 1028 r = rl.rev(self._node)
1029 1029 deltaparent = rl.deltaparent(r)
1030 1030 if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
1031 1031 return self.readdelta()
1032 1032 return self.read()
1033 1033
1034 1034 def readdelta(self):
1035 1035 revlog = self._revlog
1036 1036 if revlog._usemanifestv2:
1037 1037 # Need to perform a slow delta
1038 1038 r0 = revlog.deltaparent(revlog.rev(self._node))
1039 1039 m0 = manifestctx(revlog, revlog.node(r0)).read()
1040 1040 m1 = self.read()
1041 1041 md = manifestdict()
1042 1042 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1043 1043 if n1:
1044 1044 md[f] = n1
1045 1045 if fl1:
1046 1046 md.setflag(f, fl1)
1047 1047 return md
1048 1048
1049 1049 r = revlog.rev(self._node)
1050 1050 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1051 1051 return manifestdict(d)
1052 1052
1053 1053 class treemanifestctx(object):
1054 1054 def __init__(self, revlog, dir, node):
1055 1055 revlog = revlog.dirlog(dir)
1056 1056 self._revlog = revlog
1057 1057 self._dir = dir
1058 1058 self._data = None
1059 1059
1060 1060 self._node = node
1061 1061
1062 1062 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
1063 1063 # we can instantiate treemanifestctx objects for directories we don't
1064 1064 # have on disk.
1065 1065 #self.p1, self.p2 = revlog.parents(node)
1066 1066 #rev = revlog.rev(node)
1067 1067 #self.linkrev = revlog.linkrev(rev)
1068 1068
1069 1069 def read(self):
1070 1070 if not self._data:
1071 1071 if self._node == revlog.nullid:
1072 1072 self._data = treemanifest()
1073 1073 elif self._revlog._treeondisk:
1074 1074 m = treemanifest(dir=self._dir)
1075 1075 def gettext():
1076 1076 return self._revlog.revision(self._node)
1077 1077 def readsubtree(dir, subm):
1078 1078 return treemanifestctx(self._revlog, dir, subm).read()
1079 1079 m.read(gettext, readsubtree)
1080 1080 m.setnode(self._node)
1081 1081 self._data = m
1082 1082 else:
1083 1083 text = self._revlog.revision(self._node)
1084 1084 arraytext = array.array('c', text)
1085 1085 self._revlog.fulltextcache[self._node] = arraytext
1086 1086 self._data = treemanifest(dir=self._dir, text=text)
1087 1087
1088 1088 return self._data
1089 1089
1090 1090 def node(self):
1091 1091 return self._node
1092 1092
1093 1093 def readdelta(self):
1094 1094 # Need to perform a slow delta
1095 1095 revlog = self._revlog
1096 1096 r0 = revlog.deltaparent(revlog.rev(self._node))
1097 1097 m0 = treemanifestctx(revlog, revlog.node(r0), dir=self._dir).read()
1098 1098 m1 = self.read()
1099 1099 md = treemanifest(dir=self._dir)
1100 1100 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1101 1101 if n1:
1102 1102 md[f] = n1
1103 1103 if fl1:
1104 1104 md.setflag(f, fl1)
1105 1105 return md
1106 1106
1107 1107 def readfast(self):
1108 1108 rl = self._revlog
1109 1109 r = rl.rev(self._node)
1110 1110 deltaparent = rl.deltaparent(r)
1111 1111 if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
1112 1112 return self.readdelta()
1113 1113 return self.read()
1114 1114
1115 1115 class manifest(manifestrevlog):
1116 1116 def __init__(self, opener, dir='', dirlogcache=None):
1117 1117 '''The 'dir' and 'dirlogcache' arguments are for internal use by
1118 1118 manifest.manifest only. External users should create a root manifest
1119 1119 log with manifest.manifest(opener) and call dirlog() on it.
1120 1120 '''
1121 1121 # During normal operations, we expect to deal with not more than four
1122 1122 # revs at a time (such as during commit --amend). When rebasing large
1123 1123 # stacks of commits, the number can go up, hence the config knob below.
1124 1124 cachesize = 4
1125 1125 usetreemanifest = False
1126 1126 opts = getattr(opener, 'options', None)
1127 1127 if opts is not None:
1128 1128 cachesize = opts.get('manifestcachesize', cachesize)
1129 1129 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1130 1130 self._mancache = util.lrucachedict(cachesize)
1131 1131 self._treeinmem = usetreemanifest
1132 1132 super(manifest, self).__init__(opener, dir=dir, dirlogcache=dirlogcache)
1133 1133
1134 1134 def _newmanifest(self, data=''):
1135 1135 if self._treeinmem:
1136 1136 return treemanifest(self._dir, data)
1137 1137 return manifestdict(data)
1138 1138
1139 1139 def dirlog(self, dir):
1140 1140 """This overrides the base revlog implementation to allow construction
1141 1141 'manifest' types instead of manifestrevlog types. This is only needed
1142 1142 until we migrate off the 'manifest' type."""
1143 1143 if dir:
1144 1144 assert self._treeondisk
1145 1145 if dir not in self._dirlogcache:
1146 1146 self._dirlogcache[dir] = manifest(self.opener, dir,
1147 1147 self._dirlogcache)
1148 1148 return self._dirlogcache[dir]
1149 1149
1150 1150 def _slowreaddelta(self, node):
1151 1151 r0 = self.deltaparent(self.rev(node))
1152 1152 m0 = self.read(self.node(r0))
1153 1153 m1 = self.read(node)
1154 1154 md = self._newmanifest()
1155 1155 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1156 1156 if n1:
1157 1157 md[f] = n1
1158 1158 if fl1:
1159 1159 md.setflag(f, fl1)
1160 1160 return md
1161 1161
1162 1162 def readdelta(self, node):
1163 1163 if self._usemanifestv2 or self._treeondisk:
1164 1164 return self._slowreaddelta(node)
1165 1165 r = self.rev(node)
1166 1166 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
1167 1167 return self._newmanifest(d)
1168 1168
1169 1169 def readshallowdelta(self, node):
1170 1170 '''For flat manifests, this is the same as readdelta(). For
1171 1171 treemanifests, this will read the delta for this revlog's directory,
1172 1172 without recursively reading subdirectory manifests. Instead, any
1173 1173 subdirectory entry will be reported as it appears in the manifests, i.e.
1174 1174 the subdirectory will be reported among files and distinguished only by
1175 1175 its 't' flag.'''
1176 1176 if not self._treeondisk:
1177 1177 return self.readdelta(node)
1178 1178 if self._usemanifestv2:
1179 1179 raise error.Abort(
1180 1180 _("readshallowdelta() not implemented for manifestv2"))
1181 1181 r = self.rev(node)
1182 1182 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
1183 1183 return manifestdict(d)
1184 1184
1185 1185 def readshallowfast(self, node):
1186 1186 '''like readfast(), but calls readshallowdelta() instead of readdelta()
1187 1187 '''
1188 1188 r = self.rev(node)
1189 1189 deltaparent = self.deltaparent(r)
1190 1190 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
1191 1191 return self.readshallowdelta(node)
1192 1192 return self.readshallow(node)
1193 1193
1194 1194 def read(self, node):
1195 1195 if node == revlog.nullid:
1196 1196 return self._newmanifest() # don't upset local cache
1197 1197 if node in self._mancache:
1198 1198 cached = self._mancache[node]
1199 1199 if (isinstance(cached, manifestctx) or
1200 1200 isinstance(cached, treemanifestctx)):
1201 1201 cached = cached.read()
1202 1202 return cached
1203 1203 if self._treeondisk:
1204 1204 def gettext():
1205 1205 return self.revision(node)
1206 1206 def readsubtree(dir, subm):
1207 1207 return self.dirlog(dir).read(subm)
1208 1208 m = self._newmanifest()
1209 1209 m.read(gettext, readsubtree)
1210 1210 m.setnode(node)
1211 1211 arraytext = None
1212 1212 else:
1213 1213 text = self.revision(node)
1214 1214 m = self._newmanifest(text)
1215 1215 arraytext = array.array('c', text)
1216 1216 self._mancache[node] = m
1217 1217 self.fulltextcache[node] = arraytext
1218 1218 return m
1219 1219
1220 1220 def readshallow(self, node):
1221 1221 '''Reads the manifest in this directory. When using flat manifests,
1222 1222 this manifest will generally have files in subdirectories in it. Does
1223 1223 not cache the manifest as the callers generally do not read the same
1224 1224 version twice.'''
1225 1225 return manifestdict(self.revision(node))
1226 1226
1227 1227 def find(self, node, f):
1228 1228 '''look up entry for a single file efficiently.
1229 1229 return (node, flags) pair if found, (None, None) if not.'''
1230 1230 m = self.read(node)
1231 1231 try:
1232 1232 return m.find(f)
1233 1233 except KeyError:
1234 1234 return None, None
1235 1235
1236 1236 def add(self, m, transaction, link, p1, p2, added, removed):
1237 1237 if (p1 in self.fulltextcache and not self._treeinmem
1238 1238 and not self._usemanifestv2):
1239 1239 # If our first parent is in the manifest cache, we can
1240 1240 # compute a delta here using properties we know about the
1241 1241 # manifest up-front, which may save time later for the
1242 1242 # revlog layer.
1243 1243
1244 1244 _checkforbidden(added)
1245 1245 # combine the changed lists into one sorted iterator
1246 1246 work = heapq.merge([(x, False) for x in added],
1247 1247 [(x, True) for x in removed])
1248 1248
1249 1249 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1250 1250 cachedelta = self.rev(p1), deltatext
1251 1251 text = util.buffer(arraytext)
1252 1252 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
1253 1253 else:
1254 1254 # The first parent manifest isn't already loaded, so we'll
1255 1255 # just encode a fulltext of the manifest and pass that
1256 1256 # through to the revlog layer, and let it handle the delta
1257 1257 # process.
1258 1258 if self._treeondisk:
1259 1259 m1 = self.read(p1)
1260 1260 m2 = self.read(p2)
1261 1261 n = self._addtree(m, transaction, link, m1, m2)
1262 1262 arraytext = None
1263 1263 else:
1264 1264 text = m.text(self._usemanifestv2)
1265 1265 n = self.addrevision(text, transaction, link, p1, p2)
1266 1266 arraytext = array.array('c', text)
1267 1267
1268 1268 self._mancache[n] = m
1269 1269 self.fulltextcache[n] = arraytext
1270 1270
1271 1271 return n
1272 1272
1273 1273 def _addtree(self, m, transaction, link, m1, m2):
1274 1274 # If the manifest is unchanged compared to one parent,
1275 1275 # don't write a new revision
1276 1276 if m.unmodifiedsince(m1) or m.unmodifiedsince(m2):
1277 1277 return m.node()
1278 1278 def writesubtree(subm, subp1, subp2):
1279 1279 sublog = self.dirlog(subm.dir())
1280 1280 sublog.add(subm, transaction, link, subp1, subp2, None, None)
1281 1281 m.writesubtrees(m1, m2, writesubtree)
1282 1282 text = m.dirtext(self._usemanifestv2)
1283 1283 # Double-check whether contents are unchanged to one parent
1284 1284 if text == m1.dirtext(self._usemanifestv2):
1285 1285 n = m1.node()
1286 1286 elif text == m2.dirtext(self._usemanifestv2):
1287 1287 n = m2.node()
1288 1288 else:
1289 1289 n = self.addrevision(text, transaction, link, m1.node(), m2.node())
1290 1290 # Save nodeid so parent manifest can calculate its nodeid
1291 1291 m.setnode(n)
1292 1292 return n
1293 1293
1294 1294 def clearcaches(self):
1295 1295 super(manifest, self).clearcaches()
1296 1296 self._mancache.clear()
General Comments 0
You need to be logged in to leave comments. Login now