##// END OF EJS Templates
manifest: add treemanifestctx class...
Durham Goode -
r29907:4fb4fc33 default
parent child Browse files
Show More
@@ -1,1194 +1,1228 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import heapq
12 12 import os
13 13 import struct
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 error,
18 18 mdiff,
19 19 parsers,
20 20 revlog,
21 21 util,
22 22 )
23 23
# Module-level shorthand for util.propertycache; it is applied as a decorator
# on attribute-style methods of the manifest classes in this module.
propertycache = util.propertycache
25 25
def _parsev1(data):
    """Yield (path, node, flags) tuples parsed from a v1 manifest text.

    Every line consists of a path, a NUL byte, the hex node and then any
    flags. The first 40 characters after the NUL are converted with
    revlog.bin(); whatever follows them is reported as the flags.

    Raises ValueError when a non-empty text does not end with a newline or
    when the lines are not in sorted order.
    """
    # The precondition checks may look excessive. They exist so that this
    # pure implementation behaves exactly like its C counterpart, to avoid
    # surprising anyone who develops against the pure version.
    if data and data[-1] != '\n':
        raise ValueError('Manifest did not end in a newline.')
    previous = None
    for line in data.splitlines():
        if previous is not None and previous > line:
            raise ValueError('Manifest lines not in sorted order.')
        previous = line
        path, rest = line.split('\0')
        # Slicing is a no-op split when there are no flags: rest[40:] is ''.
        hexnode, flags = rest[:40], rest[40:]
        yield path, revlog.bin(hexnode), flags
44 44
def _parsev2(data):
    """Yield (path, node, flags) tuples parsed from a v2 manifest text.

    The first line holds metadata and is skipped. Each entry that follows is
    one stem-length byte, the path suffix, a NUL, the flags (optionally
    followed by more NUL-separated fields), a newline, 20 bytes of node and
    a final newline. The full path is the first ``stemlen`` characters of
    the previous path plus the suffix.

    Raises ValueError on a truncated entry or on unsorted paths.
    """
    # Just ignore metadata for now
    cursor = data.find('\n') + 1
    total = len(data)
    lastpath = ''
    while cursor < total:
        # Search from cursor + 1 so the stem length byte is skipped.
        eol = data.find('\n', cursor + 1)
        if eol == -1:
            raise ValueError('Manifest ended with incomplete file entry.')
        stemlen = ord(data[cursor])
        fields = data[cursor + 1:eol].split('\0')
        path = lastpath[:stemlen] + fields[0]
        if lastpath > path:
            raise ValueError('Manifest entries not in sorted order.')
        # Just ignore per-entry metadata (fields[2:]) for now
        flags = fields[1]
        node = data[eol + 1:eol + 21]
        yield path, node, flags
        # 1 newline + 20 node bytes + 1 newline
        cursor = eol + 22
        lastpath = path
65 65
def _parse(data):
    """Generates (path, node, flags) tuples from a manifest text"""
    # A leading NUL byte marks the v2 format; anything else is v1.
    parser = _parsev2 if data.startswith('\0') else _parsev1
    return iter(parser(data))
72 72
def _text(it, usemanifestv2):
    """Given an iterator over (path, node, flags) tuples, returns a manifest
    text in the v2 format when usemanifestv2 is true, otherwise in v1"""
    serialize = _textv2 if usemanifestv2 else _textv1
    return serialize(it)
80 80
def _textv1(it):
    """Serialize (path, node, flags) tuples into a v1 manifest text.

    Each entry becomes one line: path, NUL, hex node, flags, newline. The
    collected paths are validated with _checkforbidden() before the text is
    returned.
    """
    tohex = revlog.hex
    paths = []
    chunks = []
    for path, node, flags in it:
        paths.append(path)
        # if this is changed to support newlines in filenames,
        # be sure to check the templates/ dir again (especially *-raw.tmpl)
        chunks.append("%s\0%s%s\n" % (path, tohex(node), flags))

    _checkforbidden(paths)
    return ''.join(chunks)
93 93
def _textv2(it):
    """Serialize (path, node, flags) tuples into a v2 manifest text.

    The text starts with a NUL byte and a newline. Each entry then stores
    the length of the prefix shared with the previous path (capped at 255 so
    it fits in one byte), the remaining path suffix, the flags and the node.
    The collected paths are validated with _checkforbidden().
    """
    paths = []
    chunks = ['\0\n']
    lastpath = ''
    for path, node, flags in it:
        paths.append(path)
        shared = os.path.commonprefix([lastpath, path])
        stemlen = min(len(shared), 255)
        suffix = path[stemlen:]
        chunks.append("%c%s\0%s\n%s\n" % (stemlen, suffix, flags, node))
        lastpath = path
    _checkforbidden(paths)
    return ''.join(chunks)
106 106
class _lazymanifest(dict):
    """This is the pure implementation of lazymanifest.

    It has not been optimized *at all* and is not lazy.

    Keys are file paths; values are (node, flags) pairs.
    """

    def __init__(self, data):
        """Fill the mapping from the manifest text ``data``."""
        dict.__init__(self)
        for path, node, flags in _parse(data):
            self[path] = node, flags

    def __setitem__(self, k, v):
        """Store the (node, flag) pair ``v`` under ``k``."""
        node, flag = v
        assert node is not None
        if len(node) > 21:
            # match c implementation behavior
            node = node[:21]
        dict.__setitem__(self, k, (node, flag))

    def __iter__(self):
        """Iterate over the paths in sorted order."""
        names = sorted(dict.keys(self))
        return iter(names)

    def iterkeys(self):
        """Iterate over the paths in sorted order."""
        names = sorted(dict.keys(self))
        return iter(names)

    def iterentries(self):
        """Iterate over (path, node, flags) tuples sorted by path."""
        # The sort happens now, at call time; only the flattening is lazy.
        ordered = sorted(self.iteritems())
        return ((path, entry[0], entry[1]) for path, entry in ordered)

    def copy(self):
        """Return a new _lazymanifest holding the same entries."""
        duplicate = _lazymanifest('')
        duplicate.update(self)
        return duplicate

    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.

        Returns a dict keyed by path. Changed entries map to a pair of
        (node, flags) tuples (this manifest first), with (None, '') standing
        in for a missing side. When ``clean`` is true, identical entries are
        included with the value None.
        '''
        changes = {}
        absent = (None, '')

        for path, mine in self.iteritems():
            if path not in m2:
                changes[path] = mine, absent
                continue
            theirs = m2[path]
            if mine != theirs:
                changes[path] = mine, theirs
            elif clean:
                changes[path] = None

        for path, theirs in m2.iteritems():
            if path not in self:
                changes[path] = absent, theirs

        return changes

    def filtercopy(self, filterfn):
        """Return a copy holding only the paths for which filterfn is true."""
        kept = _lazymanifest('')
        for path, node, flags in self.iterentries():
            if filterfn(path):
                kept[path] = node, flags
        return kept

    def text(self):
        """Get the full data of this manifest as a bytestring."""
        return _textv1(self.iterentries())
169 169
# Rebind _lazymanifest to the implementation exposed by the parsers module
# when that module has one; otherwise (AttributeError) keep the pure Python
# class defined earlier in this file.
try:
    _lazymanifest = parsers.lazymanifest
except AttributeError:
    pass
174 174
class manifestdict(object):
    """Flat manifest mapping file paths to a nodeid and flags.

    All storage is delegated to a _lazymanifest kept in ``self._lm``, whose
    values are (node, flags) pairs.
    """

    def __init__(self, data=''):
        """Build the manifest from a manifest text (v1 or v2)."""
        if data.startswith('\0'):
            #_lazymanifest can not parse v2
            self._lm = _lazymanifest('')
            for f, n, fl in _parsev2(data):
                self._lm[f] = n, fl
        else:
            self._lm = _lazymanifest(data)

    def __getitem__(self, key):
        """Return the node stored for ``key``; KeyError if absent."""
        return self._lm[key][0]

    def find(self, key):
        """Return the (node, flags) pair stored for ``key``."""
        return self._lm[key]

    def __len__(self):
        return len(self._lm)

    def __setitem__(self, key, node):
        """Set the node for ``key``, keeping any flags it already has."""
        self._lm[key] = node, self.flags(key, '')

    def __contains__(self, key):
        return key in self._lm

    def __delitem__(self, key):
        del self._lm[key]

    def __iter__(self):
        return self._lm.__iter__()

    def iterkeys(self):
        return self._lm.iterkeys()

    def keys(self):
        return list(self.iterkeys())

    def filesnotin(self, m2):
        '''Set of files in this manifest that are not in the other'''
        diff = self.diff(m2)
        # A None node on the m2 side of a diff entry means m2 lacks the file.
        files = set(filepath
                    for filepath, hashflags in diff.iteritems()
                    if hashflags[1][0] is None)
        return files

    @propertycache
    def _dirs(self):
        return util.dirs(self)

    def dirs(self):
        return self._dirs

    def hasdir(self, dir):
        return dir in self._dirs

    def _filesfastpath(self, match):
        '''Checks whether we can correctly and quickly iterate over matcher
        files instead of over manifest files.'''
        files = match.files()
        return (len(files) < 100 and (match.isexact() or
            (match.prefix() and all(fn in self for fn in files))))

    def walk(self, match):
        '''Generates matching file names.

        Equivalent to manifest.matches(match).iterkeys(), but without creating
        an entirely new manifest.

        It also reports nonexistent files by marking them bad with match.bad()
        (only on the full walk, not on the fast path).
        '''
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        # avoid the entire walk if we're only looking for specific files
        if self._filesfastpath(match):
            for fn in sorted(fset):
                # An exact matcher may name files this manifest does not
                # contain; skip them so that walk() agrees with matches().
                if fn in self:
                    yield fn
            return

        for fn in self:
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            if match(fn):
                yield fn

        # for dirstate.walk, files=['.'] means "walk the whole tree".
        # follow that here, too
        fset.discard('.')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        if self._filesfastpath(match):
            m = manifestdict()
            lm = self._lm
            for fn in match.files():
                if fn in lm:
                    m._lm[fn] = lm[fn]
            return m

        m = manifestdict()
        m._lm = self._lm.filtercopy(match)
        return m

    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        '''
        return self._lm.diff(m2._lm, clean)

    def setflag(self, key, flag):
        """Set the flags for ``key``, keeping its node; KeyError if absent."""
        self._lm[key] = self[key], flag

    def get(self, key, default=None):
        """Return the node for ``key``, or ``default`` when it is absent."""
        try:
            return self._lm[key][0]
        except KeyError:
            return default

    def flags(self, key, default=''):
        """Return the flags for ``key``, or ``default`` when it is absent."""
        try:
            return self._lm[key][1]
        except KeyError:
            return default

    def copy(self):
        c = manifestdict()
        c._lm = self._lm.copy()
        return c

    def iteritems(self):
        """Iterate over (path, node) pairs."""
        return (x[:2] for x in self._lm.iterentries())

    def iterentries(self):
        """Iterate over (path, node, flags) tuples."""
        return self._lm.iterentries()

    def text(self, usemanifestv2=False):
        """Return the manifest text, in v2 format when usemanifestv2 is set."""
        if usemanifestv2:
            return _textv2(self._lm.iterentries())
        else:
            # use (probably) native version for v1
            return self._lm.text()

    def fastdelta(self, base, changes):
        """Given a base manifest text as an array.array and a list of changes
        relative to that text, compute a delta that can be used by revlog.

        ``changes`` yields (filename, todelete) pairs. Returns the pair
        (arraytext, deltatext): the new full text as an array and the delta
        against ``base``.
        """
        delta = []
        dstart = None
        dend = None
        dline = [""]
        start = 0
        # zero copy representation of base as a buffer
        addbuf = util.buffer(base)

        changes = list(changes)
        if len(changes) < 1000:
            # start with a readonly loop that finds the offset of
            # each line and creates the deltas
            for f, todelete in changes:
                # start will either be the index of the item or the insert
                # point
                start, end = _msearch(addbuf, f, start)
                if not todelete:
                    h, fl = self._lm[f]
                    l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
                else:
                    if start == end:
                        # item we want to delete was not found, error out
                        raise AssertionError(
                                _("failed to remove %s from manifest") % f)
                    l = ""
                if dstart is not None and dstart <= start and dend >= start:
                    # this change touches the pending hunk: extend it
                    if dend < end:
                        dend = end
                    if l:
                        dline.append(l)
                else:
                    # flush the pending hunk and start a new one
                    if dstart is not None:
                        delta.append([dstart, dend, "".join(dline)])
                    dstart = start
                    dend = end
                    dline = [l]

            if dstart is not None:
                delta.append([dstart, dend, "".join(dline)])
            # apply the delta to the base, and get a delta for addrevision
            deltatext, arraytext = _addlistdelta(base, delta)
        else:
            # For large changes, it's much cheaper to just build the text and
            # diff it.
            arraytext = array.array('c', self.text())
            deltatext = mdiff.textdiff(base, arraytext)

        return arraytext, deltatext
391 391
def _msearch(m, s, lo=0, hi=None):
    '''return a tuple (start, end) that says where to find s within m.

    If the string is found m[start:end] are the line containing
    that string.  If start == end the string was not found and
    they indicate the proper sorted insertion point.

    m should be a buffer or a string
    s is a string'''
    if not s:
        return (lo, lo)
    size = len(m)

    def scanto(pos, char):
        # first index >= pos that holds char, or size when there is none
        while pos < size and m[pos] != char:
            pos += 1
        return pos

    if not hi:
        hi = size
    while lo < hi:
        linestart = (lo + hi) // 2
        # back up to the first character of the line holding the midpoint
        while linestart > 0 and m[linestart - 1] != '\n':
            linestart -= 1
        nameend = scanto(linestart, '\0')
        if m[linestart:nameend] < s:
            # we know that after the null there are 40 bytes of sha1
            # this translates to the bisect lo = mid + 1
            lo = scanto(nameend + 40, '\n') + 1
        else:
            # this translates to the bisect hi = mid
            hi = linestart
    nameend = scanto(lo, '\0')
    if m[lo:nameend] != s:
        return (lo, lo)
    # we know that after the null there are 40 bytes of sha1
    lineend = scanto(nameend + 40, '\n')
    return (lo, lineend + 1)
431 431
def _checkforbidden(l):
    """Check filenames for illegal characters.

    Raises error.RevlogError for the first name in ``l`` that contains a
    newline or a carriage return.
    """
    for name in l:
        if any(ch in name for ch in ('\n', '\r')):
            raise error.RevlogError(
                _("'\\n' and '\\r' disallowed in filenames: %r") % name)
438 438
439 439
# apply the changes collected during the bisect loop to our addlist
# return a delta suitable for addrevision
def _addlistdelta(addlist, x):
    """Apply the hunks in ``x`` to ``addlist``.

    ``x`` is a sequence of (start, end, content) hunks, each replacing
    addlist[start:end] with content. Returns (deltatext, newaddlist): the
    hunks packed as three big-endian longs (start, end, content length)
    followed by the content, and the patched array.
    """
    # for large addlist arrays, building a new array is cheaper
    # than repeatedly modifying the existing one
    rebuilt = array.array('c')
    consumed = 0

    for start, end, content in x:
        # copy the untouched stretch before this hunk, then the hunk itself
        rebuilt += addlist[consumed:start]
        if content:
            rebuilt += array.array('c', content)

        consumed = end

    rebuilt += addlist[consumed:]

    pieces = []
    for start, end, content in x:
        header = struct.pack(">lll", start, end, len(content))
        pieces.append(header + content)
    deltatext = "".join(pieces)
    return deltatext, rebuilt
460 460
def _splittopdir(f):
    """Split ``f`` at its first '/' into (topdir, rest).

    The returned top directory keeps its trailing '/'. A path without any
    '/' is returned as ('', f).
    """
    head, sep, tail = f.partition('/')
    if not sep:
        return '', f
    return head + '/', tail
467 467
# Do-nothing callable. treemanifest stores it in _loadfunc/_copyfunc as the
# sentinel meaning "nothing is pending", and compares against it by identity.
_noop = lambda s: None
469 469
class treemanifest(object):
    """Manifest stored as a tree with one treemanifest per directory.

    Each instance covers one directory: ``_files`` maps file names in that
    directory to nodes, ``_flags`` maps file names to flags, and ``_dirs``
    maps sub-directory names (with a trailing '/') to child treemanifest
    instances. Content may be populated lazily through ``_loadfunc`` or
    ``_copyfunc``; methods call ``_load()`` before touching it.
    """
    def __init__(self, dir='', text=''):
        """Create an empty tree for ``dir`` or, when ``text`` is given, one
        parsed from a flat manifest text (the result is marked dirty)."""
        self._dir = dir
        self._node = revlog.nullid
        # _noop in either slot means there is nothing pending (see _load)
        self._loadfunc = _noop
        self._copyfunc = _noop
        self._dirty = False
        self._dirs = {}
        # Using _lazymanifest here is a little slower than plain old dicts
        self._files = {}
        self._flags = {}
        if text:
            def readsubtree(subdir, subm):
                raise AssertionError('treemanifest constructor only accepts '
                                     'flat manifests')
            self.parse(text, readsubtree)
            self._dirty = True # Mark flat manifest dirty after parsing

    def _subpath(self, path):
        """Return ``path`` prefixed with this tree's directory."""
        return self._dir + path

    def __len__(self):
        """Number of files in this tree, sub-directories included."""
        self._load()
        size = len(self._files)
        for m in self._dirs.values():
            size += m.__len__()
        return size

    def _isempty(self):
        """True when neither this tree nor any subtree holds a file."""
        self._load() # for consistency; already loaded by all callers
        return (not self._files and (not self._dirs or
                all(m._isempty() for m in self._dirs.values())))

    def __repr__(self):
        return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
                (self._dir, revlog.hex(self._node),
                 bool(self._loadfunc is _noop),
                 self._dirty, id(self)))

    def dir(self):
        '''The directory that this tree manifest represents, including a
        trailing '/'. Empty string for the repo root directory.'''
        return self._dir

    def node(self):
        '''The node of this instance. nullid for unsaved instances. Should
        be updated when the instance is read or written from a revlog.
        Must not be called while the instance is dirty.
        '''
        assert not self._dirty
        return self._node

    def setnode(self, node):
        """Record ``node`` as this tree's node and clear the dirty flag."""
        self._node = node
        self._dirty = False

    def iterentries(self):
        """Yield (fullpath, node, flags) for every file, recursing into
        sub-directories; names within a directory are visited sorted."""
        self._load()
        for p, n in sorted(self._dirs.items() + self._files.items()):
            if p in self._files:
                yield self._subpath(p), n, self._flags.get(p, '')
            else:
                # n is a child treemanifest here
                for x in n.iterentries():
                    yield x

    def iteritems(self):
        """Yield (fullpath, node) for every file, recursing into
        sub-directories."""
        self._load()
        for p, n in sorted(self._dirs.items() + self._files.items()):
            if p in self._files:
                yield self._subpath(p), n
            else:
                # n is a child treemanifest here
                for f, sn in n.iteritems():
                    yield f, sn

    def iterkeys(self):
        """Yield the full path of every file, recursing into
        sub-directories."""
        self._load()
        for p in sorted(self._dirs.keys() + self._files.keys()):
            if p in self._files:
                yield self._subpath(p)
            else:
                for f in self._dirs[p].iterkeys():
                    yield f

    def keys(self):
        return list(self.iterkeys())

    def __iter__(self):
        return self.iterkeys()

    def __contains__(self, f):
        """True when file ``f`` (a path relative to this tree) exists."""
        if f is None:
            return False
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            if dir not in self._dirs:
                return False
            return self._dirs[dir].__contains__(subpath)
        else:
            return f in self._files

    def get(self, f, default=None):
        """Return the node of file ``f``, or ``default`` when it is absent."""
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            if dir not in self._dirs:
                return default
            return self._dirs[dir].get(subpath, default)
        else:
            return self._files.get(f, default)

    def __getitem__(self, f):
        """Return the node of file ``f``; KeyError when it is absent."""
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            return self._dirs[dir].__getitem__(subpath)
        else:
            return self._files[f]

    def flags(self, f):
        """Return the flags of ``f``; '' when the path is unknown, has no
        flags, or names a key of ``_dirs``."""
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            if dir not in self._dirs:
                return ''
            return self._dirs[dir].flags(subpath)
        else:
            if f in self._dirs:
                return ''
            return self._flags.get(f, '')

    def find(self, f):
        """Return (node, flags) of file ``f``; KeyError when it is absent."""
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            return self._dirs[dir].find(subpath)
        else:
            return self._files[f], self._flags.get(f, '')

    def __delitem__(self, f):
        """Remove file ``f``, pruning sub-directories that become empty,
        and mark this tree dirty. KeyError when ``f`` is absent."""
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._dirs[dir].__delitem__(subpath)
            # If the directory is now empty, remove it
            if self._dirs[dir]._isempty():
                del self._dirs[dir]
        else:
            del self._files[f]
            if f in self._flags:
                del self._flags[f]
        self._dirty = True

    def __setitem__(self, f, n):
        """Set the node of file ``f`` to ``n``, creating sub-directories as
        needed, and mark this tree dirty."""
        assert n is not None
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(self._subpath(dir))
            self._dirs[dir].__setitem__(subpath, n)
        else:
            self._files[f] = n[:21] # to match manifestdict's behavior
        self._dirty = True

    def _load(self):
        """Run the pending load function or, failing that, the pending copy
        function. The slot is reset to _noop *before* the call, so the
        function runs once and nested _load() calls made from inside it do
        not re-enter it."""
        if self._loadfunc is not _noop:
            lf, self._loadfunc = self._loadfunc, _noop
            lf(self)
        elif self._copyfunc is not _noop:
            cf, self._copyfunc = self._copyfunc, _noop
            cf(self)

    def setflag(self, f, flags):
        """Set the flags (symlink, executable) for path f."""
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(self._subpath(dir))
            self._dirs[dir].setflag(subpath, flags)
        else:
            self._flags[f] = flags
        self._dirty = True

    def copy(self):
        """Return a copy of this tree.

        When this tree is already loaded the content is copied right away.
        When a load is still pending, the copy is deferred through the new
        tree's _copyfunc. When this tree is itself a pending copy, the new
        tree shares the same _copyfunc.
        """
        copy = treemanifest(self._dir)
        copy._node = self._node
        copy._dirty = self._dirty
        if self._copyfunc is _noop:
            def _copyfunc(s):
                self._load()
                for d in self._dirs:
                    s._dirs[d] = self._dirs[d].copy()
                s._files = dict.copy(self._files)
                s._flags = dict.copy(self._flags)
            if self._loadfunc is _noop:
                _copyfunc(copy)
            else:
                copy._copyfunc = _copyfunc
        else:
            copy._copyfunc = self._copyfunc
        return copy

    def filesnotin(self, m2):
        '''Set of files in this manifest that are not in the other'''
        files = set()
        def _filesnotin(t1, t2):
            # Same node and both clean: skip this pair of subtrees without
            # loading them.
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            for d, m1 in t1._dirs.iteritems():
                if d in t2._dirs:
                    m2 = t2._dirs[d]
                    _filesnotin(m1, m2)
                else:
                    files.update(m1.iterkeys())

            for fn in t1._files.iterkeys():
                if fn not in t2._files:
                    files.add(t1._subpath(fn))

        _filesnotin(self, m2)
        return files

    @propertycache
    def _alldirs(self):
        return util.dirs(self)

    def dirs(self):
        return self._alldirs

    def hasdir(self, dir):
        """True when ``dir`` (given without a trailing '/') is a directory
        of this tree."""
        self._load()
        topdir, subdir = _splittopdir(dir)
        if topdir:
            if topdir in self._dirs:
                return self._dirs[topdir].hasdir(subdir)
            return False
        return (dir + '/') in self._dirs

    def walk(self, match):
        '''Generates matching file names.

        Equivalent to manifest.matches(match).iterkeys(), but without creating
        an entirely new manifest.

        It also reports nonexistent files by marking them bad with match.bad().
        '''
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        for fn in self._walk(match):
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            yield fn

        # for dirstate.walk, files=['.'] means "walk the whole tree".
        # follow that here, too
        fset.discard('.')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def _walk(self, match):
        '''Recursively generates matching file names for walk().'''
        # visitdir() is asked about this directory without its trailing
        # '/', or about '.' for the root
        if not match.visitdir(self._dir[:-1] or '.'):
            return

        # yield this dir's files and walk its submanifests
        self._load()
        for p in sorted(self._dirs.keys() + self._files.keys()):
            if p in self._files:
                fullp = self._subpath(p)
                if match(fullp):
                    yield fullp
            else:
                for f in self._dirs[p]._walk(match):
                    yield f

    def matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        return self._matches(match)

    def _matches(self, match):
        '''recursively generate a new manifest filtered by the match argument.
        '''

        visit = match.visitdir(self._dir[:-1] or '.')
        if visit == 'all':
            return self.copy()
        ret = treemanifest(self._dir)
        if not visit:
            return ret

        self._load()
        for fn in self._files:
            fullp = self._subpath(fn)
            if not match(fullp):
                continue
            ret._files[fn] = self._files[fn]
            if fn in self._flags:
                ret._flags[fn] = self._flags[fn]

        for dir, subm in self._dirs.iteritems():
            m = subm._matches(match)
            if not m._isempty():
                ret._dirs[dir] = m

        if not ret._isempty():
            ret._dirty = True
        return ret

    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        '''
        result = {}
        # stands in for a directory that exists on one side only
        emptytree = treemanifest()
        def _diff(t1, t2):
            # Same node and both clean: skip this pair of subtrees without
            # loading them.
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            for d, m1 in t1._dirs.iteritems():
                m2 = t2._dirs.get(d, emptytree)
                _diff(m1, m2)

            for d, m2 in t2._dirs.iteritems():
                if d not in t1._dirs:
                    _diff(emptytree, m2)

            for fn, n1 in t1._files.iteritems():
                fl1 = t1._flags.get(fn, '')
                n2 = t2._files.get(fn, None)
                fl2 = t2._flags.get(fn, '')
                if n1 != n2 or fl1 != fl2:
                    result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
                elif clean:
                    result[t1._subpath(fn)] = None

            for fn, n2 in t2._files.iteritems():
                if fn not in t1._files:
                    fl2 = t2._flags.get(fn, '')
                    result[t2._subpath(fn)] = ((None, ''), (n2, fl2))

        _diff(self, m2)
        return result

    def unmodifiedsince(self, m2):
        """True when both trees are clean and have the same node."""
        return not self._dirty and not m2._dirty and self._node == m2._node

    def parse(self, text, readsubtree):
        """Populate this tree from the manifest text ``text``.

        Entries flagged 't' are sub-directories; each is obtained by calling
        readsubtree(fullpath, node), where fullpath ends with '/'.
        """
        for f, n, fl in _parse(text):
            if fl == 't':
                f = f + '/'
                self._dirs[f] = readsubtree(self._subpath(f), n)
            elif '/' in f:
                # This is a flat manifest, so use __setitem__ and setflag rather
                # than assigning directly to _files and _flags, so we can
                # assign a path in a subdirectory, and to mark dirty (compared
                # to nullid).
                self[f] = n
                if fl:
                    self.setflag(f, fl)
            else:
                # Assigning to _files and _flags avoids marking as dirty,
                # and should be a little faster.
                self._files[f] = n
                if fl:
                    self._flags[f] = fl

    def text(self, usemanifestv2=False):
        """Get the full data of this manifest as a bytestring."""
        self._load()
        return _text(self.iterentries(), usemanifestv2)

    def dirtext(self, usemanifestv2=False):
        """Get the full data of this directory as a bytestring. Make sure that
        any submanifests have been written first, so their nodeids are correct.
        """
        self._load()
        flags = self.flags
        # sub-directories are listed without their trailing '/', flagged 't'
        dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
        files = [(f, self._files[f], flags(f)) for f in self._files]
        return _text(sorted(dirs + files), usemanifestv2)

    def read(self, gettext, readsubtree):
        """Arrange for this tree to be populated lazily.

        Nothing is read here. On the first _load(), gettext() is called and
        its result parsed with readsubtree, after which the dirty flag is
        cleared.
        """
        def _load_for_read(s):
            s.parse(gettext(), readsubtree)
            s._dirty = False
        self._loadfunc = _load_for_read

    def writesubtrees(self, m1, m2, writesubtree):
        """Call writesubtree(subm, subp1, subp2) for each direct
        sub-directory, where subp1/subp2 are the nodes of the same
        sub-directory in m1/m2 (nullid when it is missing there)."""
        self._load() # for consistency; should never have any effect here
        m1._load()
        m2._load()
        emptytree = treemanifest()
        for d, subm in self._dirs.iteritems():
            subp1 = m1._dirs.get(d, emptytree)._node
            subp2 = m2._dirs.get(d, emptytree)._node
            # if the first parent is null, pass the second one first
            if subp1 == revlog.nullid:
                subp1, subp2 = subp2, subp1
            writesubtree(subm, subp1, subp2)
894 894
class manifestrevlog(revlog.revlog):
    '''A revlog that stores manifest texts. This is responsible for caching the
    full-text manifest contents.
    '''
    def __init__(self, opener, indexfile):
        super(manifestrevlog, self).__init__(opener, indexfile)

        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob below.
        defaultsize = 4
        options = getattr(opener, 'options', None)
        if options is None:
            cachesize = defaultsize
        else:
            cachesize = options.get('manifestcachesize', defaultsize)
        self._fulltextcache = util.lrucachedict(cachesize)

    @property
    def fulltextcache(self):
        """The LRU cache of full manifest texts."""
        return self._fulltextcache

    def clearcaches(self):
        """Clear the base revlog caches and the full-text cache."""
        super(manifestrevlog, self).clearcaches()
        self._fulltextcache.clear()
918 918
class manifestlog(object):
    """A collection class representing the collection of manifest snapshots
    referenced by commits in the repository.

    In this situation, 'manifest' refers to the abstract concept of a snapshot
    of the list of files in the given commit. Consumers of the output of this
    class do not care about the implementation details of the actual manifests
    they receive (i.e. tree or flat or lazily loaded, etc)."""
    def __init__(self, opener, repo):
        # NOTE: opener is accepted but not used by this constructor.
        self._repo = repo

        # We'll separate this into its own cache once oldmanifest is no longer
        # used
        self._mancache = repo.manifest._mancache

    @property
    def _revlog(self):
        return self._repo.manifest

    @property
    def _oldmanifest(self):
        # _revlog is the same as _oldmanifest right now, but we eventually want
        # to delete _oldmanifest while still allowing manifestlog to access the
        # revlog specific apis.
        return self._repo.manifest

    def __getitem__(self, node):
        """Retrieves the manifest instance for the given node. Throws a KeyError
        if not found.

        A treemanifestctx is built when the old manifest keeps trees in
        memory, a manifestctx otherwise. The result is cached, except for
        the null node.
        """
        if node in self._mancache:
            cachemf = self._mancache[node]
            # The old manifest may put non-ctx manifests in the cache, so skip
            # those since they don't implement the full api.
            if isinstance(cachemf, (manifestctx, treemanifestctx)):
                return cachemf

        if self._oldmanifest._treeinmem:
            m = treemanifestctx(self._revlog, '', node)
        else:
            m = manifestctx(self._revlog, node)
        if node != revlog.nullid:
            self._mancache[node] = m
        return m
966 964
class manifestctx(manifestdict):
    """A class representing a single revision of a manifest, including its
    contents, its parent revs, and its linkrev.
    """
    def __init__(self, revlog, node):
        """Read revision ``node`` from ``revlog`` and parse it.

        The full text is also stored, as an array, in the revlog's full-text
        cache under ``node``.
        """
        self._revlog = revlog

        self._node = node

        # TODO: We eventually want p1, p2, and linkrev exposed on this class,
        # but let's add it later when something needs it and we can load it
        # lazily.
        #self.p1, self.p2 = revlog.parents(node)
        #rev = revlog.rev(node)
        #self.linkrev = revlog.linkrev(rev)

        # This should eventually be made lazy loaded, so consumers can access
        # the node/p1/linkrev data without having to parse the whole manifest.
        data = revlog.revision(node)
        arraytext = array.array('c', data)
        revlog._fulltextcache[node] = arraytext
        super(manifestctx, self).__init__(data)

    def node(self):
        """Return the node this context was created for."""
        return self._node
988 990
class treemanifestctx(treemanifest):
    """A treemanifest bound to one revision (``node``) of the manifest
    revlog for directory ``dir``."""
    def __init__(self, revlog, dir, node):
        """Create the context for ``node`` of directory ``dir``.

        ``revlog`` is the manifest revlog; the revlog for ``dir`` is looked
        up with its dirlog() method. With tree manifests on disk the content
        is loaded lazily; otherwise the flat text is read and parsed
        immediately and stored in the revlog's full-text cache.
        """
        # NOTE: within this method ``revlog`` is the argument, which shadows
        # the module of the same name.
        revlog = revlog.dirlog(dir)
        self._revlog = revlog
        self._dir = dir

        self._node = node

        # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
        # we can instantiate treemanifestctx objects for directories we don't
        # have on disk.
        #self.p1, self.p2 = revlog.parents(node)
        #rev = revlog.rev(node)
        #self.linkrev = revlog.linkrev(rev)

        if revlog._treeondisk:
            super(treemanifestctx, self).__init__(dir=dir)
            def gettext():
                return revlog.revision(node)
            def readsubtree(dir, subm):
                return revlog.dirlog(dir).read(subm)
            self.read(gettext, readsubtree)
            self.setnode(node)
        else:
            text = revlog.revision(node)
            arraytext = array.array('c', text)
            revlog.fulltextcache[node] = arraytext
            super(treemanifestctx, self).__init__(dir=dir, text=text)
            # The base initialiser resets _node to nullid; restore it so that
            # node() reports the revision this context was created for. The
            # dirty flag set by the flat parse is left untouched.
            self._node = node

    def node(self):
        """Return the node this context was created for."""
        return self._node
1022
class manifest(manifestrevlog):
    """Manifest revlog that also caches parsed manifests.

    Parsed manifests are kept in ``_mancache``. When tree manifests are
    enabled, per-directory revlogs are created on demand by ``dirlog()`` and
    shared through ``_dirlogcache``.
    """
    def __init__(self, opener, dir='', dirlogcache=None):
        '''The 'dir' and 'dirlogcache' arguments are for internal use by
        manifest.manifest only. External users should create a root manifest
        log with manifest.manifest(opener) and call dirlog() on it.
        '''
        opts = getattr(opener, 'options', None)
        settings = opts if opts is not None else {}
        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob.
        cachesize = settings.get('manifestcachesize', 4)
        usetreemanifest = settings.get('treemanifest', False)
        usemanifestv2 = settings.get('manifestv2', False)

        self._mancache = util.lrucachedict(cachesize)
        self._treeinmem = usetreemanifest
        self._treeondisk = usetreemanifest
        self._usemanifestv2 = usemanifestv2

        if dir:
            # Directory revlogs only exist for on-disk tree manifests.
            assert self._treeondisk, 'opts is %r' % opts
            if not dir.endswith('/'):
                dir += '/'
            indexfile = "meta/" + dir + "00manifest.i"
        else:
            indexfile = "00manifest.i"
        super(manifest, self).__init__(opener, indexfile)

        self._dir = dir
        # The dirlogcache is kept on the root manifest log
        self._dirlogcache = dirlogcache if dir else {'': self}

    def _newmanifest(self, data=''):
        """Build an in-memory manifest of the configured kind from ``data``."""
        if not self._treeinmem:
            return manifestdict(data)
        return treemanifest(self._dir, data)

    def dirlog(self, dir):
        """Return the manifest log for ``dir``, creating and caching it if
        it is not in the dirlog cache yet."""
        if dir:
            assert self._treeondisk
        cache = self._dirlogcache
        if dir not in cache:
            cache[dir] = manifest(self.opener, dir, cache)
        return cache[dir]

    def _slowreaddelta(self, node):
        """Compute the delta for ``node`` by diffing two fully read
        manifests: the delta parent's and ``node``'s own."""
        baserev = self.deltaparent(self.rev(node))
        basemf = self.read(self.node(baserev))
        targetmf = self.read(node)
        delta = self._newmanifest()
        changes = basemf.diff(targetmf)
        for path, ((_n0, _fl0), (newnode, newflag)) in changes.iteritems():
            # Only entries with a node on the new side are recorded.
            if not newnode:
                continue
            delta[path] = newnode
            if newflag:
                delta.setflag(path, newflag)
        return delta

    def readdelta(self, node):
        """Return a manifest built from the delta of ``node`` against its
        delta parent."""
        if self._usemanifestv2 or self._treeondisk:
            return self._slowreaddelta(node)
        rev = self.rev(node)
        patch = self.revdiff(self.deltaparent(rev), rev)
        return self._newmanifest(mdiff.patchtext(patch))

    def readshallowdelta(self, node):
        '''For flat manifests, this is the same as readdelta(). For
        treemanifests, this will read the delta for this revlog's directory,
        without recursively reading subdirectory manifests. Instead, any
        subdirectory entry will be reported as it appears in the manifests, i.e.
        the subdirectory will be reported among files and distinguished only by
        its 't' flag.'''
        if not self._treeondisk:
            return self.readdelta(node)
        if self._usemanifestv2:
            raise error.Abort(
                _("readshallowdelta() not implemented for manifestv2"))
        rev = self.rev(node)
        patch = self.revdiff(self.deltaparent(rev), rev)
        return manifestdict(mdiff.patchtext(patch))

    def readfast(self, node):
        '''use the faster of readdelta or read

        This will return a manifest which is either only the files
        added/modified relative to p1, or all files in the
        manifest. Which one is returned depends on the codepath used
        to retrieve the data.
        '''
        rev = self.rev(node)
        base = self.deltaparent(rev)
        # A delta is only used when it was stored against a real parent.
        if base == revlog.nullrev or base not in self.parentrevs(rev):
            return self.read(node)
        return self.readdelta(node)

    def readshallowfast(self, node):
        '''like readfast(), but calls readshallowdelta() instead of readdelta()
        '''
        rev = self.rev(node)
        base = self.deltaparent(rev)
        if base == revlog.nullrev or base not in self.parentrevs(rev):
            return self.readshallow(node)
        return self.readshallowdelta(node)

    def read(self, node):
        """Return the parsed manifest for ``node``, using and filling the
        manifest cache."""
        if node == revlog.nullid:
            return self._newmanifest() # don't upset local cache
        if node in self._mancache:
            return self._mancache[node]

        if not self._treeondisk:
            text = self.revision(node)
            mf = self._newmanifest(text)
            arraytext = array.array('c', text)
        else:
            def gettext():
                return self.revision(node)
            def readsubtree(dir, subm):
                return self.dirlog(dir).read(subm)
            mf = self._newmanifest()
            mf.read(gettext, readsubtree)
            mf.setnode(node)
            # No flat text is recorded for on-disk trees.
            arraytext = None

        self._mancache[node] = mf
        self.fulltextcache[node] = arraytext
        return mf

    def readshallow(self, node):
        '''Reads the manifest in this directory. When using flat manifests,
        this manifest will generally have files in subdirectories in it. Does
        not cache the manifest as the callers generally do not read the same
        version twice.'''
        text = self.revision(node)
        return manifestdict(text)

    def find(self, node, f):
        '''look up entry for a single file efficiently.
        return (node, flags) pair if found, (None, None) if not.'''
        mf = self.read(node)
        try:
            entry = mf.find(f)
        except KeyError:
            entry = (None, None)
        return entry

    def add(self, m, transaction, link, p1, p2, added, removed):
        """Store manifest ``m`` as a new revision with parents ``p1`` and
        ``p2``, cache it, and return its node."""
        usefastdelta = (p1 in self.fulltextcache and not self._treeinmem
                        and not self._usemanifestv2)
        if usefastdelta:
            # If our first parent is in the manifest cache, we can
            # compute a delta here using properties we know about the
            # manifest up-front, which may save time later for the
            # revlog layer.

            _checkforbidden(added)
            # combine the changed lists into one sorted iterator
            work = heapq.merge([(path, False) for path in added],
                               [(path, True) for path in removed])

            arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
            cachedelta = self.rev(p1), deltatext
            text = util.buffer(arraytext)
            n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
        elif self._treeondisk:
            # The first parent manifest isn't already loaded, so we'll
            # just encode a fulltext of the manifest and pass that
            # through to the revlog layer, and let it handle the delta
            # process.
            m1 = self.read(p1)
            m2 = self.read(p2)
            n = self._addtree(m, transaction, link, m1, m2)
            arraytext = None
        else:
            text = m.text(self._usemanifestv2)
            n = self.addrevision(text, transaction, link, p1, p2)
            arraytext = array.array('c', text)

        self._mancache[n] = m
        self.fulltextcache[n] = arraytext

        return n

    def _addtree(self, m, transaction, link, m1, m2):
        """Write tree manifest ``m`` (subtrees first) and return its node."""
        # If the manifest is unchanged compared to one parent,
        # don't write a new revision
        if any(m.unmodifiedsince(parent) for parent in (m1, m2)):
            return m.node()

        def writesubtree(subm, subp1, subp2):
            sublog = self.dirlog(subm.dir())
            sublog.add(subm, transaction, link, subp1, subp2, None, None)
        m.writesubtrees(m1, m2, writesubtree)

        usev2 = self._usemanifestv2
        text = m.dirtext(usev2)
        # Double-check whether contents are unchanged to one parent
        for parent in (m1, m2):
            if text == parent.dirtext(usev2):
                n = parent.node()
                break
        else:
            n = self.addrevision(text, transaction, link, m1.node(), m2.node())
        # Save nodeid so parent manifest can calculate its nodeid
        m.setnode(n)
        return n

    def clearcaches(self):
        """Clear the base class caches, the manifest cache, and reset the
        dirlog cache to contain only this log."""
        super(manifest, self).clearcaches()
        self._mancache.clear()
        self._dirlogcache = {'': self}
General Comments 0
You need to be logged in to leave comments. Login now