##// END OF EJS Templates
manifest: move revlog specific options from manifest to manifestrevlog...
Durham Goode -
r29940:fa145a20 default
parent child Browse files
Show More
@@ -1,1280 +1,1285 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import heapq
12 12 import os
13 13 import struct
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 error,
18 18 mdiff,
19 19 parsers,
20 20 revlog,
21 21 util,
22 22 )
23 23
24 24 propertycache = util.propertycache
25 25
26 26 def _parsev1(data):
27 27 # This method does a little bit of excessive-looking
28 28 # precondition checking. This is so that the behavior of this
29 29 # class exactly matches its C counterpart to try and help
30 30 # prevent surprise breakage for anyone that develops against
31 31 # the pure version.
32 32 if data and data[-1] != '\n':
33 33 raise ValueError('Manifest did not end in a newline.')
34 34 prev = None
35 35 for l in data.splitlines():
36 36 if prev is not None and prev > l:
37 37 raise ValueError('Manifest lines not in sorted order.')
38 38 prev = l
39 39 f, n = l.split('\0')
40 40 if len(n) > 40:
41 41 yield f, revlog.bin(n[:40]), n[40:]
42 42 else:
43 43 yield f, revlog.bin(n), ''
44 44
45 45 def _parsev2(data):
46 46 metadataend = data.find('\n')
47 47 # Just ignore metadata for now
48 48 pos = metadataend + 1
49 49 prevf = ''
50 50 while pos < len(data):
51 51 end = data.find('\n', pos + 1) # +1 to skip stem length byte
52 52 if end == -1:
53 53 raise ValueError('Manifest ended with incomplete file entry.')
54 54 stemlen = ord(data[pos])
55 55 items = data[pos + 1:end].split('\0')
56 56 f = prevf[:stemlen] + items[0]
57 57 if prevf > f:
58 58 raise ValueError('Manifest entries not in sorted order.')
59 59 fl = items[1]
60 60 # Just ignore metadata (items[2:] for now)
61 61 n = data[end + 1:end + 21]
62 62 yield f, n, fl
63 63 pos = end + 22
64 64 prevf = f
65 65
66 66 def _parse(data):
67 67 """Generates (path, node, flags) tuples from a manifest text"""
68 68 if data.startswith('\0'):
69 69 return iter(_parsev2(data))
70 70 else:
71 71 return iter(_parsev1(data))
72 72
73 73 def _text(it, usemanifestv2):
74 74 """Given an iterator over (path, node, flags) tuples, returns a manifest
75 75 text"""
76 76 if usemanifestv2:
77 77 return _textv2(it)
78 78 else:
79 79 return _textv1(it)
80 80
81 81 def _textv1(it):
82 82 files = []
83 83 lines = []
84 84 _hex = revlog.hex
85 85 for f, n, fl in it:
86 86 files.append(f)
87 87 # if this is changed to support newlines in filenames,
88 88 # be sure to check the templates/ dir again (especially *-raw.tmpl)
89 89 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
90 90
91 91 _checkforbidden(files)
92 92 return ''.join(lines)
93 93
94 94 def _textv2(it):
95 95 files = []
96 96 lines = ['\0\n']
97 97 prevf = ''
98 98 for f, n, fl in it:
99 99 files.append(f)
100 100 stem = os.path.commonprefix([prevf, f])
101 101 stemlen = min(len(stem), 255)
102 102 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
103 103 prevf = f
104 104 _checkforbidden(files)
105 105 return ''.join(lines)
106 106
107 107 class _lazymanifest(dict):
108 108 """This is the pure implementation of lazymanifest.
109 109
110 110 It has not been optimized *at all* and is not lazy.
111 111 """
112 112
113 113 def __init__(self, data):
114 114 dict.__init__(self)
115 115 for f, n, fl in _parse(data):
116 116 self[f] = n, fl
117 117
118 118 def __setitem__(self, k, v):
119 119 node, flag = v
120 120 assert node is not None
121 121 if len(node) > 21:
122 122 node = node[:21] # match c implementation behavior
123 123 dict.__setitem__(self, k, (node, flag))
124 124
125 125 def __iter__(self):
126 126 return iter(sorted(dict.keys(self)))
127 127
128 128 def iterkeys(self):
129 129 return iter(sorted(dict.keys(self)))
130 130
131 131 def iterentries(self):
132 132 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
133 133
134 134 def copy(self):
135 135 c = _lazymanifest('')
136 136 c.update(self)
137 137 return c
138 138
139 139 def diff(self, m2, clean=False):
140 140 '''Finds changes between the current manifest and m2.'''
141 141 diff = {}
142 142
143 143 for fn, e1 in self.iteritems():
144 144 if fn not in m2:
145 145 diff[fn] = e1, (None, '')
146 146 else:
147 147 e2 = m2[fn]
148 148 if e1 != e2:
149 149 diff[fn] = e1, e2
150 150 elif clean:
151 151 diff[fn] = None
152 152
153 153 for fn, e2 in m2.iteritems():
154 154 if fn not in self:
155 155 diff[fn] = (None, ''), e2
156 156
157 157 return diff
158 158
159 159 def filtercopy(self, filterfn):
160 160 c = _lazymanifest('')
161 161 for f, n, fl in self.iterentries():
162 162 if filterfn(f):
163 163 c[f] = n, fl
164 164 return c
165 165
166 166 def text(self):
167 167 """Get the full data of this manifest as a bytestring."""
168 168 return _textv1(self.iterentries())
169 169
170 170 try:
171 171 _lazymanifest = parsers.lazymanifest
172 172 except AttributeError:
173 173 pass
174 174
175 175 class manifestdict(object):
176 176 def __init__(self, data=''):
177 177 if data.startswith('\0'):
178 178 #_lazymanifest can not parse v2
179 179 self._lm = _lazymanifest('')
180 180 for f, n, fl in _parsev2(data):
181 181 self._lm[f] = n, fl
182 182 else:
183 183 self._lm = _lazymanifest(data)
184 184
185 185 def __getitem__(self, key):
186 186 return self._lm[key][0]
187 187
188 188 def find(self, key):
189 189 return self._lm[key]
190 190
191 191 def __len__(self):
192 192 return len(self._lm)
193 193
194 194 def __setitem__(self, key, node):
195 195 self._lm[key] = node, self.flags(key, '')
196 196
197 197 def __contains__(self, key):
198 198 return key in self._lm
199 199
200 200 def __delitem__(self, key):
201 201 del self._lm[key]
202 202
203 203 def __iter__(self):
204 204 return self._lm.__iter__()
205 205
206 206 def iterkeys(self):
207 207 return self._lm.iterkeys()
208 208
209 209 def keys(self):
210 210 return list(self.iterkeys())
211 211
212 212 def filesnotin(self, m2):
213 213 '''Set of files in this manifest that are not in the other'''
214 214 diff = self.diff(m2)
215 215 files = set(filepath
216 216 for filepath, hashflags in diff.iteritems()
217 217 if hashflags[1][0] is None)
218 218 return files
219 219
220 220 @propertycache
221 221 def _dirs(self):
222 222 return util.dirs(self)
223 223
224 224 def dirs(self):
225 225 return self._dirs
226 226
227 227 def hasdir(self, dir):
228 228 return dir in self._dirs
229 229
230 230 def _filesfastpath(self, match):
231 231 '''Checks whether we can correctly and quickly iterate over matcher
232 232 files instead of over manifest files.'''
233 233 files = match.files()
234 234 return (len(files) < 100 and (match.isexact() or
235 235 (match.prefix() and all(fn in self for fn in files))))
236 236
237 237 def walk(self, match):
238 238 '''Generates matching file names.
239 239
240 240 Equivalent to manifest.matches(match).iterkeys(), but without creating
241 241 an entirely new manifest.
242 242
243 243 It also reports nonexistent files by marking them bad with match.bad().
244 244 '''
245 245 if match.always():
246 246 for f in iter(self):
247 247 yield f
248 248 return
249 249
250 250 fset = set(match.files())
251 251
252 252 # avoid the entire walk if we're only looking for specific files
253 253 if self._filesfastpath(match):
254 254 for fn in sorted(fset):
255 255 yield fn
256 256 return
257 257
258 258 for fn in self:
259 259 if fn in fset:
260 260 # specified pattern is the exact name
261 261 fset.remove(fn)
262 262 if match(fn):
263 263 yield fn
264 264
265 265 # for dirstate.walk, files=['.'] means "walk the whole tree".
266 266 # follow that here, too
267 267 fset.discard('.')
268 268
269 269 for fn in sorted(fset):
270 270 if not self.hasdir(fn):
271 271 match.bad(fn, None)
272 272
273 273 def matches(self, match):
274 274 '''generate a new manifest filtered by the match argument'''
275 275 if match.always():
276 276 return self.copy()
277 277
278 278 if self._filesfastpath(match):
279 279 m = manifestdict()
280 280 lm = self._lm
281 281 for fn in match.files():
282 282 if fn in lm:
283 283 m._lm[fn] = lm[fn]
284 284 return m
285 285
286 286 m = manifestdict()
287 287 m._lm = self._lm.filtercopy(match)
288 288 return m
289 289
290 290 def diff(self, m2, clean=False):
291 291 '''Finds changes between the current manifest and m2.
292 292
293 293 Args:
294 294 m2: the manifest to which this manifest should be compared.
295 295 clean: if true, include files unchanged between these manifests
296 296 with a None value in the returned dictionary.
297 297
298 298 The result is returned as a dict with filename as key and
299 299 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
300 300 nodeid in the current/other manifest and fl1/fl2 is the flag
301 301 in the current/other manifest. Where the file does not exist,
302 302 the nodeid will be None and the flags will be the empty
303 303 string.
304 304 '''
305 305 return self._lm.diff(m2._lm, clean)
306 306
307 307 def setflag(self, key, flag):
308 308 self._lm[key] = self[key], flag
309 309
310 310 def get(self, key, default=None):
311 311 try:
312 312 return self._lm[key][0]
313 313 except KeyError:
314 314 return default
315 315
316 316 def flags(self, key, default=''):
317 317 try:
318 318 return self._lm[key][1]
319 319 except KeyError:
320 320 return default
321 321
322 322 def copy(self):
323 323 c = manifestdict()
324 324 c._lm = self._lm.copy()
325 325 return c
326 326
327 327 def iteritems(self):
328 328 return (x[:2] for x in self._lm.iterentries())
329 329
330 330 def iterentries(self):
331 331 return self._lm.iterentries()
332 332
333 333 def text(self, usemanifestv2=False):
334 334 if usemanifestv2:
335 335 return _textv2(self._lm.iterentries())
336 336 else:
337 337 # use (probably) native version for v1
338 338 return self._lm.text()
339 339
340 340 def fastdelta(self, base, changes):
341 341 """Given a base manifest text as an array.array and a list of changes
342 342 relative to that text, compute a delta that can be used by revlog.
343 343 """
344 344 delta = []
345 345 dstart = None
346 346 dend = None
347 347 dline = [""]
348 348 start = 0
349 349 # zero copy representation of base as a buffer
350 350 addbuf = util.buffer(base)
351 351
352 352 changes = list(changes)
353 353 if len(changes) < 1000:
354 354 # start with a readonly loop that finds the offset of
355 355 # each line and creates the deltas
356 356 for f, todelete in changes:
357 357 # bs will either be the index of the item or the insert point
358 358 start, end = _msearch(addbuf, f, start)
359 359 if not todelete:
360 360 h, fl = self._lm[f]
361 361 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
362 362 else:
363 363 if start == end:
364 364 # item we want to delete was not found, error out
365 365 raise AssertionError(
366 366 _("failed to remove %s from manifest") % f)
367 367 l = ""
368 368 if dstart is not None and dstart <= start and dend >= start:
369 369 if dend < end:
370 370 dend = end
371 371 if l:
372 372 dline.append(l)
373 373 else:
374 374 if dstart is not None:
375 375 delta.append([dstart, dend, "".join(dline)])
376 376 dstart = start
377 377 dend = end
378 378 dline = [l]
379 379
380 380 if dstart is not None:
381 381 delta.append([dstart, dend, "".join(dline)])
382 382 # apply the delta to the base, and get a delta for addrevision
383 383 deltatext, arraytext = _addlistdelta(base, delta)
384 384 else:
385 385 # For large changes, it's much cheaper to just build the text and
386 386 # diff it.
387 387 arraytext = array.array('c', self.text())
388 388 deltatext = mdiff.textdiff(base, arraytext)
389 389
390 390 return arraytext, deltatext
391 391
392 392 def _msearch(m, s, lo=0, hi=None):
393 393 '''return a tuple (start, end) that says where to find s within m.
394 394
395 395 If the string is found m[start:end] are the line containing
396 396 that string. If start == end the string was not found and
397 397 they indicate the proper sorted insertion point.
398 398
399 399 m should be a buffer or a string
400 400 s is a string'''
401 401 def advance(i, c):
402 402 while i < lenm and m[i] != c:
403 403 i += 1
404 404 return i
405 405 if not s:
406 406 return (lo, lo)
407 407 lenm = len(m)
408 408 if not hi:
409 409 hi = lenm
410 410 while lo < hi:
411 411 mid = (lo + hi) // 2
412 412 start = mid
413 413 while start > 0 and m[start - 1] != '\n':
414 414 start -= 1
415 415 end = advance(start, '\0')
416 416 if m[start:end] < s:
417 417 # we know that after the null there are 40 bytes of sha1
418 418 # this translates to the bisect lo = mid + 1
419 419 lo = advance(end + 40, '\n') + 1
420 420 else:
421 421 # this translates to the bisect hi = mid
422 422 hi = start
423 423 end = advance(lo, '\0')
424 424 found = m[lo:end]
425 425 if s == found:
426 426 # we know that after the null there are 40 bytes of sha1
427 427 end = advance(end + 40, '\n')
428 428 return (lo, end + 1)
429 429 else:
430 430 return (lo, lo)
431 431
432 432 def _checkforbidden(l):
433 433 """Check filenames for illegal characters."""
434 434 for f in l:
435 435 if '\n' in f or '\r' in f:
436 436 raise error.RevlogError(
437 437 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
438 438
439 439
440 440 # apply the changes collected during the bisect loop to our addlist
441 441 # return a delta suitable for addrevision
442 442 def _addlistdelta(addlist, x):
443 443 # for large addlist arrays, building a new array is cheaper
444 444 # than repeatedly modifying the existing one
445 445 currentposition = 0
446 446 newaddlist = array.array('c')
447 447
448 448 for start, end, content in x:
449 449 newaddlist += addlist[currentposition:start]
450 450 if content:
451 451 newaddlist += array.array('c', content)
452 452
453 453 currentposition = end
454 454
455 455 newaddlist += addlist[currentposition:]
456 456
457 457 deltatext = "".join(struct.pack(">lll", start, end, len(content))
458 458 + content for start, end, content in x)
459 459 return deltatext, newaddlist
460 460
461 461 def _splittopdir(f):
462 462 if '/' in f:
463 463 dir, subpath = f.split('/', 1)
464 464 return dir + '/', subpath
465 465 else:
466 466 return '', f
467 467
468 468 _noop = lambda s: None
469 469
470 470 class treemanifest(object):
471 471 def __init__(self, dir='', text=''):
472 472 self._dir = dir
473 473 self._node = revlog.nullid
474 474 self._loadfunc = _noop
475 475 self._copyfunc = _noop
476 476 self._dirty = False
477 477 self._dirs = {}
478 478 # Using _lazymanifest here is a little slower than plain old dicts
479 479 self._files = {}
480 480 self._flags = {}
481 481 if text:
482 482 def readsubtree(subdir, subm):
483 483 raise AssertionError('treemanifest constructor only accepts '
484 484 'flat manifests')
485 485 self.parse(text, readsubtree)
486 486 self._dirty = True # Mark flat manifest dirty after parsing
487 487
488 488 def _subpath(self, path):
489 489 return self._dir + path
490 490
491 491 def __len__(self):
492 492 self._load()
493 493 size = len(self._files)
494 494 for m in self._dirs.values():
495 495 size += m.__len__()
496 496 return size
497 497
498 498 def _isempty(self):
499 499 self._load() # for consistency; already loaded by all callers
500 500 return (not self._files and (not self._dirs or
501 501 all(m._isempty() for m in self._dirs.values())))
502 502
503 503 def __repr__(self):
504 504 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
505 505 (self._dir, revlog.hex(self._node),
506 506 bool(self._loadfunc is _noop),
507 507 self._dirty, id(self)))
508 508
509 509 def dir(self):
510 510 '''The directory that this tree manifest represents, including a
511 511 trailing '/'. Empty string for the repo root directory.'''
512 512 return self._dir
513 513
514 514 def node(self):
515 515 '''This node of this instance. nullid for unsaved instances. Should
516 516 be updated when the instance is read or written from a revlog.
517 517 '''
518 518 assert not self._dirty
519 519 return self._node
520 520
521 521 def setnode(self, node):
522 522 self._node = node
523 523 self._dirty = False
524 524
525 525 def iterentries(self):
526 526 self._load()
527 527 for p, n in sorted(self._dirs.items() + self._files.items()):
528 528 if p in self._files:
529 529 yield self._subpath(p), n, self._flags.get(p, '')
530 530 else:
531 531 for x in n.iterentries():
532 532 yield x
533 533
534 534 def iteritems(self):
535 535 self._load()
536 536 for p, n in sorted(self._dirs.items() + self._files.items()):
537 537 if p in self._files:
538 538 yield self._subpath(p), n
539 539 else:
540 540 for f, sn in n.iteritems():
541 541 yield f, sn
542 542
543 543 def iterkeys(self):
544 544 self._load()
545 545 for p in sorted(self._dirs.keys() + self._files.keys()):
546 546 if p in self._files:
547 547 yield self._subpath(p)
548 548 else:
549 549 for f in self._dirs[p].iterkeys():
550 550 yield f
551 551
552 552 def keys(self):
553 553 return list(self.iterkeys())
554 554
555 555 def __iter__(self):
556 556 return self.iterkeys()
557 557
558 558 def __contains__(self, f):
559 559 if f is None:
560 560 return False
561 561 self._load()
562 562 dir, subpath = _splittopdir(f)
563 563 if dir:
564 564 if dir not in self._dirs:
565 565 return False
566 566 return self._dirs[dir].__contains__(subpath)
567 567 else:
568 568 return f in self._files
569 569
570 570 def get(self, f, default=None):
571 571 self._load()
572 572 dir, subpath = _splittopdir(f)
573 573 if dir:
574 574 if dir not in self._dirs:
575 575 return default
576 576 return self._dirs[dir].get(subpath, default)
577 577 else:
578 578 return self._files.get(f, default)
579 579
580 580 def __getitem__(self, f):
581 581 self._load()
582 582 dir, subpath = _splittopdir(f)
583 583 if dir:
584 584 return self._dirs[dir].__getitem__(subpath)
585 585 else:
586 586 return self._files[f]
587 587
588 588 def flags(self, f):
589 589 self._load()
590 590 dir, subpath = _splittopdir(f)
591 591 if dir:
592 592 if dir not in self._dirs:
593 593 return ''
594 594 return self._dirs[dir].flags(subpath)
595 595 else:
596 596 if f in self._dirs:
597 597 return ''
598 598 return self._flags.get(f, '')
599 599
600 600 def find(self, f):
601 601 self._load()
602 602 dir, subpath = _splittopdir(f)
603 603 if dir:
604 604 return self._dirs[dir].find(subpath)
605 605 else:
606 606 return self._files[f], self._flags.get(f, '')
607 607
608 608 def __delitem__(self, f):
609 609 self._load()
610 610 dir, subpath = _splittopdir(f)
611 611 if dir:
612 612 self._dirs[dir].__delitem__(subpath)
613 613 # If the directory is now empty, remove it
614 614 if self._dirs[dir]._isempty():
615 615 del self._dirs[dir]
616 616 else:
617 617 del self._files[f]
618 618 if f in self._flags:
619 619 del self._flags[f]
620 620 self._dirty = True
621 621
622 622 def __setitem__(self, f, n):
623 623 assert n is not None
624 624 self._load()
625 625 dir, subpath = _splittopdir(f)
626 626 if dir:
627 627 if dir not in self._dirs:
628 628 self._dirs[dir] = treemanifest(self._subpath(dir))
629 629 self._dirs[dir].__setitem__(subpath, n)
630 630 else:
631 631 self._files[f] = n[:21] # to match manifestdict's behavior
632 632 self._dirty = True
633 633
634 634 def _load(self):
635 635 if self._loadfunc is not _noop:
636 636 lf, self._loadfunc = self._loadfunc, _noop
637 637 lf(self)
638 638 elif self._copyfunc is not _noop:
639 639 cf, self._copyfunc = self._copyfunc, _noop
640 640 cf(self)
641 641
642 642 def setflag(self, f, flags):
643 643 """Set the flags (symlink, executable) for path f."""
644 644 self._load()
645 645 dir, subpath = _splittopdir(f)
646 646 if dir:
647 647 if dir not in self._dirs:
648 648 self._dirs[dir] = treemanifest(self._subpath(dir))
649 649 self._dirs[dir].setflag(subpath, flags)
650 650 else:
651 651 self._flags[f] = flags
652 652 self._dirty = True
653 653
654 654 def copy(self):
655 655 copy = treemanifest(self._dir)
656 656 copy._node = self._node
657 657 copy._dirty = self._dirty
658 658 if self._copyfunc is _noop:
659 659 def _copyfunc(s):
660 660 self._load()
661 661 for d in self._dirs:
662 662 s._dirs[d] = self._dirs[d].copy()
663 663 s._files = dict.copy(self._files)
664 664 s._flags = dict.copy(self._flags)
665 665 if self._loadfunc is _noop:
666 666 _copyfunc(copy)
667 667 else:
668 668 copy._copyfunc = _copyfunc
669 669 else:
670 670 copy._copyfunc = self._copyfunc
671 671 return copy
672 672
673 673 def filesnotin(self, m2):
674 674 '''Set of files in this manifest that are not in the other'''
675 675 files = set()
676 676 def _filesnotin(t1, t2):
677 677 if t1._node == t2._node and not t1._dirty and not t2._dirty:
678 678 return
679 679 t1._load()
680 680 t2._load()
681 681 for d, m1 in t1._dirs.iteritems():
682 682 if d in t2._dirs:
683 683 m2 = t2._dirs[d]
684 684 _filesnotin(m1, m2)
685 685 else:
686 686 files.update(m1.iterkeys())
687 687
688 688 for fn in t1._files.iterkeys():
689 689 if fn not in t2._files:
690 690 files.add(t1._subpath(fn))
691 691
692 692 _filesnotin(self, m2)
693 693 return files
694 694
695 695 @propertycache
696 696 def _alldirs(self):
697 697 return util.dirs(self)
698 698
699 699 def dirs(self):
700 700 return self._alldirs
701 701
702 702 def hasdir(self, dir):
703 703 self._load()
704 704 topdir, subdir = _splittopdir(dir)
705 705 if topdir:
706 706 if topdir in self._dirs:
707 707 return self._dirs[topdir].hasdir(subdir)
708 708 return False
709 709 return (dir + '/') in self._dirs
710 710
711 711 def walk(self, match):
712 712 '''Generates matching file names.
713 713
714 714 Equivalent to manifest.matches(match).iterkeys(), but without creating
715 715 an entirely new manifest.
716 716
717 717 It also reports nonexistent files by marking them bad with match.bad().
718 718 '''
719 719 if match.always():
720 720 for f in iter(self):
721 721 yield f
722 722 return
723 723
724 724 fset = set(match.files())
725 725
726 726 for fn in self._walk(match):
727 727 if fn in fset:
728 728 # specified pattern is the exact name
729 729 fset.remove(fn)
730 730 yield fn
731 731
732 732 # for dirstate.walk, files=['.'] means "walk the whole tree".
733 733 # follow that here, too
734 734 fset.discard('.')
735 735
736 736 for fn in sorted(fset):
737 737 if not self.hasdir(fn):
738 738 match.bad(fn, None)
739 739
740 740 def _walk(self, match):
741 741 '''Recursively generates matching file names for walk().'''
742 742 if not match.visitdir(self._dir[:-1] or '.'):
743 743 return
744 744
745 745 # yield this dir's files and walk its submanifests
746 746 self._load()
747 747 for p in sorted(self._dirs.keys() + self._files.keys()):
748 748 if p in self._files:
749 749 fullp = self._subpath(p)
750 750 if match(fullp):
751 751 yield fullp
752 752 else:
753 753 for f in self._dirs[p]._walk(match):
754 754 yield f
755 755
756 756 def matches(self, match):
757 757 '''generate a new manifest filtered by the match argument'''
758 758 if match.always():
759 759 return self.copy()
760 760
761 761 return self._matches(match)
762 762
763 763 def _matches(self, match):
764 764 '''recursively generate a new manifest filtered by the match argument.
765 765 '''
766 766
767 767 visit = match.visitdir(self._dir[:-1] or '.')
768 768 if visit == 'all':
769 769 return self.copy()
770 770 ret = treemanifest(self._dir)
771 771 if not visit:
772 772 return ret
773 773
774 774 self._load()
775 775 for fn in self._files:
776 776 fullp = self._subpath(fn)
777 777 if not match(fullp):
778 778 continue
779 779 ret._files[fn] = self._files[fn]
780 780 if fn in self._flags:
781 781 ret._flags[fn] = self._flags[fn]
782 782
783 783 for dir, subm in self._dirs.iteritems():
784 784 m = subm._matches(match)
785 785 if not m._isempty():
786 786 ret._dirs[dir] = m
787 787
788 788 if not ret._isempty():
789 789 ret._dirty = True
790 790 return ret
791 791
792 792 def diff(self, m2, clean=False):
793 793 '''Finds changes between the current manifest and m2.
794 794
795 795 Args:
796 796 m2: the manifest to which this manifest should be compared.
797 797 clean: if true, include files unchanged between these manifests
798 798 with a None value in the returned dictionary.
799 799
800 800 The result is returned as a dict with filename as key and
801 801 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
802 802 nodeid in the current/other manifest and fl1/fl2 is the flag
803 803 in the current/other manifest. Where the file does not exist,
804 804 the nodeid will be None and the flags will be the empty
805 805 string.
806 806 '''
807 807 result = {}
808 808 emptytree = treemanifest()
809 809 def _diff(t1, t2):
810 810 if t1._node == t2._node and not t1._dirty and not t2._dirty:
811 811 return
812 812 t1._load()
813 813 t2._load()
814 814 for d, m1 in t1._dirs.iteritems():
815 815 m2 = t2._dirs.get(d, emptytree)
816 816 _diff(m1, m2)
817 817
818 818 for d, m2 in t2._dirs.iteritems():
819 819 if d not in t1._dirs:
820 820 _diff(emptytree, m2)
821 821
822 822 for fn, n1 in t1._files.iteritems():
823 823 fl1 = t1._flags.get(fn, '')
824 824 n2 = t2._files.get(fn, None)
825 825 fl2 = t2._flags.get(fn, '')
826 826 if n1 != n2 or fl1 != fl2:
827 827 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
828 828 elif clean:
829 829 result[t1._subpath(fn)] = None
830 830
831 831 for fn, n2 in t2._files.iteritems():
832 832 if fn not in t1._files:
833 833 fl2 = t2._flags.get(fn, '')
834 834 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
835 835
836 836 _diff(self, m2)
837 837 return result
838 838
839 839 def unmodifiedsince(self, m2):
840 840 return not self._dirty and not m2._dirty and self._node == m2._node
841 841
842 842 def parse(self, text, readsubtree):
843 843 for f, n, fl in _parse(text):
844 844 if fl == 't':
845 845 f = f + '/'
846 846 self._dirs[f] = readsubtree(self._subpath(f), n)
847 847 elif '/' in f:
848 848 # This is a flat manifest, so use __setitem__ and setflag rather
849 849 # than assigning directly to _files and _flags, so we can
850 850 # assign a path in a subdirectory, and to mark dirty (compared
851 851 # to nullid).
852 852 self[f] = n
853 853 if fl:
854 854 self.setflag(f, fl)
855 855 else:
856 856 # Assigning to _files and _flags avoids marking as dirty,
857 857 # and should be a little faster.
858 858 self._files[f] = n
859 859 if fl:
860 860 self._flags[f] = fl
861 861
862 862 def text(self, usemanifestv2=False):
863 863 """Get the full data of this manifest as a bytestring."""
864 864 self._load()
865 865 return _text(self.iterentries(), usemanifestv2)
866 866
867 867 def dirtext(self, usemanifestv2=False):
868 868 """Get the full data of this directory as a bytestring. Make sure that
869 869 any submanifests have been written first, so their nodeids are correct.
870 870 """
871 871 self._load()
872 872 flags = self.flags
873 873 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
874 874 files = [(f, self._files[f], flags(f)) for f in self._files]
875 875 return _text(sorted(dirs + files), usemanifestv2)
876 876
877 877 def read(self, gettext, readsubtree):
878 878 def _load_for_read(s):
879 879 s.parse(gettext(), readsubtree)
880 880 s._dirty = False
881 881 self._loadfunc = _load_for_read
882 882
883 883 def writesubtrees(self, m1, m2, writesubtree):
884 884 self._load() # for consistency; should never have any effect here
885 885 m1._load()
886 886 m2._load()
887 887 emptytree = treemanifest()
888 888 for d, subm in self._dirs.iteritems():
889 889 subp1 = m1._dirs.get(d, emptytree)._node
890 890 subp2 = m2._dirs.get(d, emptytree)._node
891 891 if subp1 == revlog.nullid:
892 892 subp1, subp2 = subp2, subp1
893 893 writesubtree(subm, subp1, subp2)
894 894
895 895 class manifestrevlog(revlog.revlog):
896 896 '''A revlog that stores manifest texts. This is responsible for caching the
897 897 full-text manifest contents.
898 898 '''
899 def __init__(self, opener, indexfile):
900 super(manifestrevlog, self).__init__(opener, indexfile)
901
899 def __init__(self, opener, dir=''):
902 900 # During normal operations, we expect to deal with not more than four
903 901 # revs at a time (such as during commit --amend). When rebasing large
904 902 # stacks of commits, the number can go up, hence the config knob below.
905 903 cachesize = 4
904 usetreemanifest = False
905 usemanifestv2 = False
906 906 opts = getattr(opener, 'options', None)
907 907 if opts is not None:
908 908 cachesize = opts.get('manifestcachesize', cachesize)
909 usetreemanifest = opts.get('treemanifest', usetreemanifest)
910 usemanifestv2 = opts.get('manifestv2', usemanifestv2)
911
912 self._treeondisk = usetreemanifest
913 self._usemanifestv2 = usemanifestv2
914
909 915 self._fulltextcache = util.lrucachedict(cachesize)
910 916
917 indexfile = "00manifest.i"
918 if dir:
919 assert self._treeondisk, 'opts is %r' % opts
920 if not dir.endswith('/'):
921 dir = dir + '/'
922 indexfile = "meta/" + dir + "00manifest.i"
923 self._dir = dir
924
925 super(manifestrevlog, self).__init__(opener, indexfile)
926
911 927 @property
912 928 def fulltextcache(self):
913 929 return self._fulltextcache
914 930
915 931 def clearcaches(self):
916 932 super(manifestrevlog, self).clearcaches()
917 933 self._fulltextcache.clear()
918 934
919 935 class manifestlog(object):
920 936 """A collection class representing the collection of manifest snapshots
921 937 referenced by commits in the repository.
922 938
923 939 In this situation, 'manifest' refers to the abstract concept of a snapshot
924 940 of the list of files in the given commit. Consumers of the output of this
925 941 class do not care about the implementation details of the actual manifests
926 942 they receive (i.e. tree or flat or lazily loaded, etc)."""
927 943 def __init__(self, opener, repo):
928 944 self._repo = repo
929 945
930 946 # We'll separate this into it's own cache once oldmanifest is no longer
931 947 # used
932 948 self._mancache = repo.manifest._mancache
933 949
934 950 @property
935 951 def _revlog(self):
936 952 return self._repo.manifest
937 953
938 954 @property
939 955 def _oldmanifest(self):
940 956 # _revlog is the same as _oldmanifest right now, but we eventually want
941 957 # to delete _oldmanifest while still allowing manifestlog to access the
942 958 # revlog specific apis.
943 959 return self._repo.manifest
944 960
945 961 def __getitem__(self, node):
946 962 """Retrieves the manifest instance for the given node. Throws a KeyError
947 963 if not found.
948 964 """
949 965 if node in self._mancache:
950 966 cachemf = self._mancache[node]
951 967 # The old manifest may put non-ctx manifests in the cache, so skip
952 968 # those since they don't implement the full api.
953 969 if (isinstance(cachemf, manifestctx) or
954 970 isinstance(cachemf, treemanifestctx)):
955 971 return cachemf
956 972
957 973 if self._oldmanifest._treeinmem:
958 974 m = treemanifestctx(self._revlog, '', node)
959 975 else:
960 976 m = manifestctx(self._revlog, node)
961 977 if node != revlog.nullid:
962 978 self._mancache[node] = m
963 979 return m
964 980
965 981 class manifestctx(object):
966 982 """A class representing a single revision of a manifest, including its
967 983 contents, its parent revs, and its linkrev.
968 984 """
969 985 def __init__(self, revlog, node):
970 986 self._revlog = revlog
971 987 self._data = None
972 988
973 989 self._node = node
974 990
975 991 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
976 992 # but let's add it later when something needs it and we can load it
977 993 # lazily.
978 994 #self.p1, self.p2 = revlog.parents(node)
979 995 #rev = revlog.rev(node)
980 996 #self.linkrev = revlog.linkrev(rev)
981 997
982 998 def node(self):
983 999 return self._node
984 1000
985 1001 def read(self):
986 1002 if not self._data:
987 1003 if self._node == revlog.nullid:
988 1004 self._data = manifestdict()
989 1005 else:
990 1006 text = self._revlog.revision(self._node)
991 1007 arraytext = array.array('c', text)
992 1008 self._revlog._fulltextcache[self._node] = arraytext
993 1009 self._data = manifestdict(text)
994 1010 return self._data
995 1011
996 1012 def readfast(self):
997 1013 rl = self._revlog
998 1014 r = rl.rev(self._node)
999 1015 deltaparent = rl.deltaparent(r)
1000 1016 if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
1001 1017 return self.readdelta()
1002 1018 return self.read()
1003 1019
1004 1020 def readdelta(self):
1005 1021 revlog = self._revlog
1006 1022 if revlog._usemanifestv2:
1007 1023 # Need to perform a slow delta
1008 1024 r0 = revlog.deltaparent(revlog.rev(self._node))
1009 1025 m0 = manifestctx(revlog, revlog.node(r0)).read()
1010 1026 m1 = self.read()
1011 1027 md = manifestdict()
1012 1028 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1013 1029 if n1:
1014 1030 md[f] = n1
1015 1031 if fl1:
1016 1032 md.setflag(f, fl1)
1017 1033 return md
1018 1034
1019 1035 r = revlog.rev(self._node)
1020 1036 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1021 1037 return manifestdict(d)
1022 1038
1023 1039 class treemanifestctx(object):
1024 1040 def __init__(self, revlog, dir, node):
1025 1041 revlog = revlog.dirlog(dir)
1026 1042 self._revlog = revlog
1027 1043 self._dir = dir
1028 1044 self._data = None
1029 1045
1030 1046 self._node = node
1031 1047
1032 1048 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
1033 1049 # we can instantiate treemanifestctx objects for directories we don't
1034 1050 # have on disk.
1035 1051 #self.p1, self.p2 = revlog.parents(node)
1036 1052 #rev = revlog.rev(node)
1037 1053 #self.linkrev = revlog.linkrev(rev)
1038 1054
1039 1055 def read(self):
1040 1056 if not self._data:
1041 1057 if self._node == revlog.nullid:
1042 1058 self._data = treemanifest()
1043 1059 elif self._revlog._treeondisk:
1044 1060 m = treemanifest(dir=self._dir)
1045 1061 def gettext():
1046 1062 return self._revlog.revision(self._node)
1047 1063 def readsubtree(dir, subm):
1048 1064 return treemanifestctx(self._revlog, dir, subm).read()
1049 1065 m.read(gettext, readsubtree)
1050 1066 m.setnode(self._node)
1051 1067 self._data = m
1052 1068 else:
1053 1069 text = self._revlog.revision(self._node)
1054 1070 arraytext = array.array('c', text)
1055 1071 self._revlog.fulltextcache[self._node] = arraytext
1056 1072 self._data = treemanifest(dir=self._dir, text=text)
1057 1073
1058 1074 return self._data
1059 1075
1060 1076 def node(self):
1061 1077 return self._node
1062 1078
1063 1079 def readdelta(self):
1064 1080 # Need to perform a slow delta
1065 1081 revlog = self._revlog
1066 1082 r0 = revlog.deltaparent(revlog.rev(self._node))
1067 1083 m0 = treemanifestctx(revlog, revlog.node(r0), dir=self._dir).read()
1068 1084 m1 = self.read()
1069 1085 md = treemanifest(dir=self._dir)
1070 1086 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1071 1087 if n1:
1072 1088 md[f] = n1
1073 1089 if fl1:
1074 1090 md.setflag(f, fl1)
1075 1091 return md
1076 1092
1077 1093 def readfast(self):
1078 1094 rl = self._revlog
1079 1095 r = rl.rev(self._node)
1080 1096 deltaparent = rl.deltaparent(r)
1081 1097 if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
1082 1098 return self.readdelta()
1083 1099 return self.read()
1084 1100
1085 1101 class manifest(manifestrevlog):
1086 1102 def __init__(self, opener, dir='', dirlogcache=None):
1087 1103 '''The 'dir' and 'dirlogcache' arguments are for internal use by
1088 1104 manifest.manifest only. External users should create a root manifest
1089 1105 log with manifest.manifest(opener) and call dirlog() on it.
1090 1106 '''
1091 1107 # During normal operations, we expect to deal with not more than four
1092 1108 # revs at a time (such as during commit --amend). When rebasing large
1093 1109 # stacks of commits, the number can go up, hence the config knob below.
1094 1110 cachesize = 4
1095 1111 usetreemanifest = False
1096 usemanifestv2 = False
1097 1112 opts = getattr(opener, 'options', None)
1098 1113 if opts is not None:
1099 1114 cachesize = opts.get('manifestcachesize', cachesize)
1100 1115 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1101 usemanifestv2 = opts.get('manifestv2', usemanifestv2)
1102 1116 self._mancache = util.lrucachedict(cachesize)
1103 1117 self._treeinmem = usetreemanifest
1104 self._treeondisk = usetreemanifest
1105 self._usemanifestv2 = usemanifestv2
1106 indexfile = "00manifest.i"
1107 if dir:
1108 assert self._treeondisk, 'opts is %r' % opts
1109 if not dir.endswith('/'):
1110 dir = dir + '/'
1111 indexfile = "meta/" + dir + "00manifest.i"
1112 super(manifest, self).__init__(opener, indexfile)
1113 self._dir = dir
1118 super(manifest, self).__init__(opener, dir=dir)
1114 1119 # The dirlogcache is kept on the root manifest log
1115 1120 if dir:
1116 1121 self._dirlogcache = dirlogcache
1117 1122 else:
1118 1123 self._dirlogcache = {'': self}
1119 1124
1120 1125 def _newmanifest(self, data=''):
1121 1126 if self._treeinmem:
1122 1127 return treemanifest(self._dir, data)
1123 1128 return manifestdict(data)
1124 1129
1125 1130 def dirlog(self, dir):
1126 1131 if dir:
1127 1132 assert self._treeondisk
1128 1133 if dir not in self._dirlogcache:
1129 1134 self._dirlogcache[dir] = manifest(self.opener, dir,
1130 1135 self._dirlogcache)
1131 1136 return self._dirlogcache[dir]
1132 1137
1133 1138 def _slowreaddelta(self, node):
1134 1139 r0 = self.deltaparent(self.rev(node))
1135 1140 m0 = self.read(self.node(r0))
1136 1141 m1 = self.read(node)
1137 1142 md = self._newmanifest()
1138 1143 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1139 1144 if n1:
1140 1145 md[f] = n1
1141 1146 if fl1:
1142 1147 md.setflag(f, fl1)
1143 1148 return md
1144 1149
1145 1150 def readdelta(self, node):
1146 1151 if self._usemanifestv2 or self._treeondisk:
1147 1152 return self._slowreaddelta(node)
1148 1153 r = self.rev(node)
1149 1154 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
1150 1155 return self._newmanifest(d)
1151 1156
1152 1157 def readshallowdelta(self, node):
1153 1158 '''For flat manifests, this is the same as readdelta(). For
1154 1159 treemanifests, this will read the delta for this revlog's directory,
1155 1160 without recursively reading subdirectory manifests. Instead, any
1156 1161 subdirectory entry will be reported as it appears in the manifests, i.e.
1157 1162 the subdirectory will be reported among files and distinguished only by
1158 1163 its 't' flag.'''
1159 1164 if not self._treeondisk:
1160 1165 return self.readdelta(node)
1161 1166 if self._usemanifestv2:
1162 1167 raise error.Abort(
1163 1168 _("readshallowdelta() not implemented for manifestv2"))
1164 1169 r = self.rev(node)
1165 1170 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
1166 1171 return manifestdict(d)
1167 1172
1168 1173 def readshallowfast(self, node):
1169 1174 '''like readfast(), but calls readshallowdelta() instead of readdelta()
1170 1175 '''
1171 1176 r = self.rev(node)
1172 1177 deltaparent = self.deltaparent(r)
1173 1178 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
1174 1179 return self.readshallowdelta(node)
1175 1180 return self.readshallow(node)
1176 1181
1177 1182 def read(self, node):
1178 1183 if node == revlog.nullid:
1179 1184 return self._newmanifest() # don't upset local cache
1180 1185 if node in self._mancache:
1181 1186 cached = self._mancache[node]
1182 1187 if (isinstance(cached, manifestctx) or
1183 1188 isinstance(cached, treemanifestctx)):
1184 1189 cached = cached.read()
1185 1190 return cached
1186 1191 if self._treeondisk:
1187 1192 def gettext():
1188 1193 return self.revision(node)
1189 1194 def readsubtree(dir, subm):
1190 1195 return self.dirlog(dir).read(subm)
1191 1196 m = self._newmanifest()
1192 1197 m.read(gettext, readsubtree)
1193 1198 m.setnode(node)
1194 1199 arraytext = None
1195 1200 else:
1196 1201 text = self.revision(node)
1197 1202 m = self._newmanifest(text)
1198 1203 arraytext = array.array('c', text)
1199 1204 self._mancache[node] = m
1200 1205 self.fulltextcache[node] = arraytext
1201 1206 return m
1202 1207
1203 1208 def readshallow(self, node):
1204 1209 '''Reads the manifest in this directory. When using flat manifests,
1205 1210 this manifest will generally have files in subdirectories in it. Does
1206 1211 not cache the manifest as the callers generally do not read the same
1207 1212 version twice.'''
1208 1213 return manifestdict(self.revision(node))
1209 1214
1210 1215 def find(self, node, f):
1211 1216 '''look up entry for a single file efficiently.
1212 1217 return (node, flags) pair if found, (None, None) if not.'''
1213 1218 m = self.read(node)
1214 1219 try:
1215 1220 return m.find(f)
1216 1221 except KeyError:
1217 1222 return None, None
1218 1223
1219 1224 def add(self, m, transaction, link, p1, p2, added, removed):
1220 1225 if (p1 in self.fulltextcache and not self._treeinmem
1221 1226 and not self._usemanifestv2):
1222 1227 # If our first parent is in the manifest cache, we can
1223 1228 # compute a delta here using properties we know about the
1224 1229 # manifest up-front, which may save time later for the
1225 1230 # revlog layer.
1226 1231
1227 1232 _checkforbidden(added)
1228 1233 # combine the changed lists into one sorted iterator
1229 1234 work = heapq.merge([(x, False) for x in added],
1230 1235 [(x, True) for x in removed])
1231 1236
1232 1237 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1233 1238 cachedelta = self.rev(p1), deltatext
1234 1239 text = util.buffer(arraytext)
1235 1240 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
1236 1241 else:
1237 1242 # The first parent manifest isn't already loaded, so we'll
1238 1243 # just encode a fulltext of the manifest and pass that
1239 1244 # through to the revlog layer, and let it handle the delta
1240 1245 # process.
1241 1246 if self._treeondisk:
1242 1247 m1 = self.read(p1)
1243 1248 m2 = self.read(p2)
1244 1249 n = self._addtree(m, transaction, link, m1, m2)
1245 1250 arraytext = None
1246 1251 else:
1247 1252 text = m.text(self._usemanifestv2)
1248 1253 n = self.addrevision(text, transaction, link, p1, p2)
1249 1254 arraytext = array.array('c', text)
1250 1255
1251 1256 self._mancache[n] = m
1252 1257 self.fulltextcache[n] = arraytext
1253 1258
1254 1259 return n
1255 1260
1256 1261 def _addtree(self, m, transaction, link, m1, m2):
1257 1262 # If the manifest is unchanged compared to one parent,
1258 1263 # don't write a new revision
1259 1264 if m.unmodifiedsince(m1) or m.unmodifiedsince(m2):
1260 1265 return m.node()
1261 1266 def writesubtree(subm, subp1, subp2):
1262 1267 sublog = self.dirlog(subm.dir())
1263 1268 sublog.add(subm, transaction, link, subp1, subp2, None, None)
1264 1269 m.writesubtrees(m1, m2, writesubtree)
1265 1270 text = m.dirtext(self._usemanifestv2)
1266 1271 # Double-check whether contents are unchanged to one parent
1267 1272 if text == m1.dirtext(self._usemanifestv2):
1268 1273 n = m1.node()
1269 1274 elif text == m2.dirtext(self._usemanifestv2):
1270 1275 n = m2.node()
1271 1276 else:
1272 1277 n = self.addrevision(text, transaction, link, m1.node(), m2.node())
1273 1278 # Save nodeid so parent manifest can calculate its nodeid
1274 1279 m.setnode(n)
1275 1280 return n
1276 1281
1277 1282 def clearcaches(self):
1278 1283 super(manifest, self).clearcaches()
1279 1284 self._mancache.clear()
1280 1285 self._dirlogcache = {'': self}
General Comments 0
You need to be logged in to leave comments. Login now