##// END OF EJS Templates
manifest: specify checkambig=True to revlog.__init__, to avoid ambiguity...
FUJIWARA Katsunori -
r29998:14ad8e2a default
parent child Browse files
Show More
@@ -1,1298 +1,1299 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import heapq
12 12 import os
13 13 import struct
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 error,
18 18 mdiff,
19 19 parsers,
20 20 revlog,
21 21 util,
22 22 )
23 23
24 24 propertycache = util.propertycache
25 25
26 26 def _parsev1(data):
27 27 # This method does a little bit of excessive-looking
28 28 # precondition checking. This is so that the behavior of this
29 29 # class exactly matches its C counterpart to try and help
30 30 # prevent surprise breakage for anyone that develops against
31 31 # the pure version.
32 32 if data and data[-1] != '\n':
33 33 raise ValueError('Manifest did not end in a newline.')
34 34 prev = None
35 35 for l in data.splitlines():
36 36 if prev is not None and prev > l:
37 37 raise ValueError('Manifest lines not in sorted order.')
38 38 prev = l
39 39 f, n = l.split('\0')
40 40 if len(n) > 40:
41 41 yield f, revlog.bin(n[:40]), n[40:]
42 42 else:
43 43 yield f, revlog.bin(n), ''
44 44
45 45 def _parsev2(data):
46 46 metadataend = data.find('\n')
47 47 # Just ignore metadata for now
48 48 pos = metadataend + 1
49 49 prevf = ''
50 50 while pos < len(data):
51 51 end = data.find('\n', pos + 1) # +1 to skip stem length byte
52 52 if end == -1:
53 53 raise ValueError('Manifest ended with incomplete file entry.')
54 54 stemlen = ord(data[pos])
55 55 items = data[pos + 1:end].split('\0')
56 56 f = prevf[:stemlen] + items[0]
57 57 if prevf > f:
58 58 raise ValueError('Manifest entries not in sorted order.')
59 59 fl = items[1]
60 60 # Just ignore metadata (items[2:] for now)
61 61 n = data[end + 1:end + 21]
62 62 yield f, n, fl
63 63 pos = end + 22
64 64 prevf = f
65 65
66 66 def _parse(data):
67 67 """Generates (path, node, flags) tuples from a manifest text"""
68 68 if data.startswith('\0'):
69 69 return iter(_parsev2(data))
70 70 else:
71 71 return iter(_parsev1(data))
72 72
73 73 def _text(it, usemanifestv2):
74 74 """Given an iterator over (path, node, flags) tuples, returns a manifest
75 75 text"""
76 76 if usemanifestv2:
77 77 return _textv2(it)
78 78 else:
79 79 return _textv1(it)
80 80
81 81 def _textv1(it):
82 82 files = []
83 83 lines = []
84 84 _hex = revlog.hex
85 85 for f, n, fl in it:
86 86 files.append(f)
87 87 # if this is changed to support newlines in filenames,
88 88 # be sure to check the templates/ dir again (especially *-raw.tmpl)
89 89 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
90 90
91 91 _checkforbidden(files)
92 92 return ''.join(lines)
93 93
94 94 def _textv2(it):
95 95 files = []
96 96 lines = ['\0\n']
97 97 prevf = ''
98 98 for f, n, fl in it:
99 99 files.append(f)
100 100 stem = os.path.commonprefix([prevf, f])
101 101 stemlen = min(len(stem), 255)
102 102 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
103 103 prevf = f
104 104 _checkforbidden(files)
105 105 return ''.join(lines)
106 106
107 107 class _lazymanifest(dict):
108 108 """This is the pure implementation of lazymanifest.
109 109
110 110 It has not been optimized *at all* and is not lazy.
111 111 """
112 112
113 113 def __init__(self, data):
114 114 dict.__init__(self)
115 115 for f, n, fl in _parse(data):
116 116 self[f] = n, fl
117 117
118 118 def __setitem__(self, k, v):
119 119 node, flag = v
120 120 assert node is not None
121 121 if len(node) > 21:
122 122 node = node[:21] # match c implementation behavior
123 123 dict.__setitem__(self, k, (node, flag))
124 124
125 125 def __iter__(self):
126 126 return iter(sorted(dict.keys(self)))
127 127
128 128 def iterkeys(self):
129 129 return iter(sorted(dict.keys(self)))
130 130
131 131 def iterentries(self):
132 132 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
133 133
134 134 def copy(self):
135 135 c = _lazymanifest('')
136 136 c.update(self)
137 137 return c
138 138
139 139 def diff(self, m2, clean=False):
140 140 '''Finds changes between the current manifest and m2.'''
141 141 diff = {}
142 142
143 143 for fn, e1 in self.iteritems():
144 144 if fn not in m2:
145 145 diff[fn] = e1, (None, '')
146 146 else:
147 147 e2 = m2[fn]
148 148 if e1 != e2:
149 149 diff[fn] = e1, e2
150 150 elif clean:
151 151 diff[fn] = None
152 152
153 153 for fn, e2 in m2.iteritems():
154 154 if fn not in self:
155 155 diff[fn] = (None, ''), e2
156 156
157 157 return diff
158 158
159 159 def filtercopy(self, filterfn):
160 160 c = _lazymanifest('')
161 161 for f, n, fl in self.iterentries():
162 162 if filterfn(f):
163 163 c[f] = n, fl
164 164 return c
165 165
166 166 def text(self):
167 167 """Get the full data of this manifest as a bytestring."""
168 168 return _textv1(self.iterentries())
169 169
170 170 try:
171 171 _lazymanifest = parsers.lazymanifest
172 172 except AttributeError:
173 173 pass
174 174
175 175 class manifestdict(object):
176 176 def __init__(self, data=''):
177 177 if data.startswith('\0'):
178 178 #_lazymanifest can not parse v2
179 179 self._lm = _lazymanifest('')
180 180 for f, n, fl in _parsev2(data):
181 181 self._lm[f] = n, fl
182 182 else:
183 183 self._lm = _lazymanifest(data)
184 184
185 185 def __getitem__(self, key):
186 186 return self._lm[key][0]
187 187
188 188 def find(self, key):
189 189 return self._lm[key]
190 190
191 191 def __len__(self):
192 192 return len(self._lm)
193 193
194 194 def __setitem__(self, key, node):
195 195 self._lm[key] = node, self.flags(key, '')
196 196
197 197 def __contains__(self, key):
198 198 return key in self._lm
199 199
200 200 def __delitem__(self, key):
201 201 del self._lm[key]
202 202
203 203 def __iter__(self):
204 204 return self._lm.__iter__()
205 205
206 206 def iterkeys(self):
207 207 return self._lm.iterkeys()
208 208
209 209 def keys(self):
210 210 return list(self.iterkeys())
211 211
212 212 def filesnotin(self, m2):
213 213 '''Set of files in this manifest that are not in the other'''
214 214 diff = self.diff(m2)
215 215 files = set(filepath
216 216 for filepath, hashflags in diff.iteritems()
217 217 if hashflags[1][0] is None)
218 218 return files
219 219
220 220 @propertycache
221 221 def _dirs(self):
222 222 return util.dirs(self)
223 223
224 224 def dirs(self):
225 225 return self._dirs
226 226
227 227 def hasdir(self, dir):
228 228 return dir in self._dirs
229 229
230 230 def _filesfastpath(self, match):
231 231 '''Checks whether we can correctly and quickly iterate over matcher
232 232 files instead of over manifest files.'''
233 233 files = match.files()
234 234 return (len(files) < 100 and (match.isexact() or
235 235 (match.prefix() and all(fn in self for fn in files))))
236 236
237 237 def walk(self, match):
238 238 '''Generates matching file names.
239 239
240 240 Equivalent to manifest.matches(match).iterkeys(), but without creating
241 241 an entirely new manifest.
242 242
243 243 It also reports nonexistent files by marking them bad with match.bad().
244 244 '''
245 245 if match.always():
246 246 for f in iter(self):
247 247 yield f
248 248 return
249 249
250 250 fset = set(match.files())
251 251
252 252 # avoid the entire walk if we're only looking for specific files
253 253 if self._filesfastpath(match):
254 254 for fn in sorted(fset):
255 255 yield fn
256 256 return
257 257
258 258 for fn in self:
259 259 if fn in fset:
260 260 # specified pattern is the exact name
261 261 fset.remove(fn)
262 262 if match(fn):
263 263 yield fn
264 264
265 265 # for dirstate.walk, files=['.'] means "walk the whole tree".
266 266 # follow that here, too
267 267 fset.discard('.')
268 268
269 269 for fn in sorted(fset):
270 270 if not self.hasdir(fn):
271 271 match.bad(fn, None)
272 272
273 273 def matches(self, match):
274 274 '''generate a new manifest filtered by the match argument'''
275 275 if match.always():
276 276 return self.copy()
277 277
278 278 if self._filesfastpath(match):
279 279 m = manifestdict()
280 280 lm = self._lm
281 281 for fn in match.files():
282 282 if fn in lm:
283 283 m._lm[fn] = lm[fn]
284 284 return m
285 285
286 286 m = manifestdict()
287 287 m._lm = self._lm.filtercopy(match)
288 288 return m
289 289
290 290 def diff(self, m2, clean=False):
291 291 '''Finds changes between the current manifest and m2.
292 292
293 293 Args:
294 294 m2: the manifest to which this manifest should be compared.
295 295 clean: if true, include files unchanged between these manifests
296 296 with a None value in the returned dictionary.
297 297
298 298 The result is returned as a dict with filename as key and
299 299 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
300 300 nodeid in the current/other manifest and fl1/fl2 is the flag
301 301 in the current/other manifest. Where the file does not exist,
302 302 the nodeid will be None and the flags will be the empty
303 303 string.
304 304 '''
305 305 return self._lm.diff(m2._lm, clean)
306 306
307 307 def setflag(self, key, flag):
308 308 self._lm[key] = self[key], flag
309 309
310 310 def get(self, key, default=None):
311 311 try:
312 312 return self._lm[key][0]
313 313 except KeyError:
314 314 return default
315 315
316 316 def flags(self, key, default=''):
317 317 try:
318 318 return self._lm[key][1]
319 319 except KeyError:
320 320 return default
321 321
322 322 def copy(self):
323 323 c = manifestdict()
324 324 c._lm = self._lm.copy()
325 325 return c
326 326
327 327 def iteritems(self):
328 328 return (x[:2] for x in self._lm.iterentries())
329 329
330 330 def iterentries(self):
331 331 return self._lm.iterentries()
332 332
333 333 def text(self, usemanifestv2=False):
334 334 if usemanifestv2:
335 335 return _textv2(self._lm.iterentries())
336 336 else:
337 337 # use (probably) native version for v1
338 338 return self._lm.text()
339 339
340 340 def fastdelta(self, base, changes):
341 341 """Given a base manifest text as an array.array and a list of changes
342 342 relative to that text, compute a delta that can be used by revlog.
343 343 """
344 344 delta = []
345 345 dstart = None
346 346 dend = None
347 347 dline = [""]
348 348 start = 0
349 349 # zero copy representation of base as a buffer
350 350 addbuf = util.buffer(base)
351 351
352 352 changes = list(changes)
353 353 if len(changes) < 1000:
354 354 # start with a readonly loop that finds the offset of
355 355 # each line and creates the deltas
356 356 for f, todelete in changes:
357 357 # bs will either be the index of the item or the insert point
358 358 start, end = _msearch(addbuf, f, start)
359 359 if not todelete:
360 360 h, fl = self._lm[f]
361 361 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
362 362 else:
363 363 if start == end:
364 364 # item we want to delete was not found, error out
365 365 raise AssertionError(
366 366 _("failed to remove %s from manifest") % f)
367 367 l = ""
368 368 if dstart is not None and dstart <= start and dend >= start:
369 369 if dend < end:
370 370 dend = end
371 371 if l:
372 372 dline.append(l)
373 373 else:
374 374 if dstart is not None:
375 375 delta.append([dstart, dend, "".join(dline)])
376 376 dstart = start
377 377 dend = end
378 378 dline = [l]
379 379
380 380 if dstart is not None:
381 381 delta.append([dstart, dend, "".join(dline)])
382 382 # apply the delta to the base, and get a delta for addrevision
383 383 deltatext, arraytext = _addlistdelta(base, delta)
384 384 else:
385 385 # For large changes, it's much cheaper to just build the text and
386 386 # diff it.
387 387 arraytext = array.array('c', self.text())
388 388 deltatext = mdiff.textdiff(base, arraytext)
389 389
390 390 return arraytext, deltatext
391 391
392 392 def _msearch(m, s, lo=0, hi=None):
393 393 '''return a tuple (start, end) that says where to find s within m.
394 394
395 395 If the string is found m[start:end] are the line containing
396 396 that string. If start == end the string was not found and
397 397 they indicate the proper sorted insertion point.
398 398
399 399 m should be a buffer or a string
400 400 s is a string'''
401 401 def advance(i, c):
402 402 while i < lenm and m[i] != c:
403 403 i += 1
404 404 return i
405 405 if not s:
406 406 return (lo, lo)
407 407 lenm = len(m)
408 408 if not hi:
409 409 hi = lenm
410 410 while lo < hi:
411 411 mid = (lo + hi) // 2
412 412 start = mid
413 413 while start > 0 and m[start - 1] != '\n':
414 414 start -= 1
415 415 end = advance(start, '\0')
416 416 if m[start:end] < s:
417 417 # we know that after the null there are 40 bytes of sha1
418 418 # this translates to the bisect lo = mid + 1
419 419 lo = advance(end + 40, '\n') + 1
420 420 else:
421 421 # this translates to the bisect hi = mid
422 422 hi = start
423 423 end = advance(lo, '\0')
424 424 found = m[lo:end]
425 425 if s == found:
426 426 # we know that after the null there are 40 bytes of sha1
427 427 end = advance(end + 40, '\n')
428 428 return (lo, end + 1)
429 429 else:
430 430 return (lo, lo)
431 431
432 432 def _checkforbidden(l):
433 433 """Check filenames for illegal characters."""
434 434 for f in l:
435 435 if '\n' in f or '\r' in f:
436 436 raise error.RevlogError(
437 437 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
438 438
439 439
440 440 # apply the changes collected during the bisect loop to our addlist
441 441 # return a delta suitable for addrevision
442 442 def _addlistdelta(addlist, x):
443 443 # for large addlist arrays, building a new array is cheaper
444 444 # than repeatedly modifying the existing one
445 445 currentposition = 0
446 446 newaddlist = array.array('c')
447 447
448 448 for start, end, content in x:
449 449 newaddlist += addlist[currentposition:start]
450 450 if content:
451 451 newaddlist += array.array('c', content)
452 452
453 453 currentposition = end
454 454
455 455 newaddlist += addlist[currentposition:]
456 456
457 457 deltatext = "".join(struct.pack(">lll", start, end, len(content))
458 458 + content for start, end, content in x)
459 459 return deltatext, newaddlist
460 460
461 461 def _splittopdir(f):
462 462 if '/' in f:
463 463 dir, subpath = f.split('/', 1)
464 464 return dir + '/', subpath
465 465 else:
466 466 return '', f
467 467
468 468 _noop = lambda s: None
469 469
470 470 class treemanifest(object):
471 471 def __init__(self, dir='', text=''):
472 472 self._dir = dir
473 473 self._node = revlog.nullid
474 474 self._loadfunc = _noop
475 475 self._copyfunc = _noop
476 476 self._dirty = False
477 477 self._dirs = {}
478 478 # Using _lazymanifest here is a little slower than plain old dicts
479 479 self._files = {}
480 480 self._flags = {}
481 481 if text:
482 482 def readsubtree(subdir, subm):
483 483 raise AssertionError('treemanifest constructor only accepts '
484 484 'flat manifests')
485 485 self.parse(text, readsubtree)
486 486 self._dirty = True # Mark flat manifest dirty after parsing
487 487
488 488 def _subpath(self, path):
489 489 return self._dir + path
490 490
491 491 def __len__(self):
492 492 self._load()
493 493 size = len(self._files)
494 494 for m in self._dirs.values():
495 495 size += m.__len__()
496 496 return size
497 497
498 498 def _isempty(self):
499 499 self._load() # for consistency; already loaded by all callers
500 500 return (not self._files and (not self._dirs or
501 501 all(m._isempty() for m in self._dirs.values())))
502 502
503 503 def __repr__(self):
504 504 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
505 505 (self._dir, revlog.hex(self._node),
506 506 bool(self._loadfunc is _noop),
507 507 self._dirty, id(self)))
508 508
509 509 def dir(self):
510 510 '''The directory that this tree manifest represents, including a
511 511 trailing '/'. Empty string for the repo root directory.'''
512 512 return self._dir
513 513
514 514 def node(self):
515 515 '''This node of this instance. nullid for unsaved instances. Should
516 516 be updated when the instance is read or written from a revlog.
517 517 '''
518 518 assert not self._dirty
519 519 return self._node
520 520
521 521 def setnode(self, node):
522 522 self._node = node
523 523 self._dirty = False
524 524
525 525 def iterentries(self):
526 526 self._load()
527 527 for p, n in sorted(self._dirs.items() + self._files.items()):
528 528 if p in self._files:
529 529 yield self._subpath(p), n, self._flags.get(p, '')
530 530 else:
531 531 for x in n.iterentries():
532 532 yield x
533 533
534 534 def iteritems(self):
535 535 self._load()
536 536 for p, n in sorted(self._dirs.items() + self._files.items()):
537 537 if p in self._files:
538 538 yield self._subpath(p), n
539 539 else:
540 540 for f, sn in n.iteritems():
541 541 yield f, sn
542 542
543 543 def iterkeys(self):
544 544 self._load()
545 545 for p in sorted(self._dirs.keys() + self._files.keys()):
546 546 if p in self._files:
547 547 yield self._subpath(p)
548 548 else:
549 549 for f in self._dirs[p].iterkeys():
550 550 yield f
551 551
552 552 def keys(self):
553 553 return list(self.iterkeys())
554 554
555 555 def __iter__(self):
556 556 return self.iterkeys()
557 557
558 558 def __contains__(self, f):
559 559 if f is None:
560 560 return False
561 561 self._load()
562 562 dir, subpath = _splittopdir(f)
563 563 if dir:
564 564 if dir not in self._dirs:
565 565 return False
566 566 return self._dirs[dir].__contains__(subpath)
567 567 else:
568 568 return f in self._files
569 569
570 570 def get(self, f, default=None):
571 571 self._load()
572 572 dir, subpath = _splittopdir(f)
573 573 if dir:
574 574 if dir not in self._dirs:
575 575 return default
576 576 return self._dirs[dir].get(subpath, default)
577 577 else:
578 578 return self._files.get(f, default)
579 579
580 580 def __getitem__(self, f):
581 581 self._load()
582 582 dir, subpath = _splittopdir(f)
583 583 if dir:
584 584 return self._dirs[dir].__getitem__(subpath)
585 585 else:
586 586 return self._files[f]
587 587
588 588 def flags(self, f):
589 589 self._load()
590 590 dir, subpath = _splittopdir(f)
591 591 if dir:
592 592 if dir not in self._dirs:
593 593 return ''
594 594 return self._dirs[dir].flags(subpath)
595 595 else:
596 596 if f in self._dirs:
597 597 return ''
598 598 return self._flags.get(f, '')
599 599
600 600 def find(self, f):
601 601 self._load()
602 602 dir, subpath = _splittopdir(f)
603 603 if dir:
604 604 return self._dirs[dir].find(subpath)
605 605 else:
606 606 return self._files[f], self._flags.get(f, '')
607 607
608 608 def __delitem__(self, f):
609 609 self._load()
610 610 dir, subpath = _splittopdir(f)
611 611 if dir:
612 612 self._dirs[dir].__delitem__(subpath)
613 613 # If the directory is now empty, remove it
614 614 if self._dirs[dir]._isempty():
615 615 del self._dirs[dir]
616 616 else:
617 617 del self._files[f]
618 618 if f in self._flags:
619 619 del self._flags[f]
620 620 self._dirty = True
621 621
622 622 def __setitem__(self, f, n):
623 623 assert n is not None
624 624 self._load()
625 625 dir, subpath = _splittopdir(f)
626 626 if dir:
627 627 if dir not in self._dirs:
628 628 self._dirs[dir] = treemanifest(self._subpath(dir))
629 629 self._dirs[dir].__setitem__(subpath, n)
630 630 else:
631 631 self._files[f] = n[:21] # to match manifestdict's behavior
632 632 self._dirty = True
633 633
634 634 def _load(self):
635 635 if self._loadfunc is not _noop:
636 636 lf, self._loadfunc = self._loadfunc, _noop
637 637 lf(self)
638 638 elif self._copyfunc is not _noop:
639 639 cf, self._copyfunc = self._copyfunc, _noop
640 640 cf(self)
641 641
642 642 def setflag(self, f, flags):
643 643 """Set the flags (symlink, executable) for path f."""
644 644 self._load()
645 645 dir, subpath = _splittopdir(f)
646 646 if dir:
647 647 if dir not in self._dirs:
648 648 self._dirs[dir] = treemanifest(self._subpath(dir))
649 649 self._dirs[dir].setflag(subpath, flags)
650 650 else:
651 651 self._flags[f] = flags
652 652 self._dirty = True
653 653
654 654 def copy(self):
655 655 copy = treemanifest(self._dir)
656 656 copy._node = self._node
657 657 copy._dirty = self._dirty
658 658 if self._copyfunc is _noop:
659 659 def _copyfunc(s):
660 660 self._load()
661 661 for d in self._dirs:
662 662 s._dirs[d] = self._dirs[d].copy()
663 663 s._files = dict.copy(self._files)
664 664 s._flags = dict.copy(self._flags)
665 665 if self._loadfunc is _noop:
666 666 _copyfunc(copy)
667 667 else:
668 668 copy._copyfunc = _copyfunc
669 669 else:
670 670 copy._copyfunc = self._copyfunc
671 671 return copy
672 672
673 673 def filesnotin(self, m2):
674 674 '''Set of files in this manifest that are not in the other'''
675 675 files = set()
676 676 def _filesnotin(t1, t2):
677 677 if t1._node == t2._node and not t1._dirty and not t2._dirty:
678 678 return
679 679 t1._load()
680 680 t2._load()
681 681 for d, m1 in t1._dirs.iteritems():
682 682 if d in t2._dirs:
683 683 m2 = t2._dirs[d]
684 684 _filesnotin(m1, m2)
685 685 else:
686 686 files.update(m1.iterkeys())
687 687
688 688 for fn in t1._files.iterkeys():
689 689 if fn not in t2._files:
690 690 files.add(t1._subpath(fn))
691 691
692 692 _filesnotin(self, m2)
693 693 return files
694 694
695 695 @propertycache
696 696 def _alldirs(self):
697 697 return util.dirs(self)
698 698
699 699 def dirs(self):
700 700 return self._alldirs
701 701
702 702 def hasdir(self, dir):
703 703 self._load()
704 704 topdir, subdir = _splittopdir(dir)
705 705 if topdir:
706 706 if topdir in self._dirs:
707 707 return self._dirs[topdir].hasdir(subdir)
708 708 return False
709 709 return (dir + '/') in self._dirs
710 710
711 711 def walk(self, match):
712 712 '''Generates matching file names.
713 713
714 714 Equivalent to manifest.matches(match).iterkeys(), but without creating
715 715 an entirely new manifest.
716 716
717 717 It also reports nonexistent files by marking them bad with match.bad().
718 718 '''
719 719 if match.always():
720 720 for f in iter(self):
721 721 yield f
722 722 return
723 723
724 724 fset = set(match.files())
725 725
726 726 for fn in self._walk(match):
727 727 if fn in fset:
728 728 # specified pattern is the exact name
729 729 fset.remove(fn)
730 730 yield fn
731 731
732 732 # for dirstate.walk, files=['.'] means "walk the whole tree".
733 733 # follow that here, too
734 734 fset.discard('.')
735 735
736 736 for fn in sorted(fset):
737 737 if not self.hasdir(fn):
738 738 match.bad(fn, None)
739 739
740 740 def _walk(self, match):
741 741 '''Recursively generates matching file names for walk().'''
742 742 if not match.visitdir(self._dir[:-1] or '.'):
743 743 return
744 744
745 745 # yield this dir's files and walk its submanifests
746 746 self._load()
747 747 for p in sorted(self._dirs.keys() + self._files.keys()):
748 748 if p in self._files:
749 749 fullp = self._subpath(p)
750 750 if match(fullp):
751 751 yield fullp
752 752 else:
753 753 for f in self._dirs[p]._walk(match):
754 754 yield f
755 755
756 756 def matches(self, match):
757 757 '''generate a new manifest filtered by the match argument'''
758 758 if match.always():
759 759 return self.copy()
760 760
761 761 return self._matches(match)
762 762
763 763 def _matches(self, match):
764 764 '''recursively generate a new manifest filtered by the match argument.
765 765 '''
766 766
767 767 visit = match.visitdir(self._dir[:-1] or '.')
768 768 if visit == 'all':
769 769 return self.copy()
770 770 ret = treemanifest(self._dir)
771 771 if not visit:
772 772 return ret
773 773
774 774 self._load()
775 775 for fn in self._files:
776 776 fullp = self._subpath(fn)
777 777 if not match(fullp):
778 778 continue
779 779 ret._files[fn] = self._files[fn]
780 780 if fn in self._flags:
781 781 ret._flags[fn] = self._flags[fn]
782 782
783 783 for dir, subm in self._dirs.iteritems():
784 784 m = subm._matches(match)
785 785 if not m._isempty():
786 786 ret._dirs[dir] = m
787 787
788 788 if not ret._isempty():
789 789 ret._dirty = True
790 790 return ret
791 791
792 792 def diff(self, m2, clean=False):
793 793 '''Finds changes between the current manifest and m2.
794 794
795 795 Args:
796 796 m2: the manifest to which this manifest should be compared.
797 797 clean: if true, include files unchanged between these manifests
798 798 with a None value in the returned dictionary.
799 799
800 800 The result is returned as a dict with filename as key and
801 801 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
802 802 nodeid in the current/other manifest and fl1/fl2 is the flag
803 803 in the current/other manifest. Where the file does not exist,
804 804 the nodeid will be None and the flags will be the empty
805 805 string.
806 806 '''
807 807 result = {}
808 808 emptytree = treemanifest()
809 809 def _diff(t1, t2):
810 810 if t1._node == t2._node and not t1._dirty and not t2._dirty:
811 811 return
812 812 t1._load()
813 813 t2._load()
814 814 for d, m1 in t1._dirs.iteritems():
815 815 m2 = t2._dirs.get(d, emptytree)
816 816 _diff(m1, m2)
817 817
818 818 for d, m2 in t2._dirs.iteritems():
819 819 if d not in t1._dirs:
820 820 _diff(emptytree, m2)
821 821
822 822 for fn, n1 in t1._files.iteritems():
823 823 fl1 = t1._flags.get(fn, '')
824 824 n2 = t2._files.get(fn, None)
825 825 fl2 = t2._flags.get(fn, '')
826 826 if n1 != n2 or fl1 != fl2:
827 827 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
828 828 elif clean:
829 829 result[t1._subpath(fn)] = None
830 830
831 831 for fn, n2 in t2._files.iteritems():
832 832 if fn not in t1._files:
833 833 fl2 = t2._flags.get(fn, '')
834 834 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
835 835
836 836 _diff(self, m2)
837 837 return result
838 838
839 839 def unmodifiedsince(self, m2):
840 840 return not self._dirty and not m2._dirty and self._node == m2._node
841 841
842 842 def parse(self, text, readsubtree):
843 843 for f, n, fl in _parse(text):
844 844 if fl == 't':
845 845 f = f + '/'
846 846 self._dirs[f] = readsubtree(self._subpath(f), n)
847 847 elif '/' in f:
848 848 # This is a flat manifest, so use __setitem__ and setflag rather
849 849 # than assigning directly to _files and _flags, so we can
850 850 # assign a path in a subdirectory, and to mark dirty (compared
851 851 # to nullid).
852 852 self[f] = n
853 853 if fl:
854 854 self.setflag(f, fl)
855 855 else:
856 856 # Assigning to _files and _flags avoids marking as dirty,
857 857 # and should be a little faster.
858 858 self._files[f] = n
859 859 if fl:
860 860 self._flags[f] = fl
861 861
862 862 def text(self, usemanifestv2=False):
863 863 """Get the full data of this manifest as a bytestring."""
864 864 self._load()
865 865 return _text(self.iterentries(), usemanifestv2)
866 866
867 867 def dirtext(self, usemanifestv2=False):
868 868 """Get the full data of this directory as a bytestring. Make sure that
869 869 any submanifests have been written first, so their nodeids are correct.
870 870 """
871 871 self._load()
872 872 flags = self.flags
873 873 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
874 874 files = [(f, self._files[f], flags(f)) for f in self._files]
875 875 return _text(sorted(dirs + files), usemanifestv2)
876 876
877 877 def read(self, gettext, readsubtree):
878 878 def _load_for_read(s):
879 879 s.parse(gettext(), readsubtree)
880 880 s._dirty = False
881 881 self._loadfunc = _load_for_read
882 882
883 883 def writesubtrees(self, m1, m2, writesubtree):
884 884 self._load() # for consistency; should never have any effect here
885 885 m1._load()
886 886 m2._load()
887 887 emptytree = treemanifest()
888 888 for d, subm in self._dirs.iteritems():
889 889 subp1 = m1._dirs.get(d, emptytree)._node
890 890 subp2 = m2._dirs.get(d, emptytree)._node
891 891 if subp1 == revlog.nullid:
892 892 subp1, subp2 = subp2, subp1
893 893 writesubtree(subm, subp1, subp2)
894 894
895 895 class manifestrevlog(revlog.revlog):
896 896 '''A revlog that stores manifest texts. This is responsible for caching the
897 897 full-text manifest contents.
898 898 '''
899 899 def __init__(self, opener, dir='', dirlogcache=None):
900 900 # During normal operations, we expect to deal with not more than four
901 901 # revs at a time (such as during commit --amend). When rebasing large
902 902 # stacks of commits, the number can go up, hence the config knob below.
903 903 cachesize = 4
904 904 usetreemanifest = False
905 905 usemanifestv2 = False
906 906 opts = getattr(opener, 'options', None)
907 907 if opts is not None:
908 908 cachesize = opts.get('manifestcachesize', cachesize)
909 909 usetreemanifest = opts.get('treemanifest', usetreemanifest)
910 910 usemanifestv2 = opts.get('manifestv2', usemanifestv2)
911 911
912 912 self._treeondisk = usetreemanifest
913 913 self._usemanifestv2 = usemanifestv2
914 914
915 915 self._fulltextcache = util.lrucachedict(cachesize)
916 916
917 917 indexfile = "00manifest.i"
918 918 if dir:
919 919 assert self._treeondisk, 'opts is %r' % opts
920 920 if not dir.endswith('/'):
921 921 dir = dir + '/'
922 922 indexfile = "meta/" + dir + "00manifest.i"
923 923 self._dir = dir
924 924 # The dirlogcache is kept on the root manifest log
925 925 if dir:
926 926 self._dirlogcache = dirlogcache
927 927 else:
928 928 self._dirlogcache = {'': self}
929 929
930 super(manifestrevlog, self).__init__(opener, indexfile)
930 super(manifestrevlog, self).__init__(opener, indexfile,
931 checkambig=bool(dir))
931 932
932 933 @property
933 934 def fulltextcache(self):
934 935 return self._fulltextcache
935 936
936 937 def clearcaches(self):
937 938 super(manifestrevlog, self).clearcaches()
938 939 self._fulltextcache.clear()
939 940 self._dirlogcache = {'': self}
940 941
941 942 def dirlog(self, dir):
942 943 if dir:
943 944 assert self._treeondisk
944 945 if dir not in self._dirlogcache:
945 946 self._dirlogcache[dir] = manifestrevlog(self.opener, dir,
946 947 self._dirlogcache)
947 948 return self._dirlogcache[dir]
948 949
949 950 def add(self, m, transaction, link, p1, p2, added, removed):
950 951 if (p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta')
951 952 and not self._usemanifestv2):
952 953 # If our first parent is in the manifest cache, we can
953 954 # compute a delta here using properties we know about the
954 955 # manifest up-front, which may save time later for the
955 956 # revlog layer.
956 957
957 958 _checkforbidden(added)
958 959 # combine the changed lists into one sorted iterator
959 960 work = heapq.merge([(x, False) for x in added],
960 961 [(x, True) for x in removed])
961 962
962 963 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
963 964 cachedelta = self.rev(p1), deltatext
964 965 text = util.buffer(arraytext)
965 966 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
966 967 else:
967 968 # The first parent manifest isn't already loaded, so we'll
968 969 # just encode a fulltext of the manifest and pass that
969 970 # through to the revlog layer, and let it handle the delta
970 971 # process.
971 972 if self._treeondisk:
972 973 m1 = self.read(p1)
973 974 m2 = self.read(p2)
974 975 n = self._addtree(m, transaction, link, m1, m2)
975 976 arraytext = None
976 977 else:
977 978 text = m.text(self._usemanifestv2)
978 979 n = self.addrevision(text, transaction, link, p1, p2)
979 980 arraytext = array.array('c', text)
980 981
981 982 self.fulltextcache[n] = arraytext
982 983
983 984 return n
984 985
985 986 def _addtree(self, m, transaction, link, m1, m2):
986 987 # If the manifest is unchanged compared to one parent,
987 988 # don't write a new revision
988 989 if m.unmodifiedsince(m1) or m.unmodifiedsince(m2):
989 990 return m.node()
990 991 def writesubtree(subm, subp1, subp2):
991 992 sublog = self.dirlog(subm.dir())
992 993 sublog.add(subm, transaction, link, subp1, subp2, None, None)
993 994 m.writesubtrees(m1, m2, writesubtree)
994 995 text = m.dirtext(self._usemanifestv2)
995 996 # Double-check whether contents are unchanged to one parent
996 997 if text == m1.dirtext(self._usemanifestv2):
997 998 n = m1.node()
998 999 elif text == m2.dirtext(self._usemanifestv2):
999 1000 n = m2.node()
1000 1001 else:
1001 1002 n = self.addrevision(text, transaction, link, m1.node(), m2.node())
1002 1003 # Save nodeid so parent manifest can calculate its nodeid
1003 1004 m.setnode(n)
1004 1005 return n
1005 1006
1006 1007 class manifestlog(object):
1007 1008 """A collection class representing the collection of manifest snapshots
1008 1009 referenced by commits in the repository.
1009 1010
1010 1011 In this situation, 'manifest' refers to the abstract concept of a snapshot
1011 1012 of the list of files in the given commit. Consumers of the output of this
1012 1013 class do not care about the implementation details of the actual manifests
1013 1014 they receive (i.e. tree or flat or lazily loaded, etc)."""
1014 1015 def __init__(self, opener, repo):
1015 1016 self._repo = repo
1016 1017
1017 1018 usetreemanifest = False
1018 1019
1019 1020 opts = getattr(opener, 'options', None)
1020 1021 if opts is not None:
1021 1022 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1022 1023 self._treeinmem = usetreemanifest
1023 1024
1024 1025 # We'll separate this into it's own cache once oldmanifest is no longer
1025 1026 # used
1026 1027 self._mancache = repo.manifest._mancache
1027 1028
1028 1029 @property
1029 1030 def _revlog(self):
1030 1031 return self._repo.manifest
1031 1032
1032 1033 def __getitem__(self, node):
1033 1034 """Retrieves the manifest instance for the given node. Throws a KeyError
1034 1035 if not found.
1035 1036 """
1036 1037 if node in self._mancache:
1037 1038 cachemf = self._mancache[node]
1038 1039 # The old manifest may put non-ctx manifests in the cache, so skip
1039 1040 # those since they don't implement the full api.
1040 1041 if (isinstance(cachemf, manifestctx) or
1041 1042 isinstance(cachemf, treemanifestctx)):
1042 1043 return cachemf
1043 1044
1044 1045 if self._treeinmem:
1045 1046 m = treemanifestctx(self._revlog, '', node)
1046 1047 else:
1047 1048 m = manifestctx(self._revlog, node)
1048 1049 if node != revlog.nullid:
1049 1050 self._mancache[node] = m
1050 1051 return m
1051 1052
1052 1053 def add(self, m, transaction, link, p1, p2, added, removed):
1053 1054 return self._revlog.add(m, transaction, link, p1, p2, added, removed)
1054 1055
1055 1056 class manifestctx(object):
1056 1057 """A class representing a single revision of a manifest, including its
1057 1058 contents, its parent revs, and its linkrev.
1058 1059 """
1059 1060 def __init__(self, revlog, node):
1060 1061 self._revlog = revlog
1061 1062 self._data = None
1062 1063
1063 1064 self._node = node
1064 1065
1065 1066 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1066 1067 # but let's add it later when something needs it and we can load it
1067 1068 # lazily.
1068 1069 #self.p1, self.p2 = revlog.parents(node)
1069 1070 #rev = revlog.rev(node)
1070 1071 #self.linkrev = revlog.linkrev(rev)
1071 1072
1072 1073 def node(self):
1073 1074 return self._node
1074 1075
1075 1076 def read(self):
1076 1077 if not self._data:
1077 1078 if self._node == revlog.nullid:
1078 1079 self._data = manifestdict()
1079 1080 else:
1080 1081 text = self._revlog.revision(self._node)
1081 1082 arraytext = array.array('c', text)
1082 1083 self._revlog._fulltextcache[self._node] = arraytext
1083 1084 self._data = manifestdict(text)
1084 1085 return self._data
1085 1086
1086 1087 def readfast(self):
1087 1088 rl = self._revlog
1088 1089 r = rl.rev(self._node)
1089 1090 deltaparent = rl.deltaparent(r)
1090 1091 if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
1091 1092 return self.readdelta()
1092 1093 return self.read()
1093 1094
1094 1095 def readdelta(self):
1095 1096 revlog = self._revlog
1096 1097 if revlog._usemanifestv2:
1097 1098 # Need to perform a slow delta
1098 1099 r0 = revlog.deltaparent(revlog.rev(self._node))
1099 1100 m0 = manifestctx(revlog, revlog.node(r0)).read()
1100 1101 m1 = self.read()
1101 1102 md = manifestdict()
1102 1103 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1103 1104 if n1:
1104 1105 md[f] = n1
1105 1106 if fl1:
1106 1107 md.setflag(f, fl1)
1107 1108 return md
1108 1109
1109 1110 r = revlog.rev(self._node)
1110 1111 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1111 1112 return manifestdict(d)
1112 1113
1113 1114 class treemanifestctx(object):
1114 1115 def __init__(self, revlog, dir, node):
1115 1116 revlog = revlog.dirlog(dir)
1116 1117 self._revlog = revlog
1117 1118 self._dir = dir
1118 1119 self._data = None
1119 1120
1120 1121 self._node = node
1121 1122
1122 1123 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
1123 1124 # we can instantiate treemanifestctx objects for directories we don't
1124 1125 # have on disk.
1125 1126 #self.p1, self.p2 = revlog.parents(node)
1126 1127 #rev = revlog.rev(node)
1127 1128 #self.linkrev = revlog.linkrev(rev)
1128 1129
1129 1130 def read(self):
1130 1131 if not self._data:
1131 1132 if self._node == revlog.nullid:
1132 1133 self._data = treemanifest()
1133 1134 elif self._revlog._treeondisk:
1134 1135 m = treemanifest(dir=self._dir)
1135 1136 def gettext():
1136 1137 return self._revlog.revision(self._node)
1137 1138 def readsubtree(dir, subm):
1138 1139 return treemanifestctx(self._revlog, dir, subm).read()
1139 1140 m.read(gettext, readsubtree)
1140 1141 m.setnode(self._node)
1141 1142 self._data = m
1142 1143 else:
1143 1144 text = self._revlog.revision(self._node)
1144 1145 arraytext = array.array('c', text)
1145 1146 self._revlog.fulltextcache[self._node] = arraytext
1146 1147 self._data = treemanifest(dir=self._dir, text=text)
1147 1148
1148 1149 return self._data
1149 1150
1150 1151 def node(self):
1151 1152 return self._node
1152 1153
1153 1154 def readdelta(self):
1154 1155 # Need to perform a slow delta
1155 1156 revlog = self._revlog
1156 1157 r0 = revlog.deltaparent(revlog.rev(self._node))
1157 1158 m0 = treemanifestctx(revlog, revlog.node(r0), dir=self._dir).read()
1158 1159 m1 = self.read()
1159 1160 md = treemanifest(dir=self._dir)
1160 1161 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1161 1162 if n1:
1162 1163 md[f] = n1
1163 1164 if fl1:
1164 1165 md.setflag(f, fl1)
1165 1166 return md
1166 1167
1167 1168 def readfast(self):
1168 1169 rl = self._revlog
1169 1170 r = rl.rev(self._node)
1170 1171 deltaparent = rl.deltaparent(r)
1171 1172 if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
1172 1173 return self.readdelta()
1173 1174 return self.read()
1174 1175
1175 1176 class manifest(manifestrevlog):
1176 1177 def __init__(self, opener, dir='', dirlogcache=None):
1177 1178 '''The 'dir' and 'dirlogcache' arguments are for internal use by
1178 1179 manifest.manifest only. External users should create a root manifest
1179 1180 log with manifest.manifest(opener) and call dirlog() on it.
1180 1181 '''
1181 1182 # During normal operations, we expect to deal with not more than four
1182 1183 # revs at a time (such as during commit --amend). When rebasing large
1183 1184 # stacks of commits, the number can go up, hence the config knob below.
1184 1185 cachesize = 4
1185 1186 usetreemanifest = False
1186 1187 opts = getattr(opener, 'options', None)
1187 1188 if opts is not None:
1188 1189 cachesize = opts.get('manifestcachesize', cachesize)
1189 1190 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1190 1191 self._mancache = util.lrucachedict(cachesize)
1191 1192 self._treeinmem = usetreemanifest
1192 1193 super(manifest, self).__init__(opener, dir=dir, dirlogcache=dirlogcache)
1193 1194
1194 1195 def _newmanifest(self, data=''):
1195 1196 if self._treeinmem:
1196 1197 return treemanifest(self._dir, data)
1197 1198 return manifestdict(data)
1198 1199
1199 1200 def dirlog(self, dir):
1200 1201 """This overrides the base revlog implementation to allow construction
1201 1202 'manifest' types instead of manifestrevlog types. This is only needed
1202 1203 until we migrate off the 'manifest' type."""
1203 1204 if dir:
1204 1205 assert self._treeondisk
1205 1206 if dir not in self._dirlogcache:
1206 1207 self._dirlogcache[dir] = manifest(self.opener, dir,
1207 1208 self._dirlogcache)
1208 1209 return self._dirlogcache[dir]
1209 1210
1210 1211 def _slowreaddelta(self, node):
1211 1212 r0 = self.deltaparent(self.rev(node))
1212 1213 m0 = self.read(self.node(r0))
1213 1214 m1 = self.read(node)
1214 1215 md = self._newmanifest()
1215 1216 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1216 1217 if n1:
1217 1218 md[f] = n1
1218 1219 if fl1:
1219 1220 md.setflag(f, fl1)
1220 1221 return md
1221 1222
1222 1223 def readdelta(self, node):
1223 1224 if self._usemanifestv2 or self._treeondisk:
1224 1225 return self._slowreaddelta(node)
1225 1226 r = self.rev(node)
1226 1227 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
1227 1228 return self._newmanifest(d)
1228 1229
1229 1230 def readshallowdelta(self, node):
1230 1231 '''For flat manifests, this is the same as readdelta(). For
1231 1232 treemanifests, this will read the delta for this revlog's directory,
1232 1233 without recursively reading subdirectory manifests. Instead, any
1233 1234 subdirectory entry will be reported as it appears in the manifests, i.e.
1234 1235 the subdirectory will be reported among files and distinguished only by
1235 1236 its 't' flag.'''
1236 1237 if not self._treeondisk:
1237 1238 return self.readdelta(node)
1238 1239 if self._usemanifestv2:
1239 1240 raise error.Abort(
1240 1241 _("readshallowdelta() not implemented for manifestv2"))
1241 1242 r = self.rev(node)
1242 1243 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
1243 1244 return manifestdict(d)
1244 1245
1245 1246 def readshallowfast(self, node):
1246 1247 '''like readfast(), but calls readshallowdelta() instead of readdelta()
1247 1248 '''
1248 1249 r = self.rev(node)
1249 1250 deltaparent = self.deltaparent(r)
1250 1251 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
1251 1252 return self.readshallowdelta(node)
1252 1253 return self.readshallow(node)
1253 1254
1254 1255 def read(self, node):
1255 1256 if node == revlog.nullid:
1256 1257 return self._newmanifest() # don't upset local cache
1257 1258 if node in self._mancache:
1258 1259 cached = self._mancache[node]
1259 1260 if (isinstance(cached, manifestctx) or
1260 1261 isinstance(cached, treemanifestctx)):
1261 1262 cached = cached.read()
1262 1263 return cached
1263 1264 if self._treeondisk:
1264 1265 def gettext():
1265 1266 return self.revision(node)
1266 1267 def readsubtree(dir, subm):
1267 1268 return self.dirlog(dir).read(subm)
1268 1269 m = self._newmanifest()
1269 1270 m.read(gettext, readsubtree)
1270 1271 m.setnode(node)
1271 1272 arraytext = None
1272 1273 else:
1273 1274 text = self.revision(node)
1274 1275 m = self._newmanifest(text)
1275 1276 arraytext = array.array('c', text)
1276 1277 self._mancache[node] = m
1277 1278 self.fulltextcache[node] = arraytext
1278 1279 return m
1279 1280
1280 1281 def readshallow(self, node):
1281 1282 '''Reads the manifest in this directory. When using flat manifests,
1282 1283 this manifest will generally have files in subdirectories in it. Does
1283 1284 not cache the manifest as the callers generally do not read the same
1284 1285 version twice.'''
1285 1286 return manifestdict(self.revision(node))
1286 1287
1287 1288 def find(self, node, f):
1288 1289 '''look up entry for a single file efficiently.
1289 1290 return (node, flags) pair if found, (None, None) if not.'''
1290 1291 m = self.read(node)
1291 1292 try:
1292 1293 return m.find(f)
1293 1294 except KeyError:
1294 1295 return None, None
1295 1296
1296 1297 def clearcaches(self):
1297 1298 super(manifest, self).clearcaches()
1298 1299 self._mancache.clear()
General Comments 0
You need to be logged in to leave comments. Login now