##// END OF EJS Templates
manifest: change terminology for storage in context classes...
Gregory Szorc -
r39353:eb9b8679 default
parent child Browse files
Show More
@@ -1,1902 +1,1904 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from . import (
23 23 error,
24 24 mdiff,
25 25 policy,
26 26 pycompat,
27 27 repository,
28 28 revlog,
29 29 util,
30 30 )
31 31 from .utils import (
32 32 interfaceutil,
33 33 )
34 34
35 35 parsers = policy.importmod(r'parsers')
36 36 propertycache = util.propertycache
37 37
38 38 def _parse(data):
39 39 # This method does a little bit of excessive-looking
40 40 # precondition checking. This is so that the behavior of this
41 41 # class exactly matches its C counterpart to try and help
42 42 # prevent surprise breakage for anyone that develops against
43 43 # the pure version.
44 44 if data and data[-1:] != '\n':
45 45 raise ValueError('Manifest did not end in a newline.')
46 46 prev = None
47 47 for l in data.splitlines():
48 48 if prev is not None and prev > l:
49 49 raise ValueError('Manifest lines not in sorted order.')
50 50 prev = l
51 51 f, n = l.split('\0')
52 52 if len(n) > 40:
53 53 yield f, bin(n[:40]), n[40:]
54 54 else:
55 55 yield f, bin(n), ''
56 56
57 57 def _text(it):
58 58 files = []
59 59 lines = []
60 60 for f, n, fl in it:
61 61 files.append(f)
62 62 # if this is changed to support newlines in filenames,
63 63 # be sure to check the templates/ dir again (especially *-raw.tmpl)
64 64 lines.append("%s\0%s%s\n" % (f, hex(n), fl))
65 65
66 66 _checkforbidden(files)
67 67 return ''.join(lines)
68 68
69 69 class lazymanifestiter(object):
70 70 def __init__(self, lm):
71 71 self.pos = 0
72 72 self.lm = lm
73 73
74 74 def __iter__(self):
75 75 return self
76 76
77 77 def next(self):
78 78 try:
79 79 data, pos = self.lm._get(self.pos)
80 80 except IndexError:
81 81 raise StopIteration
82 82 if pos == -1:
83 83 self.pos += 1
84 84 return data[0]
85 85 self.pos += 1
86 86 zeropos = data.find('\x00', pos)
87 87 return data[pos:zeropos]
88 88
89 89 __next__ = next
90 90
91 91 class lazymanifestiterentries(object):
92 92 def __init__(self, lm):
93 93 self.lm = lm
94 94 self.pos = 0
95 95
96 96 def __iter__(self):
97 97 return self
98 98
99 99 def next(self):
100 100 try:
101 101 data, pos = self.lm._get(self.pos)
102 102 except IndexError:
103 103 raise StopIteration
104 104 if pos == -1:
105 105 self.pos += 1
106 106 return data
107 107 zeropos = data.find('\x00', pos)
108 108 hashval = unhexlify(data, self.lm.extrainfo[self.pos],
109 109 zeropos + 1, 40)
110 110 flags = self.lm._getflags(data, self.pos, zeropos)
111 111 self.pos += 1
112 112 return (data[pos:zeropos], hashval, flags)
113 113
114 114 __next__ = next
115 115
116 116 def unhexlify(data, extra, pos, length):
117 117 s = bin(data[pos:pos + length])
118 118 if extra:
119 119 s += chr(extra & 0xff)
120 120 return s
121 121
122 122 def _cmp(a, b):
123 123 return (a > b) - (a < b)
124 124
125 125 class _lazymanifest(object):
126 126 def __init__(self, data, positions=None, extrainfo=None, extradata=None):
127 127 if positions is None:
128 128 self.positions = self.findlines(data)
129 129 self.extrainfo = [0] * len(self.positions)
130 130 self.data = data
131 131 self.extradata = []
132 132 else:
133 133 self.positions = positions[:]
134 134 self.extrainfo = extrainfo[:]
135 135 self.extradata = extradata[:]
136 136 self.data = data
137 137
138 138 def findlines(self, data):
139 139 if not data:
140 140 return []
141 141 pos = data.find("\n")
142 142 if pos == -1 or data[-1:] != '\n':
143 143 raise ValueError("Manifest did not end in a newline.")
144 144 positions = [0]
145 145 prev = data[:data.find('\x00')]
146 146 while pos < len(data) - 1 and pos != -1:
147 147 positions.append(pos + 1)
148 148 nexts = data[pos + 1:data.find('\x00', pos + 1)]
149 149 if nexts < prev:
150 150 raise ValueError("Manifest lines not in sorted order.")
151 151 prev = nexts
152 152 pos = data.find("\n", pos + 1)
153 153 return positions
154 154
155 155 def _get(self, index):
156 156 # get the position encoded in pos:
157 157 # positive number is an index in 'data'
158 158 # negative number is in extrapieces
159 159 pos = self.positions[index]
160 160 if pos >= 0:
161 161 return self.data, pos
162 162 return self.extradata[-pos - 1], -1
163 163
164 164 def _getkey(self, pos):
165 165 if pos >= 0:
166 166 return self.data[pos:self.data.find('\x00', pos + 1)]
167 167 return self.extradata[-pos - 1][0]
168 168
169 169 def bsearch(self, key):
170 170 first = 0
171 171 last = len(self.positions) - 1
172 172
173 173 while first <= last:
174 174 midpoint = (first + last)//2
175 175 nextpos = self.positions[midpoint]
176 176 candidate = self._getkey(nextpos)
177 177 r = _cmp(key, candidate)
178 178 if r == 0:
179 179 return midpoint
180 180 else:
181 181 if r < 0:
182 182 last = midpoint - 1
183 183 else:
184 184 first = midpoint + 1
185 185 return -1
186 186
187 187 def bsearch2(self, key):
188 188 # same as the above, but will always return the position
189 189 # done for performance reasons
190 190 first = 0
191 191 last = len(self.positions) - 1
192 192
193 193 while first <= last:
194 194 midpoint = (first + last)//2
195 195 nextpos = self.positions[midpoint]
196 196 candidate = self._getkey(nextpos)
197 197 r = _cmp(key, candidate)
198 198 if r == 0:
199 199 return (midpoint, True)
200 200 else:
201 201 if r < 0:
202 202 last = midpoint - 1
203 203 else:
204 204 first = midpoint + 1
205 205 return (first, False)
206 206
207 207 def __contains__(self, key):
208 208 return self.bsearch(key) != -1
209 209
210 210 def _getflags(self, data, needle, pos):
211 211 start = pos + 41
212 212 end = data.find("\n", start)
213 213 if end == -1:
214 214 end = len(data) - 1
215 215 if start == end:
216 216 return ''
217 217 return self.data[start:end]
218 218
219 219 def __getitem__(self, key):
220 220 if not isinstance(key, bytes):
221 221 raise TypeError("getitem: manifest keys must be a bytes.")
222 222 needle = self.bsearch(key)
223 223 if needle == -1:
224 224 raise KeyError
225 225 data, pos = self._get(needle)
226 226 if pos == -1:
227 227 return (data[1], data[2])
228 228 zeropos = data.find('\x00', pos)
229 229 assert 0 <= needle <= len(self.positions)
230 230 assert len(self.extrainfo) == len(self.positions)
231 231 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
232 232 flags = self._getflags(data, needle, zeropos)
233 233 return (hashval, flags)
234 234
235 235 def __delitem__(self, key):
236 236 needle, found = self.bsearch2(key)
237 237 if not found:
238 238 raise KeyError
239 239 cur = self.positions[needle]
240 240 self.positions = self.positions[:needle] + self.positions[needle + 1:]
241 241 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
242 242 if cur >= 0:
243 243 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
244 244
245 245 def __setitem__(self, key, value):
246 246 if not isinstance(key, bytes):
247 247 raise TypeError("setitem: manifest keys must be a byte string.")
248 248 if not isinstance(value, tuple) or len(value) != 2:
249 249 raise TypeError("Manifest values must be a tuple of (node, flags).")
250 250 hashval = value[0]
251 251 if not isinstance(hashval, bytes) or not 20 <= len(hashval) <= 22:
252 252 raise TypeError("node must be a 20-byte byte string")
253 253 flags = value[1]
254 254 if len(hashval) == 22:
255 255 hashval = hashval[:-1]
256 256 if not isinstance(flags, bytes) or len(flags) > 1:
257 257 raise TypeError("flags must a 0 or 1 byte string, got %r", flags)
258 258 needle, found = self.bsearch2(key)
259 259 if found:
260 260 # put the item
261 261 pos = self.positions[needle]
262 262 if pos < 0:
263 263 self.extradata[-pos - 1] = (key, hashval, value[1])
264 264 else:
265 265 # just don't bother
266 266 self.extradata.append((key, hashval, value[1]))
267 267 self.positions[needle] = -len(self.extradata)
268 268 else:
269 269 # not found, put it in with extra positions
270 270 self.extradata.append((key, hashval, value[1]))
271 271 self.positions = (self.positions[:needle] + [-len(self.extradata)]
272 272 + self.positions[needle:])
273 273 self.extrainfo = (self.extrainfo[:needle] + [0] +
274 274 self.extrainfo[needle:])
275 275
276 276 def copy(self):
277 277 # XXX call _compact like in C?
278 278 return _lazymanifest(self.data, self.positions, self.extrainfo,
279 279 self.extradata)
280 280
281 281 def _compact(self):
282 282 # hopefully not called TOO often
283 283 if len(self.extradata) == 0:
284 284 return
285 285 l = []
286 286 last_cut = 0
287 287 i = 0
288 288 offset = 0
289 289 self.extrainfo = [0] * len(self.positions)
290 290 while i < len(self.positions):
291 291 if self.positions[i] >= 0:
292 292 cur = self.positions[i]
293 293 last_cut = cur
294 294 while True:
295 295 self.positions[i] = offset
296 296 i += 1
297 297 if i == len(self.positions) or self.positions[i] < 0:
298 298 break
299 299 offset += self.positions[i] - cur
300 300 cur = self.positions[i]
301 301 end_cut = self.data.find('\n', cur)
302 302 if end_cut != -1:
303 303 end_cut += 1
304 304 offset += end_cut - cur
305 305 l.append(self.data[last_cut:end_cut])
306 306 else:
307 307 while i < len(self.positions) and self.positions[i] < 0:
308 308 cur = self.positions[i]
309 309 t = self.extradata[-cur - 1]
310 310 l.append(self._pack(t))
311 311 self.positions[i] = offset
312 312 if len(t[1]) > 20:
313 313 self.extrainfo[i] = ord(t[1][21])
314 314 offset += len(l[-1])
315 315 i += 1
316 316 self.data = ''.join(l)
317 317 self.extradata = []
318 318
319 319 def _pack(self, d):
320 320 return d[0] + '\x00' + hex(d[1][:20]) + d[2] + '\n'
321 321
322 322 def text(self):
323 323 self._compact()
324 324 return self.data
325 325
326 326 def diff(self, m2, clean=False):
327 327 '''Finds changes between the current manifest and m2.'''
328 328 # XXX think whether efficiency matters here
329 329 diff = {}
330 330
331 331 for fn, e1, flags in self.iterentries():
332 332 if fn not in m2:
333 333 diff[fn] = (e1, flags), (None, '')
334 334 else:
335 335 e2 = m2[fn]
336 336 if (e1, flags) != e2:
337 337 diff[fn] = (e1, flags), e2
338 338 elif clean:
339 339 diff[fn] = None
340 340
341 341 for fn, e2, flags in m2.iterentries():
342 342 if fn not in self:
343 343 diff[fn] = (None, ''), (e2, flags)
344 344
345 345 return diff
346 346
347 347 def iterentries(self):
348 348 return lazymanifestiterentries(self)
349 349
350 350 def iterkeys(self):
351 351 return lazymanifestiter(self)
352 352
353 353 def __iter__(self):
354 354 return lazymanifestiter(self)
355 355
356 356 def __len__(self):
357 357 return len(self.positions)
358 358
359 359 def filtercopy(self, filterfn):
360 360 # XXX should be optimized
361 361 c = _lazymanifest('')
362 362 for f, n, fl in self.iterentries():
363 363 if filterfn(f):
364 364 c[f] = n, fl
365 365 return c
366 366
367 367 try:
368 368 _lazymanifest = parsers.lazymanifest
369 369 except AttributeError:
370 370 pass
371 371
372 372 @interfaceutil.implementer(repository.imanifestdict)
373 373 class manifestdict(object):
374 374 def __init__(self, data=''):
375 375 self._lm = _lazymanifest(data)
376 376
377 377 def __getitem__(self, key):
378 378 return self._lm[key][0]
379 379
380 380 def find(self, key):
381 381 return self._lm[key]
382 382
383 383 def __len__(self):
384 384 return len(self._lm)
385 385
386 386 def __nonzero__(self):
387 387 # nonzero is covered by the __len__ function, but implementing it here
388 388 # makes it easier for extensions to override.
389 389 return len(self._lm) != 0
390 390
391 391 __bool__ = __nonzero__
392 392
393 393 def __setitem__(self, key, node):
394 394 self._lm[key] = node, self.flags(key, '')
395 395
396 396 def __contains__(self, key):
397 397 if key is None:
398 398 return False
399 399 return key in self._lm
400 400
401 401 def __delitem__(self, key):
402 402 del self._lm[key]
403 403
404 404 def __iter__(self):
405 405 return self._lm.__iter__()
406 406
407 407 def iterkeys(self):
408 408 return self._lm.iterkeys()
409 409
410 410 def keys(self):
411 411 return list(self.iterkeys())
412 412
413 413 def filesnotin(self, m2, match=None):
414 414 '''Set of files in this manifest that are not in the other'''
415 415 if match:
416 416 m1 = self.matches(match)
417 417 m2 = m2.matches(match)
418 418 return m1.filesnotin(m2)
419 419 diff = self.diff(m2)
420 420 files = set(filepath
421 421 for filepath, hashflags in diff.iteritems()
422 422 if hashflags[1][0] is None)
423 423 return files
424 424
425 425 @propertycache
426 426 def _dirs(self):
427 427 return util.dirs(self)
428 428
429 429 def dirs(self):
430 430 return self._dirs
431 431
432 432 def hasdir(self, dir):
433 433 return dir in self._dirs
434 434
435 435 def _filesfastpath(self, match):
436 436 '''Checks whether we can correctly and quickly iterate over matcher
437 437 files instead of over manifest files.'''
438 438 files = match.files()
439 439 return (len(files) < 100 and (match.isexact() or
440 440 (match.prefix() and all(fn in self for fn in files))))
441 441
442 442 def walk(self, match):
443 443 '''Generates matching file names.
444 444
445 445 Equivalent to manifest.matches(match).iterkeys(), but without creating
446 446 an entirely new manifest.
447 447
448 448 It also reports nonexistent files by marking them bad with match.bad().
449 449 '''
450 450 if match.always():
451 451 for f in iter(self):
452 452 yield f
453 453 return
454 454
455 455 fset = set(match.files())
456 456
457 457 # avoid the entire walk if we're only looking for specific files
458 458 if self._filesfastpath(match):
459 459 for fn in sorted(fset):
460 460 yield fn
461 461 return
462 462
463 463 for fn in self:
464 464 if fn in fset:
465 465 # specified pattern is the exact name
466 466 fset.remove(fn)
467 467 if match(fn):
468 468 yield fn
469 469
470 470 # for dirstate.walk, files=['.'] means "walk the whole tree".
471 471 # follow that here, too
472 472 fset.discard('.')
473 473
474 474 for fn in sorted(fset):
475 475 if not self.hasdir(fn):
476 476 match.bad(fn, None)
477 477
478 478 def matches(self, match):
479 479 '''generate a new manifest filtered by the match argument'''
480 480 if match.always():
481 481 return self.copy()
482 482
483 483 if self._filesfastpath(match):
484 484 m = manifestdict()
485 485 lm = self._lm
486 486 for fn in match.files():
487 487 if fn in lm:
488 488 m._lm[fn] = lm[fn]
489 489 return m
490 490
491 491 m = manifestdict()
492 492 m._lm = self._lm.filtercopy(match)
493 493 return m
494 494
495 495 def diff(self, m2, match=None, clean=False):
496 496 '''Finds changes between the current manifest and m2.
497 497
498 498 Args:
499 499 m2: the manifest to which this manifest should be compared.
500 500 clean: if true, include files unchanged between these manifests
501 501 with a None value in the returned dictionary.
502 502
503 503 The result is returned as a dict with filename as key and
504 504 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
505 505 nodeid in the current/other manifest and fl1/fl2 is the flag
506 506 in the current/other manifest. Where the file does not exist,
507 507 the nodeid will be None and the flags will be the empty
508 508 string.
509 509 '''
510 510 if match:
511 511 m1 = self.matches(match)
512 512 m2 = m2.matches(match)
513 513 return m1.diff(m2, clean=clean)
514 514 return self._lm.diff(m2._lm, clean)
515 515
516 516 def setflag(self, key, flag):
517 517 self._lm[key] = self[key], flag
518 518
519 519 def get(self, key, default=None):
520 520 try:
521 521 return self._lm[key][0]
522 522 except KeyError:
523 523 return default
524 524
525 525 def flags(self, key, default=''):
526 526 try:
527 527 return self._lm[key][1]
528 528 except KeyError:
529 529 return default
530 530
531 531 def copy(self):
532 532 c = manifestdict()
533 533 c._lm = self._lm.copy()
534 534 return c
535 535
536 536 def items(self):
537 537 return (x[:2] for x in self._lm.iterentries())
538 538
539 539 def iteritems(self):
540 540 return (x[:2] for x in self._lm.iterentries())
541 541
542 542 def iterentries(self):
543 543 return self._lm.iterentries()
544 544
545 545 def text(self):
546 546 # most likely uses native version
547 547 return self._lm.text()
548 548
549 549 def fastdelta(self, base, changes):
550 550 """Given a base manifest text as a bytearray and a list of changes
551 551 relative to that text, compute a delta that can be used by revlog.
552 552 """
553 553 delta = []
554 554 dstart = None
555 555 dend = None
556 556 dline = [""]
557 557 start = 0
558 558 # zero copy representation of base as a buffer
559 559 addbuf = util.buffer(base)
560 560
561 561 changes = list(changes)
562 562 if len(changes) < 1000:
563 563 # start with a readonly loop that finds the offset of
564 564 # each line and creates the deltas
565 565 for f, todelete in changes:
566 566 # bs will either be the index of the item or the insert point
567 567 start, end = _msearch(addbuf, f, start)
568 568 if not todelete:
569 569 h, fl = self._lm[f]
570 570 l = "%s\0%s%s\n" % (f, hex(h), fl)
571 571 else:
572 572 if start == end:
573 573 # item we want to delete was not found, error out
574 574 raise AssertionError(
575 575 _("failed to remove %s from manifest") % f)
576 576 l = ""
577 577 if dstart is not None and dstart <= start and dend >= start:
578 578 if dend < end:
579 579 dend = end
580 580 if l:
581 581 dline.append(l)
582 582 else:
583 583 if dstart is not None:
584 584 delta.append([dstart, dend, "".join(dline)])
585 585 dstart = start
586 586 dend = end
587 587 dline = [l]
588 588
589 589 if dstart is not None:
590 590 delta.append([dstart, dend, "".join(dline)])
591 591 # apply the delta to the base, and get a delta for addrevision
592 592 deltatext, arraytext = _addlistdelta(base, delta)
593 593 else:
594 594 # For large changes, it's much cheaper to just build the text and
595 595 # diff it.
596 596 arraytext = bytearray(self.text())
597 597 deltatext = mdiff.textdiff(
598 598 util.buffer(base), util.buffer(arraytext))
599 599
600 600 return arraytext, deltatext
601 601
602 602 def _msearch(m, s, lo=0, hi=None):
603 603 '''return a tuple (start, end) that says where to find s within m.
604 604
605 605 If the string is found m[start:end] are the line containing
606 606 that string. If start == end the string was not found and
607 607 they indicate the proper sorted insertion point.
608 608
609 609 m should be a buffer, a memoryview or a byte string.
610 610 s is a byte string'''
611 611 def advance(i, c):
612 612 while i < lenm and m[i:i + 1] != c:
613 613 i += 1
614 614 return i
615 615 if not s:
616 616 return (lo, lo)
617 617 lenm = len(m)
618 618 if not hi:
619 619 hi = lenm
620 620 while lo < hi:
621 621 mid = (lo + hi) // 2
622 622 start = mid
623 623 while start > 0 and m[start - 1:start] != '\n':
624 624 start -= 1
625 625 end = advance(start, '\0')
626 626 if bytes(m[start:end]) < s:
627 627 # we know that after the null there are 40 bytes of sha1
628 628 # this translates to the bisect lo = mid + 1
629 629 lo = advance(end + 40, '\n') + 1
630 630 else:
631 631 # this translates to the bisect hi = mid
632 632 hi = start
633 633 end = advance(lo, '\0')
634 634 found = m[lo:end]
635 635 if s == found:
636 636 # we know that after the null there are 40 bytes of sha1
637 637 end = advance(end + 40, '\n')
638 638 return (lo, end + 1)
639 639 else:
640 640 return (lo, lo)
641 641
642 642 def _checkforbidden(l):
643 643 """Check filenames for illegal characters."""
644 644 for f in l:
645 645 if '\n' in f or '\r' in f:
646 646 raise error.RevlogError(
647 647 _("'\\n' and '\\r' disallowed in filenames: %r")
648 648 % pycompat.bytestr(f))
649 649
650 650
651 651 # apply the changes collected during the bisect loop to our addlist
652 652 # return a delta suitable for addrevision
653 653 def _addlistdelta(addlist, x):
654 654 # for large addlist arrays, building a new array is cheaper
655 655 # than repeatedly modifying the existing one
656 656 currentposition = 0
657 657 newaddlist = bytearray()
658 658
659 659 for start, end, content in x:
660 660 newaddlist += addlist[currentposition:start]
661 661 if content:
662 662 newaddlist += bytearray(content)
663 663
664 664 currentposition = end
665 665
666 666 newaddlist += addlist[currentposition:]
667 667
668 668 deltatext = "".join(struct.pack(">lll", start, end, len(content))
669 669 + content for start, end, content in x)
670 670 return deltatext, newaddlist
671 671
672 672 def _splittopdir(f):
673 673 if '/' in f:
674 674 dir, subpath = f.split('/', 1)
675 675 return dir + '/', subpath
676 676 else:
677 677 return '', f
678 678
679 679 _noop = lambda s: None
680 680
681 681 class treemanifest(object):
682 682 def __init__(self, dir='', text=''):
683 683 self._dir = dir
684 684 self._node = nullid
685 685 self._loadfunc = _noop
686 686 self._copyfunc = _noop
687 687 self._dirty = False
688 688 self._dirs = {}
689 689 # Using _lazymanifest here is a little slower than plain old dicts
690 690 self._files = {}
691 691 self._flags = {}
692 692 if text:
693 693 def readsubtree(subdir, subm):
694 694 raise AssertionError('treemanifest constructor only accepts '
695 695 'flat manifests')
696 696 self.parse(text, readsubtree)
697 697 self._dirty = True # Mark flat manifest dirty after parsing
698 698
699 699 def _subpath(self, path):
700 700 return self._dir + path
701 701
702 702 def __len__(self):
703 703 self._load()
704 704 size = len(self._files)
705 705 for m in self._dirs.values():
706 706 size += m.__len__()
707 707 return size
708 708
709 709 def __nonzero__(self):
710 710 # Faster than "__len() != 0" since it avoids loading sub-manifests
711 711 return not self._isempty()
712 712
713 713 __bool__ = __nonzero__
714 714
715 715 def _isempty(self):
716 716 self._load() # for consistency; already loaded by all callers
717 717 return (not self._files and (not self._dirs or
718 718 all(m._isempty() for m in self._dirs.values())))
719 719
720 720 def __repr__(self):
721 721 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
722 722 (self._dir, hex(self._node),
723 723 bool(self._loadfunc is _noop),
724 724 self._dirty, id(self)))
725 725
726 726 def dir(self):
727 727 '''The directory that this tree manifest represents, including a
728 728 trailing '/'. Empty string for the repo root directory.'''
729 729 return self._dir
730 730
731 731 def node(self):
732 732 '''This node of this instance. nullid for unsaved instances. Should
733 733 be updated when the instance is read or written from a revlog.
734 734 '''
735 735 assert not self._dirty
736 736 return self._node
737 737
738 738 def setnode(self, node):
739 739 self._node = node
740 740 self._dirty = False
741 741
742 742 def iterentries(self):
743 743 self._load()
744 744 for p, n in sorted(itertools.chain(self._dirs.items(),
745 745 self._files.items())):
746 746 if p in self._files:
747 747 yield self._subpath(p), n, self._flags.get(p, '')
748 748 else:
749 749 for x in n.iterentries():
750 750 yield x
751 751
752 752 def items(self):
753 753 self._load()
754 754 for p, n in sorted(itertools.chain(self._dirs.items(),
755 755 self._files.items())):
756 756 if p in self._files:
757 757 yield self._subpath(p), n
758 758 else:
759 759 for f, sn in n.iteritems():
760 760 yield f, sn
761 761
762 762 iteritems = items
763 763
764 764 def iterkeys(self):
765 765 self._load()
766 766 for p in sorted(itertools.chain(self._dirs, self._files)):
767 767 if p in self._files:
768 768 yield self._subpath(p)
769 769 else:
770 770 for f in self._dirs[p]:
771 771 yield f
772 772
773 773 def keys(self):
774 774 return list(self.iterkeys())
775 775
776 776 def __iter__(self):
777 777 return self.iterkeys()
778 778
779 779 def __contains__(self, f):
780 780 if f is None:
781 781 return False
782 782 self._load()
783 783 dir, subpath = _splittopdir(f)
784 784 if dir:
785 785 if dir not in self._dirs:
786 786 return False
787 787 return self._dirs[dir].__contains__(subpath)
788 788 else:
789 789 return f in self._files
790 790
791 791 def get(self, f, default=None):
792 792 self._load()
793 793 dir, subpath = _splittopdir(f)
794 794 if dir:
795 795 if dir not in self._dirs:
796 796 return default
797 797 return self._dirs[dir].get(subpath, default)
798 798 else:
799 799 return self._files.get(f, default)
800 800
801 801 def __getitem__(self, f):
802 802 self._load()
803 803 dir, subpath = _splittopdir(f)
804 804 if dir:
805 805 return self._dirs[dir].__getitem__(subpath)
806 806 else:
807 807 return self._files[f]
808 808
809 809 def flags(self, f):
810 810 self._load()
811 811 dir, subpath = _splittopdir(f)
812 812 if dir:
813 813 if dir not in self._dirs:
814 814 return ''
815 815 return self._dirs[dir].flags(subpath)
816 816 else:
817 817 if f in self._dirs:
818 818 return ''
819 819 return self._flags.get(f, '')
820 820
821 821 def find(self, f):
822 822 self._load()
823 823 dir, subpath = _splittopdir(f)
824 824 if dir:
825 825 return self._dirs[dir].find(subpath)
826 826 else:
827 827 return self._files[f], self._flags.get(f, '')
828 828
829 829 def __delitem__(self, f):
830 830 self._load()
831 831 dir, subpath = _splittopdir(f)
832 832 if dir:
833 833 self._dirs[dir].__delitem__(subpath)
834 834 # If the directory is now empty, remove it
835 835 if self._dirs[dir]._isempty():
836 836 del self._dirs[dir]
837 837 else:
838 838 del self._files[f]
839 839 if f in self._flags:
840 840 del self._flags[f]
841 841 self._dirty = True
842 842
843 843 def __setitem__(self, f, n):
844 844 assert n is not None
845 845 self._load()
846 846 dir, subpath = _splittopdir(f)
847 847 if dir:
848 848 if dir not in self._dirs:
849 849 self._dirs[dir] = treemanifest(self._subpath(dir))
850 850 self._dirs[dir].__setitem__(subpath, n)
851 851 else:
852 852 self._files[f] = n[:21] # to match manifestdict's behavior
853 853 self._dirty = True
854 854
855 855 def _load(self):
856 856 if self._loadfunc is not _noop:
857 857 lf, self._loadfunc = self._loadfunc, _noop
858 858 lf(self)
859 859 elif self._copyfunc is not _noop:
860 860 cf, self._copyfunc = self._copyfunc, _noop
861 861 cf(self)
862 862
863 863 def setflag(self, f, flags):
864 864 """Set the flags (symlink, executable) for path f."""
865 865 self._load()
866 866 dir, subpath = _splittopdir(f)
867 867 if dir:
868 868 if dir not in self._dirs:
869 869 self._dirs[dir] = treemanifest(self._subpath(dir))
870 870 self._dirs[dir].setflag(subpath, flags)
871 871 else:
872 872 self._flags[f] = flags
873 873 self._dirty = True
874 874
875 875 def copy(self):
876 876 copy = treemanifest(self._dir)
877 877 copy._node = self._node
878 878 copy._dirty = self._dirty
879 879 if self._copyfunc is _noop:
880 880 def _copyfunc(s):
881 881 self._load()
882 882 for d in self._dirs:
883 883 s._dirs[d] = self._dirs[d].copy()
884 884 s._files = dict.copy(self._files)
885 885 s._flags = dict.copy(self._flags)
886 886 if self._loadfunc is _noop:
887 887 _copyfunc(copy)
888 888 else:
889 889 copy._copyfunc = _copyfunc
890 890 else:
891 891 copy._copyfunc = self._copyfunc
892 892 return copy
893 893
894 894 def filesnotin(self, m2, match=None):
895 895 '''Set of files in this manifest that are not in the other'''
896 896 if match:
897 897 m1 = self.matches(match)
898 898 m2 = m2.matches(match)
899 899 return m1.filesnotin(m2)
900 900
901 901 files = set()
902 902 def _filesnotin(t1, t2):
903 903 if t1._node == t2._node and not t1._dirty and not t2._dirty:
904 904 return
905 905 t1._load()
906 906 t2._load()
907 907 for d, m1 in t1._dirs.iteritems():
908 908 if d in t2._dirs:
909 909 m2 = t2._dirs[d]
910 910 _filesnotin(m1, m2)
911 911 else:
912 912 files.update(m1.iterkeys())
913 913
914 914 for fn in t1._files:
915 915 if fn not in t2._files:
916 916 files.add(t1._subpath(fn))
917 917
918 918 _filesnotin(self, m2)
919 919 return files
920 920
921 921 @propertycache
922 922 def _alldirs(self):
923 923 return util.dirs(self)
924 924
925 925 def dirs(self):
926 926 return self._alldirs
927 927
928 928 def hasdir(self, dir):
929 929 self._load()
930 930 topdir, subdir = _splittopdir(dir)
931 931 if topdir:
932 932 if topdir in self._dirs:
933 933 return self._dirs[topdir].hasdir(subdir)
934 934 return False
935 935 return (dir + '/') in self._dirs
936 936
937 937 def walk(self, match):
938 938 '''Generates matching file names.
939 939
940 940 Equivalent to manifest.matches(match).iterkeys(), but without creating
941 941 an entirely new manifest.
942 942
943 943 It also reports nonexistent files by marking them bad with match.bad().
944 944 '''
945 945 if match.always():
946 946 for f in iter(self):
947 947 yield f
948 948 return
949 949
950 950 fset = set(match.files())
951 951
952 952 for fn in self._walk(match):
953 953 if fn in fset:
954 954 # specified pattern is the exact name
955 955 fset.remove(fn)
956 956 yield fn
957 957
958 958 # for dirstate.walk, files=['.'] means "walk the whole tree".
959 959 # follow that here, too
960 960 fset.discard('.')
961 961
962 962 for fn in sorted(fset):
963 963 if not self.hasdir(fn):
964 964 match.bad(fn, None)
965 965
966 966 def _walk(self, match):
967 967 '''Recursively generates matching file names for walk().'''
968 968 if not match.visitdir(self._dir[:-1] or '.'):
969 969 return
970 970
971 971 # yield this dir's files and walk its submanifests
972 972 self._load()
973 973 for p in sorted(list(self._dirs) + list(self._files)):
974 974 if p in self._files:
975 975 fullp = self._subpath(p)
976 976 if match(fullp):
977 977 yield fullp
978 978 else:
979 979 for f in self._dirs[p]._walk(match):
980 980 yield f
981 981
982 982 def matches(self, match):
983 983 '''generate a new manifest filtered by the match argument'''
984 984 if match.always():
985 985 return self.copy()
986 986
987 987 return self._matches(match)
988 988
989 989 def _matches(self, match):
990 990 '''recursively generate a new manifest filtered by the match argument.
991 991 '''
992 992
993 993 visit = match.visitdir(self._dir[:-1] or '.')
994 994 if visit == 'all':
995 995 return self.copy()
996 996 ret = treemanifest(self._dir)
997 997 if not visit:
998 998 return ret
999 999
1000 1000 self._load()
1001 1001 for fn in self._files:
1002 1002 fullp = self._subpath(fn)
1003 1003 if not match(fullp):
1004 1004 continue
1005 1005 ret._files[fn] = self._files[fn]
1006 1006 if fn in self._flags:
1007 1007 ret._flags[fn] = self._flags[fn]
1008 1008
1009 1009 for dir, subm in self._dirs.iteritems():
1010 1010 m = subm._matches(match)
1011 1011 if not m._isempty():
1012 1012 ret._dirs[dir] = m
1013 1013
1014 1014 if not ret._isempty():
1015 1015 ret._dirty = True
1016 1016 return ret
1017 1017
1018 1018 def diff(self, m2, match=None, clean=False):
1019 1019 '''Finds changes between the current manifest and m2.
1020 1020
1021 1021 Args:
1022 1022 m2: the manifest to which this manifest should be compared.
1023 1023 clean: if true, include files unchanged between these manifests
1024 1024 with a None value in the returned dictionary.
1025 1025
1026 1026 The result is returned as a dict with filename as key and
1027 1027 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1028 1028 nodeid in the current/other manifest and fl1/fl2 is the flag
1029 1029 in the current/other manifest. Where the file does not exist,
1030 1030 the nodeid will be None and the flags will be the empty
1031 1031 string.
1032 1032 '''
1033 1033 if match:
1034 1034 m1 = self.matches(match)
1035 1035 m2 = m2.matches(match)
1036 1036 return m1.diff(m2, clean=clean)
1037 1037 result = {}
1038 1038 emptytree = treemanifest()
1039 1039 def _diff(t1, t2):
1040 1040 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1041 1041 return
1042 1042 t1._load()
1043 1043 t2._load()
1044 1044 for d, m1 in t1._dirs.iteritems():
1045 1045 m2 = t2._dirs.get(d, emptytree)
1046 1046 _diff(m1, m2)
1047 1047
1048 1048 for d, m2 in t2._dirs.iteritems():
1049 1049 if d not in t1._dirs:
1050 1050 _diff(emptytree, m2)
1051 1051
1052 1052 for fn, n1 in t1._files.iteritems():
1053 1053 fl1 = t1._flags.get(fn, '')
1054 1054 n2 = t2._files.get(fn, None)
1055 1055 fl2 = t2._flags.get(fn, '')
1056 1056 if n1 != n2 or fl1 != fl2:
1057 1057 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1058 1058 elif clean:
1059 1059 result[t1._subpath(fn)] = None
1060 1060
1061 1061 for fn, n2 in t2._files.iteritems():
1062 1062 if fn not in t1._files:
1063 1063 fl2 = t2._flags.get(fn, '')
1064 1064 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
1065 1065
1066 1066 _diff(self, m2)
1067 1067 return result
1068 1068
1069 1069 def unmodifiedsince(self, m2):
1070 1070 return not self._dirty and not m2._dirty and self._node == m2._node
1071 1071
1072 1072 def parse(self, text, readsubtree):
1073 1073 for f, n, fl in _parse(text):
1074 1074 if fl == 't':
1075 1075 f = f + '/'
1076 1076 self._dirs[f] = readsubtree(self._subpath(f), n)
1077 1077 elif '/' in f:
1078 1078 # This is a flat manifest, so use __setitem__ and setflag rather
1079 1079 # than assigning directly to _files and _flags, so we can
1080 1080 # assign a path in a subdirectory, and to mark dirty (compared
1081 1081 # to nullid).
1082 1082 self[f] = n
1083 1083 if fl:
1084 1084 self.setflag(f, fl)
1085 1085 else:
1086 1086 # Assigning to _files and _flags avoids marking as dirty,
1087 1087 # and should be a little faster.
1088 1088 self._files[f] = n
1089 1089 if fl:
1090 1090 self._flags[f] = fl
1091 1091
1092 1092 def text(self):
1093 1093 """Get the full data of this manifest as a bytestring."""
1094 1094 self._load()
1095 1095 return _text(self.iterentries())
1096 1096
1097 1097 def dirtext(self):
1098 1098 """Get the full data of this directory as a bytestring. Make sure that
1099 1099 any submanifests have been written first, so their nodeids are correct.
1100 1100 """
1101 1101 self._load()
1102 1102 flags = self.flags
1103 1103 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
1104 1104 files = [(f, self._files[f], flags(f)) for f in self._files]
1105 1105 return _text(sorted(dirs + files))
1106 1106
1107 1107 def read(self, gettext, readsubtree):
1108 1108 def _load_for_read(s):
1109 1109 s.parse(gettext(), readsubtree)
1110 1110 s._dirty = False
1111 1111 self._loadfunc = _load_for_read
1112 1112
1113 1113 def writesubtrees(self, m1, m2, writesubtree):
1114 1114 self._load() # for consistency; should never have any effect here
1115 1115 m1._load()
1116 1116 m2._load()
1117 1117 emptytree = treemanifest()
1118 1118 for d, subm in self._dirs.iteritems():
1119 1119 subp1 = m1._dirs.get(d, emptytree)._node
1120 1120 subp2 = m2._dirs.get(d, emptytree)._node
1121 1121 if subp1 == nullid:
1122 1122 subp1, subp2 = subp2, subp1
1123 1123 writesubtree(subm, subp1, subp2)
1124 1124
1125 1125 def walksubtrees(self, matcher=None):
1126 1126 """Returns an iterator of the subtrees of this manifest, including this
1127 1127 manifest itself.
1128 1128
1129 1129 If `matcher` is provided, it only returns subtrees that match.
1130 1130 """
1131 1131 if matcher and not matcher.visitdir(self._dir[:-1] or '.'):
1132 1132 return
1133 1133 if not matcher or matcher(self._dir[:-1]):
1134 1134 yield self
1135 1135
1136 1136 self._load()
1137 1137 for d, subm in self._dirs.iteritems():
1138 1138 for subtree in subm.walksubtrees(matcher=matcher):
1139 1139 yield subtree
1140 1140
1141 1141 class manifestfulltextcache(util.lrucachedict):
1142 1142 """File-backed LRU cache for the manifest cache
1143 1143
1144 1144 File consists of entries, up to EOF:
1145 1145
1146 1146 - 20 bytes node, 4 bytes length, <length> manifest data
1147 1147
1148 1148 These are written in reverse cache order (oldest to newest).
1149 1149
1150 1150 """
1151 1151 def __init__(self, max):
1152 1152 super(manifestfulltextcache, self).__init__(max)
1153 1153 self._dirty = False
1154 1154 self._read = False
1155 1155 self._opener = None
1156 1156
1157 1157 def read(self):
1158 1158 if self._read or self._opener is None:
1159 1159 return
1160 1160
1161 1161 try:
1162 1162 with self._opener('manifestfulltextcache') as fp:
1163 1163 set = super(manifestfulltextcache, self).__setitem__
1164 1164 # ignore trailing data, this is a cache, corruption is skipped
1165 1165 while True:
1166 1166 node = fp.read(20)
1167 1167 if len(node) < 20:
1168 1168 break
1169 1169 try:
1170 1170 size = struct.unpack('>L', fp.read(4))[0]
1171 1171 except struct.error:
1172 1172 break
1173 1173 value = bytearray(fp.read(size))
1174 1174 if len(value) != size:
1175 1175 break
1176 1176 set(node, value)
1177 1177 except IOError:
1178 1178 # the file is allowed to be missing
1179 1179 pass
1180 1180
1181 1181 self._read = True
1182 1182 self._dirty = False
1183 1183
1184 1184 def write(self):
1185 1185 if not self._dirty or self._opener is None:
1186 1186 return
1187 1187 # rotate backwards to the first used node
1188 1188 with self._opener(
1189 1189 'manifestfulltextcache', 'w', atomictemp=True, checkambig=True
1190 1190 ) as fp:
1191 1191 node = self._head.prev
1192 1192 while True:
1193 1193 if node.key in self._cache:
1194 1194 fp.write(node.key)
1195 1195 fp.write(struct.pack('>L', len(node.value)))
1196 1196 fp.write(node.value)
1197 1197 if node is self._head:
1198 1198 break
1199 1199 node = node.prev
1200 1200
1201 1201 def __len__(self):
1202 1202 if not self._read:
1203 1203 self.read()
1204 1204 return super(manifestfulltextcache, self).__len__()
1205 1205
1206 1206 def __contains__(self, k):
1207 1207 if not self._read:
1208 1208 self.read()
1209 1209 return super(manifestfulltextcache, self).__contains__(k)
1210 1210
1211 1211 def __iter__(self):
1212 1212 if not self._read:
1213 1213 self.read()
1214 1214 return super(manifestfulltextcache, self).__iter__()
1215 1215
1216 1216 def __getitem__(self, k):
1217 1217 if not self._read:
1218 1218 self.read()
1219 1219 # the cache lru order can change on read
1220 1220 setdirty = self._cache.get(k) is not self._head
1221 1221 value = super(manifestfulltextcache, self).__getitem__(k)
1222 1222 if setdirty:
1223 1223 self._dirty = True
1224 1224 return value
1225 1225
1226 1226 def __setitem__(self, k, v):
1227 1227 if not self._read:
1228 1228 self.read()
1229 1229 super(manifestfulltextcache, self).__setitem__(k, v)
1230 1230 self._dirty = True
1231 1231
1232 1232 def __delitem__(self, k):
1233 1233 if not self._read:
1234 1234 self.read()
1235 1235 super(manifestfulltextcache, self).__delitem__(k)
1236 1236 self._dirty = True
1237 1237
1238 1238 def get(self, k, default=None):
1239 1239 if not self._read:
1240 1240 self.read()
1241 1241 return super(manifestfulltextcache, self).get(k, default=default)
1242 1242
1243 1243 def clear(self, clear_persisted_data=False):
1244 1244 super(manifestfulltextcache, self).clear()
1245 1245 if clear_persisted_data:
1246 1246 self._dirty = True
1247 1247 self.write()
1248 1248 self._read = False
1249 1249
1250 1250 @interfaceutil.implementer(repository.imanifeststorage)
1251 1251 class manifestrevlog(object):
1252 1252 '''A revlog that stores manifest texts. This is responsible for caching the
1253 1253 full-text manifest contents.
1254 1254 '''
1255 1255 def __init__(self, opener, tree='', dirlogcache=None, indexfile=None,
1256 1256 treemanifest=False):
1257 1257 """Constructs a new manifest revlog
1258 1258
1259 1259 `indexfile` - used by extensions to have two manifests at once, like
1260 1260 when transitioning between flatmanifeset and treemanifests.
1261 1261
1262 1262 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1263 1263 options can also be used to make this a tree manifest revlog. The opener
1264 1264 option takes precedence, so if it is set to True, we ignore whatever
1265 1265 value is passed in to the constructor.
1266 1266 """
1267 1267 # During normal operations, we expect to deal with not more than four
1268 1268 # revs at a time (such as during commit --amend). When rebasing large
1269 1269 # stacks of commits, the number can go up, hence the config knob below.
1270 1270 cachesize = 4
1271 1271 optiontreemanifest = False
1272 1272 opts = getattr(opener, 'options', None)
1273 1273 if opts is not None:
1274 1274 cachesize = opts.get('manifestcachesize', cachesize)
1275 1275 optiontreemanifest = opts.get('treemanifest', False)
1276 1276
1277 1277 self._treeondisk = optiontreemanifest or treemanifest
1278 1278
1279 1279 self._fulltextcache = manifestfulltextcache(cachesize)
1280 1280
1281 1281 if tree:
1282 1282 assert self._treeondisk, 'opts is %r' % opts
1283 1283
1284 1284 if indexfile is None:
1285 1285 indexfile = '00manifest.i'
1286 1286 if tree:
1287 1287 indexfile = "meta/" + tree + indexfile
1288 1288
1289 1289 self.tree = tree
1290 1290
1291 1291 # The dirlogcache is kept on the root manifest log
1292 1292 if tree:
1293 1293 self._dirlogcache = dirlogcache
1294 1294 else:
1295 1295 self._dirlogcache = {'': self}
1296 1296
1297 1297 self._revlog = revlog.revlog(opener, indexfile,
1298 1298 # only root indexfile is cached
1299 1299 checkambig=not bool(tree),
1300 1300 mmaplargeindex=True)
1301 1301
1302 1302 self.index = self._revlog.index
1303 1303 self.version = self._revlog.version
1304 1304 self._generaldelta = self._revlog._generaldelta
1305 1305
1306 1306 def _setupmanifestcachehooks(self, repo):
1307 1307 """Persist the manifestfulltextcache on lock release"""
1308 1308 if not util.safehasattr(repo, '_lockref'):
1309 1309 return
1310 1310
1311 1311 self._fulltextcache._opener = repo.cachevfs
1312 1312 reporef = weakref.ref(repo)
1313 1313 manifestrevlogref = weakref.ref(self)
1314 1314
1315 1315 def persistmanifestcache():
1316 1316 repo = reporef()
1317 1317 self = manifestrevlogref()
1318 1318 if repo is None or self is None:
1319 1319 return
1320 1320 if repo.manifestlog._revlog is not self:
1321 1321 # there's a different manifest in play now, abort
1322 1322 return
1323 1323 self._fulltextcache.write()
1324 1324
1325 1325 if repo._currentlock(repo._lockref) is not None:
1326 1326 repo._afterlock(persistmanifestcache)
1327 1327
1328 1328 @property
1329 1329 def fulltextcache(self):
1330 1330 return self._fulltextcache
1331 1331
1332 1332 def clearcaches(self, clear_persisted_data=False):
1333 1333 self._revlog.clearcaches()
1334 1334 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1335 1335 self._dirlogcache = {self.tree: self}
1336 1336
1337 1337 def dirlog(self, d):
1338 1338 if d:
1339 1339 assert self._treeondisk
1340 1340 if d not in self._dirlogcache:
1341 1341 mfrevlog = manifestrevlog(self.opener, d,
1342 1342 self._dirlogcache,
1343 1343 treemanifest=self._treeondisk)
1344 1344 self._dirlogcache[d] = mfrevlog
1345 1345 return self._dirlogcache[d]
1346 1346
1347 1347 def add(self, m, transaction, link, p1, p2, added, removed, readtree=None):
1348 1348 if p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta'):
1349 1349 # If our first parent is in the manifest cache, we can
1350 1350 # compute a delta here using properties we know about the
1351 1351 # manifest up-front, which may save time later for the
1352 1352 # revlog layer.
1353 1353
1354 1354 _checkforbidden(added)
1355 1355 # combine the changed lists into one sorted iterator
1356 1356 work = heapq.merge([(x, False) for x in added],
1357 1357 [(x, True) for x in removed])
1358 1358
1359 1359 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1360 1360 cachedelta = self._revlog.rev(p1), deltatext
1361 1361 text = util.buffer(arraytext)
1362 1362 n = self._revlog.addrevision(text, transaction, link, p1, p2,
1363 1363 cachedelta)
1364 1364 else:
1365 1365 # The first parent manifest isn't already loaded, so we'll
1366 1366 # just encode a fulltext of the manifest and pass that
1367 1367 # through to the revlog layer, and let it handle the delta
1368 1368 # process.
1369 1369 if self._treeondisk:
1370 1370 assert readtree, "readtree must be set for treemanifest writes"
1371 1371 m1 = readtree(self.tree, p1)
1372 1372 m2 = readtree(self.tree, p2)
1373 1373 n = self._addtree(m, transaction, link, m1, m2, readtree)
1374 1374 arraytext = None
1375 1375 else:
1376 1376 text = m.text()
1377 1377 n = self._revlog.addrevision(text, transaction, link, p1, p2)
1378 1378 arraytext = bytearray(text)
1379 1379
1380 1380 if arraytext is not None:
1381 1381 self.fulltextcache[n] = arraytext
1382 1382
1383 1383 return n
1384 1384
1385 1385 def _addtree(self, m, transaction, link, m1, m2, readtree):
1386 1386 # If the manifest is unchanged compared to one parent,
1387 1387 # don't write a new revision
1388 1388 if self.tree != '' and (m.unmodifiedsince(m1) or m.unmodifiedsince(
1389 1389 m2)):
1390 1390 return m.node()
1391 1391 def writesubtree(subm, subp1, subp2):
1392 1392 sublog = self.dirlog(subm.dir())
1393 1393 sublog.add(subm, transaction, link, subp1, subp2, None, None,
1394 1394 readtree=readtree)
1395 1395 m.writesubtrees(m1, m2, writesubtree)
1396 1396 text = m.dirtext()
1397 1397 n = None
1398 1398 if self.tree != '':
1399 1399 # Double-check whether contents are unchanged to one parent
1400 1400 if text == m1.dirtext():
1401 1401 n = m1.node()
1402 1402 elif text == m2.dirtext():
1403 1403 n = m2.node()
1404 1404
1405 1405 if not n:
1406 1406 n = self._revlog.addrevision(text, transaction, link, m1.node(),
1407 1407 m2.node())
1408 1408
1409 1409 # Save nodeid so parent manifest can calculate its nodeid
1410 1410 m.setnode(n)
1411 1411 return n
1412 1412
1413 1413 def __len__(self):
1414 1414 return len(self._revlog)
1415 1415
1416 1416 def __iter__(self):
1417 1417 return self._revlog.__iter__()
1418 1418
1419 1419 def rev(self, node):
1420 1420 return self._revlog.rev(node)
1421 1421
1422 1422 def node(self, rev):
1423 1423 return self._revlog.node(rev)
1424 1424
1425 1425 def lookup(self, value):
1426 1426 return self._revlog.lookup(value)
1427 1427
1428 1428 def parentrevs(self, rev):
1429 1429 return self._revlog.parentrevs(rev)
1430 1430
1431 1431 def parents(self, node):
1432 1432 return self._revlog.parents(node)
1433 1433
1434 1434 def linkrev(self, rev):
1435 1435 return self._revlog.linkrev(rev)
1436 1436
1437 1437 def checksize(self):
1438 1438 return self._revlog.checksize()
1439 1439
1440 1440 def revision(self, node, _df=None, raw=False):
1441 1441 return self._revlog.revision(node, _df=_df, raw=raw)
1442 1442
1443 1443 def revdiff(self, rev1, rev2):
1444 1444 return self._revlog.revdiff(rev1, rev2)
1445 1445
1446 1446 def cmp(self, node, text):
1447 1447 return self._revlog.cmp(node, text)
1448 1448
1449 1449 def deltaparent(self, rev):
1450 1450 return self._revlog.deltaparent(rev)
1451 1451
1452 1452 def emitrevisiondeltas(self, requests):
1453 1453 return self._revlog.emitrevisiondeltas(requests)
1454 1454
1455 1455 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1456 1456 return self._revlog.addgroup(deltas, linkmapper, transaction,
1457 1457 addrevisioncb=addrevisioncb)
1458 1458
1459 1459 def getstrippoint(self, minlink):
1460 1460 return self._revlog.getstrippoint(minlink)
1461 1461
1462 1462 def strip(self, minlink, transaction):
1463 1463 return self._revlog.strip(minlink, transaction)
1464 1464
1465 1465 def files(self):
1466 1466 return self._revlog.files()
1467 1467
1468 1468 def clone(self, tr, destrevlog, **kwargs):
1469 1469 if not isinstance(destrevlog, manifestrevlog):
1470 1470 raise error.ProgrammingError('expected manifestrevlog to clone()')
1471 1471
1472 1472 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1473 1473
1474 1474 @property
1475 1475 def indexfile(self):
1476 1476 return self._revlog.indexfile
1477 1477
1478 1478 @indexfile.setter
1479 1479 def indexfile(self, value):
1480 1480 self._revlog.indexfile = value
1481 1481
1482 1482 @property
1483 1483 def opener(self):
1484 1484 return self._revlog.opener
1485 1485
1486 1486 @opener.setter
1487 1487 def opener(self, value):
1488 1488 self._revlog.opener = value
1489 1489
1490 1490 @interfaceutil.implementer(repository.imanifestlog)
1491 1491 class manifestlog(object):
1492 1492 """A collection class representing the collection of manifest snapshots
1493 1493 referenced by commits in the repository.
1494 1494
1495 1495 In this situation, 'manifest' refers to the abstract concept of a snapshot
1496 1496 of the list of files in the given commit. Consumers of the output of this
1497 1497 class do not care about the implementation details of the actual manifests
1498 1498 they receive (i.e. tree or flat or lazily loaded, etc)."""
1499 1499 def __init__(self, opener, repo):
1500 1500 usetreemanifest = False
1501 1501 cachesize = 4
1502 1502
1503 1503 opts = getattr(opener, 'options', None)
1504 1504 if opts is not None:
1505 1505 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1506 1506 cachesize = opts.get('manifestcachesize', cachesize)
1507 1507
1508 1508 self._treemanifests = usetreemanifest
1509 1509
1510 1510 self._revlog = repo._constructmanifest()
1511 1511 self._revlog._setupmanifestcachehooks(repo)
1512 1512 self._narrowmatch = repo.narrowmatch()
1513 1513
1514 1514 # A cache of the manifestctx or treemanifestctx for each directory
1515 1515 self._dirmancache = {}
1516 1516 self._dirmancache[''] = util.lrucachedict(cachesize)
1517 1517
1518 1518 self._cachesize = cachesize
1519 1519
1520 1520 def __getitem__(self, node):
1521 1521 """Retrieves the manifest instance for the given node. Throws a
1522 1522 LookupError if not found.
1523 1523 """
1524 1524 return self.get('', node)
1525 1525
1526 1526 def get(self, tree, node, verify=True):
1527 1527 """Retrieves the manifest instance for the given node. Throws a
1528 1528 LookupError if not found.
1529 1529
1530 1530 `verify` - if True an exception will be thrown if the node is not in
1531 1531 the revlog
1532 1532 """
1533 1533 if node in self._dirmancache.get(tree, ()):
1534 1534 return self._dirmancache[tree][node]
1535 1535
1536 1536 if not self._narrowmatch.always():
1537 1537 if not self._narrowmatch.visitdir(tree[:-1] or '.'):
1538 1538 return excludeddirmanifestctx(tree, node)
1539 1539 if tree:
1540 1540 if self._revlog._treeondisk:
1541 1541 if verify:
1542 1542 # Side-effect is LookupError is raised if node doesn't
1543 1543 # exist.
1544 1544 self.getstorage(tree).rev(node)
1545 1545
1546 1546 m = treemanifestctx(self, tree, node)
1547 1547 else:
1548 1548 raise error.Abort(
1549 1549 _("cannot ask for manifest directory '%s' in a flat "
1550 1550 "manifest") % tree)
1551 1551 else:
1552 1552 if verify:
1553 1553 # Side-effect is LookupError is raised if node doesn't exist.
1554 1554 self._revlog.rev(node)
1555 1555
1556 1556 if self._treemanifests:
1557 1557 m = treemanifestctx(self, '', node)
1558 1558 else:
1559 1559 m = manifestctx(self, node)
1560 1560
1561 1561 if node != nullid:
1562 1562 mancache = self._dirmancache.get(tree)
1563 1563 if not mancache:
1564 1564 mancache = util.lrucachedict(self._cachesize)
1565 1565 self._dirmancache[tree] = mancache
1566 1566 mancache[node] = m
1567 1567 return m
1568 1568
1569 1569 def getstorage(self, tree):
1570 1570 return self._revlog.dirlog(tree)
1571 1571
1572 1572 def clearcaches(self, clear_persisted_data=False):
1573 1573 self._dirmancache.clear()
1574 1574 self._revlog.clearcaches(clear_persisted_data=clear_persisted_data)
1575 1575
1576 1576 def rev(self, node):
1577 1577 return self._revlog.rev(node)
1578 1578
1579 1579 @interfaceutil.implementer(repository.imanifestrevisionwritable)
1580 1580 class memmanifestctx(object):
1581 1581 def __init__(self, manifestlog):
1582 1582 self._manifestlog = manifestlog
1583 1583 self._manifestdict = manifestdict()
1584 1584
1585 def _revlog(self):
1585 def _storage(self):
1586 1586 return self._manifestlog._revlog
1587 1587
1588 1588 def new(self):
1589 1589 return memmanifestctx(self._manifestlog)
1590 1590
1591 1591 def copy(self):
1592 1592 memmf = memmanifestctx(self._manifestlog)
1593 1593 memmf._manifestdict = self.read().copy()
1594 1594 return memmf
1595 1595
1596 1596 def read(self):
1597 1597 return self._manifestdict
1598 1598
1599 1599 def write(self, transaction, link, p1, p2, added, removed):
1600 return self._revlog().add(self._manifestdict, transaction, link, p1, p2,
1601 added, removed)
1600 return self._storage().add(self._manifestdict, transaction, link,
1601 p1, p2, added, removed)
1602 1602
1603 1603 @interfaceutil.implementer(repository.imanifestrevisionstored)
1604 1604 class manifestctx(object):
1605 1605 """A class representing a single revision of a manifest, including its
1606 1606 contents, its parent revs, and its linkrev.
1607 1607 """
1608 1608 def __init__(self, manifestlog, node):
1609 1609 self._manifestlog = manifestlog
1610 1610 self._data = None
1611 1611
1612 1612 self._node = node
1613 1613
1614 1614 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1615 1615 # but let's add it later when something needs it and we can load it
1616 1616 # lazily.
1617 #self.p1, self.p2 = revlog.parents(node)
1618 #rev = revlog.rev(node)
1619 #self.linkrev = revlog.linkrev(rev)
1617 #self.p1, self.p2 = store.parents(node)
1618 #rev = store.rev(node)
1619 #self.linkrev = store.linkrev(rev)
1620 1620
1621 def _revlog(self):
1621 def _storage(self):
1622 1622 return self._manifestlog._revlog
1623 1623
1624 1624 def node(self):
1625 1625 return self._node
1626 1626
1627 1627 def new(self):
1628 1628 return memmanifestctx(self._manifestlog)
1629 1629
1630 1630 def copy(self):
1631 1631 memmf = memmanifestctx(self._manifestlog)
1632 1632 memmf._manifestdict = self.read().copy()
1633 1633 return memmf
1634 1634
1635 1635 @propertycache
1636 1636 def parents(self):
1637 return self._revlog().parents(self._node)
1637 return self._storage().parents(self._node)
1638 1638
1639 1639 def read(self):
1640 1640 if self._data is None:
1641 1641 if self._node == nullid:
1642 1642 self._data = manifestdict()
1643 1643 else:
1644 rl = self._revlog()
1645 if self._node in rl._fulltextcache:
1646 text = pycompat.bytestr(rl._fulltextcache[self._node])
1644 store = self._storage()
1645 # TODO accessing non-public API.
1646 if self._node in store._fulltextcache:
1647 text = pycompat.bytestr(store._fulltextcache[self._node])
1647 1648 else:
1648 text = rl.revision(self._node)
1649 text = store.revision(self._node)
1649 1650 arraytext = bytearray(text)
1650 rl._fulltextcache[self._node] = arraytext
1651 store._fulltextcache[self._node] = arraytext
1651 1652 self._data = manifestdict(text)
1652 1653 return self._data
1653 1654
1654 1655 def readfast(self, shallow=False):
1655 1656 '''Calls either readdelta or read, based on which would be less work.
1656 1657 readdelta is called if the delta is against the p1, and therefore can be
1657 1658 read quickly.
1658 1659
1659 1660 If `shallow` is True, nothing changes since this is a flat manifest.
1660 1661 '''
1661 rl = self._revlog()
1662 r = rl.rev(self._node)
1663 deltaparent = rl.deltaparent(r)
1664 if deltaparent != nullrev and deltaparent in rl.parentrevs(r):
1662 store = self._storage()
1663 r = store.rev(self._node)
1664 deltaparent = store.deltaparent(r)
1665 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
1665 1666 return self.readdelta()
1666 1667 return self.read()
1667 1668
1668 1669 def readdelta(self, shallow=False):
1669 1670 '''Returns a manifest containing just the entries that are present
1670 1671 in this manifest, but not in its p1 manifest. This is efficient to read
1671 1672 if the revlog delta is already p1.
1672 1673
1673 1674 Changing the value of `shallow` has no effect on flat manifests.
1674 1675 '''
1675 revlog = self._revlog()
1676 r = revlog.rev(self._node)
1677 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1676 store = self._storage()
1677 r = store.rev(self._node)
1678 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
1678 1679 return manifestdict(d)
1679 1680
1680 1681 def find(self, key):
1681 1682 return self.read().find(key)
1682 1683
1683 1684 @interfaceutil.implementer(repository.imanifestrevisionwritable)
1684 1685 class memtreemanifestctx(object):
1685 1686 def __init__(self, manifestlog, dir=''):
1686 1687 self._manifestlog = manifestlog
1687 1688 self._dir = dir
1688 1689 self._treemanifest = treemanifest()
1689 1690
1690 def _revlog(self):
1691 def _storage(self):
1691 1692 return self._manifestlog._revlog
1692 1693
1693 1694 def new(self, dir=''):
1694 1695 return memtreemanifestctx(self._manifestlog, dir=dir)
1695 1696
1696 1697 def copy(self):
1697 1698 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1698 1699 memmf._treemanifest = self._treemanifest.copy()
1699 1700 return memmf
1700 1701
1701 1702 def read(self):
1702 1703 return self._treemanifest
1703 1704
1704 1705 def write(self, transaction, link, p1, p2, added, removed):
1705 1706 def readtree(dir, node):
1706 1707 return self._manifestlog.get(dir, node).read()
1707 return self._revlog().add(self._treemanifest, transaction, link, p1, p2,
1708 added, removed, readtree=readtree)
1708 return self._storage().add(self._treemanifest, transaction, link,
1709 p1, p2, added, removed, readtree=readtree)
1709 1710
1710 1711 @interfaceutil.implementer(repository.imanifestrevisionstored)
1711 1712 class treemanifestctx(object):
1712 1713 def __init__(self, manifestlog, dir, node):
1713 1714 self._manifestlog = manifestlog
1714 1715 self._dir = dir
1715 1716 self._data = None
1716 1717
1717 1718 self._node = node
1718 1719
1719 1720 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
1720 1721 # we can instantiate treemanifestctx objects for directories we don't
1721 1722 # have on disk.
1722 #self.p1, self.p2 = revlog.parents(node)
1723 #rev = revlog.rev(node)
1724 #self.linkrev = revlog.linkrev(rev)
1723 #self.p1, self.p2 = store.parents(node)
1724 #rev = store.rev(node)
1725 #self.linkrev = store.linkrev(rev)
1725 1726
1726 def _revlog(self):
1727 def _storage(self):
1727 1728 narrowmatch = self._manifestlog._narrowmatch
1728 1729 if not narrowmatch.always():
1729 1730 if not narrowmatch.visitdir(self._dir[:-1] or '.'):
1730 1731 return excludedmanifestrevlog(self._dir)
1731 1732 return self._manifestlog.getstorage(self._dir)
1732 1733
1733 1734 def read(self):
1734 1735 if self._data is None:
1735 rl = self._revlog()
1736 store = self._storage()
1736 1737 if self._node == nullid:
1737 1738 self._data = treemanifest()
1738 elif rl._treeondisk:
1739 # TODO accessing non-public API
1740 elif store._treeondisk:
1739 1741 m = treemanifest(dir=self._dir)
1740 1742 def gettext():
1741 return rl.revision(self._node)
1743 return store.revision(self._node)
1742 1744 def readsubtree(dir, subm):
1743 1745 # Set verify to False since we need to be able to create
1744 1746 # subtrees for trees that don't exist on disk.
1745 1747 return self._manifestlog.get(dir, subm, verify=False).read()
1746 1748 m.read(gettext, readsubtree)
1747 1749 m.setnode(self._node)
1748 1750 self._data = m
1749 1751 else:
1750 if self._node in rl.fulltextcache:
1751 text = pycompat.bytestr(rl.fulltextcache[self._node])
1752 if self._node in store.fulltextcache:
1753 text = pycompat.bytestr(store.fulltextcache[self._node])
1752 1754 else:
1753 text = rl.revision(self._node)
1755 text = store.revision(self._node)
1754 1756 arraytext = bytearray(text)
1755 rl.fulltextcache[self._node] = arraytext
1757 store.fulltextcache[self._node] = arraytext
1756 1758 self._data = treemanifest(dir=self._dir, text=text)
1757 1759
1758 1760 return self._data
1759 1761
1760 1762 def node(self):
1761 1763 return self._node
1762 1764
1763 1765 def new(self, dir=''):
1764 1766 return memtreemanifestctx(self._manifestlog, dir=dir)
1765 1767
1766 1768 def copy(self):
1767 1769 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1768 1770 memmf._treemanifest = self.read().copy()
1769 1771 return memmf
1770 1772
1771 1773 @propertycache
1772 1774 def parents(self):
1773 return self._revlog().parents(self._node)
1775 return self._storage().parents(self._node)
1774 1776
1775 1777 def readdelta(self, shallow=False):
1776 1778 '''Returns a manifest containing just the entries that are present
1777 1779 in this manifest, but not in its p1 manifest. This is efficient to read
1778 1780 if the revlog delta is already p1.
1779 1781
1780 1782 If `shallow` is True, this will read the delta for this directory,
1781 1783 without recursively reading subdirectory manifests. Instead, any
1782 1784 subdirectory entry will be reported as it appears in the manifest, i.e.
1783 1785 the subdirectory will be reported among files and distinguished only by
1784 1786 its 't' flag.
1785 1787 '''
1786 revlog = self._revlog()
1788 store = self._storage()
1787 1789 if shallow:
1788 r = revlog.rev(self._node)
1789 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1790 r = store.rev(self._node)
1791 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
1790 1792 return manifestdict(d)
1791 1793 else:
1792 1794 # Need to perform a slow delta
1793 r0 = revlog.deltaparent(revlog.rev(self._node))
1794 m0 = self._manifestlog.get(self._dir, revlog.node(r0)).read()
1795 r0 = store.deltaparent(store.rev(self._node))
1796 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
1795 1797 m1 = self.read()
1796 1798 md = treemanifest(dir=self._dir)
1797 1799 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1798 1800 if n1:
1799 1801 md[f] = n1
1800 1802 if fl1:
1801 1803 md.setflag(f, fl1)
1802 1804 return md
1803 1805
1804 1806 def readfast(self, shallow=False):
1805 1807 '''Calls either readdelta or read, based on which would be less work.
1806 1808 readdelta is called if the delta is against the p1, and therefore can be
1807 1809 read quickly.
1808 1810
1809 1811 If `shallow` is True, it only returns the entries from this manifest,
1810 1812 and not any submanifests.
1811 1813 '''
1812 rl = self._revlog()
1813 r = rl.rev(self._node)
1814 deltaparent = rl.deltaparent(r)
1814 store = self._storage()
1815 r = store.rev(self._node)
1816 deltaparent = store.deltaparent(r)
1815 1817 if (deltaparent != nullrev and
1816 deltaparent in rl.parentrevs(r)):
1818 deltaparent in store.parentrevs(r)):
1817 1819 return self.readdelta(shallow=shallow)
1818 1820
1819 1821 if shallow:
1820 return manifestdict(rl.revision(self._node))
1822 return manifestdict(store.revision(self._node))
1821 1823 else:
1822 1824 return self.read()
1823 1825
1824 1826 def find(self, key):
1825 1827 return self.read().find(key)
1826 1828
1827 1829 class excludeddir(treemanifest):
1828 1830 """Stand-in for a directory that is excluded from the repository.
1829 1831
1830 1832 With narrowing active on a repository that uses treemanifests,
1831 1833 some of the directory revlogs will be excluded from the resulting
1832 1834 clone. This is a huge storage win for clients, but means we need
1833 1835 some sort of pseudo-manifest to surface to internals so we can
1834 1836 detect a merge conflict outside the narrowspec. That's what this
1835 1837 class is: it stands in for a directory whose node is known, but
1836 1838 whose contents are unknown.
1837 1839 """
1838 1840 def __init__(self, dir, node):
1839 1841 super(excludeddir, self).__init__(dir)
1840 1842 self._node = node
1841 1843 # Add an empty file, which will be included by iterators and such,
1842 1844 # appearing as the directory itself (i.e. something like "dir/")
1843 1845 self._files[''] = node
1844 1846 self._flags[''] = 't'
1845 1847
1846 1848 # Manifests outside the narrowspec should never be modified, so avoid
1847 1849 # copying. This makes a noticeable difference when there are very many
1848 1850 # directories outside the narrowspec. Also, it makes sense for the copy to
1849 1851 # be of the same type as the original, which would not happen with the
1850 1852 # super type's copy().
1851 1853 def copy(self):
1852 1854 return self
1853 1855
1854 1856 class excludeddirmanifestctx(treemanifestctx):
1855 1857 """context wrapper for excludeddir - see that docstring for rationale"""
1856 1858 def __init__(self, dir, node):
1857 1859 self._dir = dir
1858 1860 self._node = node
1859 1861
1860 1862 def read(self):
1861 1863 return excludeddir(self._dir, self._node)
1862 1864
1863 1865 def write(self, *args):
1864 1866 raise error.ProgrammingError(
1865 1867 'attempt to write manifest from excluded dir %s' % self._dir)
1866 1868
1867 1869 class excludedmanifestrevlog(manifestrevlog):
1868 1870 """Stand-in for excluded treemanifest revlogs.
1869 1871
1870 1872 When narrowing is active on a treemanifest repository, we'll have
1871 1873 references to directories we can't see due to the revlog being
1872 1874 skipped. This class exists to conform to the manifestrevlog
1873 1875 interface for those directories and proactively prevent writes to
1874 1876 outside the narrowspec.
1875 1877 """
1876 1878
1877 1879 def __init__(self, dir):
1878 1880 self._dir = dir
1879 1881
1880 1882 def __len__(self):
1881 1883 raise error.ProgrammingError(
1882 1884 'attempt to get length of excluded dir %s' % self._dir)
1883 1885
1884 1886 def rev(self, node):
1885 1887 raise error.ProgrammingError(
1886 1888 'attempt to get rev from excluded dir %s' % self._dir)
1887 1889
1888 1890 def linkrev(self, node):
1889 1891 raise error.ProgrammingError(
1890 1892 'attempt to get linkrev from excluded dir %s' % self._dir)
1891 1893
1892 1894 def node(self, rev):
1893 1895 raise error.ProgrammingError(
1894 1896 'attempt to get node from excluded dir %s' % self._dir)
1895 1897
1896 1898 def add(self, *args, **kwargs):
1897 1899 # We should never write entries in dirlogs outside the narrow clone.
1898 1900 # However, the method still gets called from writesubtree() in
1899 1901 # _addtree(), so we need to handle it. We should possibly make that
1900 1902 # avoid calling add() with a clean manifest (_dirty is always False
1901 1903 # in excludeddir instances).
1902 1904 pass
General Comments 0
You need to be logged in to leave comments. Login now