##// END OF EJS Templates
manifest: don't go through revlog to access node symbols...
Gregory Szorc -
r39352:53363a8e default
parent child Browse files
Show More
@@ -1,1901 +1,1902
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 nullid,
20 nullrev,
19 21 )
20 22 from . import (
21 23 error,
22 24 mdiff,
23 25 policy,
24 26 pycompat,
25 27 repository,
26 28 revlog,
27 29 util,
28 30 )
29 31 from .utils import (
30 32 interfaceutil,
31 33 )
32 34
33 35 parsers = policy.importmod(r'parsers')
34 36 propertycache = util.propertycache
35 37
36 38 def _parse(data):
37 39 # This method does a little bit of excessive-looking
38 40 # precondition checking. This is so that the behavior of this
39 41 # class exactly matches its C counterpart to try and help
40 42 # prevent surprise breakage for anyone that develops against
41 43 # the pure version.
42 44 if data and data[-1:] != '\n':
43 45 raise ValueError('Manifest did not end in a newline.')
44 46 prev = None
45 47 for l in data.splitlines():
46 48 if prev is not None and prev > l:
47 49 raise ValueError('Manifest lines not in sorted order.')
48 50 prev = l
49 51 f, n = l.split('\0')
50 52 if len(n) > 40:
51 53 yield f, bin(n[:40]), n[40:]
52 54 else:
53 55 yield f, bin(n), ''
54 56
55 57 def _text(it):
56 58 files = []
57 59 lines = []
58 _hex = revlog.hex
59 60 for f, n, fl in it:
60 61 files.append(f)
61 62 # if this is changed to support newlines in filenames,
62 63 # be sure to check the templates/ dir again (especially *-raw.tmpl)
63 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
64 lines.append("%s\0%s%s\n" % (f, hex(n), fl))
64 65
65 66 _checkforbidden(files)
66 67 return ''.join(lines)
67 68
68 69 class lazymanifestiter(object):
69 70 def __init__(self, lm):
70 71 self.pos = 0
71 72 self.lm = lm
72 73
73 74 def __iter__(self):
74 75 return self
75 76
76 77 def next(self):
77 78 try:
78 79 data, pos = self.lm._get(self.pos)
79 80 except IndexError:
80 81 raise StopIteration
81 82 if pos == -1:
82 83 self.pos += 1
83 84 return data[0]
84 85 self.pos += 1
85 86 zeropos = data.find('\x00', pos)
86 87 return data[pos:zeropos]
87 88
88 89 __next__ = next
89 90
90 91 class lazymanifestiterentries(object):
91 92 def __init__(self, lm):
92 93 self.lm = lm
93 94 self.pos = 0
94 95
95 96 def __iter__(self):
96 97 return self
97 98
98 99 def next(self):
99 100 try:
100 101 data, pos = self.lm._get(self.pos)
101 102 except IndexError:
102 103 raise StopIteration
103 104 if pos == -1:
104 105 self.pos += 1
105 106 return data
106 107 zeropos = data.find('\x00', pos)
107 108 hashval = unhexlify(data, self.lm.extrainfo[self.pos],
108 109 zeropos + 1, 40)
109 110 flags = self.lm._getflags(data, self.pos, zeropos)
110 111 self.pos += 1
111 112 return (data[pos:zeropos], hashval, flags)
112 113
113 114 __next__ = next
114 115
115 116 def unhexlify(data, extra, pos, length):
116 117 s = bin(data[pos:pos + length])
117 118 if extra:
118 119 s += chr(extra & 0xff)
119 120 return s
120 121
121 122 def _cmp(a, b):
122 123 return (a > b) - (a < b)
123 124
124 125 class _lazymanifest(object):
125 126 def __init__(self, data, positions=None, extrainfo=None, extradata=None):
126 127 if positions is None:
127 128 self.positions = self.findlines(data)
128 129 self.extrainfo = [0] * len(self.positions)
129 130 self.data = data
130 131 self.extradata = []
131 132 else:
132 133 self.positions = positions[:]
133 134 self.extrainfo = extrainfo[:]
134 135 self.extradata = extradata[:]
135 136 self.data = data
136 137
137 138 def findlines(self, data):
138 139 if not data:
139 140 return []
140 141 pos = data.find("\n")
141 142 if pos == -1 or data[-1:] != '\n':
142 143 raise ValueError("Manifest did not end in a newline.")
143 144 positions = [0]
144 145 prev = data[:data.find('\x00')]
145 146 while pos < len(data) - 1 and pos != -1:
146 147 positions.append(pos + 1)
147 148 nexts = data[pos + 1:data.find('\x00', pos + 1)]
148 149 if nexts < prev:
149 150 raise ValueError("Manifest lines not in sorted order.")
150 151 prev = nexts
151 152 pos = data.find("\n", pos + 1)
152 153 return positions
153 154
154 155 def _get(self, index):
155 156 # get the position encoded in pos:
156 157 # positive number is an index in 'data'
157 158 # negative number is in extrapieces
158 159 pos = self.positions[index]
159 160 if pos >= 0:
160 161 return self.data, pos
161 162 return self.extradata[-pos - 1], -1
162 163
163 164 def _getkey(self, pos):
164 165 if pos >= 0:
165 166 return self.data[pos:self.data.find('\x00', pos + 1)]
166 167 return self.extradata[-pos - 1][0]
167 168
168 169 def bsearch(self, key):
169 170 first = 0
170 171 last = len(self.positions) - 1
171 172
172 173 while first <= last:
173 174 midpoint = (first + last)//2
174 175 nextpos = self.positions[midpoint]
175 176 candidate = self._getkey(nextpos)
176 177 r = _cmp(key, candidate)
177 178 if r == 0:
178 179 return midpoint
179 180 else:
180 181 if r < 0:
181 182 last = midpoint - 1
182 183 else:
183 184 first = midpoint + 1
184 185 return -1
185 186
186 187 def bsearch2(self, key):
187 188 # same as the above, but will always return the position
188 189 # done for performance reasons
189 190 first = 0
190 191 last = len(self.positions) - 1
191 192
192 193 while first <= last:
193 194 midpoint = (first + last)//2
194 195 nextpos = self.positions[midpoint]
195 196 candidate = self._getkey(nextpos)
196 197 r = _cmp(key, candidate)
197 198 if r == 0:
198 199 return (midpoint, True)
199 200 else:
200 201 if r < 0:
201 202 last = midpoint - 1
202 203 else:
203 204 first = midpoint + 1
204 205 return (first, False)
205 206
206 207 def __contains__(self, key):
207 208 return self.bsearch(key) != -1
208 209
209 210 def _getflags(self, data, needle, pos):
210 211 start = pos + 41
211 212 end = data.find("\n", start)
212 213 if end == -1:
213 214 end = len(data) - 1
214 215 if start == end:
215 216 return ''
216 217 return self.data[start:end]
217 218
218 219 def __getitem__(self, key):
219 220 if not isinstance(key, bytes):
220 221 raise TypeError("getitem: manifest keys must be a bytes.")
221 222 needle = self.bsearch(key)
222 223 if needle == -1:
223 224 raise KeyError
224 225 data, pos = self._get(needle)
225 226 if pos == -1:
226 227 return (data[1], data[2])
227 228 zeropos = data.find('\x00', pos)
228 229 assert 0 <= needle <= len(self.positions)
229 230 assert len(self.extrainfo) == len(self.positions)
230 231 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
231 232 flags = self._getflags(data, needle, zeropos)
232 233 return (hashval, flags)
233 234
234 235 def __delitem__(self, key):
235 236 needle, found = self.bsearch2(key)
236 237 if not found:
237 238 raise KeyError
238 239 cur = self.positions[needle]
239 240 self.positions = self.positions[:needle] + self.positions[needle + 1:]
240 241 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
241 242 if cur >= 0:
242 243 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
243 244
244 245 def __setitem__(self, key, value):
245 246 if not isinstance(key, bytes):
246 247 raise TypeError("setitem: manifest keys must be a byte string.")
247 248 if not isinstance(value, tuple) or len(value) != 2:
248 249 raise TypeError("Manifest values must be a tuple of (node, flags).")
249 250 hashval = value[0]
250 251 if not isinstance(hashval, bytes) or not 20 <= len(hashval) <= 22:
251 252 raise TypeError("node must be a 20-byte byte string")
252 253 flags = value[1]
253 254 if len(hashval) == 22:
254 255 hashval = hashval[:-1]
255 256 if not isinstance(flags, bytes) or len(flags) > 1:
256 257 raise TypeError("flags must a 0 or 1 byte string, got %r", flags)
257 258 needle, found = self.bsearch2(key)
258 259 if found:
259 260 # put the item
260 261 pos = self.positions[needle]
261 262 if pos < 0:
262 263 self.extradata[-pos - 1] = (key, hashval, value[1])
263 264 else:
264 265 # just don't bother
265 266 self.extradata.append((key, hashval, value[1]))
266 267 self.positions[needle] = -len(self.extradata)
267 268 else:
268 269 # not found, put it in with extra positions
269 270 self.extradata.append((key, hashval, value[1]))
270 271 self.positions = (self.positions[:needle] + [-len(self.extradata)]
271 272 + self.positions[needle:])
272 273 self.extrainfo = (self.extrainfo[:needle] + [0] +
273 274 self.extrainfo[needle:])
274 275
275 276 def copy(self):
276 277 # XXX call _compact like in C?
277 278 return _lazymanifest(self.data, self.positions, self.extrainfo,
278 279 self.extradata)
279 280
280 281 def _compact(self):
281 282 # hopefully not called TOO often
282 283 if len(self.extradata) == 0:
283 284 return
284 285 l = []
285 286 last_cut = 0
286 287 i = 0
287 288 offset = 0
288 289 self.extrainfo = [0] * len(self.positions)
289 290 while i < len(self.positions):
290 291 if self.positions[i] >= 0:
291 292 cur = self.positions[i]
292 293 last_cut = cur
293 294 while True:
294 295 self.positions[i] = offset
295 296 i += 1
296 297 if i == len(self.positions) or self.positions[i] < 0:
297 298 break
298 299 offset += self.positions[i] - cur
299 300 cur = self.positions[i]
300 301 end_cut = self.data.find('\n', cur)
301 302 if end_cut != -1:
302 303 end_cut += 1
303 304 offset += end_cut - cur
304 305 l.append(self.data[last_cut:end_cut])
305 306 else:
306 307 while i < len(self.positions) and self.positions[i] < 0:
307 308 cur = self.positions[i]
308 309 t = self.extradata[-cur - 1]
309 310 l.append(self._pack(t))
310 311 self.positions[i] = offset
311 312 if len(t[1]) > 20:
312 313 self.extrainfo[i] = ord(t[1][21])
313 314 offset += len(l[-1])
314 315 i += 1
315 316 self.data = ''.join(l)
316 317 self.extradata = []
317 318
318 319 def _pack(self, d):
319 320 return d[0] + '\x00' + hex(d[1][:20]) + d[2] + '\n'
320 321
321 322 def text(self):
322 323 self._compact()
323 324 return self.data
324 325
325 326 def diff(self, m2, clean=False):
326 327 '''Finds changes between the current manifest and m2.'''
327 328 # XXX think whether efficiency matters here
328 329 diff = {}
329 330
330 331 for fn, e1, flags in self.iterentries():
331 332 if fn not in m2:
332 333 diff[fn] = (e1, flags), (None, '')
333 334 else:
334 335 e2 = m2[fn]
335 336 if (e1, flags) != e2:
336 337 diff[fn] = (e1, flags), e2
337 338 elif clean:
338 339 diff[fn] = None
339 340
340 341 for fn, e2, flags in m2.iterentries():
341 342 if fn not in self:
342 343 diff[fn] = (None, ''), (e2, flags)
343 344
344 345 return diff
345 346
346 347 def iterentries(self):
347 348 return lazymanifestiterentries(self)
348 349
349 350 def iterkeys(self):
350 351 return lazymanifestiter(self)
351 352
352 353 def __iter__(self):
353 354 return lazymanifestiter(self)
354 355
355 356 def __len__(self):
356 357 return len(self.positions)
357 358
358 359 def filtercopy(self, filterfn):
359 360 # XXX should be optimized
360 361 c = _lazymanifest('')
361 362 for f, n, fl in self.iterentries():
362 363 if filterfn(f):
363 364 c[f] = n, fl
364 365 return c
365 366
366 367 try:
367 368 _lazymanifest = parsers.lazymanifest
368 369 except AttributeError:
369 370 pass
370 371
371 372 @interfaceutil.implementer(repository.imanifestdict)
372 373 class manifestdict(object):
373 374 def __init__(self, data=''):
374 375 self._lm = _lazymanifest(data)
375 376
376 377 def __getitem__(self, key):
377 378 return self._lm[key][0]
378 379
379 380 def find(self, key):
380 381 return self._lm[key]
381 382
382 383 def __len__(self):
383 384 return len(self._lm)
384 385
385 386 def __nonzero__(self):
386 387 # nonzero is covered by the __len__ function, but implementing it here
387 388 # makes it easier for extensions to override.
388 389 return len(self._lm) != 0
389 390
390 391 __bool__ = __nonzero__
391 392
392 393 def __setitem__(self, key, node):
393 394 self._lm[key] = node, self.flags(key, '')
394 395
395 396 def __contains__(self, key):
396 397 if key is None:
397 398 return False
398 399 return key in self._lm
399 400
400 401 def __delitem__(self, key):
401 402 del self._lm[key]
402 403
403 404 def __iter__(self):
404 405 return self._lm.__iter__()
405 406
406 407 def iterkeys(self):
407 408 return self._lm.iterkeys()
408 409
409 410 def keys(self):
410 411 return list(self.iterkeys())
411 412
412 413 def filesnotin(self, m2, match=None):
413 414 '''Set of files in this manifest that are not in the other'''
414 415 if match:
415 416 m1 = self.matches(match)
416 417 m2 = m2.matches(match)
417 418 return m1.filesnotin(m2)
418 419 diff = self.diff(m2)
419 420 files = set(filepath
420 421 for filepath, hashflags in diff.iteritems()
421 422 if hashflags[1][0] is None)
422 423 return files
423 424
424 425 @propertycache
425 426 def _dirs(self):
426 427 return util.dirs(self)
427 428
428 429 def dirs(self):
429 430 return self._dirs
430 431
431 432 def hasdir(self, dir):
432 433 return dir in self._dirs
433 434
434 435 def _filesfastpath(self, match):
435 436 '''Checks whether we can correctly and quickly iterate over matcher
436 437 files instead of over manifest files.'''
437 438 files = match.files()
438 439 return (len(files) < 100 and (match.isexact() or
439 440 (match.prefix() and all(fn in self for fn in files))))
440 441
441 442 def walk(self, match):
442 443 '''Generates matching file names.
443 444
444 445 Equivalent to manifest.matches(match).iterkeys(), but without creating
445 446 an entirely new manifest.
446 447
447 448 It also reports nonexistent files by marking them bad with match.bad().
448 449 '''
449 450 if match.always():
450 451 for f in iter(self):
451 452 yield f
452 453 return
453 454
454 455 fset = set(match.files())
455 456
456 457 # avoid the entire walk if we're only looking for specific files
457 458 if self._filesfastpath(match):
458 459 for fn in sorted(fset):
459 460 yield fn
460 461 return
461 462
462 463 for fn in self:
463 464 if fn in fset:
464 465 # specified pattern is the exact name
465 466 fset.remove(fn)
466 467 if match(fn):
467 468 yield fn
468 469
469 470 # for dirstate.walk, files=['.'] means "walk the whole tree".
470 471 # follow that here, too
471 472 fset.discard('.')
472 473
473 474 for fn in sorted(fset):
474 475 if not self.hasdir(fn):
475 476 match.bad(fn, None)
476 477
477 478 def matches(self, match):
478 479 '''generate a new manifest filtered by the match argument'''
479 480 if match.always():
480 481 return self.copy()
481 482
482 483 if self._filesfastpath(match):
483 484 m = manifestdict()
484 485 lm = self._lm
485 486 for fn in match.files():
486 487 if fn in lm:
487 488 m._lm[fn] = lm[fn]
488 489 return m
489 490
490 491 m = manifestdict()
491 492 m._lm = self._lm.filtercopy(match)
492 493 return m
493 494
494 495 def diff(self, m2, match=None, clean=False):
495 496 '''Finds changes between the current manifest and m2.
496 497
497 498 Args:
498 499 m2: the manifest to which this manifest should be compared.
499 500 clean: if true, include files unchanged between these manifests
500 501 with a None value in the returned dictionary.
501 502
502 503 The result is returned as a dict with filename as key and
503 504 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
504 505 nodeid in the current/other manifest and fl1/fl2 is the flag
505 506 in the current/other manifest. Where the file does not exist,
506 507 the nodeid will be None and the flags will be the empty
507 508 string.
508 509 '''
509 510 if match:
510 511 m1 = self.matches(match)
511 512 m2 = m2.matches(match)
512 513 return m1.diff(m2, clean=clean)
513 514 return self._lm.diff(m2._lm, clean)
514 515
515 516 def setflag(self, key, flag):
516 517 self._lm[key] = self[key], flag
517 518
518 519 def get(self, key, default=None):
519 520 try:
520 521 return self._lm[key][0]
521 522 except KeyError:
522 523 return default
523 524
524 525 def flags(self, key, default=''):
525 526 try:
526 527 return self._lm[key][1]
527 528 except KeyError:
528 529 return default
529 530
530 531 def copy(self):
531 532 c = manifestdict()
532 533 c._lm = self._lm.copy()
533 534 return c
534 535
535 536 def items(self):
536 537 return (x[:2] for x in self._lm.iterentries())
537 538
538 539 def iteritems(self):
539 540 return (x[:2] for x in self._lm.iterentries())
540 541
541 542 def iterentries(self):
542 543 return self._lm.iterentries()
543 544
544 545 def text(self):
545 546 # most likely uses native version
546 547 return self._lm.text()
547 548
548 549 def fastdelta(self, base, changes):
549 550 """Given a base manifest text as a bytearray and a list of changes
550 551 relative to that text, compute a delta that can be used by revlog.
551 552 """
552 553 delta = []
553 554 dstart = None
554 555 dend = None
555 556 dline = [""]
556 557 start = 0
557 558 # zero copy representation of base as a buffer
558 559 addbuf = util.buffer(base)
559 560
560 561 changes = list(changes)
561 562 if len(changes) < 1000:
562 563 # start with a readonly loop that finds the offset of
563 564 # each line and creates the deltas
564 565 for f, todelete in changes:
565 566 # bs will either be the index of the item or the insert point
566 567 start, end = _msearch(addbuf, f, start)
567 568 if not todelete:
568 569 h, fl = self._lm[f]
569 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
570 l = "%s\0%s%s\n" % (f, hex(h), fl)
570 571 else:
571 572 if start == end:
572 573 # item we want to delete was not found, error out
573 574 raise AssertionError(
574 575 _("failed to remove %s from manifest") % f)
575 576 l = ""
576 577 if dstart is not None and dstart <= start and dend >= start:
577 578 if dend < end:
578 579 dend = end
579 580 if l:
580 581 dline.append(l)
581 582 else:
582 583 if dstart is not None:
583 584 delta.append([dstart, dend, "".join(dline)])
584 585 dstart = start
585 586 dend = end
586 587 dline = [l]
587 588
588 589 if dstart is not None:
589 590 delta.append([dstart, dend, "".join(dline)])
590 591 # apply the delta to the base, and get a delta for addrevision
591 592 deltatext, arraytext = _addlistdelta(base, delta)
592 593 else:
593 594 # For large changes, it's much cheaper to just build the text and
594 595 # diff it.
595 596 arraytext = bytearray(self.text())
596 597 deltatext = mdiff.textdiff(
597 598 util.buffer(base), util.buffer(arraytext))
598 599
599 600 return arraytext, deltatext
600 601
601 602 def _msearch(m, s, lo=0, hi=None):
602 603 '''return a tuple (start, end) that says where to find s within m.
603 604
604 605 If the string is found m[start:end] are the line containing
605 606 that string. If start == end the string was not found and
606 607 they indicate the proper sorted insertion point.
607 608
608 609 m should be a buffer, a memoryview or a byte string.
609 610 s is a byte string'''
610 611 def advance(i, c):
611 612 while i < lenm and m[i:i + 1] != c:
612 613 i += 1
613 614 return i
614 615 if not s:
615 616 return (lo, lo)
616 617 lenm = len(m)
617 618 if not hi:
618 619 hi = lenm
619 620 while lo < hi:
620 621 mid = (lo + hi) // 2
621 622 start = mid
622 623 while start > 0 and m[start - 1:start] != '\n':
623 624 start -= 1
624 625 end = advance(start, '\0')
625 626 if bytes(m[start:end]) < s:
626 627 # we know that after the null there are 40 bytes of sha1
627 628 # this translates to the bisect lo = mid + 1
628 629 lo = advance(end + 40, '\n') + 1
629 630 else:
630 631 # this translates to the bisect hi = mid
631 632 hi = start
632 633 end = advance(lo, '\0')
633 634 found = m[lo:end]
634 635 if s == found:
635 636 # we know that after the null there are 40 bytes of sha1
636 637 end = advance(end + 40, '\n')
637 638 return (lo, end + 1)
638 639 else:
639 640 return (lo, lo)
640 641
641 642 def _checkforbidden(l):
642 643 """Check filenames for illegal characters."""
643 644 for f in l:
644 645 if '\n' in f or '\r' in f:
645 646 raise error.RevlogError(
646 647 _("'\\n' and '\\r' disallowed in filenames: %r")
647 648 % pycompat.bytestr(f))
648 649
649 650
650 651 # apply the changes collected during the bisect loop to our addlist
651 652 # return a delta suitable for addrevision
652 653 def _addlistdelta(addlist, x):
653 654 # for large addlist arrays, building a new array is cheaper
654 655 # than repeatedly modifying the existing one
655 656 currentposition = 0
656 657 newaddlist = bytearray()
657 658
658 659 for start, end, content in x:
659 660 newaddlist += addlist[currentposition:start]
660 661 if content:
661 662 newaddlist += bytearray(content)
662 663
663 664 currentposition = end
664 665
665 666 newaddlist += addlist[currentposition:]
666 667
667 668 deltatext = "".join(struct.pack(">lll", start, end, len(content))
668 669 + content for start, end, content in x)
669 670 return deltatext, newaddlist
670 671
671 672 def _splittopdir(f):
672 673 if '/' in f:
673 674 dir, subpath = f.split('/', 1)
674 675 return dir + '/', subpath
675 676 else:
676 677 return '', f
677 678
678 679 _noop = lambda s: None
679 680
680 681 class treemanifest(object):
681 682 def __init__(self, dir='', text=''):
682 683 self._dir = dir
683 self._node = revlog.nullid
684 self._node = nullid
684 685 self._loadfunc = _noop
685 686 self._copyfunc = _noop
686 687 self._dirty = False
687 688 self._dirs = {}
688 689 # Using _lazymanifest here is a little slower than plain old dicts
689 690 self._files = {}
690 691 self._flags = {}
691 692 if text:
692 693 def readsubtree(subdir, subm):
693 694 raise AssertionError('treemanifest constructor only accepts '
694 695 'flat manifests')
695 696 self.parse(text, readsubtree)
696 697 self._dirty = True # Mark flat manifest dirty after parsing
697 698
698 699 def _subpath(self, path):
699 700 return self._dir + path
700 701
701 702 def __len__(self):
702 703 self._load()
703 704 size = len(self._files)
704 705 for m in self._dirs.values():
705 706 size += m.__len__()
706 707 return size
707 708
708 709 def __nonzero__(self):
709 710 # Faster than "__len() != 0" since it avoids loading sub-manifests
710 711 return not self._isempty()
711 712
712 713 __bool__ = __nonzero__
713 714
714 715 def _isempty(self):
715 716 self._load() # for consistency; already loaded by all callers
716 717 return (not self._files and (not self._dirs or
717 718 all(m._isempty() for m in self._dirs.values())))
718 719
719 720 def __repr__(self):
720 721 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
721 (self._dir, revlog.hex(self._node),
722 (self._dir, hex(self._node),
722 723 bool(self._loadfunc is _noop),
723 724 self._dirty, id(self)))
724 725
725 726 def dir(self):
726 727 '''The directory that this tree manifest represents, including a
727 728 trailing '/'. Empty string for the repo root directory.'''
728 729 return self._dir
729 730
730 731 def node(self):
731 732 '''This node of this instance. nullid for unsaved instances. Should
732 733 be updated when the instance is read or written from a revlog.
733 734 '''
734 735 assert not self._dirty
735 736 return self._node
736 737
737 738 def setnode(self, node):
738 739 self._node = node
739 740 self._dirty = False
740 741
741 742 def iterentries(self):
742 743 self._load()
743 744 for p, n in sorted(itertools.chain(self._dirs.items(),
744 745 self._files.items())):
745 746 if p in self._files:
746 747 yield self._subpath(p), n, self._flags.get(p, '')
747 748 else:
748 749 for x in n.iterentries():
749 750 yield x
750 751
751 752 def items(self):
752 753 self._load()
753 754 for p, n in sorted(itertools.chain(self._dirs.items(),
754 755 self._files.items())):
755 756 if p in self._files:
756 757 yield self._subpath(p), n
757 758 else:
758 759 for f, sn in n.iteritems():
759 760 yield f, sn
760 761
761 762 iteritems = items
762 763
763 764 def iterkeys(self):
764 765 self._load()
765 766 for p in sorted(itertools.chain(self._dirs, self._files)):
766 767 if p in self._files:
767 768 yield self._subpath(p)
768 769 else:
769 770 for f in self._dirs[p]:
770 771 yield f
771 772
772 773 def keys(self):
773 774 return list(self.iterkeys())
774 775
775 776 def __iter__(self):
776 777 return self.iterkeys()
777 778
778 779 def __contains__(self, f):
779 780 if f is None:
780 781 return False
781 782 self._load()
782 783 dir, subpath = _splittopdir(f)
783 784 if dir:
784 785 if dir not in self._dirs:
785 786 return False
786 787 return self._dirs[dir].__contains__(subpath)
787 788 else:
788 789 return f in self._files
789 790
790 791 def get(self, f, default=None):
791 792 self._load()
792 793 dir, subpath = _splittopdir(f)
793 794 if dir:
794 795 if dir not in self._dirs:
795 796 return default
796 797 return self._dirs[dir].get(subpath, default)
797 798 else:
798 799 return self._files.get(f, default)
799 800
800 801 def __getitem__(self, f):
801 802 self._load()
802 803 dir, subpath = _splittopdir(f)
803 804 if dir:
804 805 return self._dirs[dir].__getitem__(subpath)
805 806 else:
806 807 return self._files[f]
807 808
808 809 def flags(self, f):
809 810 self._load()
810 811 dir, subpath = _splittopdir(f)
811 812 if dir:
812 813 if dir not in self._dirs:
813 814 return ''
814 815 return self._dirs[dir].flags(subpath)
815 816 else:
816 817 if f in self._dirs:
817 818 return ''
818 819 return self._flags.get(f, '')
819 820
820 821 def find(self, f):
821 822 self._load()
822 823 dir, subpath = _splittopdir(f)
823 824 if dir:
824 825 return self._dirs[dir].find(subpath)
825 826 else:
826 827 return self._files[f], self._flags.get(f, '')
827 828
828 829 def __delitem__(self, f):
829 830 self._load()
830 831 dir, subpath = _splittopdir(f)
831 832 if dir:
832 833 self._dirs[dir].__delitem__(subpath)
833 834 # If the directory is now empty, remove it
834 835 if self._dirs[dir]._isempty():
835 836 del self._dirs[dir]
836 837 else:
837 838 del self._files[f]
838 839 if f in self._flags:
839 840 del self._flags[f]
840 841 self._dirty = True
841 842
842 843 def __setitem__(self, f, n):
843 844 assert n is not None
844 845 self._load()
845 846 dir, subpath = _splittopdir(f)
846 847 if dir:
847 848 if dir not in self._dirs:
848 849 self._dirs[dir] = treemanifest(self._subpath(dir))
849 850 self._dirs[dir].__setitem__(subpath, n)
850 851 else:
851 852 self._files[f] = n[:21] # to match manifestdict's behavior
852 853 self._dirty = True
853 854
854 855 def _load(self):
855 856 if self._loadfunc is not _noop:
856 857 lf, self._loadfunc = self._loadfunc, _noop
857 858 lf(self)
858 859 elif self._copyfunc is not _noop:
859 860 cf, self._copyfunc = self._copyfunc, _noop
860 861 cf(self)
861 862
862 863 def setflag(self, f, flags):
863 864 """Set the flags (symlink, executable) for path f."""
864 865 self._load()
865 866 dir, subpath = _splittopdir(f)
866 867 if dir:
867 868 if dir not in self._dirs:
868 869 self._dirs[dir] = treemanifest(self._subpath(dir))
869 870 self._dirs[dir].setflag(subpath, flags)
870 871 else:
871 872 self._flags[f] = flags
872 873 self._dirty = True
873 874
874 875 def copy(self):
875 876 copy = treemanifest(self._dir)
876 877 copy._node = self._node
877 878 copy._dirty = self._dirty
878 879 if self._copyfunc is _noop:
879 880 def _copyfunc(s):
880 881 self._load()
881 882 for d in self._dirs:
882 883 s._dirs[d] = self._dirs[d].copy()
883 884 s._files = dict.copy(self._files)
884 885 s._flags = dict.copy(self._flags)
885 886 if self._loadfunc is _noop:
886 887 _copyfunc(copy)
887 888 else:
888 889 copy._copyfunc = _copyfunc
889 890 else:
890 891 copy._copyfunc = self._copyfunc
891 892 return copy
892 893
893 894 def filesnotin(self, m2, match=None):
894 895 '''Set of files in this manifest that are not in the other'''
895 896 if match:
896 897 m1 = self.matches(match)
897 898 m2 = m2.matches(match)
898 899 return m1.filesnotin(m2)
899 900
900 901 files = set()
901 902 def _filesnotin(t1, t2):
902 903 if t1._node == t2._node and not t1._dirty and not t2._dirty:
903 904 return
904 905 t1._load()
905 906 t2._load()
906 907 for d, m1 in t1._dirs.iteritems():
907 908 if d in t2._dirs:
908 909 m2 = t2._dirs[d]
909 910 _filesnotin(m1, m2)
910 911 else:
911 912 files.update(m1.iterkeys())
912 913
913 914 for fn in t1._files:
914 915 if fn not in t2._files:
915 916 files.add(t1._subpath(fn))
916 917
917 918 _filesnotin(self, m2)
918 919 return files
919 920
920 921 @propertycache
921 922 def _alldirs(self):
922 923 return util.dirs(self)
923 924
924 925 def dirs(self):
925 926 return self._alldirs
926 927
927 928 def hasdir(self, dir):
928 929 self._load()
929 930 topdir, subdir = _splittopdir(dir)
930 931 if topdir:
931 932 if topdir in self._dirs:
932 933 return self._dirs[topdir].hasdir(subdir)
933 934 return False
934 935 return (dir + '/') in self._dirs
935 936
936 937 def walk(self, match):
937 938 '''Generates matching file names.
938 939
939 940 Equivalent to manifest.matches(match).iterkeys(), but without creating
940 941 an entirely new manifest.
941 942
942 943 It also reports nonexistent files by marking them bad with match.bad().
943 944 '''
944 945 if match.always():
945 946 for f in iter(self):
946 947 yield f
947 948 return
948 949
949 950 fset = set(match.files())
950 951
951 952 for fn in self._walk(match):
952 953 if fn in fset:
953 954 # specified pattern is the exact name
954 955 fset.remove(fn)
955 956 yield fn
956 957
957 958 # for dirstate.walk, files=['.'] means "walk the whole tree".
958 959 # follow that here, too
959 960 fset.discard('.')
960 961
961 962 for fn in sorted(fset):
962 963 if not self.hasdir(fn):
963 964 match.bad(fn, None)
964 965
965 966 def _walk(self, match):
966 967 '''Recursively generates matching file names for walk().'''
967 968 if not match.visitdir(self._dir[:-1] or '.'):
968 969 return
969 970
970 971 # yield this dir's files and walk its submanifests
971 972 self._load()
972 973 for p in sorted(list(self._dirs) + list(self._files)):
973 974 if p in self._files:
974 975 fullp = self._subpath(p)
975 976 if match(fullp):
976 977 yield fullp
977 978 else:
978 979 for f in self._dirs[p]._walk(match):
979 980 yield f
980 981
981 982 def matches(self, match):
982 983 '''generate a new manifest filtered by the match argument'''
983 984 if match.always():
984 985 return self.copy()
985 986
986 987 return self._matches(match)
987 988
988 989 def _matches(self, match):
989 990 '''recursively generate a new manifest filtered by the match argument.
990 991 '''
991 992
992 993 visit = match.visitdir(self._dir[:-1] or '.')
993 994 if visit == 'all':
994 995 return self.copy()
995 996 ret = treemanifest(self._dir)
996 997 if not visit:
997 998 return ret
998 999
999 1000 self._load()
1000 1001 for fn in self._files:
1001 1002 fullp = self._subpath(fn)
1002 1003 if not match(fullp):
1003 1004 continue
1004 1005 ret._files[fn] = self._files[fn]
1005 1006 if fn in self._flags:
1006 1007 ret._flags[fn] = self._flags[fn]
1007 1008
1008 1009 for dir, subm in self._dirs.iteritems():
1009 1010 m = subm._matches(match)
1010 1011 if not m._isempty():
1011 1012 ret._dirs[dir] = m
1012 1013
1013 1014 if not ret._isempty():
1014 1015 ret._dirty = True
1015 1016 return ret
1016 1017
1017 1018 def diff(self, m2, match=None, clean=False):
1018 1019 '''Finds changes between the current manifest and m2.
1019 1020
1020 1021 Args:
1021 1022 m2: the manifest to which this manifest should be compared.
1022 1023 clean: if true, include files unchanged between these manifests
1023 1024 with a None value in the returned dictionary.
1024 1025
1025 1026 The result is returned as a dict with filename as key and
1026 1027 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1027 1028 nodeid in the current/other manifest and fl1/fl2 is the flag
1028 1029 in the current/other manifest. Where the file does not exist,
1029 1030 the nodeid will be None and the flags will be the empty
1030 1031 string.
1031 1032 '''
1032 1033 if match:
1033 1034 m1 = self.matches(match)
1034 1035 m2 = m2.matches(match)
1035 1036 return m1.diff(m2, clean=clean)
1036 1037 result = {}
1037 1038 emptytree = treemanifest()
1038 1039 def _diff(t1, t2):
1039 1040 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1040 1041 return
1041 1042 t1._load()
1042 1043 t2._load()
1043 1044 for d, m1 in t1._dirs.iteritems():
1044 1045 m2 = t2._dirs.get(d, emptytree)
1045 1046 _diff(m1, m2)
1046 1047
1047 1048 for d, m2 in t2._dirs.iteritems():
1048 1049 if d not in t1._dirs:
1049 1050 _diff(emptytree, m2)
1050 1051
1051 1052 for fn, n1 in t1._files.iteritems():
1052 1053 fl1 = t1._flags.get(fn, '')
1053 1054 n2 = t2._files.get(fn, None)
1054 1055 fl2 = t2._flags.get(fn, '')
1055 1056 if n1 != n2 or fl1 != fl2:
1056 1057 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1057 1058 elif clean:
1058 1059 result[t1._subpath(fn)] = None
1059 1060
1060 1061 for fn, n2 in t2._files.iteritems():
1061 1062 if fn not in t1._files:
1062 1063 fl2 = t2._flags.get(fn, '')
1063 1064 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
1064 1065
1065 1066 _diff(self, m2)
1066 1067 return result
1067 1068
1068 1069 def unmodifiedsince(self, m2):
1069 1070 return not self._dirty and not m2._dirty and self._node == m2._node
1070 1071
1071 1072 def parse(self, text, readsubtree):
1072 1073 for f, n, fl in _parse(text):
1073 1074 if fl == 't':
1074 1075 f = f + '/'
1075 1076 self._dirs[f] = readsubtree(self._subpath(f), n)
1076 1077 elif '/' in f:
1077 1078 # This is a flat manifest, so use __setitem__ and setflag rather
1078 1079 # than assigning directly to _files and _flags, so we can
1079 1080 # assign a path in a subdirectory, and to mark dirty (compared
1080 1081 # to nullid).
1081 1082 self[f] = n
1082 1083 if fl:
1083 1084 self.setflag(f, fl)
1084 1085 else:
1085 1086 # Assigning to _files and _flags avoids marking as dirty,
1086 1087 # and should be a little faster.
1087 1088 self._files[f] = n
1088 1089 if fl:
1089 1090 self._flags[f] = fl
1090 1091
1091 1092 def text(self):
1092 1093 """Get the full data of this manifest as a bytestring."""
1093 1094 self._load()
1094 1095 return _text(self.iterentries())
1095 1096
1096 1097 def dirtext(self):
1097 1098 """Get the full data of this directory as a bytestring. Make sure that
1098 1099 any submanifests have been written first, so their nodeids are correct.
1099 1100 """
1100 1101 self._load()
1101 1102 flags = self.flags
1102 1103 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
1103 1104 files = [(f, self._files[f], flags(f)) for f in self._files]
1104 1105 return _text(sorted(dirs + files))
1105 1106
1106 1107 def read(self, gettext, readsubtree):
1107 1108 def _load_for_read(s):
1108 1109 s.parse(gettext(), readsubtree)
1109 1110 s._dirty = False
1110 1111 self._loadfunc = _load_for_read
1111 1112
1112 1113 def writesubtrees(self, m1, m2, writesubtree):
1113 1114 self._load() # for consistency; should never have any effect here
1114 1115 m1._load()
1115 1116 m2._load()
1116 1117 emptytree = treemanifest()
1117 1118 for d, subm in self._dirs.iteritems():
1118 1119 subp1 = m1._dirs.get(d, emptytree)._node
1119 1120 subp2 = m2._dirs.get(d, emptytree)._node
1120 if subp1 == revlog.nullid:
1121 if subp1 == nullid:
1121 1122 subp1, subp2 = subp2, subp1
1122 1123 writesubtree(subm, subp1, subp2)
1123 1124
1124 1125 def walksubtrees(self, matcher=None):
1125 1126 """Returns an iterator of the subtrees of this manifest, including this
1126 1127 manifest itself.
1127 1128
1128 1129 If `matcher` is provided, it only returns subtrees that match.
1129 1130 """
1130 1131 if matcher and not matcher.visitdir(self._dir[:-1] or '.'):
1131 1132 return
1132 1133 if not matcher or matcher(self._dir[:-1]):
1133 1134 yield self
1134 1135
1135 1136 self._load()
1136 1137 for d, subm in self._dirs.iteritems():
1137 1138 for subtree in subm.walksubtrees(matcher=matcher):
1138 1139 yield subtree
1139 1140
1140 1141 class manifestfulltextcache(util.lrucachedict):
1141 1142 """File-backed LRU cache for the manifest cache
1142 1143
1143 1144 File consists of entries, up to EOF:
1144 1145
1145 1146 - 20 bytes node, 4 bytes length, <length> manifest data
1146 1147
1147 1148 These are written in reverse cache order (oldest to newest).
1148 1149
1149 1150 """
1150 1151 def __init__(self, max):
1151 1152 super(manifestfulltextcache, self).__init__(max)
1152 1153 self._dirty = False
1153 1154 self._read = False
1154 1155 self._opener = None
1155 1156
1156 1157 def read(self):
1157 1158 if self._read or self._opener is None:
1158 1159 return
1159 1160
1160 1161 try:
1161 1162 with self._opener('manifestfulltextcache') as fp:
1162 1163 set = super(manifestfulltextcache, self).__setitem__
1163 1164 # ignore trailing data, this is a cache, corruption is skipped
1164 1165 while True:
1165 1166 node = fp.read(20)
1166 1167 if len(node) < 20:
1167 1168 break
1168 1169 try:
1169 1170 size = struct.unpack('>L', fp.read(4))[0]
1170 1171 except struct.error:
1171 1172 break
1172 1173 value = bytearray(fp.read(size))
1173 1174 if len(value) != size:
1174 1175 break
1175 1176 set(node, value)
1176 1177 except IOError:
1177 1178 # the file is allowed to be missing
1178 1179 pass
1179 1180
1180 1181 self._read = True
1181 1182 self._dirty = False
1182 1183
1183 1184 def write(self):
1184 1185 if not self._dirty or self._opener is None:
1185 1186 return
1186 1187 # rotate backwards to the first used node
1187 1188 with self._opener(
1188 1189 'manifestfulltextcache', 'w', atomictemp=True, checkambig=True
1189 1190 ) as fp:
1190 1191 node = self._head.prev
1191 1192 while True:
1192 1193 if node.key in self._cache:
1193 1194 fp.write(node.key)
1194 1195 fp.write(struct.pack('>L', len(node.value)))
1195 1196 fp.write(node.value)
1196 1197 if node is self._head:
1197 1198 break
1198 1199 node = node.prev
1199 1200
1200 1201 def __len__(self):
1201 1202 if not self._read:
1202 1203 self.read()
1203 1204 return super(manifestfulltextcache, self).__len__()
1204 1205
1205 1206 def __contains__(self, k):
1206 1207 if not self._read:
1207 1208 self.read()
1208 1209 return super(manifestfulltextcache, self).__contains__(k)
1209 1210
1210 1211 def __iter__(self):
1211 1212 if not self._read:
1212 1213 self.read()
1213 1214 return super(manifestfulltextcache, self).__iter__()
1214 1215
1215 1216 def __getitem__(self, k):
1216 1217 if not self._read:
1217 1218 self.read()
1218 1219 # the cache lru order can change on read
1219 1220 setdirty = self._cache.get(k) is not self._head
1220 1221 value = super(manifestfulltextcache, self).__getitem__(k)
1221 1222 if setdirty:
1222 1223 self._dirty = True
1223 1224 return value
1224 1225
1225 1226 def __setitem__(self, k, v):
1226 1227 if not self._read:
1227 1228 self.read()
1228 1229 super(manifestfulltextcache, self).__setitem__(k, v)
1229 1230 self._dirty = True
1230 1231
1231 1232 def __delitem__(self, k):
1232 1233 if not self._read:
1233 1234 self.read()
1234 1235 super(manifestfulltextcache, self).__delitem__(k)
1235 1236 self._dirty = True
1236 1237
1237 1238 def get(self, k, default=None):
1238 1239 if not self._read:
1239 1240 self.read()
1240 1241 return super(manifestfulltextcache, self).get(k, default=default)
1241 1242
1242 1243 def clear(self, clear_persisted_data=False):
1243 1244 super(manifestfulltextcache, self).clear()
1244 1245 if clear_persisted_data:
1245 1246 self._dirty = True
1246 1247 self.write()
1247 1248 self._read = False
1248 1249
1249 1250 @interfaceutil.implementer(repository.imanifeststorage)
1250 1251 class manifestrevlog(object):
1251 1252 '''A revlog that stores manifest texts. This is responsible for caching the
1252 1253 full-text manifest contents.
1253 1254 '''
1254 1255 def __init__(self, opener, tree='', dirlogcache=None, indexfile=None,
1255 1256 treemanifest=False):
1256 1257 """Constructs a new manifest revlog
1257 1258
1258 1259 `indexfile` - used by extensions to have two manifests at once, like
1259 1260 when transitioning between flatmanifeset and treemanifests.
1260 1261
1261 1262 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1262 1263 options can also be used to make this a tree manifest revlog. The opener
1263 1264 option takes precedence, so if it is set to True, we ignore whatever
1264 1265 value is passed in to the constructor.
1265 1266 """
1266 1267 # During normal operations, we expect to deal with not more than four
1267 1268 # revs at a time (such as during commit --amend). When rebasing large
1268 1269 # stacks of commits, the number can go up, hence the config knob below.
1269 1270 cachesize = 4
1270 1271 optiontreemanifest = False
1271 1272 opts = getattr(opener, 'options', None)
1272 1273 if opts is not None:
1273 1274 cachesize = opts.get('manifestcachesize', cachesize)
1274 1275 optiontreemanifest = opts.get('treemanifest', False)
1275 1276
1276 1277 self._treeondisk = optiontreemanifest or treemanifest
1277 1278
1278 1279 self._fulltextcache = manifestfulltextcache(cachesize)
1279 1280
1280 1281 if tree:
1281 1282 assert self._treeondisk, 'opts is %r' % opts
1282 1283
1283 1284 if indexfile is None:
1284 1285 indexfile = '00manifest.i'
1285 1286 if tree:
1286 1287 indexfile = "meta/" + tree + indexfile
1287 1288
1288 1289 self.tree = tree
1289 1290
1290 1291 # The dirlogcache is kept on the root manifest log
1291 1292 if tree:
1292 1293 self._dirlogcache = dirlogcache
1293 1294 else:
1294 1295 self._dirlogcache = {'': self}
1295 1296
1296 1297 self._revlog = revlog.revlog(opener, indexfile,
1297 1298 # only root indexfile is cached
1298 1299 checkambig=not bool(tree),
1299 1300 mmaplargeindex=True)
1300 1301
1301 1302 self.index = self._revlog.index
1302 1303 self.version = self._revlog.version
1303 1304 self._generaldelta = self._revlog._generaldelta
1304 1305
1305 1306 def _setupmanifestcachehooks(self, repo):
1306 1307 """Persist the manifestfulltextcache on lock release"""
1307 1308 if not util.safehasattr(repo, '_lockref'):
1308 1309 return
1309 1310
1310 1311 self._fulltextcache._opener = repo.cachevfs
1311 1312 reporef = weakref.ref(repo)
1312 1313 manifestrevlogref = weakref.ref(self)
1313 1314
1314 1315 def persistmanifestcache():
1315 1316 repo = reporef()
1316 1317 self = manifestrevlogref()
1317 1318 if repo is None or self is None:
1318 1319 return
1319 1320 if repo.manifestlog._revlog is not self:
1320 1321 # there's a different manifest in play now, abort
1321 1322 return
1322 1323 self._fulltextcache.write()
1323 1324
1324 1325 if repo._currentlock(repo._lockref) is not None:
1325 1326 repo._afterlock(persistmanifestcache)
1326 1327
1327 1328 @property
1328 1329 def fulltextcache(self):
1329 1330 return self._fulltextcache
1330 1331
1331 1332 def clearcaches(self, clear_persisted_data=False):
1332 1333 self._revlog.clearcaches()
1333 1334 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1334 1335 self._dirlogcache = {self.tree: self}
1335 1336
1336 1337 def dirlog(self, d):
1337 1338 if d:
1338 1339 assert self._treeondisk
1339 1340 if d not in self._dirlogcache:
1340 1341 mfrevlog = manifestrevlog(self.opener, d,
1341 1342 self._dirlogcache,
1342 1343 treemanifest=self._treeondisk)
1343 1344 self._dirlogcache[d] = mfrevlog
1344 1345 return self._dirlogcache[d]
1345 1346
1346 1347 def add(self, m, transaction, link, p1, p2, added, removed, readtree=None):
1347 1348 if p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta'):
1348 1349 # If our first parent is in the manifest cache, we can
1349 1350 # compute a delta here using properties we know about the
1350 1351 # manifest up-front, which may save time later for the
1351 1352 # revlog layer.
1352 1353
1353 1354 _checkforbidden(added)
1354 1355 # combine the changed lists into one sorted iterator
1355 1356 work = heapq.merge([(x, False) for x in added],
1356 1357 [(x, True) for x in removed])
1357 1358
1358 1359 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1359 1360 cachedelta = self._revlog.rev(p1), deltatext
1360 1361 text = util.buffer(arraytext)
1361 1362 n = self._revlog.addrevision(text, transaction, link, p1, p2,
1362 1363 cachedelta)
1363 1364 else:
1364 1365 # The first parent manifest isn't already loaded, so we'll
1365 1366 # just encode a fulltext of the manifest and pass that
1366 1367 # through to the revlog layer, and let it handle the delta
1367 1368 # process.
1368 1369 if self._treeondisk:
1369 1370 assert readtree, "readtree must be set for treemanifest writes"
1370 1371 m1 = readtree(self.tree, p1)
1371 1372 m2 = readtree(self.tree, p2)
1372 1373 n = self._addtree(m, transaction, link, m1, m2, readtree)
1373 1374 arraytext = None
1374 1375 else:
1375 1376 text = m.text()
1376 1377 n = self._revlog.addrevision(text, transaction, link, p1, p2)
1377 1378 arraytext = bytearray(text)
1378 1379
1379 1380 if arraytext is not None:
1380 1381 self.fulltextcache[n] = arraytext
1381 1382
1382 1383 return n
1383 1384
1384 1385 def _addtree(self, m, transaction, link, m1, m2, readtree):
1385 1386 # If the manifest is unchanged compared to one parent,
1386 1387 # don't write a new revision
1387 1388 if self.tree != '' and (m.unmodifiedsince(m1) or m.unmodifiedsince(
1388 1389 m2)):
1389 1390 return m.node()
1390 1391 def writesubtree(subm, subp1, subp2):
1391 1392 sublog = self.dirlog(subm.dir())
1392 1393 sublog.add(subm, transaction, link, subp1, subp2, None, None,
1393 1394 readtree=readtree)
1394 1395 m.writesubtrees(m1, m2, writesubtree)
1395 1396 text = m.dirtext()
1396 1397 n = None
1397 1398 if self.tree != '':
1398 1399 # Double-check whether contents are unchanged to one parent
1399 1400 if text == m1.dirtext():
1400 1401 n = m1.node()
1401 1402 elif text == m2.dirtext():
1402 1403 n = m2.node()
1403 1404
1404 1405 if not n:
1405 1406 n = self._revlog.addrevision(text, transaction, link, m1.node(),
1406 1407 m2.node())
1407 1408
1408 1409 # Save nodeid so parent manifest can calculate its nodeid
1409 1410 m.setnode(n)
1410 1411 return n
1411 1412
1412 1413 def __len__(self):
1413 1414 return len(self._revlog)
1414 1415
1415 1416 def __iter__(self):
1416 1417 return self._revlog.__iter__()
1417 1418
1418 1419 def rev(self, node):
1419 1420 return self._revlog.rev(node)
1420 1421
1421 1422 def node(self, rev):
1422 1423 return self._revlog.node(rev)
1423 1424
1424 1425 def lookup(self, value):
1425 1426 return self._revlog.lookup(value)
1426 1427
1427 1428 def parentrevs(self, rev):
1428 1429 return self._revlog.parentrevs(rev)
1429 1430
1430 1431 def parents(self, node):
1431 1432 return self._revlog.parents(node)
1432 1433
1433 1434 def linkrev(self, rev):
1434 1435 return self._revlog.linkrev(rev)
1435 1436
1436 1437 def checksize(self):
1437 1438 return self._revlog.checksize()
1438 1439
1439 1440 def revision(self, node, _df=None, raw=False):
1440 1441 return self._revlog.revision(node, _df=_df, raw=raw)
1441 1442
1442 1443 def revdiff(self, rev1, rev2):
1443 1444 return self._revlog.revdiff(rev1, rev2)
1444 1445
1445 1446 def cmp(self, node, text):
1446 1447 return self._revlog.cmp(node, text)
1447 1448
1448 1449 def deltaparent(self, rev):
1449 1450 return self._revlog.deltaparent(rev)
1450 1451
1451 1452 def emitrevisiondeltas(self, requests):
1452 1453 return self._revlog.emitrevisiondeltas(requests)
1453 1454
1454 1455 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1455 1456 return self._revlog.addgroup(deltas, linkmapper, transaction,
1456 1457 addrevisioncb=addrevisioncb)
1457 1458
1458 1459 def getstrippoint(self, minlink):
1459 1460 return self._revlog.getstrippoint(minlink)
1460 1461
1461 1462 def strip(self, minlink, transaction):
1462 1463 return self._revlog.strip(minlink, transaction)
1463 1464
1464 1465 def files(self):
1465 1466 return self._revlog.files()
1466 1467
1467 1468 def clone(self, tr, destrevlog, **kwargs):
1468 1469 if not isinstance(destrevlog, manifestrevlog):
1469 1470 raise error.ProgrammingError('expected manifestrevlog to clone()')
1470 1471
1471 1472 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1472 1473
1473 1474 @property
1474 1475 def indexfile(self):
1475 1476 return self._revlog.indexfile
1476 1477
1477 1478 @indexfile.setter
1478 1479 def indexfile(self, value):
1479 1480 self._revlog.indexfile = value
1480 1481
1481 1482 @property
1482 1483 def opener(self):
1483 1484 return self._revlog.opener
1484 1485
1485 1486 @opener.setter
1486 1487 def opener(self, value):
1487 1488 self._revlog.opener = value
1488 1489
1489 1490 @interfaceutil.implementer(repository.imanifestlog)
1490 1491 class manifestlog(object):
1491 1492 """A collection class representing the collection of manifest snapshots
1492 1493 referenced by commits in the repository.
1493 1494
1494 1495 In this situation, 'manifest' refers to the abstract concept of a snapshot
1495 1496 of the list of files in the given commit. Consumers of the output of this
1496 1497 class do not care about the implementation details of the actual manifests
1497 1498 they receive (i.e. tree or flat or lazily loaded, etc)."""
1498 1499 def __init__(self, opener, repo):
1499 1500 usetreemanifest = False
1500 1501 cachesize = 4
1501 1502
1502 1503 opts = getattr(opener, 'options', None)
1503 1504 if opts is not None:
1504 1505 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1505 1506 cachesize = opts.get('manifestcachesize', cachesize)
1506 1507
1507 1508 self._treemanifests = usetreemanifest
1508 1509
1509 1510 self._revlog = repo._constructmanifest()
1510 1511 self._revlog._setupmanifestcachehooks(repo)
1511 1512 self._narrowmatch = repo.narrowmatch()
1512 1513
1513 1514 # A cache of the manifestctx or treemanifestctx for each directory
1514 1515 self._dirmancache = {}
1515 1516 self._dirmancache[''] = util.lrucachedict(cachesize)
1516 1517
1517 1518 self._cachesize = cachesize
1518 1519
1519 1520 def __getitem__(self, node):
1520 1521 """Retrieves the manifest instance for the given node. Throws a
1521 1522 LookupError if not found.
1522 1523 """
1523 1524 return self.get('', node)
1524 1525
1525 1526 def get(self, tree, node, verify=True):
1526 1527 """Retrieves the manifest instance for the given node. Throws a
1527 1528 LookupError if not found.
1528 1529
1529 1530 `verify` - if True an exception will be thrown if the node is not in
1530 1531 the revlog
1531 1532 """
1532 1533 if node in self._dirmancache.get(tree, ()):
1533 1534 return self._dirmancache[tree][node]
1534 1535
1535 1536 if not self._narrowmatch.always():
1536 1537 if not self._narrowmatch.visitdir(tree[:-1] or '.'):
1537 1538 return excludeddirmanifestctx(tree, node)
1538 1539 if tree:
1539 1540 if self._revlog._treeondisk:
1540 1541 if verify:
1541 1542 # Side-effect is LookupError is raised if node doesn't
1542 1543 # exist.
1543 1544 self.getstorage(tree).rev(node)
1544 1545
1545 1546 m = treemanifestctx(self, tree, node)
1546 1547 else:
1547 1548 raise error.Abort(
1548 1549 _("cannot ask for manifest directory '%s' in a flat "
1549 1550 "manifest") % tree)
1550 1551 else:
1551 1552 if verify:
1552 1553 # Side-effect is LookupError is raised if node doesn't exist.
1553 1554 self._revlog.rev(node)
1554 1555
1555 1556 if self._treemanifests:
1556 1557 m = treemanifestctx(self, '', node)
1557 1558 else:
1558 1559 m = manifestctx(self, node)
1559 1560
1560 if node != revlog.nullid:
1561 if node != nullid:
1561 1562 mancache = self._dirmancache.get(tree)
1562 1563 if not mancache:
1563 1564 mancache = util.lrucachedict(self._cachesize)
1564 1565 self._dirmancache[tree] = mancache
1565 1566 mancache[node] = m
1566 1567 return m
1567 1568
1568 1569 def getstorage(self, tree):
1569 1570 return self._revlog.dirlog(tree)
1570 1571
1571 1572 def clearcaches(self, clear_persisted_data=False):
1572 1573 self._dirmancache.clear()
1573 1574 self._revlog.clearcaches(clear_persisted_data=clear_persisted_data)
1574 1575
1575 1576 def rev(self, node):
1576 1577 return self._revlog.rev(node)
1577 1578
1578 1579 @interfaceutil.implementer(repository.imanifestrevisionwritable)
1579 1580 class memmanifestctx(object):
1580 1581 def __init__(self, manifestlog):
1581 1582 self._manifestlog = manifestlog
1582 1583 self._manifestdict = manifestdict()
1583 1584
1584 1585 def _revlog(self):
1585 1586 return self._manifestlog._revlog
1586 1587
1587 1588 def new(self):
1588 1589 return memmanifestctx(self._manifestlog)
1589 1590
1590 1591 def copy(self):
1591 1592 memmf = memmanifestctx(self._manifestlog)
1592 1593 memmf._manifestdict = self.read().copy()
1593 1594 return memmf
1594 1595
1595 1596 def read(self):
1596 1597 return self._manifestdict
1597 1598
1598 1599 def write(self, transaction, link, p1, p2, added, removed):
1599 1600 return self._revlog().add(self._manifestdict, transaction, link, p1, p2,
1600 1601 added, removed)
1601 1602
1602 1603 @interfaceutil.implementer(repository.imanifestrevisionstored)
1603 1604 class manifestctx(object):
1604 1605 """A class representing a single revision of a manifest, including its
1605 1606 contents, its parent revs, and its linkrev.
1606 1607 """
1607 1608 def __init__(self, manifestlog, node):
1608 1609 self._manifestlog = manifestlog
1609 1610 self._data = None
1610 1611
1611 1612 self._node = node
1612 1613
1613 1614 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1614 1615 # but let's add it later when something needs it and we can load it
1615 1616 # lazily.
1616 1617 #self.p1, self.p2 = revlog.parents(node)
1617 1618 #rev = revlog.rev(node)
1618 1619 #self.linkrev = revlog.linkrev(rev)
1619 1620
1620 1621 def _revlog(self):
1621 1622 return self._manifestlog._revlog
1622 1623
1623 1624 def node(self):
1624 1625 return self._node
1625 1626
1626 1627 def new(self):
1627 1628 return memmanifestctx(self._manifestlog)
1628 1629
1629 1630 def copy(self):
1630 1631 memmf = memmanifestctx(self._manifestlog)
1631 1632 memmf._manifestdict = self.read().copy()
1632 1633 return memmf
1633 1634
1634 1635 @propertycache
1635 1636 def parents(self):
1636 1637 return self._revlog().parents(self._node)
1637 1638
1638 1639 def read(self):
1639 1640 if self._data is None:
1640 if self._node == revlog.nullid:
1641 if self._node == nullid:
1641 1642 self._data = manifestdict()
1642 1643 else:
1643 1644 rl = self._revlog()
1644 1645 if self._node in rl._fulltextcache:
1645 1646 text = pycompat.bytestr(rl._fulltextcache[self._node])
1646 1647 else:
1647 1648 text = rl.revision(self._node)
1648 1649 arraytext = bytearray(text)
1649 1650 rl._fulltextcache[self._node] = arraytext
1650 1651 self._data = manifestdict(text)
1651 1652 return self._data
1652 1653
1653 1654 def readfast(self, shallow=False):
1654 1655 '''Calls either readdelta or read, based on which would be less work.
1655 1656 readdelta is called if the delta is against the p1, and therefore can be
1656 1657 read quickly.
1657 1658
1658 1659 If `shallow` is True, nothing changes since this is a flat manifest.
1659 1660 '''
1660 1661 rl = self._revlog()
1661 1662 r = rl.rev(self._node)
1662 1663 deltaparent = rl.deltaparent(r)
1663 if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
1664 if deltaparent != nullrev and deltaparent in rl.parentrevs(r):
1664 1665 return self.readdelta()
1665 1666 return self.read()
1666 1667
1667 1668 def readdelta(self, shallow=False):
1668 1669 '''Returns a manifest containing just the entries that are present
1669 1670 in this manifest, but not in its p1 manifest. This is efficient to read
1670 1671 if the revlog delta is already p1.
1671 1672
1672 1673 Changing the value of `shallow` has no effect on flat manifests.
1673 1674 '''
1674 1675 revlog = self._revlog()
1675 1676 r = revlog.rev(self._node)
1676 1677 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1677 1678 return manifestdict(d)
1678 1679
1679 1680 def find(self, key):
1680 1681 return self.read().find(key)
1681 1682
1682 1683 @interfaceutil.implementer(repository.imanifestrevisionwritable)
1683 1684 class memtreemanifestctx(object):
1684 1685 def __init__(self, manifestlog, dir=''):
1685 1686 self._manifestlog = manifestlog
1686 1687 self._dir = dir
1687 1688 self._treemanifest = treemanifest()
1688 1689
1689 1690 def _revlog(self):
1690 1691 return self._manifestlog._revlog
1691 1692
1692 1693 def new(self, dir=''):
1693 1694 return memtreemanifestctx(self._manifestlog, dir=dir)
1694 1695
1695 1696 def copy(self):
1696 1697 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1697 1698 memmf._treemanifest = self._treemanifest.copy()
1698 1699 return memmf
1699 1700
1700 1701 def read(self):
1701 1702 return self._treemanifest
1702 1703
1703 1704 def write(self, transaction, link, p1, p2, added, removed):
1704 1705 def readtree(dir, node):
1705 1706 return self._manifestlog.get(dir, node).read()
1706 1707 return self._revlog().add(self._treemanifest, transaction, link, p1, p2,
1707 1708 added, removed, readtree=readtree)
1708 1709
1709 1710 @interfaceutil.implementer(repository.imanifestrevisionstored)
1710 1711 class treemanifestctx(object):
1711 1712 def __init__(self, manifestlog, dir, node):
1712 1713 self._manifestlog = manifestlog
1713 1714 self._dir = dir
1714 1715 self._data = None
1715 1716
1716 1717 self._node = node
1717 1718
1718 1719 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
1719 1720 # we can instantiate treemanifestctx objects for directories we don't
1720 1721 # have on disk.
1721 1722 #self.p1, self.p2 = revlog.parents(node)
1722 1723 #rev = revlog.rev(node)
1723 1724 #self.linkrev = revlog.linkrev(rev)
1724 1725
1725 1726 def _revlog(self):
1726 1727 narrowmatch = self._manifestlog._narrowmatch
1727 1728 if not narrowmatch.always():
1728 1729 if not narrowmatch.visitdir(self._dir[:-1] or '.'):
1729 1730 return excludedmanifestrevlog(self._dir)
1730 1731 return self._manifestlog.getstorage(self._dir)
1731 1732
1732 1733 def read(self):
1733 1734 if self._data is None:
1734 1735 rl = self._revlog()
1735 if self._node == revlog.nullid:
1736 if self._node == nullid:
1736 1737 self._data = treemanifest()
1737 1738 elif rl._treeondisk:
1738 1739 m = treemanifest(dir=self._dir)
1739 1740 def gettext():
1740 1741 return rl.revision(self._node)
1741 1742 def readsubtree(dir, subm):
1742 1743 # Set verify to False since we need to be able to create
1743 1744 # subtrees for trees that don't exist on disk.
1744 1745 return self._manifestlog.get(dir, subm, verify=False).read()
1745 1746 m.read(gettext, readsubtree)
1746 1747 m.setnode(self._node)
1747 1748 self._data = m
1748 1749 else:
1749 1750 if self._node in rl.fulltextcache:
1750 1751 text = pycompat.bytestr(rl.fulltextcache[self._node])
1751 1752 else:
1752 1753 text = rl.revision(self._node)
1753 1754 arraytext = bytearray(text)
1754 1755 rl.fulltextcache[self._node] = arraytext
1755 1756 self._data = treemanifest(dir=self._dir, text=text)
1756 1757
1757 1758 return self._data
1758 1759
1759 1760 def node(self):
1760 1761 return self._node
1761 1762
1762 1763 def new(self, dir=''):
1763 1764 return memtreemanifestctx(self._manifestlog, dir=dir)
1764 1765
1765 1766 def copy(self):
1766 1767 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1767 1768 memmf._treemanifest = self.read().copy()
1768 1769 return memmf
1769 1770
1770 1771 @propertycache
1771 1772 def parents(self):
1772 1773 return self._revlog().parents(self._node)
1773 1774
1774 1775 def readdelta(self, shallow=False):
1775 1776 '''Returns a manifest containing just the entries that are present
1776 1777 in this manifest, but not in its p1 manifest. This is efficient to read
1777 1778 if the revlog delta is already p1.
1778 1779
1779 1780 If `shallow` is True, this will read the delta for this directory,
1780 1781 without recursively reading subdirectory manifests. Instead, any
1781 1782 subdirectory entry will be reported as it appears in the manifest, i.e.
1782 1783 the subdirectory will be reported among files and distinguished only by
1783 1784 its 't' flag.
1784 1785 '''
1785 1786 revlog = self._revlog()
1786 1787 if shallow:
1787 1788 r = revlog.rev(self._node)
1788 1789 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1789 1790 return manifestdict(d)
1790 1791 else:
1791 1792 # Need to perform a slow delta
1792 1793 r0 = revlog.deltaparent(revlog.rev(self._node))
1793 1794 m0 = self._manifestlog.get(self._dir, revlog.node(r0)).read()
1794 1795 m1 = self.read()
1795 1796 md = treemanifest(dir=self._dir)
1796 1797 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1797 1798 if n1:
1798 1799 md[f] = n1
1799 1800 if fl1:
1800 1801 md.setflag(f, fl1)
1801 1802 return md
1802 1803
1803 1804 def readfast(self, shallow=False):
1804 1805 '''Calls either readdelta or read, based on which would be less work.
1805 1806 readdelta is called if the delta is against the p1, and therefore can be
1806 1807 read quickly.
1807 1808
1808 1809 If `shallow` is True, it only returns the entries from this manifest,
1809 1810 and not any submanifests.
1810 1811 '''
1811 1812 rl = self._revlog()
1812 1813 r = rl.rev(self._node)
1813 1814 deltaparent = rl.deltaparent(r)
1814 if (deltaparent != revlog.nullrev and
1815 if (deltaparent != nullrev and
1815 1816 deltaparent in rl.parentrevs(r)):
1816 1817 return self.readdelta(shallow=shallow)
1817 1818
1818 1819 if shallow:
1819 1820 return manifestdict(rl.revision(self._node))
1820 1821 else:
1821 1822 return self.read()
1822 1823
1823 1824 def find(self, key):
1824 1825 return self.read().find(key)
1825 1826
1826 1827 class excludeddir(treemanifest):
1827 1828 """Stand-in for a directory that is excluded from the repository.
1828 1829
1829 1830 With narrowing active on a repository that uses treemanifests,
1830 1831 some of the directory revlogs will be excluded from the resulting
1831 1832 clone. This is a huge storage win for clients, but means we need
1832 1833 some sort of pseudo-manifest to surface to internals so we can
1833 1834 detect a merge conflict outside the narrowspec. That's what this
1834 1835 class is: it stands in for a directory whose node is known, but
1835 1836 whose contents are unknown.
1836 1837 """
1837 1838 def __init__(self, dir, node):
1838 1839 super(excludeddir, self).__init__(dir)
1839 1840 self._node = node
1840 1841 # Add an empty file, which will be included by iterators and such,
1841 1842 # appearing as the directory itself (i.e. something like "dir/")
1842 1843 self._files[''] = node
1843 1844 self._flags[''] = 't'
1844 1845
1845 1846 # Manifests outside the narrowspec should never be modified, so avoid
1846 1847 # copying. This makes a noticeable difference when there are very many
1847 1848 # directories outside the narrowspec. Also, it makes sense for the copy to
1848 1849 # be of the same type as the original, which would not happen with the
1849 1850 # super type's copy().
1850 1851 def copy(self):
1851 1852 return self
1852 1853
1853 1854 class excludeddirmanifestctx(treemanifestctx):
1854 1855 """context wrapper for excludeddir - see that docstring for rationale"""
1855 1856 def __init__(self, dir, node):
1856 1857 self._dir = dir
1857 1858 self._node = node
1858 1859
1859 1860 def read(self):
1860 1861 return excludeddir(self._dir, self._node)
1861 1862
1862 1863 def write(self, *args):
1863 1864 raise error.ProgrammingError(
1864 1865 'attempt to write manifest from excluded dir %s' % self._dir)
1865 1866
1866 1867 class excludedmanifestrevlog(manifestrevlog):
1867 1868 """Stand-in for excluded treemanifest revlogs.
1868 1869
1869 1870 When narrowing is active on a treemanifest repository, we'll have
1870 1871 references to directories we can't see due to the revlog being
1871 1872 skipped. This class exists to conform to the manifestrevlog
1872 1873 interface for those directories and proactively prevent writes to
1873 1874 outside the narrowspec.
1874 1875 """
1875 1876
1876 1877 def __init__(self, dir):
1877 1878 self._dir = dir
1878 1879
1879 1880 def __len__(self):
1880 1881 raise error.ProgrammingError(
1881 1882 'attempt to get length of excluded dir %s' % self._dir)
1882 1883
1883 1884 def rev(self, node):
1884 1885 raise error.ProgrammingError(
1885 1886 'attempt to get rev from excluded dir %s' % self._dir)
1886 1887
1887 1888 def linkrev(self, node):
1888 1889 raise error.ProgrammingError(
1889 1890 'attempt to get linkrev from excluded dir %s' % self._dir)
1890 1891
1891 1892 def node(self, rev):
1892 1893 raise error.ProgrammingError(
1893 1894 'attempt to get node from excluded dir %s' % self._dir)
1894 1895
1895 1896 def add(self, *args, **kwargs):
1896 1897 # We should never write entries in dirlogs outside the narrow clone.
1897 1898 # However, the method still gets called from writesubtree() in
1898 1899 # _addtree(), so we need to handle it. We should possibly make that
1899 1900 # avoid calling add() with a clean manifest (_dirty is always False
1900 1901 # in excludeddir instances).
1901 1902 pass
General Comments 0
You need to be logged in to leave comments. Login now