##// END OF EJS Templates
manifest: fix another pure-parsing 20-byte assumption...
Augie Fackler -
r45194:0415a566 default
parent child Browse files
Show More
@@ -1,2276 +1,2281 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from .pycompat import getattr
23 23 from . import (
24 24 encoding,
25 25 error,
26 26 match as matchmod,
27 27 mdiff,
28 28 pathutil,
29 29 policy,
30 30 pycompat,
31 31 revlog,
32 32 util,
33 33 )
34 34 from .interfaces import (
35 35 repository,
36 36 util as interfaceutil,
37 37 )
38 38
39 39 parsers = policy.importmod('parsers')
40 40 propertycache = util.propertycache
41 41
42 42 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
43 43 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
44 44
45 45
46 46 def _parse(data):
47 47 # This method does a little bit of excessive-looking
48 48 # precondition checking. This is so that the behavior of this
49 49 # class exactly matches its C counterpart to try and help
50 50 # prevent surprise breakage for anyone that develops against
51 51 # the pure version.
52 52 if data and data[-1:] != b'\n':
53 53 raise ValueError(b'Manifest did not end in a newline.')
54 54 prev = None
55 55 for l in data.splitlines():
56 56 if prev is not None and prev > l:
57 57 raise ValueError(b'Manifest lines not in sorted order.')
58 58 prev = l
59 59 f, n = l.split(b'\0')
60 60 if len(n) > 40:
61 61 yield f, bin(n[:40]), n[40:]
62 62 else:
63 63 yield f, bin(n), b''
64 64
65 65
66 66 def _text(it):
67 67 files = []
68 68 lines = []
69 69 for f, n, fl in it:
70 70 files.append(f)
71 71 # if this is changed to support newlines in filenames,
72 72 # be sure to check the templates/ dir again (especially *-raw.tmpl)
73 73 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
74 74
75 75 _checkforbidden(files)
76 76 return b''.join(lines)
77 77
78 78
79 79 class lazymanifestiter(object):
80 80 def __init__(self, lm):
81 81 self.pos = 0
82 82 self.lm = lm
83 83
84 84 def __iter__(self):
85 85 return self
86 86
87 87 def next(self):
88 88 try:
89 89 data, pos = self.lm._get(self.pos)
90 90 except IndexError:
91 91 raise StopIteration
92 92 if pos == -1:
93 93 self.pos += 1
94 94 return data[0]
95 95 self.pos += 1
96 96 zeropos = data.find(b'\x00', pos)
97 97 return data[pos:zeropos]
98 98
99 99 __next__ = next
100 100
101 101
102 102 class lazymanifestiterentries(object):
103 103 def __init__(self, lm):
104 104 self.lm = lm
105 105 self.pos = 0
106 106
107 107 def __iter__(self):
108 108 return self
109 109
110 110 def next(self):
111 111 try:
112 112 data, pos = self.lm._get(self.pos)
113 113 except IndexError:
114 114 raise StopIteration
115 115 if pos == -1:
116 116 self.pos += 1
117 117 return data
118 118 zeropos = data.find(b'\x00', pos)
119 119 hashval = unhexlify(data, self.lm.extrainfo[self.pos], zeropos + 1, 40)
120 120 flags = self.lm._getflags(data, self.pos, zeropos)
121 121 self.pos += 1
122 122 return (data[pos:zeropos], hashval, flags)
123 123
124 124 __next__ = next
125 125
126 126
127 127 def unhexlify(data, extra, pos, length):
128 128 s = bin(data[pos : pos + length])
129 129 if extra:
130 130 s += chr(extra & 0xFF)
131 131 return s
132 132
133 133
134 134 def _cmp(a, b):
135 135 return (a > b) - (a < b)
136 136
137 137
138 138 class _lazymanifest(object):
139 139 """A pure python manifest backed by a byte string. It is supplimented with
140 140 internal lists as it is modified, until it is compacted back to a pure byte
141 141 string.
142 142
143 143 ``data`` is the initial manifest data.
144 144
145 145 ``positions`` is a list of offsets, one per manifest entry. Positive
146 146 values are offsets into ``data``, negative values are offsets into the
147 147 ``extradata`` list. When an entry is removed, its entry is dropped from
148 148 ``positions``. The values are encoded such that when walking the list and
149 149 indexing into ``data`` or ``extradata`` as appropriate, the entries are
150 150 sorted by filename.
151 151
152 152 ``extradata`` is a list of (key, hash, flags) for entries that were added or
153 153 modified since the manifest was created or compacted.
154 154 """
155 155
156 156 def __init__(
157 157 self,
158 158 data,
159 159 positions=None,
160 160 extrainfo=None,
161 161 extradata=None,
162 162 hasremovals=False,
163 163 ):
164 164 if positions is None:
165 165 self.positions = self.findlines(data)
166 166 self.extrainfo = [0] * len(self.positions)
167 167 self.data = data
168 168 self.extradata = []
169 169 self.hasremovals = False
170 170 else:
171 171 self.positions = positions[:]
172 172 self.extrainfo = extrainfo[:]
173 173 self.extradata = extradata[:]
174 174 self.data = data
175 175 self.hasremovals = hasremovals
176 176
177 177 def findlines(self, data):
178 178 if not data:
179 179 return []
180 180 pos = data.find(b"\n")
181 181 if pos == -1 or data[-1:] != b'\n':
182 182 raise ValueError(b"Manifest did not end in a newline.")
183 183 positions = [0]
184 184 prev = data[: data.find(b'\x00')]
185 185 while pos < len(data) - 1 and pos != -1:
186 186 positions.append(pos + 1)
187 187 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
188 188 if nexts < prev:
189 189 raise ValueError(b"Manifest lines not in sorted order.")
190 190 prev = nexts
191 191 pos = data.find(b"\n", pos + 1)
192 192 return positions
193 193
194 194 def _get(self, index):
195 195 # get the position encoded in pos:
196 196 # positive number is an index in 'data'
197 197 # negative number is in extrapieces
198 198 pos = self.positions[index]
199 199 if pos >= 0:
200 200 return self.data, pos
201 201 return self.extradata[-pos - 1], -1
202 202
203 203 def _getkey(self, pos):
204 204 if pos >= 0:
205 205 return self.data[pos : self.data.find(b'\x00', pos + 1)]
206 206 return self.extradata[-pos - 1][0]
207 207
208 208 def bsearch(self, key):
209 209 first = 0
210 210 last = len(self.positions) - 1
211 211
212 212 while first <= last:
213 213 midpoint = (first + last) // 2
214 214 nextpos = self.positions[midpoint]
215 215 candidate = self._getkey(nextpos)
216 216 r = _cmp(key, candidate)
217 217 if r == 0:
218 218 return midpoint
219 219 else:
220 220 if r < 0:
221 221 last = midpoint - 1
222 222 else:
223 223 first = midpoint + 1
224 224 return -1
225 225
226 226 def bsearch2(self, key):
227 227 # same as the above, but will always return the position
228 228 # done for performance reasons
229 229 first = 0
230 230 last = len(self.positions) - 1
231 231
232 232 while first <= last:
233 233 midpoint = (first + last) // 2
234 234 nextpos = self.positions[midpoint]
235 235 candidate = self._getkey(nextpos)
236 236 r = _cmp(key, candidate)
237 237 if r == 0:
238 238 return (midpoint, True)
239 239 else:
240 240 if r < 0:
241 241 last = midpoint - 1
242 242 else:
243 243 first = midpoint + 1
244 244 return (first, False)
245 245
246 246 def __contains__(self, key):
247 247 return self.bsearch(key) != -1
248 248
249 249 def _getflags(self, data, needle, pos):
250 250 start = pos + 41
251 251 end = data.find(b"\n", start)
252 252 if end == -1:
253 253 end = len(data) - 1
254 254 if start == end:
255 255 return b''
256 256 return self.data[start:end]
257 257
258 258 def __getitem__(self, key):
259 259 if not isinstance(key, bytes):
260 260 raise TypeError(b"getitem: manifest keys must be a bytes.")
261 261 needle = self.bsearch(key)
262 262 if needle == -1:
263 263 raise KeyError
264 264 data, pos = self._get(needle)
265 265 if pos == -1:
266 266 return (data[1], data[2])
267 267 zeropos = data.find(b'\x00', pos)
268 268 assert 0 <= needle <= len(self.positions)
269 269 assert len(self.extrainfo) == len(self.positions)
270 270 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
271 271 flags = self._getflags(data, needle, zeropos)
272 272 return (hashval, flags)
273 273
274 274 def __delitem__(self, key):
275 275 needle, found = self.bsearch2(key)
276 276 if not found:
277 277 raise KeyError
278 278 cur = self.positions[needle]
279 279 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
280 280 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
281 281 if cur >= 0:
282 282 # This does NOT unsort the list as far as the search functions are
283 283 # concerned, as they only examine lines mapped by self.positions.
284 284 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
285 285 self.hasremovals = True
286 286
287 287 def __setitem__(self, key, value):
288 288 if not isinstance(key, bytes):
289 289 raise TypeError(b"setitem: manifest keys must be a byte string.")
290 290 if not isinstance(value, tuple) or len(value) != 2:
291 291 raise TypeError(
292 292 b"Manifest values must be a tuple of (node, flags)."
293 293 )
294 294 hashval = value[0]
295 295 if not isinstance(hashval, bytes) or not 20 <= len(hashval) <= 22:
296 296 raise TypeError(b"node must be a 20-byte byte string")
297 297 flags = value[1]
298 298 if len(hashval) == 22:
299 299 hashval = hashval[:-1]
300 300 if not isinstance(flags, bytes) or len(flags) > 1:
301 301 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
302 302 needle, found = self.bsearch2(key)
303 303 if found:
304 304 # put the item
305 305 pos = self.positions[needle]
306 306 if pos < 0:
307 307 self.extradata[-pos - 1] = (key, hashval, value[1])
308 308 else:
309 309 # just don't bother
310 310 self.extradata.append((key, hashval, value[1]))
311 311 self.positions[needle] = -len(self.extradata)
312 312 else:
313 313 # not found, put it in with extra positions
314 314 self.extradata.append((key, hashval, value[1]))
315 315 self.positions = (
316 316 self.positions[:needle]
317 317 + [-len(self.extradata)]
318 318 + self.positions[needle:]
319 319 )
320 320 self.extrainfo = (
321 321 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
322 322 )
323 323
324 324 def copy(self):
325 325 # XXX call _compact like in C?
326 326 return _lazymanifest(
327 327 self.data,
328 328 self.positions,
329 329 self.extrainfo,
330 330 self.extradata,
331 331 self.hasremovals,
332 332 )
333 333
334 334 def _compact(self):
335 335 # hopefully not called TOO often
336 336 if len(self.extradata) == 0 and not self.hasremovals:
337 337 return
338 338 l = []
339 339 i = 0
340 340 offset = 0
341 341 self.extrainfo = [0] * len(self.positions)
342 342 while i < len(self.positions):
343 343 if self.positions[i] >= 0:
344 344 cur = self.positions[i]
345 345 last_cut = cur
346 346
347 347 # Collect all contiguous entries in the buffer at the current
348 348 # offset, breaking out only for added/modified items held in
349 349 # extradata, or a deleted line prior to the next position.
350 350 while True:
351 351 self.positions[i] = offset
352 352 i += 1
353 353 if i == len(self.positions) or self.positions[i] < 0:
354 354 break
355 355
356 356 # A removed file has no positions[] entry, but does have an
357 357 # overwritten first byte. Break out and find the end of the
358 358 # current good entry/entries if there is a removed file
359 359 # before the next position.
360 360 if (
361 361 self.hasremovals
362 362 and self.data.find(b'\n\x00', cur, self.positions[i])
363 363 != -1
364 364 ):
365 365 break
366 366
367 367 offset += self.positions[i] - cur
368 368 cur = self.positions[i]
369 369 end_cut = self.data.find(b'\n', cur)
370 370 if end_cut != -1:
371 371 end_cut += 1
372 372 offset += end_cut - cur
373 373 l.append(self.data[last_cut:end_cut])
374 374 else:
375 375 while i < len(self.positions) and self.positions[i] < 0:
376 376 cur = self.positions[i]
377 377 t = self.extradata[-cur - 1]
378 378 l.append(self._pack(t))
379 379 self.positions[i] = offset
380 if len(t[1]) > 20:
381 self.extrainfo[i] = ord(t[1][21])
380 # Hashes are either 20 bytes (old sha1s) or 32
381 # bytes (new non-sha1).
382 hlen = 20
383 if len(t[1]) > 25:
384 hlen = 32
385 if len(t[1]) > hlen:
386 self.extrainfo[i] = ord(t[1][hlen + 1])
382 387 offset += len(l[-1])
383 388 i += 1
384 389 self.data = b''.join(l)
385 390 self.hasremovals = False
386 391 self.extradata = []
387 392
388 393 def _pack(self, d):
389 394 n = d[1]
390 395 if len(n) == 21 or len(n) == 33:
391 396 n = n[:-1]
392 397 assert len(n) == 20 or len(n) == 32
393 398 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
394 399
395 400 def text(self):
396 401 self._compact()
397 402 return self.data
398 403
399 404 def diff(self, m2, clean=False):
400 405 '''Finds changes between the current manifest and m2.'''
401 406 # XXX think whether efficiency matters here
402 407 diff = {}
403 408
404 409 for fn, e1, flags in self.iterentries():
405 410 if fn not in m2:
406 411 diff[fn] = (e1, flags), (None, b'')
407 412 else:
408 413 e2 = m2[fn]
409 414 if (e1, flags) != e2:
410 415 diff[fn] = (e1, flags), e2
411 416 elif clean:
412 417 diff[fn] = None
413 418
414 419 for fn, e2, flags in m2.iterentries():
415 420 if fn not in self:
416 421 diff[fn] = (None, b''), (e2, flags)
417 422
418 423 return diff
419 424
420 425 def iterentries(self):
421 426 return lazymanifestiterentries(self)
422 427
423 428 def iterkeys(self):
424 429 return lazymanifestiter(self)
425 430
426 431 def __iter__(self):
427 432 return lazymanifestiter(self)
428 433
429 434 def __len__(self):
430 435 return len(self.positions)
431 436
432 437 def filtercopy(self, filterfn):
433 438 # XXX should be optimized
434 439 c = _lazymanifest(b'')
435 440 for f, n, fl in self.iterentries():
436 441 if filterfn(f):
437 442 c[f] = n, fl
438 443 return c
439 444
440 445
441 446 try:
442 447 _lazymanifest = parsers.lazymanifest
443 448 except AttributeError:
444 449 pass
445 450
446 451
447 452 @interfaceutil.implementer(repository.imanifestdict)
448 453 class manifestdict(object):
449 454 def __init__(self, data=b''):
450 455 self._lm = _lazymanifest(data)
451 456
452 457 def __getitem__(self, key):
453 458 return self._lm[key][0]
454 459
455 460 def find(self, key):
456 461 return self._lm[key]
457 462
458 463 def __len__(self):
459 464 return len(self._lm)
460 465
461 466 def __nonzero__(self):
462 467 # nonzero is covered by the __len__ function, but implementing it here
463 468 # makes it easier for extensions to override.
464 469 return len(self._lm) != 0
465 470
466 471 __bool__ = __nonzero__
467 472
468 473 def __setitem__(self, key, node):
469 474 self._lm[key] = node, self.flags(key)
470 475
471 476 def __contains__(self, key):
472 477 if key is None:
473 478 return False
474 479 return key in self._lm
475 480
476 481 def __delitem__(self, key):
477 482 del self._lm[key]
478 483
479 484 def __iter__(self):
480 485 return self._lm.__iter__()
481 486
482 487 def iterkeys(self):
483 488 return self._lm.iterkeys()
484 489
485 490 def keys(self):
486 491 return list(self.iterkeys())
487 492
488 493 def filesnotin(self, m2, match=None):
489 494 '''Set of files in this manifest that are not in the other'''
490 495 if match is not None:
491 496 match = matchmod.badmatch(match, lambda path, msg: None)
492 497 sm2 = set(m2.walk(match))
493 498 return {f for f in self.walk(match) if f not in sm2}
494 499 return {f for f in self if f not in m2}
495 500
496 501 @propertycache
497 502 def _dirs(self):
498 503 return pathutil.dirs(self)
499 504
500 505 def dirs(self):
501 506 return self._dirs
502 507
503 508 def hasdir(self, dir):
504 509 return dir in self._dirs
505 510
506 511 def _filesfastpath(self, match):
507 512 '''Checks whether we can correctly and quickly iterate over matcher
508 513 files instead of over manifest files.'''
509 514 files = match.files()
510 515 return len(files) < 100 and (
511 516 match.isexact()
512 517 or (match.prefix() and all(fn in self for fn in files))
513 518 )
514 519
515 520 def walk(self, match):
516 521 '''Generates matching file names.
517 522
518 523 Equivalent to manifest.matches(match).iterkeys(), but without creating
519 524 an entirely new manifest.
520 525
521 526 It also reports nonexistent files by marking them bad with match.bad().
522 527 '''
523 528 if match.always():
524 529 for f in iter(self):
525 530 yield f
526 531 return
527 532
528 533 fset = set(match.files())
529 534
530 535 # avoid the entire walk if we're only looking for specific files
531 536 if self._filesfastpath(match):
532 537 for fn in sorted(fset):
533 538 if fn in self:
534 539 yield fn
535 540 return
536 541
537 542 for fn in self:
538 543 if fn in fset:
539 544 # specified pattern is the exact name
540 545 fset.remove(fn)
541 546 if match(fn):
542 547 yield fn
543 548
544 549 # for dirstate.walk, files=[''] means "walk the whole tree".
545 550 # follow that here, too
546 551 fset.discard(b'')
547 552
548 553 for fn in sorted(fset):
549 554 if not self.hasdir(fn):
550 555 match.bad(fn, None)
551 556
552 557 def _matches(self, match):
553 558 '''generate a new manifest filtered by the match argument'''
554 559 if match.always():
555 560 return self.copy()
556 561
557 562 if self._filesfastpath(match):
558 563 m = manifestdict()
559 564 lm = self._lm
560 565 for fn in match.files():
561 566 if fn in lm:
562 567 m._lm[fn] = lm[fn]
563 568 return m
564 569
565 570 m = manifestdict()
566 571 m._lm = self._lm.filtercopy(match)
567 572 return m
568 573
569 574 def diff(self, m2, match=None, clean=False):
570 575 '''Finds changes between the current manifest and m2.
571 576
572 577 Args:
573 578 m2: the manifest to which this manifest should be compared.
574 579 clean: if true, include files unchanged between these manifests
575 580 with a None value in the returned dictionary.
576 581
577 582 The result is returned as a dict with filename as key and
578 583 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
579 584 nodeid in the current/other manifest and fl1/fl2 is the flag
580 585 in the current/other manifest. Where the file does not exist,
581 586 the nodeid will be None and the flags will be the empty
582 587 string.
583 588 '''
584 589 if match:
585 590 m1 = self._matches(match)
586 591 m2 = m2._matches(match)
587 592 return m1.diff(m2, clean=clean)
588 593 return self._lm.diff(m2._lm, clean)
589 594
590 595 def setflag(self, key, flag):
591 596 self._lm[key] = self[key], flag
592 597
593 598 def get(self, key, default=None):
594 599 try:
595 600 return self._lm[key][0]
596 601 except KeyError:
597 602 return default
598 603
599 604 def flags(self, key):
600 605 try:
601 606 return self._lm[key][1]
602 607 except KeyError:
603 608 return b''
604 609
605 610 def copy(self):
606 611 c = manifestdict()
607 612 c._lm = self._lm.copy()
608 613 return c
609 614
610 615 def items(self):
611 616 return (x[:2] for x in self._lm.iterentries())
612 617
613 618 def iteritems(self):
614 619 return (x[:2] for x in self._lm.iterentries())
615 620
616 621 def iterentries(self):
617 622 return self._lm.iterentries()
618 623
619 624 def text(self):
620 625 # most likely uses native version
621 626 return self._lm.text()
622 627
623 628 def fastdelta(self, base, changes):
624 629 """Given a base manifest text as a bytearray and a list of changes
625 630 relative to that text, compute a delta that can be used by revlog.
626 631 """
627 632 delta = []
628 633 dstart = None
629 634 dend = None
630 635 dline = [b""]
631 636 start = 0
632 637 # zero copy representation of base as a buffer
633 638 addbuf = util.buffer(base)
634 639
635 640 changes = list(changes)
636 641 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
637 642 # start with a readonly loop that finds the offset of
638 643 # each line and creates the deltas
639 644 for f, todelete in changes:
640 645 # bs will either be the index of the item or the insert point
641 646 start, end = _msearch(addbuf, f, start)
642 647 if not todelete:
643 648 h, fl = self._lm[f]
644 649 l = b"%s\0%s%s\n" % (f, hex(h), fl)
645 650 else:
646 651 if start == end:
647 652 # item we want to delete was not found, error out
648 653 raise AssertionError(
649 654 _(b"failed to remove %s from manifest") % f
650 655 )
651 656 l = b""
652 657 if dstart is not None and dstart <= start and dend >= start:
653 658 if dend < end:
654 659 dend = end
655 660 if l:
656 661 dline.append(l)
657 662 else:
658 663 if dstart is not None:
659 664 delta.append([dstart, dend, b"".join(dline)])
660 665 dstart = start
661 666 dend = end
662 667 dline = [l]
663 668
664 669 if dstart is not None:
665 670 delta.append([dstart, dend, b"".join(dline)])
666 671 # apply the delta to the base, and get a delta for addrevision
667 672 deltatext, arraytext = _addlistdelta(base, delta)
668 673 else:
669 674 # For large changes, it's much cheaper to just build the text and
670 675 # diff it.
671 676 arraytext = bytearray(self.text())
672 677 deltatext = mdiff.textdiff(
673 678 util.buffer(base), util.buffer(arraytext)
674 679 )
675 680
676 681 return arraytext, deltatext
677 682
678 683
679 684 def _msearch(m, s, lo=0, hi=None):
680 685 '''return a tuple (start, end) that says where to find s within m.
681 686
682 687 If the string is found m[start:end] are the line containing
683 688 that string. If start == end the string was not found and
684 689 they indicate the proper sorted insertion point.
685 690
686 691 m should be a buffer, a memoryview or a byte string.
687 692 s is a byte string'''
688 693
689 694 def advance(i, c):
690 695 while i < lenm and m[i : i + 1] != c:
691 696 i += 1
692 697 return i
693 698
694 699 if not s:
695 700 return (lo, lo)
696 701 lenm = len(m)
697 702 if not hi:
698 703 hi = lenm
699 704 while lo < hi:
700 705 mid = (lo + hi) // 2
701 706 start = mid
702 707 while start > 0 and m[start - 1 : start] != b'\n':
703 708 start -= 1
704 709 end = advance(start, b'\0')
705 710 if bytes(m[start:end]) < s:
706 711 # we know that after the null there are 40 bytes of sha1
707 712 # this translates to the bisect lo = mid + 1
708 713 lo = advance(end + 40, b'\n') + 1
709 714 else:
710 715 # this translates to the bisect hi = mid
711 716 hi = start
712 717 end = advance(lo, b'\0')
713 718 found = m[lo:end]
714 719 if s == found:
715 720 # we know that after the null there are 40 bytes of sha1
716 721 end = advance(end + 40, b'\n')
717 722 return (lo, end + 1)
718 723 else:
719 724 return (lo, lo)
720 725
721 726
722 727 def _checkforbidden(l):
723 728 """Check filenames for illegal characters."""
724 729 for f in l:
725 730 if b'\n' in f or b'\r' in f:
726 731 raise error.StorageError(
727 732 _(b"'\\n' and '\\r' disallowed in filenames: %r")
728 733 % pycompat.bytestr(f)
729 734 )
730 735
731 736
732 737 # apply the changes collected during the bisect loop to our addlist
733 738 # return a delta suitable for addrevision
734 739 def _addlistdelta(addlist, x):
735 740 # for large addlist arrays, building a new array is cheaper
736 741 # than repeatedly modifying the existing one
737 742 currentposition = 0
738 743 newaddlist = bytearray()
739 744
740 745 for start, end, content in x:
741 746 newaddlist += addlist[currentposition:start]
742 747 if content:
743 748 newaddlist += bytearray(content)
744 749
745 750 currentposition = end
746 751
747 752 newaddlist += addlist[currentposition:]
748 753
749 754 deltatext = b"".join(
750 755 struct.pack(b">lll", start, end, len(content)) + content
751 756 for start, end, content in x
752 757 )
753 758 return deltatext, newaddlist
754 759
755 760
756 761 def _splittopdir(f):
757 762 if b'/' in f:
758 763 dir, subpath = f.split(b'/', 1)
759 764 return dir + b'/', subpath
760 765 else:
761 766 return b'', f
762 767
763 768
764 769 _noop = lambda s: None
765 770
766 771
767 772 @interfaceutil.implementer(repository.imanifestdict)
768 773 class treemanifest(object):
769 774 def __init__(self, dir=b'', text=b''):
770 775 self._dir = dir
771 776 self._node = nullid
772 777 self._loadfunc = _noop
773 778 self._copyfunc = _noop
774 779 self._dirty = False
775 780 self._dirs = {}
776 781 self._lazydirs = {}
777 782 # Using _lazymanifest here is a little slower than plain old dicts
778 783 self._files = {}
779 784 self._flags = {}
780 785 if text:
781 786
782 787 def readsubtree(subdir, subm):
783 788 raise AssertionError(
784 789 b'treemanifest constructor only accepts flat manifests'
785 790 )
786 791
787 792 self.parse(text, readsubtree)
788 793 self._dirty = True # Mark flat manifest dirty after parsing
789 794
790 795 def _subpath(self, path):
791 796 return self._dir + path
792 797
793 798 def _loadalllazy(self):
794 799 selfdirs = self._dirs
795 800 for d, (path, node, readsubtree, docopy) in pycompat.iteritems(
796 801 self._lazydirs
797 802 ):
798 803 if docopy:
799 804 selfdirs[d] = readsubtree(path, node).copy()
800 805 else:
801 806 selfdirs[d] = readsubtree(path, node)
802 807 self._lazydirs = {}
803 808
804 809 def _loadlazy(self, d):
805 810 v = self._lazydirs.get(d)
806 811 if v:
807 812 path, node, readsubtree, docopy = v
808 813 if docopy:
809 814 self._dirs[d] = readsubtree(path, node).copy()
810 815 else:
811 816 self._dirs[d] = readsubtree(path, node)
812 817 del self._lazydirs[d]
813 818
814 819 def _loadchildrensetlazy(self, visit):
815 820 if not visit:
816 821 return None
817 822 if visit == b'all' or visit == b'this':
818 823 self._loadalllazy()
819 824 return None
820 825
821 826 loadlazy = self._loadlazy
822 827 for k in visit:
823 828 loadlazy(k + b'/')
824 829 return visit
825 830
826 831 def _loaddifflazy(self, t1, t2):
827 832 """load items in t1 and t2 if they're needed for diffing.
828 833
829 834 The criteria currently is:
830 835 - if it's not present in _lazydirs in either t1 or t2, load it in the
831 836 other (it may already be loaded or it may not exist, doesn't matter)
832 837 - if it's present in _lazydirs in both, compare the nodeid; if it
833 838 differs, load it in both
834 839 """
835 840 toloadlazy = []
836 841 for d, v1 in pycompat.iteritems(t1._lazydirs):
837 842 v2 = t2._lazydirs.get(d)
838 843 if not v2 or v2[1] != v1[1]:
839 844 toloadlazy.append(d)
840 845 for d, v1 in pycompat.iteritems(t2._lazydirs):
841 846 if d not in t1._lazydirs:
842 847 toloadlazy.append(d)
843 848
844 849 for d in toloadlazy:
845 850 t1._loadlazy(d)
846 851 t2._loadlazy(d)
847 852
848 853 def __len__(self):
849 854 self._load()
850 855 size = len(self._files)
851 856 self._loadalllazy()
852 857 for m in self._dirs.values():
853 858 size += m.__len__()
854 859 return size
855 860
856 861 def __nonzero__(self):
857 862 # Faster than "__len() != 0" since it avoids loading sub-manifests
858 863 return not self._isempty()
859 864
860 865 __bool__ = __nonzero__
861 866
862 867 def _isempty(self):
863 868 self._load() # for consistency; already loaded by all callers
864 869 # See if we can skip loading everything.
865 870 if self._files or (
866 871 self._dirs and any(not m._isempty() for m in self._dirs.values())
867 872 ):
868 873 return False
869 874 self._loadalllazy()
870 875 return not self._dirs or all(m._isempty() for m in self._dirs.values())
871 876
872 877 @encoding.strmethod
873 878 def __repr__(self):
874 879 return (
875 880 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
876 881 % (
877 882 self._dir,
878 883 hex(self._node),
879 884 bool(self._loadfunc is _noop),
880 885 self._dirty,
881 886 id(self),
882 887 )
883 888 )
884 889
885 890 def dir(self):
886 891 '''The directory that this tree manifest represents, including a
887 892 trailing '/'. Empty string for the repo root directory.'''
888 893 return self._dir
889 894
890 895 def node(self):
891 896 '''This node of this instance. nullid for unsaved instances. Should
892 897 be updated when the instance is read or written from a revlog.
893 898 '''
894 899 assert not self._dirty
895 900 return self._node
896 901
897 902 def setnode(self, node):
898 903 self._node = node
899 904 self._dirty = False
900 905
901 906 def iterentries(self):
902 907 self._load()
903 908 self._loadalllazy()
904 909 for p, n in sorted(
905 910 itertools.chain(self._dirs.items(), self._files.items())
906 911 ):
907 912 if p in self._files:
908 913 yield self._subpath(p), n, self._flags.get(p, b'')
909 914 else:
910 915 for x in n.iterentries():
911 916 yield x
912 917
913 918 def items(self):
914 919 self._load()
915 920 self._loadalllazy()
916 921 for p, n in sorted(
917 922 itertools.chain(self._dirs.items(), self._files.items())
918 923 ):
919 924 if p in self._files:
920 925 yield self._subpath(p), n
921 926 else:
922 927 for f, sn in pycompat.iteritems(n):
923 928 yield f, sn
924 929
925 930 iteritems = items
926 931
927 932 def iterkeys(self):
928 933 self._load()
929 934 self._loadalllazy()
930 935 for p in sorted(itertools.chain(self._dirs, self._files)):
931 936 if p in self._files:
932 937 yield self._subpath(p)
933 938 else:
934 939 for f in self._dirs[p]:
935 940 yield f
936 941
937 942 def keys(self):
938 943 return list(self.iterkeys())
939 944
940 945 def __iter__(self):
941 946 return self.iterkeys()
942 947
943 948 def __contains__(self, f):
944 949 if f is None:
945 950 return False
946 951 self._load()
947 952 dir, subpath = _splittopdir(f)
948 953 if dir:
949 954 self._loadlazy(dir)
950 955
951 956 if dir not in self._dirs:
952 957 return False
953 958
954 959 return self._dirs[dir].__contains__(subpath)
955 960 else:
956 961 return f in self._files
957 962
958 963 def get(self, f, default=None):
959 964 self._load()
960 965 dir, subpath = _splittopdir(f)
961 966 if dir:
962 967 self._loadlazy(dir)
963 968
964 969 if dir not in self._dirs:
965 970 return default
966 971 return self._dirs[dir].get(subpath, default)
967 972 else:
968 973 return self._files.get(f, default)
969 974
970 975 def __getitem__(self, f):
971 976 self._load()
972 977 dir, subpath = _splittopdir(f)
973 978 if dir:
974 979 self._loadlazy(dir)
975 980
976 981 return self._dirs[dir].__getitem__(subpath)
977 982 else:
978 983 return self._files[f]
979 984
980 985 def flags(self, f):
981 986 self._load()
982 987 dir, subpath = _splittopdir(f)
983 988 if dir:
984 989 self._loadlazy(dir)
985 990
986 991 if dir not in self._dirs:
987 992 return b''
988 993 return self._dirs[dir].flags(subpath)
989 994 else:
990 995 if f in self._lazydirs or f in self._dirs:
991 996 return b''
992 997 return self._flags.get(f, b'')
993 998
994 999 def find(self, f):
995 1000 self._load()
996 1001 dir, subpath = _splittopdir(f)
997 1002 if dir:
998 1003 self._loadlazy(dir)
999 1004
1000 1005 return self._dirs[dir].find(subpath)
1001 1006 else:
1002 1007 return self._files[f], self._flags.get(f, b'')
1003 1008
1004 1009 def __delitem__(self, f):
1005 1010 self._load()
1006 1011 dir, subpath = _splittopdir(f)
1007 1012 if dir:
1008 1013 self._loadlazy(dir)
1009 1014
1010 1015 self._dirs[dir].__delitem__(subpath)
1011 1016 # If the directory is now empty, remove it
1012 1017 if self._dirs[dir]._isempty():
1013 1018 del self._dirs[dir]
1014 1019 else:
1015 1020 del self._files[f]
1016 1021 if f in self._flags:
1017 1022 del self._flags[f]
1018 1023 self._dirty = True
1019 1024
1020 1025 def __setitem__(self, f, n):
1021 1026 assert n is not None
1022 1027 self._load()
1023 1028 dir, subpath = _splittopdir(f)
1024 1029 if dir:
1025 1030 self._loadlazy(dir)
1026 1031 if dir not in self._dirs:
1027 1032 self._dirs[dir] = treemanifest(self._subpath(dir))
1028 1033 self._dirs[dir].__setitem__(subpath, n)
1029 1034 else:
1030 1035 # manifest nodes are either 20 bytes or 32 bytes,
1031 1036 # depending on the hash in use. An extra byte is
1032 1037 # occasionally used by hg, but won't ever be
1033 1038 # persisted. Trim to 21 or 33 bytes as appropriate.
1034 1039 trim = 21 if len(n) < 25 else 33
1035 1040 self._files[f] = n[:trim] # to match manifestdict's behavior
1036 1041 self._dirty = True
1037 1042
1038 1043 def _load(self):
1039 1044 if self._loadfunc is not _noop:
1040 1045 lf, self._loadfunc = self._loadfunc, _noop
1041 1046 lf(self)
1042 1047 elif self._copyfunc is not _noop:
1043 1048 cf, self._copyfunc = self._copyfunc, _noop
1044 1049 cf(self)
1045 1050
1046 1051 def setflag(self, f, flags):
1047 1052 """Set the flags (symlink, executable) for path f."""
1048 1053 self._load()
1049 1054 dir, subpath = _splittopdir(f)
1050 1055 if dir:
1051 1056 self._loadlazy(dir)
1052 1057 if dir not in self._dirs:
1053 1058 self._dirs[dir] = treemanifest(self._subpath(dir))
1054 1059 self._dirs[dir].setflag(subpath, flags)
1055 1060 else:
1056 1061 self._flags[f] = flags
1057 1062 self._dirty = True
1058 1063
1059 1064 def copy(self):
1060 1065 copy = treemanifest(self._dir)
1061 1066 copy._node = self._node
1062 1067 copy._dirty = self._dirty
1063 1068 if self._copyfunc is _noop:
1064 1069
1065 1070 def _copyfunc(s):
1066 1071 self._load()
1067 1072 s._lazydirs = {
1068 1073 d: (p, n, r, True)
1069 1074 for d, (p, n, r, c) in pycompat.iteritems(self._lazydirs)
1070 1075 }
1071 1076 sdirs = s._dirs
1072 1077 for d, v in pycompat.iteritems(self._dirs):
1073 1078 sdirs[d] = v.copy()
1074 1079 s._files = dict.copy(self._files)
1075 1080 s._flags = dict.copy(self._flags)
1076 1081
1077 1082 if self._loadfunc is _noop:
1078 1083 _copyfunc(copy)
1079 1084 else:
1080 1085 copy._copyfunc = _copyfunc
1081 1086 else:
1082 1087 copy._copyfunc = self._copyfunc
1083 1088 return copy
1084 1089
1085 1090 def filesnotin(self, m2, match=None):
1086 1091 '''Set of files in this manifest that are not in the other'''
1087 1092 if match and not match.always():
1088 1093 m1 = self._matches(match)
1089 1094 m2 = m2._matches(match)
1090 1095 return m1.filesnotin(m2)
1091 1096
1092 1097 files = set()
1093 1098
1094 1099 def _filesnotin(t1, t2):
1095 1100 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1096 1101 return
1097 1102 t1._load()
1098 1103 t2._load()
1099 1104 self._loaddifflazy(t1, t2)
1100 1105 for d, m1 in pycompat.iteritems(t1._dirs):
1101 1106 if d in t2._dirs:
1102 1107 m2 = t2._dirs[d]
1103 1108 _filesnotin(m1, m2)
1104 1109 else:
1105 1110 files.update(m1.iterkeys())
1106 1111
1107 1112 for fn in t1._files:
1108 1113 if fn not in t2._files:
1109 1114 files.add(t1._subpath(fn))
1110 1115
1111 1116 _filesnotin(self, m2)
1112 1117 return files
1113 1118
1114 1119 @propertycache
1115 1120 def _alldirs(self):
1116 1121 return pathutil.dirs(self)
1117 1122
1118 1123 def dirs(self):
1119 1124 return self._alldirs
1120 1125
1121 1126 def hasdir(self, dir):
1122 1127 self._load()
1123 1128 topdir, subdir = _splittopdir(dir)
1124 1129 if topdir:
1125 1130 self._loadlazy(topdir)
1126 1131 if topdir in self._dirs:
1127 1132 return self._dirs[topdir].hasdir(subdir)
1128 1133 return False
1129 1134 dirslash = dir + b'/'
1130 1135 return dirslash in self._dirs or dirslash in self._lazydirs
1131 1136
1132 1137 def walk(self, match):
1133 1138 '''Generates matching file names.
1134 1139
1135 1140 It also reports nonexistent files by marking them bad with match.bad().
1136 1141 '''
1137 1142 if match.always():
1138 1143 for f in iter(self):
1139 1144 yield f
1140 1145 return
1141 1146
1142 1147 fset = set(match.files())
1143 1148
1144 1149 for fn in self._walk(match):
1145 1150 if fn in fset:
1146 1151 # specified pattern is the exact name
1147 1152 fset.remove(fn)
1148 1153 yield fn
1149 1154
1150 1155 # for dirstate.walk, files=[''] means "walk the whole tree".
1151 1156 # follow that here, too
1152 1157 fset.discard(b'')
1153 1158
1154 1159 for fn in sorted(fset):
1155 1160 if not self.hasdir(fn):
1156 1161 match.bad(fn, None)
1157 1162
1158 1163 def _walk(self, match):
1159 1164 '''Recursively generates matching file names for walk().'''
1160 1165 visit = match.visitchildrenset(self._dir[:-1])
1161 1166 if not visit:
1162 1167 return
1163 1168
1164 1169 # yield this dir's files and walk its submanifests
1165 1170 self._load()
1166 1171 visit = self._loadchildrensetlazy(visit)
1167 1172 for p in sorted(list(self._dirs) + list(self._files)):
1168 1173 if p in self._files:
1169 1174 fullp = self._subpath(p)
1170 1175 if match(fullp):
1171 1176 yield fullp
1172 1177 else:
1173 1178 if not visit or p[:-1] in visit:
1174 1179 for f in self._dirs[p]._walk(match):
1175 1180 yield f
1176 1181
1177 1182 def _matches(self, match):
1178 1183 '''recursively generate a new manifest filtered by the match argument.
1179 1184 '''
1180 1185 if match.always():
1181 1186 return self.copy()
1182 1187 return self._matches_inner(match)
1183 1188
1184 1189 def _matches_inner(self, match):
1185 1190 if match.always():
1186 1191 return self.copy()
1187 1192
1188 1193 visit = match.visitchildrenset(self._dir[:-1])
1189 1194 if visit == b'all':
1190 1195 return self.copy()
1191 1196 ret = treemanifest(self._dir)
1192 1197 if not visit:
1193 1198 return ret
1194 1199
1195 1200 self._load()
1196 1201 for fn in self._files:
1197 1202 # While visitchildrenset *usually* lists only subdirs, this is
1198 1203 # actually up to the matcher and may have some files in the set().
1199 1204 # If visit == 'this', we should obviously look at the files in this
1200 1205 # directory; if visit is a set, and fn is in it, we should inspect
1201 1206 # fn (but no need to inspect things not in the set).
1202 1207 if visit != b'this' and fn not in visit:
1203 1208 continue
1204 1209 fullp = self._subpath(fn)
1205 1210 # visitchildrenset isn't perfect, we still need to call the regular
1206 1211 # matcher code to further filter results.
1207 1212 if not match(fullp):
1208 1213 continue
1209 1214 ret._files[fn] = self._files[fn]
1210 1215 if fn in self._flags:
1211 1216 ret._flags[fn] = self._flags[fn]
1212 1217
1213 1218 visit = self._loadchildrensetlazy(visit)
1214 1219 for dir, subm in pycompat.iteritems(self._dirs):
1215 1220 if visit and dir[:-1] not in visit:
1216 1221 continue
1217 1222 m = subm._matches_inner(match)
1218 1223 if not m._isempty():
1219 1224 ret._dirs[dir] = m
1220 1225
1221 1226 if not ret._isempty():
1222 1227 ret._dirty = True
1223 1228 return ret
1224 1229
1225 1230 def fastdelta(self, base, changes):
1226 1231 raise FastdeltaUnavailable()
1227 1232
1228 1233 def diff(self, m2, match=None, clean=False):
1229 1234 '''Finds changes between the current manifest and m2.
1230 1235
1231 1236 Args:
1232 1237 m2: the manifest to which this manifest should be compared.
1233 1238 clean: if true, include files unchanged between these manifests
1234 1239 with a None value in the returned dictionary.
1235 1240
1236 1241 The result is returned as a dict with filename as key and
1237 1242 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1238 1243 nodeid in the current/other manifest and fl1/fl2 is the flag
1239 1244 in the current/other manifest. Where the file does not exist,
1240 1245 the nodeid will be None and the flags will be the empty
1241 1246 string.
1242 1247 '''
1243 1248 if match and not match.always():
1244 1249 m1 = self._matches(match)
1245 1250 m2 = m2._matches(match)
1246 1251 return m1.diff(m2, clean=clean)
1247 1252 result = {}
1248 1253 emptytree = treemanifest()
1249 1254
1250 1255 def _iterativediff(t1, t2, stack):
1251 1256 """compares two tree manifests and append new tree-manifests which
1252 1257 needs to be compared to stack"""
1253 1258 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1254 1259 return
1255 1260 t1._load()
1256 1261 t2._load()
1257 1262 self._loaddifflazy(t1, t2)
1258 1263
1259 1264 for d, m1 in pycompat.iteritems(t1._dirs):
1260 1265 m2 = t2._dirs.get(d, emptytree)
1261 1266 stack.append((m1, m2))
1262 1267
1263 1268 for d, m2 in pycompat.iteritems(t2._dirs):
1264 1269 if d not in t1._dirs:
1265 1270 stack.append((emptytree, m2))
1266 1271
1267 1272 for fn, n1 in pycompat.iteritems(t1._files):
1268 1273 fl1 = t1._flags.get(fn, b'')
1269 1274 n2 = t2._files.get(fn, None)
1270 1275 fl2 = t2._flags.get(fn, b'')
1271 1276 if n1 != n2 or fl1 != fl2:
1272 1277 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1273 1278 elif clean:
1274 1279 result[t1._subpath(fn)] = None
1275 1280
1276 1281 for fn, n2 in pycompat.iteritems(t2._files):
1277 1282 if fn not in t1._files:
1278 1283 fl2 = t2._flags.get(fn, b'')
1279 1284 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1280 1285
1281 1286 stackls = []
1282 1287 _iterativediff(self, m2, stackls)
1283 1288 while stackls:
1284 1289 t1, t2 = stackls.pop()
1285 1290 # stackls is populated in the function call
1286 1291 _iterativediff(t1, t2, stackls)
1287 1292 return result
1288 1293
1289 1294 def unmodifiedsince(self, m2):
1290 1295 return not self._dirty and not m2._dirty and self._node == m2._node
1291 1296
1292 1297 def parse(self, text, readsubtree):
1293 1298 selflazy = self._lazydirs
1294 1299 subpath = self._subpath
1295 1300 for f, n, fl in _parse(text):
1296 1301 if fl == b't':
1297 1302 f = f + b'/'
1298 1303 # False below means "doesn't need to be copied" and can use the
1299 1304 # cached value from readsubtree directly.
1300 1305 selflazy[f] = (subpath(f), n, readsubtree, False)
1301 1306 elif b'/' in f:
1302 1307 # This is a flat manifest, so use __setitem__ and setflag rather
1303 1308 # than assigning directly to _files and _flags, so we can
1304 1309 # assign a path in a subdirectory, and to mark dirty (compared
1305 1310 # to nullid).
1306 1311 self[f] = n
1307 1312 if fl:
1308 1313 self.setflag(f, fl)
1309 1314 else:
1310 1315 # Assigning to _files and _flags avoids marking as dirty,
1311 1316 # and should be a little faster.
1312 1317 self._files[f] = n
1313 1318 if fl:
1314 1319 self._flags[f] = fl
1315 1320
1316 1321 def text(self):
1317 1322 """Get the full data of this manifest as a bytestring."""
1318 1323 self._load()
1319 1324 return _text(self.iterentries())
1320 1325
1321 1326 def dirtext(self):
1322 1327 """Get the full data of this directory as a bytestring. Make sure that
1323 1328 any submanifests have been written first, so their nodeids are correct.
1324 1329 """
1325 1330 self._load()
1326 1331 flags = self.flags
1327 1332 lazydirs = [
1328 1333 (d[:-1], v[1], b't') for d, v in pycompat.iteritems(self._lazydirs)
1329 1334 ]
1330 1335 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1331 1336 files = [(f, self._files[f], flags(f)) for f in self._files]
1332 1337 return _text(sorted(dirs + files + lazydirs))
1333 1338
1334 1339 def read(self, gettext, readsubtree):
1335 1340 def _load_for_read(s):
1336 1341 s.parse(gettext(), readsubtree)
1337 1342 s._dirty = False
1338 1343
1339 1344 self._loadfunc = _load_for_read
1340 1345
1341 1346 def writesubtrees(self, m1, m2, writesubtree, match):
1342 1347 self._load() # for consistency; should never have any effect here
1343 1348 m1._load()
1344 1349 m2._load()
1345 1350 emptytree = treemanifest()
1346 1351
1347 1352 def getnode(m, d):
1348 1353 ld = m._lazydirs.get(d)
1349 1354 if ld:
1350 1355 return ld[1]
1351 1356 return m._dirs.get(d, emptytree)._node
1352 1357
1353 1358 # let's skip investigating things that `match` says we do not need.
1354 1359 visit = match.visitchildrenset(self._dir[:-1])
1355 1360 visit = self._loadchildrensetlazy(visit)
1356 1361 if visit == b'this' or visit == b'all':
1357 1362 visit = None
1358 1363 for d, subm in pycompat.iteritems(self._dirs):
1359 1364 if visit and d[:-1] not in visit:
1360 1365 continue
1361 1366 subp1 = getnode(m1, d)
1362 1367 subp2 = getnode(m2, d)
1363 1368 if subp1 == nullid:
1364 1369 subp1, subp2 = subp2, subp1
1365 1370 writesubtree(subm, subp1, subp2, match)
1366 1371
1367 1372 def walksubtrees(self, matcher=None):
1368 1373 """Returns an iterator of the subtrees of this manifest, including this
1369 1374 manifest itself.
1370 1375
1371 1376 If `matcher` is provided, it only returns subtrees that match.
1372 1377 """
1373 1378 if matcher and not matcher.visitdir(self._dir[:-1]):
1374 1379 return
1375 1380 if not matcher or matcher(self._dir[:-1]):
1376 1381 yield self
1377 1382
1378 1383 self._load()
1379 1384 # OPT: use visitchildrenset to avoid loading everything.
1380 1385 self._loadalllazy()
1381 1386 for d, subm in pycompat.iteritems(self._dirs):
1382 1387 for subtree in subm.walksubtrees(matcher=matcher):
1383 1388 yield subtree
1384 1389
1385 1390
1386 1391 class manifestfulltextcache(util.lrucachedict):
1387 1392 """File-backed LRU cache for the manifest cache
1388 1393
1389 1394 File consists of entries, up to EOF:
1390 1395
1391 1396 - 20 bytes node, 4 bytes length, <length> manifest data
1392 1397
1393 1398 These are written in reverse cache order (oldest to newest).
1394 1399
1395 1400 """
1396 1401
1397 1402 _file = b'manifestfulltextcache'
1398 1403
1399 1404 def __init__(self, max):
1400 1405 super(manifestfulltextcache, self).__init__(max)
1401 1406 self._dirty = False
1402 1407 self._read = False
1403 1408 self._opener = None
1404 1409
1405 1410 def read(self):
1406 1411 if self._read or self._opener is None:
1407 1412 return
1408 1413
1409 1414 try:
1410 1415 with self._opener(self._file) as fp:
1411 1416 set = super(manifestfulltextcache, self).__setitem__
1412 1417 # ignore trailing data, this is a cache, corruption is skipped
1413 1418 while True:
1414 1419 node = fp.read(20)
1415 1420 if len(node) < 20:
1416 1421 break
1417 1422 try:
1418 1423 size = struct.unpack(b'>L', fp.read(4))[0]
1419 1424 except struct.error:
1420 1425 break
1421 1426 value = bytearray(fp.read(size))
1422 1427 if len(value) != size:
1423 1428 break
1424 1429 set(node, value)
1425 1430 except IOError:
1426 1431 # the file is allowed to be missing
1427 1432 pass
1428 1433
1429 1434 self._read = True
1430 1435 self._dirty = False
1431 1436
1432 1437 def write(self):
1433 1438 if not self._dirty or self._opener is None:
1434 1439 return
1435 1440 # rotate backwards to the first used node
1436 1441 with self._opener(
1437 1442 self._file, b'w', atomictemp=True, checkambig=True
1438 1443 ) as fp:
1439 1444 node = self._head.prev
1440 1445 while True:
1441 1446 if node.key in self._cache:
1442 1447 fp.write(node.key)
1443 1448 fp.write(struct.pack(b'>L', len(node.value)))
1444 1449 fp.write(node.value)
1445 1450 if node is self._head:
1446 1451 break
1447 1452 node = node.prev
1448 1453
1449 1454 def __len__(self):
1450 1455 if not self._read:
1451 1456 self.read()
1452 1457 return super(manifestfulltextcache, self).__len__()
1453 1458
1454 1459 def __contains__(self, k):
1455 1460 if not self._read:
1456 1461 self.read()
1457 1462 return super(manifestfulltextcache, self).__contains__(k)
1458 1463
1459 1464 def __iter__(self):
1460 1465 if not self._read:
1461 1466 self.read()
1462 1467 return super(manifestfulltextcache, self).__iter__()
1463 1468
1464 1469 def __getitem__(self, k):
1465 1470 if not self._read:
1466 1471 self.read()
1467 1472 # the cache lru order can change on read
1468 1473 setdirty = self._cache.get(k) is not self._head
1469 1474 value = super(manifestfulltextcache, self).__getitem__(k)
1470 1475 if setdirty:
1471 1476 self._dirty = True
1472 1477 return value
1473 1478
1474 1479 def __setitem__(self, k, v):
1475 1480 if not self._read:
1476 1481 self.read()
1477 1482 super(manifestfulltextcache, self).__setitem__(k, v)
1478 1483 self._dirty = True
1479 1484
1480 1485 def __delitem__(self, k):
1481 1486 if not self._read:
1482 1487 self.read()
1483 1488 super(manifestfulltextcache, self).__delitem__(k)
1484 1489 self._dirty = True
1485 1490
1486 1491 def get(self, k, default=None):
1487 1492 if not self._read:
1488 1493 self.read()
1489 1494 return super(manifestfulltextcache, self).get(k, default=default)
1490 1495
1491 1496 def clear(self, clear_persisted_data=False):
1492 1497 super(manifestfulltextcache, self).clear()
1493 1498 if clear_persisted_data:
1494 1499 self._dirty = True
1495 1500 self.write()
1496 1501 self._read = False
1497 1502
1498 1503
1499 1504 # and upper bound of what we expect from compression
1500 1505 # (real live value seems to be "3")
1501 1506 MAXCOMPRESSION = 3
1502 1507
1503 1508
1504 1509 class FastdeltaUnavailable(Exception):
1505 1510 """Exception raised when fastdelta isn't usable on a manifest."""
1506 1511
1507 1512
1508 1513 @interfaceutil.implementer(repository.imanifeststorage)
1509 1514 class manifestrevlog(object):
1510 1515 '''A revlog that stores manifest texts. This is responsible for caching the
1511 1516 full-text manifest contents.
1512 1517 '''
1513 1518
1514 1519 def __init__(
1515 1520 self,
1516 1521 opener,
1517 1522 tree=b'',
1518 1523 dirlogcache=None,
1519 1524 indexfile=None,
1520 1525 treemanifest=False,
1521 1526 ):
1522 1527 """Constructs a new manifest revlog
1523 1528
1524 1529 `indexfile` - used by extensions to have two manifests at once, like
1525 1530 when transitioning between flatmanifeset and treemanifests.
1526 1531
1527 1532 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1528 1533 options can also be used to make this a tree manifest revlog. The opener
1529 1534 option takes precedence, so if it is set to True, we ignore whatever
1530 1535 value is passed in to the constructor.
1531 1536 """
1532 1537 # During normal operations, we expect to deal with not more than four
1533 1538 # revs at a time (such as during commit --amend). When rebasing large
1534 1539 # stacks of commits, the number can go up, hence the config knob below.
1535 1540 cachesize = 4
1536 1541 optiontreemanifest = False
1537 1542 opts = getattr(opener, 'options', None)
1538 1543 if opts is not None:
1539 1544 cachesize = opts.get(b'manifestcachesize', cachesize)
1540 1545 optiontreemanifest = opts.get(b'treemanifest', False)
1541 1546
1542 1547 self._treeondisk = optiontreemanifest or treemanifest
1543 1548
1544 1549 self._fulltextcache = manifestfulltextcache(cachesize)
1545 1550
1546 1551 if tree:
1547 1552 assert self._treeondisk, b'opts is %r' % opts
1548 1553
1549 1554 if indexfile is None:
1550 1555 indexfile = b'00manifest.i'
1551 1556 if tree:
1552 1557 indexfile = b"meta/" + tree + indexfile
1553 1558
1554 1559 self.tree = tree
1555 1560
1556 1561 # The dirlogcache is kept on the root manifest log
1557 1562 if tree:
1558 1563 self._dirlogcache = dirlogcache
1559 1564 else:
1560 1565 self._dirlogcache = {b'': self}
1561 1566
1562 1567 self._revlog = revlog.revlog(
1563 1568 opener,
1564 1569 indexfile,
1565 1570 # only root indexfile is cached
1566 1571 checkambig=not bool(tree),
1567 1572 mmaplargeindex=True,
1568 1573 upperboundcomp=MAXCOMPRESSION,
1569 1574 )
1570 1575
1571 1576 self.index = self._revlog.index
1572 1577 self.version = self._revlog.version
1573 1578 self._generaldelta = self._revlog._generaldelta
1574 1579
1575 1580 def _setupmanifestcachehooks(self, repo):
1576 1581 """Persist the manifestfulltextcache on lock release"""
1577 1582 if not util.safehasattr(repo, b'_wlockref'):
1578 1583 return
1579 1584
1580 1585 self._fulltextcache._opener = repo.wcachevfs
1581 1586 if repo._currentlock(repo._wlockref) is None:
1582 1587 return
1583 1588
1584 1589 reporef = weakref.ref(repo)
1585 1590 manifestrevlogref = weakref.ref(self)
1586 1591
1587 1592 def persistmanifestcache(success):
1588 1593 # Repo is in an unknown state, do not persist.
1589 1594 if not success:
1590 1595 return
1591 1596
1592 1597 repo = reporef()
1593 1598 self = manifestrevlogref()
1594 1599 if repo is None or self is None:
1595 1600 return
1596 1601 if repo.manifestlog.getstorage(b'') is not self:
1597 1602 # there's a different manifest in play now, abort
1598 1603 return
1599 1604 self._fulltextcache.write()
1600 1605
1601 1606 repo._afterlock(persistmanifestcache)
1602 1607
1603 1608 @property
1604 1609 def fulltextcache(self):
1605 1610 return self._fulltextcache
1606 1611
1607 1612 def clearcaches(self, clear_persisted_data=False):
1608 1613 self._revlog.clearcaches()
1609 1614 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1610 1615 self._dirlogcache = {self.tree: self}
1611 1616
1612 1617 def dirlog(self, d):
1613 1618 if d:
1614 1619 assert self._treeondisk
1615 1620 if d not in self._dirlogcache:
1616 1621 mfrevlog = manifestrevlog(
1617 1622 self.opener, d, self._dirlogcache, treemanifest=self._treeondisk
1618 1623 )
1619 1624 self._dirlogcache[d] = mfrevlog
1620 1625 return self._dirlogcache[d]
1621 1626
1622 1627 def add(
1623 1628 self,
1624 1629 m,
1625 1630 transaction,
1626 1631 link,
1627 1632 p1,
1628 1633 p2,
1629 1634 added,
1630 1635 removed,
1631 1636 readtree=None,
1632 1637 match=None,
1633 1638 ):
1634 1639 try:
1635 1640 if p1 not in self.fulltextcache:
1636 1641 raise FastdeltaUnavailable()
1637 1642 # If our first parent is in the manifest cache, we can
1638 1643 # compute a delta here using properties we know about the
1639 1644 # manifest up-front, which may save time later for the
1640 1645 # revlog layer.
1641 1646
1642 1647 _checkforbidden(added)
1643 1648 # combine the changed lists into one sorted iterator
1644 1649 work = heapq.merge(
1645 1650 [(x, False) for x in sorted(added)],
1646 1651 [(x, True) for x in sorted(removed)],
1647 1652 )
1648 1653
1649 1654 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1650 1655 cachedelta = self._revlog.rev(p1), deltatext
1651 1656 text = util.buffer(arraytext)
1652 1657 n = self._revlog.addrevision(
1653 1658 text, transaction, link, p1, p2, cachedelta
1654 1659 )
1655 1660 except FastdeltaUnavailable:
1656 1661 # The first parent manifest isn't already loaded or the
1657 1662 # manifest implementation doesn't support fastdelta, so
1658 1663 # we'll just encode a fulltext of the manifest and pass
1659 1664 # that through to the revlog layer, and let it handle the
1660 1665 # delta process.
1661 1666 if self._treeondisk:
1662 1667 assert readtree, b"readtree must be set for treemanifest writes"
1663 1668 assert match, b"match must be specified for treemanifest writes"
1664 1669 m1 = readtree(self.tree, p1)
1665 1670 m2 = readtree(self.tree, p2)
1666 1671 n = self._addtree(
1667 1672 m, transaction, link, m1, m2, readtree, match=match
1668 1673 )
1669 1674 arraytext = None
1670 1675 else:
1671 1676 text = m.text()
1672 1677 n = self._revlog.addrevision(text, transaction, link, p1, p2)
1673 1678 arraytext = bytearray(text)
1674 1679
1675 1680 if arraytext is not None:
1676 1681 self.fulltextcache[n] = arraytext
1677 1682
1678 1683 return n
1679 1684
1680 1685 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1681 1686 # If the manifest is unchanged compared to one parent,
1682 1687 # don't write a new revision
1683 1688 if self.tree != b'' and (
1684 1689 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1685 1690 ):
1686 1691 return m.node()
1687 1692
1688 1693 def writesubtree(subm, subp1, subp2, match):
1689 1694 sublog = self.dirlog(subm.dir())
1690 1695 sublog.add(
1691 1696 subm,
1692 1697 transaction,
1693 1698 link,
1694 1699 subp1,
1695 1700 subp2,
1696 1701 None,
1697 1702 None,
1698 1703 readtree=readtree,
1699 1704 match=match,
1700 1705 )
1701 1706
1702 1707 m.writesubtrees(m1, m2, writesubtree, match)
1703 1708 text = m.dirtext()
1704 1709 n = None
1705 1710 if self.tree != b'':
1706 1711 # Double-check whether contents are unchanged to one parent
1707 1712 if text == m1.dirtext():
1708 1713 n = m1.node()
1709 1714 elif text == m2.dirtext():
1710 1715 n = m2.node()
1711 1716
1712 1717 if not n:
1713 1718 n = self._revlog.addrevision(
1714 1719 text, transaction, link, m1.node(), m2.node()
1715 1720 )
1716 1721
1717 1722 # Save nodeid so parent manifest can calculate its nodeid
1718 1723 m.setnode(n)
1719 1724 return n
1720 1725
1721 1726 def __len__(self):
1722 1727 return len(self._revlog)
1723 1728
1724 1729 def __iter__(self):
1725 1730 return self._revlog.__iter__()
1726 1731
1727 1732 def rev(self, node):
1728 1733 return self._revlog.rev(node)
1729 1734
1730 1735 def node(self, rev):
1731 1736 return self._revlog.node(rev)
1732 1737
1733 1738 def lookup(self, value):
1734 1739 return self._revlog.lookup(value)
1735 1740
1736 1741 def parentrevs(self, rev):
1737 1742 return self._revlog.parentrevs(rev)
1738 1743
1739 1744 def parents(self, node):
1740 1745 return self._revlog.parents(node)
1741 1746
1742 1747 def linkrev(self, rev):
1743 1748 return self._revlog.linkrev(rev)
1744 1749
1745 1750 def checksize(self):
1746 1751 return self._revlog.checksize()
1747 1752
1748 1753 def revision(self, node, _df=None, raw=False):
1749 1754 return self._revlog.revision(node, _df=_df, raw=raw)
1750 1755
1751 1756 def rawdata(self, node, _df=None):
1752 1757 return self._revlog.rawdata(node, _df=_df)
1753 1758
1754 1759 def revdiff(self, rev1, rev2):
1755 1760 return self._revlog.revdiff(rev1, rev2)
1756 1761
1757 1762 def cmp(self, node, text):
1758 1763 return self._revlog.cmp(node, text)
1759 1764
1760 1765 def deltaparent(self, rev):
1761 1766 return self._revlog.deltaparent(rev)
1762 1767
1763 1768 def emitrevisions(
1764 1769 self,
1765 1770 nodes,
1766 1771 nodesorder=None,
1767 1772 revisiondata=False,
1768 1773 assumehaveparentrevisions=False,
1769 1774 deltamode=repository.CG_DELTAMODE_STD,
1770 1775 ):
1771 1776 return self._revlog.emitrevisions(
1772 1777 nodes,
1773 1778 nodesorder=nodesorder,
1774 1779 revisiondata=revisiondata,
1775 1780 assumehaveparentrevisions=assumehaveparentrevisions,
1776 1781 deltamode=deltamode,
1777 1782 )
1778 1783
1779 1784 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1780 1785 return self._revlog.addgroup(
1781 1786 deltas, linkmapper, transaction, addrevisioncb=addrevisioncb
1782 1787 )
1783 1788
1784 1789 def rawsize(self, rev):
1785 1790 return self._revlog.rawsize(rev)
1786 1791
1787 1792 def getstrippoint(self, minlink):
1788 1793 return self._revlog.getstrippoint(minlink)
1789 1794
1790 1795 def strip(self, minlink, transaction):
1791 1796 return self._revlog.strip(minlink, transaction)
1792 1797
1793 1798 def files(self):
1794 1799 return self._revlog.files()
1795 1800
1796 1801 def clone(self, tr, destrevlog, **kwargs):
1797 1802 if not isinstance(destrevlog, manifestrevlog):
1798 1803 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1799 1804
1800 1805 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1801 1806
1802 1807 def storageinfo(
1803 1808 self,
1804 1809 exclusivefiles=False,
1805 1810 sharedfiles=False,
1806 1811 revisionscount=False,
1807 1812 trackedsize=False,
1808 1813 storedsize=False,
1809 1814 ):
1810 1815 return self._revlog.storageinfo(
1811 1816 exclusivefiles=exclusivefiles,
1812 1817 sharedfiles=sharedfiles,
1813 1818 revisionscount=revisionscount,
1814 1819 trackedsize=trackedsize,
1815 1820 storedsize=storedsize,
1816 1821 )
1817 1822
1818 1823 @property
1819 1824 def indexfile(self):
1820 1825 return self._revlog.indexfile
1821 1826
1822 1827 @indexfile.setter
1823 1828 def indexfile(self, value):
1824 1829 self._revlog.indexfile = value
1825 1830
1826 1831 @property
1827 1832 def opener(self):
1828 1833 return self._revlog.opener
1829 1834
1830 1835 @opener.setter
1831 1836 def opener(self, value):
1832 1837 self._revlog.opener = value
1833 1838
1834 1839
1835 1840 @interfaceutil.implementer(repository.imanifestlog)
1836 1841 class manifestlog(object):
1837 1842 """A collection class representing the collection of manifest snapshots
1838 1843 referenced by commits in the repository.
1839 1844
1840 1845 In this situation, 'manifest' refers to the abstract concept of a snapshot
1841 1846 of the list of files in the given commit. Consumers of the output of this
1842 1847 class do not care about the implementation details of the actual manifests
1843 1848 they receive (i.e. tree or flat or lazily loaded, etc)."""
1844 1849
1845 1850 def __init__(self, opener, repo, rootstore, narrowmatch):
1846 1851 usetreemanifest = False
1847 1852 cachesize = 4
1848 1853
1849 1854 opts = getattr(opener, 'options', None)
1850 1855 if opts is not None:
1851 1856 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1852 1857 cachesize = opts.get(b'manifestcachesize', cachesize)
1853 1858
1854 1859 self._treemanifests = usetreemanifest
1855 1860
1856 1861 self._rootstore = rootstore
1857 1862 self._rootstore._setupmanifestcachehooks(repo)
1858 1863 self._narrowmatch = narrowmatch
1859 1864
1860 1865 # A cache of the manifestctx or treemanifestctx for each directory
1861 1866 self._dirmancache = {}
1862 1867 self._dirmancache[b''] = util.lrucachedict(cachesize)
1863 1868
1864 1869 self._cachesize = cachesize
1865 1870
1866 1871 def __getitem__(self, node):
1867 1872 """Retrieves the manifest instance for the given node. Throws a
1868 1873 LookupError if not found.
1869 1874 """
1870 1875 return self.get(b'', node)
1871 1876
1872 1877 def get(self, tree, node, verify=True):
1873 1878 """Retrieves the manifest instance for the given node. Throws a
1874 1879 LookupError if not found.
1875 1880
1876 1881 `verify` - if True an exception will be thrown if the node is not in
1877 1882 the revlog
1878 1883 """
1879 1884 if node in self._dirmancache.get(tree, ()):
1880 1885 return self._dirmancache[tree][node]
1881 1886
1882 1887 if not self._narrowmatch.always():
1883 1888 if not self._narrowmatch.visitdir(tree[:-1]):
1884 1889 return excludeddirmanifestctx(tree, node)
1885 1890 if tree:
1886 1891 if self._rootstore._treeondisk:
1887 1892 if verify:
1888 1893 # Side-effect is LookupError is raised if node doesn't
1889 1894 # exist.
1890 1895 self.getstorage(tree).rev(node)
1891 1896
1892 1897 m = treemanifestctx(self, tree, node)
1893 1898 else:
1894 1899 raise error.Abort(
1895 1900 _(
1896 1901 b"cannot ask for manifest directory '%s' in a flat "
1897 1902 b"manifest"
1898 1903 )
1899 1904 % tree
1900 1905 )
1901 1906 else:
1902 1907 if verify:
1903 1908 # Side-effect is LookupError is raised if node doesn't exist.
1904 1909 self._rootstore.rev(node)
1905 1910
1906 1911 if self._treemanifests:
1907 1912 m = treemanifestctx(self, b'', node)
1908 1913 else:
1909 1914 m = manifestctx(self, node)
1910 1915
1911 1916 if node != nullid:
1912 1917 mancache = self._dirmancache.get(tree)
1913 1918 if not mancache:
1914 1919 mancache = util.lrucachedict(self._cachesize)
1915 1920 self._dirmancache[tree] = mancache
1916 1921 mancache[node] = m
1917 1922 return m
1918 1923
1919 1924 def getstorage(self, tree):
1920 1925 return self._rootstore.dirlog(tree)
1921 1926
1922 1927 def clearcaches(self, clear_persisted_data=False):
1923 1928 self._dirmancache.clear()
1924 1929 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
1925 1930
1926 1931 def rev(self, node):
1927 1932 return self._rootstore.rev(node)
1928 1933
1929 1934
1930 1935 @interfaceutil.implementer(repository.imanifestrevisionwritable)
1931 1936 class memmanifestctx(object):
1932 1937 def __init__(self, manifestlog):
1933 1938 self._manifestlog = manifestlog
1934 1939 self._manifestdict = manifestdict()
1935 1940
1936 1941 def _storage(self):
1937 1942 return self._manifestlog.getstorage(b'')
1938 1943
1939 1944 def copy(self):
1940 1945 memmf = memmanifestctx(self._manifestlog)
1941 1946 memmf._manifestdict = self.read().copy()
1942 1947 return memmf
1943 1948
1944 1949 def read(self):
1945 1950 return self._manifestdict
1946 1951
1947 1952 def write(self, transaction, link, p1, p2, added, removed, match=None):
1948 1953 return self._storage().add(
1949 1954 self._manifestdict,
1950 1955 transaction,
1951 1956 link,
1952 1957 p1,
1953 1958 p2,
1954 1959 added,
1955 1960 removed,
1956 1961 match=match,
1957 1962 )
1958 1963
1959 1964
1960 1965 @interfaceutil.implementer(repository.imanifestrevisionstored)
1961 1966 class manifestctx(object):
1962 1967 """A class representing a single revision of a manifest, including its
1963 1968 contents, its parent revs, and its linkrev.
1964 1969 """
1965 1970
1966 1971 def __init__(self, manifestlog, node):
1967 1972 self._manifestlog = manifestlog
1968 1973 self._data = None
1969 1974
1970 1975 self._node = node
1971 1976
1972 1977 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1973 1978 # but let's add it later when something needs it and we can load it
1974 1979 # lazily.
1975 1980 # self.p1, self.p2 = store.parents(node)
1976 1981 # rev = store.rev(node)
1977 1982 # self.linkrev = store.linkrev(rev)
1978 1983
1979 1984 def _storage(self):
1980 1985 return self._manifestlog.getstorage(b'')
1981 1986
1982 1987 def node(self):
1983 1988 return self._node
1984 1989
1985 1990 def copy(self):
1986 1991 memmf = memmanifestctx(self._manifestlog)
1987 1992 memmf._manifestdict = self.read().copy()
1988 1993 return memmf
1989 1994
1990 1995 @propertycache
1991 1996 def parents(self):
1992 1997 return self._storage().parents(self._node)
1993 1998
1994 1999 def read(self):
1995 2000 if self._data is None:
1996 2001 if self._node == nullid:
1997 2002 self._data = manifestdict()
1998 2003 else:
1999 2004 store = self._storage()
2000 2005 if self._node in store.fulltextcache:
2001 2006 text = pycompat.bytestr(store.fulltextcache[self._node])
2002 2007 else:
2003 2008 text = store.revision(self._node)
2004 2009 arraytext = bytearray(text)
2005 2010 store.fulltextcache[self._node] = arraytext
2006 2011 self._data = manifestdict(text)
2007 2012 return self._data
2008 2013
2009 2014 def readfast(self, shallow=False):
2010 2015 '''Calls either readdelta or read, based on which would be less work.
2011 2016 readdelta is called if the delta is against the p1, and therefore can be
2012 2017 read quickly.
2013 2018
2014 2019 If `shallow` is True, nothing changes since this is a flat manifest.
2015 2020 '''
2016 2021 store = self._storage()
2017 2022 r = store.rev(self._node)
2018 2023 deltaparent = store.deltaparent(r)
2019 2024 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2020 2025 return self.readdelta()
2021 2026 return self.read()
2022 2027
2023 2028 def readdelta(self, shallow=False):
2024 2029 '''Returns a manifest containing just the entries that are present
2025 2030 in this manifest, but not in its p1 manifest. This is efficient to read
2026 2031 if the revlog delta is already p1.
2027 2032
2028 2033 Changing the value of `shallow` has no effect on flat manifests.
2029 2034 '''
2030 2035 store = self._storage()
2031 2036 r = store.rev(self._node)
2032 2037 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2033 2038 return manifestdict(d)
2034 2039
2035 2040 def find(self, key):
2036 2041 return self.read().find(key)
2037 2042
2038 2043
2039 2044 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2040 2045 class memtreemanifestctx(object):
2041 2046 def __init__(self, manifestlog, dir=b''):
2042 2047 self._manifestlog = manifestlog
2043 2048 self._dir = dir
2044 2049 self._treemanifest = treemanifest()
2045 2050
2046 2051 def _storage(self):
2047 2052 return self._manifestlog.getstorage(b'')
2048 2053
2049 2054 def copy(self):
2050 2055 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2051 2056 memmf._treemanifest = self._treemanifest.copy()
2052 2057 return memmf
2053 2058
2054 2059 def read(self):
2055 2060 return self._treemanifest
2056 2061
2057 2062 def write(self, transaction, link, p1, p2, added, removed, match=None):
2058 2063 def readtree(dir, node):
2059 2064 return self._manifestlog.get(dir, node).read()
2060 2065
2061 2066 return self._storage().add(
2062 2067 self._treemanifest,
2063 2068 transaction,
2064 2069 link,
2065 2070 p1,
2066 2071 p2,
2067 2072 added,
2068 2073 removed,
2069 2074 readtree=readtree,
2070 2075 match=match,
2071 2076 )
2072 2077
2073 2078
2074 2079 @interfaceutil.implementer(repository.imanifestrevisionstored)
2075 2080 class treemanifestctx(object):
2076 2081 def __init__(self, manifestlog, dir, node):
2077 2082 self._manifestlog = manifestlog
2078 2083 self._dir = dir
2079 2084 self._data = None
2080 2085
2081 2086 self._node = node
2082 2087
2083 2088 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2084 2089 # we can instantiate treemanifestctx objects for directories we don't
2085 2090 # have on disk.
2086 2091 # self.p1, self.p2 = store.parents(node)
2087 2092 # rev = store.rev(node)
2088 2093 # self.linkrev = store.linkrev(rev)
2089 2094
2090 2095 def _storage(self):
2091 2096 narrowmatch = self._manifestlog._narrowmatch
2092 2097 if not narrowmatch.always():
2093 2098 if not narrowmatch.visitdir(self._dir[:-1]):
2094 2099 return excludedmanifestrevlog(self._dir)
2095 2100 return self._manifestlog.getstorage(self._dir)
2096 2101
2097 2102 def read(self):
2098 2103 if self._data is None:
2099 2104 store = self._storage()
2100 2105 if self._node == nullid:
2101 2106 self._data = treemanifest()
2102 2107 # TODO accessing non-public API
2103 2108 elif store._treeondisk:
2104 2109 m = treemanifest(dir=self._dir)
2105 2110
2106 2111 def gettext():
2107 2112 return store.revision(self._node)
2108 2113
2109 2114 def readsubtree(dir, subm):
2110 2115 # Set verify to False since we need to be able to create
2111 2116 # subtrees for trees that don't exist on disk.
2112 2117 return self._manifestlog.get(dir, subm, verify=False).read()
2113 2118
2114 2119 m.read(gettext, readsubtree)
2115 2120 m.setnode(self._node)
2116 2121 self._data = m
2117 2122 else:
2118 2123 if self._node in store.fulltextcache:
2119 2124 text = pycompat.bytestr(store.fulltextcache[self._node])
2120 2125 else:
2121 2126 text = store.revision(self._node)
2122 2127 arraytext = bytearray(text)
2123 2128 store.fulltextcache[self._node] = arraytext
2124 2129 self._data = treemanifest(dir=self._dir, text=text)
2125 2130
2126 2131 return self._data
2127 2132
2128 2133 def node(self):
2129 2134 return self._node
2130 2135
2131 2136 def copy(self):
2132 2137 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2133 2138 memmf._treemanifest = self.read().copy()
2134 2139 return memmf
2135 2140
2136 2141 @propertycache
2137 2142 def parents(self):
2138 2143 return self._storage().parents(self._node)
2139 2144
2140 2145 def readdelta(self, shallow=False):
2141 2146 '''Returns a manifest containing just the entries that are present
2142 2147 in this manifest, but not in its p1 manifest. This is efficient to read
2143 2148 if the revlog delta is already p1.
2144 2149
2145 2150 If `shallow` is True, this will read the delta for this directory,
2146 2151 without recursively reading subdirectory manifests. Instead, any
2147 2152 subdirectory entry will be reported as it appears in the manifest, i.e.
2148 2153 the subdirectory will be reported among files and distinguished only by
2149 2154 its 't' flag.
2150 2155 '''
2151 2156 store = self._storage()
2152 2157 if shallow:
2153 2158 r = store.rev(self._node)
2154 2159 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2155 2160 return manifestdict(d)
2156 2161 else:
2157 2162 # Need to perform a slow delta
2158 2163 r0 = store.deltaparent(store.rev(self._node))
2159 2164 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2160 2165 m1 = self.read()
2161 2166 md = treemanifest(dir=self._dir)
2162 2167 for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
2163 2168 if n1:
2164 2169 md[f] = n1
2165 2170 if fl1:
2166 2171 md.setflag(f, fl1)
2167 2172 return md
2168 2173
2169 2174 def readfast(self, shallow=False):
2170 2175 '''Calls either readdelta or read, based on which would be less work.
2171 2176 readdelta is called if the delta is against the p1, and therefore can be
2172 2177 read quickly.
2173 2178
2174 2179 If `shallow` is True, it only returns the entries from this manifest,
2175 2180 and not any submanifests.
2176 2181 '''
2177 2182 store = self._storage()
2178 2183 r = store.rev(self._node)
2179 2184 deltaparent = store.deltaparent(r)
2180 2185 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2181 2186 return self.readdelta(shallow=shallow)
2182 2187
2183 2188 if shallow:
2184 2189 return manifestdict(store.revision(self._node))
2185 2190 else:
2186 2191 return self.read()
2187 2192
2188 2193 def find(self, key):
2189 2194 return self.read().find(key)
2190 2195
2191 2196
2192 2197 class excludeddir(treemanifest):
2193 2198 """Stand-in for a directory that is excluded from the repository.
2194 2199
2195 2200 With narrowing active on a repository that uses treemanifests,
2196 2201 some of the directory revlogs will be excluded from the resulting
2197 2202 clone. This is a huge storage win for clients, but means we need
2198 2203 some sort of pseudo-manifest to surface to internals so we can
2199 2204 detect a merge conflict outside the narrowspec. That's what this
2200 2205 class is: it stands in for a directory whose node is known, but
2201 2206 whose contents are unknown.
2202 2207 """
2203 2208
2204 2209 def __init__(self, dir, node):
2205 2210 super(excludeddir, self).__init__(dir)
2206 2211 self._node = node
2207 2212 # Add an empty file, which will be included by iterators and such,
2208 2213 # appearing as the directory itself (i.e. something like "dir/")
2209 2214 self._files[b''] = node
2210 2215 self._flags[b''] = b't'
2211 2216
2212 2217 # Manifests outside the narrowspec should never be modified, so avoid
2213 2218 # copying. This makes a noticeable difference when there are very many
2214 2219 # directories outside the narrowspec. Also, it makes sense for the copy to
2215 2220 # be of the same type as the original, which would not happen with the
2216 2221 # super type's copy().
2217 2222 def copy(self):
2218 2223 return self
2219 2224
2220 2225
2221 2226 class excludeddirmanifestctx(treemanifestctx):
2222 2227 """context wrapper for excludeddir - see that docstring for rationale"""
2223 2228
2224 2229 def __init__(self, dir, node):
2225 2230 self._dir = dir
2226 2231 self._node = node
2227 2232
2228 2233 def read(self):
2229 2234 return excludeddir(self._dir, self._node)
2230 2235
2231 2236 def write(self, *args):
2232 2237 raise error.ProgrammingError(
2233 2238 b'attempt to write manifest from excluded dir %s' % self._dir
2234 2239 )
2235 2240
2236 2241
2237 2242 class excludedmanifestrevlog(manifestrevlog):
2238 2243 """Stand-in for excluded treemanifest revlogs.
2239 2244
2240 2245 When narrowing is active on a treemanifest repository, we'll have
2241 2246 references to directories we can't see due to the revlog being
2242 2247 skipped. This class exists to conform to the manifestrevlog
2243 2248 interface for those directories and proactively prevent writes to
2244 2249 outside the narrowspec.
2245 2250 """
2246 2251
2247 2252 def __init__(self, dir):
2248 2253 self._dir = dir
2249 2254
2250 2255 def __len__(self):
2251 2256 raise error.ProgrammingError(
2252 2257 b'attempt to get length of excluded dir %s' % self._dir
2253 2258 )
2254 2259
2255 2260 def rev(self, node):
2256 2261 raise error.ProgrammingError(
2257 2262 b'attempt to get rev from excluded dir %s' % self._dir
2258 2263 )
2259 2264
2260 2265 def linkrev(self, node):
2261 2266 raise error.ProgrammingError(
2262 2267 b'attempt to get linkrev from excluded dir %s' % self._dir
2263 2268 )
2264 2269
2265 2270 def node(self, rev):
2266 2271 raise error.ProgrammingError(
2267 2272 b'attempt to get node from excluded dir %s' % self._dir
2268 2273 )
2269 2274
2270 2275 def add(self, *args, **kwargs):
2271 2276 # We should never write entries in dirlogs outside the narrow clone.
2272 2277 # However, the method still gets called from writesubtree() in
2273 2278 # _addtree(), so we need to handle it. We should possibly make that
2274 2279 # avoid calling add() with a clean manifest (_dirty is always False
2275 2280 # in excludeddir instances).
2276 2281 pass
General Comments 0
You need to be logged in to leave comments. Login now