##// END OF EJS Templates
manifest: remove a final 40-byte assumption from pure-python parser...
Augie Fackler -
r45200:ce126b6b default
parent child Browse files
Show More
@@ -1,2286 +1,2297 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from .pycompat import getattr
23 23 from . import (
24 24 encoding,
25 25 error,
26 26 match as matchmod,
27 27 mdiff,
28 28 pathutil,
29 29 policy,
30 30 pycompat,
31 31 revlog,
32 32 util,
33 33 )
34 34 from .interfaces import (
35 35 repository,
36 36 util as interfaceutil,
37 37 )
38 38
39 39 parsers = policy.importmod('parsers')
40 40 propertycache = util.propertycache
41 41
42 42 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
43 43 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
44 44
45 45
46 46 def _parse(data):
47 47 # This method does a little bit of excessive-looking
48 48 # precondition checking. This is so that the behavior of this
49 49 # class exactly matches its C counterpart to try and help
50 50 # prevent surprise breakage for anyone that develops against
51 51 # the pure version.
52 52 if data and data[-1:] != b'\n':
53 53 raise ValueError(b'Manifest did not end in a newline.')
54 54 prev = None
55 55 for l in data.splitlines():
56 56 if prev is not None and prev > l:
57 57 raise ValueError(b'Manifest lines not in sorted order.')
58 58 prev = l
59 59 f, n = l.split(b'\0')
60 if len(n) > 40:
60 nl = len(n)
61 if 64 < nl:
62 # modern hash, full width
63 yield f, bin(n[:64]), n[64:]
64 elif 40 < nl < 45:
65 # legacy hash, always sha1
61 66 yield f, bin(n[:40]), n[40:]
62 67 else:
63 68 yield f, bin(n), b''
64 69
65 70
66 71 def _text(it):
67 72 files = []
68 73 lines = []
69 74 for f, n, fl in it:
70 75 files.append(f)
71 76 # if this is changed to support newlines in filenames,
72 77 # be sure to check the templates/ dir again (especially *-raw.tmpl)
73 78 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
74 79
75 80 _checkforbidden(files)
76 81 return b''.join(lines)
77 82
78 83
79 84 class lazymanifestiter(object):
80 85 def __init__(self, lm):
81 86 self.pos = 0
82 87 self.lm = lm
83 88
84 89 def __iter__(self):
85 90 return self
86 91
87 92 def next(self):
88 93 try:
89 94 data, pos = self.lm._get(self.pos)
90 95 except IndexError:
91 96 raise StopIteration
92 97 if pos == -1:
93 98 self.pos += 1
94 99 return data[0]
95 100 self.pos += 1
96 101 zeropos = data.find(b'\x00', pos)
97 102 return data[pos:zeropos]
98 103
99 104 __next__ = next
100 105
101 106
102 107 class lazymanifestiterentries(object):
103 108 def __init__(self, lm):
104 109 self.lm = lm
105 110 self.pos = 0
106 111
107 112 def __iter__(self):
108 113 return self
109 114
110 115 def next(self):
111 116 try:
112 117 data, pos = self.lm._get(self.pos)
113 118 except IndexError:
114 119 raise StopIteration
115 120 if pos == -1:
116 121 self.pos += 1
117 122 return data
118 123 zeropos = data.find(b'\x00', pos)
119 124 hashval = unhexlify(data, self.lm.extrainfo[self.pos], zeropos + 1, 40)
120 125 flags = self.lm._getflags(data, self.pos, zeropos)
121 126 self.pos += 1
122 127 return (data[pos:zeropos], hashval, flags)
123 128
124 129 __next__ = next
125 130
126 131
127 132 def unhexlify(data, extra, pos, length):
128 133 s = bin(data[pos : pos + length])
129 134 if extra:
130 135 s += chr(extra & 0xFF)
131 136 return s
132 137
133 138
134 139 def _cmp(a, b):
135 140 return (a > b) - (a < b)
136 141
137 142
138 143 class _lazymanifest(object):
139 144 """A pure python manifest backed by a byte string. It is supplimented with
140 145 internal lists as it is modified, until it is compacted back to a pure byte
141 146 string.
142 147
143 148 ``data`` is the initial manifest data.
144 149
145 150 ``positions`` is a list of offsets, one per manifest entry. Positive
146 151 values are offsets into ``data``, negative values are offsets into the
147 152 ``extradata`` list. When an entry is removed, its entry is dropped from
148 153 ``positions``. The values are encoded such that when walking the list and
149 154 indexing into ``data`` or ``extradata`` as appropriate, the entries are
150 155 sorted by filename.
151 156
152 157 ``extradata`` is a list of (key, hash, flags) for entries that were added or
153 158 modified since the manifest was created or compacted.
154 159 """
155 160
156 161 def __init__(
157 162 self,
158 163 data,
159 164 positions=None,
160 165 extrainfo=None,
161 166 extradata=None,
162 167 hasremovals=False,
163 168 ):
164 169 if positions is None:
165 170 self.positions = self.findlines(data)
166 171 self.extrainfo = [0] * len(self.positions)
167 172 self.data = data
168 173 self.extradata = []
169 174 self.hasremovals = False
170 175 else:
171 176 self.positions = positions[:]
172 177 self.extrainfo = extrainfo[:]
173 178 self.extradata = extradata[:]
174 179 self.data = data
175 180 self.hasremovals = hasremovals
176 181
177 182 def findlines(self, data):
178 183 if not data:
179 184 return []
180 185 pos = data.find(b"\n")
181 186 if pos == -1 or data[-1:] != b'\n':
182 187 raise ValueError(b"Manifest did not end in a newline.")
183 188 positions = [0]
184 189 prev = data[: data.find(b'\x00')]
185 190 while pos < len(data) - 1 and pos != -1:
186 191 positions.append(pos + 1)
187 192 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
188 193 if nexts < prev:
189 194 raise ValueError(b"Manifest lines not in sorted order.")
190 195 prev = nexts
191 196 pos = data.find(b"\n", pos + 1)
192 197 return positions
193 198
194 199 def _get(self, index):
195 200 # get the position encoded in pos:
196 201 # positive number is an index in 'data'
197 202 # negative number is in extrapieces
198 203 pos = self.positions[index]
199 204 if pos >= 0:
200 205 return self.data, pos
201 206 return self.extradata[-pos - 1], -1
202 207
203 208 def _getkey(self, pos):
204 209 if pos >= 0:
205 210 return self.data[pos : self.data.find(b'\x00', pos + 1)]
206 211 return self.extradata[-pos - 1][0]
207 212
208 213 def bsearch(self, key):
209 214 first = 0
210 215 last = len(self.positions) - 1
211 216
212 217 while first <= last:
213 218 midpoint = (first + last) // 2
214 219 nextpos = self.positions[midpoint]
215 220 candidate = self._getkey(nextpos)
216 221 r = _cmp(key, candidate)
217 222 if r == 0:
218 223 return midpoint
219 224 else:
220 225 if r < 0:
221 226 last = midpoint - 1
222 227 else:
223 228 first = midpoint + 1
224 229 return -1
225 230
226 231 def bsearch2(self, key):
227 232 # same as the above, but will always return the position
228 233 # done for performance reasons
229 234 first = 0
230 235 last = len(self.positions) - 1
231 236
232 237 while first <= last:
233 238 midpoint = (first + last) // 2
234 239 nextpos = self.positions[midpoint]
235 240 candidate = self._getkey(nextpos)
236 241 r = _cmp(key, candidate)
237 242 if r == 0:
238 243 return (midpoint, True)
239 244 else:
240 245 if r < 0:
241 246 last = midpoint - 1
242 247 else:
243 248 first = midpoint + 1
244 249 return (first, False)
245 250
246 251 def __contains__(self, key):
247 252 return self.bsearch(key) != -1
248 253
249 254 def _getflags(self, data, needle, pos):
250 255 start = pos + 41
251 256 end = data.find(b"\n", start)
252 257 if end == -1:
253 258 end = len(data) - 1
254 259 if start == end:
255 260 return b''
256 261 return self.data[start:end]
257 262
258 263 def __getitem__(self, key):
259 264 if not isinstance(key, bytes):
260 265 raise TypeError(b"getitem: manifest keys must be a bytes.")
261 266 needle = self.bsearch(key)
262 267 if needle == -1:
263 268 raise KeyError
264 269 data, pos = self._get(needle)
265 270 if pos == -1:
266 271 return (data[1], data[2])
267 272 zeropos = data.find(b'\x00', pos)
273 nlpos = data.find(b'\n', zeropos)
268 274 assert 0 <= needle <= len(self.positions)
269 275 assert len(self.extrainfo) == len(self.positions)
270 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
276 hlen = nlpos - zeropos - 1
277 # Hashes sometimes have an extra byte tucked on the end, so
278 # detect that.
279 if hlen % 2:
280 hlen -= 1
281 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
271 282 flags = self._getflags(data, needle, zeropos)
272 283 return (hashval, flags)
273 284
274 285 def __delitem__(self, key):
275 286 needle, found = self.bsearch2(key)
276 287 if not found:
277 288 raise KeyError
278 289 cur = self.positions[needle]
279 290 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
280 291 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
281 292 if cur >= 0:
282 293 # This does NOT unsort the list as far as the search functions are
283 294 # concerned, as they only examine lines mapped by self.positions.
284 295 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
285 296 self.hasremovals = True
286 297
287 298 def __setitem__(self, key, value):
288 299 if not isinstance(key, bytes):
289 300 raise TypeError(b"setitem: manifest keys must be a byte string.")
290 301 if not isinstance(value, tuple) or len(value) != 2:
291 302 raise TypeError(
292 303 b"Manifest values must be a tuple of (node, flags)."
293 304 )
294 305 hashval = value[0]
295 306 # hashes are either 20 or 32 bytes (sha1 or its replacement),
296 307 # and allow one extra byte taht won't be persisted to disk but
297 308 # is sometimes used in memory.
298 309 if not isinstance(hashval, bytes) or not (
299 310 20 <= len(hashval) <= 22 or 32 <= len(hashval) <= 34
300 311 ):
301 312 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
302 313 flags = value[1]
303 314 if len(hashval) == 22:
304 315 hashval = hashval[:-1]
305 316 if not isinstance(flags, bytes) or len(flags) > 1:
306 317 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
307 318 needle, found = self.bsearch2(key)
308 319 if found:
309 320 # put the item
310 321 pos = self.positions[needle]
311 322 if pos < 0:
312 323 self.extradata[-pos - 1] = (key, hashval, value[1])
313 324 else:
314 325 # just don't bother
315 326 self.extradata.append((key, hashval, value[1]))
316 327 self.positions[needle] = -len(self.extradata)
317 328 else:
318 329 # not found, put it in with extra positions
319 330 self.extradata.append((key, hashval, value[1]))
320 331 self.positions = (
321 332 self.positions[:needle]
322 333 + [-len(self.extradata)]
323 334 + self.positions[needle:]
324 335 )
325 336 self.extrainfo = (
326 337 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
327 338 )
328 339
329 340 def copy(self):
330 341 # XXX call _compact like in C?
331 342 return _lazymanifest(
332 343 self.data,
333 344 self.positions,
334 345 self.extrainfo,
335 346 self.extradata,
336 347 self.hasremovals,
337 348 )
338 349
339 350 def _compact(self):
340 351 # hopefully not called TOO often
341 352 if len(self.extradata) == 0 and not self.hasremovals:
342 353 return
343 354 l = []
344 355 i = 0
345 356 offset = 0
346 357 self.extrainfo = [0] * len(self.positions)
347 358 while i < len(self.positions):
348 359 if self.positions[i] >= 0:
349 360 cur = self.positions[i]
350 361 last_cut = cur
351 362
352 363 # Collect all contiguous entries in the buffer at the current
353 364 # offset, breaking out only for added/modified items held in
354 365 # extradata, or a deleted line prior to the next position.
355 366 while True:
356 367 self.positions[i] = offset
357 368 i += 1
358 369 if i == len(self.positions) or self.positions[i] < 0:
359 370 break
360 371
361 372 # A removed file has no positions[] entry, but does have an
362 373 # overwritten first byte. Break out and find the end of the
363 374 # current good entry/entries if there is a removed file
364 375 # before the next position.
365 376 if (
366 377 self.hasremovals
367 378 and self.data.find(b'\n\x00', cur, self.positions[i])
368 379 != -1
369 380 ):
370 381 break
371 382
372 383 offset += self.positions[i] - cur
373 384 cur = self.positions[i]
374 385 end_cut = self.data.find(b'\n', cur)
375 386 if end_cut != -1:
376 387 end_cut += 1
377 388 offset += end_cut - cur
378 389 l.append(self.data[last_cut:end_cut])
379 390 else:
380 391 while i < len(self.positions) and self.positions[i] < 0:
381 392 cur = self.positions[i]
382 393 t = self.extradata[-cur - 1]
383 394 l.append(self._pack(t))
384 395 self.positions[i] = offset
385 396 # Hashes are either 20 bytes (old sha1s) or 32
386 397 # bytes (new non-sha1).
387 398 hlen = 20
388 399 if len(t[1]) > 25:
389 400 hlen = 32
390 401 if len(t[1]) > hlen:
391 402 self.extrainfo[i] = ord(t[1][hlen + 1])
392 403 offset += len(l[-1])
393 404 i += 1
394 405 self.data = b''.join(l)
395 406 self.hasremovals = False
396 407 self.extradata = []
397 408
398 409 def _pack(self, d):
399 410 n = d[1]
400 411 if len(n) == 21 or len(n) == 33:
401 412 n = n[:-1]
402 413 assert len(n) == 20 or len(n) == 32
403 414 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
404 415
405 416 def text(self):
406 417 self._compact()
407 418 return self.data
408 419
409 420 def diff(self, m2, clean=False):
410 421 '''Finds changes between the current manifest and m2.'''
411 422 # XXX think whether efficiency matters here
412 423 diff = {}
413 424
414 425 for fn, e1, flags in self.iterentries():
415 426 if fn not in m2:
416 427 diff[fn] = (e1, flags), (None, b'')
417 428 else:
418 429 e2 = m2[fn]
419 430 if (e1, flags) != e2:
420 431 diff[fn] = (e1, flags), e2
421 432 elif clean:
422 433 diff[fn] = None
423 434
424 435 for fn, e2, flags in m2.iterentries():
425 436 if fn not in self:
426 437 diff[fn] = (None, b''), (e2, flags)
427 438
428 439 return diff
429 440
430 441 def iterentries(self):
431 442 return lazymanifestiterentries(self)
432 443
433 444 def iterkeys(self):
434 445 return lazymanifestiter(self)
435 446
436 447 def __iter__(self):
437 448 return lazymanifestiter(self)
438 449
439 450 def __len__(self):
440 451 return len(self.positions)
441 452
442 453 def filtercopy(self, filterfn):
443 454 # XXX should be optimized
444 455 c = _lazymanifest(b'')
445 456 for f, n, fl in self.iterentries():
446 457 if filterfn(f):
447 458 c[f] = n, fl
448 459 return c
449 460
450 461
451 462 try:
452 463 _lazymanifest = parsers.lazymanifest
453 464 except AttributeError:
454 465 pass
455 466
456 467
457 468 @interfaceutil.implementer(repository.imanifestdict)
458 469 class manifestdict(object):
459 470 def __init__(self, data=b''):
460 471 self._lm = _lazymanifest(data)
461 472
462 473 def __getitem__(self, key):
463 474 return self._lm[key][0]
464 475
465 476 def find(self, key):
466 477 return self._lm[key]
467 478
468 479 def __len__(self):
469 480 return len(self._lm)
470 481
471 482 def __nonzero__(self):
472 483 # nonzero is covered by the __len__ function, but implementing it here
473 484 # makes it easier for extensions to override.
474 485 return len(self._lm) != 0
475 486
476 487 __bool__ = __nonzero__
477 488
478 489 def __setitem__(self, key, node):
479 490 self._lm[key] = node, self.flags(key)
480 491
481 492 def __contains__(self, key):
482 493 if key is None:
483 494 return False
484 495 return key in self._lm
485 496
486 497 def __delitem__(self, key):
487 498 del self._lm[key]
488 499
489 500 def __iter__(self):
490 501 return self._lm.__iter__()
491 502
492 503 def iterkeys(self):
493 504 return self._lm.iterkeys()
494 505
495 506 def keys(self):
496 507 return list(self.iterkeys())
497 508
498 509 def filesnotin(self, m2, match=None):
499 510 '''Set of files in this manifest that are not in the other'''
500 511 if match is not None:
501 512 match = matchmod.badmatch(match, lambda path, msg: None)
502 513 sm2 = set(m2.walk(match))
503 514 return {f for f in self.walk(match) if f not in sm2}
504 515 return {f for f in self if f not in m2}
505 516
506 517 @propertycache
507 518 def _dirs(self):
508 519 return pathutil.dirs(self)
509 520
510 521 def dirs(self):
511 522 return self._dirs
512 523
513 524 def hasdir(self, dir):
514 525 return dir in self._dirs
515 526
516 527 def _filesfastpath(self, match):
517 528 '''Checks whether we can correctly and quickly iterate over matcher
518 529 files instead of over manifest files.'''
519 530 files = match.files()
520 531 return len(files) < 100 and (
521 532 match.isexact()
522 533 or (match.prefix() and all(fn in self for fn in files))
523 534 )
524 535
525 536 def walk(self, match):
526 537 '''Generates matching file names.
527 538
528 539 Equivalent to manifest.matches(match).iterkeys(), but without creating
529 540 an entirely new manifest.
530 541
531 542 It also reports nonexistent files by marking them bad with match.bad().
532 543 '''
533 544 if match.always():
534 545 for f in iter(self):
535 546 yield f
536 547 return
537 548
538 549 fset = set(match.files())
539 550
540 551 # avoid the entire walk if we're only looking for specific files
541 552 if self._filesfastpath(match):
542 553 for fn in sorted(fset):
543 554 if fn in self:
544 555 yield fn
545 556 return
546 557
547 558 for fn in self:
548 559 if fn in fset:
549 560 # specified pattern is the exact name
550 561 fset.remove(fn)
551 562 if match(fn):
552 563 yield fn
553 564
554 565 # for dirstate.walk, files=[''] means "walk the whole tree".
555 566 # follow that here, too
556 567 fset.discard(b'')
557 568
558 569 for fn in sorted(fset):
559 570 if not self.hasdir(fn):
560 571 match.bad(fn, None)
561 572
562 573 def _matches(self, match):
563 574 '''generate a new manifest filtered by the match argument'''
564 575 if match.always():
565 576 return self.copy()
566 577
567 578 if self._filesfastpath(match):
568 579 m = manifestdict()
569 580 lm = self._lm
570 581 for fn in match.files():
571 582 if fn in lm:
572 583 m._lm[fn] = lm[fn]
573 584 return m
574 585
575 586 m = manifestdict()
576 587 m._lm = self._lm.filtercopy(match)
577 588 return m
578 589
579 590 def diff(self, m2, match=None, clean=False):
580 591 '''Finds changes between the current manifest and m2.
581 592
582 593 Args:
583 594 m2: the manifest to which this manifest should be compared.
584 595 clean: if true, include files unchanged between these manifests
585 596 with a None value in the returned dictionary.
586 597
587 598 The result is returned as a dict with filename as key and
588 599 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
589 600 nodeid in the current/other manifest and fl1/fl2 is the flag
590 601 in the current/other manifest. Where the file does not exist,
591 602 the nodeid will be None and the flags will be the empty
592 603 string.
593 604 '''
594 605 if match:
595 606 m1 = self._matches(match)
596 607 m2 = m2._matches(match)
597 608 return m1.diff(m2, clean=clean)
598 609 return self._lm.diff(m2._lm, clean)
599 610
600 611 def setflag(self, key, flag):
601 612 self._lm[key] = self[key], flag
602 613
603 614 def get(self, key, default=None):
604 615 try:
605 616 return self._lm[key][0]
606 617 except KeyError:
607 618 return default
608 619
609 620 def flags(self, key):
610 621 try:
611 622 return self._lm[key][1]
612 623 except KeyError:
613 624 return b''
614 625
615 626 def copy(self):
616 627 c = manifestdict()
617 628 c._lm = self._lm.copy()
618 629 return c
619 630
620 631 def items(self):
621 632 return (x[:2] for x in self._lm.iterentries())
622 633
623 634 def iteritems(self):
624 635 return (x[:2] for x in self._lm.iterentries())
625 636
626 637 def iterentries(self):
627 638 return self._lm.iterentries()
628 639
629 640 def text(self):
630 641 # most likely uses native version
631 642 return self._lm.text()
632 643
633 644 def fastdelta(self, base, changes):
634 645 """Given a base manifest text as a bytearray and a list of changes
635 646 relative to that text, compute a delta that can be used by revlog.
636 647 """
637 648 delta = []
638 649 dstart = None
639 650 dend = None
640 651 dline = [b""]
641 652 start = 0
642 653 # zero copy representation of base as a buffer
643 654 addbuf = util.buffer(base)
644 655
645 656 changes = list(changes)
646 657 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
647 658 # start with a readonly loop that finds the offset of
648 659 # each line and creates the deltas
649 660 for f, todelete in changes:
650 661 # bs will either be the index of the item or the insert point
651 662 start, end = _msearch(addbuf, f, start)
652 663 if not todelete:
653 664 h, fl = self._lm[f]
654 665 l = b"%s\0%s%s\n" % (f, hex(h), fl)
655 666 else:
656 667 if start == end:
657 668 # item we want to delete was not found, error out
658 669 raise AssertionError(
659 670 _(b"failed to remove %s from manifest") % f
660 671 )
661 672 l = b""
662 673 if dstart is not None and dstart <= start and dend >= start:
663 674 if dend < end:
664 675 dend = end
665 676 if l:
666 677 dline.append(l)
667 678 else:
668 679 if dstart is not None:
669 680 delta.append([dstart, dend, b"".join(dline)])
670 681 dstart = start
671 682 dend = end
672 683 dline = [l]
673 684
674 685 if dstart is not None:
675 686 delta.append([dstart, dend, b"".join(dline)])
676 687 # apply the delta to the base, and get a delta for addrevision
677 688 deltatext, arraytext = _addlistdelta(base, delta)
678 689 else:
679 690 # For large changes, it's much cheaper to just build the text and
680 691 # diff it.
681 692 arraytext = bytearray(self.text())
682 693 deltatext = mdiff.textdiff(
683 694 util.buffer(base), util.buffer(arraytext)
684 695 )
685 696
686 697 return arraytext, deltatext
687 698
688 699
689 700 def _msearch(m, s, lo=0, hi=None):
690 701 '''return a tuple (start, end) that says where to find s within m.
691 702
692 703 If the string is found m[start:end] are the line containing
693 704 that string. If start == end the string was not found and
694 705 they indicate the proper sorted insertion point.
695 706
696 707 m should be a buffer, a memoryview or a byte string.
697 708 s is a byte string'''
698 709
699 710 def advance(i, c):
700 711 while i < lenm and m[i : i + 1] != c:
701 712 i += 1
702 713 return i
703 714
704 715 if not s:
705 716 return (lo, lo)
706 717 lenm = len(m)
707 718 if not hi:
708 719 hi = lenm
709 720 while lo < hi:
710 721 mid = (lo + hi) // 2
711 722 start = mid
712 723 while start > 0 and m[start - 1 : start] != b'\n':
713 724 start -= 1
714 725 end = advance(start, b'\0')
715 726 if bytes(m[start:end]) < s:
716 727 # we know that after the null there are 40 bytes of sha1
717 728 # this translates to the bisect lo = mid + 1
718 729 lo = advance(end + 40, b'\n') + 1
719 730 else:
720 731 # this translates to the bisect hi = mid
721 732 hi = start
722 733 end = advance(lo, b'\0')
723 734 found = m[lo:end]
724 735 if s == found:
725 736 # we know that after the null there are 40 bytes of sha1
726 737 end = advance(end + 40, b'\n')
727 738 return (lo, end + 1)
728 739 else:
729 740 return (lo, lo)
730 741
731 742
732 743 def _checkforbidden(l):
733 744 """Check filenames for illegal characters."""
734 745 for f in l:
735 746 if b'\n' in f or b'\r' in f:
736 747 raise error.StorageError(
737 748 _(b"'\\n' and '\\r' disallowed in filenames: %r")
738 749 % pycompat.bytestr(f)
739 750 )
740 751
741 752
742 753 # apply the changes collected during the bisect loop to our addlist
743 754 # return a delta suitable for addrevision
744 755 def _addlistdelta(addlist, x):
745 756 # for large addlist arrays, building a new array is cheaper
746 757 # than repeatedly modifying the existing one
747 758 currentposition = 0
748 759 newaddlist = bytearray()
749 760
750 761 for start, end, content in x:
751 762 newaddlist += addlist[currentposition:start]
752 763 if content:
753 764 newaddlist += bytearray(content)
754 765
755 766 currentposition = end
756 767
757 768 newaddlist += addlist[currentposition:]
758 769
759 770 deltatext = b"".join(
760 771 struct.pack(b">lll", start, end, len(content)) + content
761 772 for start, end, content in x
762 773 )
763 774 return deltatext, newaddlist
764 775
765 776
766 777 def _splittopdir(f):
767 778 if b'/' in f:
768 779 dir, subpath = f.split(b'/', 1)
769 780 return dir + b'/', subpath
770 781 else:
771 782 return b'', f
772 783
773 784
774 785 _noop = lambda s: None
775 786
776 787
777 788 @interfaceutil.implementer(repository.imanifestdict)
778 789 class treemanifest(object):
779 790 def __init__(self, dir=b'', text=b''):
780 791 self._dir = dir
781 792 self._node = nullid
782 793 self._loadfunc = _noop
783 794 self._copyfunc = _noop
784 795 self._dirty = False
785 796 self._dirs = {}
786 797 self._lazydirs = {}
787 798 # Using _lazymanifest here is a little slower than plain old dicts
788 799 self._files = {}
789 800 self._flags = {}
790 801 if text:
791 802
792 803 def readsubtree(subdir, subm):
793 804 raise AssertionError(
794 805 b'treemanifest constructor only accepts flat manifests'
795 806 )
796 807
797 808 self.parse(text, readsubtree)
798 809 self._dirty = True # Mark flat manifest dirty after parsing
799 810
800 811 def _subpath(self, path):
801 812 return self._dir + path
802 813
803 814 def _loadalllazy(self):
804 815 selfdirs = self._dirs
805 816 for d, (path, node, readsubtree, docopy) in pycompat.iteritems(
806 817 self._lazydirs
807 818 ):
808 819 if docopy:
809 820 selfdirs[d] = readsubtree(path, node).copy()
810 821 else:
811 822 selfdirs[d] = readsubtree(path, node)
812 823 self._lazydirs = {}
813 824
814 825 def _loadlazy(self, d):
815 826 v = self._lazydirs.get(d)
816 827 if v:
817 828 path, node, readsubtree, docopy = v
818 829 if docopy:
819 830 self._dirs[d] = readsubtree(path, node).copy()
820 831 else:
821 832 self._dirs[d] = readsubtree(path, node)
822 833 del self._lazydirs[d]
823 834
824 835 def _loadchildrensetlazy(self, visit):
825 836 if not visit:
826 837 return None
827 838 if visit == b'all' or visit == b'this':
828 839 self._loadalllazy()
829 840 return None
830 841
831 842 loadlazy = self._loadlazy
832 843 for k in visit:
833 844 loadlazy(k + b'/')
834 845 return visit
835 846
836 847 def _loaddifflazy(self, t1, t2):
837 848 """load items in t1 and t2 if they're needed for diffing.
838 849
839 850 The criteria currently is:
840 851 - if it's not present in _lazydirs in either t1 or t2, load it in the
841 852 other (it may already be loaded or it may not exist, doesn't matter)
842 853 - if it's present in _lazydirs in both, compare the nodeid; if it
843 854 differs, load it in both
844 855 """
845 856 toloadlazy = []
846 857 for d, v1 in pycompat.iteritems(t1._lazydirs):
847 858 v2 = t2._lazydirs.get(d)
848 859 if not v2 or v2[1] != v1[1]:
849 860 toloadlazy.append(d)
850 861 for d, v1 in pycompat.iteritems(t2._lazydirs):
851 862 if d not in t1._lazydirs:
852 863 toloadlazy.append(d)
853 864
854 865 for d in toloadlazy:
855 866 t1._loadlazy(d)
856 867 t2._loadlazy(d)
857 868
858 869 def __len__(self):
859 870 self._load()
860 871 size = len(self._files)
861 872 self._loadalllazy()
862 873 for m in self._dirs.values():
863 874 size += m.__len__()
864 875 return size
865 876
866 877 def __nonzero__(self):
867 878 # Faster than "__len() != 0" since it avoids loading sub-manifests
868 879 return not self._isempty()
869 880
870 881 __bool__ = __nonzero__
871 882
872 883 def _isempty(self):
873 884 self._load() # for consistency; already loaded by all callers
874 885 # See if we can skip loading everything.
875 886 if self._files or (
876 887 self._dirs and any(not m._isempty() for m in self._dirs.values())
877 888 ):
878 889 return False
879 890 self._loadalllazy()
880 891 return not self._dirs or all(m._isempty() for m in self._dirs.values())
881 892
882 893 @encoding.strmethod
883 894 def __repr__(self):
884 895 return (
885 896 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
886 897 % (
887 898 self._dir,
888 899 hex(self._node),
889 900 bool(self._loadfunc is _noop),
890 901 self._dirty,
891 902 id(self),
892 903 )
893 904 )
894 905
895 906 def dir(self):
896 907 '''The directory that this tree manifest represents, including a
897 908 trailing '/'. Empty string for the repo root directory.'''
898 909 return self._dir
899 910
900 911 def node(self):
901 912 '''This node of this instance. nullid for unsaved instances. Should
902 913 be updated when the instance is read or written from a revlog.
903 914 '''
904 915 assert not self._dirty
905 916 return self._node
906 917
907 918 def setnode(self, node):
908 919 self._node = node
909 920 self._dirty = False
910 921
911 922 def iterentries(self):
912 923 self._load()
913 924 self._loadalllazy()
914 925 for p, n in sorted(
915 926 itertools.chain(self._dirs.items(), self._files.items())
916 927 ):
917 928 if p in self._files:
918 929 yield self._subpath(p), n, self._flags.get(p, b'')
919 930 else:
920 931 for x in n.iterentries():
921 932 yield x
922 933
923 934 def items(self):
924 935 self._load()
925 936 self._loadalllazy()
926 937 for p, n in sorted(
927 938 itertools.chain(self._dirs.items(), self._files.items())
928 939 ):
929 940 if p in self._files:
930 941 yield self._subpath(p), n
931 942 else:
932 943 for f, sn in pycompat.iteritems(n):
933 944 yield f, sn
934 945
935 946 iteritems = items
936 947
937 948 def iterkeys(self):
938 949 self._load()
939 950 self._loadalllazy()
940 951 for p in sorted(itertools.chain(self._dirs, self._files)):
941 952 if p in self._files:
942 953 yield self._subpath(p)
943 954 else:
944 955 for f in self._dirs[p]:
945 956 yield f
946 957
947 958 def keys(self):
948 959 return list(self.iterkeys())
949 960
950 961 def __iter__(self):
951 962 return self.iterkeys()
952 963
953 964 def __contains__(self, f):
954 965 if f is None:
955 966 return False
956 967 self._load()
957 968 dir, subpath = _splittopdir(f)
958 969 if dir:
959 970 self._loadlazy(dir)
960 971
961 972 if dir not in self._dirs:
962 973 return False
963 974
964 975 return self._dirs[dir].__contains__(subpath)
965 976 else:
966 977 return f in self._files
967 978
968 979 def get(self, f, default=None):
969 980 self._load()
970 981 dir, subpath = _splittopdir(f)
971 982 if dir:
972 983 self._loadlazy(dir)
973 984
974 985 if dir not in self._dirs:
975 986 return default
976 987 return self._dirs[dir].get(subpath, default)
977 988 else:
978 989 return self._files.get(f, default)
979 990
980 991 def __getitem__(self, f):
981 992 self._load()
982 993 dir, subpath = _splittopdir(f)
983 994 if dir:
984 995 self._loadlazy(dir)
985 996
986 997 return self._dirs[dir].__getitem__(subpath)
987 998 else:
988 999 return self._files[f]
989 1000
990 1001 def flags(self, f):
991 1002 self._load()
992 1003 dir, subpath = _splittopdir(f)
993 1004 if dir:
994 1005 self._loadlazy(dir)
995 1006
996 1007 if dir not in self._dirs:
997 1008 return b''
998 1009 return self._dirs[dir].flags(subpath)
999 1010 else:
1000 1011 if f in self._lazydirs or f in self._dirs:
1001 1012 return b''
1002 1013 return self._flags.get(f, b'')
1003 1014
1004 1015 def find(self, f):
1005 1016 self._load()
1006 1017 dir, subpath = _splittopdir(f)
1007 1018 if dir:
1008 1019 self._loadlazy(dir)
1009 1020
1010 1021 return self._dirs[dir].find(subpath)
1011 1022 else:
1012 1023 return self._files[f], self._flags.get(f, b'')
1013 1024
1014 1025 def __delitem__(self, f):
1015 1026 self._load()
1016 1027 dir, subpath = _splittopdir(f)
1017 1028 if dir:
1018 1029 self._loadlazy(dir)
1019 1030
1020 1031 self._dirs[dir].__delitem__(subpath)
1021 1032 # If the directory is now empty, remove it
1022 1033 if self._dirs[dir]._isempty():
1023 1034 del self._dirs[dir]
1024 1035 else:
1025 1036 del self._files[f]
1026 1037 if f in self._flags:
1027 1038 del self._flags[f]
1028 1039 self._dirty = True
1029 1040
1030 1041 def __setitem__(self, f, n):
1031 1042 assert n is not None
1032 1043 self._load()
1033 1044 dir, subpath = _splittopdir(f)
1034 1045 if dir:
1035 1046 self._loadlazy(dir)
1036 1047 if dir not in self._dirs:
1037 1048 self._dirs[dir] = treemanifest(self._subpath(dir))
1038 1049 self._dirs[dir].__setitem__(subpath, n)
1039 1050 else:
1040 1051 # manifest nodes are either 20 bytes or 32 bytes,
1041 1052 # depending on the hash in use. An extra byte is
1042 1053 # occasionally used by hg, but won't ever be
1043 1054 # persisted. Trim to 21 or 33 bytes as appropriate.
1044 1055 trim = 21 if len(n) < 25 else 33
1045 1056 self._files[f] = n[:trim] # to match manifestdict's behavior
1046 1057 self._dirty = True
1047 1058
1048 1059 def _load(self):
1049 1060 if self._loadfunc is not _noop:
1050 1061 lf, self._loadfunc = self._loadfunc, _noop
1051 1062 lf(self)
1052 1063 elif self._copyfunc is not _noop:
1053 1064 cf, self._copyfunc = self._copyfunc, _noop
1054 1065 cf(self)
1055 1066
1056 1067 def setflag(self, f, flags):
1057 1068 """Set the flags (symlink, executable) for path f."""
1058 1069 self._load()
1059 1070 dir, subpath = _splittopdir(f)
1060 1071 if dir:
1061 1072 self._loadlazy(dir)
1062 1073 if dir not in self._dirs:
1063 1074 self._dirs[dir] = treemanifest(self._subpath(dir))
1064 1075 self._dirs[dir].setflag(subpath, flags)
1065 1076 else:
1066 1077 self._flags[f] = flags
1067 1078 self._dirty = True
1068 1079
1069 1080 def copy(self):
1070 1081 copy = treemanifest(self._dir)
1071 1082 copy._node = self._node
1072 1083 copy._dirty = self._dirty
1073 1084 if self._copyfunc is _noop:
1074 1085
1075 1086 def _copyfunc(s):
1076 1087 self._load()
1077 1088 s._lazydirs = {
1078 1089 d: (p, n, r, True)
1079 1090 for d, (p, n, r, c) in pycompat.iteritems(self._lazydirs)
1080 1091 }
1081 1092 sdirs = s._dirs
1082 1093 for d, v in pycompat.iteritems(self._dirs):
1083 1094 sdirs[d] = v.copy()
1084 1095 s._files = dict.copy(self._files)
1085 1096 s._flags = dict.copy(self._flags)
1086 1097
1087 1098 if self._loadfunc is _noop:
1088 1099 _copyfunc(copy)
1089 1100 else:
1090 1101 copy._copyfunc = _copyfunc
1091 1102 else:
1092 1103 copy._copyfunc = self._copyfunc
1093 1104 return copy
1094 1105
1095 1106 def filesnotin(self, m2, match=None):
1096 1107 '''Set of files in this manifest that are not in the other'''
1097 1108 if match and not match.always():
1098 1109 m1 = self._matches(match)
1099 1110 m2 = m2._matches(match)
1100 1111 return m1.filesnotin(m2)
1101 1112
1102 1113 files = set()
1103 1114
1104 1115 def _filesnotin(t1, t2):
1105 1116 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1106 1117 return
1107 1118 t1._load()
1108 1119 t2._load()
1109 1120 self._loaddifflazy(t1, t2)
1110 1121 for d, m1 in pycompat.iteritems(t1._dirs):
1111 1122 if d in t2._dirs:
1112 1123 m2 = t2._dirs[d]
1113 1124 _filesnotin(m1, m2)
1114 1125 else:
1115 1126 files.update(m1.iterkeys())
1116 1127
1117 1128 for fn in t1._files:
1118 1129 if fn not in t2._files:
1119 1130 files.add(t1._subpath(fn))
1120 1131
1121 1132 _filesnotin(self, m2)
1122 1133 return files
1123 1134
1124 1135 @propertycache
1125 1136 def _alldirs(self):
1126 1137 return pathutil.dirs(self)
1127 1138
1128 1139 def dirs(self):
1129 1140 return self._alldirs
1130 1141
1131 1142 def hasdir(self, dir):
1132 1143 self._load()
1133 1144 topdir, subdir = _splittopdir(dir)
1134 1145 if topdir:
1135 1146 self._loadlazy(topdir)
1136 1147 if topdir in self._dirs:
1137 1148 return self._dirs[topdir].hasdir(subdir)
1138 1149 return False
1139 1150 dirslash = dir + b'/'
1140 1151 return dirslash in self._dirs or dirslash in self._lazydirs
1141 1152
1142 1153 def walk(self, match):
1143 1154 '''Generates matching file names.
1144 1155
1145 1156 It also reports nonexistent files by marking them bad with match.bad().
1146 1157 '''
1147 1158 if match.always():
1148 1159 for f in iter(self):
1149 1160 yield f
1150 1161 return
1151 1162
1152 1163 fset = set(match.files())
1153 1164
1154 1165 for fn in self._walk(match):
1155 1166 if fn in fset:
1156 1167 # specified pattern is the exact name
1157 1168 fset.remove(fn)
1158 1169 yield fn
1159 1170
1160 1171 # for dirstate.walk, files=[''] means "walk the whole tree".
1161 1172 # follow that here, too
1162 1173 fset.discard(b'')
1163 1174
1164 1175 for fn in sorted(fset):
1165 1176 if not self.hasdir(fn):
1166 1177 match.bad(fn, None)
1167 1178
1168 1179 def _walk(self, match):
1169 1180 '''Recursively generates matching file names for walk().'''
1170 1181 visit = match.visitchildrenset(self._dir[:-1])
1171 1182 if not visit:
1172 1183 return
1173 1184
1174 1185 # yield this dir's files and walk its submanifests
1175 1186 self._load()
1176 1187 visit = self._loadchildrensetlazy(visit)
1177 1188 for p in sorted(list(self._dirs) + list(self._files)):
1178 1189 if p in self._files:
1179 1190 fullp = self._subpath(p)
1180 1191 if match(fullp):
1181 1192 yield fullp
1182 1193 else:
1183 1194 if not visit or p[:-1] in visit:
1184 1195 for f in self._dirs[p]._walk(match):
1185 1196 yield f
1186 1197
1187 1198 def _matches(self, match):
1188 1199 '''recursively generate a new manifest filtered by the match argument.
1189 1200 '''
1190 1201 if match.always():
1191 1202 return self.copy()
1192 1203 return self._matches_inner(match)
1193 1204
1194 1205 def _matches_inner(self, match):
1195 1206 if match.always():
1196 1207 return self.copy()
1197 1208
1198 1209 visit = match.visitchildrenset(self._dir[:-1])
1199 1210 if visit == b'all':
1200 1211 return self.copy()
1201 1212 ret = treemanifest(self._dir)
1202 1213 if not visit:
1203 1214 return ret
1204 1215
1205 1216 self._load()
1206 1217 for fn in self._files:
1207 1218 # While visitchildrenset *usually* lists only subdirs, this is
1208 1219 # actually up to the matcher and may have some files in the set().
1209 1220 # If visit == 'this', we should obviously look at the files in this
1210 1221 # directory; if visit is a set, and fn is in it, we should inspect
1211 1222 # fn (but no need to inspect things not in the set).
1212 1223 if visit != b'this' and fn not in visit:
1213 1224 continue
1214 1225 fullp = self._subpath(fn)
1215 1226 # visitchildrenset isn't perfect, we still need to call the regular
1216 1227 # matcher code to further filter results.
1217 1228 if not match(fullp):
1218 1229 continue
1219 1230 ret._files[fn] = self._files[fn]
1220 1231 if fn in self._flags:
1221 1232 ret._flags[fn] = self._flags[fn]
1222 1233
1223 1234 visit = self._loadchildrensetlazy(visit)
1224 1235 for dir, subm in pycompat.iteritems(self._dirs):
1225 1236 if visit and dir[:-1] not in visit:
1226 1237 continue
1227 1238 m = subm._matches_inner(match)
1228 1239 if not m._isempty():
1229 1240 ret._dirs[dir] = m
1230 1241
1231 1242 if not ret._isempty():
1232 1243 ret._dirty = True
1233 1244 return ret
1234 1245
1235 1246 def fastdelta(self, base, changes):
1236 1247 raise FastdeltaUnavailable()
1237 1248
1238 1249 def diff(self, m2, match=None, clean=False):
1239 1250 '''Finds changes between the current manifest and m2.
1240 1251
1241 1252 Args:
1242 1253 m2: the manifest to which this manifest should be compared.
1243 1254 clean: if true, include files unchanged between these manifests
1244 1255 with a None value in the returned dictionary.
1245 1256
1246 1257 The result is returned as a dict with filename as key and
1247 1258 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1248 1259 nodeid in the current/other manifest and fl1/fl2 is the flag
1249 1260 in the current/other manifest. Where the file does not exist,
1250 1261 the nodeid will be None and the flags will be the empty
1251 1262 string.
1252 1263 '''
1253 1264 if match and not match.always():
1254 1265 m1 = self._matches(match)
1255 1266 m2 = m2._matches(match)
1256 1267 return m1.diff(m2, clean=clean)
1257 1268 result = {}
1258 1269 emptytree = treemanifest()
1259 1270
1260 1271 def _iterativediff(t1, t2, stack):
1261 1272 """compares two tree manifests and append new tree-manifests which
1262 1273 needs to be compared to stack"""
1263 1274 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1264 1275 return
1265 1276 t1._load()
1266 1277 t2._load()
1267 1278 self._loaddifflazy(t1, t2)
1268 1279
1269 1280 for d, m1 in pycompat.iteritems(t1._dirs):
1270 1281 m2 = t2._dirs.get(d, emptytree)
1271 1282 stack.append((m1, m2))
1272 1283
1273 1284 for d, m2 in pycompat.iteritems(t2._dirs):
1274 1285 if d not in t1._dirs:
1275 1286 stack.append((emptytree, m2))
1276 1287
1277 1288 for fn, n1 in pycompat.iteritems(t1._files):
1278 1289 fl1 = t1._flags.get(fn, b'')
1279 1290 n2 = t2._files.get(fn, None)
1280 1291 fl2 = t2._flags.get(fn, b'')
1281 1292 if n1 != n2 or fl1 != fl2:
1282 1293 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1283 1294 elif clean:
1284 1295 result[t1._subpath(fn)] = None
1285 1296
1286 1297 for fn, n2 in pycompat.iteritems(t2._files):
1287 1298 if fn not in t1._files:
1288 1299 fl2 = t2._flags.get(fn, b'')
1289 1300 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1290 1301
1291 1302 stackls = []
1292 1303 _iterativediff(self, m2, stackls)
1293 1304 while stackls:
1294 1305 t1, t2 = stackls.pop()
1295 1306 # stackls is populated in the function call
1296 1307 _iterativediff(t1, t2, stackls)
1297 1308 return result
1298 1309
1299 1310 def unmodifiedsince(self, m2):
1300 1311 return not self._dirty and not m2._dirty and self._node == m2._node
1301 1312
1302 1313 def parse(self, text, readsubtree):
1303 1314 selflazy = self._lazydirs
1304 1315 subpath = self._subpath
1305 1316 for f, n, fl in _parse(text):
1306 1317 if fl == b't':
1307 1318 f = f + b'/'
1308 1319 # False below means "doesn't need to be copied" and can use the
1309 1320 # cached value from readsubtree directly.
1310 1321 selflazy[f] = (subpath(f), n, readsubtree, False)
1311 1322 elif b'/' in f:
1312 1323 # This is a flat manifest, so use __setitem__ and setflag rather
1313 1324 # than assigning directly to _files and _flags, so we can
1314 1325 # assign a path in a subdirectory, and to mark dirty (compared
1315 1326 # to nullid).
1316 1327 self[f] = n
1317 1328 if fl:
1318 1329 self.setflag(f, fl)
1319 1330 else:
1320 1331 # Assigning to _files and _flags avoids marking as dirty,
1321 1332 # and should be a little faster.
1322 1333 self._files[f] = n
1323 1334 if fl:
1324 1335 self._flags[f] = fl
1325 1336
1326 1337 def text(self):
1327 1338 """Get the full data of this manifest as a bytestring."""
1328 1339 self._load()
1329 1340 return _text(self.iterentries())
1330 1341
1331 1342 def dirtext(self):
1332 1343 """Get the full data of this directory as a bytestring. Make sure that
1333 1344 any submanifests have been written first, so their nodeids are correct.
1334 1345 """
1335 1346 self._load()
1336 1347 flags = self.flags
1337 1348 lazydirs = [
1338 1349 (d[:-1], v[1], b't') for d, v in pycompat.iteritems(self._lazydirs)
1339 1350 ]
1340 1351 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1341 1352 files = [(f, self._files[f], flags(f)) for f in self._files]
1342 1353 return _text(sorted(dirs + files + lazydirs))
1343 1354
1344 1355 def read(self, gettext, readsubtree):
1345 1356 def _load_for_read(s):
1346 1357 s.parse(gettext(), readsubtree)
1347 1358 s._dirty = False
1348 1359
1349 1360 self._loadfunc = _load_for_read
1350 1361
1351 1362 def writesubtrees(self, m1, m2, writesubtree, match):
1352 1363 self._load() # for consistency; should never have any effect here
1353 1364 m1._load()
1354 1365 m2._load()
1355 1366 emptytree = treemanifest()
1356 1367
1357 1368 def getnode(m, d):
1358 1369 ld = m._lazydirs.get(d)
1359 1370 if ld:
1360 1371 return ld[1]
1361 1372 return m._dirs.get(d, emptytree)._node
1362 1373
1363 1374 # let's skip investigating things that `match` says we do not need.
1364 1375 visit = match.visitchildrenset(self._dir[:-1])
1365 1376 visit = self._loadchildrensetlazy(visit)
1366 1377 if visit == b'this' or visit == b'all':
1367 1378 visit = None
1368 1379 for d, subm in pycompat.iteritems(self._dirs):
1369 1380 if visit and d[:-1] not in visit:
1370 1381 continue
1371 1382 subp1 = getnode(m1, d)
1372 1383 subp2 = getnode(m2, d)
1373 1384 if subp1 == nullid:
1374 1385 subp1, subp2 = subp2, subp1
1375 1386 writesubtree(subm, subp1, subp2, match)
1376 1387
1377 1388 def walksubtrees(self, matcher=None):
1378 1389 """Returns an iterator of the subtrees of this manifest, including this
1379 1390 manifest itself.
1380 1391
1381 1392 If `matcher` is provided, it only returns subtrees that match.
1382 1393 """
1383 1394 if matcher and not matcher.visitdir(self._dir[:-1]):
1384 1395 return
1385 1396 if not matcher or matcher(self._dir[:-1]):
1386 1397 yield self
1387 1398
1388 1399 self._load()
1389 1400 # OPT: use visitchildrenset to avoid loading everything.
1390 1401 self._loadalllazy()
1391 1402 for d, subm in pycompat.iteritems(self._dirs):
1392 1403 for subtree in subm.walksubtrees(matcher=matcher):
1393 1404 yield subtree
1394 1405
1395 1406
1396 1407 class manifestfulltextcache(util.lrucachedict):
1397 1408 """File-backed LRU cache for the manifest cache
1398 1409
1399 1410 File consists of entries, up to EOF:
1400 1411
1401 1412 - 20 bytes node, 4 bytes length, <length> manifest data
1402 1413
1403 1414 These are written in reverse cache order (oldest to newest).
1404 1415
1405 1416 """
1406 1417
1407 1418 _file = b'manifestfulltextcache'
1408 1419
1409 1420 def __init__(self, max):
1410 1421 super(manifestfulltextcache, self).__init__(max)
1411 1422 self._dirty = False
1412 1423 self._read = False
1413 1424 self._opener = None
1414 1425
1415 1426 def read(self):
1416 1427 if self._read or self._opener is None:
1417 1428 return
1418 1429
1419 1430 try:
1420 1431 with self._opener(self._file) as fp:
1421 1432 set = super(manifestfulltextcache, self).__setitem__
1422 1433 # ignore trailing data, this is a cache, corruption is skipped
1423 1434 while True:
1424 1435 node = fp.read(20)
1425 1436 if len(node) < 20:
1426 1437 break
1427 1438 try:
1428 1439 size = struct.unpack(b'>L', fp.read(4))[0]
1429 1440 except struct.error:
1430 1441 break
1431 1442 value = bytearray(fp.read(size))
1432 1443 if len(value) != size:
1433 1444 break
1434 1445 set(node, value)
1435 1446 except IOError:
1436 1447 # the file is allowed to be missing
1437 1448 pass
1438 1449
1439 1450 self._read = True
1440 1451 self._dirty = False
1441 1452
1442 1453 def write(self):
1443 1454 if not self._dirty or self._opener is None:
1444 1455 return
1445 1456 # rotate backwards to the first used node
1446 1457 with self._opener(
1447 1458 self._file, b'w', atomictemp=True, checkambig=True
1448 1459 ) as fp:
1449 1460 node = self._head.prev
1450 1461 while True:
1451 1462 if node.key in self._cache:
1452 1463 fp.write(node.key)
1453 1464 fp.write(struct.pack(b'>L', len(node.value)))
1454 1465 fp.write(node.value)
1455 1466 if node is self._head:
1456 1467 break
1457 1468 node = node.prev
1458 1469
1459 1470 def __len__(self):
1460 1471 if not self._read:
1461 1472 self.read()
1462 1473 return super(manifestfulltextcache, self).__len__()
1463 1474
1464 1475 def __contains__(self, k):
1465 1476 if not self._read:
1466 1477 self.read()
1467 1478 return super(manifestfulltextcache, self).__contains__(k)
1468 1479
1469 1480 def __iter__(self):
1470 1481 if not self._read:
1471 1482 self.read()
1472 1483 return super(manifestfulltextcache, self).__iter__()
1473 1484
1474 1485 def __getitem__(self, k):
1475 1486 if not self._read:
1476 1487 self.read()
1477 1488 # the cache lru order can change on read
1478 1489 setdirty = self._cache.get(k) is not self._head
1479 1490 value = super(manifestfulltextcache, self).__getitem__(k)
1480 1491 if setdirty:
1481 1492 self._dirty = True
1482 1493 return value
1483 1494
1484 1495 def __setitem__(self, k, v):
1485 1496 if not self._read:
1486 1497 self.read()
1487 1498 super(manifestfulltextcache, self).__setitem__(k, v)
1488 1499 self._dirty = True
1489 1500
1490 1501 def __delitem__(self, k):
1491 1502 if not self._read:
1492 1503 self.read()
1493 1504 super(manifestfulltextcache, self).__delitem__(k)
1494 1505 self._dirty = True
1495 1506
1496 1507 def get(self, k, default=None):
1497 1508 if not self._read:
1498 1509 self.read()
1499 1510 return super(manifestfulltextcache, self).get(k, default=default)
1500 1511
1501 1512 def clear(self, clear_persisted_data=False):
1502 1513 super(manifestfulltextcache, self).clear()
1503 1514 if clear_persisted_data:
1504 1515 self._dirty = True
1505 1516 self.write()
1506 1517 self._read = False
1507 1518
1508 1519
1509 1520 # and upper bound of what we expect from compression
1510 1521 # (real live value seems to be "3")
1511 1522 MAXCOMPRESSION = 3
1512 1523
1513 1524
1514 1525 class FastdeltaUnavailable(Exception):
1515 1526 """Exception raised when fastdelta isn't usable on a manifest."""
1516 1527
1517 1528
1518 1529 @interfaceutil.implementer(repository.imanifeststorage)
1519 1530 class manifestrevlog(object):
1520 1531 '''A revlog that stores manifest texts. This is responsible for caching the
1521 1532 full-text manifest contents.
1522 1533 '''
1523 1534
1524 1535 def __init__(
1525 1536 self,
1526 1537 opener,
1527 1538 tree=b'',
1528 1539 dirlogcache=None,
1529 1540 indexfile=None,
1530 1541 treemanifest=False,
1531 1542 ):
1532 1543 """Constructs a new manifest revlog
1533 1544
1534 1545 `indexfile` - used by extensions to have two manifests at once, like
1535 1546 when transitioning between flatmanifeset and treemanifests.
1536 1547
1537 1548 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1538 1549 options can also be used to make this a tree manifest revlog. The opener
1539 1550 option takes precedence, so if it is set to True, we ignore whatever
1540 1551 value is passed in to the constructor.
1541 1552 """
1542 1553 # During normal operations, we expect to deal with not more than four
1543 1554 # revs at a time (such as during commit --amend). When rebasing large
1544 1555 # stacks of commits, the number can go up, hence the config knob below.
1545 1556 cachesize = 4
1546 1557 optiontreemanifest = False
1547 1558 opts = getattr(opener, 'options', None)
1548 1559 if opts is not None:
1549 1560 cachesize = opts.get(b'manifestcachesize', cachesize)
1550 1561 optiontreemanifest = opts.get(b'treemanifest', False)
1551 1562
1552 1563 self._treeondisk = optiontreemanifest or treemanifest
1553 1564
1554 1565 self._fulltextcache = manifestfulltextcache(cachesize)
1555 1566
1556 1567 if tree:
1557 1568 assert self._treeondisk, b'opts is %r' % opts
1558 1569
1559 1570 if indexfile is None:
1560 1571 indexfile = b'00manifest.i'
1561 1572 if tree:
1562 1573 indexfile = b"meta/" + tree + indexfile
1563 1574
1564 1575 self.tree = tree
1565 1576
1566 1577 # The dirlogcache is kept on the root manifest log
1567 1578 if tree:
1568 1579 self._dirlogcache = dirlogcache
1569 1580 else:
1570 1581 self._dirlogcache = {b'': self}
1571 1582
1572 1583 self._revlog = revlog.revlog(
1573 1584 opener,
1574 1585 indexfile,
1575 1586 # only root indexfile is cached
1576 1587 checkambig=not bool(tree),
1577 1588 mmaplargeindex=True,
1578 1589 upperboundcomp=MAXCOMPRESSION,
1579 1590 )
1580 1591
1581 1592 self.index = self._revlog.index
1582 1593 self.version = self._revlog.version
1583 1594 self._generaldelta = self._revlog._generaldelta
1584 1595
1585 1596 def _setupmanifestcachehooks(self, repo):
1586 1597 """Persist the manifestfulltextcache on lock release"""
1587 1598 if not util.safehasattr(repo, b'_wlockref'):
1588 1599 return
1589 1600
1590 1601 self._fulltextcache._opener = repo.wcachevfs
1591 1602 if repo._currentlock(repo._wlockref) is None:
1592 1603 return
1593 1604
1594 1605 reporef = weakref.ref(repo)
1595 1606 manifestrevlogref = weakref.ref(self)
1596 1607
1597 1608 def persistmanifestcache(success):
1598 1609 # Repo is in an unknown state, do not persist.
1599 1610 if not success:
1600 1611 return
1601 1612
1602 1613 repo = reporef()
1603 1614 self = manifestrevlogref()
1604 1615 if repo is None or self is None:
1605 1616 return
1606 1617 if repo.manifestlog.getstorage(b'') is not self:
1607 1618 # there's a different manifest in play now, abort
1608 1619 return
1609 1620 self._fulltextcache.write()
1610 1621
1611 1622 repo._afterlock(persistmanifestcache)
1612 1623
1613 1624 @property
1614 1625 def fulltextcache(self):
1615 1626 return self._fulltextcache
1616 1627
1617 1628 def clearcaches(self, clear_persisted_data=False):
1618 1629 self._revlog.clearcaches()
1619 1630 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1620 1631 self._dirlogcache = {self.tree: self}
1621 1632
1622 1633 def dirlog(self, d):
1623 1634 if d:
1624 1635 assert self._treeondisk
1625 1636 if d not in self._dirlogcache:
1626 1637 mfrevlog = manifestrevlog(
1627 1638 self.opener, d, self._dirlogcache, treemanifest=self._treeondisk
1628 1639 )
1629 1640 self._dirlogcache[d] = mfrevlog
1630 1641 return self._dirlogcache[d]
1631 1642
1632 1643 def add(
1633 1644 self,
1634 1645 m,
1635 1646 transaction,
1636 1647 link,
1637 1648 p1,
1638 1649 p2,
1639 1650 added,
1640 1651 removed,
1641 1652 readtree=None,
1642 1653 match=None,
1643 1654 ):
1644 1655 try:
1645 1656 if p1 not in self.fulltextcache:
1646 1657 raise FastdeltaUnavailable()
1647 1658 # If our first parent is in the manifest cache, we can
1648 1659 # compute a delta here using properties we know about the
1649 1660 # manifest up-front, which may save time later for the
1650 1661 # revlog layer.
1651 1662
1652 1663 _checkforbidden(added)
1653 1664 # combine the changed lists into one sorted iterator
1654 1665 work = heapq.merge(
1655 1666 [(x, False) for x in sorted(added)],
1656 1667 [(x, True) for x in sorted(removed)],
1657 1668 )
1658 1669
1659 1670 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1660 1671 cachedelta = self._revlog.rev(p1), deltatext
1661 1672 text = util.buffer(arraytext)
1662 1673 n = self._revlog.addrevision(
1663 1674 text, transaction, link, p1, p2, cachedelta
1664 1675 )
1665 1676 except FastdeltaUnavailable:
1666 1677 # The first parent manifest isn't already loaded or the
1667 1678 # manifest implementation doesn't support fastdelta, so
1668 1679 # we'll just encode a fulltext of the manifest and pass
1669 1680 # that through to the revlog layer, and let it handle the
1670 1681 # delta process.
1671 1682 if self._treeondisk:
1672 1683 assert readtree, b"readtree must be set for treemanifest writes"
1673 1684 assert match, b"match must be specified for treemanifest writes"
1674 1685 m1 = readtree(self.tree, p1)
1675 1686 m2 = readtree(self.tree, p2)
1676 1687 n = self._addtree(
1677 1688 m, transaction, link, m1, m2, readtree, match=match
1678 1689 )
1679 1690 arraytext = None
1680 1691 else:
1681 1692 text = m.text()
1682 1693 n = self._revlog.addrevision(text, transaction, link, p1, p2)
1683 1694 arraytext = bytearray(text)
1684 1695
1685 1696 if arraytext is not None:
1686 1697 self.fulltextcache[n] = arraytext
1687 1698
1688 1699 return n
1689 1700
1690 1701 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1691 1702 # If the manifest is unchanged compared to one parent,
1692 1703 # don't write a new revision
1693 1704 if self.tree != b'' and (
1694 1705 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1695 1706 ):
1696 1707 return m.node()
1697 1708
1698 1709 def writesubtree(subm, subp1, subp2, match):
1699 1710 sublog = self.dirlog(subm.dir())
1700 1711 sublog.add(
1701 1712 subm,
1702 1713 transaction,
1703 1714 link,
1704 1715 subp1,
1705 1716 subp2,
1706 1717 None,
1707 1718 None,
1708 1719 readtree=readtree,
1709 1720 match=match,
1710 1721 )
1711 1722
1712 1723 m.writesubtrees(m1, m2, writesubtree, match)
1713 1724 text = m.dirtext()
1714 1725 n = None
1715 1726 if self.tree != b'':
1716 1727 # Double-check whether contents are unchanged to one parent
1717 1728 if text == m1.dirtext():
1718 1729 n = m1.node()
1719 1730 elif text == m2.dirtext():
1720 1731 n = m2.node()
1721 1732
1722 1733 if not n:
1723 1734 n = self._revlog.addrevision(
1724 1735 text, transaction, link, m1.node(), m2.node()
1725 1736 )
1726 1737
1727 1738 # Save nodeid so parent manifest can calculate its nodeid
1728 1739 m.setnode(n)
1729 1740 return n
1730 1741
1731 1742 def __len__(self):
1732 1743 return len(self._revlog)
1733 1744
1734 1745 def __iter__(self):
1735 1746 return self._revlog.__iter__()
1736 1747
1737 1748 def rev(self, node):
1738 1749 return self._revlog.rev(node)
1739 1750
1740 1751 def node(self, rev):
1741 1752 return self._revlog.node(rev)
1742 1753
1743 1754 def lookup(self, value):
1744 1755 return self._revlog.lookup(value)
1745 1756
1746 1757 def parentrevs(self, rev):
1747 1758 return self._revlog.parentrevs(rev)
1748 1759
1749 1760 def parents(self, node):
1750 1761 return self._revlog.parents(node)
1751 1762
1752 1763 def linkrev(self, rev):
1753 1764 return self._revlog.linkrev(rev)
1754 1765
1755 1766 def checksize(self):
1756 1767 return self._revlog.checksize()
1757 1768
1758 1769 def revision(self, node, _df=None, raw=False):
1759 1770 return self._revlog.revision(node, _df=_df, raw=raw)
1760 1771
1761 1772 def rawdata(self, node, _df=None):
1762 1773 return self._revlog.rawdata(node, _df=_df)
1763 1774
1764 1775 def revdiff(self, rev1, rev2):
1765 1776 return self._revlog.revdiff(rev1, rev2)
1766 1777
1767 1778 def cmp(self, node, text):
1768 1779 return self._revlog.cmp(node, text)
1769 1780
1770 1781 def deltaparent(self, rev):
1771 1782 return self._revlog.deltaparent(rev)
1772 1783
1773 1784 def emitrevisions(
1774 1785 self,
1775 1786 nodes,
1776 1787 nodesorder=None,
1777 1788 revisiondata=False,
1778 1789 assumehaveparentrevisions=False,
1779 1790 deltamode=repository.CG_DELTAMODE_STD,
1780 1791 ):
1781 1792 return self._revlog.emitrevisions(
1782 1793 nodes,
1783 1794 nodesorder=nodesorder,
1784 1795 revisiondata=revisiondata,
1785 1796 assumehaveparentrevisions=assumehaveparentrevisions,
1786 1797 deltamode=deltamode,
1787 1798 )
1788 1799
1789 1800 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1790 1801 return self._revlog.addgroup(
1791 1802 deltas, linkmapper, transaction, addrevisioncb=addrevisioncb
1792 1803 )
1793 1804
1794 1805 def rawsize(self, rev):
1795 1806 return self._revlog.rawsize(rev)
1796 1807
1797 1808 def getstrippoint(self, minlink):
1798 1809 return self._revlog.getstrippoint(minlink)
1799 1810
1800 1811 def strip(self, minlink, transaction):
1801 1812 return self._revlog.strip(minlink, transaction)
1802 1813
1803 1814 def files(self):
1804 1815 return self._revlog.files()
1805 1816
1806 1817 def clone(self, tr, destrevlog, **kwargs):
1807 1818 if not isinstance(destrevlog, manifestrevlog):
1808 1819 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1809 1820
1810 1821 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1811 1822
1812 1823 def storageinfo(
1813 1824 self,
1814 1825 exclusivefiles=False,
1815 1826 sharedfiles=False,
1816 1827 revisionscount=False,
1817 1828 trackedsize=False,
1818 1829 storedsize=False,
1819 1830 ):
1820 1831 return self._revlog.storageinfo(
1821 1832 exclusivefiles=exclusivefiles,
1822 1833 sharedfiles=sharedfiles,
1823 1834 revisionscount=revisionscount,
1824 1835 trackedsize=trackedsize,
1825 1836 storedsize=storedsize,
1826 1837 )
1827 1838
1828 1839 @property
1829 1840 def indexfile(self):
1830 1841 return self._revlog.indexfile
1831 1842
1832 1843 @indexfile.setter
1833 1844 def indexfile(self, value):
1834 1845 self._revlog.indexfile = value
1835 1846
1836 1847 @property
1837 1848 def opener(self):
1838 1849 return self._revlog.opener
1839 1850
1840 1851 @opener.setter
1841 1852 def opener(self, value):
1842 1853 self._revlog.opener = value
1843 1854
1844 1855
1845 1856 @interfaceutil.implementer(repository.imanifestlog)
1846 1857 class manifestlog(object):
1847 1858 """A collection class representing the collection of manifest snapshots
1848 1859 referenced by commits in the repository.
1849 1860
1850 1861 In this situation, 'manifest' refers to the abstract concept of a snapshot
1851 1862 of the list of files in the given commit. Consumers of the output of this
1852 1863 class do not care about the implementation details of the actual manifests
1853 1864 they receive (i.e. tree or flat or lazily loaded, etc)."""
1854 1865
1855 1866 def __init__(self, opener, repo, rootstore, narrowmatch):
1856 1867 usetreemanifest = False
1857 1868 cachesize = 4
1858 1869
1859 1870 opts = getattr(opener, 'options', None)
1860 1871 if opts is not None:
1861 1872 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1862 1873 cachesize = opts.get(b'manifestcachesize', cachesize)
1863 1874
1864 1875 self._treemanifests = usetreemanifest
1865 1876
1866 1877 self._rootstore = rootstore
1867 1878 self._rootstore._setupmanifestcachehooks(repo)
1868 1879 self._narrowmatch = narrowmatch
1869 1880
1870 1881 # A cache of the manifestctx or treemanifestctx for each directory
1871 1882 self._dirmancache = {}
1872 1883 self._dirmancache[b''] = util.lrucachedict(cachesize)
1873 1884
1874 1885 self._cachesize = cachesize
1875 1886
1876 1887 def __getitem__(self, node):
1877 1888 """Retrieves the manifest instance for the given node. Throws a
1878 1889 LookupError if not found.
1879 1890 """
1880 1891 return self.get(b'', node)
1881 1892
1882 1893 def get(self, tree, node, verify=True):
1883 1894 """Retrieves the manifest instance for the given node. Throws a
1884 1895 LookupError if not found.
1885 1896
1886 1897 `verify` - if True an exception will be thrown if the node is not in
1887 1898 the revlog
1888 1899 """
1889 1900 if node in self._dirmancache.get(tree, ()):
1890 1901 return self._dirmancache[tree][node]
1891 1902
1892 1903 if not self._narrowmatch.always():
1893 1904 if not self._narrowmatch.visitdir(tree[:-1]):
1894 1905 return excludeddirmanifestctx(tree, node)
1895 1906 if tree:
1896 1907 if self._rootstore._treeondisk:
1897 1908 if verify:
1898 1909 # Side-effect is LookupError is raised if node doesn't
1899 1910 # exist.
1900 1911 self.getstorage(tree).rev(node)
1901 1912
1902 1913 m = treemanifestctx(self, tree, node)
1903 1914 else:
1904 1915 raise error.Abort(
1905 1916 _(
1906 1917 b"cannot ask for manifest directory '%s' in a flat "
1907 1918 b"manifest"
1908 1919 )
1909 1920 % tree
1910 1921 )
1911 1922 else:
1912 1923 if verify:
1913 1924 # Side-effect is LookupError is raised if node doesn't exist.
1914 1925 self._rootstore.rev(node)
1915 1926
1916 1927 if self._treemanifests:
1917 1928 m = treemanifestctx(self, b'', node)
1918 1929 else:
1919 1930 m = manifestctx(self, node)
1920 1931
1921 1932 if node != nullid:
1922 1933 mancache = self._dirmancache.get(tree)
1923 1934 if not mancache:
1924 1935 mancache = util.lrucachedict(self._cachesize)
1925 1936 self._dirmancache[tree] = mancache
1926 1937 mancache[node] = m
1927 1938 return m
1928 1939
1929 1940 def getstorage(self, tree):
1930 1941 return self._rootstore.dirlog(tree)
1931 1942
1932 1943 def clearcaches(self, clear_persisted_data=False):
1933 1944 self._dirmancache.clear()
1934 1945 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
1935 1946
1936 1947 def rev(self, node):
1937 1948 return self._rootstore.rev(node)
1938 1949
1939 1950
1940 1951 @interfaceutil.implementer(repository.imanifestrevisionwritable)
1941 1952 class memmanifestctx(object):
1942 1953 def __init__(self, manifestlog):
1943 1954 self._manifestlog = manifestlog
1944 1955 self._manifestdict = manifestdict()
1945 1956
1946 1957 def _storage(self):
1947 1958 return self._manifestlog.getstorage(b'')
1948 1959
1949 1960 def copy(self):
1950 1961 memmf = memmanifestctx(self._manifestlog)
1951 1962 memmf._manifestdict = self.read().copy()
1952 1963 return memmf
1953 1964
1954 1965 def read(self):
1955 1966 return self._manifestdict
1956 1967
1957 1968 def write(self, transaction, link, p1, p2, added, removed, match=None):
1958 1969 return self._storage().add(
1959 1970 self._manifestdict,
1960 1971 transaction,
1961 1972 link,
1962 1973 p1,
1963 1974 p2,
1964 1975 added,
1965 1976 removed,
1966 1977 match=match,
1967 1978 )
1968 1979
1969 1980
1970 1981 @interfaceutil.implementer(repository.imanifestrevisionstored)
1971 1982 class manifestctx(object):
1972 1983 """A class representing a single revision of a manifest, including its
1973 1984 contents, its parent revs, and its linkrev.
1974 1985 """
1975 1986
1976 1987 def __init__(self, manifestlog, node):
1977 1988 self._manifestlog = manifestlog
1978 1989 self._data = None
1979 1990
1980 1991 self._node = node
1981 1992
1982 1993 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1983 1994 # but let's add it later when something needs it and we can load it
1984 1995 # lazily.
1985 1996 # self.p1, self.p2 = store.parents(node)
1986 1997 # rev = store.rev(node)
1987 1998 # self.linkrev = store.linkrev(rev)
1988 1999
1989 2000 def _storage(self):
1990 2001 return self._manifestlog.getstorage(b'')
1991 2002
1992 2003 def node(self):
1993 2004 return self._node
1994 2005
1995 2006 def copy(self):
1996 2007 memmf = memmanifestctx(self._manifestlog)
1997 2008 memmf._manifestdict = self.read().copy()
1998 2009 return memmf
1999 2010
2000 2011 @propertycache
2001 2012 def parents(self):
2002 2013 return self._storage().parents(self._node)
2003 2014
2004 2015 def read(self):
2005 2016 if self._data is None:
2006 2017 if self._node == nullid:
2007 2018 self._data = manifestdict()
2008 2019 else:
2009 2020 store = self._storage()
2010 2021 if self._node in store.fulltextcache:
2011 2022 text = pycompat.bytestr(store.fulltextcache[self._node])
2012 2023 else:
2013 2024 text = store.revision(self._node)
2014 2025 arraytext = bytearray(text)
2015 2026 store.fulltextcache[self._node] = arraytext
2016 2027 self._data = manifestdict(text)
2017 2028 return self._data
2018 2029
2019 2030 def readfast(self, shallow=False):
2020 2031 '''Calls either readdelta or read, based on which would be less work.
2021 2032 readdelta is called if the delta is against the p1, and therefore can be
2022 2033 read quickly.
2023 2034
2024 2035 If `shallow` is True, nothing changes since this is a flat manifest.
2025 2036 '''
2026 2037 store = self._storage()
2027 2038 r = store.rev(self._node)
2028 2039 deltaparent = store.deltaparent(r)
2029 2040 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2030 2041 return self.readdelta()
2031 2042 return self.read()
2032 2043
2033 2044 def readdelta(self, shallow=False):
2034 2045 '''Returns a manifest containing just the entries that are present
2035 2046 in this manifest, but not in its p1 manifest. This is efficient to read
2036 2047 if the revlog delta is already p1.
2037 2048
2038 2049 Changing the value of `shallow` has no effect on flat manifests.
2039 2050 '''
2040 2051 store = self._storage()
2041 2052 r = store.rev(self._node)
2042 2053 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2043 2054 return manifestdict(d)
2044 2055
2045 2056 def find(self, key):
2046 2057 return self.read().find(key)
2047 2058
2048 2059
2049 2060 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2050 2061 class memtreemanifestctx(object):
2051 2062 def __init__(self, manifestlog, dir=b''):
2052 2063 self._manifestlog = manifestlog
2053 2064 self._dir = dir
2054 2065 self._treemanifest = treemanifest()
2055 2066
2056 2067 def _storage(self):
2057 2068 return self._manifestlog.getstorage(b'')
2058 2069
2059 2070 def copy(self):
2060 2071 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2061 2072 memmf._treemanifest = self._treemanifest.copy()
2062 2073 return memmf
2063 2074
2064 2075 def read(self):
2065 2076 return self._treemanifest
2066 2077
2067 2078 def write(self, transaction, link, p1, p2, added, removed, match=None):
2068 2079 def readtree(dir, node):
2069 2080 return self._manifestlog.get(dir, node).read()
2070 2081
2071 2082 return self._storage().add(
2072 2083 self._treemanifest,
2073 2084 transaction,
2074 2085 link,
2075 2086 p1,
2076 2087 p2,
2077 2088 added,
2078 2089 removed,
2079 2090 readtree=readtree,
2080 2091 match=match,
2081 2092 )
2082 2093
2083 2094
2084 2095 @interfaceutil.implementer(repository.imanifestrevisionstored)
2085 2096 class treemanifestctx(object):
2086 2097 def __init__(self, manifestlog, dir, node):
2087 2098 self._manifestlog = manifestlog
2088 2099 self._dir = dir
2089 2100 self._data = None
2090 2101
2091 2102 self._node = node
2092 2103
2093 2104 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2094 2105 # we can instantiate treemanifestctx objects for directories we don't
2095 2106 # have on disk.
2096 2107 # self.p1, self.p2 = store.parents(node)
2097 2108 # rev = store.rev(node)
2098 2109 # self.linkrev = store.linkrev(rev)
2099 2110
2100 2111 def _storage(self):
2101 2112 narrowmatch = self._manifestlog._narrowmatch
2102 2113 if not narrowmatch.always():
2103 2114 if not narrowmatch.visitdir(self._dir[:-1]):
2104 2115 return excludedmanifestrevlog(self._dir)
2105 2116 return self._manifestlog.getstorage(self._dir)
2106 2117
2107 2118 def read(self):
2108 2119 if self._data is None:
2109 2120 store = self._storage()
2110 2121 if self._node == nullid:
2111 2122 self._data = treemanifest()
2112 2123 # TODO accessing non-public API
2113 2124 elif store._treeondisk:
2114 2125 m = treemanifest(dir=self._dir)
2115 2126
2116 2127 def gettext():
2117 2128 return store.revision(self._node)
2118 2129
2119 2130 def readsubtree(dir, subm):
2120 2131 # Set verify to False since we need to be able to create
2121 2132 # subtrees for trees that don't exist on disk.
2122 2133 return self._manifestlog.get(dir, subm, verify=False).read()
2123 2134
2124 2135 m.read(gettext, readsubtree)
2125 2136 m.setnode(self._node)
2126 2137 self._data = m
2127 2138 else:
2128 2139 if self._node in store.fulltextcache:
2129 2140 text = pycompat.bytestr(store.fulltextcache[self._node])
2130 2141 else:
2131 2142 text = store.revision(self._node)
2132 2143 arraytext = bytearray(text)
2133 2144 store.fulltextcache[self._node] = arraytext
2134 2145 self._data = treemanifest(dir=self._dir, text=text)
2135 2146
2136 2147 return self._data
2137 2148
2138 2149 def node(self):
2139 2150 return self._node
2140 2151
2141 2152 def copy(self):
2142 2153 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2143 2154 memmf._treemanifest = self.read().copy()
2144 2155 return memmf
2145 2156
2146 2157 @propertycache
2147 2158 def parents(self):
2148 2159 return self._storage().parents(self._node)
2149 2160
2150 2161 def readdelta(self, shallow=False):
2151 2162 '''Returns a manifest containing just the entries that are present
2152 2163 in this manifest, but not in its p1 manifest. This is efficient to read
2153 2164 if the revlog delta is already p1.
2154 2165
2155 2166 If `shallow` is True, this will read the delta for this directory,
2156 2167 without recursively reading subdirectory manifests. Instead, any
2157 2168 subdirectory entry will be reported as it appears in the manifest, i.e.
2158 2169 the subdirectory will be reported among files and distinguished only by
2159 2170 its 't' flag.
2160 2171 '''
2161 2172 store = self._storage()
2162 2173 if shallow:
2163 2174 r = store.rev(self._node)
2164 2175 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2165 2176 return manifestdict(d)
2166 2177 else:
2167 2178 # Need to perform a slow delta
2168 2179 r0 = store.deltaparent(store.rev(self._node))
2169 2180 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2170 2181 m1 = self.read()
2171 2182 md = treemanifest(dir=self._dir)
2172 2183 for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
2173 2184 if n1:
2174 2185 md[f] = n1
2175 2186 if fl1:
2176 2187 md.setflag(f, fl1)
2177 2188 return md
2178 2189
2179 2190 def readfast(self, shallow=False):
2180 2191 '''Calls either readdelta or read, based on which would be less work.
2181 2192 readdelta is called if the delta is against the p1, and therefore can be
2182 2193 read quickly.
2183 2194
2184 2195 If `shallow` is True, it only returns the entries from this manifest,
2185 2196 and not any submanifests.
2186 2197 '''
2187 2198 store = self._storage()
2188 2199 r = store.rev(self._node)
2189 2200 deltaparent = store.deltaparent(r)
2190 2201 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2191 2202 return self.readdelta(shallow=shallow)
2192 2203
2193 2204 if shallow:
2194 2205 return manifestdict(store.revision(self._node))
2195 2206 else:
2196 2207 return self.read()
2197 2208
2198 2209 def find(self, key):
2199 2210 return self.read().find(key)
2200 2211
2201 2212
2202 2213 class excludeddir(treemanifest):
2203 2214 """Stand-in for a directory that is excluded from the repository.
2204 2215
2205 2216 With narrowing active on a repository that uses treemanifests,
2206 2217 some of the directory revlogs will be excluded from the resulting
2207 2218 clone. This is a huge storage win for clients, but means we need
2208 2219 some sort of pseudo-manifest to surface to internals so we can
2209 2220 detect a merge conflict outside the narrowspec. That's what this
2210 2221 class is: it stands in for a directory whose node is known, but
2211 2222 whose contents are unknown.
2212 2223 """
2213 2224
2214 2225 def __init__(self, dir, node):
2215 2226 super(excludeddir, self).__init__(dir)
2216 2227 self._node = node
2217 2228 # Add an empty file, which will be included by iterators and such,
2218 2229 # appearing as the directory itself (i.e. something like "dir/")
2219 2230 self._files[b''] = node
2220 2231 self._flags[b''] = b't'
2221 2232
2222 2233 # Manifests outside the narrowspec should never be modified, so avoid
2223 2234 # copying. This makes a noticeable difference when there are very many
2224 2235 # directories outside the narrowspec. Also, it makes sense for the copy to
2225 2236 # be of the same type as the original, which would not happen with the
2226 2237 # super type's copy().
2227 2238 def copy(self):
2228 2239 return self
2229 2240
2230 2241
2231 2242 class excludeddirmanifestctx(treemanifestctx):
2232 2243 """context wrapper for excludeddir - see that docstring for rationale"""
2233 2244
2234 2245 def __init__(self, dir, node):
2235 2246 self._dir = dir
2236 2247 self._node = node
2237 2248
2238 2249 def read(self):
2239 2250 return excludeddir(self._dir, self._node)
2240 2251
2241 2252 def write(self, *args):
2242 2253 raise error.ProgrammingError(
2243 2254 b'attempt to write manifest from excluded dir %s' % self._dir
2244 2255 )
2245 2256
2246 2257
2247 2258 class excludedmanifestrevlog(manifestrevlog):
2248 2259 """Stand-in for excluded treemanifest revlogs.
2249 2260
2250 2261 When narrowing is active on a treemanifest repository, we'll have
2251 2262 references to directories we can't see due to the revlog being
2252 2263 skipped. This class exists to conform to the manifestrevlog
2253 2264 interface for those directories and proactively prevent writes to
2254 2265 outside the narrowspec.
2255 2266 """
2256 2267
2257 2268 def __init__(self, dir):
2258 2269 self._dir = dir
2259 2270
2260 2271 def __len__(self):
2261 2272 raise error.ProgrammingError(
2262 2273 b'attempt to get length of excluded dir %s' % self._dir
2263 2274 )
2264 2275
2265 2276 def rev(self, node):
2266 2277 raise error.ProgrammingError(
2267 2278 b'attempt to get rev from excluded dir %s' % self._dir
2268 2279 )
2269 2280
2270 2281 def linkrev(self, node):
2271 2282 raise error.ProgrammingError(
2272 2283 b'attempt to get linkrev from excluded dir %s' % self._dir
2273 2284 )
2274 2285
2275 2286 def node(self, rev):
2276 2287 raise error.ProgrammingError(
2277 2288 b'attempt to get node from excluded dir %s' % self._dir
2278 2289 )
2279 2290
2280 2291 def add(self, *args, **kwargs):
2281 2292 # We should never write entries in dirlogs outside the narrow clone.
2282 2293 # However, the method still gets called from writesubtree() in
2283 2294 # _addtree(), so we need to handle it. We should possibly make that
2284 2295 # avoid calling add() with a clean manifest (_dirty is always False
2285 2296 # in excludeddir instances).
2286 2297 pass
General Comments 0
You need to be logged in to leave comments. Login now