nodemap: only use persistent nodemap for non-inlined revlog...
marmoute
r44791:daad3aac default
@@ -1,2995 +1,2996 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 templatefilters,
69 69 util,
70 70 )
71 71 from .interfaces import (
72 72 repository,
73 73 util as interfaceutil,
74 74 )
75 75 from .revlogutils import (
76 76 deltas as deltautil,
77 77 flagutil,
78 78 nodemap as nodemaputil,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanket usage of all the names to silence pyflakes unused-import warnings.
87 87 # We need these names available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_EXTSTORED
102 102 REVIDX_DEFAULT_FLAGS
103 103 REVIDX_FLAGS_ORDER
104 104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 105
106 106 parsers = policy.importmod('parsers')
107 107 rustancestor = policy.importrust('ancestor')
108 108 rustdagop = policy.importrust('dagop')
109 109 rustrevlog = policy.importrust('revlog')
110 110
111 111 # Aliased for performance.
112 112 _zlibdecompress = zlib.decompress
113 113
114 114 # max size of revlog with inline data
115 115 _maxinline = 131072
116 116 _chunksize = 1048576
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False, {}
121 121
122 122
123 123 def ellipsiswriteprocessor(rl, text, sidedata):
124 124 return text, False
125 125
126 126
127 127 def ellipsisrawprocessor(rl, text):
128 128 return False
129 129
130 130
131 131 ellipsisprocessor = (
132 132 ellipsisreadprocessor,
133 133 ellipsiswriteprocessor,
134 134 ellipsisrawprocessor,
135 135 )
136 136
137 137
138 138 def getoffset(q):
139 139 return int(q >> 16)
140 140
141 141
142 142 def gettype(q):
143 143 return int(q & 0xFFFF)
144 144
145 145
146 146 def offset_type(offset, type):
147 147 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
148 148 raise ValueError(b'unknown revlog index flags')
149 149 return int(int(offset) << 16 | type)
150 150
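
The three helpers above implement the packed first field of an index entry: the byte offset lives in the high bits and the 16 flag bits in the low bits. A minimal round-trip sketch (the flag value is illustrative only; real values come from `flagutil`):

```python
def offset_type(offset, type):
    return int(int(offset) << 16 | type)

FLAG = 1 << 15  # illustrative flag bit, not a real REVIDX_* constant

packed = offset_type(4096, FLAG)
assert packed >> 16 == 4096       # what getoffset() recovers
assert packed & 0xFFFF == FLAG    # what gettype() recovers
```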
151 151
152 152 def _verify_revision(rl, skipflags, state, node):
153 153 """Verify the integrity of the given revlog ``node`` while providing a hook
154 154 point for extensions to influence the operation."""
155 155 if skipflags:
156 156 state[b'skipread'].add(node)
157 157 else:
158 158 # Side-effect: read content and verify hash.
159 159 rl.revision(node)
160 160
161 161
162 162 @attr.s(slots=True, frozen=True)
163 163 class _revisioninfo(object):
164 164 """Information about a revision that allows building its fulltext
165 165 node: expected hash of the revision
166 166 p1, p2: parent revs of the revision
167 167 btext: built text cache consisting of a one-element list
168 168 cachedelta: (baserev, uncompressed_delta) or None
169 169 flags: flags associated with the revision storage
170 170
171 171 One of btext[0] or cachedelta must be set.
172 172 """
173 173
174 174 node = attr.ib()
175 175 p1 = attr.ib()
176 176 p2 = attr.ib()
177 177 btext = attr.ib()
178 178 textlen = attr.ib()
179 179 cachedelta = attr.ib()
180 180 flags = attr.ib()
181 181
182 182
183 183 @interfaceutil.implementer(repository.irevisiondelta)
184 184 @attr.s(slots=True)
185 185 class revlogrevisiondelta(object):
186 186 node = attr.ib()
187 187 p1node = attr.ib()
188 188 p2node = attr.ib()
189 189 basenode = attr.ib()
190 190 flags = attr.ib()
191 191 baserevisionsize = attr.ib()
192 192 revision = attr.ib()
193 193 delta = attr.ib()
194 194 linknode = attr.ib(default=None)
195 195
196 196
197 197 @interfaceutil.implementer(repository.iverifyproblem)
198 198 @attr.s(frozen=True)
199 199 class revlogproblem(object):
200 200 warning = attr.ib(default=None)
201 201 error = attr.ib(default=None)
202 202 node = attr.ib(default=None)
203 203
204 204
205 205 # index v0:
206 206 # 4 bytes: offset
207 207 # 4 bytes: compressed length
208 208 # 4 bytes: base rev
209 209 # 4 bytes: link rev
210 210 # 20 bytes: parent 1 nodeid
211 211 # 20 bytes: parent 2 nodeid
212 212 # 20 bytes: nodeid
213 213 indexformatv0 = struct.Struct(b">4l20s20s20s")
214 214 indexformatv0_pack = indexformatv0.pack
215 215 indexformatv0_unpack = indexformatv0.unpack
216 216
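
A v0 entry is a fixed 76-byte record: four 4-byte big-endian integers followed by three 20-byte binary nodeids. A quick round-trip through the struct above, with a synthetic entry:

```python
import struct

indexformatv0 = struct.Struct(b">4l20s20s20s")
assert indexformatv0.size == 76  # 4 * 4 + 3 * 20

# synthetic entry: offset, compressed length, base rev, link rev,
# parent-1 nodeid, parent-2 nodeid, own nodeid (raw 20-byte hashes)
raw = indexformatv0.pack(0, 11, 0, 0, b'\0' * 20, b'\0' * 20, b'\x01' * 20)
offset, clen, base, link, p1, p2, node = indexformatv0.unpack(raw)
assert (clen, node) == (11, b'\x01' * 20)
```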
217 217
218 218 class revlogoldindex(list):
219 219 @property
220 220 def nodemap(self):
221 221 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
222 222 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
223 223 return self._nodemap
224 224
225 225 @util.propertycache
226 226 def _nodemap(self):
227 227 nodemap = nodemaputil.NodeMap({nullid: nullrev})
228 228 for r in range(0, len(self)):
229 229 n = self[r][7]
230 230 nodemap[n] = r
231 231 return nodemap
232 232
233 233 def has_node(self, node):
234 234 """return True if the node exist in the index"""
235 235 return node in self._nodemap
236 236
237 237 def rev(self, node):
238 238 """return a revision for a node
239 239
240 240 If the node is unknown, raise a RevlogError"""
241 241 return self._nodemap[node]
242 242
243 243 def get_rev(self, node):
244 244 """return a revision for a node
245 245
246 246 If the node is unknown, return None"""
247 247 return self._nodemap.get(node)
248 248
249 249 def append(self, tup):
250 250 self._nodemap[tup[7]] = len(self)
251 251 super(revlogoldindex, self).append(tup)
252 252
253 253 def __delitem__(self, i):
254 254 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
255 255 raise ValueError(b"deleting slices only supports a:-1 with step 1")
256 256 for r in pycompat.xrange(i.start, len(self)):
257 257 del self._nodemap[self[r][7]]
258 258 super(revlogoldindex, self).__delitem__(i)
259 259
260 260 def clearcaches(self):
261 261 self.__dict__.pop('_nodemap', None)
262 262
263 263 def __getitem__(self, i):
264 264 if i == -1:
265 265 return (0, 0, 0, -1, -1, -1, -1, nullid)
266 266 return list.__getitem__(self, i)
267 267
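
`revlogoldindex` is the compatibility shim that gives old v0 indexes the modern lookup API (`has_node`/`rev`/`get_rev`) replacing the deprecated `nodemap` property. A minimal stand-in showing the contract, with a plain dict in place of the real nodemap:

```python
class fakeindex(object):
    """Toy object mirroring the lookup contract described above."""

    def __init__(self):
        self._nodemap = {}

    def has_node(self, node):
        return node in self._nodemap

    def rev(self, node):
        return self._nodemap[node]  # KeyError stands in for RevlogError

    def get_rev(self, node):
        return self._nodemap.get(node)

idx = fakeindex()
idx._nodemap[b'\x01' * 20] = 0
assert idx.has_node(b'\x01' * 20)
assert idx.rev(b'\x01' * 20) == 0
assert idx.get_rev(b'\x02' * 20) is None
```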
268 268
269 269 class revlogoldio(object):
270 270 def __init__(self):
271 271 self.size = indexformatv0.size
272 272
273 273 def parseindex(self, data, inline):
274 274 s = self.size
275 275 index = []
276 276 nodemap = nodemaputil.NodeMap({nullid: nullrev})
277 277 n = off = 0
278 278 l = len(data)
279 279 while off + s <= l:
280 280 cur = data[off : off + s]
281 281 off += s
282 282 e = indexformatv0_unpack(cur)
283 283 # transform to revlogv1 format
284 284 e2 = (
285 285 offset_type(e[0], 0),
286 286 e[1],
287 287 -1,
288 288 e[2],
289 289 e[3],
290 290 nodemap.get(e[4], nullrev),
291 291 nodemap.get(e[5], nullrev),
292 292 e[6],
293 293 )
294 294 index.append(e2)
295 295 nodemap[e[6]] = n
296 296 n += 1
297 297
298 298 index = revlogoldindex(index)
299 299 return index, None
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 if gettype(entry[0]):
303 303 raise error.RevlogError(
304 304 _(b'index entry flags need revlog version 1')
305 305 )
306 306 e2 = (
307 307 getoffset(entry[0]),
308 308 entry[1],
309 309 entry[3],
310 310 entry[4],
311 311 node(entry[5]),
312 312 node(entry[6]),
313 313 entry[7],
314 314 )
315 315 return indexformatv0_pack(*e2)
316 316
317 317
318 318 # index ng:
319 319 # 6 bytes: offset
320 320 # 2 bytes: flags
321 321 # 4 bytes: compressed length
322 322 # 4 bytes: uncompressed length
323 323 # 4 bytes: base rev
324 324 # 4 bytes: link rev
325 325 # 4 bytes: parent 1 rev
326 326 # 4 bytes: parent 2 rev
327 327 # 32 bytes: nodeid
328 328 indexformatng = struct.Struct(b">Qiiiiii20s12x")
329 329 indexformatng_pack = indexformatng.pack
330 330 versionformat = struct.Struct(b">I")
331 331 versionformat_pack = versionformat.pack
332 332 versionformat_unpack = versionformat.unpack
333 333
334 334 # the maximum uncompressed length an indexformatng entry can record
335 335 # (a 4-byte signed integer, i.e. 2 GiB)
336 336 _maxentrysize = 0x7FFFFFFF
337 337
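
Each v1/v2 ("ng") entry is exactly 64 bytes: an 8-byte offset+flags field, six 4-byte integers, a 20-byte nodeid, and 12 bytes of padding. A quick sanity check of the sizes involved:

```python
import struct

indexformatng = struct.Struct(b">Qiiiiii20s12x")
assert indexformatng.size == 64      # 8 + 6 * 4 + 20 + 12 bytes of padding

_maxentrysize = 0x7FFFFFFF
assert _maxentrysize == 2 ** 31 - 1  # cap imposed by the 4-byte signed length
```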
338 338
339 339 class revlogio(object):
340 340 def __init__(self):
341 341 self.size = indexformatng.size
342 342
343 343 def parseindex(self, data, inline):
344 344 # call the C implementation to parse the index data
345 345 index, cache = parsers.parse_index2(data, inline)
346 346 return index, cache
347 347
348 348 def packentry(self, entry, node, version, rev):
349 349 p = indexformatng_pack(*entry)
350 350 if rev == 0:
351 351 p = versionformat_pack(version) + p[4:]
352 352 return p
353 353
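
`packentry` above relies on a neat property for the revlog header: rev 0's offset is always 0, so the high 4 bytes of its first field are free, and the version+flags word is written over them. A sketch of that overlay:

```python
import struct

indexformatng = struct.Struct(b">Qiiiiii20s12x")
versionformat = struct.Struct(b">I")

REVLOGV1 = 1              # values from revlogutils.constants
FLAG_INLINE_DATA = 1 << 16

# offset_type(0, 0) == 0 for rev 0, so the leading Q field starts as zero
entry = (0, 10, 10, 0, 0, -1, -1, b'\x01' * 20)
p = indexformatng.pack(*entry)
p = versionformat.pack(REVLOGV1 | FLAG_INLINE_DATA) + p[4:]  # rev == 0 case
assert len(p) == 64
```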
354 354
355 355 class rustrevlogio(revlogio):
356 356 def parseindex(self, data, inline):
357 357 index, cache = super(rustrevlogio, self).parseindex(data, inline)
358 358 return rustrevlog.MixedIndex(index), cache
359 359
360 360
361 361 class revlog(object):
362 362 """
363 363 the underlying revision storage object
364 364
365 365 A revlog consists of two parts, an index and the revision data.
366 366
367 367 The index is a file with a fixed record size containing
368 368 information on each revision, including its nodeid (hash), the
369 369 nodeids of its parents, the position and offset of its data within
370 370 the data file, and the revision it's based on. Finally, each entry
371 371 contains a linkrev entry that can serve as a pointer to external
372 372 data.
373 373
374 374 The revision data itself is a linear collection of data chunks.
375 375 Each chunk represents a revision and is usually represented as a
376 376 delta against the previous chunk. To bound lookup time, runs of
377 377 deltas are limited to about 2 times the length of the original
378 378 version data. This makes retrieval of a version proportional to
379 379 its size, or O(1) relative to the number of revisions.
380 380
381 381 Both pieces of the revlog are written to in an append-only
382 382 fashion, which means we never need to rewrite a file to insert or
383 383 remove data, and can use some simple techniques to avoid the need
384 384 for locking while reading.
385 385
386 386 If checkambig, indexfile is opened with checkambig=True at
387 387 writing, to avoid file stat ambiguity.
388 388
389 389 If mmaplargeindex is True, and an mmapindexthreshold is set, the
390 390 index will be mmapped rather than read if it is larger than the
391 391 configured threshold.
392 392
393 393 If censorable is True, the revlog can have censored revisions.
394 394
395 395 If `upperboundcomp` is not None, this is the expected maximal gain from
396 396 compression for the data content.
397 397 """
398 398
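
A hedged usage sketch of the class: `repo` is assumed to be an existing open `localrepository`, and the `data/foo.txt.i` path is illustrative of how filelogs name their indexes.

```python
from mercurial import revlog

rl = revlog.revlog(repo.svfs, b'data/foo.txt.i')
tipnode = rl.tip()            # nodeid of the last revision
text = rl.revision(tipnode)   # fulltext after flag processing
raw = rl.rawdata(tipnode)     # rawtext as stored (before flag processing)
```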
399 399 _flagserrorclass = error.RevlogError
400 400
401 401 def __init__(
402 402 self,
403 403 opener,
404 404 indexfile,
405 405 datafile=None,
406 406 checkambig=False,
407 407 mmaplargeindex=False,
408 408 censorable=False,
409 409 upperboundcomp=None,
410 410 persistentnodemap=False,
411 411 ):
412 412 """
413 413 create a revlog object
414 414
415 415 opener is a function that abstracts the file opening operation
416 416 and can be used to implement COW semantics or the like.
417 417
418 418 """
419 419 self.upperboundcomp = upperboundcomp
420 420 self.indexfile = indexfile
421 421 self.datafile = datafile or (indexfile[:-2] + b".d")
422 422 self.nodemap_file = None
423 423 if persistentnodemap:
424 424 self.nodemap_file = indexfile[:-2] + b".n"
425 425
426 426 self.opener = opener
427 427 # When True, indexfile is opened with checkambig=True at writing, to
428 428 # avoid file stat ambiguity.
429 429 self._checkambig = checkambig
430 430 self._mmaplargeindex = mmaplargeindex
431 431 self._censorable = censorable
432 432 # 3-tuple of (node, rev, text) for a raw revision.
433 433 self._revisioncache = None
434 434 # Maps rev to chain base rev.
435 435 self._chainbasecache = util.lrucachedict(100)
436 436 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
437 437 self._chunkcache = (0, b'')
438 438 # How much data to read and cache into the raw revlog data cache.
439 439 self._chunkcachesize = 65536
440 440 self._maxchainlen = None
441 441 self._deltabothparents = True
442 442 self.index = None
443 443 # Mapping of partial identifiers to full nodes.
444 444 self._pcache = {}
446 446 # Name of the compression engine used for revision data.
446 446 self._compengine = b'zlib'
447 447 self._compengineopts = {}
448 448 self._maxdeltachainspan = -1
449 449 self._withsparseread = False
450 450 self._sparserevlog = False
451 451 self._srdensitythreshold = 0.50
452 452 self._srmingapsize = 262144
453 453
454 454 # Make copy of flag processors so each revlog instance can support
455 455 # custom flags.
456 456 self._flagprocessors = dict(flagutil.flagprocessors)
457 457
458 458 # 2-tuple of file handles being used for active writing.
459 459 self._writinghandles = None
460 460
461 461 self._loadindex()
462 462
463 463 def _loadindex(self):
464 464 mmapindexthreshold = None
465 465 opts = self.opener.options
466 466
467 467 if b'revlogv2' in opts:
468 468 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
469 469 elif b'revlogv1' in opts:
470 470 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
471 471 if b'generaldelta' in opts:
472 472 newversionflags |= FLAG_GENERALDELTA
473 473 elif b'revlogv0' in self.opener.options:
474 474 newversionflags = REVLOGV0
475 475 else:
476 476 newversionflags = REVLOG_DEFAULT_VERSION
477 477
478 478 if b'chunkcachesize' in opts:
479 479 self._chunkcachesize = opts[b'chunkcachesize']
480 480 if b'maxchainlen' in opts:
481 481 self._maxchainlen = opts[b'maxchainlen']
482 482 if b'deltabothparents' in opts:
483 483 self._deltabothparents = opts[b'deltabothparents']
484 484 self._lazydelta = bool(opts.get(b'lazydelta', True))
485 485 self._lazydeltabase = False
486 486 if self._lazydelta:
487 487 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
488 488 if b'compengine' in opts:
489 489 self._compengine = opts[b'compengine']
490 490 if b'zlib.level' in opts:
491 491 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
492 492 if b'zstd.level' in opts:
493 493 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
494 494 if b'maxdeltachainspan' in opts:
495 495 self._maxdeltachainspan = opts[b'maxdeltachainspan']
496 496 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
497 497 mmapindexthreshold = opts[b'mmapindexthreshold']
498 498 self.hassidedata = bool(opts.get(b'side-data', False))
499 499 if self.hassidedata:
500 500 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
501 501 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
502 502 withsparseread = bool(opts.get(b'with-sparse-read', False))
503 503 # sparse-revlog forces sparse-read
504 504 self._withsparseread = self._sparserevlog or withsparseread
505 505 if b'sparse-read-density-threshold' in opts:
506 506 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
507 507 if b'sparse-read-min-gap-size' in opts:
508 508 self._srmingapsize = opts[b'sparse-read-min-gap-size']
509 509 if opts.get(b'enableellipsis'):
510 510 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
511 511
512 512 # revlog v0 doesn't have flag processors
513 513 for flag, processor in pycompat.iteritems(
514 514 opts.get(b'flagprocessors', {})
515 515 ):
516 516 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
517 517
518 518 if self._chunkcachesize <= 0:
519 519 raise error.RevlogError(
520 520 _(b'revlog chunk cache size %r is not greater than 0')
521 521 % self._chunkcachesize
522 522 )
523 523 elif self._chunkcachesize & (self._chunkcachesize - 1):
524 524 raise error.RevlogError(
525 525 _(b'revlog chunk cache size %r is not a power of 2')
526 526 % self._chunkcachesize
527 527 )
528 528
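
The second check above is the classic bit trick: for n > 0, `n & (n - 1)` clears the lowest set bit, so the result is zero exactly when a single bit is set. For example:

```python
def is_power_of_two(n):
    # n & (n - 1) drops the lowest set bit; only powers of two go to 0
    return n > 0 and n & (n - 1) == 0

assert is_power_of_two(65536)     # the default chunk cache size
assert not is_power_of_two(65537)
assert not is_power_of_two(0)
```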
529 529 indexdata = b''
530 530 self._initempty = True
531 531 try:
532 532 with self._indexfp() as f:
533 533 if (
534 534 mmapindexthreshold is not None
535 535 and self.opener.fstat(f).st_size >= mmapindexthreshold
536 536 ):
537 537 # TODO: should call .close() to release resources without
538 538 # relying on Python GC
539 539 indexdata = util.buffer(util.mmapread(f))
540 540 else:
541 541 indexdata = f.read()
542 542 if len(indexdata) > 0:
543 543 versionflags = versionformat_unpack(indexdata[:4])[0]
544 544 self._initempty = False
545 545 else:
546 546 versionflags = newversionflags
547 547 except IOError as inst:
548 548 if inst.errno != errno.ENOENT:
549 549 raise
550 550
551 551 versionflags = newversionflags
552 552
553 553 self.version = versionflags
554 554
555 555 flags = versionflags & ~0xFFFF
556 556 fmt = versionflags & 0xFFFF
557 557
558 558 if fmt == REVLOGV0:
559 559 if flags:
560 560 raise error.RevlogError(
561 561 _(b'unknown flags (%#04x) in version %d revlog %s')
562 562 % (flags >> 16, fmt, self.indexfile)
563 563 )
564 564
565 565 self._inline = False
566 566 self._generaldelta = False
567 567
568 568 elif fmt == REVLOGV1:
569 569 if flags & ~REVLOGV1_FLAGS:
570 570 raise error.RevlogError(
571 571 _(b'unknown flags (%#04x) in version %d revlog %s')
572 572 % (flags >> 16, fmt, self.indexfile)
573 573 )
574 574
575 575 self._inline = versionflags & FLAG_INLINE_DATA
576 576 self._generaldelta = versionflags & FLAG_GENERALDELTA
577 577
578 578 elif fmt == REVLOGV2:
579 579 if flags & ~REVLOGV2_FLAGS:
580 580 raise error.RevlogError(
581 581 _(b'unknown flags (%#04x) in version %d revlog %s')
582 582 % (flags >> 16, fmt, self.indexfile)
583 583 )
584 584
585 585 self._inline = versionflags & FLAG_INLINE_DATA
586 586 # generaldelta implied by version 2 revlogs.
587 587 self._generaldelta = True
588 588
589 589 else:
590 590 raise error.RevlogError(
591 591 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
592 592 )
593 593 # sparse-revlog can't be on without general-delta (issue6056)
594 594 if not self._generaldelta:
595 595 self._sparserevlog = False
596 596
597 597 self._storedeltachains = True
598 598
599 599 self._io = revlogio()
600 600 if self.version == REVLOGV0:
601 601 self._io = revlogoldio()
602 602 elif rustrevlog is not None and self.opener.options.get(b'rust.index'):
603 603 self._io = rustrevlogio()
604 604 try:
605 605 d = self._io.parseindex(indexdata, self._inline)
606 606 except (ValueError, IndexError):
607 607 raise error.RevlogError(
608 608 _(b"index %s is corrupted") % self.indexfile
609 609 )
610 610 self.index, self._chunkcache = d
611 611 if not self._chunkcache:
612 612 self._chunkclear()
613 613 # revnum -> (chain-length, sum-delta-length)
614 614 self._chaininfocache = {}
615 615 # revlog header -> revlog compressor
616 616 self._decompressors = {}
617 617
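
In `_loadindex` above, the 32-bit header word read from the first index entry splits as `flags = versionflags & ~0xFFFF` and `fmt = versionflags & 0xFFFF`: the low 16 bits carry the format number, the high 16 bits the feature flags. A worked value (constants per `revlogutils.constants`):

```python
REVLOGV1 = 1
FLAG_INLINE_DATA = 1 << 16
FLAG_GENERALDELTA = 1 << 17

versionflags = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA  # 0x30001
assert versionflags & 0xFFFF == REVLOGV1                        # fmt
assert versionflags & ~0xFFFF == FLAG_INLINE_DATA | FLAG_GENERALDELTA
```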
618 618 @util.propertycache
619 619 def _compressor(self):
620 620 engine = util.compengines[self._compengine]
621 621 return engine.revlogcompressor(self._compengineopts)
622 622
623 623 def _indexfp(self, mode=b'r'):
624 624 """file object for the revlog's index file"""
625 625 args = {'mode': mode}
626 626 if mode != b'r':
627 627 args['checkambig'] = self._checkambig
628 628 if mode == b'w':
629 629 args['atomictemp'] = True
630 630 return self.opener(self.indexfile, **args)
631 631
632 632 def _datafp(self, mode=b'r'):
633 633 """file object for the revlog's data file"""
634 634 return self.opener(self.datafile, mode=mode)
635 635
636 636 @contextlib.contextmanager
637 637 def _datareadfp(self, existingfp=None):
638 638 """file object suitable to read data"""
639 639 # Use explicit file handle, if given.
640 640 if existingfp is not None:
641 641 yield existingfp
642 642
643 643 # Use a file handle being actively used for writes, if available.
644 644 # There is some danger to doing this because reads will seek the
645 645 # file. However, _writeentry() performs a SEEK_END before all writes,
646 646 # so we should be safe.
647 647 elif self._writinghandles:
648 648 if self._inline:
649 649 yield self._writinghandles[0]
650 650 else:
651 651 yield self._writinghandles[1]
652 652
653 653 # Otherwise open a new file handle.
654 654 else:
655 655 if self._inline:
656 656 func = self._indexfp
657 657 else:
658 658 func = self._datafp
659 659 with func() as fp:
660 660 yield fp
661 661
662 662 def tiprev(self):
663 663 return len(self.index) - 1
664 664
665 665 def tip(self):
666 666 return self.node(self.tiprev())
667 667
668 668 def __contains__(self, rev):
669 669 return 0 <= rev < len(self)
670 670
671 671 def __len__(self):
672 672 return len(self.index)
673 673
674 674 def __iter__(self):
675 675 return iter(pycompat.xrange(len(self)))
676 676
677 677 def revs(self, start=0, stop=None):
678 678 """iterate over all rev in this revlog (from start to stop)"""
679 679 return storageutil.iterrevs(len(self), start=start, stop=stop)
680 680
681 681 @property
682 682 def nodemap(self):
683 683 msg = (
684 684 b"revlog.nodemap is deprecated, "
685 685 b"use revlog.index.[has_node|rev|get_rev]"
686 686 )
687 687 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
688 688 return self.index.nodemap
689 689
690 690 @property
691 691 def _nodecache(self):
692 692 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
693 693 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
694 694 return self.index.nodemap
695 695
696 696 def hasnode(self, node):
697 697 try:
698 698 self.rev(node)
699 699 return True
700 700 except KeyError:
701 701 return False
702 702
703 703 def candelta(self, baserev, rev):
704 704 """whether two revisions (baserev, rev) can be delta-ed or not"""
705 705 # Disable delta if either rev requires a content-changing flag
706 706 # processor (ex. LFS). This is because such a flag processor can alter
707 707 # the rawtext content that the delta will be based on, and two clients
708 708 # could have the same revlog node with different flags (i.e. different
709 709 # rawtext contents) and the delta could be incompatible.
710 710 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
711 711 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
712 712 ):
713 713 return False
714 714 return True
715 715
716 716 def clearcaches(self):
717 717 self._revisioncache = None
718 718 self._chainbasecache.clear()
719 719 self._chunkcache = (0, b'')
720 720 self._pcache = {}
721 721 self.index.clearcaches()
722 722
723 723 def rev(self, node):
724 724 try:
725 725 return self.index.rev(node)
726 726 except TypeError:
727 727 raise
728 728 except error.RevlogError:
729 729 # parsers.c radix tree lookup failed
730 730 if node == wdirid or node in wdirfilenodeids:
731 731 raise error.WdirUnsupported
732 732 raise error.LookupError(node, self.indexfile, _(b'no node'))
733 733
734 734 # Accessors for index entries.
735 735
736 736 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
737 737 # are flags.
738 738 def start(self, rev):
739 739 return int(self.index[rev][0] >> 16)
740 740
741 741 def flags(self, rev):
742 742 return self.index[rev][0] & 0xFFFF
743 743
744 744 def length(self, rev):
745 745 return self.index[rev][1]
746 746
747 747 def rawsize(self, rev):
748 748 """return the length of the uncompressed text for a given revision"""
749 749 l = self.index[rev][2]
750 750 if l >= 0:
751 751 return l
752 752
753 753 t = self.rawdata(rev)
754 754 return len(t)
755 755
756 756 def size(self, rev):
757 757 """length of non-raw text (processed by a "read" flag processor)"""
758 758 # fast path: if no "read" flag processor could change the content,
759 759 # size is rawsize. note: ELLIPSIS is known to not change the content.
760 760 flags = self.flags(rev)
761 761 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
762 762 return self.rawsize(rev)
763 763
764 764 return len(self.revision(rev, raw=False))
765 765
766 766 def chainbase(self, rev):
767 767 base = self._chainbasecache.get(rev)
768 768 if base is not None:
769 769 return base
770 770
771 771 index = self.index
772 772 iterrev = rev
773 773 base = index[iterrev][3]
774 774 while base != iterrev:
775 775 iterrev = base
776 776 base = index[iterrev][3]
777 777
778 778 self._chainbasecache[rev] = base
779 779 return base
780 780
781 781 def linkrev(self, rev):
782 782 return self.index[rev][4]
783 783
784 784 def parentrevs(self, rev):
785 785 try:
786 786 entry = self.index[rev]
787 787 except IndexError:
788 788 if rev == wdirrev:
789 789 raise error.WdirUnsupported
790 790 raise
791 791
792 792 return entry[5], entry[6]
793 793
794 794 # fast parentrevs(rev) where rev isn't filtered
795 795 _uncheckedparentrevs = parentrevs
796 796
797 797 def node(self, rev):
798 798 try:
799 799 return self.index[rev][7]
800 800 except IndexError:
801 801 if rev == wdirrev:
802 802 raise error.WdirUnsupported
803 803 raise
804 804
805 805 # Derived from index values.
806 806
807 807 def end(self, rev):
808 808 return self.start(rev) + self.length(rev)
809 809
810 810 def parents(self, node):
811 811 i = self.index
812 812 d = i[self.rev(node)]
813 813 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
814 814
815 815 def chainlen(self, rev):
816 816 return self._chaininfo(rev)[0]
817 817
818 818 def _chaininfo(self, rev):
819 819 chaininfocache = self._chaininfocache
820 820 if rev in chaininfocache:
821 821 return chaininfocache[rev]
822 822 index = self.index
823 823 generaldelta = self._generaldelta
824 824 iterrev = rev
825 825 e = index[iterrev]
826 826 clen = 0
827 827 compresseddeltalen = 0
828 828 while iterrev != e[3]:
829 829 clen += 1
830 830 compresseddeltalen += e[1]
831 831 if generaldelta:
832 832 iterrev = e[3]
833 833 else:
834 834 iterrev -= 1
835 835 if iterrev in chaininfocache:
836 836 t = chaininfocache[iterrev]
837 837 clen += t[0]
838 838 compresseddeltalen += t[1]
839 839 break
840 840 e = index[iterrev]
841 841 else:
842 842 # Add text length of base since decompressing that also takes
843 843 # work. For cache hits the length is already included.
844 844 compresseddeltalen += e[1]
845 845 r = (clen, compresseddeltalen)
846 846 chaininfocache[rev] = r
847 847 return r
848 848
849 849 def _deltachain(self, rev, stoprev=None):
850 850 """Obtain the delta chain for a revision.
851 851
852 852 ``stoprev`` specifies a revision to stop at. If not specified, we
853 853 stop at the base of the chain.
854 854
855 855 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
856 856 revs in ascending order and ``stopped`` is a bool indicating whether
857 857 ``stoprev`` was hit.
858 858 """
859 859 # Try C implementation.
860 860 try:
861 861 return self.index.deltachain(rev, stoprev, self._generaldelta)
862 862 except AttributeError:
863 863 pass
864 864
865 865 chain = []
866 866
867 867 # Alias to prevent attribute lookup in tight loop.
868 868 index = self.index
869 869 generaldelta = self._generaldelta
870 870
871 871 iterrev = rev
872 872 e = index[iterrev]
873 873 while iterrev != e[3] and iterrev != stoprev:
874 874 chain.append(iterrev)
875 875 if generaldelta:
876 876 iterrev = e[3]
877 877 else:
878 878 iterrev -= 1
879 879 e = index[iterrev]
880 880
881 881 if iterrev == stoprev:
882 882 stopped = True
883 883 else:
884 884 chain.append(iterrev)
885 885 stopped = False
886 886
887 887 chain.reverse()
888 888 return chain, stopped
889 889
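
A toy model of the pure-python fallback above (generaldelta case): each rev stores the rev of its delta base, and a self-referencing base marks the end of the chain.

```python
# delta base per rev; rev 0 is its own base (a full snapshot)
bases = {0: 0, 1: 0, 2: 1, 3: 2}

def deltachain(rev):
    chain = []
    while bases[rev] != rev:
        chain.append(rev)
        rev = bases[rev]
    chain.append(rev)     # include the chain base itself
    chain.reverse()       # ascending order, as the real method returns
    return chain

assert deltachain(3) == [0, 1, 2, 3]
```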
890 890 def ancestors(self, revs, stoprev=0, inclusive=False):
891 891 """Generate the ancestors of 'revs' in reverse revision order.
892 892 Does not generate revs lower than stoprev.
893 893
894 894 See the documentation for ancestor.lazyancestors for more details."""
895 895
896 896 # first, make sure start revisions aren't filtered
897 897 revs = list(revs)
898 898 checkrev = self.node
899 899 for r in revs:
900 900 checkrev(r)
901 901 # and we're sure ancestors aren't filtered as well
902 902
903 903 if rustancestor is not None:
904 904 lazyancestors = rustancestor.LazyAncestors
905 905 arg = self.index
906 906 elif util.safehasattr(parsers, b'rustlazyancestors'):
907 907 lazyancestors = ancestor.rustlazyancestors
908 908 arg = self.index
909 909 else:
910 910 lazyancestors = ancestor.lazyancestors
911 911 arg = self._uncheckedparentrevs
912 912 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
913 913
914 914 def descendants(self, revs):
915 915 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
916 916
917 917 def findcommonmissing(self, common=None, heads=None):
918 918 """Return a tuple of the ancestors of common and the ancestors of heads
919 919 that are not ancestors of common. In revset terminology, we return the
920 920 tuple:
921 921
922 922 ::common, (::heads) - (::common)
923 923
924 924 The list is sorted by revision number, meaning it is
925 925 topologically sorted.
926 926
927 927 'heads' and 'common' are both lists of node IDs. If heads is
928 928 not supplied, uses all of the revlog's heads. If common is not
929 929 supplied, uses nullid."""
930 930 if common is None:
931 931 common = [nullid]
932 932 if heads is None:
933 933 heads = self.heads()
934 934
935 935 common = [self.rev(n) for n in common]
936 936 heads = [self.rev(n) for n in heads]
937 937
938 938 # we want the ancestors, but inclusive
939 939 class lazyset(object):
940 940 def __init__(self, lazyvalues):
941 941 self.addedvalues = set()
942 942 self.lazyvalues = lazyvalues
943 943
944 944 def __contains__(self, value):
945 945 return value in self.addedvalues or value in self.lazyvalues
946 946
947 947 def __iter__(self):
948 948 added = self.addedvalues
949 949 for r in added:
950 950 yield r
951 951 for r in self.lazyvalues:
952 952 if r not in added:
953 953 yield r
954 954
955 955 def add(self, value):
956 956 self.addedvalues.add(value)
957 957
958 958 def update(self, values):
959 959 self.addedvalues.update(values)
960 960
961 961 has = lazyset(self.ancestors(common))
962 962 has.add(nullrev)
963 963 has.update(common)
964 964
965 965 # take all ancestors from heads that aren't in has
966 966 missing = set()
967 967 visit = collections.deque(r for r in heads if r not in has)
968 968 while visit:
969 969 r = visit.popleft()
970 970 if r in missing:
971 971 continue
972 972 else:
973 973 missing.add(r)
974 974 for p in self.parentrevs(r):
975 975 if p not in has:
976 976 visit.append(p)
977 977 missing = list(missing)
978 978 missing.sort()
979 979 return has, [self.node(miss) for miss in missing]
980 980
981 981 def incrementalmissingrevs(self, common=None):
982 982 """Return an object that can be used to incrementally compute the
983 983 revision numbers of the ancestors of arbitrary sets that are not
984 984 ancestors of common. This is an ancestor.incrementalmissingancestors
985 985 object.
986 986
987 987 'common' is a list of revision numbers. If common is not supplied, uses
988 988 nullrev.
989 989 """
990 990 if common is None:
991 991 common = [nullrev]
992 992
993 993 if rustancestor is not None:
994 994 return rustancestor.MissingAncestors(self.index, common)
995 995 return ancestor.incrementalmissingancestors(self.parentrevs, common)
996 996
997 997 def findmissingrevs(self, common=None, heads=None):
998 998 """Return the revision numbers of the ancestors of heads that
999 999 are not ancestors of common.
1000 1000
1001 1001 More specifically, return a list of revision numbers corresponding to
1002 1002 nodes N such that every N satisfies the following constraints:
1003 1003
1004 1004 1. N is an ancestor of some node in 'heads'
1005 1005 2. N is not an ancestor of any node in 'common'
1006 1006
1007 1007 The list is sorted by revision number, meaning it is
1008 1008 topologically sorted.
1009 1009
1010 1010 'heads' and 'common' are both lists of revision numbers. If heads is
1011 1011 not supplied, uses all of the revlog's heads. If common is not
1012 1012 supplied, uses nullrev."""
1013 1013 if common is None:
1014 1014 common = [nullrev]
1015 1015 if heads is None:
1016 1016 heads = self.headrevs()
1017 1017
1018 1018 inc = self.incrementalmissingrevs(common=common)
1019 1019 return inc.missingancestors(heads)
1020 1020
1021 1021 def findmissing(self, common=None, heads=None):
1022 1022 """Return the ancestors of heads that are not ancestors of common.
1023 1023
1024 1024 More specifically, return a list of nodes N such that every N
1025 1025 satisfies the following constraints:
1026 1026
1027 1027 1. N is an ancestor of some node in 'heads'
1028 1028 2. N is not an ancestor of any node in 'common'
1029 1029
1030 1030 The list is sorted by revision number, meaning it is
1031 1031 topologically sorted.
1032 1032
1033 1033 'heads' and 'common' are both lists of node IDs. If heads is
1034 1034 not supplied, uses all of the revlog's heads. If common is not
1035 1035 supplied, uses nullid."""
1036 1036 if common is None:
1037 1037 common = [nullid]
1038 1038 if heads is None:
1039 1039 heads = self.heads()
1040 1040
1041 1041 common = [self.rev(n) for n in common]
1042 1042 heads = [self.rev(n) for n in heads]
1043 1043
1044 1044 inc = self.incrementalmissingrevs(common=common)
1045 1045 return [self.node(r) for r in inc.missingancestors(heads)]
1046 1046
1047 1047 def nodesbetween(self, roots=None, heads=None):
1048 1048 """Return a topological path from 'roots' to 'heads'.
1049 1049
1050 1050 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1051 1051 topologically sorted list of all nodes N that satisfy both of
1052 1052 these constraints:
1053 1053
1054 1054 1. N is a descendant of some node in 'roots'
1055 1055 2. N is an ancestor of some node in 'heads'
1056 1056
1057 1057 Every node is considered to be both a descendant and an ancestor
1058 1058 of itself, so every reachable node in 'roots' and 'heads' will be
1059 1059 included in 'nodes'.
1060 1060
1061 1061 'outroots' is the list of reachable nodes in 'roots', i.e., the
1062 1062 subset of 'roots' that is returned in 'nodes'. Likewise,
1063 1063 'outheads' is the subset of 'heads' that is also in 'nodes'.
1064 1064
1065 1065 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1066 1066 unspecified, uses nullid as the only root. If 'heads' is
1067 1067 unspecified, uses the list of all of the revlog's heads."""
1068 1068 nonodes = ([], [], [])
1069 1069 if roots is not None:
1070 1070 roots = list(roots)
1071 1071 if not roots:
1072 1072 return nonodes
1073 1073 lowestrev = min([self.rev(n) for n in roots])
1074 1074 else:
1075 1075 roots = [nullid] # Everybody's a descendant of nullid
1076 1076 lowestrev = nullrev
1077 1077 if (lowestrev == nullrev) and (heads is None):
1078 1078 # We want _all_ the nodes!
1079 1079 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1080 1080 if heads is None:
1081 1081 # All nodes are ancestors, so the latest ancestor is the last
1082 1082 # node.
1083 1083 highestrev = len(self) - 1
1084 1084 # Set ancestors to None to signal that every node is an ancestor.
1085 1085 ancestors = None
1086 1086 # Set heads to an empty dictionary for later discovery of heads
1087 1087 heads = {}
1088 1088 else:
1089 1089 heads = list(heads)
1090 1090 if not heads:
1091 1091 return nonodes
1092 1092 ancestors = set()
1093 1093 # Turn heads into a dictionary so we can remove 'fake' heads.
1094 1094 # Also, later we will be using it to filter out the heads we can't
1095 1095 # find from roots.
1096 1096 heads = dict.fromkeys(heads, False)
1097 1097 # Start at the top and keep marking parents until we're done.
1098 1098 nodestotag = set(heads)
1099 1099 # Remember where the top was so we can use it as a limit later.
1100 1100 highestrev = max([self.rev(n) for n in nodestotag])
1101 1101 while nodestotag:
1102 1102 # grab a node to tag
1103 1103 n = nodestotag.pop()
1104 1104 # Never tag nullid
1105 1105 if n == nullid:
1106 1106 continue
1107 1107 # A node's revision number represents its place in a
1108 1108 # topologically sorted list of nodes.
1109 1109 r = self.rev(n)
1110 1110 if r >= lowestrev:
1111 1111 if n not in ancestors:
1112 1112 # If we are possibly a descendant of one of the roots
1113 1113 # and we haven't already been marked as an ancestor
1114 1114 ancestors.add(n) # Mark as ancestor
1115 1115 # Add non-nullid parents to list of nodes to tag.
1116 1116 nodestotag.update(
1117 1117 [p for p in self.parents(n) if p != nullid]
1118 1118 )
1119 1119 elif n in heads: # We've seen it before, is it a fake head?
1120 1120 # So it is, real heads should not be the ancestors of
1121 1121 # any other heads.
1122 1122 heads.pop(n)
1123 1123 if not ancestors:
1124 1124 return nonodes
1125 1125 # Now that we have our set of ancestors, we want to remove any
1126 1126 # roots that are not ancestors.
1127 1127
1128 1128 # If one of the roots was nullid, everything is included anyway.
1129 1129 if lowestrev > nullrev:
1130 1130 # But, since we weren't, let's recompute the lowest rev to not
1131 1131 # include roots that aren't ancestors.
1132 1132
1133 1133 # Filter out roots that aren't ancestors of heads
1134 1134 roots = [root for root in roots if root in ancestors]
1135 1135 # Recompute the lowest revision
1136 1136 if roots:
1137 1137 lowestrev = min([self.rev(root) for root in roots])
1138 1138 else:
1139 1139 # No more roots? Return empty list
1140 1140 return nonodes
1141 1141 else:
1142 1142 # We are descending from nullid, and don't need to care about
1143 1143 # any other roots.
1144 1144 lowestrev = nullrev
1145 1145 roots = [nullid]
1146 1146 # Transform our roots list into a set.
1147 1147 descendants = set(roots)
1148 1148 # Also, keep the original roots so we can filter out roots that aren't
1149 1149 # 'real' roots (i.e. are descended from other roots).
1150 1150 roots = descendants.copy()
1151 1151 # Our topologically sorted list of output nodes.
1152 1152 orderedout = []
1153 1153 # Don't start at nullid since we don't want nullid in our output list,
1154 1154 # and if nullid shows up in descendants, empty parents will look like
1155 1155 # they're descendants.
1156 1156 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1157 1157 n = self.node(r)
1158 1158 isdescendant = False
1159 1159 if lowestrev == nullrev: # Everybody is a descendant of nullid
1160 1160 isdescendant = True
1161 1161 elif n in descendants:
1162 1162 # n is already a descendant
1163 1163 isdescendant = True
1164 1164 # This check only needs to be done here because all the roots
1165 1165 # will start being marked as descendants before the loop.
1166 1166 if n in roots:
1167 1167 # If n was a root, check if it's a 'real' root.
1168 1168 p = tuple(self.parents(n))
1169 1169 # If any of its parents are descendants, it's not a root.
1170 1170 if (p[0] in descendants) or (p[1] in descendants):
1171 1171 roots.remove(n)
1172 1172 else:
1173 1173 p = tuple(self.parents(n))
1174 1174 # A node is a descendant if either of its parents are
1175 1175 # descendants. (We seeded the descendants set with the roots
1176 1176 # up there, remember?)
1177 1177 if (p[0] in descendants) or (p[1] in descendants):
1178 1178 descendants.add(n)
1179 1179 isdescendant = True
1180 1180 if isdescendant and ((ancestors is None) or (n in ancestors)):
1181 1181 # Only include nodes that are both descendants and ancestors.
1182 1182 orderedout.append(n)
1183 1183 if (ancestors is not None) and (n in heads):
1184 1184 # We're trying to figure out which heads are reachable
1185 1185 # from roots.
1186 1186 # Mark this head as having been reached
1187 1187 heads[n] = True
1188 1188 elif ancestors is None:
1189 1189 # Otherwise, we're trying to discover the heads.
1190 1190 # Assume this is a head because if it isn't, the next step
1191 1191 # will eventually remove it.
1192 1192 heads[n] = True
1193 1193 # But, obviously its parents aren't.
1194 1194 for p in self.parents(n):
1195 1195 heads.pop(p, None)
1196 1196 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1197 1197 roots = list(roots)
1198 1198 assert orderedout
1199 1199 assert roots
1200 1200 assert heads
1201 1201 return (orderedout, roots, heads)
1202 1202
1203 1203 def headrevs(self, revs=None):
1204 1204 if revs is None:
1205 1205 try:
1206 1206 return self.index.headrevs()
1207 1207 except AttributeError:
1208 1208 return self._headrevs()
1209 1209 if rustdagop is not None:
1210 1210 return rustdagop.headrevs(self.index, revs)
1211 1211 return dagop.headrevs(revs, self._uncheckedparentrevs)
1212 1212
1213 1213 def computephases(self, roots):
1214 1214 return self.index.computephasesmapsets(roots)
1215 1215
1216 1216 def _headrevs(self):
1217 1217 count = len(self)
1218 1218 if not count:
1219 1219 return [nullrev]
1220 1220 # we won't iterate over filtered revs, so nobody is a head at start
1221 1221 ishead = [0] * (count + 1)
1222 1222 index = self.index
1223 1223 for r in self:
1224 1224 ishead[r] = 1 # I may be a head
1225 1225 e = index[r]
1226 1226 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1227 1227 return [r for r, val in enumerate(ishead) if val]
1228 1228
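
The pure-python fallback above is a single pass: every rev starts as a candidate head and is cleared as soon as it shows up as someone's parent. The same idea over a toy parent table:

```python
# revs 1 and 2 are both children of rev 0, so only they remain heads
parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}

ishead = [1] * len(parents)
for r, (p1, p2) in parents.items():
    for p in (p1, p2):
        if p != -1:
            ishead[p] = 0

assert [r for r, v in enumerate(ishead) if v] == [1, 2]
```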
1229 1229 def heads(self, start=None, stop=None):
1230 1230 """return the list of all nodes that have no children
1231 1231
1232 1232 if start is specified, only heads that are descendants of
1233 1233 start will be returned
1234 1234 if stop is specified, it will consider all the revs from stop
1235 1235 as if they had no children
1236 1236 """
1237 1237 if start is None and stop is None:
1238 1238 if not len(self):
1239 1239 return [nullid]
1240 1240 return [self.node(r) for r in self.headrevs()]
1241 1241
1242 1242 if start is None:
1243 1243 start = nullrev
1244 1244 else:
1245 1245 start = self.rev(start)
1246 1246
1247 1247 stoprevs = set(self.rev(n) for n in stop or [])
1248 1248
1249 1249 revs = dagop.headrevssubset(
1250 1250 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1251 1251 )
1252 1252
1253 1253 return [self.node(rev) for rev in revs]
1254 1254
1255 1255 def children(self, node):
1256 1256 """find the children of a given node"""
1257 1257 c = []
1258 1258 p = self.rev(node)
1259 1259 for r in self.revs(start=p + 1):
1260 1260 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1261 1261 if prevs:
1262 1262 for pr in prevs:
1263 1263 if pr == p:
1264 1264 c.append(self.node(r))
1265 1265 elif p == nullrev:
1266 1266 c.append(self.node(r))
1267 1267 return c
1268 1268
1269 1269 def commonancestorsheads(self, a, b):
1270 1270 """calculate all the heads of the common ancestors of nodes a and b"""
1271 1271 a, b = self.rev(a), self.rev(b)
1272 1272 ancs = self._commonancestorsheads(a, b)
1273 1273 return pycompat.maplist(self.node, ancs)
1274 1274
1275 1275 def _commonancestorsheads(self, *revs):
1276 1276 """calculate all the heads of the common ancestors of revs"""
1277 1277 try:
1278 1278 ancs = self.index.commonancestorsheads(*revs)
1279 1279 except (AttributeError, OverflowError): # C implementation failed
1280 1280 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1281 1281 return ancs
1282 1282
1283 1283 def isancestor(self, a, b):
1284 1284 """return True if node a is an ancestor of node b
1285 1285
1286 1286 A revision is considered an ancestor of itself."""
1287 1287 a, b = self.rev(a), self.rev(b)
1288 1288 return self.isancestorrev(a, b)
1289 1289
1290 1290 def isancestorrev(self, a, b):
1291 1291 """return True if revision a is an ancestor of revision b
1292 1292
1293 1293 A revision is considered an ancestor of itself.
1294 1294
1295 1295 The implementation of this is trivial but the use of
1296 1296 reachableroots is not."""
1297 1297 if a == nullrev:
1298 1298 return True
1299 1299 elif a == b:
1300 1300 return True
1301 1301 elif a > b:
1302 1302 return False
1303 1303 return bool(self.reachableroots(a, [b], [a], includepath=False))
1304 1304
1305 1305 def reachableroots(self, minroot, heads, roots, includepath=False):
1306 1306 """return (heads(::(<roots> and <roots>::<heads>)))
1307 1307
1308 1308 If includepath is True, return (<roots>::<heads>)."""
1309 1309 try:
1310 1310 return self.index.reachableroots2(
1311 1311 minroot, heads, roots, includepath
1312 1312 )
1313 1313 except AttributeError:
1314 1314 return dagop._reachablerootspure(
1315 1315 self.parentrevs, minroot, roots, heads, includepath
1316 1316 )
1317 1317
1318 1318 def ancestor(self, a, b):
1319 1319 """calculate the "best" common ancestor of nodes a and b"""
1320 1320
1321 1321 a, b = self.rev(a), self.rev(b)
1322 1322 try:
1323 1323 ancs = self.index.ancestors(a, b)
1324 1324 except (AttributeError, OverflowError):
1325 1325 ancs = ancestor.ancestors(self.parentrevs, a, b)
1326 1326 if ancs:
1327 1327 # choose a consistent winner when there's a tie
1328 1328 return min(map(self.node, ancs))
1329 1329 return nullid
1330 1330
1331 1331 def _match(self, id):
1332 1332 if isinstance(id, int):
1333 1333 # rev
1334 1334 return self.node(id)
1335 1335 if len(id) == 20:
1336 1336 # possibly a binary node
1337 1337 # odds of a binary node being all hex in ASCII are 1 in 10**25
1338 1338 try:
1339 1339 node = id
1340 1340 self.rev(node) # quick search the index
1341 1341 return node
1342 1342 except error.LookupError:
1343 1343 pass # may be partial hex id
1344 1344 try:
1345 1345 # str(rev)
1346 1346 rev = int(id)
1347 1347 if b"%d" % rev != id:
1348 1348 raise ValueError
1349 1349 if rev < 0:
1350 1350 rev = len(self) + rev
1351 1351 if rev < 0 or rev >= len(self):
1352 1352 raise ValueError
1353 1353 return self.node(rev)
1354 1354 except (ValueError, OverflowError):
1355 1355 pass
1356 1356 if len(id) == 40:
1357 1357 try:
1358 1358 # a full hex nodeid?
1359 1359 node = bin(id)
1360 1360 self.rev(node)
1361 1361 return node
1362 1362 except (TypeError, error.LookupError):
1363 1363 pass
1364 1364
1365 1365 def _partialmatch(self, id):
1366 1366 # we don't care about wdirfilenodeids as they should always be full hashes
1367 1367 maybewdir = wdirhex.startswith(id)
1368 1368 try:
1369 1369 partial = self.index.partialmatch(id)
1370 1370 if partial and self.hasnode(partial):
1371 1371 if maybewdir:
1372 1372 # single 'ff...' match in radix tree, ambiguous with wdir
1373 1373 raise error.RevlogError
1374 1374 return partial
1375 1375 if maybewdir:
1376 1376 # no 'ff...' match in radix tree, wdir identified
1377 1377 raise error.WdirUnsupported
1378 1378 return None
1379 1379 except error.RevlogError:
1380 1380 # parsers.c radix tree lookup gave multiple matches
1381 1381 # fast path: for unfiltered changelog, radix tree is accurate
1382 1382 if not getattr(self, 'filteredrevs', None):
1383 1383 raise error.AmbiguousPrefixLookupError(
1384 1384 id, self.indexfile, _(b'ambiguous identifier')
1385 1385 )
1386 1386 # fall through to slow path that filters hidden revisions
1387 1387 except (AttributeError, ValueError):
1388 1388 # we are pure python, or key was too short to search radix tree
1389 1389 pass
1390 1390
1391 1391 if id in self._pcache:
1392 1392 return self._pcache[id]
1393 1393
1394 1394 if len(id) <= 40:
1395 1395 try:
1396 1396 # hex(node)[:...]
1397 1397 l = len(id) // 2 # grab an even number of digits
1398 1398 prefix = bin(id[: l * 2])
1399 1399 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1400 1400 nl = [
1401 1401 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1402 1402 ]
1403 1403 if nullhex.startswith(id):
1404 1404 nl.append(nullid)
1405 1405 if len(nl) > 0:
1406 1406 if len(nl) == 1 and not maybewdir:
1407 1407 self._pcache[id] = nl[0]
1408 1408 return nl[0]
1409 1409 raise error.AmbiguousPrefixLookupError(
1410 1410 id, self.indexfile, _(b'ambiguous identifier')
1411 1411 )
1412 1412 if maybewdir:
1413 1413 raise error.WdirUnsupported
1414 1414 return None
1415 1415 except TypeError:
1416 1416 pass
1417 1417
1418 1418 def lookup(self, id):
1419 1419 """locate a node based on:
1420 1420 - revision number or str(revision number)
1421 1421 - nodeid or subset of hex nodeid
1422 1422 """
1423 1423 n = self._match(id)
1424 1424 if n is not None:
1425 1425 return n
1426 1426 n = self._partialmatch(id)
1427 1427 if n:
1428 1428 return n
1429 1429
1430 1430 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1431 1431
1432 1432 def shortest(self, node, minlength=1):
1433 1433 """Find the shortest unambiguous prefix that matches node."""
1434 1434
1435 1435 def isvalid(prefix):
1436 1436 try:
1437 1437 matchednode = self._partialmatch(prefix)
1438 1438 except error.AmbiguousPrefixLookupError:
1439 1439 return False
1440 1440 except error.WdirUnsupported:
1441 1441 # single 'ff...' match
1442 1442 return True
1443 1443 if matchednode is None:
1444 1444 raise error.LookupError(node, self.indexfile, _(b'no node'))
1445 1445 return True
1446 1446
1447 1447 def maybewdir(prefix):
1448 1448 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1449 1449
1450 1450 hexnode = hex(node)
1451 1451
1452 1452 def disambiguate(hexnode, minlength):
1453 1453 """Disambiguate against wdirid."""
1454 1454 for length in range(minlength, 41):
1455 1455 prefix = hexnode[:length]
1456 1456 if not maybewdir(prefix):
1457 1457 return prefix
1458 1458
1459 1459 if not getattr(self, 'filteredrevs', None):
1460 1460 try:
1461 1461 length = max(self.index.shortest(node), minlength)
1462 1462 return disambiguate(hexnode, length)
1463 1463 except error.RevlogError:
1464 1464 if node != wdirid:
1465 1465 raise error.LookupError(node, self.indexfile, _(b'no node'))
1466 1466 except AttributeError:
1467 1467 # Fall through to pure code
1468 1468 pass
1469 1469
1470 1470 if node == wdirid:
1471 1471 for length in range(minlength, 41):
1472 1472 prefix = hexnode[:length]
1473 1473 if isvalid(prefix):
1474 1474 return prefix
1475 1475
1476 1476 for length in range(minlength, 41):
1477 1477 prefix = hexnode[:length]
1478 1478 if isvalid(prefix):
1479 1479 return disambiguate(hexnode, length)
1480 1480
1481 1481 def cmp(self, node, text):
1482 1482 """compare text with a given file revision
1483 1483
1484 1484 returns True if text is different from what is stored.
1485 1485 """
1486 1486 p1, p2 = self.parents(node)
1487 1487 return storageutil.hashrevisionsha1(text, p1, p2) != node
1488 1488
1489 1489 def _cachesegment(self, offset, data):
1490 1490 """Add a segment to the revlog cache.
1491 1491
1492 1492 Accepts an absolute offset and the data that is at that location.
1493 1493 """
1494 1494 o, d = self._chunkcache
1495 1495 # try to add to existing cache
1496 1496 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1497 1497 self._chunkcache = o, d + data
1498 1498 else:
1499 1499 self._chunkcache = offset, data
1500 1500
1501 1501 def _readsegment(self, offset, length, df=None):
1502 1502 """Load a segment of raw data from the revlog.
1503 1503
1504 1504 Accepts an absolute offset, length to read, and an optional existing
1505 1505 file handle to read from.
1506 1506
1507 1507 If an existing file handle is passed, it will be seeked and the
1508 1508 original seek position will NOT be restored.
1509 1509
1510 1510 Returns a str or buffer of raw byte data.
1511 1511
1512 1512 Raises if the requested number of bytes could not be read.
1513 1513 """
1514 1514 # Cache data both forward and backward around the requested
1515 1515 # data, in a fixed size window. This helps speed up operations
1516 1516 # involving reading the revlog backwards.
1517 1517 cachesize = self._chunkcachesize
1518 1518 realoffset = offset & ~(cachesize - 1)
1519 1519 reallength = (
1520 1520 (offset + length + cachesize) & ~(cachesize - 1)
1521 1521 ) - realoffset
1522 1522 with self._datareadfp(df) as df:
1523 1523 df.seek(realoffset)
1524 1524 d = df.read(reallength)
1525 1525
1526 1526 self._cachesegment(realoffset, d)
1527 1527 if offset != realoffset or reallength != length:
1528 1528 startoffset = offset - realoffset
1529 1529 if len(d) - startoffset < length:
1530 1530 raise error.RevlogError(
1531 1531 _(
1532 1532 b'partial read of revlog %s; expected %d bytes from '
1533 1533 b'offset %d, got %d'
1534 1534 )
1535 1535 % (
1536 1536 self.indexfile if self._inline else self.datafile,
1537 1537 length,
1538 1538 realoffset,
1539 1539 len(d) - startoffset,
1540 1540 )
1541 1541 )
1542 1542
1543 1543 return util.buffer(d, startoffset, length)
1544 1544
1545 1545 if len(d) < length:
1546 1546 raise error.RevlogError(
1547 1547 _(
1548 1548 b'partial read of revlog %s; expected %d bytes from offset '
1549 1549 b'%d, got %d'
1550 1550 )
1551 1551 % (
1552 1552 self.indexfile if self._inline else self.datafile,
1553 1553 length,
1554 1554 offset,
1555 1555 len(d),
1556 1556 )
1557 1557 )
1558 1558
1559 1559 return d
1560 1560
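
The alignment arithmetic in `_readsegment` above rounds the requested range out to whole cache-size windows, which is why the cache size must be a power of two. A worked example with the default 64 KiB window:

```python
cachesize = 65536  # default _chunkcachesize, a power of two

offset, length = 65000, 2000  # request straddles a 64 KiB boundary
realoffset = offset & ~(cachesize - 1)
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset

assert (realoffset, reallength) == (0, 131072)  # two full windows are read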
1561 1561 def _getsegment(self, offset, length, df=None):
1562 1562 """Obtain a segment of raw data from the revlog.
1563 1563
1564 1564 Accepts an absolute offset, length of bytes to obtain, and an
1565 1565 optional file handle to the already-opened revlog. If the file
1566 1566 handle is used, its original seek position will not be preserved.
1567 1567
1568 1568 Requests for data may be returned from a cache.
1569 1569
1570 1570 Returns a str or a buffer instance of raw byte data.
1571 1571 """
1572 1572 o, d = self._chunkcache
1573 1573 l = len(d)
1574 1574
1575 1575 # is it in the cache?
1576 1576 cachestart = offset - o
1577 1577 cacheend = cachestart + length
1578 1578 if cachestart >= 0 and cacheend <= l:
1579 1579 if cachestart == 0 and cacheend == l:
1580 1580 return d # avoid a copy
1581 1581 return util.buffer(d, cachestart, cacheend - cachestart)
1582 1582
1583 1583 return self._readsegment(offset, length, df=df)
1584 1584
1585 1585 def _getsegmentforrevs(self, startrev, endrev, df=None):
1586 1586 """Obtain a segment of raw data corresponding to a range of revisions.
1587 1587
1588 1588 Accepts the start and end revisions and an optional already-open
1589 1589 file handle to be used for reading. If the file handle is used, its
1590 1590 seek position will not be preserved.
1591 1591
1592 1592 Requests for data may be satisfied by a cache.
1593 1593
1594 1594 Returns a 2-tuple of (offset, data) for the requested range of
1595 1595 revisions. Offset is the integer offset from the beginning of the
1596 1596 revlog and data is a str or buffer of the raw byte data.
1597 1597
1598 1598 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1599 1599 to determine where each revision's data begins and ends.
1600 1600 """
1601 1601 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1602 1602 # (functions are expensive).
1603 1603 index = self.index
1604 1604 istart = index[startrev]
1605 1605 start = int(istart[0] >> 16)
1606 1606 if startrev == endrev:
1607 1607 end = start + istart[1]
1608 1608 else:
1609 1609 iend = index[endrev]
1610 1610 end = int(iend[0] >> 16) + iend[1]
1611 1611
1612 1612 if self._inline:
1613 1613 start += (startrev + 1) * self._io.size
1614 1614 end += (endrev + 1) * self._io.size
1615 1615 length = end - start
1616 1616
1617 1617 return start, self._getsegment(start, length, df=df)
1618 1618
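
The `(rev + 1) * self._io.size` adjustment above is how inline revlogs map logical data offsets to physical file offsets: the data chunk of rev r is preceded by the index entries of revs 0..r, each 64 bytes. Illustration:

```python
iosize = 64  # indexformatng.size

def physical_start(logical_start, rev):
    # rev's chunk sits after the index entries for revs 0..rev
    return logical_start + (rev + 1) * iosize

assert physical_start(0, 0) == 64    # rev 0's data follows its own entry
assert physical_start(30, 1) == 158  # 30 bytes of data + 2 index entries
```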
1619 1619 def _chunk(self, rev, df=None):
1620 1620 """Obtain a single decompressed chunk for a revision.
1621 1621
1622 1622 Accepts an integer revision and an optional already-open file handle
1623 1623 to be used for reading. If used, the seek position of the file will not
1624 1624 be preserved.
1625 1625
1626 1626 Returns a str holding uncompressed data for the requested revision.
1627 1627 """
1628 1628 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1629 1629
1630 1630 def _chunks(self, revs, df=None, targetsize=None):
1631 1631 """Obtain decompressed chunks for the specified revisions.
1632 1632
1633 1633 Accepts an iterable of numeric revisions that are assumed to be in
1634 1634 ascending order. Also accepts an optional already-open file handle
1635 1635 to be used for reading. If used, the seek position of the file will
1636 1636 not be preserved.
1637 1637
1638 1638 This function is similar to calling ``self._chunk()`` multiple times,
1639 1639 but is faster.
1640 1640
1641 1641 Returns a list with decompressed data for each requested revision.
1642 1642 """
1643 1643 if not revs:
1644 1644 return []
1645 1645 start = self.start
1646 1646 length = self.length
1647 1647 inline = self._inline
1648 1648 iosize = self._io.size
1649 1649 buffer = util.buffer
1650 1650
1651 1651 l = []
1652 1652 ladd = l.append
1653 1653
1654 1654 if not self._withsparseread:
1655 1655 slicedchunks = (revs,)
1656 1656 else:
1657 1657 slicedchunks = deltautil.slicechunk(
1658 1658 self, revs, targetsize=targetsize
1659 1659 )
1660 1660
1661 1661 for revschunk in slicedchunks:
1662 1662 firstrev = revschunk[0]
1663 1663 # Skip trailing revisions with empty diff
1664 1664 for lastrev in revschunk[::-1]:
1665 1665 if length(lastrev) != 0:
1666 1666 break
1667 1667
1668 1668 try:
1669 1669 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1670 1670 except OverflowError:
1671 1671 # issue4215 - we can't cache a run of chunks greater than
1672 1672 # 2G on Windows
1673 1673 return [self._chunk(rev, df=df) for rev in revschunk]
1674 1674
1675 1675 decomp = self.decompress
1676 1676 for rev in revschunk:
1677 1677 chunkstart = start(rev)
1678 1678 if inline:
1679 1679 chunkstart += (rev + 1) * iosize
1680 1680 chunklength = length(rev)
1681 1681 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1682 1682
1683 1683 return l
1684 1684
1685 1685 def _chunkclear(self):
1686 1686 """Clear the raw chunk cache."""
1687 1687 self._chunkcache = (0, b'')
1688 1688
1689 1689 def deltaparent(self, rev):
1690 1690 """return deltaparent of the given revision"""
1691 1691 base = self.index[rev][3]
1692 1692 if base == rev:
1693 1693 return nullrev
1694 1694 elif self._generaldelta:
1695 1695 return base
1696 1696 else:
1697 1697 return rev - 1
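# Editor's note on the three cases above: base == rev means the revision is
# stored as a full text, so it has no delta parent (nullrev); with
# generaldelta the index records the real delta parent in entry[3]; older
# formats always delta against the previous revision, rev - 1.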
1698 1698
1699 1699 def issnapshot(self, rev):
1700 1700 """tells whether rev is a snapshot
1701 1701 """
1702 1702 if not self._sparserevlog:
1703 1703 return self.deltaparent(rev) == nullrev
1704 1704 elif util.safehasattr(self.index, b'issnapshot'):
1705 1705 # directly assign the method to cache the testing and access
1706 1706 self.issnapshot = self.index.issnapshot
1707 1707 return self.issnapshot(rev)
1708 1708 if rev == nullrev:
1709 1709 return True
1710 1710 entry = self.index[rev]
1711 1711 base = entry[3]
1712 1712 if base == rev:
1713 1713 return True
1714 1714 if base == nullrev:
1715 1715 return True
1716 1716 p1 = entry[5]
1717 1717 p2 = entry[6]
1718 1718 if base == p1 or base == p2:
1719 1719 return False
1720 1720 return self.issnapshot(base)
1721 1721
1722 1722 def snapshotdepth(self, rev):
1723 1723 """number of snapshot in the chain before this one"""
1724 1724 if not self.issnapshot(rev):
1725 1725 raise error.ProgrammingError(b'revision %d not a snapshot')
1726 1726 return len(self._deltachain(rev)[0]) - 1
1727 1727
1728 1728 def revdiff(self, rev1, rev2):
1729 1729 """return or calculate a delta between two revisions
1730 1730
1731 1731 The delta calculated is in binary form and is intended to be written to
1732 1732 revlog data directly. So this function needs raw revision data.
1733 1733 """
1734 1734 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1735 1735 return bytes(self._chunk(rev2))
1736 1736
1737 1737 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1738 1738
1739 1739 def _processflags(self, text, flags, operation, raw=False):
1740 1740 """deprecated entry point to access flag processors"""
1741 1741 msg = b'_processflags(...) use the specialized variant'
1742 1742 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1743 1743 if raw:
1744 1744 return text, flagutil.processflagsraw(self, text, flags)
1745 1745 elif operation == b'read':
1746 1746 return flagutil.processflagsread(self, text, flags)
1747 1747 else: # write operation
1748 1748 return flagutil.processflagswrite(self, text, flags)
1749 1749
1750 1750 def revision(self, nodeorrev, _df=None, raw=False):
1751 1751 """return an uncompressed revision of a given node or revision
1752 1752 number.
1753 1753
1754 1754 _df - an existing file handle to read from. (internal-only)
1755 1755 raw - an optional argument specifying if the revision data is to be
1756 1756 treated as raw data when applying flag transforms. 'raw' should be set
1757 1757 to True when generating changegroups or in debug commands.
1758 1758 """
1759 1759 if raw:
1760 1760 msg = (
1761 1761 b'revlog.revision(..., raw=True) is deprecated, '
1762 1762 b'use revlog.rawdata(...)'
1763 1763 )
1764 1764 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1765 1765 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1766 1766
1767 1767 def sidedata(self, nodeorrev, _df=None):
1768 1768 """a map of extra data related to the changeset but not part of the hash
1769 1769
1770 1770 This function currently returns a dictionary. However, a more advanced
1771 1771 mapping object will likely be used in the future for more
1772 1772 efficient/lazy code.
1773 1773 """
1774 1774 return self._revisiondata(nodeorrev, _df)[1]
1775 1775
1776 1776 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1777 1777 # deal with <nodeorrev> argument type
1778 1778 if isinstance(nodeorrev, int):
1779 1779 rev = nodeorrev
1780 1780 node = self.node(rev)
1781 1781 else:
1782 1782 node = nodeorrev
1783 1783 rev = None
1784 1784
1785 1785 # fast path the special `nullid` rev
1786 1786 if node == nullid:
1787 1787 return b"", {}
1788 1788
1789 1789 # ``rawtext`` is the text as stored inside the revlog. Might be the
1790 1790 # revision or might need to be processed to retrieve the revision.
1791 1791 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1792 1792
1793 1793 if raw and validated:
1794 1794 # if we don't want to process the raw text and that raw
1795 1795 # text is cached, we can exit early.
1796 1796 return rawtext, {}
1797 1797 if rev is None:
1798 1798 rev = self.rev(node)
1799 1799 # the revlog's flags for this revision
1800 1800 # (they usually alter its state or content)
1801 1801 flags = self.flags(rev)
1802 1802
1803 1803 if validated and flags == REVIDX_DEFAULT_FLAGS:
1804 1804 # no extra flags set, no flag processor runs, text = rawtext
1805 1805 return rawtext, {}
1806 1806
1807 1807 sidedata = {}
1808 1808 if raw:
1809 1809 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1810 1810 text = rawtext
1811 1811 else:
1812 1812 try:
1813 1813 r = flagutil.processflagsread(self, rawtext, flags)
1814 1814 except error.SidedataHashError as exc:
1815 1815 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1816 1816 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1817 1817 raise error.RevlogError(msg)
1818 1818 text, validatehash, sidedata = r
1819 1819 if validatehash:
1820 1820 self.checkhash(text, node, rev=rev)
1821 1821 if not validated:
1822 1822 self._revisioncache = (node, rev, rawtext)
1823 1823
1824 1824 return text, sidedata
1825 1825
1826 1826 def _rawtext(self, node, rev, _df=None):
1827 1827 """return the possibly unvalidated rawtext for a revision
1828 1828
1829 1829 returns (rev, rawtext, validated)
1830 1830 """
1831 1831
1832 1832 # revision in the cache (could be useful to apply delta)
1833 1833 cachedrev = None
1834 1834 # An intermediate text to apply deltas to
1835 1835 basetext = None
1836 1836
1837 1837 # Check if we have the entry in cache
1838 1838 # The cache entry looks like (node, rev, rawtext)
1839 1839 if self._revisioncache:
1840 1840 if self._revisioncache[0] == node:
1841 1841 return (rev, self._revisioncache[2], True)
1842 1842 cachedrev = self._revisioncache[1]
1843 1843
1844 1844 if rev is None:
1845 1845 rev = self.rev(node)
1846 1846
1847 1847 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1848 1848 if stopped:
1849 1849 basetext = self._revisioncache[2]
1850 1850
1851 1851 # drop cache to save memory; the caller is expected to
1852 1852 # update self._revisioncache after validating the text
1853 1853 self._revisioncache = None
1854 1854
1855 1855 targetsize = None
1856 1856 rawsize = self.index[rev][2]
1857 1857 if 0 <= rawsize:
1858 1858 targetsize = 4 * rawsize
1859 1859
1860 1860 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1861 1861 if basetext is None:
1862 1862 basetext = bytes(bins[0])
1863 1863 bins = bins[1:]
1864 1864
1865 1865 rawtext = mdiff.patches(basetext, bins)
1866 1866 del basetext # let us have a chance to free memory early
1867 1867 return (rev, rawtext, False)
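# Editor's sketch of the reconstruction above (illustrative): for a delta
# chain [base, d1, d2], _chunks() returns the three decompressed chunks and
#
#   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
#
# applies each delta in order. The targetsize of 4 * rawsize is a read-ahead
# cap passed down to chunk slicing.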
1868 1868
1869 1869 def rawdata(self, nodeorrev, _df=None):
1870 1870 """return an uncompressed raw data of a given node or revision number.
1871 1871
1872 1872 _df - an existing file handle to read from. (internal-only)
1873 1873 """
1874 1874 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1875 1875
1876 1876 def hash(self, text, p1, p2):
1877 1877 """Compute a node hash.
1878 1878
1879 1879 Available as a function so that subclasses can replace the hash
1880 1880 as needed.
1881 1881 """
1882 1882 return storageutil.hashrevisionsha1(text, p1, p2)
1883 1883
1884 1884 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1885 1885 """Check node hash integrity.
1886 1886
1887 1887 Available as a function so that subclasses can extend hash mismatch
1888 1888 behaviors as needed.
1889 1889 """
1890 1890 try:
1891 1891 if p1 is None and p2 is None:
1892 1892 p1, p2 = self.parents(node)
1893 1893 if node != self.hash(text, p1, p2):
1894 1894 # Clear the revision cache on hash failure. The revision cache
1895 1895 # only stores the raw revision and clearing the cache does have
1896 1896 # the side-effect that we won't have a cache hit when the raw
1897 1897 # revision data is accessed. But this case should be rare and
1898 1898 # it is extra work to teach the cache about the hash
1899 1899 # verification state.
1900 1900 if self._revisioncache and self._revisioncache[0] == node:
1901 1901 self._revisioncache = None
1902 1902
1903 1903 revornode = rev
1904 1904 if revornode is None:
1905 1905 revornode = templatefilters.short(hex(node))
1906 1906 raise error.RevlogError(
1907 1907 _(b"integrity check failed on %s:%s")
1908 1908 % (self.indexfile, pycompat.bytestr(revornode))
1909 1909 )
1910 1910 except error.RevlogError:
1911 1911 if self._censorable and storageutil.iscensoredtext(text):
1912 1912 raise error.CensoredNodeError(self.indexfile, node, text)
1913 1913 raise
1914 1914
1915 1915 def _enforceinlinesize(self, tr, fp=None):
1916 1916 """Check if the revlog is too big for inline and convert if so.
1917 1917
1918 1918 This should be called after revisions are added to the revlog. If the
1919 1919 revlog has grown too large to be an inline revlog, it will convert it
1920 1920 to use multiple index and data files.
1921 1921 """
1922 1922 tiprev = len(self) - 1
1923 1923 if (
1924 1924 not self._inline
1925 1925 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1926 1926 ):
1927 1927 return
1928 1928
1929 1929 trinfo = tr.find(self.indexfile)
1930 1930 if trinfo is None:
1931 1931 raise error.RevlogError(
1932 1932 _(b"%s not found in the transaction") % self.indexfile
1933 1933 )
1934 1934
1935 1935 trindex = trinfo[2]
1936 1936 if trindex is not None:
1937 1937 dataoff = self.start(trindex)
1938 1938 else:
1939 1939 # revlog was stripped at start of transaction, use all leftover data
1940 1940 trindex = len(self) - 1
1941 1941 dataoff = self.end(tiprev)
1942 1942
1943 1943 tr.add(self.datafile, dataoff)
1944 1944
1945 1945 if fp:
1946 1946 fp.flush()
1947 1947 fp.close()
1948 1948 # We can't use the cached file handle after close(). So prevent
1949 1949 # its usage.
1950 1950 self._writinghandles = None
1951 1951
1952 1952 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1953 1953 for r in self:
1954 1954 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1955 1955
1956 1956 with self._indexfp(b'w') as fp:
1957 1957 self.version &= ~FLAG_INLINE_DATA
1958 1958 self._inline = False
1959 1959 io = self._io
1960 1960 for i in self:
1961 1961 e = io.packentry(self.index[i], self.node, self.version, i)
1962 1962 fp.write(e)
1963 1963
1964 1964 # the temp file replace the real index when we exit the context
1965 1965 # manager
1966 1966
1967 1967 tr.replace(self.indexfile, trindex * self._io.size)
1968 nodemaputil.setup_persistent_nodemap(tr, self)
1968 1969 self._chunkclear()
1969 1970
1970 1971 def _nodeduplicatecallback(self, transaction, node):
1971 1972 """called when trying to add a node already stored.
1972 1973 """
1973 1974
1974 1975 def addrevision(
1975 1976 self,
1976 1977 text,
1977 1978 transaction,
1978 1979 link,
1979 1980 p1,
1980 1981 p2,
1981 1982 cachedelta=None,
1982 1983 node=None,
1983 1984 flags=REVIDX_DEFAULT_FLAGS,
1984 1985 deltacomputer=None,
1985 1986 sidedata=None,
1986 1987 ):
1987 1988 """add a revision to the log
1988 1989
1989 1990 text - the revision data to add
1990 1991 transaction - the transaction object used for rollback
1991 1992 link - the linkrev data to add
1992 1993 p1, p2 - the parent nodeids of the revision
1993 1994 cachedelta - an optional precomputed delta
1994 1995 node - nodeid of revision; typically node is not specified, and it is
1995 1996 computed by default as hash(text, p1, p2); however, subclasses might
1996 1997 use a different hashing method (and override checkhash() in that case)
1997 1998 flags - the known flags to set on the revision
1998 1999 deltacomputer - an optional deltacomputer instance shared between
1999 2000 multiple calls
2000 2001 """
2001 2002 if link == nullrev:
2002 2003 raise error.RevlogError(
2003 2004 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2004 2005 )
2005 2006
2006 2007 if sidedata is None:
2007 2008 sidedata = {}
2008 2009 flags = flags & ~REVIDX_SIDEDATA
2009 2010 elif not self.hassidedata:
2010 2011 raise error.ProgrammingError(
2011 2012 _(b"trying to add sidedata to a revlog who don't support them")
2012 2013 )
2013 2014 else:
2014 2015 flags |= REVIDX_SIDEDATA
2015 2016
2016 2017 if flags:
2017 2018 node = node or self.hash(text, p1, p2)
2018 2019
2019 2020 rawtext, validatehash = flagutil.processflagswrite(
2020 2021 self, text, flags, sidedata=sidedata
2021 2022 )
2022 2023
2023 2024 # If the flag processor modifies the revision data, ignore any provided
2024 2025 # cachedelta.
2025 2026 if rawtext != text:
2026 2027 cachedelta = None
2027 2028
2028 2029 if len(rawtext) > _maxentrysize:
2029 2030 raise error.RevlogError(
2030 2031 _(
2031 2032 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2032 2033 )
2033 2034 % (self.indexfile, len(rawtext))
2034 2035 )
2035 2036
2036 2037 node = node or self.hash(rawtext, p1, p2)
2037 2038 if self.index.has_node(node):
2038 2039 return node
2039 2040
2040 2041 if validatehash:
2041 2042 self.checkhash(rawtext, node, p1=p1, p2=p2)
2042 2043
2043 2044 return self.addrawrevision(
2044 2045 rawtext,
2045 2046 transaction,
2046 2047 link,
2047 2048 p1,
2048 2049 p2,
2049 2050 node,
2050 2051 flags,
2051 2052 cachedelta=cachedelta,
2052 2053 deltacomputer=deltacomputer,
2053 2054 )
2054 2055
2055 2056 def addrawrevision(
2056 2057 self,
2057 2058 rawtext,
2058 2059 transaction,
2059 2060 link,
2060 2061 p1,
2061 2062 p2,
2062 2063 node,
2063 2064 flags,
2064 2065 cachedelta=None,
2065 2066 deltacomputer=None,
2066 2067 ):
2067 2068 """add a raw revision with known flags, node and parents
2068 2069 useful when reusing a revision not stored in this revlog (e.g. received
2069 2070 over the wire, or read from an external bundle).
2070 2071 """
2071 2072 dfh = None
2072 2073 if not self._inline:
2073 2074 dfh = self._datafp(b"a+")
2074 2075 ifh = self._indexfp(b"a+")
2075 2076 try:
2076 2077 return self._addrevision(
2077 2078 node,
2078 2079 rawtext,
2079 2080 transaction,
2080 2081 link,
2081 2082 p1,
2082 2083 p2,
2083 2084 flags,
2084 2085 cachedelta,
2085 2086 ifh,
2086 2087 dfh,
2087 2088 deltacomputer=deltacomputer,
2088 2089 )
2089 2090 finally:
2090 2091 if dfh:
2091 2092 dfh.close()
2092 2093 ifh.close()
2093 2094
2094 2095 def compress(self, data):
2095 2096 """Generate a possibly-compressed representation of data."""
2096 2097 if not data:
2097 2098 return b'', data
2098 2099
2099 2100 compressed = self._compressor.compress(data)
2100 2101
2101 2102 if compressed:
2102 2103 # The revlog compressor added the header in the returned data.
2103 2104 return b'', compressed
2104 2105
2105 2106 if data[0:1] == b'\0':
2106 2107 return b'', data
2107 2108 return b'u', data
2108 2109
2109 2110 def decompress(self, data):
2110 2111 """Decompress a revlog chunk.
2111 2112
2112 2113 The chunk is expected to begin with a header identifying the
2113 2114 format type so it can be routed to an appropriate decompressor.
2114 2115 """
2115 2116 if not data:
2116 2117 return data
2117 2118
2118 2119 # Revlogs are read much more frequently than they are written and many
2119 2120 # chunks only take microseconds to decompress, so performance is
2120 2121 # important here.
2121 2122 #
2122 2123 # We can make a few assumptions about revlogs:
2123 2124 #
2124 2125 # 1) the majority of chunks will be compressed (as opposed to inline
2125 2126 # raw data).
2126 2127 # 2) decompressing *any* data will likely be at least 10x slower than
2127 2128 # returning raw inline data.
2128 2129 # 3) we want to prioritize common and officially supported compression
2129 2130 # engines
2130 2131 #
2131 2132 # It follows that we want to optimize for "decompress compressed data
2132 2133 # when encoded with common and officially supported compression engines"
2133 2134 # case over "raw data" and "data encoded by less common or non-official
2134 2135 # compression engines." That is why we have the inline lookup first
2135 2136 # followed by the compengines lookup.
2136 2137 #
2137 2138 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2138 2139 # compressed chunks. And this matters for changelog and manifest reads.
2139 2140 t = data[0:1]
2140 2141
2141 2142 if t == b'x':
2142 2143 try:
2143 2144 return _zlibdecompress(data)
2144 2145 except zlib.error as e:
2145 2146 raise error.RevlogError(
2146 2147 _(b'revlog decompress error: %s')
2147 2148 % stringutil.forcebytestr(e)
2148 2149 )
2149 2150 # '\0' is more common than 'u' so it goes first.
2150 2151 elif t == b'\0':
2151 2152 return data
2152 2153 elif t == b'u':
2153 2154 return util.buffer(data, 1)
2154 2155
2155 2156 try:
2156 2157 compressor = self._decompressors[t]
2157 2158 except KeyError:
2158 2159 try:
2159 2160 engine = util.compengines.forrevlogheader(t)
2160 2161 compressor = engine.revlogcompressor(self._compengineopts)
2161 2162 self._decompressors[t] = compressor
2162 2163 except KeyError:
2163 2164 raise error.RevlogError(_(b'unknown compression type %r') % t)
2164 2165
2165 2166 return compressor.decompress(data)
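# Editor's sketch of the header convention shared by compress() and
# decompress() above, assuming the default zlib engine:
#
#   compress(b'')        -> (b'', b'')        # empty input is stored as-is
#   compress(b'\0...')   -> (b'', b'\0...')   # NUL-leading data needs no marker
#   compress(small)      -> (b'u', small)     # b'u' marks stored-uncompressed
#   compress(big)        -> (b'', b'x...')    # zlib output carries its own b'x'
#
# decompress() routes on that first byte: b'x' -> zlib, b'\0' -> raw data,
# b'u' -> strip the marker, anything else -> engine lookup by header byte.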
2166 2167
2167 2168 def _addrevision(
2168 2169 self,
2169 2170 node,
2170 2171 rawtext,
2171 2172 transaction,
2172 2173 link,
2173 2174 p1,
2174 2175 p2,
2175 2176 flags,
2176 2177 cachedelta,
2177 2178 ifh,
2178 2179 dfh,
2179 2180 alwayscache=False,
2180 2181 deltacomputer=None,
2181 2182 ):
2182 2183 """internal function to add revisions to the log
2183 2184
2184 2185 see addrevision for argument descriptions.
2185 2186
2186 2187 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2187 2188
2188 2189 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2189 2190 be used.
2190 2191
2191 2192 invariants:
2192 2193 - rawtext is optional (can be None); if not set, cachedelta must be set.
2193 2194 if both are set, they must correspond to each other.
2194 2195 """
2195 2196 if node == nullid:
2196 2197 raise error.RevlogError(
2197 2198 _(b"%s: attempt to add null revision") % self.indexfile
2198 2199 )
2199 2200 if node == wdirid or node in wdirfilenodeids:
2200 2201 raise error.RevlogError(
2201 2202 _(b"%s: attempt to add wdir revision") % self.indexfile
2202 2203 )
2203 2204
2204 2205 if self._inline:
2205 2206 fh = ifh
2206 2207 else:
2207 2208 fh = dfh
2208 2209
2209 2210 btext = [rawtext]
2210 2211
2211 2212 curr = len(self)
2212 2213 prev = curr - 1
2213 2214 offset = self.end(prev)
2214 2215 p1r, p2r = self.rev(p1), self.rev(p2)
2215 2216
2216 2217 # full versions are inserted when the needed deltas
2217 2218 # become comparable to the uncompressed text
2218 2219 if rawtext is None:
2219 2220 # need rawtext size, before changed by flag processors, which is
2220 2221 # the non-raw size. use revlog explicitly to avoid filelog's extra
2221 2222 # logic that might remove metadata size.
2222 2223 textlen = mdiff.patchedsize(
2223 2224 revlog.size(self, cachedelta[0]), cachedelta[1]
2224 2225 )
2225 2226 else:
2226 2227 textlen = len(rawtext)
2227 2228
2228 2229 if deltacomputer is None:
2229 2230 deltacomputer = deltautil.deltacomputer(self)
2230 2231
2231 2232 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2232 2233
2233 2234 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2234 2235
2235 2236 e = (
2236 2237 offset_type(offset, flags),
2237 2238 deltainfo.deltalen,
2238 2239 textlen,
2239 2240 deltainfo.base,
2240 2241 link,
2241 2242 p1r,
2242 2243 p2r,
2243 2244 node,
2244 2245 )
2245 2246 self.index.append(e)
2246 2247
2247 2248 entry = self._io.packentry(e, self.node, self.version, curr)
2248 2249 self._writeentry(
2249 2250 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2250 2251 )
2251 2252
2252 2253 rawtext = btext[0]
2253 2254
2254 2255 if alwayscache and rawtext is None:
2255 2256 rawtext = deltacomputer.buildtext(revinfo, fh)
2256 2257
2257 2258 if type(rawtext) == bytes: # only accept immutable objects
2258 2259 self._revisioncache = (node, curr, rawtext)
2259 2260 self._chainbasecache[curr] = deltainfo.chainbase
2260 2261 return node
2261 2262
2262 2263 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2263 2264 # Files opened in a+ mode have inconsistent behavior on various
2264 2265 # platforms. Windows requires that a file positioning call be made
2265 2266 # when the file handle transitions between reads and writes. See
2266 2267 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2267 2268 # platforms, Python or the platform itself can be buggy. Some versions
2268 2269 # of Solaris have been observed to not append at the end of the file
2269 2270 # if the file was seeked to before the end. See issue4943 for more.
2270 2271 #
2271 2272 # We work around this issue by inserting a seek() before writing.
2272 2273 # Note: This is likely not necessary on Python 3. However, because
2273 2274 # the file handle is reused for reads and may be seeked there, we need
2274 2275 # to be careful before changing this.
2275 2276 ifh.seek(0, os.SEEK_END)
2276 2277 if dfh:
2277 2278 dfh.seek(0, os.SEEK_END)
2278 2279
2279 2280 curr = len(self) - 1
2280 2281 if not self._inline:
2281 2282 transaction.add(self.datafile, offset)
2282 2283 transaction.add(self.indexfile, curr * len(entry))
2283 2284 if data[0]:
2284 2285 dfh.write(data[0])
2285 2286 dfh.write(data[1])
2286 2287 ifh.write(entry)
2287 2288 else:
2288 2289 offset += curr * self._io.size
2289 2290 transaction.add(self.indexfile, offset, curr)
2290 2291 ifh.write(entry)
2291 2292 ifh.write(data[0])
2292 2293 ifh.write(data[1])
2293 2294 self._enforceinlinesize(transaction, ifh)
2294 2295 nodemaputil.setup_persistent_nodemap(transaction, self)
2295 2296
2296 2297 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2297 2298 """
2298 2299 add a delta group
2299 2300
2300 2301 given a set of deltas, add them to the revision log. the
2301 2302 first delta is against its parent, which should be in our
2302 2303 log, the rest are against the previous delta.
2303 2304
2304 2305 If ``addrevisioncb`` is defined, it will be called with arguments of
2305 2306 this revlog and the node that was added.
2306 2307 """
2307 2308
2308 2309 if self._writinghandles:
2309 2310 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2310 2311
2311 2312 nodes = []
2312 2313
2313 2314 r = len(self)
2314 2315 end = 0
2315 2316 if r:
2316 2317 end = self.end(r - 1)
2317 2318 ifh = self._indexfp(b"a+")
2318 2319 isize = r * self._io.size
2319 2320 if self._inline:
2320 2321 transaction.add(self.indexfile, end + isize, r)
2321 2322 dfh = None
2322 2323 else:
2323 2324 transaction.add(self.indexfile, isize, r)
2324 2325 transaction.add(self.datafile, end)
2325 2326 dfh = self._datafp(b"a+")
2326 2327
2327 2328 def flush():
2328 2329 if dfh:
2329 2330 dfh.flush()
2330 2331 ifh.flush()
2331 2332
2332 2333 self._writinghandles = (ifh, dfh)
2333 2334
2334 2335 try:
2335 2336 deltacomputer = deltautil.deltacomputer(self)
2336 2337 # loop through our set of deltas
2337 2338 for data in deltas:
2338 2339 node, p1, p2, linknode, deltabase, delta, flags = data
2339 2340 link = linkmapper(linknode)
2340 2341 flags = flags or REVIDX_DEFAULT_FLAGS
2341 2342
2342 2343 nodes.append(node)
2343 2344
2344 2345 if self.index.has_node(node):
2345 2346 self._nodeduplicatecallback(transaction, node)
2346 2347 # this can happen if two branches make the same change
2347 2348 continue
2348 2349
2349 2350 for p in (p1, p2):
2350 2351 if not self.index.has_node(p):
2351 2352 raise error.LookupError(
2352 2353 p, self.indexfile, _(b'unknown parent')
2353 2354 )
2354 2355
2355 2356 if not self.index.has_node(deltabase):
2356 2357 raise error.LookupError(
2357 2358 deltabase, self.indexfile, _(b'unknown delta base')
2358 2359 )
2359 2360
2360 2361 baserev = self.rev(deltabase)
2361 2362
2362 2363 if baserev != nullrev and self.iscensored(baserev):
2363 2364 # if base is censored, delta must be full replacement in a
2364 2365 # single patch operation
2365 2366 hlen = struct.calcsize(b">lll")
2366 2367 oldlen = self.rawsize(baserev)
2367 2368 newlen = len(delta) - hlen
2368 2369 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2369 2370 raise error.CensoredBaseError(
2370 2371 self.indexfile, self.node(baserev)
2371 2372 )
2372 2373
2373 2374 if not flags and self._peek_iscensored(baserev, delta, flush):
2374 2375 flags |= REVIDX_ISCENSORED
2375 2376
2376 2377 # We assume consumers of addrevisioncb will want to retrieve
2377 2378 # the added revision, which will require a call to
2378 2379 # revision(). revision() will fast path if there is a cache
2379 2380 # hit. So, we tell _addrevision() to always cache in this case.
2380 2381 # We're only using addgroup() in the context of changegroup
2381 2382 # generation so the revision data can always be handled as raw
2382 2383 # by the flagprocessor.
2383 2384 self._addrevision(
2384 2385 node,
2385 2386 None,
2386 2387 transaction,
2387 2388 link,
2388 2389 p1,
2389 2390 p2,
2390 2391 flags,
2391 2392 (baserev, delta),
2392 2393 ifh,
2393 2394 dfh,
2394 2395 alwayscache=bool(addrevisioncb),
2395 2396 deltacomputer=deltacomputer,
2396 2397 )
2397 2398
2398 2399 if addrevisioncb:
2399 2400 addrevisioncb(self, node)
2400 2401
2401 2402 if not dfh and not self._inline:
2402 2403 # addrevision switched from inline to conventional
2403 2404 # reopen the index
2404 2405 ifh.close()
2405 2406 dfh = self._datafp(b"a+")
2406 2407 ifh = self._indexfp(b"a+")
2407 2408 self._writinghandles = (ifh, dfh)
2408 2409 finally:
2409 2410 self._writinghandles = None
2410 2411
2411 2412 if dfh:
2412 2413 dfh.close()
2413 2414 ifh.close()
2414 2415
2415 2416 return nodes
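# Editor's note on the shape of ``deltas`` consumed above: each item is a
# 7-tuple
#
#   (node, p1, p2, linknode, deltabase, delta, flags)
#
# where the first delta's base must already exist in the revlog and later
# deltas may chain on revisions added earlier in the same group.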
2416 2417
2417 2418 def iscensored(self, rev):
2418 2419 """Check if a file revision is censored."""
2419 2420 if not self._censorable:
2420 2421 return False
2421 2422
2422 2423 return self.flags(rev) & REVIDX_ISCENSORED
2423 2424
2424 2425 def _peek_iscensored(self, baserev, delta, flush):
2425 2426 """Quickly check if a delta produces a censored revision."""
2426 2427 if not self._censorable:
2427 2428 return False
2428 2429
2429 2430 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2430 2431
2431 2432 def getstrippoint(self, minlink):
2432 2433 """find the minimum rev that must be stripped to strip the linkrev
2433 2434
2434 2435 Returns a tuple containing the minimum rev and a set of all revs that
2435 2436 have linkrevs that will be broken by this strip.
2436 2437 """
2437 2438 return storageutil.resolvestripinfo(
2438 2439 minlink,
2439 2440 len(self) - 1,
2440 2441 self.headrevs(),
2441 2442 self.linkrev,
2442 2443 self.parentrevs,
2443 2444 )
2444 2445
2445 2446 def strip(self, minlink, transaction):
2446 2447 """truncate the revlog on the first revision with a linkrev >= minlink
2447 2448
2448 2449 This function is called when we're stripping revision minlink and
2449 2450 its descendants from the repository.
2450 2451
2451 2452 We have to remove all revisions with linkrev >= minlink, because
2452 2453 the equivalent changelog revisions will be renumbered after the
2453 2454 strip.
2454 2455
2455 2456 So we truncate the revlog on the first of these revisions, and
2456 2457 trust that the caller has saved the revisions that shouldn't be
2457 2458 removed and that it'll re-add them after this truncation.
2458 2459 """
2459 2460 if len(self) == 0:
2460 2461 return
2461 2462
2462 2463 rev, _ = self.getstrippoint(minlink)
2463 2464 if rev == len(self):
2464 2465 return
2465 2466
2466 2467 # first truncate the files on disk
2467 2468 end = self.start(rev)
2468 2469 if not self._inline:
2469 2470 transaction.add(self.datafile, end)
2470 2471 end = rev * self._io.size
2471 2472 else:
2472 2473 end += rev * self._io.size
2473 2474
2474 2475 transaction.add(self.indexfile, end)
2475 2476
2476 2477 # then reset internal state in memory to forget those revisions
2477 2478 self._revisioncache = None
2478 2479 self._chaininfocache = {}
2479 2480 self._chunkclear()
2480 2481
2481 2482 del self.index[rev:-1]
2482 2483
2483 2484 def checksize(self):
2484 2485 """Check size of index and data files
2485 2486
2486 2487 return a (dd, di) tuple.
2487 2488 - dd: extra bytes for the "data" file
2488 2489 - di: extra bytes for the "index" file
2489 2490
2490 2491 A healthy revlog will return (0, 0).
2491 2492 """
2492 2493 expected = 0
2493 2494 if len(self):
2494 2495 expected = max(0, self.end(len(self) - 1))
2495 2496
2496 2497 try:
2497 2498 with self._datafp() as f:
2498 2499 f.seek(0, io.SEEK_END)
2499 2500 actual = f.tell()
2500 2501 dd = actual - expected
2501 2502 except IOError as inst:
2502 2503 if inst.errno != errno.ENOENT:
2503 2504 raise
2504 2505 dd = 0
2505 2506
2506 2507 try:
2507 2508 f = self.opener(self.indexfile)
2508 2509 f.seek(0, io.SEEK_END)
2509 2510 actual = f.tell()
2510 2511 f.close()
2511 2512 s = self._io.size
2512 2513 i = max(0, actual // s)
2513 2514 di = actual - (i * s)
2514 2515 if self._inline:
2515 2516 databytes = 0
2516 2517 for r in self:
2517 2518 databytes += max(0, self.length(r))
2518 2519 dd = 0
2519 2520 di = actual - len(self) * s - databytes
2520 2521 except IOError as inst:
2521 2522 if inst.errno != errno.ENOENT:
2522 2523 raise
2523 2524 di = 0
2524 2525
2525 2526 return (dd, di)
2526 2527
2527 2528 def files(self):
2528 2529 res = [self.indexfile]
2529 2530 if not self._inline:
2530 2531 res.append(self.datafile)
2531 2532 return res
2532 2533
2533 2534 def emitrevisions(
2534 2535 self,
2535 2536 nodes,
2536 2537 nodesorder=None,
2537 2538 revisiondata=False,
2538 2539 assumehaveparentrevisions=False,
2539 2540 deltamode=repository.CG_DELTAMODE_STD,
2540 2541 ):
2541 2542 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2542 2543 raise error.ProgrammingError(
2543 2544 b'unhandled value for nodesorder: %s' % nodesorder
2544 2545 )
2545 2546
2546 2547 if nodesorder is None and not self._generaldelta:
2547 2548 nodesorder = b'storage'
2548 2549
2549 2550 if (
2550 2551 not self._storedeltachains
2551 2552 and deltamode != repository.CG_DELTAMODE_PREV
2552 2553 ):
2553 2554 deltamode = repository.CG_DELTAMODE_FULL
2554 2555
2555 2556 return storageutil.emitrevisions(
2556 2557 self,
2557 2558 nodes,
2558 2559 nodesorder,
2559 2560 revlogrevisiondelta,
2560 2561 deltaparentfn=self.deltaparent,
2561 2562 candeltafn=self.candelta,
2562 2563 rawsizefn=self.rawsize,
2563 2564 revdifffn=self.revdiff,
2564 2565 flagsfn=self.flags,
2565 2566 deltamode=deltamode,
2566 2567 revisiondata=revisiondata,
2567 2568 assumehaveparentrevisions=assumehaveparentrevisions,
2568 2569 )
2569 2570
2570 2571 DELTAREUSEALWAYS = b'always'
2571 2572 DELTAREUSESAMEREVS = b'samerevs'
2572 2573 DELTAREUSENEVER = b'never'
2573 2574
2574 2575 DELTAREUSEFULLADD = b'fulladd'
2575 2576
2576 2577 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2577 2578
2578 2579 def clone(
2579 2580 self,
2580 2581 tr,
2581 2582 destrevlog,
2582 2583 addrevisioncb=None,
2583 2584 deltareuse=DELTAREUSESAMEREVS,
2584 2585 forcedeltabothparents=None,
2585 2586 sidedatacompanion=None,
2586 2587 ):
2587 2588 """Copy this revlog to another, possibly with format changes.
2588 2589
2589 2590 The destination revlog will contain the same revisions and nodes.
2590 2591 However, it may not be bit-for-bit identical due to e.g. delta encoding
2591 2592 differences.
2592 2593
2593 2594 The ``deltareuse`` argument control how deltas from the existing revlog
2594 2595 are preserved in the destination revlog. The argument can have the
2595 2596 following values:
2596 2597
2597 2598 DELTAREUSEALWAYS
2598 2599 Deltas will always be reused (if possible), even if the destination
2599 2600 revlog would not select the same revisions for the delta. This is the
2600 2601 fastest mode of operation.
2601 2602 DELTAREUSESAMEREVS
2602 2603 Deltas will be reused if the destination revlog would pick the same
2603 2604 revisions for the delta. This mode strikes a balance between speed
2604 2605 and optimization.
2605 2606 DELTAREUSENEVER
2606 2607 Deltas will never be reused. This is the slowest mode of execution.
2607 2608 This mode can be used to recompute deltas (e.g. if the diff/delta
2608 2609 algorithm changes).
2609 2610 DELTAREUSEFULLADD
2610 2611 Revisions will be re-added as if they were new content. This is
2611 2612 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2612 2613 e.g. large file detection and handling.
2613 2614
2614 2615 Delta computation can be slow, so the choice of delta reuse policy can
2615 2616 significantly affect run time.
2616 2617
2617 2618 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2618 2619 two extremes. Deltas will be reused if they are appropriate. But if the
2619 2620 delta could choose a better revision, it will do so. This means if you
2620 2621 are converting a non-generaldelta revlog to a generaldelta revlog,
2621 2622 deltas will be recomputed if the delta's parent isn't a parent of the
2622 2623 revision.
2623 2624
2624 2625 In addition to the delta policy, the ``forcedeltabothparents``
2625 2626 argument controls whether to force computing deltas against both parents
2626 2627 for merges. If None, the destination revlog's current setting is used.
2627 2628
2628 2629 If not None, `sidedatacompanion` is a callable that accepts two
2629 2630 arguments:
2630 2631
2631 2632 (srcrevlog, rev)
2632 2633
2633 2634 and returns a triplet that controls changes to sidedata content from the
2634 2635 old revision to the new clone result:
2635 2636
2636 2637 (dropall, filterout, update)
2637 2638
2638 2639 * if `dropall` is True, all sidedata should be dropped
2639 2640 * `filterout` is a set of sidedata keys that should be dropped
2640 2641 * `update` is a mapping of additional/new key -> value
2641 2642 """
2642 2643 if deltareuse not in self.DELTAREUSEALL:
2643 2644 raise ValueError(
2644 2645 _(b'value for deltareuse invalid: %s') % deltareuse
2645 2646 )
2646 2647
2647 2648 if len(destrevlog):
2648 2649 raise ValueError(_(b'destination revlog is not empty'))
2649 2650
2650 2651 if getattr(self, 'filteredrevs', None):
2651 2652 raise ValueError(_(b'source revlog has filtered revisions'))
2652 2653 if getattr(destrevlog, 'filteredrevs', None):
2653 2654 raise ValueError(_(b'destination revlog has filtered revisions'))
2654 2655
2655 2656 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2656 2657 # if possible.
2657 2658 oldlazydelta = destrevlog._lazydelta
2658 2659 oldlazydeltabase = destrevlog._lazydeltabase
2659 2660 oldamd = destrevlog._deltabothparents
2660 2661
2661 2662 try:
2662 2663 if deltareuse == self.DELTAREUSEALWAYS:
2663 2664 destrevlog._lazydeltabase = True
2664 2665 destrevlog._lazydelta = True
2665 2666 elif deltareuse == self.DELTAREUSESAMEREVS:
2666 2667 destrevlog._lazydeltabase = False
2667 2668 destrevlog._lazydelta = True
2668 2669 elif deltareuse == self.DELTAREUSENEVER:
2669 2670 destrevlog._lazydeltabase = False
2670 2671 destrevlog._lazydelta = False
2671 2672
2672 2673 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2673 2674
2674 2675 self._clone(
2675 2676 tr,
2676 2677 destrevlog,
2677 2678 addrevisioncb,
2678 2679 deltareuse,
2679 2680 forcedeltabothparents,
2680 2681 sidedatacompanion,
2681 2682 )
2682 2683
2683 2684 finally:
2684 2685 destrevlog._lazydelta = oldlazydelta
2685 2686 destrevlog._lazydeltabase = oldlazydeltabase
2686 2687 destrevlog._deltabothparents = oldamd
2687 2688
2688 2689 def _clone(
2689 2690 self,
2690 2691 tr,
2691 2692 destrevlog,
2692 2693 addrevisioncb,
2693 2694 deltareuse,
2694 2695 forcedeltabothparents,
2695 2696 sidedatacompanion,
2696 2697 ):
2697 2698 """perform the core duty of `revlog.clone` after parameter processing"""
2698 2699 deltacomputer = deltautil.deltacomputer(destrevlog)
2699 2700 index = self.index
2700 2701 for rev in self:
2701 2702 entry = index[rev]
2702 2703
2703 2704 # Some classes override linkrev to take filtered revs into
2704 2705 # account. Use raw entry from index.
2705 2706 flags = entry[0] & 0xFFFF
2706 2707 linkrev = entry[4]
2707 2708 p1 = index[entry[5]][7]
2708 2709 p2 = index[entry[6]][7]
2709 2710 node = entry[7]
2710 2711
2711 2712 sidedataactions = (False, [], {})
2712 2713 if sidedatacompanion is not None:
2713 2714 sidedataactions = sidedatacompanion(self, rev)
2714 2715
2715 2716 # (Possibly) reuse the delta from the revlog if allowed and
2716 2717 # the revlog chunk is a delta.
2717 2718 cachedelta = None
2718 2719 rawtext = None
2719 2720 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2720 2721 dropall, filterout, update = sidedataactions
2721 2722 text, sidedata = self._revisiondata(rev)
2722 2723 if dropall:
2723 2724 sidedata = {}
2724 2725 for key in filterout:
2725 2726 sidedata.pop(key, None)
2726 2727 sidedata.update(update)
2727 2728 if not sidedata:
2728 2729 sidedata = None
2729 2730 destrevlog.addrevision(
2730 2731 text,
2731 2732 tr,
2732 2733 linkrev,
2733 2734 p1,
2734 2735 p2,
2735 2736 cachedelta=cachedelta,
2736 2737 node=node,
2737 2738 flags=flags,
2738 2739 deltacomputer=deltacomputer,
2739 2740 sidedata=sidedata,
2740 2741 )
2741 2742 else:
2742 2743 if destrevlog._lazydelta:
2743 2744 dp = self.deltaparent(rev)
2744 2745 if dp != nullrev:
2745 2746 cachedelta = (dp, bytes(self._chunk(rev)))
2746 2747
2747 2748 if not cachedelta:
2748 2749 rawtext = self.rawdata(rev)
2749 2750
2750 2751 ifh = destrevlog.opener(
2751 2752 destrevlog.indexfile, b'a+', checkambig=False
2752 2753 )
2753 2754 dfh = None
2754 2755 if not destrevlog._inline:
2755 2756 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2756 2757 try:
2757 2758 destrevlog._addrevision(
2758 2759 node,
2759 2760 rawtext,
2760 2761 tr,
2761 2762 linkrev,
2762 2763 p1,
2763 2764 p2,
2764 2765 flags,
2765 2766 cachedelta,
2766 2767 ifh,
2767 2768 dfh,
2768 2769 deltacomputer=deltacomputer,
2769 2770 )
2770 2771 finally:
2771 2772 if dfh:
2772 2773 dfh.close()
2773 2774 ifh.close()
2774 2775
2775 2776 if addrevisioncb:
2776 2777 addrevisioncb(self, rev, node)
2777 2778
2778 2779 def censorrevision(self, tr, censornode, tombstone=b''):
2779 2780 if (self.version & 0xFFFF) == REVLOGV0:
2780 2781 raise error.RevlogError(
2781 2782 _(b'cannot censor with version %d revlogs') % self.version
2782 2783 )
2783 2784
2784 2785 censorrev = self.rev(censornode)
2785 2786 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2786 2787
2787 2788 if len(tombstone) > self.rawsize(censorrev):
2788 2789 raise error.Abort(
2789 2790 _(b'censor tombstone must be no longer than censored data')
2790 2791 )
2791 2792
2792 2793 # Rewriting the revlog in place is hard. Our strategy for censoring is
2793 2794 # to create a new revlog, copy all revisions to it, then replace the
2794 2795 # revlogs on transaction close.
2795 2796
2796 2797 newindexfile = self.indexfile + b'.tmpcensored'
2797 2798 newdatafile = self.datafile + b'.tmpcensored'
2798 2799
2799 2800 # This is a bit dangerous. We could easily have a mismatch of state.
2800 2801 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2801 2802 newrl.version = self.version
2802 2803 newrl._generaldelta = self._generaldelta
2803 2804 newrl._io = self._io
2804 2805
2805 2806 for rev in self.revs():
2806 2807 node = self.node(rev)
2807 2808 p1, p2 = self.parents(node)
2808 2809
2809 2810 if rev == censorrev:
2810 2811 newrl.addrawrevision(
2811 2812 tombstone,
2812 2813 tr,
2813 2814 self.linkrev(censorrev),
2814 2815 p1,
2815 2816 p2,
2816 2817 censornode,
2817 2818 REVIDX_ISCENSORED,
2818 2819 )
2819 2820
2820 2821 if newrl.deltaparent(rev) != nullrev:
2821 2822 raise error.Abort(
2822 2823 _(
2823 2824 b'censored revision stored as delta; '
2824 2825 b'cannot censor'
2825 2826 ),
2826 2827 hint=_(
2827 2828 b'censoring of revlogs is not '
2828 2829 b'fully implemented; please report '
2829 2830 b'this bug'
2830 2831 ),
2831 2832 )
2832 2833 continue
2833 2834
2834 2835 if self.iscensored(rev):
2835 2836 if self.deltaparent(rev) != nullrev:
2836 2837 raise error.Abort(
2837 2838 _(
2838 2839 b'cannot censor due to censored '
2839 2840 b'revision having delta stored'
2840 2841 )
2841 2842 )
2842 2843 rawtext = self._chunk(rev)
2843 2844 else:
2844 2845 rawtext = self.rawdata(rev)
2845 2846
2846 2847 newrl.addrawrevision(
2847 2848 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2848 2849 )
2849 2850
2850 2851 tr.addbackup(self.indexfile, location=b'store')
2851 2852 if not self._inline:
2852 2853 tr.addbackup(self.datafile, location=b'store')
2853 2854
2854 2855 self.opener.rename(newrl.indexfile, self.indexfile)
2855 2856 if not self._inline:
2856 2857 self.opener.rename(newrl.datafile, self.datafile)
2857 2858
2858 2859 self.clearcaches()
2859 2860 self._loadindex()
2860 2861
2861 2862 def verifyintegrity(self, state):
2862 2863 """Verifies the integrity of the revlog.
2863 2864
2864 2865 Yields ``revlogproblem`` instances describing problems that are
2865 2866 found.
2866 2867 """
2867 2868 dd, di = self.checksize()
2868 2869 if dd:
2869 2870 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2870 2871 if di:
2871 2872 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2872 2873
2873 2874 version = self.version & 0xFFFF
2874 2875
2875 2876 # The verifier tells us what version revlog we should be.
2876 2877 if version != state[b'expectedversion']:
2877 2878 yield revlogproblem(
2878 2879 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2879 2880 % (self.indexfile, version, state[b'expectedversion'])
2880 2881 )
2881 2882
2882 2883 state[b'skipread'] = set()
2883 2884 state[b'safe_renamed'] = set()
2884 2885
2885 2886 for rev in self:
2886 2887 node = self.node(rev)
2887 2888
2888 2889 # Verify contents. 4 cases to care about:
2889 2890 #
2890 2891 # common: the most common case
2891 2892 # rename: with a rename
2892 2893 # meta: file content starts with b'\1\n', the metadata
2893 2894 # header defined in filelog.py, but without a rename
2894 2895 # ext: content stored externally
2895 2896 #
2896 2897 # More formally, their differences are shown below:
2897 2898 #
2898 2899 # | common | rename | meta | ext
2899 2900 # -------------------------------------------------------
2900 2901 # flags() | 0 | 0 | 0 | not 0
2901 2902 # renamed() | False | True | False | ?
2902 2903 # rawtext[0:2]=='\1\n'| False | True | True | ?
2903 2904 #
2904 2905 # "rawtext" means the raw text stored in revlog data, which
2905 2906 # could be retrieved by "rawdata(rev)". "text"
2906 2907 # mentioned below is "revision(rev)".
2907 2908 #
2908 2909 # There are 3 different lengths stored physically:
2909 2910 # 1. L1: rawsize, stored in revlog index
2910 2911 # 2. L2: len(rawtext), stored in revlog data
2911 2912 # 3. L3: len(text), stored in revlog data if flags==0, or
2912 2913 # possibly somewhere else if flags!=0
2913 2914 #
2914 2915 # L1 should be equal to L2. L3 could be different from them.
2915 2916 # "text" may or may not affect commit hash depending on flag
2916 2917 # processors (see flagutil.addflagprocessor).
2917 2918 #
2918 2919 # | common | rename | meta | ext
2919 2920 # -------------------------------------------------
2920 2921 # rawsize() | L1 | L1 | L1 | L1
2921 2922 # size() | L1 | L2-LM | L1(*) | L1 (?)
2922 2923 # len(rawtext) | L2 | L2 | L2 | L2
2923 2924 # len(text) | L2 | L2 | L2 | L3
2924 2925 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2925 2926 #
2926 2927 # LM: length of metadata, depending on rawtext
2927 2928 # (*): not ideal, see comment in filelog.size
2928 2929 # (?): could be "- len(meta)" if the resolved content has
2929 2930 # rename metadata
2930 2931 #
2931 2932 # Checks needed to be done:
2932 2933 # 1. length check: L1 == L2, in all cases.
2933 2934 # 2. hash check: depending on flag processor, we may need to
2934 2935 # use either "text" (external), or "rawtext" (in revlog).
2935 2936
2936 2937 try:
2937 2938 skipflags = state.get(b'skipflags', 0)
2938 2939 if skipflags:
2939 2940 skipflags &= self.flags(rev)
2940 2941
2941 2942 _verify_revision(self, skipflags, state, node)
2942 2943
2943 2944 l1 = self.rawsize(rev)
2944 2945 l2 = len(self.rawdata(node))
2945 2946
2946 2947 if l1 != l2:
2947 2948 yield revlogproblem(
2948 2949 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2949 2950 node=node,
2950 2951 )
2951 2952
2952 2953 except error.CensoredNodeError:
2953 2954 if state[b'erroroncensored']:
2954 2955 yield revlogproblem(
2955 2956 error=_(b'censored file data'), node=node
2956 2957 )
2957 2958 state[b'skipread'].add(node)
2958 2959 except Exception as e:
2959 2960 yield revlogproblem(
2960 2961 error=_(b'unpacking %s: %s')
2961 2962 % (short(node), stringutil.forcebytestr(e)),
2962 2963 node=node,
2963 2964 )
2964 2965 state[b'skipread'].add(node)
2965 2966
2966 2967 def storageinfo(
2967 2968 self,
2968 2969 exclusivefiles=False,
2969 2970 sharedfiles=False,
2970 2971 revisionscount=False,
2971 2972 trackedsize=False,
2972 2973 storedsize=False,
2973 2974 ):
2974 2975 d = {}
2975 2976
2976 2977 if exclusivefiles:
2977 2978 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2978 2979 if not self._inline:
2979 2980 d[b'exclusivefiles'].append((self.opener, self.datafile))
2980 2981
2981 2982 if sharedfiles:
2982 2983 d[b'sharedfiles'] = []
2983 2984
2984 2985 if revisionscount:
2985 2986 d[b'revisionscount'] = len(self)
2986 2987
2987 2988 if trackedsize:
2988 2989 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2989 2990
2990 2991 if storedsize:
2991 2992 d[b'storedsize'] = sum(
2992 2993 self.opener.stat(path).st_size for path in self.files()
2993 2994 )
2994 2995
2995 2996 return d
@@ -1,202 +1,204 b''
1 1 # nodemap.py - nodemap related code and utilities
2 2 #
3 3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import struct
12 12
13 13 from .. import (
14 14 error,
15 15 node as nodemod,
16 16 pycompat,
17 17 )
18 18
19 19
20 20 class NodeMap(dict):
21 21 def __missing__(self, x):
22 22 raise error.RevlogError(b'unknown node: %s' % x)
23 23
24 24
25 25 def persisted_data(revlog):
26 26 """read the nodemap for a revlog from disk"""
27 27 if revlog.nodemap_file is None:
28 28 return None
29 29 return revlog.opener.tryread(revlog.nodemap_file)
30 30
31 31
32 32 def setup_persistent_nodemap(tr, revlog):
33 33 """Install whatever is needed transaction side to persist a nodemap on disk
34 34
35 35 (only actually persist the nodemap if this is relevant for this revlog)
36 36 """
37 if revlog._inline:
38 return # inlined revlogs are too small for this to be relevant
37 39 if revlog.nodemap_file is None:
38 40 return # we do not use persistent_nodemap on this revlog
39 41 callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file
40 42 if tr.hasfinalize(callback_id):
41 43 return # no need to register again
42 44 tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))
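# Editor's note: the callback id above is keyed on the nodemap file name, so
# adding many revisions within one transaction registers the persistence
# hook only once; _persist_nodemap then rewrites the whole nodemap file when
# the transaction is finalized.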
43 45
44 46
45 47 def _persist_nodemap(tr, revlog):
46 48 """Write nodemap data on disk for a given revlog
47 49 """
48 50 if getattr(revlog, 'filteredrevs', ()):
49 51 raise error.ProgrammingError(
50 52 "cannot persist nodemap of a filtered changelog"
51 53 )
52 54 if revlog.nodemap_file is None:
53 55 msg = "calling persist nodemap on a revlog without the feature enableb"
54 56 raise error.ProgrammingError(msg)
55 57 data = persistent_data(revlog.index)
56 58 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
57 59 # store vfs
58 60 with revlog.opener(revlog.nodemap_file, b'w') as f:
59 61 f.write(data)
60 62 # EXP-TODO: if the transaction aborts, we should remove the new data and
61 63 # reinstall the old one. (This will be simpler when the file format gets a
62 64 # bit more advanced)
63 65
64 66
65 67 ### Nodemap Trie
66 68 #
67 69 # This is a simple reference implementation to compute and persist a nodemap
68 70 # trie. This reference implementation is write-only. The python version of this
69 71 # is not expected to be actually used, since it won't provide performance
70 72 # improvement over existing non-persistent C implementation.
71 73 #
72 74 # The nodemap is persisted as a trie using 4-bit addresses and 16-entry blocks.
73 75 # Each revision can be addressed using the shortest unique prefix of its node.
74 76 #
75 77 # The trie is stored as a sequence of blocks. Each block contains 16 entries
76 78 # (signed 64bit integer, big endian). Each entry can be one of the following:
77 79 #
78 80 # * value >= 0 -> index of sub-block
79 81 # * value == -1 -> no value
80 82 # * value < -1 -> a revision value: rev = -(value+10)
81 83 #
82 84 # The implementation focuses on simplicity, not on performance. A Rust
83 85 # implementation should provide an efficient version of the same binary
84 86 # persistence. This reference python implementation is never meant to be
85 87 # extensively used in production.
86 88
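# Editor's worked example of the entry encoding above (illustrative):
#
#   rev 0   is stored as -(0 + 2)  == -2
#   rev 41  is stored as -(41 + 2) == -43
#   value -43 decodes back to -(-43 + 2) == 41   (the involution)
#   value 7 (>= 0) points at the 8th block written to the file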
87 89
88 90 def persistent_data(index):
89 91 """return the persistent binary form for a nodemap for a given index
90 92 """
91 93 trie = _build_trie(index)
92 94 return _persist_trie(trie)
93 95
94 96
95 97 S_BLOCK = struct.Struct(">" + ("l" * 16))
96 98
97 99 NO_ENTRY = -1
98 100 # rev 0 needs to be -2 because 0 is used by blocks and -1 is a special value.
99 101 REV_OFFSET = 2
100 102
101 103
102 104 def _transform_rev(rev):
103 105 """Return the number used to represent the rev in the tree.
104 106
105 107 (or retrieve a rev number from such representation)
106 108
107 109 Note that this is an involution, a function equal to its inverse (i.e.
108 110 which gives the identity when applied to itself).
109 111 """
110 112 return -(rev + REV_OFFSET)
111 113
112 114
113 115 def _to_int(hex_digit):
114 116 """turn an hexadecimal digit into a proper integer"""
115 117 return int(hex_digit, 16)
116 118
117 119
118 120 def _build_trie(index):
119 121 """build a nodemap trie
120 122
121 123 The nodemap stores the revision number for each unique prefix.
122 124
123 125 Each block is a dictionary with keys in `[0, 15]`. Values are either
124 126 another block or a revision number.
125 127 """
126 128 root = {}
127 129 for rev in range(len(index)):
128 130 hex = nodemod.hex(index[rev][7])
129 131 _insert_into_block(index, 0, root, rev, hex)
130 132 return root
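# Editor's sketch of the structure built above, assuming two revisions whose
# nodes start with "a12..." and "a1f...":
#
#   {0xa: {0x1: {0x2: 0, 0xf: 1}}}
#
# the shared prefix "a1" produces two nested blocks before the third nibble
# finally distinguishes revision 0 from revision 1.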
131 133
132 134
133 135 def _insert_into_block(index, level, block, current_rev, current_hex):
134 136 """insert a new revision in a block
135 137
136 138 index: the index we are adding revision for
137 139 level: the depth of the current block in the trie
138 140 block: the block currently being considered
139 141 current_rev: the revision number we are adding
140 142 current_hex: the hexadecimal representation of the node of that revision
141 143 """
142 144 hex_digit = _to_int(current_hex[level : level + 1])
143 145 entry = block.get(hex_digit)
144 146 if entry is None:
145 147 # no entry, simply store the revision number
146 148 block[hex_digit] = current_rev
147 149 elif isinstance(entry, dict):
148 150 # need to recurse to an underlying block
149 151 _insert_into_block(index, level + 1, entry, current_rev, current_hex)
150 152 else:
151 153 # collision with a previously unique prefix; inserting new
152 154 # vertices to fit both entries.
153 155 other_hex = nodemod.hex(index[entry][7])
154 156 other_rev = entry
155 157 new = {}
156 158 block[hex_digit] = new
157 159 _insert_into_block(index, level + 1, new, other_rev, other_hex)
158 160 _insert_into_block(index, level + 1, new, current_rev, current_hex)
159 161
160 162
161 163 def _persist_trie(root):
162 164 """turn a nodemap trie into persistent binary data
163 165
164 166 See `_build_trie` for nodemap trie structure"""
165 167 block_map = {}
166 168 chunks = []
167 169 for tn in _walk_trie(root):
168 170 block_map[id(tn)] = len(chunks)
169 171 chunks.append(_persist_block(tn, block_map))
170 172 return b''.join(chunks)
171 173
172 174
173 175 def _walk_trie(block):
174 176 """yield all the block in a trie
175 177
176 178 Children blocks are always yield before their parent block.
177 179 """
178 180 for (_, item) in sorted(block.items()):
179 181 if isinstance(item, dict):
180 182 for sub_block in _walk_trie(item):
181 183 yield sub_block
182 184 yield block
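# Editor's note: the traversal above is post-order (children first), so by
# the time _persist_trie serializes a parent block every child already has
# an offset recorded in block_map and child pointers resolve in one pass.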
183 185
184 186
185 187 def _persist_block(block_node, block_map):
186 188 """produce persistent binary data for a single block
187 189
188 190 Children blocks are assumed to be already persisted and present in
189 191 block_map.
190 192 """
191 193 data = tuple(_to_value(block_node.get(i), block_map) for i in range(16))
192 194 return S_BLOCK.pack(*data)
193 195
194 196
195 197 def _to_value(item, block_map):
196 198 """persist any value as an integer"""
197 199 if item is None:
198 200 return NO_ENTRY
199 201 elif isinstance(item, dict):
200 202 return block_map[id(item)]
201 203 else:
202 204 return _transform_rev(item)