revlog: drop an unused variable assignment...
Matt Harbison
r44431:bdb35716 default
@@ -1,2981 +1,2979
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 revlogutils,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanket usage of all the names to prevent pyflakes complaints
87 87 # We need these names available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_EXTSTORED
102 102 REVIDX_DEFAULT_FLAGS
103 103 REVIDX_FLAGS_ORDER
104 104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 105
106 106 parsers = policy.importmod('parsers')
107 107 rustancestor = policy.importrust('ancestor')
108 108 rustdagop = policy.importrust('dagop')
109 109
110 110 # Aliased for performance.
111 111 _zlibdecompress = zlib.decompress
112 112
113 113 # max size of revlog with inline data
114 114 _maxinline = 131072
115 115 _chunksize = 1048576
116 116
117 117 # Flag processors for REVIDX_ELLIPSIS.
118 118 def ellipsisreadprocessor(rl, text):
119 119 return text, False, {}
120 120
121 121
122 122 def ellipsiswriteprocessor(rl, text, sidedata):
123 123 return text, False
124 124
125 125
126 126 def ellipsisrawprocessor(rl, text):
127 127 return False
128 128
129 129
130 130 ellipsisprocessor = (
131 131 ellipsisreadprocessor,
132 132 ellipsiswriteprocessor,
133 133 ellipsisrawprocessor,
134 134 )
135 135
136 136
137 137 def getoffset(q):
138 138 return int(q >> 16)
139 139
140 140
141 141 def gettype(q):
142 142 return int(q & 0xFFFF)
143 143
144 144
145 145 def offset_type(offset, type):
146 146 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
147 147 raise ValueError(b'unknown revlog index flags')
148 148 return int(int(offset) << 16 | type)
149 149
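# Editorial sketch, not part of the original source: the helpers above pack
# a revision's data-file offset and its 16-bit flag field into the single
# integer stored as the first element of an index entry. A hypothetical
# round-trip:
def _example_offset_type_roundtrip():
    packed = offset_type(65536, 0)  # offset 65536, no flags set
    assert getoffset(packed) == 65536
    assert gettype(packed) == 0
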
150 150
151 151 def _verify_revision(rl, skipflags, state, node):
152 152 """Verify the integrity of the given revlog ``node`` while providing a hook
153 153 point for extensions to influence the operation."""
154 154 if skipflags:
155 155 state[b'skipread'].add(node)
156 156 else:
157 157 # Side-effect: read content and verify hash.
158 158 rl.revision(node)
159 159
160 160
161 161 @attr.s(slots=True, frozen=True)
162 162 class _revisioninfo(object):
163 163 """Information about a revision that allows building its fulltext
164 164 node: expected hash of the revision
165 165 p1, p2: parent revs of the revision
166 166 btext: built text cache consisting of a one-element list
167 167 cachedelta: (baserev, uncompressed_delta) or None
168 168 flags: flags associated with the revision storage
169 169
170 170 One of btext[0] or cachedelta must be set.
171 171 """
172 172
173 173 node = attr.ib()
174 174 p1 = attr.ib()
175 175 p2 = attr.ib()
176 176 btext = attr.ib()
177 177 textlen = attr.ib()
178 178 cachedelta = attr.ib()
179 179 flags = attr.ib()
180 180
181 181
182 182 @interfaceutil.implementer(repository.irevisiondelta)
183 183 @attr.s(slots=True)
184 184 class revlogrevisiondelta(object):
185 185 node = attr.ib()
186 186 p1node = attr.ib()
187 187 p2node = attr.ib()
188 188 basenode = attr.ib()
189 189 flags = attr.ib()
190 190 baserevisionsize = attr.ib()
191 191 revision = attr.ib()
192 192 delta = attr.ib()
193 193 linknode = attr.ib(default=None)
194 194
195 195
196 196 @interfaceutil.implementer(repository.iverifyproblem)
197 197 @attr.s(frozen=True)
198 198 class revlogproblem(object):
199 199 warning = attr.ib(default=None)
200 200 error = attr.ib(default=None)
201 201 node = attr.ib(default=None)
202 202
203 203
204 204 # index v0:
205 205 # 4 bytes: offset
206 206 # 4 bytes: compressed length
207 207 # 4 bytes: base rev
208 208 # 4 bytes: link rev
209 209 # 20 bytes: parent 1 nodeid
210 210 # 20 bytes: parent 2 nodeid
211 211 # 20 bytes: nodeid
212 212 indexformatv0 = struct.Struct(b">4l20s20s20s")
213 213 indexformatv0_pack = indexformatv0.pack
214 214 indexformatv0_unpack = indexformatv0.unpack
215 215
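# Editorial sketch, not part of the original source: a v0 index entry is 76
# bytes -- four 4-byte big-endian signed ints followed by three 20-byte
# binary nodeids. A hypothetical round-trip through the helpers above:
def _example_v0_entry_roundtrip():
    entry = (0, 12, 0, 0, nullid, nullid, b'\x11' * 20)
    assert indexformatv0.size == 76
    assert indexformatv0_unpack(indexformatv0_pack(*entry)) == entry
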
216 216
217 217 class revlogoldindex(list):
218 218 @property
219 219 def nodemap(self):
220 220 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
221 221 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
222 222 return self._nodemap
223 223
224 224 @util.propertycache
225 225 def _nodemap(self):
226 226 nodemap = revlogutils.NodeMap({nullid: nullrev})
227 227 for r in range(0, len(self)):
228 228 n = self[r][7]
229 229 nodemap[n] = r
230 230 return nodemap
231 231
232 232 def has_node(self, node):
233 233 """return True if the node exist in the index"""
234 234 return node in self._nodemap
235 235
236 236 def rev(self, node):
237 237 """return a revision for a node
238 238
239 239 If the node is unknown, raise a RevlogError"""
240 240 return self._nodemap[node]
241 241
242 242 def get_rev(self, node):
243 243 """return a revision for a node
244 244
245 245 If the node is unknown, return None"""
246 246 return self._nodemap.get(node)
247 247
248 248 def append(self, tup):
249 249 self._nodemap[tup[7]] = len(self)
250 250 super(revlogoldindex, self).append(tup)
251 251
252 252 def __delitem__(self, i):
253 253 if not isinstance(i, slice) or i.stop != -1 or i.step is not None:
254 254 raise ValueError(b"deleting slices only supports a:-1 with step 1")
255 255 for r in pycompat.xrange(i.start, len(self)):
256 256 del self._nodemap[self[r][7]]
257 257 super(revlogoldindex, self).__delitem__(i)
258 258
259 259 def clearcaches(self):
260 260 self.__dict__.pop('_nodemap', None)
261 261
262 262 def __getitem__(self, i):
263 263 if i == -1:
264 264 return (0, 0, 0, -1, -1, -1, -1, nullid)
265 265 return list.__getitem__(self, i)
266 266
267 267
268 268 class revlogoldio(object):
269 269 def __init__(self):
270 270 self.size = indexformatv0.size
271 271
272 272 def parseindex(self, data, inline):
273 273 s = self.size
274 274 index = []
275 275 nodemap = revlogutils.NodeMap({nullid: nullrev})
276 276 n = off = 0
277 277 l = len(data)
278 278 while off + s <= l:
279 279 cur = data[off : off + s]
280 280 off += s
281 281 e = indexformatv0_unpack(cur)
282 282 # transform to revlogv1 format
283 283 e2 = (
284 284 offset_type(e[0], 0),
285 285 e[1],
286 286 -1,
287 287 e[2],
288 288 e[3],
289 289 nodemap.get(e[4], nullrev),
290 290 nodemap.get(e[5], nullrev),
291 291 e[6],
292 292 )
293 293 index.append(e2)
294 294 nodemap[e[6]] = n
295 295 n += 1
296 296
297 297 index = revlogoldindex(index)
298 298 return index, None
299 299
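    # Editorial note, not part of the original source: the transformed tuple
    # above follows the v1 entry layout used throughout this module:
    # (offset_and_flags, compressed_len, uncompressed_len, base_rev,
    # link_rev, p1_rev, p2_rev, nodeid); v0 stores no uncompressed length,
    # hence the -1 placeholder.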
300 300 def packentry(self, entry, node, version, rev):
301 301 if gettype(entry[0]):
302 302 raise error.RevlogError(
303 303 _(b'index entry flags need revlog version 1')
304 304 )
305 305 e2 = (
306 306 getoffset(entry[0]),
307 307 entry[1],
308 308 entry[3],
309 309 entry[4],
310 310 node(entry[5]),
311 311 node(entry[6]),
312 312 entry[7],
313 313 )
314 314 return indexformatv0_pack(*e2)
315 315
316 316
317 317 # index ng:
318 318 # 6 bytes: offset
319 319 # 2 bytes: flags
320 320 # 4 bytes: compressed length
321 321 # 4 bytes: uncompressed length
322 322 # 4 bytes: base rev
323 323 # 4 bytes: link rev
324 324 # 4 bytes: parent 1 rev
325 325 # 4 bytes: parent 2 rev
326 326 # 32 bytes: nodeid
327 327 indexformatng = struct.Struct(b">Qiiiiii20s12x")
328 328 indexformatng_pack = indexformatng.pack
329 329 versionformat = struct.Struct(b">I")
330 330 versionformat_pack = versionformat.pack
331 331 versionformat_unpack = versionformat.unpack
332 332
333 333 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
334 334 # signed integer)
335 335 _maxentrysize = 0x7FFFFFFF
336 336
337 337
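# Editorial sketch, not part of the original source: a v1 ("ng") index
# entry is 64 bytes -- the 8-byte packed offset/flags word, six 4-byte ints
# (lengths, base rev, link rev, parent revs), a 20-byte nodeid, and 12
# bytes of padding:
def _example_ng_entry_size():
    assert indexformatng.size == 8 + 6 * 4 + 20 + 12  # == 64
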
338 338 class revlogio(object):
339 339 def __init__(self):
340 340 self.size = indexformatng.size
341 341
342 342 def parseindex(self, data, inline):
343 343 # call the C implementation to parse the index data
344 344 index, cache = parsers.parse_index2(data, inline)
345 345 return index, cache
346 346
347 347 def packentry(self, entry, node, version, rev):
348 348 p = indexformatng_pack(*entry)
349 349 if rev == 0:
350 350 p = versionformat_pack(version) + p[4:]
351 351 return p
352 352
353 353
354 354 class revlog(object):
355 355 """
356 356 the underlying revision storage object
357 357
358 358 A revlog consists of two parts, an index and the revision data.
359 359
360 360 The index is a file with a fixed record size containing
361 361 information on each revision, including its nodeid (hash), the
362 362 nodeids of its parents, the position and offset of its data within
363 363 the data file, and the revision it's based on. Finally, each entry
364 364 contains a linkrev entry that can serve as a pointer to external
365 365 data.
366 366
367 367 The revision data itself is a linear collection of data chunks.
368 368 Each chunk represents a revision and is usually represented as a
369 369 delta against the previous chunk. To bound lookup time, runs of
370 370 deltas are limited to about 2 times the length of the original
371 371 version data. This makes retrieval of a version proportional to
372 372 its size, or O(1) relative to the number of revisions.
373 373
374 374 Both pieces of the revlog are written to in an append-only
375 375 fashion, which means we never need to rewrite a file to insert or
376 376 remove data, and can use some simple techniques to avoid the need
377 377 for locking while reading.
378 378
379 379 If checkambig, indexfile is opened with checkambig=True at
380 380 writing, to avoid file stat ambiguity.
381 381
382 382 If mmaplargeindex is True, and an mmapindexthreshold is set, the
383 383 index will be mmapped rather than read if it is larger than the
384 384 configured threshold.
385 385
386 386 If censorable is True, the revlog can have censored revisions.
387 387
388 388 If `upperboundcomp` is not None, this is the expected maximal gain from
389 389 compression for the data content.
390 390 """
391 391
392 392 _flagserrorclass = error.RevlogError
393 393
394 394 def __init__(
395 395 self,
396 396 opener,
397 397 indexfile,
398 398 datafile=None,
399 399 checkambig=False,
400 400 mmaplargeindex=False,
401 401 censorable=False,
402 402 upperboundcomp=None,
403 403 ):
404 404 """
405 405 create a revlog object
406 406
407 407 opener is a function that abstracts the file opening operation
408 408 and can be used to implement COW semantics or the like.
409 409
410 410 """
411 411 self.upperboundcomp = upperboundcomp
412 412 self.indexfile = indexfile
413 413 self.datafile = datafile or (indexfile[:-2] + b".d")
414 414 self.opener = opener
415 415 # When True, indexfile is opened with checkambig=True at writing, to
416 416 # avoid file stat ambiguity.
417 417 self._checkambig = checkambig
418 418 self._mmaplargeindex = mmaplargeindex
419 419 self._censorable = censorable
420 420 # 3-tuple of (node, rev, text) for a raw revision.
421 421 self._revisioncache = None
422 422 # Maps rev to chain base rev.
423 423 self._chainbasecache = util.lrucachedict(100)
424 424 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
425 425 self._chunkcache = (0, b'')
426 426 # How much data to read and cache into the raw revlog data cache.
427 427 self._chunkcachesize = 65536
428 428 self._maxchainlen = None
429 429 self._deltabothparents = True
430 430 self.index = None
431 431 # Mapping of partial identifiers to full nodes.
432 432 self._pcache = {}
433 433 # Mapping of revision integer to full node.
434 434 self._compengine = b'zlib'
435 435 self._compengineopts = {}
436 436 self._maxdeltachainspan = -1
437 437 self._withsparseread = False
438 438 self._sparserevlog = False
439 439 self._srdensitythreshold = 0.50
440 440 self._srmingapsize = 262144
441 441
442 442 # Make copy of flag processors so each revlog instance can support
443 443 # custom flags.
444 444 self._flagprocessors = dict(flagutil.flagprocessors)
445 445
446 446 # 2-tuple of file handles being used for active writing.
447 447 self._writinghandles = None
448 448
449 449 self._loadindex()
450 450
451 451 def _loadindex(self):
452 452 mmapindexthreshold = None
453 453 opts = self.opener.options
454 454
455 455 if b'revlogv2' in opts:
456 456 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
457 457 elif b'revlogv1' in opts:
458 458 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
459 459 if b'generaldelta' in opts:
460 460 newversionflags |= FLAG_GENERALDELTA
461 461 elif b'revlogv0' in self.opener.options:
462 462 newversionflags = REVLOGV0
463 463 else:
464 464 newversionflags = REVLOG_DEFAULT_VERSION
465 465
466 466 if b'chunkcachesize' in opts:
467 467 self._chunkcachesize = opts[b'chunkcachesize']
468 468 if b'maxchainlen' in opts:
469 469 self._maxchainlen = opts[b'maxchainlen']
470 470 if b'deltabothparents' in opts:
471 471 self._deltabothparents = opts[b'deltabothparents']
472 472 self._lazydelta = bool(opts.get(b'lazydelta', True))
473 473 self._lazydeltabase = False
474 474 if self._lazydelta:
475 475 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
476 476 if b'compengine' in opts:
477 477 self._compengine = opts[b'compengine']
478 478 if b'zlib.level' in opts:
479 479 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
480 480 if b'zstd.level' in opts:
481 481 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
482 482 if b'maxdeltachainspan' in opts:
483 483 self._maxdeltachainspan = opts[b'maxdeltachainspan']
484 484 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
485 485 mmapindexthreshold = opts[b'mmapindexthreshold']
486 486 self.hassidedata = bool(opts.get(b'side-data', False))
487 487 if self.hassidedata:
488 488 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
489 489 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
490 490 withsparseread = bool(opts.get(b'with-sparse-read', False))
491 491 # sparse-revlog forces sparse-read
492 492 self._withsparseread = self._sparserevlog or withsparseread
493 493 if b'sparse-read-density-threshold' in opts:
494 494 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
495 495 if b'sparse-read-min-gap-size' in opts:
496 496 self._srmingapsize = opts[b'sparse-read-min-gap-size']
497 497 if opts.get(b'enableellipsis'):
498 498 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
499 499
500 500 # revlog v0 doesn't have flag processors
501 501 for flag, processor in pycompat.iteritems(
502 502 opts.get(b'flagprocessors', {})
503 503 ):
504 504 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
505 505
506 506 if self._chunkcachesize <= 0:
507 507 raise error.RevlogError(
508 508 _(b'revlog chunk cache size %r is not greater than 0')
509 509 % self._chunkcachesize
510 510 )
511 511 elif self._chunkcachesize & (self._chunkcachesize - 1):
512 512 raise error.RevlogError(
513 513 _(b'revlog chunk cache size %r is not a power of 2')
514 514 % self._chunkcachesize
515 515 )
516 516
517 517 indexdata = b''
518 518 self._initempty = True
519 519 try:
520 520 with self._indexfp() as f:
521 521 if (
522 522 mmapindexthreshold is not None
523 523 and self.opener.fstat(f).st_size >= mmapindexthreshold
524 524 ):
525 525 # TODO: should .close() to release resources without
526 526 # relying on Python GC
527 527 indexdata = util.buffer(util.mmapread(f))
528 528 else:
529 529 indexdata = f.read()
530 530 if len(indexdata) > 0:
531 531 versionflags = versionformat_unpack(indexdata[:4])[0]
532 532 self._initempty = False
533 533 else:
534 534 versionflags = newversionflags
535 535 except IOError as inst:
536 536 if inst.errno != errno.ENOENT:
537 537 raise
538 538
539 539 versionflags = newversionflags
540 540
541 541 self.version = versionflags
542 542
543 543 flags = versionflags & ~0xFFFF
544 544 fmt = versionflags & 0xFFFF
545 545
546 546 if fmt == REVLOGV0:
547 547 if flags:
548 548 raise error.RevlogError(
549 549 _(b'unknown flags (%#04x) in version %d revlog %s')
550 550 % (flags >> 16, fmt, self.indexfile)
551 551 )
552 552
553 553 self._inline = False
554 554 self._generaldelta = False
555 555
556 556 elif fmt == REVLOGV1:
557 557 if flags & ~REVLOGV1_FLAGS:
558 558 raise error.RevlogError(
559 559 _(b'unknown flags (%#04x) in version %d revlog %s')
560 560 % (flags >> 16, fmt, self.indexfile)
561 561 )
562 562
563 563 self._inline = versionflags & FLAG_INLINE_DATA
564 564 self._generaldelta = versionflags & FLAG_GENERALDELTA
565 565
566 566 elif fmt == REVLOGV2:
567 567 if flags & ~REVLOGV2_FLAGS:
568 568 raise error.RevlogError(
569 569 _(b'unknown flags (%#04x) in version %d revlog %s')
570 570 % (flags >> 16, fmt, self.indexfile)
571 571 )
572 572
573 573 self._inline = versionflags & FLAG_INLINE_DATA
574 574 # generaldelta implied by version 2 revlogs.
575 575 self._generaldelta = True
576 576
577 577 else:
578 578 raise error.RevlogError(
579 579 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
580 580 )
581 581 # sparse-revlog can't be on without general-delta (issue6056)
582 582 if not self._generaldelta:
583 583 self._sparserevlog = False
584 584
585 585 self._storedeltachains = True
586 586
587 587 self._io = revlogio()
588 588 if self.version == REVLOGV0:
589 589 self._io = revlogoldio()
590 590 try:
591 591 d = self._io.parseindex(indexdata, self._inline)
592 592 except (ValueError, IndexError):
593 593 raise error.RevlogError(
594 594 _(b"index %s is corrupted") % self.indexfile
595 595 )
596 596 self.index, self._chunkcache = d
597 597 if not self._chunkcache:
598 598 self._chunkclear()
599 599 # revnum -> (chain-length, sum-delta-length)
600 600 self._chaininfocache = {}
601 601 # revlog header -> revlog compressor
602 602 self._decompressors = {}
603 603
604 604 @util.propertycache
605 605 def _compressor(self):
606 606 engine = util.compengines[self._compengine]
607 607 return engine.revlogcompressor(self._compengineopts)
608 608
609 609 def _indexfp(self, mode=b'r'):
610 610 """file object for the revlog's index file"""
611 611 args = {'mode': mode}
612 612 if mode != b'r':
613 613 args['checkambig'] = self._checkambig
614 614 if mode == b'w':
615 615 args['atomictemp'] = True
616 616 return self.opener(self.indexfile, **args)
617 617
618 618 def _datafp(self, mode=b'r'):
619 619 """file object for the revlog's data file"""
620 620 return self.opener(self.datafile, mode=mode)
621 621
622 622 @contextlib.contextmanager
623 623 def _datareadfp(self, existingfp=None):
624 624 """file object suitable to read data"""
625 625 # Use explicit file handle, if given.
626 626 if existingfp is not None:
627 627 yield existingfp
628 628
629 629 # Use a file handle being actively used for writes, if available.
630 630 # There is some danger to doing this because reads will seek the
631 631 # file. However, _writeentry() performs a SEEK_END before all writes,
632 632 # so we should be safe.
633 633 elif self._writinghandles:
634 634 if self._inline:
635 635 yield self._writinghandles[0]
636 636 else:
637 637 yield self._writinghandles[1]
638 638
639 639 # Otherwise open a new file handle.
640 640 else:
641 641 if self._inline:
642 642 func = self._indexfp
643 643 else:
644 644 func = self._datafp
645 645 with func() as fp:
646 646 yield fp
647 647
648 648 def tiprev(self):
649 649 return len(self.index) - 1
650 650
651 651 def tip(self):
652 652 return self.node(self.tiprev())
653 653
654 654 def __contains__(self, rev):
655 655 return 0 <= rev < len(self)
656 656
657 657 def __len__(self):
658 658 return len(self.index)
659 659
660 660 def __iter__(self):
661 661 return iter(pycompat.xrange(len(self)))
662 662
663 663 def revs(self, start=0, stop=None):
664 664 """iterate over all rev in this revlog (from start to stop)"""
665 665 return storageutil.iterrevs(len(self), start=start, stop=stop)
666 666
667 667 @property
668 668 def nodemap(self):
669 669 msg = (
670 670 b"revlog.nodemap is deprecated, "
671 671 b"use revlog.index.[has_node|rev|get_rev]"
672 672 )
673 673 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
674 674 return self.index.nodemap
675 675
676 676 @property
677 677 def _nodecache(self):
678 678 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
679 679 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
680 680 return self.index.nodemap
681 681
682 682 def hasnode(self, node):
683 683 try:
684 684 self.rev(node)
685 685 return True
686 686 except KeyError:
687 687 return False
688 688
689 689 def candelta(self, baserev, rev):
690 690 """whether two revisions (baserev, rev) can be delta-ed or not"""
691 691 # Disable delta if either rev requires a content-changing flag
692 692 # processor (ex. LFS). This is because such a flag processor can alter
693 693 # the rawtext content that the delta will be based on, and two clients
694 694 # could have the same revlog node with different flags (i.e. different
695 695 # rawtext contents) and the delta could be incompatible.
696 696 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
697 697 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
698 698 ):
699 699 return False
700 700 return True
701 701
702 702 def clearcaches(self):
703 703 self._revisioncache = None
704 704 self._chainbasecache.clear()
705 705 self._chunkcache = (0, b'')
706 706 self._pcache = {}
707 707 self.index.clearcaches()
708 708
709 709 def rev(self, node):
710 710 try:
711 711 return self.index.rev(node)
712 712 except TypeError:
713 713 raise
714 714 except error.RevlogError:
715 715 # parsers.c radix tree lookup failed
716 716 if node == wdirid or node in wdirfilenodeids:
717 717 raise error.WdirUnsupported
718 718 raise error.LookupError(node, self.indexfile, _(b'no node'))
719 719
720 720 # Accessors for index entries.
721 721
722 722 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
723 723 # are flags.
724 724 def start(self, rev):
725 725 return int(self.index[rev][0] >> 16)
726 726
727 727 def flags(self, rev):
728 728 return self.index[rev][0] & 0xFFFF
729 729
730 730 def length(self, rev):
731 731 return self.index[rev][1]
732 732
733 733 def rawsize(self, rev):
734 734 """return the length of the uncompressed text for a given revision"""
735 735 l = self.index[rev][2]
736 736 if l >= 0:
737 737 return l
738 738
739 739 t = self.rawdata(rev)
740 740 return len(t)
741 741
742 742 def size(self, rev):
743 743 """length of non-raw text (processed by a "read" flag processor)"""
744 744 # fast path: if no "read" flag processor could change the content,
745 745 # size is rawsize. note: ELLIPSIS is known to not change the content.
746 746 flags = self.flags(rev)
747 747 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
748 748 return self.rawsize(rev)
749 749
750 750 return len(self.revision(rev, raw=False))
751 751
752 752 def chainbase(self, rev):
753 753 base = self._chainbasecache.get(rev)
754 754 if base is not None:
755 755 return base
756 756
757 757 index = self.index
758 758 iterrev = rev
759 759 base = index[iterrev][3]
760 760 while base != iterrev:
761 761 iterrev = base
762 762 base = index[iterrev][3]
763 763
764 764 self._chainbasecache[rev] = base
765 765 return base
766 766
767 767 def linkrev(self, rev):
768 768 return self.index[rev][4]
769 769
770 770 def parentrevs(self, rev):
771 771 try:
772 772 entry = self.index[rev]
773 773 except IndexError:
774 774 if rev == wdirrev:
775 775 raise error.WdirUnsupported
776 776 raise
777 777
778 778 return entry[5], entry[6]
779 779
780 780 # fast parentrevs(rev) where rev isn't filtered
781 781 _uncheckedparentrevs = parentrevs
782 782
783 783 def node(self, rev):
784 784 try:
785 785 return self.index[rev][7]
786 786 except IndexError:
787 787 if rev == wdirrev:
788 788 raise error.WdirUnsupported
789 789 raise
790 790
791 791 # Derived from index values.
792 792
793 793 def end(self, rev):
794 794 return self.start(rev) + self.length(rev)
795 795
796 796 def parents(self, node):
797 797 i = self.index
798 798 d = i[self.rev(node)]
799 799 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
800 800
801 801 def chainlen(self, rev):
802 802 return self._chaininfo(rev)[0]
803 803
804 804 def _chaininfo(self, rev):
805 805 chaininfocache = self._chaininfocache
806 806 if rev in chaininfocache:
807 807 return chaininfocache[rev]
808 808 index = self.index
809 809 generaldelta = self._generaldelta
810 810 iterrev = rev
811 811 e = index[iterrev]
812 812 clen = 0
813 813 compresseddeltalen = 0
814 814 while iterrev != e[3]:
815 815 clen += 1
816 816 compresseddeltalen += e[1]
817 817 if generaldelta:
818 818 iterrev = e[3]
819 819 else:
820 820 iterrev -= 1
821 821 if iterrev in chaininfocache:
822 822 t = chaininfocache[iterrev]
823 823 clen += t[0]
824 824 compresseddeltalen += t[1]
825 825 break
826 826 e = index[iterrev]
827 827 else:
828 828 # Add text length of base since decompressing that also takes
829 829 # work. For cache hits the length is already included.
830 830 compresseddeltalen += e[1]
831 831 r = (clen, compresseddeltalen)
832 832 chaininfocache[rev] = r
833 833 return r
834 834
835 835 def _deltachain(self, rev, stoprev=None):
836 836 """Obtain the delta chain for a revision.
837 837
838 838 ``stoprev`` specifies a revision to stop at. If not specified, we
839 839 stop at the base of the chain.
840 840
841 841 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
842 842 revs in ascending order and ``stopped`` is a bool indicating whether
843 843 ``stoprev`` was hit.
844 844 """
845 845 # Try C implementation.
846 846 try:
847 847 return self.index.deltachain(rev, stoprev, self._generaldelta)
848 848 except AttributeError:
849 849 pass
850 850
851 851 chain = []
852 852
853 853 # Alias to prevent attribute lookup in tight loop.
854 854 index = self.index
855 855 generaldelta = self._generaldelta
856 856
857 857 iterrev = rev
858 858 e = index[iterrev]
859 859 while iterrev != e[3] and iterrev != stoprev:
860 860 chain.append(iterrev)
861 861 if generaldelta:
862 862 iterrev = e[3]
863 863 else:
864 864 iterrev -= 1
865 865 e = index[iterrev]
866 866
867 867 if iterrev == stoprev:
868 868 stopped = True
869 869 else:
870 870 chain.append(iterrev)
871 871 stopped = False
872 872
873 873 chain.reverse()
874 874 return chain, stopped
875 875
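    # Editorial note, not part of the original source: without generaldelta,
    # the chain above walks backwards one revision at a time, so a revlog
    # whose chain base for rev 5 is rev 2 yields chain [2, 3, 4, 5]; with
    # generaldelta each hop follows the stored delta base (entry[3]) instead.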
876 876 def ancestors(self, revs, stoprev=0, inclusive=False):
877 877 """Generate the ancestors of 'revs' in reverse revision order.
878 878 Does not generate revs lower than stoprev.
879 879
880 880 See the documentation for ancestor.lazyancestors for more details."""
881 881
882 882 # first, make sure start revisions aren't filtered
883 883 revs = list(revs)
884 884 checkrev = self.node
885 885 for r in revs:
886 886 checkrev(r)
887 887 # and now we're sure their ancestors aren't filtered either
888 888
889 889 if rustancestor is not None:
890 890 lazyancestors = rustancestor.LazyAncestors
891 891 arg = self.index
892 892 elif util.safehasattr(parsers, b'rustlazyancestors'):
893 893 lazyancestors = ancestor.rustlazyancestors
894 894 arg = self.index
895 895 else:
896 896 lazyancestors = ancestor.lazyancestors
897 897 arg = self._uncheckedparentrevs
898 898 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
899 899
900 900 def descendants(self, revs):
901 901 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
902 902
903 903 def findcommonmissing(self, common=None, heads=None):
904 904 """Return a tuple of the ancestors of common and the ancestors of heads
905 905 that are not ancestors of common. In revset terminology, we return the
906 906 tuple:
907 907
908 908 ::common, (::heads) - (::common)
909 909
910 910 The list is sorted by revision number, meaning it is
911 911 topologically sorted.
912 912
913 913 'heads' and 'common' are both lists of node IDs. If heads is
914 914 not supplied, uses all of the revlog's heads. If common is not
915 915 supplied, uses nullid."""
916 916 if common is None:
917 917 common = [nullid]
918 918 if heads is None:
919 919 heads = self.heads()
920 920
921 921 common = [self.rev(n) for n in common]
922 922 heads = [self.rev(n) for n in heads]
923 923
924 924 # we want the ancestors, but inclusive
925 925 class lazyset(object):
926 926 def __init__(self, lazyvalues):
927 927 self.addedvalues = set()
928 928 self.lazyvalues = lazyvalues
929 929
930 930 def __contains__(self, value):
931 931 return value in self.addedvalues or value in self.lazyvalues
932 932
933 933 def __iter__(self):
934 934 added = self.addedvalues
935 935 for r in added:
936 936 yield r
937 937 for r in self.lazyvalues:
938 938 if r not in added:
939 939 yield r
940 940
941 941 def add(self, value):
942 942 self.addedvalues.add(value)
943 943
944 944 def update(self, values):
945 945 self.addedvalues.update(values)
946 946
947 947 has = lazyset(self.ancestors(common))
948 948 has.add(nullrev)
949 949 has.update(common)
950 950
951 951 # take all ancestors from heads that aren't in has
952 952 missing = set()
953 953 visit = collections.deque(r for r in heads if r not in has)
954 954 while visit:
955 955 r = visit.popleft()
956 956 if r in missing:
957 957 continue
958 958 else:
959 959 missing.add(r)
960 960 for p in self.parentrevs(r):
961 961 if p not in has:
962 962 visit.append(p)
963 963 missing = list(missing)
964 964 missing.sort()
965 965 return has, [self.node(miss) for miss in missing]
966 966
967 967 def incrementalmissingrevs(self, common=None):
968 968 """Return an object that can be used to incrementally compute the
969 969 revision numbers of the ancestors of arbitrary sets that are not
970 970 ancestors of common. This is an ancestor.incrementalmissingancestors
971 971 object.
972 972
973 973 'common' is a list of revision numbers. If common is not supplied, uses
974 974 nullrev.
975 975 """
976 976 if common is None:
977 977 common = [nullrev]
978 978
979 979 if rustancestor is not None:
980 980 return rustancestor.MissingAncestors(self.index, common)
981 981 return ancestor.incrementalmissingancestors(self.parentrevs, common)
982 982
983 983 def findmissingrevs(self, common=None, heads=None):
984 984 """Return the revision numbers of the ancestors of heads that
985 985 are not ancestors of common.
986 986
987 987 More specifically, return a list of revision numbers corresponding to
988 988 nodes N such that every N satisfies the following constraints:
989 989
990 990 1. N is an ancestor of some node in 'heads'
991 991 2. N is not an ancestor of any node in 'common'
992 992
993 993 The list is sorted by revision number, meaning it is
994 994 topologically sorted.
995 995
996 996 'heads' and 'common' are both lists of revision numbers. If heads is
997 997 not supplied, uses all of the revlog's heads. If common is not
998 998 supplied, uses nullid."""
999 999 if common is None:
1000 1000 common = [nullrev]
1001 1001 if heads is None:
1002 1002 heads = self.headrevs()
1003 1003
1004 1004 inc = self.incrementalmissingrevs(common=common)
1005 1005 return inc.missingancestors(heads)
1006 1006
1007 1007 def findmissing(self, common=None, heads=None):
1008 1008 """Return the ancestors of heads that are not ancestors of common.
1009 1009
1010 1010 More specifically, return a list of nodes N such that every N
1011 1011 satisfies the following constraints:
1012 1012
1013 1013 1. N is an ancestor of some node in 'heads'
1014 1014 2. N is not an ancestor of any node in 'common'
1015 1015
1016 1016 The list is sorted by revision number, meaning it is
1017 1017 topologically sorted.
1018 1018
1019 1019 'heads' and 'common' are both lists of node IDs. If heads is
1020 1020 not supplied, uses all of the revlog's heads. If common is not
1021 1021 supplied, uses nullid."""
1022 1022 if common is None:
1023 1023 common = [nullid]
1024 1024 if heads is None:
1025 1025 heads = self.heads()
1026 1026
1027 1027 common = [self.rev(n) for n in common]
1028 1028 heads = [self.rev(n) for n in heads]
1029 1029
1030 1030 inc = self.incrementalmissingrevs(common=common)
1031 1031 return [self.node(r) for r in inc.missingancestors(heads)]
1032 1032
1033 1033 def nodesbetween(self, roots=None, heads=None):
1034 1034 """Return a topological path from 'roots' to 'heads'.
1035 1035
1036 1036 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1037 1037 topologically sorted list of all nodes N that satisfy both of
1038 1038 these constraints:
1039 1039
1040 1040 1. N is a descendant of some node in 'roots'
1041 1041 2. N is an ancestor of some node in 'heads'
1042 1042
1043 1043 Every node is considered to be both a descendant and an ancestor
1044 1044 of itself, so every reachable node in 'roots' and 'heads' will be
1045 1045 included in 'nodes'.
1046 1046
1047 1047 'outroots' is the list of reachable nodes in 'roots', i.e., the
1048 1048 subset of 'roots' that is returned in 'nodes'. Likewise,
1049 1049 'outheads' is the subset of 'heads' that is also in 'nodes'.
1050 1050
1051 1051 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1052 1052 unspecified, uses nullid as the only root. If 'heads' is
1053 1053 unspecified, uses list of all of the revlog's heads."""
1054 1054 nonodes = ([], [], [])
1055 1055 if roots is not None:
1056 1056 roots = list(roots)
1057 1057 if not roots:
1058 1058 return nonodes
1059 1059 lowestrev = min([self.rev(n) for n in roots])
1060 1060 else:
1061 1061 roots = [nullid] # Everybody's a descendant of nullid
1062 1062 lowestrev = nullrev
1063 1063 if (lowestrev == nullrev) and (heads is None):
1064 1064 # We want _all_ the nodes!
1065 1065 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1066 1066 if heads is None:
1067 1067 # All nodes are ancestors, so the latest ancestor is the last
1068 1068 # node.
1069 1069 highestrev = len(self) - 1
1070 1070 # Set ancestors to None to signal that every node is an ancestor.
1071 1071 ancestors = None
1072 1072 # Set heads to an empty dictionary for later discovery of heads
1073 1073 heads = {}
1074 1074 else:
1075 1075 heads = list(heads)
1076 1076 if not heads:
1077 1077 return nonodes
1078 1078 ancestors = set()
1079 1079 # Turn heads into a dictionary so we can remove 'fake' heads.
1080 1080 # Also, later we will be using it to filter out the heads we can't
1081 1081 # find from roots.
1082 1082 heads = dict.fromkeys(heads, False)
1083 1083 # Start at the top and keep marking parents until we're done.
1084 1084 nodestotag = set(heads)
1085 1085 # Remember where the top was so we can use it as a limit later.
1086 1086 highestrev = max([self.rev(n) for n in nodestotag])
1087 1087 while nodestotag:
1088 1088 # grab a node to tag
1089 1089 n = nodestotag.pop()
1090 1090 # Never tag nullid
1091 1091 if n == nullid:
1092 1092 continue
1093 1093 # A node's revision number represents its place in a
1094 1094 # topologically sorted list of nodes.
1095 1095 r = self.rev(n)
1096 1096 if r >= lowestrev:
1097 1097 if n not in ancestors:
1098 1098 # If we are possibly a descendant of one of the roots
1099 1099 # and we haven't already been marked as an ancestor
1100 1100 ancestors.add(n) # Mark as ancestor
1101 1101 # Add non-nullid parents to list of nodes to tag.
1102 1102 nodestotag.update(
1103 1103 [p for p in self.parents(n) if p != nullid]
1104 1104 )
1105 1105 elif n in heads: # We've seen it before, is it a fake head?
1106 1106 # So it is, real heads should not be the ancestors of
1107 1107 # any other heads.
1108 1108 heads.pop(n)
1109 1109 if not ancestors:
1110 1110 return nonodes
1111 1111 # Now that we have our set of ancestors, we want to remove any
1112 1112 # roots that are not ancestors.
1113 1113
1114 1114 # If one of the roots was nullid, everything is included anyway.
1115 1115 if lowestrev > nullrev:
1116 1116 # But, since we weren't, let's recompute the lowest rev to not
1117 1117 # include roots that aren't ancestors.
1118 1118
1119 1119 # Filter out roots that aren't ancestors of heads
1120 1120 roots = [root for root in roots if root in ancestors]
1121 1121 # Recompute the lowest revision
1122 1122 if roots:
1123 1123 lowestrev = min([self.rev(root) for root in roots])
1124 1124 else:
1125 1125 # No more roots? Return empty list
1126 1126 return nonodes
1127 1127 else:
1128 1128 # We are descending from nullid, and don't need to care about
1129 1129 # any other roots.
1130 1130 lowestrev = nullrev
1131 1131 roots = [nullid]
1132 1132 # Transform our roots list into a set.
1133 1133 descendants = set(roots)
1134 1134 # Also, keep the original roots so we can filter out roots that aren't
1135 1135 # 'real' roots (i.e. are descended from other roots).
1136 1136 roots = descendants.copy()
1137 1137 # Our topologically sorted list of output nodes.
1138 1138 orderedout = []
1139 1139 # Don't start at nullid since we don't want nullid in our output list,
1140 1140 # and if nullid shows up in descendants, empty parents will look like
1141 1141 # they're descendants.
1142 1142 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1143 1143 n = self.node(r)
1144 1144 isdescendant = False
1145 1145 if lowestrev == nullrev: # Everybody is a descendant of nullid
1146 1146 isdescendant = True
1147 1147 elif n in descendants:
1148 1148 # n is already a descendant
1149 1149 isdescendant = True
1150 1150 # This check only needs to be done here because all the roots
1151 1151 # will start being marked as descendants before the loop.
1152 1152 if n in roots:
1153 1153 # If n was a root, check if it's a 'real' root.
1154 1154 p = tuple(self.parents(n))
1155 1155 # If any of its parents are descendants, it's not a root.
1156 1156 if (p[0] in descendants) or (p[1] in descendants):
1157 1157 roots.remove(n)
1158 1158 else:
1159 1159 p = tuple(self.parents(n))
1160 1160 # A node is a descendant if either of its parents are
1161 1161 # descendants. (We seeded the descendants set with the roots
1162 1162 # up there, remember?)
1163 1163 if (p[0] in descendants) or (p[1] in descendants):
1164 1164 descendants.add(n)
1165 1165 isdescendant = True
1166 1166 if isdescendant and ((ancestors is None) or (n in ancestors)):
1167 1167 # Only include nodes that are both descendants and ancestors.
1168 1168 orderedout.append(n)
1169 1169 if (ancestors is not None) and (n in heads):
1170 1170 # We're trying to figure out which heads are reachable
1171 1171 # from roots.
1172 1172 # Mark this head as having been reached
1173 1173 heads[n] = True
1174 1174 elif ancestors is None:
1175 1175 # Otherwise, we're trying to discover the heads.
1176 1176 # Assume this is a head because if it isn't, the next step
1177 1177 # will eventually remove it.
1178 1178 heads[n] = True
1179 1179 # But, obviously its parents aren't.
1180 1180 for p in self.parents(n):
1181 1181 heads.pop(p, None)
1182 1182 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1183 1183 roots = list(roots)
1184 1184 assert orderedout
1185 1185 assert roots
1186 1186 assert heads
1187 1187 return (orderedout, roots, heads)
1188 1188
1189 1189 def headrevs(self, revs=None):
1190 1190 if revs is None:
1191 1191 try:
1192 1192 return self.index.headrevs()
1193 1193 except AttributeError:
1194 1194 return self._headrevs()
1195 1195 if rustdagop is not None:
1196 1196 return rustdagop.headrevs(self.index, revs)
1197 1197 return dagop.headrevs(revs, self._uncheckedparentrevs)
1198 1198
1199 1199 def computephases(self, roots):
1200 1200 return self.index.computephasesmapsets(roots)
1201 1201
1202 1202 def _headrevs(self):
1203 1203 count = len(self)
1204 1204 if not count:
1205 1205 return [nullrev]
1206 1206 # we won't iterate over filtered revs, so nobody is a head at the start
1207 1207 ishead = [0] * (count + 1)
1208 1208 index = self.index
1209 1209 for r in self:
1210 1210 ishead[r] = 1 # I may be a head
1211 1211 e = index[r]
1212 1212 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1213 1213 return [r for r, val in enumerate(ishead) if val]
1214 1214
1215 1215 def heads(self, start=None, stop=None):
1216 1216 """return the list of all nodes that have no children
1217 1217
1218 1218 if start is specified, only heads that are descendants of
1219 1219 start will be returned
1220 1220 if stop is specified, it will consider all the revs from stop
1221 1221 as if they had no children
1222 1222 """
1223 1223 if start is None and stop is None:
1224 1224 if not len(self):
1225 1225 return [nullid]
1226 1226 return [self.node(r) for r in self.headrevs()]
1227 1227
1228 1228 if start is None:
1229 1229 start = nullrev
1230 1230 else:
1231 1231 start = self.rev(start)
1232 1232
1233 1233 stoprevs = set(self.rev(n) for n in stop or [])
1234 1234
1235 1235 revs = dagop.headrevssubset(
1236 1236 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1237 1237 )
1238 1238
1239 1239 return [self.node(rev) for rev in revs]
1240 1240
1241 1241 def children(self, node):
1242 1242 """find the children of a given node"""
1243 1243 c = []
1244 1244 p = self.rev(node)
1245 1245 for r in self.revs(start=p + 1):
1246 1246 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1247 1247 if prevs:
1248 1248 for pr in prevs:
1249 1249 if pr == p:
1250 1250 c.append(self.node(r))
1251 1251 elif p == nullrev:
1252 1252 c.append(self.node(r))
1253 1253 return c
1254 1254
1255 1255 def commonancestorsheads(self, a, b):
1256 1256 """calculate all the heads of the common ancestors of nodes a and b"""
1257 1257 a, b = self.rev(a), self.rev(b)
1258 1258 ancs = self._commonancestorsheads(a, b)
1259 1259 return pycompat.maplist(self.node, ancs)
1260 1260
1261 1261 def _commonancestorsheads(self, *revs):
1262 1262 """calculate all the heads of the common ancestors of revs"""
1263 1263 try:
1264 1264 ancs = self.index.commonancestorsheads(*revs)
1265 1265 except (AttributeError, OverflowError): # C implementation failed
1266 1266 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1267 1267 return ancs
1268 1268
1269 1269 def isancestor(self, a, b):
1270 1270 """return True if node a is an ancestor of node b
1271 1271
1272 1272 A revision is considered an ancestor of itself."""
1273 1273 a, b = self.rev(a), self.rev(b)
1274 1274 return self.isancestorrev(a, b)
1275 1275
1276 1276 def isancestorrev(self, a, b):
1277 1277 """return True if revision a is an ancestor of revision b
1278 1278
1279 1279 A revision is considered an ancestor of itself.
1280 1280
1281 1281 The implementation of this is trivial but the use of
1282 1282 reachableroots is not."""
1283 1283 if a == nullrev:
1284 1284 return True
1285 1285 elif a == b:
1286 1286 return True
1287 1287 elif a > b:
1288 1288 return False
1289 1289 return bool(self.reachableroots(a, [b], [a], includepath=False))
1290 1290
1291 1291 def reachableroots(self, minroot, heads, roots, includepath=False):
1292 1292 """return (heads(::(<roots> and <roots>::<heads>)))
1293 1293
1294 1294 If includepath is True, return (<roots>::<heads>)."""
1295 1295 try:
1296 1296 return self.index.reachableroots2(
1297 1297 minroot, heads, roots, includepath
1298 1298 )
1299 1299 except AttributeError:
1300 1300 return dagop._reachablerootspure(
1301 1301 self.parentrevs, minroot, roots, heads, includepath
1302 1302 )
1303 1303
1304 1304 def ancestor(self, a, b):
1305 1305 """calculate the "best" common ancestor of nodes a and b"""
1306 1306
1307 1307 a, b = self.rev(a), self.rev(b)
1308 1308 try:
1309 1309 ancs = self.index.ancestors(a, b)
1310 1310 except (AttributeError, OverflowError):
1311 1311 ancs = ancestor.ancestors(self.parentrevs, a, b)
1312 1312 if ancs:
1313 1313 # choose a consistent winner when there's a tie
1314 1314 return min(map(self.node, ancs))
1315 1315 return nullid
1316 1316
1317 1317 def _match(self, id):
1318 1318 if isinstance(id, int):
1319 1319 # rev
1320 1320 return self.node(id)
1321 1321 if len(id) == 20:
1322 1322 # possibly a binary node
1323 1323 # odds of a binary node being all hex in ASCII are 1 in 10**25
1324 1324 try:
1325 1325 node = id
1326 1326 self.rev(node) # quick search the index
1327 1327 return node
1328 1328 except error.LookupError:
1329 1329 pass # may be partial hex id
1330 1330 try:
1331 1331 # str(rev)
1332 1332 rev = int(id)
1333 1333 if b"%d" % rev != id:
1334 1334 raise ValueError
1335 1335 if rev < 0:
1336 1336 rev = len(self) + rev
1337 1337 if rev < 0 or rev >= len(self):
1338 1338 raise ValueError
1339 1339 return self.node(rev)
1340 1340 except (ValueError, OverflowError):
1341 1341 pass
1342 1342 if len(id) == 40:
1343 1343 try:
1344 1344 # a full hex nodeid?
1345 1345 node = bin(id)
1346 1346 self.rev(node)
1347 1347 return node
1348 1348 except (TypeError, error.LookupError):
1349 1349 pass
1350 1350
1351 1351 def _partialmatch(self, id):
1352 1352 # we don't care about wdirfilenodeids as they should always be full hashes
1353 1353 maybewdir = wdirhex.startswith(id)
1354 1354 try:
1355 1355 partial = self.index.partialmatch(id)
1356 1356 if partial and self.hasnode(partial):
1357 1357 if maybewdir:
1358 1358 # single 'ff...' match in radix tree, ambiguous with wdir
1359 1359 raise error.RevlogError
1360 1360 return partial
1361 1361 if maybewdir:
1362 1362 # no 'ff...' match in radix tree, wdir identified
1363 1363 raise error.WdirUnsupported
1364 1364 return None
1365 1365 except error.RevlogError:
1366 1366 # parsers.c radix tree lookup gave multiple matches
1367 1367 # fast path: for unfiltered changelog, radix tree is accurate
1368 1368 if not getattr(self, 'filteredrevs', None):
1369 1369 raise error.AmbiguousPrefixLookupError(
1370 1370 id, self.indexfile, _(b'ambiguous identifier')
1371 1371 )
1372 1372 # fall through to slow path that filters hidden revisions
1373 1373 except (AttributeError, ValueError):
1374 1374 # we are pure python, or key was too short to search radix tree
1375 1375 pass
1376 1376
1377 1377 if id in self._pcache:
1378 1378 return self._pcache[id]
1379 1379
1380 1380 if len(id) <= 40:
1381 1381 try:
1382 1382 # hex(node)[:...]
1383 1383 l = len(id) // 2 # grab an even number of digits
1384 1384 prefix = bin(id[: l * 2])
1385 1385 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1386 1386 nl = [
1387 1387 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1388 1388 ]
1389 1389 if nullhex.startswith(id):
1390 1390 nl.append(nullid)
1391 1391 if len(nl) > 0:
1392 1392 if len(nl) == 1 and not maybewdir:
1393 1393 self._pcache[id] = nl[0]
1394 1394 return nl[0]
1395 1395 raise error.AmbiguousPrefixLookupError(
1396 1396 id, self.indexfile, _(b'ambiguous identifier')
1397 1397 )
1398 1398 if maybewdir:
1399 1399 raise error.WdirUnsupported
1400 1400 return None
1401 1401 except TypeError:
1402 1402 pass
1403 1403
1404 1404 def lookup(self, id):
1405 1405 """locate a node based on:
1406 1406 - revision number or str(revision number)
1407 1407 - nodeid or subset of hex nodeid
1408 1408 """
1409 1409 n = self._match(id)
1410 1410 if n is not None:
1411 1411 return n
1412 1412 n = self._partialmatch(id)
1413 1413 if n:
1414 1414 return n
1415 1415
1416 1416 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1417 1417
1418 1418 def shortest(self, node, minlength=1):
1419 1419 """Find the shortest unambiguous prefix that matches node."""
1420 1420
1421 1421 def isvalid(prefix):
1422 1422 try:
1423 1423 matchednode = self._partialmatch(prefix)
1424 1424 except error.AmbiguousPrefixLookupError:
1425 1425 return False
1426 1426 except error.WdirUnsupported:
1427 1427 # single 'ff...' match
1428 1428 return True
1429 1429 if matchednode is None:
1430 1430 raise error.LookupError(node, self.indexfile, _(b'no node'))
1431 1431 return True
1432 1432
1433 1433 def maybewdir(prefix):
1434 1434 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1435 1435
1436 1436 hexnode = hex(node)
1437 1437
1438 1438 def disambiguate(hexnode, minlength):
1439 1439 """Disambiguate against wdirid."""
1440 1440 for length in range(minlength, 41):
1441 1441 prefix = hexnode[:length]
1442 1442 if not maybewdir(prefix):
1443 1443 return prefix
1444 1444
1445 1445 if not getattr(self, 'filteredrevs', None):
1446 1446 try:
1447 1447 length = max(self.index.shortest(node), minlength)
1448 1448 return disambiguate(hexnode, length)
1449 1449 except error.RevlogError:
1450 1450 if node != wdirid:
1451 1451 raise error.LookupError(node, self.indexfile, _(b'no node'))
1452 1452 except AttributeError:
1453 1453 # Fall through to pure code
1454 1454 pass
1455 1455
1456 1456 if node == wdirid:
1457 1457 for length in range(minlength, 41):
1458 1458 prefix = hexnode[:length]
1459 1459 if isvalid(prefix):
1460 1460 return prefix
1461 1461
1462 1462 for length in range(minlength, 41):
1463 1463 prefix = hexnode[:length]
1464 1464 if isvalid(prefix):
1465 1465 return disambiguate(hexnode, length)
1466 1466
1467 1467 def cmp(self, node, text):
1468 1468 """compare text with a given file revision
1469 1469
1470 1470 returns True if text is different from what is stored.
1471 1471 """
1472 1472 p1, p2 = self.parents(node)
1473 1473 return storageutil.hashrevisionsha1(text, p1, p2) != node
1474 1474
1475 1475 def _cachesegment(self, offset, data):
1476 1476 """Add a segment to the revlog cache.
1477 1477
1478 1478 Accepts an absolute offset and the data that is at that location.
1479 1479 """
1480 1480 o, d = self._chunkcache
1481 1481 # try to add to existing cache
1482 1482 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1483 1483 self._chunkcache = o, d + data
1484 1484 else:
1485 1485 self._chunkcache = offset, data
1486 1486
1487 1487 def _readsegment(self, offset, length, df=None):
1488 1488 """Load a segment of raw data from the revlog.
1489 1489
1490 1490 Accepts an absolute offset, length to read, and an optional existing
1491 1491 file handle to read from.
1492 1492
1493 1493 If an existing file handle is passed, it will be seeked and the
1494 1494 original seek position will NOT be restored.
1495 1495
1496 1496 Returns a str or buffer of raw byte data.
1497 1497
1498 1498 Raises if the requested number of bytes could not be read.
1499 1499 """
1500 1500 # Cache data both forward and backward around the requested
1501 1501 # data, in a fixed size window. This helps speed up operations
1502 1502 # involving reading the revlog backwards.
1503 1503 cachesize = self._chunkcachesize
1504 1504 realoffset = offset & ~(cachesize - 1)
1505 1505 reallength = (
1506 1506 (offset + length + cachesize) & ~(cachesize - 1)
1507 1507 ) - realoffset
1508 1508 with self._datareadfp(df) as df:
1509 1509 df.seek(realoffset)
1510 1510 d = df.read(reallength)
1511 1511
1512 1512 self._cachesegment(realoffset, d)
1513 1513 if offset != realoffset or reallength != length:
1514 1514 startoffset = offset - realoffset
1515 1515 if len(d) - startoffset < length:
1516 1516 raise error.RevlogError(
1517 1517 _(
1518 1518 b'partial read of revlog %s; expected %d bytes from '
1519 1519 b'offset %d, got %d'
1520 1520 )
1521 1521 % (
1522 1522 self.indexfile if self._inline else self.datafile,
1523 1523 length,
1524 1524 realoffset,
1525 1525 len(d) - startoffset,
1526 1526 )
1527 1527 )
1528 1528
1529 1529 return util.buffer(d, startoffset, length)
1530 1530
1531 1531 if len(d) < length:
1532 1532 raise error.RevlogError(
1533 1533 _(
1534 1534 b'partial read of revlog %s; expected %d bytes from offset '
1535 1535 b'%d, got %d'
1536 1536 )
1537 1537 % (
1538 1538 self.indexfile if self._inline else self.datafile,
1539 1539 length,
1540 1540 offset,
1541 1541 len(d),
1542 1542 )
1543 1543 )
1544 1544
1545 1545 return d
1546 1546
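    # Editorial worked example, not part of the original source: with the
    # default cache size of 65536, a request for offset=70000, length=100 in
    # _readsegment() gives realoffset = 70000 & ~65535 = 65536 and
    # reallength = ((70000 + 100 + 65536) & ~65535) - 65536 = 65536, i.e.
    # one aligned 64k window containing the requested bytes.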
1547 1547 def _getsegment(self, offset, length, df=None):
1548 1548 """Obtain a segment of raw data from the revlog.
1549 1549
1550 1550 Accepts an absolute offset, length of bytes to obtain, and an
1551 1551 optional file handle to the already-opened revlog. If the file
1552 1552 handle is used, its original seek position will not be preserved.
1553 1553
1554 1554 Requests for data may be returned from a cache.
1555 1555
1556 1556 Returns a str or a buffer instance of raw byte data.
1557 1557 """
1558 1558 o, d = self._chunkcache
1559 1559 l = len(d)
1560 1560
1561 1561 # is it in the cache?
1562 1562 cachestart = offset - o
1563 1563 cacheend = cachestart + length
1564 1564 if cachestart >= 0 and cacheend <= l:
1565 1565 if cachestart == 0 and cacheend == l:
1566 1566 return d # avoid a copy
1567 1567 return util.buffer(d, cachestart, cacheend - cachestart)
1568 1568
1569 1569 return self._readsegment(offset, length, df=df)
1570 1570
1571 1571 def _getsegmentforrevs(self, startrev, endrev, df=None):
1572 1572 """Obtain a segment of raw data corresponding to a range of revisions.
1573 1573
1574 1574 Accepts the start and end revisions and an optional already-open
1575 1575 file handle to be used for reading. If the file handle is read, its
1576 1576 seek position will not be preserved.
1577 1577
1578 1578 Requests for data may be satisfied by a cache.
1579 1579
1580 1580 Returns a 2-tuple of (offset, data) for the requested range of
1581 1581 revisions. Offset is the integer offset from the beginning of the
1582 1582 revlog and data is a str or buffer of the raw byte data.
1583 1583
1584 1584 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1585 1585 to determine where each revision's data begins and ends.
1586 1586 """
1587 1587 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1588 1588 # (functions are expensive).
1589 1589 index = self.index
1590 1590 istart = index[startrev]
1591 1591 start = int(istart[0] >> 16)
1592 1592 if startrev == endrev:
1593 1593 end = start + istart[1]
1594 1594 else:
1595 1595 iend = index[endrev]
1596 1596 end = int(iend[0] >> 16) + iend[1]
1597 1597
1598 1598 if self._inline:
1599 1599 start += (startrev + 1) * self._io.size
1600 1600 end += (endrev + 1) * self._io.size
1601 1601 length = end - start
1602 1602
1603 1603 return start, self._getsegment(start, length, df=df)
1604 1604
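    # Editorial note, not part of the original source: for inline revlogs
    # the index and data are interleaved in a single file, so the data for
    # revision r starts after (r + 1) index entries -- hence the
    # "(startrev + 1) * self._io.size" adjustment in _getsegmentforrevs().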
1605 1605 def _chunk(self, rev, df=None):
1606 1606 """Obtain a single decompressed chunk for a revision.
1607 1607
1608 1608 Accepts an integer revision and an optional already-open file handle
1609 1609 to be used for reading. If used, the seek position of the file will not
1610 1610 be preserved.
1611 1611
1612 1612 Returns a str holding uncompressed data for the requested revision.
1613 1613 """
1614 1614 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1615 1615
1616 1616 def _chunks(self, revs, df=None, targetsize=None):
1617 1617 """Obtain decompressed chunks for the specified revisions.
1618 1618
1619 1619 Accepts an iterable of numeric revisions that are assumed to be in
1620 1620 ascending order. Also accepts an optional already-open file handle
1621 1621 to be used for reading. If used, the seek position of the file will
1622 1622 not be preserved.
1623 1623
1624 1624 This function is similar to calling ``self._chunk()`` multiple times,
1625 1625 but is faster.
1626 1626
1627 1627 Returns a list with decompressed data for each requested revision.
1628 1628 """
1629 1629 if not revs:
1630 1630 return []
1631 1631 start = self.start
1632 1632 length = self.length
1633 1633 inline = self._inline
1634 1634 iosize = self._io.size
1635 1635 buffer = util.buffer
1636 1636
1637 1637 l = []
1638 1638 ladd = l.append
1639 1639
1640 1640 if not self._withsparseread:
1641 1641 slicedchunks = (revs,)
1642 1642 else:
1643 1643 slicedchunks = deltautil.slicechunk(
1644 1644 self, revs, targetsize=targetsize
1645 1645 )
1646 1646
1647 1647 for revschunk in slicedchunks:
1648 1648 firstrev = revschunk[0]
1649 1649 # Skip trailing revisions with empty diff
1650 1650 for lastrev in revschunk[::-1]:
1651 1651 if length(lastrev) != 0:
1652 1652 break
1653 1653
1654 1654 try:
1655 1655 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1656 1656 except OverflowError:
1657 1657 # issue4215 - we can't cache a run of chunks greater than
1658 1658 # 2G on Windows
1659 1659 return [self._chunk(rev, df=df) for rev in revschunk]
1660 1660
1661 1661 decomp = self.decompress
1662 1662 for rev in revschunk:
1663 1663 chunkstart = start(rev)
1664 1664 if inline:
1665 1665 chunkstart += (rev + 1) * iosize
1666 1666 chunklength = length(rev)
1667 1667 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1668 1668
1669 1669 return l
1670 1670
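# A minimal sketch (not part of revlog.py): up to bytes-vs-buffer wrapping,
# the batched path above is equivalent to the naive loop; `rl` is assumed
# to be an open revlog and `revs` an ascending list of revision numbers.
#
#     assert [bytes(c) for c in rl._chunks(revs)] == [
#         bytes(rl._chunk(r)) for r in revs
#     ]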
1671 1671 def _chunkclear(self):
1672 1672 """Clear the raw chunk cache."""
1673 1673 self._chunkcache = (0, b'')
1674 1674
1675 1675 def deltaparent(self, rev):
1676 1676 """return deltaparent of the given revision"""
1677 1677 base = self.index[rev][3]
1678 1678 if base == rev:
1679 1679 return nullrev
1680 1680 elif self._generaldelta:
1681 1681 return base
1682 1682 else:
1683 1683 return rev - 1
1684 1684
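# A summary sketch (not part of revlog.py) of the three deltaparent() cases:
#
#     base = rl.index[rev][3]
#     # base == rev          -> nullrev: full snapshot, no delta parent
#     # generaldelta revlog  -> base: delta against an arbitrary revision
#     # legacy revlog        -> rev - 1: delta against the previous
#     #                         revision in storage order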
1685 1685 def issnapshot(self, rev):
1686 1686 """tells whether rev is a snapshot
1687 1687 """
1688 1688 if not self._sparserevlog:
1689 1689 return self.deltaparent(rev) == nullrev
1690 1690 elif util.safehasattr(self.index, b'issnapshot'):
1691 1691 # directly assign the method so both the capability test and the access are cached
1692 1692 self.issnapshot = self.index.issnapshot
1693 1693 return self.issnapshot(rev)
1694 1694 if rev == nullrev:
1695 1695 return True
1696 1696 entry = self.index[rev]
1697 1697 base = entry[3]
1698 1698 if base == rev:
1699 1699 return True
1700 1700 if base == nullrev:
1701 1701 return True
1702 1702 p1 = entry[5]
1703 1703 p2 = entry[6]
1704 1704 if base == p1 or base == p2:
1705 1705 return False
1706 1706 return self.issnapshot(base)
1707 1707
1708 1708 def snapshotdepth(self, rev):
1709 1709 """number of snapshot in the chain before this one"""
1710 1710 if not self.issnapshot(rev):
1711 1711 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1712 1712 return len(self._deltachain(rev)[0]) - 1
1713 1713
1714 1714 def revdiff(self, rev1, rev2):
1715 1715 """return or calculate a delta between two revisions
1716 1716
1717 1717 The delta calculated is in binary form and is intended to be written to
1718 1718 revlog data directly. So this function needs raw revision data.
1719 1719 """
1720 1720 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1721 1721 return bytes(self._chunk(rev2))
1722 1722
1723 1723 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1724 1724
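# A minimal sketch (not part of revlog.py) of round-tripping a delta from
# revdiff(); assumes `rl` is an open revlog and rev1/rev2 are valid
# revisions.
#
#     delta = rl.revdiff(rev1, rev2)
#     patched = mdiff.patch(rl.rawdata(rev1), delta)
#     assert bytes(patched) == bytes(rl.rawdata(rev2))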
1725 1725 def _processflags(self, text, flags, operation, raw=False):
1726 1726 """deprecated entry point to access flag processors"""
1727 1727 msg = b'_processflags(...) is deprecated, use the specialized variant'
1728 1728 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1729 1729 if raw:
1730 1730 return text, flagutil.processflagsraw(self, text, flags)
1731 1731 elif operation == b'read':
1732 1732 return flagutil.processflagsread(self, text, flags)
1733 1733 else: # write operation
1734 1734 return flagutil.processflagswrite(self, text, flags)
1735 1735
1736 1736 def revision(self, nodeorrev, _df=None, raw=False):
1737 1737 """return an uncompressed revision of a given node or revision
1738 1738 number.
1739 1739
1740 1740 _df - an existing file handle to read from. (internal-only)
1741 1741 raw - an optional argument specifying if the revision data is to be
1742 1742 treated as raw data when applying flag transforms. 'raw' should be set
1743 1743 to True when generating changegroups or in debug commands.
1744 1744 """
1745 1745 if raw:
1746 1746 msg = (
1747 1747 b'revlog.revision(..., raw=True) is deprecated, '
1748 1748 b'use revlog.rawdata(...)'
1749 1749 )
1750 1750 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1751 1751 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1752 1752
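# A minimal sketch (not part of revlog.py) of migrating off the deprecated
# raw=True call path, per the warning above; `rl` is an assumed revlog.
#
#     rawtext = rl.revision(node, raw=True)  # deprecated since 5.2
#     rawtext = rl.rawdata(node)             # preferred replacement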
1753 1753 def sidedata(self, nodeorrev, _df=None):
1754 1754 """a map of extra data related to the changeset but not part of the hash
1755 1755
1756 1756 This function currently returns a dictionary. However, a more
1757 1757 advanced mapping object will likely be used in the future for more
1758 1758 efficient/lazy code.
1759 1759 """
1760 1760 return self._revisiondata(nodeorrev, _df)[1]
1761 1761
1762 1762 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1763 1763 # deal with <nodeorrev> argument type
1764 1764 if isinstance(nodeorrev, int):
1765 1765 rev = nodeorrev
1766 1766 node = self.node(rev)
1767 1767 else:
1768 1768 node = nodeorrev
1769 1769 rev = None
1770 1770
1771 1771 # fast path the special `nullid` rev
1772 1772 if node == nullid:
1773 1773 return b"", {}
1774 1774
1775 # The text as stored inside the revlog. Might be the revision or might
1776 # need to be processed to retrieve the revision.
1777 rawtext = None
1778
1775 # ``rawtext`` is the text as stored inside the revlog. Might be the
1776 # revision or might need to be processed to retrieve the revision.
1779 1777 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1780 1778
1781 1779 if raw and validated:
1782 1780 # if we don't want to process the raw text and that raw
1783 1781 # text is cached, we can exit early.
1784 1782 return rawtext, {}
1785 1783 if rev is None:
1786 1784 rev = self.rev(node)
1787 1785 # the revlog's flags for this revision
1788 1786 # (these usually alter its state or content)
1789 1787 flags = self.flags(rev)
1790 1788
1791 1789 if validated and flags == REVIDX_DEFAULT_FLAGS:
1792 1790 # no extra flags set, no flag processor runs, text = rawtext
1793 1791 return rawtext, {}
1794 1792
1795 1793 sidedata = {}
1796 1794 if raw:
1797 1795 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1798 1796 text = rawtext
1799 1797 else:
1800 1798 try:
1801 1799 r = flagutil.processflagsread(self, rawtext, flags)
1802 1800 except error.SidedataHashError as exc:
1803 1801 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1804 1802 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1805 1803 raise error.RevlogError(msg)
1806 1804 text, validatehash, sidedata = r
1807 1805 if validatehash:
1808 1806 self.checkhash(text, node, rev=rev)
1809 1807 if not validated:
1810 1808 self._revisioncache = (node, rev, rawtext)
1811 1809
1812 1810 return text, sidedata
1813 1811
1814 1812 def _rawtext(self, node, rev, _df=None):
1815 1813 """return the possibly unvalidated rawtext for a revision
1816 1814
1817 1815 returns (rev, rawtext, validated)
1818 1816 """
1819 1817
1820 1818 # revision in the cache (could be useful to apply delta)
1821 1819 cachedrev = None
1822 1820 # An intermediate text to apply deltas to
1823 1821 basetext = None
1824 1822
1825 1823 # Check if we have the entry in cache
1826 1824 # The cache entry looks like (node, rev, rawtext)
1827 1825 if self._revisioncache:
1828 1826 if self._revisioncache[0] == node:
1829 1827 return (rev, self._revisioncache[2], True)
1830 1828 cachedrev = self._revisioncache[1]
1831 1829
1832 1830 if rev is None:
1833 1831 rev = self.rev(node)
1834 1832
1835 1833 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1836 1834 if stopped:
1837 1835 basetext = self._revisioncache[2]
1838 1836
1839 1837 # drop cache to save memory, the caller is expected to
1840 1838 # update self._revisioncache after validating the text
1841 1839 self._revisioncache = None
1842 1840
1843 1841 targetsize = None
1844 1842 rawsize = self.index[rev][2]
1845 1843 if 0 <= rawsize:
1846 1844 targetsize = 4 * rawsize
1847 1845
1848 1846 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1849 1847 if basetext is None:
1850 1848 basetext = bytes(bins[0])
1851 1849 bins = bins[1:]
1852 1850
1853 1851 rawtext = mdiff.patches(basetext, bins)
1854 1852 del basetext # let us have a chance to free memory early
1855 1853 return (rev, rawtext, False)
1856 1854
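# A compact restatement (not part of revlog.py) of the reconstruction done
# by _rawtext() above: the raw text is the chain's base text with each
# subsequent delta applied in order; `rl` is an assumed open revlog.
#
#     chain, stopped = rl._deltachain(rev)
#     bins = rl._chunks(chain)
#     rawtext = mdiff.patches(bytes(bins[0]), bins[1:])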
1857 1855 def rawdata(self, nodeorrev, _df=None):
1858 1856 """return an uncompressed raw data of a given node or revision number.
1859 1857
1860 1858 _df - an existing file handle to read from. (internal-only)
1861 1859 """
1862 1860 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1863 1861
1864 1862 def hash(self, text, p1, p2):
1865 1863 """Compute a node hash.
1866 1864
1867 1865 Available as a function so that subclasses can replace the hash
1868 1866 as needed.
1869 1867 """
1870 1868 return storageutil.hashrevisionsha1(text, p1, p2)
1871 1869
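# A minimal sketch (not part of revlog.py): a node is the SHA-1 of the text
# keyed by its parents, so identical (text, p1, p2) always yields the same
# node; `rl` is an assumed open revlog.
#
#     node = rl.hash(text, p1, p2)
#     assert node == storageutil.hashrevisionsha1(text, p1, p2)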
1872 1870 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1873 1871 """Check node hash integrity.
1874 1872
1875 1873 Available as a function so that subclasses can extend hash mismatch
1876 1874 behaviors as needed.
1877 1875 """
1878 1876 try:
1879 1877 if p1 is None and p2 is None:
1880 1878 p1, p2 = self.parents(node)
1881 1879 if node != self.hash(text, p1, p2):
1882 1880 # Clear the revision cache on hash failure. The revision cache
1883 1881 # only stores the raw revision and clearing the cache does have
1884 1882 # the side-effect that we won't have a cache hit when the raw
1885 1883 # revision data is accessed. But this case should be rare and
1886 1884 # it is extra work to teach the cache about the hash
1887 1885 # verification state.
1888 1886 if self._revisioncache and self._revisioncache[0] == node:
1889 1887 self._revisioncache = None
1890 1888
1891 1889 revornode = rev
1892 1890 if revornode is None:
1893 1891 revornode = templatefilters.short(hex(node))
1894 1892 raise error.RevlogError(
1895 1893 _(b"integrity check failed on %s:%s")
1896 1894 % (self.indexfile, pycompat.bytestr(revornode))
1897 1895 )
1898 1896 except error.RevlogError:
1899 1897 if self._censorable and storageutil.iscensoredtext(text):
1900 1898 raise error.CensoredNodeError(self.indexfile, node, text)
1901 1899 raise
1902 1900
1903 1901 def _enforceinlinesize(self, tr, fp=None):
1904 1902 """Check if the revlog is too big for inline and convert if so.
1905 1903
1906 1904 This should be called after revisions are added to the revlog. If the
1907 1905 revlog has grown too large to be an inline revlog, it will convert it
1908 1906 to use multiple index and data files.
1909 1907 """
1910 1908 tiprev = len(self) - 1
1911 1909 if (
1912 1910 not self._inline
1913 1911 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1914 1912 ):
1915 1913 return
1916 1914
1917 1915 trinfo = tr.find(self.indexfile)
1918 1916 if trinfo is None:
1919 1917 raise error.RevlogError(
1920 1918 _(b"%s not found in the transaction") % self.indexfile
1921 1919 )
1922 1920
1923 1921 trindex = trinfo[2]
1924 1922 if trindex is not None:
1925 1923 dataoff = self.start(trindex)
1926 1924 else:
1927 1925 # revlog was stripped at start of transaction, use all leftover data
1928 1926 trindex = len(self) - 1
1929 1927 dataoff = self.end(tiprev)
1930 1928
1931 1929 tr.add(self.datafile, dataoff)
1932 1930
1933 1931 if fp:
1934 1932 fp.flush()
1935 1933 fp.close()
1936 1934 # We can't use the cached file handle after close(). So prevent
1937 1935 # its usage.
1938 1936 self._writinghandles = None
1939 1937
1940 1938 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1941 1939 for r in self:
1942 1940 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1943 1941
1944 1942 with self._indexfp(b'w') as fp:
1945 1943 self.version &= ~FLAG_INLINE_DATA
1946 1944 self._inline = False
1947 1945 io = self._io
1948 1946 for i in self:
1949 1947 e = io.packentry(self.index[i], self.node, self.version, i)
1950 1948 fp.write(e)
1951 1949
1952 1950 # the temp file replaces the real index when we exit the context
1953 1951 # manager
1954 1952
1955 1953 tr.replace(self.indexfile, trindex * self._io.size)
1956 1954 self._chunkclear()
1957 1955
1958 1956 def _nodeduplicatecallback(self, transaction, node):
1959 1957 """called when trying to add a node already stored.
1960 1958 """
1961 1959
1962 1960 def addrevision(
1963 1961 self,
1964 1962 text,
1965 1963 transaction,
1966 1964 link,
1967 1965 p1,
1968 1966 p2,
1969 1967 cachedelta=None,
1970 1968 node=None,
1971 1969 flags=REVIDX_DEFAULT_FLAGS,
1972 1970 deltacomputer=None,
1973 1971 sidedata=None,
1974 1972 ):
1975 1973 """add a revision to the log
1976 1974
1977 1975 text - the revision data to add
1978 1976 transaction - the transaction object used for rollback
1979 1977 link - the linkrev data to add
1980 1978 p1, p2 - the parent nodeids of the revision
1981 1979 cachedelta - an optional precomputed delta
1982 1980 node - nodeid of revision; typically node is not specified, and it is
1983 1981 computed by default as hash(text, p1, p2), however subclasses might
1984 1982 use a different hashing method (and override checkhash() in that case)
1985 1983 flags - the known flags to set on the revision
1986 1984 deltacomputer - an optional deltacomputer instance shared between
1987 1985 multiple calls
1988 1986 """
1989 1987 if link == nullrev:
1990 1988 raise error.RevlogError(
1991 1989 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1992 1990 )
1993 1991
1994 1992 if sidedata is None:
1995 1993 sidedata = {}
1996 1994 flags = flags & ~REVIDX_SIDEDATA
1997 1995 elif not self.hassidedata:
1998 1996 raise error.ProgrammingError(
1999 1997 _(b"trying to add sidedata to a revlog who don't support them")
2000 1998 )
2001 1999 else:
2002 2000 flags |= REVIDX_SIDEDATA
2003 2001
2004 2002 if flags:
2005 2003 node = node or self.hash(text, p1, p2)
2006 2004
2007 2005 rawtext, validatehash = flagutil.processflagswrite(
2008 2006 self, text, flags, sidedata=sidedata
2009 2007 )
2010 2008
2011 2009 # If the flag processor modifies the revision data, ignore any provided
2012 2010 # cachedelta.
2013 2011 if rawtext != text:
2014 2012 cachedelta = None
2015 2013
2016 2014 if len(rawtext) > _maxentrysize:
2017 2015 raise error.RevlogError(
2018 2016 _(
2019 2017 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2020 2018 )
2021 2019 % (self.indexfile, len(rawtext))
2022 2020 )
2023 2021
2024 2022 node = node or self.hash(rawtext, p1, p2)
2025 2023 if self.index.has_node(node):
2026 2024 return node
2027 2025
2028 2026 if validatehash:
2029 2027 self.checkhash(rawtext, node, p1=p1, p2=p2)
2030 2028
2031 2029 return self.addrawrevision(
2032 2030 rawtext,
2033 2031 transaction,
2034 2032 link,
2035 2033 p1,
2036 2034 p2,
2037 2035 node,
2038 2036 flags,
2039 2037 cachedelta=cachedelta,
2040 2038 deltacomputer=deltacomputer,
2041 2039 )
2042 2040
2043 2041 def addrawrevision(
2044 2042 self,
2045 2043 rawtext,
2046 2044 transaction,
2047 2045 link,
2048 2046 p1,
2049 2047 p2,
2050 2048 node,
2051 2049 flags,
2052 2050 cachedelta=None,
2053 2051 deltacomputer=None,
2054 2052 ):
2055 2053 """add a raw revision with known flags, node and parents
2056 2054 useful when reusing a revision not stored in this revlog (e.g. received
2057 2055 over the wire, or read from an external bundle).
2058 2056 """
2059 2057 dfh = None
2060 2058 if not self._inline:
2061 2059 dfh = self._datafp(b"a+")
2062 2060 ifh = self._indexfp(b"a+")
2063 2061 try:
2064 2062 return self._addrevision(
2065 2063 node,
2066 2064 rawtext,
2067 2065 transaction,
2068 2066 link,
2069 2067 p1,
2070 2068 p2,
2071 2069 flags,
2072 2070 cachedelta,
2073 2071 ifh,
2074 2072 dfh,
2075 2073 deltacomputer=deltacomputer,
2076 2074 )
2077 2075 finally:
2078 2076 if dfh:
2079 2077 dfh.close()
2080 2078 ifh.close()
2081 2079
2082 2080 def compress(self, data):
2083 2081 """Generate a possibly-compressed representation of data."""
2084 2082 if not data:
2085 2083 return b'', data
2086 2084
2087 2085 compressed = self._compressor.compress(data)
2088 2086
2089 2087 if compressed:
2090 2088 # The revlog compressor added the header in the returned data.
2091 2089 return b'', compressed
2092 2090
2093 2091 if data[0:1] == b'\0':
2094 2092 return b'', data
2095 2093 return b'u', data
2096 2094
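# A minimal sketch (not part of revlog.py) of what the (header, data) pairs
# returned by compress() mean to decompress() below.
#
#     header, data = rl.compress(chunk)
#     # header == b''  -> data self-identifies: it begins with a compressor
#     #                   header (e.g. b'x' for zlib) or with b'\0'
#     # header == b'u' -> data stays uncompressed; on disk the chunk is the
#     #                   b'u' byte followed by data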
2097 2095 def decompress(self, data):
2098 2096 """Decompress a revlog chunk.
2099 2097
2100 2098 The chunk is expected to begin with a header identifying the
2101 2099 format type so it can be routed to an appropriate decompressor.
2102 2100 """
2103 2101 if not data:
2104 2102 return data
2105 2103
2106 2104 # Revlogs are read much more frequently than they are written and many
2107 2105 # chunks only take microseconds to decompress, so performance is
2108 2106 # important here.
2109 2107 #
2110 2108 # We can make a few assumptions about revlogs:
2111 2109 #
2112 2110 # 1) the majority of chunks will be compressed (as opposed to inline
2113 2111 # raw data).
2114 2112 # 2) decompressing *any* data will likely be at least 10x slower than
2115 2113 # returning raw inline data.
2116 2114 # 3) we want to prioritize common and officially supported compression
2117 2115 # engines
2118 2116 #
2119 2117 # It follows that we want to optimize for "decompress compressed data
2120 2118 # when encoded with common and officially supported compression engines"
2121 2119 # case over "raw data" and "data encoded by less common or non-official
2122 2120 # compression engines." That is why we have the inline lookup first
2123 2121 # followed by the compengines lookup.
2124 2122 #
2125 2123 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2126 2124 # compressed chunks. And this matters for changelog and manifest reads.
2127 2125 t = data[0:1]
2128 2126
2129 2127 if t == b'x':
2130 2128 try:
2131 2129 return _zlibdecompress(data)
2132 2130 except zlib.error as e:
2133 2131 raise error.RevlogError(
2134 2132 _(b'revlog decompress error: %s')
2135 2133 % stringutil.forcebytestr(e)
2136 2134 )
2137 2135 # '\0' is more common than 'u' so it goes first.
2138 2136 elif t == b'\0':
2139 2137 return data
2140 2138 elif t == b'u':
2141 2139 return util.buffer(data, 1)
2142 2140
2143 2141 try:
2144 2142 compressor = self._decompressors[t]
2145 2143 except KeyError:
2146 2144 try:
2147 2145 engine = util.compengines.forrevlogheader(t)
2148 2146 compressor = engine.revlogcompressor(self._compengineopts)
2149 2147 self._decompressors[t] = compressor
2150 2148 except KeyError:
2151 2149 raise error.RevlogError(_(b'unknown compression type %r') % t)
2152 2150
2153 2151 return compressor.decompress(data)
2154 2152
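# A summary sketch (not part of revlog.py) of the first-byte dispatch
# performed by decompress() above.
#
#     t = data[0:1]
#     # b'x'  -> zlib-compressed chunk
#     # b'\0' -> raw chunk, returned as-is
#     # b'u'  -> raw chunk stored with a one-byte b'u' prefix
#     # else  -> engine looked up via util.compengines.forrevlogheader(t)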
2155 2153 def _addrevision(
2156 2154 self,
2157 2155 node,
2158 2156 rawtext,
2159 2157 transaction,
2160 2158 link,
2161 2159 p1,
2162 2160 p2,
2163 2161 flags,
2164 2162 cachedelta,
2165 2163 ifh,
2166 2164 dfh,
2167 2165 alwayscache=False,
2168 2166 deltacomputer=None,
2169 2167 ):
2170 2168 """internal function to add revisions to the log
2171 2169
2172 2170 see addrevision for argument descriptions.
2173 2171
2174 2172 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2175 2173
2176 2174 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2177 2175 be used.
2178 2176
2179 2177 invariants:
2180 2178 - rawtext is optional (can be None); if not set, cachedelta must be set.
2181 2179 if both are set, they must correspond to each other.
2182 2180 """
2183 2181 if node == nullid:
2184 2182 raise error.RevlogError(
2185 2183 _(b"%s: attempt to add null revision") % self.indexfile
2186 2184 )
2187 2185 if node == wdirid or node in wdirfilenodeids:
2188 2186 raise error.RevlogError(
2189 2187 _(b"%s: attempt to add wdir revision") % self.indexfile
2190 2188 )
2191 2189
2192 2190 if self._inline:
2193 2191 fh = ifh
2194 2192 else:
2195 2193 fh = dfh
2196 2194
2197 2195 btext = [rawtext]
2198 2196
2199 2197 curr = len(self)
2200 2198 prev = curr - 1
2201 2199 offset = self.end(prev)
2202 2200 p1r, p2r = self.rev(p1), self.rev(p2)
2203 2201
2204 2202 # full versions are inserted when the needed deltas
2205 2203 # become comparable to the uncompressed text
2206 2204 if rawtext is None:
2207 2205 # need the rawtext size before it was changed by flag processors,
2208 2206 # which is the non-raw size. use revlog explicitly to avoid filelog's
2209 2207 # extra logic that might remove metadata size.
2210 2208 textlen = mdiff.patchedsize(
2211 2209 revlog.size(self, cachedelta[0]), cachedelta[1]
2212 2210 )
2213 2211 else:
2214 2212 textlen = len(rawtext)
2215 2213
2216 2214 if deltacomputer is None:
2217 2215 deltacomputer = deltautil.deltacomputer(self)
2218 2216
2219 2217 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2220 2218
2221 2219 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2222 2220
2223 2221 e = (
2224 2222 offset_type(offset, flags),
2225 2223 deltainfo.deltalen,
2226 2224 textlen,
2227 2225 deltainfo.base,
2228 2226 link,
2229 2227 p1r,
2230 2228 p2r,
2231 2229 node,
2232 2230 )
2233 2231 self.index.append(e)
2234 2232
2235 2233 entry = self._io.packentry(e, self.node, self.version, curr)
2236 2234 self._writeentry(
2237 2235 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2238 2236 )
2239 2237
2240 2238 rawtext = btext[0]
2241 2239
2242 2240 if alwayscache and rawtext is None:
2243 2241 rawtext = deltacomputer.buildtext(revinfo, fh)
2244 2242
2245 2243 if type(rawtext) == bytes: # only accept immutable objects
2246 2244 self._revisioncache = (node, curr, rawtext)
2247 2245 self._chainbasecache[curr] = deltainfo.chainbase
2248 2246 return node
2249 2247
2250 2248 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2251 2249 # Files opened in a+ mode have inconsistent behavior on various
2252 2250 # platforms. Windows requires that a file positioning call be made
2253 2251 # when the file handle transitions between reads and writes. See
2254 2252 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2255 2253 # platforms, Python or the platform itself can be buggy. Some versions
2256 2254 # of Solaris have been observed to not append at the end of the file
2257 2255 # if the file was seeked to before the end. See issue4943 for more.
2258 2256 #
2259 2257 # We work around this issue by inserting a seek() before writing.
2260 2258 # Note: This is likely not necessary on Python 3. However, because
2261 2259 # the file handle is reused for reads and may be seeked there, we need
2262 2260 # to be careful before changing this.
2263 2261 ifh.seek(0, os.SEEK_END)
2264 2262 if dfh:
2265 2263 dfh.seek(0, os.SEEK_END)
2266 2264
2267 2265 curr = len(self) - 1
2268 2266 if not self._inline:
2269 2267 transaction.add(self.datafile, offset)
2270 2268 transaction.add(self.indexfile, curr * len(entry))
2271 2269 if data[0]:
2272 2270 dfh.write(data[0])
2273 2271 dfh.write(data[1])
2274 2272 ifh.write(entry)
2275 2273 else:
2276 2274 offset += curr * self._io.size
2277 2275 transaction.add(self.indexfile, offset, curr)
2278 2276 ifh.write(entry)
2279 2277 ifh.write(data[0])
2280 2278 ifh.write(data[1])
2281 2279 self._enforceinlinesize(transaction, ifh)
2282 2280
2283 2281 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2284 2282 """
2285 2283 add a delta group
2286 2284
2287 2285 given a set of deltas, add them to the revision log. the
2288 2286 first delta is against its parent, which should be in our
2289 2287 log, the rest are against the previous delta.
2290 2288
2291 2289 If ``addrevisioncb`` is defined, it will be called with arguments of
2292 2290 this revlog and the node that was added.
2293 2291 """
2294 2292
2295 2293 if self._writinghandles:
2296 2294 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2297 2295
2298 2296 nodes = []
2299 2297
2300 2298 r = len(self)
2301 2299 end = 0
2302 2300 if r:
2303 2301 end = self.end(r - 1)
2304 2302 ifh = self._indexfp(b"a+")
2305 2303 isize = r * self._io.size
2306 2304 if self._inline:
2307 2305 transaction.add(self.indexfile, end + isize, r)
2308 2306 dfh = None
2309 2307 else:
2310 2308 transaction.add(self.indexfile, isize, r)
2311 2309 transaction.add(self.datafile, end)
2312 2310 dfh = self._datafp(b"a+")
2313 2311
2314 2312 def flush():
2315 2313 if dfh:
2316 2314 dfh.flush()
2317 2315 ifh.flush()
2318 2316
2319 2317 self._writinghandles = (ifh, dfh)
2320 2318
2321 2319 try:
2322 2320 deltacomputer = deltautil.deltacomputer(self)
2323 2321 # loop through our set of deltas
2324 2322 for data in deltas:
2325 2323 node, p1, p2, linknode, deltabase, delta, flags = data
2326 2324 link = linkmapper(linknode)
2327 2325 flags = flags or REVIDX_DEFAULT_FLAGS
2328 2326
2329 2327 nodes.append(node)
2330 2328
2331 2329 if self.index.has_node(node):
2332 2330 self._nodeduplicatecallback(transaction, node)
2333 2331 # this can happen if two branches make the same change
2334 2332 continue
2335 2333
2336 2334 for p in (p1, p2):
2337 2335 if not self.index.has_node(p):
2338 2336 raise error.LookupError(
2339 2337 p, self.indexfile, _(b'unknown parent')
2340 2338 )
2341 2339
2342 2340 if not self.index.has_node(deltabase):
2343 2341 raise error.LookupError(
2344 2342 deltabase, self.indexfile, _(b'unknown delta base')
2345 2343 )
2346 2344
2347 2345 baserev = self.rev(deltabase)
2348 2346
2349 2347 if baserev != nullrev and self.iscensored(baserev):
2350 2348 # if base is censored, delta must be full replacement in a
2351 2349 # single patch operation
2352 2350 hlen = struct.calcsize(b">lll")
2353 2351 oldlen = self.rawsize(baserev)
2354 2352 newlen = len(delta) - hlen
2355 2353 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2356 2354 raise error.CensoredBaseError(
2357 2355 self.indexfile, self.node(baserev)
2358 2356 )
2359 2357
2360 2358 if not flags and self._peek_iscensored(baserev, delta, flush):
2361 2359 flags |= REVIDX_ISCENSORED
2362 2360
2363 2361 # We assume consumers of addrevisioncb will want to retrieve
2364 2362 # the added revision, which will require a call to
2365 2363 # revision(). revision() will fast path if there is a cache
2366 2364 # hit. So, we tell _addrevision() to always cache in this case.
2367 2365 # We're only using addgroup() in the context of changegroup
2368 2366 # generation so the revision data can always be handled as raw
2369 2367 # by the flagprocessor.
2370 2368 self._addrevision(
2371 2369 node,
2372 2370 None,
2373 2371 transaction,
2374 2372 link,
2375 2373 p1,
2376 2374 p2,
2377 2375 flags,
2378 2376 (baserev, delta),
2379 2377 ifh,
2380 2378 dfh,
2381 2379 alwayscache=bool(addrevisioncb),
2382 2380 deltacomputer=deltacomputer,
2383 2381 )
2384 2382
2385 2383 if addrevisioncb:
2386 2384 addrevisioncb(self, node)
2387 2385
2388 2386 if not dfh and not self._inline:
2389 2387 # addrevision switched from inline to conventional
2390 2388 # reopen the index
2391 2389 ifh.close()
2392 2390 dfh = self._datafp(b"a+")
2393 2391 ifh = self._indexfp(b"a+")
2394 2392 self._writinghandles = (ifh, dfh)
2395 2393 finally:
2396 2394 self._writinghandles = None
2397 2395
2398 2396 if dfh:
2399 2397 dfh.close()
2400 2398 ifh.close()
2401 2399
2402 2400 return nodes
2403 2401
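# A minimal sketch (not part of revlog.py) of the shape of each entry in
# the ``deltas`` iterable consumed by addgroup() above.
#
#     for data in deltas:
#         node, p1, p2, linknode, deltabase, delta, flags = data
#         # node      - nodeid of the revision being added
#         # p1, p2    - parent nodeids, which must already be in the log
#         # linknode  - node turned into a linkrev via ``linkmapper``
#         # deltabase - nodeid the delta applies against
#         # delta     - the binary delta itself
#         # flags     - revision flags; 0 means REVIDX_DEFAULT_FLAGS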
2404 2402 def iscensored(self, rev):
2405 2403 """Check if a file revision is censored."""
2406 2404 if not self._censorable:
2407 2405 return False
2408 2406
2409 2407 return self.flags(rev) & REVIDX_ISCENSORED
2410 2408
2411 2409 def _peek_iscensored(self, baserev, delta, flush):
2412 2410 """Quickly check if a delta produces a censored revision."""
2413 2411 if not self._censorable:
2414 2412 return False
2415 2413
2416 2414 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2417 2415
2418 2416 def getstrippoint(self, minlink):
2419 2417 """find the minimum rev that must be stripped to strip the linkrev
2420 2418
2421 2419 Returns a tuple containing the minimum rev and a set of all revs that
2422 2420 have linkrevs that will be broken by this strip.
2423 2421 """
2424 2422 return storageutil.resolvestripinfo(
2425 2423 minlink,
2426 2424 len(self) - 1,
2427 2425 self.headrevs(),
2428 2426 self.linkrev,
2429 2427 self.parentrevs,
2430 2428 )
2431 2429
2432 2430 def strip(self, minlink, transaction):
2433 2431 """truncate the revlog on the first revision with a linkrev >= minlink
2434 2432
2435 2433 This function is called when we're stripping revision minlink and
2436 2434 its descendants from the repository.
2437 2435
2438 2436 We have to remove all revisions with linkrev >= minlink, because
2439 2437 the equivalent changelog revisions will be renumbered after the
2440 2438 strip.
2441 2439
2442 2440 So we truncate the revlog on the first of these revisions, and
2443 2441 trust that the caller has saved the revisions that shouldn't be
2444 2442 removed and that it'll re-add them after this truncation.
2445 2443 """
2446 2444 if len(self) == 0:
2447 2445 return
2448 2446
2449 2447 rev, _ = self.getstrippoint(minlink)
2450 2448 if rev == len(self):
2451 2449 return
2452 2450
2453 2451 # first truncate the files on disk
2454 2452 end = self.start(rev)
2455 2453 if not self._inline:
2456 2454 transaction.add(self.datafile, end)
2457 2455 end = rev * self._io.size
2458 2456 else:
2459 2457 end += rev * self._io.size
2460 2458
2461 2459 transaction.add(self.indexfile, end)
2462 2460
2463 2461 # then reset internal state in memory to forget those revisions
2464 2462 self._revisioncache = None
2465 2463 self._chaininfocache = {}
2466 2464 self._chunkclear()
2467 2465
2468 2466 del self.index[rev:-1]
2469 2467
2470 2468 def checksize(self):
2471 2469 """Check size of index and data files
2472 2470
2473 2471 return a (dd, di) tuple.
2474 2472 - dd: extra bytes for the "data" file
2475 2473 - di: extra bytes for the "index" file
2476 2474
2477 2475 A healthy revlog will return (0, 0).
2478 2476 """
2479 2477 expected = 0
2480 2478 if len(self):
2481 2479 expected = max(0, self.end(len(self) - 1))
2482 2480
2483 2481 try:
2484 2482 with self._datafp() as f:
2485 2483 f.seek(0, io.SEEK_END)
2486 2484 actual = f.tell()
2487 2485 dd = actual - expected
2488 2486 except IOError as inst:
2489 2487 if inst.errno != errno.ENOENT:
2490 2488 raise
2491 2489 dd = 0
2492 2490
2493 2491 try:
2494 2492 f = self.opener(self.indexfile)
2495 2493 f.seek(0, io.SEEK_END)
2496 2494 actual = f.tell()
2497 2495 f.close()
2498 2496 s = self._io.size
2499 2497 i = max(0, actual // s)
2500 2498 di = actual - (i * s)
2501 2499 if self._inline:
2502 2500 databytes = 0
2503 2501 for r in self:
2504 2502 databytes += max(0, self.length(r))
2505 2503 dd = 0
2506 2504 di = actual - len(self) * s - databytes
2507 2505 except IOError as inst:
2508 2506 if inst.errno != errno.ENOENT:
2509 2507 raise
2510 2508 di = 0
2511 2509
2512 2510 return (dd, di)
2513 2511
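# A minimal usage sketch (not part of revlog.py) for checksize(), mirroring
# what verifyintegrity() below reports; `rl` is an assumed open revlog.
#
#     dd, di = rl.checksize()
#     if dd or di:
#         print('revlog damaged: %d extra data bytes, %d extra index bytes'
#               % (dd, di))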
2514 2512 def files(self):
2515 2513 res = [self.indexfile]
2516 2514 if not self._inline:
2517 2515 res.append(self.datafile)
2518 2516 return res
2519 2517
2520 2518 def emitrevisions(
2521 2519 self,
2522 2520 nodes,
2523 2521 nodesorder=None,
2524 2522 revisiondata=False,
2525 2523 assumehaveparentrevisions=False,
2526 2524 deltamode=repository.CG_DELTAMODE_STD,
2527 2525 ):
2528 2526 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2529 2527 raise error.ProgrammingError(
2530 2528 b'unhandled value for nodesorder: %s' % nodesorder
2531 2529 )
2532 2530
2533 2531 if nodesorder is None and not self._generaldelta:
2534 2532 nodesorder = b'storage'
2535 2533
2536 2534 if (
2537 2535 not self._storedeltachains
2538 2536 and deltamode != repository.CG_DELTAMODE_PREV
2539 2537 ):
2540 2538 deltamode = repository.CG_DELTAMODE_FULL
2541 2539
2542 2540 return storageutil.emitrevisions(
2543 2541 self,
2544 2542 nodes,
2545 2543 nodesorder,
2546 2544 revlogrevisiondelta,
2547 2545 deltaparentfn=self.deltaparent,
2548 2546 candeltafn=self.candelta,
2549 2547 rawsizefn=self.rawsize,
2550 2548 revdifffn=self.revdiff,
2551 2549 flagsfn=self.flags,
2552 2550 deltamode=deltamode,
2553 2551 revisiondata=revisiondata,
2554 2552 assumehaveparentrevisions=assumehaveparentrevisions,
2555 2553 )
2556 2554
2557 2555 DELTAREUSEALWAYS = b'always'
2558 2556 DELTAREUSESAMEREVS = b'samerevs'
2559 2557 DELTAREUSENEVER = b'never'
2560 2558
2561 2559 DELTAREUSEFULLADD = b'fulladd'
2562 2560
2563 2561 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2564 2562
2565 2563 def clone(
2566 2564 self,
2567 2565 tr,
2568 2566 destrevlog,
2569 2567 addrevisioncb=None,
2570 2568 deltareuse=DELTAREUSESAMEREVS,
2571 2569 forcedeltabothparents=None,
2572 2570 sidedatacompanion=None,
2573 2571 ):
2574 2572 """Copy this revlog to another, possibly with format changes.
2575 2573
2576 2574 The destination revlog will contain the same revisions and nodes.
2577 2575 However, it may not be bit-for-bit identical due to e.g. delta encoding
2578 2576 differences.
2579 2577
2580 2578 The ``deltareuse`` argument controls how deltas from the existing revlog
2581 2579 are preserved in the destination revlog. The argument can have the
2582 2580 following values:
2583 2581
2584 2582 DELTAREUSEALWAYS
2585 2583 Deltas will always be reused (if possible), even if the destination
2586 2584 revlog would not select the same revisions for the delta. This is the
2587 2585 fastest mode of operation.
2588 2586 DELTAREUSESAMEREVS
2589 2587 Deltas will be reused if the destination revlog would pick the same
2590 2588 revisions for the delta. This mode strikes a balance between speed
2591 2589 and optimization.
2592 2590 DELTAREUSENEVER
2593 2591 Deltas will never be reused. This is the slowest mode of execution.
2594 2592 This mode can be used to recompute deltas (e.g. if the diff/delta
2595 2593 algorithm changes).
2596 2594 DELTAREUSEFULLADD
2597 2595 Revisions will be re-added as if they were new content. This is
2598 2596 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2599 2597 e.g. large file detection and handling.
2600 2598
2601 2599 Delta computation can be slow, so the choice of delta reuse policy can
2602 2600 significantly affect run time.
2603 2601
2604 2602 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2605 2603 two extremes. Deltas will be reused if they are appropriate. But if the
2606 2604 delta could choose a better revision, it will do so. This means if you
2607 2605 are converting a non-generaldelta revlog to a generaldelta revlog,
2608 2606 deltas will be recomputed if the delta's parent isn't a parent of the
2609 2607 revision.
2610 2608
2611 2609 In addition to the delta policy, the ``forcedeltabothparents``
2612 2610 argument controls whether to force computing deltas against both
2613 2611 parents for merges. If None, the destination revlog's current setting is used.
2614 2612
2615 2613 If not None, `sidedatacompanion` is a callable that accepts two
2616 2614 arguments:
2617 2615
2618 2616 (srcrevlog, rev)
2619 2617
2620 2618 and returns a triplet that controls changes to sidedata content from the
2621 2619 old revision to the new clone result:
2622 2620
2623 2621 (dropall, filterout, update)
2624 2622
2625 2623 * if `dropall` is True, all sidedata should be dropped
2626 2624 * `filterout` is a set of sidedata keys that should be dropped
2627 2625 * `update` is a mapping of additional/new key -> value
2628 2626 """
2629 2627 if deltareuse not in self.DELTAREUSEALL:
2630 2628 raise ValueError(
2631 2629 _(b'value for deltareuse invalid: %s') % deltareuse
2632 2630 )
2633 2631
2634 2632 if len(destrevlog):
2635 2633 raise ValueError(_(b'destination revlog is not empty'))
2636 2634
2637 2635 if getattr(self, 'filteredrevs', None):
2638 2636 raise ValueError(_(b'source revlog has filtered revisions'))
2639 2637 if getattr(destrevlog, 'filteredrevs', None):
2640 2638 raise ValueError(_(b'destination revlog has filtered revisions'))
2641 2639
2642 2640 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2643 2641 # if possible.
2644 2642 oldlazydelta = destrevlog._lazydelta
2645 2643 oldlazydeltabase = destrevlog._lazydeltabase
2646 2644 oldamd = destrevlog._deltabothparents
2647 2645
2648 2646 try:
2649 2647 if deltareuse == self.DELTAREUSEALWAYS:
2650 2648 destrevlog._lazydeltabase = True
2651 2649 destrevlog._lazydelta = True
2652 2650 elif deltareuse == self.DELTAREUSESAMEREVS:
2653 2651 destrevlog._lazydeltabase = False
2654 2652 destrevlog._lazydelta = True
2655 2653 elif deltareuse == self.DELTAREUSENEVER:
2656 2654 destrevlog._lazydeltabase = False
2657 2655 destrevlog._lazydelta = False
2658 2656
2659 2657 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2660 2658
2661 2659 self._clone(
2662 2660 tr,
2663 2661 destrevlog,
2664 2662 addrevisioncb,
2665 2663 deltareuse,
2666 2664 forcedeltabothparents,
2667 2665 sidedatacompanion,
2668 2666 )
2669 2667
2670 2668 finally:
2671 2669 destrevlog._lazydelta = oldlazydelta
2672 2670 destrevlog._lazydeltabase = oldlazydeltabase
2673 2671 destrevlog._deltabothparents = oldamd
2674 2672
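# A minimal sketch (not part of revlog.py) of driving clone(): recompute
# every delta and drop all sidedata. The transaction ``tr`` and both
# revlogs are assumed to exist already.
#
#     def dropsidedata(srcrevlog, rev):
#         # (dropall, filterout, update), per the clone() docstring
#         return (True, set(), {})
#
#     srcrevlog.clone(
#         tr,
#         destrevlog,
#         deltareuse=srcrevlog.DELTAREUSENEVER,
#         sidedatacompanion=dropsidedata,
#     )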
2675 2673 def _clone(
2676 2674 self,
2677 2675 tr,
2678 2676 destrevlog,
2679 2677 addrevisioncb,
2680 2678 deltareuse,
2681 2679 forcedeltabothparents,
2682 2680 sidedatacompanion,
2683 2681 ):
2684 2682 """perform the core duty of `revlog.clone` after parameter processing"""
2685 2683 deltacomputer = deltautil.deltacomputer(destrevlog)
2686 2684 index = self.index
2687 2685 for rev in self:
2688 2686 entry = index[rev]
2689 2687
2690 2688 # Some classes override linkrev to take filtered revs into
2691 2689 # account. Use raw entry from index.
2692 2690 flags = entry[0] & 0xFFFF
2693 2691 linkrev = entry[4]
2694 2692 p1 = index[entry[5]][7]
2695 2693 p2 = index[entry[6]][7]
2696 2694 node = entry[7]
2697 2695
2698 2696 sidedataactions = (False, [], {})
2699 2697 if sidedatacompanion is not None:
2700 2698 sidedataactions = sidedatacompanion(self, rev)
2701 2699
2702 2700 # (Possibly) reuse the delta from the revlog if allowed and
2703 2701 # the revlog chunk is a delta.
2704 2702 cachedelta = None
2705 2703 rawtext = None
2706 2704 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2707 2705 dropall, filterout, update = sidedataactions
2708 2706 text, sidedata = self._revisiondata(rev)
2709 2707 if dropall:
2710 2708 sidedata = {}
2711 2709 for key in filterout:
2712 2710 sidedata.pop(key, None)
2713 2711 sidedata.update(update)
2714 2712 if not sidedata:
2715 2713 sidedata = None
2716 2714 destrevlog.addrevision(
2717 2715 text,
2718 2716 tr,
2719 2717 linkrev,
2720 2718 p1,
2721 2719 p2,
2722 2720 cachedelta=cachedelta,
2723 2721 node=node,
2724 2722 flags=flags,
2725 2723 deltacomputer=deltacomputer,
2726 2724 sidedata=sidedata,
2727 2725 )
2728 2726 else:
2729 2727 if destrevlog._lazydelta:
2730 2728 dp = self.deltaparent(rev)
2731 2729 if dp != nullrev:
2732 2730 cachedelta = (dp, bytes(self._chunk(rev)))
2733 2731
2734 2732 if not cachedelta:
2735 2733 rawtext = self.rawdata(rev)
2736 2734
2737 2735 ifh = destrevlog.opener(
2738 2736 destrevlog.indexfile, b'a+', checkambig=False
2739 2737 )
2740 2738 dfh = None
2741 2739 if not destrevlog._inline:
2742 2740 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2743 2741 try:
2744 2742 destrevlog._addrevision(
2745 2743 node,
2746 2744 rawtext,
2747 2745 tr,
2748 2746 linkrev,
2749 2747 p1,
2750 2748 p2,
2751 2749 flags,
2752 2750 cachedelta,
2753 2751 ifh,
2754 2752 dfh,
2755 2753 deltacomputer=deltacomputer,
2756 2754 )
2757 2755 finally:
2758 2756 if dfh:
2759 2757 dfh.close()
2760 2758 ifh.close()
2761 2759
2762 2760 if addrevisioncb:
2763 2761 addrevisioncb(self, rev, node)
2764 2762
2765 2763 def censorrevision(self, tr, censornode, tombstone=b''):
2766 2764 if (self.version & 0xFFFF) == REVLOGV0:
2767 2765 raise error.RevlogError(
2768 2766 _(b'cannot censor with version %d revlogs') % self.version
2769 2767 )
2770 2768
2771 2769 censorrev = self.rev(censornode)
2772 2770 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2773 2771
2774 2772 if len(tombstone) > self.rawsize(censorrev):
2775 2773 raise error.Abort(
2776 2774 _(b'censor tombstone must be no longer than censored data')
2777 2775 )
2778 2776
2779 2777 # Rewriting the revlog in place is hard. Our strategy for censoring is
2780 2778 # to create a new revlog, copy all revisions to it, then replace the
2781 2779 # revlogs on transaction close.
2782 2780
2783 2781 newindexfile = self.indexfile + b'.tmpcensored'
2784 2782 newdatafile = self.datafile + b'.tmpcensored'
2785 2783
2786 2784 # This is a bit dangerous. We could easily have a mismatch of state.
2787 2785 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2788 2786 newrl.version = self.version
2789 2787 newrl._generaldelta = self._generaldelta
2790 2788 newrl._io = self._io
2791 2789
2792 2790 for rev in self.revs():
2793 2791 node = self.node(rev)
2794 2792 p1, p2 = self.parents(node)
2795 2793
2796 2794 if rev == censorrev:
2797 2795 newrl.addrawrevision(
2798 2796 tombstone,
2799 2797 tr,
2800 2798 self.linkrev(censorrev),
2801 2799 p1,
2802 2800 p2,
2803 2801 censornode,
2804 2802 REVIDX_ISCENSORED,
2805 2803 )
2806 2804
2807 2805 if newrl.deltaparent(rev) != nullrev:
2808 2806 raise error.Abort(
2809 2807 _(
2810 2808 b'censored revision stored as delta; '
2811 2809 b'cannot censor'
2812 2810 ),
2813 2811 hint=_(
2814 2812 b'censoring of revlogs is not '
2815 2813 b'fully implemented; please report '
2816 2814 b'this bug'
2817 2815 ),
2818 2816 )
2819 2817 continue
2820 2818
2821 2819 if self.iscensored(rev):
2822 2820 if self.deltaparent(rev) != nullrev:
2823 2821 raise error.Abort(
2824 2822 _(
2825 2823 b'cannot censor due to censored '
2826 2824 b'revision having delta stored'
2827 2825 )
2828 2826 )
2829 2827 rawtext = self._chunk(rev)
2830 2828 else:
2831 2829 rawtext = self.rawdata(rev)
2832 2830
2833 2831 newrl.addrawrevision(
2834 2832 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2835 2833 )
2836 2834
2837 2835 tr.addbackup(self.indexfile, location=b'store')
2838 2836 if not self._inline:
2839 2837 tr.addbackup(self.datafile, location=b'store')
2840 2838
2841 2839 self.opener.rename(newrl.indexfile, self.indexfile)
2842 2840 if not self._inline:
2843 2841 self.opener.rename(newrl.datafile, self.datafile)
2844 2842
2845 2843 self.clearcaches()
2846 2844 self._loadindex()
2847 2845
2848 2846 def verifyintegrity(self, state):
2849 2847 """Verifies the integrity of the revlog.
2850 2848
2851 2849 Yields ``revlogproblem`` instances describing problems that are
2852 2850 found.
2853 2851 """
2854 2852 dd, di = self.checksize()
2855 2853 if dd:
2856 2854 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2857 2855 if di:
2858 2856 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2859 2857
2860 2858 version = self.version & 0xFFFF
2861 2859
2862 2860 # The verifier tells us what revlog version we should be.
2863 2861 if version != state[b'expectedversion']:
2864 2862 yield revlogproblem(
2865 2863 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2866 2864 % (self.indexfile, version, state[b'expectedversion'])
2867 2865 )
2868 2866
2869 2867 state[b'skipread'] = set()
2870 2868
2871 2869 for rev in self:
2872 2870 node = self.node(rev)
2873 2871
2874 2872 # Verify contents. 4 cases to care about:
2875 2873 #
2876 2874 # common: the most common case
2877 2875 # rename: with a rename
2878 2876 # meta: file content starts with b'\1\n', the metadata
2879 2877 # header defined in filelog.py, but without a rename
2880 2878 # ext: content stored externally
2881 2879 #
2882 2880 # More formally, their differences are shown below:
2883 2881 #
2884 2882 # | common | rename | meta | ext
2885 2883 # -------------------------------------------------------
2886 2884 # flags() | 0 | 0 | 0 | not 0
2887 2885 # renamed() | False | True | False | ?
2888 2886 # rawtext[0:2]=='\1\n'| False | True | True | ?
2889 2887 #
2890 2888 # "rawtext" means the raw text stored in revlog data, which
2891 2889 # could be retrieved by "rawdata(rev)". "text"
2892 2890 # mentioned below is "revision(rev)".
2893 2891 #
2894 2892 # There are 3 different lengths stored physically:
2895 2893 # 1. L1: rawsize, stored in revlog index
2896 2894 # 2. L2: len(rawtext), stored in revlog data
2897 2895 # 3. L3: len(text), stored in revlog data if flags==0, or
2898 2896 # possibly somewhere else if flags!=0
2899 2897 #
2900 2898 # L1 should be equal to L2. L3 could be different from them.
2901 2899 # "text" may or may not affect commit hash depending on flag
2902 2900 # processors (see flagutil.addflagprocessor).
2903 2901 #
2904 2902 # | common | rename | meta | ext
2905 2903 # -------------------------------------------------
2906 2904 # rawsize() | L1 | L1 | L1 | L1
2907 2905 # size() | L1 | L2-LM | L1(*) | L1 (?)
2908 2906 # len(rawtext) | L2 | L2 | L2 | L2
2909 2907 # len(text) | L2 | L2 | L2 | L3
2910 2908 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2911 2909 #
2912 2910 # LM: length of metadata, depending on rawtext
2913 2911 # (*): not ideal, see comment in filelog.size
2914 2912 # (?): could be "- len(meta)" if the resolved content has
2915 2913 # rename metadata
2916 2914 #
2917 2915 # Checks needed to be done:
2918 2916 # 1. length check: L1 == L2, in all cases.
2919 2917 # 2. hash check: depending on flag processor, we may need to
2920 2918 # use either "text" (external), or "rawtext" (in revlog).
2921 2919
2922 2920 try:
2923 2921 skipflags = state.get(b'skipflags', 0)
2924 2922 if skipflags:
2925 2923 skipflags &= self.flags(rev)
2926 2924
2927 2925 _verify_revision(self, skipflags, state, node)
2928 2926
2929 2927 l1 = self.rawsize(rev)
2930 2928 l2 = len(self.rawdata(node))
2931 2929
2932 2930 if l1 != l2:
2933 2931 yield revlogproblem(
2934 2932 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2935 2933 node=node,
2936 2934 )
2937 2935
2938 2936 except error.CensoredNodeError:
2939 2937 if state[b'erroroncensored']:
2940 2938 yield revlogproblem(
2941 2939 error=_(b'censored file data'), node=node
2942 2940 )
2943 2941 state[b'skipread'].add(node)
2944 2942 except Exception as e:
2945 2943 yield revlogproblem(
2946 2944 error=_(b'unpacking %s: %s')
2947 2945 % (short(node), stringutil.forcebytestr(e)),
2948 2946 node=node,
2949 2947 )
2950 2948 state[b'skipread'].add(node)
2951 2949
2952 2950 def storageinfo(
2953 2951 self,
2954 2952 exclusivefiles=False,
2955 2953 sharedfiles=False,
2956 2954 revisionscount=False,
2957 2955 trackedsize=False,
2958 2956 storedsize=False,
2959 2957 ):
2960 2958 d = {}
2961 2959
2962 2960 if exclusivefiles:
2963 2961 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2964 2962 if not self._inline:
2965 2963 d[b'exclusivefiles'].append((self.opener, self.datafile))
2966 2964
2967 2965 if sharedfiles:
2968 2966 d[b'sharedfiles'] = []
2969 2967
2970 2968 if revisionscount:
2971 2969 d[b'revisionscount'] = len(self)
2972 2970
2973 2971 if trackedsize:
2974 2972 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2975 2973
2976 2974 if storedsize:
2977 2975 d[b'storedsize'] = sum(
2978 2976 self.opener.stat(path).st_size for path in self.files()
2979 2977 )
2980 2978
2981 2979 return d