revlog: split the content verification of a node into a separate method...
Matt Harbison
r44409:7c3118b9 default
@@ -1,2975 +1,2981 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 revlogutils,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanket usage of all the names to prevent pyflakes "unused import" warnings
87 87 # We need these names available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_EXTSTORED
102 102 REVIDX_DEFAULT_FLAGS
103 103 REVIDX_FLAGS_ORDER
104 104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 105
106 106 parsers = policy.importmod('parsers')
107 107 rustancestor = policy.importrust('ancestor')
108 108 rustdagop = policy.importrust('dagop')
109 109
110 110 # Aliased for performance.
111 111 _zlibdecompress = zlib.decompress
112 112
113 113 # max size of revlog with inline data
114 114 _maxinline = 131072
115 115 _chunksize = 1048576
116 116
117 117 # Flag processors for REVIDX_ELLIPSIS.
118 118 def ellipsisreadprocessor(rl, text):
119 119 return text, False, {}
120 120
121 121
122 122 def ellipsiswriteprocessor(rl, text, sidedata):
123 123 return text, False
124 124
125 125
126 126 def ellipsisrawprocessor(rl, text):
127 127 return False
128 128
129 129
130 130 ellipsisprocessor = (
131 131 ellipsisreadprocessor,
132 132 ellipsiswriteprocessor,
133 133 ellipsisrawprocessor,
134 134 )
135 135
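A flag processor is a (read, write, raw) triple: ``read`` maps stored text to
consumable text, ``write`` maps text to its stored form, and ``raw`` reports
whether the rawtext can be hash-verified directly. A minimal sketch of a no-op
processor for a hypothetical flag, mirroring the ellipsis triple above
(REVIDX_HYPOTHETICAL is a placeholder, not a real revlog flag):

    def hypothetical_read(rl, text):
        return text, False, {}

    def hypothetical_write(rl, text, sidedata):
        return text, False

    def hypothetical_raw(rl, text):
        return False

    # Registration would go through flagutil.insertflagprocessor(), as
    # _loadindex() does for opts[b'flagprocessors'] further down:
    # flagutil.insertflagprocessor(
    #     REVIDX_HYPOTHETICAL,
    #     (hypothetical_read, hypothetical_write, hypothetical_raw),
    #     flagutil.flagprocessors,
    # )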
136 136
137 137 def getoffset(q):
138 138 return int(q >> 16)
139 139
140 140
141 141 def gettype(q):
142 142 return int(q & 0xFFFF)
143 143
144 144
145 145 def offset_type(offset, type):
146 146 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
147 147 raise ValueError(b'unknown revlog index flags')
148 148 return int(int(offset) << 16 | type)
149 149
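The index packs a revision's byte offset and its 16-bit flag word into a
single integer: the offset lives in the high bits, the flags in the low 16.
A quick round-trip check (values illustrative):

    packed = offset_type(1000, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1000             # high bits: byte offset
    assert gettype(packed) == REVIDX_ISCENSORED  # low 16 bits: flags
    # offset_type() rejects any bit outside flagutil.REVIDX_KNOWN_FLAGS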
150 150
151 def _verify_revision(rl, skipflags, state, node):
152 """Verify the integrity of the given revlog ``node`` while providing a hook
153 point for extensions to influence the operation."""
154 if skipflags:
155 state[b'skipread'].add(node)
156 else:
157 # Side-effect: read content and verify hash.
158 rl.revision(node)
159
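The split exists so extensions (LFS, for instance) can wrap this hook and skip
expensive content checks. A minimal sketch of a driver loop, assuming only
that ``state`` carries the b'skipread' set used above:

    state = {b'skipread': set()}
    for rev in rl:
        node = rl.node(rev)
        skipflags = 0  # or a mask of flags whose content should not be read
        _verify_revision(rl, skipflags, state, node)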
160
151 161 @attr.s(slots=True, frozen=True)
152 162 class _revisioninfo(object):
153 163 """Information about a revision that allows building its fulltext
154 164 node: expected hash of the revision
155 165 p1, p2: parent revs of the revision
156 166 btext: built text cache consisting of a one-element list
157 167 cachedelta: (baserev, uncompressed_delta) or None
158 168 flags: flags associated to the revision storage
159 169
160 170 One of btext[0] or cachedelta must be set.
161 171 """
162 172
163 173 node = attr.ib()
164 174 p1 = attr.ib()
165 175 p2 = attr.ib()
166 176 btext = attr.ib()
167 177 textlen = attr.ib()
168 178 cachedelta = attr.ib()
169 179 flags = attr.ib()
170 180
171 181
172 182 @interfaceutil.implementer(repository.irevisiondelta)
173 183 @attr.s(slots=True)
174 184 class revlogrevisiondelta(object):
175 185 node = attr.ib()
176 186 p1node = attr.ib()
177 187 p2node = attr.ib()
178 188 basenode = attr.ib()
179 189 flags = attr.ib()
180 190 baserevisionsize = attr.ib()
181 191 revision = attr.ib()
182 192 delta = attr.ib()
183 193 linknode = attr.ib(default=None)
184 194
185 195
186 196 @interfaceutil.implementer(repository.iverifyproblem)
187 197 @attr.s(frozen=True)
188 198 class revlogproblem(object):
189 199 warning = attr.ib(default=None)
190 200 error = attr.ib(default=None)
191 201 node = attr.ib(default=None)
192 202
193 203
194 204 # index v0:
195 205 # 4 bytes: offset
196 206 # 4 bytes: compressed length
197 207 # 4 bytes: base rev
198 208 # 4 bytes: link rev
199 209 # 20 bytes: parent 1 nodeid
200 210 # 20 bytes: parent 2 nodeid
201 211 # 20 bytes: nodeid
202 212 indexformatv0 = struct.Struct(b">4l20s20s20s")
203 213 indexformatv0_pack = indexformatv0.pack
204 214 indexformatv0_unpack = indexformatv0.unpack
205 215
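Each v0 record is fixed-size, which is what lets parseindex() stride through
the file: per the layout comment above, four 4-byte fields plus three 20-byte
nodeids.

    # ">4l20s20s20s": 4 big-endian longs + 3 raw 20-byte nodeids
    assert indexformatv0.size == 4 * 4 + 3 * 20  # 76 bytes per entry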
206 216
207 217 class revlogoldindex(list):
208 218 @property
209 219 def nodemap(self):
210 220 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
211 221 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
212 222 return self._nodemap
213 223
214 224 @util.propertycache
215 225 def _nodemap(self):
216 226 nodemap = revlogutils.NodeMap({nullid: nullrev})
217 227 for r in range(0, len(self)):
218 228 n = self[r][7]
219 229 nodemap[n] = r
220 230 return nodemap
221 231
222 232 def has_node(self, node):
223 233 """return True if the node exist in the index"""
224 234 return node in self._nodemap
225 235
226 236 def rev(self, node):
227 237 """return a revision for a node
228 238
229 239 If the node is unknown, raise a RevlogError"""
230 240 return self._nodemap[node]
231 241
232 242 def get_rev(self, node):
233 243 """return a revision for a node
234 244
235 245 If the node is unknown, return None"""
236 246 return self._nodemap.get(node)
237 247
238 248 def append(self, tup):
239 249 self._nodemap[tup[7]] = len(self)
240 250 super(revlogoldindex, self).append(tup)
241 251
242 252 def __delitem__(self, i):
243 253 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
244 254 raise ValueError(b"deleting slices only supports a:-1 with step 1")
245 255 for r in pycompat.xrange(i.start, len(self)):
246 256 del self._nodemap[self[r][7]]
247 257 super(revlogoldindex, self).__delitem__(i)
248 258
249 259 def clearcaches(self):
250 260 self.__dict__.pop('_nodemap', None)
251 261
252 262 def __getitem__(self, i):
253 263 if i == -1:
254 264 return (0, 0, 0, -1, -1, -1, -1, nullid)
255 265 return list.__getitem__(self, i)
256 266
257 267
258 268 class revlogoldio(object):
259 269 def __init__(self):
260 270 self.size = indexformatv0.size
261 271
262 272 def parseindex(self, data, inline):
263 273 s = self.size
264 274 index = []
265 275 nodemap = revlogutils.NodeMap({nullid: nullrev})
266 276 n = off = 0
267 277 l = len(data)
268 278 while off + s <= l:
269 279 cur = data[off : off + s]
270 280 off += s
271 281 e = indexformatv0_unpack(cur)
272 282 # transform to revlogv1 format
273 283 e2 = (
274 284 offset_type(e[0], 0),
275 285 e[1],
276 286 -1,
277 287 e[2],
278 288 e[3],
279 289 nodemap.get(e[4], nullrev),
280 290 nodemap.get(e[5], nullrev),
281 291 e[6],
282 292 )
283 293 index.append(e2)
284 294 nodemap[e[6]] = n
285 295 n += 1
286 296
287 297 index = revlogoldindex(index)
288 298 return index, None
289 299
290 300 def packentry(self, entry, node, version, rev):
291 301 if gettype(entry[0]):
292 302 raise error.RevlogError(
293 303 _(b'index entry flags need revlog version 1')
294 304 )
295 305 e2 = (
296 306 getoffset(entry[0]),
297 307 entry[1],
298 308 entry[3],
299 309 entry[4],
300 310 node(entry[5]),
301 311 node(entry[6]),
302 312 entry[7],
303 313 )
304 314 return indexformatv0_pack(*e2)
305 315
306 316
307 317 # index ng:
308 318 # 6 bytes: offset
309 319 # 2 bytes: flags
310 320 # 4 bytes: compressed length
311 321 # 4 bytes: uncompressed length
312 322 # 4 bytes: base rev
313 323 # 4 bytes: link rev
314 324 # 4 bytes: parent 1 rev
315 325 # 4 bytes: parent 2 rev
316 326 # 32 bytes: nodeid
317 327 indexformatng = struct.Struct(b">Qiiiiii20s12x")
318 328 indexformatng_pack = indexformatng.pack
319 329 versionformat = struct.Struct(b">I")
320 330 versionformat_pack = versionformat.pack
321 331 versionformat_unpack = versionformat.unpack
322 332
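The v1 ("ng") entry is likewise fixed-size, padded out to a round 64 bytes,
and the first entry doubles as the header: packentry() below splices the
4-byte version word over the start of rev 0.

    # ">Qiiiiii20s12x": 8-byte offset/flags + six 4-byte ints
    # + 20-byte nodeid + 12 pad bytes
    assert indexformatng.size == 8 + 6 * 4 + 20 + 12  # 64 bytes per entry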
323 333 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
324 334 # signed integer)
325 335 _maxentrysize = 0x7FFFFFFF
326 336
327 337
328 338 class revlogio(object):
329 339 def __init__(self):
330 340 self.size = indexformatng.size
331 341
332 342 def parseindex(self, data, inline):
333 343 # call the C implementation to parse the index data
334 344 index, cache = parsers.parse_index2(data, inline)
335 345 return index, cache
336 346
337 347 def packentry(self, entry, node, version, rev):
338 348 p = indexformatng_pack(*entry)
339 349 if rev == 0:
340 350 p = versionformat_pack(version) + p[4:]
341 351 return p
342 352
343 353
344 354 class revlog(object):
345 355 """
346 356 the underlying revision storage object
347 357
348 358 A revlog consists of two parts, an index and the revision data.
349 359
350 360 The index is a file with a fixed record size containing
351 361 information on each revision, including its nodeid (hash), the
352 362 nodeids of its parents, the position and offset of its data within
353 363 the data file, and the revision it's based on. Finally, each entry
354 364 contains a linkrev entry that can serve as a pointer to external
355 365 data.
356 366
357 367 The revision data itself is a linear collection of data chunks.
358 368 Each chunk represents a revision and is usually represented as a
359 369 delta against the previous chunk. To bound lookup time, runs of
360 370 deltas are limited to about 2 times the length of the original
361 371 version data. This makes retrieval of a version proportional to
362 372 its size, or O(1) relative to the number of revisions.
363 373
364 374 Both pieces of the revlog are written to in an append-only
365 375 fashion, which means we never need to rewrite a file to insert or
366 376 remove data, and can use some simple techniques to avoid the need
367 377 for locking while reading.
368 378
369 379 If checkambig, indexfile is opened with checkambig=True at
370 380 writing, to avoid file stat ambiguity.
371 381
372 382 If mmaplargeindex is True, and an mmapindexthreshold is set, the
373 383 index will be mmapped rather than read if it is larger than the
374 384 configured threshold.
375 385
376 386 If censorable is True, the revlog can have censored revisions.
377 387
378 388 If `upperboundcomp` is not None, this is the expected maximal gain from
379 389 compression for the data content.
380 390 """
381 391
382 392 _flagserrorclass = error.RevlogError
383 393
384 394 def __init__(
385 395 self,
386 396 opener,
387 397 indexfile,
388 398 datafile=None,
389 399 checkambig=False,
390 400 mmaplargeindex=False,
391 401 censorable=False,
392 402 upperboundcomp=None,
393 403 ):
394 404 """
395 405 create a revlog object
396 406
397 407 opener is a function that abstracts the file opening operation
398 408 and can be used to implement COW semantics or the like.
399 409
400 410 """
401 411 self.upperboundcomp = upperboundcomp
402 412 self.indexfile = indexfile
403 413 self.datafile = datafile or (indexfile[:-2] + b".d")
404 414 self.opener = opener
405 415 # When True, indexfile is opened with checkambig=True at writing, to
406 416 # avoid file stat ambiguity.
407 417 self._checkambig = checkambig
408 418 self._mmaplargeindex = mmaplargeindex
409 419 self._censorable = censorable
410 420 # 3-tuple of (node, rev, text) for a raw revision.
411 421 self._revisioncache = None
412 422 # Maps rev to chain base rev.
413 423 self._chainbasecache = util.lrucachedict(100)
414 424 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
415 425 self._chunkcache = (0, b'')
416 426 # How much data to read and cache into the raw revlog data cache.
417 427 self._chunkcachesize = 65536
418 428 self._maxchainlen = None
419 429 self._deltabothparents = True
420 430 self.index = None
421 431 # Mapping of partial identifiers to full nodes.
422 432 self._pcache = {}
423 433 # Mapping of revision integer to full node.
424 434 self._compengine = b'zlib'
425 435 self._compengineopts = {}
426 436 self._maxdeltachainspan = -1
427 437 self._withsparseread = False
428 438 self._sparserevlog = False
429 439 self._srdensitythreshold = 0.50
430 440 self._srmingapsize = 262144
431 441
432 442 # Make copy of flag processors so each revlog instance can support
433 443 # custom flags.
434 444 self._flagprocessors = dict(flagutil.flagprocessors)
435 445
436 446 # 2-tuple of file handles being used for active writing.
437 447 self._writinghandles = None
438 448
439 449 self._loadindex()
440 450
441 451 def _loadindex(self):
442 452 mmapindexthreshold = None
443 453 opts = self.opener.options
444 454
445 455 if b'revlogv2' in opts:
446 456 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
447 457 elif b'revlogv1' in opts:
448 458 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
449 459 if b'generaldelta' in opts:
450 460 newversionflags |= FLAG_GENERALDELTA
451 461 elif b'revlogv0' in self.opener.options:
452 462 newversionflags = REVLOGV0
453 463 else:
454 464 newversionflags = REVLOG_DEFAULT_VERSION
455 465
456 466 if b'chunkcachesize' in opts:
457 467 self._chunkcachesize = opts[b'chunkcachesize']
458 468 if b'maxchainlen' in opts:
459 469 self._maxchainlen = opts[b'maxchainlen']
460 470 if b'deltabothparents' in opts:
461 471 self._deltabothparents = opts[b'deltabothparents']
462 472 self._lazydelta = bool(opts.get(b'lazydelta', True))
463 473 self._lazydeltabase = False
464 474 if self._lazydelta:
465 475 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
466 476 if b'compengine' in opts:
467 477 self._compengine = opts[b'compengine']
468 478 if b'zlib.level' in opts:
469 479 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
470 480 if b'zstd.level' in opts:
471 481 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
472 482 if b'maxdeltachainspan' in opts:
473 483 self._maxdeltachainspan = opts[b'maxdeltachainspan']
474 484 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
475 485 mmapindexthreshold = opts[b'mmapindexthreshold']
476 486 self.hassidedata = bool(opts.get(b'side-data', False))
477 487 if self.hassidedata:
478 488 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
479 489 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
480 490 withsparseread = bool(opts.get(b'with-sparse-read', False))
481 491 # sparse-revlog forces sparse-read
482 492 self._withsparseread = self._sparserevlog or withsparseread
483 493 if b'sparse-read-density-threshold' in opts:
484 494 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
485 495 if b'sparse-read-min-gap-size' in opts:
486 496 self._srmingapsize = opts[b'sparse-read-min-gap-size']
487 497 if opts.get(b'enableellipsis'):
488 498 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
489 499
490 500 # revlog v0 doesn't have flag processors
491 501 for flag, processor in pycompat.iteritems(
492 502 opts.get(b'flagprocessors', {})
493 503 ):
494 504 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
495 505
496 506 if self._chunkcachesize <= 0:
497 507 raise error.RevlogError(
498 508 _(b'revlog chunk cache size %r is not greater than 0')
499 509 % self._chunkcachesize
500 510 )
501 511 elif self._chunkcachesize & (self._chunkcachesize - 1):
502 512 raise error.RevlogError(
503 513 _(b'revlog chunk cache size %r is not a power of 2')
504 514 % self._chunkcachesize
505 515 )
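        # The check above relies on a standard bit trick: a power of two has
        # exactly one bit set, so x & (x - 1) is 0 precisely then, e.g.
        # 65536 & 65535 == 0 but 65537 & 65536 != 0.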
506 516
507 517 indexdata = b''
508 518 self._initempty = True
509 519 try:
510 520 with self._indexfp() as f:
511 521 if (
512 522 mmapindexthreshold is not None
513 523 and self.opener.fstat(f).st_size >= mmapindexthreshold
514 524 ):
515 525 # TODO: should call .close() to release resources without
516 526 # relying on Python GC
517 527 indexdata = util.buffer(util.mmapread(f))
518 528 else:
519 529 indexdata = f.read()
520 530 if len(indexdata) > 0:
521 531 versionflags = versionformat_unpack(indexdata[:4])[0]
522 532 self._initempty = False
523 533 else:
524 534 versionflags = newversionflags
525 535 except IOError as inst:
526 536 if inst.errno != errno.ENOENT:
527 537 raise
528 538
529 539 versionflags = newversionflags
530 540
531 541 self.version = versionflags
532 542
533 543 flags = versionflags & ~0xFFFF
534 544 fmt = versionflags & 0xFFFF
535 545
536 546 if fmt == REVLOGV0:
537 547 if flags:
538 548 raise error.RevlogError(
539 549 _(b'unknown flags (%#04x) in version %d revlog %s')
540 550 % (flags >> 16, fmt, self.indexfile)
541 551 )
542 552
543 553 self._inline = False
544 554 self._generaldelta = False
545 555
546 556 elif fmt == REVLOGV1:
547 557 if flags & ~REVLOGV1_FLAGS:
548 558 raise error.RevlogError(
549 559 _(b'unknown flags (%#04x) in version %d revlog %s')
550 560 % (flags >> 16, fmt, self.indexfile)
551 561 )
552 562
553 563 self._inline = versionflags & FLAG_INLINE_DATA
554 564 self._generaldelta = versionflags & FLAG_GENERALDELTA
555 565
556 566 elif fmt == REVLOGV2:
557 567 if flags & ~REVLOGV2_FLAGS:
558 568 raise error.RevlogError(
559 569 _(b'unknown flags (%#04x) in version %d revlog %s')
560 570 % (flags >> 16, fmt, self.indexfile)
561 571 )
562 572
563 573 self._inline = versionflags & FLAG_INLINE_DATA
564 574 # generaldelta implied by version 2 revlogs.
565 575 self._generaldelta = True
566 576
567 577 else:
568 578 raise error.RevlogError(
569 579 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
570 580 )
571 581 # sparse-revlog can't be on without general-delta (issue6056)
572 582 if not self._generaldelta:
573 583 self._sparserevlog = False
574 584
575 585 self._storedeltachains = True
576 586
577 587 self._io = revlogio()
578 588 if self.version == REVLOGV0:
579 589 self._io = revlogoldio()
580 590 try:
581 591 d = self._io.parseindex(indexdata, self._inline)
582 592 except (ValueError, IndexError):
583 593 raise error.RevlogError(
584 594 _(b"index %s is corrupted") % self.indexfile
585 595 )
586 596 self.index, self._chunkcache = d
587 597 if not self._chunkcache:
588 598 self._chunkclear()
589 599 # revnum -> (chain-length, sum-delta-length)
590 600 self._chaininfocache = {}
591 601 # revlog header -> revlog compressor
592 602 self._decompressors = {}
593 603
594 604 @util.propertycache
595 605 def _compressor(self):
596 606 engine = util.compengines[self._compengine]
597 607 return engine.revlogcompressor(self._compengineopts)
598 608
599 609 def _indexfp(self, mode=b'r'):
600 610 """file object for the revlog's index file"""
601 611 args = {'mode': mode}
602 612 if mode != b'r':
603 613 args['checkambig'] = self._checkambig
604 614 if mode == b'w':
605 615 args['atomictemp'] = True
606 616 return self.opener(self.indexfile, **args)
607 617
608 618 def _datafp(self, mode=b'r'):
609 619 """file object for the revlog's data file"""
610 620 return self.opener(self.datafile, mode=mode)
611 621
612 622 @contextlib.contextmanager
613 623 def _datareadfp(self, existingfp=None):
614 624 """file object suitable to read data"""
615 625 # Use explicit file handle, if given.
616 626 if existingfp is not None:
617 627 yield existingfp
618 628
619 629 # Use a file handle being actively used for writes, if available.
620 630 # There is some danger to doing this because reads will seek the
621 631 # file. However, _writeentry() performs a SEEK_END before all writes,
622 632 # so we should be safe.
623 633 elif self._writinghandles:
624 634 if self._inline:
625 635 yield self._writinghandles[0]
626 636 else:
627 637 yield self._writinghandles[1]
628 638
629 639 # Otherwise open a new file handle.
630 640 else:
631 641 if self._inline:
632 642 func = self._indexfp
633 643 else:
634 644 func = self._datafp
635 645 with func() as fp:
636 646 yield fp
637 647
638 648 def tiprev(self):
639 649 return len(self.index) - 1
640 650
641 651 def tip(self):
642 652 return self.node(self.tiprev())
643 653
644 654 def __contains__(self, rev):
645 655 return 0 <= rev < len(self)
646 656
647 657 def __len__(self):
648 658 return len(self.index)
649 659
650 660 def __iter__(self):
651 661 return iter(pycompat.xrange(len(self)))
652 662
653 663 def revs(self, start=0, stop=None):
654 664 """iterate over all rev in this revlog (from start to stop)"""
655 665 return storageutil.iterrevs(len(self), start=start, stop=stop)
656 666
657 667 @property
658 668 def nodemap(self):
659 669 msg = (
660 670 b"revlog.nodemap is deprecated, "
661 671 b"use revlog.index.[has_node|rev|get_rev]"
662 672 )
663 673 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
664 674 return self.index.nodemap
665 675
666 676 @property
667 677 def _nodecache(self):
668 678 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
669 679 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
670 680 return self.index.nodemap
671 681
672 682 def hasnode(self, node):
673 683 try:
674 684 self.rev(node)
675 685 return True
676 686 except KeyError:
677 687 return False
678 688
679 689 def candelta(self, baserev, rev):
680 690 """whether two revisions (baserev, rev) can be delta-ed or not"""
681 691 # Disable delta if either rev requires a content-changing flag
682 692 # processor (ex. LFS). This is because such flag processor can alter
683 693 # the rawtext content that the delta will be based on, and two clients
684 694 # could have a same revlog node with different flags (i.e. different
685 695 # rawtext contents) and the delta could be incompatible.
686 696 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
687 697 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
688 698 ):
689 699 return False
690 700 return True
691 701
692 702 def clearcaches(self):
693 703 self._revisioncache = None
694 704 self._chainbasecache.clear()
695 705 self._chunkcache = (0, b'')
696 706 self._pcache = {}
697 707 self.index.clearcaches()
698 708
699 709 def rev(self, node):
700 710 try:
701 711 return self.index.rev(node)
702 712 except TypeError:
703 713 raise
704 714 except error.RevlogError:
705 715 # parsers.c radix tree lookup failed
706 716 if node == wdirid or node in wdirfilenodeids:
707 717 raise error.WdirUnsupported
708 718 raise error.LookupError(node, self.indexfile, _(b'no node'))
709 719
710 720 # Accessors for index entries.
711 721
712 722 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
713 723 # are flags.
714 724 def start(self, rev):
715 725 return int(self.index[rev][0] >> 16)
716 726
717 727 def flags(self, rev):
718 728 return self.index[rev][0] & 0xFFFF
719 729
720 730 def length(self, rev):
721 731 return self.index[rev][1]
722 732
723 733 def rawsize(self, rev):
724 734 """return the length of the uncompressed text for a given revision"""
725 735 l = self.index[rev][2]
726 736 if l >= 0:
727 737 return l
728 738
729 739 t = self.rawdata(rev)
730 740 return len(t)
731 741
732 742 def size(self, rev):
733 743 """length of non-raw text (processed by a "read" flag processor)"""
734 744 # fast path: if no "read" flag processor could change the content,
735 745 # size is rawsize. note: ELLIPSIS is known to not change the content.
736 746 flags = self.flags(rev)
737 747 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
738 748 return self.rawsize(rev)
739 749
740 750 return len(self.revision(rev, raw=False))
741 751
742 752 def chainbase(self, rev):
743 753 base = self._chainbasecache.get(rev)
744 754 if base is not None:
745 755 return base
746 756
747 757 index = self.index
748 758 iterrev = rev
749 759 base = index[iterrev][3]
750 760 while base != iterrev:
751 761 iterrev = base
752 762 base = index[iterrev][3]
753 763
754 764 self._chainbasecache[rev] = base
755 765 return base
756 766
757 767 def linkrev(self, rev):
758 768 return self.index[rev][4]
759 769
760 770 def parentrevs(self, rev):
761 771 try:
762 772 entry = self.index[rev]
763 773 except IndexError:
764 774 if rev == wdirrev:
765 775 raise error.WdirUnsupported
766 776 raise
767 777
768 778 return entry[5], entry[6]
769 779
770 780 # fast parentrevs(rev) where rev isn't filtered
771 781 _uncheckedparentrevs = parentrevs
772 782
773 783 def node(self, rev):
774 784 try:
775 785 return self.index[rev][7]
776 786 except IndexError:
777 787 if rev == wdirrev:
778 788 raise error.WdirUnsupported
779 789 raise
780 790
781 791 # Derived from index values.
782 792
783 793 def end(self, rev):
784 794 return self.start(rev) + self.length(rev)
785 795
786 796 def parents(self, node):
787 797 i = self.index
788 798 d = i[self.rev(node)]
789 799 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
790 800
791 801 def chainlen(self, rev):
792 802 return self._chaininfo(rev)[0]
793 803
794 804 def _chaininfo(self, rev):
795 805 chaininfocache = self._chaininfocache
796 806 if rev in chaininfocache:
797 807 return chaininfocache[rev]
798 808 index = self.index
799 809 generaldelta = self._generaldelta
800 810 iterrev = rev
801 811 e = index[iterrev]
802 812 clen = 0
803 813 compresseddeltalen = 0
804 814 while iterrev != e[3]:
805 815 clen += 1
806 816 compresseddeltalen += e[1]
807 817 if generaldelta:
808 818 iterrev = e[3]
809 819 else:
810 820 iterrev -= 1
811 821 if iterrev in chaininfocache:
812 822 t = chaininfocache[iterrev]
813 823 clen += t[0]
814 824 compresseddeltalen += t[1]
815 825 break
816 826 e = index[iterrev]
817 827 else:
818 828 # Add text length of base since decompressing that also takes
819 829 # work. For cache hits the length is already included.
820 830 compresseddeltalen += e[1]
821 831 r = (clen, compresseddeltalen)
822 832 chaininfocache[rev] = r
823 833 return r
824 834
825 835 def _deltachain(self, rev, stoprev=None):
826 836 """Obtain the delta chain for a revision.
827 837
828 838 ``stoprev`` specifies a revision to stop at. If not specified, we
829 839 stop at the base of the chain.
830 840
831 841 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
832 842 revs in ascending order and ``stopped`` is a bool indicating whether
833 843 ``stoprev`` was hit.
834 844 """
835 845 # Try C implementation.
836 846 try:
837 847 return self.index.deltachain(rev, stoprev, self._generaldelta)
838 848 except AttributeError:
839 849 pass
840 850
841 851 chain = []
842 852
843 853 # Alias to prevent attribute lookup in tight loop.
844 854 index = self.index
845 855 generaldelta = self._generaldelta
846 856
847 857 iterrev = rev
848 858 e = index[iterrev]
849 859 while iterrev != e[3] and iterrev != stoprev:
850 860 chain.append(iterrev)
851 861 if generaldelta:
852 862 iterrev = e[3]
853 863 else:
854 864 iterrev -= 1
855 865 e = index[iterrev]
856 866
857 867 if iterrev == stoprev:
858 868 stopped = True
859 869 else:
860 870 chain.append(iterrev)
861 871 stopped = False
862 872
863 873 chain.reverse()
864 874 return chain, stopped
865 875
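A fulltext is conceptually the chain's base text with each subsequent delta
applied in order. A simplified sketch of that reconstruction, assuming the
chain was not truncated by ``stoprev`` (the real path batches reads through
_chunks() and honors flag processors):

    chain, stopped = rl._deltachain(rev)
    text = rl._chunk(chain[0])        # the chain base stores a full text
    for r in chain[1:]:
        text = mdiff.patch(text, rl._chunk(r))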
866 876 def ancestors(self, revs, stoprev=0, inclusive=False):
867 877 """Generate the ancestors of 'revs' in reverse revision order.
868 878 Does not generate revs lower than stoprev.
869 879
870 880 See the documentation for ancestor.lazyancestors for more details."""
871 881
872 882 # first, make sure start revisions aren't filtered
873 883 revs = list(revs)
874 884 checkrev = self.node
875 885 for r in revs:
876 886 checkrev(r)
877 887 # and we're sure ancestors aren't filtered as well
878 888
879 889 if rustancestor is not None:
880 890 lazyancestors = rustancestor.LazyAncestors
881 891 arg = self.index
882 892 elif util.safehasattr(parsers, b'rustlazyancestors'):
883 893 lazyancestors = ancestor.rustlazyancestors
884 894 arg = self.index
885 895 else:
886 896 lazyancestors = ancestor.lazyancestors
887 897 arg = self._uncheckedparentrevs
888 898 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
889 899
890 900 def descendants(self, revs):
891 901 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
892 902
893 903 def findcommonmissing(self, common=None, heads=None):
894 904 """Return a tuple of the ancestors of common and the ancestors of heads
895 905 that are not ancestors of common. In revset terminology, we return the
896 906 tuple:
897 907
898 908 ::common, (::heads) - (::common)
899 909
900 910 The list is sorted by revision number, meaning it is
901 911 topologically sorted.
902 912
903 913 'heads' and 'common' are both lists of node IDs. If heads is
904 914 not supplied, uses all of the revlog's heads. If common is not
905 915 supplied, uses nullid."""
906 916 if common is None:
907 917 common = [nullid]
908 918 if heads is None:
909 919 heads = self.heads()
910 920
911 921 common = [self.rev(n) for n in common]
912 922 heads = [self.rev(n) for n in heads]
913 923
914 924 # we want the ancestors, but inclusive
915 925 class lazyset(object):
916 926 def __init__(self, lazyvalues):
917 927 self.addedvalues = set()
918 928 self.lazyvalues = lazyvalues
919 929
920 930 def __contains__(self, value):
921 931 return value in self.addedvalues or value in self.lazyvalues
922 932
923 933 def __iter__(self):
924 934 added = self.addedvalues
925 935 for r in added:
926 936 yield r
927 937 for r in self.lazyvalues:
928 938 if r not in added:
929 939 yield r
930 940
931 941 def add(self, value):
932 942 self.addedvalues.add(value)
933 943
934 944 def update(self, values):
935 945 self.addedvalues.update(values)
936 946
937 947 has = lazyset(self.ancestors(common))
938 948 has.add(nullrev)
939 949 has.update(common)
940 950
941 951 # take all ancestors from heads that aren't in has
942 952 missing = set()
943 953 visit = collections.deque(r for r in heads if r not in has)
944 954 while visit:
945 955 r = visit.popleft()
946 956 if r in missing:
947 957 continue
948 958 else:
949 959 missing.add(r)
950 960 for p in self.parentrevs(r):
951 961 if p not in has:
952 962 visit.append(p)
953 963 missing = list(missing)
954 964 missing.sort()
955 965 return has, [self.node(miss) for miss in missing]
956 966
957 967 def incrementalmissingrevs(self, common=None):
958 968 """Return an object that can be used to incrementally compute the
959 969 revision numbers of the ancestors of arbitrary sets that are not
960 970 ancestors of common. This is an ancestor.incrementalmissingancestors
961 971 object.
962 972
963 973 'common' is a list of revision numbers. If common is not supplied, uses
964 974 nullrev.
965 975 """
966 976 if common is None:
967 977 common = [nullrev]
968 978
969 979 if rustancestor is not None:
970 980 return rustancestor.MissingAncestors(self.index, common)
971 981 return ancestor.incrementalmissingancestors(self.parentrevs, common)
972 982
973 983 def findmissingrevs(self, common=None, heads=None):
974 984 """Return the revision numbers of the ancestors of heads that
975 985 are not ancestors of common.
976 986
977 987 More specifically, return a list of revision numbers corresponding to
978 988 nodes N such that every N satisfies the following constraints:
979 989
980 990 1. N is an ancestor of some node in 'heads'
981 991 2. N is not an ancestor of any node in 'common'
982 992
983 993 The list is sorted by revision number, meaning it is
984 994 topologically sorted.
985 995
986 996 'heads' and 'common' are both lists of revision numbers. If heads is
987 997 not supplied, uses all of the revlog's heads. If common is not
988 998 supplied, uses nullid."""
989 999 if common is None:
990 1000 common = [nullrev]
991 1001 if heads is None:
992 1002 heads = self.headrevs()
993 1003
994 1004 inc = self.incrementalmissingrevs(common=common)
995 1005 return inc.missingancestors(heads)
996 1006
997 1007 def findmissing(self, common=None, heads=None):
998 1008 """Return the ancestors of heads that are not ancestors of common.
999 1009
1000 1010 More specifically, return a list of nodes N such that every N
1001 1011 satisfies the following constraints:
1002 1012
1003 1013 1. N is an ancestor of some node in 'heads'
1004 1014 2. N is not an ancestor of any node in 'common'
1005 1015
1006 1016 The list is sorted by revision number, meaning it is
1007 1017 topologically sorted.
1008 1018
1009 1019 'heads' and 'common' are both lists of node IDs. If heads is
1010 1020 not supplied, uses all of the revlog's heads. If common is not
1011 1021 supplied, uses nullid."""
1012 1022 if common is None:
1013 1023 common = [nullid]
1014 1024 if heads is None:
1015 1025 heads = self.heads()
1016 1026
1017 1027 common = [self.rev(n) for n in common]
1018 1028 heads = [self.rev(n) for n in heads]
1019 1029
1020 1030 inc = self.incrementalmissingrevs(common=common)
1021 1031 return [self.node(r) for r in inc.missingancestors(heads)]
1022 1032
1023 1033 def nodesbetween(self, roots=None, heads=None):
1024 1034 """Return a topological path from 'roots' to 'heads'.
1025 1035
1026 1036 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1027 1037 topologically sorted list of all nodes N that satisfy both of
1028 1038 these constraints:
1029 1039
1030 1040 1. N is a descendant of some node in 'roots'
1031 1041 2. N is an ancestor of some node in 'heads'
1032 1042
1033 1043 Every node is considered to be both a descendant and an ancestor
1034 1044 of itself, so every reachable node in 'roots' and 'heads' will be
1035 1045 included in 'nodes'.
1036 1046
1037 1047 'outroots' is the list of reachable nodes in 'roots', i.e., the
1038 1048 subset of 'roots' that is returned in 'nodes'. Likewise,
1039 1049 'outheads' is the subset of 'heads' that is also in 'nodes'.
1040 1050
1041 1051 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1042 1052 unspecified, uses nullid as the only root. If 'heads' is
1043 1053 unspecified, uses list of all of the revlog's heads."""
1044 1054 nonodes = ([], [], [])
1045 1055 if roots is not None:
1046 1056 roots = list(roots)
1047 1057 if not roots:
1048 1058 return nonodes
1049 1059 lowestrev = min([self.rev(n) for n in roots])
1050 1060 else:
1051 1061 roots = [nullid] # Everybody's a descendant of nullid
1052 1062 lowestrev = nullrev
1053 1063 if (lowestrev == nullrev) and (heads is None):
1054 1064 # We want _all_ the nodes!
1055 1065 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1056 1066 if heads is None:
1057 1067 # All nodes are ancestors, so the latest ancestor is the last
1058 1068 # node.
1059 1069 highestrev = len(self) - 1
1060 1070 # Set ancestors to None to signal that every node is an ancestor.
1061 1071 ancestors = None
1062 1072 # Set heads to an empty dictionary for later discovery of heads
1063 1073 heads = {}
1064 1074 else:
1065 1075 heads = list(heads)
1066 1076 if not heads:
1067 1077 return nonodes
1068 1078 ancestors = set()
1069 1079 # Turn heads into a dictionary so we can remove 'fake' heads.
1070 1080 # Also, later we will be using it to filter out the heads we can't
1071 1081 # find from roots.
1072 1082 heads = dict.fromkeys(heads, False)
1073 1083 # Start at the top and keep marking parents until we're done.
1074 1084 nodestotag = set(heads)
1075 1085 # Remember where the top was so we can use it as a limit later.
1076 1086 highestrev = max([self.rev(n) for n in nodestotag])
1077 1087 while nodestotag:
1078 1088 # grab a node to tag
1079 1089 n = nodestotag.pop()
1080 1090 # Never tag nullid
1081 1091 if n == nullid:
1082 1092 continue
1083 1093 # A node's revision number represents its place in a
1084 1094 # topologically sorted list of nodes.
1085 1095 r = self.rev(n)
1086 1096 if r >= lowestrev:
1087 1097 if n not in ancestors:
1088 1098 # If we are possibly a descendant of one of the roots
1089 1099 # and we haven't already been marked as an ancestor
1090 1100 ancestors.add(n) # Mark as ancestor
1091 1101 # Add non-nullid parents to list of nodes to tag.
1092 1102 nodestotag.update(
1093 1103 [p for p in self.parents(n) if p != nullid]
1094 1104 )
1095 1105 elif n in heads: # We've seen it before, is it a fake head?
1096 1106 # So it is, real heads should not be the ancestors of
1097 1107 # any other heads.
1098 1108 heads.pop(n)
1099 1109 if not ancestors:
1100 1110 return nonodes
1101 1111 # Now that we have our set of ancestors, we want to remove any
1102 1112 # roots that are not ancestors.
1103 1113
1104 1114 # If one of the roots was nullid, everything is included anyway.
1105 1115 if lowestrev > nullrev:
1106 1116 # But, since we weren't, let's recompute the lowest rev to not
1107 1117 # include roots that aren't ancestors.
1108 1118
1109 1119 # Filter out roots that aren't ancestors of heads
1110 1120 roots = [root for root in roots if root in ancestors]
1111 1121 # Recompute the lowest revision
1112 1122 if roots:
1113 1123 lowestrev = min([self.rev(root) for root in roots])
1114 1124 else:
1115 1125 # No more roots? Return empty list
1116 1126 return nonodes
1117 1127 else:
1118 1128 # We are descending from nullid, and don't need to care about
1119 1129 # any other roots.
1120 1130 lowestrev = nullrev
1121 1131 roots = [nullid]
1122 1132 # Transform our roots list into a set.
1123 1133 descendants = set(roots)
1124 1134 # Also, keep the original roots so we can filter out roots that aren't
1125 1135 # 'real' roots (i.e. are descended from other roots).
1126 1136 roots = descendants.copy()
1127 1137 # Our topologically sorted list of output nodes.
1128 1138 orderedout = []
1129 1139 # Don't start at nullid since we don't want nullid in our output list,
1130 1140 # and if nullid shows up in descendants, empty parents will look like
1131 1141 # they're descendants.
1132 1142 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1133 1143 n = self.node(r)
1134 1144 isdescendant = False
1135 1145 if lowestrev == nullrev: # Everybody is a descendant of nullid
1136 1146 isdescendant = True
1137 1147 elif n in descendants:
1138 1148 # n is already a descendant
1139 1149 isdescendant = True
1140 1150 # This check only needs to be done here because all the roots
1141 1151 # will start being marked as descendants before the loop.
1142 1152 if n in roots:
1143 1153 # If n was a root, check if it's a 'real' root.
1144 1154 p = tuple(self.parents(n))
1145 1155 # If any of its parents are descendants, it's not a root.
1146 1156 if (p[0] in descendants) or (p[1] in descendants):
1147 1157 roots.remove(n)
1148 1158 else:
1149 1159 p = tuple(self.parents(n))
1150 1160 # A node is a descendant if either of its parents are
1151 1161 # descendants. (We seeded the descendants set with the roots
1152 1162 # up there, remember?)
1153 1163 if (p[0] in descendants) or (p[1] in descendants):
1154 1164 descendants.add(n)
1155 1165 isdescendant = True
1156 1166 if isdescendant and ((ancestors is None) or (n in ancestors)):
1157 1167 # Only include nodes that are both descendants and ancestors.
1158 1168 orderedout.append(n)
1159 1169 if (ancestors is not None) and (n in heads):
1160 1170 # We're trying to figure out which heads are reachable
1161 1171 # from roots.
1162 1172 # Mark this head as having been reached
1163 1173 heads[n] = True
1164 1174 elif ancestors is None:
1165 1175 # Otherwise, we're trying to discover the heads.
1166 1176 # Assume this is a head because if it isn't, the next step
1167 1177 # will eventually remove it.
1168 1178 heads[n] = True
1169 1179 # But, obviously its parents aren't.
1170 1180 for p in self.parents(n):
1171 1181 heads.pop(p, None)
1172 1182 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1173 1183 roots = list(roots)
1174 1184 assert orderedout
1175 1185 assert roots
1176 1186 assert heads
1177 1187 return (orderedout, roots, heads)
1178 1188
1179 1189 def headrevs(self, revs=None):
1180 1190 if revs is None:
1181 1191 try:
1182 1192 return self.index.headrevs()
1183 1193 except AttributeError:
1184 1194 return self._headrevs()
1185 1195 if rustdagop is not None:
1186 1196 return rustdagop.headrevs(self.index, revs)
1187 1197 return dagop.headrevs(revs, self._uncheckedparentrevs)
1188 1198
1189 1199 def computephases(self, roots):
1190 1200 return self.index.computephasesmapsets(roots)
1191 1201
1192 1202 def _headrevs(self):
1193 1203 count = len(self)
1194 1204 if not count:
1195 1205 return [nullrev]
1196 1206 # we won't iter over filtered rev so nobody is a head at start
1197 1207 ishead = [0] * (count + 1)
1198 1208 index = self.index
1199 1209 for r in self:
1200 1210 ishead[r] = 1 # I may be a head
1201 1211 e = index[r]
1202 1212 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1203 1213 return [r for r, val in enumerate(ishead) if val]
1204 1214
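The ``count + 1`` sizing is deliberate: root revisions report nullrev (-1) as
a parent, and ishead[-1] then lands on the spare trailing slot instead of
clobbering a real revision. For a linear history 0 <- 1 <- 2:

    # after visiting every rev:  ishead == [0, 0, 1, 0]
    #   each rev first marks itself (1), then zeroes its parents;
    #   only rev 2, which nobody names as a parent, stays set -> heads [2]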
1205 1215 def heads(self, start=None, stop=None):
1206 1216 """return the list of all nodes that have no children
1207 1217
1208 1218 if start is specified, only heads that are descendants of
1209 1219 start will be returned
1210 1220 if stop is specified, it will consider all the revs from stop
1211 1221 as if they had no children
1212 1222 """
1213 1223 if start is None and stop is None:
1214 1224 if not len(self):
1215 1225 return [nullid]
1216 1226 return [self.node(r) for r in self.headrevs()]
1217 1227
1218 1228 if start is None:
1219 1229 start = nullrev
1220 1230 else:
1221 1231 start = self.rev(start)
1222 1232
1223 1233 stoprevs = set(self.rev(n) for n in stop or [])
1224 1234
1225 1235 revs = dagop.headrevssubset(
1226 1236 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1227 1237 )
1228 1238
1229 1239 return [self.node(rev) for rev in revs]
1230 1240
1231 1241 def children(self, node):
1232 1242 """find the children of a given node"""
1233 1243 c = []
1234 1244 p = self.rev(node)
1235 1245 for r in self.revs(start=p + 1):
1236 1246 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1237 1247 if prevs:
1238 1248 for pr in prevs:
1239 1249 if pr == p:
1240 1250 c.append(self.node(r))
1241 1251 elif p == nullrev:
1242 1252 c.append(self.node(r))
1243 1253 return c
1244 1254
1245 1255 def commonancestorsheads(self, a, b):
1246 1256 """calculate all the heads of the common ancestors of nodes a and b"""
1247 1257 a, b = self.rev(a), self.rev(b)
1248 1258 ancs = self._commonancestorsheads(a, b)
1249 1259 return pycompat.maplist(self.node, ancs)
1250 1260
1251 1261 def _commonancestorsheads(self, *revs):
1252 1262 """calculate all the heads of the common ancestors of revs"""
1253 1263 try:
1254 1264 ancs = self.index.commonancestorsheads(*revs)
1255 1265 except (AttributeError, OverflowError): # C implementation failed
1256 1266 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1257 1267 return ancs
1258 1268
1259 1269 def isancestor(self, a, b):
1260 1270 """return True if node a is an ancestor of node b
1261 1271
1262 1272 A revision is considered an ancestor of itself."""
1263 1273 a, b = self.rev(a), self.rev(b)
1264 1274 return self.isancestorrev(a, b)
1265 1275
1266 1276 def isancestorrev(self, a, b):
1267 1277 """return True if revision a is an ancestor of revision b
1268 1278
1269 1279 A revision is considered an ancestor of itself.
1270 1280
1271 1281 The implementation of this is trivial but the use of
1272 1282 reachableroots is not."""
1273 1283 if a == nullrev:
1274 1284 return True
1275 1285 elif a == b:
1276 1286 return True
1277 1287 elif a > b:
1278 1288 return False
1279 1289 return bool(self.reachableroots(a, [b], [a], includepath=False))
1280 1290
1281 1291 def reachableroots(self, minroot, heads, roots, includepath=False):
1282 1292 """return (heads(::(<roots> and <roots>::<heads>)))
1283 1293
1284 1294 If includepath is True, return (<roots>::<heads>)."""
1285 1295 try:
1286 1296 return self.index.reachableroots2(
1287 1297 minroot, heads, roots, includepath
1288 1298 )
1289 1299 except AttributeError:
1290 1300 return dagop._reachablerootspure(
1291 1301 self.parentrevs, minroot, roots, heads, includepath
1292 1302 )
1293 1303
1294 1304 def ancestor(self, a, b):
1295 1305 """calculate the "best" common ancestor of nodes a and b"""
1296 1306
1297 1307 a, b = self.rev(a), self.rev(b)
1298 1308 try:
1299 1309 ancs = self.index.ancestors(a, b)
1300 1310 except (AttributeError, OverflowError):
1301 1311 ancs = ancestor.ancestors(self.parentrevs, a, b)
1302 1312 if ancs:
1303 1313 # choose a consistent winner when there's a tie
1304 1314 return min(map(self.node, ancs))
1305 1315 return nullid
1306 1316
1307 1317 def _match(self, id):
1308 1318 if isinstance(id, int):
1309 1319 # rev
1310 1320 return self.node(id)
1311 1321 if len(id) == 20:
1312 1322 # possibly a binary node
1313 1323 # odds of a binary node being all hex in ASCII are 1 in 10**25
1314 1324 try:
1315 1325 node = id
1316 1326 self.rev(node) # quick search the index
1317 1327 return node
1318 1328 except error.LookupError:
1319 1329 pass # may be partial hex id
1320 1330 try:
1321 1331 # str(rev)
1322 1332 rev = int(id)
1323 1333 if b"%d" % rev != id:
1324 1334 raise ValueError
1325 1335 if rev < 0:
1326 1336 rev = len(self) + rev
1327 1337 if rev < 0 or rev >= len(self):
1328 1338 raise ValueError
1329 1339 return self.node(rev)
1330 1340 except (ValueError, OverflowError):
1331 1341 pass
1332 1342 if len(id) == 40:
1333 1343 try:
1334 1344 # a full hex nodeid?
1335 1345 node = bin(id)
1336 1346 self.rev(node)
1337 1347 return node
1338 1348 except (TypeError, error.LookupError):
1339 1349 pass
1340 1350
1341 1351 def _partialmatch(self, id):
1342 1352 # we don't care about wdirfilenodeids as they should always be full hashes
1343 1353 maybewdir = wdirhex.startswith(id)
1344 1354 try:
1345 1355 partial = self.index.partialmatch(id)
1346 1356 if partial and self.hasnode(partial):
1347 1357 if maybewdir:
1348 1358 # single 'ff...' match in radix tree, ambiguous with wdir
1349 1359 raise error.RevlogError
1350 1360 return partial
1351 1361 if maybewdir:
1352 1362 # no 'ff...' match in radix tree, wdir identified
1353 1363 raise error.WdirUnsupported
1354 1364 return None
1355 1365 except error.RevlogError:
1356 1366 # parsers.c radix tree lookup gave multiple matches
1357 1367 # fast path: for unfiltered changelog, radix tree is accurate
1358 1368 if not getattr(self, 'filteredrevs', None):
1359 1369 raise error.AmbiguousPrefixLookupError(
1360 1370 id, self.indexfile, _(b'ambiguous identifier')
1361 1371 )
1362 1372 # fall through to slow path that filters hidden revisions
1363 1373 except (AttributeError, ValueError):
1364 1374 # we are pure python, or key was too short to search radix tree
1365 1375 pass
1366 1376
1367 1377 if id in self._pcache:
1368 1378 return self._pcache[id]
1369 1379
1370 1380 if len(id) <= 40:
1371 1381 try:
1372 1382 # hex(node)[:...]
1373 1383 l = len(id) // 2 # grab an even number of digits
1374 1384 prefix = bin(id[: l * 2])
1375 1385 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1376 1386 nl = [
1377 1387 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1378 1388 ]
1379 1389 if nullhex.startswith(id):
1380 1390 nl.append(nullid)
1381 1391 if len(nl) > 0:
1382 1392 if len(nl) == 1 and not maybewdir:
1383 1393 self._pcache[id] = nl[0]
1384 1394 return nl[0]
1385 1395 raise error.AmbiguousPrefixLookupError(
1386 1396 id, self.indexfile, _(b'ambiguous identifier')
1387 1397 )
1388 1398 if maybewdir:
1389 1399 raise error.WdirUnsupported
1390 1400 return None
1391 1401 except TypeError:
1392 1402 pass
1393 1403
1394 1404 def lookup(self, id):
1395 1405 """locate a node based on:
1396 1406 - revision number or str(revision number)
1397 1407 - nodeid or subset of hex nodeid
1398 1408 """
1399 1409 n = self._match(id)
1400 1410 if n is not None:
1401 1411 return n
1402 1412 n = self._partialmatch(id)
1403 1413 if n:
1404 1414 return n
1405 1415
1406 1416 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1407 1417
1408 1418 def shortest(self, node, minlength=1):
1409 1419 """Find the shortest unambiguous prefix that matches node."""
1410 1420
1411 1421 def isvalid(prefix):
1412 1422 try:
1413 1423 matchednode = self._partialmatch(prefix)
1414 1424 except error.AmbiguousPrefixLookupError:
1415 1425 return False
1416 1426 except error.WdirUnsupported:
1417 1427 # single 'ff...' match
1418 1428 return True
1419 1429 if matchednode is None:
1420 1430 raise error.LookupError(node, self.indexfile, _(b'no node'))
1421 1431 return True
1422 1432
1423 1433 def maybewdir(prefix):
1424 1434 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1425 1435
1426 1436 hexnode = hex(node)
1427 1437
1428 1438 def disambiguate(hexnode, minlength):
1429 1439 """Disambiguate against wdirid."""
1430 1440 for length in range(minlength, 41):
1431 1441 prefix = hexnode[:length]
1432 1442 if not maybewdir(prefix):
1433 1443 return prefix
1434 1444
1435 1445 if not getattr(self, 'filteredrevs', None):
1436 1446 try:
1437 1447 length = max(self.index.shortest(node), minlength)
1438 1448 return disambiguate(hexnode, length)
1439 1449 except error.RevlogError:
1440 1450 if node != wdirid:
1441 1451 raise error.LookupError(node, self.indexfile, _(b'no node'))
1442 1452 except AttributeError:
1443 1453 # Fall through to pure code
1444 1454 pass
1445 1455
1446 1456 if node == wdirid:
1447 1457 for length in range(minlength, 41):
1448 1458 prefix = hexnode[:length]
1449 1459 if isvalid(prefix):
1450 1460 return prefix
1451 1461
1452 1462 for length in range(minlength, 41):
1453 1463 prefix = hexnode[:length]
1454 1464 if isvalid(prefix):
1455 1465 return disambiguate(hexnode, length)
1456 1466
1457 1467 def cmp(self, node, text):
1458 1468 """compare text with a given file revision
1459 1469
1460 1470 returns True if text is different than what is stored.
1461 1471 """
1462 1472 p1, p2 = self.parents(node)
1463 1473 return storageutil.hashrevisionsha1(text, p1, p2) != node
1464 1474
1465 1475 def _cachesegment(self, offset, data):
1466 1476 """Add a segment to the revlog cache.
1467 1477
1468 1478 Accepts an absolute offset and the data that is at that location.
1469 1479 """
1470 1480 o, d = self._chunkcache
1471 1481 # try to add to existing cache
1472 1482 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1473 1483 self._chunkcache = o, d + data
1474 1484 else:
1475 1485 self._chunkcache = offset, data
1476 1486
1477 1487 def _readsegment(self, offset, length, df=None):
1478 1488 """Load a segment of raw data from the revlog.
1479 1489
1480 1490 Accepts an absolute offset, length to read, and an optional existing
1481 1491 file handle to read from.
1482 1492
1483 1493 If an existing file handle is passed, it will be seeked and the
1484 1494 original seek position will NOT be restored.
1485 1495
1486 1496 Returns a str or buffer of raw byte data.
1487 1497
1488 1498 Raises if the requested number of bytes could not be read.
1489 1499 """
1490 1500 # Cache data both forward and backward around the requested
1491 1501 # data, in a fixed size window. This helps speed up operations
1492 1502 # involving reading the revlog backwards.
1493 1503 cachesize = self._chunkcachesize
1494 1504 realoffset = offset & ~(cachesize - 1)
1495 1505 reallength = (
1496 1506 (offset + length + cachesize) & ~(cachesize - 1)
1497 1507 ) - realoffset
1498 1508 with self._datareadfp(df) as df:
1499 1509 df.seek(realoffset)
1500 1510 d = df.read(reallength)
1501 1511
1502 1512 self._cachesegment(realoffset, d)
1503 1513 if offset != realoffset or reallength != length:
1504 1514 startoffset = offset - realoffset
1505 1515 if len(d) - startoffset < length:
1506 1516 raise error.RevlogError(
1507 1517 _(
1508 1518 b'partial read of revlog %s; expected %d bytes from '
1509 1519 b'offset %d, got %d'
1510 1520 )
1511 1521 % (
1512 1522 self.indexfile if self._inline else self.datafile,
1513 1523 length,
1514 1524 realoffset,
1515 1525 len(d) - startoffset,
1516 1526 )
1517 1527 )
1518 1528
1519 1529 return util.buffer(d, startoffset, length)
1520 1530
1521 1531 if len(d) < length:
1522 1532 raise error.RevlogError(
1523 1533 _(
1524 1534 b'partial read of revlog %s; expected %d bytes from offset '
1525 1535 b'%d, got %d'
1526 1536 )
1527 1537 % (
1528 1538 self.indexfile if self._inline else self.datafile,
1529 1539 length,
1530 1540 offset,
1531 1541 len(d),
1532 1542 )
1533 1543 )
1534 1544
1535 1545 return d
1536 1546
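With the default 64KB cache the window math rounds a request out to aligned
boundaries; e.g. a 100-byte read at offset 70000:

    # realoffset = 70000 & ~65535                           == 65536
    # reallength = ((70000 + 100 + 65536) & ~65535) - 65536 == 65536
    # i.e. one aligned 64KB window that covers the requested span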
1537 1547 def _getsegment(self, offset, length, df=None):
1538 1548 """Obtain a segment of raw data from the revlog.
1539 1549
1540 1550 Accepts an absolute offset, length of bytes to obtain, and an
1541 1551 optional file handle to the already-opened revlog. If the file
542 552 handle is used, its original seek position will not be preserved.
1543 1553
1544 1554 Requests for data may be returned from a cache.
1545 1555
1546 1556 Returns a str or a buffer instance of raw byte data.
1547 1557 """
1548 1558 o, d = self._chunkcache
1549 1559 l = len(d)
1550 1560
1551 1561 # is it in the cache?
1552 1562 cachestart = offset - o
1553 1563 cacheend = cachestart + length
1554 1564 if cachestart >= 0 and cacheend <= l:
1555 1565 if cachestart == 0 and cacheend == l:
1556 1566 return d # avoid a copy
1557 1567 return util.buffer(d, cachestart, cacheend - cachestart)
1558 1568
1559 1569 return self._readsegment(offset, length, df=df)
1560 1570
1561 1571 def _getsegmentforrevs(self, startrev, endrev, df=None):
1562 1572 """Obtain a segment of raw data corresponding to a range of revisions.
1563 1573
1564 1574 Accepts the start and end revisions and an optional already-open
1565 1575 file handle to be used for reading. If the file handle is used, its
1566 1576 seek position will not be preserved.
1567 1577
1568 1578 Requests for data may be satisfied by a cache.
1569 1579
1570 1580 Returns a 2-tuple of (offset, data) for the requested range of
1571 1581 revisions. Offset is the integer offset from the beginning of the
1572 1582 revlog and data is a str or buffer of the raw byte data.
1573 1583
1574 1584 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1575 1585 to determine where each revision's data begins and ends.
1576 1586 """
1577 1587 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1578 1588 # (functions are expensive).
1579 1589 index = self.index
1580 1590 istart = index[startrev]
1581 1591 start = int(istart[0] >> 16)
1582 1592 if startrev == endrev:
1583 1593 end = start + istart[1]
1584 1594 else:
1585 1595 iend = index[endrev]
1586 1596 end = int(iend[0] >> 16) + iend[1]
1587 1597
1588 1598 if self._inline:
1589 1599 start += (startrev + 1) * self._io.size
1590 1600 end += (endrev + 1) * self._io.size
1591 1601 length = end - start
1592 1602
1593 1603 return start, self._getsegment(start, length, df=df)
1594 1604
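For inline revlogs the fixed-size index entries and the variable-size data
chunks are interleaved in the single .i file, so every data offset must be
shifted past the (rev + 1) index records that precede it; e.g. with 64-byte
v1 entries:

    # rev 2, start(2) == 300  ->  file offset 300 + (2 + 1) * 64 == 492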
1595 1605 def _chunk(self, rev, df=None):
1596 1606 """Obtain a single decompressed chunk for a revision.
1597 1607
1598 1608 Accepts an integer revision and an optional already-open file handle
1599 1609 to be used for reading. If used, the seek position of the file will not
1600 1610 be preserved.
1601 1611
1602 1612 Returns a str holding uncompressed data for the requested revision.
1603 1613 """
1604 1614 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1605 1615
1606 1616 def _chunks(self, revs, df=None, targetsize=None):
1607 1617 """Obtain decompressed chunks for the specified revisions.
1608 1618
1609 1619 Accepts an iterable of numeric revisions that are assumed to be in
1610 1620 ascending order. Also accepts an optional already-open file handle
1611 1621 to be used for reading. If used, the seek position of the file will
1612 1622 not be preserved.
1613 1623
1614 1624 This function is similar to calling ``self._chunk()`` multiple times,
1615 1625 but is faster.
1616 1626
1617 1627 Returns a list with decompressed data for each requested revision.
1618 1628 """
1619 1629 if not revs:
1620 1630 return []
1621 1631 start = self.start
1622 1632 length = self.length
1623 1633 inline = self._inline
1624 1634 iosize = self._io.size
1625 1635 buffer = util.buffer
1626 1636
1627 1637 l = []
1628 1638 ladd = l.append
1629 1639
1630 1640 if not self._withsparseread:
1631 1641 slicedchunks = (revs,)
1632 1642 else:
1633 1643 slicedchunks = deltautil.slicechunk(
1634 1644 self, revs, targetsize=targetsize
1635 1645 )
1636 1646
1637 1647 for revschunk in slicedchunks:
1638 1648 firstrev = revschunk[0]
1639 1649 # Skip trailing revisions with empty diff
1640 1650 for lastrev in revschunk[::-1]:
1641 1651 if length(lastrev) != 0:
1642 1652 break
1643 1653
1644 1654 try:
1645 1655 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1646 1656 except OverflowError:
1647 1657 # issue4215 - we can't cache a run of chunks greater than
1648 1658 # 2G on Windows
1649 1659 return [self._chunk(rev, df=df) for rev in revschunk]
1650 1660
1651 1661 decomp = self.decompress
1652 1662 for rev in revschunk:
1653 1663 chunkstart = start(rev)
1654 1664 if inline:
1655 1665 chunkstart += (rev + 1) * iosize
1656 1666 chunklength = length(rev)
1657 1667 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1658 1668
1659 1669 return l
1660 1670
1661 1671 def _chunkclear(self):
1662 1672 """Clear the raw chunk cache."""
1663 1673 self._chunkcache = (0, b'')
1664 1674
1665 1675 def deltaparent(self, rev):
1666 1676 """return deltaparent of the given revision"""
1667 1677 base = self.index[rev][3]
1668 1678 if base == rev:
1669 1679 return nullrev
1670 1680 elif self._generaldelta:
1671 1681 return base
1672 1682 else:
1673 1683 return rev - 1
1674 1684
1675 1685 def issnapshot(self, rev):
1676 1686 """tells whether rev is a snapshot
1677 1687 """
1678 1688 if not self._sparserevlog:
1679 1689 return self.deltaparent(rev) == nullrev
1680 1690 elif util.safehasattr(self.index, b'issnapshot'):
1681 1691 # directly assign the method to cache the testing and access
1682 1692 self.issnapshot = self.index.issnapshot
1683 1693 return self.issnapshot(rev)
1684 1694 if rev == nullrev:
1685 1695 return True
1686 1696 entry = self.index[rev]
1687 1697 base = entry[3]
1688 1698 if base == rev:
1689 1699 return True
1690 1700 if base == nullrev:
1691 1701 return True
1692 1702 p1 = entry[5]
1693 1703 p2 = entry[6]
1694 1704 if base == p1 or base == p2:
1695 1705 return False
1696 1706 return self.issnapshot(base)
1697 1707
1698 1708 def snapshotdepth(self, rev):
1699 1709 """number of snapshot in the chain before this one"""
1700 1710 if not self.issnapshot(rev):
1701 1711 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1702 1712 return len(self._deltachain(rev)[0]) - 1
1703 1713
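# Rough illustration (hypothetical helper; assumes a sparse-revlog). Walking
# ``deltaparent()`` from any revision eventually reaches a chain of
# snapshots ending in a full version whose delta parent is nullrev:
#
#   def snapshotchain(rl, rev):
#       while not rl.issnapshot(rev):
#           rev = rl.deltaparent(rev)
#       chain = []
#       while rev != nullrev and rl.issnapshot(rev):
#           chain.append(rev)
#           rev = rl.deltaparent(rev)
#       return chain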
1704 1714 def revdiff(self, rev1, rev2):
1705 1715 """return or calculate a delta between two revisions
1706 1716
1707 1717 The delta calculated is in binary form and is intended to be written to
1708 1718 revlog data directly. So this function needs raw revision data.
1709 1719 """
1710 1720 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1711 1721 return bytes(self._chunk(rev2))
1712 1722
1713 1723 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1714 1724
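# Usage sketch (hypothetical; assumes revisions 1 and 2 exist): the result
# is a binary patch over *raw* revision data, so applying it with mdiff
# reconstructs rev2's raw text from rev1's:
#
#   delta = rl.revdiff(1, 2)
#   assert mdiff.patches(rl.rawdata(1), [delta]) == bytes(rl.rawdata(2))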
1715 1725 def _processflags(self, text, flags, operation, raw=False):
1716 1726 """deprecated entry point to access flag processors"""
1717 1727 msg = b'_processflag(...) use the specialized variant'
1718 1728 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1719 1729 if raw:
1720 1730 return text, flagutil.processflagsraw(self, text, flags)
1721 1731 elif operation == b'read':
1722 1732 return flagutil.processflagsread(self, text, flags)
1723 1733 else: # write operation
1724 1734 return flagutil.processflagswrite(self, text, flags)
1725 1735
1726 1736 def revision(self, nodeorrev, _df=None, raw=False):
1727 1737 """return an uncompressed revision of a given node or revision
1728 1738 number.
1729 1739
1730 1740 _df - an existing file handle to read from. (internal-only)
1731 1741 raw - an optional argument specifying if the revision data is to be
1732 1742 treated as raw data when applying flag transforms. 'raw' should be set
1733 1743 to True when generating changegroups or in debug commands.
1734 1744 """
1735 1745 if raw:
1736 1746 msg = (
1737 1747 b'revlog.revision(..., raw=True) is deprecated, '
1738 1748 b'use revlog.rawdata(...)'
1739 1749 )
1740 1750 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1741 1751 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1742 1752
1743 1753 def sidedata(self, nodeorrev, _df=None):
1744 1754 """a map of extra data related to the changeset but not part of the hash
1745 1755
1746 1756 This function currently returns a dictionary. However, a more advanced
1747 1757 mapping object will likely be used in the future for more
1748 1758 efficient/lazy code.
1749 1759 """
1750 1760 return self._revisiondata(nodeorrev, _df)[1]
1751 1761
1752 1762 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1753 1763 # deal with <nodeorrev> argument type
1754 1764 if isinstance(nodeorrev, int):
1755 1765 rev = nodeorrev
1756 1766 node = self.node(rev)
1757 1767 else:
1758 1768 node = nodeorrev
1759 1769 rev = None
1760 1770
1761 1771 # fast path the special `nullid` rev
1762 1772 if node == nullid:
1763 1773 return b"", {}
1764 1774
1765 1775 # The text as stored inside the revlog. Might be the revision or might
1766 1776 # need to be processed to retrieve the revision.
1767 1777 rawtext = None
1768 1778
1769 1779 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1770 1780
1771 1781 if raw and validated:
1772 1782 # if we don't want to process the raw text and the raw
1773 1783 # text is cached, we can exit early.
1774 1784 return rawtext, {}
1775 1785 if rev is None:
1776 1786 rev = self.rev(node)
1777 1787 # the revlog's flag for this revision
1778 1788 # (usually alter its state or content)
1779 1789 flags = self.flags(rev)
1780 1790
1781 1791 if validated and flags == REVIDX_DEFAULT_FLAGS:
1782 1792 # no extra flags set, no flag processor runs, text = rawtext
1783 1793 return rawtext, {}
1784 1794
1785 1795 sidedata = {}
1786 1796 if raw:
1787 1797 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1788 1798 text = rawtext
1789 1799 else:
1790 1800 try:
1791 1801 r = flagutil.processflagsread(self, rawtext, flags)
1792 1802 except error.SidedataHashError as exc:
1793 1803 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1794 1804 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1795 1805 raise error.RevlogError(msg)
1796 1806 text, validatehash, sidedata = r
1797 1807 if validatehash:
1798 1808 self.checkhash(text, node, rev=rev)
1799 1809 if not validated:
1800 1810 self._revisioncache = (node, rev, rawtext)
1801 1811
1802 1812 return text, sidedata
1803 1813
1804 1814 def _rawtext(self, node, rev, _df=None):
1805 1815 """return the possibly unvalidated rawtext for a revision
1806 1816
1807 1817 returns (rev, rawtext, validated)
1808 1818 """
1809 1819
1810 1820 # revision in the cache (could be useful to apply delta)
1811 1821 cachedrev = None
1812 1822 # An intermediate text to apply deltas to
1813 1823 basetext = None
1814 1824
1815 1825 # Check if we have the entry in cache
1816 1826 # The cache entry looks like (node, rev, rawtext)
1817 1827 if self._revisioncache:
1818 1828 if self._revisioncache[0] == node:
1819 1829 return (rev, self._revisioncache[2], True)
1820 1830 cachedrev = self._revisioncache[1]
1821 1831
1822 1832 if rev is None:
1823 1833 rev = self.rev(node)
1824 1834
1825 1835 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1826 1836 if stopped:
1827 1837 basetext = self._revisioncache[2]
1828 1838
1829 1839 # drop cache to save memory, the caller is expected to
1830 1840 # update self._revisioncache after validating the text
1831 1841 self._revisioncache = None
1832 1842
1833 1843 targetsize = None
1834 1844 rawsize = self.index[rev][2]
1835 1845 if 0 <= rawsize:
1836 1846 targetsize = 4 * rawsize
1837 1847
1838 1848 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1839 1849 if basetext is None:
1840 1850 basetext = bytes(bins[0])
1841 1851 bins = bins[1:]
1842 1852
1843 1853 rawtext = mdiff.patches(basetext, bins)
1844 1854 del basetext # let us have a chance to free memory early
1845 1855 return (rev, rawtext, False)
1846 1856
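# Conceptually, the reconstruction above boils down to (simplified; cache
# handling elided):
#
#   chain, stopped = rl._deltachain(rev)
#   bins = rl._chunks(chain)
#   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])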
1847 1857 def rawdata(self, nodeorrev, _df=None):
1848 1858 """return an uncompressed raw data of a given node or revision number.
1849 1859
1850 1860 _df - an existing file handle to read from. (internal-only)
1851 1861 """
1852 1862 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1853 1863
1854 1864 def hash(self, text, p1, p2):
1855 1865 """Compute a node hash.
1856 1866
1857 1867 Available as a function so that subclasses can replace the hash
1858 1868 as needed.
1859 1869 """
1860 1870 return storageutil.hashrevisionsha1(text, p1, p2)
1861 1871
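# Sketch of the default scheme (storageutil.hashrevisionsha1): the node is
# the SHA-1 of the two parent nodes in sorted order followed by the text.
# An equivalent pure-Python sketch:
#
#   import hashlib
#   def nodehash(text, p1, p2):
#       a, b = sorted([p1, p2])
#       s = hashlib.sha1(a)
#       s.update(b)
#       s.update(text)
#       return s.digest()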
1862 1872 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1863 1873 """Check node hash integrity.
1864 1874
1865 1875 Available as a function so that subclasses can extend hash mismatch
1866 1876 behaviors as needed.
1867 1877 """
1868 1878 try:
1869 1879 if p1 is None and p2 is None:
1870 1880 p1, p2 = self.parents(node)
1871 1881 if node != self.hash(text, p1, p2):
1872 1882 # Clear the revision cache on hash failure. The revision cache
1873 1883 # only stores the raw revision and clearing the cache does have
1874 1884 # the side-effect that we won't have a cache hit when the raw
1875 1885 # revision data is accessed. But this case should be rare and
1876 1886 # it is extra work to teach the cache about the hash
1877 1887 # verification state.
1878 1888 if self._revisioncache and self._revisioncache[0] == node:
1879 1889 self._revisioncache = None
1880 1890
1881 1891 revornode = rev
1882 1892 if revornode is None:
1883 1893 revornode = templatefilters.short(hex(node))
1884 1894 raise error.RevlogError(
1885 1895 _(b"integrity check failed on %s:%s")
1886 1896 % (self.indexfile, pycompat.bytestr(revornode))
1887 1897 )
1888 1898 except error.RevlogError:
1889 1899 if self._censorable and storageutil.iscensoredtext(text):
1890 1900 raise error.CensoredNodeError(self.indexfile, node, text)
1891 1901 raise
1892 1902
1893 1903 def _enforceinlinesize(self, tr, fp=None):
1894 1904 """Check if the revlog is too big for inline and convert if so.
1895 1905
1896 1906 This should be called after revisions are added to the revlog. If the
1897 1907 revlog has grown too large to be an inline revlog, it will convert it
1898 1908 to use multiple index and data files.
1899 1909 """
1900 1910 tiprev = len(self) - 1
1901 1911 if (
1902 1912 not self._inline
1903 1913 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1904 1914 ):
1905 1915 return
1906 1916
1907 1917 trinfo = tr.find(self.indexfile)
1908 1918 if trinfo is None:
1909 1919 raise error.RevlogError(
1910 1920 _(b"%s not found in the transaction") % self.indexfile
1911 1921 )
1912 1922
1913 1923 trindex = trinfo[2]
1914 1924 if trindex is not None:
1915 1925 dataoff = self.start(trindex)
1916 1926 else:
1917 1927 # revlog was stripped at start of transaction, use all leftover data
1918 1928 trindex = len(self) - 1
1919 1929 dataoff = self.end(tiprev)
1920 1930
1921 1931 tr.add(self.datafile, dataoff)
1922 1932
1923 1933 if fp:
1924 1934 fp.flush()
1925 1935 fp.close()
1926 1936 # We can't use the cached file handle after close(). So prevent
1927 1937 # its usage.
1928 1938 self._writinghandles = None
1929 1939
1930 1940 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1931 1941 for r in self:
1932 1942 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1933 1943
1934 1944 with self._indexfp(b'w') as fp:
1935 1945 self.version &= ~FLAG_INLINE_DATA
1936 1946 self._inline = False
1937 1947 io = self._io
1938 1948 for i in self:
1939 1949 e = io.packentry(self.index[i], self.node, self.version, i)
1940 1950 fp.write(e)
1941 1951
1942 1952 # the temp file replaces the real index when we exit the context
1943 1953 # manager
1944 1954
1945 1955 tr.replace(self.indexfile, trindex * self._io.size)
1946 1956 self._chunkclear()
1947 1957
1948 1958 def _nodeduplicatecallback(self, transaction, node):
1949 1959 """called when trying to add a node already stored.
1950 1960 """
1951 1961
1952 1962 def addrevision(
1953 1963 self,
1954 1964 text,
1955 1965 transaction,
1956 1966 link,
1957 1967 p1,
1958 1968 p2,
1959 1969 cachedelta=None,
1960 1970 node=None,
1961 1971 flags=REVIDX_DEFAULT_FLAGS,
1962 1972 deltacomputer=None,
1963 1973 sidedata=None,
1964 1974 ):
1965 1975 """add a revision to the log
1966 1976
1967 1977 text - the revision data to add
1968 1978 transaction - the transaction object used for rollback
1969 1979 link - the linkrev data to add
1970 1980 p1, p2 - the parent nodeids of the revision
1971 1981 cachedelta - an optional precomputed delta
1972 1982 node - nodeid of revision; typically node is not specified, and it is
1973 1983 computed by default as hash(text, p1, p2), however subclasses might
1974 1984 use different hashing method (and override checkhash() in such case)
1975 1985 flags - the known flags to set on the revision
1976 1986 deltacomputer - an optional deltacomputer instance shared between
1977 1987 multiple calls
1978 1988 """
1979 1989 if link == nullrev:
1980 1990 raise error.RevlogError(
1981 1991 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1982 1992 )
1983 1993
1984 1994 if sidedata is None:
1985 1995 sidedata = {}
1986 1996 flags = flags & ~REVIDX_SIDEDATA
1987 1997 elif not self.hassidedata:
1988 1998 raise error.ProgrammingError(
1989 1999 _(b"trying to add sidedata to a revlog who don't support them")
1990 2000 )
1991 2001 else:
1992 2002 flags |= REVIDX_SIDEDATA
1993 2003
1994 2004 if flags:
1995 2005 node = node or self.hash(text, p1, p2)
1996 2006
1997 2007 rawtext, validatehash = flagutil.processflagswrite(
1998 2008 self, text, flags, sidedata=sidedata
1999 2009 )
2000 2010
2001 2011 # If the flag processor modifies the revision data, ignore any provided
2002 2012 # cachedelta.
2003 2013 if rawtext != text:
2004 2014 cachedelta = None
2005 2015
2006 2016 if len(rawtext) > _maxentrysize:
2007 2017 raise error.RevlogError(
2008 2018 _(
2009 2019 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2010 2020 )
2011 2021 % (self.indexfile, len(rawtext))
2012 2022 )
2013 2023
2014 2024 node = node or self.hash(rawtext, p1, p2)
2015 2025 if self.index.has_node(node):
2016 2026 return node
2017 2027
2018 2028 if validatehash:
2019 2029 self.checkhash(rawtext, node, p1=p1, p2=p2)
2020 2030
2021 2031 return self.addrawrevision(
2022 2032 rawtext,
2023 2033 transaction,
2024 2034 link,
2025 2035 p1,
2026 2036 p2,
2027 2037 node,
2028 2038 flags,
2029 2039 cachedelta=cachedelta,
2030 2040 deltacomputer=deltacomputer,
2031 2041 )
2032 2042
2033 2043 def addrawrevision(
2034 2044 self,
2035 2045 rawtext,
2036 2046 transaction,
2037 2047 link,
2038 2048 p1,
2039 2049 p2,
2040 2050 node,
2041 2051 flags,
2042 2052 cachedelta=None,
2043 2053 deltacomputer=None,
2044 2054 ):
2045 2055 """add a raw revision with known flags, node and parents
2046 2056 useful when reusing a revision not stored in this revlog (ex: received
2047 2057 over wire, or read from an external bundle).
2048 2058 """
2049 2059 dfh = None
2050 2060 if not self._inline:
2051 2061 dfh = self._datafp(b"a+")
2052 2062 ifh = self._indexfp(b"a+")
2053 2063 try:
2054 2064 return self._addrevision(
2055 2065 node,
2056 2066 rawtext,
2057 2067 transaction,
2058 2068 link,
2059 2069 p1,
2060 2070 p2,
2061 2071 flags,
2062 2072 cachedelta,
2063 2073 ifh,
2064 2074 dfh,
2065 2075 deltacomputer=deltacomputer,
2066 2076 )
2067 2077 finally:
2068 2078 if dfh:
2069 2079 dfh.close()
2070 2080 ifh.close()
2071 2081
2072 2082 def compress(self, data):
2073 2083 """Generate a possibly-compressed representation of data."""
2074 2084 if not data:
2075 2085 return b'', data
2076 2086
2077 2087 compressed = self._compressor.compress(data)
2078 2088
2079 2089 if compressed:
2080 2090 # The revlog compressor added the header in the returned data.
2081 2091 return b'', compressed
2082 2092
2083 2093 if data[0:1] == b'\0':
2084 2094 return b'', data
2085 2095 return b'u', data
2086 2096
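# Contract sketch: the returned (header, data) pair is stored as
# ``header + data``. An empty header means the compressor embedded its own
# type byte; b'u' marks literal (uncompressed) data; raw data that already
# starts with b'\0' needs no marker at all. For example:
#
#   header, out = rl.compress(b'some revision text')
#   stored = header + out  # what ends up in the revlog data file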
2087 2097 def decompress(self, data):
2088 2098 """Decompress a revlog chunk.
2089 2099
2090 2100 The chunk is expected to begin with a header identifying the
2091 2101 format type so it can be routed to an appropriate decompressor.
2092 2102 """
2093 2103 if not data:
2094 2104 return data
2095 2105
2096 2106 # Revlogs are read much more frequently than they are written and many
2097 2107 # chunks only take microseconds to decompress, so performance is
2098 2108 # important here.
2099 2109 #
2100 2110 # We can make a few assumptions about revlogs:
2101 2111 #
2102 2112 # 1) the majority of chunks will be compressed (as opposed to inline
2103 2113 # raw data).
2104 2114 # 2) decompressing *any* data will likely be at least 10x slower than
2105 2115 # returning raw inline data.
2106 2116 # 3) we want to prioritize common and officially supported compression
2107 2117 # engines
2108 2118 #
2109 2119 # It follows that we want to optimize for "decompress compressed data
2110 2120 # when encoded with common and officially supported compression engines"
2111 2121 # case over "raw data" and "data encoded by less common or non-official
2112 2122 # compression engines." That is why we have the inline lookup first
2113 2123 # followed by the compengines lookup.
2114 2124 #
2115 2125 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2116 2126 # compressed chunks. And this matters for changelog and manifest reads.
2117 2127 t = data[0:1]
2118 2128
2119 2129 if t == b'x':
2120 2130 try:
2121 2131 return _zlibdecompress(data)
2122 2132 except zlib.error as e:
2123 2133 raise error.RevlogError(
2124 2134 _(b'revlog decompress error: %s')
2125 2135 % stringutil.forcebytestr(e)
2126 2136 )
2127 2137 # '\0' is more common than 'u' so it goes first.
2128 2138 elif t == b'\0':
2129 2139 return data
2130 2140 elif t == b'u':
2131 2141 return util.buffer(data, 1)
2132 2142
2133 2143 try:
2134 2144 compressor = self._decompressors[t]
2135 2145 except KeyError:
2136 2146 try:
2137 2147 engine = util.compengines.forrevlogheader(t)
2138 2148 compressor = engine.revlogcompressor(self._compengineopts)
2139 2149 self._decompressors[t] = compressor
2140 2150 except KeyError:
2141 2151 raise error.RevlogError(_(b'unknown compression type %r') % t)
2142 2152
2143 2153 return compressor.decompress(data)
2144 2154
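# Dispatch sketch, mirroring the code above: the first byte of a stored
# chunk selects the decompressor.
#
#   b'\0' -> raw data, returned as-is
#   b'u'  -> literal data, returned without the marker byte
#   b'x'  -> zlib stream, inflated with zlib.decompress
#   other -> resolved via util.compengines.forrevlogheader (e.g. zstd)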
2145 2155 def _addrevision(
2146 2156 self,
2147 2157 node,
2148 2158 rawtext,
2149 2159 transaction,
2150 2160 link,
2151 2161 p1,
2152 2162 p2,
2153 2163 flags,
2154 2164 cachedelta,
2155 2165 ifh,
2156 2166 dfh,
2157 2167 alwayscache=False,
2158 2168 deltacomputer=None,
2159 2169 ):
2160 2170 """internal function to add revisions to the log
2161 2171
2162 2172 see addrevision for argument descriptions.
2163 2173
2164 2174 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2165 2175
2166 2176 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2167 2177 be used.
2168 2178
2169 2179 invariants:
2170 2180 - rawtext is optional (can be None); if not set, cachedelta must be set.
2171 2181 if both are set, they must correspond to each other.
2172 2182 """
2173 2183 if node == nullid:
2174 2184 raise error.RevlogError(
2175 2185 _(b"%s: attempt to add null revision") % self.indexfile
2176 2186 )
2177 2187 if node == wdirid or node in wdirfilenodeids:
2178 2188 raise error.RevlogError(
2179 2189 _(b"%s: attempt to add wdir revision") % self.indexfile
2180 2190 )
2181 2191
2182 2192 if self._inline:
2183 2193 fh = ifh
2184 2194 else:
2185 2195 fh = dfh
2186 2196
2187 2197 btext = [rawtext]
2188 2198
2189 2199 curr = len(self)
2190 2200 prev = curr - 1
2191 2201 offset = self.end(prev)
2192 2202 p1r, p2r = self.rev(p1), self.rev(p2)
2193 2203
2194 2204 # full versions are inserted when the needed deltas
2195 2205 # become comparable to the uncompressed text
2196 2206 if rawtext is None:
2197 2207 # need the rawtext size before it is changed by flag processors,
2198 2208 # which is the non-raw size. Use revlog explicitly to avoid filelog's
2199 2209 # extra logic that might remove metadata size.
2200 2210 textlen = mdiff.patchedsize(
2201 2211 revlog.size(self, cachedelta[0]), cachedelta[1]
2202 2212 )
2203 2213 else:
2204 2214 textlen = len(rawtext)
2205 2215
2206 2216 if deltacomputer is None:
2207 2217 deltacomputer = deltautil.deltacomputer(self)
2208 2218
2209 2219 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2210 2220
2211 2221 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2212 2222
2213 2223 e = (
2214 2224 offset_type(offset, flags),
2215 2225 deltainfo.deltalen,
2216 2226 textlen,
2217 2227 deltainfo.base,
2218 2228 link,
2219 2229 p1r,
2220 2230 p2r,
2221 2231 node,
2222 2232 )
2223 2233 self.index.append(e)
2224 2234
2225 2235 entry = self._io.packentry(e, self.node, self.version, curr)
2226 2236 self._writeentry(
2227 2237 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2228 2238 )
2229 2239
2230 2240 rawtext = btext[0]
2231 2241
2232 2242 if alwayscache and rawtext is None:
2233 2243 rawtext = deltacomputer.buildtext(revinfo, fh)
2234 2244
2235 2245 if type(rawtext) == bytes: # only accept immutable objects
2236 2246 self._revisioncache = (node, curr, rawtext)
2237 2247 self._chainbasecache[curr] = deltainfo.chainbase
2238 2248 return node
2239 2249
2240 2250 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2241 2251 # Files opened in a+ mode have inconsistent behavior on various
2242 2252 # platforms. Windows requires that a file positioning call be made
2243 2253 # when the file handle transitions between reads and writes. See
2244 2254 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2245 2255 # platforms, Python or the platform itself can be buggy. Some versions
2246 2256 # of Solaris have been observed to not append at the end of the file
2247 2257 # if the file was seeked to before the end. See issue4943 for more.
2248 2258 #
2249 2259 # We work around this issue by inserting a seek() before writing.
2250 2260 # Note: This is likely not necessary on Python 3. However, because
2251 2261 # the file handle is reused for reads and may be seeked there, we need
2252 2262 # to be careful before changing this.
2253 2263 ifh.seek(0, os.SEEK_END)
2254 2264 if dfh:
2255 2265 dfh.seek(0, os.SEEK_END)
2256 2266
2257 2267 curr = len(self) - 1
2258 2268 if not self._inline:
2259 2269 transaction.add(self.datafile, offset)
2260 2270 transaction.add(self.indexfile, curr * len(entry))
2261 2271 if data[0]:
2262 2272 dfh.write(data[0])
2263 2273 dfh.write(data[1])
2264 2274 ifh.write(entry)
2265 2275 else:
2266 2276 offset += curr * self._io.size
2267 2277 transaction.add(self.indexfile, offset, curr)
2268 2278 ifh.write(entry)
2269 2279 ifh.write(data[0])
2270 2280 ifh.write(data[1])
2271 2281 self._enforceinlinesize(transaction, ifh)
2272 2282
2273 2283 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2274 2284 """
2275 2285 add a delta group
2276 2286
2277 2287 Given a set of deltas, add them to the revision log. The
2278 2288 first delta is against its parent, which should be in our
2279 2289 log; the rest are against the previous delta.
2280 2290
2281 2291 If ``addrevisioncb`` is defined, it will be called with arguments of
2282 2292 this revlog and the node that was added.
2283 2293 """
2284 2294
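# Each item in ``deltas`` is expected to unpack as (mirroring the loop
# below):
#
#   node, p1, p2, linknode, deltabase, delta, flags = data
#
# where ``delta`` is a binary patch against the revision ``deltabase``.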
2285 2295 if self._writinghandles:
2286 2296 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2287 2297
2288 2298 nodes = []
2289 2299
2290 2300 r = len(self)
2291 2301 end = 0
2292 2302 if r:
2293 2303 end = self.end(r - 1)
2294 2304 ifh = self._indexfp(b"a+")
2295 2305 isize = r * self._io.size
2296 2306 if self._inline:
2297 2307 transaction.add(self.indexfile, end + isize, r)
2298 2308 dfh = None
2299 2309 else:
2300 2310 transaction.add(self.indexfile, isize, r)
2301 2311 transaction.add(self.datafile, end)
2302 2312 dfh = self._datafp(b"a+")
2303 2313
2304 2314 def flush():
2305 2315 if dfh:
2306 2316 dfh.flush()
2307 2317 ifh.flush()
2308 2318
2309 2319 self._writinghandles = (ifh, dfh)
2310 2320
2311 2321 try:
2312 2322 deltacomputer = deltautil.deltacomputer(self)
2313 2323 # loop through our set of deltas
2314 2324 for data in deltas:
2315 2325 node, p1, p2, linknode, deltabase, delta, flags = data
2316 2326 link = linkmapper(linknode)
2317 2327 flags = flags or REVIDX_DEFAULT_FLAGS
2318 2328
2319 2329 nodes.append(node)
2320 2330
2321 2331 if self.index.has_node(node):
2322 2332 self._nodeduplicatecallback(transaction, node)
2323 2333 # this can happen if two branches make the same change
2324 2334 continue
2325 2335
2326 2336 for p in (p1, p2):
2327 2337 if not self.index.has_node(p):
2328 2338 raise error.LookupError(
2329 2339 p, self.indexfile, _(b'unknown parent')
2330 2340 )
2331 2341
2332 2342 if not self.index.has_node(deltabase):
2333 2343 raise error.LookupError(
2334 2344 deltabase, self.indexfile, _(b'unknown delta base')
2335 2345 )
2336 2346
2337 2347 baserev = self.rev(deltabase)
2338 2348
2339 2349 if baserev != nullrev and self.iscensored(baserev):
2340 2350 # if base is censored, delta must be full replacement in a
2341 2351 # single patch operation
2342 2352 hlen = struct.calcsize(b">lll")
2343 2353 oldlen = self.rawsize(baserev)
2344 2354 newlen = len(delta) - hlen
2345 2355 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2346 2356 raise error.CensoredBaseError(
2347 2357 self.indexfile, self.node(baserev)
2348 2358 )
2349 2359
2350 2360 if not flags and self._peek_iscensored(baserev, delta, flush):
2351 2361 flags |= REVIDX_ISCENSORED
2352 2362
2353 2363 # We assume consumers of addrevisioncb will want to retrieve
2354 2364 # the added revision, which will require a call to
2355 2365 # revision(). revision() will fast path if there is a cache
2356 2366 # hit. So, we tell _addrevision() to always cache in this case.
2357 2367 # We're only using addgroup() in the context of changegroup
2358 2368 # generation so the revision data can always be handled as raw
2359 2369 # by the flagprocessor.
2360 2370 self._addrevision(
2361 2371 node,
2362 2372 None,
2363 2373 transaction,
2364 2374 link,
2365 2375 p1,
2366 2376 p2,
2367 2377 flags,
2368 2378 (baserev, delta),
2369 2379 ifh,
2370 2380 dfh,
2371 2381 alwayscache=bool(addrevisioncb),
2372 2382 deltacomputer=deltacomputer,
2373 2383 )
2374 2384
2375 2385 if addrevisioncb:
2376 2386 addrevisioncb(self, node)
2377 2387
2378 2388 if not dfh and not self._inline:
2379 2389 # addrevision switched from inline to conventional
2380 2390 # reopen the index
2381 2391 ifh.close()
2382 2392 dfh = self._datafp(b"a+")
2383 2393 ifh = self._indexfp(b"a+")
2384 2394 self._writinghandles = (ifh, dfh)
2385 2395 finally:
2386 2396 self._writinghandles = None
2387 2397
2388 2398 if dfh:
2389 2399 dfh.close()
2390 2400 ifh.close()
2391 2401
2392 2402 return nodes
2393 2403
2394 2404 def iscensored(self, rev):
2395 2405 """Check if a file revision is censored."""
2396 2406 if not self._censorable:
2397 2407 return False
2398 2408
2399 2409 return self.flags(rev) & REVIDX_ISCENSORED
2400 2410
2401 2411 def _peek_iscensored(self, baserev, delta, flush):
2402 2412 """Quickly check if a delta produces a censored revision."""
2403 2413 if not self._censorable:
2404 2414 return False
2405 2415
2406 2416 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2407 2417
2408 2418 def getstrippoint(self, minlink):
2409 2419 """find the minimum rev that must be stripped to strip the linkrev
2410 2420
2411 2421 Returns a tuple containing the minimum rev and a set of all revs that
2412 2422 have linkrevs that will be broken by this strip.
2413 2423 """
2414 2424 return storageutil.resolvestripinfo(
2415 2425 minlink,
2416 2426 len(self) - 1,
2417 2427 self.headrevs(),
2418 2428 self.linkrev,
2419 2429 self.parentrevs,
2420 2430 )
2421 2431
2422 2432 def strip(self, minlink, transaction):
2423 2433 """truncate the revlog on the first revision with a linkrev >= minlink
2424 2434
2425 2435 This function is called when we're stripping revision minlink and
2426 2436 its descendants from the repository.
2427 2437
2428 2438 We have to remove all revisions with linkrev >= minlink, because
2429 2439 the equivalent changelog revisions will be renumbered after the
2430 2440 strip.
2431 2441
2432 2442 So we truncate the revlog on the first of these revisions, and
2433 2443 trust that the caller has saved the revisions that shouldn't be
2434 2444 removed and that it'll re-add them after this truncation.
2435 2445 """
2436 2446 if len(self) == 0:
2437 2447 return
2438 2448
2439 2449 rev, _ = self.getstrippoint(minlink)
2440 2450 if rev == len(self):
2441 2451 return
2442 2452
2443 2453 # first truncate the files on disk
2444 2454 end = self.start(rev)
2445 2455 if not self._inline:
2446 2456 transaction.add(self.datafile, end)
2447 2457 end = rev * self._io.size
2448 2458 else:
2449 2459 end += rev * self._io.size
2450 2460
2451 2461 transaction.add(self.indexfile, end)
2452 2462
2453 2463 # then reset internal state in memory to forget those revisions
2454 2464 self._revisioncache = None
2455 2465 self._chaininfocache = {}
2456 2466 self._chunkclear()
2457 2467
2458 2468 del self.index[rev:-1]
2459 2469
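# Usage sketch (hypothetical; ``tr`` is an open transaction): inspect what
# a strip would remove before truncating.
#
#   strippoint, brokenrevs = rl.getstrippoint(minlink)
#   rl.strip(minlink, tr)  # truncates from ``strippoint`` onward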
2460 2470 def checksize(self):
2461 2471 """Check size of index and data files
2462 2472
2463 2473 return a (dd, di) tuple.
2464 2474 - dd: extra bytes for the "data" file
2465 2475 - di: extra bytes for the "index" file
2466 2476
2467 2477 A healthy revlog will return (0, 0).
2468 2478 """
2469 2479 expected = 0
2470 2480 if len(self):
2471 2481 expected = max(0, self.end(len(self) - 1))
2472 2482
2473 2483 try:
2474 2484 with self._datafp() as f:
2475 2485 f.seek(0, io.SEEK_END)
2476 2486 actual = f.tell()
2477 2487 dd = actual - expected
2478 2488 except IOError as inst:
2479 2489 if inst.errno != errno.ENOENT:
2480 2490 raise
2481 2491 dd = 0
2482 2492
2483 2493 try:
2484 2494 f = self.opener(self.indexfile)
2485 2495 f.seek(0, io.SEEK_END)
2486 2496 actual = f.tell()
2487 2497 f.close()
2488 2498 s = self._io.size
2489 2499 i = max(0, actual // s)
2490 2500 di = actual - (i * s)
2491 2501 if self._inline:
2492 2502 databytes = 0
2493 2503 for r in self:
2494 2504 databytes += max(0, self.length(r))
2495 2505 dd = 0
2496 2506 di = actual - len(self) * s - databytes
2497 2507 except IOError as inst:
2498 2508 if inst.errno != errno.ENOENT:
2499 2509 raise
2500 2510 di = 0
2501 2511
2502 2512 return (dd, di)
2503 2513
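# Usage sketch (hypothetical): a quick health check.
#
#   dd, di = rl.checksize()
#   if (dd, di) != (0, 0):
#       print('%d stray data bytes, %d stray index bytes' % (dd, di))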
2504 2514 def files(self):
2505 2515 res = [self.indexfile]
2506 2516 if not self._inline:
2507 2517 res.append(self.datafile)
2508 2518 return res
2509 2519
2510 2520 def emitrevisions(
2511 2521 self,
2512 2522 nodes,
2513 2523 nodesorder=None,
2514 2524 revisiondata=False,
2515 2525 assumehaveparentrevisions=False,
2516 2526 deltamode=repository.CG_DELTAMODE_STD,
2517 2527 ):
2518 2528 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2519 2529 raise error.ProgrammingError(
2520 2530 b'unhandled value for nodesorder: %s' % nodesorder
2521 2531 )
2522 2532
2523 2533 if nodesorder is None and not self._generaldelta:
2524 2534 nodesorder = b'storage'
2525 2535
2526 2536 if (
2527 2537 not self._storedeltachains
2528 2538 and deltamode != repository.CG_DELTAMODE_PREV
2529 2539 ):
2530 2540 deltamode = repository.CG_DELTAMODE_FULL
2531 2541
2532 2542 return storageutil.emitrevisions(
2533 2543 self,
2534 2544 nodes,
2535 2545 nodesorder,
2536 2546 revlogrevisiondelta,
2537 2547 deltaparentfn=self.deltaparent,
2538 2548 candeltafn=self.candelta,
2539 2549 rawsizefn=self.rawsize,
2540 2550 revdifffn=self.revdiff,
2541 2551 flagsfn=self.flags,
2542 2552 deltamode=deltamode,
2543 2553 revisiondata=revisiondata,
2544 2554 assumehaveparentrevisions=assumehaveparentrevisions,
2545 2555 )
2546 2556
2547 2557 DELTAREUSEALWAYS = b'always'
2548 2558 DELTAREUSESAMEREVS = b'samerevs'
2549 2559 DELTAREUSENEVER = b'never'
2550 2560
2551 2561 DELTAREUSEFULLADD = b'fulladd'
2552 2562
2553 2563 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2554 2564
2555 2565 def clone(
2556 2566 self,
2557 2567 tr,
2558 2568 destrevlog,
2559 2569 addrevisioncb=None,
2560 2570 deltareuse=DELTAREUSESAMEREVS,
2561 2571 forcedeltabothparents=None,
2562 2572 sidedatacompanion=None,
2563 2573 ):
2564 2574 """Copy this revlog to another, possibly with format changes.
2565 2575
2566 2576 The destination revlog will contain the same revisions and nodes.
2567 2577 However, it may not be bit-for-bit identical due to e.g. delta encoding
2568 2578 differences.
2569 2579
2570 2580 The ``deltareuse`` argument controls how deltas from the existing revlog
2571 2581 are preserved in the destination revlog. The argument can have the
2572 2582 following values:
2573 2583
2574 2584 DELTAREUSEALWAYS
2575 2585 Deltas will always be reused (if possible), even if the destination
2576 2586 revlog would not select the same revisions for the delta. This is the
2577 2587 fastest mode of operation.
2578 2588 DELTAREUSESAMEREVS
2579 2589 Deltas will be reused if the destination revlog would pick the same
2580 2590 revisions for the delta. This mode strikes a balance between speed
2581 2591 and optimization.
2582 2592 DELTAREUSENEVER
2583 2593 Deltas will never be reused. This is the slowest mode of execution.
2584 2594 This mode can be used to recompute deltas (e.g. if the diff/delta
2585 2595 algorithm changes).
2586 2596 DELTAREUSEFULLADD
2587 2597 Revisions will be re-added as if they were new content. This is
2588 2598 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2589 2599 e.g. large file detection and handling.
2590 2600
2591 2601 Delta computation can be slow, so the choice of delta reuse policy can
2592 2602 significantly affect run time.
2593 2603
2594 2604 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2595 2605 two extremes. Deltas will be reused if they are appropriate. But if the
2596 2606 delta could choose a better revision, it will do so. This means if you
2597 2607 are converting a non-generaldelta revlog to a generaldelta revlog,
2598 2608 deltas will be recomputed if the delta's parent isn't a parent of the
2599 2609 revision.
2600 2610
2601 2611 In addition to the delta policy, the ``forcedeltabothparents``
2602 2612 argument controls whether to force computing deltas against both
2603 2613 parents for merges. If None, the destination revlog's current setting is used.
2604 2614
2605 2615 If not None, the ``sidedatacompanion`` is a callable that accepts two
2606 2616 arguments:
2607 2617
2608 2618 (srcrevlog, rev)
2609 2619
2610 2620 and returns a triplet that controls changes to sidedata content from the
2611 2621 old revision to the new clone result:
2612 2622
2613 2623 (dropall, filterout, update)
2614 2624
2615 2625 * if `dropall` is True, all sidedata should be dropped
2616 2626 * `filterout` is a set of sidedata keys that should be dropped
2617 2627 * `update` is a mapping of additional/new key -> value
2618 2628 """
2619 2629 if deltareuse not in self.DELTAREUSEALL:
2620 2630 raise ValueError(
2621 2631 _(b'value for deltareuse invalid: %s') % deltareuse
2622 2632 )
2623 2633
2624 2634 if len(destrevlog):
2625 2635 raise ValueError(_(b'destination revlog is not empty'))
2626 2636
2627 2637 if getattr(self, 'filteredrevs', None):
2628 2638 raise ValueError(_(b'source revlog has filtered revisions'))
2629 2639 if getattr(destrevlog, 'filteredrevs', None):
2630 2640 raise ValueError(_(b'destination revlog has filtered revisions'))
2631 2641
2632 2642 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2633 2643 # if possible.
2634 2644 oldlazydelta = destrevlog._lazydelta
2635 2645 oldlazydeltabase = destrevlog._lazydeltabase
2636 2646 oldamd = destrevlog._deltabothparents
2637 2647
2638 2648 try:
2639 2649 if deltareuse == self.DELTAREUSEALWAYS:
2640 2650 destrevlog._lazydeltabase = True
2641 2651 destrevlog._lazydelta = True
2642 2652 elif deltareuse == self.DELTAREUSESAMEREVS:
2643 2653 destrevlog._lazydeltabase = False
2644 2654 destrevlog._lazydelta = True
2645 2655 elif deltareuse == self.DELTAREUSENEVER:
2646 2656 destrevlog._lazydeltabase = False
2647 2657 destrevlog._lazydelta = False
2648 2658
2649 2659 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2650 2660
2651 2661 self._clone(
2652 2662 tr,
2653 2663 destrevlog,
2654 2664 addrevisioncb,
2655 2665 deltareuse,
2656 2666 forcedeltabothparents,
2657 2667 sidedatacompanion,
2658 2668 )
2659 2669
2660 2670 finally:
2661 2671 destrevlog._lazydelta = oldlazydelta
2662 2672 destrevlog._lazydeltabase = oldlazydeltabase
2663 2673 destrevlog._deltabothparents = oldamd
2664 2674
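# Usage sketch (hypothetical names; ``tr`` is an open transaction, ``dest``
# an empty destination revlog): recompute every delta while copying.
#
#   src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)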
2665 2675 def _clone(
2666 2676 self,
2667 2677 tr,
2668 2678 destrevlog,
2669 2679 addrevisioncb,
2670 2680 deltareuse,
2671 2681 forcedeltabothparents,
2672 2682 sidedatacompanion,
2673 2683 ):
2674 2684 """perform the core duty of `revlog.clone` after parameter processing"""
2675 2685 deltacomputer = deltautil.deltacomputer(destrevlog)
2676 2686 index = self.index
2677 2687 for rev in self:
2678 2688 entry = index[rev]
2679 2689
2680 2690 # Some classes override linkrev to take filtered revs into
2681 2691 # account. Use raw entry from index.
2682 2692 flags = entry[0] & 0xFFFF
2683 2693 linkrev = entry[4]
2684 2694 p1 = index[entry[5]][7]
2685 2695 p2 = index[entry[6]][7]
2686 2696 node = entry[7]
2687 2697
2688 2698 sidedataactions = (False, [], {})
2689 2699 if sidedatacompanion is not None:
2690 2700 sidedataactions = sidedatacompanion(self, rev)
2691 2701
2692 2702 # (Possibly) reuse the delta from the revlog if allowed and
2693 2703 # the revlog chunk is a delta.
2694 2704 cachedelta = None
2695 2705 rawtext = None
2696 2706 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2697 2707 dropall, filterout, update = sidedataactions
2698 2708 text, sidedata = self._revisiondata(rev)
2699 2709 if dropall:
2700 2710 sidedata = {}
2701 2711 for key in filterout:
2702 2712 sidedata.pop(key, None)
2703 2713 sidedata.update(update)
2704 2714 if not sidedata:
2705 2715 sidedata = None
2706 2716 destrevlog.addrevision(
2707 2717 text,
2708 2718 tr,
2709 2719 linkrev,
2710 2720 p1,
2711 2721 p2,
2712 2722 cachedelta=cachedelta,
2713 2723 node=node,
2714 2724 flags=flags,
2715 2725 deltacomputer=deltacomputer,
2716 2726 sidedata=sidedata,
2717 2727 )
2718 2728 else:
2719 2729 if destrevlog._lazydelta:
2720 2730 dp = self.deltaparent(rev)
2721 2731 if dp != nullrev:
2722 2732 cachedelta = (dp, bytes(self._chunk(rev)))
2723 2733
2724 2734 if not cachedelta:
2725 2735 rawtext = self.rawdata(rev)
2726 2736
2727 2737 ifh = destrevlog.opener(
2728 2738 destrevlog.indexfile, b'a+', checkambig=False
2729 2739 )
2730 2740 dfh = None
2731 2741 if not destrevlog._inline:
2732 2742 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2733 2743 try:
2734 2744 destrevlog._addrevision(
2735 2745 node,
2736 2746 rawtext,
2737 2747 tr,
2738 2748 linkrev,
2739 2749 p1,
2740 2750 p2,
2741 2751 flags,
2742 2752 cachedelta,
2743 2753 ifh,
2744 2754 dfh,
2745 2755 deltacomputer=deltacomputer,
2746 2756 )
2747 2757 finally:
2748 2758 if dfh:
2749 2759 dfh.close()
2750 2760 ifh.close()
2751 2761
2752 2762 if addrevisioncb:
2753 2763 addrevisioncb(self, rev, node)
2754 2764
2755 2765 def censorrevision(self, tr, censornode, tombstone=b''):
2756 2766 if (self.version & 0xFFFF) == REVLOGV0:
2757 2767 raise error.RevlogError(
2758 2768 _(b'cannot censor with version %d revlogs') % self.version
2759 2769 )
2760 2770
2761 2771 censorrev = self.rev(censornode)
2762 2772 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2763 2773
2764 2774 if len(tombstone) > self.rawsize(censorrev):
2765 2775 raise error.Abort(
2766 2776 _(b'censor tombstone must be no longer than censored data')
2767 2777 )
2768 2778
2769 2779 # Rewriting the revlog in place is hard. Our strategy for censoring is
2770 2780 # to create a new revlog, copy all revisions to it, then replace the
2771 2781 # revlogs on transaction close.
2772 2782
2773 2783 newindexfile = self.indexfile + b'.tmpcensored'
2774 2784 newdatafile = self.datafile + b'.tmpcensored'
2775 2785
2776 2786 # This is a bit dangerous. We could easily have a mismatch of state.
2777 2787 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2778 2788 newrl.version = self.version
2779 2789 newrl._generaldelta = self._generaldelta
2780 2790 newrl._io = self._io
2781 2791
2782 2792 for rev in self.revs():
2783 2793 node = self.node(rev)
2784 2794 p1, p2 = self.parents(node)
2785 2795
2786 2796 if rev == censorrev:
2787 2797 newrl.addrawrevision(
2788 2798 tombstone,
2789 2799 tr,
2790 2800 self.linkrev(censorrev),
2791 2801 p1,
2792 2802 p2,
2793 2803 censornode,
2794 2804 REVIDX_ISCENSORED,
2795 2805 )
2796 2806
2797 2807 if newrl.deltaparent(rev) != nullrev:
2798 2808 raise error.Abort(
2799 2809 _(
2800 2810 b'censored revision stored as delta; '
2801 2811 b'cannot censor'
2802 2812 ),
2803 2813 hint=_(
2804 2814 b'censoring of revlogs is not '
2805 2815 b'fully implemented; please report '
2806 2816 b'this bug'
2807 2817 ),
2808 2818 )
2809 2819 continue
2810 2820
2811 2821 if self.iscensored(rev):
2812 2822 if self.deltaparent(rev) != nullrev:
2813 2823 raise error.Abort(
2814 2824 _(
2815 2825 b'cannot censor due to censored '
2816 2826 b'revision having delta stored'
2817 2827 )
2818 2828 )
2819 2829 rawtext = self._chunk(rev)
2820 2830 else:
2821 2831 rawtext = self.rawdata(rev)
2822 2832
2823 2833 newrl.addrawrevision(
2824 2834 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2825 2835 )
2826 2836
2827 2837 tr.addbackup(self.indexfile, location=b'store')
2828 2838 if not self._inline:
2829 2839 tr.addbackup(self.datafile, location=b'store')
2830 2840
2831 2841 self.opener.rename(newrl.indexfile, self.indexfile)
2832 2842 if not self._inline:
2833 2843 self.opener.rename(newrl.datafile, self.datafile)
2834 2844
2835 2845 self.clearcaches()
2836 2846 self._loadindex()
2837 2847
2838 2848 def verifyintegrity(self, state):
2839 2849 """Verifies the integrity of the revlog.
2840 2850
2841 2851 Yields ``revlogproblem`` instances describing problems that are
2842 2852 found.
2843 2853 """
2844 2854 dd, di = self.checksize()
2845 2855 if dd:
2846 2856 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2847 2857 if di:
2848 2858 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2849 2859
2850 2860 version = self.version & 0xFFFF
2851 2861
2852 2862 # The verifier tells us what version revlog we should be.
2853 2863 if version != state[b'expectedversion']:
2854 2864 yield revlogproblem(
2855 2865 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2856 2866 % (self.indexfile, version, state[b'expectedversion'])
2857 2867 )
2858 2868
2859 2869 state[b'skipread'] = set()
2860 2870
2861 2871 for rev in self:
2862 2872 node = self.node(rev)
2863 2873
2864 2874 # Verify contents. 4 cases to care about:
2865 2875 #
2866 2876 # common: the most common case
2867 2877 # rename: with a rename
2868 2878 # meta: file content starts with b'\1\n', the metadata
2869 2879 # header defined in filelog.py, but without a rename
2870 2880 # ext: content stored externally
2871 2881 #
2872 2882 # More formally, their differences are shown below:
2873 2883 #
2874 2884 # | common | rename | meta | ext
2875 2885 # -------------------------------------------------------
2876 2886 # flags() | 0 | 0 | 0 | not 0
2877 2887 # renamed() | False | True | False | ?
2878 2888 # rawtext[0:2]=='\1\n'| False | True | True | ?
2879 2889 #
2880 2890 # "rawtext" means the raw text stored in revlog data, which
2881 2891 # could be retrieved by "rawdata(rev)". "text"
2882 2892 # mentioned below is "revision(rev)".
2883 2893 #
2884 2894 # There are 3 different lengths stored physically:
2885 2895 # 1. L1: rawsize, stored in revlog index
2886 2896 # 2. L2: len(rawtext), stored in revlog data
2887 2897 # 3. L3: len(text), stored in revlog data if flags==0, or
2888 2898 # possibly somewhere else if flags!=0
2889 2899 #
2890 2900 # L1 should be equal to L2. L3 could be different from them.
2891 2901 # "text" may or may not affect commit hash depending on flag
2892 2902 # processors (see flagutil.addflagprocessor).
2893 2903 #
2894 2904 # | common | rename | meta | ext
2895 2905 # -------------------------------------------------
2896 2906 # rawsize() | L1 | L1 | L1 | L1
2897 2907 # size() | L1 | L2-LM | L1(*) | L1 (?)
2898 2908 # len(rawtext) | L2 | L2 | L2 | L2
2899 2909 # len(text) | L2 | L2 | L2 | L3
2900 2910 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2901 2911 #
2902 2912 # LM: length of metadata, depending on rawtext
2903 2913 # (*): not ideal, see comment in filelog.size
2904 2914 # (?): could be "- len(meta)" if the resolved content has
2905 2915 # rename metadata
2906 2916 #
2907 2917 # Checks needed to be done:
2908 2918 # 1. length check: L1 == L2, in all cases.
2909 2919 # 2. hash check: depending on flag processor, we may need to
2910 2920 # use either "text" (external), or "rawtext" (in revlog).
2911 2921
2912 2922 try:
2913 2923 skipflags = state.get(b'skipflags', 0)
2914 2924 if skipflags:
2915 2925 skipflags &= self.flags(rev)
2916 2926
2917 if skipflags:
2918 state[b'skipread'].add(node)
2919 else:
2920 # Side-effect: read content and verify hash.
2921 self.revision(node)
2927 _verify_revision(self, skipflags, state, node)
2922 2928
2923 2929 l1 = self.rawsize(rev)
2924 2930 l2 = len(self.rawdata(node))
2925 2931
2926 2932 if l1 != l2:
2927 2933 yield revlogproblem(
2928 2934 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2929 2935 node=node,
2930 2936 )
2931 2937
2932 2938 except error.CensoredNodeError:
2933 2939 if state[b'erroroncensored']:
2934 2940 yield revlogproblem(
2935 2941 error=_(b'censored file data'), node=node
2936 2942 )
2937 2943 state[b'skipread'].add(node)
2938 2944 except Exception as e:
2939 2945 yield revlogproblem(
2940 2946 error=_(b'unpacking %s: %s')
2941 2947 % (short(node), stringutil.forcebytestr(e)),
2942 2948 node=node,
2943 2949 )
2944 2950 state[b'skipread'].add(node)
2945 2951
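# Consumption sketch (hypothetical; ``state`` as prepared by the verifier):
# each yielded ``revlogproblem`` carries ``warning``/``error`` text and
# optionally the offending ``node``.
#
#   for problem in rl.verifyintegrity(state):
#       ui.warn((problem.error or problem.warning) + b'\n')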
2946 2952 def storageinfo(
2947 2953 self,
2948 2954 exclusivefiles=False,
2949 2955 sharedfiles=False,
2950 2956 revisionscount=False,
2951 2957 trackedsize=False,
2952 2958 storedsize=False,
2953 2959 ):
2954 2960 d = {}
2955 2961
2956 2962 if exclusivefiles:
2957 2963 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2958 2964 if not self._inline:
2959 2965 d[b'exclusivefiles'].append((self.opener, self.datafile))
2960 2966
2961 2967 if sharedfiles:
2962 2968 d[b'sharedfiles'] = []
2963 2969
2964 2970 if revisionscount:
2965 2971 d[b'revisionscount'] = len(self)
2966 2972
2967 2973 if trackedsize:
2968 2974 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2969 2975
2970 2976 if storedsize:
2971 2977 d[b'storedsize'] = sum(
2972 2978 self.opener.stat(path).st_size for path in self.files()
2973 2979 )
2974 2980
2975 2981 return d