revlog: change addrawrevision to return the revision...
Joerg Sonnenberger
r47257:9ee4e988 default
@@ -1,3086 +1,3086 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_HASCOPIESINFO,
57 57 REVIDX_ISCENSORED,
58 58 REVIDX_RAWTEXT_CHANGING_FLAGS,
59 59 REVIDX_SIDEDATA,
60 60 )
61 61 from .thirdparty import attr
62 62 from . import (
63 63 ancestor,
64 64 dagop,
65 65 error,
66 66 mdiff,
67 67 policy,
68 68 pycompat,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 nodemap as nodemaputil,
80 80 sidedata as sidedatautil,
81 81 )
82 82 from .utils import (
83 83 storageutil,
84 84 stringutil,
85 85 )
86 86
87 87 # blanked usage of all the names to prevent pyflakes complaints
88 88 # We need these names available in the module for extensions.
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_SIDEDATA
102 102 REVIDX_HASCOPIESINFO
103 103 REVIDX_EXTSTORED
104 104 REVIDX_DEFAULT_FLAGS
105 105 REVIDX_FLAGS_ORDER
106 106 REVIDX_RAWTEXT_CHANGING_FLAGS
107 107
108 108 parsers = policy.importmod('parsers')
109 109 rustancestor = policy.importrust('ancestor')
110 110 rustdagop = policy.importrust('dagop')
111 111 rustrevlog = policy.importrust('revlog')
112 112
113 113 # Aliased for performance.
114 114 _zlibdecompress = zlib.decompress
115 115
116 116 # max size of revlog with inline data
117 117 _maxinline = 131072
118 118 _chunksize = 1048576
119 119
120 120 # Flag processors for REVIDX_ELLIPSIS.
121 121 def ellipsisreadprocessor(rl, text):
122 122 return text, False, {}
123 123
124 124
125 125 def ellipsiswriteprocessor(rl, text, sidedata):
126 126 return text, False
127 127
128 128
129 129 def ellipsisrawprocessor(rl, text):
130 130 return False
131 131
132 132
133 133 ellipsisprocessor = (
134 134 ellipsisreadprocessor,
135 135 ellipsiswriteprocessor,
136 136 ellipsisrawprocessor,
137 137 )
138 138
139 139
140 140 def getoffset(q):
141 141 return int(q >> 16)
142 142
143 143
144 144 def gettype(q):
145 145 return int(q & 0xFFFF)
146 146
147 147
148 148 def offset_type(offset, type):
149 149 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
150 150 raise ValueError(b'unknown revlog index flags')
151 151 return int(int(offset) << 16 | type)
152 152
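# --- editor's sketch, not part of the original changeset: the first entry
# of an index tuple packs the data-file offset into the high bits and the
# 16-bit flag field into the low bits; the helpers above round-trip it.
_example_packed = offset_type(4096, 0)
assert getoffset(_example_packed) == 4096
assert gettype(_example_packed) == 0
del _example_packed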
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the "pure" python implementation to be "fast" because
167 167 # people using it don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @attr.s(slots=True, frozen=True)
175 175 class _revisioninfo(object):
176 176 """Information about a revision that allows building its fulltext
177 177 node: expected hash of the revision
178 178 p1, p2: parent revs of the revision
179 179 btext: built text cache consisting of a one-element list
180 180 cachedelta: (baserev, uncompressed_delta) or None
181 181 flags: flags associated with the revision storage
182 182
183 183 One of btext[0] or cachedelta must be set.
184 184 """
185 185
186 186 node = attr.ib()
187 187 p1 = attr.ib()
188 188 p2 = attr.ib()
189 189 btext = attr.ib()
190 190 textlen = attr.ib()
191 191 cachedelta = attr.ib()
192 192 flags = attr.ib()
193 193
194 194
195 195 @interfaceutil.implementer(repository.irevisiondelta)
196 196 @attr.s(slots=True)
197 197 class revlogrevisiondelta(object):
198 198 node = attr.ib()
199 199 p1node = attr.ib()
200 200 p2node = attr.ib()
201 201 basenode = attr.ib()
202 202 flags = attr.ib()
203 203 baserevisionsize = attr.ib()
204 204 revision = attr.ib()
205 205 delta = attr.ib()
206 206 linknode = attr.ib(default=None)
207 207
208 208
209 209 @interfaceutil.implementer(repository.iverifyproblem)
210 210 @attr.s(frozen=True)
211 211 class revlogproblem(object):
212 212 warning = attr.ib(default=None)
213 213 error = attr.ib(default=None)
214 214 node = attr.ib(default=None)
215 215
216 216
217 217 # index v0:
218 218 # 4 bytes: offset
219 219 # 4 bytes: compressed length
220 220 # 4 bytes: base rev
221 221 # 4 bytes: link rev
222 222 # 20 bytes: parent 1 nodeid
223 223 # 20 bytes: parent 2 nodeid
224 224 # 20 bytes: nodeid
225 225 indexformatv0 = struct.Struct(b">4l20s20s20s")
226 226 indexformatv0_pack = indexformatv0.pack
227 227 indexformatv0_unpack = indexformatv0.unpack
228 228
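# --- editor's sketch, not part of the original changeset: a v0 index
# record is fixed-size, four 4-byte big-endian ints plus three 20-byte
# nodeids, i.e. 76 bytes per entry.
assert indexformatv0.size == 4 * 4 + 3 * 20 == 76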
229 229
230 230 class revlogoldindex(list):
231 231 @property
232 232 def nodemap(self):
233 233 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
234 234 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
235 235 return self._nodemap
236 236
237 237 @util.propertycache
238 238 def _nodemap(self):
239 239 nodemap = nodemaputil.NodeMap({nullid: nullrev})
240 240 for r in range(0, len(self)):
241 241 n = self[r][7]
242 242 nodemap[n] = r
243 243 return nodemap
244 244
245 245 def has_node(self, node):
246 246 """return True if the node exist in the index"""
247 247 return node in self._nodemap
248 248
249 249 def rev(self, node):
250 250 """return a revision for a node
251 251
252 252 If the node is unknown, raise a RevlogError"""
253 253 return self._nodemap[node]
254 254
255 255 def get_rev(self, node):
256 256 """return a revision for a node
257 257
258 258 If the node is unknown, return None"""
259 259 return self._nodemap.get(node)
260 260
261 261 def append(self, tup):
262 262 self._nodemap[tup[7]] = len(self)
263 263 super(revlogoldindex, self).append(tup)
264 264
265 265 def __delitem__(self, i):
266 266 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
267 267 raise ValueError(b"deleting slices only supports a:-1 with step 1")
268 268 for r in pycompat.xrange(i.start, len(self)):
269 269 del self._nodemap[self[r][7]]
270 270 super(revlogoldindex, self).__delitem__(i)
271 271
272 272 def clearcaches(self):
273 273 self.__dict__.pop('_nodemap', None)
274 274
275 275 def __getitem__(self, i):
276 276 if i == -1:
277 277 return (0, 0, 0, -1, -1, -1, -1, nullid)
278 278 return list.__getitem__(self, i)
279 279
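# --- editor's note, not part of the original changeset: __getitem__
# special-cases -1 to return a synthetic entry for the null revision
# rather than the last list element, matching the nullrev convention
# used throughout the revlog code.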
280 280
281 281 class revlogoldio(object):
282 282 def __init__(self):
283 283 self.size = indexformatv0.size
284 284
285 285 def parseindex(self, data, inline):
286 286 s = self.size
287 287 index = []
288 288 nodemap = nodemaputil.NodeMap({nullid: nullrev})
289 289 n = off = 0
290 290 l = len(data)
291 291 while off + s <= l:
292 292 cur = data[off : off + s]
293 293 off += s
294 294 e = indexformatv0_unpack(cur)
295 295 # transform to revlogv1 format
296 296 e2 = (
297 297 offset_type(e[0], 0),
298 298 e[1],
299 299 -1,
300 300 e[2],
301 301 e[3],
302 302 nodemap.get(e[4], nullrev),
303 303 nodemap.get(e[5], nullrev),
304 304 e[6],
305 305 )
306 306 index.append(e2)
307 307 nodemap[e[6]] = n
308 308 n += 1
309 309
310 310 index = revlogoldindex(index)
311 311 return index, None
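# editor's note, not part of the original changeset: the e2 tuple above
# rewrites each v0 record into the revlogv1 entry shape: (offset/flags,
# compressed length, uncompressed length (-1, unknown in v0), base rev,
# link rev, p1 rev, p2 rev, node), resolving parent nodeids to revs via
# the nodemap built so far.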
312 312
313 313 def packentry(self, entry, node, version, rev):
314 314 if gettype(entry[0]):
315 315 raise error.RevlogError(
316 316 _(b'index entry flags need revlog version 1')
317 317 )
318 318 e2 = (
319 319 getoffset(entry[0]),
320 320 entry[1],
321 321 entry[3],
322 322 entry[4],
323 323 node(entry[5]),
324 324 node(entry[6]),
325 325 entry[7],
326 326 )
327 327 return indexformatv0_pack(*e2)
328 328
329 329
330 330 # index ng:
331 331 # 6 bytes: offset
332 332 # 2 bytes: flags
333 333 # 4 bytes: compressed length
334 334 # 4 bytes: uncompressed length
335 335 # 4 bytes: base rev
336 336 # 4 bytes: link rev
337 337 # 4 bytes: parent 1 rev
338 338 # 4 bytes: parent 2 rev
339 339 # 32 bytes: nodeid
340 340 indexformatng = struct.Struct(b">Qiiiiii20s12x")
341 341 indexformatng_pack = indexformatng.pack
342 342 versionformat = struct.Struct(b">I")
343 343 versionformat_pack = versionformat.pack
344 344 versionformat_unpack = versionformat.unpack
345 345
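# --- editor's sketch, not part of the original changeset: an "ng" (v1)
# index record is 64 bytes, an 8-byte offset/flags word, six 4-byte ints,
# a 20-byte nodeid and 12 bytes of padding.
assert indexformatng.size == 8 + 6 * 4 + 20 + 12 == 64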
346 346 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
347 347 # signed integer)
348 348 _maxentrysize = 0x7FFFFFFF
349 349
350 350
351 351 class revlogio(object):
352 352 def __init__(self):
353 353 self.size = indexformatng.size
354 354
355 355 def parseindex(self, data, inline):
356 356 # call the C implementation to parse the index data
357 357 index, cache = parsers.parse_index2(data, inline)
358 358 return index, cache
359 359
360 360 def packentry(self, entry, node, version, rev):
361 361 p = indexformatng_pack(*entry)
362 362 if rev == 0:
363 363 p = versionformat_pack(version) + p[4:]
364 364 return p
365 365
366 366
367 367 NodemapRevlogIO = None
368 368
369 369 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
370 370
371 371 class NodemapRevlogIO(revlogio):
372 372 """A debug oriented IO class that return a PersistentNodeMapIndexObject
373 373
374 374 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
375 375 """
376 376
377 377 def parseindex(self, data, inline):
378 378 index, cache = parsers.parse_index_devel_nodemap(data, inline)
379 379 return index, cache
380 380
381 381
382 382 class rustrevlogio(revlogio):
383 383 def parseindex(self, data, inline):
384 384 index, cache = super(rustrevlogio, self).parseindex(data, inline)
385 385 return rustrevlog.MixedIndex(index), cache
386 386
387 387
388 388 class revlog(object):
389 389 """
390 390 the underlying revision storage object
391 391
392 392 A revlog consists of two parts, an index and the revision data.
393 393
394 394 The index is a file with a fixed record size containing
395 395 information on each revision, including its nodeid (hash), the
396 396 nodeids of its parents, the position and offset of its data within
397 397 the data file, and the revision it's based on. Finally, each entry
398 398 contains a linkrev entry that can serve as a pointer to external
399 399 data.
400 400
401 401 The revision data itself is a linear collection of data chunks.
402 402 Each chunk represents a revision and is usually represented as a
403 403 delta against the previous chunk. To bound lookup time, runs of
404 404 deltas are limited to about 2 times the length of the original
405 405 version data. This makes retrieval of a version proportional to
406 406 its size, or O(1) relative to the number of revisions.
407 407
408 408 Both pieces of the revlog are written to in an append-only
409 409 fashion, which means we never need to rewrite a file to insert or
410 410 remove data, and can use some simple techniques to avoid the need
411 411 for locking while reading.
412 412
413 413 If checkambig, indexfile is opened with checkambig=True at
414 414 writing, to avoid file stat ambiguity.
415 415
416 416 If mmaplargeindex is True, and an mmapindexthreshold is set, the
417 417 index will be mmapped rather than read if it is larger than the
418 418 configured threshold.
419 419
420 420 If censorable is True, the revlog can have censored revisions.
421 421
422 422 If `upperboundcomp` is not None, this is the expected maximal gain from
423 423 compression for the data content.
424 424 """
425 425
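    # --- editor's sketch, not part of the original changeset: a typical
    # instantiation goes through a vfs opener; the data-file name is
    # derived from the index name unless given explicitly, e.g.
    #
    #     rl = revlog(repo.svfs, indexfile=b'data/foo.txt.i')
    #     # rl.datafile == b'data/foo.txt.d'
    #
    # (repo.svfs is illustrative; any opener-compatible callable works)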
426 426 _flagserrorclass = error.RevlogError
427 427
428 428 def __init__(
429 429 self,
430 430 opener,
431 431 indexfile,
432 432 datafile=None,
433 433 checkambig=False,
434 434 mmaplargeindex=False,
435 435 censorable=False,
436 436 upperboundcomp=None,
437 437 persistentnodemap=False,
438 438 ):
439 439 """
440 440 create a revlog object
441 441
442 442 opener is a function that abstracts the file opening operation
443 443 and can be used to implement COW semantics or the like.
444 444
445 445 """
446 446 self.upperboundcomp = upperboundcomp
447 447 self.indexfile = indexfile
448 448 self.datafile = datafile or (indexfile[:-2] + b".d")
449 449 self.nodemap_file = None
450 450 if persistentnodemap:
451 451 self.nodemap_file = nodemaputil.get_nodemap_file(
452 452 opener, self.indexfile
453 453 )
454 454
455 455 self.opener = opener
456 456 # When True, indexfile is opened with checkambig=True at writing, to
457 457 # avoid file stat ambiguity.
458 458 self._checkambig = checkambig
459 459 self._mmaplargeindex = mmaplargeindex
460 460 self._censorable = censorable
461 461 # 3-tuple of (node, rev, text) for a raw revision.
462 462 self._revisioncache = None
463 463 # Maps rev to chain base rev.
464 464 self._chainbasecache = util.lrucachedict(100)
465 465 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
466 466 self._chunkcache = (0, b'')
467 467 # How much data to read and cache into the raw revlog data cache.
468 468 self._chunkcachesize = 65536
469 469 self._maxchainlen = None
470 470 self._deltabothparents = True
471 471 self.index = None
472 472 self._nodemap_docket = None
473 473 # Mapping of partial identifiers to full nodes.
474 474 self._pcache = {}
476 476 self._compengine = b'zlib'
477 477 self._compengineopts = {}
478 478 self._maxdeltachainspan = -1
479 479 self._withsparseread = False
480 480 self._sparserevlog = False
481 481 self._srdensitythreshold = 0.50
482 482 self._srmingapsize = 262144
483 483
484 484 # Make copy of flag processors so each revlog instance can support
485 485 # custom flags.
486 486 self._flagprocessors = dict(flagutil.flagprocessors)
487 487
488 488 # 2-tuple of file handles being used for active writing.
489 489 self._writinghandles = None
490 490
491 491 self._loadindex()
492 492
493 493 def _loadindex(self):
494 494 mmapindexthreshold = None
495 495 opts = self.opener.options
496 496
497 497 if b'revlogv2' in opts:
498 498 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
499 499 elif b'revlogv1' in opts:
500 500 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
501 501 if b'generaldelta' in opts:
502 502 newversionflags |= FLAG_GENERALDELTA
503 503 elif b'revlogv0' in self.opener.options:
504 504 newversionflags = REVLOGV0
505 505 else:
506 506 newversionflags = REVLOG_DEFAULT_VERSION
507 507
508 508 if b'chunkcachesize' in opts:
509 509 self._chunkcachesize = opts[b'chunkcachesize']
510 510 if b'maxchainlen' in opts:
511 511 self._maxchainlen = opts[b'maxchainlen']
512 512 if b'deltabothparents' in opts:
513 513 self._deltabothparents = opts[b'deltabothparents']
514 514 self._lazydelta = bool(opts.get(b'lazydelta', True))
515 515 self._lazydeltabase = False
516 516 if self._lazydelta:
517 517 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
518 518 if b'compengine' in opts:
519 519 self._compengine = opts[b'compengine']
520 520 if b'zlib.level' in opts:
521 521 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
522 522 if b'zstd.level' in opts:
523 523 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
524 524 if b'maxdeltachainspan' in opts:
525 525 self._maxdeltachainspan = opts[b'maxdeltachainspan']
526 526 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
527 527 mmapindexthreshold = opts[b'mmapindexthreshold']
528 528 self.hassidedata = bool(opts.get(b'side-data', False))
529 529 if self.hassidedata:
530 530 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
531 531 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
532 532 withsparseread = bool(opts.get(b'with-sparse-read', False))
533 533 # sparse-revlog forces sparse-read
534 534 self._withsparseread = self._sparserevlog or withsparseread
535 535 if b'sparse-read-density-threshold' in opts:
536 536 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
537 537 if b'sparse-read-min-gap-size' in opts:
538 538 self._srmingapsize = opts[b'sparse-read-min-gap-size']
539 539 if opts.get(b'enableellipsis'):
540 540 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
541 541
542 542 # revlog v0 doesn't have flag processors
543 543 for flag, processor in pycompat.iteritems(
544 544 opts.get(b'flagprocessors', {})
545 545 ):
546 546 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
547 547
548 548 if self._chunkcachesize <= 0:
549 549 raise error.RevlogError(
550 550 _(b'revlog chunk cache size %r is not greater than 0')
551 551 % self._chunkcachesize
552 552 )
553 553 elif self._chunkcachesize & (self._chunkcachesize - 1):
554 554 raise error.RevlogError(
555 555 _(b'revlog chunk cache size %r is not a power of 2')
556 556 % self._chunkcachesize
557 557 )
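# editor's note, not part of the original changeset: the `x & (x - 1)`
# test above is nonzero exactly when x is not a power of two, which
# _readsegment relies on when aligning offsets with
# `offset & ~(cachesize - 1)`.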
558 558
559 559 indexdata = b''
560 560 self._initempty = True
561 561 try:
562 562 with self._indexfp() as f:
563 563 if (
564 564 mmapindexthreshold is not None
565 565 and self.opener.fstat(f).st_size >= mmapindexthreshold
566 566 ):
567 567 # TODO: should .close() to release resources without
568 568 # relying on Python GC
569 569 indexdata = util.buffer(util.mmapread(f))
570 570 else:
571 571 indexdata = f.read()
572 572 if len(indexdata) > 0:
573 573 versionflags = versionformat_unpack(indexdata[:4])[0]
574 574 self._initempty = False
575 575 else:
576 576 versionflags = newversionflags
577 577 except IOError as inst:
578 578 if inst.errno != errno.ENOENT:
579 579 raise
580 580
581 581 versionflags = newversionflags
582 582
583 583 self.version = versionflags
584 584
585 585 flags = versionflags & ~0xFFFF
586 586 fmt = versionflags & 0xFFFF
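# editor's note, not part of the original changeset: the 4-byte header
# keeps the format number in its low 16 bits and feature flags in the
# high 16 bits, so e.g. REVLOGV1 | FLAG_INLINE_DATA splits here into
# fmt == REVLOGV1 and flags == FLAG_INLINE_DATA.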
587 587
588 588 if fmt == REVLOGV0:
589 589 if flags:
590 590 raise error.RevlogError(
591 591 _(b'unknown flags (%#04x) in version %d revlog %s')
592 592 % (flags >> 16, fmt, self.indexfile)
593 593 )
594 594
595 595 self._inline = False
596 596 self._generaldelta = False
597 597
598 598 elif fmt == REVLOGV1:
599 599 if flags & ~REVLOGV1_FLAGS:
600 600 raise error.RevlogError(
601 601 _(b'unknown flags (%#04x) in version %d revlog %s')
602 602 % (flags >> 16, fmt, self.indexfile)
603 603 )
604 604
605 605 self._inline = versionflags & FLAG_INLINE_DATA
606 606 self._generaldelta = versionflags & FLAG_GENERALDELTA
607 607
608 608 elif fmt == REVLOGV2:
609 609 if flags & ~REVLOGV2_FLAGS:
610 610 raise error.RevlogError(
611 611 _(b'unknown flags (%#04x) in version %d revlog %s')
612 612 % (flags >> 16, fmt, self.indexfile)
613 613 )
614 614
615 615 self._inline = versionflags & FLAG_INLINE_DATA
616 616 # generaldelta implied by version 2 revlogs.
617 617 self._generaldelta = True
618 618
619 619 else:
620 620 raise error.RevlogError(
621 621 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
622 622 )
623 623 # sparse-revlog can't be on without general-delta (issue6056)
624 624 if not self._generaldelta:
625 625 self._sparserevlog = False
626 626
627 627 self._storedeltachains = True
628 628
629 629 devel_nodemap = (
630 630 self.nodemap_file
631 631 and opts.get(b'devel-force-nodemap', False)
632 632 and NodemapRevlogIO is not None
633 633 )
634 634
635 635 use_rust_index = False
636 636 if rustrevlog is not None:
637 637 if self.nodemap_file is not None:
638 638 use_rust_index = True
639 639 else:
640 640 use_rust_index = self.opener.options.get(b'rust.index')
641 641
642 642 self._io = revlogio()
643 643 if self.version == REVLOGV0:
644 644 self._io = revlogoldio()
645 645 elif devel_nodemap:
646 646 self._io = NodemapRevlogIO()
647 647 elif use_rust_index:
648 648 self._io = rustrevlogio()
649 649 try:
650 650 d = self._io.parseindex(indexdata, self._inline)
651 651 index, _chunkcache = d
652 652 use_nodemap = (
653 653 not self._inline
654 654 and self.nodemap_file is not None
655 655 and util.safehasattr(index, 'update_nodemap_data')
656 656 )
657 657 if use_nodemap:
658 658 nodemap_data = nodemaputil.persisted_data(self)
659 659 if nodemap_data is not None:
660 660 docket = nodemap_data[0]
661 661 if (
662 662 len(d[0]) > docket.tip_rev
663 663 and d[0][docket.tip_rev][7] == docket.tip_node
664 664 ):
665 665 # no changelog tampering
666 666 self._nodemap_docket = docket
667 667 index.update_nodemap_data(*nodemap_data)
668 668 except (ValueError, IndexError):
669 669 raise error.RevlogError(
670 670 _(b"index %s is corrupted") % self.indexfile
671 671 )
672 672 self.index, self._chunkcache = d
673 673 if not self._chunkcache:
674 674 self._chunkclear()
675 675 # revnum -> (chain-length, sum-delta-length)
676 676 self._chaininfocache = util.lrucachedict(500)
677 677 # revlog header -> revlog compressor
678 678 self._decompressors = {}
679 679
680 680 @util.propertycache
681 681 def _compressor(self):
682 682 engine = util.compengines[self._compengine]
683 683 return engine.revlogcompressor(self._compengineopts)
684 684
685 685 def _indexfp(self, mode=b'r'):
686 686 """file object for the revlog's index file"""
687 687 args = {'mode': mode}
688 688 if mode != b'r':
689 689 args['checkambig'] = self._checkambig
690 690 if mode == b'w':
691 691 args['atomictemp'] = True
692 692 return self.opener(self.indexfile, **args)
693 693
694 694 def _datafp(self, mode=b'r'):
695 695 """file object for the revlog's data file"""
696 696 return self.opener(self.datafile, mode=mode)
697 697
698 698 @contextlib.contextmanager
699 699 def _datareadfp(self, existingfp=None):
700 700 """file object suitable to read data"""
701 701 # Use explicit file handle, if given.
702 702 if existingfp is not None:
703 703 yield existingfp
704 704
705 705 # Use a file handle being actively used for writes, if available.
706 706 # There is some danger to doing this because reads will seek the
707 707 # file. However, _writeentry() performs a SEEK_END before all writes,
708 708 # so we should be safe.
709 709 elif self._writinghandles:
710 710 if self._inline:
711 711 yield self._writinghandles[0]
712 712 else:
713 713 yield self._writinghandles[1]
714 714
715 715 # Otherwise open a new file handle.
716 716 else:
717 717 if self._inline:
718 718 func = self._indexfp
719 719 else:
720 720 func = self._datafp
721 721 with func() as fp:
722 722 yield fp
723 723
724 724 def tiprev(self):
725 725 return len(self.index) - 1
726 726
727 727 def tip(self):
728 728 return self.node(self.tiprev())
729 729
730 730 def __contains__(self, rev):
731 731 return 0 <= rev < len(self)
732 732
733 733 def __len__(self):
734 734 return len(self.index)
735 735
736 736 def __iter__(self):
737 737 return iter(pycompat.xrange(len(self)))
738 738
739 739 def revs(self, start=0, stop=None):
740 740 """iterate over all rev in this revlog (from start to stop)"""
741 741 return storageutil.iterrevs(len(self), start=start, stop=stop)
742 742
743 743 @property
744 744 def nodemap(self):
745 745 msg = (
746 746 b"revlog.nodemap is deprecated, "
747 747 b"use revlog.index.[has_node|rev|get_rev]"
748 748 )
749 749 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
750 750 return self.index.nodemap
751 751
752 752 @property
753 753 def _nodecache(self):
754 754 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
755 755 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
756 756 return self.index.nodemap
757 757
758 758 def hasnode(self, node):
759 759 try:
760 760 self.rev(node)
761 761 return True
762 762 except KeyError:
763 763 return False
764 764
765 765 def candelta(self, baserev, rev):
766 766 """whether two revisions (baserev, rev) can be delta-ed or not"""
767 767 # Disable delta if either rev requires a content-changing flag
768 768 # processor (ex. LFS). This is because such a flag processor can alter
769 769 # the rawtext content that the delta will be based on, and two clients
770 770 # could have the same revlog node with different flags (i.e. different
771 771 # rawtext contents) and the delta could be incompatible.
772 772 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
773 773 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
774 774 ):
775 775 return False
776 776 return True
777 777
778 778 def update_caches(self, transaction):
779 779 if self.nodemap_file is not None:
780 780 if transaction is None:
781 781 nodemaputil.update_persistent_nodemap(self)
782 782 else:
783 783 nodemaputil.setup_persistent_nodemap(transaction, self)
784 784
785 785 def clearcaches(self):
786 786 self._revisioncache = None
787 787 self._chainbasecache.clear()
788 788 self._chunkcache = (0, b'')
789 789 self._pcache = {}
790 790 self._nodemap_docket = None
791 791 self.index.clearcaches()
792 792 # The python code is the one responsible for validating the docket; we
793 793 # end up having to refresh it here.
794 794 use_nodemap = (
795 795 not self._inline
796 796 and self.nodemap_file is not None
797 797 and util.safehasattr(self.index, 'update_nodemap_data')
798 798 )
799 799 if use_nodemap:
800 800 nodemap_data = nodemaputil.persisted_data(self)
801 801 if nodemap_data is not None:
802 802 self._nodemap_docket = nodemap_data[0]
803 803 self.index.update_nodemap_data(*nodemap_data)
804 804
805 805 def rev(self, node):
806 806 try:
807 807 return self.index.rev(node)
808 808 except TypeError:
809 809 raise
810 810 except error.RevlogError:
811 811 # parsers.c radix tree lookup failed
812 812 if node == wdirid or node in wdirfilenodeids:
813 813 raise error.WdirUnsupported
814 814 raise error.LookupError(node, self.indexfile, _(b'no node'))
815 815
816 816 # Accessors for index entries.
817 817
818 818 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
819 819 # are flags.
820 820 def start(self, rev):
821 821 return int(self.index[rev][0] >> 16)
822 822
823 823 def flags(self, rev):
824 824 return self.index[rev][0] & 0xFFFF
825 825
826 826 def length(self, rev):
827 827 return self.index[rev][1]
828 828
829 829 def rawsize(self, rev):
830 830 """return the length of the uncompressed text for a given revision"""
831 831 l = self.index[rev][2]
832 832 if l >= 0:
833 833 return l
834 834
835 835 t = self.rawdata(rev)
836 836 return len(t)
837 837
838 838 def size(self, rev):
839 839 """length of non-raw text (processed by a "read" flag processor)"""
840 840 # fast path: if no "read" flag processor could change the content,
841 841 # size is rawsize. note: ELLIPSIS is known to not change the content.
842 842 flags = self.flags(rev)
843 843 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
844 844 return self.rawsize(rev)
845 845
846 846 return len(self.revision(rev, raw=False))
847 847
848 848 def chainbase(self, rev):
849 849 base = self._chainbasecache.get(rev)
850 850 if base is not None:
851 851 return base
852 852
853 853 index = self.index
854 854 iterrev = rev
855 855 base = index[iterrev][3]
856 856 while base != iterrev:
857 857 iterrev = base
858 858 base = index[iterrev][3]
859 859
860 860 self._chainbasecache[rev] = base
861 861 return base
862 862
863 863 def linkrev(self, rev):
864 864 return self.index[rev][4]
865 865
866 866 def parentrevs(self, rev):
867 867 try:
868 868 entry = self.index[rev]
869 869 except IndexError:
870 870 if rev == wdirrev:
871 871 raise error.WdirUnsupported
872 872 raise
873 873
874 874 return entry[5], entry[6]
875 875
876 876 # fast parentrevs(rev) where rev isn't filtered
877 877 _uncheckedparentrevs = parentrevs
878 878
879 879 def node(self, rev):
880 880 try:
881 881 return self.index[rev][7]
882 882 except IndexError:
883 883 if rev == wdirrev:
884 884 raise error.WdirUnsupported
885 885 raise
886 886
887 887 # Derived from index values.
888 888
889 889 def end(self, rev):
890 890 return self.start(rev) + self.length(rev)
891 891
892 892 def parents(self, node):
893 893 i = self.index
894 894 d = i[self.rev(node)]
895 895 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
896 896
897 897 def chainlen(self, rev):
898 898 return self._chaininfo(rev)[0]
899 899
900 900 def _chaininfo(self, rev):
901 901 chaininfocache = self._chaininfocache
902 902 if rev in chaininfocache:
903 903 return chaininfocache[rev]
904 904 index = self.index
905 905 generaldelta = self._generaldelta
906 906 iterrev = rev
907 907 e = index[iterrev]
908 908 clen = 0
909 909 compresseddeltalen = 0
910 910 while iterrev != e[3]:
911 911 clen += 1
912 912 compresseddeltalen += e[1]
913 913 if generaldelta:
914 914 iterrev = e[3]
915 915 else:
916 916 iterrev -= 1
917 917 if iterrev in chaininfocache:
918 918 t = chaininfocache[iterrev]
919 919 clen += t[0]
920 920 compresseddeltalen += t[1]
921 921 break
922 922 e = index[iterrev]
923 923 else:
924 924 # Add text length of base since decompressing that also takes
925 925 # work. For cache hits the length is already included.
926 926 compresseddeltalen += e[1]
927 927 r = (clen, compresseddeltalen)
928 928 chaininfocache[rev] = r
929 929 return r
930 930
931 931 def _deltachain(self, rev, stoprev=None):
932 932 """Obtain the delta chain for a revision.
933 933
934 934 ``stoprev`` specifies a revision to stop at. If not specified, we
935 935 stop at the base of the chain.
936 936
937 937 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
938 938 revs in ascending order and ``stopped`` is a bool indicating whether
939 939 ``stoprev`` was hit.
940 940 """
941 941 # Try C implementation.
942 942 try:
943 943 return self.index.deltachain(rev, stoprev, self._generaldelta)
944 944 except AttributeError:
945 945 pass
946 946
947 947 chain = []
948 948
949 949 # Alias to prevent attribute lookup in tight loop.
950 950 index = self.index
951 951 generaldelta = self._generaldelta
952 952
953 953 iterrev = rev
954 954 e = index[iterrev]
955 955 while iterrev != e[3] and iterrev != stoprev:
956 956 chain.append(iterrev)
957 957 if generaldelta:
958 958 iterrev = e[3]
959 959 else:
960 960 iterrev -= 1
961 961 e = index[iterrev]
962 962
963 963 if iterrev == stoprev:
964 964 stopped = True
965 965 else:
966 966 chain.append(iterrev)
967 967 stopped = False
968 968
969 969 chain.reverse()
970 970 return chain, stopped
971 971
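# --- editor's sketch, not part of the original changeset: with
# general-delta disabled, a chain whose base is rev 3 gives
#     _deltachain(5)            -> ([3, 4, 5], False)
#     _deltachain(5, stoprev=4) -> ([5], True)
# i.e. stoprev itself is excluded and `stopped` records the early exit.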
972 972 def ancestors(self, revs, stoprev=0, inclusive=False):
973 973 """Generate the ancestors of 'revs' in reverse revision order.
974 974 Does not generate revs lower than stoprev.
975 975
976 976 See the documentation for ancestor.lazyancestors for more details."""
977 977
978 978 # first, make sure start revisions aren't filtered
979 979 revs = list(revs)
980 980 checkrev = self.node
981 981 for r in revs:
982 982 checkrev(r)
983 983 # and we're sure ancestors aren't filtered as well
984 984
985 985 if rustancestor is not None:
986 986 lazyancestors = rustancestor.LazyAncestors
987 987 arg = self.index
988 988 else:
989 989 lazyancestors = ancestor.lazyancestors
990 990 arg = self._uncheckedparentrevs
991 991 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
992 992
993 993 def descendants(self, revs):
994 994 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
995 995
996 996 def findcommonmissing(self, common=None, heads=None):
997 997 """Return a tuple of the ancestors of common and the ancestors of heads
998 998 that are not ancestors of common. In revset terminology, we return the
999 999 tuple:
1000 1000
1001 1001 ::common, (::heads) - (::common)
1002 1002
1003 1003 The list is sorted by revision number, meaning it is
1004 1004 topologically sorted.
1005 1005
1006 1006 'heads' and 'common' are both lists of node IDs. If heads is
1007 1007 not supplied, uses all of the revlog's heads. If common is not
1008 1008 supplied, uses nullid."""
1009 1009 if common is None:
1010 1010 common = [nullid]
1011 1011 if heads is None:
1012 1012 heads = self.heads()
1013 1013
1014 1014 common = [self.rev(n) for n in common]
1015 1015 heads = [self.rev(n) for n in heads]
1016 1016
1017 1017 # we want the ancestors, but inclusive
1018 1018 class lazyset(object):
1019 1019 def __init__(self, lazyvalues):
1020 1020 self.addedvalues = set()
1021 1021 self.lazyvalues = lazyvalues
1022 1022
1023 1023 def __contains__(self, value):
1024 1024 return value in self.addedvalues or value in self.lazyvalues
1025 1025
1026 1026 def __iter__(self):
1027 1027 added = self.addedvalues
1028 1028 for r in added:
1029 1029 yield r
1030 1030 for r in self.lazyvalues:
1031 1031 if r not in added:
1032 1032 yield r
1033 1033
1034 1034 def add(self, value):
1035 1035 self.addedvalues.add(value)
1036 1036
1037 1037 def update(self, values):
1038 1038 self.addedvalues.update(values)
1039 1039
1040 1040 has = lazyset(self.ancestors(common))
1041 1041 has.add(nullrev)
1042 1042 has.update(common)
1043 1043
1044 1044 # take all ancestors from heads that aren't in has
1045 1045 missing = set()
1046 1046 visit = collections.deque(r for r in heads if r not in has)
1047 1047 while visit:
1048 1048 r = visit.popleft()
1049 1049 if r in missing:
1050 1050 continue
1051 1051 else:
1052 1052 missing.add(r)
1053 1053 for p in self.parentrevs(r):
1054 1054 if p not in has:
1055 1055 visit.append(p)
1056 1056 missing = list(missing)
1057 1057 missing.sort()
1058 1058 return has, [self.node(miss) for miss in missing]
1059 1059
1060 1060 def incrementalmissingrevs(self, common=None):
1061 1061 """Return an object that can be used to incrementally compute the
1062 1062 revision numbers of the ancestors of arbitrary sets that are not
1063 1063 ancestors of common. This is an ancestor.incrementalmissingancestors
1064 1064 object.
1065 1065
1066 1066 'common' is a list of revision numbers. If common is not supplied, uses
1067 1067 nullrev.
1068 1068 """
1069 1069 if common is None:
1070 1070 common = [nullrev]
1071 1071
1072 1072 if rustancestor is not None:
1073 1073 return rustancestor.MissingAncestors(self.index, common)
1074 1074 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1075 1075
1076 1076 def findmissingrevs(self, common=None, heads=None):
1077 1077 """Return the revision numbers of the ancestors of heads that
1078 1078 are not ancestors of common.
1079 1079
1080 1080 More specifically, return a list of revision numbers corresponding to
1081 1081 nodes N such that every N satisfies the following constraints:
1082 1082
1083 1083 1. N is an ancestor of some node in 'heads'
1084 1084 2. N is not an ancestor of any node in 'common'
1085 1085
1086 1086 The list is sorted by revision number, meaning it is
1087 1087 topologically sorted.
1088 1088
1089 1089 'heads' and 'common' are both lists of revision numbers. If heads is
1090 1090 not supplied, uses all of the revlog's heads. If common is not
1091 1091 supplied, uses nullid."""
1092 1092 if common is None:
1093 1093 common = [nullrev]
1094 1094 if heads is None:
1095 1095 heads = self.headrevs()
1096 1096
1097 1097 inc = self.incrementalmissingrevs(common=common)
1098 1098 return inc.missingancestors(heads)
1099 1099
1100 1100 def findmissing(self, common=None, heads=None):
1101 1101 """Return the ancestors of heads that are not ancestors of common.
1102 1102
1103 1103 More specifically, return a list of nodes N such that every N
1104 1104 satisfies the following constraints:
1105 1105
1106 1106 1. N is an ancestor of some node in 'heads'
1107 1107 2. N is not an ancestor of any node in 'common'
1108 1108
1109 1109 The list is sorted by revision number, meaning it is
1110 1110 topologically sorted.
1111 1111
1112 1112 'heads' and 'common' are both lists of node IDs. If heads is
1113 1113 not supplied, uses all of the revlog's heads. If common is not
1114 1114 supplied, uses nullid."""
1115 1115 if common is None:
1116 1116 common = [nullid]
1117 1117 if heads is None:
1118 1118 heads = self.heads()
1119 1119
1120 1120 common = [self.rev(n) for n in common]
1121 1121 heads = [self.rev(n) for n in heads]
1122 1122
1123 1123 inc = self.incrementalmissingrevs(common=common)
1124 1124 return [self.node(r) for r in inc.missingancestors(heads)]
1125 1125
1126 1126 def nodesbetween(self, roots=None, heads=None):
1127 1127 """Return a topological path from 'roots' to 'heads'.
1128 1128
1129 1129 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1130 1130 topologically sorted list of all nodes N that satisfy both of
1131 1131 these constraints:
1132 1132
1133 1133 1. N is a descendant of some node in 'roots'
1134 1134 2. N is an ancestor of some node in 'heads'
1135 1135
1136 1136 Every node is considered to be both a descendant and an ancestor
1137 1137 of itself, so every reachable node in 'roots' and 'heads' will be
1138 1138 included in 'nodes'.
1139 1139
1140 1140 'outroots' is the list of reachable nodes in 'roots', i.e., the
1141 1141 subset of 'roots' that is returned in 'nodes'. Likewise,
1142 1142 'outheads' is the subset of 'heads' that is also in 'nodes'.
1143 1143
1144 1144 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1145 1145 unspecified, uses nullid as the only root. If 'heads' is
1146 1146 unspecified, uses list of all of the revlog's heads."""
1147 1147 nonodes = ([], [], [])
1148 1148 if roots is not None:
1149 1149 roots = list(roots)
1150 1150 if not roots:
1151 1151 return nonodes
1152 1152 lowestrev = min([self.rev(n) for n in roots])
1153 1153 else:
1154 1154 roots = [nullid] # Everybody's a descendant of nullid
1155 1155 lowestrev = nullrev
1156 1156 if (lowestrev == nullrev) and (heads is None):
1157 1157 # We want _all_ the nodes!
1158 1158 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1159 1159 if heads is None:
1160 1160 # All nodes are ancestors, so the latest ancestor is the last
1161 1161 # node.
1162 1162 highestrev = len(self) - 1
1163 1163 # Set ancestors to None to signal that every node is an ancestor.
1164 1164 ancestors = None
1165 1165 # Set heads to an empty dictionary for later discovery of heads
1166 1166 heads = {}
1167 1167 else:
1168 1168 heads = list(heads)
1169 1169 if not heads:
1170 1170 return nonodes
1171 1171 ancestors = set()
1172 1172 # Turn heads into a dictionary so we can remove 'fake' heads.
1173 1173 # Also, later we will be using it to filter out the heads we can't
1174 1174 # find from roots.
1175 1175 heads = dict.fromkeys(heads, False)
1176 1176 # Start at the top and keep marking parents until we're done.
1177 1177 nodestotag = set(heads)
1178 1178 # Remember where the top was so we can use it as a limit later.
1179 1179 highestrev = max([self.rev(n) for n in nodestotag])
1180 1180 while nodestotag:
1181 1181 # grab a node to tag
1182 1182 n = nodestotag.pop()
1183 1183 # Never tag nullid
1184 1184 if n == nullid:
1185 1185 continue
1186 1186 # A node's revision number represents its place in a
1187 1187 # topologically sorted list of nodes.
1188 1188 r = self.rev(n)
1189 1189 if r >= lowestrev:
1190 1190 if n not in ancestors:
1191 1191 # If we are possibly a descendant of one of the roots
1192 1192 # and we haven't already been marked as an ancestor
1193 1193 ancestors.add(n) # Mark as ancestor
1194 1194 # Add non-nullid parents to list of nodes to tag.
1195 1195 nodestotag.update(
1196 1196 [p for p in self.parents(n) if p != nullid]
1197 1197 )
1198 1198 elif n in heads: # We've seen it before, is it a fake head?
1199 1199 # So it is; real heads should not be the ancestors of
1200 1200 # any other heads.
1201 1201 heads.pop(n)
1202 1202 if not ancestors:
1203 1203 return nonodes
1204 1204 # Now that we have our set of ancestors, we want to remove any
1205 1205 # roots that are not ancestors.
1206 1206
1207 1207 # If one of the roots was nullid, everything is included anyway.
1208 1208 if lowestrev > nullrev:
1209 1209 # But, since we weren't, let's recompute the lowest rev to not
1210 1210 # include roots that aren't ancestors.
1211 1211
1212 1212 # Filter out roots that aren't ancestors of heads
1213 1213 roots = [root for root in roots if root in ancestors]
1214 1214 # Recompute the lowest revision
1215 1215 if roots:
1216 1216 lowestrev = min([self.rev(root) for root in roots])
1217 1217 else:
1218 1218 # No more roots? Return empty list
1219 1219 return nonodes
1220 1220 else:
1221 1221 # We are descending from nullid, and don't need to care about
1222 1222 # any other roots.
1223 1223 lowestrev = nullrev
1224 1224 roots = [nullid]
1225 1225 # Transform our roots list into a set.
1226 1226 descendants = set(roots)
1227 1227 # Also, keep the original roots so we can filter out roots that aren't
1228 1228 # 'real' roots (i.e. are descended from other roots).
1229 1229 roots = descendants.copy()
1230 1230 # Our topologically sorted list of output nodes.
1231 1231 orderedout = []
1232 1232 # Don't start at nullid since we don't want nullid in our output list,
1233 1233 # and if nullid shows up in descendants, empty parents will look like
1234 1234 # they're descendants.
1235 1235 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1236 1236 n = self.node(r)
1237 1237 isdescendant = False
1238 1238 if lowestrev == nullrev: # Everybody is a descendant of nullid
1239 1239 isdescendant = True
1240 1240 elif n in descendants:
1241 1241 # n is already a descendant
1242 1242 isdescendant = True
1243 1243 # This check only needs to be done here because all the roots
1244 1244 # will start being marked as descendants before the loop.
1245 1245 if n in roots:
1246 1246 # If n was a root, check if it's a 'real' root.
1247 1247 p = tuple(self.parents(n))
1248 1248 # If any of its parents are descendants, it's not a root.
1249 1249 if (p[0] in descendants) or (p[1] in descendants):
1250 1250 roots.remove(n)
1251 1251 else:
1252 1252 p = tuple(self.parents(n))
1253 1253 # A node is a descendant if either of its parents are
1254 1254 # descendants. (We seeded the dependents list with the roots
1255 1255 # up there, remember?)
1256 1256 if (p[0] in descendants) or (p[1] in descendants):
1257 1257 descendants.add(n)
1258 1258 isdescendant = True
1259 1259 if isdescendant and ((ancestors is None) or (n in ancestors)):
1260 1260 # Only include nodes that are both descendants and ancestors.
1261 1261 orderedout.append(n)
1262 1262 if (ancestors is not None) and (n in heads):
1263 1263 # We're trying to figure out which heads are reachable
1264 1264 # from roots.
1265 1265 # Mark this head as having been reached
1266 1266 heads[n] = True
1267 1267 elif ancestors is None:
1268 1268 # Otherwise, we're trying to discover the heads.
1269 1269 # Assume this is a head because if it isn't, the next step
1270 1270 # will eventually remove it.
1271 1271 heads[n] = True
1272 1272 # But, obviously its parents aren't.
1273 1273 for p in self.parents(n):
1274 1274 heads.pop(p, None)
1275 1275 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1276 1276 roots = list(roots)
1277 1277 assert orderedout
1278 1278 assert roots
1279 1279 assert heads
1280 1280 return (orderedout, roots, heads)
1281 1281
1282 1282 def headrevs(self, revs=None):
1283 1283 if revs is None:
1284 1284 try:
1285 1285 return self.index.headrevs()
1286 1286 except AttributeError:
1287 1287 return self._headrevs()
1288 1288 if rustdagop is not None:
1289 1289 return rustdagop.headrevs(self.index, revs)
1290 1290 return dagop.headrevs(revs, self._uncheckedparentrevs)
1291 1291
1292 1292 def computephases(self, roots):
1293 1293 return self.index.computephasesmapsets(roots)
1294 1294
1295 1295 def _headrevs(self):
1296 1296 count = len(self)
1297 1297 if not count:
1298 1298 return [nullrev]
1299 1299 # we won't iterate over filtered revs, so nobody is a head at start
1300 1300 ishead = [0] * (count + 1)
1301 1301 index = self.index
1302 1302 for r in self:
1303 1303 ishead[r] = 1 # I may be a head
1304 1304 e = index[r]
1305 1305 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1306 1306 return [r for r, val in enumerate(ishead) if val]
1307 1307
1308 1308 def heads(self, start=None, stop=None):
1309 1309 """return the list of all nodes that have no children
1310 1310
1311 1311 if start is specified, only heads that are descendants of
1312 1312 start will be returned
1313 1313 if stop is specified, it will consider all the revs from stop
1314 1314 as if they had no children
1315 1315 """
1316 1316 if start is None and stop is None:
1317 1317 if not len(self):
1318 1318 return [nullid]
1319 1319 return [self.node(r) for r in self.headrevs()]
1320 1320
1321 1321 if start is None:
1322 1322 start = nullrev
1323 1323 else:
1324 1324 start = self.rev(start)
1325 1325
1326 1326 stoprevs = {self.rev(n) for n in stop or []}
1327 1327
1328 1328 revs = dagop.headrevssubset(
1329 1329 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1330 1330 )
1331 1331
1332 1332 return [self.node(rev) for rev in revs]
1333 1333
1334 1334 def children(self, node):
1335 1335 """find the children of a given node"""
1336 1336 c = []
1337 1337 p = self.rev(node)
1338 1338 for r in self.revs(start=p + 1):
1339 1339 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1340 1340 if prevs:
1341 1341 for pr in prevs:
1342 1342 if pr == p:
1343 1343 c.append(self.node(r))
1344 1344 elif p == nullrev:
1345 1345 c.append(self.node(r))
1346 1346 return c
1347 1347
1348 1348 def commonancestorsheads(self, a, b):
1349 1349 """calculate all the heads of the common ancestors of nodes a and b"""
1350 1350 a, b = self.rev(a), self.rev(b)
1351 1351 ancs = self._commonancestorsheads(a, b)
1352 1352 return pycompat.maplist(self.node, ancs)
1353 1353
1354 1354 def _commonancestorsheads(self, *revs):
1355 1355 """calculate all the heads of the common ancestors of revs"""
1356 1356 try:
1357 1357 ancs = self.index.commonancestorsheads(*revs)
1358 1358 except (AttributeError, OverflowError): # C implementation failed
1359 1359 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1360 1360 return ancs
1361 1361
1362 1362 def isancestor(self, a, b):
1363 1363 """return True if node a is an ancestor of node b
1364 1364
1365 1365 A revision is considered an ancestor of itself."""
1366 1366 a, b = self.rev(a), self.rev(b)
1367 1367 return self.isancestorrev(a, b)
1368 1368
1369 1369 def isancestorrev(self, a, b):
1370 1370 """return True if revision a is an ancestor of revision b
1371 1371
1372 1372 A revision is considered an ancestor of itself.
1373 1373
1374 1374 The implementation of this is trivial but the use of
1375 1375 reachableroots is not."""
1376 1376 if a == nullrev:
1377 1377 return True
1378 1378 elif a == b:
1379 1379 return True
1380 1380 elif a > b:
1381 1381 return False
1382 1382 return bool(self.reachableroots(a, [b], [a], includepath=False))
1383 1383
1384 1384 def reachableroots(self, minroot, heads, roots, includepath=False):
1385 1385 """return (heads(::(<roots> and <roots>::<heads>)))
1386 1386
1387 1387 If includepath is True, return (<roots>::<heads>)."""
1388 1388 try:
1389 1389 return self.index.reachableroots2(
1390 1390 minroot, heads, roots, includepath
1391 1391 )
1392 1392 except AttributeError:
1393 1393 return dagop._reachablerootspure(
1394 1394 self.parentrevs, minroot, roots, heads, includepath
1395 1395 )
1396 1396
1397 1397 def ancestor(self, a, b):
1398 1398 """calculate the "best" common ancestor of nodes a and b"""
1399 1399
1400 1400 a, b = self.rev(a), self.rev(b)
1401 1401 try:
1402 1402 ancs = self.index.ancestors(a, b)
1403 1403 except (AttributeError, OverflowError):
1404 1404 ancs = ancestor.ancestors(self.parentrevs, a, b)
1405 1405 if ancs:
1406 1406 # choose a consistent winner when there's a tie
1407 1407 return min(map(self.node, ancs))
1408 1408 return nullid
1409 1409
1410 1410 def _match(self, id):
1411 1411 if isinstance(id, int):
1412 1412 # rev
1413 1413 return self.node(id)
1414 1414 if len(id) == 20:
1415 1415 # possibly a binary node
1416 1416 # odds of a binary node being all hex in ASCII are 1 in 10**25
1417 1417 try:
1418 1418 node = id
1419 1419 self.rev(node) # quick search the index
1420 1420 return node
1421 1421 except error.LookupError:
1422 1422 pass # may be partial hex id
1423 1423 try:
1424 1424 # str(rev)
1425 1425 rev = int(id)
1426 1426 if b"%d" % rev != id:
1427 1427 raise ValueError
1428 1428 if rev < 0:
1429 1429 rev = len(self) + rev
1430 1430 if rev < 0 or rev >= len(self):
1431 1431 raise ValueError
1432 1432 return self.node(rev)
1433 1433 except (ValueError, OverflowError):
1434 1434 pass
1435 1435 if len(id) == 40:
1436 1436 try:
1437 1437 # a full hex nodeid?
1438 1438 node = bin(id)
1439 1439 self.rev(node)
1440 1440 return node
1441 1441 except (TypeError, error.LookupError):
1442 1442 pass
1443 1443
1444 1444 def _partialmatch(self, id):
1445 1445 # we don't care about wdirfilenodeids as they should always be full hashes
1446 1446 maybewdir = wdirhex.startswith(id)
1447 1447 try:
1448 1448 partial = self.index.partialmatch(id)
1449 1449 if partial and self.hasnode(partial):
1450 1450 if maybewdir:
1451 1451 # single 'ff...' match in radix tree, ambiguous with wdir
1452 1452 raise error.RevlogError
1453 1453 return partial
1454 1454 if maybewdir:
1455 1455 # no 'ff...' match in radix tree, wdir identified
1456 1456 raise error.WdirUnsupported
1457 1457 return None
1458 1458 except error.RevlogError:
1459 1459 # parsers.c radix tree lookup gave multiple matches
1460 1460 # fast path: for unfiltered changelog, radix tree is accurate
1461 1461 if not getattr(self, 'filteredrevs', None):
1462 1462 raise error.AmbiguousPrefixLookupError(
1463 1463 id, self.indexfile, _(b'ambiguous identifier')
1464 1464 )
1465 1465 # fall through to slow path that filters hidden revisions
1466 1466 except (AttributeError, ValueError):
1467 1467 # we are pure python, or key was too short to search radix tree
1468 1468 pass
1469 1469
1470 1470 if id in self._pcache:
1471 1471 return self._pcache[id]
1472 1472
1473 1473 if len(id) <= 40:
1474 1474 try:
1475 1475 # hex(node)[:...]
1476 1476 l = len(id) // 2 # grab an even number of digits
1477 1477 prefix = bin(id[: l * 2])
1478 1478 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1479 1479 nl = [
1480 1480 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1481 1481 ]
1482 1482 if nullhex.startswith(id):
1483 1483 nl.append(nullid)
1484 1484 if len(nl) > 0:
1485 1485 if len(nl) == 1 and not maybewdir:
1486 1486 self._pcache[id] = nl[0]
1487 1487 return nl[0]
1488 1488 raise error.AmbiguousPrefixLookupError(
1489 1489 id, self.indexfile, _(b'ambiguous identifier')
1490 1490 )
1491 1491 if maybewdir:
1492 1492 raise error.WdirUnsupported
1493 1493 return None
1494 1494 except TypeError:
1495 1495 pass
1496 1496
1497 1497 def lookup(self, id):
1498 1498 """locate a node based on:
1499 1499 - revision number or str(revision number)
1500 1500 - nodeid or subset of hex nodeid
1501 1501 """
1502 1502 n = self._match(id)
1503 1503 if n is not None:
1504 1504 return n
1505 1505 n = self._partialmatch(id)
1506 1506 if n:
1507 1507 return n
1508 1508
1509 1509 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1510 1510
1511 1511 def shortest(self, node, minlength=1):
1512 1512 """Find the shortest unambiguous prefix that matches node."""
1513 1513
1514 1514 def isvalid(prefix):
1515 1515 try:
1516 1516 matchednode = self._partialmatch(prefix)
1517 1517 except error.AmbiguousPrefixLookupError:
1518 1518 return False
1519 1519 except error.WdirUnsupported:
1520 1520 # single 'ff...' match
1521 1521 return True
1522 1522 if matchednode is None:
1523 1523 raise error.LookupError(node, self.indexfile, _(b'no node'))
1524 1524 return True
1525 1525
1526 1526 def maybewdir(prefix):
1527 1527 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1528 1528
1529 1529 hexnode = hex(node)
1530 1530
1531 1531 def disambiguate(hexnode, minlength):
1532 1532 """Disambiguate against wdirid."""
1533 1533 for length in range(minlength, len(hexnode) + 1):
1534 1534 prefix = hexnode[:length]
1535 1535 if not maybewdir(prefix):
1536 1536 return prefix
1537 1537
1538 1538 if not getattr(self, 'filteredrevs', None):
1539 1539 try:
1540 1540 length = max(self.index.shortest(node), minlength)
1541 1541 return disambiguate(hexnode, length)
1542 1542 except error.RevlogError:
1543 1543 if node != wdirid:
1544 1544 raise error.LookupError(node, self.indexfile, _(b'no node'))
1545 1545 except AttributeError:
1546 1546 # Fall through to pure code
1547 1547 pass
1548 1548
1549 1549 if node == wdirid:
1550 1550 for length in range(minlength, len(hexnode) + 1):
1551 1551 prefix = hexnode[:length]
1552 1552 if isvalid(prefix):
1553 1553 return prefix
1554 1554
1555 1555 for length in range(minlength, len(hexnode) + 1):
1556 1556 prefix = hexnode[:length]
1557 1557 if isvalid(prefix):
1558 1558 return disambiguate(hexnode, length)
1559 1559
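# --- editor's sketch, not part of the original changeset: shortest()
# yields a hex prefix that lookup() can resolve back to the node, e.g.
#     prefix = rl.shortest(node, minlength=4)
#     assert rl.lookup(prefix) == node
# (rl is an illustrative revlog instance)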
1560 1560 def cmp(self, node, text):
1561 1561 """compare text with a given file revision
1562 1562
1563 1563 returns True if text is different from what is stored.
1564 1564 """
1565 1565 p1, p2 = self.parents(node)
1566 1566 return storageutil.hashrevisionsha1(text, p1, p2) != node
1567 1567
1568 1568 def _cachesegment(self, offset, data):
1569 1569 """Add a segment to the revlog cache.
1570 1570
1571 1571 Accepts an absolute offset and the data that is at that location.
1572 1572 """
1573 1573 o, d = self._chunkcache
1574 1574 # try to add to existing cache
1575 1575 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1576 1576 self._chunkcache = o, d + data
1577 1577 else:
1578 1578 self._chunkcache = offset, data
1579 1579
1580 1580 def _readsegment(self, offset, length, df=None):
1581 1581 """Load a segment of raw data from the revlog.
1582 1582
1583 1583 Accepts an absolute offset, length to read, and an optional existing
1584 1584 file handle to read from.
1585 1585
1586 1586 If an existing file handle is passed, it will be seeked and the
1587 1587 original seek position will NOT be restored.
1588 1588
1589 1589 Returns a str or buffer of raw byte data.
1590 1590
1591 1591 Raises if the requested number of bytes could not be read.
1592 1592 """
1593 1593 # Cache data both forward and backward around the requested
1594 1594 # data, in a fixed size window. This helps speed up operations
1595 1595 # involving reading the revlog backwards.
1596 1596 cachesize = self._chunkcachesize
1597 1597 realoffset = offset & ~(cachesize - 1)
1598 1598 reallength = (
1599 1599 (offset + length + cachesize) & ~(cachesize - 1)
1600 1600 ) - realoffset
1601 1601 with self._datareadfp(df) as df:
1602 1602 df.seek(realoffset)
1603 1603 d = df.read(reallength)
1604 1604
1605 1605 self._cachesegment(realoffset, d)
1606 1606 if offset != realoffset or reallength != length:
1607 1607 startoffset = offset - realoffset
1608 1608 if len(d) - startoffset < length:
1609 1609 raise error.RevlogError(
1610 1610 _(
1611 1611 b'partial read of revlog %s; expected %d bytes from '
1612 1612 b'offset %d, got %d'
1613 1613 )
1614 1614 % (
1615 1615 self.indexfile if self._inline else self.datafile,
1616 1616 length,
1617 1617 realoffset,
1618 1618 len(d) - startoffset,
1619 1619 )
1620 1620 )
1621 1621
1622 1622 return util.buffer(d, startoffset, length)
1623 1623
1624 1624 if len(d) < length:
1625 1625 raise error.RevlogError(
1626 1626 _(
1627 1627 b'partial read of revlog %s; expected %d bytes from offset '
1628 1628 b'%d, got %d'
1629 1629 )
1630 1630 % (
1631 1631 self.indexfile if self._inline else self.datafile,
1632 1632 length,
1633 1633 offset,
1634 1634 len(d),
1635 1635 )
1636 1636 )
1637 1637
1638 1638 return d
1639 1639
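# --- editor's sketch, not part of the original changeset: with the
# default 64KiB cache size, a 100-byte read at offset 70000 widens to
# one aligned window:
#     realoffset = 70000 & ~(65536 - 1)                                # 65536
#     reallength = ((70000 + 100 + 65536) & ~(65536 - 1)) - realoffset # 65536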
1640 1640 def _getsegment(self, offset, length, df=None):
1641 1641 """Obtain a segment of raw data from the revlog.
1642 1642
1643 1643 Accepts an absolute offset, length of bytes to obtain, and an
1644 1644 optional file handle to the already-opened revlog. If the file
1645 1645 handle is used, its original seek position will not be preserved.
1646 1646
1647 1647 Requests for data may be returned from a cache.
1648 1648
1649 1649 Returns a str or a buffer instance of raw byte data.
1650 1650 """
1651 1651 o, d = self._chunkcache
1652 1652 l = len(d)
1653 1653
1654 1654 # is it in the cache?
1655 1655 cachestart = offset - o
1656 1656 cacheend = cachestart + length
1657 1657 if cachestart >= 0 and cacheend <= l:
1658 1658 if cachestart == 0 and cacheend == l:
1659 1659 return d # avoid a copy
1660 1660 return util.buffer(d, cachestart, cacheend - cachestart)
1661 1661
1662 1662 return self._readsegment(offset, length, df=df)
1663 1663
1664 1664 def _getsegmentforrevs(self, startrev, endrev, df=None):
1665 1665 """Obtain a segment of raw data corresponding to a range of revisions.
1666 1666
1667 1667 Accepts the start and end revisions and an optional already-open
1668 1668 file handle to be used for reading. If the file handle is used, its
1669 1669 seek position will not be preserved.
1670 1670
1671 1671 Requests for data may be satisfied by a cache.
1672 1672
1673 1673 Returns a 2-tuple of (offset, data) for the requested range of
1674 1674 revisions. Offset is the integer offset from the beginning of the
1675 1675 revlog and data is a str or buffer of the raw byte data.
1676 1676
1677 1677 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1678 1678 to determine where each revision's data begins and ends.
1679 1679 """
1680 1680 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1681 1681 # (functions are expensive).
1682 1682 index = self.index
1683 1683 istart = index[startrev]
1684 1684 start = int(istart[0] >> 16)
1685 1685 if startrev == endrev:
1686 1686 end = start + istart[1]
1687 1687 else:
1688 1688 iend = index[endrev]
1689 1689 end = int(iend[0] >> 16) + iend[1]
1690 1690
1691 1691 if self._inline:
1692 1692 start += (startrev + 1) * self._io.size
1693 1693 end += (endrev + 1) * self._io.size
1694 1694 length = end - start
1695 1695
1696 1696 return start, self._getsegment(start, length, df=df)
1697 1697
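# Illustration (editor's note): why the inline case above adds
# (rev + 1) * self._io.size. In an inline revlog, index entries and
# data chunks are interleaved in one file, so the data of revision r
# sits after r + 1 index entries (those of revisions 0..r).
# Hypothetical numbers, assuming the 64-byte v1 entry size:
#
#   self.start(rev) = 1000, rev = 5, self._io.size = 64
#   physical offset = 1000 + (5 + 1) * 64 = 1384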
1698 1698 def _chunk(self, rev, df=None):
1699 1699 """Obtain a single decompressed chunk for a revision.
1700 1700
1701 1701 Accepts an integer revision and an optional already-open file handle
1702 1702 to be used for reading. If used, the seek position of the file will not
1703 1703 be preserved.
1704 1704
1705 1705 Returns a str holding uncompressed data for the requested revision.
1706 1706 """
1707 1707 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1708 1708
1709 1709 def _chunks(self, revs, df=None, targetsize=None):
1710 1710 """Obtain decompressed chunks for the specified revisions.
1711 1711
1712 1712 Accepts an iterable of numeric revisions that are assumed to be in
1713 1713 ascending order. Also accepts an optional already-open file handle
1714 1714 to be used for reading. If used, the seek position of the file will
1715 1715 not be preserved.
1716 1716
1717 1717 This function is similar to calling ``self._chunk()`` multiple times,
1718 1718 but is faster.
1719 1719
1720 1720 Returns a list with decompressed data for each requested revision.
1721 1721 """
1722 1722 if not revs:
1723 1723 return []
1724 1724 start = self.start
1725 1725 length = self.length
1726 1726 inline = self._inline
1727 1727 iosize = self._io.size
1728 1728 buffer = util.buffer
1729 1729
1730 1730 l = []
1731 1731 ladd = l.append
1732 1732
1733 1733 if not self._withsparseread:
1734 1734 slicedchunks = (revs,)
1735 1735 else:
1736 1736 slicedchunks = deltautil.slicechunk(
1737 1737 self, revs, targetsize=targetsize
1738 1738 )
1739 1739
1740 1740 for revschunk in slicedchunks:
1741 1741 firstrev = revschunk[0]
1742 1742 # Skip trailing revisions with empty diff
1743 1743 for lastrev in revschunk[::-1]:
1744 1744 if length(lastrev) != 0:
1745 1745 break
1746 1746
1747 1747 try:
1748 1748 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1749 1749 except OverflowError:
1750 1750 # issue4215 - we can't cache a run of chunks greater than
1751 1751 # 2G on Windows
1752 1752 return [self._chunk(rev, df=df) for rev in revschunk]
1753 1753
1754 1754 decomp = self.decompress
1755 1755 for rev in revschunk:
1756 1756 chunkstart = start(rev)
1757 1757 if inline:
1758 1758 chunkstart += (rev + 1) * iosize
1759 1759 chunklength = length(rev)
1760 1760 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1761 1761
1762 1762 return l
1763 1763
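# Illustration (editor's note): what slicechunk buys us, with made-up
# revision numbers. When sparse reads are enabled, a request whose
# revisions sit far apart on disk is split into dense slices, so each
# slice is served by one contiguous _getsegmentforrevs() read instead
# of a single huge read spanning the gap:
#
#   revs   = [1, 2, 3, 40, 41]        # data of 3 and 40 are far apart
#   slices = [[1, 2, 3], [40, 41]]    # read as two dense segments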
1764 1764 def _chunkclear(self):
1765 1765 """Clear the raw chunk cache."""
1766 1766 self._chunkcache = (0, b'')
1767 1767
1768 1768 def deltaparent(self, rev):
1769 1769 """return deltaparent of the given revision"""
1770 1770 base = self.index[rev][3]
1771 1771 if base == rev:
1772 1772 return nullrev
1773 1773 elif self._generaldelta:
1774 1774 return base
1775 1775 else:
1776 1776 return rev - 1
1777 1777
1778 1778 def issnapshot(self, rev):
1779 1779 """tells whether rev is a snapshot"""
1780 1780 if not self._sparserevlog:
1781 1781 return self.deltaparent(rev) == nullrev
1782 1782 elif util.safehasattr(self.index, b'issnapshot'):
1783 1783 # directly assign the method to cache the testing and access
1784 1784 self.issnapshot = self.index.issnapshot
1785 1785 return self.issnapshot(rev)
1786 1786 if rev == nullrev:
1787 1787 return True
1788 1788 entry = self.index[rev]
1789 1789 base = entry[3]
1790 1790 if base == rev:
1791 1791 return True
1792 1792 if base == nullrev:
1793 1793 return True
1794 1794 p1 = entry[5]
1795 1795 p2 = entry[6]
1796 1796 if base == p1 or base == p2:
1797 1797 return False
1798 1798 return self.issnapshot(base)
1799 1799
1800 1800 def snapshotdepth(self, rev):
1801 1801 """number of snapshot in the chain before this one"""
1802 1802 if not self.issnapshot(rev):
1803 1803 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1804 1804 return len(self._deltachain(rev)[0]) - 1
1805 1805
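# Illustration (editor's note): the snapshot chains checked above,
# sketched. With sparse-revlog, an intermediate snapshot deltas against
# another snapshot instead of one of its parents, so following the base
# links from any snapshot ends at a full text stored against nullrev:
#
#   full (base = nullrev) <- snapshot A (base = full)
#                         <- snapshot B (base = A) <- plain deltas ...
#
# issnapshot() recurses along those base links; snapshotdepth() counts
# the snapshots on that chain.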
1806 1806 def revdiff(self, rev1, rev2):
1807 1807 """return or calculate a delta between two revisions
1808 1808
1809 1809 The delta calculated is in binary form and is intended to be written to
1810 1810 revlog data directly. So this function needs raw revision data.
1811 1811 """
1812 1812 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1813 1813 return bytes(self._chunk(rev2))
1814 1814
1815 1815 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1816 1816
1817 1817 def _processflags(self, text, flags, operation, raw=False):
1818 1818 """deprecated entry point to access flag processors"""
1819 1819 msg = b'_processflag(...) use the specialized variant'
1820 1820 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1821 1821 if raw:
1822 1822 return text, flagutil.processflagsraw(self, text, flags)
1823 1823 elif operation == b'read':
1824 1824 return flagutil.processflagsread(self, text, flags)
1825 1825 else: # write operation
1826 1826 return flagutil.processflagswrite(self, text, flags, None)
1827 1827
1828 1828 def revision(self, nodeorrev, _df=None, raw=False):
1829 1829 """return an uncompressed revision of a given node or revision
1830 1830 number.
1831 1831
1832 1832 _df - an existing file handle to read from. (internal-only)
1833 1833 raw - an optional argument specifying if the revision data is to be
1834 1834 treated as raw data when applying flag transforms. 'raw' should be set
1835 1835 to True when generating changegroups or in debug commands.
1836 1836 """
1837 1837 if raw:
1838 1838 msg = (
1839 1839 b'revlog.revision(..., raw=True) is deprecated, '
1840 1840 b'use revlog.rawdata(...)'
1841 1841 )
1842 1842 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1843 1843 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1844 1844
1845 1845 def sidedata(self, nodeorrev, _df=None):
1846 1846 """a map of extra data related to the changeset but not part of the hash
1847 1847
1848 1848 This function currently returns a dictionary. However, a more advanced
1849 1849 mapping object will likely be used in the future for more
1850 1850 efficient/lazy code.
1851 1851 """
1852 1852 return self._revisiondata(nodeorrev, _df)[1]
1853 1853
1854 1854 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1855 1855 # deal with <nodeorrev> argument type
1856 1856 if isinstance(nodeorrev, int):
1857 1857 rev = nodeorrev
1858 1858 node = self.node(rev)
1859 1859 else:
1860 1860 node = nodeorrev
1861 1861 rev = None
1862 1862
1863 1863 # fast path the special `nullid` rev
1864 1864 if node == nullid:
1865 1865 return b"", {}
1866 1866
1867 1867 # ``rawtext`` is the text as stored inside the revlog. Might be the
1868 1868 # revision or might need to be processed to retrieve the revision.
1869 1869 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1870 1870
1871 1871 if raw and validated:
1872 1872 # if we don't want to process the raw text and the raw
1873 1873 # text is cached, we can exit early.
1874 1874 return rawtext, {}
1875 1875 if rev is None:
1876 1876 rev = self.rev(node)
1877 1877 # the revlog's flags for this revision
1878 1878 # (they usually alter its state or content)
1879 1879 flags = self.flags(rev)
1880 1880
1881 1881 if validated and flags == REVIDX_DEFAULT_FLAGS:
1882 1882 # no extra flags set, no flag processor runs, text = rawtext
1883 1883 return rawtext, {}
1884 1884
1885 1885 sidedata = {}
1886 1886 if raw:
1887 1887 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1888 1888 text = rawtext
1889 1889 else:
1890 1890 try:
1891 1891 r = flagutil.processflagsread(self, rawtext, flags)
1892 1892 except error.SidedataHashError as exc:
1893 1893 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1894 1894 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1895 1895 raise error.RevlogError(msg)
1896 1896 text, validatehash, sidedata = r
1897 1897 if validatehash:
1898 1898 self.checkhash(text, node, rev=rev)
1899 1899 if not validated:
1900 1900 self._revisioncache = (node, rev, rawtext)
1901 1901
1902 1902 return text, sidedata
1903 1903
1904 1904 def _rawtext(self, node, rev, _df=None):
1905 1905 """return the possibly unvalidated rawtext for a revision
1906 1906
1907 1907 returns (rev, rawtext, validated)
1908 1908 """
1909 1909
1910 1910 # revision in the cache (could be useful to apply delta)
1911 1911 cachedrev = None
1912 1912 # An intermediate text to apply deltas to
1913 1913 basetext = None
1914 1914
1915 1915 # Check if we have the entry in cache
1916 1916 # The cache entry looks like (node, rev, rawtext)
1917 1917 if self._revisioncache:
1918 1918 if self._revisioncache[0] == node:
1919 1919 return (rev, self._revisioncache[2], True)
1920 1920 cachedrev = self._revisioncache[1]
1921 1921
1922 1922 if rev is None:
1923 1923 rev = self.rev(node)
1924 1924
1925 1925 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1926 1926 if stopped:
1927 1927 basetext = self._revisioncache[2]
1928 1928
1929 1929 # drop cache to save memory, the caller is expected to
1930 1930 # update self._revisioncache after validating the text
1931 1931 self._revisioncache = None
1932 1932
1933 1933 targetsize = None
1934 1934 rawsize = self.index[rev][2]
1935 1935 if 0 <= rawsize:
1936 1936 targetsize = 4 * rawsize
1937 1937
1938 1938 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1939 1939 if basetext is None:
1940 1940 basetext = bytes(bins[0])
1941 1941 bins = bins[1:]
1942 1942
1943 1943 rawtext = mdiff.patches(basetext, bins)
1944 1944 del basetext # let us have a chance to free memory early
1945 1945 return (rev, rawtext, False)
1946 1946
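# Illustration (editor's note): the reconstruction above in miniature,
# using the names from the code. A raw text is the chain base patched
# with every delta of the chain, in order:
#
#   chain, stopped = self._deltachain(rev)    # [base, d1, d2, ..., rev]
#   bins = self._chunks(chain)                # decompressed chunks
#   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
#
# When the cached revision sits on the chain (stopped is True), the
# cached text serves as the base and only the deltas after it are read.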
1947 1947 def rawdata(self, nodeorrev, _df=None):
1948 1948 """return an uncompressed raw data of a given node or revision number.
1949 1949
1950 1950 _df - an existing file handle to read from. (internal-only)
1951 1951 """
1952 1952 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1953 1953
1954 1954 def hash(self, text, p1, p2):
1955 1955 """Compute a node hash.
1956 1956
1957 1957 Available as a function so that subclasses can replace the hash
1958 1958 as needed.
1959 1959 """
1960 1960 return storageutil.hashrevisionsha1(text, p1, p2)
1961 1961
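# Illustration (editor's note): a self-contained sketch of what
# storageutil.hashrevisionsha1 computes. The parent nodes are sorted
# first, so the node id does not depend on parent order. A sketch with
# a hypothetical name, not the helper itself.
import hashlib

def sketch_node_hash(text, p1, p2):
    a, b = sorted([p1, p2])       # order-independent parents
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()             # 20-byte node id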
1962 1962 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1963 1963 """Check node hash integrity.
1964 1964
1965 1965 Available as a function so that subclasses can extend hash mismatch
1966 1966 behaviors as needed.
1967 1967 """
1968 1968 try:
1969 1969 if p1 is None and p2 is None:
1970 1970 p1, p2 = self.parents(node)
1971 1971 if node != self.hash(text, p1, p2):
1972 1972 # Clear the revision cache on hash failure. The revision cache
1973 1973 # only stores the raw revision and clearing the cache does have
1974 1974 # the side-effect that we won't have a cache hit when the raw
1975 1975 # revision data is accessed. But this case should be rare and
1976 1976 # it is extra work to teach the cache about the hash
1977 1977 # verification state.
1978 1978 if self._revisioncache and self._revisioncache[0] == node:
1979 1979 self._revisioncache = None
1980 1980
1981 1981 revornode = rev
1982 1982 if revornode is None:
1983 1983 revornode = templatefilters.short(hex(node))
1984 1984 raise error.RevlogError(
1985 1985 _(b"integrity check failed on %s:%s")
1986 1986 % (self.indexfile, pycompat.bytestr(revornode))
1987 1987 )
1988 1988 except error.RevlogError:
1989 1989 if self._censorable and storageutil.iscensoredtext(text):
1990 1990 raise error.CensoredNodeError(self.indexfile, node, text)
1991 1991 raise
1992 1992
1993 1993 def _enforceinlinesize(self, tr, fp=None):
1994 1994 """Check if the revlog is too big for inline and convert if so.
1995 1995
1996 1996 This should be called after revisions are added to the revlog. If the
1997 1997 revlog has grown too large to be an inline revlog, it will convert it
1998 1998 to use multiple index and data files.
1999 1999 """
2000 2000 tiprev = len(self) - 1
2001 2001 if (
2002 2002 not self._inline
2003 2003 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2004 2004 ):
2005 2005 return
2006 2006
2007 2007 troffset = tr.findoffset(self.indexfile)
2008 2008 if troffset is None:
2009 2009 raise error.RevlogError(
2010 2010 _(b"%s not found in the transaction") % self.indexfile
2011 2011 )
2012 2012 trindex = 0
2013 2013 tr.add(self.datafile, 0)
2014 2014
2015 2015 if fp:
2016 2016 fp.flush()
2017 2017 fp.close()
2018 2018 # We can't use the cached file handle after close(). So prevent
2019 2019 # its usage.
2020 2020 self._writinghandles = None
2021 2021
2022 2022 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2023 2023 for r in self:
2024 2024 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2025 2025 if troffset <= self.start(r):
2026 2026 trindex = r
2027 2027
2028 2028 with self._indexfp(b'w') as fp:
2029 2029 self.version &= ~FLAG_INLINE_DATA
2030 2030 self._inline = False
2031 2031 io = self._io
2032 2032 for i in self:
2033 2033 e = io.packentry(self.index[i], self.node, self.version, i)
2034 2034 fp.write(e)
2035 2035
2036 2036 # the temp file replaces the real index when we exit the context
2037 2037 # manager
2038 2038
2039 2039 tr.replace(self.indexfile, trindex * self._io.size)
2040 2040 nodemaputil.setup_persistent_nodemap(tr, self)
2041 2041 self._chunkclear()
2042 2042
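# Illustration (editor's note): the two on-disk layouts this method
# converts between, sketched. Inline revlogs interleave index entries
# and data in the .i file; once past _maxinline the data moves to a
# separate .d file:
#
#   inline:  .i = [entry 0][data 0][entry 1][data 1] ...
#   split:   .i = [entry 0][entry 1] ...
#            .d = [data 0][data 1] ...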
2043 2043 def _nodeduplicatecallback(self, transaction, node):
2044 2044 """called when trying to add a node already stored."""
2045 2045
2046 2046 def addrevision(
2047 2047 self,
2048 2048 text,
2049 2049 transaction,
2050 2050 link,
2051 2051 p1,
2052 2052 p2,
2053 2053 cachedelta=None,
2054 2054 node=None,
2055 2055 flags=REVIDX_DEFAULT_FLAGS,
2056 2056 deltacomputer=None,
2057 2057 sidedata=None,
2058 2058 ):
2059 2059 """add a revision to the log
2060 2060
2061 2061 text - the revision data to add
2062 2062 transaction - the transaction object used for rollback
2063 2063 link - the linkrev data to add
2064 2064 p1, p2 - the parent nodeids of the revision
2065 2065 cachedelta - an optional precomputed delta
2066 2066 node - nodeid of revision; typically node is not specified, and it is
2067 2067 computed by default as hash(text, p1, p2), however subclasses might
2068 2068 use a different hashing method (and override checkhash() in that case)
2069 2069 flags - the known flags to set on the revision
2070 2070 deltacomputer - an optional deltacomputer instance shared between
2071 2071 multiple calls
2072 2072 """
2073 2073 if link == nullrev:
2074 2074 raise error.RevlogError(
2075 2075 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2076 2076 )
2077 2077
2078 2078 if sidedata is None:
2079 2079 sidedata = {}
2080 2080 flags = flags & ~REVIDX_SIDEDATA
2081 2081 elif not self.hassidedata:
2082 2082 raise error.ProgrammingError(
2083 2083 _(b"trying to add sidedata to a revlog who don't support them")
2084 2084 )
2085 2085 else:
2086 2086 flags |= REVIDX_SIDEDATA
2087 2087
2088 2088 if flags:
2089 2089 node = node or self.hash(text, p1, p2)
2090 2090
2091 2091 rawtext, validatehash = flagutil.processflagswrite(
2092 2092 self, text, flags, sidedata=sidedata
2093 2093 )
2094 2094
2095 2095 # If the flag processor modifies the revision data, ignore any provided
2096 2096 # cachedelta.
2097 2097 if rawtext != text:
2098 2098 cachedelta = None
2099 2099
2100 2100 if len(rawtext) > _maxentrysize:
2101 2101 raise error.RevlogError(
2102 2102 _(
2103 2103 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2104 2104 )
2105 2105 % (self.indexfile, len(rawtext))
2106 2106 )
2107 2107
2108 2108 node = node or self.hash(rawtext, p1, p2)
2109 2109 if self.index.has_node(node):
2110 2110 return node
2111 2111
2112 2112 if validatehash:
2113 2113 self.checkhash(rawtext, node, p1=p1, p2=p2)
2114 2114
2115 return self.addrawrevision(
2115 rev = self.addrawrevision(
2116 2116 rawtext,
2117 2117 transaction,
2118 2118 link,
2119 2119 p1,
2120 2120 p2,
2121 2121 node,
2122 2122 flags,
2123 2123 cachedelta=cachedelta,
2124 2124 deltacomputer=deltacomputer,
2125 2125 )
2126 return node
2126 2127
2127 2128 def addrawrevision(
2128 2129 self,
2129 2130 rawtext,
2130 2131 transaction,
2131 2132 link,
2132 2133 p1,
2133 2134 p2,
2134 2135 node,
2135 2136 flags,
2136 2137 cachedelta=None,
2137 2138 deltacomputer=None,
2138 2139 ):
2139 2140 """add a raw revision with known flags, node and parents
2140 2141 useful when reusing a revision not stored in this revlog (e.g. received
2141 2142 over the wire, or read from an external bundle).
2142 2143 """
2143 2144 dfh = None
2144 2145 if not self._inline:
2145 2146 dfh = self._datafp(b"a+")
2146 2147 ifh = self._indexfp(b"a+")
2147 2148 try:
2148 self._addrevision(
2149 return self._addrevision(
2149 2150 node,
2150 2151 rawtext,
2151 2152 transaction,
2152 2153 link,
2153 2154 p1,
2154 2155 p2,
2155 2156 flags,
2156 2157 cachedelta,
2157 2158 ifh,
2158 2159 dfh,
2159 2160 deltacomputer=deltacomputer,
2160 2161 )
2161 return node
2162 2162 finally:
2163 2163 if dfh:
2164 2164 dfh.close()
2165 2165 ifh.close()
2166 2166
2167 2167 def compress(self, data):
2168 2168 """Generate a possibly-compressed representation of data."""
2169 2169 if not data:
2170 2170 return b'', data
2171 2171
2172 2172 compressed = self._compressor.compress(data)
2173 2173
2174 2174 if compressed:
2175 2175 # The revlog compressor added the header in the returned data.
2176 2176 return b'', compressed
2177 2177
2178 2178 if data[0:1] == b'\0':
2179 2179 return b'', data
2180 2180 return b'u', data
2181 2181
2182 2182 def decompress(self, data):
2183 2183 """Decompress a revlog chunk.
2184 2184
2185 2185 The chunk is expected to begin with a header identifying the
2186 2186 format type so it can be routed to an appropriate decompressor.
2187 2187 """
2188 2188 if not data:
2189 2189 return data
2190 2190
2191 2191 # Revlogs are read much more frequently than they are written and many
2192 2192 # chunks only take microseconds to decompress, so performance is
2193 2193 # important here.
2194 2194 #
2195 2195 # We can make a few assumptions about revlogs:
2196 2196 #
2197 2197 # 1) the majority of chunks will be compressed (as opposed to inline
2198 2198 # raw data).
2199 2199 # 2) decompressing *any* data will likely be at least 10x slower than
2200 2200 # returning raw inline data.
2201 2201 # 3) we want to prioritize common and officially supported compression
2202 2202 # engines
2203 2203 #
2204 2204 # It follows that we want to optimize for "decompress compressed data
2205 2205 # when encoded with common and officially supported compression engines"
2206 2206 # case over "raw data" and "data encoded by less common or non-official
2207 2207 # compression engines." That is why we have the inline lookup first
2208 2208 # followed by the compengines lookup.
2209 2209 #
2210 2210 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2211 2211 # compressed chunks. And this matters for changelog and manifest reads.
2212 2212 t = data[0:1]
2213 2213
2214 2214 if t == b'x':
2215 2215 try:
2216 2216 return _zlibdecompress(data)
2217 2217 except zlib.error as e:
2218 2218 raise error.RevlogError(
2219 2219 _(b'revlog decompress error: %s')
2220 2220 % stringutil.forcebytestr(e)
2221 2221 )
2222 2222 # '\0' is more common than 'u' so it goes first.
2223 2223 elif t == b'\0':
2224 2224 return data
2225 2225 elif t == b'u':
2226 2226 return util.buffer(data, 1)
2227 2227
2228 2228 try:
2229 2229 compressor = self._decompressors[t]
2230 2230 except KeyError:
2231 2231 try:
2232 2232 engine = util.compengines.forrevlogheader(t)
2233 2233 compressor = engine.revlogcompressor(self._compengineopts)
2234 2234 self._decompressors[t] = compressor
2235 2235 except KeyError:
2236 2236 raise error.RevlogError(_(b'unknown compression type %r') % t)
2237 2237
2238 2238 return compressor.decompress(data)
2239 2239
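# Illustration (editor's note): a standalone round trip through the
# header convention used by compress()/decompress(). Hedged sketch: the
# real compress() returns a (header, data) pair and supports pluggable
# engines; this collapses both into one byte string for brevity.
import zlib

def sketch_compress(data):
    if not data:
        return data
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return comp              # zlib output begins with b'x'
    if data[0:1] == b'\0':
        return data              # a NUL lead byte marks itself
    return b'u' + data           # 'u' marks stored-uncompressed

def sketch_decompress(chunk):
    t = chunk[0:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t == b'\0' or not chunk:
        return chunk
    if t == b'u':
        return chunk[1:]
    raise ValueError('unknown compression type %r' % t)

payload = b'some revision text ' * 64
assert sketch_decompress(sketch_compress(payload)) == payload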
2240 2240 def _addrevision(
2241 2241 self,
2242 2242 node,
2243 2243 rawtext,
2244 2244 transaction,
2245 2245 link,
2246 2246 p1,
2247 2247 p2,
2248 2248 flags,
2249 2249 cachedelta,
2250 2250 ifh,
2251 2251 dfh,
2252 2252 alwayscache=False,
2253 2253 deltacomputer=None,
2254 2254 ):
2255 2255 """internal function to add revisions to the log
2256 2256
2257 2257 see addrevision for argument descriptions.
2258 2258
2259 2259 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2260 2260
2261 2261 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2262 2262 be used.
2263 2263
2264 2264 invariants:
2265 2265 - rawtext is optional (can be None); if not set, cachedelta must be set.
2266 2266 if both are set, they must correspond to each other.
2267 2267 """
2268 2268 if node == nullid:
2269 2269 raise error.RevlogError(
2270 2270 _(b"%s: attempt to add null revision") % self.indexfile
2271 2271 )
2272 2272 if node == wdirid or node in wdirfilenodeids:
2273 2273 raise error.RevlogError(
2274 2274 _(b"%s: attempt to add wdir revision") % self.indexfile
2275 2275 )
2276 2276
2277 2277 if self._inline:
2278 2278 fh = ifh
2279 2279 else:
2280 2280 fh = dfh
2281 2281
2282 2282 btext = [rawtext]
2283 2283
2284 2284 curr = len(self)
2285 2285 prev = curr - 1
2286 2286 offset = self.end(prev)
2287 2287 p1r, p2r = self.rev(p1), self.rev(p2)
2288 2288
2289 2289 # full versions are inserted when the needed deltas
2290 2290 # become comparable to the uncompressed text
2291 2291 if rawtext is None:
2292 2292 # we need the rawtext size before it is changed by flag processors,
2293 2293 # i.e. the non-raw size. use revlog explicitly to avoid filelog's
2294 2294 # extra logic that might subtract the metadata size.
2295 2295 textlen = mdiff.patchedsize(
2296 2296 revlog.size(self, cachedelta[0]), cachedelta[1]
2297 2297 )
2298 2298 else:
2299 2299 textlen = len(rawtext)
2300 2300
2301 2301 if deltacomputer is None:
2302 2302 deltacomputer = deltautil.deltacomputer(self)
2303 2303
2304 2304 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2305 2305
2306 2306 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2307 2307
2308 2308 e = (
2309 2309 offset_type(offset, flags),
2310 2310 deltainfo.deltalen,
2311 2311 textlen,
2312 2312 deltainfo.base,
2313 2313 link,
2314 2314 p1r,
2315 2315 p2r,
2316 2316 node,
2317 2317 )
2318 2318 self.index.append(e)
2319 2319
2320 2320 entry = self._io.packentry(e, self.node, self.version, curr)
2321 2321 self._writeentry(
2322 2322 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2323 2323 )
2324 2324
2325 2325 rawtext = btext[0]
2326 2326
2327 2327 if alwayscache and rawtext is None:
2328 2328 rawtext = deltacomputer.buildtext(revinfo, fh)
2329 2329
2330 2330 if type(rawtext) == bytes: # only accept immutable objects
2331 2331 self._revisioncache = (node, curr, rawtext)
2332 2332 self._chainbasecache[curr] = deltainfo.chainbase
2333 2333 return curr
2334 2334
2335 2335 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2336 2336 # Files opened in a+ mode have inconsistent behavior on various
2337 2337 # platforms. Windows requires that a file positioning call be made
2338 2338 # when the file handle transitions between reads and writes. See
2339 2339 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2340 2340 # platforms, Python or the platform itself can be buggy. Some versions
2341 2341 # of Solaris have been observed to not append at the end of the file
2342 2342 # if the file was seeked to before the end. See issue4943 for more.
2343 2343 #
2344 2344 # We work around this issue by inserting a seek() before writing.
2345 2345 # Note: This is likely not necessary on Python 3. However, because
2346 2346 # the file handle is reused for reads and may be seeked there, we need
2347 2347 # to be careful before changing this.
2348 2348 ifh.seek(0, os.SEEK_END)
2349 2349 if dfh:
2350 2350 dfh.seek(0, os.SEEK_END)
2351 2351
2352 2352 curr = len(self) - 1
2353 2353 if not self._inline:
2354 2354 transaction.add(self.datafile, offset)
2355 2355 transaction.add(self.indexfile, curr * len(entry))
2356 2356 if data[0]:
2357 2357 dfh.write(data[0])
2358 2358 dfh.write(data[1])
2359 2359 ifh.write(entry)
2360 2360 else:
2361 2361 offset += curr * self._io.size
2362 2362 transaction.add(self.indexfile, offset)
2363 2363 ifh.write(entry)
2364 2364 ifh.write(data[0])
2365 2365 ifh.write(data[1])
2366 2366 self._enforceinlinesize(transaction, ifh)
2367 2367 nodemaputil.setup_persistent_nodemap(transaction, self)
2368 2368
2369 2369 def addgroup(
2370 2370 self,
2371 2371 deltas,
2372 2372 linkmapper,
2373 2373 transaction,
2374 2374 alwayscache=False,
2375 2375 addrevisioncb=None,
2376 2376 duplicaterevisioncb=None,
2377 2377 ):
2378 2378 """
2379 2379 add a delta group
2380 2380
2381 2381 given a set of deltas, add them to the revision log. the
2382 2382 first delta is against its parent, which should be in our
2383 2383 log, the rest are against the previous delta.
2384 2384
2385 2385 If ``addrevisioncb`` is defined, it will be called with arguments of
2386 2386 this revlog and the node that was added.
2387 2387 """
2388 2388
2389 2389 if self._writinghandles:
2390 2390 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2391 2391
2392 2392 r = len(self)
2393 2393 end = 0
2394 2394 if r:
2395 2395 end = self.end(r - 1)
2396 2396 ifh = self._indexfp(b"a+")
2397 2397 isize = r * self._io.size
2398 2398 if self._inline:
2399 2399 transaction.add(self.indexfile, end + isize)
2400 2400 dfh = None
2401 2401 else:
2402 2402 transaction.add(self.indexfile, isize)
2403 2403 transaction.add(self.datafile, end)
2404 2404 dfh = self._datafp(b"a+")
2405 2405
2406 2406 def flush():
2407 2407 if dfh:
2408 2408 dfh.flush()
2409 2409 ifh.flush()
2410 2410
2411 2411 self._writinghandles = (ifh, dfh)
2412 2412 empty = True
2413 2413
2414 2414 try:
2415 2415 deltacomputer = deltautil.deltacomputer(self)
2416 2416 # loop through our set of deltas
2417 2417 for data in deltas:
2418 2418 node, p1, p2, linknode, deltabase, delta, flags = data
2419 2419 link = linkmapper(linknode)
2420 2420 flags = flags or REVIDX_DEFAULT_FLAGS
2421 2421
2422 2422 if self.index.has_node(node):
2423 2423 # this can happen if two branches make the same change
2424 2424 self._nodeduplicatecallback(transaction, node)
2425 2425 if duplicaterevisioncb:
2426 2426 duplicaterevisioncb(self, node)
2427 2427 empty = False
2428 2428 continue
2429 2429
2430 2430 for p in (p1, p2):
2431 2431 if not self.index.has_node(p):
2432 2432 raise error.LookupError(
2433 2433 p, self.indexfile, _(b'unknown parent')
2434 2434 )
2435 2435
2436 2436 if not self.index.has_node(deltabase):
2437 2437 raise error.LookupError(
2438 2438 deltabase, self.indexfile, _(b'unknown delta base')
2439 2439 )
2440 2440
2441 2441 baserev = self.rev(deltabase)
2442 2442
2443 2443 if baserev != nullrev and self.iscensored(baserev):
2444 2444 # if base is censored, delta must be full replacement in a
2445 2445 # single patch operation
2446 2446 hlen = struct.calcsize(b">lll")
2447 2447 oldlen = self.rawsize(baserev)
2448 2448 newlen = len(delta) - hlen
2449 2449 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2450 2450 raise error.CensoredBaseError(
2451 2451 self.indexfile, self.node(baserev)
2452 2452 )
2453 2453
2454 2454 if not flags and self._peek_iscensored(baserev, delta, flush):
2455 2455 flags |= REVIDX_ISCENSORED
2456 2456
2457 2457 # We assume consumers of addrevisioncb will want to retrieve
2458 2458 # the added revision, which will require a call to
2459 2459 # revision(). revision() will fast path if there is a cache
2460 2460 # hit. So, we tell _addrevision() to always cache in this case.
2461 2461 # We're only using addgroup() in the context of changegroup
2462 2462 # generation so the revision data can always be handled as raw
2463 2463 # by the flagprocessor.
2464 2464 self._addrevision(
2465 2465 node,
2466 2466 None,
2467 2467 transaction,
2468 2468 link,
2469 2469 p1,
2470 2470 p2,
2471 2471 flags,
2472 2472 (baserev, delta),
2473 2473 ifh,
2474 2474 dfh,
2475 2475 alwayscache=alwayscache,
2476 2476 deltacomputer=deltacomputer,
2477 2477 )
2478 2478
2479 2479 if addrevisioncb:
2480 2480 addrevisioncb(self, node)
2481 2481 empty = False
2482 2482
2483 2483 if not dfh and not self._inline:
2484 2484 # addrevision switched from inline to conventional
2485 2485 # reopen the index
2486 2486 ifh.close()
2487 2487 dfh = self._datafp(b"a+")
2488 2488 ifh = self._indexfp(b"a+")
2489 2489 self._writinghandles = (ifh, dfh)
2490 2490 finally:
2491 2491 self._writinghandles = None
2492 2492
2493 2493 if dfh:
2494 2494 dfh.close()
2495 2495 ifh.close()
2496 2496 return not empty
2497 2497
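# Illustration (editor's note): the shape addgroup() consumes, matching
# the unpacking in the loop above. Values here are placeholders, not a
# real changegroup; the linkmapper translates each linknode into a
# linkrev for the target revlog.
#
#   deltas = [
#       (node, p1, p2, linknode, deltabase, delta, flags),
#       ...
#   ]
#   rl.addgroup(deltas, linkmapper, transaction)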
2498 2498 def iscensored(self, rev):
2499 2499 """Check if a file revision is censored."""
2500 2500 if not self._censorable:
2501 2501 return False
2502 2502
2503 2503 return self.flags(rev) & REVIDX_ISCENSORED
2504 2504
2505 2505 def _peek_iscensored(self, baserev, delta, flush):
2506 2506 """Quickly check if a delta produces a censored revision."""
2507 2507 if not self._censorable:
2508 2508 return False
2509 2509
2510 2510 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2511 2511
2512 2512 def getstrippoint(self, minlink):
2513 2513 """find the minimum rev that must be stripped to strip the linkrev
2514 2514
2515 2515 Returns a tuple containing the minimum rev and a set of all revs that
2516 2516 have linkrevs that will be broken by this strip.
2517 2517 """
2518 2518 return storageutil.resolvestripinfo(
2519 2519 minlink,
2520 2520 len(self) - 1,
2521 2521 self.headrevs(),
2522 2522 self.linkrev,
2523 2523 self.parentrevs,
2524 2524 )
2525 2525
2526 2526 def strip(self, minlink, transaction):
2527 2527 """truncate the revlog on the first revision with a linkrev >= minlink
2528 2528
2529 2529 This function is called when we're stripping revision minlink and
2530 2530 its descendants from the repository.
2531 2531
2532 2532 We have to remove all revisions with linkrev >= minlink, because
2533 2533 the equivalent changelog revisions will be renumbered after the
2534 2534 strip.
2535 2535
2536 2536 So we truncate the revlog on the first of these revisions, and
2537 2537 trust that the caller has saved the revisions that shouldn't be
2538 2538 removed and that it'll re-add them after this truncation.
2539 2539 """
2540 2540 if len(self) == 0:
2541 2541 return
2542 2542
2543 2543 rev, _ = self.getstrippoint(minlink)
2544 2544 if rev == len(self):
2545 2545 return
2546 2546
2547 2547 # first truncate the files on disk
2548 2548 end = self.start(rev)
2549 2549 if not self._inline:
2550 2550 transaction.add(self.datafile, end)
2551 2551 end = rev * self._io.size
2552 2552 else:
2553 2553 end += rev * self._io.size
2554 2554
2555 2555 transaction.add(self.indexfile, end)
2556 2556
2557 2557 # then reset internal state in memory to forget those revisions
2558 2558 self._revisioncache = None
2559 2559 self._chaininfocache = util.lrucachedict(500)
2560 2560 self._chunkclear()
2561 2561
2562 2562 del self.index[rev:-1]
2563 2563
2564 2564 def checksize(self):
2565 2565 """Check size of index and data files
2566 2566
2567 2567 return a (dd, di) tuple.
2568 2568 - dd: extra bytes for the "data" file
2569 2569 - di: extra bytes for the "index" file
2570 2570
2571 2571 A healthy revlog will return (0, 0).
2572 2572 """
2573 2573 expected = 0
2574 2574 if len(self):
2575 2575 expected = max(0, self.end(len(self) - 1))
2576 2576
2577 2577 try:
2578 2578 with self._datafp() as f:
2579 2579 f.seek(0, io.SEEK_END)
2580 2580 actual = f.tell()
2581 2581 dd = actual - expected
2582 2582 except IOError as inst:
2583 2583 if inst.errno != errno.ENOENT:
2584 2584 raise
2585 2585 dd = 0
2586 2586
2587 2587 try:
2588 2588 f = self.opener(self.indexfile)
2589 2589 f.seek(0, io.SEEK_END)
2590 2590 actual = f.tell()
2591 2591 f.close()
2592 2592 s = self._io.size
2593 2593 i = max(0, actual // s)
2594 2594 di = actual - (i * s)
2595 2595 if self._inline:
2596 2596 databytes = 0
2597 2597 for r in self:
2598 2598 databytes += max(0, self.length(r))
2599 2599 dd = 0
2600 2600 di = actual - len(self) * s - databytes
2601 2601 except IOError as inst:
2602 2602 if inst.errno != errno.ENOENT:
2603 2603 raise
2604 2604 di = 0
2605 2605
2606 2606 return (dd, di)
2607 2607
2608 2608 def files(self):
2609 2609 res = [self.indexfile]
2610 2610 if not self._inline:
2611 2611 res.append(self.datafile)
2612 2612 return res
2613 2613
2614 2614 def emitrevisions(
2615 2615 self,
2616 2616 nodes,
2617 2617 nodesorder=None,
2618 2618 revisiondata=False,
2619 2619 assumehaveparentrevisions=False,
2620 2620 deltamode=repository.CG_DELTAMODE_STD,
2621 2621 ):
2622 2622 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2623 2623 raise error.ProgrammingError(
2624 2624 b'unhandled value for nodesorder: %s' % nodesorder
2625 2625 )
2626 2626
2627 2627 if nodesorder is None and not self._generaldelta:
2628 2628 nodesorder = b'storage'
2629 2629
2630 2630 if (
2631 2631 not self._storedeltachains
2632 2632 and deltamode != repository.CG_DELTAMODE_PREV
2633 2633 ):
2634 2634 deltamode = repository.CG_DELTAMODE_FULL
2635 2635
2636 2636 return storageutil.emitrevisions(
2637 2637 self,
2638 2638 nodes,
2639 2639 nodesorder,
2640 2640 revlogrevisiondelta,
2641 2641 deltaparentfn=self.deltaparent,
2642 2642 candeltafn=self.candelta,
2643 2643 rawsizefn=self.rawsize,
2644 2644 revdifffn=self.revdiff,
2645 2645 flagsfn=self.flags,
2646 2646 deltamode=deltamode,
2647 2647 revisiondata=revisiondata,
2648 2648 assumehaveparentrevisions=assumehaveparentrevisions,
2649 2649 )
2650 2650
2651 2651 DELTAREUSEALWAYS = b'always'
2652 2652 DELTAREUSESAMEREVS = b'samerevs'
2653 2653 DELTAREUSENEVER = b'never'
2654 2654
2655 2655 DELTAREUSEFULLADD = b'fulladd'
2656 2656
2657 2657 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2658 2658
2659 2659 def clone(
2660 2660 self,
2661 2661 tr,
2662 2662 destrevlog,
2663 2663 addrevisioncb=None,
2664 2664 deltareuse=DELTAREUSESAMEREVS,
2665 2665 forcedeltabothparents=None,
2666 2666 sidedatacompanion=None,
2667 2667 ):
2668 2668 """Copy this revlog to another, possibly with format changes.
2669 2669
2670 2670 The destination revlog will contain the same revisions and nodes.
2671 2671 However, it may not be bit-for-bit identical due to e.g. delta encoding
2672 2672 differences.
2673 2673
2674 2674 The ``deltareuse`` argument controls how deltas from the existing revlog
2675 2675 are preserved in the destination revlog. The argument can have the
2676 2676 following values:
2677 2677
2678 2678 DELTAREUSEALWAYS
2679 2679 Deltas will always be reused (if possible), even if the destination
2680 2680 revlog would not select the same revisions for the delta. This is the
2681 2681 fastest mode of operation.
2682 2682 DELTAREUSESAMEREVS
2683 2683 Deltas will be reused if the destination revlog would pick the same
2684 2684 revisions for the delta. This mode strikes a balance between speed
2685 2685 and optimization.
2686 2686 DELTAREUSENEVER
2687 2687 Deltas will never be reused. This is the slowest mode of execution.
2688 2688 This mode can be used to recompute deltas (e.g. if the diff/delta
2689 2689 algorithm changes).
2690 2690 DELTAREUSEFULLADD
2691 2691 Revisions will be re-added as if they were new content. This is
2692 2692 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2693 2693 e.g. large file detection and handling.
2694 2694
2695 2695 Delta computation can be slow, so the choice of delta reuse policy can
2696 2696 significantly affect run time.
2697 2697
2698 2698 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2699 2699 two extremes. Deltas will be reused if they are appropriate. But if the
2700 2700 delta could choose a better revision, it will do so. This means if you
2701 2701 are converting a non-generaldelta revlog to a generaldelta revlog,
2702 2702 deltas will be recomputed if the delta's parent isn't a parent of the
2703 2703 revision.
2704 2704
2705 2705 In addition to the delta policy, the ``forcedeltabothparents``
2706 2706 argument controls whether to force computing deltas against both parents
2707 2707 for merges. If None, the destination revlog's existing default is kept.
2708 2708
2709 2709 If not None, `sidedatacompanion` is a callable that accepts two
2710 2710 arguments:
2711 2711
2712 2712 (srcrevlog, rev)
2713 2713
2714 2714 and returns a quintet that controls changes to sidedata content from the
2715 2715 old revision to the new clone result:
2716 2716
2717 2717 (dropall, filterout, update, new_flags, dropped_flags)
2718 2718
2719 2719 * if `dropall` is True, all sidedata should be dropped
2720 2720 * `filterout` is a set of sidedata keys that should be dropped
2721 2721 * `update` is a mapping of additional/new key -> value
2722 2722 * `new_flags` is a bitfield of new flags that the revision should get
2723 2723 * `dropped_flags` is a bitfield of flags that the revision should no longer have
2724 2724 """
2725 2725 if deltareuse not in self.DELTAREUSEALL:
2726 2726 raise ValueError(
2727 2727 _(b'value for deltareuse invalid: %s') % deltareuse
2728 2728 )
2729 2729
2730 2730 if len(destrevlog):
2731 2731 raise ValueError(_(b'destination revlog is not empty'))
2732 2732
2733 2733 if getattr(self, 'filteredrevs', None):
2734 2734 raise ValueError(_(b'source revlog has filtered revisions'))
2735 2735 if getattr(destrevlog, 'filteredrevs', None):
2736 2736 raise ValueError(_(b'destination revlog has filtered revisions'))
2737 2737
2738 2738 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2739 2739 # if possible.
2740 2740 oldlazydelta = destrevlog._lazydelta
2741 2741 oldlazydeltabase = destrevlog._lazydeltabase
2742 2742 oldamd = destrevlog._deltabothparents
2743 2743
2744 2744 try:
2745 2745 if deltareuse == self.DELTAREUSEALWAYS:
2746 2746 destrevlog._lazydeltabase = True
2747 2747 destrevlog._lazydelta = True
2748 2748 elif deltareuse == self.DELTAREUSESAMEREVS:
2749 2749 destrevlog._lazydeltabase = False
2750 2750 destrevlog._lazydelta = True
2751 2751 elif deltareuse == self.DELTAREUSENEVER:
2752 2752 destrevlog._lazydeltabase = False
2753 2753 destrevlog._lazydelta = False
2754 2754
2755 2755 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2756 2756
2757 2757 self._clone(
2758 2758 tr,
2759 2759 destrevlog,
2760 2760 addrevisioncb,
2761 2761 deltareuse,
2762 2762 forcedeltabothparents,
2763 2763 sidedatacompanion,
2764 2764 )
2765 2765
2766 2766 finally:
2767 2767 destrevlog._lazydelta = oldlazydelta
2768 2768 destrevlog._lazydeltabase = oldlazydeltabase
2769 2769 destrevlog._deltabothparents = oldamd
2770 2770
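# Illustration (editor's note): a hypothetical driver for clone(),
# e.g. during a format upgrade. The repo/transaction names are
# assumptions, not part of this module.
#
#   with repo.transaction(b'upgrade') as tr:
#       src.clone(tr, destrevlog, deltareuse=revlog.DELTAREUSESAMEREVS)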
2771 2771 def _clone(
2772 2772 self,
2773 2773 tr,
2774 2774 destrevlog,
2775 2775 addrevisioncb,
2776 2776 deltareuse,
2777 2777 forcedeltabothparents,
2778 2778 sidedatacompanion,
2779 2779 ):
2780 2780 """perform the core duty of `revlog.clone` after parameter processing"""
2781 2781 deltacomputer = deltautil.deltacomputer(destrevlog)
2782 2782 index = self.index
2783 2783 for rev in self:
2784 2784 entry = index[rev]
2785 2785
2786 2786 # Some classes override linkrev to take filtered revs into
2787 2787 # account. Use raw entry from index.
2788 2788 flags = entry[0] & 0xFFFF
2789 2789 linkrev = entry[4]
2790 2790 p1 = index[entry[5]][7]
2791 2791 p2 = index[entry[6]][7]
2792 2792 node = entry[7]
2793 2793
2794 2794 sidedataactions = (False, [], {}, 0, 0)
2795 2795 if sidedatacompanion is not None:
2796 2796 sidedataactions = sidedatacompanion(self, rev)
2797 2797
2798 2798 # (Possibly) reuse the delta from the revlog if allowed and
2799 2799 # the revlog chunk is a delta.
2800 2800 cachedelta = None
2801 2801 rawtext = None
2802 2802 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2803 2803 dropall = sidedataactions[0]
2804 2804 filterout = sidedataactions[1]
2805 2805 update = sidedataactions[2]
2806 2806 new_flags = sidedataactions[3]
2807 2807 dropped_flags = sidedataactions[4]
2808 2808 text, sidedata = self._revisiondata(rev)
2809 2809 if dropall:
2810 2810 sidedata = {}
2811 2811 for key in filterout:
2812 2812 sidedata.pop(key, None)
2813 2813 sidedata.update(update)
2814 2814 if not sidedata:
2815 2815 sidedata = None
2816 2816
2817 2817 flags |= new_flags
2818 2818 flags &= ~dropped_flags
2819 2819
2820 2820 destrevlog.addrevision(
2821 2821 text,
2822 2822 tr,
2823 2823 linkrev,
2824 2824 p1,
2825 2825 p2,
2826 2826 cachedelta=cachedelta,
2827 2827 node=node,
2828 2828 flags=flags,
2829 2829 deltacomputer=deltacomputer,
2830 2830 sidedata=sidedata,
2831 2831 )
2832 2832 else:
2833 2833 if destrevlog._lazydelta:
2834 2834 dp = self.deltaparent(rev)
2835 2835 if dp != nullrev:
2836 2836 cachedelta = (dp, bytes(self._chunk(rev)))
2837 2837
2838 2838 if not cachedelta:
2839 2839 rawtext = self.rawdata(rev)
2840 2840
2841 2841 ifh = destrevlog.opener(
2842 2842 destrevlog.indexfile, b'a+', checkambig=False
2843 2843 )
2844 2844 dfh = None
2845 2845 if not destrevlog._inline:
2846 2846 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2847 2847 try:
2848 2848 destrevlog._addrevision(
2849 2849 node,
2850 2850 rawtext,
2851 2851 tr,
2852 2852 linkrev,
2853 2853 p1,
2854 2854 p2,
2855 2855 flags,
2856 2856 cachedelta,
2857 2857 ifh,
2858 2858 dfh,
2859 2859 deltacomputer=deltacomputer,
2860 2860 )
2861 2861 finally:
2862 2862 if dfh:
2863 2863 dfh.close()
2864 2864 ifh.close()
2865 2865
2866 2866 if addrevisioncb:
2867 2867 addrevisioncb(self, rev, node)
2868 2868
2869 2869 def censorrevision(self, tr, censornode, tombstone=b''):
2870 2870 if (self.version & 0xFFFF) == REVLOGV0:
2871 2871 raise error.RevlogError(
2872 2872 _(b'cannot censor with version %d revlogs') % self.version
2873 2873 )
2874 2874
2875 2875 censorrev = self.rev(censornode)
2876 2876 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2877 2877
2878 2878 if len(tombstone) > self.rawsize(censorrev):
2879 2879 raise error.Abort(
2880 2880 _(b'censor tombstone must be no longer than censored data')
2881 2881 )
2882 2882
2883 2883 # Rewriting the revlog in place is hard. Our strategy for censoring is
2884 2884 # to create a new revlog, copy all revisions to it, then replace the
2885 2885 # revlogs on transaction close.
2886 2886
2887 2887 newindexfile = self.indexfile + b'.tmpcensored'
2888 2888 newdatafile = self.datafile + b'.tmpcensored'
2889 2889
2890 2890 # This is a bit dangerous. We could easily have a mismatch of state.
2891 2891 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2892 2892 newrl.version = self.version
2893 2893 newrl._generaldelta = self._generaldelta
2894 2894 newrl._io = self._io
2895 2895
2896 2896 for rev in self.revs():
2897 2897 node = self.node(rev)
2898 2898 p1, p2 = self.parents(node)
2899 2899
2900 2900 if rev == censorrev:
2901 2901 newrl.addrawrevision(
2902 2902 tombstone,
2903 2903 tr,
2904 2904 self.linkrev(censorrev),
2905 2905 p1,
2906 2906 p2,
2907 2907 censornode,
2908 2908 REVIDX_ISCENSORED,
2909 2909 )
2910 2910
2911 2911 if newrl.deltaparent(rev) != nullrev:
2912 2912 raise error.Abort(
2913 2913 _(
2914 2914 b'censored revision stored as delta; '
2915 2915 b'cannot censor'
2916 2916 ),
2917 2917 hint=_(
2918 2918 b'censoring of revlogs is not '
2919 2919 b'fully implemented; please report '
2920 2920 b'this bug'
2921 2921 ),
2922 2922 )
2923 2923 continue
2924 2924
2925 2925 if self.iscensored(rev):
2926 2926 if self.deltaparent(rev) != nullrev:
2927 2927 raise error.Abort(
2928 2928 _(
2929 2929 b'cannot censor due to censored '
2930 2930 b'revision having delta stored'
2931 2931 )
2932 2932 )
2933 2933 rawtext = self._chunk(rev)
2934 2934 else:
2935 2935 rawtext = self.rawdata(rev)
2936 2936
2937 2937 newrl.addrawrevision(
2938 2938 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2939 2939 )
2940 2940
2941 2941 tr.addbackup(self.indexfile, location=b'store')
2942 2942 if not self._inline:
2943 2943 tr.addbackup(self.datafile, location=b'store')
2944 2944
2945 2945 self.opener.rename(newrl.indexfile, self.indexfile)
2946 2946 if not self._inline:
2947 2947 self.opener.rename(newrl.datafile, self.datafile)
2948 2948
2949 2949 self.clearcaches()
2950 2950 self._loadindex()
2951 2951
2952 2952 def verifyintegrity(self, state):
2953 2953 """Verifies the integrity of the revlog.
2954 2954
2955 2955 Yields ``revlogproblem`` instances describing problems that are
2956 2956 found.
2957 2957 """
2958 2958 dd, di = self.checksize()
2959 2959 if dd:
2960 2960 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2961 2961 if di:
2962 2962 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2963 2963
2964 2964 version = self.version & 0xFFFF
2965 2965
2966 2966 # The verifier tells us what version revlog we should be.
2967 2967 if version != state[b'expectedversion']:
2968 2968 yield revlogproblem(
2969 2969 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2970 2970 % (self.indexfile, version, state[b'expectedversion'])
2971 2971 )
2972 2972
2973 2973 state[b'skipread'] = set()
2974 2974 state[b'safe_renamed'] = set()
2975 2975
2976 2976 for rev in self:
2977 2977 node = self.node(rev)
2978 2978
2979 2979 # Verify contents. 4 cases to care about:
2980 2980 #
2981 2981 # common: the most common case
2982 2982 # rename: with a rename
2983 2983 # meta: file content starts with b'\1\n', the metadata
2984 2984 # header defined in filelog.py, but without a rename
2985 2985 # ext: content stored externally
2986 2986 #
2987 2987 # More formally, their differences are shown below:
2988 2988 #
2989 2989 # | common | rename | meta | ext
2990 2990 # -------------------------------------------------------
2991 2991 # flags() | 0 | 0 | 0 | not 0
2992 2992 # renamed() | False | True | False | ?
2993 2993 # rawtext[0:2]=='\1\n'| False | True | True | ?
2994 2994 #
2995 2995 # "rawtext" means the raw text stored in revlog data, which
2996 2996 # could be retrieved by "rawdata(rev)". "text"
2997 2997 # mentioned below is "revision(rev)".
2998 2998 #
2999 2999 # There are 3 different lengths stored physically:
3000 3000 # 1. L1: rawsize, stored in revlog index
3001 3001 # 2. L2: len(rawtext), stored in revlog data
3002 3002 # 3. L3: len(text), stored in revlog data if flags==0, or
3003 3003 # possibly somewhere else if flags!=0
3004 3004 #
3005 3005 # L1 should be equal to L2. L3 could be different from them.
3006 3006 # "text" may or may not affect commit hash depending on flag
3007 3007 # processors (see flagutil.addflagprocessor).
3008 3008 #
3009 3009 # | common | rename | meta | ext
3010 3010 # -------------------------------------------------
3011 3011 # rawsize() | L1 | L1 | L1 | L1
3012 3012 # size() | L1 | L2-LM | L1(*) | L1 (?)
3013 3013 # len(rawtext) | L2 | L2 | L2 | L2
3014 3014 # len(text) | L2 | L2 | L2 | L3
3015 3015 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3016 3016 #
3017 3017 # LM: length of metadata, depending on rawtext
3018 3018 # (*): not ideal, see comment in filelog.size
3019 3019 # (?): could be "- len(meta)" if the resolved content has
3020 3020 # rename metadata
3021 3021 #
3022 3022 # Checks needed to be done:
3023 3023 # 1. length check: L1 == L2, in all cases.
3024 3024 # 2. hash check: depending on flag processor, we may need to
3025 3025 # use either "text" (external), or "rawtext" (in revlog).
3026 3026
3027 3027 try:
3028 3028 skipflags = state.get(b'skipflags', 0)
3029 3029 if skipflags:
3030 3030 skipflags &= self.flags(rev)
3031 3031
3032 3032 _verify_revision(self, skipflags, state, node)
3033 3033
3034 3034 l1 = self.rawsize(rev)
3035 3035 l2 = len(self.rawdata(node))
3036 3036
3037 3037 if l1 != l2:
3038 3038 yield revlogproblem(
3039 3039 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3040 3040 node=node,
3041 3041 )
3042 3042
3043 3043 except error.CensoredNodeError:
3044 3044 if state[b'erroroncensored']:
3045 3045 yield revlogproblem(
3046 3046 error=_(b'censored file data'), node=node
3047 3047 )
3048 3048 state[b'skipread'].add(node)
3049 3049 except Exception as e:
3050 3050 yield revlogproblem(
3051 3051 error=_(b'unpacking %s: %s')
3052 3052 % (short(node), stringutil.forcebytestr(e)),
3053 3053 node=node,
3054 3054 )
3055 3055 state[b'skipread'].add(node)
3056 3056
3057 3057 def storageinfo(
3058 3058 self,
3059 3059 exclusivefiles=False,
3060 3060 sharedfiles=False,
3061 3061 revisionscount=False,
3062 3062 trackedsize=False,
3063 3063 storedsize=False,
3064 3064 ):
3065 3065 d = {}
3066 3066
3067 3067 if exclusivefiles:
3068 3068 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3069 3069 if not self._inline:
3070 3070 d[b'exclusivefiles'].append((self.opener, self.datafile))
3071 3071
3072 3072 if sharedfiles:
3073 3073 d[b'sharedfiles'] = []
3074 3074
3075 3075 if revisionscount:
3076 3076 d[b'revisionscount'] = len(self)
3077 3077
3078 3078 if trackedsize:
3079 3079 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3080 3080
3081 3081 if storedsize:
3082 3082 d[b'storedsize'] = sum(
3083 3083 self.opener.stat(path).st_size for path in self.files()
3084 3084 )
3085 3085
3086 3086 return d