rust-nodemap: automatically use the rust index for persistent nodemap...
marmoute
r45000:e7fff9c3 default
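This changeset makes revlog index loading prefer the Rust-backed index whenever a persistent nodemap file is configured, instead of only when the b'rust.index' opener option is set. A minimal sketch of the new selection rule, restating the hunk around new lines 621-633 below as a standalone function for illustration (not a verbatim excerpt):

    def should_use_rust_index(rustrevlog, nodemap_file, opener_options):
        # No Rust extensions compiled in: keep the C/Python index.
        if rustrevlog is None:
            return False
        # A persistent nodemap file is configured: the Rust index is now
        # selected automatically (the behavior this changeset introduces).
        if nodemap_file is not None:
            return True
        # Otherwise honor the pre-existing explicit opt-in.
        return bool(opener_options.get(b'rust.index'))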
@@ -1,3048 +1,3055 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 templatefilters,
69 69 util,
70 70 )
71 71 from .interfaces import (
72 72 repository,
73 73 util as interfaceutil,
74 74 )
75 75 from .revlogutils import (
76 76 deltas as deltautil,
77 77 flagutil,
78 78 nodemap as nodemaputil,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # Blanked usage of all the names to prevent pyflakes warnings.
87 87 # We need these names available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_EXTSTORED
102 102 REVIDX_DEFAULT_FLAGS
103 103 REVIDX_FLAGS_ORDER
104 104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 105
106 106 parsers = policy.importmod('parsers')
107 107 rustancestor = policy.importrust('ancestor')
108 108 rustdagop = policy.importrust('dagop')
109 109 rustrevlog = policy.importrust('revlog')
110 110
111 111 # Aliased for performance.
112 112 _zlibdecompress = zlib.decompress
113 113
114 114 # max size of revlog with inline data
115 115 _maxinline = 131072
116 116 _chunksize = 1048576
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False, {}
121 121
122 122
123 123 def ellipsiswriteprocessor(rl, text, sidedata):
124 124 return text, False
125 125
126 126
127 127 def ellipsisrawprocessor(rl, text):
128 128 return False
129 129
130 130
131 131 ellipsisprocessor = (
132 132 ellipsisreadprocessor,
133 133 ellipsiswriteprocessor,
134 134 ellipsisrawprocessor,
135 135 )
136 136
137 137
138 138 def getoffset(q):
139 139 return int(q >> 16)
140 140
141 141
142 142 def gettype(q):
143 143 return int(q & 0xFFFF)
144 144
145 145
146 146 def offset_type(offset, type):
147 147 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
148 148 raise ValueError(b'unknown revlog index flags')
149 149 return int(int(offset) << 16 | type)
150 150
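# The first index field packs the data offset and the flag bits into one
# integer: the offset occupies the high bits and the low 16 bits hold the
# flags, as getoffset()/gettype() above unpack. A worked example with
# illustrative values (not from the original file):
#
#     offset_type(4096, 0) == 4096 << 16 == 268435456
#     getoffset(268435456) == 4096
#     gettype(268435456)   == 0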
151 151
152 152 def _verify_revision(rl, skipflags, state, node):
153 153 """Verify the integrity of the given revlog ``node`` while providing a hook
154 154 point for extensions to influence the operation."""
155 155 if skipflags:
156 156 state[b'skipread'].add(node)
157 157 else:
158 158 # Side-effect: read content and verify hash.
159 159 rl.revision(node)
160 160
161 161
162 162 @attr.s(slots=True, frozen=True)
163 163 class _revisioninfo(object):
164 164 """Information about a revision that allows building its fulltext
165 165 node: expected hash of the revision
166 166 p1, p2: parent revs of the revision
167 167 btext: built text cache consisting of a one-element list
168 168 cachedelta: (baserev, uncompressed_delta) or None
169 169 flags: flags associated to the revision storage
170 170
171 171 One of btext[0] or cachedelta must be set.
172 172 """
173 173
174 174 node = attr.ib()
175 175 p1 = attr.ib()
176 176 p2 = attr.ib()
177 177 btext = attr.ib()
178 178 textlen = attr.ib()
179 179 cachedelta = attr.ib()
180 180 flags = attr.ib()
181 181
182 182
183 183 @interfaceutil.implementer(repository.irevisiondelta)
184 184 @attr.s(slots=True)
185 185 class revlogrevisiondelta(object):
186 186 node = attr.ib()
187 187 p1node = attr.ib()
188 188 p2node = attr.ib()
189 189 basenode = attr.ib()
190 190 flags = attr.ib()
191 191 baserevisionsize = attr.ib()
192 192 revision = attr.ib()
193 193 delta = attr.ib()
194 194 linknode = attr.ib(default=None)
195 195
196 196
197 197 @interfaceutil.implementer(repository.iverifyproblem)
198 198 @attr.s(frozen=True)
199 199 class revlogproblem(object):
200 200 warning = attr.ib(default=None)
201 201 error = attr.ib(default=None)
202 202 node = attr.ib(default=None)
203 203
204 204
205 205 # index v0:
206 206 # 4 bytes: offset
207 207 # 4 bytes: compressed length
208 208 # 4 bytes: base rev
209 209 # 4 bytes: link rev
210 210 # 20 bytes: parent 1 nodeid
211 211 # 20 bytes: parent 2 nodeid
212 212 # 20 bytes: nodeid
213 213 indexformatv0 = struct.Struct(b">4l20s20s20s")
214 214 indexformatv0_pack = indexformatv0.pack
215 215 indexformatv0_unpack = indexformatv0.unpack
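# A quick size check of the v0 layout documented above: four 4-byte
# integers plus three 20-byte nodeids give 4 * 4 + 3 * 20 = 76 bytes,
# which matches indexformatv0.size for the format b">4l20s20s20s".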
216 216
217 217
218 218 class revlogoldindex(list):
219 219 @property
220 220 def nodemap(self):
221 221 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
222 222 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
223 223 return self._nodemap
224 224
225 225 @util.propertycache
226 226 def _nodemap(self):
227 227 nodemap = nodemaputil.NodeMap({nullid: nullrev})
228 228 for r in range(0, len(self)):
229 229 n = self[r][7]
230 230 nodemap[n] = r
231 231 return nodemap
232 232
233 233 def has_node(self, node):
234 234 """return True if the node exist in the index"""
235 235 return node in self._nodemap
236 236
237 237 def rev(self, node):
238 238 """return a revision for a node
239 239
240 240 If the node is unknown, raise a RevlogError"""
241 241 return self._nodemap[node]
242 242
243 243 def get_rev(self, node):
244 244 """return a revision for a node
245 245
246 246 If the node is unknown, return None"""
247 247 return self._nodemap.get(node)
248 248
249 249 def append(self, tup):
250 250 self._nodemap[tup[7]] = len(self)
251 251 super(revlogoldindex, self).append(tup)
252 252
253 253 def __delitem__(self, i):
254 254 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
255 255 raise ValueError(b"deleting slices only supports a:-1 with step 1")
256 256 for r in pycompat.xrange(i.start, len(self)):
257 257 del self._nodemap[self[r][7]]
258 258 super(revlogoldindex, self).__delitem__(i)
259 259
260 260 def clearcaches(self):
261 261 self.__dict__.pop('_nodemap', None)
262 262
263 263 def __getitem__(self, i):
264 264 if i == -1:
265 265 return (0, 0, 0, -1, -1, -1, -1, nullid)
266 266 return list.__getitem__(self, i)
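# The i == -1 case above serves as the sentinel entry for the null
# revision: looking up index[nullrev] yields a zeroed tuple ending in
# nullid, so callers need no special-casing for the null parent.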
267 267
268 268
269 269 class revlogoldio(object):
270 270 def __init__(self):
271 271 self.size = indexformatv0.size
272 272
273 273 def parseindex(self, data, inline):
274 274 s = self.size
275 275 index = []
276 276 nodemap = nodemaputil.NodeMap({nullid: nullrev})
277 277 n = off = 0
278 278 l = len(data)
279 279 while off + s <= l:
280 280 cur = data[off : off + s]
281 281 off += s
282 282 e = indexformatv0_unpack(cur)
283 283 # transform to revlogv1 format
284 284 e2 = (
285 285 offset_type(e[0], 0),
286 286 e[1],
287 287 -1,
288 288 e[2],
289 289 e[3],
290 290 nodemap.get(e[4], nullrev),
291 291 nodemap.get(e[5], nullrev),
292 292 e[6],
293 293 )
294 294 index.append(e2)
295 295 nodemap[e[6]] = n
296 296 n += 1
297 297
298 298 index = revlogoldindex(index)
299 299 return index, None
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 if gettype(entry[0]):
303 303 raise error.RevlogError(
304 304 _(b'index entry flags need revlog version 1')
305 305 )
306 306 e2 = (
307 307 getoffset(entry[0]),
308 308 entry[1],
309 309 entry[3],
310 310 entry[4],
311 311 node(entry[5]),
312 312 node(entry[6]),
313 313 entry[7],
314 314 )
315 315 return indexformatv0_pack(*e2)
316 316
317 317
318 318 # index ng:
319 319 # 6 bytes: offset
320 320 # 2 bytes: flags
321 321 # 4 bytes: compressed length
322 322 # 4 bytes: uncompressed length
323 323 # 4 bytes: base rev
324 324 # 4 bytes: link rev
325 325 # 4 bytes: parent 1 rev
326 326 # 4 bytes: parent 2 rev
327 327 # 32 bytes: nodeid
328 328 indexformatng = struct.Struct(b">Qiiiiii20s12x")
329 329 indexformatng_pack = indexformatng.pack
330 330 versionformat = struct.Struct(b">I")
331 331 versionformat_pack = versionformat.pack
332 332 versionformat_unpack = versionformat.unpack
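# Size check for the v1/"ng" layout documented above: 8 (offset + flags)
# + 6 * 4 (the six 4-byte integers) + 20 (nodeid) + 12 (padding) = 64
# bytes per entry, matching indexformatng.size for b">Qiiiiii20s12x".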
333 333
334 334 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
335 335 # signed integer)
336 336 _maxentrysize = 0x7FFFFFFF
337 337
338 338
339 339 class revlogio(object):
340 340 def __init__(self):
341 341 self.size = indexformatng.size
342 342
343 343 def parseindex(self, data, inline):
344 344 # call the C implementation to parse the index data
345 345 index, cache = parsers.parse_index2(data, inline)
346 346 return index, cache
347 347
348 348 def packentry(self, entry, node, version, rev):
349 349 p = indexformatng_pack(*entry)
350 350 if rev == 0:
351 351 p = versionformat_pack(version) + p[4:]
352 352 return p
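# Note on the rev == 0 special case above: the version/flags word
# overwrites the first 4 bytes of the packed entry, i.e. part of rev 0's
# offset field. Rev 0 always starts at data offset 0, so nothing is lost,
# and _loadindex() later recovers the header with
# versionformat_unpack(indexdata[:4]).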
353 353
354 354
355 355 NodemapRevlogIO = None
356 356
357 357 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
358 358
359 359 class NodemapRevlogIO(revlogio):
360 360 """A debug oriented IO class that return a PersistentNodeMapIndexObject
361 361
362 362 The PersistentNodeMapIndexObject is meant to test the persistent nodemap feature.
363 363 """
364 364
365 365 def parseindex(self, data, inline):
366 366 index, cache = parsers.parse_index_devel_nodemap(data, inline)
367 367 return index, cache
368 368
369 369
370 370 class rustrevlogio(revlogio):
371 371 def parseindex(self, data, inline):
372 372 index, cache = super(rustrevlogio, self).parseindex(data, inline)
373 373 return rustrevlog.MixedIndex(index), cache
374 374
375 375
376 376 class revlog(object):
377 377 """
378 378 the underlying revision storage object
379 379
380 380 A revlog consists of two parts, an index and the revision data.
381 381
382 382 The index is a file with a fixed record size containing
383 383 information on each revision, including its nodeid (hash), the
384 384 nodeids of its parents, the position and offset of its data within
385 385 the data file, and the revision it's based on. Finally, each entry
386 386 contains a linkrev entry that can serve as a pointer to external
387 387 data.
388 388
389 389 The revision data itself is a linear collection of data chunks.
390 390 Each chunk represents a revision and is usually represented as a
391 391 delta against the previous chunk. To bound lookup time, runs of
392 392 deltas are limited to about 2 times the length of the original
393 393 version data. This makes retrieval of a version proportional to
394 394 its size, or O(1) relative to the number of revisions.
395 395
396 396 Both pieces of the revlog are written to in an append-only
397 397 fashion, which means we never need to rewrite a file to insert or
398 398 remove data, and can use some simple techniques to avoid the need
399 399 for locking while reading.
400 400
401 401 If checkambig, indexfile is opened with checkambig=True at
402 402 writing, to avoid file stat ambiguity.
403 403
404 404 If mmaplargeindex is True, and an mmapindexthreshold is set, the
405 405 index will be mmapped rather than read if it is larger than the
406 406 configured threshold.
407 407
408 408 If censorable is True, the revlog can have censored revisions.
409 409
410 410 If `upperboundcomp` is not None, this is the expected maximal gain from
411 411 compression for the data content.
412 412 """
413 413
414 414 _flagserrorclass = error.RevlogError
415 415
416 416 def __init__(
417 417 self,
418 418 opener,
419 419 indexfile,
420 420 datafile=None,
421 421 checkambig=False,
422 422 mmaplargeindex=False,
423 423 censorable=False,
424 424 upperboundcomp=None,
425 425 persistentnodemap=False,
426 426 ):
427 427 """
428 428 create a revlog object
429 429
430 430 opener is a function that abstracts the file opening operation
431 431 and can be used to implement COW semantics or the like.
432 432
433 433 """
434 434 self.upperboundcomp = upperboundcomp
435 435 self.indexfile = indexfile
436 436 self.datafile = datafile or (indexfile[:-2] + b".d")
437 437 self.nodemap_file = None
438 438 if persistentnodemap:
439 439 self.nodemap_file = indexfile[:-2] + b".n"
440 440
441 441 self.opener = opener
442 442 # When True, indexfile is opened with checkambig=True at writing, to
443 443 # avoid file stat ambiguity.
444 444 self._checkambig = checkambig
445 445 self._mmaplargeindex = mmaplargeindex
446 446 self._censorable = censorable
447 447 # 3-tuple of (node, rev, text) for a raw revision.
448 448 self._revisioncache = None
449 449 # Maps rev to chain base rev.
450 450 self._chainbasecache = util.lrucachedict(100)
451 451 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
452 452 self._chunkcache = (0, b'')
453 453 # How much data to read and cache into the raw revlog data cache.
454 454 self._chunkcachesize = 65536
455 455 self._maxchainlen = None
456 456 self._deltabothparents = True
457 457 self.index = None
458 458 self._nodemap_docket = None
459 459 # Mapping of partial identifiers to full nodes.
460 460 self._pcache = {}
461 461 # Mapping of revision integer to full node.
462 462 self._compengine = b'zlib'
463 463 self._compengineopts = {}
464 464 self._maxdeltachainspan = -1
465 465 self._withsparseread = False
466 466 self._sparserevlog = False
467 467 self._srdensitythreshold = 0.50
468 468 self._srmingapsize = 262144
469 469
470 470 # Make copy of flag processors so each revlog instance can support
471 471 # custom flags.
472 472 self._flagprocessors = dict(flagutil.flagprocessors)
473 473
474 474 # 2-tuple of file handles being used for active writing.
475 475 self._writinghandles = None
476 476
477 477 self._loadindex()
478 478
479 479 def _loadindex(self):
480 480 mmapindexthreshold = None
481 481 opts = self.opener.options
482 482
483 483 if b'revlogv2' in opts:
484 484 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
485 485 elif b'revlogv1' in opts:
486 486 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
487 487 if b'generaldelta' in opts:
488 488 newversionflags |= FLAG_GENERALDELTA
489 489 elif b'revlogv0' in self.opener.options:
490 490 newversionflags = REVLOGV0
491 491 else:
492 492 newversionflags = REVLOG_DEFAULT_VERSION
493 493
494 494 if b'chunkcachesize' in opts:
495 495 self._chunkcachesize = opts[b'chunkcachesize']
496 496 if b'maxchainlen' in opts:
497 497 self._maxchainlen = opts[b'maxchainlen']
498 498 if b'deltabothparents' in opts:
499 499 self._deltabothparents = opts[b'deltabothparents']
500 500 self._lazydelta = bool(opts.get(b'lazydelta', True))
501 501 self._lazydeltabase = False
502 502 if self._lazydelta:
503 503 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
504 504 if b'compengine' in opts:
505 505 self._compengine = opts[b'compengine']
506 506 if b'zlib.level' in opts:
507 507 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
508 508 if b'zstd.level' in opts:
509 509 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
510 510 if b'maxdeltachainspan' in opts:
511 511 self._maxdeltachainspan = opts[b'maxdeltachainspan']
512 512 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
513 513 mmapindexthreshold = opts[b'mmapindexthreshold']
514 514 self.hassidedata = bool(opts.get(b'side-data', False))
515 515 if self.hassidedata:
516 516 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
517 517 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
518 518 withsparseread = bool(opts.get(b'with-sparse-read', False))
519 519 # sparse-revlog forces sparse-read
520 520 self._withsparseread = self._sparserevlog or withsparseread
521 521 if b'sparse-read-density-threshold' in opts:
522 522 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
523 523 if b'sparse-read-min-gap-size' in opts:
524 524 self._srmingapsize = opts[b'sparse-read-min-gap-size']
525 525 if opts.get(b'enableellipsis'):
526 526 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
527 527
528 528 # revlog v0 doesn't have flag processors
529 529 for flag, processor in pycompat.iteritems(
530 530 opts.get(b'flagprocessors', {})
531 531 ):
532 532 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
533 533
534 534 if self._chunkcachesize <= 0:
535 535 raise error.RevlogError(
536 536 _(b'revlog chunk cache size %r is not greater than 0')
537 537 % self._chunkcachesize
538 538 )
539 539 elif self._chunkcachesize & (self._chunkcachesize - 1):
540 540 raise error.RevlogError(
541 541 _(b'revlog chunk cache size %r is not a power of 2')
542 542 % self._chunkcachesize
543 543 )
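# The check above uses the classic single-bit test: n & (n - 1) clears
# the lowest set bit, so the result is zero exactly when n is a power of
# two. For example 65536 & 65535 == 0, while 65537 & 65536 != 0.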
544 544
545 545 indexdata = b''
546 546 self._initempty = True
547 547 try:
548 548 with self._indexfp() as f:
549 549 if (
550 550 mmapindexthreshold is not None
551 551 and self.opener.fstat(f).st_size >= mmapindexthreshold
552 552 ):
553 553 # TODO: should .close() to release resources without
554 554 # relying on Python GC
555 555 indexdata = util.buffer(util.mmapread(f))
556 556 else:
557 557 indexdata = f.read()
558 558 if len(indexdata) > 0:
559 559 versionflags = versionformat_unpack(indexdata[:4])[0]
560 560 self._initempty = False
561 561 else:
562 562 versionflags = newversionflags
563 563 except IOError as inst:
564 564 if inst.errno != errno.ENOENT:
565 565 raise
566 566
567 567 versionflags = newversionflags
568 568
569 569 self.version = versionflags
570 570
571 571 flags = versionflags & ~0xFFFF
572 572 fmt = versionflags & 0xFFFF
573 573
574 574 if fmt == REVLOGV0:
575 575 if flags:
576 576 raise error.RevlogError(
577 577 _(b'unknown flags (%#04x) in version %d revlog %s')
578 578 % (flags >> 16, fmt, self.indexfile)
579 579 )
580 580
581 581 self._inline = False
582 582 self._generaldelta = False
583 583
584 584 elif fmt == REVLOGV1:
585 585 if flags & ~REVLOGV1_FLAGS:
586 586 raise error.RevlogError(
587 587 _(b'unknown flags (%#04x) in version %d revlog %s')
588 588 % (flags >> 16, fmt, self.indexfile)
589 589 )
590 590
591 591 self._inline = versionflags & FLAG_INLINE_DATA
592 592 self._generaldelta = versionflags & FLAG_GENERALDELTA
593 593
594 594 elif fmt == REVLOGV2:
595 595 if flags & ~REVLOGV2_FLAGS:
596 596 raise error.RevlogError(
597 597 _(b'unknown flags (%#04x) in version %d revlog %s')
598 598 % (flags >> 16, fmt, self.indexfile)
599 599 )
600 600
601 601 self._inline = versionflags & FLAG_INLINE_DATA
602 602 # generaldelta implied by version 2 revlogs.
603 603 self._generaldelta = True
604 604
605 605 else:
606 606 raise error.RevlogError(
607 607 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
608 608 )
609 609 # sparse-revlog can't be on without general-delta (issue6056)
610 610 if not self._generaldelta:
611 611 self._sparserevlog = False
612 612
613 613 self._storedeltachains = True
614 614
615 615 devel_nodemap = (
616 616 self.nodemap_file
617 617 and opts.get(b'devel-force-nodemap', False)
618 618 and NodemapRevlogIO is not None
619 619 )
620 620
621 use_rust_index = False
622 if rustrevlog is not None:
623 if self.nodemap_file is not None:
624 use_rust_index = True
625 else:
626 use_rust_index = self.opener.options.get(b'rust.index')
627
621 628 self._io = revlogio()
622 629 if self.version == REVLOGV0:
623 630 self._io = revlogoldio()
624 631 elif devel_nodemap:
625 632 self._io = NodemapRevlogIO()
626 elif rustrevlog is not None and self.opener.options.get(b'rust.index'):
633 elif use_rust_index:
627 634 self._io = rustrevlogio()
628 635 try:
629 636 d = self._io.parseindex(indexdata, self._inline)
630 637 index, _chunkcache = d
631 638 use_nodemap = (
632 639 not self._inline
633 640 and self.nodemap_file is not None
634 641 and util.safehasattr(index, 'update_nodemap_data')
635 642 )
636 643 if use_nodemap:
637 644 nodemap_data = nodemaputil.persisted_data(self)
638 645 if nodemap_data is not None:
639 646 self._nodemap_docket = nodemap_data[0]
640 647 index.update_nodemap_data(*nodemap_data)
641 648 except (ValueError, IndexError):
642 649 raise error.RevlogError(
643 650 _(b"index %s is corrupted") % self.indexfile
644 651 )
645 652 self.index, self._chunkcache = d
646 653 if not self._chunkcache:
647 654 self._chunkclear()
648 655 # revnum -> (chain-length, sum-delta-length)
649 656 self._chaininfocache = {}
650 657 # revlog header -> revlog compressor
651 658 self._decompressors = {}
652 659
653 660 @util.propertycache
654 661 def _compressor(self):
655 662 engine = util.compengines[self._compengine]
656 663 return engine.revlogcompressor(self._compengineopts)
657 664
658 665 def _indexfp(self, mode=b'r'):
659 666 """file object for the revlog's index file"""
660 667 args = {'mode': mode}
661 668 if mode != b'r':
662 669 args['checkambig'] = self._checkambig
663 670 if mode == b'w':
664 671 args['atomictemp'] = True
665 672 return self.opener(self.indexfile, **args)
666 673
667 674 def _datafp(self, mode=b'r'):
668 675 """file object for the revlog's data file"""
669 676 return self.opener(self.datafile, mode=mode)
670 677
671 678 @contextlib.contextmanager
672 679 def _datareadfp(self, existingfp=None):
673 680 """file object suitable to read data"""
674 681 # Use explicit file handle, if given.
675 682 if existingfp is not None:
676 683 yield existingfp
677 684
678 685 # Use a file handle being actively used for writes, if available.
679 686 # There is some danger to doing this because reads will seek the
680 687 # file. However, _writeentry() performs a SEEK_END before all writes,
681 688 # so we should be safe.
682 689 elif self._writinghandles:
683 690 if self._inline:
684 691 yield self._writinghandles[0]
685 692 else:
686 693 yield self._writinghandles[1]
687 694
688 695 # Otherwise open a new file handle.
689 696 else:
690 697 if self._inline:
691 698 func = self._indexfp
692 699 else:
693 700 func = self._datafp
694 701 with func() as fp:
695 702 yield fp
696 703
697 704 def tiprev(self):
698 705 return len(self.index) - 1
699 706
700 707 def tip(self):
701 708 return self.node(self.tiprev())
702 709
703 710 def __contains__(self, rev):
704 711 return 0 <= rev < len(self)
705 712
706 713 def __len__(self):
707 714 return len(self.index)
708 715
709 716 def __iter__(self):
710 717 return iter(pycompat.xrange(len(self)))
711 718
712 719 def revs(self, start=0, stop=None):
713 720 """iterate over all rev in this revlog (from start to stop)"""
714 721 return storageutil.iterrevs(len(self), start=start, stop=stop)
715 722
716 723 @property
717 724 def nodemap(self):
718 725 msg = (
719 726 b"revlog.nodemap is deprecated, "
720 727 b"use revlog.index.[has_node|rev|get_rev]"
721 728 )
722 729 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
723 730 return self.index.nodemap
724 731
725 732 @property
726 733 def _nodecache(self):
727 734 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
728 735 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
729 736 return self.index.nodemap
730 737
731 738 def hasnode(self, node):
732 739 try:
733 740 self.rev(node)
734 741 return True
735 742 except KeyError:
736 743 return False
737 744
738 745 def candelta(self, baserev, rev):
739 746 """whether two revisions (baserev, rev) can be delta-ed or not"""
740 747 # Disable delta if either rev requires a content-changing flag
741 748 # processor (ex. LFS). This is because such flag processor can alter
742 749 # the rawtext content that the delta will be based on, and two clients
743 750 # could have a same revlog node with different flags (i.e. different
744 751 # rawtext contents) and the delta could be incompatible.
745 752 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
746 753 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
747 754 ):
748 755 return False
749 756 return True
750 757
751 758 def update_caches(self, transaction):
752 759 if self.nodemap_file is not None:
753 760 if transaction is None:
754 761 nodemaputil.update_persistent_nodemap(self)
755 762 else:
756 763 nodemaputil.setup_persistent_nodemap(transaction, self)
757 764
758 765 def clearcaches(self):
759 766 self._revisioncache = None
760 767 self._chainbasecache.clear()
761 768 self._chunkcache = (0, b'')
762 769 self._pcache = {}
763 770 self._nodemap_docket = None
764 771 self.index.clearcaches()
765 772 # The python code is the one responsible for validating the docket, so we
766 773 # end up having to refresh it here.
767 774 use_nodemap = (
768 775 not self._inline
769 776 and self.nodemap_file is not None
770 777 and util.safehasattr(self.index, 'update_nodemap_data')
771 778 )
772 779 if use_nodemap:
773 780 nodemap_data = nodemaputil.persisted_data(self)
774 781 if nodemap_data is not None:
775 782 self._nodemap_docket = nodemap_data[0]
776 783 self.index.update_nodemap_data(*nodemap_data)
777 784
778 785 def rev(self, node):
779 786 try:
780 787 return self.index.rev(node)
781 788 except TypeError:
782 789 raise
783 790 except error.RevlogError:
784 791 # parsers.c radix tree lookup failed
785 792 if node == wdirid or node in wdirfilenodeids:
786 793 raise error.WdirUnsupported
787 794 raise error.LookupError(node, self.indexfile, _(b'no node'))
788 795
789 796 # Accessors for index entries.
790 797
791 798 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
792 799 # are flags.
793 800 def start(self, rev):
794 801 return int(self.index[rev][0] >> 16)
795 802
796 803 def flags(self, rev):
797 804 return self.index[rev][0] & 0xFFFF
798 805
799 806 def length(self, rev):
800 807 return self.index[rev][1]
801 808
802 809 def rawsize(self, rev):
803 810 """return the length of the uncompressed text for a given revision"""
804 811 l = self.index[rev][2]
805 812 if l >= 0:
806 813 return l
807 814
808 815 t = self.rawdata(rev)
809 816 return len(t)
810 817
811 818 def size(self, rev):
812 819 """length of non-raw text (processed by a "read" flag processor)"""
813 820 # fast path: if no "read" flag processor could change the content,
814 821 # size is rawsize. note: ELLIPSIS is known to not change the content.
815 822 flags = self.flags(rev)
816 823 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
817 824 return self.rawsize(rev)
818 825
819 826 return len(self.revision(rev, raw=False))
820 827
821 828 def chainbase(self, rev):
822 829 base = self._chainbasecache.get(rev)
823 830 if base is not None:
824 831 return base
825 832
826 833 index = self.index
827 834 iterrev = rev
828 835 base = index[iterrev][3]
829 836 while base != iterrev:
830 837 iterrev = base
831 838 base = index[iterrev][3]
832 839
833 840 self._chainbasecache[rev] = base
834 841 return base
835 842
836 843 def linkrev(self, rev):
837 844 return self.index[rev][4]
838 845
839 846 def parentrevs(self, rev):
840 847 try:
841 848 entry = self.index[rev]
842 849 except IndexError:
843 850 if rev == wdirrev:
844 851 raise error.WdirUnsupported
845 852 raise
846 853
847 854 return entry[5], entry[6]
848 855
849 856 # fast parentrevs(rev) where rev isn't filtered
850 857 _uncheckedparentrevs = parentrevs
851 858
852 859 def node(self, rev):
853 860 try:
854 861 return self.index[rev][7]
855 862 except IndexError:
856 863 if rev == wdirrev:
857 864 raise error.WdirUnsupported
858 865 raise
859 866
860 867 # Derived from index values.
861 868
862 869 def end(self, rev):
863 870 return self.start(rev) + self.length(rev)
864 871
865 872 def parents(self, node):
866 873 i = self.index
867 874 d = i[self.rev(node)]
868 875 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
869 876
870 877 def chainlen(self, rev):
871 878 return self._chaininfo(rev)[0]
872 879
873 880 def _chaininfo(self, rev):
874 881 chaininfocache = self._chaininfocache
875 882 if rev in chaininfocache:
876 883 return chaininfocache[rev]
877 884 index = self.index
878 885 generaldelta = self._generaldelta
879 886 iterrev = rev
880 887 e = index[iterrev]
881 888 clen = 0
882 889 compresseddeltalen = 0
883 890 while iterrev != e[3]:
884 891 clen += 1
885 892 compresseddeltalen += e[1]
886 893 if generaldelta:
887 894 iterrev = e[3]
888 895 else:
889 896 iterrev -= 1
890 897 if iterrev in chaininfocache:
891 898 t = chaininfocache[iterrev]
892 899 clen += t[0]
893 900 compresseddeltalen += t[1]
894 901 break
895 902 e = index[iterrev]
896 903 else:
897 904 # Add text length of base since decompressing that also takes
898 905 # work. For cache hits the length is already included.
899 906 compresseddeltalen += e[1]
900 907 r = (clen, compresseddeltalen)
901 908 chaininfocache[rev] = r
902 909 return r
903 910
904 911 def _deltachain(self, rev, stoprev=None):
905 912 """Obtain the delta chain for a revision.
906 913
907 914 ``stoprev`` specifies a revision to stop at. If not specified, we
908 915 stop at the base of the chain.
909 916
910 917 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
911 918 revs in ascending order and ``stopped`` is a bool indicating whether
912 919 ``stoprev`` was hit.
913 920 """
914 921 # Try C implementation.
915 922 try:
916 923 return self.index.deltachain(rev, stoprev, self._generaldelta)
917 924 except AttributeError:
918 925 pass
919 926
920 927 chain = []
921 928
922 929 # Alias to prevent attribute lookup in tight loop.
923 930 index = self.index
924 931 generaldelta = self._generaldelta
925 932
926 933 iterrev = rev
927 934 e = index[iterrev]
928 935 while iterrev != e[3] and iterrev != stoprev:
929 936 chain.append(iterrev)
930 937 if generaldelta:
931 938 iterrev = e[3]
932 939 else:
933 940 iterrev -= 1
934 941 e = index[iterrev]
935 942
936 943 if iterrev == stoprev:
937 944 stopped = True
938 945 else:
939 946 chain.append(iterrev)
940 947 stopped = False
941 948
942 949 chain.reverse()
943 950 return chain, stopped
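# In the generaldelta case the walk above follows e[3], the per-entry
# delta-base field; in the legacy layout each revision deltas against its
# predecessor, so the chain is simply rev, rev - 1, ... down to the base.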
944 951
945 952 def ancestors(self, revs, stoprev=0, inclusive=False):
946 953 """Generate the ancestors of 'revs' in reverse revision order.
947 954 Does not generate revs lower than stoprev.
948 955
949 956 See the documentation for ancestor.lazyancestors for more details."""
950 957
951 958 # first, make sure start revisions aren't filtered
952 959 revs = list(revs)
953 960 checkrev = self.node
954 961 for r in revs:
955 962 checkrev(r)
956 963 # and we're sure ancestors aren't filtered as well
957 964
958 965 if rustancestor is not None:
959 966 lazyancestors = rustancestor.LazyAncestors
960 967 arg = self.index
961 968 else:
962 969 lazyancestors = ancestor.lazyancestors
963 970 arg = self._uncheckedparentrevs
964 971 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
965 972
966 973 def descendants(self, revs):
967 974 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
968 975
969 976 def findcommonmissing(self, common=None, heads=None):
970 977 """Return a tuple of the ancestors of common and the ancestors of heads
971 978 that are not ancestors of common. In revset terminology, we return the
972 979 tuple:
973 980
974 981 ::common, (::heads) - (::common)
975 982
976 983 The list is sorted by revision number, meaning it is
977 984 topologically sorted.
978 985
979 986 'heads' and 'common' are both lists of node IDs. If heads is
980 987 not supplied, uses all of the revlog's heads. If common is not
981 988 supplied, uses nullid."""
982 989 if common is None:
983 990 common = [nullid]
984 991 if heads is None:
985 992 heads = self.heads()
986 993
987 994 common = [self.rev(n) for n in common]
988 995 heads = [self.rev(n) for n in heads]
989 996
990 997 # we want the ancestors, but inclusive
991 998 class lazyset(object):
992 999 def __init__(self, lazyvalues):
993 1000 self.addedvalues = set()
994 1001 self.lazyvalues = lazyvalues
995 1002
996 1003 def __contains__(self, value):
997 1004 return value in self.addedvalues or value in self.lazyvalues
998 1005
999 1006 def __iter__(self):
1000 1007 added = self.addedvalues
1001 1008 for r in added:
1002 1009 yield r
1003 1010 for r in self.lazyvalues:
1004 1011 if r not in added:
1005 1012 yield r
1006 1013
1007 1014 def add(self, value):
1008 1015 self.addedvalues.add(value)
1009 1016
1010 1017 def update(self, values):
1011 1018 self.addedvalues.update(values)
1012 1019
1013 1020 has = lazyset(self.ancestors(common))
1014 1021 has.add(nullrev)
1015 1022 has.update(common)
1016 1023
1017 1024 # take all ancestors from heads that aren't in has
1018 1025 missing = set()
1019 1026 visit = collections.deque(r for r in heads if r not in has)
1020 1027 while visit:
1021 1028 r = visit.popleft()
1022 1029 if r in missing:
1023 1030 continue
1024 1031 else:
1025 1032 missing.add(r)
1026 1033 for p in self.parentrevs(r):
1027 1034 if p not in has:
1028 1035 visit.append(p)
1029 1036 missing = list(missing)
1030 1037 missing.sort()
1031 1038 return has, [self.node(miss) for miss in missing]
1032 1039
1033 1040 def incrementalmissingrevs(self, common=None):
1034 1041 """Return an object that can be used to incrementally compute the
1035 1042 revision numbers of the ancestors of arbitrary sets that are not
1036 1043 ancestors of common. This is an ancestor.incrementalmissingancestors
1037 1044 object.
1038 1045
1039 1046 'common' is a list of revision numbers. If common is not supplied, uses
1040 1047 nullrev.
1041 1048 """
1042 1049 if common is None:
1043 1050 common = [nullrev]
1044 1051
1045 1052 if rustancestor is not None:
1046 1053 return rustancestor.MissingAncestors(self.index, common)
1047 1054 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1048 1055
1049 1056 def findmissingrevs(self, common=None, heads=None):
1050 1057 """Return the revision numbers of the ancestors of heads that
1051 1058 are not ancestors of common.
1052 1059
1053 1060 More specifically, return a list of revision numbers corresponding to
1054 1061 nodes N such that every N satisfies the following constraints:
1055 1062
1056 1063 1. N is an ancestor of some node in 'heads'
1057 1064 2. N is not an ancestor of any node in 'common'
1058 1065
1059 1066 The list is sorted by revision number, meaning it is
1060 1067 topologically sorted.
1061 1068
1062 1069 'heads' and 'common' are both lists of revision numbers. If heads is
1063 1070 not supplied, uses all of the revlog's heads. If common is not
1064 1071 supplied, uses nullid."""
1065 1072 if common is None:
1066 1073 common = [nullrev]
1067 1074 if heads is None:
1068 1075 heads = self.headrevs()
1069 1076
1070 1077 inc = self.incrementalmissingrevs(common=common)
1071 1078 return inc.missingancestors(heads)
1072 1079
1073 1080 def findmissing(self, common=None, heads=None):
1074 1081 """Return the ancestors of heads that are not ancestors of common.
1075 1082
1076 1083 More specifically, return a list of nodes N such that every N
1077 1084 satisfies the following constraints:
1078 1085
1079 1086 1. N is an ancestor of some node in 'heads'
1080 1087 2. N is not an ancestor of any node in 'common'
1081 1088
1082 1089 The list is sorted by revision number, meaning it is
1083 1090 topologically sorted.
1084 1091
1085 1092 'heads' and 'common' are both lists of node IDs. If heads is
1086 1093 not supplied, uses all of the revlog's heads. If common is not
1087 1094 supplied, uses nullid."""
1088 1095 if common is None:
1089 1096 common = [nullid]
1090 1097 if heads is None:
1091 1098 heads = self.heads()
1092 1099
1093 1100 common = [self.rev(n) for n in common]
1094 1101 heads = [self.rev(n) for n in heads]
1095 1102
1096 1103 inc = self.incrementalmissingrevs(common=common)
1097 1104 return [self.node(r) for r in inc.missingancestors(heads)]
1098 1105
1099 1106 def nodesbetween(self, roots=None, heads=None):
1100 1107 """Return a topological path from 'roots' to 'heads'.
1101 1108
1102 1109 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1103 1110 topologically sorted list of all nodes N that satisfy both of
1104 1111 these constraints:
1105 1112
1106 1113 1. N is a descendant of some node in 'roots'
1107 1114 2. N is an ancestor of some node in 'heads'
1108 1115
1109 1116 Every node is considered to be both a descendant and an ancestor
1110 1117 of itself, so every reachable node in 'roots' and 'heads' will be
1111 1118 included in 'nodes'.
1112 1119
1113 1120 'outroots' is the list of reachable nodes in 'roots', i.e., the
1114 1121 subset of 'roots' that is returned in 'nodes'. Likewise,
1115 1122 'outheads' is the subset of 'heads' that is also in 'nodes'.
1116 1123
1117 1124 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1118 1125 unspecified, uses nullid as the only root. If 'heads' is
1119 1126 unspecified, uses list of all of the revlog's heads."""
1120 1127 nonodes = ([], [], [])
1121 1128 if roots is not None:
1122 1129 roots = list(roots)
1123 1130 if not roots:
1124 1131 return nonodes
1125 1132 lowestrev = min([self.rev(n) for n in roots])
1126 1133 else:
1127 1134 roots = [nullid] # Everybody's a descendant of nullid
1128 1135 lowestrev = nullrev
1129 1136 if (lowestrev == nullrev) and (heads is None):
1130 1137 # We want _all_ the nodes!
1131 1138 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1132 1139 if heads is None:
1133 1140 # All nodes are ancestors, so the latest ancestor is the last
1134 1141 # node.
1135 1142 highestrev = len(self) - 1
1136 1143 # Set ancestors to None to signal that every node is an ancestor.
1137 1144 ancestors = None
1138 1145 # Set heads to an empty dictionary for later discovery of heads
1139 1146 heads = {}
1140 1147 else:
1141 1148 heads = list(heads)
1142 1149 if not heads:
1143 1150 return nonodes
1144 1151 ancestors = set()
1145 1152 # Turn heads into a dictionary so we can remove 'fake' heads.
1146 1153 # Also, later we will be using it to filter out the heads we can't
1147 1154 # find from roots.
1148 1155 heads = dict.fromkeys(heads, False)
1149 1156 # Start at the top and keep marking parents until we're done.
1150 1157 nodestotag = set(heads)
1151 1158 # Remember where the top was so we can use it as a limit later.
1152 1159 highestrev = max([self.rev(n) for n in nodestotag])
1153 1160 while nodestotag:
1154 1161 # grab a node to tag
1155 1162 n = nodestotag.pop()
1156 1163 # Never tag nullid
1157 1164 if n == nullid:
1158 1165 continue
1159 1166 # A node's revision number represents its place in a
1160 1167 # topologically sorted list of nodes.
1161 1168 r = self.rev(n)
1162 1169 if r >= lowestrev:
1163 1170 if n not in ancestors:
1164 1171 # If we are possibly a descendant of one of the roots
1165 1172 # and we haven't already been marked as an ancestor
1166 1173 ancestors.add(n) # Mark as ancestor
1167 1174 # Add non-nullid parents to list of nodes to tag.
1168 1175 nodestotag.update(
1169 1176 [p for p in self.parents(n) if p != nullid]
1170 1177 )
1171 1178 elif n in heads: # We've seen it before, is it a fake head?
1172 1179 # So it is, real heads should not be the ancestors of
1173 1180 # any other heads.
1174 1181 heads.pop(n)
1175 1182 if not ancestors:
1176 1183 return nonodes
1177 1184 # Now that we have our set of ancestors, we want to remove any
1178 1185 # roots that are not ancestors.
1179 1186
1180 1187 # If one of the roots was nullid, everything is included anyway.
1181 1188 if lowestrev > nullrev:
1182 1189 # But, since we weren't, let's recompute the lowest rev to not
1183 1190 # include roots that aren't ancestors.
1184 1191
1185 1192 # Filter out roots that aren't ancestors of heads
1186 1193 roots = [root for root in roots if root in ancestors]
1187 1194 # Recompute the lowest revision
1188 1195 if roots:
1189 1196 lowestrev = min([self.rev(root) for root in roots])
1190 1197 else:
1191 1198 # No more roots? Return empty list
1192 1199 return nonodes
1193 1200 else:
1194 1201 # We are descending from nullid, and don't need to care about
1195 1202 # any other roots.
1196 1203 lowestrev = nullrev
1197 1204 roots = [nullid]
1198 1205 # Transform our roots list into a set.
1199 1206 descendants = set(roots)
1200 1207 # Also, keep the original roots so we can filter out roots that aren't
1201 1208 # 'real' roots (i.e. are descended from other roots).
1202 1209 roots = descendants.copy()
1203 1210 # Our topologically sorted list of output nodes.
1204 1211 orderedout = []
1205 1212 # Don't start at nullid since we don't want nullid in our output list,
1206 1213 # and if nullid shows up in descendants, empty parents will look like
1207 1214 # they're descendants.
1208 1215 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1209 1216 n = self.node(r)
1210 1217 isdescendant = False
1211 1218 if lowestrev == nullrev: # Everybody is a descendant of nullid
1212 1219 isdescendant = True
1213 1220 elif n in descendants:
1214 1221 # n is already a descendant
1215 1222 isdescendant = True
1216 1223 # This check only needs to be done here because all the roots
1217 1224 # will start being marked as descendants before the loop.
1218 1225 if n in roots:
1219 1226 # If n was a root, check if it's a 'real' root.
1220 1227 p = tuple(self.parents(n))
1221 1228 # If any of its parents are descendants, it's not a root.
1222 1229 if (p[0] in descendants) or (p[1] in descendants):
1223 1230 roots.remove(n)
1224 1231 else:
1225 1232 p = tuple(self.parents(n))
1226 1233 # A node is a descendant if either of its parents is a
1227 1234 # descendant. (We seeded the descendants set with the roots
1228 1235 # up there, remember?)
1229 1236 if (p[0] in descendants) or (p[1] in descendants):
1230 1237 descendants.add(n)
1231 1238 isdescendant = True
1232 1239 if isdescendant and ((ancestors is None) or (n in ancestors)):
1233 1240 # Only include nodes that are both descendants and ancestors.
1234 1241 orderedout.append(n)
1235 1242 if (ancestors is not None) and (n in heads):
1236 1243 # We're trying to figure out which heads are reachable
1237 1244 # from roots.
1238 1245 # Mark this head as having been reached
1239 1246 heads[n] = True
1240 1247 elif ancestors is None:
1241 1248 # Otherwise, we're trying to discover the heads.
1242 1249 # Assume this is a head because if it isn't, the next step
1243 1250 # will eventually remove it.
1244 1251 heads[n] = True
1245 1252 # But, obviously its parents aren't.
1246 1253 for p in self.parents(n):
1247 1254 heads.pop(p, None)
1248 1255 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1249 1256 roots = list(roots)
1250 1257 assert orderedout
1251 1258 assert roots
1252 1259 assert heads
1253 1260 return (orderedout, roots, heads)
1254 1261
1255 1262 def headrevs(self, revs=None):
1256 1263 if revs is None:
1257 1264 try:
1258 1265 return self.index.headrevs()
1259 1266 except AttributeError:
1260 1267 return self._headrevs()
1261 1268 if rustdagop is not None:
1262 1269 return rustdagop.headrevs(self.index, revs)
1263 1270 return dagop.headrevs(revs, self._uncheckedparentrevs)
1264 1271
1265 1272 def computephases(self, roots):
1266 1273 return self.index.computephasesmapsets(roots)
1267 1274
1268 1275 def _headrevs(self):
1269 1276 count = len(self)
1270 1277 if not count:
1271 1278 return [nullrev]
1272 1279 # we won't iterate over filtered revs, so nobody is a head at the start
1273 1280 ishead = [0] * (count + 1)
1274 1281 index = self.index
1275 1282 for r in self:
1276 1283 ishead[r] = 1 # I may be a head
1277 1284 e = index[r]
1278 1285 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1279 1286 return [r for r, val in enumerate(ishead) if val]
1280 1287
1281 1288 def heads(self, start=None, stop=None):
1282 1289 """return the list of all nodes that have no children
1283 1290
1284 1291 if start is specified, only heads that are descendants of
1285 1292 start will be returned
1286 1293 if stop is specified, it will consider all the revs from stop
1287 1294 as if they had no children
1288 1295 """
1289 1296 if start is None and stop is None:
1290 1297 if not len(self):
1291 1298 return [nullid]
1292 1299 return [self.node(r) for r in self.headrevs()]
1293 1300
1294 1301 if start is None:
1295 1302 start = nullrev
1296 1303 else:
1297 1304 start = self.rev(start)
1298 1305
1299 1306 stoprevs = {self.rev(n) for n in stop or []}
1300 1307
1301 1308 revs = dagop.headrevssubset(
1302 1309 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1303 1310 )
1304 1311
1305 1312 return [self.node(rev) for rev in revs]
1306 1313
1307 1314 def children(self, node):
1308 1315 """find the children of a given node"""
1309 1316 c = []
1310 1317 p = self.rev(node)
1311 1318 for r in self.revs(start=p + 1):
1312 1319 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1313 1320 if prevs:
1314 1321 for pr in prevs:
1315 1322 if pr == p:
1316 1323 c.append(self.node(r))
1317 1324 elif p == nullrev:
1318 1325 c.append(self.node(r))
1319 1326 return c
1320 1327
1321 1328 def commonancestorsheads(self, a, b):
1322 1329 """calculate all the heads of the common ancestors of nodes a and b"""
1323 1330 a, b = self.rev(a), self.rev(b)
1324 1331 ancs = self._commonancestorsheads(a, b)
1325 1332 return pycompat.maplist(self.node, ancs)
1326 1333
1327 1334 def _commonancestorsheads(self, *revs):
1328 1335 """calculate all the heads of the common ancestors of revs"""
1329 1336 try:
1330 1337 ancs = self.index.commonancestorsheads(*revs)
1331 1338 except (AttributeError, OverflowError): # C implementation failed
1332 1339 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1333 1340 return ancs
1334 1341
1335 1342 def isancestor(self, a, b):
1336 1343 """return True if node a is an ancestor of node b
1337 1344
1338 1345 A revision is considered an ancestor of itself."""
1339 1346 a, b = self.rev(a), self.rev(b)
1340 1347 return self.isancestorrev(a, b)
1341 1348
1342 1349 def isancestorrev(self, a, b):
1343 1350 """return True if revision a is an ancestor of revision b
1344 1351
1345 1352 A revision is considered an ancestor of itself.
1346 1353
1347 1354 The implementation of this is trivial but the use of
1348 1355 reachableroots is not."""
1349 1356 if a == nullrev:
1350 1357 return True
1351 1358 elif a == b:
1352 1359 return True
1353 1360 elif a > b:
1354 1361 return False
1355 1362 return bool(self.reachableroots(a, [b], [a], includepath=False))
1356 1363
1357 1364 def reachableroots(self, minroot, heads, roots, includepath=False):
1358 1365 """return (heads(::(<roots> and <roots>::<heads>)))
1359 1366
1360 1367 If includepath is True, return (<roots>::<heads>)."""
1361 1368 try:
1362 1369 return self.index.reachableroots2(
1363 1370 minroot, heads, roots, includepath
1364 1371 )
1365 1372 except AttributeError:
1366 1373 return dagop._reachablerootspure(
1367 1374 self.parentrevs, minroot, roots, heads, includepath
1368 1375 )
1369 1376
1370 1377 def ancestor(self, a, b):
1371 1378 """calculate the "best" common ancestor of nodes a and b"""
1372 1379
1373 1380 a, b = self.rev(a), self.rev(b)
1374 1381 try:
1375 1382 ancs = self.index.ancestors(a, b)
1376 1383 except (AttributeError, OverflowError):
1377 1384 ancs = ancestor.ancestors(self.parentrevs, a, b)
1378 1385 if ancs:
1379 1386 # choose a consistent winner when there's a tie
1380 1387 return min(map(self.node, ancs))
1381 1388 return nullid
1382 1389
1383 1390 def _match(self, id):
1384 1391 if isinstance(id, int):
1385 1392 # rev
1386 1393 return self.node(id)
1387 1394 if len(id) == 20:
1388 1395 # possibly a binary node
1389 1396 # odds of a binary node being all hex in ASCII are 1 in 10**25
1390 1397 try:
1391 1398 node = id
1392 1399 self.rev(node) # quick search the index
1393 1400 return node
1394 1401 except error.LookupError:
1395 1402 pass # may be partial hex id
1396 1403 try:
1397 1404 # str(rev)
1398 1405 rev = int(id)
1399 1406 if b"%d" % rev != id:
1400 1407 raise ValueError
1401 1408 if rev < 0:
1402 1409 rev = len(self) + rev
1403 1410 if rev < 0 or rev >= len(self):
1404 1411 raise ValueError
1405 1412 return self.node(rev)
1406 1413 except (ValueError, OverflowError):
1407 1414 pass
1408 1415 if len(id) == 40:
1409 1416 try:
1410 1417 # a full hex nodeid?
1411 1418 node = bin(id)
1412 1419 self.rev(node)
1413 1420 return node
1414 1421 except (TypeError, error.LookupError):
1415 1422 pass
1416 1423
1417 1424 def _partialmatch(self, id):
1418 1425 # we don't care about wdirfilenodeids as they should always be full hashes
1419 1426 maybewdir = wdirhex.startswith(id)
1420 1427 try:
1421 1428 partial = self.index.partialmatch(id)
1422 1429 if partial and self.hasnode(partial):
1423 1430 if maybewdir:
1424 1431 # single 'ff...' match in radix tree, ambiguous with wdir
1425 1432 raise error.RevlogError
1426 1433 return partial
1427 1434 if maybewdir:
1428 1435 # no 'ff...' match in radix tree, wdir identified
1429 1436 raise error.WdirUnsupported
1430 1437 return None
1431 1438 except error.RevlogError:
1432 1439 # parsers.c radix tree lookup gave multiple matches
1433 1440 # fast path: for unfiltered changelog, radix tree is accurate
1434 1441 if not getattr(self, 'filteredrevs', None):
1435 1442 raise error.AmbiguousPrefixLookupError(
1436 1443 id, self.indexfile, _(b'ambiguous identifier')
1437 1444 )
1438 1445 # fall through to slow path that filters hidden revisions
1439 1446 except (AttributeError, ValueError):
1440 1447 # we are pure python, or key was too short to search radix tree
1441 1448 pass
1442 1449
1443 1450 if id in self._pcache:
1444 1451 return self._pcache[id]
1445 1452
1446 1453 if len(id) <= 40:
1447 1454 try:
1448 1455 # hex(node)[:...]
1449 1456 l = len(id) // 2 # grab an even number of digits
1450 1457 prefix = bin(id[: l * 2])
1451 1458 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1452 1459 nl = [
1453 1460 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1454 1461 ]
1455 1462 if nullhex.startswith(id):
1456 1463 nl.append(nullid)
1457 1464 if len(nl) > 0:
1458 1465 if len(nl) == 1 and not maybewdir:
1459 1466 self._pcache[id] = nl[0]
1460 1467 return nl[0]
1461 1468 raise error.AmbiguousPrefixLookupError(
1462 1469 id, self.indexfile, _(b'ambiguous identifier')
1463 1470 )
1464 1471 if maybewdir:
1465 1472 raise error.WdirUnsupported
1466 1473 return None
1467 1474 except TypeError:
1468 1475 pass
1469 1476
1470 1477 def lookup(self, id):
1471 1478 """locate a node based on:
1472 1479 - revision number or str(revision number)
1473 1480 - nodeid or subset of hex nodeid
1474 1481 """
1475 1482 n = self._match(id)
1476 1483 if n is not None:
1477 1484 return n
1478 1485 n = self._partialmatch(id)
1479 1486 if n:
1480 1487 return n
1481 1488
1482 1489 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1483 1490
1484 1491 def shortest(self, node, minlength=1):
1485 1492 """Find the shortest unambiguous prefix that matches node."""
1486 1493
1487 1494 def isvalid(prefix):
1488 1495 try:
1489 1496 matchednode = self._partialmatch(prefix)
1490 1497 except error.AmbiguousPrefixLookupError:
1491 1498 return False
1492 1499 except error.WdirUnsupported:
1493 1500 # single 'ff...' match
1494 1501 return True
1495 1502 if matchednode is None:
1496 1503 raise error.LookupError(node, self.indexfile, _(b'no node'))
1497 1504 return True
1498 1505
1499 1506 def maybewdir(prefix):
1500 1507 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1501 1508
1502 1509 hexnode = hex(node)
1503 1510
1504 1511 def disambiguate(hexnode, minlength):
1505 1512 """Disambiguate against wdirid."""
1506 1513 for length in range(minlength, 41):
1507 1514 prefix = hexnode[:length]
1508 1515 if not maybewdir(prefix):
1509 1516 return prefix
1510 1517
1511 1518 if not getattr(self, 'filteredrevs', None):
1512 1519 try:
1513 1520 length = max(self.index.shortest(node), minlength)
1514 1521 return disambiguate(hexnode, length)
1515 1522 except error.RevlogError:
1516 1523 if node != wdirid:
1517 1524 raise error.LookupError(node, self.indexfile, _(b'no node'))
1518 1525 except AttributeError:
1519 1526 # Fall through to pure code
1520 1527 pass
1521 1528
1522 1529 if node == wdirid:
1523 1530 for length in range(minlength, 41):
1524 1531 prefix = hexnode[:length]
1525 1532 if isvalid(prefix):
1526 1533 return prefix
1527 1534
1528 1535 for length in range(minlength, 41):
1529 1536 prefix = hexnode[:length]
1530 1537 if isvalid(prefix):
1531 1538 return disambiguate(hexnode, length)
1532 1539
1533 1540 def cmp(self, node, text):
1534 1541 """compare text with a given file revision
1535 1542
1536 1543 returns True if text is different than what is stored.
1537 1544 """
1538 1545 p1, p2 = self.parents(node)
1539 1546 return storageutil.hashrevisionsha1(text, p1, p2) != node
1540 1547
1541 1548 def _cachesegment(self, offset, data):
1542 1549 """Add a segment to the revlog cache.
1543 1550
1544 1551 Accepts an absolute offset and the data that is at that location.
1545 1552 """
1546 1553 o, d = self._chunkcache
1547 1554 # try to add to existing cache
1548 1555 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1549 1556 self._chunkcache = o, d + data
1550 1557 else:
1551 1558 self._chunkcache = offset, data
1552 1559
1553 1560 def _readsegment(self, offset, length, df=None):
1554 1561 """Load a segment of raw data from the revlog.
1555 1562
1556 1563 Accepts an absolute offset, length to read, and an optional existing
1557 1564 file handle to read from.
1558 1565
1559 1566 If an existing file handle is passed, it will be seeked and the
1560 1567 original seek position will NOT be restored.
1561 1568
1562 1569 Returns a str or buffer of raw byte data.
1563 1570
1564 1571 Raises if the requested number of bytes could not be read.
1565 1572 """
1566 1573 # Cache data both forward and backward around the requested
1567 1574 # data, in a fixed size window. This helps speed up operations
1568 1575 # involving reading the revlog backwards.
1569 1576 cachesize = self._chunkcachesize
1570 1577 realoffset = offset & ~(cachesize - 1)
1571 1578 reallength = (
1572 1579 (offset + length + cachesize) & ~(cachesize - 1)
1573 1580 ) - realoffset
1574 1581 with self._datareadfp(df) as df:
1575 1582 df.seek(realoffset)
1576 1583 d = df.read(reallength)
1577 1584
1578 1585 self._cachesegment(realoffset, d)
1579 1586 if offset != realoffset or reallength != length:
1580 1587 startoffset = offset - realoffset
1581 1588 if len(d) - startoffset < length:
1582 1589 raise error.RevlogError(
1583 1590 _(
1584 1591 b'partial read of revlog %s; expected %d bytes from '
1585 1592 b'offset %d, got %d'
1586 1593 )
1587 1594 % (
1588 1595 self.indexfile if self._inline else self.datafile,
1589 1596 length,
1590 1597 realoffset,
1591 1598 len(d) - startoffset,
1592 1599 )
1593 1600 )
1594 1601
1595 1602 return util.buffer(d, startoffset, length)
1596 1603
1597 1604 if len(d) < length:
1598 1605 raise error.RevlogError(
1599 1606 _(
1600 1607 b'partial read of revlog %s; expected %d bytes from offset '
1601 1608 b'%d, got %d'
1602 1609 )
1603 1610 % (
1604 1611 self.indexfile if self._inline else self.datafile,
1605 1612 length,
1606 1613 offset,
1607 1614 len(d),
1608 1615 )
1609 1616 )
1610 1617
1611 1618 return d
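# The windowing above relies on the chunk cache size being a power of
# two: realoffset = offset & ~(cachesize - 1) rounds down to a multiple
# of cachesize. With the default cachesize of 65536, reading 100 bytes at
# offset 70000 gives realoffset == 65536 and reallength ==
# ((70000 + 100 + 65536) & ~65535) - 65536 == 65536, i.e. one aligned
# window covering the request plus its neighborhood.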
1612 1619
1613 1620 def _getsegment(self, offset, length, df=None):
1614 1621 """Obtain a segment of raw data from the revlog.
1615 1622
1616 1623 Accepts an absolute offset, length of bytes to obtain, and an
1617 1624 optional file handle to the already-opened revlog. If the file
1618 1625 handle is used, its original seek position will not be preserved.
1619 1626
1620 1627 Requests for data may be returned from a cache.
1621 1628
1622 1629 Returns a str or a buffer instance of raw byte data.
1623 1630 """
1624 1631 o, d = self._chunkcache
1625 1632 l = len(d)
1626 1633
1627 1634 # is it in the cache?
1628 1635 cachestart = offset - o
1629 1636 cacheend = cachestart + length
1630 1637 if cachestart >= 0 and cacheend <= l:
1631 1638 if cachestart == 0 and cacheend == l:
1632 1639 return d # avoid a copy
1633 1640 return util.buffer(d, cachestart, cacheend - cachestart)
1634 1641
1635 1642 return self._readsegment(offset, length, df=df)
1636 1643
1637 1644 def _getsegmentforrevs(self, startrev, endrev, df=None):
1638 1645 """Obtain a segment of raw data corresponding to a range of revisions.
1639 1646
1640 1647 Accepts the start and end revisions and an optional already-open
1641 1648 file handle to be used for reading. If the file handle is used, its
1642 1649 seek position will not be preserved.
1643 1650
1644 1651 Requests for data may be satisfied by a cache.
1645 1652
1646 1653 Returns a 2-tuple of (offset, data) for the requested range of
1647 1654 revisions. Offset is the integer offset from the beginning of the
1648 1655 revlog and data is a str or buffer of the raw byte data.
1649 1656
1650 1657 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1651 1658 to determine where each revision's data begins and ends.
1652 1659 """
1653 1660 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1654 1661 # (functions are expensive).
1655 1662 index = self.index
1656 1663 istart = index[startrev]
1657 1664 start = int(istart[0] >> 16)
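# the first index field packs (offset << 16) | flags, hence the
# 16-bit shift to recover the data offset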
1658 1665 if startrev == endrev:
1659 1666 end = start + istart[1]
1660 1667 else:
1661 1668 iend = index[endrev]
1662 1669 end = int(iend[0] >> 16) + iend[1]
1663 1670
1664 1671 if self._inline:
1665 1672 start += (startrev + 1) * self._io.size
1666 1673 end += (endrev + 1) * self._io.size
1667 1674 length = end - start
1668 1675
1669 1676 return start, self._getsegment(start, length, df=df)
1670 1677
1671 1678 def _chunk(self, rev, df=None):
1672 1679 """Obtain a single decompressed chunk for a revision.
1673 1680
1674 1681 Accepts an integer revision and an optional already-open file handle
1675 1682 to be used for reading. If used, the seek position of the file will not
1676 1683 be preserved.
1677 1684
1678 1685 Returns a str holding uncompressed data for the requested revision.
1679 1686 """
1680 1687 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1681 1688
1682 1689 def _chunks(self, revs, df=None, targetsize=None):
1683 1690 """Obtain decompressed chunks for the specified revisions.
1684 1691
1685 1692 Accepts an iterable of numeric revisions that are assumed to be in
1686 1693 ascending order. Also accepts an optional already-open file handle
1687 1694 to be used for reading. If used, the seek position of the file will
1688 1695 not be preserved.
1689 1696
1690 1697 This function is similar to calling ``self._chunk()`` multiple times,
1691 1698 but is faster.
1692 1699
1693 1700 Returns a list with decompressed data for each requested revision.
1694 1701 """
1695 1702 if not revs:
1696 1703 return []
1697 1704 start = self.start
1698 1705 length = self.length
1699 1706 inline = self._inline
1700 1707 iosize = self._io.size
1701 1708 buffer = util.buffer
1702 1709
1703 1710 l = []
1704 1711 ladd = l.append
1705 1712
1706 1713 if not self._withsparseread:
1707 1714 slicedchunks = (revs,)
1708 1715 else:
1709 1716 slicedchunks = deltautil.slicechunk(
1710 1717 self, revs, targetsize=targetsize
1711 1718 )
1712 1719
1713 1720 for revschunk in slicedchunks:
1714 1721 firstrev = revschunk[0]
1715 1722 # Skip trailing revisions with empty diff
1716 1723 for lastrev in revschunk[::-1]:
1717 1724 if length(lastrev) != 0:
1718 1725 break
1719 1726
1720 1727 try:
1721 1728 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1722 1729 except OverflowError:
1723 1730 # issue4215 - we can't cache a run of chunks greater than
1724 1731 # 2G on Windows
1725 1732 return [self._chunk(rev, df=df) for rev in revschunk]
1726 1733
1727 1734 decomp = self.decompress
1728 1735 for rev in revschunk:
1729 1736 chunkstart = start(rev)
1730 1737 if inline:
1731 1738 chunkstart += (rev + 1) * iosize
1732 1739 chunklength = length(rev)
1733 1740 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1734 1741
1735 1742 return l
1736 1743
1737 1744 def _chunkclear(self):
1738 1745 """Clear the raw chunk cache."""
1739 1746 self._chunkcache = (0, b'')
1740 1747
1741 1748 def deltaparent(self, rev):
1742 1749 """return deltaparent of the given revision"""
1743 1750 base = self.index[rev][3]
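# entry[3] is the base field: it equals ``rev`` itself for a full
# text; under generaldelta it names the delta parent directly,
# otherwise it marks the start of the delta chain (deltas then
# always apply against rev - 1, as returned below)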
1744 1751 if base == rev:
1745 1752 return nullrev
1746 1753 elif self._generaldelta:
1747 1754 return base
1748 1755 else:
1749 1756 return rev - 1
1750 1757
1751 1758 def issnapshot(self, rev):
1752 1759 """tells whether rev is a snapshot
1753 1760 """
1754 1761 if not self._sparserevlog:
1755 1762 return self.deltaparent(rev) == nullrev
1756 1763 elif util.safehasattr(self.index, 'issnapshot'):  # attr names are sysstrs
1757 1764 # directly assign the method to cache the testing and access
1758 1765 self.issnapshot = self.index.issnapshot
1759 1766 return self.issnapshot(rev)
1760 1767 if rev == nullrev:
1761 1768 return True
1762 1769 entry = self.index[rev]
1763 1770 base = entry[3]
1764 1771 if base == rev:
1765 1772 return True
1766 1773 if base == nullrev:
1767 1774 return True
1768 1775 p1 = entry[5]
1769 1776 p2 = entry[6]
1770 1777 if base == p1 or base == p2:
1771 1778 return False
1772 1779 return self.issnapshot(base)
1773 1780
1774 1781 def snapshotdepth(self, rev):
1775 1782 """number of snapshots in the chain before this one"""
1776 1783 if not self.issnapshot(rev):
1777 1784 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1778 1785 return len(self._deltachain(rev)[0]) - 1
1779 1786
1780 1787 def revdiff(self, rev1, rev2):
1781 1788 """return or calculate a delta between two revisions
1782 1789
1783 1790 The delta calculated is in binary form and is intended to be written to
1784 1791 revlog data directly. So this function needs raw revision data.
1785 1792 """
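# fast path: when rev1 already is rev2's delta parent, the stored
# chunk for rev2 *is* the wanted binary delta and is returned as-is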
1786 1793 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1787 1794 return bytes(self._chunk(rev2))
1788 1795
1789 1796 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1790 1797
1791 1798 def _processflags(self, text, flags, operation, raw=False):
1792 1799 """deprecated entry point to access flag processors"""
1793 1800 msg = b'_processflag(...) use the specialized variant'
1794 1801 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1795 1802 if raw:
1796 1803 return text, flagutil.processflagsraw(self, text, flags)
1797 1804 elif operation == b'read':
1798 1805 return flagutil.processflagsread(self, text, flags)
1799 1806 else: # write operation
1800 1807 return flagutil.processflagswrite(self, text, flags)
1801 1808
1802 1809 def revision(self, nodeorrev, _df=None, raw=False):
1803 1810 """return an uncompressed revision of a given node or revision
1804 1811 number.
1805 1812
1806 1813 _df - an existing file handle to read from. (internal-only)
1807 1814 raw - an optional argument specifying if the revision data is to be
1808 1815 treated as raw data when applying flag transforms. 'raw' should be set
1809 1816 to True when generating changegroups or in debug commands.
1810 1817 """
1811 1818 if raw:
1812 1819 msg = (
1813 1820 b'revlog.revision(..., raw=True) is deprecated, '
1814 1821 b'use revlog.rawdata(...)'
1815 1822 )
1816 1823 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1817 1824 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1818 1825
1819 1826 def sidedata(self, nodeorrev, _df=None):
1820 1827 """a map of extra data related to the changeset but not part of the hash
1821 1828
1822 1829 This function currently returns a dictionary. However, a more advanced
1823 1830 mapping object will likely be used in the future for more
1824 1831 efficient/lazy code.
1825 1832 """
1826 1833 return self._revisiondata(nodeorrev, _df)[1]
1827 1834
1828 1835 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1829 1836 # deal with <nodeorrev> argument type
1830 1837 if isinstance(nodeorrev, int):
1831 1838 rev = nodeorrev
1832 1839 node = self.node(rev)
1833 1840 else:
1834 1841 node = nodeorrev
1835 1842 rev = None
1836 1843
1837 1844 # fast path the special `nullid` rev
1838 1845 if node == nullid:
1839 1846 return b"", {}
1840 1847
1841 1848 # ``rawtext`` is the text as stored inside the revlog. Might be the
1842 1849 # revision or might need to be processed to retrieve the revision.
1843 1850 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1844 1851
1845 1852 if raw and validated:
1846 1853 # if we don't want to process the raw text and that raw
1847 1854 # text is cached, we can exit early.
1848 1855 return rawtext, {}
1849 1856 if rev is None:
1850 1857 rev = self.rev(node)
1851 1858 # the revlog's flag for this revision
1852 1859 # (flags usually alter its state or content)
1853 1860 flags = self.flags(rev)
1854 1861
1855 1862 if validated and flags == REVIDX_DEFAULT_FLAGS:
1856 1863 # no extra flags set, no flag processor runs, text = rawtext
1857 1864 return rawtext, {}
1858 1865
1859 1866 sidedata = {}
1860 1867 if raw:
1861 1868 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1862 1869 text = rawtext
1863 1870 else:
1864 1871 try:
1865 1872 r = flagutil.processflagsread(self, rawtext, flags)
1866 1873 except error.SidedataHashError as exc:
1867 1874 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1868 1875 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1869 1876 raise error.RevlogError(msg)
1870 1877 text, validatehash, sidedata = r
1871 1878 if validatehash:
1872 1879 self.checkhash(text, node, rev=rev)
1873 1880 if not validated:
1874 1881 self._revisioncache = (node, rev, rawtext)
1875 1882
1876 1883 return text, sidedata
1877 1884
1878 1885 def _rawtext(self, node, rev, _df=None):
1879 1886 """return the possibly unvalidated rawtext for a revision
1880 1887
1881 1888 returns (rev, rawtext, validated)
1882 1889 """
1883 1890
1884 1891 # revision in the cache (could be useful to apply delta)
1885 1892 cachedrev = None
1886 1893 # An intermediate text to apply deltas to
1887 1894 basetext = None
1888 1895
1889 1896 # Check if we have the entry in cache
1890 1897 # The cache entry looks like (node, rev, rawtext)
1891 1898 if self._revisioncache:
1892 1899 if self._revisioncache[0] == node:
1893 1900 return (rev, self._revisioncache[2], True)
1894 1901 cachedrev = self._revisioncache[1]
1895 1902
1896 1903 if rev is None:
1897 1904 rev = self.rev(node)
1898 1905
1899 1906 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1900 1907 if stopped:
1901 1908 basetext = self._revisioncache[2]
1902 1909
1903 1910 # drop cache to save memory, the caller is expected to
1904 1911 # update self._revisioncache after validating the text
1905 1912 self._revisioncache = None
1906 1913
1907 1914 targetsize = None
1908 1915 rawsize = self.index[rev][2]
1909 1916 if 0 <= rawsize:
1910 1917 targetsize = 4 * rawsize
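# the 4x multiplier is a heuristic bounding how much compressed data
# sparse-read slicing may fetch at once, relative to the size of the
# text being reconstructed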
1911 1918
1912 1919 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1913 1920 if basetext is None:
1914 1921 basetext = bytes(bins[0])
1915 1922 bins = bins[1:]
1916 1923
1917 1924 rawtext = mdiff.patches(basetext, bins)
1918 1925 del basetext # let us have a chance to free memory early
1919 1926 return (rev, rawtext, False)
1920 1927
1921 1928 def rawdata(self, nodeorrev, _df=None):
1922 1929 """return the uncompressed raw data of a given node or revision number.
1923 1930
1924 1931 _df - an existing file handle to read from. (internal-only)
1925 1932 """
1926 1933 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1927 1934
1928 1935 def hash(self, text, p1, p2):
1929 1936 """Compute a node hash.
1930 1937
1931 1938 Available as a function so that subclasses can replace the hash
1932 1939 as needed.
1933 1940 """
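# For reference (per our reading of storageutil): the parents are
# sorted before hashing, so the returned node is
# sha1(min(p1, p2) + max(p1, p2) + text).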
1934 1941 return storageutil.hashrevisionsha1(text, p1, p2)
1935 1942
1936 1943 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1937 1944 """Check node hash integrity.
1938 1945
1939 1946 Available as a function so that subclasses can extend hash mismatch
1940 1947 behaviors as needed.
1941 1948 """
1942 1949 try:
1943 1950 if p1 is None and p2 is None:
1944 1951 p1, p2 = self.parents(node)
1945 1952 if node != self.hash(text, p1, p2):
1946 1953 # Clear the revision cache on hash failure. The revision cache
1947 1954 # only stores the raw revision and clearing the cache does have
1948 1955 # the side-effect that we won't have a cache hit when the raw
1949 1956 # revision data is accessed. But this case should be rare and
1950 1957 # it is extra work to teach the cache about the hash
1951 1958 # verification state.
1952 1959 if self._revisioncache and self._revisioncache[0] == node:
1953 1960 self._revisioncache = None
1954 1961
1955 1962 revornode = rev
1956 1963 if revornode is None:
1957 1964 revornode = templatefilters.short(hex(node))
1958 1965 raise error.RevlogError(
1959 1966 _(b"integrity check failed on %s:%s")
1960 1967 % (self.indexfile, pycompat.bytestr(revornode))
1961 1968 )
1962 1969 except error.RevlogError:
1963 1970 if self._censorable and storageutil.iscensoredtext(text):
1964 1971 raise error.CensoredNodeError(self.indexfile, node, text)
1965 1972 raise
1966 1973
1967 1974 def _enforceinlinesize(self, tr, fp=None):
1968 1975 """Check if the revlog is too big for inline and convert if so.
1969 1976
1970 1977 This should be called after revisions are added to the revlog. If the
1971 1978 revlog has grown too large to be an inline revlog, it will convert it
1972 1979 to use multiple index and data files.
1973 1980 """
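# Background, for reference: an inline revlog interleaves each index
# entry with its revision data inside the .i file; once the data
# outgrows _maxinline, the code below rewrites it as a separate
# index (.i) / data (.d) pair.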
1974 1981 tiprev = len(self) - 1
1975 1982 if (
1976 1983 not self._inline
1977 1984 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1978 1985 ):
1979 1986 return
1980 1987
1981 1988 trinfo = tr.find(self.indexfile)
1982 1989 if trinfo is None:
1983 1990 raise error.RevlogError(
1984 1991 _(b"%s not found in the transaction") % self.indexfile
1985 1992 )
1986 1993
1987 1994 trindex = trinfo[2]
1988 1995 if trindex is not None:
1989 1996 dataoff = self.start(trindex)
1990 1997 else:
1991 1998 # revlog was stripped at start of transaction, use all leftover data
1992 1999 trindex = len(self) - 1
1993 2000 dataoff = self.end(tiprev)
1994 2001
1995 2002 tr.add(self.datafile, dataoff)
1996 2003
1997 2004 if fp:
1998 2005 fp.flush()
1999 2006 fp.close()
2000 2007 # We can't use the cached file handle after close(). So prevent
2001 2008 # its usage.
2002 2009 self._writinghandles = None
2003 2010
2004 2011 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2005 2012 for r in self:
2006 2013 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2007 2014
2008 2015 with self._indexfp(b'w') as fp:
2009 2016 self.version &= ~FLAG_INLINE_DATA
2010 2017 self._inline = False
2011 2018 io = self._io
2012 2019 for i in self:
2013 2020 e = io.packentry(self.index[i], self.node, self.version, i)
2014 2021 fp.write(e)
2015 2022
2016 2023 # the temp file replaces the real index when we exit the context
2017 2024 # manager
2018 2025
2019 2026 tr.replace(self.indexfile, trindex * self._io.size)
2020 2027 nodemaputil.setup_persistent_nodemap(tr, self)
2021 2028 self._chunkclear()
2022 2029
2023 2030 def _nodeduplicatecallback(self, transaction, node):
2024 2031 """called when trying to add a node already stored.
2025 2032 """
2026 2033
2027 2034 def addrevision(
2028 2035 self,
2029 2036 text,
2030 2037 transaction,
2031 2038 link,
2032 2039 p1,
2033 2040 p2,
2034 2041 cachedelta=None,
2035 2042 node=None,
2036 2043 flags=REVIDX_DEFAULT_FLAGS,
2037 2044 deltacomputer=None,
2038 2045 sidedata=None,
2039 2046 ):
2040 2047 """add a revision to the log
2041 2048
2042 2049 text - the revision data to add
2043 2050 transaction - the transaction object used for rollback
2044 2051 link - the linkrev data to add
2045 2052 p1, p2 - the parent nodeids of the revision
2046 2053 cachedelta - an optional precomputed delta
2047 2054 node - nodeid of revision; typically node is not specified, and it is
2048 2055 computed by default as hash(text, p1, p2), however subclasses might
2049 2056 use a different hashing method (and override checkhash() in that case)
2050 2057 flags - the known flags to set on the revision
2051 2058 deltacomputer - an optional deltacomputer instance shared between
2052 2059 multiple calls
2053 2060 """
2054 2061 if link == nullrev:
2055 2062 raise error.RevlogError(
2056 2063 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2057 2064 )
2058 2065
2059 2066 if sidedata is None:
2060 2067 sidedata = {}
2061 2068 flags = flags & ~REVIDX_SIDEDATA
2062 2069 elif not self.hassidedata:
2063 2070 raise error.ProgrammingError(
2064 2071 _(b"trying to add sidedata to a revlog who don't support them")
2065 2072 )
2066 2073 else:
2067 2074 flags |= REVIDX_SIDEDATA
2068 2075
2069 2076 if flags:
2070 2077 node = node or self.hash(text, p1, p2)
2071 2078
2072 2079 rawtext, validatehash = flagutil.processflagswrite(
2073 2080 self, text, flags, sidedata=sidedata
2074 2081 )
2075 2082
2076 2083 # If the flag processor modifies the revision data, ignore any provided
2077 2084 # cachedelta.
2078 2085 if rawtext != text:
2079 2086 cachedelta = None
2080 2087
2081 2088 if len(rawtext) > _maxentrysize:
2082 2089 raise error.RevlogError(
2083 2090 _(
2084 2091 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2085 2092 )
2086 2093 % (self.indexfile, len(rawtext))
2087 2094 )
2088 2095
2089 2096 node = node or self.hash(rawtext, p1, p2)
2090 2097 if self.index.has_node(node):
2091 2098 return node
2092 2099
2093 2100 if validatehash:
2094 2101 self.checkhash(rawtext, node, p1=p1, p2=p2)
2095 2102
2096 2103 return self.addrawrevision(
2097 2104 rawtext,
2098 2105 transaction,
2099 2106 link,
2100 2107 p1,
2101 2108 p2,
2102 2109 node,
2103 2110 flags,
2104 2111 cachedelta=cachedelta,
2105 2112 deltacomputer=deltacomputer,
2106 2113 )
2107 2114
2108 2115 def addrawrevision(
2109 2116 self,
2110 2117 rawtext,
2111 2118 transaction,
2112 2119 link,
2113 2120 p1,
2114 2121 p2,
2115 2122 node,
2116 2123 flags,
2117 2124 cachedelta=None,
2118 2125 deltacomputer=None,
2119 2126 ):
2120 2127 """add a raw revision with known flags, node and parents
2121 2128 useful when reusing a revision not stored in this revlog (e.g. received
2122 2129 over the wire, or read from an external bundle).
2123 2130 """
2124 2131 dfh = None
2125 2132 if not self._inline:
2126 2133 dfh = self._datafp(b"a+")
2127 2134 ifh = self._indexfp(b"a+")
2128 2135 try:
2129 2136 return self._addrevision(
2130 2137 node,
2131 2138 rawtext,
2132 2139 transaction,
2133 2140 link,
2134 2141 p1,
2135 2142 p2,
2136 2143 flags,
2137 2144 cachedelta,
2138 2145 ifh,
2139 2146 dfh,
2140 2147 deltacomputer=deltacomputer,
2141 2148 )
2142 2149 finally:
2143 2150 if dfh:
2144 2151 dfh.close()
2145 2152 ifh.close()
2146 2153
2147 2154 def compress(self, data):
2148 2155 """Generate a possibly-compressed representation of data."""
2149 2156 if not data:
2150 2157 return b'', data
2151 2158
2152 2159 compressed = self._compressor.compress(data)
2153 2160
2154 2161 if compressed:
2155 2162 # The revlog compressor added the header in the returned data.
2156 2163 return b'', compressed
2157 2164
2158 2165 if data[0:1] == b'\0':
2159 2166 return b'', data
2160 2167 return b'u', data
2161 2168
2162 2169 def decompress(self, data):
2163 2170 """Decompress a revlog chunk.
2164 2171
2165 2172 The chunk is expected to begin with a header identifying the
2166 2173 format type so it can be routed to an appropriate decompressor.
2167 2174 """
2168 2175 if not data:
2169 2176 return data
2170 2177
2171 2178 # Revlogs are read much more frequently than they are written and many
2172 2179 # chunks only take microseconds to decompress, so performance is
2173 2180 # important here.
2174 2181 #
2175 2182 # We can make a few assumptions about revlogs:
2176 2183 #
2177 2184 # 1) the majority of chunks will be compressed (as opposed to inline
2178 2185 # raw data).
2179 2186 # 2) decompressing *any* data will likely be at least 10x slower than
2180 2187 # returning raw inline data.
2181 2188 # 3) we want to prioritize common and officially supported compression
2182 2189 # engines
2183 2190 #
2184 2191 # It follows that we want to optimize for "decompress compressed data
2185 2192 # when encoded with common and officially supported compression engines"
2186 2193 # case over "raw data" and "data encoded by less common or non-official
2187 2194 # compression engines." That is why we have the inline lookup first
2188 2195 # followed by the compengines lookup.
2189 2196 #
2190 2197 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2191 2198 # compressed chunks. And this matters for changelog and manifest reads.
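# A hedged summary of the header convention dispatched below
# (payload bytes are hypothetical):
#
#   b'x...'  -> zlib-compressed chunk (raw zlib streams begin with 'x')
#   b'\0...' -> chunk stored verbatim, no marker needed
#   b'u...'  -> uncompressed chunk behind an explicit marker
#   other    -> looked up among registered compression engines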
2192 2199 t = data[0:1]
2193 2200
2194 2201 if t == b'x':
2195 2202 try:
2196 2203 return _zlibdecompress(data)
2197 2204 except zlib.error as e:
2198 2205 raise error.RevlogError(
2199 2206 _(b'revlog decompress error: %s')
2200 2207 % stringutil.forcebytestr(e)
2201 2208 )
2202 2209 # '\0' is more common than 'u' so it goes first.
2203 2210 elif t == b'\0':
2204 2211 return data
2205 2212 elif t == b'u':
2206 2213 return util.buffer(data, 1)
2207 2214
2208 2215 try:
2209 2216 compressor = self._decompressors[t]
2210 2217 except KeyError:
2211 2218 try:
2212 2219 engine = util.compengines.forrevlogheader(t)
2213 2220 compressor = engine.revlogcompressor(self._compengineopts)
2214 2221 self._decompressors[t] = compressor
2215 2222 except KeyError:
2216 2223 raise error.RevlogError(_(b'unknown compression type %r') % t)
2217 2224
2218 2225 return compressor.decompress(data)
2219 2226
2220 2227 def _addrevision(
2221 2228 self,
2222 2229 node,
2223 2230 rawtext,
2224 2231 transaction,
2225 2232 link,
2226 2233 p1,
2227 2234 p2,
2228 2235 flags,
2229 2236 cachedelta,
2230 2237 ifh,
2231 2238 dfh,
2232 2239 alwayscache=False,
2233 2240 deltacomputer=None,
2234 2241 ):
2235 2242 """internal function to add revisions to the log
2236 2243
2237 2244 see addrevision for argument descriptions.
2238 2245
2239 2246 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2240 2247
2241 2248 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2242 2249 be used.
2243 2250
2244 2251 invariants:
2245 2252 - rawtext is optional (can be None); if not set, cachedelta must be set.
2246 2253 if both are set, they must correspond to each other.
2247 2254 """
2248 2255 if node == nullid:
2249 2256 raise error.RevlogError(
2250 2257 _(b"%s: attempt to add null revision") % self.indexfile
2251 2258 )
2252 2259 if node == wdirid or node in wdirfilenodeids:
2253 2260 raise error.RevlogError(
2254 2261 _(b"%s: attempt to add wdir revision") % self.indexfile
2255 2262 )
2256 2263
2257 2264 if self._inline:
2258 2265 fh = ifh
2259 2266 else:
2260 2267 fh = dfh
2261 2268
2262 2269 btext = [rawtext]
2263 2270
2264 2271 curr = len(self)
2265 2272 prev = curr - 1
2266 2273 offset = self.end(prev)
2267 2274 p1r, p2r = self.rev(p1), self.rev(p2)
2268 2275
2269 2276 # full versions are inserted when the needed deltas
2270 2277 # become comparable to the uncompressed text
2271 2278 if rawtext is None:
2272 2279 # we need the rawtext size before it was changed by flag processors,
2273 2280 # i.e. the non-raw size. use revlog explicitly to avoid filelog's
2274 2281 # extra logic that might remove the metadata size.
2275 2282 textlen = mdiff.patchedsize(
2276 2283 revlog.size(self, cachedelta[0]), cachedelta[1]
2277 2284 )
2278 2285 else:
2279 2286 textlen = len(rawtext)
2280 2287
2281 2288 if deltacomputer is None:
2282 2289 deltacomputer = deltautil.deltacomputer(self)
2283 2290
2284 2291 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2285 2292
2286 2293 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2287 2294
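# Index entry layout, for reference: (offset << 16 | flags,
# compressed length, uncompressed length, delta-base rev, linkrev,
# p1 rev, p2 rev, node).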
2288 2295 e = (
2289 2296 offset_type(offset, flags),
2290 2297 deltainfo.deltalen,
2291 2298 textlen,
2292 2299 deltainfo.base,
2293 2300 link,
2294 2301 p1r,
2295 2302 p2r,
2296 2303 node,
2297 2304 )
2298 2305 self.index.append(e)
2299 2306
2300 2307 entry = self._io.packentry(e, self.node, self.version, curr)
2301 2308 self._writeentry(
2302 2309 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2303 2310 )
2304 2311
2305 2312 rawtext = btext[0]
2306 2313
2307 2314 if alwayscache and rawtext is None:
2308 2315 rawtext = deltacomputer.buildtext(revinfo, fh)
2309 2316
2310 2317 if type(rawtext) == bytes: # only accept immutable objects
2311 2318 self._revisioncache = (node, curr, rawtext)
2312 2319 self._chainbasecache[curr] = deltainfo.chainbase
2313 2320 return node
2314 2321
2315 2322 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2316 2323 # Files opened in a+ mode have inconsistent behavior on various
2317 2324 # platforms. Windows requires that a file positioning call be made
2318 2325 # when the file handle transitions between reads and writes. See
2319 2326 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2320 2327 # platforms, Python or the platform itself can be buggy. Some versions
2321 2328 # of Solaris have been observed to not append at the end of the file
2322 2329 # if the file was seeked to before the end. See issue4943 for more.
2323 2330 #
2324 2331 # We work around this issue by inserting a seek() before writing.
2325 2332 # Note: This is likely not necessary on Python 3. However, because
2326 2333 # the file handle is reused for reads and may be seeked there, we need
2327 2334 # to be careful before changing this.
2328 2335 ifh.seek(0, os.SEEK_END)
2329 2336 if dfh:
2330 2337 dfh.seek(0, os.SEEK_END)
2331 2338
2332 2339 curr = len(self) - 1
2333 2340 if not self._inline:
2334 2341 transaction.add(self.datafile, offset)
2335 2342 transaction.add(self.indexfile, curr * len(entry))
2336 2343 if data[0]:
2337 2344 dfh.write(data[0])
2338 2345 dfh.write(data[1])
2339 2346 ifh.write(entry)
2340 2347 else:
2341 2348 offset += curr * self._io.size
2342 2349 transaction.add(self.indexfile, offset, curr)
2343 2350 ifh.write(entry)
2344 2351 ifh.write(data[0])
2345 2352 ifh.write(data[1])
2346 2353 self._enforceinlinesize(transaction, ifh)
2347 2354 nodemaputil.setup_persistent_nodemap(transaction, self)
2348 2355
2349 2356 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2350 2357 """
2351 2358 add a delta group
2352 2359
2353 2360 given a set of deltas, add them to the revision log. the
2354 2361 first delta is against its parent, which should be in our
2355 2362 log, the rest are against the previous delta.
2356 2363
2357 2364 If ``addrevisioncb`` is defined, it will be called with arguments of
2358 2365 this revlog and the node that was added.
2359 2366 """
2360 2367
2361 2368 if self._writinghandles:
2362 2369 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2363 2370
2364 2371 nodes = []
2365 2372
2366 2373 r = len(self)
2367 2374 end = 0
2368 2375 if r:
2369 2376 end = self.end(r - 1)
2370 2377 ifh = self._indexfp(b"a+")
2371 2378 isize = r * self._io.size
2372 2379 if self._inline:
2373 2380 transaction.add(self.indexfile, end + isize, r)
2374 2381 dfh = None
2375 2382 else:
2376 2383 transaction.add(self.indexfile, isize, r)
2377 2384 transaction.add(self.datafile, end)
2378 2385 dfh = self._datafp(b"a+")
2379 2386
2380 2387 def flush():
2381 2388 if dfh:
2382 2389 dfh.flush()
2383 2390 ifh.flush()
2384 2391
2385 2392 self._writinghandles = (ifh, dfh)
2386 2393
2387 2394 try:
2388 2395 deltacomputer = deltautil.deltacomputer(self)
2389 2396 # loop through our set of deltas
2390 2397 for data in deltas:
2391 2398 node, p1, p2, linknode, deltabase, delta, flags = data
2392 2399 link = linkmapper(linknode)
2393 2400 flags = flags or REVIDX_DEFAULT_FLAGS
2394 2401
2395 2402 nodes.append(node)
2396 2403
2397 2404 if self.index.has_node(node):
2398 2405 self._nodeduplicatecallback(transaction, node)
2399 2406 # this can happen if two branches make the same change
2400 2407 continue
2401 2408
2402 2409 for p in (p1, p2):
2403 2410 if not self.index.has_node(p):
2404 2411 raise error.LookupError(
2405 2412 p, self.indexfile, _(b'unknown parent')
2406 2413 )
2407 2414
2408 2415 if not self.index.has_node(deltabase):
2409 2416 raise error.LookupError(
2410 2417 deltabase, self.indexfile, _(b'unknown delta base')
2411 2418 )
2412 2419
2413 2420 baserev = self.rev(deltabase)
2414 2421
2415 2422 if baserev != nullrev and self.iscensored(baserev):
2416 2423 # if base is censored, delta must be full replacement in a
2417 2424 # single patch operation
2418 2425 hlen = struct.calcsize(b">lll")
2419 2426 oldlen = self.rawsize(baserev)
2420 2427 newlen = len(delta) - hlen
2421 2428 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2422 2429 raise error.CensoredBaseError(
2423 2430 self.indexfile, self.node(baserev)
2424 2431 )
2425 2432
2426 2433 if not flags and self._peek_iscensored(baserev, delta, flush):
2427 2434 flags |= REVIDX_ISCENSORED
2428 2435
2429 2436 # We assume consumers of addrevisioncb will want to retrieve
2430 2437 # the added revision, which will require a call to
2431 2438 # revision(). revision() will fast path if there is a cache
2432 2439 # hit. So, we tell _addrevision() to always cache in this case.
2433 2440 # We're only using addgroup() in the context of changegroup
2434 2441 # generation so the revision data can always be handled as raw
2435 2442 # by the flagprocessor.
2436 2443 self._addrevision(
2437 2444 node,
2438 2445 None,
2439 2446 transaction,
2440 2447 link,
2441 2448 p1,
2442 2449 p2,
2443 2450 flags,
2444 2451 (baserev, delta),
2445 2452 ifh,
2446 2453 dfh,
2447 2454 alwayscache=bool(addrevisioncb),
2448 2455 deltacomputer=deltacomputer,
2449 2456 )
2450 2457
2451 2458 if addrevisioncb:
2452 2459 addrevisioncb(self, node)
2453 2460
2454 2461 if not dfh and not self._inline:
2455 2462 # addrevision switched from inline to conventional
2456 2463 # reopen the index
2457 2464 ifh.close()
2458 2465 dfh = self._datafp(b"a+")
2459 2466 ifh = self._indexfp(b"a+")
2460 2467 self._writinghandles = (ifh, dfh)
2461 2468 finally:
2462 2469 self._writinghandles = None
2463 2470
2464 2471 if dfh:
2465 2472 dfh.close()
2466 2473 ifh.close()
2467 2474
2468 2475 return nodes
2469 2476
2470 2477 def iscensored(self, rev):
2471 2478 """Check if a file revision is censored."""
2472 2479 if not self._censorable:
2473 2480 return False
2474 2481
2475 2482 return self.flags(rev) & REVIDX_ISCENSORED
2476 2483
2477 2484 def _peek_iscensored(self, baserev, delta, flush):
2478 2485 """Quickly check if a delta produces a censored revision."""
2479 2486 if not self._censorable:
2480 2487 return False
2481 2488
2482 2489 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2483 2490
2484 2491 def getstrippoint(self, minlink):
2485 2492 """find the minimum rev that must be stripped to strip the linkrev
2486 2493
2487 2494 Returns a tuple containing the minimum rev and a set of all revs that
2488 2495 have linkrevs that will be broken by this strip.
2489 2496 """
2490 2497 return storageutil.resolvestripinfo(
2491 2498 minlink,
2492 2499 len(self) - 1,
2493 2500 self.headrevs(),
2494 2501 self.linkrev,
2495 2502 self.parentrevs,
2496 2503 )
2497 2504
2498 2505 def strip(self, minlink, transaction):
2499 2506 """truncate the revlog on the first revision with a linkrev >= minlink
2500 2507
2501 2508 This function is called when we're stripping revision minlink and
2502 2509 its descendants from the repository.
2503 2510
2504 2511 We have to remove all revisions with linkrev >= minlink, because
2505 2512 the equivalent changelog revisions will be renumbered after the
2506 2513 strip.
2507 2514
2508 2515 So we truncate the revlog on the first of these revisions, and
2509 2516 trust that the caller has saved the revisions that shouldn't be
2510 2517 removed and that it'll re-add them after this truncation.
2511 2518 """
2512 2519 if len(self) == 0:
2513 2520 return
2514 2521
2515 2522 rev, _ = self.getstrippoint(minlink)
2516 2523 if rev == len(self):
2517 2524 return
2518 2525
2519 2526 # first truncate the files on disk
2520 2527 end = self.start(rev)
2521 2528 if not self._inline:
2522 2529 transaction.add(self.datafile, end)
2523 2530 end = rev * self._io.size
2524 2531 else:
2525 2532 end += rev * self._io.size
2526 2533
2527 2534 transaction.add(self.indexfile, end)
2528 2535
2529 2536 # then reset internal state in memory to forget those revisions
2530 2537 self._revisioncache = None
2531 2538 self._chaininfocache = {}
2532 2539 self._chunkclear()
2533 2540
2534 2541 del self.index[rev:-1]
2535 2542
2536 2543 def checksize(self):
2537 2544 """Check size of index and data files
2538 2545
2539 2546 return a (dd, di) tuple.
2540 2547 - dd: extra bytes for the "data" file
2541 2548 - di: extra bytes for the "index" file
2542 2549
2543 2550 A healthy revlog will return (0, 0).
2544 2551 """
2545 2552 expected = 0
2546 2553 if len(self):
2547 2554 expected = max(0, self.end(len(self) - 1))
2548 2555
2549 2556 try:
2550 2557 with self._datafp() as f:
2551 2558 f.seek(0, io.SEEK_END)
2552 2559 actual = f.tell()
2553 2560 dd = actual - expected
2554 2561 except IOError as inst:
2555 2562 if inst.errno != errno.ENOENT:
2556 2563 raise
2557 2564 dd = 0
2558 2565
2559 2566 try:
2560 2567 f = self.opener(self.indexfile)
2561 2568 f.seek(0, io.SEEK_END)
2562 2569 actual = f.tell()
2563 2570 f.close()
2564 2571 s = self._io.size
2565 2572 i = max(0, actual // s)
2566 2573 di = actual - (i * s)
2567 2574 if self._inline:
2568 2575 databytes = 0
2569 2576 for r in self:
2570 2577 databytes += max(0, self.length(r))
2571 2578 dd = 0
2572 2579 di = actual - len(self) * s - databytes
2573 2580 except IOError as inst:
2574 2581 if inst.errno != errno.ENOENT:
2575 2582 raise
2576 2583 di = 0
2577 2584
2578 2585 return (dd, di)
2579 2586
2580 2587 def files(self):
2581 2588 res = [self.indexfile]
2582 2589 if not self._inline:
2583 2590 res.append(self.datafile)
2584 2591 return res
2585 2592
2586 2593 def emitrevisions(
2587 2594 self,
2588 2595 nodes,
2589 2596 nodesorder=None,
2590 2597 revisiondata=False,
2591 2598 assumehaveparentrevisions=False,
2592 2599 deltamode=repository.CG_DELTAMODE_STD,
2593 2600 ):
2594 2601 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2595 2602 raise error.ProgrammingError(
2596 2603 b'unhandled value for nodesorder: %s' % nodesorder
2597 2604 )
2598 2605
2599 2606 if nodesorder is None and not self._generaldelta:
2600 2607 nodesorder = b'storage'
2601 2608
2602 2609 if (
2603 2610 not self._storedeltachains
2604 2611 and deltamode != repository.CG_DELTAMODE_PREV
2605 2612 ):
2606 2613 deltamode = repository.CG_DELTAMODE_FULL
2607 2614
2608 2615 return storageutil.emitrevisions(
2609 2616 self,
2610 2617 nodes,
2611 2618 nodesorder,
2612 2619 revlogrevisiondelta,
2613 2620 deltaparentfn=self.deltaparent,
2614 2621 candeltafn=self.candelta,
2615 2622 rawsizefn=self.rawsize,
2616 2623 revdifffn=self.revdiff,
2617 2624 flagsfn=self.flags,
2618 2625 deltamode=deltamode,
2619 2626 revisiondata=revisiondata,
2620 2627 assumehaveparentrevisions=assumehaveparentrevisions,
2621 2628 )
2622 2629
2623 2630 DELTAREUSEALWAYS = b'always'
2624 2631 DELTAREUSESAMEREVS = b'samerevs'
2625 2632 DELTAREUSENEVER = b'never'
2626 2633
2627 2634 DELTAREUSEFULLADD = b'fulladd'
2628 2635
2629 2636 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2630 2637
2631 2638 def clone(
2632 2639 self,
2633 2640 tr,
2634 2641 destrevlog,
2635 2642 addrevisioncb=None,
2636 2643 deltareuse=DELTAREUSESAMEREVS,
2637 2644 forcedeltabothparents=None,
2638 2645 sidedatacompanion=None,
2639 2646 ):
2640 2647 """Copy this revlog to another, possibly with format changes.
2641 2648
2642 2649 The destination revlog will contain the same revisions and nodes.
2643 2650 However, it may not be bit-for-bit identical due to e.g. delta encoding
2644 2651 differences.
2645 2652
2646 2653 The ``deltareuse`` argument controls how deltas from the existing revlog
2647 2654 are preserved in the destination revlog. The argument can have the
2648 2655 following values:
2649 2656
2650 2657 DELTAREUSEALWAYS
2651 2658 Deltas will always be reused (if possible), even if the destination
2652 2659 revlog would not select the same revisions for the delta. This is the
2653 2660 fastest mode of operation.
2654 2661 DELTAREUSESAMEREVS
2655 2662 Deltas will be reused if the destination revlog would pick the same
2656 2663 revisions for the delta. This mode strikes a balance between speed
2657 2664 and optimization.
2658 2665 DELTAREUSENEVER
2659 2666 Deltas will never be reused. This is the slowest mode of execution.
2660 2667 This mode can be used to recompute deltas (e.g. if the diff/delta
2661 2668 algorithm changes).
2662 2669 DELTAREUSEFULLADD
2663 2670 Revisions will be re-added as if they were new content. This is
2664 2671 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2665 2672 e.g. large file detection and handling.
2666 2673
2667 2674 Delta computation can be slow, so the choice of delta reuse policy can
2668 2675 significantly affect run time.
2669 2676
2670 2677 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2671 2678 two extremes. Deltas will be reused if they are appropriate. But if the
2672 2679 delta could choose a better revision, it will do so. This means if you
2673 2680 are converting a non-generaldelta revlog to a generaldelta revlog,
2674 2681 deltas will be recomputed if the delta's parent isn't a parent of the
2675 2682 revision.
2676 2683
2677 2684 In addition to the delta policy, the ``forcedeltabothparents``
2678 2685 argument controls whether to force computing deltas against both parents
2679 2686 for merges. If None, the destination revlog's current setting is kept.
2680 2687
2681 2688 If not None, `sidedatacompanion` is a callable that accepts two
2682 2689 arguments:
2683 2690
2684 2691 (srcrevlog, rev)
2685 2692
2686 2693 and returns a triplet that controls changes to sidedata content from the
2687 2694 old revision to the new clone result:
2688 2695
2689 2696 (dropall, filterout, update)
2690 2697
2691 2698 * if `dropall` is True, all sidedata should be dropped
2692 2699 * `filterout` is a set of sidedata keys that should be dropped
2693 2700 * `update` is a mapping of additional/new key -> value
(see the minimal companion sketch right after this docstring)
2694 2701 """
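# A minimal sidedatacompanion sketch (hypothetical key numbers, for
# illustration only): drop nothing wholesale, filter out sidedata
# key 7, and add/overwrite key 12 on every revision:
#
#   def companion(srcrevlog, rev):
#       return (False, {7}, {12: b'replacement value'})
#
# which could then be passed along, e.g. as
# ``src.clone(tr, dst, sidedatacompanion=companion)``.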
2695 2702 if deltareuse not in self.DELTAREUSEALL:
2696 2703 raise ValueError(
2697 2704 _(b'value for deltareuse invalid: %s') % deltareuse
2698 2705 )
2699 2706
2700 2707 if len(destrevlog):
2701 2708 raise ValueError(_(b'destination revlog is not empty'))
2702 2709
2703 2710 if getattr(self, 'filteredrevs', None):
2704 2711 raise ValueError(_(b'source revlog has filtered revisions'))
2705 2712 if getattr(destrevlog, 'filteredrevs', None):
2706 2713 raise ValueError(_(b'destination revlog has filtered revisions'))
2707 2714
2708 2715 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2709 2716 # if possible.
2710 2717 oldlazydelta = destrevlog._lazydelta
2711 2718 oldlazydeltabase = destrevlog._lazydeltabase
2712 2719 oldamd = destrevlog._deltabothparents
2713 2720
2714 2721 try:
2715 2722 if deltareuse == self.DELTAREUSEALWAYS:
2716 2723 destrevlog._lazydeltabase = True
2717 2724 destrevlog._lazydelta = True
2718 2725 elif deltareuse == self.DELTAREUSESAMEREVS:
2719 2726 destrevlog._lazydeltabase = False
2720 2727 destrevlog._lazydelta = True
2721 2728 elif deltareuse == self.DELTAREUSENEVER:
2722 2729 destrevlog._lazydeltabase = False
2723 2730 destrevlog._lazydelta = False
2724 2731
2725 2732 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2726 2733
2727 2734 self._clone(
2728 2735 tr,
2729 2736 destrevlog,
2730 2737 addrevisioncb,
2731 2738 deltareuse,
2732 2739 forcedeltabothparents,
2733 2740 sidedatacompanion,
2734 2741 )
2735 2742
2736 2743 finally:
2737 2744 destrevlog._lazydelta = oldlazydelta
2738 2745 destrevlog._lazydeltabase = oldlazydeltabase
2739 2746 destrevlog._deltabothparents = oldamd
2740 2747
2741 2748 def _clone(
2742 2749 self,
2743 2750 tr,
2744 2751 destrevlog,
2745 2752 addrevisioncb,
2746 2753 deltareuse,
2747 2754 forcedeltabothparents,
2748 2755 sidedatacompanion,
2749 2756 ):
2750 2757 """perform the core duty of `revlog.clone` after parameter processing"""
2751 2758 deltacomputer = deltautil.deltacomputer(destrevlog)
2752 2759 index = self.index
2753 2760 for rev in self:
2754 2761 entry = index[rev]
2755 2762
2756 2763 # Some classes override linkrev to take filtered revs into
2757 2764 # account. Use raw entry from index.
2758 2765 flags = entry[0] & 0xFFFF
2759 2766 linkrev = entry[4]
2760 2767 p1 = index[entry[5]][7]
2761 2768 p2 = index[entry[6]][7]
2762 2769 node = entry[7]
2763 2770
2764 2771 sidedataactions = (False, [], {})
2765 2772 if sidedatacompanion is not None:
2766 2773 sidedataactions = sidedatacompanion(self, rev)
2767 2774
2768 2775 # (Possibly) reuse the delta from the revlog if allowed and
2769 2776 # the revlog chunk is a delta.
2770 2777 cachedelta = None
2771 2778 rawtext = None
2772 2779 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2773 2780 dropall, filterout, update = sidedataactions
2774 2781 text, sidedata = self._revisiondata(rev)
2775 2782 if dropall:
2776 2783 sidedata = {}
2777 2784 for key in filterout:
2778 2785 sidedata.pop(key, None)
2779 2786 sidedata.update(update)
2780 2787 if not sidedata:
2781 2788 sidedata = None
2782 2789 destrevlog.addrevision(
2783 2790 text,
2784 2791 tr,
2785 2792 linkrev,
2786 2793 p1,
2787 2794 p2,
2788 2795 cachedelta=cachedelta,
2789 2796 node=node,
2790 2797 flags=flags,
2791 2798 deltacomputer=deltacomputer,
2792 2799 sidedata=sidedata,
2793 2800 )
2794 2801 else:
2795 2802 if destrevlog._lazydelta:
2796 2803 dp = self.deltaparent(rev)
2797 2804 if dp != nullrev:
2798 2805 cachedelta = (dp, bytes(self._chunk(rev)))
2799 2806
2800 2807 if not cachedelta:
2801 2808 rawtext = self.rawdata(rev)
2802 2809
2803 2810 ifh = destrevlog.opener(
2804 2811 destrevlog.indexfile, b'a+', checkambig=False
2805 2812 )
2806 2813 dfh = None
2807 2814 if not destrevlog._inline:
2808 2815 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2809 2816 try:
2810 2817 destrevlog._addrevision(
2811 2818 node,
2812 2819 rawtext,
2813 2820 tr,
2814 2821 linkrev,
2815 2822 p1,
2816 2823 p2,
2817 2824 flags,
2818 2825 cachedelta,
2819 2826 ifh,
2820 2827 dfh,
2821 2828 deltacomputer=deltacomputer,
2822 2829 )
2823 2830 finally:
2824 2831 if dfh:
2825 2832 dfh.close()
2826 2833 ifh.close()
2827 2834
2828 2835 if addrevisioncb:
2829 2836 addrevisioncb(self, rev, node)
2830 2837
2831 2838 def censorrevision(self, tr, censornode, tombstone=b''):
2832 2839 if (self.version & 0xFFFF) == REVLOGV0:
2833 2840 raise error.RevlogError(
2834 2841 _(b'cannot censor with version %d revlogs') % self.version
2835 2842 )
2836 2843
2837 2844 censorrev = self.rev(censornode)
2838 2845 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2839 2846
2840 2847 if len(tombstone) > self.rawsize(censorrev):
2841 2848 raise error.Abort(
2842 2849 _(b'censor tombstone must be no longer than censored data')
2843 2850 )
2844 2851
2845 2852 # Rewriting the revlog in place is hard. Our strategy for censoring is
2846 2853 # to create a new revlog, copy all revisions to it, then replace the
2847 2854 # revlogs on transaction close.
2848 2855
2849 2856 newindexfile = self.indexfile + b'.tmpcensored'
2850 2857 newdatafile = self.datafile + b'.tmpcensored'
2851 2858
2852 2859 # This is a bit dangerous. We could easily have a mismatch of state.
2853 2860 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2854 2861 newrl.version = self.version
2855 2862 newrl._generaldelta = self._generaldelta
2856 2863 newrl._io = self._io
2857 2864
2858 2865 for rev in self.revs():
2859 2866 node = self.node(rev)
2860 2867 p1, p2 = self.parents(node)
2861 2868
2862 2869 if rev == censorrev:
2863 2870 newrl.addrawrevision(
2864 2871 tombstone,
2865 2872 tr,
2866 2873 self.linkrev(censorrev),
2867 2874 p1,
2868 2875 p2,
2869 2876 censornode,
2870 2877 REVIDX_ISCENSORED,
2871 2878 )
2872 2879
2873 2880 if newrl.deltaparent(rev) != nullrev:
2874 2881 raise error.Abort(
2875 2882 _(
2876 2883 b'censored revision stored as delta; '
2877 2884 b'cannot censor'
2878 2885 ),
2879 2886 hint=_(
2880 2887 b'censoring of revlogs is not '
2881 2888 b'fully implemented; please report '
2882 2889 b'this bug'
2883 2890 ),
2884 2891 )
2885 2892 continue
2886 2893
2887 2894 if self.iscensored(rev):
2888 2895 if self.deltaparent(rev) != nullrev:
2889 2896 raise error.Abort(
2890 2897 _(
2891 2898 b'cannot censor due to censored '
2892 2899 b'revision having delta stored'
2893 2900 )
2894 2901 )
2895 2902 rawtext = self._chunk(rev)
2896 2903 else:
2897 2904 rawtext = self.rawdata(rev)
2898 2905
2899 2906 newrl.addrawrevision(
2900 2907 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2901 2908 )
2902 2909
2903 2910 tr.addbackup(self.indexfile, location=b'store')
2904 2911 if not self._inline:
2905 2912 tr.addbackup(self.datafile, location=b'store')
2906 2913
2907 2914 self.opener.rename(newrl.indexfile, self.indexfile)
2908 2915 if not self._inline:
2909 2916 self.opener.rename(newrl.datafile, self.datafile)
2910 2917
2911 2918 self.clearcaches()
2912 2919 self._loadindex()
2913 2920
2914 2921 def verifyintegrity(self, state):
2915 2922 """Verifies the integrity of the revlog.
2916 2923
2917 2924 Yields ``revlogproblem`` instances describing problems that are
2918 2925 found.
2919 2926 """
2920 2927 dd, di = self.checksize()
2921 2928 if dd:
2922 2929 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2923 2930 if di:
2924 2931 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2925 2932
2926 2933 version = self.version & 0xFFFF
2927 2934
2928 2935 # The verifier tells us what version revlog we should be.
2929 2936 if version != state[b'expectedversion']:
2930 2937 yield revlogproblem(
2931 2938 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2932 2939 % (self.indexfile, version, state[b'expectedversion'])
2933 2940 )
2934 2941
2935 2942 state[b'skipread'] = set()
2936 2943 state[b'safe_renamed'] = set()
2937 2944
2938 2945 for rev in self:
2939 2946 node = self.node(rev)
2940 2947
2941 2948 # Verify contents. 4 cases to care about:
2942 2949 #
2943 2950 # common: the most common case
2944 2951 # rename: with a rename
2945 2952 # meta: file content starts with b'\1\n', the metadata
2946 2953 # header defined in filelog.py, but without a rename
2947 2954 # ext: content stored externally
2948 2955 #
2949 2956 # More formally, their differences are shown below:
2950 2957 #
2951 2958 # | common | rename | meta | ext
2952 2959 # -------------------------------------------------------
2953 2960 # flags() | 0 | 0 | 0 | not 0
2954 2961 # renamed() | False | True | False | ?
2955 2962 # rawtext[0:2]=='\1\n'| False | True | True | ?
2956 2963 #
2957 2964 # "rawtext" means the raw text stored in revlog data, which
2958 2965 # could be retrieved by "rawdata(rev)". "text"
2959 2966 # mentioned below is "revision(rev)".
2960 2967 #
2961 2968 # There are 3 different lengths stored physically:
2962 2969 # 1. L1: rawsize, stored in revlog index
2963 2970 # 2. L2: len(rawtext), stored in revlog data
2964 2971 # 3. L3: len(text), stored in revlog data if flags==0, or
2965 2972 # possibly somewhere else if flags!=0
2966 2973 #
2967 2974 # L1 should be equal to L2. L3 could be different from them.
2968 2975 # "text" may or may not affect commit hash depending on flag
2969 2976 # processors (see flagutil.addflagprocessor).
2970 2977 #
2971 2978 # | common | rename | meta | ext
2972 2979 # -------------------------------------------------
2973 2980 # rawsize() | L1 | L1 | L1 | L1
2974 2981 # size() | L1 | L2-LM | L1(*) | L1 (?)
2975 2982 # len(rawtext) | L2 | L2 | L2 | L2
2976 2983 # len(text) | L2 | L2 | L2 | L3
2977 2984 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2978 2985 #
2979 2986 # LM: length of metadata, depending on rawtext
2980 2987 # (*): not ideal, see comment in filelog.size
2981 2988 # (?): could be "- len(meta)" if the resolved content has
2982 2989 # rename metadata
2983 2990 #
2984 2991 # Checks needed to be done:
2985 2992 # 1. length check: L1 == L2, in all cases.
2986 2993 # 2. hash check: depending on flag processor, we may need to
2987 2994 # use either "text" (external), or "rawtext" (in revlog).
2988 2995
2989 2996 try:
2990 2997 skipflags = state.get(b'skipflags', 0)
2991 2998 if skipflags:
2992 2999 skipflags &= self.flags(rev)
2993 3000
2994 3001 _verify_revision(self, skipflags, state, node)
2995 3002
2996 3003 l1 = self.rawsize(rev)
2997 3004 l2 = len(self.rawdata(node))
2998 3005
2999 3006 if l1 != l2:
3000 3007 yield revlogproblem(
3001 3008 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3002 3009 node=node,
3003 3010 )
3004 3011
3005 3012 except error.CensoredNodeError:
3006 3013 if state[b'erroroncensored']:
3007 3014 yield revlogproblem(
3008 3015 error=_(b'censored file data'), node=node
3009 3016 )
3010 3017 state[b'skipread'].add(node)
3011 3018 except Exception as e:
3012 3019 yield revlogproblem(
3013 3020 error=_(b'unpacking %s: %s')
3014 3021 % (short(node), stringutil.forcebytestr(e)),
3015 3022 node=node,
3016 3023 )
3017 3024 state[b'skipread'].add(node)
3018 3025
3019 3026 def storageinfo(
3020 3027 self,
3021 3028 exclusivefiles=False,
3022 3029 sharedfiles=False,
3023 3030 revisionscount=False,
3024 3031 trackedsize=False,
3025 3032 storedsize=False,
3026 3033 ):
3027 3034 d = {}
3028 3035
3029 3036 if exclusivefiles:
3030 3037 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3031 3038 if not self._inline:
3032 3039 d[b'exclusivefiles'].append((self.opener, self.datafile))
3033 3040
3034 3041 if sharedfiles:
3035 3042 d[b'sharedfiles'] = []
3036 3043
3037 3044 if revisionscount:
3038 3045 d[b'revisionscount'] = len(self)
3039 3046
3040 3047 if trackedsize:
3041 3048 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3042 3049
3043 3050 if storedsize:
3044 3051 d[b'storedsize'] = sum(
3045 3052 self.opener.stat(path).st_size for path in self.files()
3046 3053 )
3047 3054
3048 3055 return d
@@ -1,185 +1,231
1 1 ===================================
2 2 Test the persistent on-disk nodemap
3 3 ===================================
4 4
5
6 5 $ hg init test-repo
7 6 $ cd test-repo
8 7 $ cat << EOF >> .hg/hgrc
9 8 > [experimental]
10 9 > exp-persistent-nodemap=yes
11 10 > [devel]
12 11 > persistent-nodemap=yes
13 12 > EOF
14 13 $ hg debugbuilddag .+5000
15 14 $ hg debugnodemap --metadata
16 15 uid: ???????????????? (glob)
17 16 tip-rev: 5000
18 17 data-length: 122880
19 18 data-unused: 0
20 19 $ f --size .hg/store/00changelog.n
21 20 .hg/store/00changelog.n: size=42
22 21
23 22 Simple lookup works
24 23
25 24 $ ANYNODE=`hg log --template '{node|short}\n' --rev tip`
26 25 $ hg log -r "$ANYNODE" --template '{rev}\n'
27 26 5000
28 27
28
29 #if rust
30
31 $ f --sha256 .hg/store/00changelog-*.nd
32 .hg/store/00changelog-????????????????.nd: sha256=1e38e9ffaa45cad13f15c1a9880ad606f4241e8beea2f61b4d5365abadfb55f6 (glob)
33 $ hg debugnodemap --dump-new | f --sha256 --size
34 size=122880, sha256=1e38e9ffaa45cad13f15c1a9880ad606f4241e8beea2f61b4d5365abadfb55f6
35 $ hg debugnodemap --dump-disk | f --sha256 --bytes=256 --hexdump --size
36 size=122880, sha256=1e38e9ffaa45cad13f15c1a9880ad606f4241e8beea2f61b4d5365abadfb55f6
37 0000: 00 00 00 76 00 00 01 65 00 00 00 95 00 00 01 34 |...v...e.......4|
38 0010: 00 00 00 19 00 00 01 69 00 00 00 ab 00 00 00 4b |.......i.......K|
39 0020: 00 00 00 07 00 00 01 4c 00 00 00 f8 00 00 00 8f |.......L........|
40 0030: 00 00 00 c0 00 00 00 a7 00 00 00 89 00 00 01 46 |...............F|
41 0040: 00 00 00 92 00 00 01 bc 00 00 00 71 00 00 00 ac |...........q....|
42 0050: 00 00 00 af 00 00 00 b4 00 00 00 34 00 00 01 ca |...........4....|
43 0060: 00 00 00 23 00 00 01 45 00 00 00 2d 00 00 00 b2 |...#...E...-....|
44 0070: 00 00 00 56 00 00 01 0f 00 00 00 4e 00 00 02 4c |...V.......N...L|
45 0080: 00 00 00 e7 00 00 00 cd 00 00 01 5b 00 00 00 78 |...........[...x|
46 0090: 00 00 00 e3 00 00 01 8e 00 00 00 4f 00 00 00 b1 |...........O....|
47 00a0: 00 00 00 30 00 00 00 11 00 00 00 25 00 00 00 d2 |...0.......%....|
48 00b0: 00 00 00 ec 00 00 00 69 00 00 01 2b 00 00 01 2e |.......i...+....|
49 00c0: 00 00 00 aa 00 00 00 15 00 00 00 3a 00 00 01 4e |...........:...N|
50 00d0: 00 00 00 4d 00 00 00 9d 00 00 00 8e 00 00 00 a4 |...M............|
51 00e0: 00 00 00 c3 00 00 00 eb 00 00 00 29 00 00 00 ad |...........)....|
52 00f0: 00 00 01 3a 00 00 01 32 00 00 00 04 00 00 00 53 |...:...2.......S|
53
54
55 #else
56
29 57 $ f --sha256 .hg/store/00changelog-*.nd
30 58 .hg/store/00changelog-????????????????.nd: sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7 (glob)
31 59 $ hg debugnodemap --dump-new | f --sha256 --size
32 60 size=122880, sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7
33 61 $ hg debugnodemap --dump-disk | f --sha256 --bytes=256 --hexdump --size
34 62 size=122880, sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7
35 63 0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
36 64 0010: ff ff ff ff ff ff ff ff ff ff fa c2 ff ff ff ff |................|
37 65 0020: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
38 66 0030: ff ff ff ff ff ff ed b3 ff ff ff ff ff ff ff ff |................|
39 67 0040: ff ff ff ff ff ff ee 34 00 00 00 00 ff ff ff ff |.......4........|
40 68 0050: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
41 69 0060: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
42 70 0070: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
43 71 0080: ff ff ff ff ff ff f8 50 ff ff ff ff ff ff ff ff |.......P........|
44 72 0090: ff ff ff ff ff ff ff ff ff ff ec c7 ff ff ff ff |................|
45 73 00a0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
46 74 00b0: ff ff ff ff ff ff fa be ff ff f2 fc ff ff ff ff |................|
47 75 00c0: ff ff ff ff ff ff ef ea ff ff ff ff ff ff f9 17 |................|
48 76 00d0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
49 77 00e0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
50 78 00f0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
79
80 #endif
81
51 82 $ hg debugnodemap --check
52 83 revision in index: 5001
53 84 revision in nodemap: 5001
54 85
55 86 add a new commit
56 87
57 88 $ hg up
58 89 0 files updated, 0 files merged, 0 files removed, 0 files unresolved
59 90 $ echo foo > foo
60 91 $ hg add foo
61 92 $ hg ci -m 'foo'
62 93
63 #if pure
94 #if no-pure no-rust
95 $ hg debugnodemap --metadata
96 uid: ???????????????? (glob)
97 tip-rev: 5001
98 data-length: 122880
99 data-unused: 0
100 #else
64 101 $ hg debugnodemap --metadata
65 102 uid: ???????????????? (glob)
66 103 tip-rev: 5001
67 104 data-length: 123072
68 105 data-unused: 192
69 #else
70 $ hg debugnodemap --metadata
71 uid: ???????????????? (glob)
72 tip-rev: 5001
73 data-length: 122880
74 data-unused: 0
75 106 #endif
107
76 108 $ f --size .hg/store/00changelog.n
77 109 .hg/store/00changelog.n: size=42
78 110
79 111 (The pure code uses the debug code that performs incremental updates; the C code re-encodes from scratch)
80 112
81 113 #if pure
82 114 $ f --sha256 .hg/store/00changelog-*.nd --size
83 115 .hg/store/00changelog-????????????????.nd: size=123072, sha256=136472751566c8198ff09e306a7d2f9bd18bd32298d614752b73da4d6df23340 (glob)
116 #endif
84 117
85 #else
118 #if rust
119 $ f --sha256 .hg/store/00changelog-*.nd --size
120 .hg/store/00changelog-????????????????.nd: size=123072, sha256=ccc8a43310ace13812fcc648683e259346754ef934c12dd238cf9b7fadfe9a4b (glob)
121 #endif
122
123 #if no-pure no-rust
86 124 $ f --sha256 .hg/store/00changelog-*.nd --size
87 125 .hg/store/00changelog-????????????????.nd: size=122880, sha256=bfafebd751c4f6d116a76a37a1dee2a251747affe7efbcc4f4842ccc746d4db9 (glob)
88
89 126 #endif
90 127
91 128 $ hg debugnodemap --check
92 129 revision in index: 5002
93 130 revision in nodemap: 5002
94 131
95 132 Test code path without mmap
96 133 ---------------------------
97 134
98 135 $ echo bar > bar
99 136 $ hg add bar
100 137 $ hg ci -m 'bar' --config experimental.exp-persistent-nodemap.mmap=no
101 138
102 139 $ hg debugnodemap --check --config experimental.exp-persistent-nodemap.mmap=yes
103 140 revision in index: 5003
104 141 revision in nodemap: 5003
105 142 $ hg debugnodemap --check --config experimental.exp-persistent-nodemap.mmap=no
106 143 revision in index: 5003
107 144 revision in nodemap: 5003
108 145
109 146
110 147 #if pure
111 148 $ hg debugnodemap --metadata
112 149 uid: ???????????????? (glob)
113 150 tip-rev: 5002
114 151 data-length: 123328
115 152 data-unused: 384
116 153 $ f --sha256 .hg/store/00changelog-*.nd --size
117 154 .hg/store/00changelog-????????????????.nd: size=123328, sha256=10d26e9776b6596af0f89143a54eba8cc581e929c38242a02a7b0760698c6c70 (glob)
118
119 #else
155 #endif
156 #if rust
157 $ hg debugnodemap --metadata
158 uid: ???????????????? (glob)
159 tip-rev: 5002
160 data-length: 123328
161 data-unused: 384
162 $ f --sha256 .hg/store/00changelog-*.nd --size
163 .hg/store/00changelog-????????????????.nd: size=123328, sha256=081eec9eb6708f2bf085d939b4c97bc0b6762bc8336bc4b93838f7fffa1516bf (glob)
164 #endif
165 #if no-pure no-rust
120 166 $ hg debugnodemap --metadata
121 167 uid: ???????????????? (glob)
122 168 tip-rev: 5002
123 169 data-length: 122944
124 170 data-unused: 0
125 171 $ f --sha256 .hg/store/00changelog-*.nd --size
126 172 .hg/store/00changelog-????????????????.nd: size=122944, sha256=755976b22b64ab680401b45395953504e64e7fa8c31ac570f58dee21e15f9bc0 (glob)
127 173 #endif
128 174
129 175 Test force warming the cache
130 176
131 177 $ rm .hg/store/00changelog.n
132 178 $ hg debugnodemap --metadata
133 179 $ hg debugupdatecache
134 180 #if pure
135 181 $ hg debugnodemap --metadata
136 182 uid: ???????????????? (glob)
137 183 tip-rev: 5002
138 184 data-length: 122944
139 185 data-unused: 0
140 186 #else
141 187 $ hg debugnodemap --metadata
142 188 uid: ???????????????? (glob)
143 189 tip-rev: 5002
144 190 data-length: 122944
145 191 data-unused: 0
146 192 #endif
147 193
148 194 Check out of sync nodemap
149 195 =========================
150 196
151 197 First copy old data on the side.
152 198
153 199 $ mkdir ../tmp-copies
154 200 $ cp .hg/store/00changelog-????????????????.nd .hg/store/00changelog.n ../tmp-copies
155 201
156 202 Nodemap lagging behind
157 203 ----------------------
158 204
159 205 make a new commit
160 206
161 207 $ echo bar2 > bar
162 208 $ hg ci -m 'bar2'
163 209 $ NODE=`hg log -r tip -T '{node}\n'`
164 210 $ hg log -r "$NODE" -T '{rev}\n'
165 211 5003
166 212
167 213 If the nodemap is lagging behind, it can catch up fine
168 214
169 215 $ hg debugnodemap --metadata
170 216 uid: ???????????????? (glob)
171 217 tip-rev: 5003
172 218 data-length: 123200 (pure !)
173 219 data-length: 123200 (rust !)
174 220 data-length: 122944 (no-rust no-pure !)
175 221 data-unused: 256 (pure !)
176 222 data-unused: 256 (rust !)
177 223 data-unused: 0 (no-rust no-pure !)
178 224 $ cp -f ../tmp-copies/* .hg/store/
179 225 $ hg debugnodemap --metadata
180 226 uid: ???????????????? (glob)
181 227 tip-rev: 5002
182 228 data-length: 122944
183 229 data-unused: 0
184 230 $ hg log -r "$NODE" -T '{rev}\n'
185 231 5003