revlog: avoid hard-coded hash sizes...
Joerg Sonnenberger
r45596:cc257292 default
@@ -1,3068 +1,3068 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 templatefilters,
69 69 util,
70 70 )
71 71 from .interfaces import (
72 72 repository,
73 73 util as interfaceutil,
74 74 )
75 75 from .revlogutils import (
76 76 deltas as deltautil,
77 77 flagutil,
78 78 nodemap as nodemaputil,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanket usage of all the names to prevent pyflakes complaints
87 87 # We need these names available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_EXTSTORED
102 102 REVIDX_DEFAULT_FLAGS
103 103 REVIDX_FLAGS_ORDER
104 104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 105
106 106 parsers = policy.importmod('parsers')
107 107 rustancestor = policy.importrust('ancestor')
108 108 rustdagop = policy.importrust('dagop')
109 109 rustrevlog = policy.importrust('revlog')
110 110
111 111 # Aliased for performance.
112 112 _zlibdecompress = zlib.decompress
113 113
114 114 # max size of revlog with inline data
115 115 _maxinline = 131072
116 116 _chunksize = 1048576
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False, {}
121 121
122 122
123 123 def ellipsiswriteprocessor(rl, text, sidedata):
124 124 return text, False
125 125
126 126
127 127 def ellipsisrawprocessor(rl, text):
128 128 return False
129 129
130 130
131 131 ellipsisprocessor = (
132 132 ellipsisreadprocessor,
133 133 ellipsiswriteprocessor,
134 134 ellipsisrawprocessor,
135 135 )
136 136
137 137
138 138 def getoffset(q):
139 139 return int(q >> 16)
140 140
141 141
142 142 def gettype(q):
143 143 return int(q & 0xFFFF)
144 144
145 145
146 146 def offset_type(offset, type):
147 147 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
148 148 raise ValueError(b'unknown revlog index flags')
149 149 return int(int(offset) << 16 | type)
150 150
151 151
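The three helpers above pack a 48-bit file offset and a 16-bit flag field into a single integer, stored as the first field of every index entry. A minimal round-trip sketch of getoffset()/gettype()/offset_type() (the values are arbitrary illustrations):

    offset, flags = 1024, 0          # arbitrary example values
    packed = (offset << 16) | flags  # what offset_type() stores
    assert packed >> 16 == offset    # getoffset()
    assert packed & 0xFFFF == flags  # gettype()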
152 152 def _verify_revision(rl, skipflags, state, node):
153 153 """Verify the integrity of the given revlog ``node`` while providing a hook
154 154 point for extensions to influence the operation."""
155 155 if skipflags:
156 156 state[b'skipread'].add(node)
157 157 else:
158 158 # Side-effect: read content and verify hash.
159 159 rl.revision(node)
160 160
161 161
162 162 @attr.s(slots=True, frozen=True)
163 163 class _revisioninfo(object):
164 164 """Information about a revision that allows building its fulltext
165 165 node: expected hash of the revision
166 166 p1, p2: parent revs of the revision
167 167 btext: built text cache consisting of a one-element list
168 168 cachedelta: (baserev, uncompressed_delta) or None
169 169 flags: flags associated with the revision storage
170 170
171 171 One of btext[0] or cachedelta must be set.
172 172 """
173 173
174 174 node = attr.ib()
175 175 p1 = attr.ib()
176 176 p2 = attr.ib()
177 177 btext = attr.ib()
178 178 textlen = attr.ib()
179 179 cachedelta = attr.ib()
180 180 flags = attr.ib()
181 181
182 182
183 183 @interfaceutil.implementer(repository.irevisiondelta)
184 184 @attr.s(slots=True)
185 185 class revlogrevisiondelta(object):
186 186 node = attr.ib()
187 187 p1node = attr.ib()
188 188 p2node = attr.ib()
189 189 basenode = attr.ib()
190 190 flags = attr.ib()
191 191 baserevisionsize = attr.ib()
192 192 revision = attr.ib()
193 193 delta = attr.ib()
194 194 linknode = attr.ib(default=None)
195 195
196 196
197 197 @interfaceutil.implementer(repository.iverifyproblem)
198 198 @attr.s(frozen=True)
199 199 class revlogproblem(object):
200 200 warning = attr.ib(default=None)
201 201 error = attr.ib(default=None)
202 202 node = attr.ib(default=None)
203 203
204 204
205 205 # index v0:
206 206 # 4 bytes: offset
207 207 # 4 bytes: compressed length
208 208 # 4 bytes: base rev
209 209 # 4 bytes: link rev
210 210 # 20 bytes: parent 1 nodeid
211 211 # 20 bytes: parent 2 nodeid
212 212 # 20 bytes: nodeid
213 213 indexformatv0 = struct.Struct(b">4l20s20s20s")
214 214 indexformatv0_pack = indexformatv0.pack
215 215 indexformatv0_unpack = indexformatv0.unpack
216 216
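Each v0 record is thus a fixed 76-byte struct: four big-endian signed 32-bit integers followed by three 20-byte binary SHA-1 nodeids. A sketch of decoding one record, using zeroed placeholder bytes:

    import struct

    fmt = struct.Struct(b">4l20s20s20s")  # same layout as indexformatv0
    assert fmt.size == 4 * 4 + 3 * 20     # 76 bytes per entry
    raw = b"\x00" * fmt.size              # placeholder record
    offset, clen, baserev, linkrev, p1, p2, node = fmt.unpack(raw)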
217 217
218 218 class revlogoldindex(list):
219 219 @property
220 220 def nodemap(self):
221 221 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
222 222 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
223 223 return self._nodemap
224 224
225 225 @util.propertycache
226 226 def _nodemap(self):
227 227 nodemap = nodemaputil.NodeMap({nullid: nullrev})
228 228 for r in range(0, len(self)):
229 229 n = self[r][7]
230 230 nodemap[n] = r
231 231 return nodemap
232 232
233 233 def has_node(self, node):
234 234 """return True if the node exist in the index"""
235 235 return node in self._nodemap
236 236
237 237 def rev(self, node):
238 238 """return a revision for a node
239 239
240 240 If the node is unknown, raise a RevlogError"""
241 241 return self._nodemap[node]
242 242
243 243 def get_rev(self, node):
244 244 """return a revision for a node
245 245
246 246 If the node is unknown, return None"""
247 247 return self._nodemap.get(node)
248 248
249 249 def append(self, tup):
250 250 self._nodemap[tup[7]] = len(self)
251 251 super(revlogoldindex, self).append(tup)
252 252
253 253 def __delitem__(self, i):
254 254 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
255 255 raise ValueError(b"deleting slices only supports a:-1 with step 1")
256 256 for r in pycompat.xrange(i.start, len(self)):
257 257 del self._nodemap[self[r][7]]
258 258 super(revlogoldindex, self).__delitem__(i)
259 259
260 260 def clearcaches(self):
261 261 self.__dict__.pop('_nodemap', None)
262 262
263 263 def __getitem__(self, i):
264 264 if i == -1:
265 265 return (0, 0, 0, -1, -1, -1, -1, nullid)
266 266 return list.__getitem__(self, i)
267 267
268 268
269 269 class revlogoldio(object):
270 270 def __init__(self):
271 271 self.size = indexformatv0.size
272 272
273 273 def parseindex(self, data, inline):
274 274 s = self.size
275 275 index = []
276 276 nodemap = nodemaputil.NodeMap({nullid: nullrev})
277 277 n = off = 0
278 278 l = len(data)
279 279 while off + s <= l:
280 280 cur = data[off : off + s]
281 281 off += s
282 282 e = indexformatv0_unpack(cur)
283 283 # transform to revlogv1 format
284 284 e2 = (
285 285 offset_type(e[0], 0),
286 286 e[1],
287 287 -1,
288 288 e[2],
289 289 e[3],
290 290 nodemap.get(e[4], nullrev),
291 291 nodemap.get(e[5], nullrev),
292 292 e[6],
293 293 )
294 294 index.append(e2)
295 295 nodemap[e[6]] = n
296 296 n += 1
297 297
298 298 index = revlogoldindex(index)
299 299 return index, None
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 if gettype(entry[0]):
303 303 raise error.RevlogError(
304 304 _(b'index entry flags need revlog version 1')
305 305 )
306 306 e2 = (
307 307 getoffset(entry[0]),
308 308 entry[1],
309 309 entry[3],
310 310 entry[4],
311 311 node(entry[5]),
312 312 node(entry[6]),
313 313 entry[7],
314 314 )
315 315 return indexformatv0_pack(*e2)
316 316
317 317
318 318 # index ng:
319 319 # 6 bytes: offset
320 320 # 2 bytes: flags
321 321 # 4 bytes: compressed length
322 322 # 4 bytes: uncompressed length
323 323 # 4 bytes: base rev
324 324 # 4 bytes: link rev
325 325 # 4 bytes: parent 1 rev
326 326 # 4 bytes: parent 2 rev
327 327 # 32 bytes: nodeid
328 328 indexformatng = struct.Struct(b">Qiiiiii20s12x")
329 329 indexformatng_pack = indexformatng.pack
330 330 versionformat = struct.Struct(b">I")
331 331 versionformat_pack = versionformat.pack
332 332 versionformat_unpack = versionformat.unpack
333 333
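As a sanity check on the layout above, each v1 entry is 64 bytes; the 20-byte SHA-1 nodeid is followed by 12 bytes of zero padding, which reserves room for 32-byte hashes and is why code should derive hash sizes from the node itself rather than hard-coding 20 or 40 (the theme of this changeset):

    import struct

    fmt = struct.Struct(b">Qiiiiii20s12x")  # same layout as indexformatng
    assert fmt.size == 8 + 6 * 4 + 20 + 12 == 64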
334 334 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
335 335 # signed integer)
336 336 _maxentrysize = 0x7FFFFFFF
337 337
338 338
339 339 class revlogio(object):
340 340 def __init__(self):
341 341 self.size = indexformatng.size
342 342
343 343 def parseindex(self, data, inline):
344 344 # call the C implementation to parse the index data
345 345 index, cache = parsers.parse_index2(data, inline)
346 346 return index, cache
347 347
348 348 def packentry(self, entry, node, version, rev):
349 349 p = indexformatng_pack(*entry)
350 350 if rev == 0:
351 351 p = versionformat_pack(version) + p[4:]
352 352 return p
353 353
354 354
355 355 NodemapRevlogIO = None
356 356
357 357 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
358 358
359 359 class NodemapRevlogIO(revlogio):
360 360 """A debug oriented IO class that return a PersistentNodeMapIndexObject
361 361
362 362 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
363 363 """
364 364
365 365 def parseindex(self, data, inline):
366 366 index, cache = parsers.parse_index_devel_nodemap(data, inline)
367 367 return index, cache
368 368
369 369
370 370 class rustrevlogio(revlogio):
371 371 def parseindex(self, data, inline):
372 372 index, cache = super(rustrevlogio, self).parseindex(data, inline)
373 373 return rustrevlog.MixedIndex(index), cache
374 374
375 375
376 376 class revlog(object):
377 377 """
378 378 the underlying revision storage object
379 379
380 380 A revlog consists of two parts, an index and the revision data.
381 381
382 382 The index is a file with a fixed record size containing
383 383 information on each revision, including its nodeid (hash), the
384 384 nodeids of its parents, the position and offset of its data within
385 385 the data file, and the revision it's based on. Finally, each entry
386 386 contains a linkrev entry that can serve as a pointer to external
387 387 data.
388 388
389 389 The revision data itself is a linear collection of data chunks.
390 390 Each chunk represents a revision and is usually represented as a
391 391 delta against the previous chunk. To bound lookup time, runs of
392 392 deltas are limited to about 2 times the length of the original
393 393 version data. This makes retrieval of a version proportional to
394 394 its size, or O(1) relative to the number of revisions.
395 395
396 396 Both pieces of the revlog are written to in an append-only
397 397 fashion, which means we never need to rewrite a file to insert or
398 398 remove data, and can use some simple techniques to avoid the need
399 399 for locking while reading.
400 400
401 401 If checkambig, indexfile is opened with checkambig=True at
402 402 writing, to avoid file stat ambiguity.
403 403
404 404 If mmaplargeindex is True, and an mmapindexthreshold is set, the
405 405 index will be mmapped rather than read if it is larger than the
406 406 configured threshold.
407 407
408 408 If censorable is True, the revlog can have censored revisions.
409 409
410 410 If `upperboundcomp` is not None, this is the expected maximal gain from
411 411 compression for the data content.
412 412 """
413 413
414 414 _flagserrorclass = error.RevlogError
415 415
416 416 def __init__(
417 417 self,
418 418 opener,
419 419 indexfile,
420 420 datafile=None,
421 421 checkambig=False,
422 422 mmaplargeindex=False,
423 423 censorable=False,
424 424 upperboundcomp=None,
425 425 persistentnodemap=False,
426 426 ):
427 427 """
428 428 create a revlog object
429 429
430 430 opener is a function that abstracts the file opening operation
431 431 and can be used to implement COW semantics or the like.
432 432
433 433 """
434 434 self.upperboundcomp = upperboundcomp
435 435 self.indexfile = indexfile
436 436 self.datafile = datafile or (indexfile[:-2] + b".d")
437 437 self.nodemap_file = None
438 438 if persistentnodemap:
439 439 if indexfile.endswith(b'.a'):
440 440 pending_path = indexfile[:-4] + b".n.a"
441 441 if opener.exists(pending_path):
442 442 self.nodemap_file = pending_path
443 443 else:
444 444 self.nodemap_file = indexfile[:-4] + b".n"
445 445 else:
446 446 self.nodemap_file = indexfile[:-2] + b".n"
447 447
448 448 self.opener = opener
449 449 # When True, indexfile is opened with checkambig=True at writing, to
450 450 # avoid file stat ambiguity.
451 451 self._checkambig = checkambig
452 452 self._mmaplargeindex = mmaplargeindex
453 453 self._censorable = censorable
454 454 # 3-tuple of (node, rev, text) for a raw revision.
455 455 self._revisioncache = None
456 456 # Maps rev to chain base rev.
457 457 self._chainbasecache = util.lrucachedict(100)
458 458 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
459 459 self._chunkcache = (0, b'')
460 460 # How much data to read and cache into the raw revlog data cache.
461 461 self._chunkcachesize = 65536
462 462 self._maxchainlen = None
463 463 self._deltabothparents = True
464 464 self.index = None
465 465 self._nodemap_docket = None
466 466 # Mapping of partial identifiers to full nodes.
467 467 self._pcache = {}
468 468 # Mapping of revision integer to full node.
469 469 self._compengine = b'zlib'
470 470 self._compengineopts = {}
471 471 self._maxdeltachainspan = -1
472 472 self._withsparseread = False
473 473 self._sparserevlog = False
474 474 self._srdensitythreshold = 0.50
475 475 self._srmingapsize = 262144
476 476
477 477 # Make copy of flag processors so each revlog instance can support
478 478 # custom flags.
479 479 self._flagprocessors = dict(flagutil.flagprocessors)
480 480
481 481 # 2-tuple of file handles being used for active writing.
482 482 self._writinghandles = None
483 483
484 484 self._loadindex()
485 485
486 486 def _loadindex(self):
487 487 mmapindexthreshold = None
488 488 opts = self.opener.options
489 489
490 490 if b'revlogv2' in opts:
491 491 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
492 492 elif b'revlogv1' in opts:
493 493 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
494 494 if b'generaldelta' in opts:
495 495 newversionflags |= FLAG_GENERALDELTA
496 496 elif b'revlogv0' in self.opener.options:
497 497 newversionflags = REVLOGV0
498 498 else:
499 499 newversionflags = REVLOG_DEFAULT_VERSION
500 500
501 501 if b'chunkcachesize' in opts:
502 502 self._chunkcachesize = opts[b'chunkcachesize']
503 503 if b'maxchainlen' in opts:
504 504 self._maxchainlen = opts[b'maxchainlen']
505 505 if b'deltabothparents' in opts:
506 506 self._deltabothparents = opts[b'deltabothparents']
507 507 self._lazydelta = bool(opts.get(b'lazydelta', True))
508 508 self._lazydeltabase = False
509 509 if self._lazydelta:
510 510 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
511 511 if b'compengine' in opts:
512 512 self._compengine = opts[b'compengine']
513 513 if b'zlib.level' in opts:
514 514 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
515 515 if b'zstd.level' in opts:
516 516 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
517 517 if b'maxdeltachainspan' in opts:
518 518 self._maxdeltachainspan = opts[b'maxdeltachainspan']
519 519 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
520 520 mmapindexthreshold = opts[b'mmapindexthreshold']
521 521 self.hassidedata = bool(opts.get(b'side-data', False))
522 522 if self.hassidedata:
523 523 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
524 524 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
525 525 withsparseread = bool(opts.get(b'with-sparse-read', False))
526 526 # sparse-revlog forces sparse-read
527 527 self._withsparseread = self._sparserevlog or withsparseread
528 528 if b'sparse-read-density-threshold' in opts:
529 529 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
530 530 if b'sparse-read-min-gap-size' in opts:
531 531 self._srmingapsize = opts[b'sparse-read-min-gap-size']
532 532 if opts.get(b'enableellipsis'):
533 533 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
534 534
535 535 # revlog v0 doesn't have flag processors
536 536 for flag, processor in pycompat.iteritems(
537 537 opts.get(b'flagprocessors', {})
538 538 ):
539 539 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
540 540
541 541 if self._chunkcachesize <= 0:
542 542 raise error.RevlogError(
543 543 _(b'revlog chunk cache size %r is not greater than 0')
544 544 % self._chunkcachesize
545 545 )
546 546 elif self._chunkcachesize & (self._chunkcachesize - 1):
547 547 raise error.RevlogError(
548 548 _(b'revlog chunk cache size %r is not a power of 2')
549 549 % self._chunkcachesize
550 550 )
551 551
552 552 indexdata = b''
553 553 self._initempty = True
554 554 try:
555 555 with self._indexfp() as f:
556 556 if (
557 557 mmapindexthreshold is not None
558 558 and self.opener.fstat(f).st_size >= mmapindexthreshold
559 559 ):
560 560 # TODO: should .close() to release resources without
561 561 # relying on Python GC
562 562 indexdata = util.buffer(util.mmapread(f))
563 563 else:
564 564 indexdata = f.read()
565 565 if len(indexdata) > 0:
566 566 versionflags = versionformat_unpack(indexdata[:4])[0]
567 567 self._initempty = False
568 568 else:
569 569 versionflags = newversionflags
570 570 except IOError as inst:
571 571 if inst.errno != errno.ENOENT:
572 572 raise
573 573
574 574 versionflags = newversionflags
575 575
576 576 self.version = versionflags
577 577
578 578 flags = versionflags & ~0xFFFF
579 579 fmt = versionflags & 0xFFFF
580 580
581 581 if fmt == REVLOGV0:
582 582 if flags:
583 583 raise error.RevlogError(
584 584 _(b'unknown flags (%#04x) in version %d revlog %s')
585 585 % (flags >> 16, fmt, self.indexfile)
586 586 )
587 587
588 588 self._inline = False
589 589 self._generaldelta = False
590 590
591 591 elif fmt == REVLOGV1:
592 592 if flags & ~REVLOGV1_FLAGS:
593 593 raise error.RevlogError(
594 594 _(b'unknown flags (%#04x) in version %d revlog %s')
595 595 % (flags >> 16, fmt, self.indexfile)
596 596 )
597 597
598 598 self._inline = versionflags & FLAG_INLINE_DATA
599 599 self._generaldelta = versionflags & FLAG_GENERALDELTA
600 600
601 601 elif fmt == REVLOGV2:
602 602 if flags & ~REVLOGV2_FLAGS:
603 603 raise error.RevlogError(
604 604 _(b'unknown flags (%#04x) in version %d revlog %s')
605 605 % (flags >> 16, fmt, self.indexfile)
606 606 )
607 607
608 608 self._inline = versionflags & FLAG_INLINE_DATA
609 609 # generaldelta implied by version 2 revlogs.
610 610 self._generaldelta = True
611 611
612 612 else:
613 613 raise error.RevlogError(
614 614 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
615 615 )
616 616 # sparse-revlog can't be on without general-delta (issue6056)
617 617 if not self._generaldelta:
618 618 self._sparserevlog = False
619 619
620 620 self._storedeltachains = True
621 621
622 622 devel_nodemap = (
623 623 self.nodemap_file
624 624 and opts.get(b'devel-force-nodemap', False)
625 625 and NodemapRevlogIO is not None
626 626 )
627 627
628 628 use_rust_index = False
629 629 if rustrevlog is not None:
630 630 if self.nodemap_file is not None:
631 631 use_rust_index = True
632 632 else:
633 633 use_rust_index = self.opener.options.get(b'rust.index')
634 634
635 635 self._io = revlogio()
636 636 if self.version == REVLOGV0:
637 637 self._io = revlogoldio()
638 638 elif devel_nodemap:
639 639 self._io = NodemapRevlogIO()
640 640 elif use_rust_index:
641 641 self._io = rustrevlogio()
642 642 try:
643 643 d = self._io.parseindex(indexdata, self._inline)
644 644 index, _chunkcache = d
645 645 use_nodemap = (
646 646 not self._inline
647 647 and self.nodemap_file is not None
648 648 and util.safehasattr(index, 'update_nodemap_data')
649 649 )
650 650 if use_nodemap:
651 651 nodemap_data = nodemaputil.persisted_data(self)
652 652 if nodemap_data is not None:
653 653 docket = nodemap_data[0]
654 654 if (
655 655 len(d[0]) > docket.tip_rev
656 656 and d[0][docket.tip_rev][7] == docket.tip_node
657 657 ):
658 658 # no changelog tampering
659 659 self._nodemap_docket = docket
660 660 index.update_nodemap_data(*nodemap_data)
661 661 except (ValueError, IndexError):
662 662 raise error.RevlogError(
663 663 _(b"index %s is corrupted") % self.indexfile
664 664 )
665 665 self.index, self._chunkcache = d
666 666 if not self._chunkcache:
667 667 self._chunkclear()
668 668 # revnum -> (chain-length, sum-delta-length)
669 669 self._chaininfocache = {}
670 670 # revlog header -> revlog compressor
671 671 self._decompressors = {}
672 672
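The version handling above relies on the first four bytes of the index doing double duty: the low 16 bits carry the format number and the high 16 bits carry feature flags. A sketch, assuming the constant values from revlogutils.constants (REVLOGV1 is 1 and the inline-data flag is bit 16):

    import struct

    REVLOGV1, FLAG_INLINE_DATA = 1, 1 << 16  # assumed constant values
    header = struct.pack(b">I", REVLOGV1 | FLAG_INLINE_DATA)
    versionflags = struct.unpack(b">I", header)[0]
    assert versionflags & 0xFFFF == REVLOGV1            # fmt
    assert versionflags & ~0xFFFF == FLAG_INLINE_DATA   # flags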
673 673 @util.propertycache
674 674 def _compressor(self):
675 675 engine = util.compengines[self._compengine]
676 676 return engine.revlogcompressor(self._compengineopts)
677 677
678 678 def _indexfp(self, mode=b'r'):
679 679 """file object for the revlog's index file"""
680 680 args = {'mode': mode}
681 681 if mode != b'r':
682 682 args['checkambig'] = self._checkambig
683 683 if mode == b'w':
684 684 args['atomictemp'] = True
685 685 return self.opener(self.indexfile, **args)
686 686
687 687 def _datafp(self, mode=b'r'):
688 688 """file object for the revlog's data file"""
689 689 return self.opener(self.datafile, mode=mode)
690 690
691 691 @contextlib.contextmanager
692 692 def _datareadfp(self, existingfp=None):
693 693 """file object suitable to read data"""
694 694 # Use explicit file handle, if given.
695 695 if existingfp is not None:
696 696 yield existingfp
697 697
698 698 # Use a file handle being actively used for writes, if available.
699 699 # There is some danger to doing this because reads will seek the
700 700 # file. However, _writeentry() performs a SEEK_END before all writes,
701 701 # so we should be safe.
702 702 elif self._writinghandles:
703 703 if self._inline:
704 704 yield self._writinghandles[0]
705 705 else:
706 706 yield self._writinghandles[1]
707 707
708 708 # Otherwise open a new file handle.
709 709 else:
710 710 if self._inline:
711 711 func = self._indexfp
712 712 else:
713 713 func = self._datafp
714 714 with func() as fp:
715 715 yield fp
716 716
717 717 def tiprev(self):
718 718 return len(self.index) - 1
719 719
720 720 def tip(self):
721 721 return self.node(self.tiprev())
722 722
723 723 def __contains__(self, rev):
724 724 return 0 <= rev < len(self)
725 725
726 726 def __len__(self):
727 727 return len(self.index)
728 728
729 729 def __iter__(self):
730 730 return iter(pycompat.xrange(len(self)))
731 731
732 732 def revs(self, start=0, stop=None):
733 733 """iterate over all rev in this revlog (from start to stop)"""
734 734 return storageutil.iterrevs(len(self), start=start, stop=stop)
735 735
736 736 @property
737 737 def nodemap(self):
738 738 msg = (
739 739 b"revlog.nodemap is deprecated, "
740 740 b"use revlog.index.[has_node|rev|get_rev]"
741 741 )
742 742 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
743 743 return self.index.nodemap
744 744
745 745 @property
746 746 def _nodecache(self):
747 747 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
748 748 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
749 749 return self.index.nodemap
750 750
751 751 def hasnode(self, node):
752 752 try:
753 753 self.rev(node)
754 754 return True
755 755 except KeyError:
756 756 return False
757 757
758 758 def candelta(self, baserev, rev):
759 759 """whether two revisions (baserev, rev) can be delta-ed or not"""
760 760 # Disable delta if either rev requires a content-changing flag
761 761 # processor (ex. LFS). This is because such flag processor can alter
762 762 # the rawtext content that the delta will be based on, and two clients
763 763 # could have a same revlog node with different flags (i.e. different
764 764 # rawtext contents) and the delta could be incompatible.
765 765 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
766 766 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
767 767 ):
768 768 return False
769 769 return True
770 770
771 771 def update_caches(self, transaction):
772 772 if self.nodemap_file is not None:
773 773 if transaction is None:
774 774 nodemaputil.update_persistent_nodemap(self)
775 775 else:
776 776 nodemaputil.setup_persistent_nodemap(transaction, self)
777 777
778 778 def clearcaches(self):
779 779 self._revisioncache = None
780 780 self._chainbasecache.clear()
781 781 self._chunkcache = (0, b'')
782 782 self._pcache = {}
783 783 self._nodemap_docket = None
784 784 self.index.clearcaches()
785 785 # The Python code is responsible for validating the docket, so we
786 786 # end up having to refresh it here.
787 787 use_nodemap = (
788 788 not self._inline
789 789 and self.nodemap_file is not None
790 790 and util.safehasattr(self.index, 'update_nodemap_data')
791 791 )
792 792 if use_nodemap:
793 793 nodemap_data = nodemaputil.persisted_data(self)
794 794 if nodemap_data is not None:
795 795 self._nodemap_docket = nodemap_data[0]
796 796 self.index.update_nodemap_data(*nodemap_data)
797 797
798 798 def rev(self, node):
799 799 try:
800 800 return self.index.rev(node)
801 801 except TypeError:
802 802 raise
803 803 except error.RevlogError:
804 804 # parsers.c radix tree lookup failed
805 805 if node == wdirid or node in wdirfilenodeids:
806 806 raise error.WdirUnsupported
807 807 raise error.LookupError(node, self.indexfile, _(b'no node'))
808 808
809 809 # Accessors for index entries.
810 810
811 811 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
812 812 # are flags.
813 813 def start(self, rev):
814 814 return int(self.index[rev][0] >> 16)
815 815
816 816 def flags(self, rev):
817 817 return self.index[rev][0] & 0xFFFF
818 818
819 819 def length(self, rev):
820 820 return self.index[rev][1]
821 821
822 822 def rawsize(self, rev):
823 823 """return the length of the uncompressed text for a given revision"""
824 824 l = self.index[rev][2]
825 825 if l >= 0:
826 826 return l
827 827
828 828 t = self.rawdata(rev)
829 829 return len(t)
830 830
831 831 def size(self, rev):
832 832 """length of non-raw text (processed by a "read" flag processor)"""
833 833 # fast path: if no "read" flag processor could change the content,
834 834 # size is rawsize. note: ELLIPSIS is known to not change the content.
835 835 flags = self.flags(rev)
836 836 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
837 837 return self.rawsize(rev)
838 838
839 839 return len(self.revision(rev, raw=False))
840 840
841 841 def chainbase(self, rev):
842 842 base = self._chainbasecache.get(rev)
843 843 if base is not None:
844 844 return base
845 845
846 846 index = self.index
847 847 iterrev = rev
848 848 base = index[iterrev][3]
849 849 while base != iterrev:
850 850 iterrev = base
851 851 base = index[iterrev][3]
852 852
853 853 self._chainbasecache[rev] = base
854 854 return base
855 855
856 856 def linkrev(self, rev):
857 857 return self.index[rev][4]
858 858
859 859 def parentrevs(self, rev):
860 860 try:
861 861 entry = self.index[rev]
862 862 except IndexError:
863 863 if rev == wdirrev:
864 864 raise error.WdirUnsupported
865 865 raise
866 866
867 867 return entry[5], entry[6]
868 868
869 869 # fast parentrevs(rev) where rev isn't filtered
870 870 _uncheckedparentrevs = parentrevs
871 871
872 872 def node(self, rev):
873 873 try:
874 874 return self.index[rev][7]
875 875 except IndexError:
876 876 if rev == wdirrev:
877 877 raise error.WdirUnsupported
878 878 raise
879 879
880 880 # Derived from index values.
881 881
882 882 def end(self, rev):
883 883 return self.start(rev) + self.length(rev)
884 884
885 885 def parents(self, node):
886 886 i = self.index
887 887 d = i[self.rev(node)]
888 888 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
889 889
890 890 def chainlen(self, rev):
891 891 return self._chaininfo(rev)[0]
892 892
893 893 def _chaininfo(self, rev):
894 894 chaininfocache = self._chaininfocache
895 895 if rev in chaininfocache:
896 896 return chaininfocache[rev]
897 897 index = self.index
898 898 generaldelta = self._generaldelta
899 899 iterrev = rev
900 900 e = index[iterrev]
901 901 clen = 0
902 902 compresseddeltalen = 0
903 903 while iterrev != e[3]:
904 904 clen += 1
905 905 compresseddeltalen += e[1]
906 906 if generaldelta:
907 907 iterrev = e[3]
908 908 else:
909 909 iterrev -= 1
910 910 if iterrev in chaininfocache:
911 911 t = chaininfocache[iterrev]
912 912 clen += t[0]
913 913 compresseddeltalen += t[1]
914 914 break
915 915 e = index[iterrev]
916 916 else:
917 917 # Add text length of base since decompressing that also takes
918 918 # work. For cache hits the length is already included.
919 919 compresseddeltalen += e[1]
920 920 r = (clen, compresseddeltalen)
921 921 chaininfocache[rev] = r
922 922 return r
923 923
924 924 def _deltachain(self, rev, stoprev=None):
925 925 """Obtain the delta chain for a revision.
926 926
927 927 ``stoprev`` specifies a revision to stop at. If not specified, we
928 928 stop at the base of the chain.
929 929
930 930 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
931 931 revs in ascending order and ``stopped`` is a bool indicating whether
932 932 ``stoprev`` was hit.
933 933 """
934 934 # Try C implementation.
935 935 try:
936 936 return self.index.deltachain(rev, stoprev, self._generaldelta)
937 937 except AttributeError:
938 938 pass
939 939
940 940 chain = []
941 941
942 942 # Alias to prevent attribute lookup in tight loop.
943 943 index = self.index
944 944 generaldelta = self._generaldelta
945 945
946 946 iterrev = rev
947 947 e = index[iterrev]
948 948 while iterrev != e[3] and iterrev != stoprev:
949 949 chain.append(iterrev)
950 950 if generaldelta:
951 951 iterrev = e[3]
952 952 else:
953 953 iterrev -= 1
954 954 e = index[iterrev]
955 955
956 956 if iterrev == stoprev:
957 957 stopped = True
958 958 else:
959 959 chain.append(iterrev)
960 960 stopped = False
961 961
962 962 chain.reverse()
963 963 return chain, stopped
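A standalone sketch of the same walk over a toy generaldelta-style index, where each entry holds only its base revision and a self-referencing base marks a full snapshot:

    # Toy index: entry[0] is the base rev; revs 0 and 1 are full snapshots.
    toyindex = [(0,), (1,), (1,), (2,)]

    def deltachain(index, rev):
        chain = []
        while rev != index[rev][0]:
            chain.append(rev)
            rev = index[rev][0]
        chain.append(rev)
        chain.reverse()
        return chain

    assert deltachain(toyindex, 3) == [1, 2, 3]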
964 964
965 965 def ancestors(self, revs, stoprev=0, inclusive=False):
966 966 """Generate the ancestors of 'revs' in reverse revision order.
967 967 Does not generate revs lower than stoprev.
968 968
969 969 See the documentation for ancestor.lazyancestors for more details."""
970 970
971 971 # first, make sure start revisions aren't filtered
972 972 revs = list(revs)
973 973 checkrev = self.node
974 974 for r in revs:
975 975 checkrev(r)
976 976 # and we're sure ancestors aren't filtered as well
977 977
978 978 if rustancestor is not None:
979 979 lazyancestors = rustancestor.LazyAncestors
980 980 arg = self.index
981 981 else:
982 982 lazyancestors = ancestor.lazyancestors
983 983 arg = self._uncheckedparentrevs
984 984 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
985 985
986 986 def descendants(self, revs):
987 987 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
988 988
989 989 def findcommonmissing(self, common=None, heads=None):
990 990 """Return a tuple of the ancestors of common and the ancestors of heads
991 991 that are not ancestors of common. In revset terminology, we return the
992 992 tuple:
993 993
994 994 ::common, (::heads) - (::common)
995 995
996 996 The list is sorted by revision number, meaning it is
997 997 topologically sorted.
998 998
999 999 'heads' and 'common' are both lists of node IDs. If heads is
1000 1000 not supplied, uses all of the revlog's heads. If common is not
1001 1001 supplied, uses nullid."""
1002 1002 if common is None:
1003 1003 common = [nullid]
1004 1004 if heads is None:
1005 1005 heads = self.heads()
1006 1006
1007 1007 common = [self.rev(n) for n in common]
1008 1008 heads = [self.rev(n) for n in heads]
1009 1009
1010 1010 # we want the ancestors, but inclusive
1011 1011 class lazyset(object):
1012 1012 def __init__(self, lazyvalues):
1013 1013 self.addedvalues = set()
1014 1014 self.lazyvalues = lazyvalues
1015 1015
1016 1016 def __contains__(self, value):
1017 1017 return value in self.addedvalues or value in self.lazyvalues
1018 1018
1019 1019 def __iter__(self):
1020 1020 added = self.addedvalues
1021 1021 for r in added:
1022 1022 yield r
1023 1023 for r in self.lazyvalues:
1024 1024 if not r in added:
1025 1025 yield r
1026 1026
1027 1027 def add(self, value):
1028 1028 self.addedvalues.add(value)
1029 1029
1030 1030 def update(self, values):
1031 1031 self.addedvalues.update(values)
1032 1032
1033 1033 has = lazyset(self.ancestors(common))
1034 1034 has.add(nullrev)
1035 1035 has.update(common)
1036 1036
1037 1037 # take all ancestors from heads that aren't in has
1038 1038 missing = set()
1039 1039 visit = collections.deque(r for r in heads if r not in has)
1040 1040 while visit:
1041 1041 r = visit.popleft()
1042 1042 if r in missing:
1043 1043 continue
1044 1044 else:
1045 1045 missing.add(r)
1046 1046 for p in self.parentrevs(r):
1047 1047 if p not in has:
1048 1048 visit.append(p)
1049 1049 missing = list(missing)
1050 1050 missing.sort()
1051 1051 return has, [self.node(miss) for miss in missing]
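A sketch of the same reverse BFS on a toy parent table: everything reachable from the heads that is not already an ancestor of common gets collected:

    import collections

    parents = {0: (), 1: (0,), 2: (1,), 3: (1,)}  # toy DAG
    has = {0, 1}                                  # ::common
    missing = set()
    visit = collections.deque(r for r in (2, 3) if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in parents[r]:
            if p not in has:
                visit.append(p)
    assert sorted(missing) == [2, 3]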
1052 1052
1053 1053 def incrementalmissingrevs(self, common=None):
1054 1054 """Return an object that can be used to incrementally compute the
1055 1055 revision numbers of the ancestors of arbitrary sets that are not
1056 1056 ancestors of common. This is an ancestor.incrementalmissingancestors
1057 1057 object.
1058 1058
1059 1059 'common' is a list of revision numbers. If common is not supplied, uses
1060 1060 nullrev.
1061 1061 """
1062 1062 if common is None:
1063 1063 common = [nullrev]
1064 1064
1065 1065 if rustancestor is not None:
1066 1066 return rustancestor.MissingAncestors(self.index, common)
1067 1067 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1068 1068
1069 1069 def findmissingrevs(self, common=None, heads=None):
1070 1070 """Return the revision numbers of the ancestors of heads that
1071 1071 are not ancestors of common.
1072 1072
1073 1073 More specifically, return a list of revision numbers corresponding to
1074 1074 nodes N such that every N satisfies the following constraints:
1075 1075
1076 1076 1. N is an ancestor of some node in 'heads'
1077 1077 2. N is not an ancestor of any node in 'common'
1078 1078
1079 1079 The list is sorted by revision number, meaning it is
1080 1080 topologically sorted.
1081 1081
1082 1082 'heads' and 'common' are both lists of revision numbers. If heads is
1083 1083 not supplied, uses all of the revlog's heads. If common is not
1084 1084 supplied, uses nullid."""
1085 1085 if common is None:
1086 1086 common = [nullrev]
1087 1087 if heads is None:
1088 1088 heads = self.headrevs()
1089 1089
1090 1090 inc = self.incrementalmissingrevs(common=common)
1091 1091 return inc.missingancestors(heads)
1092 1092
1093 1093 def findmissing(self, common=None, heads=None):
1094 1094 """Return the ancestors of heads that are not ancestors of common.
1095 1095
1096 1096 More specifically, return a list of nodes N such that every N
1097 1097 satisfies the following constraints:
1098 1098
1099 1099 1. N is an ancestor of some node in 'heads'
1100 1100 2. N is not an ancestor of any node in 'common'
1101 1101
1102 1102 The list is sorted by revision number, meaning it is
1103 1103 topologically sorted.
1104 1104
1105 1105 'heads' and 'common' are both lists of node IDs. If heads is
1106 1106 not supplied, uses all of the revlog's heads. If common is not
1107 1107 supplied, uses nullid."""
1108 1108 if common is None:
1109 1109 common = [nullid]
1110 1110 if heads is None:
1111 1111 heads = self.heads()
1112 1112
1113 1113 common = [self.rev(n) for n in common]
1114 1114 heads = [self.rev(n) for n in heads]
1115 1115
1116 1116 inc = self.incrementalmissingrevs(common=common)
1117 1117 return [self.node(r) for r in inc.missingancestors(heads)]
1118 1118
1119 1119 def nodesbetween(self, roots=None, heads=None):
1120 1120 """Return a topological path from 'roots' to 'heads'.
1121 1121
1122 1122 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1123 1123 topologically sorted list of all nodes N that satisfy both of
1124 1124 these constraints:
1125 1125
1126 1126 1. N is a descendant of some node in 'roots'
1127 1127 2. N is an ancestor of some node in 'heads'
1128 1128
1129 1129 Every node is considered to be both a descendant and an ancestor
1130 1130 of itself, so every reachable node in 'roots' and 'heads' will be
1131 1131 included in 'nodes'.
1132 1132
1133 1133 'outroots' is the list of reachable nodes in 'roots', i.e., the
1134 1134 subset of 'roots' that is returned in 'nodes'. Likewise,
1135 1135 'outheads' is the subset of 'heads' that is also in 'nodes'.
1136 1136
1137 1137 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1138 1138 unspecified, uses nullid as the only root. If 'heads' is
1139 1139 unspecified, uses list of all of the revlog's heads."""
1140 1140 nonodes = ([], [], [])
1141 1141 if roots is not None:
1142 1142 roots = list(roots)
1143 1143 if not roots:
1144 1144 return nonodes
1145 1145 lowestrev = min([self.rev(n) for n in roots])
1146 1146 else:
1147 1147 roots = [nullid] # Everybody's a descendant of nullid
1148 1148 lowestrev = nullrev
1149 1149 if (lowestrev == nullrev) and (heads is None):
1150 1150 # We want _all_ the nodes!
1151 1151 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1152 1152 if heads is None:
1153 1153 # All nodes are ancestors, so the latest ancestor is the last
1154 1154 # node.
1155 1155 highestrev = len(self) - 1
1156 1156 # Set ancestors to None to signal that every node is an ancestor.
1157 1157 ancestors = None
1158 1158 # Set heads to an empty dictionary for later discovery of heads
1159 1159 heads = {}
1160 1160 else:
1161 1161 heads = list(heads)
1162 1162 if not heads:
1163 1163 return nonodes
1164 1164 ancestors = set()
1165 1165 # Turn heads into a dictionary so we can remove 'fake' heads.
1166 1166 # Also, later we will be using it to filter out the heads we can't
1167 1167 # find from roots.
1168 1168 heads = dict.fromkeys(heads, False)
1169 1169 # Start at the top and keep marking parents until we're done.
1170 1170 nodestotag = set(heads)
1171 1171 # Remember where the top was so we can use it as a limit later.
1172 1172 highestrev = max([self.rev(n) for n in nodestotag])
1173 1173 while nodestotag:
1174 1174 # grab a node to tag
1175 1175 n = nodestotag.pop()
1176 1176 # Never tag nullid
1177 1177 if n == nullid:
1178 1178 continue
1179 1179 # A node's revision number represents its place in a
1180 1180 # topologically sorted list of nodes.
1181 1181 r = self.rev(n)
1182 1182 if r >= lowestrev:
1183 1183 if n not in ancestors:
1184 1184 # If we are possibly a descendant of one of the roots
1185 1185 # and we haven't already been marked as an ancestor
1186 1186 ancestors.add(n) # Mark as ancestor
1187 1187 # Add non-nullid parents to list of nodes to tag.
1188 1188 nodestotag.update(
1189 1189 [p for p in self.parents(n) if p != nullid]
1190 1190 )
1191 1191 elif n in heads: # We've seen it before, is it a fake head?
1192 1192 # So it is, real heads should not be the ancestors of
1193 1193 # any other heads.
1194 1194 heads.pop(n)
1195 1195 if not ancestors:
1196 1196 return nonodes
1197 1197 # Now that we have our set of ancestors, we want to remove any
1198 1198 # roots that are not ancestors.
1199 1199
1200 1200 # If one of the roots was nullid, everything is included anyway.
1201 1201 if lowestrev > nullrev:
1202 1202 # But, since we weren't, let's recompute the lowest rev to not
1203 1203 # include roots that aren't ancestors.
1204 1204
1205 1205 # Filter out roots that aren't ancestors of heads
1206 1206 roots = [root for root in roots if root in ancestors]
1207 1207 # Recompute the lowest revision
1208 1208 if roots:
1209 1209 lowestrev = min([self.rev(root) for root in roots])
1210 1210 else:
1211 1211 # No more roots? Return empty list
1212 1212 return nonodes
1213 1213 else:
1214 1214 # We are descending from nullid, and don't need to care about
1215 1215 # any other roots.
1216 1216 lowestrev = nullrev
1217 1217 roots = [nullid]
1218 1218 # Transform our roots list into a set.
1219 1219 descendants = set(roots)
1220 1220 # Also, keep the original roots so we can filter out roots that aren't
1221 1221 # 'real' roots (i.e. are descended from other roots).
1222 1222 roots = descendants.copy()
1223 1223 # Our topologically sorted list of output nodes.
1224 1224 orderedout = []
1225 1225 # Don't start at nullid since we don't want nullid in our output list,
1226 1226 # and if nullid shows up in descendants, empty parents will look like
1227 1227 # they're descendants.
1228 1228 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1229 1229 n = self.node(r)
1230 1230 isdescendant = False
1231 1231 if lowestrev == nullrev: # Everybody is a descendant of nullid
1232 1232 isdescendant = True
1233 1233 elif n in descendants:
1234 1234 # n is already a descendant
1235 1235 isdescendant = True
1236 1236 # This check only needs to be done here because all the roots
1237 1237 # will start being marked as descendants before the loop.
1238 1238 if n in roots:
1239 1239 # If n was a root, check if it's a 'real' root.
1240 1240 p = tuple(self.parents(n))
1241 1241 # If any of its parents are descendants, it's not a root.
1242 1242 if (p[0] in descendants) or (p[1] in descendants):
1243 1243 roots.remove(n)
1244 1244 else:
1245 1245 p = tuple(self.parents(n))
1246 1246 # A node is a descendant if either of its parents is a
1247 1247 # descendant. (We seeded the descendants set with the roots
1248 1248 # up there, remember?)
1249 1249 if (p[0] in descendants) or (p[1] in descendants):
1250 1250 descendants.add(n)
1251 1251 isdescendant = True
1252 1252 if isdescendant and ((ancestors is None) or (n in ancestors)):
1253 1253 # Only include nodes that are both descendants and ancestors.
1254 1254 orderedout.append(n)
1255 1255 if (ancestors is not None) and (n in heads):
1256 1256 # We're trying to figure out which heads are reachable
1257 1257 # from roots.
1258 1258 # Mark this head as having been reached
1259 1259 heads[n] = True
1260 1260 elif ancestors is None:
1261 1261 # Otherwise, we're trying to discover the heads.
1262 1262 # Assume this is a head because if it isn't, the next step
1263 1263 # will eventually remove it.
1264 1264 heads[n] = True
1265 1265 # But, obviously its parents aren't.
1266 1266 for p in self.parents(n):
1267 1267 heads.pop(p, None)
1268 1268 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1269 1269 roots = list(roots)
1270 1270 assert orderedout
1271 1271 assert roots
1272 1272 assert heads
1273 1273 return (orderedout, roots, heads)
1274 1274
1275 1275 def headrevs(self, revs=None):
1276 1276 if revs is None:
1277 1277 try:
1278 1278 return self.index.headrevs()
1279 1279 except AttributeError:
1280 1280 return self._headrevs()
1281 1281 if rustdagop is not None:
1282 1282 return rustdagop.headrevs(self.index, revs)
1283 1283 return dagop.headrevs(revs, self._uncheckedparentrevs)
1284 1284
1285 1285 def computephases(self, roots):
1286 1286 return self.index.computephasesmapsets(roots)
1287 1287
1288 1288 def _headrevs(self):
1289 1289 count = len(self)
1290 1290 if not count:
1291 1291 return [nullrev]
1292 1292 # we won't iterate over filtered revs, so nobody is a head at start
1293 1293 ishead = [0] * (count + 1)
1294 1294 index = self.index
1295 1295 for r in self:
1296 1296 ishead[r] = 1 # I may be a head
1297 1297 e = index[r]
1298 1298 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1299 1299 return [r for r, val in enumerate(ishead) if val]
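The same marking pass on a toy parent table (rev -> parent revs, with nullrev parents omitted): every rev starts as a candidate head and is knocked out when it appears as someone's parent:

    parents = {0: (), 1: (0,), 2: (0,), 3: (1,)}  # toy DAG
    ishead = [1] * len(parents)
    for r, ps in parents.items():
        for p in ps:
            ishead[p] = 0
    assert [r for r, v in enumerate(ishead) if v] == [2, 3]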
1300 1300
1301 1301 def heads(self, start=None, stop=None):
1302 1302 """return the list of all nodes that have no children
1303 1303
1304 1304 if start is specified, only heads that are descendants of
1305 1305 start will be returned
1306 1306 if stop is specified, it will consider all the revs from stop
1307 1307 as if they had no children
1308 1308 """
1309 1309 if start is None and stop is None:
1310 1310 if not len(self):
1311 1311 return [nullid]
1312 1312 return [self.node(r) for r in self.headrevs()]
1313 1313
1314 1314 if start is None:
1315 1315 start = nullrev
1316 1316 else:
1317 1317 start = self.rev(start)
1318 1318
1319 1319 stoprevs = {self.rev(n) for n in stop or []}
1320 1320
1321 1321 revs = dagop.headrevssubset(
1322 1322 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1323 1323 )
1324 1324
1325 1325 return [self.node(rev) for rev in revs]
1326 1326
1327 1327 def children(self, node):
1328 1328 """find the children of a given node"""
1329 1329 c = []
1330 1330 p = self.rev(node)
1331 1331 for r in self.revs(start=p + 1):
1332 1332 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1333 1333 if prevs:
1334 1334 for pr in prevs:
1335 1335 if pr == p:
1336 1336 c.append(self.node(r))
1337 1337 elif p == nullrev:
1338 1338 c.append(self.node(r))
1339 1339 return c
1340 1340
1341 1341 def commonancestorsheads(self, a, b):
1342 1342 """calculate all the heads of the common ancestors of nodes a and b"""
1343 1343 a, b = self.rev(a), self.rev(b)
1344 1344 ancs = self._commonancestorsheads(a, b)
1345 1345 return pycompat.maplist(self.node, ancs)
1346 1346
1347 1347 def _commonancestorsheads(self, *revs):
1348 1348 """calculate all the heads of the common ancestors of revs"""
1349 1349 try:
1350 1350 ancs = self.index.commonancestorsheads(*revs)
1351 1351 except (AttributeError, OverflowError): # C implementation failed
1352 1352 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1353 1353 return ancs
1354 1354
1355 1355 def isancestor(self, a, b):
1356 1356 """return True if node a is an ancestor of node b
1357 1357
1358 1358 A revision is considered an ancestor of itself."""
1359 1359 a, b = self.rev(a), self.rev(b)
1360 1360 return self.isancestorrev(a, b)
1361 1361
1362 1362 def isancestorrev(self, a, b):
1363 1363 """return True if revision a is an ancestor of revision b
1364 1364
1365 1365 A revision is considered an ancestor of itself.
1366 1366
1367 1367 The implementation of this is trivial but the use of
1368 1368 reachableroots is not."""
1369 1369 if a == nullrev:
1370 1370 return True
1371 1371 elif a == b:
1372 1372 return True
1373 1373 elif a > b:
1374 1374 return False
1375 1375 return bool(self.reachableroots(a, [b], [a], includepath=False))
1376 1376
1377 1377 def reachableroots(self, minroot, heads, roots, includepath=False):
1378 1378 """return (heads(::(<roots> and <roots>::<heads>)))
1379 1379
1380 1380 If includepath is True, return (<roots>::<heads>)."""
1381 1381 try:
1382 1382 return self.index.reachableroots2(
1383 1383 minroot, heads, roots, includepath
1384 1384 )
1385 1385 except AttributeError:
1386 1386 return dagop._reachablerootspure(
1387 1387 self.parentrevs, minroot, roots, heads, includepath
1388 1388 )
1389 1389
1390 1390 def ancestor(self, a, b):
1391 1391 """calculate the "best" common ancestor of nodes a and b"""
1392 1392
1393 1393 a, b = self.rev(a), self.rev(b)
1394 1394 try:
1395 1395 ancs = self.index.ancestors(a, b)
1396 1396 except (AttributeError, OverflowError):
1397 1397 ancs = ancestor.ancestors(self.parentrevs, a, b)
1398 1398 if ancs:
1399 1399 # choose a consistent winner when there's a tie
1400 1400 return min(map(self.node, ancs))
1401 1401 return nullid
1402 1402
1403 1403 def _match(self, id):
1404 1404 if isinstance(id, int):
1405 1405 # rev
1406 1406 return self.node(id)
1407 1407 if len(id) == 20:
1408 1408 # possibly a binary node
1409 1409 # odds of a binary node being all hex in ASCII are 1 in 10**25
1410 1410 try:
1411 1411 node = id
1412 1412 self.rev(node) # quick search the index
1413 1413 return node
1414 1414 except error.LookupError:
1415 1415 pass # may be partial hex id
1416 1416 try:
1417 1417 # str(rev)
1418 1418 rev = int(id)
1419 1419 if b"%d" % rev != id:
1420 1420 raise ValueError
1421 1421 if rev < 0:
1422 1422 rev = len(self) + rev
1423 1423 if rev < 0 or rev >= len(self):
1424 1424 raise ValueError
1425 1425 return self.node(rev)
1426 1426 except (ValueError, OverflowError):
1427 1427 pass
1428 1428 if len(id) == 40:
1429 1429 try:
1430 1430 # a full hex nodeid?
1431 1431 node = bin(id)
1432 1432 self.rev(node)
1433 1433 return node
1434 1434 except (TypeError, error.LookupError):
1435 1435 pass
1436 1436
1437 1437 def _partialmatch(self, id):
1438 1438 # we don't care about wdirfilenodeids as they should always be full hashes
1439 1439 maybewdir = wdirhex.startswith(id)
1440 1440 try:
1441 1441 partial = self.index.partialmatch(id)
1442 1442 if partial and self.hasnode(partial):
1443 1443 if maybewdir:
1444 1444 # single 'ff...' match in radix tree, ambiguous with wdir
1445 1445 raise error.RevlogError
1446 1446 return partial
1447 1447 if maybewdir:
1448 1448 # no 'ff...' match in radix tree, wdir identified
1449 1449 raise error.WdirUnsupported
1450 1450 return None
1451 1451 except error.RevlogError:
1452 1452 # parsers.c radix tree lookup gave multiple matches
1453 1453 # fast path: for unfiltered changelog, radix tree is accurate
1454 1454 if not getattr(self, 'filteredrevs', None):
1455 1455 raise error.AmbiguousPrefixLookupError(
1456 1456 id, self.indexfile, _(b'ambiguous identifier')
1457 1457 )
1458 1458 # fall through to slow path that filters hidden revisions
1459 1459 except (AttributeError, ValueError):
1460 1460 # we are pure python, or key was too short to search radix tree
1461 1461 pass
1462 1462
1463 1463 if id in self._pcache:
1464 1464 return self._pcache[id]
1465 1465
1466 1466 if len(id) <= 40:
1467 1467 try:
1468 1468 # hex(node)[:...]
1469 1469 l = len(id) // 2 # grab an even number of digits
1470 1470 prefix = bin(id[: l * 2])
1471 1471 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1472 1472 nl = [
1473 1473 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1474 1474 ]
1475 1475 if nullhex.startswith(id):
1476 1476 nl.append(nullid)
1477 1477 if len(nl) > 0:
1478 1478 if len(nl) == 1 and not maybewdir:
1479 1479 self._pcache[id] = nl[0]
1480 1480 return nl[0]
1481 1481 raise error.AmbiguousPrefixLookupError(
1482 1482 id, self.indexfile, _(b'ambiguous identifier')
1483 1483 )
1484 1484 if maybewdir:
1485 1485 raise error.WdirUnsupported
1486 1486 return None
1487 1487 except TypeError:
1488 1488 pass
1489 1489
1490 1490 def lookup(self, id):
1491 1491 """locate a node based on:
1492 1492 - revision number or str(revision number)
1493 1493 - nodeid or subset of hex nodeid
1494 1494 """
1495 1495 n = self._match(id)
1496 1496 if n is not None:
1497 1497 return n
1498 1498 n = self._partialmatch(id)
1499 1499 if n:
1500 1500 return n
1501 1501
1502 1502 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1503 1503
1504 1504 def shortest(self, node, minlength=1):
1505 1505 """Find the shortest unambiguous prefix that matches node."""
1506 1506
1507 1507 def isvalid(prefix):
1508 1508 try:
1509 1509 matchednode = self._partialmatch(prefix)
1510 1510 except error.AmbiguousPrefixLookupError:
1511 1511 return False
1512 1512 except error.WdirUnsupported:
1513 1513 # single 'ff...' match
1514 1514 return True
1515 1515 if matchednode is None:
1516 1516 raise error.LookupError(node, self.indexfile, _(b'no node'))
1517 1517 return True
1518 1518
1519 1519 def maybewdir(prefix):
1520 1520 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1521 1521
1522 1522 hexnode = hex(node)
1523 1523
1524 1524 def disambiguate(hexnode, minlength):
1525 1525 """Disambiguate against wdirid."""
1526 for length in range(minlength, 41):
1526 for length in range(minlength, len(hexnode) + 1):
1527 1527 prefix = hexnode[:length]
1528 1528 if not maybewdir(prefix):
1529 1529 return prefix
1530 1530
1531 1531 if not getattr(self, 'filteredrevs', None):
1532 1532 try:
1533 1533 length = max(self.index.shortest(node), minlength)
1534 1534 return disambiguate(hexnode, length)
1535 1535 except error.RevlogError:
1536 1536 if node != wdirid:
1537 1537 raise error.LookupError(node, self.indexfile, _(b'no node'))
1538 1538 except AttributeError:
1539 1539 # Fall through to pure code
1540 1540 pass
1541 1541
1542 1542 if node == wdirid:
1543 for length in range(minlength, 41):
1543 for length in range(minlength, len(hexnode) + 1):
1544 1544 prefix = hexnode[:length]
1545 1545 if isvalid(prefix):
1546 1546 return prefix
1547 1547
1548 for length in range(minlength, 41):
1548 for length in range(minlength, len(hexnode) + 1):
1549 1549 prefix = hexnode[:length]
1550 1550 if isvalid(prefix):
1551 1551 return disambiguate(hexnode, length)
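The loops above are where this changeset replaces the hard-coded SHA-1 bound (range(minlength, 41)) with one derived from the node itself, so the logic keeps working for longer hashes. The core idea, restated as a hypothetical standalone helper over plain hex strings (not the revlog API):

    def shortest_unique_prefix(hexnode, allhexnodes, minlength=1):
        """Hypothetical helper: shortest prefix of hexnode that is
        unambiguous within allhexnodes."""
        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if [n for n in allhexnodes if n.startswith(prefix)] == [hexnode]:
                return prefix
        return hexnode

    nodes = ["deadbeef" * 5, "deadc0de" * 5]  # two fake 40-digit hex nodes
    assert shortest_unique_prefix(nodes[0], nodes) == "deadb"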
1552 1552
1553 1553 def cmp(self, node, text):
1554 1554 """compare text with a given file revision
1555 1555
1556 1556 returns True if text is different than what is stored.
1557 1557 """
1558 1558 p1, p2 = self.parents(node)
1559 1559 return storageutil.hashrevisionsha1(text, p1, p2) != node
1560 1560
1561 1561 def _cachesegment(self, offset, data):
1562 1562 """Add a segment to the revlog cache.
1563 1563
1564 1564 Accepts an absolute offset and the data that is at that location.
1565 1565 """
1566 1566 o, d = self._chunkcache
1567 1567 # try to add to existing cache
1568 1568 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1569 1569 self._chunkcache = o, d + data
1570 1570 else:
1571 1571 self._chunkcache = offset, data
1572 1572
1573 1573 def _readsegment(self, offset, length, df=None):
1574 1574 """Load a segment of raw data from the revlog.
1575 1575
1576 1576 Accepts an absolute offset, length to read, and an optional existing
1577 1577 file handle to read from.
1578 1578
1579 1579 If an existing file handle is passed, it will be seeked and the
1580 1580 original seek position will NOT be restored.
1581 1581
1582 1582 Returns a str or buffer of raw byte data.
1583 1583
1584 1584 Raises if the requested number of bytes could not be read.
1585 1585 """
1586 1586 # Cache data both forward and backward around the requested
1587 1587 # data, in a fixed size window. This helps speed up operations
1588 1588 # involving reading the revlog backwards.
1589 1589 cachesize = self._chunkcachesize
1590 1590 realoffset = offset & ~(cachesize - 1)
1591 1591 reallength = (
1592 1592 (offset + length + cachesize) & ~(cachesize - 1)
1593 1593 ) - realoffset
1594 1594 with self._datareadfp(df) as df:
1595 1595 df.seek(realoffset)
1596 1596 d = df.read(reallength)
1597 1597
1598 1598 self._cachesegment(realoffset, d)
1599 1599 if offset != realoffset or reallength != length:
1600 1600 startoffset = offset - realoffset
1601 1601 if len(d) - startoffset < length:
1602 1602 raise error.RevlogError(
1603 1603 _(
1604 1604 b'partial read of revlog %s; expected %d bytes from '
1605 1605 b'offset %d, got %d'
1606 1606 )
1607 1607 % (
1608 1608 self.indexfile if self._inline else self.datafile,
1609 1609 length,
1610 1610 realoffset,
1611 1611 len(d) - startoffset,
1612 1612 )
1613 1613 )
1614 1614
1615 1615 return util.buffer(d, startoffset, length)
1616 1616
1617 1617 if len(d) < length:
1618 1618 raise error.RevlogError(
1619 1619 _(
1620 1620 b'partial read of revlog %s; expected %d bytes from offset '
1621 1621 b'%d, got %d'
1622 1622 )
1623 1623 % (
1624 1624 self.indexfile if self._inline else self.datafile,
1625 1625 length,
1626 1626 offset,
1627 1627 len(d),
1628 1628 )
1629 1629 )
1630 1630
1631 1631 return d
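The window widening at the top of this method aligns the read to the chunk-cache granularity: the offset is masked down and the end rounded up to a multiple of the (power-of-two) cache size. A sketch with the default 64 KiB cache and arbitrary request values:

    cachesize = 65536            # default _chunkcachesize
    offset, length = 70000, 100  # arbitrary request

    realoffset = offset & ~(cachesize - 1)
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset

    assert realoffset == 65536                          # aligned down
    assert (realoffset + reallength) % cachesize == 0   # end aligned up
    assert realoffset + reallength >= offset + length   # covers the request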
1632 1632
1633 1633 def _getsegment(self, offset, length, df=None):
1634 1634 """Obtain a segment of raw data from the revlog.
1635 1635
1636 1636 Accepts an absolute offset, length of bytes to obtain, and an
1637 1637 optional file handle to the already-opened revlog. If the file
1638 1638 handle is used, its original seek position will not be preserved.
1639 1639
1640 1640 Requests for data may be returned from a cache.
1641 1641
1642 1642 Returns a str or a buffer instance of raw byte data.
1643 1643 """
1644 1644 o, d = self._chunkcache
1645 1645 l = len(d)
1646 1646
1647 1647 # is it in the cache?
1648 1648 cachestart = offset - o
1649 1649 cacheend = cachestart + length
1650 1650 if cachestart >= 0 and cacheend <= l:
1651 1651 if cachestart == 0 and cacheend == l:
1652 1652 return d # avoid a copy
1653 1653 return util.buffer(d, cachestart, cacheend - cachestart)
1654 1654
1655 1655 return self._readsegment(offset, length, df=df)
1656 1656
1657 1657 def _getsegmentforrevs(self, startrev, endrev, df=None):
1658 1658 """Obtain a segment of raw data corresponding to a range of revisions.
1659 1659
1660 1660 Accepts the start and end revisions and an optional already-open
1661 1661 file handle to be used for reading. If the file handle is used, its
1662 1662 seek position will not be preserved.
1663 1663
1664 1664 Requests for data may be satisfied by a cache.
1665 1665
1666 1666 Returns a 2-tuple of (offset, data) for the requested range of
1667 1667 revisions. Offset is the integer offset from the beginning of the
1668 1668 revlog and data is a str or buffer of the raw byte data.
1669 1669
1670 1670 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1671 1671 to determine where each revision's data begins and ends.
1672 1672 """
1673 1673 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1674 1674 # (functions are expensive).
1675 1675 index = self.index
1676 1676 istart = index[startrev]
1677 1677 start = int(istart[0] >> 16)
1678 1678 if startrev == endrev:
1679 1679 end = start + istart[1]
1680 1680 else:
1681 1681 iend = index[endrev]
1682 1682 end = int(iend[0] >> 16) + iend[1]
1683 1683
1684 1684 if self._inline:
1685 1685 start += (startrev + 1) * self._io.size
1686 1686 end += (endrev + 1) * self._io.size
1687 1687 length = end - start
1688 1688
1689 1689 return start, self._getsegment(start, length, df=df)
1690 1690
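# For an inline revlog, the data of a revision is physically preceded by
# one index entry per revision up to and including itself, which is what
# the (rev + 1) * self._io.size adjustments above account for. E.g. with
# the 64-byte v1 index entry, data that logically starts at offset 120
# for revision 2 physically lives at 120 + 3 * 64 = 312 in the .i file.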
1691 1691 def _chunk(self, rev, df=None):
1692 1692 """Obtain a single decompressed chunk for a revision.
1693 1693
1694 1694 Accepts an integer revision and an optional already-open file handle
1695 1695 to be used for reading. If used, the seek position of the file will not
1696 1696 be preserved.
1697 1697
1698 1698 Returns a str holding uncompressed data for the requested revision.
1699 1699 """
1700 1700 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1701 1701
1702 1702 def _chunks(self, revs, df=None, targetsize=None):
1703 1703 """Obtain decompressed chunks for the specified revisions.
1704 1704
1705 1705 Accepts an iterable of numeric revisions that are assumed to be in
1706 1706 ascending order. Also accepts an optional already-open file handle
1707 1707 to be used for reading. If used, the seek position of the file will
1708 1708 not be preserved.
1709 1709
1710 1710 This function is similar to calling ``self._chunk()`` multiple times,
1711 1711 but is faster.
1712 1712
1713 1713 Returns a list with decompressed data for each requested revision.
1714 1714 """
1715 1715 if not revs:
1716 1716 return []
1717 1717 start = self.start
1718 1718 length = self.length
1719 1719 inline = self._inline
1720 1720 iosize = self._io.size
1721 1721 buffer = util.buffer
1722 1722
1723 1723 l = []
1724 1724 ladd = l.append
1725 1725
1726 1726 if not self._withsparseread:
1727 1727 slicedchunks = (revs,)
1728 1728 else:
1729 1729 slicedchunks = deltautil.slicechunk(
1730 1730 self, revs, targetsize=targetsize
1731 1731 )
1732 1732
1733 1733 for revschunk in slicedchunks:
1734 1734 firstrev = revschunk[0]
1735 1735 # Skip trailing revisions with empty diff
1736 1736 for lastrev in revschunk[::-1]:
1737 1737 if length(lastrev) != 0:
1738 1738 break
1739 1739
1740 1740 try:
1741 1741 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1742 1742 except OverflowError:
1743 1743 # issue4215 - we can't cache a run of chunks greater than
1744 1744 # 2G on Windows
1745 1745 return [self._chunk(rev, df=df) for rev in revschunk]
1746 1746
1747 1747 decomp = self.decompress
1748 1748 for rev in revschunk:
1749 1749 chunkstart = start(rev)
1750 1750 if inline:
1751 1751 chunkstart += (rev + 1) * iosize
1752 1752 chunklength = length(rev)
1753 1753 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1754 1754
1755 1755 return l
1756 1756
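# After one contiguous segment is read per slice, each revision's chunk
# is carved out as a zero-copy buffer relative to the segment start.
# With made-up numbers: if the segment was read at offset = 1000 and
# revision 7 starts at 1024 with length 50, its compressed chunk is
#
#     buffer(data, 1024 - 1000, 50)
#
# which is then handed to decompress() without copying the bytes.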
1757 1757 def _chunkclear(self):
1758 1758 """Clear the raw chunk cache."""
1759 1759 self._chunkcache = (0, b'')
1760 1760
1761 1761 def deltaparent(self, rev):
1762 1762 """return deltaparent of the given revision"""
1763 1763 base = self.index[rev][3]
1764 1764 if base == rev:
1765 1765 return nullrev
1766 1766 elif self._generaldelta:
1767 1767 return base
1768 1768 else:
1769 1769 return rev - 1
1770 1770
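# How the three branches above differ, for an index entry whose base
# field is 5:
#
#     base == rev:           deltaparent(rev) == nullrev (full snapshot)
#     generaldelta:          deltaparent(9) == 5   (base names the parent)
#     legacy (no g-delta):   deltaparent(9) == 8   (always rev - 1)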
1771 1771 def issnapshot(self, rev):
1772 1772 """tells whether rev is a snapshot
1773 1773 """
1774 1774 if not self._sparserevlog:
1775 1775 return self.deltaparent(rev) == nullrev
1776 1776 elif util.safehasattr(self.index, b'issnapshot'):
1777 1777 # directly assign the method to cache the testing and access
1778 1778 self.issnapshot = self.index.issnapshot
1779 1779 return self.issnapshot(rev)
1780 1780 if rev == nullrev:
1781 1781 return True
1782 1782 entry = self.index[rev]
1783 1783 base = entry[3]
1784 1784 if base == rev:
1785 1785 return True
1786 1786 if base == nullrev:
1787 1787 return True
1788 1788 p1 = entry[5]
1789 1789 p2 = entry[6]
1790 1790 if base == p1 or base == p2:
1791 1791 return False
1792 1792 return self.issnapshot(base)
1793 1793
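# In sparse-revlog terms, a snapshot is a revision whose delta does not
# chain through one of its parents: full snapshots delta against
# nullrev, intermediate snapshots against another snapshot. The
# recursion above therefore walks the base chain, e.g.:
#
#     issnapshot(9): base is 4, and 4 is not a parent of 9 -> issnapshot(4)
#     issnapshot(4): base == nullrev -> True, so 9 is a snapshot too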
1794 1794 def snapshotdepth(self, rev):
1795 1795 """number of snapshot in the chain before this one"""
1796 1796 if not self.issnapshot(rev):
1797 1797 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1798 1798 return len(self._deltachain(rev)[0]) - 1
1799 1799
1800 1800 def revdiff(self, rev1, rev2):
1801 1801 """return or calculate a delta between two revisions
1802 1802
1803 1803 The delta calculated is in binary form and is intended to be written to
1804 1804 revlog data directly. So this function needs raw revision data.
1805 1805 """
1806 1806 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1807 1807 return bytes(self._chunk(rev2))
1808 1808
1809 1809 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1810 1810
1811 1811 def _processflags(self, text, flags, operation, raw=False):
1812 1812 """deprecated entry point to access flag processors"""
1813 1813 msg = b'_processflag(...) use the specialized variant'
1814 1814 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1815 1815 if raw:
1816 1816 return text, flagutil.processflagsraw(self, text, flags)
1817 1817 elif operation == b'read':
1818 1818 return flagutil.processflagsread(self, text, flags)
1819 1819 else: # write operation
1820 1820 return flagutil.processflagswrite(self, text, flags)
1821 1821
1822 1822 def revision(self, nodeorrev, _df=None, raw=False):
1823 1823 """return an uncompressed revision of a given node or revision
1824 1824 number.
1825 1825
1826 1826 _df - an existing file handle to read from. (internal-only)
1827 1827 raw - an optional argument specifying if the revision data is to be
1828 1828 treated as raw data when applying flag transforms. 'raw' should be set
1829 1829 to True when generating changegroups or in debug commands.
1830 1830 """
1831 1831 if raw:
1832 1832 msg = (
1833 1833 b'revlog.revision(..., raw=True) is deprecated, '
1834 1834 b'use revlog.rawdata(...)'
1835 1835 )
1836 1836 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1837 1837 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1838 1838
1839 1839 def sidedata(self, nodeorrev, _df=None):
1840 1840 """a map of extra data related to the changeset but not part of the hash
1841 1841
1842 1842 This function currently returns a dictionary. However, a more
1843 1843 advanced mapping object will likely be used in the future for more
1844 1844 efficient/lazy code.
1845 1845 """
1846 1846 return self._revisiondata(nodeorrev, _df)[1]
1847 1847
1848 1848 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1849 1849 # deal with <nodeorrev> argument type
1850 1850 if isinstance(nodeorrev, int):
1851 1851 rev = nodeorrev
1852 1852 node = self.node(rev)
1853 1853 else:
1854 1854 node = nodeorrev
1855 1855 rev = None
1856 1856
1857 1857 # fast path the special `nullid` rev
1858 1858 if node == nullid:
1859 1859 return b"", {}
1860 1860
1861 1861 # ``rawtext`` is the text as stored inside the revlog. Might be the
1862 1862 # revision or might need to be processed to retrieve the revision.
1863 1863 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1864 1864
1865 1865 if raw and validated:
1866 1866 # if we don't want to process the raw text and that raw
1867 1867 # text is cached, we can exit early.
1868 1868 return rawtext, {}
1869 1869 if rev is None:
1870 1870 rev = self.rev(node)
1871 1871 # the revlog's flag for this revision
1872 1872 # (usually alter its state or content)
1873 1873 flags = self.flags(rev)
1874 1874
1875 1875 if validated and flags == REVIDX_DEFAULT_FLAGS:
1876 1876 # no extra flags set, no flag processor runs, text = rawtext
1877 1877 return rawtext, {}
1878 1878
1879 1879 sidedata = {}
1880 1880 if raw:
1881 1881 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1882 1882 text = rawtext
1883 1883 else:
1884 1884 try:
1885 1885 r = flagutil.processflagsread(self, rawtext, flags)
1886 1886 except error.SidedataHashError as exc:
1887 1887 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1888 1888 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1889 1889 raise error.RevlogError(msg)
1890 1890 text, validatehash, sidedata = r
1891 1891 if validatehash:
1892 1892 self.checkhash(text, node, rev=rev)
1893 1893 if not validated:
1894 1894 self._revisioncache = (node, rev, rawtext)
1895 1895
1896 1896 return text, sidedata
1897 1897
1898 1898 def _rawtext(self, node, rev, _df=None):
1899 1899 """return the possibly unvalidated rawtext for a revision
1900 1900
1901 1901 returns (rev, rawtext, validated)
1902 1902 """
1903 1903
1904 1904 # revision in the cache (could be useful to apply delta)
1905 1905 cachedrev = None
1906 1906 # An intermediate text to apply deltas to
1907 1907 basetext = None
1908 1908
1909 1909 # Check if we have the entry in cache
1910 1910 # The cache entry looks like (node, rev, rawtext)
1911 1911 if self._revisioncache:
1912 1912 if self._revisioncache[0] == node:
1913 1913 return (rev, self._revisioncache[2], True)
1914 1914 cachedrev = self._revisioncache[1]
1915 1915
1916 1916 if rev is None:
1917 1917 rev = self.rev(node)
1918 1918
1919 1919 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1920 1920 if stopped:
1921 1921 basetext = self._revisioncache[2]
1922 1922
1923 1923 # drop cache to save memory, the caller is expected to
1924 1924 # update self._revisioncache after validating the text
1925 1925 self._revisioncache = None
1926 1926
1927 1927 targetsize = None
1928 1928 rawsize = self.index[rev][2]
1929 1929 if 0 <= rawsize:
1930 1930 targetsize = 4 * rawsize
1931 1931
1932 1932 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1933 1933 if basetext is None:
1934 1934 basetext = bytes(bins[0])
1935 1935 bins = bins[1:]
1936 1936
1937 1937 rawtext = mdiff.patches(basetext, bins)
1938 1938 del basetext # let us have a chance to free memory early
1939 1939 return (rev, rawtext, False)
1940 1940
1941 1941 def rawdata(self, nodeorrev, _df=None):
1942 1942 """return an uncompressed raw data of a given node or revision number.
1943 1943
1944 1944 _df - an existing file handle to read from. (internal-only)
1945 1945 """
1946 1946 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1947 1947
1948 1948 def hash(self, text, p1, p2):
1949 1949 """Compute a node hash.
1950 1950
1951 1951 Available as a function so that subclasses can replace the hash
1952 1952 as needed.
1953 1953 """
1954 1954 return storageutil.hashrevisionsha1(text, p1, p2)
1955 1955
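# A minimal sketch of what storageutil.hashrevisionsha1() computes in
# the default case: SHA-1 over the two parent nodes in sorted order,
# followed by the revision text.
#
#     import hashlib
#
#     def node_hash(text, p1, p2):
#         s = hashlib.sha1(min(p1, p2))
#         s.update(max(p1, p2))
#         s.update(text)
#         return s.digest()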
1956 1956 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1957 1957 """Check node hash integrity.
1958 1958
1959 1959 Available as a function so that subclasses can extend hash mismatch
1960 1960 behaviors as needed.
1961 1961 """
1962 1962 try:
1963 1963 if p1 is None and p2 is None:
1964 1964 p1, p2 = self.parents(node)
1965 1965 if node != self.hash(text, p1, p2):
1966 1966 # Clear the revision cache on hash failure. The revision cache
1967 1967 # only stores the raw revision and clearing the cache does have
1968 1968 # the side-effect that we won't have a cache hit when the raw
1969 1969 # revision data is accessed. But this case should be rare and
1970 1970 # it is extra work to teach the cache about the hash
1971 1971 # verification state.
1972 1972 if self._revisioncache and self._revisioncache[0] == node:
1973 1973 self._revisioncache = None
1974 1974
1975 1975 revornode = rev
1976 1976 if revornode is None:
1977 1977 revornode = templatefilters.short(hex(node))
1978 1978 raise error.RevlogError(
1979 1979 _(b"integrity check failed on %s:%s")
1980 1980 % (self.indexfile, pycompat.bytestr(revornode))
1981 1981 )
1982 1982 except error.RevlogError:
1983 1983 if self._censorable and storageutil.iscensoredtext(text):
1984 1984 raise error.CensoredNodeError(self.indexfile, node, text)
1985 1985 raise
1986 1986
1987 1987 def _enforceinlinesize(self, tr, fp=None):
1988 1988 """Check if the revlog is too big for inline and convert if so.
1989 1989
1990 1990 This should be called after revisions are added to the revlog. If the
1991 1991 revlog has grown too large to be an inline revlog, it will convert it
1992 1992 to use multiple index and data files.
1993 1993 """
1994 1994 tiprev = len(self) - 1
1995 1995 if (
1996 1996 not self._inline
1997 1997 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1998 1998 ):
1999 1999 return
2000 2000
2001 2001 trinfo = tr.find(self.indexfile)
2002 2002 if trinfo is None:
2003 2003 raise error.RevlogError(
2004 2004 _(b"%s not found in the transaction") % self.indexfile
2005 2005 )
2006 2006
2007 2007 trindex = trinfo[2]
2008 2008 if trindex is not None:
2009 2009 dataoff = self.start(trindex)
2010 2010 else:
2011 2011 # revlog was stripped at start of transaction, use all leftover data
2012 2012 trindex = len(self) - 1
2013 2013 dataoff = self.end(tiprev)
2014 2014
2015 2015 tr.add(self.datafile, dataoff)
2016 2016
2017 2017 if fp:
2018 2018 fp.flush()
2019 2019 fp.close()
2020 2020 # We can't use the cached file handle after close(). So prevent
2021 2021 # its usage.
2022 2022 self._writinghandles = None
2023 2023
2024 2024 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2025 2025 for r in self:
2026 2026 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2027 2027
2028 2028 with self._indexfp(b'w') as fp:
2029 2029 self.version &= ~FLAG_INLINE_DATA
2030 2030 self._inline = False
2031 2031 io = self._io
2032 2032 for i in self:
2033 2033 e = io.packentry(self.index[i], self.node, self.version, i)
2034 2034 fp.write(e)
2035 2035
2036 2036 # the temp file replaces the real index when we exit the context
2037 2037 # manager
2038 2038
2039 2039 tr.replace(self.indexfile, trindex * self._io.size)
2040 2040 nodemaputil.setup_persistent_nodemap(tr, self)
2041 2041 self._chunkclear()
2042 2042
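# A schematic view of the conversion performed above, for a revlog named
# foo with three revisions (e = index entry, d = data chunk):
#
#     inline:   foo.i = [e0][d0][e1][d1][e2][d2]
#     split:    foo.i = [e0][e1][e2]
#               foo.d = [d0][d1][d2]
#
# after which FLAG_INLINE_DATA is cleared and reads go through foo.d.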
2043 2043 def _nodeduplicatecallback(self, transaction, node):
2044 2044 """called when trying to add a node already stored.
2045 2045 """
2046 2046
2047 2047 def addrevision(
2048 2048 self,
2049 2049 text,
2050 2050 transaction,
2051 2051 link,
2052 2052 p1,
2053 2053 p2,
2054 2054 cachedelta=None,
2055 2055 node=None,
2056 2056 flags=REVIDX_DEFAULT_FLAGS,
2057 2057 deltacomputer=None,
2058 2058 sidedata=None,
2059 2059 ):
2060 2060 """add a revision to the log
2061 2061
2062 2062 text - the revision data to add
2063 2063 transaction - the transaction object used for rollback
2064 2064 link - the linkrev data to add
2065 2065 p1, p2 - the parent nodeids of the revision
2066 2066 cachedelta - an optional precomputed delta
2067 2067 node - nodeid of revision; typically node is not specified, and it is
2068 2068 computed by default as hash(text, p1, p2); however, subclasses might
2069 2069 use a different hashing method (and override checkhash() in that case)
2070 2070 flags - the known flags to set on the revision
2071 2071 deltacomputer - an optional deltacomputer instance shared between
2072 2072 multiple calls
2073 2073 """
2074 2074 if link == nullrev:
2075 2075 raise error.RevlogError(
2076 2076 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2077 2077 )
2078 2078
2079 2079 if sidedata is None:
2080 2080 sidedata = {}
2081 2081 flags = flags & ~REVIDX_SIDEDATA
2082 2082 elif not self.hassidedata:
2083 2083 raise error.ProgrammingError(
2084 2084 _(b"trying to add sidedata to a revlog who don't support them")
2085 2085 )
2086 2086 else:
2087 2087 flags |= REVIDX_SIDEDATA
2088 2088
2089 2089 if flags:
2090 2090 node = node or self.hash(text, p1, p2)
2091 2091
2092 2092 rawtext, validatehash = flagutil.processflagswrite(
2093 2093 self, text, flags, sidedata=sidedata
2094 2094 )
2095 2095
2096 2096 # If the flag processor modifies the revision data, ignore any provided
2097 2097 # cachedelta.
2098 2098 if rawtext != text:
2099 2099 cachedelta = None
2100 2100
2101 2101 if len(rawtext) > _maxentrysize:
2102 2102 raise error.RevlogError(
2103 2103 _(
2104 2104 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2105 2105 )
2106 2106 % (self.indexfile, len(rawtext))
2107 2107 )
2108 2108
2109 2109 node = node or self.hash(rawtext, p1, p2)
2110 2110 if self.index.has_node(node):
2111 2111 return node
2112 2112
2113 2113 if validatehash:
2114 2114 self.checkhash(rawtext, node, p1=p1, p2=p2)
2115 2115
2116 2116 return self.addrawrevision(
2117 2117 rawtext,
2118 2118 transaction,
2119 2119 link,
2120 2120 p1,
2121 2121 p2,
2122 2122 node,
2123 2123 flags,
2124 2124 cachedelta=cachedelta,
2125 2125 deltacomputer=deltacomputer,
2126 2126 )
2127 2127
2128 2128 def addrawrevision(
2129 2129 self,
2130 2130 rawtext,
2131 2131 transaction,
2132 2132 link,
2133 2133 p1,
2134 2134 p2,
2135 2135 node,
2136 2136 flags,
2137 2137 cachedelta=None,
2138 2138 deltacomputer=None,
2139 2139 ):
2140 2140 """add a raw revision with known flags, node and parents
2141 2141 useful when reusing a revision not stored in this revlog (e.g. received
2142 2142 over wire, or read from an external bundle).
2143 2143 """
2144 2144 dfh = None
2145 2145 if not self._inline:
2146 2146 dfh = self._datafp(b"a+")
2147 2147 ifh = self._indexfp(b"a+")
2148 2148 try:
2149 2149 return self._addrevision(
2150 2150 node,
2151 2151 rawtext,
2152 2152 transaction,
2153 2153 link,
2154 2154 p1,
2155 2155 p2,
2156 2156 flags,
2157 2157 cachedelta,
2158 2158 ifh,
2159 2159 dfh,
2160 2160 deltacomputer=deltacomputer,
2161 2161 )
2162 2162 finally:
2163 2163 if dfh:
2164 2164 dfh.close()
2165 2165 ifh.close()
2166 2166
2167 2167 def compress(self, data):
2168 2168 """Generate a possibly-compressed representation of data."""
2169 2169 if not data:
2170 2170 return b'', data
2171 2171
2172 2172 compressed = self._compressor.compress(data)
2173 2173
2174 2174 if compressed:
2175 2175 # The revlog compressor added the header in the returned data.
2176 2176 return b'', compressed
2177 2177
2178 2178 if data[0:1] == b'\0':
2179 2179 return b'', data
2180 2180 return b'u', data
2181 2181
2182 2182 def decompress(self, data):
2183 2183 """Decompress a revlog chunk.
2184 2184
2185 2185 The chunk is expected to begin with a header identifying the
2186 2186 format type so it can be routed to an appropriate decompressor.
2187 2187 """
2188 2188 if not data:
2189 2189 return data
2190 2190
2191 2191 # Revlogs are read much more frequently than they are written and many
2192 2192 # chunks only take microseconds to decompress, so performance is
2193 2193 # important here.
2194 2194 #
2195 2195 # We can make a few assumptions about revlogs:
2196 2196 #
2197 2197 # 1) the majority of chunks will be compressed (as opposed to inline
2198 2198 # raw data).
2199 2199 # 2) decompressing *any* data will likely be at least 10x slower than
2200 2200 # returning raw inline data.
2201 2201 # 3) we want to prioritize common and officially supported compression
2202 2202 # engines
2203 2203 #
2204 2204 # It follows that we want to optimize for "decompress compressed data
2205 2205 # when encoded with common and officially supported compression engines"
2206 2206 # case over "raw data" and "data encoded by less common or non-official
2207 2207 # compression engines." That is why we have the inline lookup first
2208 2208 # followed by the compengines lookup.
2209 2209 #
2210 2210 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2211 2211 # compressed chunks. And this matters for changelog and manifest reads.
2212 2212 t = data[0:1]
2213 2213
2214 2214 if t == b'x':
2215 2215 try:
2216 2216 return _zlibdecompress(data)
2217 2217 except zlib.error as e:
2218 2218 raise error.RevlogError(
2219 2219 _(b'revlog decompress error: %s')
2220 2220 % stringutil.forcebytestr(e)
2221 2221 )
2222 2222 # '\0' is more common than 'u' so it goes first.
2223 2223 elif t == b'\0':
2224 2224 return data
2225 2225 elif t == b'u':
2226 2226 return util.buffer(data, 1)
2227 2227
2228 2228 try:
2229 2229 compressor = self._decompressors[t]
2230 2230 except KeyError:
2231 2231 try:
2232 2232 engine = util.compengines.forrevlogheader(t)
2233 2233 compressor = engine.revlogcompressor(self._compengineopts)
2234 2234 self._decompressors[t] = compressor
2235 2235 except KeyError:
2236 2236 raise error.RevlogError(_(b'unknown compression type %r') % t)
2237 2237
2238 2238 return compressor.decompress(data)
2239 2239
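# Taken together, compress() and decompress() give every stored chunk a
# one-byte tag: b'x' is a zlib stream (zlib output happens to start with
# 0x78), b'u' prefixes data stored verbatim, and a leading b'\0' marks
# data kept as-is with no prefix at all. A quick round-trip sketch using
# zlib directly:
#
#     import zlib
#     raw = b'some revision text'
#     chunk = zlib.compress(raw)
#     assert chunk[0:1] == b'x'            # doubles as the revlog tag
#     assert zlib.decompress(chunk) == raw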
2240 2240 def _addrevision(
2241 2241 self,
2242 2242 node,
2243 2243 rawtext,
2244 2244 transaction,
2245 2245 link,
2246 2246 p1,
2247 2247 p2,
2248 2248 flags,
2249 2249 cachedelta,
2250 2250 ifh,
2251 2251 dfh,
2252 2252 alwayscache=False,
2253 2253 deltacomputer=None,
2254 2254 ):
2255 2255 """internal function to add revisions to the log
2256 2256
2257 2257 see addrevision for argument descriptions.
2258 2258
2259 2259 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2260 2260
2261 2261 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2262 2262 be used.
2263 2263
2264 2264 invariants:
2265 2265 - rawtext is optional (can be None); if not set, cachedelta must be set.
2266 2266 if both are set, they must correspond to each other.
2267 2267 """
2268 2268 if node == nullid:
2269 2269 raise error.RevlogError(
2270 2270 _(b"%s: attempt to add null revision") % self.indexfile
2271 2271 )
2272 2272 if node == wdirid or node in wdirfilenodeids:
2273 2273 raise error.RevlogError(
2274 2274 _(b"%s: attempt to add wdir revision") % self.indexfile
2275 2275 )
2276 2276
2277 2277 if self._inline:
2278 2278 fh = ifh
2279 2279 else:
2280 2280 fh = dfh
2281 2281
2282 2282 btext = [rawtext]
2283 2283
2284 2284 curr = len(self)
2285 2285 prev = curr - 1
2286 2286 offset = self.end(prev)
2287 2287 p1r, p2r = self.rev(p1), self.rev(p2)
2288 2288
2289 2289 # full versions are inserted when the needed deltas
2290 2290 # become comparable to the uncompressed text
2291 2291 if rawtext is None:
2292 2292 # we need the rawtext size before it was changed by flag processors,
2293 2293 # which is the non-raw size. use revlog explicitly to avoid filelog's
2294 2294 # extra logic that might remove metadata size.
2295 2295 textlen = mdiff.patchedsize(
2296 2296 revlog.size(self, cachedelta[0]), cachedelta[1]
2297 2297 )
2298 2298 else:
2299 2299 textlen = len(rawtext)
2300 2300
2301 2301 if deltacomputer is None:
2302 2302 deltacomputer = deltautil.deltacomputer(self)
2303 2303
2304 2304 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2305 2305
2306 2306 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2307 2307
2308 2308 e = (
2309 2309 offset_type(offset, flags),
2310 2310 deltainfo.deltalen,
2311 2311 textlen,
2312 2312 deltainfo.base,
2313 2313 link,
2314 2314 p1r,
2315 2315 p2r,
2316 2316 node,
2317 2317 )
2318 2318 self.index.append(e)
2319 2319
2320 2320 entry = self._io.packentry(e, self.node, self.version, curr)
2321 2321 self._writeentry(
2322 2322 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2323 2323 )
2324 2324
2325 2325 rawtext = btext[0]
2326 2326
2327 2327 if alwayscache and rawtext is None:
2328 2328 rawtext = deltacomputer.buildtext(revinfo, fh)
2329 2329
2330 2330 if type(rawtext) == bytes: # only accept immutable objects
2331 2331 self._revisioncache = (node, curr, rawtext)
2332 2332 self._chainbasecache[curr] = deltainfo.chainbase
2333 2333 return node
2334 2334
2335 2335 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2336 2336 # Files opened in a+ mode have inconsistent behavior on various
2337 2337 # platforms. Windows requires that a file positioning call be made
2338 2338 # when the file handle transitions between reads and writes. See
2339 2339 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2340 2340 # platforms, Python or the platform itself can be buggy. Some versions
2341 2341 # of Solaris have been observed to not append at the end of the file
2342 2342 # if the file was seeked to before the end. See issue4943 for more.
2343 2343 #
2344 2344 # We work around this issue by inserting a seek() before writing.
2345 2345 # Note: This is likely not necessary on Python 3. However, because
2346 2346 # the file handle is reused for reads and may be seeked there, we need
2347 2347 # to be careful before changing this.
2348 2348 ifh.seek(0, os.SEEK_END)
2349 2349 if dfh:
2350 2350 dfh.seek(0, os.SEEK_END)
2351 2351
2352 2352 curr = len(self) - 1
2353 2353 if not self._inline:
2354 2354 transaction.add(self.datafile, offset)
2355 2355 transaction.add(self.indexfile, curr * len(entry))
2356 2356 if data[0]:
2357 2357 dfh.write(data[0])
2358 2358 dfh.write(data[1])
2359 2359 ifh.write(entry)
2360 2360 else:
2361 2361 offset += curr * self._io.size
2362 2362 transaction.add(self.indexfile, offset, curr)
2363 2363 ifh.write(entry)
2364 2364 ifh.write(data[0])
2365 2365 ifh.write(data[1])
2366 2366 self._enforceinlinesize(transaction, ifh)
2367 2367 nodemaputil.setup_persistent_nodemap(transaction, self)
2368 2368
2369 2369 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2370 2370 """
2371 2371 add a delta group
2372 2372
2373 2373 given a set of deltas, add them to the revision log. the
2374 2374 first delta is against its parent, which should be in our
2375 2375 log; the rest are against the previous delta.
2376 2376
2377 2377 If ``addrevisioncb`` is defined, it will be called with arguments of
2378 2378 this revlog and the node that was added.
2379 2379 """
2380 2380
2381 2381 if self._writinghandles:
2382 2382 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2383 2383
2384 2384 nodes = []
2385 2385
2386 2386 r = len(self)
2387 2387 end = 0
2388 2388 if r:
2389 2389 end = self.end(r - 1)
2390 2390 ifh = self._indexfp(b"a+")
2391 2391 isize = r * self._io.size
2392 2392 if self._inline:
2393 2393 transaction.add(self.indexfile, end + isize, r)
2394 2394 dfh = None
2395 2395 else:
2396 2396 transaction.add(self.indexfile, isize, r)
2397 2397 transaction.add(self.datafile, end)
2398 2398 dfh = self._datafp(b"a+")
2399 2399
2400 2400 def flush():
2401 2401 if dfh:
2402 2402 dfh.flush()
2403 2403 ifh.flush()
2404 2404
2405 2405 self._writinghandles = (ifh, dfh)
2406 2406
2407 2407 try:
2408 2408 deltacomputer = deltautil.deltacomputer(self)
2409 2409 # loop through our set of deltas
2410 2410 for data in deltas:
2411 2411 node, p1, p2, linknode, deltabase, delta, flags = data
2412 2412 link = linkmapper(linknode)
2413 2413 flags = flags or REVIDX_DEFAULT_FLAGS
2414 2414
2415 2415 nodes.append(node)
2416 2416
2417 2417 if self.index.has_node(node):
2418 2418 self._nodeduplicatecallback(transaction, node)
2419 2419 # this can happen if two branches make the same change
2420 2420 continue
2421 2421
2422 2422 for p in (p1, p2):
2423 2423 if not self.index.has_node(p):
2424 2424 raise error.LookupError(
2425 2425 p, self.indexfile, _(b'unknown parent')
2426 2426 )
2427 2427
2428 2428 if not self.index.has_node(deltabase):
2429 2429 raise error.LookupError(
2430 2430 deltabase, self.indexfile, _(b'unknown delta base')
2431 2431 )
2432 2432
2433 2433 baserev = self.rev(deltabase)
2434 2434
2435 2435 if baserev != nullrev and self.iscensored(baserev):
2436 2436 # if base is censored, delta must be full replacement in a
2437 2437 # single patch operation
2438 2438 hlen = struct.calcsize(b">lll")
2439 2439 oldlen = self.rawsize(baserev)
2440 2440 newlen = len(delta) - hlen
2441 2441 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2442 2442 raise error.CensoredBaseError(
2443 2443 self.indexfile, self.node(baserev)
2444 2444 )
2445 2445
2446 2446 if not flags and self._peek_iscensored(baserev, delta, flush):
2447 2447 flags |= REVIDX_ISCENSORED
2448 2448
2449 2449 # We assume consumers of addrevisioncb will want to retrieve
2450 2450 # the added revision, which will require a call to
2451 2451 # revision(). revision() will fast path if there is a cache
2452 2452 # hit. So, we tell _addrevision() to always cache in this case.
2453 2453 # We're only using addgroup() in the context of changegroup
2454 2454 # generation so the revision data can always be handled as raw
2455 2455 # by the flagprocessor.
2456 2456 self._addrevision(
2457 2457 node,
2458 2458 None,
2459 2459 transaction,
2460 2460 link,
2461 2461 p1,
2462 2462 p2,
2463 2463 flags,
2464 2464 (baserev, delta),
2465 2465 ifh,
2466 2466 dfh,
2467 2467 alwayscache=bool(addrevisioncb),
2468 2468 deltacomputer=deltacomputer,
2469 2469 )
2470 2470
2471 2471 if addrevisioncb:
2472 2472 addrevisioncb(self, node)
2473 2473
2474 2474 if not dfh and not self._inline:
2475 2475 # addrevision switched from inline to conventional
2476 2476 # reopen the index
2477 2477 ifh.close()
2478 2478 dfh = self._datafp(b"a+")
2479 2479 ifh = self._indexfp(b"a+")
2480 2480 self._writinghandles = (ifh, dfh)
2481 2481 finally:
2482 2482 self._writinghandles = None
2483 2483
2484 2484 if dfh:
2485 2485 dfh.close()
2486 2486 ifh.close()
2487 2487
2488 2488 return nodes
2489 2489
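# The shape of each item consumed from ``deltas`` above, as unpacked at
# the top of the loop (names follow the unpacking; values would be
# caller-supplied):
#
#     (node, p1, p2, linknode, deltabase, delta, flags)
#
# ``delta`` is a binary patch against revision ``deltabase``, and
# ``linknode`` is translated to a local changelog revision through
# ``linkmapper``.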
2490 2490 def iscensored(self, rev):
2491 2491 """Check if a file revision is censored."""
2492 2492 if not self._censorable:
2493 2493 return False
2494 2494
2495 2495 return self.flags(rev) & REVIDX_ISCENSORED
2496 2496
2497 2497 def _peek_iscensored(self, baserev, delta, flush):
2498 2498 """Quickly check if a delta produces a censored revision."""
2499 2499 if not self._censorable:
2500 2500 return False
2501 2501
2502 2502 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2503 2503
2504 2504 def getstrippoint(self, minlink):
2505 2505 """find the minimum rev that must be stripped to strip the linkrev
2506 2506
2507 2507 Returns a tuple containing the minimum rev and a set of all revs that
2508 2508 have linkrevs that will be broken by this strip.
2509 2509 """
2510 2510 return storageutil.resolvestripinfo(
2511 2511 minlink,
2512 2512 len(self) - 1,
2513 2513 self.headrevs(),
2514 2514 self.linkrev,
2515 2515 self.parentrevs,
2516 2516 )
2517 2517
2518 2518 def strip(self, minlink, transaction):
2519 2519 """truncate the revlog on the first revision with a linkrev >= minlink
2520 2520
2521 2521 This function is called when we're stripping revision minlink and
2522 2522 its descendants from the repository.
2523 2523
2524 2524 We have to remove all revisions with linkrev >= minlink, because
2525 2525 the equivalent changelog revisions will be renumbered after the
2526 2526 strip.
2527 2527
2528 2528 So we truncate the revlog on the first of these revisions, and
2529 2529 trust that the caller has saved the revisions that shouldn't be
2530 2530 removed and that it'll re-add them after this truncation.
2531 2531 """
2532 2532 if len(self) == 0:
2533 2533 return
2534 2534
2535 2535 rev, _ = self.getstrippoint(minlink)
2536 2536 if rev == len(self):
2537 2537 return
2538 2538
2539 2539 # first truncate the files on disk
2540 2540 end = self.start(rev)
2541 2541 if not self._inline:
2542 2542 transaction.add(self.datafile, end)
2543 2543 end = rev * self._io.size
2544 2544 else:
2545 2545 end += rev * self._io.size
2546 2546
2547 2547 transaction.add(self.indexfile, end)
2548 2548
2549 2549 # then reset internal state in memory to forget those revisions
2550 2550 self._revisioncache = None
2551 2551 self._chaininfocache = {}
2552 2552 self._chunkclear()
2553 2553
2554 2554 del self.index[rev:-1]
2555 2555
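# The truncation offsets computed above mirror the inline layout: for a
# split revlog, the .d file is cut at start(rev) and the .i file at
# rev * self._io.size (64 bytes per v1 entry); for an inline revlog both
# streams share the .i file, so the single cut point is
# start(rev) + rev * self._io.size.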
2556 2556 def checksize(self):
2557 2557 """Check size of index and data files
2558 2558
2559 2559 return a (dd, di) tuple.
2560 2560 - dd: extra bytes for the "data" file
2561 2561 - di: extra bytes for the "index" file
2562 2562
2563 2563 A healthy revlog will return (0, 0).
2564 2564 """
2565 2565 expected = 0
2566 2566 if len(self):
2567 2567 expected = max(0, self.end(len(self) - 1))
2568 2568
2569 2569 try:
2570 2570 with self._datafp() as f:
2571 2571 f.seek(0, io.SEEK_END)
2572 2572 actual = f.tell()
2573 2573 dd = actual - expected
2574 2574 except IOError as inst:
2575 2575 if inst.errno != errno.ENOENT:
2576 2576 raise
2577 2577 dd = 0
2578 2578
2579 2579 try:
2580 2580 f = self.opener(self.indexfile)
2581 2581 f.seek(0, io.SEEK_END)
2582 2582 actual = f.tell()
2583 2583 f.close()
2584 2584 s = self._io.size
2585 2585 i = max(0, actual // s)
2586 2586 di = actual - (i * s)
2587 2587 if self._inline:
2588 2588 databytes = 0
2589 2589 for r in self:
2590 2590 databytes += max(0, self.length(r))
2591 2591 dd = 0
2592 2592 di = actual - len(self) * s - databytes
2593 2593 except IOError as inst:
2594 2594 if inst.errno != errno.ENOENT:
2595 2595 raise
2596 2596 di = 0
2597 2597
2598 2598 return (dd, di)
2599 2599
2600 2600 def files(self):
2601 2601 res = [self.indexfile]
2602 2602 if not self._inline:
2603 2603 res.append(self.datafile)
2604 2604 return res
2605 2605
2606 2606 def emitrevisions(
2607 2607 self,
2608 2608 nodes,
2609 2609 nodesorder=None,
2610 2610 revisiondata=False,
2611 2611 assumehaveparentrevisions=False,
2612 2612 deltamode=repository.CG_DELTAMODE_STD,
2613 2613 ):
2614 2614 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2615 2615 raise error.ProgrammingError(
2616 2616 b'unhandled value for nodesorder: %s' % nodesorder
2617 2617 )
2618 2618
2619 2619 if nodesorder is None and not self._generaldelta:
2620 2620 nodesorder = b'storage'
2621 2621
2622 2622 if (
2623 2623 not self._storedeltachains
2624 2624 and deltamode != repository.CG_DELTAMODE_PREV
2625 2625 ):
2626 2626 deltamode = repository.CG_DELTAMODE_FULL
2627 2627
2628 2628 return storageutil.emitrevisions(
2629 2629 self,
2630 2630 nodes,
2631 2631 nodesorder,
2632 2632 revlogrevisiondelta,
2633 2633 deltaparentfn=self.deltaparent,
2634 2634 candeltafn=self.candelta,
2635 2635 rawsizefn=self.rawsize,
2636 2636 revdifffn=self.revdiff,
2637 2637 flagsfn=self.flags,
2638 2638 deltamode=deltamode,
2639 2639 revisiondata=revisiondata,
2640 2640 assumehaveparentrevisions=assumehaveparentrevisions,
2641 2641 )
2642 2642
2643 2643 DELTAREUSEALWAYS = b'always'
2644 2644 DELTAREUSESAMEREVS = b'samerevs'
2645 2645 DELTAREUSENEVER = b'never'
2646 2646
2647 2647 DELTAREUSEFULLADD = b'fulladd'
2648 2648
2649 2649 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2650 2650
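# A sketch of how these policies are typically handed to clone(); the
# transaction ``tr`` and the ``destrevlog`` are assumed to already
# exist:
#
#     srcrevlog.clone(
#         tr,
#         destrevlog,
#         deltareuse=revlog.DELTAREUSENEVER,  # force delta recomputation
#     )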
2651 2651 def clone(
2652 2652 self,
2653 2653 tr,
2654 2654 destrevlog,
2655 2655 addrevisioncb=None,
2656 2656 deltareuse=DELTAREUSESAMEREVS,
2657 2657 forcedeltabothparents=None,
2658 2658 sidedatacompanion=None,
2659 2659 ):
2660 2660 """Copy this revlog to another, possibly with format changes.
2661 2661
2662 2662 The destination revlog will contain the same revisions and nodes.
2663 2663 However, it may not be bit-for-bit identical due to e.g. delta encoding
2664 2664 differences.
2665 2665
2666 2666 The ``deltareuse`` argument controls how deltas from the existing revlog
2667 2667 are preserved in the destination revlog. The argument can have the
2668 2668 following values:
2669 2669
2670 2670 DELTAREUSEALWAYS
2671 2671 Deltas will always be reused (if possible), even if the destination
2672 2672 revlog would not select the same revisions for the delta. This is the
2673 2673 fastest mode of operation.
2674 2674 DELTAREUSESAMEREVS
2675 2675 Deltas will be reused if the destination revlog would pick the same
2676 2676 revisions for the delta. This mode strikes a balance between speed
2677 2677 and optimization.
2678 2678 DELTAREUSENEVER
2679 2679 Deltas will never be reused. This is the slowest mode of execution.
2680 2680 This mode can be used to recompute deltas (e.g. if the diff/delta
2681 2681 algorithm changes).
2682 2682 DELTAREUSEFULLADD
2683 2683 Revisions will be re-added as if they were new content. This is
2684 2684 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2685 2685 e.g. large file detection and handling.
2686 2686
2687 2687 Delta computation can be slow, so the choice of delta reuse policy can
2688 2688 significantly affect run time.
2689 2689
2690 2690 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2691 2691 two extremes. Deltas will be reused if they are appropriate. But if the
2692 2692 delta could choose a better revision, it will do so. This means if you
2693 2693 are converting a non-generaldelta revlog to a generaldelta revlog,
2694 2694 deltas will be recomputed if the delta's parent isn't a parent of the
2695 2695 revision.
2696 2696
2697 2697 In addition to the delta policy, the ``forcedeltabothparents``
2698 2698 argument controls whether to force computing deltas against both
2699 2699 parents for merges. If None, the current default is used.
2700 2700
2701 2701 If not None, `sidedatacompanion` is a callable that accepts two
2702 2702 arguments:
2703 2703
2704 2704 (srcrevlog, rev)
2705 2705
2706 2706 and returns a triplet that controls changes to sidedata content from the
2707 2707 old revision to the new clone result:
2708 2708
2709 2709 (dropall, filterout, update)
2710 2710
2711 2711 * if `dropall` is True, all sidedata should be dropped
2712 2712 * `filterout` is a set of sidedata keys that should be dropped
2713 2713 * `update` is a mapping of additional/new key -> value
2714 2714 """
2715 2715 if deltareuse not in self.DELTAREUSEALL:
2716 2716 raise ValueError(
2717 2717 _(b'value for deltareuse invalid: %s') % deltareuse
2718 2718 )
2719 2719
2720 2720 if len(destrevlog):
2721 2721 raise ValueError(_(b'destination revlog is not empty'))
2722 2722
2723 2723 if getattr(self, 'filteredrevs', None):
2724 2724 raise ValueError(_(b'source revlog has filtered revisions'))
2725 2725 if getattr(destrevlog, 'filteredrevs', None):
2726 2726 raise ValueError(_(b'destination revlog has filtered revisions'))
2727 2727
2728 2728 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2729 2729 # if possible.
2730 2730 oldlazydelta = destrevlog._lazydelta
2731 2731 oldlazydeltabase = destrevlog._lazydeltabase
2732 2732 oldamd = destrevlog._deltabothparents
2733 2733
2734 2734 try:
2735 2735 if deltareuse == self.DELTAREUSEALWAYS:
2736 2736 destrevlog._lazydeltabase = True
2737 2737 destrevlog._lazydelta = True
2738 2738 elif deltareuse == self.DELTAREUSESAMEREVS:
2739 2739 destrevlog._lazydeltabase = False
2740 2740 destrevlog._lazydelta = True
2741 2741 elif deltareuse == self.DELTAREUSENEVER:
2742 2742 destrevlog._lazydeltabase = False
2743 2743 destrevlog._lazydelta = False
2744 2744
2745 2745 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2746 2746
2747 2747 self._clone(
2748 2748 tr,
2749 2749 destrevlog,
2750 2750 addrevisioncb,
2751 2751 deltareuse,
2752 2752 forcedeltabothparents,
2753 2753 sidedatacompanion,
2754 2754 )
2755 2755
2756 2756 finally:
2757 2757 destrevlog._lazydelta = oldlazydelta
2758 2758 destrevlog._lazydeltabase = oldlazydeltabase
2759 2759 destrevlog._deltabothparents = oldamd
2760 2760
2761 2761 def _clone(
2762 2762 self,
2763 2763 tr,
2764 2764 destrevlog,
2765 2765 addrevisioncb,
2766 2766 deltareuse,
2767 2767 forcedeltabothparents,
2768 2768 sidedatacompanion,
2769 2769 ):
2770 2770 """perform the core duty of `revlog.clone` after parameter processing"""
2771 2771 deltacomputer = deltautil.deltacomputer(destrevlog)
2772 2772 index = self.index
2773 2773 for rev in self:
2774 2774 entry = index[rev]
2775 2775
2776 2776 # Some classes override linkrev to take filtered revs into
2777 2777 # account. Use raw entry from index.
2778 2778 flags = entry[0] & 0xFFFF
2779 2779 linkrev = entry[4]
2780 2780 p1 = index[entry[5]][7]
2781 2781 p2 = index[entry[6]][7]
2782 2782 node = entry[7]
2783 2783
2784 2784 sidedataactions = (False, [], {})
2785 2785 if sidedatacompanion is not None:
2786 2786 sidedataactions = sidedatacompanion(self, rev)
2787 2787
2788 2788 # (Possibly) reuse the delta from the revlog if allowed and
2789 2789 # the revlog chunk is a delta.
2790 2790 cachedelta = None
2791 2791 rawtext = None
2792 2792 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2793 2793 dropall, filterout, update = sidedataactions
2794 2794 text, sidedata = self._revisiondata(rev)
2795 2795 if dropall:
2796 2796 sidedata = {}
2797 2797 for key in filterout:
2798 2798 sidedata.pop(key, None)
2799 2799 sidedata.update(update)
2800 2800 if not sidedata:
2801 2801 sidedata = None
2802 2802 destrevlog.addrevision(
2803 2803 text,
2804 2804 tr,
2805 2805 linkrev,
2806 2806 p1,
2807 2807 p2,
2808 2808 cachedelta=cachedelta,
2809 2809 node=node,
2810 2810 flags=flags,
2811 2811 deltacomputer=deltacomputer,
2812 2812 sidedata=sidedata,
2813 2813 )
2814 2814 else:
2815 2815 if destrevlog._lazydelta:
2816 2816 dp = self.deltaparent(rev)
2817 2817 if dp != nullrev:
2818 2818 cachedelta = (dp, bytes(self._chunk(rev)))
2819 2819
2820 2820 if not cachedelta:
2821 2821 rawtext = self.rawdata(rev)
2822 2822
2823 2823 ifh = destrevlog.opener(
2824 2824 destrevlog.indexfile, b'a+', checkambig=False
2825 2825 )
2826 2826 dfh = None
2827 2827 if not destrevlog._inline:
2828 2828 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2829 2829 try:
2830 2830 destrevlog._addrevision(
2831 2831 node,
2832 2832 rawtext,
2833 2833 tr,
2834 2834 linkrev,
2835 2835 p1,
2836 2836 p2,
2837 2837 flags,
2838 2838 cachedelta,
2839 2839 ifh,
2840 2840 dfh,
2841 2841 deltacomputer=deltacomputer,
2842 2842 )
2843 2843 finally:
2844 2844 if dfh:
2845 2845 dfh.close()
2846 2846 ifh.close()
2847 2847
2848 2848 if addrevisioncb:
2849 2849 addrevisioncb(self, rev, node)
2850 2850
2851 2851 def censorrevision(self, tr, censornode, tombstone=b''):
2852 2852 if (self.version & 0xFFFF) == REVLOGV0:
2853 2853 raise error.RevlogError(
2854 2854 _(b'cannot censor with version %d revlogs') % self.version
2855 2855 )
2856 2856
2857 2857 censorrev = self.rev(censornode)
2858 2858 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2859 2859
2860 2860 if len(tombstone) > self.rawsize(censorrev):
2861 2861 raise error.Abort(
2862 2862 _(b'censor tombstone must be no longer than censored data')
2863 2863 )
2864 2864
2865 2865 # Rewriting the revlog in place is hard. Our strategy for censoring is
2866 2866 # to create a new revlog, copy all revisions to it, then replace the
2867 2867 # revlogs on transaction close.
2868 2868
2869 2869 newindexfile = self.indexfile + b'.tmpcensored'
2870 2870 newdatafile = self.datafile + b'.tmpcensored'
2871 2871
2872 2872 # This is a bit dangerous. We could easily have a mismatch of state.
2873 2873 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2874 2874 newrl.version = self.version
2875 2875 newrl._generaldelta = self._generaldelta
2876 2876 newrl._io = self._io
2877 2877
2878 2878 for rev in self.revs():
2879 2879 node = self.node(rev)
2880 2880 p1, p2 = self.parents(node)
2881 2881
2882 2882 if rev == censorrev:
2883 2883 newrl.addrawrevision(
2884 2884 tombstone,
2885 2885 tr,
2886 2886 self.linkrev(censorrev),
2887 2887 p1,
2888 2888 p2,
2889 2889 censornode,
2890 2890 REVIDX_ISCENSORED,
2891 2891 )
2892 2892
2893 2893 if newrl.deltaparent(rev) != nullrev:
2894 2894 raise error.Abort(
2895 2895 _(
2896 2896 b'censored revision stored as delta; '
2897 2897 b'cannot censor'
2898 2898 ),
2899 2899 hint=_(
2900 2900 b'censoring of revlogs is not '
2901 2901 b'fully implemented; please report '
2902 2902 b'this bug'
2903 2903 ),
2904 2904 )
2905 2905 continue
2906 2906
2907 2907 if self.iscensored(rev):
2908 2908 if self.deltaparent(rev) != nullrev:
2909 2909 raise error.Abort(
2910 2910 _(
2911 2911 b'cannot censor due to censored '
2912 2912 b'revision having delta stored'
2913 2913 )
2914 2914 )
2915 2915 rawtext = self._chunk(rev)
2916 2916 else:
2917 2917 rawtext = self.rawdata(rev)
2918 2918
2919 2919 newrl.addrawrevision(
2920 2920 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2921 2921 )
2922 2922
2923 2923 tr.addbackup(self.indexfile, location=b'store')
2924 2924 if not self._inline:
2925 2925 tr.addbackup(self.datafile, location=b'store')
2926 2926
2927 2927 self.opener.rename(newrl.indexfile, self.indexfile)
2928 2928 if not self._inline:
2929 2929 self.opener.rename(newrl.datafile, self.datafile)
2930 2930
2931 2931 self.clearcaches()
2932 2932 self._loadindex()
2933 2933
2934 2934 def verifyintegrity(self, state):
2935 2935 """Verifies the integrity of the revlog.
2936 2936
2937 2937 Yields ``revlogproblem`` instances describing problems that are
2938 2938 found.
2939 2939 """
2940 2940 dd, di = self.checksize()
2941 2941 if dd:
2942 2942 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2943 2943 if di:
2944 2944 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2945 2945
2946 2946 version = self.version & 0xFFFF
2947 2947
2948 2948 # The verifier tells us what version revlog we should be.
2949 2949 if version != state[b'expectedversion']:
2950 2950 yield revlogproblem(
2951 2951 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2952 2952 % (self.indexfile, version, state[b'expectedversion'])
2953 2953 )
2954 2954
2955 2955 state[b'skipread'] = set()
2956 2956 state[b'safe_renamed'] = set()
2957 2957
2958 2958 for rev in self:
2959 2959 node = self.node(rev)
2960 2960
2961 2961 # Verify contents. 4 cases to care about:
2962 2962 #
2963 2963 # common: the most common case
2964 2964 # rename: with a rename
2965 2965 # meta: file content starts with b'\1\n', the metadata
2966 2966 # header defined in filelog.py, but without a rename
2967 2967 # ext: content stored externally
2968 2968 #
2969 2969 # More formally, their differences are shown below:
2970 2970 #
2971 2971 # | common | rename | meta | ext
2972 2972 # -------------------------------------------------------
2973 2973 # flags() | 0 | 0 | 0 | not 0
2974 2974 # renamed() | False | True | False | ?
2975 2975 # rawtext[0:2]=='\1\n'| False | True | True | ?
2976 2976 #
2977 2977 # "rawtext" means the raw text stored in revlog data, which
2978 2978 # could be retrieved by "rawdata(rev)". "text"
2979 2979 # mentioned below is "revision(rev)".
2980 2980 #
2981 2981 # There are 3 different lengths stored physically:
2982 2982 # 1. L1: rawsize, stored in revlog index
2983 2983 # 2. L2: len(rawtext), stored in revlog data
2984 2984 # 3. L3: len(text), stored in revlog data if flags==0, or
2985 2985 # possibly somewhere else if flags!=0
2986 2986 #
2987 2987 # L1 should be equal to L2. L3 could be different from them.
2988 2988 # "text" may or may not affect commit hash depending on flag
2989 2989 # processors (see flagutil.addflagprocessor).
2990 2990 #
2991 2991 # | common | rename | meta | ext
2992 2992 # -------------------------------------------------
2993 2993 # rawsize() | L1 | L1 | L1 | L1
2994 2994 # size() | L1 | L2-LM | L1(*) | L1 (?)
2995 2995 # len(rawtext) | L2 | L2 | L2 | L2
2996 2996 # len(text) | L2 | L2 | L2 | L3
2997 2997 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2998 2998 #
2999 2999 # LM: length of metadata, depending on rawtext
3000 3000 # (*): not ideal, see comment in filelog.size
3001 3001 # (?): could be "- len(meta)" if the resolved content has
3002 3002 # rename metadata
3003 3003 #
3004 3004 # Checks needed to be done:
3005 3005 # 1. length check: L1 == L2, in all cases.
3006 3006 # 2. hash check: depending on flag processor, we may need to
3007 3007 # use either "text" (external), or "rawtext" (in revlog).
3008 3008
3009 3009 try:
3010 3010 skipflags = state.get(b'skipflags', 0)
3011 3011 if skipflags:
3012 3012 skipflags &= self.flags(rev)
3013 3013
3014 3014 _verify_revision(self, skipflags, state, node)
3015 3015
3016 3016 l1 = self.rawsize(rev)
3017 3017 l2 = len(self.rawdata(node))
3018 3018
3019 3019 if l1 != l2:
3020 3020 yield revlogproblem(
3021 3021 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3022 3022 node=node,
3023 3023 )
3024 3024
3025 3025 except error.CensoredNodeError:
3026 3026 if state[b'erroroncensored']:
3027 3027 yield revlogproblem(
3028 3028 error=_(b'censored file data'), node=node
3029 3029 )
3030 3030 state[b'skipread'].add(node)
3031 3031 except Exception as e:
3032 3032 yield revlogproblem(
3033 3033 error=_(b'unpacking %s: %s')
3034 3034 % (short(node), stringutil.forcebytestr(e)),
3035 3035 node=node,
3036 3036 )
3037 3037 state[b'skipread'].add(node)
3038 3038
3039 3039 def storageinfo(
3040 3040 self,
3041 3041 exclusivefiles=False,
3042 3042 sharedfiles=False,
3043 3043 revisionscount=False,
3044 3044 trackedsize=False,
3045 3045 storedsize=False,
3046 3046 ):
3047 3047 d = {}
3048 3048
3049 3049 if exclusivefiles:
3050 3050 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3051 3051 if not self._inline:
3052 3052 d[b'exclusivefiles'].append((self.opener, self.datafile))
3053 3053
3054 3054 if sharedfiles:
3055 3055 d[b'sharedfiles'] = []
3056 3056
3057 3057 if revisionscount:
3058 3058 d[b'revisionscount'] = len(self)
3059 3059
3060 3060 if trackedsize:
3061 3061 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3062 3062
3063 3063 if storedsize:
3064 3064 d[b'storedsize'] = sum(
3065 3065 self.opener.stat(path).st_size for path in self.files()
3066 3066 )
3067 3067
3068 3068 return d