##// END OF EJS Templates
revlog: fix revset in reachableroots docstring...
Jun Wu -
r44168:1a42f845 default draft
parent child Browse files
Show More
@@ -1,2975 +1,2975
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 revlogutils,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanked usage of all the name to prevent pyflakes constraints
87 87 # We need these name available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_EXTSTORED
102 102 REVIDX_DEFAULT_FLAGS
103 103 REVIDX_FLAGS_ORDER
104 104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 105
106 106 parsers = policy.importmod('parsers')
107 107 rustancestor = policy.importrust('ancestor')
108 108 rustdagop = policy.importrust('dagop')
109 109
110 110 # Aliased for performance.
111 111 _zlibdecompress = zlib.decompress
112 112
113 113 # max size of revlog with inline data
114 114 _maxinline = 131072
115 115 _chunksize = 1048576
116 116
117 117 # Flag processors for REVIDX_ELLIPSIS.
118 118 def ellipsisreadprocessor(rl, text):
119 119 return text, False, {}
120 120
121 121
122 122 def ellipsiswriteprocessor(rl, text, sidedata):
123 123 return text, False
124 124
125 125
126 126 def ellipsisrawprocessor(rl, text):
127 127 return False
128 128
129 129
130 130 ellipsisprocessor = (
131 131 ellipsisreadprocessor,
132 132 ellipsiswriteprocessor,
133 133 ellipsisrawprocessor,
134 134 )
135 135
136 136
137 137 def getoffset(q):
138 138 return int(q >> 16)
139 139
140 140
141 141 def gettype(q):
142 142 return int(q & 0xFFFF)
143 143
144 144
145 145 def offset_type(offset, type):
146 146 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
147 147 raise ValueError(b'unknown revlog index flags')
148 148 return int(int(offset) << 16 | type)
149 149
150 150
151 151 @attr.s(slots=True, frozen=True)
152 152 class _revisioninfo(object):
153 153 """Information about a revision that allows building its fulltext
154 154 node: expected hash of the revision
155 155 p1, p2: parent revs of the revision
156 156 btext: built text cache consisting of a one-element list
157 157 cachedelta: (baserev, uncompressed_delta) or None
158 158 flags: flags associated to the revision storage
159 159
160 160 One of btext[0] or cachedelta must be set.
161 161 """
162 162
163 163 node = attr.ib()
164 164 p1 = attr.ib()
165 165 p2 = attr.ib()
166 166 btext = attr.ib()
167 167 textlen = attr.ib()
168 168 cachedelta = attr.ib()
169 169 flags = attr.ib()
170 170
171 171
172 172 @interfaceutil.implementer(repository.irevisiondelta)
173 173 @attr.s(slots=True)
174 174 class revlogrevisiondelta(object):
175 175 node = attr.ib()
176 176 p1node = attr.ib()
177 177 p2node = attr.ib()
178 178 basenode = attr.ib()
179 179 flags = attr.ib()
180 180 baserevisionsize = attr.ib()
181 181 revision = attr.ib()
182 182 delta = attr.ib()
183 183 linknode = attr.ib(default=None)
184 184
185 185
186 186 @interfaceutil.implementer(repository.iverifyproblem)
187 187 @attr.s(frozen=True)
188 188 class revlogproblem(object):
189 189 warning = attr.ib(default=None)
190 190 error = attr.ib(default=None)
191 191 node = attr.ib(default=None)
192 192
193 193
194 194 # index v0:
195 195 # 4 bytes: offset
196 196 # 4 bytes: compressed length
197 197 # 4 bytes: base rev
198 198 # 4 bytes: link rev
199 199 # 20 bytes: parent 1 nodeid
200 200 # 20 bytes: parent 2 nodeid
201 201 # 20 bytes: nodeid
202 202 indexformatv0 = struct.Struct(b">4l20s20s20s")
203 203 indexformatv0_pack = indexformatv0.pack
204 204 indexformatv0_unpack = indexformatv0.unpack
205 205
206 206
207 207 class revlogoldindex(list):
208 208 @property
209 209 def nodemap(self):
210 210 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
211 211 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
212 212 return self._nodemap
213 213
214 214 @util.propertycache
215 215 def _nodemap(self):
216 216 nodemap = revlogutils.NodeMap({nullid: nullrev})
217 217 for r in range(0, len(self)):
218 218 n = self[r][7]
219 219 nodemap[n] = r
220 220 return nodemap
221 221
222 222 def has_node(self, node):
223 223 """return True if the node exist in the index"""
224 224 return node in self._nodemap
225 225
226 226 def rev(self, node):
227 227 """return a revision for a node
228 228
229 229 If the node is unknown, raise a RevlogError"""
230 230 return self._nodemap[node]
231 231
232 232 def get_rev(self, node):
233 233 """return a revision for a node
234 234
235 235 If the node is unknown, return None"""
236 236 return self._nodemap.get(node)
237 237
238 238 def append(self, tup):
239 239 self._nodemap[tup[7]] = len(self)
240 240 super(revlogoldindex, self).append(tup)
241 241
242 242 def __delitem__(self, i):
243 243 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
244 244 raise ValueError(b"deleting slices only supports a:-1 with step 1")
245 245 for r in pycompat.xrange(i.start, len(self)):
246 246 del self._nodemap[self[r][7]]
247 247 super(revlogoldindex, self).__delitem__(i)
248 248
249 249 def clearcaches(self):
250 250 self.__dict__.pop('_nodemap', None)
251 251
252 252 def __getitem__(self, i):
253 253 if i == -1:
254 254 return (0, 0, 0, -1, -1, -1, -1, nullid)
255 255 return list.__getitem__(self, i)
256 256
257 257
258 258 class revlogoldio(object):
259 259 def __init__(self):
260 260 self.size = indexformatv0.size
261 261
262 262 def parseindex(self, data, inline):
263 263 s = self.size
264 264 index = []
265 265 nodemap = revlogutils.NodeMap({nullid: nullrev})
266 266 n = off = 0
267 267 l = len(data)
268 268 while off + s <= l:
269 269 cur = data[off : off + s]
270 270 off += s
271 271 e = indexformatv0_unpack(cur)
272 272 # transform to revlogv1 format
273 273 e2 = (
274 274 offset_type(e[0], 0),
275 275 e[1],
276 276 -1,
277 277 e[2],
278 278 e[3],
279 279 nodemap.get(e[4], nullrev),
280 280 nodemap.get(e[5], nullrev),
281 281 e[6],
282 282 )
283 283 index.append(e2)
284 284 nodemap[e[6]] = n
285 285 n += 1
286 286
287 287 index = revlogoldindex(index)
288 288 return index, None
289 289
290 290 def packentry(self, entry, node, version, rev):
291 291 if gettype(entry[0]):
292 292 raise error.RevlogError(
293 293 _(b'index entry flags need revlog version 1')
294 294 )
295 295 e2 = (
296 296 getoffset(entry[0]),
297 297 entry[1],
298 298 entry[3],
299 299 entry[4],
300 300 node(entry[5]),
301 301 node(entry[6]),
302 302 entry[7],
303 303 )
304 304 return indexformatv0_pack(*e2)
305 305
306 306
307 307 # index ng:
308 308 # 6 bytes: offset
309 309 # 2 bytes: flags
310 310 # 4 bytes: compressed length
311 311 # 4 bytes: uncompressed length
312 312 # 4 bytes: base rev
313 313 # 4 bytes: link rev
314 314 # 4 bytes: parent 1 rev
315 315 # 4 bytes: parent 2 rev
316 316 # 32 bytes: nodeid
317 317 indexformatng = struct.Struct(b">Qiiiiii20s12x")
318 318 indexformatng_pack = indexformatng.pack
319 319 versionformat = struct.Struct(b">I")
320 320 versionformat_pack = versionformat.pack
321 321 versionformat_unpack = versionformat.unpack
322 322
323 323 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
324 324 # signed integer)
325 325 _maxentrysize = 0x7FFFFFFF
326 326
327 327
328 328 class revlogio(object):
329 329 def __init__(self):
330 330 self.size = indexformatng.size
331 331
332 332 def parseindex(self, data, inline):
333 333 # call the C implementation to parse the index data
334 334 index, cache = parsers.parse_index2(data, inline)
335 335 return index, cache
336 336
337 337 def packentry(self, entry, node, version, rev):
338 338 p = indexformatng_pack(*entry)
339 339 if rev == 0:
340 340 p = versionformat_pack(version) + p[4:]
341 341 return p
342 342
343 343
344 344 class revlog(object):
345 345 """
346 346 the underlying revision storage object
347 347
348 348 A revlog consists of two parts, an index and the revision data.
349 349
350 350 The index is a file with a fixed record size containing
351 351 information on each revision, including its nodeid (hash), the
352 352 nodeids of its parents, the position and offset of its data within
353 353 the data file, and the revision it's based on. Finally, each entry
354 354 contains a linkrev entry that can serve as a pointer to external
355 355 data.
356 356
357 357 The revision data itself is a linear collection of data chunks.
358 358 Each chunk represents a revision and is usually represented as a
359 359 delta against the previous chunk. To bound lookup time, runs of
360 360 deltas are limited to about 2 times the length of the original
361 361 version data. This makes retrieval of a version proportional to
362 362 its size, or O(1) relative to the number of revisions.
363 363
364 364 Both pieces of the revlog are written to in an append-only
365 365 fashion, which means we never need to rewrite a file to insert or
366 366 remove data, and can use some simple techniques to avoid the need
367 367 for locking while reading.
368 368
369 369 If checkambig, indexfile is opened with checkambig=True at
370 370 writing, to avoid file stat ambiguity.
371 371
372 372 If mmaplargeindex is True, and an mmapindexthreshold is set, the
373 373 index will be mmapped rather than read if it is larger than the
374 374 configured threshold.
375 375
376 376 If censorable is True, the revlog can have censored revisions.
377 377
378 378 If `upperboundcomp` is not None, this is the expected maximal gain from
379 379 compression for the data content.
380 380 """
381 381
382 382 _flagserrorclass = error.RevlogError
383 383
384 384 def __init__(
385 385 self,
386 386 opener,
387 387 indexfile,
388 388 datafile=None,
389 389 checkambig=False,
390 390 mmaplargeindex=False,
391 391 censorable=False,
392 392 upperboundcomp=None,
393 393 ):
394 394 """
395 395 create a revlog object
396 396
397 397 opener is a function that abstracts the file opening operation
398 398 and can be used to implement COW semantics or the like.
399 399
400 400 """
401 401 self.upperboundcomp = upperboundcomp
402 402 self.indexfile = indexfile
403 403 self.datafile = datafile or (indexfile[:-2] + b".d")
404 404 self.opener = opener
405 405 # When True, indexfile is opened with checkambig=True at writing, to
406 406 # avoid file stat ambiguity.
407 407 self._checkambig = checkambig
408 408 self._mmaplargeindex = mmaplargeindex
409 409 self._censorable = censorable
410 410 # 3-tuple of (node, rev, text) for a raw revision.
411 411 self._revisioncache = None
412 412 # Maps rev to chain base rev.
413 413 self._chainbasecache = util.lrucachedict(100)
414 414 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
415 415 self._chunkcache = (0, b'')
416 416 # How much data to read and cache into the raw revlog data cache.
417 417 self._chunkcachesize = 65536
418 418 self._maxchainlen = None
419 419 self._deltabothparents = True
420 420 self.index = None
421 421 # Mapping of partial identifiers to full nodes.
422 422 self._pcache = {}
423 423 # Mapping of revision integer to full node.
424 424 self._compengine = b'zlib'
425 425 self._compengineopts = {}
426 426 self._maxdeltachainspan = -1
427 427 self._withsparseread = False
428 428 self._sparserevlog = False
429 429 self._srdensitythreshold = 0.50
430 430 self._srmingapsize = 262144
431 431
432 432 # Make copy of flag processors so each revlog instance can support
433 433 # custom flags.
434 434 self._flagprocessors = dict(flagutil.flagprocessors)
435 435
436 436 # 2-tuple of file handles being used for active writing.
437 437 self._writinghandles = None
438 438
439 439 self._loadindex()
440 440
441 441 def _loadindex(self):
442 442 mmapindexthreshold = None
443 443 opts = self.opener.options
444 444
445 445 if b'revlogv2' in opts:
446 446 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
447 447 elif b'revlogv1' in opts:
448 448 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
449 449 if b'generaldelta' in opts:
450 450 newversionflags |= FLAG_GENERALDELTA
451 451 elif b'revlogv0' in self.opener.options:
452 452 newversionflags = REVLOGV0
453 453 else:
454 454 newversionflags = REVLOG_DEFAULT_VERSION
455 455
456 456 if b'chunkcachesize' in opts:
457 457 self._chunkcachesize = opts[b'chunkcachesize']
458 458 if b'maxchainlen' in opts:
459 459 self._maxchainlen = opts[b'maxchainlen']
460 460 if b'deltabothparents' in opts:
461 461 self._deltabothparents = opts[b'deltabothparents']
462 462 self._lazydelta = bool(opts.get(b'lazydelta', True))
463 463 self._lazydeltabase = False
464 464 if self._lazydelta:
465 465 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
466 466 if b'compengine' in opts:
467 467 self._compengine = opts[b'compengine']
468 468 if b'zlib.level' in opts:
469 469 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
470 470 if b'zstd.level' in opts:
471 471 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
472 472 if b'maxdeltachainspan' in opts:
473 473 self._maxdeltachainspan = opts[b'maxdeltachainspan']
474 474 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
475 475 mmapindexthreshold = opts[b'mmapindexthreshold']
476 476 self.hassidedata = bool(opts.get(b'side-data', False))
477 477 if self.hassidedata:
478 478 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
479 479 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
480 480 withsparseread = bool(opts.get(b'with-sparse-read', False))
481 481 # sparse-revlog forces sparse-read
482 482 self._withsparseread = self._sparserevlog or withsparseread
483 483 if b'sparse-read-density-threshold' in opts:
484 484 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
485 485 if b'sparse-read-min-gap-size' in opts:
486 486 self._srmingapsize = opts[b'sparse-read-min-gap-size']
487 487 if opts.get(b'enableellipsis'):
488 488 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
489 489
490 490 # revlog v0 doesn't have flag processors
491 491 for flag, processor in pycompat.iteritems(
492 492 opts.get(b'flagprocessors', {})
493 493 ):
494 494 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
495 495
496 496 if self._chunkcachesize <= 0:
497 497 raise error.RevlogError(
498 498 _(b'revlog chunk cache size %r is not greater than 0')
499 499 % self._chunkcachesize
500 500 )
501 501 elif self._chunkcachesize & (self._chunkcachesize - 1):
502 502 raise error.RevlogError(
503 503 _(b'revlog chunk cache size %r is not a power of 2')
504 504 % self._chunkcachesize
505 505 )
506 506
507 507 indexdata = b''
508 508 self._initempty = True
509 509 try:
510 510 with self._indexfp() as f:
511 511 if (
512 512 mmapindexthreshold is not None
513 513 and self.opener.fstat(f).st_size >= mmapindexthreshold
514 514 ):
515 515 # TODO: should .close() to release resources without
516 516 # relying on Python GC
517 517 indexdata = util.buffer(util.mmapread(f))
518 518 else:
519 519 indexdata = f.read()
520 520 if len(indexdata) > 0:
521 521 versionflags = versionformat_unpack(indexdata[:4])[0]
522 522 self._initempty = False
523 523 else:
524 524 versionflags = newversionflags
525 525 except IOError as inst:
526 526 if inst.errno != errno.ENOENT:
527 527 raise
528 528
529 529 versionflags = newversionflags
530 530
531 531 self.version = versionflags
532 532
533 533 flags = versionflags & ~0xFFFF
534 534 fmt = versionflags & 0xFFFF
535 535
536 536 if fmt == REVLOGV0:
537 537 if flags:
538 538 raise error.RevlogError(
539 539 _(b'unknown flags (%#04x) in version %d revlog %s')
540 540 % (flags >> 16, fmt, self.indexfile)
541 541 )
542 542
543 543 self._inline = False
544 544 self._generaldelta = False
545 545
546 546 elif fmt == REVLOGV1:
547 547 if flags & ~REVLOGV1_FLAGS:
548 548 raise error.RevlogError(
549 549 _(b'unknown flags (%#04x) in version %d revlog %s')
550 550 % (flags >> 16, fmt, self.indexfile)
551 551 )
552 552
553 553 self._inline = versionflags & FLAG_INLINE_DATA
554 554 self._generaldelta = versionflags & FLAG_GENERALDELTA
555 555
556 556 elif fmt == REVLOGV2:
557 557 if flags & ~REVLOGV2_FLAGS:
558 558 raise error.RevlogError(
559 559 _(b'unknown flags (%#04x) in version %d revlog %s')
560 560 % (flags >> 16, fmt, self.indexfile)
561 561 )
562 562
563 563 self._inline = versionflags & FLAG_INLINE_DATA
564 564 # generaldelta implied by version 2 revlogs.
565 565 self._generaldelta = True
566 566
567 567 else:
568 568 raise error.RevlogError(
569 569 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
570 570 )
571 571 # sparse-revlog can't be on without general-delta (issue6056)
572 572 if not self._generaldelta:
573 573 self._sparserevlog = False
574 574
575 575 self._storedeltachains = True
576 576
577 577 self._io = revlogio()
578 578 if self.version == REVLOGV0:
579 579 self._io = revlogoldio()
580 580 try:
581 581 d = self._io.parseindex(indexdata, self._inline)
582 582 except (ValueError, IndexError):
583 583 raise error.RevlogError(
584 584 _(b"index %s is corrupted") % self.indexfile
585 585 )
586 586 self.index, self._chunkcache = d
587 587 if not self._chunkcache:
588 588 self._chunkclear()
589 589 # revnum -> (chain-length, sum-delta-length)
590 590 self._chaininfocache = {}
591 591 # revlog header -> revlog compressor
592 592 self._decompressors = {}
593 593
594 594 @util.propertycache
595 595 def _compressor(self):
596 596 engine = util.compengines[self._compengine]
597 597 return engine.revlogcompressor(self._compengineopts)
598 598
599 599 def _indexfp(self, mode=b'r'):
600 600 """file object for the revlog's index file"""
601 601 args = {'mode': mode}
602 602 if mode != b'r':
603 603 args['checkambig'] = self._checkambig
604 604 if mode == b'w':
605 605 args['atomictemp'] = True
606 606 return self.opener(self.indexfile, **args)
607 607
608 608 def _datafp(self, mode=b'r'):
609 609 """file object for the revlog's data file"""
610 610 return self.opener(self.datafile, mode=mode)
611 611
612 612 @contextlib.contextmanager
613 613 def _datareadfp(self, existingfp=None):
614 614 """file object suitable to read data"""
615 615 # Use explicit file handle, if given.
616 616 if existingfp is not None:
617 617 yield existingfp
618 618
619 619 # Use a file handle being actively used for writes, if available.
620 620 # There is some danger to doing this because reads will seek the
621 621 # file. However, _writeentry() performs a SEEK_END before all writes,
622 622 # so we should be safe.
623 623 elif self._writinghandles:
624 624 if self._inline:
625 625 yield self._writinghandles[0]
626 626 else:
627 627 yield self._writinghandles[1]
628 628
629 629 # Otherwise open a new file handle.
630 630 else:
631 631 if self._inline:
632 632 func = self._indexfp
633 633 else:
634 634 func = self._datafp
635 635 with func() as fp:
636 636 yield fp
637 637
638 638 def tiprev(self):
639 639 return len(self.index) - 1
640 640
641 641 def tip(self):
642 642 return self.node(self.tiprev())
643 643
644 644 def __contains__(self, rev):
645 645 return 0 <= rev < len(self)
646 646
647 647 def __len__(self):
648 648 return len(self.index)
649 649
650 650 def __iter__(self):
651 651 return iter(pycompat.xrange(len(self)))
652 652
653 653 def revs(self, start=0, stop=None):
654 654 """iterate over all rev in this revlog (from start to stop)"""
655 655 return storageutil.iterrevs(len(self), start=start, stop=stop)
656 656
657 657 @property
658 658 def nodemap(self):
659 659 msg = (
660 660 b"revlog.nodemap is deprecated, "
661 661 b"use revlog.index.[has_node|rev|get_rev]"
662 662 )
663 663 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
664 664 return self.index.nodemap
665 665
666 666 @property
667 667 def _nodecache(self):
668 668 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
669 669 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
670 670 return self.index.nodemap
671 671
672 672 def hasnode(self, node):
673 673 try:
674 674 self.rev(node)
675 675 return True
676 676 except KeyError:
677 677 return False
678 678
679 679 def candelta(self, baserev, rev):
680 680 """whether two revisions (baserev, rev) can be delta-ed or not"""
681 681 # Disable delta if either rev requires a content-changing flag
682 682 # processor (ex. LFS). This is because such flag processor can alter
683 683 # the rawtext content that the delta will be based on, and two clients
684 684 # could have a same revlog node with different flags (i.e. different
685 685 # rawtext contents) and the delta could be incompatible.
686 686 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
687 687 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
688 688 ):
689 689 return False
690 690 return True
691 691
692 692 def clearcaches(self):
693 693 self._revisioncache = None
694 694 self._chainbasecache.clear()
695 695 self._chunkcache = (0, b'')
696 696 self._pcache = {}
697 697 self.index.clearcaches()
698 698
699 699 def rev(self, node):
700 700 try:
701 701 return self.index.rev(node)
702 702 except TypeError:
703 703 raise
704 704 except error.RevlogError:
705 705 # parsers.c radix tree lookup failed
706 706 if node == wdirid or node in wdirfilenodeids:
707 707 raise error.WdirUnsupported
708 708 raise error.LookupError(node, self.indexfile, _(b'no node'))
709 709
710 710 # Accessors for index entries.
711 711
712 712 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
713 713 # are flags.
714 714 def start(self, rev):
715 715 return int(self.index[rev][0] >> 16)
716 716
717 717 def flags(self, rev):
718 718 return self.index[rev][0] & 0xFFFF
719 719
720 720 def length(self, rev):
721 721 return self.index[rev][1]
722 722
723 723 def rawsize(self, rev):
724 724 """return the length of the uncompressed text for a given revision"""
725 725 l = self.index[rev][2]
726 726 if l >= 0:
727 727 return l
728 728
729 729 t = self.rawdata(rev)
730 730 return len(t)
731 731
732 732 def size(self, rev):
733 733 """length of non-raw text (processed by a "read" flag processor)"""
734 734 # fast path: if no "read" flag processor could change the content,
735 735 # size is rawsize. note: ELLIPSIS is known to not change the content.
736 736 flags = self.flags(rev)
737 737 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
738 738 return self.rawsize(rev)
739 739
740 740 return len(self.revision(rev, raw=False))
741 741
742 742 def chainbase(self, rev):
743 743 base = self._chainbasecache.get(rev)
744 744 if base is not None:
745 745 return base
746 746
747 747 index = self.index
748 748 iterrev = rev
749 749 base = index[iterrev][3]
750 750 while base != iterrev:
751 751 iterrev = base
752 752 base = index[iterrev][3]
753 753
754 754 self._chainbasecache[rev] = base
755 755 return base
756 756
757 757 def linkrev(self, rev):
758 758 return self.index[rev][4]
759 759
760 760 def parentrevs(self, rev):
761 761 try:
762 762 entry = self.index[rev]
763 763 except IndexError:
764 764 if rev == wdirrev:
765 765 raise error.WdirUnsupported
766 766 raise
767 767
768 768 return entry[5], entry[6]
769 769
770 770 # fast parentrevs(rev) where rev isn't filtered
771 771 _uncheckedparentrevs = parentrevs
772 772
773 773 def node(self, rev):
774 774 try:
775 775 return self.index[rev][7]
776 776 except IndexError:
777 777 if rev == wdirrev:
778 778 raise error.WdirUnsupported
779 779 raise
780 780
781 781 # Derived from index values.
782 782
783 783 def end(self, rev):
784 784 return self.start(rev) + self.length(rev)
785 785
786 786 def parents(self, node):
787 787 i = self.index
788 788 d = i[self.rev(node)]
789 789 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
790 790
791 791 def chainlen(self, rev):
792 792 return self._chaininfo(rev)[0]
793 793
794 794 def _chaininfo(self, rev):
795 795 chaininfocache = self._chaininfocache
796 796 if rev in chaininfocache:
797 797 return chaininfocache[rev]
798 798 index = self.index
799 799 generaldelta = self._generaldelta
800 800 iterrev = rev
801 801 e = index[iterrev]
802 802 clen = 0
803 803 compresseddeltalen = 0
804 804 while iterrev != e[3]:
805 805 clen += 1
806 806 compresseddeltalen += e[1]
807 807 if generaldelta:
808 808 iterrev = e[3]
809 809 else:
810 810 iterrev -= 1
811 811 if iterrev in chaininfocache:
812 812 t = chaininfocache[iterrev]
813 813 clen += t[0]
814 814 compresseddeltalen += t[1]
815 815 break
816 816 e = index[iterrev]
817 817 else:
818 818 # Add text length of base since decompressing that also takes
819 819 # work. For cache hits the length is already included.
820 820 compresseddeltalen += e[1]
821 821 r = (clen, compresseddeltalen)
822 822 chaininfocache[rev] = r
823 823 return r
824 824
825 825 def _deltachain(self, rev, stoprev=None):
826 826 """Obtain the delta chain for a revision.
827 827
828 828 ``stoprev`` specifies a revision to stop at. If not specified, we
829 829 stop at the base of the chain.
830 830
831 831 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
832 832 revs in ascending order and ``stopped`` is a bool indicating whether
833 833 ``stoprev`` was hit.
834 834 """
835 835 # Try C implementation.
836 836 try:
837 837 return self.index.deltachain(rev, stoprev, self._generaldelta)
838 838 except AttributeError:
839 839 pass
840 840
841 841 chain = []
842 842
843 843 # Alias to prevent attribute lookup in tight loop.
844 844 index = self.index
845 845 generaldelta = self._generaldelta
846 846
847 847 iterrev = rev
848 848 e = index[iterrev]
849 849 while iterrev != e[3] and iterrev != stoprev:
850 850 chain.append(iterrev)
851 851 if generaldelta:
852 852 iterrev = e[3]
853 853 else:
854 854 iterrev -= 1
855 855 e = index[iterrev]
856 856
857 857 if iterrev == stoprev:
858 858 stopped = True
859 859 else:
860 860 chain.append(iterrev)
861 861 stopped = False
862 862
863 863 chain.reverse()
864 864 return chain, stopped
865 865
866 866 def ancestors(self, revs, stoprev=0, inclusive=False):
867 867 """Generate the ancestors of 'revs' in reverse revision order.
868 868 Does not generate revs lower than stoprev.
869 869
870 870 See the documentation for ancestor.lazyancestors for more details."""
871 871
872 872 # first, make sure start revisions aren't filtered
873 873 revs = list(revs)
874 874 checkrev = self.node
875 875 for r in revs:
876 876 checkrev(r)
877 877 # and we're sure ancestors aren't filtered as well
878 878
879 879 if rustancestor is not None:
880 880 lazyancestors = rustancestor.LazyAncestors
881 881 arg = self.index
882 882 elif util.safehasattr(parsers, b'rustlazyancestors'):
883 883 lazyancestors = ancestor.rustlazyancestors
884 884 arg = self.index
885 885 else:
886 886 lazyancestors = ancestor.lazyancestors
887 887 arg = self._uncheckedparentrevs
888 888 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
889 889
890 890 def descendants(self, revs):
891 891 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
892 892
893 893 def findcommonmissing(self, common=None, heads=None):
894 894 """Return a tuple of the ancestors of common and the ancestors of heads
895 895 that are not ancestors of common. In revset terminology, we return the
896 896 tuple:
897 897
898 898 ::common, (::heads) - (::common)
899 899
900 900 The list is sorted by revision number, meaning it is
901 901 topologically sorted.
902 902
903 903 'heads' and 'common' are both lists of node IDs. If heads is
904 904 not supplied, uses all of the revlog's heads. If common is not
905 905 supplied, uses nullid."""
906 906 if common is None:
907 907 common = [nullid]
908 908 if heads is None:
909 909 heads = self.heads()
910 910
911 911 common = [self.rev(n) for n in common]
912 912 heads = [self.rev(n) for n in heads]
913 913
914 914 # we want the ancestors, but inclusive
915 915 class lazyset(object):
916 916 def __init__(self, lazyvalues):
917 917 self.addedvalues = set()
918 918 self.lazyvalues = lazyvalues
919 919
920 920 def __contains__(self, value):
921 921 return value in self.addedvalues or value in self.lazyvalues
922 922
923 923 def __iter__(self):
924 924 added = self.addedvalues
925 925 for r in added:
926 926 yield r
927 927 for r in self.lazyvalues:
928 928 if not r in added:
929 929 yield r
930 930
931 931 def add(self, value):
932 932 self.addedvalues.add(value)
933 933
934 934 def update(self, values):
935 935 self.addedvalues.update(values)
936 936
937 937 has = lazyset(self.ancestors(common))
938 938 has.add(nullrev)
939 939 has.update(common)
940 940
941 941 # take all ancestors from heads that aren't in has
942 942 missing = set()
943 943 visit = collections.deque(r for r in heads if r not in has)
944 944 while visit:
945 945 r = visit.popleft()
946 946 if r in missing:
947 947 continue
948 948 else:
949 949 missing.add(r)
950 950 for p in self.parentrevs(r):
951 951 if p not in has:
952 952 visit.append(p)
953 953 missing = list(missing)
954 954 missing.sort()
955 955 return has, [self.node(miss) for miss in missing]
956 956
957 957 def incrementalmissingrevs(self, common=None):
958 958 """Return an object that can be used to incrementally compute the
959 959 revision numbers of the ancestors of arbitrary sets that are not
960 960 ancestors of common. This is an ancestor.incrementalmissingancestors
961 961 object.
962 962
963 963 'common' is a list of revision numbers. If common is not supplied, uses
964 964 nullrev.
965 965 """
966 966 if common is None:
967 967 common = [nullrev]
968 968
969 969 if rustancestor is not None:
970 970 return rustancestor.MissingAncestors(self.index, common)
971 971 return ancestor.incrementalmissingancestors(self.parentrevs, common)
972 972
973 973 def findmissingrevs(self, common=None, heads=None):
974 974 """Return the revision numbers of the ancestors of heads that
975 975 are not ancestors of common.
976 976
977 977 More specifically, return a list of revision numbers corresponding to
978 978 nodes N such that every N satisfies the following constraints:
979 979
980 980 1. N is an ancestor of some node in 'heads'
981 981 2. N is not an ancestor of any node in 'common'
982 982
983 983 The list is sorted by revision number, meaning it is
984 984 topologically sorted.
985 985
986 986 'heads' and 'common' are both lists of revision numbers. If heads is
987 987 not supplied, uses all of the revlog's heads. If common is not
988 988 supplied, uses nullid."""
989 989 if common is None:
990 990 common = [nullrev]
991 991 if heads is None:
992 992 heads = self.headrevs()
993 993
994 994 inc = self.incrementalmissingrevs(common=common)
995 995 return inc.missingancestors(heads)
996 996
997 997 def findmissing(self, common=None, heads=None):
998 998 """Return the ancestors of heads that are not ancestors of common.
999 999
1000 1000 More specifically, return a list of nodes N such that every N
1001 1001 satisfies the following constraints:
1002 1002
1003 1003 1. N is an ancestor of some node in 'heads'
1004 1004 2. N is not an ancestor of any node in 'common'
1005 1005
1006 1006 The list is sorted by revision number, meaning it is
1007 1007 topologically sorted.
1008 1008
1009 1009 'heads' and 'common' are both lists of node IDs. If heads is
1010 1010 not supplied, uses all of the revlog's heads. If common is not
1011 1011 supplied, uses nullid."""
1012 1012 if common is None:
1013 1013 common = [nullid]
1014 1014 if heads is None:
1015 1015 heads = self.heads()
1016 1016
1017 1017 common = [self.rev(n) for n in common]
1018 1018 heads = [self.rev(n) for n in heads]
1019 1019
1020 1020 inc = self.incrementalmissingrevs(common=common)
1021 1021 return [self.node(r) for r in inc.missingancestors(heads)]
1022 1022
1023 1023 def nodesbetween(self, roots=None, heads=None):
1024 1024 """Return a topological path from 'roots' to 'heads'.
1025 1025
1026 1026 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1027 1027 topologically sorted list of all nodes N that satisfy both of
1028 1028 these constraints:
1029 1029
1030 1030 1. N is a descendant of some node in 'roots'
1031 1031 2. N is an ancestor of some node in 'heads'
1032 1032
1033 1033 Every node is considered to be both a descendant and an ancestor
1034 1034 of itself, so every reachable node in 'roots' and 'heads' will be
1035 1035 included in 'nodes'.
1036 1036
1037 1037 'outroots' is the list of reachable nodes in 'roots', i.e., the
1038 1038 subset of 'roots' that is returned in 'nodes'. Likewise,
1039 1039 'outheads' is the subset of 'heads' that is also in 'nodes'.
1040 1040
1041 1041 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1042 1042 unspecified, uses nullid as the only root. If 'heads' is
1043 1043 unspecified, uses list of all of the revlog's heads."""
1044 1044 nonodes = ([], [], [])
1045 1045 if roots is not None:
1046 1046 roots = list(roots)
1047 1047 if not roots:
1048 1048 return nonodes
1049 1049 lowestrev = min([self.rev(n) for n in roots])
1050 1050 else:
1051 1051 roots = [nullid] # Everybody's a descendant of nullid
1052 1052 lowestrev = nullrev
1053 1053 if (lowestrev == nullrev) and (heads is None):
1054 1054 # We want _all_ the nodes!
1055 1055 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1056 1056 if heads is None:
1057 1057 # All nodes are ancestors, so the latest ancestor is the last
1058 1058 # node.
1059 1059 highestrev = len(self) - 1
1060 1060 # Set ancestors to None to signal that every node is an ancestor.
1061 1061 ancestors = None
1062 1062 # Set heads to an empty dictionary for later discovery of heads
1063 1063 heads = {}
1064 1064 else:
1065 1065 heads = list(heads)
1066 1066 if not heads:
1067 1067 return nonodes
1068 1068 ancestors = set()
1069 1069 # Turn heads into a dictionary so we can remove 'fake' heads.
1070 1070 # Also, later we will be using it to filter out the heads we can't
1071 1071 # find from roots.
1072 1072 heads = dict.fromkeys(heads, False)
1073 1073 # Start at the top and keep marking parents until we're done.
1074 1074 nodestotag = set(heads)
1075 1075 # Remember where the top was so we can use it as a limit later.
1076 1076 highestrev = max([self.rev(n) for n in nodestotag])
1077 1077 while nodestotag:
1078 1078 # grab a node to tag
1079 1079 n = nodestotag.pop()
1080 1080 # Never tag nullid
1081 1081 if n == nullid:
1082 1082 continue
1083 1083 # A node's revision number represents its place in a
1084 1084 # topologically sorted list of nodes.
1085 1085 r = self.rev(n)
1086 1086 if r >= lowestrev:
1087 1087 if n not in ancestors:
1088 1088 # If we are possibly a descendant of one of the roots
1089 1089 # and we haven't already been marked as an ancestor
1090 1090 ancestors.add(n) # Mark as ancestor
1091 1091 # Add non-nullid parents to list of nodes to tag.
1092 1092 nodestotag.update(
1093 1093 [p for p in self.parents(n) if p != nullid]
1094 1094 )
1095 1095 elif n in heads: # We've seen it before, is it a fake head?
1096 1096 # So it is, real heads should not be the ancestors of
1097 1097 # any other heads.
1098 1098 heads.pop(n)
1099 1099 if not ancestors:
1100 1100 return nonodes
1101 1101 # Now that we have our set of ancestors, we want to remove any
1102 1102 # roots that are not ancestors.
1103 1103
1104 1104 # If one of the roots was nullid, everything is included anyway.
1105 1105 if lowestrev > nullrev:
1106 1106 # But, since we weren't, let's recompute the lowest rev to not
1107 1107 # include roots that aren't ancestors.
1108 1108
1109 1109 # Filter out roots that aren't ancestors of heads
1110 1110 roots = [root for root in roots if root in ancestors]
1111 1111 # Recompute the lowest revision
1112 1112 if roots:
1113 1113 lowestrev = min([self.rev(root) for root in roots])
1114 1114 else:
1115 1115 # No more roots? Return empty list
1116 1116 return nonodes
1117 1117 else:
1118 1118 # We are descending from nullid, and don't need to care about
1119 1119 # any other roots.
1120 1120 lowestrev = nullrev
1121 1121 roots = [nullid]
1122 1122 # Transform our roots list into a set.
1123 1123 descendants = set(roots)
1124 1124 # Also, keep the original roots so we can filter out roots that aren't
1125 1125 # 'real' roots (i.e. are descended from other roots).
1126 1126 roots = descendants.copy()
1127 1127 # Our topologically sorted list of output nodes.
1128 1128 orderedout = []
1129 1129 # Don't start at nullid since we don't want nullid in our output list,
1130 1130 # and if nullid shows up in descendants, empty parents will look like
1131 1131 # they're descendants.
1132 1132 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1133 1133 n = self.node(r)
1134 1134 isdescendant = False
1135 1135 if lowestrev == nullrev: # Everybody is a descendant of nullid
1136 1136 isdescendant = True
1137 1137 elif n in descendants:
1138 1138 # n is already a descendant
1139 1139 isdescendant = True
1140 1140 # This check only needs to be done here because all the roots
1141 1141 # will start being marked is descendants before the loop.
1142 1142 if n in roots:
1143 1143 # If n was a root, check if it's a 'real' root.
1144 1144 p = tuple(self.parents(n))
1145 1145 # If any of its parents are descendants, it's not a root.
1146 1146 if (p[0] in descendants) or (p[1] in descendants):
1147 1147 roots.remove(n)
1148 1148 else:
1149 1149 p = tuple(self.parents(n))
1150 1150 # A node is a descendant if either of its parents are
1151 1151 # descendants. (We seeded the dependents list with the roots
1152 1152 # up there, remember?)
1153 1153 if (p[0] in descendants) or (p[1] in descendants):
1154 1154 descendants.add(n)
1155 1155 isdescendant = True
1156 1156 if isdescendant and ((ancestors is None) or (n in ancestors)):
1157 1157 # Only include nodes that are both descendants and ancestors.
1158 1158 orderedout.append(n)
1159 1159 if (ancestors is not None) and (n in heads):
1160 1160 # We're trying to figure out which heads are reachable
1161 1161 # from roots.
1162 1162 # Mark this head as having been reached
1163 1163 heads[n] = True
1164 1164 elif ancestors is None:
1165 1165 # Otherwise, we're trying to discover the heads.
1166 1166 # Assume this is a head because if it isn't, the next step
1167 1167 # will eventually remove it.
1168 1168 heads[n] = True
1169 1169 # But, obviously its parents aren't.
1170 1170 for p in self.parents(n):
1171 1171 heads.pop(p, None)
1172 1172 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1173 1173 roots = list(roots)
1174 1174 assert orderedout
1175 1175 assert roots
1176 1176 assert heads
1177 1177 return (orderedout, roots, heads)
1178 1178
1179 1179 def headrevs(self, revs=None):
1180 1180 if revs is None:
1181 1181 try:
1182 1182 return self.index.headrevs()
1183 1183 except AttributeError:
1184 1184 return self._headrevs()
1185 1185 if rustdagop is not None:
1186 1186 return rustdagop.headrevs(self.index, revs)
1187 1187 return dagop.headrevs(revs, self._uncheckedparentrevs)
1188 1188
1189 1189 def computephases(self, roots):
1190 1190 return self.index.computephasesmapsets(roots)
1191 1191
1192 1192 def _headrevs(self):
1193 1193 count = len(self)
1194 1194 if not count:
1195 1195 return [nullrev]
1196 1196 # we won't iter over filtered rev so nobody is a head at start
1197 1197 ishead = [0] * (count + 1)
1198 1198 index = self.index
1199 1199 for r in self:
1200 1200 ishead[r] = 1 # I may be an head
1201 1201 e = index[r]
1202 1202 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1203 1203 return [r for r, val in enumerate(ishead) if val]
1204 1204
1205 1205 def heads(self, start=None, stop=None):
1206 1206 """return the list of all nodes that have no children
1207 1207
1208 1208 if start is specified, only heads that are descendants of
1209 1209 start will be returned
1210 1210 if stop is specified, it will consider all the revs from stop
1211 1211 as if they had no children
1212 1212 """
1213 1213 if start is None and stop is None:
1214 1214 if not len(self):
1215 1215 return [nullid]
1216 1216 return [self.node(r) for r in self.headrevs()]
1217 1217
1218 1218 if start is None:
1219 1219 start = nullrev
1220 1220 else:
1221 1221 start = self.rev(start)
1222 1222
1223 1223 stoprevs = set(self.rev(n) for n in stop or [])
1224 1224
1225 1225 revs = dagop.headrevssubset(
1226 1226 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1227 1227 )
1228 1228
1229 1229 return [self.node(rev) for rev in revs]
1230 1230
1231 1231 def children(self, node):
1232 1232 """find the children of a given node"""
1233 1233 c = []
1234 1234 p = self.rev(node)
1235 1235 for r in self.revs(start=p + 1):
1236 1236 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1237 1237 if prevs:
1238 1238 for pr in prevs:
1239 1239 if pr == p:
1240 1240 c.append(self.node(r))
1241 1241 elif p == nullrev:
1242 1242 c.append(self.node(r))
1243 1243 return c
1244 1244
1245 1245 def commonancestorsheads(self, a, b):
1246 1246 """calculate all the heads of the common ancestors of nodes a and b"""
1247 1247 a, b = self.rev(a), self.rev(b)
1248 1248 ancs = self._commonancestorsheads(a, b)
1249 1249 return pycompat.maplist(self.node, ancs)
1250 1250
1251 1251 def _commonancestorsheads(self, *revs):
1252 1252 """calculate all the heads of the common ancestors of revs"""
1253 1253 try:
1254 1254 ancs = self.index.commonancestorsheads(*revs)
1255 1255 except (AttributeError, OverflowError): # C implementation failed
1256 1256 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1257 1257 return ancs
1258 1258
1259 1259 def isancestor(self, a, b):
1260 1260 """return True if node a is an ancestor of node b
1261 1261
1262 1262 A revision is considered an ancestor of itself."""
1263 1263 a, b = self.rev(a), self.rev(b)
1264 1264 return self.isancestorrev(a, b)
1265 1265
1266 1266 def isancestorrev(self, a, b):
1267 1267 """return True if revision a is an ancestor of revision b
1268 1268
1269 1269 A revision is considered an ancestor of itself.
1270 1270
1271 1271 The implementation of this is trivial but the use of
1272 1272 reachableroots is not."""
1273 1273 if a == nullrev:
1274 1274 return True
1275 1275 elif a == b:
1276 1276 return True
1277 1277 elif a > b:
1278 1278 return False
1279 1279 return bool(self.reachableroots(a, [b], [a], includepath=False))
1280 1280
1281 1281 def reachableroots(self, minroot, heads, roots, includepath=False):
1282 """return (heads(::<roots> and <roots>::<heads>))
1282 """return (heads(::(<roots> and <roots>::<heads>)))
1283 1283
1284 1284 If includepath is True, return (<roots>::<heads>)."""
1285 1285 try:
1286 1286 return self.index.reachableroots2(
1287 1287 minroot, heads, roots, includepath
1288 1288 )
1289 1289 except AttributeError:
1290 1290 return dagop._reachablerootspure(
1291 1291 self.parentrevs, minroot, roots, heads, includepath
1292 1292 )
1293 1293
1294 1294 def ancestor(self, a, b):
1295 1295 """calculate the "best" common ancestor of nodes a and b"""
1296 1296
1297 1297 a, b = self.rev(a), self.rev(b)
1298 1298 try:
1299 1299 ancs = self.index.ancestors(a, b)
1300 1300 except (AttributeError, OverflowError):
1301 1301 ancs = ancestor.ancestors(self.parentrevs, a, b)
1302 1302 if ancs:
1303 1303 # choose a consistent winner when there's a tie
1304 1304 return min(map(self.node, ancs))
1305 1305 return nullid
1306 1306
1307 1307 def _match(self, id):
1308 1308 if isinstance(id, int):
1309 1309 # rev
1310 1310 return self.node(id)
1311 1311 if len(id) == 20:
1312 1312 # possibly a binary node
1313 1313 # odds of a binary node being all hex in ASCII are 1 in 10**25
1314 1314 try:
1315 1315 node = id
1316 1316 self.rev(node) # quick search the index
1317 1317 return node
1318 1318 except error.LookupError:
1319 1319 pass # may be partial hex id
1320 1320 try:
1321 1321 # str(rev)
1322 1322 rev = int(id)
1323 1323 if b"%d" % rev != id:
1324 1324 raise ValueError
1325 1325 if rev < 0:
1326 1326 rev = len(self) + rev
1327 1327 if rev < 0 or rev >= len(self):
1328 1328 raise ValueError
1329 1329 return self.node(rev)
1330 1330 except (ValueError, OverflowError):
1331 1331 pass
1332 1332 if len(id) == 40:
1333 1333 try:
1334 1334 # a full hex nodeid?
1335 1335 node = bin(id)
1336 1336 self.rev(node)
1337 1337 return node
1338 1338 except (TypeError, error.LookupError):
1339 1339 pass
1340 1340
1341 1341 def _partialmatch(self, id):
1342 1342 # we don't care wdirfilenodeids as they should be always full hash
1343 1343 maybewdir = wdirhex.startswith(id)
1344 1344 try:
1345 1345 partial = self.index.partialmatch(id)
1346 1346 if partial and self.hasnode(partial):
1347 1347 if maybewdir:
1348 1348 # single 'ff...' match in radix tree, ambiguous with wdir
1349 1349 raise error.RevlogError
1350 1350 return partial
1351 1351 if maybewdir:
1352 1352 # no 'ff...' match in radix tree, wdir identified
1353 1353 raise error.WdirUnsupported
1354 1354 return None
1355 1355 except error.RevlogError:
1356 1356 # parsers.c radix tree lookup gave multiple matches
1357 1357 # fast path: for unfiltered changelog, radix tree is accurate
1358 1358 if not getattr(self, 'filteredrevs', None):
1359 1359 raise error.AmbiguousPrefixLookupError(
1360 1360 id, self.indexfile, _(b'ambiguous identifier')
1361 1361 )
1362 1362 # fall through to slow path that filters hidden revisions
1363 1363 except (AttributeError, ValueError):
1364 1364 # we are pure python, or key was too short to search radix tree
1365 1365 pass
1366 1366
1367 1367 if id in self._pcache:
1368 1368 return self._pcache[id]
1369 1369
1370 1370 if len(id) <= 40:
1371 1371 try:
1372 1372 # hex(node)[:...]
1373 1373 l = len(id) // 2 # grab an even number of digits
1374 1374 prefix = bin(id[: l * 2])
1375 1375 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1376 1376 nl = [
1377 1377 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1378 1378 ]
1379 1379 if nullhex.startswith(id):
1380 1380 nl.append(nullid)
1381 1381 if len(nl) > 0:
1382 1382 if len(nl) == 1 and not maybewdir:
1383 1383 self._pcache[id] = nl[0]
1384 1384 return nl[0]
1385 1385 raise error.AmbiguousPrefixLookupError(
1386 1386 id, self.indexfile, _(b'ambiguous identifier')
1387 1387 )
1388 1388 if maybewdir:
1389 1389 raise error.WdirUnsupported
1390 1390 return None
1391 1391 except TypeError:
1392 1392 pass
1393 1393
1394 1394 def lookup(self, id):
1395 1395 """locate a node based on:
1396 1396 - revision number or str(revision number)
1397 1397 - nodeid or subset of hex nodeid
1398 1398 """
1399 1399 n = self._match(id)
1400 1400 if n is not None:
1401 1401 return n
1402 1402 n = self._partialmatch(id)
1403 1403 if n:
1404 1404 return n
1405 1405
1406 1406 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1407 1407
1408 1408 def shortest(self, node, minlength=1):
1409 1409 """Find the shortest unambiguous prefix that matches node."""
1410 1410
1411 1411 def isvalid(prefix):
1412 1412 try:
1413 1413 matchednode = self._partialmatch(prefix)
1414 1414 except error.AmbiguousPrefixLookupError:
1415 1415 return False
1416 1416 except error.WdirUnsupported:
1417 1417 # single 'ff...' match
1418 1418 return True
1419 1419 if matchednode is None:
1420 1420 raise error.LookupError(node, self.indexfile, _(b'no node'))
1421 1421 return True
1422 1422
1423 1423 def maybewdir(prefix):
1424 1424 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1425 1425
1426 1426 hexnode = hex(node)
1427 1427
1428 1428 def disambiguate(hexnode, minlength):
1429 1429 """Disambiguate against wdirid."""
1430 1430 for length in range(minlength, 41):
1431 1431 prefix = hexnode[:length]
1432 1432 if not maybewdir(prefix):
1433 1433 return prefix
1434 1434
1435 1435 if not getattr(self, 'filteredrevs', None):
1436 1436 try:
1437 1437 length = max(self.index.shortest(node), minlength)
1438 1438 return disambiguate(hexnode, length)
1439 1439 except error.RevlogError:
1440 1440 if node != wdirid:
1441 1441 raise error.LookupError(node, self.indexfile, _(b'no node'))
1442 1442 except AttributeError:
1443 1443 # Fall through to pure code
1444 1444 pass
1445 1445
1446 1446 if node == wdirid:
1447 1447 for length in range(minlength, 41):
1448 1448 prefix = hexnode[:length]
1449 1449 if isvalid(prefix):
1450 1450 return prefix
1451 1451
1452 1452 for length in range(minlength, 41):
1453 1453 prefix = hexnode[:length]
1454 1454 if isvalid(prefix):
1455 1455 return disambiguate(hexnode, length)
1456 1456
1457 1457 def cmp(self, node, text):
1458 1458 """compare text with a given file revision
1459 1459
1460 1460 returns True if text is different than what is stored.
1461 1461 """
1462 1462 p1, p2 = self.parents(node)
1463 1463 return storageutil.hashrevisionsha1(text, p1, p2) != node
1464 1464
1465 1465 def _cachesegment(self, offset, data):
1466 1466 """Add a segment to the revlog cache.
1467 1467
1468 1468 Accepts an absolute offset and the data that is at that location.
1469 1469 """
1470 1470 o, d = self._chunkcache
1471 1471 # try to add to existing cache
1472 1472 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1473 1473 self._chunkcache = o, d + data
1474 1474 else:
1475 1475 self._chunkcache = offset, data
1476 1476
1477 1477 def _readsegment(self, offset, length, df=None):
1478 1478 """Load a segment of raw data from the revlog.
1479 1479
1480 1480 Accepts an absolute offset, length to read, and an optional existing
1481 1481 file handle to read from.
1482 1482
1483 1483 If an existing file handle is passed, it will be seeked and the
1484 1484 original seek position will NOT be restored.
1485 1485
1486 1486 Returns a str or buffer of raw byte data.
1487 1487
1488 1488 Raises if the requested number of bytes could not be read.
1489 1489 """
1490 1490 # Cache data both forward and backward around the requested
1491 1491 # data, in a fixed size window. This helps speed up operations
1492 1492 # involving reading the revlog backwards.
1493 1493 cachesize = self._chunkcachesize
1494 1494 realoffset = offset & ~(cachesize - 1)
1495 1495 reallength = (
1496 1496 (offset + length + cachesize) & ~(cachesize - 1)
1497 1497 ) - realoffset
1498 1498 with self._datareadfp(df) as df:
1499 1499 df.seek(realoffset)
1500 1500 d = df.read(reallength)
1501 1501
1502 1502 self._cachesegment(realoffset, d)
1503 1503 if offset != realoffset or reallength != length:
1504 1504 startoffset = offset - realoffset
1505 1505 if len(d) - startoffset < length:
1506 1506 raise error.RevlogError(
1507 1507 _(
1508 1508 b'partial read of revlog %s; expected %d bytes from '
1509 1509 b'offset %d, got %d'
1510 1510 )
1511 1511 % (
1512 1512 self.indexfile if self._inline else self.datafile,
1513 1513 length,
1514 1514 realoffset,
1515 1515 len(d) - startoffset,
1516 1516 )
1517 1517 )
1518 1518
1519 1519 return util.buffer(d, startoffset, length)
1520 1520
1521 1521 if len(d) < length:
1522 1522 raise error.RevlogError(
1523 1523 _(
1524 1524 b'partial read of revlog %s; expected %d bytes from offset '
1525 1525 b'%d, got %d'
1526 1526 )
1527 1527 % (
1528 1528 self.indexfile if self._inline else self.datafile,
1529 1529 length,
1530 1530 offset,
1531 1531 len(d),
1532 1532 )
1533 1533 )
1534 1534
1535 1535 return d
1536 1536
1537 1537 def _getsegment(self, offset, length, df=None):
1538 1538 """Obtain a segment of raw data from the revlog.
1539 1539
1540 1540 Accepts an absolute offset, length of bytes to obtain, and an
1541 1541 optional file handle to the already-opened revlog. If the file
1542 1542 handle is used, it's original seek position will not be preserved.
1543 1543
1544 1544 Requests for data may be returned from a cache.
1545 1545
1546 1546 Returns a str or a buffer instance of raw byte data.
1547 1547 """
1548 1548 o, d = self._chunkcache
1549 1549 l = len(d)
1550 1550
1551 1551 # is it in the cache?
1552 1552 cachestart = offset - o
1553 1553 cacheend = cachestart + length
1554 1554 if cachestart >= 0 and cacheend <= l:
1555 1555 if cachestart == 0 and cacheend == l:
1556 1556 return d # avoid a copy
1557 1557 return util.buffer(d, cachestart, cacheend - cachestart)
1558 1558
1559 1559 return self._readsegment(offset, length, df=df)
1560 1560
1561 1561 def _getsegmentforrevs(self, startrev, endrev, df=None):
1562 1562 """Obtain a segment of raw data corresponding to a range of revisions.
1563 1563
1564 1564 Accepts the start and end revisions and an optional already-open
1565 1565 file handle to be used for reading. If the file handle is read, its
1566 1566 seek position will not be preserved.
1567 1567
1568 1568 Requests for data may be satisfied by a cache.
1569 1569
1570 1570 Returns a 2-tuple of (offset, data) for the requested range of
1571 1571 revisions. Offset is the integer offset from the beginning of the
1572 1572 revlog and data is a str or buffer of the raw byte data.
1573 1573
1574 1574 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1575 1575 to determine where each revision's data begins and ends.
1576 1576 """
1577 1577 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1578 1578 # (functions are expensive).
1579 1579 index = self.index
1580 1580 istart = index[startrev]
1581 1581 start = int(istart[0] >> 16)
1582 1582 if startrev == endrev:
1583 1583 end = start + istart[1]
1584 1584 else:
1585 1585 iend = index[endrev]
1586 1586 end = int(iend[0] >> 16) + iend[1]
1587 1587
1588 1588 if self._inline:
1589 1589 start += (startrev + 1) * self._io.size
1590 1590 end += (endrev + 1) * self._io.size
1591 1591 length = end - start
1592 1592
1593 1593 return start, self._getsegment(start, length, df=df)
1594 1594
1595 1595 def _chunk(self, rev, df=None):
1596 1596 """Obtain a single decompressed chunk for a revision.
1597 1597
1598 1598 Accepts an integer revision and an optional already-open file handle
1599 1599 to be used for reading. If used, the seek position of the file will not
1600 1600 be preserved.
1601 1601
1602 1602 Returns a str holding uncompressed data for the requested revision.
1603 1603 """
1604 1604 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1605 1605
1606 1606 def _chunks(self, revs, df=None, targetsize=None):
1607 1607 """Obtain decompressed chunks for the specified revisions.
1608 1608
1609 1609 Accepts an iterable of numeric revisions that are assumed to be in
1610 1610 ascending order. Also accepts an optional already-open file handle
1611 1611 to be used for reading. If used, the seek position of the file will
1612 1612 not be preserved.
1613 1613
1614 1614 This function is similar to calling ``self._chunk()`` multiple times,
1615 1615 but is faster.
1616 1616
1617 1617 Returns a list with decompressed data for each requested revision.
1618 1618 """
1619 1619 if not revs:
1620 1620 return []
1621 1621 start = self.start
1622 1622 length = self.length
1623 1623 inline = self._inline
1624 1624 iosize = self._io.size
1625 1625 buffer = util.buffer
1626 1626
1627 1627 l = []
1628 1628 ladd = l.append
1629 1629
1630 1630 if not self._withsparseread:
1631 1631 slicedchunks = (revs,)
1632 1632 else:
1633 1633 slicedchunks = deltautil.slicechunk(
1634 1634 self, revs, targetsize=targetsize
1635 1635 )
1636 1636
1637 1637 for revschunk in slicedchunks:
1638 1638 firstrev = revschunk[0]
1639 1639 # Skip trailing revisions with empty diff
1640 1640 for lastrev in revschunk[::-1]:
1641 1641 if length(lastrev) != 0:
1642 1642 break
1643 1643
1644 1644 try:
1645 1645 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1646 1646 except OverflowError:
1647 1647 # issue4215 - we can't cache a run of chunks greater than
1648 1648 # 2G on Windows
1649 1649 return [self._chunk(rev, df=df) for rev in revschunk]
1650 1650
1651 1651 decomp = self.decompress
1652 1652 for rev in revschunk:
1653 1653 chunkstart = start(rev)
1654 1654 if inline:
1655 1655 chunkstart += (rev + 1) * iosize
1656 1656 chunklength = length(rev)
1657 1657 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1658 1658
1659 1659 return l
1660 1660
1661 1661 def _chunkclear(self):
1662 1662 """Clear the raw chunk cache."""
1663 1663 self._chunkcache = (0, b'')
1664 1664
1665 1665 def deltaparent(self, rev):
1666 1666 """return deltaparent of the given revision"""
1667 1667 base = self.index[rev][3]
1668 1668 if base == rev:
1669 1669 return nullrev
1670 1670 elif self._generaldelta:
1671 1671 return base
1672 1672 else:
1673 1673 return rev - 1
1674 1674
1675 1675 def issnapshot(self, rev):
1676 1676 """tells whether rev is a snapshot
1677 1677 """
1678 1678 if not self._sparserevlog:
1679 1679 return self.deltaparent(rev) == nullrev
1680 1680 elif util.safehasattr(self.index, b'issnapshot'):
1681 1681 # directly assign the method to cache the testing and access
1682 1682 self.issnapshot = self.index.issnapshot
1683 1683 return self.issnapshot(rev)
1684 1684 if rev == nullrev:
1685 1685 return True
1686 1686 entry = self.index[rev]
1687 1687 base = entry[3]
1688 1688 if base == rev:
1689 1689 return True
1690 1690 if base == nullrev:
1691 1691 return True
1692 1692 p1 = entry[5]
1693 1693 p2 = entry[6]
1694 1694 if base == p1 or base == p2:
1695 1695 return False
1696 1696 return self.issnapshot(base)
1697 1697
1698 1698 def snapshotdepth(self, rev):
1699 1699 """number of snapshot in the chain before this one"""
1700 1700 if not self.issnapshot(rev):
1701 1701 raise error.ProgrammingError(b'revision %d not a snapshot')
1702 1702 return len(self._deltachain(rev)[0]) - 1
1703 1703
1704 1704 def revdiff(self, rev1, rev2):
1705 1705 """return or calculate a delta between two revisions
1706 1706
1707 1707 The delta calculated is in binary form and is intended to be written to
1708 1708 revlog data directly. So this function needs raw revision data.
1709 1709 """
1710 1710 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1711 1711 return bytes(self._chunk(rev2))
1712 1712
1713 1713 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1714 1714
1715 1715 def _processflags(self, text, flags, operation, raw=False):
1716 1716 """deprecated entry point to access flag processors"""
1717 1717 msg = b'_processflag(...) use the specialized variant'
1718 1718 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1719 1719 if raw:
1720 1720 return text, flagutil.processflagsraw(self, text, flags)
1721 1721 elif operation == b'read':
1722 1722 return flagutil.processflagsread(self, text, flags)
1723 1723 else: # write operation
1724 1724 return flagutil.processflagswrite(self, text, flags)
1725 1725
1726 1726 def revision(self, nodeorrev, _df=None, raw=False):
1727 1727 """return an uncompressed revision of a given node or revision
1728 1728 number.
1729 1729
1730 1730 _df - an existing file handle to read from. (internal-only)
1731 1731 raw - an optional argument specifying if the revision data is to be
1732 1732 treated as raw data when applying flag transforms. 'raw' should be set
1733 1733 to True when generating changegroups or in debug commands.
1734 1734 """
1735 1735 if raw:
1736 1736 msg = (
1737 1737 b'revlog.revision(..., raw=True) is deprecated, '
1738 1738 b'use revlog.rawdata(...)'
1739 1739 )
1740 1740 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1741 1741 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1742 1742
1743 1743 def sidedata(self, nodeorrev, _df=None):
1744 1744 """a map of extra data related to the changeset but not part of the hash
1745 1745
1746 1746 This function currently return a dictionary. However, more advanced
1747 1747 mapping object will likely be used in the future for a more
1748 1748 efficient/lazy code.
1749 1749 """
1750 1750 return self._revisiondata(nodeorrev, _df)[1]
1751 1751
1752 1752 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1753 1753 # deal with <nodeorrev> argument type
1754 1754 if isinstance(nodeorrev, int):
1755 1755 rev = nodeorrev
1756 1756 node = self.node(rev)
1757 1757 else:
1758 1758 node = nodeorrev
1759 1759 rev = None
1760 1760
1761 1761 # fast path the special `nullid` rev
1762 1762 if node == nullid:
1763 1763 return b"", {}
1764 1764
1765 1765 # The text as stored inside the revlog. Might be the revision or might
1766 1766 # need to be processed to retrieve the revision.
1767 1767 rawtext = None
1768 1768
1769 1769 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1770 1770
1771 1771 if raw and validated:
1772 1772 # if we don't want to process the raw text and that raw
1773 1773 # text is cached, we can exit early.
1774 1774 return rawtext, {}
1775 1775 if rev is None:
1776 1776 rev = self.rev(node)
1777 1777 # the revlog's flag for this revision
1778 1778 # (usually alter its state or content)
1779 1779 flags = self.flags(rev)
1780 1780
1781 1781 if validated and flags == REVIDX_DEFAULT_FLAGS:
1782 1782 # no extra flags set, no flag processor runs, text = rawtext
1783 1783 return rawtext, {}
1784 1784
1785 1785 sidedata = {}
1786 1786 if raw:
1787 1787 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1788 1788 text = rawtext
1789 1789 else:
1790 1790 try:
1791 1791 r = flagutil.processflagsread(self, rawtext, flags)
1792 1792 except error.SidedataHashError as exc:
1793 1793 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1794 1794 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1795 1795 raise error.RevlogError(msg)
1796 1796 text, validatehash, sidedata = r
1797 1797 if validatehash:
1798 1798 self.checkhash(text, node, rev=rev)
1799 1799 if not validated:
1800 1800 self._revisioncache = (node, rev, rawtext)
1801 1801
1802 1802 return text, sidedata
1803 1803
1804 1804 def _rawtext(self, node, rev, _df=None):
1805 1805 """return the possibly unvalidated rawtext for a revision
1806 1806
1807 1807 returns (rev, rawtext, validated)
1808 1808 """
1809 1809
1810 1810 # revision in the cache (could be useful to apply delta)
1811 1811 cachedrev = None
1812 1812 # An intermediate text to apply deltas to
1813 1813 basetext = None
1814 1814
1815 1815 # Check if we have the entry in cache
1816 1816 # The cache entry looks like (node, rev, rawtext)
1817 1817 if self._revisioncache:
1818 1818 if self._revisioncache[0] == node:
1819 1819 return (rev, self._revisioncache[2], True)
1820 1820 cachedrev = self._revisioncache[1]
1821 1821
1822 1822 if rev is None:
1823 1823 rev = self.rev(node)
1824 1824
1825 1825 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1826 1826 if stopped:
1827 1827 basetext = self._revisioncache[2]
1828 1828
1829 1829 # drop cache to save memory, the caller is expected to
1830 1830 # update self._revisioncache after validating the text
1831 1831 self._revisioncache = None
1832 1832
1833 1833 targetsize = None
1834 1834 rawsize = self.index[rev][2]
1835 1835 if 0 <= rawsize:
1836 1836 targetsize = 4 * rawsize
1837 1837
1838 1838 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1839 1839 if basetext is None:
1840 1840 basetext = bytes(bins[0])
1841 1841 bins = bins[1:]
1842 1842
1843 1843 rawtext = mdiff.patches(basetext, bins)
1844 1844 del basetext # let us have a chance to free memory early
1845 1845 return (rev, rawtext, False)
1846 1846
1847 1847 def rawdata(self, nodeorrev, _df=None):
1848 1848 """return an uncompressed raw data of a given node or revision number.
1849 1849
1850 1850 _df - an existing file handle to read from. (internal-only)
1851 1851 """
1852 1852 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1853 1853
1854 1854 def hash(self, text, p1, p2):
1855 1855 """Compute a node hash.
1856 1856
1857 1857 Available as a function so that subclasses can replace the hash
1858 1858 as needed.
1859 1859 """
1860 1860 return storageutil.hashrevisionsha1(text, p1, p2)
1861 1861
1862 1862 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1863 1863 """Check node hash integrity.
1864 1864
1865 1865 Available as a function so that subclasses can extend hash mismatch
1866 1866 behaviors as needed.
1867 1867 """
1868 1868 try:
1869 1869 if p1 is None and p2 is None:
1870 1870 p1, p2 = self.parents(node)
1871 1871 if node != self.hash(text, p1, p2):
1872 1872 # Clear the revision cache on hash failure. The revision cache
1873 1873 # only stores the raw revision and clearing the cache does have
1874 1874 # the side-effect that we won't have a cache hit when the raw
1875 1875 # revision data is accessed. But this case should be rare and
1876 1876 # it is extra work to teach the cache about the hash
1877 1877 # verification state.
1878 1878 if self._revisioncache and self._revisioncache[0] == node:
1879 1879 self._revisioncache = None
1880 1880
1881 1881 revornode = rev
1882 1882 if revornode is None:
1883 1883 revornode = templatefilters.short(hex(node))
1884 1884 raise error.RevlogError(
1885 1885 _(b"integrity check failed on %s:%s")
1886 1886 % (self.indexfile, pycompat.bytestr(revornode))
1887 1887 )
1888 1888 except error.RevlogError:
1889 1889 if self._censorable and storageutil.iscensoredtext(text):
1890 1890 raise error.CensoredNodeError(self.indexfile, node, text)
1891 1891 raise
1892 1892
1893 1893 def _enforceinlinesize(self, tr, fp=None):
1894 1894 """Check if the revlog is too big for inline and convert if so.
1895 1895
1896 1896 This should be called after revisions are added to the revlog. If the
1897 1897 revlog has grown too large to be an inline revlog, it will convert it
1898 1898 to use multiple index and data files.
1899 1899 """
1900 1900 tiprev = len(self) - 1
1901 1901 if (
1902 1902 not self._inline
1903 1903 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1904 1904 ):
1905 1905 return
1906 1906
1907 1907 trinfo = tr.find(self.indexfile)
1908 1908 if trinfo is None:
1909 1909 raise error.RevlogError(
1910 1910 _(b"%s not found in the transaction") % self.indexfile
1911 1911 )
1912 1912
1913 1913 trindex = trinfo[2]
1914 1914 if trindex is not None:
1915 1915 dataoff = self.start(trindex)
1916 1916 else:
1917 1917 # revlog was stripped at start of transaction, use all leftover data
1918 1918 trindex = len(self) - 1
1919 1919 dataoff = self.end(tiprev)
1920 1920
1921 1921 tr.add(self.datafile, dataoff)
1922 1922
1923 1923 if fp:
1924 1924 fp.flush()
1925 1925 fp.close()
1926 1926 # We can't use the cached file handle after close(). So prevent
1927 1927 # its usage.
1928 1928 self._writinghandles = None
1929 1929
1930 1930 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1931 1931 for r in self:
1932 1932 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1933 1933
1934 1934 with self._indexfp(b'w') as fp:
1935 1935 self.version &= ~FLAG_INLINE_DATA
1936 1936 self._inline = False
1937 1937 io = self._io
1938 1938 for i in self:
1939 1939 e = io.packentry(self.index[i], self.node, self.version, i)
1940 1940 fp.write(e)
1941 1941
1942 1942 # the temp file replace the real index when we exit the context
1943 1943 # manager
1944 1944
1945 1945 tr.replace(self.indexfile, trindex * self._io.size)
1946 1946 self._chunkclear()
1947 1947
1948 1948 def _nodeduplicatecallback(self, transaction, node):
1949 1949 """called when trying to add a node already stored.
1950 1950 """
1951 1951
1952 1952 def addrevision(
1953 1953 self,
1954 1954 text,
1955 1955 transaction,
1956 1956 link,
1957 1957 p1,
1958 1958 p2,
1959 1959 cachedelta=None,
1960 1960 node=None,
1961 1961 flags=REVIDX_DEFAULT_FLAGS,
1962 1962 deltacomputer=None,
1963 1963 sidedata=None,
1964 1964 ):
1965 1965 """add a revision to the log
1966 1966
1967 1967 text - the revision data to add
1968 1968 transaction - the transaction object used for rollback
1969 1969 link - the linkrev data to add
1970 1970 p1, p2 - the parent nodeids of the revision
1971 1971 cachedelta - an optional precomputed delta
1972 1972 node - nodeid of revision; typically node is not specified, and it is
1973 1973 computed by default as hash(text, p1, p2), however subclasses might
1974 1974 use different hashing method (and override checkhash() in such case)
1975 1975 flags - the known flags to set on the revision
1976 1976 deltacomputer - an optional deltacomputer instance shared between
1977 1977 multiple calls
1978 1978 """
1979 1979 if link == nullrev:
1980 1980 raise error.RevlogError(
1981 1981 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1982 1982 )
1983 1983
1984 1984 if sidedata is None:
1985 1985 sidedata = {}
1986 1986 flags = flags & ~REVIDX_SIDEDATA
1987 1987 elif not self.hassidedata:
1988 1988 raise error.ProgrammingError(
1989 1989 _(b"trying to add sidedata to a revlog who don't support them")
1990 1990 )
1991 1991 else:
1992 1992 flags |= REVIDX_SIDEDATA
1993 1993
1994 1994 if flags:
1995 1995 node = node or self.hash(text, p1, p2)
1996 1996
1997 1997 rawtext, validatehash = flagutil.processflagswrite(
1998 1998 self, text, flags, sidedata=sidedata
1999 1999 )
2000 2000
2001 2001 # If the flag processor modifies the revision data, ignore any provided
2002 2002 # cachedelta.
2003 2003 if rawtext != text:
2004 2004 cachedelta = None
2005 2005
2006 2006 if len(rawtext) > _maxentrysize:
2007 2007 raise error.RevlogError(
2008 2008 _(
2009 2009 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2010 2010 )
2011 2011 % (self.indexfile, len(rawtext))
2012 2012 )
2013 2013
2014 2014 node = node or self.hash(rawtext, p1, p2)
2015 2015 if self.index.has_node(node):
2016 2016 return node
2017 2017
2018 2018 if validatehash:
2019 2019 self.checkhash(rawtext, node, p1=p1, p2=p2)
2020 2020
2021 2021 return self.addrawrevision(
2022 2022 rawtext,
2023 2023 transaction,
2024 2024 link,
2025 2025 p1,
2026 2026 p2,
2027 2027 node,
2028 2028 flags,
2029 2029 cachedelta=cachedelta,
2030 2030 deltacomputer=deltacomputer,
2031 2031 )
2032 2032
2033 2033 def addrawrevision(
2034 2034 self,
2035 2035 rawtext,
2036 2036 transaction,
2037 2037 link,
2038 2038 p1,
2039 2039 p2,
2040 2040 node,
2041 2041 flags,
2042 2042 cachedelta=None,
2043 2043 deltacomputer=None,
2044 2044 ):
2045 2045 """add a raw revision with known flags, node and parents
2046 2046 useful when reusing a revision not stored in this revlog (ex: received
2047 2047 over wire, or read from an external bundle).
2048 2048 """
2049 2049 dfh = None
2050 2050 if not self._inline:
2051 2051 dfh = self._datafp(b"a+")
2052 2052 ifh = self._indexfp(b"a+")
2053 2053 try:
2054 2054 return self._addrevision(
2055 2055 node,
2056 2056 rawtext,
2057 2057 transaction,
2058 2058 link,
2059 2059 p1,
2060 2060 p2,
2061 2061 flags,
2062 2062 cachedelta,
2063 2063 ifh,
2064 2064 dfh,
2065 2065 deltacomputer=deltacomputer,
2066 2066 )
2067 2067 finally:
2068 2068 if dfh:
2069 2069 dfh.close()
2070 2070 ifh.close()
2071 2071
2072 2072 def compress(self, data):
2073 2073 """Generate a possibly-compressed representation of data."""
2074 2074 if not data:
2075 2075 return b'', data
2076 2076
2077 2077 compressed = self._compressor.compress(data)
2078 2078
2079 2079 if compressed:
2080 2080 # The revlog compressor added the header in the returned data.
2081 2081 return b'', compressed
2082 2082
2083 2083 if data[0:1] == b'\0':
2084 2084 return b'', data
2085 2085 return b'u', data
2086 2086
2087 2087 def decompress(self, data):
2088 2088 """Decompress a revlog chunk.
2089 2089
2090 2090 The chunk is expected to begin with a header identifying the
2091 2091 format type so it can be routed to an appropriate decompressor.
2092 2092 """
2093 2093 if not data:
2094 2094 return data
2095 2095
2096 2096 # Revlogs are read much more frequently than they are written and many
2097 2097 # chunks only take microseconds to decompress, so performance is
2098 2098 # important here.
2099 2099 #
2100 2100 # We can make a few assumptions about revlogs:
2101 2101 #
2102 2102 # 1) the majority of chunks will be compressed (as opposed to inline
2103 2103 # raw data).
2104 2104 # 2) decompressing *any* data will likely by at least 10x slower than
2105 2105 # returning raw inline data.
2106 2106 # 3) we want to prioritize common and officially supported compression
2107 2107 # engines
2108 2108 #
2109 2109 # It follows that we want to optimize for "decompress compressed data
2110 2110 # when encoded with common and officially supported compression engines"
2111 2111 # case over "raw data" and "data encoded by less common or non-official
2112 2112 # compression engines." That is why we have the inline lookup first
2113 2113 # followed by the compengines lookup.
2114 2114 #
2115 2115 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2116 2116 # compressed chunks. And this matters for changelog and manifest reads.
2117 2117 t = data[0:1]
2118 2118
2119 2119 if t == b'x':
2120 2120 try:
2121 2121 return _zlibdecompress(data)
2122 2122 except zlib.error as e:
2123 2123 raise error.RevlogError(
2124 2124 _(b'revlog decompress error: %s')
2125 2125 % stringutil.forcebytestr(e)
2126 2126 )
2127 2127 # '\0' is more common than 'u' so it goes first.
2128 2128 elif t == b'\0':
2129 2129 return data
2130 2130 elif t == b'u':
2131 2131 return util.buffer(data, 1)
2132 2132
2133 2133 try:
2134 2134 compressor = self._decompressors[t]
2135 2135 except KeyError:
2136 2136 try:
2137 2137 engine = util.compengines.forrevlogheader(t)
2138 2138 compressor = engine.revlogcompressor(self._compengineopts)
2139 2139 self._decompressors[t] = compressor
2140 2140 except KeyError:
2141 2141 raise error.RevlogError(_(b'unknown compression type %r') % t)
2142 2142
2143 2143 return compressor.decompress(data)
2144 2144
2145 2145 def _addrevision(
2146 2146 self,
2147 2147 node,
2148 2148 rawtext,
2149 2149 transaction,
2150 2150 link,
2151 2151 p1,
2152 2152 p2,
2153 2153 flags,
2154 2154 cachedelta,
2155 2155 ifh,
2156 2156 dfh,
2157 2157 alwayscache=False,
2158 2158 deltacomputer=None,
2159 2159 ):
2160 2160 """internal function to add revisions to the log
2161 2161
2162 2162 see addrevision for argument descriptions.
2163 2163
2164 2164 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2165 2165
2166 2166 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2167 2167 be used.
2168 2168
2169 2169 invariants:
2170 2170 - rawtext is optional (can be None); if not set, cachedelta must be set.
2171 2171 if both are set, they must correspond to each other.
2172 2172 """
2173 2173 if node == nullid:
2174 2174 raise error.RevlogError(
2175 2175 _(b"%s: attempt to add null revision") % self.indexfile
2176 2176 )
2177 2177 if node == wdirid or node in wdirfilenodeids:
2178 2178 raise error.RevlogError(
2179 2179 _(b"%s: attempt to add wdir revision") % self.indexfile
2180 2180 )
2181 2181
2182 2182 if self._inline:
2183 2183 fh = ifh
2184 2184 else:
2185 2185 fh = dfh
2186 2186
2187 2187 btext = [rawtext]
2188 2188
2189 2189 curr = len(self)
2190 2190 prev = curr - 1
2191 2191 offset = self.end(prev)
2192 2192 p1r, p2r = self.rev(p1), self.rev(p2)
2193 2193
2194 2194 # full versions are inserted when the needed deltas
2195 2195 # become comparable to the uncompressed text
2196 2196 if rawtext is None:
2197 2197 # need rawtext size, before changed by flag processors, which is
2198 2198 # the non-raw size. use revlog explicitly to avoid filelog's extra
2199 2199 # logic that might remove metadata size.
2200 2200 textlen = mdiff.patchedsize(
2201 2201 revlog.size(self, cachedelta[0]), cachedelta[1]
2202 2202 )
2203 2203 else:
2204 2204 textlen = len(rawtext)
2205 2205
2206 2206 if deltacomputer is None:
2207 2207 deltacomputer = deltautil.deltacomputer(self)
2208 2208
2209 2209 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2210 2210
2211 2211 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2212 2212
2213 2213 e = (
2214 2214 offset_type(offset, flags),
2215 2215 deltainfo.deltalen,
2216 2216 textlen,
2217 2217 deltainfo.base,
2218 2218 link,
2219 2219 p1r,
2220 2220 p2r,
2221 2221 node,
2222 2222 )
2223 2223 self.index.append(e)
2224 2224
2225 2225 entry = self._io.packentry(e, self.node, self.version, curr)
2226 2226 self._writeentry(
2227 2227 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2228 2228 )
2229 2229
2230 2230 rawtext = btext[0]
2231 2231
2232 2232 if alwayscache and rawtext is None:
2233 2233 rawtext = deltacomputer.buildtext(revinfo, fh)
2234 2234
2235 2235 if type(rawtext) == bytes: # only accept immutable objects
2236 2236 self._revisioncache = (node, curr, rawtext)
2237 2237 self._chainbasecache[curr] = deltainfo.chainbase
2238 2238 return node
2239 2239
2240 2240 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2241 2241 # Files opened in a+ mode have inconsistent behavior on various
2242 2242 # platforms. Windows requires that a file positioning call be made
2243 2243 # when the file handle transitions between reads and writes. See
2244 2244 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2245 2245 # platforms, Python or the platform itself can be buggy. Some versions
2246 2246 # of Solaris have been observed to not append at the end of the file
2247 2247 # if the file was seeked to before the end. See issue4943 for more.
2248 2248 #
2249 2249 # We work around this issue by inserting a seek() before writing.
2250 2250 # Note: This is likely not necessary on Python 3. However, because
2251 2251 # the file handle is reused for reads and may be seeked there, we need
2252 2252 # to be careful before changing this.
2253 2253 ifh.seek(0, os.SEEK_END)
2254 2254 if dfh:
2255 2255 dfh.seek(0, os.SEEK_END)
2256 2256
2257 2257 curr = len(self) - 1
2258 2258 if not self._inline:
2259 2259 transaction.add(self.datafile, offset)
2260 2260 transaction.add(self.indexfile, curr * len(entry))
2261 2261 if data[0]:
2262 2262 dfh.write(data[0])
2263 2263 dfh.write(data[1])
2264 2264 ifh.write(entry)
2265 2265 else:
2266 2266 offset += curr * self._io.size
2267 2267 transaction.add(self.indexfile, offset, curr)
2268 2268 ifh.write(entry)
2269 2269 ifh.write(data[0])
2270 2270 ifh.write(data[1])
2271 2271 self._enforceinlinesize(transaction, ifh)
2272 2272
2273 2273 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2274 2274 """
2275 2275 add a delta group
2276 2276
2277 2277 given a set of deltas, add them to the revision log. the
2278 2278 first delta is against its parent, which should be in our
2279 2279 log, the rest are against the previous delta.
2280 2280
2281 2281 If ``addrevisioncb`` is defined, it will be called with arguments of
2282 2282 this revlog and the node that was added.
2283 2283 """
2284 2284
2285 2285 if self._writinghandles:
2286 2286 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2287 2287
2288 2288 nodes = []
2289 2289
2290 2290 r = len(self)
2291 2291 end = 0
2292 2292 if r:
2293 2293 end = self.end(r - 1)
2294 2294 ifh = self._indexfp(b"a+")
2295 2295 isize = r * self._io.size
2296 2296 if self._inline:
2297 2297 transaction.add(self.indexfile, end + isize, r)
2298 2298 dfh = None
2299 2299 else:
2300 2300 transaction.add(self.indexfile, isize, r)
2301 2301 transaction.add(self.datafile, end)
2302 2302 dfh = self._datafp(b"a+")
2303 2303
2304 2304 def flush():
2305 2305 if dfh:
2306 2306 dfh.flush()
2307 2307 ifh.flush()
2308 2308
2309 2309 self._writinghandles = (ifh, dfh)
2310 2310
2311 2311 try:
2312 2312 deltacomputer = deltautil.deltacomputer(self)
2313 2313 # loop through our set of deltas
2314 2314 for data in deltas:
2315 2315 node, p1, p2, linknode, deltabase, delta, flags = data
2316 2316 link = linkmapper(linknode)
2317 2317 flags = flags or REVIDX_DEFAULT_FLAGS
2318 2318
2319 2319 nodes.append(node)
2320 2320
2321 2321 if self.index.has_node(node):
2322 2322 self._nodeduplicatecallback(transaction, node)
2323 2323 # this can happen if two branches make the same change
2324 2324 continue
2325 2325
2326 2326 for p in (p1, p2):
2327 2327 if not self.index.has_node(p):
2328 2328 raise error.LookupError(
2329 2329 p, self.indexfile, _(b'unknown parent')
2330 2330 )
2331 2331
2332 2332 if not self.index.has_node(deltabase):
2333 2333 raise error.LookupError(
2334 2334 deltabase, self.indexfile, _(b'unknown delta base')
2335 2335 )
2336 2336
2337 2337 baserev = self.rev(deltabase)
2338 2338
2339 2339 if baserev != nullrev and self.iscensored(baserev):
2340 2340 # if base is censored, delta must be full replacement in a
2341 2341 # single patch operation
2342 2342 hlen = struct.calcsize(b">lll")
2343 2343 oldlen = self.rawsize(baserev)
2344 2344 newlen = len(delta) - hlen
2345 2345 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2346 2346 raise error.CensoredBaseError(
2347 2347 self.indexfile, self.node(baserev)
2348 2348 )
2349 2349
2350 2350 if not flags and self._peek_iscensored(baserev, delta, flush):
2351 2351 flags |= REVIDX_ISCENSORED
2352 2352
2353 2353 # We assume consumers of addrevisioncb will want to retrieve
2354 2354 # the added revision, which will require a call to
2355 2355 # revision(). revision() will fast path if there is a cache
2356 2356 # hit. So, we tell _addrevision() to always cache in this case.
2357 2357 # We're only using addgroup() in the context of changegroup
2358 2358 # generation so the revision data can always be handled as raw
2359 2359 # by the flagprocessor.
2360 2360 self._addrevision(
2361 2361 node,
2362 2362 None,
2363 2363 transaction,
2364 2364 link,
2365 2365 p1,
2366 2366 p2,
2367 2367 flags,
2368 2368 (baserev, delta),
2369 2369 ifh,
2370 2370 dfh,
2371 2371 alwayscache=bool(addrevisioncb),
2372 2372 deltacomputer=deltacomputer,
2373 2373 )
2374 2374
2375 2375 if addrevisioncb:
2376 2376 addrevisioncb(self, node)
2377 2377
2378 2378 if not dfh and not self._inline:
2379 2379 # addrevision switched from inline to conventional
2380 2380 # reopen the index
2381 2381 ifh.close()
2382 2382 dfh = self._datafp(b"a+")
2383 2383 ifh = self._indexfp(b"a+")
2384 2384 self._writinghandles = (ifh, dfh)
2385 2385 finally:
2386 2386 self._writinghandles = None
2387 2387
2388 2388 if dfh:
2389 2389 dfh.close()
2390 2390 ifh.close()
2391 2391
2392 2392 return nodes
2393 2393
2394 2394 def iscensored(self, rev):
2395 2395 """Check if a file revision is censored."""
2396 2396 if not self._censorable:
2397 2397 return False
2398 2398
2399 2399 return self.flags(rev) & REVIDX_ISCENSORED
2400 2400
2401 2401 def _peek_iscensored(self, baserev, delta, flush):
2402 2402 """Quickly check if a delta produces a censored revision."""
2403 2403 if not self._censorable:
2404 2404 return False
2405 2405
2406 2406 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2407 2407
2408 2408 def getstrippoint(self, minlink):
2409 2409 """find the minimum rev that must be stripped to strip the linkrev
2410 2410
2411 2411 Returns a tuple containing the minimum rev and a set of all revs that
2412 2412 have linkrevs that will be broken by this strip.
2413 2413 """
2414 2414 return storageutil.resolvestripinfo(
2415 2415 minlink,
2416 2416 len(self) - 1,
2417 2417 self.headrevs(),
2418 2418 self.linkrev,
2419 2419 self.parentrevs,
2420 2420 )
2421 2421
2422 2422 def strip(self, minlink, transaction):
2423 2423 """truncate the revlog on the first revision with a linkrev >= minlink
2424 2424
2425 2425 This function is called when we're stripping revision minlink and
2426 2426 its descendants from the repository.
2427 2427
2428 2428 We have to remove all revisions with linkrev >= minlink, because
2429 2429 the equivalent changelog revisions will be renumbered after the
2430 2430 strip.
2431 2431
2432 2432 So we truncate the revlog on the first of these revisions, and
2433 2433 trust that the caller has saved the revisions that shouldn't be
2434 2434 removed and that it'll re-add them after this truncation.
2435 2435 """
2436 2436 if len(self) == 0:
2437 2437 return
2438 2438
2439 2439 rev, _ = self.getstrippoint(minlink)
2440 2440 if rev == len(self):
2441 2441 return
2442 2442
2443 2443 # first truncate the files on disk
2444 2444 end = self.start(rev)
2445 2445 if not self._inline:
2446 2446 transaction.add(self.datafile, end)
2447 2447 end = rev * self._io.size
2448 2448 else:
2449 2449 end += rev * self._io.size
2450 2450
2451 2451 transaction.add(self.indexfile, end)
2452 2452
2453 2453 # then reset internal state in memory to forget those revisions
2454 2454 self._revisioncache = None
2455 2455 self._chaininfocache = {}
2456 2456 self._chunkclear()
2457 2457
2458 2458 del self.index[rev:-1]
2459 2459
2460 2460 def checksize(self):
2461 2461 """Check size of index and data files
2462 2462
2463 2463 return a (dd, di) tuple.
2464 2464 - dd: extra bytes for the "data" file
2465 2465 - di: extra bytes for the "index" file
2466 2466
2467 2467 A healthy revlog will return (0, 0).
2468 2468 """
2469 2469 expected = 0
2470 2470 if len(self):
2471 2471 expected = max(0, self.end(len(self) - 1))
2472 2472
2473 2473 try:
2474 2474 with self._datafp() as f:
2475 2475 f.seek(0, io.SEEK_END)
2476 2476 actual = f.tell()
2477 2477 dd = actual - expected
2478 2478 except IOError as inst:
2479 2479 if inst.errno != errno.ENOENT:
2480 2480 raise
2481 2481 dd = 0
2482 2482
2483 2483 try:
2484 2484 f = self.opener(self.indexfile)
2485 2485 f.seek(0, io.SEEK_END)
2486 2486 actual = f.tell()
2487 2487 f.close()
2488 2488 s = self._io.size
2489 2489 i = max(0, actual // s)
2490 2490 di = actual - (i * s)
2491 2491 if self._inline:
2492 2492 databytes = 0
2493 2493 for r in self:
2494 2494 databytes += max(0, self.length(r))
2495 2495 dd = 0
2496 2496 di = actual - len(self) * s - databytes
2497 2497 except IOError as inst:
2498 2498 if inst.errno != errno.ENOENT:
2499 2499 raise
2500 2500 di = 0
2501 2501
2502 2502 return (dd, di)
2503 2503
2504 2504 def files(self):
2505 2505 res = [self.indexfile]
2506 2506 if not self._inline:
2507 2507 res.append(self.datafile)
2508 2508 return res
2509 2509
2510 2510 def emitrevisions(
2511 2511 self,
2512 2512 nodes,
2513 2513 nodesorder=None,
2514 2514 revisiondata=False,
2515 2515 assumehaveparentrevisions=False,
2516 2516 deltamode=repository.CG_DELTAMODE_STD,
2517 2517 ):
2518 2518 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2519 2519 raise error.ProgrammingError(
2520 2520 b'unhandled value for nodesorder: %s' % nodesorder
2521 2521 )
2522 2522
2523 2523 if nodesorder is None and not self._generaldelta:
2524 2524 nodesorder = b'storage'
2525 2525
2526 2526 if (
2527 2527 not self._storedeltachains
2528 2528 and deltamode != repository.CG_DELTAMODE_PREV
2529 2529 ):
2530 2530 deltamode = repository.CG_DELTAMODE_FULL
2531 2531
2532 2532 return storageutil.emitrevisions(
2533 2533 self,
2534 2534 nodes,
2535 2535 nodesorder,
2536 2536 revlogrevisiondelta,
2537 2537 deltaparentfn=self.deltaparent,
2538 2538 candeltafn=self.candelta,
2539 2539 rawsizefn=self.rawsize,
2540 2540 revdifffn=self.revdiff,
2541 2541 flagsfn=self.flags,
2542 2542 deltamode=deltamode,
2543 2543 revisiondata=revisiondata,
2544 2544 assumehaveparentrevisions=assumehaveparentrevisions,
2545 2545 )
2546 2546
2547 2547 DELTAREUSEALWAYS = b'always'
2548 2548 DELTAREUSESAMEREVS = b'samerevs'
2549 2549 DELTAREUSENEVER = b'never'
2550 2550
2551 2551 DELTAREUSEFULLADD = b'fulladd'
2552 2552
2553 2553 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2554 2554
2555 2555 def clone(
2556 2556 self,
2557 2557 tr,
2558 2558 destrevlog,
2559 2559 addrevisioncb=None,
2560 2560 deltareuse=DELTAREUSESAMEREVS,
2561 2561 forcedeltabothparents=None,
2562 2562 sidedatacompanion=None,
2563 2563 ):
2564 2564 """Copy this revlog to another, possibly with format changes.
2565 2565
2566 2566 The destination revlog will contain the same revisions and nodes.
2567 2567 However, it may not be bit-for-bit identical due to e.g. delta encoding
2568 2568 differences.
2569 2569
2570 2570 The ``deltareuse`` argument control how deltas from the existing revlog
2571 2571 are preserved in the destination revlog. The argument can have the
2572 2572 following values:
2573 2573
2574 2574 DELTAREUSEALWAYS
2575 2575 Deltas will always be reused (if possible), even if the destination
2576 2576 revlog would not select the same revisions for the delta. This is the
2577 2577 fastest mode of operation.
2578 2578 DELTAREUSESAMEREVS
2579 2579 Deltas will be reused if the destination revlog would pick the same
2580 2580 revisions for the delta. This mode strikes a balance between speed
2581 2581 and optimization.
2582 2582 DELTAREUSENEVER
2583 2583 Deltas will never be reused. This is the slowest mode of execution.
2584 2584 This mode can be used to recompute deltas (e.g. if the diff/delta
2585 2585 algorithm changes).
2586 2586 DELTAREUSEFULLADD
2587 2587 Revision will be re-added as if their were new content. This is
2588 2588 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2589 2589 eg: large file detection and handling.
2590 2590
2591 2591 Delta computation can be slow, so the choice of delta reuse policy can
2592 2592 significantly affect run time.
2593 2593
2594 2594 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2595 2595 two extremes. Deltas will be reused if they are appropriate. But if the
2596 2596 delta could choose a better revision, it will do so. This means if you
2597 2597 are converting a non-generaldelta revlog to a generaldelta revlog,
2598 2598 deltas will be recomputed if the delta's parent isn't a parent of the
2599 2599 revision.
2600 2600
2601 2601 In addition to the delta policy, the ``forcedeltabothparents``
2602 2602 argument controls whether to force compute deltas against both parents
2603 2603 for merges. By default, the current default is used.
2604 2604
2605 2605 If not None, the `sidedatacompanion` is callable that accept two
2606 2606 arguments:
2607 2607
2608 2608 (srcrevlog, rev)
2609 2609
2610 2610 and return a triplet that control changes to sidedata content from the
2611 2611 old revision to the new clone result:
2612 2612
2613 2613 (dropall, filterout, update)
2614 2614
2615 2615 * if `dropall` is True, all sidedata should be dropped
2616 2616 * `filterout` is a set of sidedata keys that should be dropped
2617 2617 * `update` is a mapping of additionnal/new key -> value
2618 2618 """
2619 2619 if deltareuse not in self.DELTAREUSEALL:
2620 2620 raise ValueError(
2621 2621 _(b'value for deltareuse invalid: %s') % deltareuse
2622 2622 )
2623 2623
2624 2624 if len(destrevlog):
2625 2625 raise ValueError(_(b'destination revlog is not empty'))
2626 2626
2627 2627 if getattr(self, 'filteredrevs', None):
2628 2628 raise ValueError(_(b'source revlog has filtered revisions'))
2629 2629 if getattr(destrevlog, 'filteredrevs', None):
2630 2630 raise ValueError(_(b'destination revlog has filtered revisions'))
2631 2631
2632 2632 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2633 2633 # if possible.
2634 2634 oldlazydelta = destrevlog._lazydelta
2635 2635 oldlazydeltabase = destrevlog._lazydeltabase
2636 2636 oldamd = destrevlog._deltabothparents
2637 2637
2638 2638 try:
2639 2639 if deltareuse == self.DELTAREUSEALWAYS:
2640 2640 destrevlog._lazydeltabase = True
2641 2641 destrevlog._lazydelta = True
2642 2642 elif deltareuse == self.DELTAREUSESAMEREVS:
2643 2643 destrevlog._lazydeltabase = False
2644 2644 destrevlog._lazydelta = True
2645 2645 elif deltareuse == self.DELTAREUSENEVER:
2646 2646 destrevlog._lazydeltabase = False
2647 2647 destrevlog._lazydelta = False
2648 2648
2649 2649 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2650 2650
2651 2651 self._clone(
2652 2652 tr,
2653 2653 destrevlog,
2654 2654 addrevisioncb,
2655 2655 deltareuse,
2656 2656 forcedeltabothparents,
2657 2657 sidedatacompanion,
2658 2658 )
2659 2659
2660 2660 finally:
2661 2661 destrevlog._lazydelta = oldlazydelta
2662 2662 destrevlog._lazydeltabase = oldlazydeltabase
2663 2663 destrevlog._deltabothparents = oldamd
2664 2664
2665 2665 def _clone(
2666 2666 self,
2667 2667 tr,
2668 2668 destrevlog,
2669 2669 addrevisioncb,
2670 2670 deltareuse,
2671 2671 forcedeltabothparents,
2672 2672 sidedatacompanion,
2673 2673 ):
2674 2674 """perform the core duty of `revlog.clone` after parameter processing"""
2675 2675 deltacomputer = deltautil.deltacomputer(destrevlog)
2676 2676 index = self.index
2677 2677 for rev in self:
2678 2678 entry = index[rev]
2679 2679
2680 2680 # Some classes override linkrev to take filtered revs into
2681 2681 # account. Use raw entry from index.
2682 2682 flags = entry[0] & 0xFFFF
2683 2683 linkrev = entry[4]
2684 2684 p1 = index[entry[5]][7]
2685 2685 p2 = index[entry[6]][7]
2686 2686 node = entry[7]
2687 2687
2688 2688 sidedataactions = (False, [], {})
2689 2689 if sidedatacompanion is not None:
2690 2690 sidedataactions = sidedatacompanion(self, rev)
2691 2691
2692 2692 # (Possibly) reuse the delta from the revlog if allowed and
2693 2693 # the revlog chunk is a delta.
2694 2694 cachedelta = None
2695 2695 rawtext = None
2696 2696 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2697 2697 dropall, filterout, update = sidedataactions
2698 2698 text, sidedata = self._revisiondata(rev)
2699 2699 if dropall:
2700 2700 sidedata = {}
2701 2701 for key in filterout:
2702 2702 sidedata.pop(key, None)
2703 2703 sidedata.update(update)
2704 2704 if not sidedata:
2705 2705 sidedata = None
2706 2706 destrevlog.addrevision(
2707 2707 text,
2708 2708 tr,
2709 2709 linkrev,
2710 2710 p1,
2711 2711 p2,
2712 2712 cachedelta=cachedelta,
2713 2713 node=node,
2714 2714 flags=flags,
2715 2715 deltacomputer=deltacomputer,
2716 2716 sidedata=sidedata,
2717 2717 )
2718 2718 else:
2719 2719 if destrevlog._lazydelta:
2720 2720 dp = self.deltaparent(rev)
2721 2721 if dp != nullrev:
2722 2722 cachedelta = (dp, bytes(self._chunk(rev)))
2723 2723
2724 2724 if not cachedelta:
2725 2725 rawtext = self.rawdata(rev)
2726 2726
2727 2727 ifh = destrevlog.opener(
2728 2728 destrevlog.indexfile, b'a+', checkambig=False
2729 2729 )
2730 2730 dfh = None
2731 2731 if not destrevlog._inline:
2732 2732 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2733 2733 try:
2734 2734 destrevlog._addrevision(
2735 2735 node,
2736 2736 rawtext,
2737 2737 tr,
2738 2738 linkrev,
2739 2739 p1,
2740 2740 p2,
2741 2741 flags,
2742 2742 cachedelta,
2743 2743 ifh,
2744 2744 dfh,
2745 2745 deltacomputer=deltacomputer,
2746 2746 )
2747 2747 finally:
2748 2748 if dfh:
2749 2749 dfh.close()
2750 2750 ifh.close()
2751 2751
2752 2752 if addrevisioncb:
2753 2753 addrevisioncb(self, rev, node)
2754 2754
2755 2755 def censorrevision(self, tr, censornode, tombstone=b''):
2756 2756 if (self.version & 0xFFFF) == REVLOGV0:
2757 2757 raise error.RevlogError(
2758 2758 _(b'cannot censor with version %d revlogs') % self.version
2759 2759 )
2760 2760
2761 2761 censorrev = self.rev(censornode)
2762 2762 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2763 2763
2764 2764 if len(tombstone) > self.rawsize(censorrev):
2765 2765 raise error.Abort(
2766 2766 _(b'censor tombstone must be no longer than censored data')
2767 2767 )
2768 2768
2769 2769 # Rewriting the revlog in place is hard. Our strategy for censoring is
2770 2770 # to create a new revlog, copy all revisions to it, then replace the
2771 2771 # revlogs on transaction close.
2772 2772
2773 2773 newindexfile = self.indexfile + b'.tmpcensored'
2774 2774 newdatafile = self.datafile + b'.tmpcensored'
2775 2775
2776 2776 # This is a bit dangerous. We could easily have a mismatch of state.
2777 2777 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2778 2778 newrl.version = self.version
2779 2779 newrl._generaldelta = self._generaldelta
2780 2780 newrl._io = self._io
2781 2781
2782 2782 for rev in self.revs():
2783 2783 node = self.node(rev)
2784 2784 p1, p2 = self.parents(node)
2785 2785
2786 2786 if rev == censorrev:
2787 2787 newrl.addrawrevision(
2788 2788 tombstone,
2789 2789 tr,
2790 2790 self.linkrev(censorrev),
2791 2791 p1,
2792 2792 p2,
2793 2793 censornode,
2794 2794 REVIDX_ISCENSORED,
2795 2795 )
2796 2796
2797 2797 if newrl.deltaparent(rev) != nullrev:
2798 2798 raise error.Abort(
2799 2799 _(
2800 2800 b'censored revision stored as delta; '
2801 2801 b'cannot censor'
2802 2802 ),
2803 2803 hint=_(
2804 2804 b'censoring of revlogs is not '
2805 2805 b'fully implemented; please report '
2806 2806 b'this bug'
2807 2807 ),
2808 2808 )
2809 2809 continue
2810 2810
2811 2811 if self.iscensored(rev):
2812 2812 if self.deltaparent(rev) != nullrev:
2813 2813 raise error.Abort(
2814 2814 _(
2815 2815 b'cannot censor due to censored '
2816 2816 b'revision having delta stored'
2817 2817 )
2818 2818 )
2819 2819 rawtext = self._chunk(rev)
2820 2820 else:
2821 2821 rawtext = self.rawdata(rev)
2822 2822
2823 2823 newrl.addrawrevision(
2824 2824 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2825 2825 )
2826 2826
2827 2827 tr.addbackup(self.indexfile, location=b'store')
2828 2828 if not self._inline:
2829 2829 tr.addbackup(self.datafile, location=b'store')
2830 2830
2831 2831 self.opener.rename(newrl.indexfile, self.indexfile)
2832 2832 if not self._inline:
2833 2833 self.opener.rename(newrl.datafile, self.datafile)
2834 2834
2835 2835 self.clearcaches()
2836 2836 self._loadindex()
2837 2837
2838 2838 def verifyintegrity(self, state):
2839 2839 """Verifies the integrity of the revlog.
2840 2840
2841 2841 Yields ``revlogproblem`` instances describing problems that are
2842 2842 found.
2843 2843 """
2844 2844 dd, di = self.checksize()
2845 2845 if dd:
2846 2846 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2847 2847 if di:
2848 2848 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2849 2849
2850 2850 version = self.version & 0xFFFF
2851 2851
2852 2852 # The verifier tells us what version revlog we should be.
2853 2853 if version != state[b'expectedversion']:
2854 2854 yield revlogproblem(
2855 2855 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2856 2856 % (self.indexfile, version, state[b'expectedversion'])
2857 2857 )
2858 2858
2859 2859 state[b'skipread'] = set()
2860 2860
2861 2861 for rev in self:
2862 2862 node = self.node(rev)
2863 2863
2864 2864 # Verify contents. 4 cases to care about:
2865 2865 #
2866 2866 # common: the most common case
2867 2867 # rename: with a rename
2868 2868 # meta: file content starts with b'\1\n', the metadata
2869 2869 # header defined in filelog.py, but without a rename
2870 2870 # ext: content stored externally
2871 2871 #
2872 2872 # More formally, their differences are shown below:
2873 2873 #
2874 2874 # | common | rename | meta | ext
2875 2875 # -------------------------------------------------------
2876 2876 # flags() | 0 | 0 | 0 | not 0
2877 2877 # renamed() | False | True | False | ?
2878 2878 # rawtext[0:2]=='\1\n'| False | True | True | ?
2879 2879 #
2880 2880 # "rawtext" means the raw text stored in revlog data, which
2881 2881 # could be retrieved by "rawdata(rev)". "text"
2882 2882 # mentioned below is "revision(rev)".
2883 2883 #
2884 2884 # There are 3 different lengths stored physically:
2885 2885 # 1. L1: rawsize, stored in revlog index
2886 2886 # 2. L2: len(rawtext), stored in revlog data
2887 2887 # 3. L3: len(text), stored in revlog data if flags==0, or
2888 2888 # possibly somewhere else if flags!=0
2889 2889 #
2890 2890 # L1 should be equal to L2. L3 could be different from them.
2891 2891 # "text" may or may not affect commit hash depending on flag
2892 2892 # processors (see flagutil.addflagprocessor).
2893 2893 #
2894 2894 # | common | rename | meta | ext
2895 2895 # -------------------------------------------------
2896 2896 # rawsize() | L1 | L1 | L1 | L1
2897 2897 # size() | L1 | L2-LM | L1(*) | L1 (?)
2898 2898 # len(rawtext) | L2 | L2 | L2 | L2
2899 2899 # len(text) | L2 | L2 | L2 | L3
2900 2900 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2901 2901 #
2902 2902 # LM: length of metadata, depending on rawtext
2903 2903 # (*): not ideal, see comment in filelog.size
2904 2904 # (?): could be "- len(meta)" if the resolved content has
2905 2905 # rename metadata
2906 2906 #
2907 2907 # Checks needed to be done:
2908 2908 # 1. length check: L1 == L2, in all cases.
2909 2909 # 2. hash check: depending on flag processor, we may need to
2910 2910 # use either "text" (external), or "rawtext" (in revlog).
2911 2911
2912 2912 try:
2913 2913 skipflags = state.get(b'skipflags', 0)
2914 2914 if skipflags:
2915 2915 skipflags &= self.flags(rev)
2916 2916
2917 2917 if skipflags:
2918 2918 state[b'skipread'].add(node)
2919 2919 else:
2920 2920 # Side-effect: read content and verify hash.
2921 2921 self.revision(node)
2922 2922
2923 2923 l1 = self.rawsize(rev)
2924 2924 l2 = len(self.rawdata(node))
2925 2925
2926 2926 if l1 != l2:
2927 2927 yield revlogproblem(
2928 2928 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2929 2929 node=node,
2930 2930 )
2931 2931
2932 2932 except error.CensoredNodeError:
2933 2933 if state[b'erroroncensored']:
2934 2934 yield revlogproblem(
2935 2935 error=_(b'censored file data'), node=node
2936 2936 )
2937 2937 state[b'skipread'].add(node)
2938 2938 except Exception as e:
2939 2939 yield revlogproblem(
2940 2940 error=_(b'unpacking %s: %s')
2941 2941 % (short(node), stringutil.forcebytestr(e)),
2942 2942 node=node,
2943 2943 )
2944 2944 state[b'skipread'].add(node)
2945 2945
2946 2946 def storageinfo(
2947 2947 self,
2948 2948 exclusivefiles=False,
2949 2949 sharedfiles=False,
2950 2950 revisionscount=False,
2951 2951 trackedsize=False,
2952 2952 storedsize=False,
2953 2953 ):
2954 2954 d = {}
2955 2955
2956 2956 if exclusivefiles:
2957 2957 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2958 2958 if not self._inline:
2959 2959 d[b'exclusivefiles'].append((self.opener, self.datafile))
2960 2960
2961 2961 if sharedfiles:
2962 2962 d[b'sharedfiles'] = []
2963 2963
2964 2964 if revisionscount:
2965 2965 d[b'revisionscount'] = len(self)
2966 2966
2967 2967 if trackedsize:
2968 2968 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2969 2969
2970 2970 if storedsize:
2971 2971 d[b'storedsize'] = sum(
2972 2972 self.opener.stat(path).st_size for path in self.files()
2973 2973 )
2974 2974
2975 2975 return d
General Comments 0
You need to be logged in to leave comments. Login now