revlog: deprecate the _nodecache attribute (API)...
marmoute
r43930:6e3e3e54 default
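The patch below drops the `_nodecache` alias and replaces it with a deprecation property that forwards to the index. A minimal sketch of the caller-side migration (hypothetical revlog instance `rl`; not part of this patch):

    # before: deprecated as of 5.3, now emits a DeprecationWarning
    rev = rl._nodecache[node]
    # after: read the index-backed mapping directly
    rev = rl.index.nodemap[node]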
@@ -1,2947 +1,2952
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 revlogutils,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanket usage of all the names to prevent pyflakes warnings
87 87 # We need these names available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_EXTSTORED
102 102 REVIDX_DEFAULT_FLAGS
103 103 REVIDX_FLAGS_ORDER
104 104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 105
106 106 parsers = policy.importmod('parsers')
107 107 rustancestor = policy.importrust('ancestor')
108 108 rustdagop = policy.importrust('dagop')
109 109
110 110 # Aliased for performance.
111 111 _zlibdecompress = zlib.decompress
112 112
113 113 # max size of revlog with inline data
114 114 _maxinline = 131072
115 115 _chunksize = 1048576
116 116
117 117 # Flag processors for REVIDX_ELLIPSIS.
118 118 def ellipsisreadprocessor(rl, text):
119 119 return text, False, {}
120 120
121 121
122 122 def ellipsiswriteprocessor(rl, text, sidedata):
123 123 return text, False
124 124
125 125
126 126 def ellipsisrawprocessor(rl, text):
127 127 return False
128 128
129 129
130 130 ellipsisprocessor = (
131 131 ellipsisreadprocessor,
132 132 ellipsiswriteprocessor,
133 133 ellipsisrawprocessor,
134 134 )
135 135
136 136
137 137 def getoffset(q):
138 138 return int(q >> 16)
139 139
140 140
141 141 def gettype(q):
142 142 return int(q & 0xFFFF)
143 143
144 144
145 145 def offset_type(offset, type):
146 146 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
147 147 raise ValueError(b'unknown revlog index flags')
148 148 return int(int(offset) << 16 | type)
149 149
150 150
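An index entry's first field packs the data offset and the 16-bit flags word into one integer: offset in the high bits, flags in the low 16. A round-trip of the three helpers above, as an illustration (not from the source):

    packed = offset_type(1024, 0)     # 1024 << 16, no flags
    assert getoffset(packed) == 1024
    assert gettype(packed) == 0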
151 151 @attr.s(slots=True, frozen=True)
152 152 class _revisioninfo(object):
153 153 """Information about a revision that allows building its fulltext
154 154 node: expected hash of the revision
155 155 p1, p2: parent revs of the revision
156 156 btext: built text cache consisting of a one-element list
157 157 cachedelta: (baserev, uncompressed_delta) or None
158 158 flags: flags associated with the revision storage
159 159
160 160 One of btext[0] or cachedelta must be set.
161 161 """
162 162
163 163 node = attr.ib()
164 164 p1 = attr.ib()
165 165 p2 = attr.ib()
166 166 btext = attr.ib()
167 167 textlen = attr.ib()
168 168 cachedelta = attr.ib()
169 169 flags = attr.ib()
170 170
171 171
172 172 @interfaceutil.implementer(repository.irevisiondelta)
173 173 @attr.s(slots=True)
174 174 class revlogrevisiondelta(object):
175 175 node = attr.ib()
176 176 p1node = attr.ib()
177 177 p2node = attr.ib()
178 178 basenode = attr.ib()
179 179 flags = attr.ib()
180 180 baserevisionsize = attr.ib()
181 181 revision = attr.ib()
182 182 delta = attr.ib()
183 183 linknode = attr.ib(default=None)
184 184
185 185
186 186 @interfaceutil.implementer(repository.iverifyproblem)
187 187 @attr.s(frozen=True)
188 188 class revlogproblem(object):
189 189 warning = attr.ib(default=None)
190 190 error = attr.ib(default=None)
191 191 node = attr.ib(default=None)
192 192
193 193
194 194 # index v0:
195 195 # 4 bytes: offset
196 196 # 4 bytes: compressed length
197 197 # 4 bytes: base rev
198 198 # 4 bytes: link rev
199 199 # 20 bytes: parent 1 nodeid
200 200 # 20 bytes: parent 2 nodeid
201 201 # 20 bytes: nodeid
202 202 indexformatv0 = struct.Struct(b">4l20s20s20s")
203 203 indexformatv0_pack = indexformatv0.pack
204 204 indexformatv0_unpack = indexformatv0.unpack
205 205
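Each v0 entry is thus 76 bytes: four big-endian 32-bit integers followed by three 20-byte nodeids. A quick sanity check of the layout (illustrative only):

    import struct
    assert struct.calcsize(b">4l20s20s20s") == 76  # 4 * 4 + 3 * 20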
206 206
207 207 class revlogoldindex(list):
208 208 @util.propertycache
209 209 def nodemap(self):
210 210 nodemap = revlogutils.NodeMap({nullid: nullrev})
211 211 for r in range(0, len(self)):
212 212 n = self[r][7]
213 213 nodemap[n] = r
214 214 return nodemap
215 215
216 216 def clearcaches(self):
217 217 self.__dict__.pop('nodemap', None)
218 218
219 219 def __getitem__(self, i):
220 220 if i == -1:
221 221 return (0, 0, 0, -1, -1, -1, -1, nullid)
222 222 return list.__getitem__(self, i)
223 223
224 224
225 225 class revlogoldio(object):
226 226 def __init__(self):
227 227 self.size = indexformatv0.size
228 228
229 229 def parseindex(self, data, inline):
230 230 s = self.size
231 231 index = []
232 232 nodemap = revlogutils.NodeMap({nullid: nullrev})
233 233 n = off = 0
234 234 l = len(data)
235 235 while off + s <= l:
236 236 cur = data[off : off + s]
237 237 off += s
238 238 e = indexformatv0_unpack(cur)
239 239 # transform to revlogv1 format
240 240 e2 = (
241 241 offset_type(e[0], 0),
242 242 e[1],
243 243 -1,
244 244 e[2],
245 245 e[3],
246 246 nodemap.get(e[4], nullrev),
247 247 nodemap.get(e[5], nullrev),
248 248 e[6],
249 249 )
250 250 index.append(e2)
251 251 nodemap[e[6]] = n
252 252 n += 1
253 253
254 254 index = revlogoldindex(index)
255 255 return index, None
256 256
257 257 def packentry(self, entry, node, version, rev):
258 258 if gettype(entry[0]):
259 259 raise error.RevlogError(
260 260 _(b'index entry flags need revlog version 1')
261 261 )
262 262 e2 = (
263 263 getoffset(entry[0]),
264 264 entry[1],
265 265 entry[3],
266 266 entry[4],
267 267 node(entry[5]),
268 268 node(entry[6]),
269 269 entry[7],
270 270 )
271 271 return indexformatv0_pack(*e2)
272 272
273 273
274 274 # index ng:
275 275 # 6 bytes: offset
276 276 # 2 bytes: flags
277 277 # 4 bytes: compressed length
278 278 # 4 bytes: uncompressed length
279 279 # 4 bytes: base rev
280 280 # 4 bytes: link rev
281 281 # 4 bytes: parent 1 rev
282 282 # 4 bytes: parent 2 rev
283 283 # 32 bytes: nodeid
284 284 indexformatng = struct.Struct(b">Qiiiiii20s12x")
285 285 indexformatng_pack = indexformatng.pack
286 286 versionformat = struct.Struct(b">I")
287 287 versionformat_pack = versionformat.pack
288 288 versionformat_unpack = versionformat.unpack
289 289
290 290 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
291 291 # signed integer)
292 292 _maxentrysize = 0x7FFFFFFF
293 293
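Each "ng" entry is 64 bytes: an 8-byte offset/flags word, six 4-byte integers, and a 20-byte nodeid padded to 32 bytes. The 4-byte signed uncompressed-length field is also what caps a single entry at 2 GiB. Illustrative check (not from the source):

    import struct
    assert struct.calcsize(b">Qiiiiii20s12x") == 64  # 8 + 6 * 4 + 20 + 12
    assert 0x7FFFFFFF == 2 ** 31 - 1                 # _maxentrysize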
294 294
295 295 class revlogio(object):
296 296 def __init__(self):
297 297 self.size = indexformatng.size
298 298
299 299 def parseindex(self, data, inline):
300 300 # call the C implementation to parse the index data
301 301 index, cache = parsers.parse_index2(data, inline)
302 302 return index, cache
303 303
304 304 def packentry(self, entry, node, version, rev):
305 305 p = indexformatng_pack(*entry)
306 306 if rev == 0:
307 307 p = versionformat_pack(version) + p[4:]
308 308 return p
309 309
310 310
311 311 class revlog(object):
312 312 """
313 313 the underlying revision storage object
314 314
315 315 A revlog consists of two parts, an index and the revision data.
316 316
317 317 The index is a file with a fixed record size containing
318 318 information on each revision, including its nodeid (hash), the
319 319 nodeids of its parents, the position and offset of its data within
320 320 the data file, and the revision it's based on. Finally, each entry
321 321 contains a linkrev entry that can serve as a pointer to external
322 322 data.
323 323
324 324 The revision data itself is a linear collection of data chunks.
325 325 Each chunk represents a revision and is usually represented as a
326 326 delta against the previous chunk. To bound lookup time, runs of
327 327 deltas are limited to about 2 times the length of the original
328 328 version data. This makes retrieval of a version proportional to
329 329 its size, or O(1) relative to the number of revisions.
330 330
331 331 Both pieces of the revlog are written to in an append-only
332 332 fashion, which means we never need to rewrite a file to insert or
333 333 remove data, and can use some simple techniques to avoid the need
334 334 for locking while reading.
335 335
336 336 If checkambig, indexfile is opened with checkambig=True at
337 337 writing, to avoid file stat ambiguity.
338 338
339 339 If mmaplargeindex is True, and an mmapindexthreshold is set, the
340 340 index will be mmapped rather than read if it is larger than the
341 341 configured threshold.
342 342
343 343 If censorable is True, the revlog can have censored revisions.
344 344
345 345 If `upperboundcomp` is not None, this is the expected maximal gain from
346 346 compression for the data content.
347 347 """
348 348
349 349 _flagserrorclass = error.RevlogError
350 350
351 351 def __init__(
352 352 self,
353 353 opener,
354 354 indexfile,
355 355 datafile=None,
356 356 checkambig=False,
357 357 mmaplargeindex=False,
358 358 censorable=False,
359 359 upperboundcomp=None,
360 360 ):
361 361 """
362 362 create a revlog object
363 363
364 364 opener is a function that abstracts the file opening operation
365 365 and can be used to implement COW semantics or the like.
366 366
367 367 """
368 368 self.upperboundcomp = upperboundcomp
369 369 self.indexfile = indexfile
370 370 self.datafile = datafile or (indexfile[:-2] + b".d")
371 371 self.opener = opener
372 372 # When True, indexfile is opened with checkambig=True at writing, to
373 373 # avoid file stat ambiguity.
374 374 self._checkambig = checkambig
375 375 self._mmaplargeindex = mmaplargeindex
376 376 self._censorable = censorable
377 377 # 3-tuple of (node, rev, text) for a raw revision.
378 378 self._revisioncache = None
379 379 # Maps rev to chain base rev.
380 380 self._chainbasecache = util.lrucachedict(100)
381 381 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
382 382 self._chunkcache = (0, b'')
383 383 # How much data to read and cache into the raw revlog data cache.
384 384 self._chunkcachesize = 65536
385 385 self._maxchainlen = None
386 386 self._deltabothparents = True
387 387 self.index = None
388 388 # Mapping of partial identifiers to full nodes.
389 389 self._pcache = {}
390 390 # Mapping of revision integer to full node.
391 self._nodecache = None
392 391 self._nodepos = None
393 392 self._compengine = b'zlib'
394 393 self._compengineopts = {}
395 394 self._maxdeltachainspan = -1
396 395 self._withsparseread = False
397 396 self._sparserevlog = False
398 397 self._srdensitythreshold = 0.50
399 398 self._srmingapsize = 262144
400 399
401 400 # Make copy of flag processors so each revlog instance can support
402 401 # custom flags.
403 402 self._flagprocessors = dict(flagutil.flagprocessors)
404 403
405 404 # 2-tuple of file handles being used for active writing.
406 405 self._writinghandles = None
407 406
408 407 self._loadindex()
409 408
410 409 def _loadindex(self):
411 410 mmapindexthreshold = None
412 411 opts = self.opener.options
413 412
414 413 if b'revlogv2' in opts:
415 414 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
416 415 elif b'revlogv1' in opts:
417 416 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
418 417 if b'generaldelta' in opts:
419 418 newversionflags |= FLAG_GENERALDELTA
420 419 elif b'revlogv0' in self.opener.options:
421 420 newversionflags = REVLOGV0
422 421 else:
423 422 newversionflags = REVLOG_DEFAULT_VERSION
424 423
425 424 if b'chunkcachesize' in opts:
426 425 self._chunkcachesize = opts[b'chunkcachesize']
427 426 if b'maxchainlen' in opts:
428 427 self._maxchainlen = opts[b'maxchainlen']
429 428 if b'deltabothparents' in opts:
430 429 self._deltabothparents = opts[b'deltabothparents']
431 430 self._lazydelta = bool(opts.get(b'lazydelta', True))
432 431 self._lazydeltabase = False
433 432 if self._lazydelta:
434 433 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
435 434 if b'compengine' in opts:
436 435 self._compengine = opts[b'compengine']
437 436 if b'zlib.level' in opts:
438 437 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
439 438 if b'zstd.level' in opts:
440 439 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
441 440 if b'maxdeltachainspan' in opts:
442 441 self._maxdeltachainspan = opts[b'maxdeltachainspan']
443 442 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
444 443 mmapindexthreshold = opts[b'mmapindexthreshold']
445 444 self.hassidedata = bool(opts.get(b'side-data', False))
446 445 if self.hassidedata:
447 446 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
448 447 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
449 448 withsparseread = bool(opts.get(b'with-sparse-read', False))
450 449 # sparse-revlog forces sparse-read
451 450 self._withsparseread = self._sparserevlog or withsparseread
452 451 if b'sparse-read-density-threshold' in opts:
453 452 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
454 453 if b'sparse-read-min-gap-size' in opts:
455 454 self._srmingapsize = opts[b'sparse-read-min-gap-size']
456 455 if opts.get(b'enableellipsis'):
457 456 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
458 457
459 458 # revlog v0 doesn't have flag processors
460 459 for flag, processor in pycompat.iteritems(
461 460 opts.get(b'flagprocessors', {})
462 461 ):
463 462 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
464 463
465 464 if self._chunkcachesize <= 0:
466 465 raise error.RevlogError(
467 466 _(b'revlog chunk cache size %r is not greater than 0')
468 467 % self._chunkcachesize
469 468 )
470 469 elif self._chunkcachesize & (self._chunkcachesize - 1):
471 470 raise error.RevlogError(
472 471 _(b'revlog chunk cache size %r is not a power of 2')
473 472 % self._chunkcachesize
474 473 )
475 474
476 475 indexdata = b''
477 476 self._initempty = True
478 477 try:
479 478 with self._indexfp() as f:
480 479 if (
481 480 mmapindexthreshold is not None
482 481 and self.opener.fstat(f).st_size >= mmapindexthreshold
483 482 ):
484 483 # TODO: should .close() to release resources without
485 484 # relying on Python GC
486 485 indexdata = util.buffer(util.mmapread(f))
487 486 else:
488 487 indexdata = f.read()
489 488 if len(indexdata) > 0:
490 489 versionflags = versionformat_unpack(indexdata[:4])[0]
491 490 self._initempty = False
492 491 else:
493 492 versionflags = newversionflags
494 493 except IOError as inst:
495 494 if inst.errno != errno.ENOENT:
496 495 raise
497 496
498 497 versionflags = newversionflags
499 498
500 499 self.version = versionflags
501 500
502 501 flags = versionflags & ~0xFFFF
503 502 fmt = versionflags & 0xFFFF
504 503
505 504 if fmt == REVLOGV0:
506 505 if flags:
507 506 raise error.RevlogError(
508 507 _(b'unknown flags (%#04x) in version %d revlog %s')
509 508 % (flags >> 16, fmt, self.indexfile)
510 509 )
511 510
512 511 self._inline = False
513 512 self._generaldelta = False
514 513
515 514 elif fmt == REVLOGV1:
516 515 if flags & ~REVLOGV1_FLAGS:
517 516 raise error.RevlogError(
518 517 _(b'unknown flags (%#04x) in version %d revlog %s')
519 518 % (flags >> 16, fmt, self.indexfile)
520 519 )
521 520
522 521 self._inline = versionflags & FLAG_INLINE_DATA
523 522 self._generaldelta = versionflags & FLAG_GENERALDELTA
524 523
525 524 elif fmt == REVLOGV2:
526 525 if flags & ~REVLOGV2_FLAGS:
527 526 raise error.RevlogError(
528 527 _(b'unknown flags (%#04x) in version %d revlog %s')
529 528 % (flags >> 16, fmt, self.indexfile)
530 529 )
531 530
532 531 self._inline = versionflags & FLAG_INLINE_DATA
533 532 # generaldelta implied by version 2 revlogs.
534 533 self._generaldelta = True
535 534
536 535 else:
537 536 raise error.RevlogError(
538 537 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
539 538 )
540 539 # sparse-revlog can't be on without general-delta (issue6056)
541 540 if not self._generaldelta:
542 541 self._sparserevlog = False
543 542
544 543 self._storedeltachains = True
545 544
546 545 self._io = revlogio()
547 546 if self.version == REVLOGV0:
548 547 self._io = revlogoldio()
549 548 try:
550 549 d = self._io.parseindex(indexdata, self._inline)
551 550 except (ValueError, IndexError):
552 551 raise error.RevlogError(
553 552 _(b"index %s is corrupted") % self.indexfile
554 553 )
555 554 self.index, self._chunkcache = d
556 self.nodemap = self._nodecache = self.index.nodemap
555 self.nodemap = self.index.nodemap
557 556 if not self._chunkcache:
558 557 self._chunkclear()
559 558 # revnum -> (chain-length, sum-delta-length)
560 559 self._chaininfocache = {}
561 560 # revlog header -> revlog compressor
562 561 self._decompressors = {}
563 562
564 563 @util.propertycache
565 564 def _compressor(self):
566 565 engine = util.compengines[self._compengine]
567 566 return engine.revlogcompressor(self._compengineopts)
568 567
569 568 def _indexfp(self, mode=b'r'):
570 569 """file object for the revlog's index file"""
571 570 args = {'mode': mode}
572 571 if mode != b'r':
573 572 args['checkambig'] = self._checkambig
574 573 if mode == b'w':
575 574 args['atomictemp'] = True
576 575 return self.opener(self.indexfile, **args)
577 576
578 577 def _datafp(self, mode=b'r'):
579 578 """file object for the revlog's data file"""
580 579 return self.opener(self.datafile, mode=mode)
581 580
582 581 @contextlib.contextmanager
583 582 def _datareadfp(self, existingfp=None):
584 583 """file object suitable to read data"""
585 584 # Use explicit file handle, if given.
586 585 if existingfp is not None:
587 586 yield existingfp
588 587
589 588 # Use a file handle being actively used for writes, if available.
590 589 # There is some danger to doing this because reads will seek the
591 590 # file. However, _writeentry() performs a SEEK_END before all writes,
592 591 # so we should be safe.
593 592 elif self._writinghandles:
594 593 if self._inline:
595 594 yield self._writinghandles[0]
596 595 else:
597 596 yield self._writinghandles[1]
598 597
599 598 # Otherwise open a new file handle.
600 599 else:
601 600 if self._inline:
602 601 func = self._indexfp
603 602 else:
604 603 func = self._datafp
605 604 with func() as fp:
606 605 yield fp
607 606
608 607 def tiprev(self):
609 608 return len(self.index) - 1
610 609
611 610 def tip(self):
612 611 return self.node(self.tiprev())
613 612
614 613 def __contains__(self, rev):
615 614 return 0 <= rev < len(self)
616 615
617 616 def __len__(self):
618 617 return len(self.index)
619 618
620 619 def __iter__(self):
621 620 return iter(pycompat.xrange(len(self)))
622 621
623 622 def revs(self, start=0, stop=None):
624 623 """iterate over all rev in this revlog (from start to stop)"""
625 624 return storageutil.iterrevs(len(self), start=start, stop=stop)
626 625
627 626 @util.propertycache
628 627 def nodemap(self):
629 628 if self.index:
630 629 # populate mapping down to the initial node
631 630 node0 = self.index[0][7] # get around changelog filtering
632 631 self.rev(node0)
633 632 return self.index.nodemap
634 633
634 @property
635 def _nodecache(self):
636 msg = "revlog._nodecache is deprecated, use revlog.index.nodemap"
637 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
638 return self.index.nodemap
639
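Both the `nodemap` property and the deprecated `_nodecache` now resolve to the same index-backed mapping. Typical lookups, as a sketch (hypothetical revlog `rl`; dict-style access as implemented by revlogutils.NodeMap):

    rev = rl.index.nodemap[node]            # raises error.RevlogError if absent
    rev = rl.index.nodemap.get(node, None)  # returns a default instead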
635 640 def hasnode(self, node):
636 641 try:
637 642 self.rev(node)
638 643 return True
639 644 except KeyError:
640 645 return False
641 646
642 647 def candelta(self, baserev, rev):
643 648 """whether two revisions (baserev, rev) can be delta-ed or not"""
644 649 # Disable delta if either rev requires a content-changing flag
645 650 # processor (ex. LFS). This is because such flag processor can alter
646 651 # the rawtext content that the delta will be based on, and two clients
647 652 # could have a same revlog node with different flags (i.e. different
648 653 # rawtext contents) and the delta could be incompatible.
649 654 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
650 655 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
651 656 ):
652 657 return False
653 658 return True
654 659
655 660 def clearcaches(self):
656 661 self._revisioncache = None
657 662 self._chainbasecache.clear()
658 663 self._chunkcache = (0, b'')
659 664 self._pcache = {}
660 665 self.index.clearcaches()
661 666
662 667 def rev(self, node):
663 668 try:
664 669 return self.index.nodemap[node]
665 670 except TypeError:
666 671 raise
667 672 except error.RevlogError:
668 673 # parsers.c radix tree lookup failed
669 674 if node == wdirid or node in wdirfilenodeids:
670 675 raise error.WdirUnsupported
671 676 raise error.LookupError(node, self.indexfile, _(b'no node'))
672 677
673 678 # Accessors for index entries.
674 679
675 680 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
676 681 # are flags.
677 682 def start(self, rev):
678 683 return int(self.index[rev][0] >> 16)
679 684
680 685 def flags(self, rev):
681 686 return self.index[rev][0] & 0xFFFF
682 687
683 688 def length(self, rev):
684 689 return self.index[rev][1]
685 690
686 691 def rawsize(self, rev):
687 692 """return the length of the uncompressed text for a given revision"""
688 693 l = self.index[rev][2]
689 694 if l >= 0:
690 695 return l
691 696
692 697 t = self.rawdata(rev)
693 698 return len(t)
694 699
695 700 def size(self, rev):
696 701 """length of non-raw text (processed by a "read" flag processor)"""
697 702 # fast path: if no "read" flag processor could change the content,
698 703 # size is rawsize. note: ELLIPSIS is known to not change the content.
699 704 flags = self.flags(rev)
700 705 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
701 706 return self.rawsize(rev)
702 707
703 708 return len(self.revision(rev, raw=False))
704 709
705 710 def chainbase(self, rev):
706 711 base = self._chainbasecache.get(rev)
707 712 if base is not None:
708 713 return base
709 714
710 715 index = self.index
711 716 iterrev = rev
712 717 base = index[iterrev][3]
713 718 while base != iterrev:
714 719 iterrev = base
715 720 base = index[iterrev][3]
716 721
717 722 self._chainbasecache[rev] = base
718 723 return base
719 724
720 725 def linkrev(self, rev):
721 726 return self.index[rev][4]
722 727
723 728 def parentrevs(self, rev):
724 729 try:
725 730 entry = self.index[rev]
726 731 except IndexError:
727 732 if rev == wdirrev:
728 733 raise error.WdirUnsupported
729 734 raise
730 735
731 736 return entry[5], entry[6]
732 737
733 738 # fast parentrevs(rev) where rev isn't filtered
734 739 _uncheckedparentrevs = parentrevs
735 740
736 741 def node(self, rev):
737 742 try:
738 743 return self.index[rev][7]
739 744 except IndexError:
740 745 if rev == wdirrev:
741 746 raise error.WdirUnsupported
742 747 raise
743 748
744 749 # Derived from index values.
745 750
746 751 def end(self, rev):
747 752 return self.start(rev) + self.length(rev)
748 753
749 754 def parents(self, node):
750 755 i = self.index
751 756 d = i[self.rev(node)]
752 757 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
753 758
754 759 def chainlen(self, rev):
755 760 return self._chaininfo(rev)[0]
756 761
757 762 def _chaininfo(self, rev):
758 763 chaininfocache = self._chaininfocache
759 764 if rev in chaininfocache:
760 765 return chaininfocache[rev]
761 766 index = self.index
762 767 generaldelta = self._generaldelta
763 768 iterrev = rev
764 769 e = index[iterrev]
765 770 clen = 0
766 771 compresseddeltalen = 0
767 772 while iterrev != e[3]:
768 773 clen += 1
769 774 compresseddeltalen += e[1]
770 775 if generaldelta:
771 776 iterrev = e[3]
772 777 else:
773 778 iterrev -= 1
774 779 if iterrev in chaininfocache:
775 780 t = chaininfocache[iterrev]
776 781 clen += t[0]
777 782 compresseddeltalen += t[1]
778 783 break
779 784 e = index[iterrev]
780 785 else:
781 786 # Add text length of base since decompressing that also takes
782 787 # work. For cache hits the length is already included.
783 788 compresseddeltalen += e[1]
784 789 r = (clen, compresseddeltalen)
785 790 chaininfocache[rev] = r
786 791 return r
787 792
788 793 def _deltachain(self, rev, stoprev=None):
789 794 """Obtain the delta chain for a revision.
790 795
791 796 ``stoprev`` specifies a revision to stop at. If not specified, we
792 797 stop at the base of the chain.
793 798
794 799 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
795 800 revs in ascending order and ``stopped`` is a bool indicating whether
796 801 ``stoprev`` was hit.
797 802 """
798 803 # Try C implementation.
799 804 try:
800 805 return self.index.deltachain(rev, stoprev, self._generaldelta)
801 806 except AttributeError:
802 807 pass
803 808
804 809 chain = []
805 810
806 811 # Alias to prevent attribute lookup in tight loop.
807 812 index = self.index
808 813 generaldelta = self._generaldelta
809 814
810 815 iterrev = rev
811 816 e = index[iterrev]
812 817 while iterrev != e[3] and iterrev != stoprev:
813 818 chain.append(iterrev)
814 819 if generaldelta:
815 820 iterrev = e[3]
816 821 else:
817 822 iterrev -= 1
818 823 e = index[iterrev]
819 824
820 825 if iterrev == stoprev:
821 826 stopped = True
822 827 else:
823 828 chain.append(iterrev)
824 829 stopped = False
825 830
826 831 chain.reverse()
827 832 return chain, stopped
828 833
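A sketch of consuming the result (hypothetical revlog `rl`):

    chain, stopped = rl._deltachain(rev, stoprev=None)
    # `chain` lists the revisions whose chunks must be applied, in
    # ascending order; `stopped` tells whether `stoprev` cut the walk
    # short before reaching the base of the chain.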
829 834 def ancestors(self, revs, stoprev=0, inclusive=False):
830 835 """Generate the ancestors of 'revs' in reverse revision order.
831 836 Does not generate revs lower than stoprev.
832 837
833 838 See the documentation for ancestor.lazyancestors for more details."""
834 839
835 840 # first, make sure start revisions aren't filtered
836 841 revs = list(revs)
837 842 checkrev = self.node
838 843 for r in revs:
839 844 checkrev(r)
840 845 # and we're sure ancestors aren't filtered as well
841 846
842 847 if rustancestor is not None:
843 848 lazyancestors = rustancestor.LazyAncestors
844 849 arg = self.index
845 850 elif util.safehasattr(parsers, b'rustlazyancestors'):
846 851 lazyancestors = ancestor.rustlazyancestors
847 852 arg = self.index
848 853 else:
849 854 lazyancestors = ancestor.lazyancestors
850 855 arg = self._uncheckedparentrevs
851 856 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
852 857
853 858 def descendants(self, revs):
854 859 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
855 860
856 861 def findcommonmissing(self, common=None, heads=None):
857 862 """Return a tuple of the ancestors of common and the ancestors of heads
858 863 that are not ancestors of common. In revset terminology, we return the
859 864 tuple:
860 865
861 866 ::common, (::heads) - (::common)
862 867
863 868 The list is sorted by revision number, meaning it is
864 869 topologically sorted.
865 870
866 871 'heads' and 'common' are both lists of node IDs. If heads is
867 872 not supplied, uses all of the revlog's heads. If common is not
868 873 supplied, uses nullid."""
869 874 if common is None:
870 875 common = [nullid]
871 876 if heads is None:
872 877 heads = self.heads()
873 878
874 879 common = [self.rev(n) for n in common]
875 880 heads = [self.rev(n) for n in heads]
876 881
877 882 # we want the ancestors, but inclusive
878 883 class lazyset(object):
879 884 def __init__(self, lazyvalues):
880 885 self.addedvalues = set()
881 886 self.lazyvalues = lazyvalues
882 887
883 888 def __contains__(self, value):
884 889 return value in self.addedvalues or value in self.lazyvalues
885 890
886 891 def __iter__(self):
887 892 added = self.addedvalues
888 893 for r in added:
889 894 yield r
890 895 for r in self.lazyvalues:
891 896 if not r in added:
892 897 yield r
893 898
894 899 def add(self, value):
895 900 self.addedvalues.add(value)
896 901
897 902 def update(self, values):
898 903 self.addedvalues.update(values)
899 904
900 905 has = lazyset(self.ancestors(common))
901 906 has.add(nullrev)
902 907 has.update(common)
903 908
904 909 # take all ancestors from heads that aren't in has
905 910 missing = set()
906 911 visit = collections.deque(r for r in heads if r not in has)
907 912 while visit:
908 913 r = visit.popleft()
909 914 if r in missing:
910 915 continue
911 916 else:
912 917 missing.add(r)
913 918 for p in self.parentrevs(r):
914 919 if p not in has:
915 920 visit.append(p)
916 921 missing = list(missing)
917 922 missing.sort()
918 923 return has, [self.node(miss) for miss in missing]
919 924
920 925 def incrementalmissingrevs(self, common=None):
921 926 """Return an object that can be used to incrementally compute the
922 927 revision numbers of the ancestors of arbitrary sets that are not
923 928 ancestors of common. This is an ancestor.incrementalmissingancestors
924 929 object.
925 930
926 931 'common' is a list of revision numbers. If common is not supplied, uses
927 932 nullrev.
928 933 """
929 934 if common is None:
930 935 common = [nullrev]
931 936
932 937 if rustancestor is not None:
933 938 return rustancestor.MissingAncestors(self.index, common)
934 939 return ancestor.incrementalmissingancestors(self.parentrevs, common)
935 940
936 941 def findmissingrevs(self, common=None, heads=None):
937 942 """Return the revision numbers of the ancestors of heads that
938 943 are not ancestors of common.
939 944
940 945 More specifically, return a list of revision numbers corresponding to
941 946 nodes N such that every N satisfies the following constraints:
942 947
943 948 1. N is an ancestor of some node in 'heads'
944 949 2. N is not an ancestor of any node in 'common'
945 950
946 951 The list is sorted by revision number, meaning it is
947 952 topologically sorted.
948 953
949 954 'heads' and 'common' are both lists of revision numbers. If heads is
950 955 not supplied, uses all of the revlog's heads. If common is not
951 956 supplied, uses nullid."""
952 957 if common is None:
953 958 common = [nullrev]
954 959 if heads is None:
955 960 heads = self.headrevs()
956 961
957 962 inc = self.incrementalmissingrevs(common=common)
958 963 return inc.missingancestors(heads)
959 964
960 965 def findmissing(self, common=None, heads=None):
961 966 """Return the ancestors of heads that are not ancestors of common.
962 967
963 968 More specifically, return a list of nodes N such that every N
964 969 satisfies the following constraints:
965 970
966 971 1. N is an ancestor of some node in 'heads'
967 972 2. N is not an ancestor of any node in 'common'
968 973
969 974 The list is sorted by revision number, meaning it is
970 975 topologically sorted.
971 976
972 977 'heads' and 'common' are both lists of node IDs. If heads is
973 978 not supplied, uses all of the revlog's heads. If common is not
974 979 supplied, uses nullid."""
975 980 if common is None:
976 981 common = [nullid]
977 982 if heads is None:
978 983 heads = self.heads()
979 984
980 985 common = [self.rev(n) for n in common]
981 986 heads = [self.rev(n) for n in heads]
982 987
983 988 inc = self.incrementalmissingrevs(common=common)
984 989 return [self.node(r) for r in inc.missingancestors(heads)]
985 990
986 991 def nodesbetween(self, roots=None, heads=None):
987 992 """Return a topological path from 'roots' to 'heads'.
988 993
989 994 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
990 995 topologically sorted list of all nodes N that satisfy both of
991 996 these constraints:
992 997
993 998 1. N is a descendant of some node in 'roots'
994 999 2. N is an ancestor of some node in 'heads'
995 1000
996 1001 Every node is considered to be both a descendant and an ancestor
997 1002 of itself, so every reachable node in 'roots' and 'heads' will be
998 1003 included in 'nodes'.
999 1004
1000 1005 'outroots' is the list of reachable nodes in 'roots', i.e., the
1001 1006 subset of 'roots' that is returned in 'nodes'. Likewise,
1002 1007 'outheads' is the subset of 'heads' that is also in 'nodes'.
1003 1008
1004 1009 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1005 1010 unspecified, uses nullid as the only root. If 'heads' is
1006 1011 unspecified, uses list of all of the revlog's heads."""
1007 1012 nonodes = ([], [], [])
1008 1013 if roots is not None:
1009 1014 roots = list(roots)
1010 1015 if not roots:
1011 1016 return nonodes
1012 1017 lowestrev = min([self.rev(n) for n in roots])
1013 1018 else:
1014 1019 roots = [nullid] # Everybody's a descendant of nullid
1015 1020 lowestrev = nullrev
1016 1021 if (lowestrev == nullrev) and (heads is None):
1017 1022 # We want _all_ the nodes!
1018 1023 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1019 1024 if heads is None:
1020 1025 # All nodes are ancestors, so the latest ancestor is the last
1021 1026 # node.
1022 1027 highestrev = len(self) - 1
1023 1028 # Set ancestors to None to signal that every node is an ancestor.
1024 1029 ancestors = None
1025 1030 # Set heads to an empty dictionary for later discovery of heads
1026 1031 heads = {}
1027 1032 else:
1028 1033 heads = list(heads)
1029 1034 if not heads:
1030 1035 return nonodes
1031 1036 ancestors = set()
1032 1037 # Turn heads into a dictionary so we can remove 'fake' heads.
1033 1038 # Also, later we will be using it to filter out the heads we can't
1034 1039 # find from roots.
1035 1040 heads = dict.fromkeys(heads, False)
1036 1041 # Start at the top and keep marking parents until we're done.
1037 1042 nodestotag = set(heads)
1038 1043 # Remember where the top was so we can use it as a limit later.
1039 1044 highestrev = max([self.rev(n) for n in nodestotag])
1040 1045 while nodestotag:
1041 1046 # grab a node to tag
1042 1047 n = nodestotag.pop()
1043 1048 # Never tag nullid
1044 1049 if n == nullid:
1045 1050 continue
1046 1051 # A node's revision number represents its place in a
1047 1052 # topologically sorted list of nodes.
1048 1053 r = self.rev(n)
1049 1054 if r >= lowestrev:
1050 1055 if n not in ancestors:
1051 1056 # If we are possibly a descendant of one of the roots
1052 1057 # and we haven't already been marked as an ancestor
1053 1058 ancestors.add(n) # Mark as ancestor
1054 1059 # Add non-nullid parents to list of nodes to tag.
1055 1060 nodestotag.update(
1056 1061 [p for p in self.parents(n) if p != nullid]
1057 1062 )
1058 1063 elif n in heads: # We've seen it before, is it a fake head?
1059 1064 # So it is, real heads should not be the ancestors of
1060 1065 # any other heads.
1061 1066 heads.pop(n)
1062 1067 if not ancestors:
1063 1068 return nonodes
1064 1069 # Now that we have our set of ancestors, we want to remove any
1065 1070 # roots that are not ancestors.
1066 1071
1067 1072 # If one of the roots was nullid, everything is included anyway.
1068 1073 if lowestrev > nullrev:
1069 1074 # But, since we weren't, let's recompute the lowest rev to not
1070 1075 # include roots that aren't ancestors.
1071 1076
1072 1077 # Filter out roots that aren't ancestors of heads
1073 1078 roots = [root for root in roots if root in ancestors]
1074 1079 # Recompute the lowest revision
1075 1080 if roots:
1076 1081 lowestrev = min([self.rev(root) for root in roots])
1077 1082 else:
1078 1083 # No more roots? Return empty list
1079 1084 return nonodes
1080 1085 else:
1081 1086 # We are descending from nullid, and don't need to care about
1082 1087 # any other roots.
1083 1088 lowestrev = nullrev
1084 1089 roots = [nullid]
1085 1090 # Transform our roots list into a set.
1086 1091 descendants = set(roots)
1087 1092 # Also, keep the original roots so we can filter out roots that aren't
1088 1093 # 'real' roots (i.e. are descended from other roots).
1089 1094 roots = descendants.copy()
1090 1095 # Our topologically sorted list of output nodes.
1091 1096 orderedout = []
1092 1097 # Don't start at nullid since we don't want nullid in our output list,
1093 1098 # and if nullid shows up in descendants, empty parents will look like
1094 1099 # they're descendants.
1095 1100 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1096 1101 n = self.node(r)
1097 1102 isdescendant = False
1098 1103 if lowestrev == nullrev: # Everybody is a descendant of nullid
1099 1104 isdescendant = True
1100 1105 elif n in descendants:
1101 1106 # n is already a descendant
1102 1107 isdescendant = True
1103 1108 # This check only needs to be done here because all the roots
1104 1109 # will start being marked as descendants before the loop.
1105 1110 if n in roots:
1106 1111 # If n was a root, check if it's a 'real' root.
1107 1112 p = tuple(self.parents(n))
1108 1113 # If any of its parents are descendants, it's not a root.
1109 1114 if (p[0] in descendants) or (p[1] in descendants):
1110 1115 roots.remove(n)
1111 1116 else:
1112 1117 p = tuple(self.parents(n))
1113 1118 # A node is a descendant if either of its parents is a
1114 1119 # descendant. (We seeded the descendants set with the roots
1115 1120 # up there, remember?)
1116 1121 if (p[0] in descendants) or (p[1] in descendants):
1117 1122 descendants.add(n)
1118 1123 isdescendant = True
1119 1124 if isdescendant and ((ancestors is None) or (n in ancestors)):
1120 1125 # Only include nodes that are both descendants and ancestors.
1121 1126 orderedout.append(n)
1122 1127 if (ancestors is not None) and (n in heads):
1123 1128 # We're trying to figure out which heads are reachable
1124 1129 # from roots.
1125 1130 # Mark this head as having been reached
1126 1131 heads[n] = True
1127 1132 elif ancestors is None:
1128 1133 # Otherwise, we're trying to discover the heads.
1129 1134 # Assume this is a head because if it isn't, the next step
1130 1135 # will eventually remove it.
1131 1136 heads[n] = True
1132 1137 # But, obviously its parents aren't.
1133 1138 for p in self.parents(n):
1134 1139 heads.pop(p, None)
1135 1140 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1136 1141 roots = list(roots)
1137 1142 assert orderedout
1138 1143 assert roots
1139 1144 assert heads
1140 1145 return (orderedout, roots, heads)
1141 1146
1142 1147 def headrevs(self, revs=None):
1143 1148 if revs is None:
1144 1149 try:
1145 1150 return self.index.headrevs()
1146 1151 except AttributeError:
1147 1152 return self._headrevs()
1148 1153 if rustdagop is not None:
1149 1154 return rustdagop.headrevs(self.index, revs)
1150 1155 return dagop.headrevs(revs, self._uncheckedparentrevs)
1151 1156
1152 1157 def computephases(self, roots):
1153 1158 return self.index.computephasesmapsets(roots)
1154 1159
1155 1160 def _headrevs(self):
1156 1161 count = len(self)
1157 1162 if not count:
1158 1163 return [nullrev]
1159 1164 # we won't iterate over filtered revs, so nobody is a head at start
1160 1165 ishead = [0] * (count + 1)
1161 1166 index = self.index
1162 1167 for r in self:
1163 1168 ishead[r] = 1 # I may be a head
1164 1169 e = index[r]
1165 1170 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1166 1171 return [r for r, val in enumerate(ishead) if val]
1167 1172
1168 1173 def heads(self, start=None, stop=None):
1169 1174 """return the list of all nodes that have no children
1170 1175
1171 1176 if start is specified, only heads that are descendants of
1172 1177 start will be returned
1173 1178 if stop is specified, it will consider all the revs from stop
1174 1179 as if they had no children
1175 1180 """
1176 1181 if start is None and stop is None:
1177 1182 if not len(self):
1178 1183 return [nullid]
1179 1184 return [self.node(r) for r in self.headrevs()]
1180 1185
1181 1186 if start is None:
1182 1187 start = nullrev
1183 1188 else:
1184 1189 start = self.rev(start)
1185 1190
1186 1191 stoprevs = set(self.rev(n) for n in stop or [])
1187 1192
1188 1193 revs = dagop.headrevssubset(
1189 1194 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1190 1195 )
1191 1196
1192 1197 return [self.node(rev) for rev in revs]
1193 1198
1194 1199 def children(self, node):
1195 1200 """find the children of a given node"""
1196 1201 c = []
1197 1202 p = self.rev(node)
1198 1203 for r in self.revs(start=p + 1):
1199 1204 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1200 1205 if prevs:
1201 1206 for pr in prevs:
1202 1207 if pr == p:
1203 1208 c.append(self.node(r))
1204 1209 elif p == nullrev:
1205 1210 c.append(self.node(r))
1206 1211 return c
1207 1212
1208 1213 def commonancestorsheads(self, a, b):
1209 1214 """calculate all the heads of the common ancestors of nodes a and b"""
1210 1215 a, b = self.rev(a), self.rev(b)
1211 1216 ancs = self._commonancestorsheads(a, b)
1212 1217 return pycompat.maplist(self.node, ancs)
1213 1218
1214 1219 def _commonancestorsheads(self, *revs):
1215 1220 """calculate all the heads of the common ancestors of revs"""
1216 1221 try:
1217 1222 ancs = self.index.commonancestorsheads(*revs)
1218 1223 except (AttributeError, OverflowError): # C implementation failed
1219 1224 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1220 1225 return ancs
1221 1226
1222 1227 def isancestor(self, a, b):
1223 1228 """return True if node a is an ancestor of node b
1224 1229
1225 1230 A revision is considered an ancestor of itself."""
1226 1231 a, b = self.rev(a), self.rev(b)
1227 1232 return self.isancestorrev(a, b)
1228 1233
1229 1234 def isancestorrev(self, a, b):
1230 1235 """return True if revision a is an ancestor of revision b
1231 1236
1232 1237 A revision is considered an ancestor of itself.
1233 1238
1234 1239 The implementation of this is trivial but the use of
1235 1240 reachableroots is not."""
1236 1241 if a == nullrev:
1237 1242 return True
1238 1243 elif a == b:
1239 1244 return True
1240 1245 elif a > b:
1241 1246 return False
1242 1247 return bool(self.reachableroots(a, [b], [a], includepath=False))
1243 1248
1244 1249 def reachableroots(self, minroot, heads, roots, includepath=False):
1245 1250 """return (heads(::<roots> and <roots>::<heads>))
1246 1251
1247 1252 If includepath is True, return (<roots>::<heads>)."""
1248 1253 try:
1249 1254 return self.index.reachableroots2(
1250 1255 minroot, heads, roots, includepath
1251 1256 )
1252 1257 except AttributeError:
1253 1258 return dagop._reachablerootspure(
1254 1259 self.parentrevs, minroot, roots, heads, includepath
1255 1260 )
1256 1261
1257 1262 def ancestor(self, a, b):
1258 1263 """calculate the "best" common ancestor of nodes a and b"""
1259 1264
1260 1265 a, b = self.rev(a), self.rev(b)
1261 1266 try:
1262 1267 ancs = self.index.ancestors(a, b)
1263 1268 except (AttributeError, OverflowError):
1264 1269 ancs = ancestor.ancestors(self.parentrevs, a, b)
1265 1270 if ancs:
1266 1271 # choose a consistent winner when there's a tie
1267 1272 return min(map(self.node, ancs))
1268 1273 return nullid
1269 1274
1270 1275 def _match(self, id):
1271 1276 if isinstance(id, int):
1272 1277 # rev
1273 1278 return self.node(id)
1274 1279 if len(id) == 20:
1275 1280 # possibly a binary node
1276 1281 # odds of a binary node being all hex in ASCII are 1 in 10**25
1277 1282 try:
1278 1283 node = id
1279 1284 self.rev(node) # quick search the index
1280 1285 return node
1281 1286 except error.LookupError:
1282 1287 pass # may be partial hex id
1283 1288 try:
1284 1289 # str(rev)
1285 1290 rev = int(id)
1286 1291 if b"%d" % rev != id:
1287 1292 raise ValueError
1288 1293 if rev < 0:
1289 1294 rev = len(self) + rev
1290 1295 if rev < 0 or rev >= len(self):
1291 1296 raise ValueError
1292 1297 return self.node(rev)
1293 1298 except (ValueError, OverflowError):
1294 1299 pass
1295 1300 if len(id) == 40:
1296 1301 try:
1297 1302 # a full hex nodeid?
1298 1303 node = bin(id)
1299 1304 self.rev(node)
1300 1305 return node
1301 1306 except (TypeError, error.LookupError):
1302 1307 pass
1303 1308
1304 1309 def _partialmatch(self, id):
1305 1310 # we don't care about wdirfilenodeids as they should always be full hashes
1306 1311 maybewdir = wdirhex.startswith(id)
1307 1312 try:
1308 1313 partial = self.index.partialmatch(id)
1309 1314 if partial and self.hasnode(partial):
1310 1315 if maybewdir:
1311 1316 # single 'ff...' match in radix tree, ambiguous with wdir
1312 1317 raise error.RevlogError
1313 1318 return partial
1314 1319 if maybewdir:
1315 1320 # no 'ff...' match in radix tree, wdir identified
1316 1321 raise error.WdirUnsupported
1317 1322 return None
1318 1323 except error.RevlogError:
1319 1324 # parsers.c radix tree lookup gave multiple matches
1320 1325 # fast path: for unfiltered changelog, radix tree is accurate
1321 1326 if not getattr(self, 'filteredrevs', None):
1322 1327 raise error.AmbiguousPrefixLookupError(
1323 1328 id, self.indexfile, _(b'ambiguous identifier')
1324 1329 )
1325 1330 # fall through to slow path that filters hidden revisions
1326 1331 except (AttributeError, ValueError):
1327 1332 # we are pure python, or key was too short to search radix tree
1328 1333 pass
1329 1334
1330 1335 if id in self._pcache:
1331 1336 return self._pcache[id]
1332 1337
1333 1338 if len(id) <= 40:
1334 1339 try:
1335 1340 # hex(node)[:...]
1336 1341 l = len(id) // 2 # grab an even number of digits
1337 1342 prefix = bin(id[: l * 2])
1338 1343 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1339 1344 nl = [
1340 1345 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1341 1346 ]
1342 1347 if nullhex.startswith(id):
1343 1348 nl.append(nullid)
1344 1349 if len(nl) > 0:
1345 1350 if len(nl) == 1 and not maybewdir:
1346 1351 self._pcache[id] = nl[0]
1347 1352 return nl[0]
1348 1353 raise error.AmbiguousPrefixLookupError(
1349 1354 id, self.indexfile, _(b'ambiguous identifier')
1350 1355 )
1351 1356 if maybewdir:
1352 1357 raise error.WdirUnsupported
1353 1358 return None
1354 1359 except TypeError:
1355 1360 pass
1356 1361
1357 1362 def lookup(self, id):
1358 1363 """locate a node based on:
1359 1364 - revision number or str(revision number)
1360 1365 - nodeid or subset of hex nodeid
1361 1366 """
1362 1367 n = self._match(id)
1363 1368 if n is not None:
1364 1369 return n
1365 1370 n = self._partialmatch(id)
1366 1371 if n:
1367 1372 return n
1368 1373
1369 1374 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1370 1375
1371 1376 def shortest(self, node, minlength=1):
1372 1377 """Find the shortest unambiguous prefix that matches node."""
1373 1378
1374 1379 def isvalid(prefix):
1375 1380 try:
1376 1381 matchednode = self._partialmatch(prefix)
1377 1382 except error.AmbiguousPrefixLookupError:
1378 1383 return False
1379 1384 except error.WdirUnsupported:
1380 1385 # single 'ff...' match
1381 1386 return True
1382 1387 if matchednode is None:
1383 1388 raise error.LookupError(node, self.indexfile, _(b'no node'))
1384 1389 return True
1385 1390
1386 1391 def maybewdir(prefix):
1387 1392 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1388 1393
1389 1394 hexnode = hex(node)
1390 1395
1391 1396 def disambiguate(hexnode, minlength):
1392 1397 """Disambiguate against wdirid."""
1393 1398 for length in range(minlength, 41):
1394 1399 prefix = hexnode[:length]
1395 1400 if not maybewdir(prefix):
1396 1401 return prefix
1397 1402
1398 1403 if not getattr(self, 'filteredrevs', None):
1399 1404 try:
1400 1405 length = max(self.index.shortest(node), minlength)
1401 1406 return disambiguate(hexnode, length)
1402 1407 except error.RevlogError:
1403 1408 if node != wdirid:
1404 1409 raise error.LookupError(node, self.indexfile, _(b'no node'))
1405 1410 except AttributeError:
1406 1411 # Fall through to pure code
1407 1412 pass
1408 1413
1409 1414 if node == wdirid:
1410 1415 for length in range(minlength, 41):
1411 1416 prefix = hexnode[:length]
1412 1417 if isvalid(prefix):
1413 1418 return prefix
1414 1419
1415 1420 for length in range(minlength, 41):
1416 1421 prefix = hexnode[:length]
1417 1422 if isvalid(prefix):
1418 1423 return disambiguate(hexnode, length)
1419 1424
1420 1425 def cmp(self, node, text):
1421 1426 """compare text with a given file revision
1422 1427
1423 1428 returns True if text is different than what is stored.
1424 1429 """
1425 1430 p1, p2 = self.parents(node)
1426 1431 return storageutil.hashrevisionsha1(text, p1, p2) != node
1427 1432
1428 1433 def _cachesegment(self, offset, data):
1429 1434 """Add a segment to the revlog cache.
1430 1435
1431 1436 Accepts an absolute offset and the data that is at that location.
1432 1437 """
1433 1438 o, d = self._chunkcache
1434 1439 # try to add to existing cache
1435 1440 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1436 1441 self._chunkcache = o, d + data
1437 1442 else:
1438 1443 self._chunkcache = offset, data
1439 1444
1440 1445 def _readsegment(self, offset, length, df=None):
1441 1446 """Load a segment of raw data from the revlog.
1442 1447
1443 1448 Accepts an absolute offset, length to read, and an optional existing
1444 1449 file handle to read from.
1445 1450
1446 1451 If an existing file handle is passed, it will be seeked and the
1447 1452 original seek position will NOT be restored.
1448 1453
1449 1454 Returns a str or buffer of raw byte data.
1450 1455
1451 1456 Raises if the requested number of bytes could not be read.
1452 1457 """
1453 1458 # Cache data both forward and backward around the requested
1454 1459 # data, in a fixed size window. This helps speed up operations
1455 1460 # involving reading the revlog backwards.
1456 1461 cachesize = self._chunkcachesize
1457 1462 realoffset = offset & ~(cachesize - 1)
1458 1463 reallength = (
1459 1464 (offset + length + cachesize) & ~(cachesize - 1)
1460 1465 ) - realoffset
1461 1466 with self._datareadfp(df) as df:
1462 1467 df.seek(realoffset)
1463 1468 d = df.read(reallength)
1464 1469
1465 1470 self._cachesegment(realoffset, d)
1466 1471 if offset != realoffset or reallength != length:
1467 1472 startoffset = offset - realoffset
1468 1473 if len(d) - startoffset < length:
1469 1474 raise error.RevlogError(
1470 1475 _(
1471 1476 b'partial read of revlog %s; expected %d bytes from '
1472 1477 b'offset %d, got %d'
1473 1478 )
1474 1479 % (
1475 1480 self.indexfile if self._inline else self.datafile,
1476 1481 length,
1477 1482 realoffset,
1478 1483 len(d) - startoffset,
1479 1484 )
1480 1485 )
1481 1486
1482 1487 return util.buffer(d, startoffset, length)
1483 1488
1484 1489 if len(d) < length:
1485 1490 raise error.RevlogError(
1486 1491 _(
1487 1492 b'partial read of revlog %s; expected %d bytes from offset '
1488 1493 b'%d, got %d'
1489 1494 )
1490 1495 % (
1491 1496 self.indexfile if self._inline else self.datafile,
1492 1497 length,
1493 1498 offset,
1494 1499 len(d),
1495 1500 )
1496 1501 )
1497 1502
1498 1503 return d
1499 1504
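The window arithmetic rounds each request outward to `_chunkcachesize` boundaries (validated to be a power of two in `_loadindex`). Worked numbers, for illustration:

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)                  # 65536
    end = (offset + length + cachesize) & ~(cachesize - 1)  # 131072
    reallength = end - realoffset                           # 65536
    # the cached window [65536, 131072) covers the request [70000, 70100)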
1500 1505 def _getsegment(self, offset, length, df=None):
1501 1506 """Obtain a segment of raw data from the revlog.
1502 1507
1503 1508 Accepts an absolute offset, length of bytes to obtain, and an
1504 1509 optional file handle to the already-opened revlog. If the file
1505 1510 handle is used, its original seek position will not be preserved.
1506 1511
1507 1512 Requests for data may be returned from a cache.
1508 1513
1509 1514 Returns a str or a buffer instance of raw byte data.
1510 1515 """
1511 1516 o, d = self._chunkcache
1512 1517 l = len(d)
1513 1518
1514 1519 # is it in the cache?
1515 1520 cachestart = offset - o
1516 1521 cacheend = cachestart + length
1517 1522 if cachestart >= 0 and cacheend <= l:
1518 1523 if cachestart == 0 and cacheend == l:
1519 1524 return d # avoid a copy
1520 1525 return util.buffer(d, cachestart, cacheend - cachestart)
1521 1526
1522 1527 return self._readsegment(offset, length, df=df)
1523 1528
1524 1529 def _getsegmentforrevs(self, startrev, endrev, df=None):
1525 1530 """Obtain a segment of raw data corresponding to a range of revisions.
1526 1531
1527 1532 Accepts the start and end revisions and an optional already-open
1528 1533 file handle to be used for reading. If the file handle is read, its
1529 1534 seek position will not be preserved.
1530 1535
1531 1536 Requests for data may be satisfied by a cache.
1532 1537
1533 1538 Returns a 2-tuple of (offset, data) for the requested range of
1534 1539 revisions. Offset is the integer offset from the beginning of the
1535 1540 revlog and data is a str or buffer of the raw byte data.
1536 1541
1537 1542 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1538 1543 to determine where each revision's data begins and ends.
1539 1544 """
1540 1545 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1541 1546 # (functions are expensive).
1542 1547 index = self.index
1543 1548 istart = index[startrev]
1544 1549 start = int(istart[0] >> 16)
1545 1550 if startrev == endrev:
1546 1551 end = start + istart[1]
1547 1552 else:
1548 1553 iend = index[endrev]
1549 1554 end = int(iend[0] >> 16) + iend[1]
1550 1555
1551 1556 if self._inline:
1552 1557 start += (startrev + 1) * self._io.size
1553 1558 end += (endrev + 1) * self._io.size
1554 1559 length = end - start
1555 1560
1556 1561 return start, self._getsegment(start, length, df=df)
1557 1562
1558 1563 def _chunk(self, rev, df=None):
1559 1564 """Obtain a single decompressed chunk for a revision.
1560 1565
1561 1566 Accepts an integer revision and an optional already-open file handle
1562 1567 to be used for reading. If used, the seek position of the file will not
1563 1568 be preserved.
1564 1569
1565 1570 Returns a str holding uncompressed data for the requested revision.
1566 1571 """
1567 1572 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1568 1573
1569 1574 def _chunks(self, revs, df=None, targetsize=None):
1570 1575 """Obtain decompressed chunks for the specified revisions.
1571 1576
1572 1577 Accepts an iterable of numeric revisions that are assumed to be in
1573 1578 ascending order. Also accepts an optional already-open file handle
1574 1579 to be used for reading. If used, the seek position of the file will
1575 1580 not be preserved.
1576 1581
1577 1582 This function is similar to calling ``self._chunk()`` multiple times,
1578 1583 but is faster.
1579 1584
1580 1585 Returns a list with decompressed data for each requested revision.
1581 1586 """
1582 1587 if not revs:
1583 1588 return []
1584 1589 start = self.start
1585 1590 length = self.length
1586 1591 inline = self._inline
1587 1592 iosize = self._io.size
1588 1593 buffer = util.buffer
1589 1594
1590 1595 l = []
1591 1596 ladd = l.append
1592 1597
1593 1598 if not self._withsparseread:
1594 1599 slicedchunks = (revs,)
1595 1600 else:
1596 1601 slicedchunks = deltautil.slicechunk(
1597 1602 self, revs, targetsize=targetsize
1598 1603 )
1599 1604
1600 1605 for revschunk in slicedchunks:
1601 1606 firstrev = revschunk[0]
1602 1607 # Skip trailing revisions with empty diff
1603 1608 for lastrev in revschunk[::-1]:
1604 1609 if length(lastrev) != 0:
1605 1610 break
1606 1611
1607 1612 try:
1608 1613 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1609 1614 except OverflowError:
1610 1615 # issue4215 - we can't cache a run of chunks greater than
1611 1616 # 2G on Windows
1612 1617 return [self._chunk(rev, df=df) for rev in revschunk]
1613 1618
1614 1619 decomp = self.decompress
1615 1620 for rev in revschunk:
1616 1621 chunkstart = start(rev)
1617 1622 if inline:
1618 1623 chunkstart += (rev + 1) * iosize
1619 1624 chunklength = length(rev)
1620 1625 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1621 1626
1622 1627 return l
1623 1628
1624 1629 def _chunkclear(self):
1625 1630 """Clear the raw chunk cache."""
1626 1631 self._chunkcache = (0, b'')
1627 1632
1628 1633 def deltaparent(self, rev):
1629 1634 """return deltaparent of the given revision"""
1630 1635 base = self.index[rev][3]
1631 1636 if base == rev:
1632 1637 return nullrev
1633 1638 elif self._generaldelta:
1634 1639 return base
1635 1640 else:
1636 1641 return rev - 1
1637 1642
1638 1643 def issnapshot(self, rev):
1639 1644         """tells whether rev is a snapshot"""
1641 1646 if not self._sparserevlog:
1642 1647 return self.deltaparent(rev) == nullrev
1643 1648 elif util.safehasattr(self.index, b'issnapshot'):
1644 1649 # directly assign the method to cache the testing and access
1645 1650 self.issnapshot = self.index.issnapshot
1646 1651 return self.issnapshot(rev)
1647 1652 if rev == nullrev:
1648 1653 return True
1649 1654 entry = self.index[rev]
1650 1655 base = entry[3]
1651 1656 if base == rev:
1652 1657 return True
1653 1658 if base == nullrev:
1654 1659 return True
1655 1660 p1 = entry[5]
1656 1661 p2 = entry[6]
1657 1662 if base == p1 or base == p2:
1658 1663 return False
1659 1664 return self.issnapshot(base)
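    # Illustration of the recursion above (a sketch): a revision is a
    # snapshot when its delta chain never goes through one of its parents,
    # i.e. it is a full text (base == rev), a delta against nullrev, or a
    # delta whose base is itself a snapshot:
    #
    #   rl.issnapshot(rev)  # True for full and intermediate snapshots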
1660 1665
1661 1666 def snapshotdepth(self, rev):
1662 1667         """number of snapshots in the chain before this one"""
1663 1668 if not self.issnapshot(rev):
1664 1669             raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1665 1670 return len(self._deltachain(rev)[0]) - 1
1666 1671
1667 1672 def revdiff(self, rev1, rev2):
1668 1673 """return or calculate a delta between two revisions
1669 1674
1670 1675 The delta calculated is in binary form and is intended to be written to
1671 1676 revlog data directly. So this function needs raw revision data.
1672 1677 """
1673 1678 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1674 1679 return bytes(self._chunk(rev2))
1675 1680
1676 1681 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
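    # Round-trip sketch: the returned delta is in binary patch form, so it
    # can be re-applied with ``mdiff.patches`` (illustrative only):
    #
    #   delta = rl.revdiff(rev1, rev2)
    #   assert mdiff.patches(rl.rawdata(rev1), [delta]) == rl.rawdata(rev2)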
1677 1682
1678 1683 def _processflags(self, text, flags, operation, raw=False):
1679 1684 """deprecated entry point to access flag processors"""
1680 1685         msg = b'_processflags(...) is deprecated, use the specialized variant'
1681 1686 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1682 1687 if raw:
1683 1688 return text, flagutil.processflagsraw(self, text, flags)
1684 1689 elif operation == b'read':
1685 1690 return flagutil.processflagsread(self, text, flags)
1686 1691 else: # write operation
1687 1692 return flagutil.processflagswrite(self, text, flags)
1688 1693
1689 1694 def revision(self, nodeorrev, _df=None, raw=False):
1690 1695 """return an uncompressed revision of a given node or revision
1691 1696 number.
1692 1697
1693 1698 _df - an existing file handle to read from. (internal-only)
1694 1699 raw - an optional argument specifying if the revision data is to be
1695 1700 treated as raw data when applying flag transforms. 'raw' should be set
1696 1701 to True when generating changegroups or in debug commands.
1697 1702 """
1698 1703 if raw:
1699 1704 msg = (
1700 1705 b'revlog.revision(..., raw=True) is deprecated, '
1701 1706 b'use revlog.rawdata(...)'
1702 1707 )
1703 1708 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1704 1709 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1705 1710
1706 1711 def sidedata(self, nodeorrev, _df=None):
1707 1712 """a map of extra data related to the changeset but not part of the hash
1708 1713
1709 1714         This function currently returns a dictionary. However, a more
1710 1715         advanced mapping object will likely be used in the future for more
1711 1716         efficient/lazy code.
1712 1717 """
1713 1718 return self._revisiondata(nodeorrev, _df)[1]
1714 1719
1715 1720 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1716 1721 # deal with <nodeorrev> argument type
1717 1722 if isinstance(nodeorrev, int):
1718 1723 rev = nodeorrev
1719 1724 node = self.node(rev)
1720 1725 else:
1721 1726 node = nodeorrev
1722 1727 rev = None
1723 1728
1724 1729 # fast path the special `nullid` rev
1725 1730 if node == nullid:
1726 1731 return b"", {}
1727 1732
1728 1733 # The text as stored inside the revlog. Might be the revision or might
1729 1734 # need to be processed to retrieve the revision.
1730 1735 rawtext = None
1731 1736
1732 1737 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1733 1738
1734 1739 if raw and validated:
1735 1740             # if we don't want to process the raw text and the raw
1736 1741             # text is cached, we can exit early.
1737 1742 return rawtext, {}
1738 1743 if rev is None:
1739 1744 rev = self.rev(node)
1740 1745             # the revlog's flags for this revision
1741 1746             # (these usually alter its state or content)
1742 1747 flags = self.flags(rev)
1743 1748
1744 1749 if validated and flags == REVIDX_DEFAULT_FLAGS:
1745 1750 # no extra flags set, no flag processor runs, text = rawtext
1746 1751 return rawtext, {}
1747 1752
1748 1753 sidedata = {}
1749 1754 if raw:
1750 1755 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1751 1756 text = rawtext
1752 1757 else:
1753 1758 try:
1754 1759 r = flagutil.processflagsread(self, rawtext, flags)
1755 1760 except error.SidedataHashError as exc:
1756 1761 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1757 1762 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1758 1763 raise error.RevlogError(msg)
1759 1764 text, validatehash, sidedata = r
1760 1765 if validatehash:
1761 1766 self.checkhash(text, node, rev=rev)
1762 1767 if not validated:
1763 1768 self._revisioncache = (node, rev, rawtext)
1764 1769
1765 1770 return text, sidedata
1766 1771
1767 1772 def _rawtext(self, node, rev, _df=None):
1768 1773 """return the possibly unvalidated rawtext for a revision
1769 1774
1770 1775 returns (rev, rawtext, validated)
1771 1776 """
1772 1777
1773 1778 # revision in the cache (could be useful to apply delta)
1774 1779 cachedrev = None
1775 1780 # An intermediate text to apply deltas to
1776 1781 basetext = None
1777 1782
1778 1783 # Check if we have the entry in cache
1779 1784 # The cache entry looks like (node, rev, rawtext)
1780 1785 if self._revisioncache:
1781 1786 if self._revisioncache[0] == node:
1782 1787 return (rev, self._revisioncache[2], True)
1783 1788 cachedrev = self._revisioncache[1]
1784 1789
1785 1790 if rev is None:
1786 1791 rev = self.rev(node)
1787 1792
1788 1793 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1789 1794 if stopped:
1790 1795 basetext = self._revisioncache[2]
1791 1796
1792 1797 # drop cache to save memory, the caller is expected to
1793 1798 # update self._revisioncache after validating the text
1794 1799 self._revisioncache = None
1795 1800
1796 1801 targetsize = None
1797 1802 rawsize = self.index[rev][2]
1798 1803 if 0 <= rawsize:
1799 1804 targetsize = 4 * rawsize
1800 1805
1801 1806 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1802 1807 if basetext is None:
1803 1808 basetext = bytes(bins[0])
1804 1809 bins = bins[1:]
1805 1810
1806 1811 rawtext = mdiff.patches(basetext, bins)
1807 1812 del basetext # let us have a chance to free memory early
1808 1813 return (rev, rawtext, False)
1809 1814
1810 1815 def rawdata(self, nodeorrev, _df=None):
1811 1816 """return an uncompressed raw data of a given node or revision number.
1812 1817
1813 1818 _df - an existing file handle to read from. (internal-only)
1814 1819 """
1815 1820 return self._revisiondata(nodeorrev, _df, raw=True)[0]
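    # Migration sketch for the deprecation warned about in ``revision()``:
    #
    #   rl.revision(node, raw=True)  # deprecated since 5.2
    #   rl.rawdata(node)             # preferred equivalent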
1816 1821
1817 1822 def hash(self, text, p1, p2):
1818 1823 """Compute a node hash.
1819 1824
1820 1825 Available as a function so that subclasses can replace the hash
1821 1826 as needed.
1822 1827 """
1823 1828 return storageutil.hashrevisionsha1(text, p1, p2)
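    # The default implementation hashes the parents, ordered lowest first,
    # followed by the text. A minimal pure-Python sketch of the same
    # computation (ignoring the nullid fast paths storageutil may apply):
    #
    #   import hashlib
    #   def samplehash(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()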
1824 1829
1825 1830 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1826 1831 """Check node hash integrity.
1827 1832
1828 1833 Available as a function so that subclasses can extend hash mismatch
1829 1834 behaviors as needed.
1830 1835 """
1831 1836 try:
1832 1837 if p1 is None and p2 is None:
1833 1838 p1, p2 = self.parents(node)
1834 1839 if node != self.hash(text, p1, p2):
1835 1840 # Clear the revision cache on hash failure. The revision cache
1836 1841 # only stores the raw revision and clearing the cache does have
1837 1842 # the side-effect that we won't have a cache hit when the raw
1838 1843 # revision data is accessed. But this case should be rare and
1839 1844 # it is extra work to teach the cache about the hash
1840 1845 # verification state.
1841 1846 if self._revisioncache and self._revisioncache[0] == node:
1842 1847 self._revisioncache = None
1843 1848
1844 1849 revornode = rev
1845 1850 if revornode is None:
1846 1851 revornode = templatefilters.short(hex(node))
1847 1852 raise error.RevlogError(
1848 1853 _(b"integrity check failed on %s:%s")
1849 1854 % (self.indexfile, pycompat.bytestr(revornode))
1850 1855 )
1851 1856 except error.RevlogError:
1852 1857 if self._censorable and storageutil.iscensoredtext(text):
1853 1858 raise error.CensoredNodeError(self.indexfile, node, text)
1854 1859 raise
1855 1860
1856 1861 def _enforceinlinesize(self, tr, fp=None):
1857 1862 """Check if the revlog is too big for inline and convert if so.
1858 1863
1859 1864 This should be called after revisions are added to the revlog. If the
1860 1865 revlog has grown too large to be an inline revlog, it will convert it
1861 1866 to use multiple index and data files.
1862 1867 """
1863 1868 tiprev = len(self) - 1
1864 1869 if (
1865 1870 not self._inline
1866 1871 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1867 1872 ):
1868 1873 return
1869 1874
1870 1875 trinfo = tr.find(self.indexfile)
1871 1876 if trinfo is None:
1872 1877 raise error.RevlogError(
1873 1878 _(b"%s not found in the transaction") % self.indexfile
1874 1879 )
1875 1880
1876 1881 trindex = trinfo[2]
1877 1882 if trindex is not None:
1878 1883 dataoff = self.start(trindex)
1879 1884 else:
1880 1885 # revlog was stripped at start of transaction, use all leftover data
1881 1886 trindex = len(self) - 1
1882 1887 dataoff = self.end(tiprev)
1883 1888
1884 1889 tr.add(self.datafile, dataoff)
1885 1890
1886 1891 if fp:
1887 1892 fp.flush()
1888 1893 fp.close()
1889 1894 # We can't use the cached file handle after close(). So prevent
1890 1895 # its usage.
1891 1896 self._writinghandles = None
1892 1897
1893 1898 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1894 1899 for r in self:
1895 1900 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1896 1901
1897 1902 with self._indexfp(b'w') as fp:
1898 1903 self.version &= ~FLAG_INLINE_DATA
1899 1904 self._inline = False
1900 1905 io = self._io
1901 1906 for i in self:
1902 1907 e = io.packentry(self.index[i], self.node, self.version, i)
1903 1908 fp.write(e)
1904 1909
1905 1910         # the temp file replaces the real index when we exit the context
1906 1911         # manager
1907 1912
1908 1913 tr.replace(self.indexfile, trindex * self._io.size)
1909 1914 self._chunkclear()
1910 1915
1911 1916 def _nodeduplicatecallback(self, transaction, node):
1912 1917         """called when trying to add a node already stored."""
1914 1919
1915 1920 def addrevision(
1916 1921 self,
1917 1922 text,
1918 1923 transaction,
1919 1924 link,
1920 1925 p1,
1921 1926 p2,
1922 1927 cachedelta=None,
1923 1928 node=None,
1924 1929 flags=REVIDX_DEFAULT_FLAGS,
1925 1930 deltacomputer=None,
1926 1931 sidedata=None,
1927 1932 ):
1928 1933 """add a revision to the log
1929 1934
1930 1935 text - the revision data to add
1931 1936 transaction - the transaction object used for rollback
1932 1937 link - the linkrev data to add
1933 1938 p1, p2 - the parent nodeids of the revision
1934 1939 cachedelta - an optional precomputed delta
1935 1940 node - nodeid of revision; typically node is not specified, and it is
1936 1941             computed by default as hash(text, p1, p2); however, subclasses might
1937 1942             use a different hashing method (and override checkhash() in that case)
1938 1943 flags - the known flags to set on the revision
1939 1944 deltacomputer - an optional deltacomputer instance shared between
1940 1945 multiple calls
1941 1946 """
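        # Typical call shape (a sketch; ``tr`` is an open transaction and
        # ``p1``/``p2`` are binary parent nodeids):
        #
        #   node = rl.addrevision(b'new file content', tr, linkrev, p1, p2)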
1942 1947 if link == nullrev:
1943 1948 raise error.RevlogError(
1944 1949 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1945 1950 )
1946 1951
1947 1952 if sidedata is None:
1948 1953 sidedata = {}
1949 1954 flags = flags & ~REVIDX_SIDEDATA
1950 1955 elif not self.hassidedata:
1951 1956 raise error.ProgrammingError(
1952 1957                 _(b"trying to add sidedata to a revlog that does not support them")
1953 1958 )
1954 1959 else:
1955 1960 flags |= REVIDX_SIDEDATA
1956 1961
1957 1962 if flags:
1958 1963 node = node or self.hash(text, p1, p2)
1959 1964
1960 1965 rawtext, validatehash = flagutil.processflagswrite(
1961 1966 self, text, flags, sidedata=sidedata
1962 1967 )
1963 1968
1964 1969 # If the flag processor modifies the revision data, ignore any provided
1965 1970 # cachedelta.
1966 1971 if rawtext != text:
1967 1972 cachedelta = None
1968 1973
1969 1974 if len(rawtext) > _maxentrysize:
1970 1975 raise error.RevlogError(
1971 1976 _(
1972 1977 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
1973 1978 )
1974 1979 % (self.indexfile, len(rawtext))
1975 1980 )
1976 1981
1977 1982 node = node or self.hash(rawtext, p1, p2)
1978 1983 if node in self.nodemap:
1979 1984 return node
1980 1985
1981 1986 if validatehash:
1982 1987 self.checkhash(rawtext, node, p1=p1, p2=p2)
1983 1988
1984 1989 return self.addrawrevision(
1985 1990 rawtext,
1986 1991 transaction,
1987 1992 link,
1988 1993 p1,
1989 1994 p2,
1990 1995 node,
1991 1996 flags,
1992 1997 cachedelta=cachedelta,
1993 1998 deltacomputer=deltacomputer,
1994 1999 )
1995 2000
1996 2001 def addrawrevision(
1997 2002 self,
1998 2003 rawtext,
1999 2004 transaction,
2000 2005 link,
2001 2006 p1,
2002 2007 p2,
2003 2008 node,
2004 2009 flags,
2005 2010 cachedelta=None,
2006 2011 deltacomputer=None,
2007 2012 ):
2008 2013 """add a raw revision with known flags, node and parents
2009 2014         useful when reusing a revision not stored in this revlog (e.g. received
2010 2015         over the wire, or read from an external bundle).
2011 2016 """
2012 2017 dfh = None
2013 2018 if not self._inline:
2014 2019 dfh = self._datafp(b"a+")
2015 2020 ifh = self._indexfp(b"a+")
2016 2021 try:
2017 2022 return self._addrevision(
2018 2023 node,
2019 2024 rawtext,
2020 2025 transaction,
2021 2026 link,
2022 2027 p1,
2023 2028 p2,
2024 2029 flags,
2025 2030 cachedelta,
2026 2031 ifh,
2027 2032 dfh,
2028 2033 deltacomputer=deltacomputer,
2029 2034 )
2030 2035 finally:
2031 2036 if dfh:
2032 2037 dfh.close()
2033 2038 ifh.close()
2034 2039
2035 2040 def compress(self, data):
2036 2041 """Generate a possibly-compressed representation of data."""
2037 2042 if not data:
2038 2043 return b'', data
2039 2044
2040 2045 compressed = self._compressor.compress(data)
2041 2046
2042 2047 if compressed:
2043 2048 # The revlog compressor added the header in the returned data.
2044 2049 return b'', compressed
2045 2050
2046 2051 if data[0:1] == b'\0':
2047 2052 return b'', data
2048 2053 return b'u', data
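    # The header convention illustrated (a sketch): an empty header means
    # the compressor embedded its own marker in the data, b'u' flags
    # literal data, and a leading NUL byte is already unambiguous:
    #
    #   h, d = rl.compress(b'x' * 1024)  # likely (b'', <zlib data>)
    #   h, d = rl.compress(b'tiny')      # likely (b'u', b'tiny')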
2049 2054
2050 2055 def decompress(self, data):
2051 2056 """Decompress a revlog chunk.
2052 2057
2053 2058 The chunk is expected to begin with a header identifying the
2054 2059 format type so it can be routed to an appropriate decompressor.
2055 2060 """
2056 2061 if not data:
2057 2062 return data
2058 2063
2059 2064 # Revlogs are read much more frequently than they are written and many
2060 2065 # chunks only take microseconds to decompress, so performance is
2061 2066 # important here.
2062 2067 #
2063 2068 # We can make a few assumptions about revlogs:
2064 2069 #
2065 2070 # 1) the majority of chunks will be compressed (as opposed to inline
2066 2071 # raw data).
2067 2072         # 2) decompressing *any* data will likely be at least 10x slower than
2068 2073 # returning raw inline data.
2069 2074 # 3) we want to prioritize common and officially supported compression
2070 2075 # engines
2071 2076 #
2072 2077 # It follows that we want to optimize for "decompress compressed data
2073 2078 # when encoded with common and officially supported compression engines"
2074 2079 # case over "raw data" and "data encoded by less common or non-official
2075 2080 # compression engines." That is why we have the inline lookup first
2076 2081 # followed by the compengines lookup.
2077 2082 #
2078 2083 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2079 2084 # compressed chunks. And this matters for changelog and manifest reads.
2080 2085 t = data[0:1]
2081 2086
2082 2087 if t == b'x':
2083 2088 try:
2084 2089 return _zlibdecompress(data)
2085 2090 except zlib.error as e:
2086 2091 raise error.RevlogError(
2087 2092 _(b'revlog decompress error: %s')
2088 2093 % stringutil.forcebytestr(e)
2089 2094 )
2090 2095 # '\0' is more common than 'u' so it goes first.
2091 2096 elif t == b'\0':
2092 2097 return data
2093 2098 elif t == b'u':
2094 2099 return util.buffer(data, 1)
2095 2100
2096 2101 try:
2097 2102 compressor = self._decompressors[t]
2098 2103 except KeyError:
2099 2104 try:
2100 2105 engine = util.compengines.forrevlogheader(t)
2101 2106 compressor = engine.revlogcompressor(self._compengineopts)
2102 2107 self._decompressors[t] = compressor
2103 2108 except KeyError:
2104 2109 raise error.RevlogError(_(b'unknown compression type %r') % t)
2105 2110
2106 2111 return compressor.decompress(data)
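    # Round-trip sketch pairing ``compress`` and ``decompress``; the
    # concatenated header and body is exactly what ``_writeentry`` stores:
    #
    #   h, d = rl.compress(rawtext)
    #   assert bytes(rl.decompress(h + d)) == rawtext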
2107 2112
2108 2113 def _addrevision(
2109 2114 self,
2110 2115 node,
2111 2116 rawtext,
2112 2117 transaction,
2113 2118 link,
2114 2119 p1,
2115 2120 p2,
2116 2121 flags,
2117 2122 cachedelta,
2118 2123 ifh,
2119 2124 dfh,
2120 2125 alwayscache=False,
2121 2126 deltacomputer=None,
2122 2127 ):
2123 2128 """internal function to add revisions to the log
2124 2129
2125 2130 see addrevision for argument descriptions.
2126 2131
2127 2132 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2128 2133
2129 2134 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2130 2135 be used.
2131 2136
2132 2137 invariants:
2133 2138 - rawtext is optional (can be None); if not set, cachedelta must be set.
2134 2139 if both are set, they must correspond to each other.
2135 2140 """
2136 2141 if node == nullid:
2137 2142 raise error.RevlogError(
2138 2143 _(b"%s: attempt to add null revision") % self.indexfile
2139 2144 )
2140 2145 if node == wdirid or node in wdirfilenodeids:
2141 2146 raise error.RevlogError(
2142 2147 _(b"%s: attempt to add wdir revision") % self.indexfile
2143 2148 )
2144 2149
2145 2150 if self._inline:
2146 2151 fh = ifh
2147 2152 else:
2148 2153 fh = dfh
2149 2154
2150 2155 btext = [rawtext]
2151 2156
2152 2157 curr = len(self)
2153 2158 prev = curr - 1
2154 2159 offset = self.end(prev)
2155 2160 p1r, p2r = self.rev(p1), self.rev(p2)
2156 2161
2157 2162 # full versions are inserted when the needed deltas
2158 2163 # become comparable to the uncompressed text
2159 2164 if rawtext is None:
2160 2165 # need rawtext size, before changed by flag processors, which is
2161 2166 # the non-raw size. use revlog explicitly to avoid filelog's extra
2162 2167 # logic that might remove metadata size.
2163 2168 textlen = mdiff.patchedsize(
2164 2169 revlog.size(self, cachedelta[0]), cachedelta[1]
2165 2170 )
2166 2171 else:
2167 2172 textlen = len(rawtext)
2168 2173
2169 2174 if deltacomputer is None:
2170 2175 deltacomputer = deltautil.deltacomputer(self)
2171 2176
2172 2177 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2173 2178
2174 2179 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2175 2180
2176 2181 e = (
2177 2182 offset_type(offset, flags),
2178 2183 deltainfo.deltalen,
2179 2184 textlen,
2180 2185 deltainfo.base,
2181 2186 link,
2182 2187 p1r,
2183 2188 p2r,
2184 2189 node,
2185 2190 )
2186 2191 self.index.append(e)
2187 2192 self.nodemap[node] = curr
2188 2193
2189 2194         # Reset the pure node cache start lookup offset to account for the new
2190 2195         # revision.
2191 2196 if self._nodepos is not None:
2192 2197 self._nodepos = curr
2193 2198
2194 2199 entry = self._io.packentry(e, self.node, self.version, curr)
2195 2200 self._writeentry(
2196 2201 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2197 2202 )
2198 2203
2199 2204 rawtext = btext[0]
2200 2205
2201 2206 if alwayscache and rawtext is None:
2202 2207 rawtext = deltacomputer.buildtext(revinfo, fh)
2203 2208
2204 2209 if type(rawtext) == bytes: # only accept immutable objects
2205 2210 self._revisioncache = (node, curr, rawtext)
2206 2211 self._chainbasecache[curr] = deltainfo.chainbase
2207 2212 return node
2208 2213
2209 2214 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2210 2215 # Files opened in a+ mode have inconsistent behavior on various
2211 2216 # platforms. Windows requires that a file positioning call be made
2212 2217 # when the file handle transitions between reads and writes. See
2213 2218 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2214 2219 # platforms, Python or the platform itself can be buggy. Some versions
2215 2220 # of Solaris have been observed to not append at the end of the file
2216 2221 # if the file was seeked to before the end. See issue4943 for more.
2217 2222 #
2218 2223 # We work around this issue by inserting a seek() before writing.
2219 2224 # Note: This is likely not necessary on Python 3. However, because
2220 2225 # the file handle is reused for reads and may be seeked there, we need
2221 2226 # to be careful before changing this.
2222 2227 ifh.seek(0, os.SEEK_END)
2223 2228 if dfh:
2224 2229 dfh.seek(0, os.SEEK_END)
2225 2230
2226 2231 curr = len(self) - 1
2227 2232 if not self._inline:
2228 2233 transaction.add(self.datafile, offset)
2229 2234 transaction.add(self.indexfile, curr * len(entry))
2230 2235 if data[0]:
2231 2236 dfh.write(data[0])
2232 2237 dfh.write(data[1])
2233 2238 ifh.write(entry)
2234 2239 else:
2235 2240 offset += curr * self._io.size
2236 2241 transaction.add(self.indexfile, offset, curr)
2237 2242 ifh.write(entry)
2238 2243 ifh.write(data[0])
2239 2244 ifh.write(data[1])
2240 2245 self._enforceinlinesize(transaction, ifh)
2241 2246
2242 2247 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2243 2248 """
2244 2249 add a delta group
2245 2250
2246 2251 given a set of deltas, add them to the revision log. the
2247 2252 first delta is against its parent, which should be in our
2248 2253 log, the rest are against the previous delta.
2249 2254
2250 2255 If ``addrevisioncb`` is defined, it will be called with arguments of
2251 2256 this revlog and the node that was added.
2252 2257 """
2253 2258
2254 2259 if self._writinghandles:
2255 2260 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2256 2261
2257 2262 nodes = []
2258 2263
2259 2264 r = len(self)
2260 2265 end = 0
2261 2266 if r:
2262 2267 end = self.end(r - 1)
2263 2268 ifh = self._indexfp(b"a+")
2264 2269 isize = r * self._io.size
2265 2270 if self._inline:
2266 2271 transaction.add(self.indexfile, end + isize, r)
2267 2272 dfh = None
2268 2273 else:
2269 2274 transaction.add(self.indexfile, isize, r)
2270 2275 transaction.add(self.datafile, end)
2271 2276 dfh = self._datafp(b"a+")
2272 2277
2273 2278 def flush():
2274 2279 if dfh:
2275 2280 dfh.flush()
2276 2281 ifh.flush()
2277 2282
2278 2283 self._writinghandles = (ifh, dfh)
2279 2284
2280 2285 try:
2281 2286 deltacomputer = deltautil.deltacomputer(self)
2282 2287 # loop through our set of deltas
2283 2288 for data in deltas:
2284 2289 node, p1, p2, linknode, deltabase, delta, flags = data
2285 2290 link = linkmapper(linknode)
2286 2291 flags = flags or REVIDX_DEFAULT_FLAGS
2287 2292
2288 2293 nodes.append(node)
2289 2294
2290 2295 if node in self.nodemap:
2291 2296 self._nodeduplicatecallback(transaction, node)
2292 2297 # this can happen if two branches make the same change
2293 2298 continue
2294 2299
2295 2300 for p in (p1, p2):
2296 2301 if p not in self.nodemap:
2297 2302 raise error.LookupError(
2298 2303 p, self.indexfile, _(b'unknown parent')
2299 2304 )
2300 2305
2301 2306 if deltabase not in self.nodemap:
2302 2307 raise error.LookupError(
2303 2308 deltabase, self.indexfile, _(b'unknown delta base')
2304 2309 )
2305 2310
2306 2311 baserev = self.rev(deltabase)
2307 2312
2308 2313 if baserev != nullrev and self.iscensored(baserev):
2309 2314 # if base is censored, delta must be full replacement in a
2310 2315 # single patch operation
2311 2316 hlen = struct.calcsize(b">lll")
2312 2317 oldlen = self.rawsize(baserev)
2313 2318 newlen = len(delta) - hlen
2314 2319 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2315 2320 raise error.CensoredBaseError(
2316 2321 self.indexfile, self.node(baserev)
2317 2322 )
2318 2323
2319 2324 if not flags and self._peek_iscensored(baserev, delta, flush):
2320 2325 flags |= REVIDX_ISCENSORED
2321 2326
2322 2327 # We assume consumers of addrevisioncb will want to retrieve
2323 2328 # the added revision, which will require a call to
2324 2329 # revision(). revision() will fast path if there is a cache
2325 2330 # hit. So, we tell _addrevision() to always cache in this case.
2326 2331 # We're only using addgroup() in the context of changegroup
2327 2332 # generation so the revision data can always be handled as raw
2328 2333 # by the flagprocessor.
2329 2334 self._addrevision(
2330 2335 node,
2331 2336 None,
2332 2337 transaction,
2333 2338 link,
2334 2339 p1,
2335 2340 p2,
2336 2341 flags,
2337 2342 (baserev, delta),
2338 2343 ifh,
2339 2344 dfh,
2340 2345 alwayscache=bool(addrevisioncb),
2341 2346 deltacomputer=deltacomputer,
2342 2347 )
2343 2348
2344 2349 if addrevisioncb:
2345 2350 addrevisioncb(self, node)
2346 2351
2347 2352 if not dfh and not self._inline:
2348 2353 # addrevision switched from inline to conventional
2349 2354 # reopen the index
2350 2355 ifh.close()
2351 2356 dfh = self._datafp(b"a+")
2352 2357 ifh = self._indexfp(b"a+")
2353 2358 self._writinghandles = (ifh, dfh)
2354 2359 finally:
2355 2360 self._writinghandles = None
2356 2361
2357 2362 if dfh:
2358 2363 dfh.close()
2359 2364 ifh.close()
2360 2365
2361 2366 return nodes
2362 2367
2363 2368 def iscensored(self, rev):
2364 2369 """Check if a file revision is censored."""
2365 2370 if not self._censorable:
2366 2371 return False
2367 2372
2368 2373 return self.flags(rev) & REVIDX_ISCENSORED
2369 2374
2370 2375 def _peek_iscensored(self, baserev, delta, flush):
2371 2376 """Quickly check if a delta produces a censored revision."""
2372 2377 if not self._censorable:
2373 2378 return False
2374 2379
2375 2380 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2376 2381
2377 2382 def getstrippoint(self, minlink):
2378 2383 """find the minimum rev that must be stripped to strip the linkrev
2379 2384
2380 2385 Returns a tuple containing the minimum rev and a set of all revs that
2381 2386 have linkrevs that will be broken by this strip.
2382 2387 """
2383 2388 return storageutil.resolvestripinfo(
2384 2389 minlink,
2385 2390 len(self) - 1,
2386 2391 self.headrevs(),
2387 2392 self.linkrev,
2388 2393 self.parentrevs,
2389 2394 )
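    # Usage sketch: callers typically truncate from the returned revision
    # onward, then re-add any revisions whose linkrevs were not broken:
    #
    #   striprev, brokenrevs = rl.getstrippoint(minlink)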
2390 2395
2391 2396 def strip(self, minlink, transaction):
2392 2397 """truncate the revlog on the first revision with a linkrev >= minlink
2393 2398
2394 2399 This function is called when we're stripping revision minlink and
2395 2400 its descendants from the repository.
2396 2401
2397 2402 We have to remove all revisions with linkrev >= minlink, because
2398 2403 the equivalent changelog revisions will be renumbered after the
2399 2404 strip.
2400 2405
2401 2406 So we truncate the revlog on the first of these revisions, and
2402 2407 trust that the caller has saved the revisions that shouldn't be
2403 2408 removed and that it'll re-add them after this truncation.
2404 2409 """
2405 2410 if len(self) == 0:
2406 2411 return
2407 2412
2408 2413 rev, _ = self.getstrippoint(minlink)
2409 2414 if rev == len(self):
2410 2415 return
2411 2416
2412 2417 # first truncate the files on disk
2413 2418 end = self.start(rev)
2414 2419 if not self._inline:
2415 2420 transaction.add(self.datafile, end)
2416 2421 end = rev * self._io.size
2417 2422 else:
2418 2423 end += rev * self._io.size
2419 2424
2420 2425 transaction.add(self.indexfile, end)
2421 2426
2422 2427 # then reset internal state in memory to forget those revisions
2423 2428 self._revisioncache = None
2424 2429 self._chaininfocache = {}
2425 2430 self._chunkclear()
2426 2431 for x in pycompat.xrange(rev, len(self)):
2427 2432 del self.nodemap[self.node(x)]
2428 2433
2429 2434 del self.index[rev:-1]
2430 2435 self._nodepos = None
2431 2436
2432 2437 def checksize(self):
2433 2438 """Check size of index and data files
2434 2439
2435 2440 return a (dd, di) tuple.
2436 2441 - dd: extra bytes for the "data" file
2437 2442 - di: extra bytes for the "index" file
2438 2443
2439 2444 A healthy revlog will return (0, 0).
2440 2445 """
2441 2446 expected = 0
2442 2447 if len(self):
2443 2448 expected = max(0, self.end(len(self) - 1))
2444 2449
2445 2450 try:
2446 2451 with self._datafp() as f:
2447 2452 f.seek(0, io.SEEK_END)
2448 2453 actual = f.tell()
2449 2454 dd = actual - expected
2450 2455 except IOError as inst:
2451 2456 if inst.errno != errno.ENOENT:
2452 2457 raise
2453 2458 dd = 0
2454 2459
2455 2460 try:
2456 2461 f = self.opener(self.indexfile)
2457 2462 f.seek(0, io.SEEK_END)
2458 2463 actual = f.tell()
2459 2464 f.close()
2460 2465 s = self._io.size
2461 2466 i = max(0, actual // s)
2462 2467 di = actual - (i * s)
2463 2468 if self._inline:
2464 2469 databytes = 0
2465 2470 for r in self:
2466 2471 databytes += max(0, self.length(r))
2467 2472 dd = 0
2468 2473 di = actual - len(self) * s - databytes
2469 2474 except IOError as inst:
2470 2475 if inst.errno != errno.ENOENT:
2471 2476 raise
2472 2477 di = 0
2473 2478
2474 2479 return (dd, di)
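    # Interpretation sketch: both values count bytes present on disk beyond
    # what the index implies, so a healthy revlog reports zeros:
    #
    #   dd, di = rl.checksize()
    #   assert (dd, di) == (0, 0)  # expected for an undamaged revlog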
2475 2480
2476 2481 def files(self):
2477 2482 res = [self.indexfile]
2478 2483 if not self._inline:
2479 2484 res.append(self.datafile)
2480 2485 return res
2481 2486
2482 2487 def emitrevisions(
2483 2488 self,
2484 2489 nodes,
2485 2490 nodesorder=None,
2486 2491 revisiondata=False,
2487 2492 assumehaveparentrevisions=False,
2488 2493 deltamode=repository.CG_DELTAMODE_STD,
2489 2494 ):
2490 2495 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2491 2496 raise error.ProgrammingError(
2492 2497 b'unhandled value for nodesorder: %s' % nodesorder
2493 2498 )
2494 2499
2495 2500 if nodesorder is None and not self._generaldelta:
2496 2501 nodesorder = b'storage'
2497 2502
2498 2503 if (
2499 2504 not self._storedeltachains
2500 2505 and deltamode != repository.CG_DELTAMODE_PREV
2501 2506 ):
2502 2507 deltamode = repository.CG_DELTAMODE_FULL
2503 2508
2504 2509 return storageutil.emitrevisions(
2505 2510 self,
2506 2511 nodes,
2507 2512 nodesorder,
2508 2513 revlogrevisiondelta,
2509 2514 deltaparentfn=self.deltaparent,
2510 2515 candeltafn=self.candelta,
2511 2516 rawsizefn=self.rawsize,
2512 2517 revdifffn=self.revdiff,
2513 2518 flagsfn=self.flags,
2514 2519 deltamode=deltamode,
2515 2520 revisiondata=revisiondata,
2516 2521 assumehaveparentrevisions=assumehaveparentrevisions,
2517 2522 )
2518 2523
2519 2524 DELTAREUSEALWAYS = b'always'
2520 2525 DELTAREUSESAMEREVS = b'samerevs'
2521 2526 DELTAREUSENEVER = b'never'
2522 2527
2523 2528 DELTAREUSEFULLADD = b'fulladd'
2524 2529
2525 2530 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2526 2531
2527 2532 def clone(
2528 2533 self,
2529 2534 tr,
2530 2535 destrevlog,
2531 2536 addrevisioncb=None,
2532 2537 deltareuse=DELTAREUSESAMEREVS,
2533 2538 forcedeltabothparents=None,
2534 2539 sidedatacompanion=None,
2535 2540 ):
2536 2541 """Copy this revlog to another, possibly with format changes.
2537 2542
2538 2543 The destination revlog will contain the same revisions and nodes.
2539 2544 However, it may not be bit-for-bit identical due to e.g. delta encoding
2540 2545 differences.
2541 2546
2542 2547         The ``deltareuse`` argument controls how deltas from the existing revlog
2543 2548 are preserved in the destination revlog. The argument can have the
2544 2549 following values:
2545 2550
2546 2551 DELTAREUSEALWAYS
2547 2552 Deltas will always be reused (if possible), even if the destination
2548 2553 revlog would not select the same revisions for the delta. This is the
2549 2554 fastest mode of operation.
2550 2555 DELTAREUSESAMEREVS
2551 2556 Deltas will be reused if the destination revlog would pick the same
2552 2557 revisions for the delta. This mode strikes a balance between speed
2553 2558 and optimization.
2554 2559 DELTAREUSENEVER
2555 2560 Deltas will never be reused. This is the slowest mode of execution.
2556 2561 This mode can be used to recompute deltas (e.g. if the diff/delta
2557 2562 algorithm changes).
2558 2563 DELTAREUSEFULLADD
2559 2564           Revisions will be re-added as if they were new content. This is
2560 2565           slower than DELTAREUSEALWAYS but allows more mechanisms to kick
2561 2566           in, e.g. large file detection and handling.
2562 2567
2563 2568 Delta computation can be slow, so the choice of delta reuse policy can
2564 2569 significantly affect run time.
2565 2570
2566 2571 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2567 2572 two extremes. Deltas will be reused if they are appropriate. But if the
2568 2573         two extremes. Deltas will be reused when they are appropriate. But if
2569 2574         a better delta base could be chosen, it will be. This means if you
2570 2575 deltas will be recomputed if the delta's parent isn't a parent of the
2571 2576 revision.
2572 2577
2573 2578 In addition to the delta policy, the ``forcedeltabothparents``
2574 2579         argument controls whether to force computing deltas against both parents
2575 2580         for merges. If None, the destination revlog's current setting is kept.
2576 2581
2577 2582         If not None, `sidedatacompanion` is a callable that accepts two
2578 2583 arguments:
2579 2584
2580 2585 (srcrevlog, rev)
2581 2586
2582 2587         and returns a triplet that controls changes to sidedata content from the
2583 2588 old revision to the new clone result:
2584 2589
2585 2590 (dropall, filterout, update)
2586 2591
2587 2592 * if `dropall` is True, all sidedata should be dropped
2588 2593 * `filterout` is a set of sidedata keys that should be dropped
2589 2594         * `update` is a mapping of additional/new key -> value
2590 2595 """
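        # A hedged example companion (illustrative only; ``42`` stands in
        # for a hypothetical integer sidedata key):
        #
        #   def companion(srcrevlog, rev):
        #       dropall = False     # keep sidedata in general
        #       filterout = {42}    # but drop this one key
        #       update = {}         # and add nothing new
        #       return (dropall, filterout, update)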
2591 2596 if deltareuse not in self.DELTAREUSEALL:
2592 2597 raise ValueError(
2593 2598 _(b'value for deltareuse invalid: %s') % deltareuse
2594 2599 )
2595 2600
2596 2601 if len(destrevlog):
2597 2602 raise ValueError(_(b'destination revlog is not empty'))
2598 2603
2599 2604 if getattr(self, 'filteredrevs', None):
2600 2605 raise ValueError(_(b'source revlog has filtered revisions'))
2601 2606 if getattr(destrevlog, 'filteredrevs', None):
2602 2607 raise ValueError(_(b'destination revlog has filtered revisions'))
2603 2608
2604 2609 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2605 2610 # if possible.
2606 2611 oldlazydelta = destrevlog._lazydelta
2607 2612 oldlazydeltabase = destrevlog._lazydeltabase
2608 2613 oldamd = destrevlog._deltabothparents
2609 2614
2610 2615 try:
2611 2616 if deltareuse == self.DELTAREUSEALWAYS:
2612 2617 destrevlog._lazydeltabase = True
2613 2618 destrevlog._lazydelta = True
2614 2619 elif deltareuse == self.DELTAREUSESAMEREVS:
2615 2620 destrevlog._lazydeltabase = False
2616 2621 destrevlog._lazydelta = True
2617 2622 elif deltareuse == self.DELTAREUSENEVER:
2618 2623 destrevlog._lazydeltabase = False
2619 2624 destrevlog._lazydelta = False
2620 2625
2621 2626 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2622 2627
2623 2628 self._clone(
2624 2629 tr,
2625 2630 destrevlog,
2626 2631 addrevisioncb,
2627 2632 deltareuse,
2628 2633 forcedeltabothparents,
2629 2634 sidedatacompanion,
2630 2635 )
2631 2636
2632 2637 finally:
2633 2638 destrevlog._lazydelta = oldlazydelta
2634 2639 destrevlog._lazydeltabase = oldlazydeltabase
2635 2640 destrevlog._deltabothparents = oldamd
2636 2641
2637 2642 def _clone(
2638 2643 self,
2639 2644 tr,
2640 2645 destrevlog,
2641 2646 addrevisioncb,
2642 2647 deltareuse,
2643 2648 forcedeltabothparents,
2644 2649 sidedatacompanion,
2645 2650 ):
2646 2651 """perform the core duty of `revlog.clone` after parameter processing"""
2647 2652 deltacomputer = deltautil.deltacomputer(destrevlog)
2648 2653 index = self.index
2649 2654 for rev in self:
2650 2655 entry = index[rev]
2651 2656
2652 2657 # Some classes override linkrev to take filtered revs into
2653 2658 # account. Use raw entry from index.
2654 2659 flags = entry[0] & 0xFFFF
2655 2660 linkrev = entry[4]
2656 2661 p1 = index[entry[5]][7]
2657 2662 p2 = index[entry[6]][7]
2658 2663 node = entry[7]
2659 2664
2660 2665 sidedataactions = (False, [], {})
2661 2666 if sidedatacompanion is not None:
2662 2667 sidedataactions = sidedatacompanion(self, rev)
2663 2668
2664 2669 # (Possibly) reuse the delta from the revlog if allowed and
2665 2670 # the revlog chunk is a delta.
2666 2671 cachedelta = None
2667 2672 rawtext = None
2668 2673 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2669 2674 dropall, filterout, update = sidedataactions
2670 2675 text, sidedata = self._revisiondata(rev)
2671 2676 if dropall:
2672 2677 sidedata = {}
2673 2678 for key in filterout:
2674 2679 sidedata.pop(key, None)
2675 2680 sidedata.update(update)
2676 2681 if not sidedata:
2677 2682 sidedata = None
2678 2683 destrevlog.addrevision(
2679 2684 text,
2680 2685 tr,
2681 2686 linkrev,
2682 2687 p1,
2683 2688 p2,
2684 2689 cachedelta=cachedelta,
2685 2690 node=node,
2686 2691 flags=flags,
2687 2692 deltacomputer=deltacomputer,
2688 2693 sidedata=sidedata,
2689 2694 )
2690 2695 else:
2691 2696 if destrevlog._lazydelta:
2692 2697 dp = self.deltaparent(rev)
2693 2698 if dp != nullrev:
2694 2699 cachedelta = (dp, bytes(self._chunk(rev)))
2695 2700
2696 2701 if not cachedelta:
2697 2702 rawtext = self.rawdata(rev)
2698 2703
2699 2704 ifh = destrevlog.opener(
2700 2705 destrevlog.indexfile, b'a+', checkambig=False
2701 2706 )
2702 2707 dfh = None
2703 2708 if not destrevlog._inline:
2704 2709 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2705 2710 try:
2706 2711 destrevlog._addrevision(
2707 2712 node,
2708 2713 rawtext,
2709 2714 tr,
2710 2715 linkrev,
2711 2716 p1,
2712 2717 p2,
2713 2718 flags,
2714 2719 cachedelta,
2715 2720 ifh,
2716 2721 dfh,
2717 2722 deltacomputer=deltacomputer,
2718 2723 )
2719 2724 finally:
2720 2725 if dfh:
2721 2726 dfh.close()
2722 2727 ifh.close()
2723 2728
2724 2729 if addrevisioncb:
2725 2730 addrevisioncb(self, rev, node)
2726 2731
2727 2732 def censorrevision(self, tr, censornode, tombstone=b''):
2728 2733 if (self.version & 0xFFFF) == REVLOGV0:
2729 2734 raise error.RevlogError(
2730 2735 _(b'cannot censor with version %d revlogs') % self.version
2731 2736 )
2732 2737
2733 2738 censorrev = self.rev(censornode)
2734 2739 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2735 2740
2736 2741 if len(tombstone) > self.rawsize(censorrev):
2737 2742 raise error.Abort(
2738 2743 _(b'censor tombstone must be no longer than censored data')
2739 2744 )
2740 2745
2741 2746 # Rewriting the revlog in place is hard. Our strategy for censoring is
2742 2747 # to create a new revlog, copy all revisions to it, then replace the
2743 2748 # revlogs on transaction close.
2744 2749
2745 2750 newindexfile = self.indexfile + b'.tmpcensored'
2746 2751 newdatafile = self.datafile + b'.tmpcensored'
2747 2752
2748 2753 # This is a bit dangerous. We could easily have a mismatch of state.
2749 2754 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2750 2755 newrl.version = self.version
2751 2756 newrl._generaldelta = self._generaldelta
2752 2757 newrl._io = self._io
2753 2758
2754 2759 for rev in self.revs():
2755 2760 node = self.node(rev)
2756 2761 p1, p2 = self.parents(node)
2757 2762
2758 2763 if rev == censorrev:
2759 2764 newrl.addrawrevision(
2760 2765 tombstone,
2761 2766 tr,
2762 2767 self.linkrev(censorrev),
2763 2768 p1,
2764 2769 p2,
2765 2770 censornode,
2766 2771 REVIDX_ISCENSORED,
2767 2772 )
2768 2773
2769 2774 if newrl.deltaparent(rev) != nullrev:
2770 2775 raise error.Abort(
2771 2776 _(
2772 2777 b'censored revision stored as delta; '
2773 2778 b'cannot censor'
2774 2779 ),
2775 2780 hint=_(
2776 2781 b'censoring of revlogs is not '
2777 2782 b'fully implemented; please report '
2778 2783 b'this bug'
2779 2784 ),
2780 2785 )
2781 2786 continue
2782 2787
2783 2788 if self.iscensored(rev):
2784 2789 if self.deltaparent(rev) != nullrev:
2785 2790 raise error.Abort(
2786 2791 _(
2787 2792 b'cannot censor due to censored '
2788 2793 b'revision having delta stored'
2789 2794 )
2790 2795 )
2791 2796 rawtext = self._chunk(rev)
2792 2797 else:
2793 2798 rawtext = self.rawdata(rev)
2794 2799
2795 2800 newrl.addrawrevision(
2796 2801 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2797 2802 )
2798 2803
2799 2804 tr.addbackup(self.indexfile, location=b'store')
2800 2805 if not self._inline:
2801 2806 tr.addbackup(self.datafile, location=b'store')
2802 2807
2803 2808 self.opener.rename(newrl.indexfile, self.indexfile)
2804 2809 if not self._inline:
2805 2810 self.opener.rename(newrl.datafile, self.datafile)
2806 2811
2807 2812 self.clearcaches()
2808 2813 self._loadindex()
2809 2814
2810 2815 def verifyintegrity(self, state):
2811 2816 """Verifies the integrity of the revlog.
2812 2817
2813 2818 Yields ``revlogproblem`` instances describing problems that are
2814 2819 found.
2815 2820 """
2816 2821 dd, di = self.checksize()
2817 2822 if dd:
2818 2823 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2819 2824 if di:
2820 2825 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2821 2826
2822 2827 version = self.version & 0xFFFF
2823 2828
2824 2829 # The verifier tells us what version revlog we should be.
2825 2830 if version != state[b'expectedversion']:
2826 2831 yield revlogproblem(
2827 2832 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2828 2833 % (self.indexfile, version, state[b'expectedversion'])
2829 2834 )
2830 2835
2831 2836 state[b'skipread'] = set()
2832 2837
2833 2838 for rev in self:
2834 2839 node = self.node(rev)
2835 2840
2836 2841 # Verify contents. 4 cases to care about:
2837 2842 #
2838 2843 # common: the most common case
2839 2844 # rename: with a rename
2840 2845 # meta: file content starts with b'\1\n', the metadata
2841 2846 # header defined in filelog.py, but without a rename
2842 2847 # ext: content stored externally
2843 2848 #
2844 2849 # More formally, their differences are shown below:
2845 2850 #
2846 2851 # | common | rename | meta | ext
2847 2852 # -------------------------------------------------------
2848 2853 # flags() | 0 | 0 | 0 | not 0
2849 2854 # renamed() | False | True | False | ?
2850 2855 # rawtext[0:2]=='\1\n'| False | True | True | ?
2851 2856 #
2852 2857 # "rawtext" means the raw text stored in revlog data, which
2853 2858 # could be retrieved by "rawdata(rev)". "text"
2854 2859 # mentioned below is "revision(rev)".
2855 2860 #
2856 2861 # There are 3 different lengths stored physically:
2857 2862 # 1. L1: rawsize, stored in revlog index
2858 2863 # 2. L2: len(rawtext), stored in revlog data
2859 2864 # 3. L3: len(text), stored in revlog data if flags==0, or
2860 2865 # possibly somewhere else if flags!=0
2861 2866 #
2862 2867 # L1 should be equal to L2. L3 could be different from them.
2863 2868 # "text" may or may not affect commit hash depending on flag
2864 2869 # processors (see flagutil.addflagprocessor).
2865 2870 #
2866 2871 # | common | rename | meta | ext
2867 2872 # -------------------------------------------------
2868 2873 # rawsize() | L1 | L1 | L1 | L1
2869 2874 # size() | L1 | L2-LM | L1(*) | L1 (?)
2870 2875 # len(rawtext) | L2 | L2 | L2 | L2
2871 2876 # len(text) | L2 | L2 | L2 | L3
2872 2877 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2873 2878 #
2874 2879 # LM: length of metadata, depending on rawtext
2875 2880 # (*): not ideal, see comment in filelog.size
2876 2881 # (?): could be "- len(meta)" if the resolved content has
2877 2882 # rename metadata
2878 2883 #
2879 2884 # Checks needed to be done:
2880 2885 # 1. length check: L1 == L2, in all cases.
2881 2886 # 2. hash check: depending on flag processor, we may need to
2882 2887 # use either "text" (external), or "rawtext" (in revlog).
2883 2888
2884 2889 try:
2885 2890 skipflags = state.get(b'skipflags', 0)
2886 2891 if skipflags:
2887 2892 skipflags &= self.flags(rev)
2888 2893
2889 2894 if skipflags:
2890 2895 state[b'skipread'].add(node)
2891 2896 else:
2892 2897 # Side-effect: read content and verify hash.
2893 2898 self.revision(node)
2894 2899
2895 2900 l1 = self.rawsize(rev)
2896 2901 l2 = len(self.rawdata(node))
2897 2902
2898 2903 if l1 != l2:
2899 2904 yield revlogproblem(
2900 2905 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2901 2906 node=node,
2902 2907 )
2903 2908
2904 2909 except error.CensoredNodeError:
2905 2910 if state[b'erroroncensored']:
2906 2911 yield revlogproblem(
2907 2912 error=_(b'censored file data'), node=node
2908 2913 )
2909 2914 state[b'skipread'].add(node)
2910 2915 except Exception as e:
2911 2916 yield revlogproblem(
2912 2917 error=_(b'unpacking %s: %s')
2913 2918 % (short(node), stringutil.forcebytestr(e)),
2914 2919 node=node,
2915 2920 )
2916 2921 state[b'skipread'].add(node)
2917 2922
2918 2923 def storageinfo(
2919 2924 self,
2920 2925 exclusivefiles=False,
2921 2926 sharedfiles=False,
2922 2927 revisionscount=False,
2923 2928 trackedsize=False,
2924 2929 storedsize=False,
2925 2930 ):
2926 2931 d = {}
2927 2932
2928 2933 if exclusivefiles:
2929 2934 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2930 2935 if not self._inline:
2931 2936 d[b'exclusivefiles'].append((self.opener, self.datafile))
2932 2937
2933 2938 if sharedfiles:
2934 2939 d[b'sharedfiles'] = []
2935 2940
2936 2941 if revisionscount:
2937 2942 d[b'revisionscount'] = len(self)
2938 2943
2939 2944 if trackedsize:
2940 2945 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2941 2946
2942 2947 if storedsize:
2943 2948 d[b'storedsize'] = sum(
2944 2949 self.opener.stat(path).st_size for path in self.files()
2945 2950 )
2946 2951
2947 2952 return d