revlog: introduce an explicit NodeMap class for pure code...
marmoute, r43924:a7c0c5b5 default
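The NodeMap class named in the summary lives in mercurial/revlogutils/__init__.py and is not shown in this hunk. Judging only from how the callers below use it (constructed from a plain dict, with rev() expecting error.RevlogError rather than KeyError on a miss), it is essentially a dict whose failed lookups raise the revlog error. A minimal self-contained sketch of that idea (the stand-in RevlogError is defined locally; the exact upstream body may differ):

    class RevlogError(Exception):
        """stand-in for mercurial.error.RevlogError"""


    class NodeMap(dict):
        """node -> rev mapping that fails the way the C index does"""

        def __missing__(self, node):
            # a plain dict would raise KeyError here; raising RevlogError
            # lets rev() treat pure-Python and C lookup misses uniformly
            raise RevlogError(b'unknown node: %s' % node)

With this in place, rev() below no longer needs a separate `except KeyError` branch for the pure-Python path: both backends signal a miss with RevlogError, and the two cases are then told apart with an isinstance() check.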
@@ -1,2961 +1,2963 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 revlogutils,
68 69 templatefilters,
69 70 util,
70 71 )
71 72 from .interfaces import (
72 73 repository,
73 74 util as interfaceutil,
74 75 )
75 76 from .revlogutils import (
76 77 deltas as deltautil,
77 78 flagutil,
78 79 sidedata as sidedatautil,
79 80 )
80 81 from .utils import (
81 82 storageutil,
82 83 stringutil,
83 84 )
84 85
85 86 # blanket usage of all the names to prevent pyflakes complaints
86 87 # We need these names available in the module for extensions.
87 88 REVLOGV0
88 89 REVLOGV1
89 90 REVLOGV2
90 91 FLAG_INLINE_DATA
91 92 FLAG_GENERALDELTA
92 93 REVLOG_DEFAULT_FLAGS
93 94 REVLOG_DEFAULT_FORMAT
94 95 REVLOG_DEFAULT_VERSION
95 96 REVLOGV1_FLAGS
96 97 REVLOGV2_FLAGS
97 98 REVIDX_ISCENSORED
98 99 REVIDX_ELLIPSIS
99 100 REVIDX_SIDEDATA
100 101 REVIDX_EXTSTORED
101 102 REVIDX_DEFAULT_FLAGS
102 103 REVIDX_FLAGS_ORDER
103 104 REVIDX_RAWTEXT_CHANGING_FLAGS
104 105
105 106 parsers = policy.importmod('parsers')
106 107 rustancestor = policy.importrust('ancestor')
107 108 rustdagop = policy.importrust('dagop')
108 109
109 110 # Aliased for performance.
110 111 _zlibdecompress = zlib.decompress
111 112
112 113 # max size of revlog with inline data
113 114 _maxinline = 131072
114 115 _chunksize = 1048576
115 116
116 117 # Flag processors for REVIDX_ELLIPSIS.
117 118 def ellipsisreadprocessor(rl, text):
118 119 return text, False, {}
119 120
120 121
121 122 def ellipsiswriteprocessor(rl, text, sidedata):
122 123 return text, False
123 124
124 125
125 126 def ellipsisrawprocessor(rl, text):
126 127 return False
127 128
128 129
129 130 ellipsisprocessor = (
130 131 ellipsisreadprocessor,
131 132 ellipsiswriteprocessor,
132 133 ellipsisrawprocessor,
133 134 )
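As the identity triple above illustrates, a flag processor is a (read, write, raw) tuple: read(rl, text) returns (text, validatehash, sidedata), write(rl, text, sidedata) returns (text, validatehash), and raw(rl, text) only reports whether the stored rawtext can be hash-checked as-is. A hedged illustration with a made-up flag (every name here is hypothetical, not part of revlog):

    def demoreadprocessor(rl, text):
        # turn stored rawtext into the text consumers see
        return text[::-1], False, {}


    def demowriteprocessor(rl, text, sidedata):
        # turn incoming text into the rawtext to store
        return text[::-1], False


    def demorawprocessor(rl, text):
        # the stored rawtext is not valid for hash verification as-is
        return False


    demoprocessor = (
        demoreadprocessor,
        demowriteprocessor,
        demorawprocessor,
    )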
134 135
135 136
136 137 def getoffset(q):
137 138 return int(q >> 16)
138 139
139 140
140 141 def gettype(q):
141 142 return int(q & 0xFFFF)
142 143
143 144
144 145 def offset_type(offset, type):
145 146 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
146 147 raise ValueError(b'unknown revlog index flags')
147 148 return int(int(offset) << 16 | type)
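getoffset(), gettype() and offset_type() pack a file offset and 16 flag bits into the single integer stored as the first field of an index entry: offset in the high bits, flags in the low 16. A quick round-trip using the functions defined above (values illustrative):

    packed = offset_type(1048576, 0)   # offset 1 MiB, no flags
    assert packed == 1048576 << 16
    assert getoffset(packed) == 1048576
    assert gettype(packed) == 0
    # any flag outside flagutil.REVIDX_KNOWN_FLAGS raises ValueError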
148 149
149 150
150 151 @attr.s(slots=True, frozen=True)
151 152 class _revisioninfo(object):
152 153 """Information about a revision that allows building its fulltext
153 154 node: expected hash of the revision
154 155 p1, p2: parent revs of the revision
155 156 btext: built text cache consisting of a one-element list
156 157 cachedelta: (baserev, uncompressed_delta) or None
157 158 flags: flags associated to the revision storage
158 159
159 160 One of btext[0] or cachedelta must be set.
160 161 """
161 162
162 163 node = attr.ib()
163 164 p1 = attr.ib()
164 165 p2 = attr.ib()
165 166 btext = attr.ib()
166 167 textlen = attr.ib()
167 168 cachedelta = attr.ib()
168 169 flags = attr.ib()
169 170
170 171
171 172 @interfaceutil.implementer(repository.irevisiondelta)
172 173 @attr.s(slots=True)
173 174 class revlogrevisiondelta(object):
174 175 node = attr.ib()
175 176 p1node = attr.ib()
176 177 p2node = attr.ib()
177 178 basenode = attr.ib()
178 179 flags = attr.ib()
179 180 baserevisionsize = attr.ib()
180 181 revision = attr.ib()
181 182 delta = attr.ib()
182 183 linknode = attr.ib(default=None)
183 184
184 185
185 186 @interfaceutil.implementer(repository.iverifyproblem)
186 187 @attr.s(frozen=True)
187 188 class revlogproblem(object):
188 189 warning = attr.ib(default=None)
189 190 error = attr.ib(default=None)
190 191 node = attr.ib(default=None)
191 192
192 193
193 194 # index v0:
194 195 # 4 bytes: offset
195 196 # 4 bytes: compressed length
196 197 # 4 bytes: base rev
197 198 # 4 bytes: link rev
198 199 # 20 bytes: parent 1 nodeid
199 200 # 20 bytes: parent 2 nodeid
200 201 # 20 bytes: nodeid
201 202 indexformatv0 = struct.Struct(b">4l20s20s20s")
202 203 indexformatv0_pack = indexformatv0.pack
203 204 indexformatv0_unpack = indexformatv0.unpack
204 205
205 206
206 207 class revlogoldindex(list):
207 208 def __getitem__(self, i):
208 209 if i == -1:
209 210 return (0, 0, 0, -1, -1, -1, -1, nullid)
210 211 return list.__getitem__(self, i)
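The only override is index -1: the C index answers rev -1 (nullrev) with the null entry, and revlogoldindex makes a plain list do the same instead of returning its last element. For example (entry values made up):

    idx = revlogoldindex([(0, 10, 10, -1, 0, -1, -1, b'\x11' * 20)])
    idx[-1]   # (0, 0, 0, -1, -1, -1, -1, nullid), not the last entry
    idx[0]    # ordinary list indexing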
211 212
212 213
213 214 class revlogoldio(object):
214 215 def __init__(self):
215 216 self.size = indexformatv0.size
216 217
217 218 def parseindex(self, data, inline):
218 219 s = self.size
219 220 index = []
220 nodemap = {nullid: nullrev}
221 nodemap = revlogutils.NodeMap({nullid: nullrev})
221 222 n = off = 0
222 223 l = len(data)
223 224 while off + s <= l:
224 225 cur = data[off : off + s]
225 226 off += s
226 227 e = indexformatv0_unpack(cur)
227 228 # transform to revlogv1 format
228 229 e2 = (
229 230 offset_type(e[0], 0),
230 231 e[1],
231 232 -1,
232 233 e[2],
233 234 e[3],
234 235 nodemap.get(e[4], nullrev),
235 236 nodemap.get(e[5], nullrev),
236 237 e[6],
237 238 )
238 239 index.append(e2)
239 240 nodemap[e[6]] = n
240 241 n += 1
241 242
242 243 return revlogoldindex(index), nodemap, None
243 244
244 245 def packentry(self, entry, node, version, rev):
245 246 if gettype(entry[0]):
246 247 raise error.RevlogError(
247 248 _(b'index entry flags need revlog version 1')
248 249 )
249 250 e2 = (
250 251 getoffset(entry[0]),
251 252 entry[1],
252 253 entry[3],
253 254 entry[4],
254 255 node(entry[5]),
255 256 node(entry[6]),
256 257 entry[7],
257 258 )
258 259 return indexformatv0_pack(*e2)
259 260
260 261
261 262 # index ng:
262 263 # 6 bytes: offset
263 264 # 2 bytes: flags
264 265 # 4 bytes: compressed length
265 266 # 4 bytes: uncompressed length
266 267 # 4 bytes: base rev
267 268 # 4 bytes: link rev
268 269 # 4 bytes: parent 1 rev
269 270 # 4 bytes: parent 2 rev
270 271 # 32 bytes: nodeid
271 272 indexformatng = struct.Struct(b">Qiiiiii20s12x")
272 273 indexformatng_pack = indexformatng.pack
273 274 versionformat = struct.Struct(b">I")
274 275 versionformat_pack = versionformat.pack
275 276 versionformat_unpack = versionformat.unpack
276 277
277 278 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
278 279 # signed integer)
279 280 _maxentrysize = 0x7FFFFFFF
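Spelled out, `>Qiiiiii20s12x` is one big-endian unsigned 64-bit word (offset << 16 | flags), six signed 32-bit ints, a 20-byte nodeid, and 12 bytes of padding: 64 bytes per entry. A self-contained round-trip (entry values illustrative):

    import struct

    fmt = struct.Struct(b">Qiiiiii20s12x")
    # offset<<16|flags, comp. len, uncomp. len, base, linkrev, p1, p2, node
    entry = ((0 << 16) | 0, 30, 60, 0, 0, -1, -1, b'\xaa' * 20)
    packed = fmt.pack(*entry)
    assert len(packed) == 64          # 8 + 6 * 4 + 20 + 12
    assert fmt.unpack(packed) == entry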
280 281
281 282
282 283 class revlogio(object):
283 284 def __init__(self):
284 285 self.size = indexformatng.size
285 286
286 287 def parseindex(self, data, inline):
287 288 # call the C implementation to parse the index data
288 289 index, cache = parsers.parse_index2(data, inline)
289 290 return index, getattr(index, 'nodemap', None), cache
290 291
291 292 def packentry(self, entry, node, version, rev):
292 293 p = indexformatng_pack(*entry)
293 294 if rev == 0:
294 295 p = versionformat_pack(version) + p[4:]
295 296 return p
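The rev == 0 special case in packentry() is how a revlog stores its header: there is no separate header block, so the 4-byte version/flags word overwrites the first 4 bytes of entry 0. That is lossless because rev 0 always starts at offset 0, so those bytes would be zero anyway. A sketch of the byte surgery (version value illustrative):

    import struct

    entry0 = struct.pack(b">Qiiiiii20s12x", 0, 30, 60, 0, 0, -1, -1, b'\xaa' * 20)
    # rev 0's offset is 0, so its leading 4 bytes are free for the header:
    stamped = struct.pack(b">I", 0xDEAD) + entry0[4:]
    assert struct.unpack(b">I", stamped[:4])[0] == 0xDEAD
    assert len(stamped) == len(entry0)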
296 297
297 298
298 299 class revlog(object):
299 300 """
300 301 the underlying revision storage object
301 302
302 303 A revlog consists of two parts, an index and the revision data.
303 304
304 305 The index is a file with a fixed record size containing
305 306 information on each revision, including its nodeid (hash), the
306 307 nodeids of its parents, the position and offset of its data within
307 308 the data file, and the revision it's based on. Finally, each entry
308 309 contains a linkrev entry that can serve as a pointer to external
309 310 data.
310 311
311 312 The revision data itself is a linear collection of data chunks.
312 313 Each chunk represents a revision and is usually represented as a
313 314 delta against the previous chunk. To bound lookup time, runs of
314 315 deltas are limited to about 2 times the length of the original
315 316 version data. This makes retrieval of a version proportional to
316 317 its size, or O(1) relative to the number of revisions.
317 318
318 319 Both pieces of the revlog are written to in an append-only
319 320 fashion, which means we never need to rewrite a file to insert or
320 321 remove data, and can use some simple techniques to avoid the need
321 322 for locking while reading.
322 323
323 324 If checkambig, indexfile is opened with checkambig=True at
324 325 writing, to avoid file stat ambiguity.
325 326
326 327 If mmaplargeindex is True, and an mmapindexthreshold is set, the
327 328 index will be mmapped rather than read if it is larger than the
328 329 configured threshold.
329 330
330 331 If censorable is True, the revlog can have censored revisions.
331 332
332 333 If `upperboundcomp` is not None, this is the expected maximal gain from
333 334 compression for the data content.
334 335 """
335 336
336 337 _flagserrorclass = error.RevlogError
337 338
338 339 def __init__(
339 340 self,
340 341 opener,
341 342 indexfile,
342 343 datafile=None,
343 344 checkambig=False,
344 345 mmaplargeindex=False,
345 346 censorable=False,
346 347 upperboundcomp=None,
347 348 ):
348 349 """
349 350 create a revlog object
350 351
351 352 opener is a function that abstracts the file opening operation
352 353 and can be used to implement COW semantics or the like.
353 354
354 355 """
355 356 self.upperboundcomp = upperboundcomp
356 357 self.indexfile = indexfile
357 358 self.datafile = datafile or (indexfile[:-2] + b".d")
358 359 self.opener = opener
359 360 # When True, indexfile is opened with checkambig=True at writing, to
360 361 # avoid file stat ambiguity.
361 362 self._checkambig = checkambig
362 363 self._mmaplargeindex = mmaplargeindex
363 364 self._censorable = censorable
364 365 # 3-tuple of (node, rev, text) for a raw revision.
365 366 self._revisioncache = None
366 367 # Maps rev to chain base rev.
367 368 self._chainbasecache = util.lrucachedict(100)
368 369 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
369 370 self._chunkcache = (0, b'')
370 371 # How much data to read and cache into the raw revlog data cache.
371 372 self._chunkcachesize = 65536
372 373 self._maxchainlen = None
373 374 self._deltabothparents = True
374 375 self.index = []
375 376 # Mapping of partial identifiers to full nodes.
376 377 self._pcache = {}
377 378 # Mapping of revision integer to full node.
378 self._nodecache = {nullid: nullrev}
379 self._nodecache = revlogutils.NodeMap({nullid: nullrev})
379 380 self._nodepos = None
380 381 self._compengine = b'zlib'
381 382 self._compengineopts = {}
382 383 self._maxdeltachainspan = -1
383 384 self._withsparseread = False
384 385 self._sparserevlog = False
385 386 self._srdensitythreshold = 0.50
386 387 self._srmingapsize = 262144
387 388
388 389 # Make copy of flag processors so each revlog instance can support
389 390 # custom flags.
390 391 self._flagprocessors = dict(flagutil.flagprocessors)
391 392
392 393 # 2-tuple of file handles being used for active writing.
393 394 self._writinghandles = None
394 395
395 396 self._loadindex()
396 397
397 398 def _loadindex(self):
398 399 mmapindexthreshold = None
399 400 opts = self.opener.options
400 401
401 402 if b'revlogv2' in opts:
402 403 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
403 404 elif b'revlogv1' in opts:
404 405 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
405 406 if b'generaldelta' in opts:
406 407 newversionflags |= FLAG_GENERALDELTA
407 408 elif b'revlogv0' in self.opener.options:
408 409 newversionflags = REVLOGV0
409 410 else:
410 411 newversionflags = REVLOG_DEFAULT_VERSION
411 412
412 413 if b'chunkcachesize' in opts:
413 414 self._chunkcachesize = opts[b'chunkcachesize']
414 415 if b'maxchainlen' in opts:
415 416 self._maxchainlen = opts[b'maxchainlen']
416 417 if b'deltabothparents' in opts:
417 418 self._deltabothparents = opts[b'deltabothparents']
418 419 self._lazydelta = bool(opts.get(b'lazydelta', True))
419 420 self._lazydeltabase = False
420 421 if self._lazydelta:
421 422 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
422 423 if b'compengine' in opts:
423 424 self._compengine = opts[b'compengine']
424 425 if b'zlib.level' in opts:
425 426 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
426 427 if b'zstd.level' in opts:
427 428 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
428 429 if b'maxdeltachainspan' in opts:
429 430 self._maxdeltachainspan = opts[b'maxdeltachainspan']
430 431 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
431 432 mmapindexthreshold = opts[b'mmapindexthreshold']
432 433 self.hassidedata = bool(opts.get(b'side-data', False))
433 434 if self.hassidedata:
434 435 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
435 436 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
436 437 withsparseread = bool(opts.get(b'with-sparse-read', False))
437 438 # sparse-revlog forces sparse-read
438 439 self._withsparseread = self._sparserevlog or withsparseread
439 440 if b'sparse-read-density-threshold' in opts:
440 441 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
441 442 if b'sparse-read-min-gap-size' in opts:
442 443 self._srmingapsize = opts[b'sparse-read-min-gap-size']
443 444 if opts.get(b'enableellipsis'):
444 445 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
445 446
446 447 # revlog v0 doesn't have flag processors
447 448 for flag, processor in pycompat.iteritems(
448 449 opts.get(b'flagprocessors', {})
449 450 ):
450 451 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
451 452
452 453 if self._chunkcachesize <= 0:
453 454 raise error.RevlogError(
454 455 _(b'revlog chunk cache size %r is not greater than 0')
455 456 % self._chunkcachesize
456 457 )
457 458 elif self._chunkcachesize & (self._chunkcachesize - 1):
458 459 raise error.RevlogError(
459 460 _(b'revlog chunk cache size %r is not a power of 2')
460 461 % self._chunkcachesize
461 462 )
462 463
463 464 indexdata = b''
464 465 self._initempty = True
465 466 try:
466 467 with self._indexfp() as f:
467 468 if (
468 469 mmapindexthreshold is not None
469 470 and self.opener.fstat(f).st_size >= mmapindexthreshold
470 471 ):
471 472 # TODO: should .close() to release resources without
472 473 # relying on Python GC
473 474 indexdata = util.buffer(util.mmapread(f))
474 475 else:
475 476 indexdata = f.read()
476 477 if len(indexdata) > 0:
477 478 versionflags = versionformat_unpack(indexdata[:4])[0]
478 479 self._initempty = False
479 480 else:
480 481 versionflags = newversionflags
481 482 except IOError as inst:
482 483 if inst.errno != errno.ENOENT:
483 484 raise
484 485
485 486 versionflags = newversionflags
486 487
487 488 self.version = versionflags
488 489
489 490 flags = versionflags & ~0xFFFF
490 491 fmt = versionflags & 0xFFFF
491 492
492 493 if fmt == REVLOGV0:
493 494 if flags:
494 495 raise error.RevlogError(
495 496 _(b'unknown flags (%#04x) in version %d revlog %s')
496 497 % (flags >> 16, fmt, self.indexfile)
497 498 )
498 499
499 500 self._inline = False
500 501 self._generaldelta = False
501 502
502 503 elif fmt == REVLOGV1:
503 504 if flags & ~REVLOGV1_FLAGS:
504 505 raise error.RevlogError(
505 506 _(b'unknown flags (%#04x) in version %d revlog %s')
506 507 % (flags >> 16, fmt, self.indexfile)
507 508 )
508 509
509 510 self._inline = versionflags & FLAG_INLINE_DATA
510 511 self._generaldelta = versionflags & FLAG_GENERALDELTA
511 512
512 513 elif fmt == REVLOGV2:
513 514 if flags & ~REVLOGV2_FLAGS:
514 515 raise error.RevlogError(
515 516 _(b'unknown flags (%#04x) in version %d revlog %s')
516 517 % (flags >> 16, fmt, self.indexfile)
517 518 )
518 519
519 520 self._inline = versionflags & FLAG_INLINE_DATA
520 521 # generaldelta implied by version 2 revlogs.
521 522 self._generaldelta = True
522 523
523 524 else:
524 525 raise error.RevlogError(
525 526 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
526 527 )
527 528 # sparse-revlog can't be on without general-delta (issue6056)
528 529 if not self._generaldelta:
529 530 self._sparserevlog = False
530 531
531 532 self._storedeltachains = True
532 533
533 534 self._io = revlogio()
534 535 if self.version == REVLOGV0:
535 536 self._io = revlogoldio()
536 537 try:
537 538 d = self._io.parseindex(indexdata, self._inline)
538 539 except (ValueError, IndexError):
539 540 raise error.RevlogError(
540 541 _(b"index %s is corrupted") % self.indexfile
541 542 )
542 543 self.index, nodemap, self._chunkcache = d
543 544 if nodemap is not None:
544 545 self.nodemap = self._nodecache = nodemap
545 546 if not self._chunkcache:
546 547 self._chunkclear()
547 548 # revnum -> (chain-length, sum-delta-length)
548 549 self._chaininfocache = {}
549 550 # revlog header -> revlog compressor
550 551 self._decompressors = {}
551 552
552 553 @util.propertycache
553 554 def _compressor(self):
554 555 engine = util.compengines[self._compengine]
555 556 return engine.revlogcompressor(self._compengineopts)
556 557
557 558 def _indexfp(self, mode=b'r'):
558 559 """file object for the revlog's index file"""
559 560 args = {'mode': mode}
560 561 if mode != b'r':
561 562 args['checkambig'] = self._checkambig
562 563 if mode == b'w':
563 564 args['atomictemp'] = True
564 565 return self.opener(self.indexfile, **args)
565 566
566 567 def _datafp(self, mode=b'r'):
567 568 """file object for the revlog's data file"""
568 569 return self.opener(self.datafile, mode=mode)
569 570
570 571 @contextlib.contextmanager
571 572 def _datareadfp(self, existingfp=None):
572 573 """file object suitable to read data"""
573 574 # Use explicit file handle, if given.
574 575 if existingfp is not None:
575 576 yield existingfp
576 577
577 578 # Use a file handle being actively used for writes, if available.
578 579 # There is some danger to doing this because reads will seek the
579 580 # file. However, _writeentry() performs a SEEK_END before all writes,
580 581 # so we should be safe.
581 582 elif self._writinghandles:
582 583 if self._inline:
583 584 yield self._writinghandles[0]
584 585 else:
585 586 yield self._writinghandles[1]
586 587
587 588 # Otherwise open a new file handle.
588 589 else:
589 590 if self._inline:
590 591 func = self._indexfp
591 592 else:
592 593 func = self._datafp
593 594 with func() as fp:
594 595 yield fp
595 596
596 597 def tiprev(self):
597 598 return len(self.index) - 1
598 599
599 600 def tip(self):
600 601 return self.node(self.tiprev())
601 602
602 603 def __contains__(self, rev):
603 604 return 0 <= rev < len(self)
604 605
605 606 def __len__(self):
606 607 return len(self.index)
607 608
608 609 def __iter__(self):
609 610 return iter(pycompat.xrange(len(self)))
610 611
611 612 def revs(self, start=0, stop=None):
612 613 """iterate over all rev in this revlog (from start to stop)"""
613 614 return storageutil.iterrevs(len(self), start=start, stop=stop)
614 615
615 616 @util.propertycache
616 617 def nodemap(self):
617 618 if self.index:
618 619 # populate mapping down to the initial node
619 620 node0 = self.index[0][7] # get around changelog filtering
620 621 self.rev(node0)
621 622 return self._nodecache
622 623
623 624 def hasnode(self, node):
624 625 try:
625 626 self.rev(node)
626 627 return True
627 628 except KeyError:
628 629 return False
629 630
630 631 def candelta(self, baserev, rev):
631 632 """whether two revisions (baserev, rev) can be delta-ed or not"""
632 633 # Disable delta if either rev requires a content-changing flag
633 634 # processor (ex. LFS). This is because such flag processor can alter
634 635 # the rawtext content that the delta will be based on, and two clients
635 636 # could have a same revlog node with different flags (i.e. different
636 637 # rawtext contents) and the delta could be incompatible.
637 638 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
638 639 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
639 640 ):
640 641 return False
641 642 return True
642 643
643 644 def clearcaches(self):
644 645 self._revisioncache = None
645 646 self._chainbasecache.clear()
646 647 self._chunkcache = (0, b'')
647 648 self._pcache = {}
648 649
649 650 try:
650 651 # If we are using the native C version, you end up in a fun case
651 652 # where self.index, self.nodemap and self._nodecache are all the
652 653 # same object.
653 654 self._nodecache.clearcaches()
654 655 except AttributeError:
655 self._nodecache = {nullid: nullrev}
656 self._nodecache = revlogutils.NodeMap({nullid: nullrev})
656 657 self._nodepos = None
657 658
658 659 def rev(self, node):
659 660 try:
660 661 return self._nodecache[node]
661 662 except TypeError:
662 663 raise
663 664 except error.RevlogError:
665 if not isinstance(self._nodecache, revlogutils.NodeMap):
664 666 # parsers.c radix tree lookup failed
665 667 if node == wdirid or node in wdirfilenodeids:
666 668 raise error.WdirUnsupported
667 669 raise error.LookupError(node, self.indexfile, _(b'no node'))
668 except KeyError:
670 else:
669 671 # pure python cache lookup failed
670 672 n = self._nodecache
671 673 i = self.index
672 674 p = self._nodepos
673 675 if p is None:
674 676 p = len(i) - 1
675 677 else:
676 678 assert p < len(i)
677 679 for r in pycompat.xrange(p, -1, -1):
678 680 v = i[r][7]
679 681 n[v] = r
680 682 if v == node:
681 683 self._nodepos = r - 1
682 684 return r
683 685 if node == wdirid or node in wdirfilenodeids:
684 686 raise error.WdirUnsupported
685 687 raise error.LookupError(node, self.indexfile, _(b'no node'))
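This hunk is the behavioral core of the patch: the old code reached the pure-Python fallback through `except KeyError` on a plain dict, while the C radix tree signalled misses with error.RevlogError; now NodeMap raises error.RevlogError too, one handler serves both, and isinstance() selects the branch. The pure branch lazily back-fills _nodecache by scanning index nodes from _nodepos down to 0. A standalone model of that scan (not Mercurial code; all names are made up):

    def pure_rev(index, nodecache, node, nodepos=None):
        """Backward scan mirroring the pure-Python branch above.

        index rows are tuples whose [7] slot is the node; nodecache is
        filled in as a side effect, so later lookups are O(1).
        """
        start = len(index) - 1 if nodepos is None else nodepos
        for r in range(start, -1, -1):
            v = index[r][7]
            nodecache[v] = r      # memoize every node walked past
            if v == node:
                return r          # the caller also records r - 1 as nodepos
        raise LookupError(node)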
686 688
687 689 # Accessors for index entries.
688 690
689 691 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
690 692 # are flags.
691 693 def start(self, rev):
692 694 return int(self.index[rev][0] >> 16)
693 695
694 696 def flags(self, rev):
695 697 return self.index[rev][0] & 0xFFFF
696 698
697 699 def length(self, rev):
698 700 return self.index[rev][1]
699 701
700 702 def rawsize(self, rev):
701 703 """return the length of the uncompressed text for a given revision"""
702 704 l = self.index[rev][2]
703 705 if l >= 0:
704 706 return l
705 707
706 708 t = self.rawdata(rev)
707 709 return len(t)
708 710
709 711 def size(self, rev):
710 712 """length of non-raw text (processed by a "read" flag processor)"""
711 713 # fast path: if no "read" flag processor could change the content,
712 714 # size is rawsize. note: ELLIPSIS is known to not change the content.
713 715 flags = self.flags(rev)
714 716 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
715 717 return self.rawsize(rev)
716 718
717 719 return len(self.revision(rev, raw=False))
718 720
719 721 def chainbase(self, rev):
720 722 base = self._chainbasecache.get(rev)
721 723 if base is not None:
722 724 return base
723 725
724 726 index = self.index
725 727 iterrev = rev
726 728 base = index[iterrev][3]
727 729 while base != iterrev:
728 730 iterrev = base
729 731 base = index[iterrev][3]
730 732
731 733 self._chainbasecache[rev] = base
732 734 return base
733 735
734 736 def linkrev(self, rev):
735 737 return self.index[rev][4]
736 738
737 739 def parentrevs(self, rev):
738 740 try:
739 741 entry = self.index[rev]
740 742 except IndexError:
741 743 if rev == wdirrev:
742 744 raise error.WdirUnsupported
743 745 raise
744 746
745 747 return entry[5], entry[6]
746 748
747 749 # fast parentrevs(rev) where rev isn't filtered
748 750 _uncheckedparentrevs = parentrevs
749 751
750 752 def node(self, rev):
751 753 try:
752 754 return self.index[rev][7]
753 755 except IndexError:
754 756 if rev == wdirrev:
755 757 raise error.WdirUnsupported
756 758 raise
757 759
758 760 # Derived from index values.
759 761
760 762 def end(self, rev):
761 763 return self.start(rev) + self.length(rev)
762 764
763 765 def parents(self, node):
764 766 i = self.index
765 767 d = i[self.rev(node)]
766 768 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
767 769
768 770 def chainlen(self, rev):
769 771 return self._chaininfo(rev)[0]
770 772
771 773 def _chaininfo(self, rev):
772 774 chaininfocache = self._chaininfocache
773 775 if rev in chaininfocache:
774 776 return chaininfocache[rev]
775 777 index = self.index
776 778 generaldelta = self._generaldelta
777 779 iterrev = rev
778 780 e = index[iterrev]
779 781 clen = 0
780 782 compresseddeltalen = 0
781 783 while iterrev != e[3]:
782 784 clen += 1
783 785 compresseddeltalen += e[1]
784 786 if generaldelta:
785 787 iterrev = e[3]
786 788 else:
787 789 iterrev -= 1
788 790 if iterrev in chaininfocache:
789 791 t = chaininfocache[iterrev]
790 792 clen += t[0]
791 793 compresseddeltalen += t[1]
792 794 break
793 795 e = index[iterrev]
794 796 else:
795 797 # Add text length of base since decompressing that also takes
796 798 # work. For cache hits the length is already included.
797 799 compresseddeltalen += e[1]
798 800 r = (clen, compresseddeltalen)
799 801 chaininfocache[rev] = r
800 802 return r
801 803
802 804 def _deltachain(self, rev, stoprev=None):
803 805 """Obtain the delta chain for a revision.
804 806
805 807 ``stoprev`` specifies a revision to stop at. If not specified, we
806 808 stop at the base of the chain.
807 809
808 810 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
809 811 revs in ascending order and ``stopped`` is a bool indicating whether
810 812 ``stoprev`` was hit.
811 813 """
812 814 # Try C implementation.
813 815 try:
814 816 return self.index.deltachain(rev, stoprev, self._generaldelta)
815 817 except AttributeError:
816 818 pass
817 819
818 820 chain = []
819 821
820 822 # Alias to prevent attribute lookup in tight loop.
821 823 index = self.index
822 824 generaldelta = self._generaldelta
823 825
824 826 iterrev = rev
825 827 e = index[iterrev]
826 828 while iterrev != e[3] and iterrev != stoprev:
827 829 chain.append(iterrev)
828 830 if generaldelta:
829 831 iterrev = e[3]
830 832 else:
831 833 iterrev -= 1
832 834 e = index[iterrev]
833 835
834 836 if iterrev == stoprev:
835 837 stopped = True
836 838 else:
837 839 chain.append(iterrev)
838 840 stopped = False
839 841
840 842 chain.reverse()
841 843 return chain, stopped
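The pure fallback walks entry[3], the delta base: with generaldelta the base can be any earlier rev, while without it the delta is implicitly against rev - 1. A compact model of the generaldelta walk (hypothetical bases; a rev whose base is itself is stored as full text):

    def deltachain(bases, rev, stoprev=None):
        # bases[r] plays the role of entry[3] for rev r
        chain = []
        while bases[rev] != rev and rev != stoprev:
            chain.append(rev)
            rev = bases[rev]
        stopped = rev == stoprev
        if not stopped:
            chain.append(rev)
        chain.reverse()
        return chain, stopped

    assert deltachain([0, 0, 1, 1], 3) == ([0, 1, 3], False)
    assert deltachain([0, 0, 1, 1], 3, stoprev=1) == ([3], True)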
842 844
843 845 def ancestors(self, revs, stoprev=0, inclusive=False):
844 846 """Generate the ancestors of 'revs' in reverse revision order.
845 847 Does not generate revs lower than stoprev.
846 848
847 849 See the documentation for ancestor.lazyancestors for more details."""
848 850
849 851 # first, make sure start revisions aren't filtered
850 852 revs = list(revs)
851 853 checkrev = self.node
852 854 for r in revs:
853 855 checkrev(r)
854 856 # and this guarantees that their ancestors aren't filtered either
855 857
856 858 if rustancestor is not None:
857 859 lazyancestors = rustancestor.LazyAncestors
858 860 arg = self.index
859 861 elif util.safehasattr(parsers, b'rustlazyancestors'):
860 862 lazyancestors = ancestor.rustlazyancestors
861 863 arg = self.index
862 864 else:
863 865 lazyancestors = ancestor.lazyancestors
864 866 arg = self._uncheckedparentrevs
865 867 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
866 868
867 869 def descendants(self, revs):
868 870 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
869 871
870 872 def findcommonmissing(self, common=None, heads=None):
871 873 """Return a tuple of the ancestors of common and the ancestors of heads
872 874 that are not ancestors of common. In revset terminology, we return the
873 875 tuple:
874 876
875 877 ::common, (::heads) - (::common)
876 878
877 879 The list is sorted by revision number, meaning it is
878 880 topologically sorted.
879 881
880 882 'heads' and 'common' are both lists of node IDs. If heads is
881 883 not supplied, uses all of the revlog's heads. If common is not
882 884 supplied, uses nullid."""
883 885 if common is None:
884 886 common = [nullid]
885 887 if heads is None:
886 888 heads = self.heads()
887 889
888 890 common = [self.rev(n) for n in common]
889 891 heads = [self.rev(n) for n in heads]
890 892
891 893 # we want the ancestors, but inclusive
892 894 class lazyset(object):
893 895 def __init__(self, lazyvalues):
894 896 self.addedvalues = set()
895 897 self.lazyvalues = lazyvalues
896 898
897 899 def __contains__(self, value):
898 900 return value in self.addedvalues or value in self.lazyvalues
899 901
900 902 def __iter__(self):
901 903 added = self.addedvalues
902 904 for r in added:
903 905 yield r
904 906 for r in self.lazyvalues:
905 907 if r not in added:
906 908 yield r
907 909
908 910 def add(self, value):
909 911 self.addedvalues.add(value)
910 912
911 913 def update(self, values):
912 914 self.addedvalues.update(values)
913 915
914 916 has = lazyset(self.ancestors(common))
915 917 has.add(nullrev)
916 918 has.update(common)
917 919
918 920 # take all ancestors from heads that aren't in has
919 921 missing = set()
920 922 visit = collections.deque(r for r in heads if r not in has)
921 923 while visit:
922 924 r = visit.popleft()
923 925 if r in missing:
924 926 continue
925 927 else:
926 928 missing.add(r)
927 929 for p in self.parentrevs(r):
928 930 if p not in has:
929 931 visit.append(p)
930 932 missing = list(missing)
931 933 missing.sort()
932 934 return has, [self.node(miss) for miss in missing]
933 935
934 936 def incrementalmissingrevs(self, common=None):
935 937 """Return an object that can be used to incrementally compute the
936 938 revision numbers of the ancestors of arbitrary sets that are not
937 939 ancestors of common. This is an ancestor.incrementalmissingancestors
938 940 object.
939 941
940 942 'common' is a list of revision numbers. If common is not supplied, uses
941 943 nullrev.
942 944 """
943 945 if common is None:
944 946 common = [nullrev]
945 947
946 948 if rustancestor is not None:
947 949 return rustancestor.MissingAncestors(self.index, common)
948 950 return ancestor.incrementalmissingancestors(self.parentrevs, common)
949 951
950 952 def findmissingrevs(self, common=None, heads=None):
951 953 """Return the revision numbers of the ancestors of heads that
952 954 are not ancestors of common.
953 955
954 956 More specifically, return a list of revision numbers corresponding to
955 957 nodes N such that every N satisfies the following constraints:
956 958
957 959 1. N is an ancestor of some node in 'heads'
958 960 2. N is not an ancestor of any node in 'common'
959 961
960 962 The list is sorted by revision number, meaning it is
961 963 topologically sorted.
962 964
963 965 'heads' and 'common' are both lists of revision numbers. If heads is
964 966 not supplied, uses all of the revlog's heads. If common is not
965 967 supplied, uses nullid."""
966 968 if common is None:
967 969 common = [nullrev]
968 970 if heads is None:
969 971 heads = self.headrevs()
970 972
971 973 inc = self.incrementalmissingrevs(common=common)
972 974 return inc.missingancestors(heads)
973 975
974 976 def findmissing(self, common=None, heads=None):
975 977 """Return the ancestors of heads that are not ancestors of common.
976 978
977 979 More specifically, return a list of nodes N such that every N
978 980 satisfies the following constraints:
979 981
980 982 1. N is an ancestor of some node in 'heads'
981 983 2. N is not an ancestor of any node in 'common'
982 984
983 985 The list is sorted by revision number, meaning it is
984 986 topologically sorted.
985 987
986 988 'heads' and 'common' are both lists of node IDs. If heads is
987 989 not supplied, uses all of the revlog's heads. If common is not
988 990 supplied, uses nullid."""
989 991 if common is None:
990 992 common = [nullid]
991 993 if heads is None:
992 994 heads = self.heads()
993 995
994 996 common = [self.rev(n) for n in common]
995 997 heads = [self.rev(n) for n in heads]
996 998
997 999 inc = self.incrementalmissingrevs(common=common)
998 1000 return [self.node(r) for r in inc.missingancestors(heads)]
999 1001
1000 1002 def nodesbetween(self, roots=None, heads=None):
1001 1003 """Return a topological path from 'roots' to 'heads'.
1002 1004
1003 1005 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1004 1006 topologically sorted list of all nodes N that satisfy both of
1005 1007 these constraints:
1006 1008
1007 1009 1. N is a descendant of some node in 'roots'
1008 1010 2. N is an ancestor of some node in 'heads'
1009 1011
1010 1012 Every node is considered to be both a descendant and an ancestor
1011 1013 of itself, so every reachable node in 'roots' and 'heads' will be
1012 1014 included in 'nodes'.
1013 1015
1014 1016 'outroots' is the list of reachable nodes in 'roots', i.e., the
1015 1017 subset of 'roots' that is returned in 'nodes'. Likewise,
1016 1018 'outheads' is the subset of 'heads' that is also in 'nodes'.
1017 1019
1018 1020 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1019 1021 unspecified, uses nullid as the only root. If 'heads' is
1020 1022 unspecified, uses list of all of the revlog's heads."""
1021 1023 nonodes = ([], [], [])
1022 1024 if roots is not None:
1023 1025 roots = list(roots)
1024 1026 if not roots:
1025 1027 return nonodes
1026 1028 lowestrev = min([self.rev(n) for n in roots])
1027 1029 else:
1028 1030 roots = [nullid] # Everybody's a descendant of nullid
1029 1031 lowestrev = nullrev
1030 1032 if (lowestrev == nullrev) and (heads is None):
1031 1033 # We want _all_ the nodes!
1032 1034 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1033 1035 if heads is None:
1034 1036 # All nodes are ancestors, so the latest ancestor is the last
1035 1037 # node.
1036 1038 highestrev = len(self) - 1
1037 1039 # Set ancestors to None to signal that every node is an ancestor.
1038 1040 ancestors = None
1039 1041 # Set heads to an empty dictionary for later discovery of heads
1040 1042 heads = {}
1041 1043 else:
1042 1044 heads = list(heads)
1043 1045 if not heads:
1044 1046 return nonodes
1045 1047 ancestors = set()
1046 1048 # Turn heads into a dictionary so we can remove 'fake' heads.
1047 1049 # Also, later we will be using it to filter out the heads we can't
1048 1050 # find from roots.
1049 1051 heads = dict.fromkeys(heads, False)
1050 1052 # Start at the top and keep marking parents until we're done.
1051 1053 nodestotag = set(heads)
1052 1054 # Remember where the top was so we can use it as a limit later.
1053 1055 highestrev = max([self.rev(n) for n in nodestotag])
1054 1056 while nodestotag:
1055 1057 # grab a node to tag
1056 1058 n = nodestotag.pop()
1057 1059 # Never tag nullid
1058 1060 if n == nullid:
1059 1061 continue
1060 1062 # A node's revision number represents its place in a
1061 1063 # topologically sorted list of nodes.
1062 1064 r = self.rev(n)
1063 1065 if r >= lowestrev:
1064 1066 if n not in ancestors:
1065 1067 # If we are possibly a descendant of one of the roots
1066 1068 # and we haven't already been marked as an ancestor
1067 1069 ancestors.add(n) # Mark as ancestor
1068 1070 # Add non-nullid parents to list of nodes to tag.
1069 1071 nodestotag.update(
1070 1072 [p for p in self.parents(n) if p != nullid]
1071 1073 )
1072 1074 elif n in heads: # We've seen it before, is it a fake head?
1073 1075 # So it is, real heads should not be the ancestors of
1074 1076 # any other heads.
1075 1077 heads.pop(n)
1076 1078 if not ancestors:
1077 1079 return nonodes
1078 1080 # Now that we have our set of ancestors, we want to remove any
1079 1081 # roots that are not ancestors.
1080 1082
1081 1083 # If one of the roots was nullid, everything is included anyway.
1082 1084 if lowestrev > nullrev:
1083 1085 # But, since we weren't, let's recompute the lowest rev to not
1084 1086 # include roots that aren't ancestors.
1085 1087
1086 1088 # Filter out roots that aren't ancestors of heads
1087 1089 roots = [root for root in roots if root in ancestors]
1088 1090 # Recompute the lowest revision
1089 1091 if roots:
1090 1092 lowestrev = min([self.rev(root) for root in roots])
1091 1093 else:
1092 1094 # No more roots? Return empty list
1093 1095 return nonodes
1094 1096 else:
1095 1097 # We are descending from nullid, and don't need to care about
1096 1098 # any other roots.
1097 1099 lowestrev = nullrev
1098 1100 roots = [nullid]
1099 1101 # Transform our roots list into a set.
1100 1102 descendants = set(roots)
1101 1103 # Also, keep the original roots so we can filter out roots that aren't
1102 1104 # 'real' roots (i.e. are descended from other roots).
1103 1105 roots = descendants.copy()
1104 1106 # Our topologically sorted list of output nodes.
1105 1107 orderedout = []
1106 1108 # Don't start at nullid since we don't want nullid in our output list,
1107 1109 # and if nullid shows up in descendants, empty parents will look like
1108 1110 # they're descendants.
1109 1111 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1110 1112 n = self.node(r)
1111 1113 isdescendant = False
1112 1114 if lowestrev == nullrev: # Everybody is a descendant of nullid
1113 1115 isdescendant = True
1114 1116 elif n in descendants:
1115 1117 # n is already a descendant
1116 1118 isdescendant = True
1117 1119 # This check only needs to be done here because all the roots
1118 1120 # will already be marked as descendants before the loop.
1119 1121 if n in roots:
1120 1122 # If n was a root, check if it's a 'real' root.
1121 1123 p = tuple(self.parents(n))
1122 1124 # If any of its parents are descendants, it's not a root.
1123 1125 if (p[0] in descendants) or (p[1] in descendants):
1124 1126 roots.remove(n)
1125 1127 else:
1126 1128 p = tuple(self.parents(n))
1127 1129 # A node is a descendant if either of its parents are
1128 1130 # descendants. (We seeded the descendants set with the roots
1129 1131 # up there, remember?)
1130 1132 if (p[0] in descendants) or (p[1] in descendants):
1131 1133 descendants.add(n)
1132 1134 isdescendant = True
1133 1135 if isdescendant and ((ancestors is None) or (n in ancestors)):
1134 1136 # Only include nodes that are both descendants and ancestors.
1135 1137 orderedout.append(n)
1136 1138 if (ancestors is not None) and (n in heads):
1137 1139 # We're trying to figure out which heads are reachable
1138 1140 # from roots.
1139 1141 # Mark this head as having been reached
1140 1142 heads[n] = True
1141 1143 elif ancestors is None:
1142 1144 # Otherwise, we're trying to discover the heads.
1143 1145 # Assume this is a head because if it isn't, the next step
1144 1146 # will eventually remove it.
1145 1147 heads[n] = True
1146 1148 # But, obviously its parents aren't.
1147 1149 for p in self.parents(n):
1148 1150 heads.pop(p, None)
1149 1151 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1150 1152 roots = list(roots)
1151 1153 assert orderedout
1152 1154 assert roots
1153 1155 assert heads
1154 1156 return (orderedout, roots, heads)
1155 1157
1156 1158 def headrevs(self, revs=None):
1157 1159 if revs is None:
1158 1160 try:
1159 1161 return self.index.headrevs()
1160 1162 except AttributeError:
1161 1163 return self._headrevs()
1162 1164 if rustdagop is not None:
1163 1165 return rustdagop.headrevs(self.index, revs)
1164 1166 return dagop.headrevs(revs, self._uncheckedparentrevs)
1165 1167
1166 1168 def computephases(self, roots):
1167 1169 return self.index.computephasesmapsets(roots)
1168 1170
1169 1171 def _headrevs(self):
1170 1172 count = len(self)
1171 1173 if not count:
1172 1174 return [nullrev]
1173 1175 # we won't iterate over filtered revs, so nobody is a head at the start
1174 1176 ishead = [0] * (count + 1)
1175 1177 index = self.index
1176 1178 for r in self:
1177 1179 ishead[r] = 1 # I may be a head
1178 1180 e = index[r]
1179 1181 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1180 1182 return [r for r, val in enumerate(ishead) if val]
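The pure head computation is a single pass: every rev starts as a candidate head, and each rev clears the mark on its parents; whatever survives has no children. (The real code sizes the array count + 1 so that a nullrev parent of -1 harmlessly hits the spare last slot.) A worked toy example (hypothetical graph):

    # revs 2 and 3 both have parent 1, so 2 and 3 are the heads
    parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
    ishead = [1] * len(parents)
    for r, (p1, p2) in parents.items():
        for p in (p1, p2):
            if p >= 0:
                ishead[p] = 0
    assert [r for r, v in enumerate(ishead) if v] == [2, 3]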
1181 1183
1182 1184 def heads(self, start=None, stop=None):
1183 1185 """return the list of all nodes that have no children
1184 1186
1185 1187 if start is specified, only heads that are descendants of
1186 1188 start will be returned
1187 1189 if stop is specified, it will consider all the revs from stop
1188 1190 as if they had no children
1189 1191 """
1190 1192 if start is None and stop is None:
1191 1193 if not len(self):
1192 1194 return [nullid]
1193 1195 return [self.node(r) for r in self.headrevs()]
1194 1196
1195 1197 if start is None:
1196 1198 start = nullrev
1197 1199 else:
1198 1200 start = self.rev(start)
1199 1201
1200 1202 stoprevs = set(self.rev(n) for n in stop or [])
1201 1203
1202 1204 revs = dagop.headrevssubset(
1203 1205 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1204 1206 )
1205 1207
1206 1208 return [self.node(rev) for rev in revs]
1207 1209
1208 1210 def children(self, node):
1209 1211 """find the children of a given node"""
1210 1212 c = []
1211 1213 p = self.rev(node)
1212 1214 for r in self.revs(start=p + 1):
1213 1215 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1214 1216 if prevs:
1215 1217 for pr in prevs:
1216 1218 if pr == p:
1217 1219 c.append(self.node(r))
1218 1220 elif p == nullrev:
1219 1221 c.append(self.node(r))
1220 1222 return c
1221 1223
1222 1224 def commonancestorsheads(self, a, b):
1223 1225 """calculate all the heads of the common ancestors of nodes a and b"""
1224 1226 a, b = self.rev(a), self.rev(b)
1225 1227 ancs = self._commonancestorsheads(a, b)
1226 1228 return pycompat.maplist(self.node, ancs)
1227 1229
1228 1230 def _commonancestorsheads(self, *revs):
1229 1231 """calculate all the heads of the common ancestors of revs"""
1230 1232 try:
1231 1233 ancs = self.index.commonancestorsheads(*revs)
1232 1234 except (AttributeError, OverflowError): # C implementation failed
1233 1235 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1234 1236 return ancs
1235 1237
1236 1238 def isancestor(self, a, b):
1237 1239 """return True if node a is an ancestor of node b
1238 1240
1239 1241 A revision is considered an ancestor of itself."""
1240 1242 a, b = self.rev(a), self.rev(b)
1241 1243 return self.isancestorrev(a, b)
1242 1244
1243 1245 def isancestorrev(self, a, b):
1244 1246 """return True if revision a is an ancestor of revision b
1245 1247
1246 1248 A revision is considered an ancestor of itself.
1247 1249
1248 1250 The implementation of this is trivial but the use of
1249 1251 reachableroots is not."""
1250 1252 if a == nullrev:
1251 1253 return True
1252 1254 elif a == b:
1253 1255 return True
1254 1256 elif a > b:
1255 1257 return False
1256 1258 return bool(self.reachableroots(a, [b], [a], includepath=False))
1257 1259
1258 1260 def reachableroots(self, minroot, heads, roots, includepath=False):
1259 1261 """return (heads(::<roots> and <roots>::<heads>))
1260 1262
1261 1263 If includepath is True, return (<roots>::<heads>)."""
1262 1264 try:
1263 1265 return self.index.reachableroots2(
1264 1266 minroot, heads, roots, includepath
1265 1267 )
1266 1268 except AttributeError:
1267 1269 return dagop._reachablerootspure(
1268 1270 self.parentrevs, minroot, roots, heads, includepath
1269 1271 )
1270 1272
1271 1273 def ancestor(self, a, b):
1272 1274 """calculate the "best" common ancestor of nodes a and b"""
1273 1275
1274 1276 a, b = self.rev(a), self.rev(b)
1275 1277 try:
1276 1278 ancs = self.index.ancestors(a, b)
1277 1279 except (AttributeError, OverflowError):
1278 1280 ancs = ancestor.ancestors(self.parentrevs, a, b)
1279 1281 if ancs:
1280 1282 # choose a consistent winner when there's a tie
1281 1283 return min(map(self.node, ancs))
1282 1284 return nullid
1283 1285
1284 1286 def _match(self, id):
1285 1287 if isinstance(id, int):
1286 1288 # rev
1287 1289 return self.node(id)
1288 1290 if len(id) == 20:
1289 1291 # possibly a binary node
1290 1292 # odds of a binary node being all hex in ASCII are 1 in 10**25
1291 1293 try:
1292 1294 node = id
1293 1295 self.rev(node) # quick search the index
1294 1296 return node
1295 1297 except error.LookupError:
1296 1298 pass # may be partial hex id
1297 1299 try:
1298 1300 # str(rev)
1299 1301 rev = int(id)
1300 1302 if b"%d" % rev != id:
1301 1303 raise ValueError
1302 1304 if rev < 0:
1303 1305 rev = len(self) + rev
1304 1306 if rev < 0 or rev >= len(self):
1305 1307 raise ValueError
1306 1308 return self.node(rev)
1307 1309 except (ValueError, OverflowError):
1308 1310 pass
1309 1311 if len(id) == 40:
1310 1312 try:
1311 1313 # a full hex nodeid?
1312 1314 node = bin(id)
1313 1315 self.rev(node)
1314 1316 return node
1315 1317 except (TypeError, error.LookupError):
1316 1318 pass
1317 1319
1318 1320 def _partialmatch(self, id):
1319 1321 # we don't care about wdirfilenodeids as they should always be full hashes
1320 1322 maybewdir = wdirhex.startswith(id)
1321 1323 try:
1322 1324 partial = self.index.partialmatch(id)
1323 1325 if partial and self.hasnode(partial):
1324 1326 if maybewdir:
1325 1327 # single 'ff...' match in radix tree, ambiguous with wdir
1326 1328 raise error.RevlogError
1327 1329 return partial
1328 1330 if maybewdir:
1329 1331 # no 'ff...' match in radix tree, wdir identified
1330 1332 raise error.WdirUnsupported
1331 1333 return None
1332 1334 except error.RevlogError:
1333 1335 # parsers.c radix tree lookup gave multiple matches
1334 1336 # fast path: for unfiltered changelog, radix tree is accurate
1335 1337 if not getattr(self, 'filteredrevs', None):
1336 1338 raise error.AmbiguousPrefixLookupError(
1337 1339 id, self.indexfile, _(b'ambiguous identifier')
1338 1340 )
1339 1341 # fall through to slow path that filters hidden revisions
1340 1342 except (AttributeError, ValueError):
1341 1343 # we are pure python, or key was too short to search radix tree
1342 1344 pass
1343 1345
1344 1346 if id in self._pcache:
1345 1347 return self._pcache[id]
1346 1348
1347 1349 if len(id) <= 40:
1348 1350 try:
1349 1351 # hex(node)[:...]
1350 1352 l = len(id) // 2 # grab an even number of digits
1351 1353 prefix = bin(id[: l * 2])
1352 1354 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1353 1355 nl = [
1354 1356 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1355 1357 ]
1356 1358 if nullhex.startswith(id):
1357 1359 nl.append(nullid)
1358 1360 if len(nl) > 0:
1359 1361 if len(nl) == 1 and not maybewdir:
1360 1362 self._pcache[id] = nl[0]
1361 1363 return nl[0]
1362 1364 raise error.AmbiguousPrefixLookupError(
1363 1365 id, self.indexfile, _(b'ambiguous identifier')
1364 1366 )
1365 1367 if maybewdir:
1366 1368 raise error.WdirUnsupported
1367 1369 return None
1368 1370 except TypeError:
1369 1371 pass
1370 1372
1371 1373 def lookup(self, id):
1372 1374 """locate a node based on:
1373 1375 - revision number or str(revision number)
1374 1376 - nodeid or subset of hex nodeid
1375 1377 """
1376 1378 n = self._match(id)
1377 1379 if n is not None:
1378 1380 return n
1379 1381 n = self._partialmatch(id)
1380 1382 if n:
1381 1383 return n
1382 1384
1383 1385 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1384 1386
1385 1387 def shortest(self, node, minlength=1):
1386 1388 """Find the shortest unambiguous prefix that matches node."""
1387 1389
1388 1390 def isvalid(prefix):
1389 1391 try:
1390 1392 matchednode = self._partialmatch(prefix)
1391 1393 except error.AmbiguousPrefixLookupError:
1392 1394 return False
1393 1395 except error.WdirUnsupported:
1394 1396 # single 'ff...' match
1395 1397 return True
1396 1398 if matchednode is None:
1397 1399 raise error.LookupError(node, self.indexfile, _(b'no node'))
1398 1400 return True
1399 1401
1400 1402 def maybewdir(prefix):
1401 1403 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1402 1404
1403 1405 hexnode = hex(node)
1404 1406
1405 1407 def disambiguate(hexnode, minlength):
1406 1408 """Disambiguate against wdirid."""
1407 1409 for length in range(minlength, 41):
1408 1410 prefix = hexnode[:length]
1409 1411 if not maybewdir(prefix):
1410 1412 return prefix
1411 1413
1412 1414 if not getattr(self, 'filteredrevs', None):
1413 1415 try:
1414 1416 length = max(self.index.shortest(node), minlength)
1415 1417 return disambiguate(hexnode, length)
1416 1418 except error.RevlogError:
1417 1419 if node != wdirid:
1418 1420 raise error.LookupError(node, self.indexfile, _(b'no node'))
1419 1421 except AttributeError:
1420 1422 # Fall through to pure code
1421 1423 pass
1422 1424
1423 1425 if node == wdirid:
1424 1426 for length in range(minlength, 41):
1425 1427 prefix = hexnode[:length]
1426 1428 if isvalid(prefix):
1427 1429 return prefix
1428 1430
1429 1431 for length in range(minlength, 41):
1430 1432 prefix = hexnode[:length]
1431 1433 if isvalid(prefix):
1432 1434 return disambiguate(hexnode, length)
1433 1435
1434 1436 def cmp(self, node, text):
1435 1437 """compare text with a given file revision
1436 1438
1437 1439 returns True if text is different than what is stored.
1438 1440 """
1439 1441 p1, p2 = self.parents(node)
1440 1442 return storageutil.hashrevisionsha1(text, p1, p2) != node
1441 1443
1442 1444 def _cachesegment(self, offset, data):
1443 1445 """Add a segment to the revlog cache.
1444 1446
1445 1447 Accepts an absolute offset and the data that is at that location.
1446 1448 """
1447 1449 o, d = self._chunkcache
1448 1450 # try to add to existing cache
1449 1451 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1450 1452 self._chunkcache = o, d + data
1451 1453 else:
1452 1454 self._chunkcache = offset, data
1453 1455
1454 1456 def _readsegment(self, offset, length, df=None):
1455 1457 """Load a segment of raw data from the revlog.
1456 1458
1457 1459 Accepts an absolute offset, length to read, and an optional existing
1458 1460 file handle to read from.
1459 1461
1460 1462 If an existing file handle is passed, it will be seeked and the
1461 1463 original seek position will NOT be restored.
1462 1464
1463 1465 Returns a str or buffer of raw byte data.
1464 1466
1465 1467 Raises if the requested number of bytes could not be read.
1466 1468 """
1467 1469 # Cache data both forward and backward around the requested
1468 1470 # data, in a fixed size window. This helps speed up operations
1469 1471 # involving reading the revlog backwards.
1470 1472 cachesize = self._chunkcachesize
1471 1473 realoffset = offset & ~(cachesize - 1)
1472 1474 reallength = (
1473 1475 (offset + length + cachesize) & ~(cachesize - 1)
1474 1476 ) - realoffset
1475 1477 with self._datareadfp(df) as df:
1476 1478 df.seek(realoffset)
1477 1479 d = df.read(reallength)
1478 1480
1479 1481 self._cachesegment(realoffset, d)
1480 1482 if offset != realoffset or reallength != length:
1481 1483 startoffset = offset - realoffset
1482 1484 if len(d) - startoffset < length:
1483 1485 raise error.RevlogError(
1484 1486 _(
1485 1487 b'partial read of revlog %s; expected %d bytes from '
1486 1488 b'offset %d, got %d'
1487 1489 )
1488 1490 % (
1489 1491 self.indexfile if self._inline else self.datafile,
1490 1492 length,
1491 1493 realoffset,
1492 1494 len(d) - startoffset,
1493 1495 )
1494 1496 )
1495 1497
1496 1498 return util.buffer(d, startoffset, length)
1497 1499
1498 1500 if len(d) < length:
1499 1501 raise error.RevlogError(
1500 1502 _(
1501 1503 b'partial read of revlog %s; expected %d bytes from offset '
1502 1504 b'%d, got %d'
1503 1505 )
1504 1506 % (
1505 1507 self.indexfile if self._inline else self.datafile,
1506 1508 length,
1507 1509 offset,
1508 1510 len(d),
1509 1511 )
1510 1512 )
1511 1513
1512 1514 return d
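The window arithmetic relies on _chunkcachesize being a power of two (enforced in _loadindex), so `& ~(cachesize - 1)` rounds down to a window boundary exactly. Worked numbers with the default 64 KiB window:

    cachesize = 65536                        # must be a power of two
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)   # round down: 65536
    reallength = (
        (offset + length + cachesize) & ~(cachesize - 1)
    ) - realoffset
    assert (realoffset, reallength) == (65536, 65536)
    # the 100 requested bytes at 70000 fall inside [65536, 131072)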
1513 1515
1514 1516 def _getsegment(self, offset, length, df=None):
1515 1517 """Obtain a segment of raw data from the revlog.
1516 1518
1517 1519 Accepts an absolute offset, length of bytes to obtain, and an
1518 1520 optional file handle to the already-opened revlog. If the file
1519 1521 handle is used, its original seek position will not be preserved.
1520 1522
1521 1523 Requests for data may be returned from a cache.
1522 1524
1523 1525 Returns a str or a buffer instance of raw byte data.
1524 1526 """
1525 1527 o, d = self._chunkcache
1526 1528 l = len(d)
1527 1529
1528 1530 # is it in the cache?
1529 1531 cachestart = offset - o
1530 1532 cacheend = cachestart + length
1531 1533 if cachestart >= 0 and cacheend <= l:
1532 1534 if cachestart == 0 and cacheend == l:
1533 1535 return d # avoid a copy
1534 1536 return util.buffer(d, cachestart, cacheend - cachestart)
1535 1537
1536 1538 return self._readsegment(offset, length, df=df)
1537 1539
1538 1540 def _getsegmentforrevs(self, startrev, endrev, df=None):
1539 1541 """Obtain a segment of raw data corresponding to a range of revisions.
1540 1542
1541 1543 Accepts the start and end revisions and an optional already-open
1542 1544 file handle to be used for reading. If the file handle is read, its
1543 1545 seek position will not be preserved.
1544 1546
1545 1547 Requests for data may be satisfied by a cache.
1546 1548
1547 1549 Returns a 2-tuple of (offset, data) for the requested range of
1548 1550 revisions. Offset is the integer offset from the beginning of the
1549 1551 revlog and data is a str or buffer of the raw byte data.
1550 1552
1551 1553 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1552 1554 to determine where each revision's data begins and ends.
1553 1555 """
1554 1556 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1555 1557 # (functions are expensive).
1556 1558 index = self.index
1557 1559 istart = index[startrev]
1558 1560 start = int(istart[0] >> 16)
1559 1561 if startrev == endrev:
1560 1562 end = start + istart[1]
1561 1563 else:
1562 1564 iend = index[endrev]
1563 1565 end = int(iend[0] >> 16) + iend[1]
1564 1566
1565 1567 if self._inline:
1566 1568 start += (startrev + 1) * self._io.size
1567 1569 end += (endrev + 1) * self._io.size
1568 1570 length = end - start
1569 1571
1570 1572 return start, self._getsegment(start, length, df=df)
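For inline revlogs the data chunks live in the .i file, each immediately after its own index entry, so a revision's physical start is its recorded offset plus (rev + 1) index entries: the entries for revs 0..rev all precede its data. Worked numbers (v1 entry size is 64 bytes; offsets illustrative):

    iosize = 64                  # indexformatng.size
    rev, recorded = 2, 150       # offset as stored in the index entry
    physical = recorded + (rev + 1) * iosize
    assert physical == 150 + 192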
1571 1573
1572 1574 def _chunk(self, rev, df=None):
1573 1575 """Obtain a single decompressed chunk for a revision.
1574 1576
1575 1577 Accepts an integer revision and an optional already-open file handle
1576 1578 to be used for reading. If used, the seek position of the file will not
1577 1579 be preserved.
1578 1580
1579 1581 Returns a str holding uncompressed data for the requested revision.
1580 1582 """
1581 1583 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1582 1584
1583 1585 def _chunks(self, revs, df=None, targetsize=None):
1584 1586 """Obtain decompressed chunks for the specified revisions.
1585 1587
1586 1588 Accepts an iterable of numeric revisions that are assumed to be in
1587 1589 ascending order. Also accepts an optional already-open file handle
1588 1590 to be used for reading. If used, the seek position of the file will
1589 1591 not be preserved.
1590 1592
1591 1593 This function is similar to calling ``self._chunk()`` multiple times,
1592 1594 but is faster.
1593 1595
1594 1596 Returns a list with decompressed data for each requested revision.
1595 1597 """
1596 1598 if not revs:
1597 1599 return []
1598 1600 start = self.start
1599 1601 length = self.length
1600 1602 inline = self._inline
1601 1603 iosize = self._io.size
1602 1604 buffer = util.buffer
1603 1605
1604 1606 l = []
1605 1607 ladd = l.append
1606 1608
1607 1609 if not self._withsparseread:
1608 1610 slicedchunks = (revs,)
1609 1611 else:
1610 1612 slicedchunks = deltautil.slicechunk(
1611 1613 self, revs, targetsize=targetsize
1612 1614 )
1613 1615
1614 1616 for revschunk in slicedchunks:
1615 1617 firstrev = revschunk[0]
1616 1618 # Skip trailing revisions with empty diff
1617 1619 for lastrev in revschunk[::-1]:
1618 1620 if length(lastrev) != 0:
1619 1621 break
1620 1622
1621 1623 try:
1622 1624 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1623 1625 except OverflowError:
1624 1626 # issue4215 - we can't cache a run of chunks greater than
1625 1627 # 2G on Windows
1626 1628 return [self._chunk(rev, df=df) for rev in revschunk]
1627 1629
1628 1630 decomp = self.decompress
1629 1631 for rev in revschunk:
1630 1632 chunkstart = start(rev)
1631 1633 if inline:
1632 1634 chunkstart += (rev + 1) * iosize
1633 1635 chunklength = length(rev)
1634 1636 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1635 1637
1636 1638 return l
1637 1639
1638 1640 def _chunkclear(self):
1639 1641 """Clear the raw chunk cache."""
1640 1642 self._chunkcache = (0, b'')
1641 1643
1642 1644 def deltaparent(self, rev):
1643 1645 """return deltaparent of the given revision"""
1644 1646 base = self.index[rev][3]
1645 1647 if base == rev:
1646 1648 return nullrev
1647 1649 elif self._generaldelta:
1648 1650 return base
1649 1651 else:
1650 1652 return rev - 1
1651 1653
1652 1654 def issnapshot(self, rev):
1653 1655 """tells whether rev is a snapshot
1654 1656 """
1655 1657 if not self._sparserevlog:
1656 1658 return self.deltaparent(rev) == nullrev
1657 1659 elif util.safehasattr(self.index, b'issnapshot'):
1658 1660 # directly assign the method to cache the testing and access
1659 1661 self.issnapshot = self.index.issnapshot
1660 1662 return self.issnapshot(rev)
1661 1663 if rev == nullrev:
1662 1664 return True
1663 1665 entry = self.index[rev]
1664 1666 base = entry[3]
1665 1667 if base == rev:
1666 1668 return True
1667 1669 if base == nullrev:
1668 1670 return True
1669 1671 p1 = entry[5]
1670 1672 p2 = entry[6]
1671 1673 if base == p1 or base == p2:
1672 1674 return False
1673 1675 return self.issnapshot(base)
1674 1676
1675 1677 def snapshotdepth(self, rev):
1676 1678 """number of snapshot in the chain before this one"""
1677 1679 if not self.issnapshot(rev):
1678 1680 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1679 1681 return len(self._deltachain(rev)[0]) - 1
1680 1682
1681 1683 def revdiff(self, rev1, rev2):
1682 1684 """return or calculate a delta between two revisions
1683 1685
1684 1686 The delta calculated is in binary form and is intended to be written to
1685 1687 revlog data directly. So this function needs raw revision data.
1686 1688 """
1687 1689 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1688 1690 return bytes(self._chunk(rev2))
1689 1691
1690 1692 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1691 1693
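A hedged usage sketch (``rl`` is an assumed revlog instance, ``rev1`` and ``rev2`` two of its revisions): the returned delta, applied to the raw text of ``rev1``, must reproduce the raw text of ``rev2``:

from mercurial import mdiff

delta = rl.revdiff(rev1, rev2)
assert mdiff.patch(rl.rawdata(rev1), delta) == rl.rawdata(rev2)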
1692 1694 def _processflags(self, text, flags, operation, raw=False):
1693 1695 """deprecated entry point to access flag processors"""
1694 1696 msg = b'_processflag(...) use the specialized variant'
1695 1697 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1696 1698 if raw:
1697 1699 return text, flagutil.processflagsraw(self, text, flags)
1698 1700 elif operation == b'read':
1699 1701 return flagutil.processflagsread(self, text, flags)
1700 1702 else: # write operation
1701 1703 return flagutil.processflagswrite(self, text, flags)
1702 1704
1703 1705 def revision(self, nodeorrev, _df=None, raw=False):
1704 1706 """return an uncompressed revision of a given node or revision
1705 1707 number.
1706 1708
1707 1709 _df - an existing file handle to read from. (internal-only)
1708 1710 raw - an optional argument specifying if the revision data is to be
1709 1711 treated as raw data when applying flag transforms. 'raw' should be set
1710 1712 to True when generating changegroups or in debug commands.
1711 1713 """
1712 1714 if raw:
1713 1715 msg = (
1714 1716 b'revlog.revision(..., raw=True) is deprecated, '
1715 1717 b'use revlog.rawdata(...)'
1716 1718 )
1717 1719 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1718 1720 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1719 1721
1720 1722 def sidedata(self, nodeorrev, _df=None):
1721 1723 """a map of extra data related to the changeset but not part of the hash
1722 1724
1723 1725 This function currently returns a dictionary. However, a more advanced
1724 1726 mapping object will likely be used in the future to make the code more
1725 1727 efficient/lazy.
1726 1728 """
1727 1729 return self._revisiondata(nodeorrev, _df)[1]
1728 1730
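A small usage sketch, assuming a revlog ``rl`` with side data enabled and a known ``node``: the returned mapping is empty for revisions that carry no side data:

sd = rl.sidedata(node)  # {sidedata key (int): raw value (bytes)}
for key, value in sorted(sd.items()):
    print('%d: %d bytes' % (key, len(value)))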
1729 1731 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1730 1732 # deal with <nodeorrev> argument type
1731 1733 if isinstance(nodeorrev, int):
1732 1734 rev = nodeorrev
1733 1735 node = self.node(rev)
1734 1736 else:
1735 1737 node = nodeorrev
1736 1738 rev = None
1737 1739
1738 1740 # fast path the special `nullid` rev
1739 1741 if node == nullid:
1740 1742 return b"", {}
1741 1743
1742 1744 # The text as stored inside the revlog. Might be the revision or might
1743 1745 # need to be processed to retrieve the revision.
1744 1746 rawtext = None
1745 1747
1746 1748 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1747 1749
1748 1750 if raw and validated:
1749 1751 # if we don't want to process the raw text and that raw
1750 1752 # text is cached, we can exit early.
1751 1753 return rawtext, {}
1752 1754 if rev is None:
1753 1755 rev = self.rev(node)
1754 1756 # the revlog's flags for this revision
1755 1757 # (they usually alter its state or content)
1756 1758 flags = self.flags(rev)
1757 1759
1758 1760 if validated and flags == REVIDX_DEFAULT_FLAGS:
1759 1761 # no extra flags set, no flag processor runs, text = rawtext
1760 1762 return rawtext, {}
1761 1763
1762 1764 sidedata = {}
1763 1765 if raw:
1764 1766 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1765 1767 text = rawtext
1766 1768 else:
1767 1769 try:
1768 1770 r = flagutil.processflagsread(self, rawtext, flags)
1769 1771 except error.SidedataHashError as exc:
1770 1772 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1771 1773 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1772 1774 raise error.RevlogError(msg)
1773 1775 text, validatehash, sidedata = r
1774 1776 if validatehash:
1775 1777 self.checkhash(text, node, rev=rev)
1776 1778 if not validated:
1777 1779 self._revisioncache = (node, rev, rawtext)
1778 1780
1779 1781 return text, sidedata
1780 1782
1781 1783 def _rawtext(self, node, rev, _df=None):
1782 1784 """return the possibly unvalidated rawtext for a revision
1783 1785
1784 1786 returns (rev, rawtext, validated)
1785 1787 """
1786 1788
1787 1789 # revision in the cache (could be useful to apply delta)
1788 1790 cachedrev = None
1789 1791 # An intermediate text to apply deltas to
1790 1792 basetext = None
1791 1793
1792 1794 # Check if we have the entry in cache
1793 1795 # The cache entry looks like (node, rev, rawtext)
1794 1796 if self._revisioncache:
1795 1797 if self._revisioncache[0] == node:
1796 1798 return (rev, self._revisioncache[2], True)
1797 1799 cachedrev = self._revisioncache[1]
1798 1800
1799 1801 if rev is None:
1800 1802 rev = self.rev(node)
1801 1803
1802 1804 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1803 1805 if stopped:
1804 1806 basetext = self._revisioncache[2]
1805 1807
1806 1808 # drop cache to save memory, the caller is expected to
1807 1809 # update self._revisioncache after validating the text
1808 1810 self._revisioncache = None
1809 1811
1810 1812 targetsize = None
1811 1813 rawsize = self.index[rev][2]
1812 1814 if 0 <= rawsize:
1813 1815 targetsize = 4 * rawsize
1814 1816
1815 1817 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1816 1818 if basetext is None:
1817 1819 basetext = bytes(bins[0])
1818 1820 bins = bins[1:]
1819 1821
1820 1822 rawtext = mdiff.patches(basetext, bins)
1821 1823 del basetext # let us have a chance to free memory early
1822 1824 return (rev, rawtext, False)
1823 1825
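The body above boils down to "fetch the chunks of the delta chain, then fold the patches over the base text". A minimal sketch of that core, without the cache handling (``rl`` is an assumed revlog instance):

from mercurial import mdiff

def rawtext_sketch(rl, rev):
    chain, _stopped = rl._deltachain(rev)  # base revision comes first
    bins = rl._chunks(chain)               # decompressed chunks
    return mdiff.patches(bytes(bins[0]), bins[1:])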
1824 1826 def rawdata(self, nodeorrev, _df=None):
1825 1827 """return an uncompressed raw data of a given node or revision number.
1826 1828
1827 1829 _df - an existing file handle to read from. (internal-only)
1828 1830 """
1829 1831 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1830 1832
1831 1833 def hash(self, text, p1, p2):
1832 1834 """Compute a node hash.
1833 1835
1834 1836 Available as a function so that subclasses can replace the hash
1835 1837 as needed.
1836 1838 """
1837 1839 return storageutil.hashrevisionsha1(text, p1, p2)
1838 1840
1839 1841 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1840 1842 """Check node hash integrity.
1841 1843
1842 1844 Available as a function so that subclasses can extend hash mismatch
1843 1845 behaviors as needed.
1844 1846 """
1845 1847 try:
1846 1848 if p1 is None and p2 is None:
1847 1849 p1, p2 = self.parents(node)
1848 1850 if node != self.hash(text, p1, p2):
1849 1851 # Clear the revision cache on hash failure. The revision cache
1850 1852 # only stores the raw revision and clearing the cache does have
1851 1853 # the side-effect that we won't have a cache hit when the raw
1852 1854 # revision data is accessed. But this case should be rare and
1853 1855 # it is extra work to teach the cache about the hash
1854 1856 # verification state.
1855 1857 if self._revisioncache and self._revisioncache[0] == node:
1856 1858 self._revisioncache = None
1857 1859
1858 1860 revornode = rev
1859 1861 if revornode is None:
1860 1862 revornode = templatefilters.short(hex(node))
1861 1863 raise error.RevlogError(
1862 1864 _(b"integrity check failed on %s:%s")
1863 1865 % (self.indexfile, pycompat.bytestr(revornode))
1864 1866 )
1865 1867 except error.RevlogError:
1866 1868 if self._censorable and storageutil.iscensoredtext(text):
1867 1869 raise error.CensoredNodeError(self.indexfile, node, text)
1868 1870 raise
1869 1871
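For reference, ``storageutil.hashrevisionsha1`` computes SHA-1 over the two parent nodeids in sorted order followed by the text; a self-contained sketch of the scheme that ``checkhash`` verifies against:

import hashlib

def hashrevision_sketch(text, p1, p2):
    # node = sha1(min(p1, p2) + max(p1, p2) + text)
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()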
1870 1872 def _enforceinlinesize(self, tr, fp=None):
1871 1873 """Check if the revlog is too big for inline and convert if so.
1872 1874
1873 1875 This should be called after revisions are added to the revlog. If the
1874 1876 revlog has grown too large to be an inline revlog, it will convert it
1875 1877 to use multiple index and data files.
1876 1878 """
1877 1879 tiprev = len(self) - 1
1878 1880 if (
1879 1881 not self._inline
1880 1882 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1881 1883 ):
1882 1884 return
1883 1885
1884 1886 trinfo = tr.find(self.indexfile)
1885 1887 if trinfo is None:
1886 1888 raise error.RevlogError(
1887 1889 _(b"%s not found in the transaction") % self.indexfile
1888 1890 )
1889 1891
1890 1892 trindex = trinfo[2]
1891 1893 if trindex is not None:
1892 1894 dataoff = self.start(trindex)
1893 1895 else:
1894 1896 # revlog was stripped at start of transaction, use all leftover data
1895 1897 trindex = len(self) - 1
1896 1898 dataoff = self.end(tiprev)
1897 1899
1898 1900 tr.add(self.datafile, dataoff)
1899 1901
1900 1902 if fp:
1901 1903 fp.flush()
1902 1904 fp.close()
1903 1905 # We can't use the cached file handle after close(). So prevent
1904 1906 # its usage.
1905 1907 self._writinghandles = None
1906 1908
1907 1909 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1908 1910 for r in self:
1909 1911 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1910 1912
1911 1913 with self._indexfp(b'w') as fp:
1912 1914 self.version &= ~FLAG_INLINE_DATA
1913 1915 self._inline = False
1914 1916 io = self._io
1915 1917 for i in self:
1916 1918 e = io.packentry(self.index[i], self.node, self.version, i)
1917 1919 fp.write(e)
1918 1920
1919 1921 # the temp file replaces the real index when we exit the context
1920 1922 # manager
1921 1923
1922 1924 tr.replace(self.indexfile, trindex * self._io.size)
1923 1925 self._chunkclear()
1924 1926
1925 1927 def _nodeduplicatecallback(self, transaction, node):
1926 1928 """called when trying to add a node already stored.
1927 1929 """
1928 1930
1929 1931 def addrevision(
1930 1932 self,
1931 1933 text,
1932 1934 transaction,
1933 1935 link,
1934 1936 p1,
1935 1937 p2,
1936 1938 cachedelta=None,
1937 1939 node=None,
1938 1940 flags=REVIDX_DEFAULT_FLAGS,
1939 1941 deltacomputer=None,
1940 1942 sidedata=None,
1941 1943 ):
1942 1944 """add a revision to the log
1943 1945
1944 1946 text - the revision data to add
1945 1947 transaction - the transaction object used for rollback
1946 1948 link - the linkrev data to add
1947 1949 p1, p2 - the parent nodeids of the revision
1948 1950 cachedelta - an optional precomputed delta
1949 1951 node - nodeid of revision; typically node is not specified, and it is
1950 1952 computed by default as hash(text, p1, p2); however, subclasses might
1951 1953 use a different hashing method (and override checkhash() in that case)
1952 1954 flags - the known flags to set on the revision
1953 1955 deltacomputer - an optional deltacomputer instance shared between
1954 1956 multiple calls
1955 1957 """
1956 1958 if link == nullrev:
1957 1959 raise error.RevlogError(
1958 1960 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1959 1961 )
1960 1962
1961 1963 if sidedata is None:
1962 1964 sidedata = {}
1963 1965 flags = flags & ~REVIDX_SIDEDATA
1964 1966 elif not self.hassidedata:
1965 1967 raise error.ProgrammingError(
1966 1968 _(b"trying to add sidedata to a revlog who don't support them")
1967 1969 )
1968 1970 else:
1969 1971 flags |= REVIDX_SIDEDATA
1970 1972
1971 1973 if flags:
1972 1974 node = node or self.hash(text, p1, p2)
1973 1975
1974 1976 rawtext, validatehash = flagutil.processflagswrite(
1975 1977 self, text, flags, sidedata=sidedata
1976 1978 )
1977 1979
1978 1980 # If the flag processor modifies the revision data, ignore any provided
1979 1981 # cachedelta.
1980 1982 if rawtext != text:
1981 1983 cachedelta = None
1982 1984
1983 1985 if len(rawtext) > _maxentrysize:
1984 1986 raise error.RevlogError(
1985 1987 _(
1986 1988 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
1987 1989 )
1988 1990 % (self.indexfile, len(rawtext))
1989 1991 )
1990 1992
1991 1993 node = node or self.hash(rawtext, p1, p2)
1992 1994 if node in self.nodemap:
1993 1995 return node
1994 1996
1995 1997 if validatehash:
1996 1998 self.checkhash(rawtext, node, p1=p1, p2=p2)
1997 1999
1998 2000 return self.addrawrevision(
1999 2001 rawtext,
2000 2002 transaction,
2001 2003 link,
2002 2004 p1,
2003 2005 p2,
2004 2006 node,
2005 2007 flags,
2006 2008 cachedelta=cachedelta,
2007 2009 deltacomputer=deltacomputer,
2008 2010 )
2009 2011
2010 2012 def addrawrevision(
2011 2013 self,
2012 2014 rawtext,
2013 2015 transaction,
2014 2016 link,
2015 2017 p1,
2016 2018 p2,
2017 2019 node,
2018 2020 flags,
2019 2021 cachedelta=None,
2020 2022 deltacomputer=None,
2021 2023 ):
2022 2024 """add a raw revision with known flags, node and parents
2023 2025 useful when reusing a revision not stored in this revlog (e.g. received
2024 2026 over the wire, or read from an external bundle).
2025 2027 """
2026 2028 dfh = None
2027 2029 if not self._inline:
2028 2030 dfh = self._datafp(b"a+")
2029 2031 ifh = self._indexfp(b"a+")
2030 2032 try:
2031 2033 return self._addrevision(
2032 2034 node,
2033 2035 rawtext,
2034 2036 transaction,
2035 2037 link,
2036 2038 p1,
2037 2039 p2,
2038 2040 flags,
2039 2041 cachedelta,
2040 2042 ifh,
2041 2043 dfh,
2042 2044 deltacomputer=deltacomputer,
2043 2045 )
2044 2046 finally:
2045 2047 if dfh:
2046 2048 dfh.close()
2047 2049 ifh.close()
2048 2050
2049 2051 def compress(self, data):
2050 2052 """Generate a possibly-compressed representation of data."""
2051 2053 if not data:
2052 2054 return b'', data
2053 2055
2054 2056 compressed = self._compressor.compress(data)
2055 2057
2056 2058 if compressed:
2057 2059 # The revlog compressor added the header in the returned data.
2058 2060 return b'', compressed
2059 2061
2060 2062 if data[0:1] == b'\0':
2061 2063 return b'', data
2062 2064 return b'u', data
2063 2065
2064 2066 def decompress(self, data):
2065 2067 """Decompress a revlog chunk.
2066 2068
2067 2069 The chunk is expected to begin with a header identifying the
2068 2070 format type so it can be routed to an appropriate decompressor.
2069 2071 """
2070 2072 if not data:
2071 2073 return data
2072 2074
2073 2075 # Revlogs are read much more frequently than they are written and many
2074 2076 # chunks only take microseconds to decompress, so performance is
2075 2077 # important here.
2076 2078 #
2077 2079 # We can make a few assumptions about revlogs:
2078 2080 #
2079 2081 # 1) the majority of chunks will be compressed (as opposed to inline
2080 2082 # raw data).
2081 2083 # 2) decompressing *any* data will likely be at least 10x slower than
2082 2084 # returning raw inline data.
2083 2085 # 3) we want to prioritize common and officially supported compression
2084 2086 # engines
2085 2087 #
2086 2088 # It follows that we want to optimize for the "decompress compressed data
2087 2089 # when encoded with common and officially supported compression engines"
2088 2090 # case over "raw data" and "data encoded by less common or non-official
2089 2091 # compression engines." That is why we have the inline lookup first
2090 2092 # followed by the compengines lookup.
2091 2093 #
2092 2094 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2093 2095 # compressed chunks. And this matters for changelog and manifest reads.
2094 2096 t = data[0:1]
2095 2097
2096 2098 if t == b'x':
2097 2099 try:
2098 2100 return _zlibdecompress(data)
2099 2101 except zlib.error as e:
2100 2102 raise error.RevlogError(
2101 2103 _(b'revlog decompress error: %s')
2102 2104 % stringutil.forcebytestr(e)
2103 2105 )
2104 2106 # '\0' is more common than 'u' so it goes first.
2105 2107 elif t == b'\0':
2106 2108 return data
2107 2109 elif t == b'u':
2108 2110 return util.buffer(data, 1)
2109 2111
2110 2112 try:
2111 2113 compressor = self._decompressors[t]
2112 2114 except KeyError:
2113 2115 try:
2114 2116 engine = util.compengines.forrevlogheader(t)
2115 2117 compressor = engine.revlogcompressor(self._compengineopts)
2116 2118 self._decompressors[t] = compressor
2117 2119 except KeyError:
2118 2120 raise error.RevlogError(_(b'unknown compression type %r') % t)
2119 2121
2120 2122 return compressor.decompress(data)
2121 2123
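The one-byte dispatch above can be exercised directly: zlib streams start with 0x78 (ASCII 'x'), so the compression header doubles as the revlog chunk type byte, while b'u' marks stored-uncompressed data and a leading b'\0' marks raw data:

import zlib

raw = b'some revision text'
stored = zlib.compress(raw)
assert stored[0:1] == b'x'  # zlib's own header is the type byte
assert zlib.decompress(stored) == raw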
2122 2124 def _addrevision(
2123 2125 self,
2124 2126 node,
2125 2127 rawtext,
2126 2128 transaction,
2127 2129 link,
2128 2130 p1,
2129 2131 p2,
2130 2132 flags,
2131 2133 cachedelta,
2132 2134 ifh,
2133 2135 dfh,
2134 2136 alwayscache=False,
2135 2137 deltacomputer=None,
2136 2138 ):
2137 2139 """internal function to add revisions to the log
2138 2140
2139 2141 see addrevision for argument descriptions.
2140 2142
2141 2143 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2142 2144
2143 2145 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2144 2146 be used.
2145 2147
2146 2148 invariants:
2147 2149 - rawtext is optional (can be None); if not set, cachedelta must be set.
2148 2150 if both are set, they must correspond to each other.
2149 2151 """
2150 2152 if node == nullid:
2151 2153 raise error.RevlogError(
2152 2154 _(b"%s: attempt to add null revision") % self.indexfile
2153 2155 )
2154 2156 if node == wdirid or node in wdirfilenodeids:
2155 2157 raise error.RevlogError(
2156 2158 _(b"%s: attempt to add wdir revision") % self.indexfile
2157 2159 )
2158 2160
2159 2161 if self._inline:
2160 2162 fh = ifh
2161 2163 else:
2162 2164 fh = dfh
2163 2165
2164 2166 btext = [rawtext]
2165 2167
2166 2168 curr = len(self)
2167 2169 prev = curr - 1
2168 2170 offset = self.end(prev)
2169 2171 p1r, p2r = self.rev(p1), self.rev(p2)
2170 2172
2171 2173 # full versions are inserted when the needed deltas
2172 2174 # become comparable to the uncompressed text
2173 2175 if rawtext is None:
2174 2176 # need the rawtext size before it is changed by flag processors,
2175 2177 # which is the non-raw size. use revlog explicitly to avoid filelog's
2176 2178 # extra logic that might remove metadata size.
2177 2179 textlen = mdiff.patchedsize(
2178 2180 revlog.size(self, cachedelta[0]), cachedelta[1]
2179 2181 )
2180 2182 else:
2181 2183 textlen = len(rawtext)
2182 2184
2183 2185 if deltacomputer is None:
2184 2186 deltacomputer = deltautil.deltacomputer(self)
2185 2187
2186 2188 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2187 2189
2188 2190 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2189 2191
2190 2192 e = (
2191 2193 offset_type(offset, flags),
2192 2194 deltainfo.deltalen,
2193 2195 textlen,
2194 2196 deltainfo.base,
2195 2197 link,
2196 2198 p1r,
2197 2199 p2r,
2198 2200 node,
2199 2201 )
2200 2202 self.index.append(e)
2201 2203 self.nodemap[node] = curr
2202 2204
2203 2205 # Reset the pure node cache start lookup offset to account for new
2204 2206 # revision.
2205 2207 if self._nodepos is not None:
2206 2208 self._nodepos = curr
2207 2209
2208 2210 entry = self._io.packentry(e, self.node, self.version, curr)
2209 2211 self._writeentry(
2210 2212 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2211 2213 )
2212 2214
2213 2215 rawtext = btext[0]
2214 2216
2215 2217 if alwayscache and rawtext is None:
2216 2218 rawtext = deltacomputer.buildtext(revinfo, fh)
2217 2219
2218 2220 if type(rawtext) == bytes: # only accept immutable objects
2219 2221 self._revisioncache = (node, curr, rawtext)
2220 2222 self._chainbasecache[curr] = deltainfo.chainbase
2221 2223 return node
2222 2224
2223 2225 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2224 2226 # Files opened in a+ mode have inconsistent behavior on various
2225 2227 # platforms. Windows requires that a file positioning call be made
2226 2228 # when the file handle transitions between reads and writes. See
2227 2229 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2228 2230 # platforms, Python or the platform itself can be buggy. Some versions
2229 2231 # of Solaris have been observed to not append at the end of the file
2230 2232 # if the file was seeked to before the end. See issue4943 for more.
2231 2233 #
2232 2234 # We work around this issue by inserting a seek() before writing.
2233 2235 # Note: This is likely not necessary on Python 3. However, because
2234 2236 # the file handle is reused for reads and may be seeked there, we need
2235 2237 # to be careful before changing this.
2236 2238 ifh.seek(0, os.SEEK_END)
2237 2239 if dfh:
2238 2240 dfh.seek(0, os.SEEK_END)
2239 2241
2240 2242 curr = len(self) - 1
2241 2243 if not self._inline:
2242 2244 transaction.add(self.datafile, offset)
2243 2245 transaction.add(self.indexfile, curr * len(entry))
2244 2246 if data[0]:
2245 2247 dfh.write(data[0])
2246 2248 dfh.write(data[1])
2247 2249 ifh.write(entry)
2248 2250 else:
2249 2251 offset += curr * self._io.size
2250 2252 transaction.add(self.indexfile, offset, curr)
2251 2253 ifh.write(entry)
2252 2254 ifh.write(data[0])
2253 2255 ifh.write(data[1])
2254 2256 self._enforceinlinesize(transaction, ifh)
2255 2257
2256 2258 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2257 2259 """
2258 2260 add a delta group
2259 2261
2260 2262 given a set of deltas, add them to the revision log. the
2261 2263 first delta is against its parent, which should be in our
2262 2264 log, the rest are against the previous delta.
2263 2265
2264 2266 If ``addrevisioncb`` is defined, it will be called with arguments of
2265 2267 this revlog and the node that was added.
2266 2268 """
2267 2269
2268 2270 if self._writinghandles:
2269 2271 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2270 2272
2271 2273 nodes = []
2272 2274
2273 2275 r = len(self)
2274 2276 end = 0
2275 2277 if r:
2276 2278 end = self.end(r - 1)
2277 2279 ifh = self._indexfp(b"a+")
2278 2280 isize = r * self._io.size
2279 2281 if self._inline:
2280 2282 transaction.add(self.indexfile, end + isize, r)
2281 2283 dfh = None
2282 2284 else:
2283 2285 transaction.add(self.indexfile, isize, r)
2284 2286 transaction.add(self.datafile, end)
2285 2287 dfh = self._datafp(b"a+")
2286 2288
2287 2289 def flush():
2288 2290 if dfh:
2289 2291 dfh.flush()
2290 2292 ifh.flush()
2291 2293
2292 2294 self._writinghandles = (ifh, dfh)
2293 2295
2294 2296 try:
2295 2297 deltacomputer = deltautil.deltacomputer(self)
2296 2298 # loop through our set of deltas
2297 2299 for data in deltas:
2298 2300 node, p1, p2, linknode, deltabase, delta, flags = data
2299 2301 link = linkmapper(linknode)
2300 2302 flags = flags or REVIDX_DEFAULT_FLAGS
2301 2303
2302 2304 nodes.append(node)
2303 2305
2304 2306 if node in self.nodemap:
2305 2307 self._nodeduplicatecallback(transaction, node)
2306 2308 # this can happen if two branches make the same change
2307 2309 continue
2308 2310
2309 2311 for p in (p1, p2):
2310 2312 if p not in self.nodemap:
2311 2313 raise error.LookupError(
2312 2314 p, self.indexfile, _(b'unknown parent')
2313 2315 )
2314 2316
2315 2317 if deltabase not in self.nodemap:
2316 2318 raise error.LookupError(
2317 2319 deltabase, self.indexfile, _(b'unknown delta base')
2318 2320 )
2319 2321
2320 2322 baserev = self.rev(deltabase)
2321 2323
2322 2324 if baserev != nullrev and self.iscensored(baserev):
2323 2325 # if base is censored, delta must be full replacement in a
2324 2326 # single patch operation
2325 2327 hlen = struct.calcsize(b">lll")
2326 2328 oldlen = self.rawsize(baserev)
2327 2329 newlen = len(delta) - hlen
2328 2330 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2329 2331 raise error.CensoredBaseError(
2330 2332 self.indexfile, self.node(baserev)
2331 2333 )
2332 2334
2333 2335 if not flags and self._peek_iscensored(baserev, delta, flush):
2334 2336 flags |= REVIDX_ISCENSORED
2335 2337
2336 2338 # We assume consumers of addrevisioncb will want to retrieve
2337 2339 # the added revision, which will require a call to
2338 2340 # revision(). revision() will fast path if there is a cache
2339 2341 # hit. So, we tell _addrevision() to always cache in this case.
2340 2342 # We're only using addgroup() in the context of changegroup
2341 2343 # generation so the revision data can always be handled as raw
2342 2344 # by the flagprocessor.
2343 2345 self._addrevision(
2344 2346 node,
2345 2347 None,
2346 2348 transaction,
2347 2349 link,
2348 2350 p1,
2349 2351 p2,
2350 2352 flags,
2351 2353 (baserev, delta),
2352 2354 ifh,
2353 2355 dfh,
2354 2356 alwayscache=bool(addrevisioncb),
2355 2357 deltacomputer=deltacomputer,
2356 2358 )
2357 2359
2358 2360 if addrevisioncb:
2359 2361 addrevisioncb(self, node)
2360 2362
2361 2363 if not dfh and not self._inline:
2362 2364 # addrevision switched from inline to conventional
2363 2365 # reopen the index
2364 2366 ifh.close()
2365 2367 dfh = self._datafp(b"a+")
2366 2368 ifh = self._indexfp(b"a+")
2367 2369 self._writinghandles = (ifh, dfh)
2368 2370 finally:
2369 2371 self._writinghandles = None
2370 2372
2371 2373 if dfh:
2372 2374 dfh.close()
2373 2375 ifh.close()
2374 2376
2375 2377 return nodes
2376 2378
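For reference, each entry of ``deltas`` is the 7-tuple unpacked in the loop above. A hedged caller sketch (``rl``, ``cl``, ``transaction`` and the delta values are assumed to already exist):

def linkmapper(linknode):
    # map a changelog node to the local linkrev
    return cl.rev(linknode)

deltas = [(node, p1, p2, linknode, deltabase, delta, 0)]
added = rl.addgroup(deltas, linkmapper, transaction)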
2377 2379 def iscensored(self, rev):
2378 2380 """Check if a file revision is censored."""
2379 2381 if not self._censorable:
2380 2382 return False
2381 2383
2382 2384 return self.flags(rev) & REVIDX_ISCENSORED
2383 2385
2384 2386 def _peek_iscensored(self, baserev, delta, flush):
2385 2387 """Quickly check if a delta produces a censored revision."""
2386 2388 if not self._censorable:
2387 2389 return False
2388 2390
2389 2391 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2390 2392
2391 2393 def getstrippoint(self, minlink):
2392 2394 """find the minimum rev that must be stripped to strip the linkrev
2393 2395
2394 2396 Returns a tuple containing the minimum rev and a set of all revs that
2395 2397 have linkrevs that will be broken by this strip.
2396 2398 """
2397 2399 return storageutil.resolvestripinfo(
2398 2400 minlink,
2399 2401 len(self) - 1,
2400 2402 self.headrevs(),
2401 2403 self.linkrev,
2402 2404 self.parentrevs,
2403 2405 )
2404 2406
2405 2407 def strip(self, minlink, transaction):
2406 2408 """truncate the revlog on the first revision with a linkrev >= minlink
2407 2409
2408 2410 This function is called when we're stripping revision minlink and
2409 2411 its descendants from the repository.
2410 2412
2411 2413 We have to remove all revisions with linkrev >= minlink, because
2412 2414 the equivalent changelog revisions will be renumbered after the
2413 2415 strip.
2414 2416
2415 2417 So we truncate the revlog on the first of these revisions, and
2416 2418 trust that the caller has saved the revisions that shouldn't be
2417 2419 removed and that it'll re-add them after this truncation.
2418 2420 """
2419 2421 if len(self) == 0:
2420 2422 return
2421 2423
2422 2424 rev, _ = self.getstrippoint(minlink)
2423 2425 if rev == len(self):
2424 2426 return
2425 2427
2426 2428 # first truncate the files on disk
2427 2429 end = self.start(rev)
2428 2430 if not self._inline:
2429 2431 transaction.add(self.datafile, end)
2430 2432 end = rev * self._io.size
2431 2433 else:
2432 2434 end += rev * self._io.size
2433 2435
2434 2436 transaction.add(self.indexfile, end)
2435 2437
2436 2438 # then reset internal state in memory to forget those revisions
2437 2439 self._revisioncache = None
2438 2440 self._chaininfocache = {}
2439 2441 self._chunkclear()
2440 2442 for x in pycompat.xrange(rev, len(self)):
2441 2443 del self.nodemap[self.node(x)]
2442 2444
2443 2445 del self.index[rev:-1]
2444 2446 self._nodepos = None
2445 2447
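A short sketch tying the two methods together (``rl``, ``tr`` and ``minlink`` assumed): ``getstrippoint`` previews what ``strip`` will truncate:

rev, brokenrevs = rl.getstrippoint(minlink)
if rev < len(rl):
    # every revision >= rev will be truncated; brokenrevs holds the
    # revisions whose linkrevs the strip invalidates
    rl.strip(minlink, tr)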
2446 2448 def checksize(self):
2447 2449 """Check size of index and data files
2448 2450
2449 2451 return a (dd, di) tuple.
2450 2452 - dd: extra bytes for the "data" file
2451 2453 - di: extra bytes for the "index" file
2452 2454
2453 2455 A healthy revlog will return (0, 0).
2454 2456 """
2455 2457 expected = 0
2456 2458 if len(self):
2457 2459 expected = max(0, self.end(len(self) - 1))
2458 2460
2459 2461 try:
2460 2462 with self._datafp() as f:
2461 2463 f.seek(0, io.SEEK_END)
2462 2464 actual = f.tell()
2463 2465 dd = actual - expected
2464 2466 except IOError as inst:
2465 2467 if inst.errno != errno.ENOENT:
2466 2468 raise
2467 2469 dd = 0
2468 2470
2469 2471 try:
2470 2472 f = self.opener(self.indexfile)
2471 2473 f.seek(0, io.SEEK_END)
2472 2474 actual = f.tell()
2473 2475 f.close()
2474 2476 s = self._io.size
2475 2477 i = max(0, actual // s)
2476 2478 di = actual - (i * s)
2477 2479 if self._inline:
2478 2480 databytes = 0
2479 2481 for r in self:
2480 2482 databytes += max(0, self.length(r))
2481 2483 dd = 0
2482 2484 di = actual - len(self) * s - databytes
2483 2485 except IOError as inst:
2484 2486 if inst.errno != errno.ENOENT:
2485 2487 raise
2486 2488 di = 0
2487 2489
2488 2490 return (dd, di)
2489 2491
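A minimal usage sketch (``rl`` is an assumed revlog instance); any non-zero value points at trailing garbage or truncation worth investigating:

dd, di = rl.checksize()
if (dd, di) != (0, 0):
    print('damaged revlog: %d stray data bytes, %d stray index bytes'
          % (dd, di))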
2490 2492 def files(self):
2491 2493 res = [self.indexfile]
2492 2494 if not self._inline:
2493 2495 res.append(self.datafile)
2494 2496 return res
2495 2497
2496 2498 def emitrevisions(
2497 2499 self,
2498 2500 nodes,
2499 2501 nodesorder=None,
2500 2502 revisiondata=False,
2501 2503 assumehaveparentrevisions=False,
2502 2504 deltamode=repository.CG_DELTAMODE_STD,
2503 2505 ):
2504 2506 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2505 2507 raise error.ProgrammingError(
2506 2508 b'unhandled value for nodesorder: %s' % nodesorder
2507 2509 )
2508 2510
2509 2511 if nodesorder is None and not self._generaldelta:
2510 2512 nodesorder = b'storage'
2511 2513
2512 2514 if (
2513 2515 not self._storedeltachains
2514 2516 and deltamode != repository.CG_DELTAMODE_PREV
2515 2517 ):
2516 2518 deltamode = repository.CG_DELTAMODE_FULL
2517 2519
2518 2520 return storageutil.emitrevisions(
2519 2521 self,
2520 2522 nodes,
2521 2523 nodesorder,
2522 2524 revlogrevisiondelta,
2523 2525 deltaparentfn=self.deltaparent,
2524 2526 candeltafn=self.candelta,
2525 2527 rawsizefn=self.rawsize,
2526 2528 revdifffn=self.revdiff,
2527 2529 flagsfn=self.flags,
2528 2530 deltamode=deltamode,
2529 2531 revisiondata=revisiondata,
2530 2532 assumehaveparentrevisions=assumehaveparentrevisions,
2531 2533 )
2532 2534
2533 2535 DELTAREUSEALWAYS = b'always'
2534 2536 DELTAREUSESAMEREVS = b'samerevs'
2535 2537 DELTAREUSENEVER = b'never'
2536 2538
2537 2539 DELTAREUSEFULLADD = b'fulladd'
2538 2540
2539 2541 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2540 2542
2541 2543 def clone(
2542 2544 self,
2543 2545 tr,
2544 2546 destrevlog,
2545 2547 addrevisioncb=None,
2546 2548 deltareuse=DELTAREUSESAMEREVS,
2547 2549 forcedeltabothparents=None,
2548 2550 sidedatacompanion=None,
2549 2551 ):
2550 2552 """Copy this revlog to another, possibly with format changes.
2551 2553
2552 2554 The destination revlog will contain the same revisions and nodes.
2553 2555 However, it may not be bit-for-bit identical due to e.g. delta encoding
2554 2556 differences.
2555 2557
2556 2558 The ``deltareuse`` argument controls how deltas from the existing revlog
2557 2559 are preserved in the destination revlog. The argument can have the
2558 2560 following values:
2559 2561
2560 2562 DELTAREUSEALWAYS
2561 2563 Deltas will always be reused (if possible), even if the destination
2562 2564 revlog would not select the same revisions for the delta. This is the
2563 2565 fastest mode of operation.
2564 2566 DELTAREUSESAMEREVS
2565 2567 Deltas will be reused if the destination revlog would pick the same
2566 2568 revisions for the delta. This mode strikes a balance between speed
2567 2569 and optimization.
2568 2570 DELTAREUSENEVER
2569 2571 Deltas will never be reused. This is the slowest mode of execution.
2570 2572 This mode can be used to recompute deltas (e.g. if the diff/delta
2571 2573 algorithm changes).
2572 2574 DELTAREUSEFULLADD
2573 2575 Revisions will be re-added as if they were new content. This is
2574 2576 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2575 2577 e.g. large file detection and handling.
2576 2578
2577 2579 Delta computation can be slow, so the choice of delta reuse policy can
2578 2580 significantly affect run time.
2579 2581
2580 2582 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2581 2583 two extremes. Deltas will be reused if they are appropriate. But if the
2582 2584 delta could choose a better revision, it will do so. This means if you
2583 2585 are converting a non-generaldelta revlog to a generaldelta revlog,
2584 2586 deltas will be recomputed if the delta's parent isn't a parent of the
2585 2587 revision.
2586 2588
2587 2589 In addition to the delta policy, the ``forcedeltabothparents``
2588 2590 argument controls whether to force computing deltas against both parents
2589 2591 for merges. When None, the destination revlog's current setting is used.
2590 2592
2591 2593 If not None, `sidedatacompanion` is a callable that accepts two
2592 2594 arguments:
2593 2595
2594 2596 (srcrevlog, rev)
2595 2597
2596 2598 and returns a triplet that controls changes to sidedata content from the
2597 2599 old revision to the new clone result:
2598 2600
2599 2601 (dropall, filterout, update)
2600 2602
2601 2603 * if `dropall` is True, all sidedata should be dropped
2602 2604 * `filterout` is a set of sidedata keys that should be dropped
2603 2605 * `update` is a mapping of additional/new key -> value
2604 2606 """
2605 2607 if deltareuse not in self.DELTAREUSEALL:
2606 2608 raise ValueError(
2607 2609 _(b'value for deltareuse invalid: %s') % deltareuse
2608 2610 )
2609 2611
2610 2612 if len(destrevlog):
2611 2613 raise ValueError(_(b'destination revlog is not empty'))
2612 2614
2613 2615 if getattr(self, 'filteredrevs', None):
2614 2616 raise ValueError(_(b'source revlog has filtered revisions'))
2615 2617 if getattr(destrevlog, 'filteredrevs', None):
2616 2618 raise ValueError(_(b'destination revlog has filtered revisions'))
2617 2619
2618 2620 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2619 2621 # if possible.
2620 2622 oldlazydelta = destrevlog._lazydelta
2621 2623 oldlazydeltabase = destrevlog._lazydeltabase
2622 2624 oldamd = destrevlog._deltabothparents
2623 2625
2624 2626 try:
2625 2627 if deltareuse == self.DELTAREUSEALWAYS:
2626 2628 destrevlog._lazydeltabase = True
2627 2629 destrevlog._lazydelta = True
2628 2630 elif deltareuse == self.DELTAREUSESAMEREVS:
2629 2631 destrevlog._lazydeltabase = False
2630 2632 destrevlog._lazydelta = True
2631 2633 elif deltareuse == self.DELTAREUSENEVER:
2632 2634 destrevlog._lazydeltabase = False
2633 2635 destrevlog._lazydelta = False
2634 2636
2635 2637 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2636 2638
2637 2639 self._clone(
2638 2640 tr,
2639 2641 destrevlog,
2640 2642 addrevisioncb,
2641 2643 deltareuse,
2642 2644 forcedeltabothparents,
2643 2645 sidedatacompanion,
2644 2646 )
2645 2647
2646 2648 finally:
2647 2649 destrevlog._lazydelta = oldlazydelta
2648 2650 destrevlog._lazydeltabase = oldlazydeltabase
2649 2651 destrevlog._deltabothparents = oldamd
2650 2652
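A hedged sketch of a typical call, here recomputing every delta during a format upgrade (the repository, transaction and destination revlog setup are assumed):

with repo.transaction(b'revlog-clone') as tr:
    srcrevlog.clone(
        tr,
        destrevlog,
        deltareuse=srcrevlog.DELTAREUSENEVER,
    )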
2651 2653 def _clone(
2652 2654 self,
2653 2655 tr,
2654 2656 destrevlog,
2655 2657 addrevisioncb,
2656 2658 deltareuse,
2657 2659 forcedeltabothparents,
2658 2660 sidedatacompanion,
2659 2661 ):
2660 2662 """perform the core duty of `revlog.clone` after parameter processing"""
2661 2663 deltacomputer = deltautil.deltacomputer(destrevlog)
2662 2664 index = self.index
2663 2665 for rev in self:
2664 2666 entry = index[rev]
2665 2667
2666 2668 # Some classes override linkrev to take filtered revs into
2667 2669 # account. Use raw entry from index.
2668 2670 flags = entry[0] & 0xFFFF
2669 2671 linkrev = entry[4]
2670 2672 p1 = index[entry[5]][7]
2671 2673 p2 = index[entry[6]][7]
2672 2674 node = entry[7]
2673 2675
2674 2676 sidedataactions = (False, [], {})
2675 2677 if sidedatacompanion is not None:
2676 2678 sidedataactions = sidedatacompanion(self, rev)
2677 2679
2678 2680 # (Possibly) reuse the delta from the revlog if allowed and
2679 2681 # the revlog chunk is a delta.
2680 2682 cachedelta = None
2681 2683 rawtext = None
2682 2684 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2683 2685 dropall, filterout, update = sidedataactions
2684 2686 text, sidedata = self._revisiondata(rev)
2685 2687 if dropall:
2686 2688 sidedata = {}
2687 2689 for key in filterout:
2688 2690 sidedata.pop(key, None)
2689 2691 sidedata.update(update)
2690 2692 if not sidedata:
2691 2693 sidedata = None
2692 2694 destrevlog.addrevision(
2693 2695 text,
2694 2696 tr,
2695 2697 linkrev,
2696 2698 p1,
2697 2699 p2,
2698 2700 cachedelta=cachedelta,
2699 2701 node=node,
2700 2702 flags=flags,
2701 2703 deltacomputer=deltacomputer,
2702 2704 sidedata=sidedata,
2703 2705 )
2704 2706 else:
2705 2707 if destrevlog._lazydelta:
2706 2708 dp = self.deltaparent(rev)
2707 2709 if dp != nullrev:
2708 2710 cachedelta = (dp, bytes(self._chunk(rev)))
2709 2711
2710 2712 if not cachedelta:
2711 2713 rawtext = self.rawdata(rev)
2712 2714
2713 2715 ifh = destrevlog.opener(
2714 2716 destrevlog.indexfile, b'a+', checkambig=False
2715 2717 )
2716 2718 dfh = None
2717 2719 if not destrevlog._inline:
2718 2720 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2719 2721 try:
2720 2722 destrevlog._addrevision(
2721 2723 node,
2722 2724 rawtext,
2723 2725 tr,
2724 2726 linkrev,
2725 2727 p1,
2726 2728 p2,
2727 2729 flags,
2728 2730 cachedelta,
2729 2731 ifh,
2730 2732 dfh,
2731 2733 deltacomputer=deltacomputer,
2732 2734 )
2733 2735 finally:
2734 2736 if dfh:
2735 2737 dfh.close()
2736 2738 ifh.close()
2737 2739
2738 2740 if addrevisioncb:
2739 2741 addrevisioncb(self, rev, node)
2740 2742
2741 2743 def censorrevision(self, tr, censornode, tombstone=b''):
2742 2744 if (self.version & 0xFFFF) == REVLOGV0:
2743 2745 raise error.RevlogError(
2744 2746 _(b'cannot censor with version %d revlogs') % self.version
2745 2747 )
2746 2748
2747 2749 censorrev = self.rev(censornode)
2748 2750 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2749 2751
2750 2752 if len(tombstone) > self.rawsize(censorrev):
2751 2753 raise error.Abort(
2752 2754 _(b'censor tombstone must be no longer than censored data')
2753 2755 )
2754 2756
2755 2757 # Rewriting the revlog in place is hard. Our strategy for censoring is
2756 2758 # to create a new revlog, copy all revisions to it, then replace the
2757 2759 # revlogs on transaction close.
2758 2760
2759 2761 newindexfile = self.indexfile + b'.tmpcensored'
2760 2762 newdatafile = self.datafile + b'.tmpcensored'
2761 2763
2762 2764 # This is a bit dangerous. We could easily have a mismatch of state.
2763 2765 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2764 2766 newrl.version = self.version
2765 2767 newrl._generaldelta = self._generaldelta
2766 2768 newrl._io = self._io
2767 2769
2768 2770 for rev in self.revs():
2769 2771 node = self.node(rev)
2770 2772 p1, p2 = self.parents(node)
2771 2773
2772 2774 if rev == censorrev:
2773 2775 newrl.addrawrevision(
2774 2776 tombstone,
2775 2777 tr,
2776 2778 self.linkrev(censorrev),
2777 2779 p1,
2778 2780 p2,
2779 2781 censornode,
2780 2782 REVIDX_ISCENSORED,
2781 2783 )
2782 2784
2783 2785 if newrl.deltaparent(rev) != nullrev:
2784 2786 raise error.Abort(
2785 2787 _(
2786 2788 b'censored revision stored as delta; '
2787 2789 b'cannot censor'
2788 2790 ),
2789 2791 hint=_(
2790 2792 b'censoring of revlogs is not '
2791 2793 b'fully implemented; please report '
2792 2794 b'this bug'
2793 2795 ),
2794 2796 )
2795 2797 continue
2796 2798
2797 2799 if self.iscensored(rev):
2798 2800 if self.deltaparent(rev) != nullrev:
2799 2801 raise error.Abort(
2800 2802 _(
2801 2803 b'cannot censor due to censored '
2802 2804 b'revision having delta stored'
2803 2805 )
2804 2806 )
2805 2807 rawtext = self._chunk(rev)
2806 2808 else:
2807 2809 rawtext = self.rawdata(rev)
2808 2810
2809 2811 newrl.addrawrevision(
2810 2812 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2811 2813 )
2812 2814
2813 2815 tr.addbackup(self.indexfile, location=b'store')
2814 2816 if not self._inline:
2815 2817 tr.addbackup(self.datafile, location=b'store')
2816 2818
2817 2819 self.opener.rename(newrl.indexfile, self.indexfile)
2818 2820 if not self._inline:
2819 2821 self.opener.rename(newrl.datafile, self.datafile)
2820 2822
2821 2823 self.clearcaches()
2822 2824 self._loadindex()
2823 2825
2824 2826 def verifyintegrity(self, state):
2825 2827 """Verifies the integrity of the revlog.
2826 2828
2827 2829 Yields ``revlogproblem`` instances describing problems that are
2828 2830 found.
2829 2831 """
2830 2832 dd, di = self.checksize()
2831 2833 if dd:
2832 2834 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2833 2835 if di:
2834 2836 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2835 2837
2836 2838 version = self.version & 0xFFFF
2837 2839
2838 2840 # The verifier tells us what version revlog we should be.
2839 2841 if version != state[b'expectedversion']:
2840 2842 yield revlogproblem(
2841 2843 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2842 2844 % (self.indexfile, version, state[b'expectedversion'])
2843 2845 )
2844 2846
2845 2847 state[b'skipread'] = set()
2846 2848
2847 2849 for rev in self:
2848 2850 node = self.node(rev)
2849 2851
2850 2852 # Verify contents. 4 cases to care about:
2851 2853 #
2852 2854 # common: the most common case
2853 2855 # rename: with a rename
2854 2856 # meta: file content starts with b'\1\n', the metadata
2855 2857 # header defined in filelog.py, but without a rename
2856 2858 # ext: content stored externally
2857 2859 #
2858 2860 # More formally, their differences are shown below:
2859 2861 #
2860 2862 # | common | rename | meta | ext
2861 2863 # -------------------------------------------------------
2862 2864 # flags() | 0 | 0 | 0 | not 0
2863 2865 # renamed() | False | True | False | ?
2864 2866 # rawtext[0:2]=='\1\n'| False | True | True | ?
2865 2867 #
2866 2868 # "rawtext" means the raw text stored in revlog data, which
2867 2869 # could be retrieved by "rawdata(rev)". "text"
2868 2870 # mentioned below is "revision(rev)".
2869 2871 #
2870 2872 # There are 3 different lengths stored physically:
2871 2873 # 1. L1: rawsize, stored in revlog index
2872 2874 # 2. L2: len(rawtext), stored in revlog data
2873 2875 # 3. L3: len(text), stored in revlog data if flags==0, or
2874 2876 # possibly somewhere else if flags!=0
2875 2877 #
2876 2878 # L1 should be equal to L2. L3 could be different from them.
2877 2879 # "text" may or may not affect commit hash depending on flag
2878 2880 # processors (see flagutil.addflagprocessor).
2879 2881 #
2880 2882 # | common | rename | meta | ext
2881 2883 # -------------------------------------------------
2882 2884 # rawsize() | L1 | L1 | L1 | L1
2883 2885 # size() | L1 | L2-LM | L1(*) | L1 (?)
2884 2886 # len(rawtext) | L2 | L2 | L2 | L2
2885 2887 # len(text) | L2 | L2 | L2 | L3
2886 2888 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2887 2889 #
2888 2890 # LM: length of metadata, depending on rawtext
2889 2891 # (*): not ideal, see comment in filelog.size
2890 2892 # (?): could be "- len(meta)" if the resolved content has
2891 2893 # rename metadata
2892 2894 #
2893 2895 # Checks needed to be done:
2894 2896 # 1. length check: L1 == L2, in all cases.
2895 2897 # 2. hash check: depending on flag processor, we may need to
2896 2898 # use either "text" (external), or "rawtext" (in revlog).
2897 2899
2898 2900 try:
2899 2901 skipflags = state.get(b'skipflags', 0)
2900 2902 if skipflags:
2901 2903 skipflags &= self.flags(rev)
2902 2904
2903 2905 if skipflags:
2904 2906 state[b'skipread'].add(node)
2905 2907 else:
2906 2908 # Side-effect: read content and verify hash.
2907 2909 self.revision(node)
2908 2910
2909 2911 l1 = self.rawsize(rev)
2910 2912 l2 = len(self.rawdata(node))
2911 2913
2912 2914 if l1 != l2:
2913 2915 yield revlogproblem(
2914 2916 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2915 2917 node=node,
2916 2918 )
2917 2919
2918 2920 except error.CensoredNodeError:
2919 2921 if state[b'erroroncensored']:
2920 2922 yield revlogproblem(
2921 2923 error=_(b'censored file data'), node=node
2922 2924 )
2923 2925 state[b'skipread'].add(node)
2924 2926 except Exception as e:
2925 2927 yield revlogproblem(
2926 2928 error=_(b'unpacking %s: %s')
2927 2929 % (short(node), stringutil.forcebytestr(e)),
2928 2930 node=node,
2929 2931 )
2930 2932 state[b'skipread'].add(node)
2931 2933
2932 2934 def storageinfo(
2933 2935 self,
2934 2936 exclusivefiles=False,
2935 2937 sharedfiles=False,
2936 2938 revisionscount=False,
2937 2939 trackedsize=False,
2938 2940 storedsize=False,
2939 2941 ):
2940 2942 d = {}
2941 2943
2942 2944 if exclusivefiles:
2943 2945 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2944 2946 if not self._inline:
2945 2947 d[b'exclusivefiles'].append((self.opener, self.datafile))
2946 2948
2947 2949 if sharedfiles:
2948 2950 d[b'sharedfiles'] = []
2949 2951
2950 2952 if revisionscount:
2951 2953 d[b'revisionscount'] = len(self)
2952 2954
2953 2955 if trackedsize:
2954 2956 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2955 2957
2956 2958 if storedsize:
2957 2959 d[b'storedsize'] = sum(
2958 2960 self.opener.stat(path).st_size for path in self.files()
2959 2961 )
2960 2962
2961 2963 return d
@@ -0,0 +1,14 b''
1 # mercurial.revlogutils -- basic utilities for revlog
2 #
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 from __future__ import absolute_import
9 from .. import error
10
11
12 class NodeMap(dict):
13 def __missing__(self, x):
14 raise error.RevlogError(b'unknown node: %s' % x)
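A minimal usage sketch of the new class: unlike a plain dict, a missing key surfaces as a ``RevlogError`` instead of a bare ``KeyError``, which is what the pure-Python index code expects (``node`` is an assumed nodeid):

nm = NodeMap()
nm[node] = 0  # filled in as the index is parsed
try:
    nm[b'not-a-node']
except error.RevlogError:
    pass  # lookup failures surface as revlog errors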