revlogv2: temporarily forbid inline revlogs...
Raphaël Gomès
r47450:c8bb7b89 default
@@ -1,3203 +1,3207 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_HASCOPIESINFO,
57 57 REVIDX_ISCENSORED,
58 58 REVIDX_RAWTEXT_CHANGING_FLAGS,
59 59 REVIDX_SIDEDATA,
60 60 )
61 61 from .thirdparty import attr
62 62 from . import (
63 63 ancestor,
64 64 dagop,
65 65 error,
66 66 mdiff,
67 67 policy,
68 68 pycompat,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 nodemap as nodemaputil,
80 80 sidedata as sidedatautil,
81 81 )
82 82 from .utils import (
83 83 storageutil,
84 84 stringutil,
85 85 )
86 86 from .pure import parsers as pureparsers
87 87
88 88 # blanket usage of all the names to prevent pyflakes complaints
89 89 # We need these names available in the module for extensions.
90 90 REVLOGV0
91 91 REVLOGV1
92 92 REVLOGV2
93 93 FLAG_INLINE_DATA
94 94 FLAG_GENERALDELTA
95 95 REVLOG_DEFAULT_FLAGS
96 96 REVLOG_DEFAULT_FORMAT
97 97 REVLOG_DEFAULT_VERSION
98 98 REVLOGV1_FLAGS
99 99 REVLOGV2_FLAGS
100 100 REVIDX_ISCENSORED
101 101 REVIDX_ELLIPSIS
102 102 REVIDX_SIDEDATA
103 103 REVIDX_HASCOPIESINFO
104 104 REVIDX_EXTSTORED
105 105 REVIDX_DEFAULT_FLAGS
106 106 REVIDX_FLAGS_ORDER
107 107 REVIDX_RAWTEXT_CHANGING_FLAGS
108 108
109 109 parsers = policy.importmod('parsers')
110 110 rustancestor = policy.importrust('ancestor')
111 111 rustdagop = policy.importrust('dagop')
112 112 rustrevlog = policy.importrust('revlog')
113 113
114 114 # Aliased for performance.
115 115 _zlibdecompress = zlib.decompress
116 116
117 117 # max size of revlog with inline data
118 118 _maxinline = 131072
119 119 _chunksize = 1048576
120 120
121 121 # Flag processors for REVIDX_ELLIPSIS.
122 122 def ellipsisreadprocessor(rl, text):
123 123 return text, False
124 124
125 125
126 126 def ellipsiswriteprocessor(rl, text):
127 127 return text, False
128 128
129 129
130 130 def ellipsisrawprocessor(rl, text):
131 131 return False
132 132
133 133
134 134 ellipsisprocessor = (
135 135 ellipsisreadprocessor,
136 136 ellipsiswriteprocessor,
137 137 ellipsisrawprocessor,
138 138 )
139 139
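# Hedged illustration (hypothetical helper, not part of upstream
# revlog.py): a flag processor is a (read, write, raw) triple. read and
# write return a (text, validatehash) pair, while the raw transform only
# reports whether the stored rawtext hash can be trusted. The ellipsis
# processors above are identity transforms, so registering them merely
# marks REVIDX_ELLIPSIS as a known flag.
def _demo_ellipsis_processor():
    read, write, raw = ellipsisprocessor
    text = b'some revision text'
    assert read(None, text) == (text, False)
    assert write(None, text) == (text, False)
    assert raw(None, text) is False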
140 140
141 141 def getoffset(q):
142 142 return int(q >> 16)
143 143
144 144
145 145 def gettype(q):
146 146 return int(q & 0xFFFF)
147 147
148 148
149 149 def offset_type(offset, type):
150 150 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
151 151 raise ValueError(b'unknown revlog index flags')
152 152 return int(int(offset) << 16 | type)
153 153
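# Hedged illustration (hypothetical helper, not part of upstream
# revlog.py): the first field of an index entry packs the byte offset of
# the revision data into the high bits and the 16-bit flag field into the
# low bits, so getoffset/gettype invert offset_type.
def _demo_offset_type():
    packed = offset_type(4096, 0)  # offset 4096, no flags set
    assert getoffset(packed) == 4096
    assert gettype(packed) == 0
    return packed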
154 154
155 155 def _verify_revision(rl, skipflags, state, node):
156 156 """Verify the integrity of the given revlog ``node`` while providing a hook
157 157 point for extensions to influence the operation."""
158 158 if skipflags:
159 159 state[b'skipread'].add(node)
160 160 else:
161 161 # Side-effect: read content and verify hash.
162 162 rl.revision(node)
163 163
164 164
165 165 # True if a fast implementation for persistent-nodemap is available
166 166 #
167 167 # We also consider the "pure" python implementation to be "fast" because
168 168 # people using pure don't really have performance considerations (and a
169 169 # wheelbarrow of other slowness sources)
170 170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
171 171 parsers, 'BaseIndexObject'
172 172 )
173 173
174 174
175 175 @attr.s(slots=True, frozen=True)
176 176 class _revisioninfo(object):
177 177 """Information about a revision that allows building its fulltext
178 178 node: expected hash of the revision
179 179 p1, p2: parent revs of the revision
180 180 btext: built text cache consisting of a one-element list
181 181 cachedelta: (baserev, uncompressed_delta) or None
182 182 flags: flags associated with the revision storage
183 183
184 184 One of btext[0] or cachedelta must be set.
185 185 """
186 186
187 187 node = attr.ib()
188 188 p1 = attr.ib()
189 189 p2 = attr.ib()
190 190 btext = attr.ib()
191 191 textlen = attr.ib()
192 192 cachedelta = attr.ib()
193 193 flags = attr.ib()
194 194
195 195
196 196 @interfaceutil.implementer(repository.irevisiondelta)
197 197 @attr.s(slots=True)
198 198 class revlogrevisiondelta(object):
199 199 node = attr.ib()
200 200 p1node = attr.ib()
201 201 p2node = attr.ib()
202 202 basenode = attr.ib()
203 203 flags = attr.ib()
204 204 baserevisionsize = attr.ib()
205 205 revision = attr.ib()
206 206 delta = attr.ib()
207 207 sidedata = attr.ib()
208 208 linknode = attr.ib(default=None)
209 209
210 210
211 211 @interfaceutil.implementer(repository.iverifyproblem)
212 212 @attr.s(frozen=True)
213 213 class revlogproblem(object):
214 214 warning = attr.ib(default=None)
215 215 error = attr.ib(default=None)
216 216 node = attr.ib(default=None)
217 217
218 218
219 219 # index v0:
220 220 # 4 bytes: offset
221 221 # 4 bytes: compressed length
222 222 # 4 bytes: base rev
223 223 # 4 bytes: link rev
224 224 # 20 bytes: parent 1 nodeid
225 225 # 20 bytes: parent 2 nodeid
226 226 # 20 bytes: nodeid
227 227 indexformatv0 = struct.Struct(b">4l20s20s20s")
228 228 indexformatv0_pack = indexformatv0.pack
229 229 indexformatv0_unpack = indexformatv0.unpack
230 230
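# Hedged sketch (hypothetical helper, not part of upstream revlog.py):
# a v0 index entry is 76 bytes (4 * 4 + 3 * 20) and round-trips through
# the struct defined above.
def _demo_v0_entry():
    entry = (0, 11, 0, 0, b'\x11' * 20, b'\x00' * 20, b'\x22' * 20)
    packed = indexformatv0_pack(*entry)
    assert len(packed) == indexformatv0.size  # 76 bytes
    assert indexformatv0_unpack(packed) == entry
    return packed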
231 231
232 232 class revlogoldindex(list):
233 233 @property
234 234 def nodemap(self):
235 235 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
236 236 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
237 237 return self._nodemap
238 238
239 239 @util.propertycache
240 240 def _nodemap(self):
241 241 nodemap = nodemaputil.NodeMap({nullid: nullrev})
242 242 for r in range(0, len(self)):
243 243 n = self[r][7]
244 244 nodemap[n] = r
245 245 return nodemap
246 246
247 247 def has_node(self, node):
248 248 """return True if the node exist in the index"""
249 249 return node in self._nodemap
250 250
251 251 def rev(self, node):
252 252 """return a revision for a node
253 253
254 254 If the node is unknown, raise a RevlogError"""
255 255 return self._nodemap[node]
256 256
257 257 def get_rev(self, node):
258 258 """return a revision for a node
259 259
260 260 If the node is unknown, return None"""
261 261 return self._nodemap.get(node)
262 262
263 263 def append(self, tup):
264 264 self._nodemap[tup[7]] = len(self)
265 265 super(revlogoldindex, self).append(tup)
266 266
267 267 def __delitem__(self, i):
268 268 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
269 269 raise ValueError(b"deleting slices only supports a:-1 with step 1")
270 270 for r in pycompat.xrange(i.start, len(self)):
271 271 del self._nodemap[self[r][7]]
272 272 super(revlogoldindex, self).__delitem__(i)
273 273
274 274 def clearcaches(self):
275 275 self.__dict__.pop('_nodemap', None)
276 276
277 277 def __getitem__(self, i):
278 278 if i == -1:
279 279 return (0, 0, 0, -1, -1, -1, -1, nullid)
280 280 return list.__getitem__(self, i)
281 281
282 282
283 283 class revlogoldio(object):
284 284 def __init__(self):
285 285 self.size = indexformatv0.size
286 286
287 287 def parseindex(self, data, inline):
288 288 s = self.size
289 289 index = []
290 290 nodemap = nodemaputil.NodeMap({nullid: nullrev})
291 291 n = off = 0
292 292 l = len(data)
293 293 while off + s <= l:
294 294 cur = data[off : off + s]
295 295 off += s
296 296 e = indexformatv0_unpack(cur)
297 297 # transform to revlogv1 format
298 298 e2 = (
299 299 offset_type(e[0], 0),
300 300 e[1],
301 301 -1,
302 302 e[2],
303 303 e[3],
304 304 nodemap.get(e[4], nullrev),
305 305 nodemap.get(e[5], nullrev),
306 306 e[6],
307 307 )
308 308 index.append(e2)
309 309 nodemap[e[6]] = n
310 310 n += 1
311 311
312 312 index = revlogoldindex(index)
313 313 return index, None
314 314
315 315 def packentry(self, entry, node, version, rev):
316 316 if gettype(entry[0]):
317 317 raise error.RevlogError(
318 318 _(b'index entry flags need revlog version 1')
319 319 )
320 320 e2 = (
321 321 getoffset(entry[0]),
322 322 entry[1],
323 323 entry[3],
324 324 entry[4],
325 325 node(entry[5]),
326 326 node(entry[6]),
327 327 entry[7],
328 328 )
329 329 return indexformatv0_pack(*e2)
330 330
331 331
332 332 # index ng:
333 333 # 6 bytes: offset
334 334 # 2 bytes: flags
335 335 # 4 bytes: compressed length
336 336 # 4 bytes: uncompressed length
337 337 # 4 bytes: base rev
338 338 # 4 bytes: link rev
339 339 # 4 bytes: parent 1 rev
340 340 # 4 bytes: parent 2 rev
341 341 # 32 bytes: nodeid
342 342 indexformatng = struct.Struct(b">Qiiiiii20s12x")
343 343 indexformatng_pack = indexformatng.pack
344 344 versionformat = struct.Struct(b">I")
345 345 versionformat_pack = versionformat.pack
346 346 versionformat_unpack = versionformat.unpack
347 347
348 348 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
349 349 # signed integer)
350 350 _maxentrysize = 0x7FFFFFFF
351 351
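# Hedged sketch (hypothetical helper, not part of upstream revlog.py):
# a v1 ("ng") index entry is 64 bytes (8 + 6 * 4 + 20 + 12 padding), and,
# as revlogio.packentry() below shows, the first 4 bytes of the first
# entry are overwritten with the version/flags header that _loadindex()
# later splits with `versionflags & 0xFFFF` and `versionflags & ~0xFFFF`.
def _demo_ng_header():
    entry = (offset_type(0, 0), 10, 10, 0, 0, -1, -1, b'\x11' * 20)
    packed = indexformatng_pack(*entry)
    assert len(packed) == indexformatng.size  # 64 bytes
    packed = versionformat_pack(REVLOGV1 | FLAG_INLINE_DATA) + packed[4:]
    versionflags = versionformat_unpack(packed[:4])[0]
    assert versionflags & 0xFFFF == REVLOGV1
    assert versionflags & FLAG_INLINE_DATA
    return packed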
352 352
353 353 class revlogio(object):
354 354 def __init__(self):
355 355 self.size = indexformatng.size
356 356
357 357 def parseindex(self, data, inline):
358 358 # call the C implementation to parse the index data
359 359 index, cache = parsers.parse_index2(data, inline)
360 360 return index, cache
361 361
362 362 def packentry(self, entry, node, version, rev):
363 363 p = indexformatng_pack(*entry)
364 364 if rev == 0:
365 365 p = versionformat_pack(version) + p[4:]
366 366 return p
367 367
368 368
369 369 indexformatv2 = struct.Struct(pureparsers.Index2Mixin.index_format)
370 370 indexformatv2_pack = indexformatv2.pack
371 371
372 372
373 373 class revlogv2io(object):
374 374 def __init__(self):
375 375 self.size = indexformatv2.size
376 376
377 377 def parseindex(self, data, inline):
378 378 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
379 379 return index, cache
380 380
381 381 def packentry(self, entry, node, version, rev):
382 382 p = indexformatv2_pack(*entry)
383 383 if rev == 0:
384 384 p = versionformat_pack(version) + p[4:]
385 385 return p
386 386
387 387
388 388 NodemapRevlogIO = None
389 389
390 390 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
391 391
392 392 class NodemapRevlogIO(revlogio):
393 393 """A debug oriented IO class that return a PersistentNodeMapIndexObject
394 394
395 395 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
396 396 """
397 397
398 398 def parseindex(self, data, inline):
399 399 index, cache = parsers.parse_index_devel_nodemap(data, inline)
400 400 return index, cache
401 401
402 402
403 403 class rustrevlogio(revlogio):
404 404 def parseindex(self, data, inline):
405 405 index, cache = super(rustrevlogio, self).parseindex(data, inline)
406 406 return rustrevlog.MixedIndex(index), cache
407 407
408 408
409 409 class revlog(object):
410 410 """
411 411 the underlying revision storage object
412 412
413 413 A revlog consists of two parts, an index and the revision data.
414 414
415 415 The index is a file with a fixed record size containing
416 416 information on each revision, including its nodeid (hash), the
417 417 nodeids of its parents, the position and offset of its data within
418 418 the data file, and the revision it's based on. Finally, each entry
419 419 contains a linkrev entry that can serve as a pointer to external
420 420 data.
421 421
422 422 The revision data itself is a linear collection of data chunks.
423 423 Each chunk represents a revision and is usually represented as a
424 424 delta against the previous chunk. To bound lookup time, runs of
425 425 deltas are limited to about 2 times the length of the original
426 426 version data. This makes retrieval of a version proportional to
427 427 its size, or O(1) relative to the number of revisions.
428 428
429 429 Both pieces of the revlog are written to in an append-only
430 430 fashion, which means we never need to rewrite a file to insert or
431 431 remove data, and can use some simple techniques to avoid the need
432 432 for locking while reading.
433 433
434 434 If checkambig, indexfile is opened with checkambig=True at
435 435 writing, to avoid file stat ambiguity.
436 436
437 437 If mmaplargeindex is True, and an mmapindexthreshold is set, the
438 438 index will be mmapped rather than read if it is larger than the
439 439 configured threshold.
440 440
441 441 If censorable is True, the revlog can have censored revisions.
442 442
443 443 If `upperboundcomp` is not None, this is the expected maximal gain from
444 444 compression for the data content.
445 445
446 446 `concurrencychecker` is an optional function that receives 3 arguments: a
447 447 file handle, a filename, and an expected position. It should check whether
448 448 the current position in the file handle is valid, and log/warn/fail (by
449 449 raising).
450 450 """
451 451
452 452 _flagserrorclass = error.RevlogError
453 453
454 454 def __init__(
455 455 self,
456 456 opener,
457 457 indexfile,
458 458 datafile=None,
459 459 checkambig=False,
460 460 mmaplargeindex=False,
461 461 censorable=False,
462 462 upperboundcomp=None,
463 463 persistentnodemap=False,
464 464 concurrencychecker=None,
465 465 ):
466 466 """
467 467 create a revlog object
468 468
469 469 opener is a function that abstracts the file opening operation
470 470 and can be used to implement COW semantics or the like.
471 471
472 472 """
473 473 self.upperboundcomp = upperboundcomp
474 474 self.indexfile = indexfile
475 475 self.datafile = datafile or (indexfile[:-2] + b".d")
476 476 self.nodemap_file = None
477 477 if persistentnodemap:
478 478 self.nodemap_file = nodemaputil.get_nodemap_file(
479 479 opener, self.indexfile
480 480 )
481 481
482 482 self.opener = opener
483 483 # When True, indexfile is opened with checkambig=True at writing, to
484 484 # avoid file stat ambiguity.
485 485 self._checkambig = checkambig
486 486 self._mmaplargeindex = mmaplargeindex
487 487 self._censorable = censorable
488 488 # 3-tuple of (node, rev, text) for a raw revision.
489 489 self._revisioncache = None
490 490 # Maps rev to chain base rev.
491 491 self._chainbasecache = util.lrucachedict(100)
492 492 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
493 493 self._chunkcache = (0, b'')
494 494 # How much data to read and cache into the raw revlog data cache.
495 495 self._chunkcachesize = 65536
496 496 self._maxchainlen = None
497 497 self._deltabothparents = True
498 498 self.index = None
499 499 self._nodemap_docket = None
500 500 # Mapping of partial identifiers to full nodes.
501 501 self._pcache = {}
502 502 # Mapping of revision integer to full node.
503 503 self._compengine = b'zlib'
504 504 self._compengineopts = {}
505 505 self._maxdeltachainspan = -1
506 506 self._withsparseread = False
507 507 self._sparserevlog = False
508 508 self._srdensitythreshold = 0.50
509 509 self._srmingapsize = 262144
510 510
511 511 # Make copy of flag processors so each revlog instance can support
512 512 # custom flags.
513 513 self._flagprocessors = dict(flagutil.flagprocessors)
514 514
515 515 # 2-tuple of file handles being used for active writing.
516 516 self._writinghandles = None
517 517
518 518 self._loadindex()
519 519
520 520 self._concurrencychecker = concurrencychecker
521 521
522 522 def _loadindex(self):
523 523 mmapindexthreshold = None
524 524 opts = self.opener.options
525 525
526 526 if b'revlogv2' in opts:
527 527 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
528 528 elif b'revlogv1' in opts:
529 529 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
530 530 if b'generaldelta' in opts:
531 531 newversionflags |= FLAG_GENERALDELTA
532 532 elif b'revlogv0' in self.opener.options:
533 533 newversionflags = REVLOGV0
534 534 else:
535 535 newversionflags = REVLOG_DEFAULT_VERSION
536 536
537 537 if b'chunkcachesize' in opts:
538 538 self._chunkcachesize = opts[b'chunkcachesize']
539 539 if b'maxchainlen' in opts:
540 540 self._maxchainlen = opts[b'maxchainlen']
541 541 if b'deltabothparents' in opts:
542 542 self._deltabothparents = opts[b'deltabothparents']
543 543 self._lazydelta = bool(opts.get(b'lazydelta', True))
544 544 self._lazydeltabase = False
545 545 if self._lazydelta:
546 546 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
547 547 if b'compengine' in opts:
548 548 self._compengine = opts[b'compengine']
549 549 if b'zlib.level' in opts:
550 550 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
551 551 if b'zstd.level' in opts:
552 552 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
553 553 if b'maxdeltachainspan' in opts:
554 554 self._maxdeltachainspan = opts[b'maxdeltachainspan']
555 555 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
556 556 mmapindexthreshold = opts[b'mmapindexthreshold']
557 557 self.hassidedata = bool(opts.get(b'side-data', False))
558 558 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
559 559 withsparseread = bool(opts.get(b'with-sparse-read', False))
560 560 # sparse-revlog forces sparse-read
561 561 self._withsparseread = self._sparserevlog or withsparseread
562 562 if b'sparse-read-density-threshold' in opts:
563 563 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
564 564 if b'sparse-read-min-gap-size' in opts:
565 565 self._srmingapsize = opts[b'sparse-read-min-gap-size']
566 566 if opts.get(b'enableellipsis'):
567 567 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
568 568
569 569 # revlog v0 doesn't have flag processors
570 570 for flag, processor in pycompat.iteritems(
571 571 opts.get(b'flagprocessors', {})
572 572 ):
573 573 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
574 574
575 575 if self._chunkcachesize <= 0:
576 576 raise error.RevlogError(
577 577 _(b'revlog chunk cache size %r is not greater than 0')
578 578 % self._chunkcachesize
579 579 )
580 580 elif self._chunkcachesize & (self._chunkcachesize - 1):
581 581 raise error.RevlogError(
582 582 _(b'revlog chunk cache size %r is not a power of 2')
583 583 % self._chunkcachesize
584 584 )
585 585
586 586 indexdata = b''
587 587 self._initempty = True
588 588 try:
589 589 with self._indexfp() as f:
590 590 if (
591 591 mmapindexthreshold is not None
592 592 and self.opener.fstat(f).st_size >= mmapindexthreshold
593 593 ):
594 594 # TODO: should .close() to release resources without
595 595 # relying on Python GC
596 596 indexdata = util.buffer(util.mmapread(f))
597 597 else:
598 598 indexdata = f.read()
599 599 if len(indexdata) > 0:
600 600 versionflags = versionformat_unpack(indexdata[:4])[0]
601 601 self._initempty = False
602 602 else:
603 603 versionflags = newversionflags
604 604 except IOError as inst:
605 605 if inst.errno != errno.ENOENT:
606 606 raise
607 607
608 608 versionflags = newversionflags
609 609
610 610 self.version = versionflags
611 611
612 612 flags = versionflags & ~0xFFFF
613 613 fmt = versionflags & 0xFFFF
614 614
615 615 if fmt == REVLOGV0:
616 616 if flags:
617 617 raise error.RevlogError(
618 618 _(b'unknown flags (%#04x) in version %d revlog %s')
619 619 % (flags >> 16, fmt, self.indexfile)
620 620 )
621 621
622 622 self._inline = False
623 623 self._generaldelta = False
624 624
625 625 elif fmt == REVLOGV1:
626 626 if flags & ~REVLOGV1_FLAGS:
627 627 raise error.RevlogError(
628 628 _(b'unknown flags (%#04x) in version %d revlog %s')
629 629 % (flags >> 16, fmt, self.indexfile)
630 630 )
631 631
632 632 self._inline = versionflags & FLAG_INLINE_DATA
633 633 self._generaldelta = versionflags & FLAG_GENERALDELTA
634 634
635 635 elif fmt == REVLOGV2:
636 636 if flags & ~REVLOGV2_FLAGS:
637 637 raise error.RevlogError(
638 638 _(b'unknown flags (%#04x) in version %d revlog %s')
639 639 % (flags >> 16, fmt, self.indexfile)
640 640 )
641 641
642 self._inline = versionflags & FLAG_INLINE_DATA
642 # There is a bug in the transaction handling when going from an
643 # inline revlog to a separate index and data file. Turn it off until
644 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
645 # See issue6485
646 self._inline = False
643 647 # generaldelta implied by version 2 revlogs.
644 648 self._generaldelta = True
645 649
646 650 else:
647 651 raise error.RevlogError(
648 652 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
649 653 )
650 654 # sparse-revlog can't be on without general-delta (issue6056)
651 655 if not self._generaldelta:
652 656 self._sparserevlog = False
653 657
654 658 self._storedeltachains = True
655 659
656 660 devel_nodemap = (
657 661 self.nodemap_file
658 662 and opts.get(b'devel-force-nodemap', False)
659 663 and NodemapRevlogIO is not None
660 664 )
661 665
662 666 use_rust_index = False
663 667 if rustrevlog is not None:
664 668 if self.nodemap_file is not None:
665 669 use_rust_index = True
666 670 else:
667 671 use_rust_index = self.opener.options.get(b'rust.index')
668 672
669 673 self._io = revlogio()
670 674 if self.version == REVLOGV0:
671 675 self._io = revlogoldio()
672 676 elif fmt == REVLOGV2:
673 677 self._io = revlogv2io()
674 678 elif devel_nodemap:
675 679 self._io = NodemapRevlogIO()
676 680 elif use_rust_index:
677 681 self._io = rustrevlogio()
678 682 try:
679 683 d = self._io.parseindex(indexdata, self._inline)
680 684 index, _chunkcache = d
681 685 use_nodemap = (
682 686 not self._inline
683 687 and self.nodemap_file is not None
684 688 and util.safehasattr(index, 'update_nodemap_data')
685 689 )
686 690 if use_nodemap:
687 691 nodemap_data = nodemaputil.persisted_data(self)
688 692 if nodemap_data is not None:
689 693 docket = nodemap_data[0]
690 694 if (
691 695 len(d[0]) > docket.tip_rev
692 696 and d[0][docket.tip_rev][7] == docket.tip_node
693 697 ):
694 698 # no changelog tampering
695 699 self._nodemap_docket = docket
696 700 index.update_nodemap_data(*nodemap_data)
697 701 except (ValueError, IndexError):
698 702 raise error.RevlogError(
699 703 _(b"index %s is corrupted") % self.indexfile
700 704 )
701 705 self.index, self._chunkcache = d
702 706 if not self._chunkcache:
703 707 self._chunkclear()
704 708 # revnum -> (chain-length, sum-delta-length)
705 709 self._chaininfocache = util.lrucachedict(500)
706 710 # revlog header -> revlog compressor
707 711 self._decompressors = {}
708 712
709 713 @util.propertycache
710 714 def _compressor(self):
711 715 engine = util.compengines[self._compengine]
712 716 return engine.revlogcompressor(self._compengineopts)
713 717
714 718 def _indexfp(self, mode=b'r'):
715 719 """file object for the revlog's index file"""
716 720 args = {'mode': mode}
717 721 if mode != b'r':
718 722 args['checkambig'] = self._checkambig
719 723 if mode == b'w':
720 724 args['atomictemp'] = True
721 725 return self.opener(self.indexfile, **args)
722 726
723 727 def _datafp(self, mode=b'r'):
724 728 """file object for the revlog's data file"""
725 729 return self.opener(self.datafile, mode=mode)
726 730
727 731 @contextlib.contextmanager
728 732 def _datareadfp(self, existingfp=None):
729 733 """file object suitable to read data"""
730 734 # Use explicit file handle, if given.
731 735 if existingfp is not None:
732 736 yield existingfp
733 737
734 738 # Use a file handle being actively used for writes, if available.
735 739 # There is some danger in doing this because reads will seek the
736 740 # file. However, _writeentry() performs a SEEK_END before all writes,
737 741 # so we should be safe.
738 742 elif self._writinghandles:
739 743 if self._inline:
740 744 yield self._writinghandles[0]
741 745 else:
742 746 yield self._writinghandles[1]
743 747
744 748 # Otherwise open a new file handle.
745 749 else:
746 750 if self._inline:
747 751 func = self._indexfp
748 752 else:
749 753 func = self._datafp
750 754 with func() as fp:
751 755 yield fp
752 756
753 757 def tiprev(self):
754 758 return len(self.index) - 1
755 759
756 760 def tip(self):
757 761 return self.node(self.tiprev())
758 762
759 763 def __contains__(self, rev):
760 764 return 0 <= rev < len(self)
761 765
762 766 def __len__(self):
763 767 return len(self.index)
764 768
765 769 def __iter__(self):
766 770 return iter(pycompat.xrange(len(self)))
767 771
768 772 def revs(self, start=0, stop=None):
769 773 """iterate over all rev in this revlog (from start to stop)"""
770 774 return storageutil.iterrevs(len(self), start=start, stop=stop)
771 775
772 776 @property
773 777 def nodemap(self):
774 778 msg = (
775 779 b"revlog.nodemap is deprecated, "
776 780 b"use revlog.index.[has_node|rev|get_rev]"
777 781 )
778 782 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
779 783 return self.index.nodemap
780 784
781 785 @property
782 786 def _nodecache(self):
783 787 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
784 788 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
785 789 return self.index.nodemap
786 790
787 791 def hasnode(self, node):
788 792 try:
789 793 self.rev(node)
790 794 return True
791 795 except KeyError:
792 796 return False
793 797
794 798 def candelta(self, baserev, rev):
795 799 """whether two revisions (baserev, rev) can be delta-ed or not"""
796 800 # Disable delta if either rev requires a content-changing flag
797 801 # processor (ex. LFS). This is because such flag processor can alter
798 802 # the rawtext content that the delta will be based on, and two clients
799 803 # could have a same revlog node with different flags (i.e. different
800 804 # rawtext contents) and the delta could be incompatible.
801 805 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
802 806 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
803 807 ):
804 808 return False
805 809 return True
806 810
807 811 def update_caches(self, transaction):
808 812 if self.nodemap_file is not None:
809 813 if transaction is None:
810 814 nodemaputil.update_persistent_nodemap(self)
811 815 else:
812 816 nodemaputil.setup_persistent_nodemap(transaction, self)
813 817
814 818 def clearcaches(self):
815 819 self._revisioncache = None
816 820 self._chainbasecache.clear()
817 821 self._chunkcache = (0, b'')
818 822 self._pcache = {}
819 823 self._nodemap_docket = None
820 824 self.index.clearcaches()
821 825 # The python code is the one responsible for validating the docket, so we
822 826 # end up having to refresh it here.
823 827 use_nodemap = (
824 828 not self._inline
825 829 and self.nodemap_file is not None
826 830 and util.safehasattr(self.index, 'update_nodemap_data')
827 831 )
828 832 if use_nodemap:
829 833 nodemap_data = nodemaputil.persisted_data(self)
830 834 if nodemap_data is not None:
831 835 self._nodemap_docket = nodemap_data[0]
832 836 self.index.update_nodemap_data(*nodemap_data)
833 837
834 838 def rev(self, node):
835 839 try:
836 840 return self.index.rev(node)
837 841 except TypeError:
838 842 raise
839 843 except error.RevlogError:
840 844 # parsers.c radix tree lookup failed
841 845 if node == wdirid or node in wdirfilenodeids:
842 846 raise error.WdirUnsupported
843 847 raise error.LookupError(node, self.indexfile, _(b'no node'))
844 848
845 849 # Accessors for index entries.
846 850
847 851 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
848 852 # are flags.
849 853 def start(self, rev):
850 854 return int(self.index[rev][0] >> 16)
851 855
852 856 def flags(self, rev):
853 857 return self.index[rev][0] & 0xFFFF
854 858
855 859 def length(self, rev):
856 860 return self.index[rev][1]
857 861
858 862 def sidedata_length(self, rev):
859 863 if self.version & 0xFFFF != REVLOGV2:
860 864 return 0
861 865 return self.index[rev][9]
862 866
863 867 def rawsize(self, rev):
864 868 """return the length of the uncompressed text for a given revision"""
865 869 l = self.index[rev][2]
866 870 if l >= 0:
867 871 return l
868 872
869 873 t = self.rawdata(rev)
870 874 return len(t)
871 875
872 876 def size(self, rev):
873 877 """length of non-raw text (processed by a "read" flag processor)"""
874 878 # fast path: if no "read" flag processor could change the content,
875 879 # size is rawsize. note: ELLIPSIS is known to not change the content.
876 880 flags = self.flags(rev)
877 881 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
878 882 return self.rawsize(rev)
879 883
880 884 return len(self.revision(rev, raw=False))
881 885
882 886 def chainbase(self, rev):
883 887 base = self._chainbasecache.get(rev)
884 888 if base is not None:
885 889 return base
886 890
887 891 index = self.index
888 892 iterrev = rev
889 893 base = index[iterrev][3]
890 894 while base != iterrev:
891 895 iterrev = base
892 896 base = index[iterrev][3]
893 897
894 898 self._chainbasecache[rev] = base
895 899 return base
896 900
897 901 def linkrev(self, rev):
898 902 return self.index[rev][4]
899 903
900 904 def parentrevs(self, rev):
901 905 try:
902 906 entry = self.index[rev]
903 907 except IndexError:
904 908 if rev == wdirrev:
905 909 raise error.WdirUnsupported
906 910 raise
907 911
908 912 return entry[5], entry[6]
909 913
910 914 # fast parentrevs(rev) where rev isn't filtered
911 915 _uncheckedparentrevs = parentrevs
912 916
913 917 def node(self, rev):
914 918 try:
915 919 return self.index[rev][7]
916 920 except IndexError:
917 921 if rev == wdirrev:
918 922 raise error.WdirUnsupported
919 923 raise
920 924
921 925 # Derived from index values.
922 926
923 927 def end(self, rev):
924 928 return self.start(rev) + self.length(rev)
925 929
926 930 def parents(self, node):
927 931 i = self.index
928 932 d = i[self.rev(node)]
929 933 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
930 934
931 935 def chainlen(self, rev):
932 936 return self._chaininfo(rev)[0]
933 937
934 938 def _chaininfo(self, rev):
935 939 chaininfocache = self._chaininfocache
936 940 if rev in chaininfocache:
937 941 return chaininfocache[rev]
938 942 index = self.index
939 943 generaldelta = self._generaldelta
940 944 iterrev = rev
941 945 e = index[iterrev]
942 946 clen = 0
943 947 compresseddeltalen = 0
944 948 while iterrev != e[3]:
945 949 clen += 1
946 950 compresseddeltalen += e[1]
947 951 if generaldelta:
948 952 iterrev = e[3]
949 953 else:
950 954 iterrev -= 1
951 955 if iterrev in chaininfocache:
952 956 t = chaininfocache[iterrev]
953 957 clen += t[0]
954 958 compresseddeltalen += t[1]
955 959 break
956 960 e = index[iterrev]
957 961 else:
958 962 # Add text length of base since decompressing that also takes
959 963 # work. For cache hits the length is already included.
960 964 compresseddeltalen += e[1]
961 965 r = (clen, compresseddeltalen)
962 966 chaininfocache[rev] = r
963 967 return r
964 968
965 969 def _deltachain(self, rev, stoprev=None):
966 970 """Obtain the delta chain for a revision.
967 971
968 972 ``stoprev`` specifies a revision to stop at. If not specified, we
969 973 stop at the base of the chain.
970 974
971 975 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
972 976 revs in ascending order and ``stopped`` is a bool indicating whether
973 977 ``stoprev`` was hit.
974 978 """
975 979 # Try C implementation.
976 980 try:
977 981 return self.index.deltachain(rev, stoprev, self._generaldelta)
978 982 except AttributeError:
979 983 pass
980 984
981 985 chain = []
982 986
983 987 # Alias to prevent attribute lookup in tight loop.
984 988 index = self.index
985 989 generaldelta = self._generaldelta
986 990
987 991 iterrev = rev
988 992 e = index[iterrev]
989 993 while iterrev != e[3] and iterrev != stoprev:
990 994 chain.append(iterrev)
991 995 if generaldelta:
992 996 iterrev = e[3]
993 997 else:
994 998 iterrev -= 1
995 999 e = index[iterrev]
996 1000
997 1001 if iterrev == stoprev:
998 1002 stopped = True
999 1003 else:
1000 1004 chain.append(iterrev)
1001 1005 stopped = False
1002 1006
1003 1007 chain.reverse()
1004 1008 return chain, stopped
1005 1009
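    # Hedged usage sketch (hypothetical method, not in upstream): the chain
    # returned by _deltachain() is in ascending order, ends at the requested
    # revision, and, when not stopped early, starts at a revision that is
    # its own delta base.
    def _demo_deltachain(self, rev):
        chain, stopped = self._deltachain(rev)
        assert chain == sorted(chain) and chain[-1] == rev
        if not stopped:
            assert self.index[chain[0]][3] == chain[0]
        return chain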
1006 1010 def ancestors(self, revs, stoprev=0, inclusive=False):
1007 1011 """Generate the ancestors of 'revs' in reverse revision order.
1008 1012 Does not generate revs lower than stoprev.
1009 1013
1010 1014 See the documentation for ancestor.lazyancestors for more details."""
1011 1015
1012 1016 # first, make sure start revisions aren't filtered
1013 1017 revs = list(revs)
1014 1018 checkrev = self.node
1015 1019 for r in revs:
1016 1020 checkrev(r)
1017 1021 # and we're sure ancestors aren't filtered either
1018 1022
1019 1023 if rustancestor is not None:
1020 1024 lazyancestors = rustancestor.LazyAncestors
1021 1025 arg = self.index
1022 1026 else:
1023 1027 lazyancestors = ancestor.lazyancestors
1024 1028 arg = self._uncheckedparentrevs
1025 1029 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1026 1030
1027 1031 def descendants(self, revs):
1028 1032 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1029 1033
1030 1034 def findcommonmissing(self, common=None, heads=None):
1031 1035 """Return a tuple of the ancestors of common and the ancestors of heads
1032 1036 that are not ancestors of common. In revset terminology, we return the
1033 1037 tuple:
1034 1038
1035 1039 ::common, (::heads) - (::common)
1036 1040
1037 1041 The list is sorted by revision number, meaning it is
1038 1042 topologically sorted.
1039 1043
1040 1044 'heads' and 'common' are both lists of node IDs. If heads is
1041 1045 not supplied, uses all of the revlog's heads. If common is not
1042 1046 supplied, uses nullid."""
1043 1047 if common is None:
1044 1048 common = [nullid]
1045 1049 if heads is None:
1046 1050 heads = self.heads()
1047 1051
1048 1052 common = [self.rev(n) for n in common]
1049 1053 heads = [self.rev(n) for n in heads]
1050 1054
1051 1055 # we want the ancestors, but inclusive
1052 1056 class lazyset(object):
1053 1057 def __init__(self, lazyvalues):
1054 1058 self.addedvalues = set()
1055 1059 self.lazyvalues = lazyvalues
1056 1060
1057 1061 def __contains__(self, value):
1058 1062 return value in self.addedvalues or value in self.lazyvalues
1059 1063
1060 1064 def __iter__(self):
1061 1065 added = self.addedvalues
1062 1066 for r in added:
1063 1067 yield r
1064 1068 for r in self.lazyvalues:
1065 1069 if r not in added:
1066 1070 yield r
1067 1071
1068 1072 def add(self, value):
1069 1073 self.addedvalues.add(value)
1070 1074
1071 1075 def update(self, values):
1072 1076 self.addedvalues.update(values)
1073 1077
1074 1078 has = lazyset(self.ancestors(common))
1075 1079 has.add(nullrev)
1076 1080 has.update(common)
1077 1081
1078 1082 # take all ancestors from heads that aren't in has
1079 1083 missing = set()
1080 1084 visit = collections.deque(r for r in heads if r not in has)
1081 1085 while visit:
1082 1086 r = visit.popleft()
1083 1087 if r in missing:
1084 1088 continue
1085 1089 else:
1086 1090 missing.add(r)
1087 1091 for p in self.parentrevs(r):
1088 1092 if p not in has:
1089 1093 visit.append(p)
1090 1094 missing = list(missing)
1091 1095 missing.sort()
1092 1096 return has, [self.node(miss) for miss in missing]
1093 1097
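    # Hedged usage sketch (hypothetical method, not in upstream): in revset
    # terms findcommonmissing() returns (::common, (::heads) - (::common)),
    # so no returned missing node may have its revision in the ancestor set.
    def _demo_findcommonmissing(self, common=None, heads=None):
        has, missing = self.findcommonmissing(common, heads)
        for miss in missing:
            assert self.rev(miss) not in has
        return missing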
1094 1098 def incrementalmissingrevs(self, common=None):
1095 1099 """Return an object that can be used to incrementally compute the
1096 1100 revision numbers of the ancestors of arbitrary sets that are not
1097 1101 ancestors of common. This is an ancestor.incrementalmissingancestors
1098 1102 object.
1099 1103
1100 1104 'common' is a list of revision numbers. If common is not supplied, uses
1101 1105 nullrev.
1102 1106 """
1103 1107 if common is None:
1104 1108 common = [nullrev]
1105 1109
1106 1110 if rustancestor is not None:
1107 1111 return rustancestor.MissingAncestors(self.index, common)
1108 1112 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1109 1113
1110 1114 def findmissingrevs(self, common=None, heads=None):
1111 1115 """Return the revision numbers of the ancestors of heads that
1112 1116 are not ancestors of common.
1113 1117
1114 1118 More specifically, return a list of revision numbers corresponding to
1115 1119 nodes N such that every N satisfies the following constraints:
1116 1120
1117 1121 1. N is an ancestor of some node in 'heads'
1118 1122 2. N is not an ancestor of any node in 'common'
1119 1123
1120 1124 The list is sorted by revision number, meaning it is
1121 1125 topologically sorted.
1122 1126
1123 1127 'heads' and 'common' are both lists of revision numbers. If heads is
1124 1128 not supplied, uses all of the revlog's heads. If common is not
1125 1129 supplied, uses nullid."""
1126 1130 if common is None:
1127 1131 common = [nullrev]
1128 1132 if heads is None:
1129 1133 heads = self.headrevs()
1130 1134
1131 1135 inc = self.incrementalmissingrevs(common=common)
1132 1136 return inc.missingancestors(heads)
1133 1137
1134 1138 def findmissing(self, common=None, heads=None):
1135 1139 """Return the ancestors of heads that are not ancestors of common.
1136 1140
1137 1141 More specifically, return a list of nodes N such that every N
1138 1142 satisfies the following constraints:
1139 1143
1140 1144 1. N is an ancestor of some node in 'heads'
1141 1145 2. N is not an ancestor of any node in 'common'
1142 1146
1143 1147 The list is sorted by revision number, meaning it is
1144 1148 topologically sorted.
1145 1149
1146 1150 'heads' and 'common' are both lists of node IDs. If heads is
1147 1151 not supplied, uses all of the revlog's heads. If common is not
1148 1152 supplied, uses nullid."""
1149 1153 if common is None:
1150 1154 common = [nullid]
1151 1155 if heads is None:
1152 1156 heads = self.heads()
1153 1157
1154 1158 common = [self.rev(n) for n in common]
1155 1159 heads = [self.rev(n) for n in heads]
1156 1160
1157 1161 inc = self.incrementalmissingrevs(common=common)
1158 1162 return [self.node(r) for r in inc.missingancestors(heads)]
1159 1163
1160 1164 def nodesbetween(self, roots=None, heads=None):
1161 1165 """Return a topological path from 'roots' to 'heads'.
1162 1166
1163 1167 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1164 1168 topologically sorted list of all nodes N that satisfy both of
1165 1169 these constraints:
1166 1170
1167 1171 1. N is a descendant of some node in 'roots'
1168 1172 2. N is an ancestor of some node in 'heads'
1169 1173
1170 1174 Every node is considered to be both a descendant and an ancestor
1171 1175 of itself, so every reachable node in 'roots' and 'heads' will be
1172 1176 included in 'nodes'.
1173 1177
1174 1178 'outroots' is the list of reachable nodes in 'roots', i.e., the
1175 1179 subset of 'roots' that is returned in 'nodes'. Likewise,
1176 1180 'outheads' is the subset of 'heads' that is also in 'nodes'.
1177 1181
1178 1182 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1179 1183 unspecified, uses nullid as the only root. If 'heads' is
1180 1184 unspecified, uses list of all of the revlog's heads."""
1181 1185 nonodes = ([], [], [])
1182 1186 if roots is not None:
1183 1187 roots = list(roots)
1184 1188 if not roots:
1185 1189 return nonodes
1186 1190 lowestrev = min([self.rev(n) for n in roots])
1187 1191 else:
1188 1192 roots = [nullid] # Everybody's a descendant of nullid
1189 1193 lowestrev = nullrev
1190 1194 if (lowestrev == nullrev) and (heads is None):
1191 1195 # We want _all_ the nodes!
1192 1196 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1193 1197 if heads is None:
1194 1198 # All nodes are ancestors, so the latest ancestor is the last
1195 1199 # node.
1196 1200 highestrev = len(self) - 1
1197 1201 # Set ancestors to None to signal that every node is an ancestor.
1198 1202 ancestors = None
1199 1203 # Set heads to an empty dictionary for later discovery of heads
1200 1204 heads = {}
1201 1205 else:
1202 1206 heads = list(heads)
1203 1207 if not heads:
1204 1208 return nonodes
1205 1209 ancestors = set()
1206 1210 # Turn heads into a dictionary so we can remove 'fake' heads.
1207 1211 # Also, later we will be using it to filter out the heads we can't
1208 1212 # find from roots.
1209 1213 heads = dict.fromkeys(heads, False)
1210 1214 # Start at the top and keep marking parents until we're done.
1211 1215 nodestotag = set(heads)
1212 1216 # Remember where the top was so we can use it as a limit later.
1213 1217 highestrev = max([self.rev(n) for n in nodestotag])
1214 1218 while nodestotag:
1215 1219 # grab a node to tag
1216 1220 n = nodestotag.pop()
1217 1221 # Never tag nullid
1218 1222 if n == nullid:
1219 1223 continue
1220 1224 # A node's revision number represents its place in a
1221 1225 # topologically sorted list of nodes.
1222 1226 r = self.rev(n)
1223 1227 if r >= lowestrev:
1224 1228 if n not in ancestors:
1225 1229 # If we are possibly a descendant of one of the roots
1226 1230 # and we haven't already been marked as an ancestor
1227 1231 ancestors.add(n) # Mark as ancestor
1228 1232 # Add non-nullid parents to list of nodes to tag.
1229 1233 nodestotag.update(
1230 1234 [p for p in self.parents(n) if p != nullid]
1231 1235 )
1232 1236 elif n in heads: # We've seen it before, is it a fake head?
1233 1237 # So it is, real heads should not be the ancestors of
1234 1238 # any other heads.
1235 1239 heads.pop(n)
1236 1240 if not ancestors:
1237 1241 return nonodes
1238 1242 # Now that we have our set of ancestors, we want to remove any
1239 1243 # roots that are not ancestors.
1240 1244
1241 1245 # If one of the roots was nullid, everything is included anyway.
1242 1246 if lowestrev > nullrev:
1243 1247 # But, since we weren't, let's recompute the lowest rev to not
1244 1248 # include roots that aren't ancestors.
1245 1249
1246 1250 # Filter out roots that aren't ancestors of heads
1247 1251 roots = [root for root in roots if root in ancestors]
1248 1252 # Recompute the lowest revision
1249 1253 if roots:
1250 1254 lowestrev = min([self.rev(root) for root in roots])
1251 1255 else:
1252 1256 # No more roots? Return empty list
1253 1257 return nonodes
1254 1258 else:
1255 1259 # We are descending from nullid, and don't need to care about
1256 1260 # any other roots.
1257 1261 lowestrev = nullrev
1258 1262 roots = [nullid]
1259 1263 # Transform our roots list into a set.
1260 1264 descendants = set(roots)
1261 1265 # Also, keep the original roots so we can filter out roots that aren't
1262 1266 # 'real' roots (i.e. are descended from other roots).
1263 1267 roots = descendants.copy()
1264 1268 # Our topologically sorted list of output nodes.
1265 1269 orderedout = []
1266 1270 # Don't start at nullid since we don't want nullid in our output list,
1267 1271 # and if nullid shows up in descendants, empty parents will look like
1268 1272 # they're descendants.
1269 1273 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1270 1274 n = self.node(r)
1271 1275 isdescendant = False
1272 1276 if lowestrev == nullrev: # Everybody is a descendant of nullid
1273 1277 isdescendant = True
1274 1278 elif n in descendants:
1275 1279 # n is already a descendant
1276 1280 isdescendant = True
1277 1281 # This check only needs to be done here because all the roots
1278 1282 # will start being marked as descendants before the loop.
1279 1283 if n in roots:
1280 1284 # If n was a root, check if it's a 'real' root.
1281 1285 p = tuple(self.parents(n))
1282 1286 # If any of its parents are descendants, it's not a root.
1283 1287 if (p[0] in descendants) or (p[1] in descendants):
1284 1288 roots.remove(n)
1285 1289 else:
1286 1290 p = tuple(self.parents(n))
1287 1291 # A node is a descendant if either of its parents are
1288 1292 # descendants. (We seeded the descendants set with the roots
1289 1293 # up there, remember?)
1290 1294 if (p[0] in descendants) or (p[1] in descendants):
1291 1295 descendants.add(n)
1292 1296 isdescendant = True
1293 1297 if isdescendant and ((ancestors is None) or (n in ancestors)):
1294 1298 # Only include nodes that are both descendants and ancestors.
1295 1299 orderedout.append(n)
1296 1300 if (ancestors is not None) and (n in heads):
1297 1301 # We're trying to figure out which heads are reachable
1298 1302 # from roots.
1299 1303 # Mark this head as having been reached
1300 1304 heads[n] = True
1301 1305 elif ancestors is None:
1302 1306 # Otherwise, we're trying to discover the heads.
1303 1307 # Assume this is a head because if it isn't, the next step
1304 1308 # will eventually remove it.
1305 1309 heads[n] = True
1306 1310 # But, obviously its parents aren't.
1307 1311 for p in self.parents(n):
1308 1312 heads.pop(p, None)
1309 1313 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1310 1314 roots = list(roots)
1311 1315 assert orderedout
1312 1316 assert roots
1313 1317 assert heads
1314 1318 return (orderedout, roots, heads)
1315 1319
1316 1320 def headrevs(self, revs=None):
1317 1321 if revs is None:
1318 1322 try:
1319 1323 return self.index.headrevs()
1320 1324 except AttributeError:
1321 1325 return self._headrevs()
1322 1326 if rustdagop is not None:
1323 1327 return rustdagop.headrevs(self.index, revs)
1324 1328 return dagop.headrevs(revs, self._uncheckedparentrevs)
1325 1329
1326 1330 def computephases(self, roots):
1327 1331 return self.index.computephasesmapsets(roots)
1328 1332
1329 1333 def _headrevs(self):
1330 1334 count = len(self)
1331 1335 if not count:
1332 1336 return [nullrev]
1333 1337 # we won't iterate over filtered revs so nobody is a head at start
1334 1338 ishead = [0] * (count + 1)
1335 1339 index = self.index
1336 1340 for r in self:
1337 1341 ishead[r] = 1 # I may be a head
1338 1342 e = index[r]
1339 1343 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1340 1344 return [r for r, val in enumerate(ishead) if val]
1341 1345
1342 1346 def heads(self, start=None, stop=None):
1343 1347 """return the list of all nodes that have no children
1344 1348
1345 1349 if start is specified, only heads that are descendants of
1346 1350 start will be returned
1347 1351 if stop is specified, it will consider all the revs from stop
1348 1352 as if they had no children
1349 1353 """
1350 1354 if start is None and stop is None:
1351 1355 if not len(self):
1352 1356 return [nullid]
1353 1357 return [self.node(r) for r in self.headrevs()]
1354 1358
1355 1359 if start is None:
1356 1360 start = nullrev
1357 1361 else:
1358 1362 start = self.rev(start)
1359 1363
1360 1364 stoprevs = {self.rev(n) for n in stop or []}
1361 1365
1362 1366 revs = dagop.headrevssubset(
1363 1367 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1364 1368 )
1365 1369
1366 1370 return [self.node(rev) for rev in revs]
1367 1371
1368 1372 def children(self, node):
1369 1373 """find the children of a given node"""
1370 1374 c = []
1371 1375 p = self.rev(node)
1372 1376 for r in self.revs(start=p + 1):
1373 1377 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1374 1378 if prevs:
1375 1379 for pr in prevs:
1376 1380 if pr == p:
1377 1381 c.append(self.node(r))
1378 1382 elif p == nullrev:
1379 1383 c.append(self.node(r))
1380 1384 return c
1381 1385
1382 1386 def commonancestorsheads(self, a, b):
1383 1387 """calculate all the heads of the common ancestors of nodes a and b"""
1384 1388 a, b = self.rev(a), self.rev(b)
1385 1389 ancs = self._commonancestorsheads(a, b)
1386 1390 return pycompat.maplist(self.node, ancs)
1387 1391
1388 1392 def _commonancestorsheads(self, *revs):
1389 1393 """calculate all the heads of the common ancestors of revs"""
1390 1394 try:
1391 1395 ancs = self.index.commonancestorsheads(*revs)
1392 1396 except (AttributeError, OverflowError): # C implementation failed
1393 1397 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1394 1398 return ancs
1395 1399
1396 1400 def isancestor(self, a, b):
1397 1401 """return True if node a is an ancestor of node b
1398 1402
1399 1403 A revision is considered an ancestor of itself."""
1400 1404 a, b = self.rev(a), self.rev(b)
1401 1405 return self.isancestorrev(a, b)
1402 1406
1403 1407 def isancestorrev(self, a, b):
1404 1408 """return True if revision a is an ancestor of revision b
1405 1409
1406 1410 A revision is considered an ancestor of itself.
1407 1411
1408 1412 The implementation of this is trivial but the use of
1409 1413 reachableroots is not."""
1410 1414 if a == nullrev:
1411 1415 return True
1412 1416 elif a == b:
1413 1417 return True
1414 1418 elif a > b:
1415 1419 return False
1416 1420 return bool(self.reachableroots(a, [b], [a], includepath=False))
1417 1421
1418 1422 def reachableroots(self, minroot, heads, roots, includepath=False):
1419 1423 """return (heads(::(<roots> and <roots>::<heads>)))
1420 1424
1421 1425 If includepath is True, return (<roots>::<heads>)."""
1422 1426 try:
1423 1427 return self.index.reachableroots2(
1424 1428 minroot, heads, roots, includepath
1425 1429 )
1426 1430 except AttributeError:
1427 1431 return dagop._reachablerootspure(
1428 1432 self.parentrevs, minroot, roots, heads, includepath
1429 1433 )
1430 1434
1431 1435 def ancestor(self, a, b):
1432 1436 """calculate the "best" common ancestor of nodes a and b"""
1433 1437
1434 1438 a, b = self.rev(a), self.rev(b)
1435 1439 try:
1436 1440 ancs = self.index.ancestors(a, b)
1437 1441 except (AttributeError, OverflowError):
1438 1442 ancs = ancestor.ancestors(self.parentrevs, a, b)
1439 1443 if ancs:
1440 1444 # choose a consistent winner when there's a tie
1441 1445 return min(map(self.node, ancs))
1442 1446 return nullid
1443 1447
1444 1448 def _match(self, id):
1445 1449 if isinstance(id, int):
1446 1450 # rev
1447 1451 return self.node(id)
1448 1452 if len(id) == 20:
1449 1453 # possibly a binary node
1450 1454 # odds of a binary node being all hex in ASCII are 1 in 10**25
1451 1455 try:
1452 1456 node = id
1453 1457 self.rev(node) # quick search the index
1454 1458 return node
1455 1459 except error.LookupError:
1456 1460 pass # may be partial hex id
1457 1461 try:
1458 1462 # str(rev)
1459 1463 rev = int(id)
1460 1464 if b"%d" % rev != id:
1461 1465 raise ValueError
1462 1466 if rev < 0:
1463 1467 rev = len(self) + rev
1464 1468 if rev < 0 or rev >= len(self):
1465 1469 raise ValueError
1466 1470 return self.node(rev)
1467 1471 except (ValueError, OverflowError):
1468 1472 pass
1469 1473 if len(id) == 40:
1470 1474 try:
1471 1475 # a full hex nodeid?
1472 1476 node = bin(id)
1473 1477 self.rev(node)
1474 1478 return node
1475 1479 except (TypeError, error.LookupError):
1476 1480 pass
1477 1481
1478 1482 def _partialmatch(self, id):
1479 1483 # we don't care about wdirfilenodeids as they should always be full hashes
1480 1484 maybewdir = wdirhex.startswith(id)
1481 1485 try:
1482 1486 partial = self.index.partialmatch(id)
1483 1487 if partial and self.hasnode(partial):
1484 1488 if maybewdir:
1485 1489 # single 'ff...' match in radix tree, ambiguous with wdir
1486 1490 raise error.RevlogError
1487 1491 return partial
1488 1492 if maybewdir:
1489 1493 # no 'ff...' match in radix tree, wdir identified
1490 1494 raise error.WdirUnsupported
1491 1495 return None
1492 1496 except error.RevlogError:
1493 1497 # parsers.c radix tree lookup gave multiple matches
1494 1498 # fast path: for unfiltered changelog, radix tree is accurate
1495 1499 if not getattr(self, 'filteredrevs', None):
1496 1500 raise error.AmbiguousPrefixLookupError(
1497 1501 id, self.indexfile, _(b'ambiguous identifier')
1498 1502 )
1499 1503 # fall through to slow path that filters hidden revisions
1500 1504 except (AttributeError, ValueError):
1501 1505 # we are pure python, or key was too short to search radix tree
1502 1506 pass
1503 1507
1504 1508 if id in self._pcache:
1505 1509 return self._pcache[id]
1506 1510
1507 1511 if len(id) <= 40:
1508 1512 try:
1509 1513 # hex(node)[:...]
1510 1514 l = len(id) // 2 # grab an even number of digits
1511 1515 prefix = bin(id[: l * 2])
1512 1516 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1513 1517 nl = [
1514 1518 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1515 1519 ]
1516 1520 if nullhex.startswith(id):
1517 1521 nl.append(nullid)
1518 1522 if len(nl) > 0:
1519 1523 if len(nl) == 1 and not maybewdir:
1520 1524 self._pcache[id] = nl[0]
1521 1525 return nl[0]
1522 1526 raise error.AmbiguousPrefixLookupError(
1523 1527 id, self.indexfile, _(b'ambiguous identifier')
1524 1528 )
1525 1529 if maybewdir:
1526 1530 raise error.WdirUnsupported
1527 1531 return None
1528 1532 except TypeError:
1529 1533 pass
1530 1534
1531 1535 def lookup(self, id):
1532 1536 """locate a node based on:
1533 1537 - revision number or str(revision number)
1534 1538 - nodeid or subset of hex nodeid
1535 1539 """
1536 1540 n = self._match(id)
1537 1541 if n is not None:
1538 1542 return n
1539 1543 n = self._partialmatch(id)
1540 1544 if n:
1541 1545 return n
1542 1546
1543 1547 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1544 1548
1545 1549 def shortest(self, node, minlength=1):
1546 1550 """Find the shortest unambiguous prefix that matches node."""
1547 1551
1548 1552 def isvalid(prefix):
1549 1553 try:
1550 1554 matchednode = self._partialmatch(prefix)
1551 1555 except error.AmbiguousPrefixLookupError:
1552 1556 return False
1553 1557 except error.WdirUnsupported:
1554 1558 # single 'ff...' match
1555 1559 return True
1556 1560 if matchednode is None:
1557 1561 raise error.LookupError(node, self.indexfile, _(b'no node'))
1558 1562 return True
1559 1563
1560 1564 def maybewdir(prefix):
1561 1565 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1562 1566
1563 1567 hexnode = hex(node)
1564 1568
1565 1569 def disambiguate(hexnode, minlength):
1566 1570 """Disambiguate against wdirid."""
1567 1571 for length in range(minlength, len(hexnode) + 1):
1568 1572 prefix = hexnode[:length]
1569 1573 if not maybewdir(prefix):
1570 1574 return prefix
1571 1575
1572 1576 if not getattr(self, 'filteredrevs', None):
1573 1577 try:
1574 1578 length = max(self.index.shortest(node), minlength)
1575 1579 return disambiguate(hexnode, length)
1576 1580 except error.RevlogError:
1577 1581 if node != wdirid:
1578 1582 raise error.LookupError(node, self.indexfile, _(b'no node'))
1579 1583 except AttributeError:
1580 1584 # Fall through to pure code
1581 1585 pass
1582 1586
1583 1587 if node == wdirid:
1584 1588 for length in range(minlength, len(hexnode) + 1):
1585 1589 prefix = hexnode[:length]
1586 1590 if isvalid(prefix):
1587 1591 return prefix
1588 1592
1589 1593 for length in range(minlength, len(hexnode) + 1):
1590 1594 prefix = hexnode[:length]
1591 1595 if isvalid(prefix):
1592 1596 return disambiguate(hexnode, length)
1593 1597
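    # Hedged usage sketch (hypothetical method, not in upstream): the prefix
    # returned by shortest() respects the requested minimum length and
    # resolves back to the same node through _partialmatch().
    def _demo_shortest(self, node, minlength=1):
        prefix = self.shortest(node, minlength)
        assert len(prefix) >= minlength
        assert self._partialmatch(prefix) == node
        return prefix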
1594 1598 def cmp(self, node, text):
1595 1599 """compare text with a given file revision
1596 1600
1597 1601 returns True if text is different from what is stored.
1598 1602 """
1599 1603 p1, p2 = self.parents(node)
1600 1604 return storageutil.hashrevisionsha1(text, p1, p2) != node
1601 1605
1602 1606 def _cachesegment(self, offset, data):
1603 1607 """Add a segment to the revlog cache.
1604 1608
1605 1609 Accepts an absolute offset and the data that is at that location.
1606 1610 """
1607 1611 o, d = self._chunkcache
1608 1612 # try to add to existing cache
1609 1613 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1610 1614 self._chunkcache = o, d + data
1611 1615 else:
1612 1616 self._chunkcache = offset, data
1613 1617
1614 1618 def _readsegment(self, offset, length, df=None):
1615 1619 """Load a segment of raw data from the revlog.
1616 1620
1617 1621 Accepts an absolute offset, length to read, and an optional existing
1618 1622 file handle to read from.
1619 1623
1620 1624 If an existing file handle is passed, it will be seeked and the
1621 1625 original seek position will NOT be restored.
1622 1626
1623 1627 Returns a str or buffer of raw byte data.
1624 1628
1625 1629 Raises if the requested number of bytes could not be read.
1626 1630 """
1627 1631 # Cache data both forward and backward around the requested
1628 1632 # data, in a fixed size window. This helps speed up operations
1629 1633 # involving reading the revlog backwards.
1630 1634 cachesize = self._chunkcachesize
1631 1635 realoffset = offset & ~(cachesize - 1)
1632 1636 reallength = (
1633 1637 (offset + length + cachesize) & ~(cachesize - 1)
1634 1638 ) - realoffset
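        # Worked example of the alignment above (illustrative numbers):
        # with a 64KiB cache window, a request for length=100 at
        # offset=70000 becomes
        #
        #   realoffset = 70000 & ~(65536 - 1) = 65536
        #   reallength = ((70000 + 100 + 65536) & ~(65536 - 1)) - 65536
        #              = 131072 - 65536 = 65536
        #
        # i.e. one full aligned window surrounding the requested range
        # is read and cached.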
1635 1639 with self._datareadfp(df) as df:
1636 1640 df.seek(realoffset)
1637 1641 d = df.read(reallength)
1638 1642
1639 1643 self._cachesegment(realoffset, d)
1640 1644 if offset != realoffset or reallength != length:
1641 1645 startoffset = offset - realoffset
1642 1646 if len(d) - startoffset < length:
1643 1647 raise error.RevlogError(
1644 1648 _(
1645 1649 b'partial read of revlog %s; expected %d bytes from '
1646 1650 b'offset %d, got %d'
1647 1651 )
1648 1652 % (
1649 1653 self.indexfile if self._inline else self.datafile,
1650 1654 length,
1651 1655 realoffset,
1652 1656 len(d) - startoffset,
1653 1657 )
1654 1658 )
1655 1659
1656 1660 return util.buffer(d, startoffset, length)
1657 1661
1658 1662 if len(d) < length:
1659 1663 raise error.RevlogError(
1660 1664 _(
1661 1665 b'partial read of revlog %s; expected %d bytes from offset '
1662 1666 b'%d, got %d'
1663 1667 )
1664 1668 % (
1665 1669 self.indexfile if self._inline else self.datafile,
1666 1670 length,
1667 1671 offset,
1668 1672 len(d),
1669 1673 )
1670 1674 )
1671 1675
1672 1676 return d
1673 1677
1674 1678 def _getsegment(self, offset, length, df=None):
1675 1679 """Obtain a segment of raw data from the revlog.
1676 1680
1677 1681 Accepts an absolute offset, length of bytes to obtain, and an
1678 1682 optional file handle to the already-opened revlog. If the file
1679 1683 handle is used, its original seek position will not be preserved.
1680 1684
1681 1685 Requests for data may be returned from a cache.
1682 1686
1683 1687 Returns a str or a buffer instance of raw byte data.
1684 1688 """
1685 1689 o, d = self._chunkcache
1686 1690 l = len(d)
1687 1691
1688 1692 # is it in the cache?
1689 1693 cachestart = offset - o
1690 1694 cacheend = cachestart + length
1691 1695 if cachestart >= 0 and cacheend <= l:
1692 1696 if cachestart == 0 and cacheend == l:
1693 1697 return d # avoid a copy
1694 1698 return util.buffer(d, cachestart, cacheend - cachestart)
1695 1699
1696 1700 return self._readsegment(offset, length, df=df)
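    # Cache-hit sketch (illustrative numbers): if the chunk cache holds
    # (o=1000, d=<4096 bytes>), a request for offset=1500, length=200
    # computes cachestart=500, cacheend=700 and is served as
    # util.buffer(d, 500, 200) without touching the file.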
1697 1701
1698 1702 def _getsegmentforrevs(self, startrev, endrev, df=None):
1699 1703 """Obtain a segment of raw data corresponding to a range of revisions.
1700 1704
1701 1705 Accepts the start and end revisions and an optional already-open
1702 1706 file handle to be used for reading. If the file handle is used, its
1703 1707 seek position will not be preserved.
1704 1708
1705 1709 Requests for data may be satisfied by a cache.
1706 1710
1707 1711 Returns a 2-tuple of (offset, data) for the requested range of
1708 1712 revisions. Offset is the integer offset from the beginning of the
1709 1713 revlog and data is a str or buffer of the raw byte data.
1710 1714
1711 1715 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1712 1716 to determine where each revision's data begins and ends.
1713 1717 """
1714 1718 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1715 1719 # (functions are expensive).
1716 1720 index = self.index
1717 1721 istart = index[startrev]
1718 1722 start = int(istart[0] >> 16)
1719 1723 if startrev == endrev:
1720 1724 end = start + istart[1]
1721 1725 else:
1722 1726 iend = index[endrev]
1723 1727 end = int(iend[0] >> 16) + iend[1]
1724 1728
1725 1729 if self._inline:
1726 1730 start += (startrev + 1) * self._io.size
1727 1731 end += (endrev + 1) * self._io.size
1728 1732 length = end - start
1729 1733
1730 1734 return start, self._getsegment(start, length, df=df)
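    # Layout note (sketch): in an inline revlog, index entries and data
    # chunks interleave inside the .i file:
    #
    #   [entry 0][data 0][entry 1][data 1]...
    #
    # so the data for revision r starts after (r + 1) fixed-size index
    # entries, which is why start and end are shifted by
    # (rev + 1) * self._io.size above.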
1731 1735
1732 1736 def _chunk(self, rev, df=None):
1733 1737 """Obtain a single decompressed chunk for a revision.
1734 1738
1735 1739 Accepts an integer revision and an optional already-open file handle
1736 1740 to be used for reading. If used, the seek position of the file will not
1737 1741 be preserved.
1738 1742
1739 1743 Returns a str holding uncompressed data for the requested revision.
1740 1744 """
1741 1745 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1742 1746
1743 1747 def _chunks(self, revs, df=None, targetsize=None):
1744 1748 """Obtain decompressed chunks for the specified revisions.
1745 1749
1746 1750 Accepts an iterable of numeric revisions that are assumed to be in
1747 1751 ascending order. Also accepts an optional already-open file handle
1748 1752 to be used for reading. If used, the seek position of the file will
1749 1753 not be preserved.
1750 1754
1751 1755 This function is similar to calling ``self._chunk()`` multiple times,
1752 1756 but is faster.
1753 1757
1754 1758 Returns a list with decompressed data for each requested revision.
1755 1759 """
1756 1760 if not revs:
1757 1761 return []
1758 1762 start = self.start
1759 1763 length = self.length
1760 1764 inline = self._inline
1761 1765 iosize = self._io.size
1762 1766 buffer = util.buffer
1763 1767
1764 1768 l = []
1765 1769 ladd = l.append
1766 1770
1767 1771 if not self._withsparseread:
1768 1772 slicedchunks = (revs,)
1769 1773 else:
1770 1774 slicedchunks = deltautil.slicechunk(
1771 1775 self, revs, targetsize=targetsize
1772 1776 )
1773 1777
1774 1778 for revschunk in slicedchunks:
1775 1779 firstrev = revschunk[0]
1776 1780 # Skip trailing revisions with empty diff
1777 1781 for lastrev in revschunk[::-1]:
1778 1782 if length(lastrev) != 0:
1779 1783 break
1780 1784
1781 1785 try:
1782 1786 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1783 1787 except OverflowError:
1784 1788 # issue4215 - we can't cache a run of chunks greater than
1785 1789 # 2G on Windows
1786 1790 return [self._chunk(rev, df=df) for rev in revschunk]
1787 1791
1788 1792 decomp = self.decompress
1789 1793 for rev in revschunk:
1790 1794 chunkstart = start(rev)
1791 1795 if inline:
1792 1796 chunkstart += (rev + 1) * iosize
1793 1797 chunklength = length(rev)
1794 1798 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1795 1799
1796 1800 return l
1797 1801
1798 1802 def _chunkclear(self):
1799 1803 """Clear the raw chunk cache."""
1800 1804 self._chunkcache = (0, b'')
1801 1805
1802 1806 def deltaparent(self, rev):
1803 1807 """return deltaparent of the given revision"""
1804 1808 base = self.index[rev][3]
1805 1809 if base == rev:
1806 1810 return nullrev
1807 1811 elif self._generaldelta:
1808 1812 return base
1809 1813 else:
1810 1814 return rev - 1
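    # Sketch of the two delta schemes handled above: with generaldelta,
    # index entry field 3 names the delta parent directly, so a delta
    # may be stored against any earlier revision; without it, revision
    # r is implicitly a delta against r - 1. In both schemes,
    # base == rev marks a full text, reported as nullrev:
    #
    #   rl.deltaparent(rev)  # nullrev for a full text, else the base rev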
1811 1815
1812 1816 def issnapshot(self, rev):
1813 1817 """tells whether rev is a snapshot"""
1814 1818 if not self._sparserevlog:
1815 1819 return self.deltaparent(rev) == nullrev
1816 1820 elif util.safehasattr(self.index, b'issnapshot'):
1817 1821 # directly assign the method to cache the testing and access
1818 1822 self.issnapshot = self.index.issnapshot
1819 1823 return self.issnapshot(rev)
1820 1824 if rev == nullrev:
1821 1825 return True
1822 1826 entry = self.index[rev]
1823 1827 base = entry[3]
1824 1828 if base == rev:
1825 1829 return True
1826 1830 if base == nullrev:
1827 1831 return True
1828 1832 p1 = entry[5]
1829 1833 p2 = entry[6]
1830 1834 if base == p1 or base == p2:
1831 1835 return False
1832 1836 return self.issnapshot(base)
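    # Intuition for the recursion above: a revision is a snapshot when
    # its delta base chain never passes through one of its parents,
    # e.g. a full text (base == rev or base == nullrev) or an
    # intermediate delta whose own base is again a snapshot; a delta
    # stored against p1 or p2 is a regular delta, not a snapshot.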
1833 1837
1834 1838 def snapshotdepth(self, rev):
1835 1839 """number of snapshot in the chain before this one"""
1836 1840 if not self.issnapshot(rev):
1837 1841 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1838 1842 return len(self._deltachain(rev)[0]) - 1
1839 1843
1840 1844 def revdiff(self, rev1, rev2):
1841 1845 """return or calculate a delta between two revisions
1842 1846
1843 1847 The delta calculated is in binary form and is intended to be written to
1844 1848 revlog data directly. So this function needs raw revision data.
1845 1849 """
1846 1850 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1847 1851 return bytes(self._chunk(rev2))
1848 1852
1849 1853 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1850 1854
1851 1855 def _processflags(self, text, flags, operation, raw=False):
1852 1856 """deprecated entry point to access flag processors"""
1853 1857 msg = b'_processflag(...) use the specialized variant'
1854 1858 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1855 1859 if raw:
1856 1860 return text, flagutil.processflagsraw(self, text, flags)
1857 1861 elif operation == b'read':
1858 1862 return flagutil.processflagsread(self, text, flags)
1859 1863 else: # write operation
1860 1864 return flagutil.processflagswrite(self, text, flags)
1861 1865
1862 1866 def revision(self, nodeorrev, _df=None, raw=False):
1863 1867 """return an uncompressed revision of a given node or revision
1864 1868 number.
1865 1869
1866 1870 _df - an existing file handle to read from. (internal-only)
1867 1871 raw - an optional argument specifying if the revision data is to be
1868 1872 treated as raw data when applying flag transforms. 'raw' should be set
1869 1873 to True when generating changegroups or in debug commands.
1870 1874 """
1871 1875 if raw:
1872 1876 msg = (
1873 1877 b'revlog.revision(..., raw=True) is deprecated, '
1874 1878 b'use revlog.rawdata(...)'
1875 1879 )
1876 1880 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1877 1881 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1878 1882
1879 1883 def sidedata(self, nodeorrev, _df=None):
1880 1884 """a map of extra data related to the changeset but not part of the hash
1881 1885
1882 1886 This function currently returns a dictionary. However, a more advanced
1883 1887 mapping object will likely be used in the future to make the code more
1884 1888 efficient/lazy.
1885 1889 """
1886 1890 return self._revisiondata(nodeorrev, _df)[1]
1887 1891
1888 1892 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1889 1893 # deal with <nodeorrev> argument type
1890 1894 if isinstance(nodeorrev, int):
1891 1895 rev = nodeorrev
1892 1896 node = self.node(rev)
1893 1897 else:
1894 1898 node = nodeorrev
1895 1899 rev = None
1896 1900
1897 1901 # fast path the special `nullid` rev
1898 1902 if node == nullid:
1899 1903 return b"", {}
1900 1904
1901 1905 # ``rawtext`` is the text as stored inside the revlog. Might be the
1902 1906 # revision or might need to be processed to retrieve the revision.
1903 1907 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1904 1908
1905 1909 if self.version & 0xFFFF == REVLOGV2:
1906 1910 if rev is None:
1907 1911 rev = self.rev(node)
1908 1912 sidedata = self._sidedata(rev)
1909 1913 else:
1910 1914 sidedata = {}
1911 1915
1912 1916 if raw and validated:
1913 1917 # if we don't want to process the raw text and the raw
1914 1918 # text is already cached, we can exit early.
1915 1919 return rawtext, sidedata
1916 1920 if rev is None:
1917 1921 rev = self.rev(node)
1918 1922 # the revlog's flags for this revision
1919 1923 # (they usually alter its state or content)
1920 1924 flags = self.flags(rev)
1921 1925
1922 1926 if validated and flags == REVIDX_DEFAULT_FLAGS:
1923 1927 # no extra flags set, no flag processor runs, text = rawtext
1924 1928 return rawtext, sidedata
1925 1929
1926 1930 if raw:
1927 1931 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1928 1932 text = rawtext
1929 1933 else:
1930 1934 r = flagutil.processflagsread(self, rawtext, flags)
1931 1935 text, validatehash = r
1932 1936 if validatehash:
1933 1937 self.checkhash(text, node, rev=rev)
1934 1938 if not validated:
1935 1939 self._revisioncache = (node, rev, rawtext)
1936 1940
1937 1941 return text, sidedata
1938 1942
1939 1943 def _rawtext(self, node, rev, _df=None):
1940 1944 """return the possibly unvalidated rawtext for a revision
1941 1945
1942 1946 returns (rev, rawtext, validated)
1943 1947 """
1944 1948
1945 1949 # revision in the cache (could be useful to apply delta)
1946 1950 cachedrev = None
1947 1951 # An intermediate text to apply deltas to
1948 1952 basetext = None
1949 1953
1950 1954 # Check if we have the entry in cache
1951 1955 # The cache entry looks like (node, rev, rawtext)
1952 1956 if self._revisioncache:
1953 1957 if self._revisioncache[0] == node:
1954 1958 return (rev, self._revisioncache[2], True)
1955 1959 cachedrev = self._revisioncache[1]
1956 1960
1957 1961 if rev is None:
1958 1962 rev = self.rev(node)
1959 1963
1960 1964 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1961 1965 if stopped:
1962 1966 basetext = self._revisioncache[2]
1963 1967
1964 1968 # drop cache to save memory, the caller is expected to
1965 1969 # update self._revisioncache after validating the text
1966 1970 self._revisioncache = None
1967 1971
1968 1972 targetsize = None
1969 1973 rawsize = self.index[rev][2]
1970 1974 if 0 <= rawsize:
1971 1975 targetsize = 4 * rawsize
1972 1976
1973 1977 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1974 1978 if basetext is None:
1975 1979 basetext = bytes(bins[0])
1976 1980 bins = bins[1:]
1977 1981
1978 1982 rawtext = mdiff.patches(basetext, bins)
1979 1983 del basetext # let us have a chance to free memory early
1980 1984 return (rev, rawtext, False)
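    # Reconstruction sketch of the path above when there is no cache
    # hit (`rl` is illustrative; names match the method body):
    #
    #   chain, stopped = rl._deltachain(rev)  # [base, ..., rev]
    #   bins = rl._chunks(chain)              # base text + binary patches
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])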
1981 1985
1982 1986 def _sidedata(self, rev):
1983 1987 """Return the sidedata for a given revision number."""
1984 1988 index_entry = self.index[rev]
1985 1989 sidedata_offset = index_entry[8]
1986 1990 sidedata_size = index_entry[9]
1987 1991
1988 1992 if self._inline:
1989 1993 sidedata_offset += self._io.size * (1 + rev)
1990 1994 if sidedata_size == 0:
1991 1995 return {}
1992 1996
1993 1997 segment = self._getsegment(sidedata_offset, sidedata_size)
1994 1998 sidedata = sidedatautil.deserialize_sidedata(segment)
1995 1999 return sidedata
1996 2000
1997 2001 def rawdata(self, nodeorrev, _df=None):
1998 2002 """return an uncompressed raw data of a given node or revision number.
1999 2003
2000 2004 _df - an existing file handle to read from. (internal-only)
2001 2005 """
2002 2006 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2003 2007
2004 2008 def hash(self, text, p1, p2):
2005 2009 """Compute a node hash.
2006 2010
2007 2011 Available as a function so that subclasses can replace the hash
2008 2012 as needed.
2009 2013 """
2010 2014 return storageutil.hashrevisionsha1(text, p1, p2)
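    # The default node hash is SHA-1 over the sorted parents followed
    # by the text; a rough equivalent (a sketch, not the canonical
    # implementation in storageutil):
    #
    #   import hashlib
    #
    #   def _sketch_hash(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()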
2011 2015
2012 2016 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2013 2017 """Check node hash integrity.
2014 2018
2015 2019 Available as a function so that subclasses can extend hash mismatch
2016 2020 behaviors as needed.
2017 2021 """
2018 2022 try:
2019 2023 if p1 is None and p2 is None:
2020 2024 p1, p2 = self.parents(node)
2021 2025 if node != self.hash(text, p1, p2):
2022 2026 # Clear the revision cache on hash failure. The revision cache
2023 2027 # only stores the raw revision and clearing the cache does have
2024 2028 # the side-effect that we won't have a cache hit when the raw
2025 2029 # revision data is accessed. But this case should be rare and
2026 2030 # it is extra work to teach the cache about the hash
2027 2031 # verification state.
2028 2032 if self._revisioncache and self._revisioncache[0] == node:
2029 2033 self._revisioncache = None
2030 2034
2031 2035 revornode = rev
2032 2036 if revornode is None:
2033 2037 revornode = templatefilters.short(hex(node))
2034 2038 raise error.RevlogError(
2035 2039 _(b"integrity check failed on %s:%s")
2036 2040 % (self.indexfile, pycompat.bytestr(revornode))
2037 2041 )
2038 2042 except error.RevlogError:
2039 2043 if self._censorable and storageutil.iscensoredtext(text):
2040 2044 raise error.CensoredNodeError(self.indexfile, node, text)
2041 2045 raise
2042 2046
2043 2047 def _enforceinlinesize(self, tr, fp=None):
2044 2048 """Check if the revlog is too big for inline and convert if so.
2045 2049
2046 2050 This should be called after revisions are added to the revlog. If the
2047 2051 revlog has grown too large to be an inline revlog, it will convert it
2048 2052 to use multiple index and data files.
2049 2053 """
2050 2054 tiprev = len(self) - 1
2051 2055 if (
2052 2056 not self._inline
2053 2057 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2054 2058 ):
2055 2059 return
2056 2060
2057 2061 troffset = tr.findoffset(self.indexfile)
2058 2062 if troffset is None:
2059 2063 raise error.RevlogError(
2060 2064 _(b"%s not found in the transaction") % self.indexfile
2061 2065 )
2062 2066 trindex = 0
2063 2067 tr.add(self.datafile, 0)
2064 2068
2065 2069 if fp:
2066 2070 fp.flush()
2067 2071 fp.close()
2068 2072 # We can't use the cached file handle after close(). So prevent
2069 2073 # its usage.
2070 2074 self._writinghandles = None
2071 2075
2072 2076 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2073 2077 for r in self:
2074 2078 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2075 2079 if troffset <= self.start(r):
2076 2080 trindex = r
2077 2081
2078 2082 with self._indexfp(b'w') as fp:
2079 2083 self.version &= ~FLAG_INLINE_DATA
2080 2084 self._inline = False
2081 2085 io = self._io
2082 2086 for i in self:
2083 2087 e = io.packentry(self.index[i], self.node, self.version, i)
2084 2088 fp.write(e)
2085 2089
2086 2090 # the temp file replaces the real index when we exit the context
2087 2091 # manager
2088 2092
2089 2093 tr.replace(self.indexfile, trindex * self._io.size)
2090 2094 nodemaputil.setup_persistent_nodemap(tr, self)
2091 2095 self._chunkclear()
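    # Conversion sketch: once an inline revlog's data grows past
    # _maxinline (128KiB), the loop above copies every data chunk out
    # of the .i file into a new .d file, then rewrites the index
    # entries without FLAG_INLINE_DATA so future reads use the split
    # layout.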
2092 2096
2093 2097 def _nodeduplicatecallback(self, transaction, node):
2094 2098 """called when trying to add a node already stored."""
2095 2099
2096 2100 def addrevision(
2097 2101 self,
2098 2102 text,
2099 2103 transaction,
2100 2104 link,
2101 2105 p1,
2102 2106 p2,
2103 2107 cachedelta=None,
2104 2108 node=None,
2105 2109 flags=REVIDX_DEFAULT_FLAGS,
2106 2110 deltacomputer=None,
2107 2111 sidedata=None,
2108 2112 ):
2109 2113 """add a revision to the log
2110 2114
2111 2115 text - the revision data to add
2112 2116 transaction - the transaction object used for rollback
2113 2117 link - the linkrev data to add
2114 2118 p1, p2 - the parent nodeids of the revision
2115 2119 cachedelta - an optional precomputed delta
2116 2120 node - nodeid of revision; typically node is not specified, and it is
2117 2121 computed by default as hash(text, p1, p2); however, subclasses might
2118 2122 use a different hashing method (and override checkhash() in that case)
2119 2123 flags - the known flags to set on the revision
2120 2124 deltacomputer - an optional deltacomputer instance shared between
2121 2125 multiple calls
2122 2126 """
2123 2127 if link == nullrev:
2124 2128 raise error.RevlogError(
2125 2129 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2126 2130 )
2127 2131
2128 2132 if sidedata is None:
2129 2133 sidedata = {}
2130 2134 elif not self.hassidedata:
2131 2135 raise error.ProgrammingError(
2132 2136 _(b"trying to add sidedata to a revlog who don't support them")
2133 2137 )
2134 2138
2135 2139 if flags:
2136 2140 node = node or self.hash(text, p1, p2)
2137 2141
2138 2142 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2139 2143
2140 2144 # If the flag processor modifies the revision data, ignore any provided
2141 2145 # cachedelta.
2142 2146 if rawtext != text:
2143 2147 cachedelta = None
2144 2148
2145 2149 if len(rawtext) > _maxentrysize:
2146 2150 raise error.RevlogError(
2147 2151 _(
2148 2152 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2149 2153 )
2150 2154 % (self.indexfile, len(rawtext))
2151 2155 )
2152 2156
2153 2157 node = node or self.hash(rawtext, p1, p2)
2154 2158 rev = self.index.get_rev(node)
2155 2159 if rev is not None:
2156 2160 return rev
2157 2161
2158 2162 if validatehash:
2159 2163 self.checkhash(rawtext, node, p1=p1, p2=p2)
2160 2164
2161 2165 return self.addrawrevision(
2162 2166 rawtext,
2163 2167 transaction,
2164 2168 link,
2165 2169 p1,
2166 2170 p2,
2167 2171 node,
2168 2172 flags,
2169 2173 cachedelta=cachedelta,
2170 2174 deltacomputer=deltacomputer,
2171 2175 sidedata=sidedata,
2172 2176 )
2173 2177
2174 2178 def addrawrevision(
2175 2179 self,
2176 2180 rawtext,
2177 2181 transaction,
2178 2182 link,
2179 2183 p1,
2180 2184 p2,
2181 2185 node,
2182 2186 flags,
2183 2187 cachedelta=None,
2184 2188 deltacomputer=None,
2185 2189 sidedata=None,
2186 2190 ):
2187 2191 """add a raw revision with known flags, node and parents
2188 2192 useful when reusing a revision not stored in this revlog (e.g. received
2189 2193 over the wire, or read from an external bundle).
2190 2194 """
2191 2195 dfh = None
2192 2196 if not self._inline:
2193 2197 dfh = self._datafp(b"a+")
2194 2198 ifh = self._indexfp(b"a+")
2195 2199 try:
2196 2200 return self._addrevision(
2197 2201 node,
2198 2202 rawtext,
2199 2203 transaction,
2200 2204 link,
2201 2205 p1,
2202 2206 p2,
2203 2207 flags,
2204 2208 cachedelta,
2205 2209 ifh,
2206 2210 dfh,
2207 2211 deltacomputer=deltacomputer,
2208 2212 sidedata=sidedata,
2209 2213 )
2210 2214 finally:
2211 2215 if dfh:
2212 2216 dfh.close()
2213 2217 ifh.close()
2214 2218
2215 2219 def compress(self, data):
2216 2220 """Generate a possibly-compressed representation of data."""
2217 2221 if not data:
2218 2222 return b'', data
2219 2223
2220 2224 compressed = self._compressor.compress(data)
2221 2225
2222 2226 if compressed:
2223 2227 # The revlog compressor added the header in the returned data.
2224 2228 return b'', compressed
2225 2229
2226 2230 if data[0:1] == b'\0':
2227 2231 return b'', data
2228 2232 return b'u', data
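    # Header semantics sketch for the tuple returned above: an empty
    # header means the payload already self-identifies (e.g. zlib
    # output starts with 'x'); b'u' marks data stored verbatim; data
    # that already begins with '\0' needs no marker. For instance
    # (illustrative call):
    #
    #   rl.compress(b'')  # -> (b'', b'')   empty chunk, no header needed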
2229 2233
2230 2234 def decompress(self, data):
2231 2235 """Decompress a revlog chunk.
2232 2236
2233 2237 The chunk is expected to begin with a header identifying the
2234 2238 format type so it can be routed to an appropriate decompressor.
2235 2239 """
2236 2240 if not data:
2237 2241 return data
2238 2242
2239 2243 # Revlogs are read much more frequently than they are written and many
2240 2244 # chunks only take microseconds to decompress, so performance is
2241 2245 # important here.
2242 2246 #
2243 2247 # We can make a few assumptions about revlogs:
2244 2248 #
2245 2249 # 1) the majority of chunks will be compressed (as opposed to inline
2246 2250 # raw data).
2247 2251 # 2) decompressing *any* data will likely be at least 10x slower than
2248 2252 # returning raw inline data.
2249 2253 # 3) we want to prioritize common and officially supported compression
2250 2254 # engines
2251 2255 #
2252 2256 # It follows that we want to optimize for "decompress compressed data
2253 2257 # when encoded with common and officially supported compression engines"
2254 2258 # case over "raw data" and "data encoded by less common or non-official
2255 2259 # compression engines." That is why we have the inline lookup first
2256 2260 # followed by the compengines lookup.
2257 2261 #
2258 2262 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2259 2263 # compressed chunks. And this matters for changelog and manifest reads.
2260 2264 t = data[0:1]
2261 2265
2262 2266 if t == b'x':
2263 2267 try:
2264 2268 return _zlibdecompress(data)
2265 2269 except zlib.error as e:
2266 2270 raise error.RevlogError(
2267 2271 _(b'revlog decompress error: %s')
2268 2272 % stringutil.forcebytestr(e)
2269 2273 )
2270 2274 # '\0' is more common than 'u' so it goes first.
2271 2275 elif t == b'\0':
2272 2276 return data
2273 2277 elif t == b'u':
2274 2278 return util.buffer(data, 1)
2275 2279
2276 2280 try:
2277 2281 compressor = self._decompressors[t]
2278 2282 except KeyError:
2279 2283 try:
2280 2284 engine = util.compengines.forrevlogheader(t)
2281 2285 compressor = engine.revlogcompressor(self._compengineopts)
2282 2286 self._decompressors[t] = compressor
2283 2287 except KeyError:
2284 2288 raise error.RevlogError(_(b'unknown compression type %r') % t)
2285 2289
2286 2290 return compressor.decompress(data)
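    # Dispatch sketch mirroring the branches above (illustrative
    # inputs):
    #
    #   rl.decompress(b'')       # -> b''                 (empty chunk)
    #   rl.decompress(b'\0abc')  # -> b'\0abc'            (raw, as-is)
    #   rl.decompress(b'uabc')   # -> buffer over b'abc'  (stored verbatim)
    #   rl.decompress(b'x...')   # -> zlib-inflated payload
    #
    # Any other leading byte is looked up in the registered compression
    # engines.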
2287 2291
2288 2292 def _addrevision(
2289 2293 self,
2290 2294 node,
2291 2295 rawtext,
2292 2296 transaction,
2293 2297 link,
2294 2298 p1,
2295 2299 p2,
2296 2300 flags,
2297 2301 cachedelta,
2298 2302 ifh,
2299 2303 dfh,
2300 2304 alwayscache=False,
2301 2305 deltacomputer=None,
2302 2306 sidedata=None,
2303 2307 ):
2304 2308 """internal function to add revisions to the log
2305 2309
2306 2310 see addrevision for argument descriptions.
2307 2311
2308 2312 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2309 2313
2310 2314 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2311 2315 be used.
2312 2316
2313 2317 invariants:
2314 2318 - rawtext is optional (can be None); if not set, cachedelta must be set.
2315 2319 if both are set, they must correspond to each other.
2316 2320 """
2317 2321 if node == nullid:
2318 2322 raise error.RevlogError(
2319 2323 _(b"%s: attempt to add null revision") % self.indexfile
2320 2324 )
2321 2325 if node == wdirid or node in wdirfilenodeids:
2322 2326 raise error.RevlogError(
2323 2327 _(b"%s: attempt to add wdir revision") % self.indexfile
2324 2328 )
2325 2329
2326 2330 if self._inline:
2327 2331 fh = ifh
2328 2332 else:
2329 2333 fh = dfh
2330 2334
2331 2335 btext = [rawtext]
2332 2336
2333 2337 curr = len(self)
2334 2338 prev = curr - 1
2335 2339
2336 2340 offset = self._get_data_offset(prev)
2337 2341
2338 2342 if self._concurrencychecker:
2339 2343 if self._inline:
2340 2344 # offset is "as if" it were in the .d file, so we need to add on
2341 2345 # the size of the entry metadata.
2342 2346 self._concurrencychecker(
2343 2347 ifh, self.indexfile, offset + curr * self._io.size
2344 2348 )
2345 2349 else:
2346 2350 # Entries in the .i are a consistent size.
2347 2351 self._concurrencychecker(
2348 2352 ifh, self.indexfile, curr * self._io.size
2349 2353 )
2350 2354 self._concurrencychecker(dfh, self.datafile, offset)
2351 2355
2352 2356 p1r, p2r = self.rev(p1), self.rev(p2)
2353 2357
2354 2358 # full versions are inserted when the needed deltas
2355 2359 # become comparable to the uncompressed text
2356 2360 if rawtext is None:
2357 2361 # need rawtext size, before changed by flag processors, which is
2358 2362 # the non-raw size. use revlog explicitly to avoid filelog's extra
2359 2363 # logic that might remove metadata size.
2360 2364 textlen = mdiff.patchedsize(
2361 2365 revlog.size(self, cachedelta[0]), cachedelta[1]
2362 2366 )
2363 2367 else:
2364 2368 textlen = len(rawtext)
2365 2369
2366 2370 if deltacomputer is None:
2367 2371 deltacomputer = deltautil.deltacomputer(self)
2368 2372
2369 2373 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2370 2374
2371 2375 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2372 2376
2373 2377 if sidedata:
2374 2378 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2375 2379 sidedata_offset = offset + deltainfo.deltalen
2376 2380 else:
2377 2381 serialized_sidedata = b""
2378 2382 # Don't store the offset if the sidedata is empty; that way
2379 2383 # we can easily detect empty sidedata, and it will be no different
2380 2384 # from sidedata we add manually.
2381 2385 sidedata_offset = 0
2382 2386
2383 2387 e = (
2384 2388 offset_type(offset, flags),
2385 2389 deltainfo.deltalen,
2386 2390 textlen,
2387 2391 deltainfo.base,
2388 2392 link,
2389 2393 p1r,
2390 2394 p2r,
2391 2395 node,
2392 2396 sidedata_offset,
2393 2397 len(serialized_sidedata),
2394 2398 )
2395 2399
2396 2400 if self.version & 0xFFFF != REVLOGV2:
2397 2401 e = e[:8]
2398 2402
2399 2403 self.index.append(e)
2400 2404 entry = self._io.packentry(e, self.node, self.version, curr)
2401 2405 self._writeentry(
2402 2406 transaction,
2403 2407 ifh,
2404 2408 dfh,
2405 2409 entry,
2406 2410 deltainfo.data,
2407 2411 link,
2408 2412 offset,
2409 2413 serialized_sidedata,
2410 2414 )
2411 2415
2412 2416 rawtext = btext[0]
2413 2417
2414 2418 if alwayscache and rawtext is None:
2415 2419 rawtext = deltacomputer.buildtext(revinfo, fh)
2416 2420
2417 2421 if type(rawtext) == bytes: # only accept immutable objects
2418 2422 self._revisioncache = (node, curr, rawtext)
2419 2423 self._chainbasecache[curr] = deltainfo.chainbase
2420 2424 return curr
2421 2425
2422 2426 def _get_data_offset(self, prev):
2423 2427 """Returns the current offset in the (in-transaction) data file.
2424 2428 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2425 2429 file to store that information: since sidedata can be rewritten to the
2426 2430 end of the data file within a transaction, you can have cases where, for
2427 2431 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2428 2432 to `n - 1`'s sidedata being written after `n`'s data.
2429 2433
2430 2434 TODO cache this in a docket file before getting out of experimental."""
2431 2435 if self.version & 0xFFFF != REVLOGV2:
2432 2436 return self.end(prev)
2433 2437
2434 2438 offset = 0
2435 2439 for rev, entry in enumerate(self.index):
2436 2440 sidedata_end = entry[8] + entry[9]
2437 2441 # Sidedata for a previous rev has potentially been written after
2438 2442 # this rev's end, so take the max.
2439 2443 offset = max(self.end(rev), offset, sidedata_end)
2440 2444 return offset
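    # Example of why the max() above is needed (illustrative layout):
    # if rev 5's sidedata was rewritten after rev 6 was appended, the
    # data file can look like
    #
    #   ...[rev 5 data][rev 6 data][rev 5 sidedata]
    #
    # so the next safe write offset is the maximum over all revisions
    # of end(rev) and sidedata_offset + sidedata_size.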
2441 2445
2442 2446 def _writeentry(
2443 2447 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2444 2448 ):
2445 2449 # Files opened in a+ mode have inconsistent behavior on various
2446 2450 # platforms. Windows requires that a file positioning call be made
2447 2451 # when the file handle transitions between reads and writes. See
2448 2452 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2449 2453 # platforms, Python or the platform itself can be buggy. Some versions
2450 2454 # of Solaris have been observed to not append at the end of the file
2451 2455 # if the file was seeked to before the end. See issue4943 for more.
2452 2456 #
2453 2457 # We work around this issue by inserting a seek() before writing.
2454 2458 # Note: This is likely not necessary on Python 3. However, because
2455 2459 # the file handle is reused for reads and may be seeked there, we need
2456 2460 # to be careful before changing this.
2457 2461 ifh.seek(0, os.SEEK_END)
2458 2462 if dfh:
2459 2463 dfh.seek(0, os.SEEK_END)
2460 2464
2461 2465 curr = len(self) - 1
2462 2466 if not self._inline:
2463 2467 transaction.add(self.datafile, offset)
2464 2468 transaction.add(self.indexfile, curr * len(entry))
2465 2469 if data[0]:
2466 2470 dfh.write(data[0])
2467 2471 dfh.write(data[1])
2468 2472 if sidedata:
2469 2473 dfh.write(sidedata)
2470 2474 ifh.write(entry)
2471 2475 else:
2472 2476 offset += curr * self._io.size
2473 2477 transaction.add(self.indexfile, offset)
2474 2478 ifh.write(entry)
2475 2479 ifh.write(data[0])
2476 2480 ifh.write(data[1])
2477 2481 if sidedata:
2478 2482 ifh.write(sidedata)
2479 2483 self._enforceinlinesize(transaction, ifh)
2480 2484 nodemaputil.setup_persistent_nodemap(transaction, self)
2481 2485
2482 2486 def addgroup(
2483 2487 self,
2484 2488 deltas,
2485 2489 linkmapper,
2486 2490 transaction,
2487 2491 alwayscache=False,
2488 2492 addrevisioncb=None,
2489 2493 duplicaterevisioncb=None,
2490 2494 ):
2491 2495 """
2492 2496 add a delta group
2493 2497
2494 2498 given a set of deltas, add them to the revision log. the
2495 2499 first delta is against its parent, which should be in our
2496 2500 log, the rest are against the previous delta.
2497 2501
2498 2502 If ``addrevisioncb`` is defined, it will be called with arguments of
2499 2503 this revlog and the node that was added.
2500 2504 """
2501 2505
2502 2506 if self._writinghandles:
2503 2507 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2504 2508
2505 2509 r = len(self)
2506 2510 end = 0
2507 2511 if r:
2508 2512 end = self.end(r - 1)
2509 2513 ifh = self._indexfp(b"a+")
2510 2514 isize = r * self._io.size
2511 2515 if self._inline:
2512 2516 transaction.add(self.indexfile, end + isize)
2513 2517 dfh = None
2514 2518 else:
2515 2519 transaction.add(self.indexfile, isize)
2516 2520 transaction.add(self.datafile, end)
2517 2521 dfh = self._datafp(b"a+")
2518 2522
2519 2523 def flush():
2520 2524 if dfh:
2521 2525 dfh.flush()
2522 2526 ifh.flush()
2523 2527
2524 2528 self._writinghandles = (ifh, dfh)
2525 2529 empty = True
2526 2530
2527 2531 try:
2528 2532 deltacomputer = deltautil.deltacomputer(self)
2529 2533 # loop through our set of deltas
2530 2534 for data in deltas:
2531 2535 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2532 2536 link = linkmapper(linknode)
2533 2537 flags = flags or REVIDX_DEFAULT_FLAGS
2534 2538
2535 2539 rev = self.index.get_rev(node)
2536 2540 if rev is not None:
2537 2541 # this can happen if two branches make the same change
2538 2542 self._nodeduplicatecallback(transaction, rev)
2539 2543 if duplicaterevisioncb:
2540 2544 duplicaterevisioncb(self, rev)
2541 2545 empty = False
2542 2546 continue
2543 2547
2544 2548 for p in (p1, p2):
2545 2549 if not self.index.has_node(p):
2546 2550 raise error.LookupError(
2547 2551 p, self.indexfile, _(b'unknown parent')
2548 2552 )
2549 2553
2550 2554 if not self.index.has_node(deltabase):
2551 2555 raise error.LookupError(
2552 2556 deltabase, self.indexfile, _(b'unknown delta base')
2553 2557 )
2554 2558
2555 2559 baserev = self.rev(deltabase)
2556 2560
2557 2561 if baserev != nullrev and self.iscensored(baserev):
2558 2562 # if base is censored, delta must be full replacement in a
2559 2563 # single patch operation
2560 2564 hlen = struct.calcsize(b">lll")
2561 2565 oldlen = self.rawsize(baserev)
2562 2566 newlen = len(delta) - hlen
2563 2567 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2564 2568 raise error.CensoredBaseError(
2565 2569 self.indexfile, self.node(baserev)
2566 2570 )
2567 2571
2568 2572 if not flags and self._peek_iscensored(baserev, delta, flush):
2569 2573 flags |= REVIDX_ISCENSORED
2570 2574
2571 2575 # We assume consumers of addrevisioncb will want to retrieve
2572 2576 # the added revision, which will require a call to
2573 2577 # revision(). revision() will fast path if there is a cache
2574 2578 # hit. So, we tell _addrevision() to always cache in this case.
2575 2579 # We're only using addgroup() in the context of changegroup
2576 2580 # generation so the revision data can always be handled as raw
2577 2581 # by the flagprocessor.
2578 2582 rev = self._addrevision(
2579 2583 node,
2580 2584 None,
2581 2585 transaction,
2582 2586 link,
2583 2587 p1,
2584 2588 p2,
2585 2589 flags,
2586 2590 (baserev, delta),
2587 2591 ifh,
2588 2592 dfh,
2589 2593 alwayscache=alwayscache,
2590 2594 deltacomputer=deltacomputer,
2591 2595 sidedata=sidedata,
2592 2596 )
2593 2597
2594 2598 if addrevisioncb:
2595 2599 addrevisioncb(self, rev)
2596 2600 empty = False
2597 2601
2598 2602 if not dfh and not self._inline:
2599 2603 # addrevision switched from inline to conventional
2600 2604 # reopen the index
2601 2605 ifh.close()
2602 2606 dfh = self._datafp(b"a+")
2603 2607 ifh = self._indexfp(b"a+")
2604 2608 self._writinghandles = (ifh, dfh)
2605 2609 finally:
2606 2610 self._writinghandles = None
2607 2611
2608 2612 if dfh:
2609 2613 dfh.close()
2610 2614 ifh.close()
2611 2615 return not empty
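    # Shape note: each element of `deltas` unpacks in the loop above as
    # an 8-tuple
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # where `delta` is a binary patch against `deltabase`; changegroup
    # application is the typical producer of these tuples.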
2612 2616
2613 2617 def iscensored(self, rev):
2614 2618 """Check if a file revision is censored."""
2615 2619 if not self._censorable:
2616 2620 return False
2617 2621
2618 2622 return self.flags(rev) & REVIDX_ISCENSORED
2619 2623
2620 2624 def _peek_iscensored(self, baserev, delta, flush):
2621 2625 """Quickly check if a delta produces a censored revision."""
2622 2626 if not self._censorable:
2623 2627 return False
2624 2628
2625 2629 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2626 2630
2627 2631 def getstrippoint(self, minlink):
2628 2632 """find the minimum rev that must be stripped to strip the linkrev
2629 2633
2630 2634 Returns a tuple containing the minimum rev and a set of all revs that
2631 2635 have linkrevs that will be broken by this strip.
2632 2636 """
2633 2637 return storageutil.resolvestripinfo(
2634 2638 minlink,
2635 2639 len(self) - 1,
2636 2640 self.headrevs(),
2637 2641 self.linkrev,
2638 2642 self.parentrevs,
2639 2643 )
2640 2644
2641 2645 def strip(self, minlink, transaction):
2642 2646 """truncate the revlog on the first revision with a linkrev >= minlink
2643 2647
2644 2648 This function is called when we're stripping revision minlink and
2645 2649 its descendants from the repository.
2646 2650
2647 2651 We have to remove all revisions with linkrev >= minlink, because
2648 2652 the equivalent changelog revisions will be renumbered after the
2649 2653 strip.
2650 2654
2651 2655 So we truncate the revlog on the first of these revisions, and
2652 2656 trust that the caller has saved the revisions that shouldn't be
2653 2657 removed and that it'll re-add them after this truncation.
2654 2658 """
2655 2659 if len(self) == 0:
2656 2660 return
2657 2661
2658 2662 rev, _ = self.getstrippoint(minlink)
2659 2663 if rev == len(self):
2660 2664 return
2661 2665
2662 2666 # first truncate the files on disk
2663 2667 end = self.start(rev)
2664 2668 if not self._inline:
2665 2669 transaction.add(self.datafile, end)
2666 2670 end = rev * self._io.size
2667 2671 else:
2668 2672 end += rev * self._io.size
2669 2673
2670 2674 transaction.add(self.indexfile, end)
2671 2675
2672 2676 # then reset internal state in memory to forget those revisions
2673 2677 self._revisioncache = None
2674 2678 self._chaininfocache = util.lrucachedict(500)
2675 2679 self._chunkclear()
2676 2680
2677 2681 del self.index[rev:-1]
2678 2682
2679 2683 def checksize(self):
2680 2684 """Check size of index and data files
2681 2685
2682 2686 return a (dd, di) tuple.
2683 2687 - dd: extra bytes for the "data" file
2684 2688 - di: extra bytes for the "index" file
2685 2689
2686 2690 A healthy revlog will return (0, 0).
2687 2691 """
2688 2692 expected = 0
2689 2693 if len(self):
2690 2694 expected = max(0, self.end(len(self) - 1))
2691 2695
2692 2696 try:
2693 2697 with self._datafp() as f:
2694 2698 f.seek(0, io.SEEK_END)
2695 2699 actual = f.tell()
2696 2700 dd = actual - expected
2697 2701 except IOError as inst:
2698 2702 if inst.errno != errno.ENOENT:
2699 2703 raise
2700 2704 dd = 0
2701 2705
2702 2706 try:
2703 2707 f = self.opener(self.indexfile)
2704 2708 f.seek(0, io.SEEK_END)
2705 2709 actual = f.tell()
2706 2710 f.close()
2707 2711 s = self._io.size
2708 2712 i = max(0, actual // s)
2709 2713 di = actual - (i * s)
2710 2714 if self._inline:
2711 2715 databytes = 0
2712 2716 for r in self:
2713 2717 databytes += max(0, self.length(r))
2714 2718 dd = 0
2715 2719 di = actual - len(self) * s - databytes
2716 2720 except IOError as inst:
2717 2721 if inst.errno != errno.ENOENT:
2718 2722 raise
2719 2723 di = 0
2720 2724
2721 2725 return (dd, di)
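    # Healthy-revlog sketch, per the docstring above (`rl` is
    # illustrative):
    #
    #   dd, di = rl.checksize()
    #   assert (dd, di) == (0, 0)  # no stray bytes in data or index files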
2722 2726
2723 2727 def files(self):
2724 2728 res = [self.indexfile]
2725 2729 if not self._inline:
2726 2730 res.append(self.datafile)
2727 2731 return res
2728 2732
2729 2733 def emitrevisions(
2730 2734 self,
2731 2735 nodes,
2732 2736 nodesorder=None,
2733 2737 revisiondata=False,
2734 2738 assumehaveparentrevisions=False,
2735 2739 deltamode=repository.CG_DELTAMODE_STD,
2736 2740 sidedata_helpers=None,
2737 2741 ):
2738 2742 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2739 2743 raise error.ProgrammingError(
2740 2744 b'unhandled value for nodesorder: %s' % nodesorder
2741 2745 )
2742 2746
2743 2747 if nodesorder is None and not self._generaldelta:
2744 2748 nodesorder = b'storage'
2745 2749
2746 2750 if (
2747 2751 not self._storedeltachains
2748 2752 and deltamode != repository.CG_DELTAMODE_PREV
2749 2753 ):
2750 2754 deltamode = repository.CG_DELTAMODE_FULL
2751 2755
2752 2756 return storageutil.emitrevisions(
2753 2757 self,
2754 2758 nodes,
2755 2759 nodesorder,
2756 2760 revlogrevisiondelta,
2757 2761 deltaparentfn=self.deltaparent,
2758 2762 candeltafn=self.candelta,
2759 2763 rawsizefn=self.rawsize,
2760 2764 revdifffn=self.revdiff,
2761 2765 flagsfn=self.flags,
2762 2766 deltamode=deltamode,
2763 2767 revisiondata=revisiondata,
2764 2768 assumehaveparentrevisions=assumehaveparentrevisions,
2765 2769 sidedata_helpers=sidedata_helpers,
2766 2770 )
2767 2771
2768 2772 DELTAREUSEALWAYS = b'always'
2769 2773 DELTAREUSESAMEREVS = b'samerevs'
2770 2774 DELTAREUSENEVER = b'never'
2771 2775
2772 2776 DELTAREUSEFULLADD = b'fulladd'
2773 2777
2774 2778 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2775 2779
2776 2780 def clone(
2777 2781 self,
2778 2782 tr,
2779 2783 destrevlog,
2780 2784 addrevisioncb=None,
2781 2785 deltareuse=DELTAREUSESAMEREVS,
2782 2786 forcedeltabothparents=None,
2783 2787 sidedatacompanion=None,
2784 2788 ):
2785 2789 """Copy this revlog to another, possibly with format changes.
2786 2790
2787 2791 The destination revlog will contain the same revisions and nodes.
2788 2792 However, it may not be bit-for-bit identical due to e.g. delta encoding
2789 2793 differences.
2790 2794
2791 2795 The ``deltareuse`` argument controls how deltas from the existing revlog
2792 2796 are preserved in the destination revlog. The argument can have the
2793 2797 following values:
2794 2798
2795 2799 DELTAREUSEALWAYS
2796 2800 Deltas will always be reused (if possible), even if the destination
2797 2801 revlog would not select the same revisions for the delta. This is the
2798 2802 fastest mode of operation.
2799 2803 DELTAREUSESAMEREVS
2800 2804 Deltas will be reused if the destination revlog would pick the same
2801 2805 revisions for the delta. This mode strikes a balance between speed
2802 2806 and optimization.
2803 2807 DELTAREUSENEVER
2804 2808 Deltas will never be reused. This is the slowest mode of execution.
2805 2809 This mode can be used to recompute deltas (e.g. if the diff/delta
2806 2810 algorithm changes).
2807 2811 DELTAREUSEFULLADD
2808 2812 Revisions will be re-added as if they were new content. This is
2809 2813 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2810 2814 e.g. large file detection and handling.
2811 2815
2812 2816 Delta computation can be slow, so the choice of delta reuse policy can
2813 2817 significantly affect run time.
2814 2818
2815 2819 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2816 2820 two extremes. Deltas will be reused if they are appropriate. But if the
2817 2821 delta could choose a better revision, it will do so. This means if you
2818 2822 are converting a non-generaldelta revlog to a generaldelta revlog,
2819 2823 deltas will be recomputed if the delta's parent isn't a parent of the
2820 2824 revision.
2821 2825
2822 2826 In addition to the delta policy, the ``forcedeltabothparents``
2823 2827 argument controls whether to force computing deltas against both parents
2824 2828 for merges. If None, the destination revlog's current setting is used.
2825 2829
2826 2830 If not None, `sidedatacompanion` is a callable that accepts two
2827 2831 arguments:
2828 2832
2829 2833 (srcrevlog, rev)
2830 2834
2831 2835 and returns a quintuple that controls changes to sidedata content from
2832 2836 the old revision to the new clone result:
2833 2837
2834 2838 (dropall, filterout, update, new_flags, dropped_flags)
2835 2839
2836 2840 * if `dropall` is True, all sidedata should be dropped
2837 2841 * `filterout` is a set of sidedata keys that should be dropped
2838 2842 * `update` is a mapping of additional/new key -> value
2839 2843 * `new_flags` is a bitfield of new flags that the revision should get
2840 2844 * `dropped_flags` is a bitfield of flags that the revision should no longer have
2841 2845 """
2842 2846 if deltareuse not in self.DELTAREUSEALL:
2843 2847 raise ValueError(
2844 2848 _(b'value for deltareuse invalid: %s') % deltareuse
2845 2849 )
2846 2850
2847 2851 if len(destrevlog):
2848 2852 raise ValueError(_(b'destination revlog is not empty'))
2849 2853
2850 2854 if getattr(self, 'filteredrevs', None):
2851 2855 raise ValueError(_(b'source revlog has filtered revisions'))
2852 2856 if getattr(destrevlog, 'filteredrevs', None):
2853 2857 raise ValueError(_(b'destination revlog has filtered revisions'))
2854 2858
2855 2859 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2856 2860 # if possible.
2857 2861 oldlazydelta = destrevlog._lazydelta
2858 2862 oldlazydeltabase = destrevlog._lazydeltabase
2859 2863 oldamd = destrevlog._deltabothparents
2860 2864
2861 2865 try:
2862 2866 if deltareuse == self.DELTAREUSEALWAYS:
2863 2867 destrevlog._lazydeltabase = True
2864 2868 destrevlog._lazydelta = True
2865 2869 elif deltareuse == self.DELTAREUSESAMEREVS:
2866 2870 destrevlog._lazydeltabase = False
2867 2871 destrevlog._lazydelta = True
2868 2872 elif deltareuse == self.DELTAREUSENEVER:
2869 2873 destrevlog._lazydeltabase = False
2870 2874 destrevlog._lazydelta = False
2871 2875
2872 2876 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2873 2877
2874 2878 self._clone(
2875 2879 tr,
2876 2880 destrevlog,
2877 2881 addrevisioncb,
2878 2882 deltareuse,
2879 2883 forcedeltabothparents,
2880 2884 sidedatacompanion,
2881 2885 )
2882 2886
2883 2887 finally:
2884 2888 destrevlog._lazydelta = oldlazydelta
2885 2889 destrevlog._lazydeltabase = oldlazydeltabase
2886 2890 destrevlog._deltabothparents = oldamd
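    # Usage sketch (hypothetical `src` and `dst` revlogs inside an open
    # transaction `tr`; not a complete upgrade procedure):
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)
    #   assert len(dst) == len(src)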
2887 2891
2888 2892 def _clone(
2889 2893 self,
2890 2894 tr,
2891 2895 destrevlog,
2892 2896 addrevisioncb,
2893 2897 deltareuse,
2894 2898 forcedeltabothparents,
2895 2899 sidedatacompanion,
2896 2900 ):
2897 2901 """perform the core duty of `revlog.clone` after parameter processing"""
2898 2902 deltacomputer = deltautil.deltacomputer(destrevlog)
2899 2903 index = self.index
2900 2904 for rev in self:
2901 2905 entry = index[rev]
2902 2906
2903 2907 # Some classes override linkrev to take filtered revs into
2904 2908 # account. Use raw entry from index.
2905 2909 flags = entry[0] & 0xFFFF
2906 2910 linkrev = entry[4]
2907 2911 p1 = index[entry[5]][7]
2908 2912 p2 = index[entry[6]][7]
2909 2913 node = entry[7]
2910 2914
2911 2915 sidedataactions = (False, [], {}, 0, 0)
2912 2916 if sidedatacompanion is not None:
2913 2917 sidedataactions = sidedatacompanion(self, rev)
2914 2918
2915 2919 # (Possibly) reuse the delta from the revlog if allowed and
2916 2920 # the revlog chunk is a delta.
2917 2921 cachedelta = None
2918 2922 rawtext = None
2919 2923 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2920 2924 dropall = sidedataactions[0]
2921 2925 filterout = sidedataactions[1]
2922 2926 update = sidedataactions[2]
2923 2927 new_flags = sidedataactions[3]
2924 2928 dropped_flags = sidedataactions[4]
2925 2929 text, sidedata = self._revisiondata(rev)
2926 2930 if dropall:
2927 2931 sidedata = {}
2928 2932 for key in filterout:
2929 2933 sidedata.pop(key, None)
2930 2934 sidedata.update(update)
2931 2935 if not sidedata:
2932 2936 sidedata = None
2933 2937
2934 2938 flags |= new_flags
2935 2939 flags &= ~dropped_flags
2936 2940
2937 2941 destrevlog.addrevision(
2938 2942 text,
2939 2943 tr,
2940 2944 linkrev,
2941 2945 p1,
2942 2946 p2,
2943 2947 cachedelta=cachedelta,
2944 2948 node=node,
2945 2949 flags=flags,
2946 2950 deltacomputer=deltacomputer,
2947 2951 sidedata=sidedata,
2948 2952 )
2949 2953 else:
2950 2954 if destrevlog._lazydelta:
2951 2955 dp = self.deltaparent(rev)
2952 2956 if dp != nullrev:
2953 2957 cachedelta = (dp, bytes(self._chunk(rev)))
2954 2958
2955 2959 if not cachedelta:
2956 2960 rawtext = self.rawdata(rev)
2957 2961
2958 2962 ifh = destrevlog.opener(
2959 2963 destrevlog.indexfile, b'a+', checkambig=False
2960 2964 )
2961 2965 dfh = None
2962 2966 if not destrevlog._inline:
2963 2967 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2964 2968 try:
2965 2969 destrevlog._addrevision(
2966 2970 node,
2967 2971 rawtext,
2968 2972 tr,
2969 2973 linkrev,
2970 2974 p1,
2971 2975 p2,
2972 2976 flags,
2973 2977 cachedelta,
2974 2978 ifh,
2975 2979 dfh,
2976 2980 deltacomputer=deltacomputer,
2977 2981 )
2978 2982 finally:
2979 2983 if dfh:
2980 2984 dfh.close()
2981 2985 ifh.close()
2982 2986
2983 2987 if addrevisioncb:
2984 2988 addrevisioncb(self, rev, node)
2985 2989
2986 2990 def censorrevision(self, tr, censornode, tombstone=b''):
2987 2991 if (self.version & 0xFFFF) == REVLOGV0:
2988 2992 raise error.RevlogError(
2989 2993 _(b'cannot censor with version %d revlogs') % self.version
2990 2994 )
2991 2995
2992 2996 censorrev = self.rev(censornode)
2993 2997 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2994 2998
2995 2999 if len(tombstone) > self.rawsize(censorrev):
2996 3000 raise error.Abort(
2997 3001 _(b'censor tombstone must be no longer than censored data')
2998 3002 )
2999 3003
3000 3004 # Rewriting the revlog in place is hard. Our strategy for censoring is
3001 3005 # to create a new revlog, copy all revisions to it, then replace the
3002 3006 # revlogs on transaction close.
3003 3007
3004 3008 newindexfile = self.indexfile + b'.tmpcensored'
3005 3009 newdatafile = self.datafile + b'.tmpcensored'
3006 3010
3007 3011 # This is a bit dangerous. We could easily have a mismatch of state.
3008 3012 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
3009 3013 newrl.version = self.version
3010 3014 newrl._generaldelta = self._generaldelta
3011 3015 newrl._io = self._io
3012 3016
3013 3017 for rev in self.revs():
3014 3018 node = self.node(rev)
3015 3019 p1, p2 = self.parents(node)
3016 3020
3017 3021 if rev == censorrev:
3018 3022 newrl.addrawrevision(
3019 3023 tombstone,
3020 3024 tr,
3021 3025 self.linkrev(censorrev),
3022 3026 p1,
3023 3027 p2,
3024 3028 censornode,
3025 3029 REVIDX_ISCENSORED,
3026 3030 )
3027 3031
3028 3032 if newrl.deltaparent(rev) != nullrev:
3029 3033 raise error.Abort(
3030 3034 _(
3031 3035 b'censored revision stored as delta; '
3032 3036 b'cannot censor'
3033 3037 ),
3034 3038 hint=_(
3035 3039 b'censoring of revlogs is not '
3036 3040 b'fully implemented; please report '
3037 3041 b'this bug'
3038 3042 ),
3039 3043 )
3040 3044 continue
3041 3045
3042 3046 if self.iscensored(rev):
3043 3047 if self.deltaparent(rev) != nullrev:
3044 3048 raise error.Abort(
3045 3049 _(
3046 3050 b'cannot censor due to censored '
3047 3051 b'revision having delta stored'
3048 3052 )
3049 3053 )
3050 3054 rawtext = self._chunk(rev)
3051 3055 else:
3052 3056 rawtext = self.rawdata(rev)
3053 3057
3054 3058 newrl.addrawrevision(
3055 3059 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3056 3060 )
3057 3061
3058 3062 tr.addbackup(self.indexfile, location=b'store')
3059 3063 if not self._inline:
3060 3064 tr.addbackup(self.datafile, location=b'store')
3061 3065
3062 3066 self.opener.rename(newrl.indexfile, self.indexfile)
3063 3067 if not self._inline:
3064 3068 self.opener.rename(newrl.datafile, self.datafile)
3065 3069
3066 3070 self.clearcaches()
3067 3071 self._loadindex()
3068 3072
3069 3073 def verifyintegrity(self, state):
3070 3074 """Verifies the integrity of the revlog.
3071 3075
3072 3076 Yields ``revlogproblem`` instances describing problems that are
3073 3077 found.
3074 3078 """
3075 3079 dd, di = self.checksize()
3076 3080 if dd:
3077 3081 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3078 3082 if di:
3079 3083 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3080 3084
3081 3085 version = self.version & 0xFFFF
3082 3086
3083 3087 # The verifier tells us what version revlog we should be.
3084 3088 if version != state[b'expectedversion']:
3085 3089 yield revlogproblem(
3086 3090 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3087 3091 % (self.indexfile, version, state[b'expectedversion'])
3088 3092 )
3089 3093
3090 3094 state[b'skipread'] = set()
3091 3095 state[b'safe_renamed'] = set()
3092 3096
3093 3097 for rev in self:
3094 3098 node = self.node(rev)
3095 3099
3096 3100 # Verify contents. 4 cases to care about:
3097 3101 #
3098 3102 # common: the most common case
3099 3103 # rename: with a rename
3100 3104 # meta: file content starts with b'\1\n', the metadata
3101 3105 # header defined in filelog.py, but without a rename
3102 3106 # ext: content stored externally
3103 3107 #
3104 3108 # More formally, their differences are shown below:
3105 3109 #
3106 3110 # | common | rename | meta | ext
3107 3111 # -------------------------------------------------------
3108 3112 # flags() | 0 | 0 | 0 | not 0
3109 3113 # renamed() | False | True | False | ?
3110 3114 # rawtext[0:2]=='\1\n'| False | True | True | ?
3111 3115 #
3112 3116 # "rawtext" means the raw text stored in revlog data, which
3113 3117 # could be retrieved by "rawdata(rev)". "text"
3114 3118 # mentioned below is "revision(rev)".
3115 3119 #
3116 3120 # There are 3 different lengths stored physically:
3117 3121 # 1. L1: rawsize, stored in revlog index
3118 3122 # 2. L2: len(rawtext), stored in revlog data
3119 3123 # 3. L3: len(text), stored in revlog data if flags==0, or
3120 3124 # possibly somewhere else if flags!=0
3121 3125 #
3122 3126 # L1 should be equal to L2. L3 could be different from them.
3123 3127 # "text" may or may not affect commit hash depending on flag
3124 3128 # processors (see flagutil.addflagprocessor).
3125 3129 #
3126 3130 # | common | rename | meta | ext
3127 3131 # -------------------------------------------------
3128 3132 # rawsize() | L1 | L1 | L1 | L1
3129 3133 # size() | L1 | L2-LM | L1(*) | L1 (?)
3130 3134 # len(rawtext) | L2 | L2 | L2 | L2
3131 3135 # len(text) | L2 | L2 | L2 | L3
3132 3136 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3133 3137 #
3134 3138 # LM: length of metadata, depending on rawtext
3135 3139 # (*): not ideal, see comment in filelog.size
3136 3140 # (?): could be "- len(meta)" if the resolved content has
3137 3141 # rename metadata
3138 3142 #
3139 3143 # Checks needed to be done:
3140 3144 # 1. length check: L1 == L2, in all cases.
3141 3145 # 2. hash check: depending on flag processor, we may need to
3142 3146 # use either "text" (external), or "rawtext" (in revlog).
3143 3147
3144 3148 try:
3145 3149 skipflags = state.get(b'skipflags', 0)
3146 3150 if skipflags:
3147 3151 skipflags &= self.flags(rev)
3148 3152
3149 3153 _verify_revision(self, skipflags, state, node)
3150 3154
3151 3155 l1 = self.rawsize(rev)
3152 3156 l2 = len(self.rawdata(node))
3153 3157
3154 3158 if l1 != l2:
3155 3159 yield revlogproblem(
3156 3160 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3157 3161 node=node,
3158 3162 )
3159 3163
3160 3164 except error.CensoredNodeError:
3161 3165 if state[b'erroroncensored']:
3162 3166 yield revlogproblem(
3163 3167 error=_(b'censored file data'), node=node
3164 3168 )
3165 3169 state[b'skipread'].add(node)
3166 3170 except Exception as e:
3167 3171 yield revlogproblem(
3168 3172 error=_(b'unpacking %s: %s')
3169 3173 % (short(node), stringutil.forcebytestr(e)),
3170 3174 node=node,
3171 3175 )
3172 3176 state[b'skipread'].add(node)
3173 3177
3174 3178 def storageinfo(
3175 3179 self,
3176 3180 exclusivefiles=False,
3177 3181 sharedfiles=False,
3178 3182 revisionscount=False,
3179 3183 trackedsize=False,
3180 3184 storedsize=False,
3181 3185 ):
3182 3186 d = {}
3183 3187
3184 3188 if exclusivefiles:
3185 3189 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3186 3190 if not self._inline:
3187 3191 d[b'exclusivefiles'].append((self.opener, self.datafile))
3188 3192
3189 3193 if sharedfiles:
3190 3194 d[b'sharedfiles'] = []
3191 3195
3192 3196 if revisionscount:
3193 3197 d[b'revisionscount'] = len(self)
3194 3198
3195 3199 if trackedsize:
3196 3200 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3197 3201
3198 3202 if storedsize:
3199 3203 d[b'storedsize'] = sum(
3200 3204 self.opener.stat(path).st_size for path in self.files()
3201 3205 )
3202 3206
3203 3207 return d