revlog: move the details of revlog "v0" index inside revlog.utils.constants...
marmoute
r47615:34e1fa4b default
@@ -1,3272 +1,3267 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullhex,
30 30 nullid,
31 31 nullrev,
32 32 sha1nodeconstants,
33 33 short,
34 34 wdirfilenodeids,
35 35 wdirhex,
36 36 wdirid,
37 37 wdirrev,
38 38 )
39 39 from .i18n import _
40 40 from .pycompat import getattr
41 41 from .revlogutils.constants import (
42 42 FLAG_GENERALDELTA,
43 43 FLAG_INLINE_DATA,
44 INDEX_ENTRY_V0,
44 45 REVLOGV0,
45 46 REVLOGV1,
46 47 REVLOGV1_FLAGS,
47 48 REVLOGV2,
48 49 REVLOGV2_FLAGS,
49 50 REVLOG_DEFAULT_FLAGS,
50 51 REVLOG_DEFAULT_FORMAT,
51 52 REVLOG_DEFAULT_VERSION,
52 53 )
53 54 from .revlogutils.flagutil import (
54 55 REVIDX_DEFAULT_FLAGS,
55 56 REVIDX_ELLIPSIS,
56 57 REVIDX_EXTSTORED,
57 58 REVIDX_FLAGS_ORDER,
58 59 REVIDX_HASCOPIESINFO,
59 60 REVIDX_ISCENSORED,
60 61 REVIDX_RAWTEXT_CHANGING_FLAGS,
61 62 REVIDX_SIDEDATA,
62 63 )
63 64 from .thirdparty import attr
64 65 from . import (
65 66 ancestor,
66 67 dagop,
67 68 error,
68 69 mdiff,
69 70 policy,
70 71 pycompat,
71 72 templatefilters,
72 73 util,
73 74 )
74 75 from .interfaces import (
75 76 repository,
76 77 util as interfaceutil,
77 78 )
78 79 from .revlogutils import (
79 80 deltas as deltautil,
80 81 flagutil,
81 82 nodemap as nodemaputil,
82 83 sidedata as sidedatautil,
83 84 )
84 85 from .utils import (
85 86 storageutil,
86 87 stringutil,
87 88 )
88 89 from .pure import parsers as pureparsers
89 90
90 91 # blanket usage of all the names to prevent pyflakes warnings
91 92 # We need these names available in the module for extensions.
92 93 REVLOGV0
93 94 REVLOGV1
94 95 REVLOGV2
95 96 FLAG_INLINE_DATA
96 97 FLAG_GENERALDELTA
97 98 REVLOG_DEFAULT_FLAGS
98 99 REVLOG_DEFAULT_FORMAT
99 100 REVLOG_DEFAULT_VERSION
100 101 REVLOGV1_FLAGS
101 102 REVLOGV2_FLAGS
102 103 REVIDX_ISCENSORED
103 104 REVIDX_ELLIPSIS
104 105 REVIDX_SIDEDATA
105 106 REVIDX_HASCOPIESINFO
106 107 REVIDX_EXTSTORED
107 108 REVIDX_DEFAULT_FLAGS
108 109 REVIDX_FLAGS_ORDER
109 110 REVIDX_RAWTEXT_CHANGING_FLAGS
110 111
111 112 parsers = policy.importmod('parsers')
112 113 rustancestor = policy.importrust('ancestor')
113 114 rustdagop = policy.importrust('dagop')
114 115 rustrevlog = policy.importrust('revlog')
115 116
116 117 # Aliased for performance.
117 118 _zlibdecompress = zlib.decompress
118 119
119 120 # max size of revlog with inline data
120 121 _maxinline = 131072
121 122 _chunksize = 1048576
122 123
123 124 # Flag processors for REVIDX_ELLIPSIS.
124 125 def ellipsisreadprocessor(rl, text):
125 126 return text, False
126 127
127 128
128 129 def ellipsiswriteprocessor(rl, text):
129 130 return text, False
130 131
131 132
132 133 def ellipsisrawprocessor(rl, text):
133 134 return False
134 135
135 136
136 137 ellipsisprocessor = (
137 138 ellipsisreadprocessor,
138 139 ellipsiswriteprocessor,
139 140 ellipsisrawprocessor,
140 141 )
141 142
142 143
143 144 def getoffset(q):
144 145 return int(q >> 16)
145 146
146 147
147 148 def gettype(q):
148 149 return int(q & 0xFFFF)
149 150
150 151
151 152 def offset_type(offset, type):
152 153 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
153 154 raise ValueError(b'unknown revlog index flags')
154 155 return int(int(offset) << 16 | type)
155 156
156 157
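A self-contained sketch of the 48/16-bit packing these three helpers implement (values are arbitrary):

    # the file offset lives in the upper bits, the 16 flag bits in the lower ones
    def pack(offset, flags):
        return (offset << 16) | flags

    packed = pack(1024, 0)
    assert packed >> 16 == 1024    # what getoffset() recovers
    assert packed & 0xFFFF == 0    # what gettype() recovers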
157 158 def _verify_revision(rl, skipflags, state, node):
158 159 """Verify the integrity of the given revlog ``node`` while providing a hook
159 160 point for extensions to influence the operation."""
160 161 if skipflags:
161 162 state[b'skipread'].add(node)
162 163 else:
163 164 # Side-effect: read content and verify hash.
164 165 rl.revision(node)
165 166
166 167
167 168 # True if a fast implementation for persistent-nodemap is available
168 169 #
169 170 # We also consider the "pure" python implementation to be "fast" because
170 171 # people using pure don't really have performance considerations (and a
171 172 # wheelbarrow of other slowness sources)
172 173 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
173 174 parsers, 'BaseIndexObject'
174 175 )
175 176
176 177
177 178 @attr.s(slots=True, frozen=True)
178 179 class _revisioninfo(object):
179 180 """Information about a revision that allows building its fulltext
180 181 node: expected hash of the revision
181 182 p1, p2: parent revs of the revision
182 183 btext: built text cache consisting of a one-element list
183 184 cachedelta: (baserev, uncompressed_delta) or None
184 185 flags: flags associated with the revision storage
185 186
186 187 One of btext[0] or cachedelta must be set.
187 188 """
188 189
189 190 node = attr.ib()
190 191 p1 = attr.ib()
191 192 p2 = attr.ib()
192 193 btext = attr.ib()
193 194 textlen = attr.ib()
194 195 cachedelta = attr.ib()
195 196 flags = attr.ib()
196 197
197 198
198 199 @interfaceutil.implementer(repository.irevisiondelta)
199 200 @attr.s(slots=True)
200 201 class revlogrevisiondelta(object):
201 202 node = attr.ib()
202 203 p1node = attr.ib()
203 204 p2node = attr.ib()
204 205 basenode = attr.ib()
205 206 flags = attr.ib()
206 207 baserevisionsize = attr.ib()
207 208 revision = attr.ib()
208 209 delta = attr.ib()
209 210 sidedata = attr.ib()
210 211 linknode = attr.ib(default=None)
211 212
212 213
213 214 @interfaceutil.implementer(repository.iverifyproblem)
214 215 @attr.s(frozen=True)
215 216 class revlogproblem(object):
216 217 warning = attr.ib(default=None)
217 218 error = attr.ib(default=None)
218 219 node = attr.ib(default=None)
219 220
220 221
221 # index v0:
222 # 4 bytes: offset
223 # 4 bytes: compressed length
224 # 4 bytes: base rev
225 # 4 bytes: link rev
226 # 20 bytes: parent 1 nodeid
227 # 20 bytes: parent 2 nodeid
228 # 20 bytes: nodeid
229 indexformatv0 = struct.Struct(b">4l20s20s20s")
230 indexformatv0_pack = indexformatv0.pack
231 indexformatv0_unpack = indexformatv0.unpack
232
233
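For reference, a minimal sketch of the relocated definition on the constants side, reconstructed from the struct removed above (the surrounding module layout is an assumption):

    # revlogutils/constants.py (sketch)
    import struct

    # index v0:
    #  4 bytes: offset
    #  4 bytes: compressed length
    #  4 bytes: base rev
    #  4 bytes: link rev
    # 20 bytes: parent 1 nodeid
    # 20 bytes: parent 2 nodeid
    # 20 bytes: nodeid
    INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")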
234 222 class revlogoldindex(list):
235 223 @property
236 224 def nodemap(self):
237 225 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
238 226 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
239 227 return self._nodemap
240 228
241 229 @util.propertycache
242 230 def _nodemap(self):
243 231 nodemap = nodemaputil.NodeMap({nullid: nullrev})
244 232 for r in range(0, len(self)):
245 233 n = self[r][7]
246 234 nodemap[n] = r
247 235 return nodemap
248 236
249 237 def has_node(self, node):
250 238 """return True if the node exists in the index"""
251 239 return node in self._nodemap
252 240
253 241 def rev(self, node):
254 242 """return a revision for a node
255 243
256 244 If the node is unknown, raise a RevlogError"""
257 245 return self._nodemap[node]
258 246
259 247 def get_rev(self, node):
260 248 """return a revision for a node
261 249
262 250 If the node is unknown, return None"""
263 251 return self._nodemap.get(node)
264 252
265 253 def append(self, tup):
266 254 self._nodemap[tup[7]] = len(self)
267 255 super(revlogoldindex, self).append(tup)
268 256
269 257 def __delitem__(self, i):
270 258 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
271 259 raise ValueError(b"deleting slices only supports a:-1 with step 1")
272 260 for r in pycompat.xrange(i.start, len(self)):
273 261 del self._nodemap[self[r][7]]
274 262 super(revlogoldindex, self).__delitem__(i)
275 263
276 264 def clearcaches(self):
277 265 self.__dict__.pop('_nodemap', None)
278 266
279 267 def __getitem__(self, i):
280 268 if i == -1:
281 269 return (0, 0, 0, -1, -1, -1, -1, nullid)
282 270 return list.__getitem__(self, i)
283 271
284 272
285 273 class revlogoldio(object):
286 274 def __init__(self):
287 self.size = indexformatv0.size
275 self.size = INDEX_ENTRY_V0.size
288 276
289 277 def parseindex(self, data, inline):
290 278 s = self.size
291 279 index = []
292 280 nodemap = nodemaputil.NodeMap({nullid: nullrev})
293 281 n = off = 0
294 282 l = len(data)
295 283 while off + s <= l:
296 284 cur = data[off : off + s]
297 285 off += s
298 e = indexformatv0_unpack(cur)
286 e = INDEX_ENTRY_V0.unpack(cur)
299 287 # transform to revlogv1 format
300 288 e2 = (
301 289 offset_type(e[0], 0),
302 290 e[1],
303 291 -1,
304 292 e[2],
305 293 e[3],
306 294 nodemap.get(e[4], nullrev),
307 295 nodemap.get(e[5], nullrev),
308 296 e[6],
309 297 )
310 298 index.append(e2)
311 299 nodemap[e[6]] = n
312 300 n += 1
313 301
314 302 index = revlogoldindex(index)
315 303 return index, None
316 304
317 305 def packentry(self, entry, node, version, rev):
306 """return the binary representation of an entry
307
308 entry: a tuple containing all the values (see index.__getitem__)
309 node: a callback to convert a revision to nodeid
310 version: the revlog version
311 rev: the revision number
312 """
318 313 if gettype(entry[0]):
319 314 raise error.RevlogError(
320 315 _(b'index entry flags need revlog version 1')
321 316 )
322 317 e2 = (
323 318 getoffset(entry[0]),
324 319 entry[1],
325 320 entry[3],
326 321 entry[4],
327 322 node(entry[5]),
328 323 node(entry[6]),
329 324 entry[7],
330 325 )
331 return indexformatv0_pack(*e2)
326 return INDEX_ENTRY_V0.pack(*e2)
332 327
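A hedged sketch of round-tripping a v0 entry through the relocated struct (the entry values are made up):

    import struct

    INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")  # as now defined in constants
    entry = (0, 12, 0, 0, b"\x00" * 20, b"\x00" * 20, b"\x11" * 20)
    raw = INDEX_ENTRY_V0.pack(*entry)
    assert INDEX_ENTRY_V0.size == len(raw) == 76
    assert INDEX_ENTRY_V0.unpack(raw) == entry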
333 328
334 329 # index ng:
335 330 # 6 bytes: offset
336 331 # 2 bytes: flags
337 332 # 4 bytes: compressed length
338 333 # 4 bytes: uncompressed length
339 334 # 4 bytes: base rev
340 335 # 4 bytes: link rev
341 336 # 4 bytes: parent 1 rev
342 337 # 4 bytes: parent 2 rev
343 338 # 32 bytes: nodeid
344 339 indexformatng = struct.Struct(b">Qiiiiii20s12x")
345 340 indexformatng_pack = indexformatng.pack
346 341 versionformat = struct.Struct(b">I")
347 342 versionformat_pack = versionformat.pack
348 343 versionformat_unpack = versionformat.unpack
349 344
350 345 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
351 346 # signed integer)
352 347 _maxentrysize = 0x7FFFFFFF
353 348
354 349
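The 64-byte v1 entry size follows directly from the format string; a quick check:

    import struct

    indexformatng = struct.Struct(b">Qiiiiii20s12x")
    # 8 (offset+flags) + 6 * 4 (ints) + 20 (nodeid) + 12 (padding) = 64
    assert indexformatng.size == 64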
355 350 class revlogio(object):
356 351 def __init__(self):
357 352 self.size = indexformatng.size
358 353
359 354 def parseindex(self, data, inline):
360 355 # call the C implementation to parse the index data
361 356 index, cache = parsers.parse_index2(data, inline)
362 357 return index, cache
363 358
364 359 def packentry(self, entry, node, version, rev):
365 360 p = indexformatng_pack(*entry)
366 361 if rev == 0:
367 362 p = versionformat_pack(version) + p[4:]
368 363 return p
369 364
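A sketch of the rev-0 version stamping done in packentry() above: the first four bytes of the first entry double as the revlog header word (values here are illustrative):

    import struct

    versionformat = struct.Struct(b">I")
    entrybytes = b"\x00" * 64                # a packed 64-byte v1-style entry
    version = 1 | (1 << 16)                  # REVLOGV1 | FLAG_INLINE_DATA
    stamped = versionformat.pack(version) + entrybytes[4:]
    assert len(stamped) == len(entrybytes)
    assert versionformat.unpack(stamped[:4])[0] == version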
370 365
371 366 indexformatv2 = struct.Struct(pureparsers.Index2Mixin.index_format)
372 367 indexformatv2_pack = indexformatv2.pack
373 368
374 369
375 370 class revlogv2io(object):
376 371 def __init__(self):
377 372 self.size = indexformatv2.size
378 373
379 374 def parseindex(self, data, inline):
380 375 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
381 376 return index, cache
382 377
383 378 def packentry(self, entry, node, version, rev):
384 379 p = indexformatv2_pack(*entry)
385 380 if rev == 0:
386 381 p = versionformat_pack(version) + p[4:]
387 382 return p
388 383
389 384
390 385 NodemapRevlogIO = None
391 386
392 387 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
393 388
394 389 class NodemapRevlogIO(revlogio):
395 390 """A debug-oriented IO class that returns a PersistentNodeMapIndexObject
396 391
397 392 The PersistentNodeMapIndexObject is meant to test the persistent nodemap feature.
398 393 """
399 394
400 395 def parseindex(self, data, inline):
401 396 index, cache = parsers.parse_index_devel_nodemap(data, inline)
402 397 return index, cache
403 398
404 399
405 400 class rustrevlogio(revlogio):
406 401 def parseindex(self, data, inline):
407 402 index, cache = super(rustrevlogio, self).parseindex(data, inline)
408 403 return rustrevlog.MixedIndex(index), cache
409 404
410 405
411 406 class revlog(object):
412 407 """
413 408 the underlying revision storage object
414 409
415 410 A revlog consists of two parts, an index and the revision data.
416 411
417 412 The index is a file with a fixed record size containing
418 413 information on each revision, including its nodeid (hash), the
419 414 nodeids of its parents, the position and offset of its data within
420 415 the data file, and the revision it's based on. Finally, each entry
421 416 contains a linkrev entry that can serve as a pointer to external
422 417 data.
423 418
424 419 The revision data itself is a linear collection of data chunks.
425 420 Each chunk represents a revision and is usually represented as a
426 421 delta against the previous chunk. To bound lookup time, runs of
427 422 deltas are limited to about 2 times the length of the original
428 423 version data. This makes retrieval of a version proportional to
429 424 its size, or O(1) relative to the number of revisions.
430 425
431 426 Both pieces of the revlog are written to in an append-only
432 427 fashion, which means we never need to rewrite a file to insert or
433 428 remove data, and can use some simple techniques to avoid the need
434 429 for locking while reading.
435 430
436 431 If checkambig, indexfile is opened with checkambig=True at
437 432 writing, to avoid file stat ambiguity.
438 433
439 434 If mmaplargeindex is True, and an mmapindexthreshold is set, the
440 435 index will be mmapped rather than read if it is larger than the
441 436 configured threshold.
442 437
443 438 If censorable is True, the revlog can have censored revisions.
444 439
445 440 If `upperboundcomp` is not None, this is the expected maximal gain from
446 441 compression for the data content.
447 442
448 443 `concurrencychecker` is an optional function that receives 3 arguments: a
449 444 file handle, a filename, and an expected position. It should check whether
450 445 the current position in the file handle is valid, and log/warn/fail (by
451 446 raising).
452 447 """
453 448
454 449 _flagserrorclass = error.RevlogError
455 450
456 451 def __init__(
457 452 self,
458 453 opener,
459 454 indexfile,
460 455 datafile=None,
461 456 checkambig=False,
462 457 mmaplargeindex=False,
463 458 censorable=False,
464 459 upperboundcomp=None,
465 460 persistentnodemap=False,
466 461 concurrencychecker=None,
467 462 ):
468 463 """
469 464 create a revlog object
470 465
471 466 opener is a function that abstracts the file opening operation
472 467 and can be used to implement COW semantics or the like.
473 468
474 469 """
475 470 self.upperboundcomp = upperboundcomp
476 471 self.indexfile = indexfile
477 472 self.datafile = datafile or (indexfile[:-2] + b".d")
478 473 self.nodemap_file = None
479 474 if persistentnodemap:
480 475 self.nodemap_file = nodemaputil.get_nodemap_file(
481 476 opener, self.indexfile
482 477 )
483 478
484 479 self.opener = opener
485 480 # When True, indexfile is opened with checkambig=True at writing, to
486 481 # avoid file stat ambiguity.
487 482 self._checkambig = checkambig
488 483 self._mmaplargeindex = mmaplargeindex
489 484 self._censorable = censorable
490 485 # 3-tuple of (node, rev, text) for a raw revision.
491 486 self._revisioncache = None
492 487 # Maps rev to chain base rev.
493 488 self._chainbasecache = util.lrucachedict(100)
494 489 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
495 490 self._chunkcache = (0, b'')
496 491 # How much data to read and cache into the raw revlog data cache.
497 492 self._chunkcachesize = 65536
498 493 self._maxchainlen = None
499 494 self._deltabothparents = True
500 495 self.index = None
501 496 self._nodemap_docket = None
502 497 # Mapping of partial identifiers to full nodes.
503 498 self._pcache = {}
504 499 # Mapping of revision integer to full node.
505 500 self._compengine = b'zlib'
506 501 self._compengineopts = {}
507 502 self._maxdeltachainspan = -1
508 503 self._withsparseread = False
509 504 self._sparserevlog = False
510 505 self._srdensitythreshold = 0.50
511 506 self._srmingapsize = 262144
512 507
513 508 # Make copy of flag processors so each revlog instance can support
514 509 # custom flags.
515 510 self._flagprocessors = dict(flagutil.flagprocessors)
516 511
517 512 # 2-tuple of file handles being used for active writing.
518 513 self._writinghandles = None
519 514
520 515 self._loadindex()
521 516
522 517 self._concurrencychecker = concurrencychecker
523 518
524 519 def _loadindex(self):
525 520 mmapindexthreshold = None
526 521 opts = self.opener.options
527 522
528 523 if b'revlogv2' in opts:
529 524 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
530 525 elif b'revlogv1' in opts:
531 526 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
532 527 if b'generaldelta' in opts:
533 528 newversionflags |= FLAG_GENERALDELTA
534 529 elif b'revlogv0' in self.opener.options:
535 530 newversionflags = REVLOGV0
536 531 else:
537 532 newversionflags = REVLOG_DEFAULT_VERSION
538 533
539 534 if b'chunkcachesize' in opts:
540 535 self._chunkcachesize = opts[b'chunkcachesize']
541 536 if b'maxchainlen' in opts:
542 537 self._maxchainlen = opts[b'maxchainlen']
543 538 if b'deltabothparents' in opts:
544 539 self._deltabothparents = opts[b'deltabothparents']
545 540 self._lazydelta = bool(opts.get(b'lazydelta', True))
546 541 self._lazydeltabase = False
547 542 if self._lazydelta:
548 543 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
549 544 if b'compengine' in opts:
550 545 self._compengine = opts[b'compengine']
551 546 if b'zlib.level' in opts:
552 547 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
553 548 if b'zstd.level' in opts:
554 549 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
555 550 if b'maxdeltachainspan' in opts:
556 551 self._maxdeltachainspan = opts[b'maxdeltachainspan']
557 552 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
558 553 mmapindexthreshold = opts[b'mmapindexthreshold']
559 554 self.hassidedata = bool(opts.get(b'side-data', False))
560 555 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
561 556 withsparseread = bool(opts.get(b'with-sparse-read', False))
562 557 # sparse-revlog forces sparse-read
563 558 self._withsparseread = self._sparserevlog or withsparseread
564 559 if b'sparse-read-density-threshold' in opts:
565 560 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
566 561 if b'sparse-read-min-gap-size' in opts:
567 562 self._srmingapsize = opts[b'sparse-read-min-gap-size']
568 563 if opts.get(b'enableellipsis'):
569 564 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
570 565
571 566 # revlog v0 doesn't have flag processors
572 567 for flag, processor in pycompat.iteritems(
573 568 opts.get(b'flagprocessors', {})
574 569 ):
575 570 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
576 571
577 572 if self._chunkcachesize <= 0:
578 573 raise error.RevlogError(
579 574 _(b'revlog chunk cache size %r is not greater than 0')
580 575 % self._chunkcachesize
581 576 )
582 577 elif self._chunkcachesize & (self._chunkcachesize - 1):
583 578 raise error.RevlogError(
584 579 _(b'revlog chunk cache size %r is not a power of 2')
585 580 % self._chunkcachesize
586 581 )
587 582
588 583 indexdata = b''
589 584 self._initempty = True
590 585 try:
591 586 with self._indexfp() as f:
592 587 if (
593 588 mmapindexthreshold is not None
594 589 and self.opener.fstat(f).st_size >= mmapindexthreshold
595 590 ):
596 591 # TODO: should .close() to release resources without
597 592 # relying on Python GC
598 593 indexdata = util.buffer(util.mmapread(f))
599 594 else:
600 595 indexdata = f.read()
601 596 if len(indexdata) > 0:
602 597 versionflags = versionformat_unpack(indexdata[:4])[0]
603 598 self._initempty = False
604 599 else:
605 600 versionflags = newversionflags
606 601 except IOError as inst:
607 602 if inst.errno != errno.ENOENT:
608 603 raise
609 604
610 605 versionflags = newversionflags
611 606
612 607 self.version = versionflags
613 608
614 609 flags = versionflags & ~0xFFFF
615 610 fmt = versionflags & 0xFFFF
616 611
617 612 if fmt == REVLOGV0:
618 613 if flags:
619 614 raise error.RevlogError(
620 615 _(b'unknown flags (%#04x) in version %d revlog %s')
621 616 % (flags >> 16, fmt, self.indexfile)
622 617 )
623 618
624 619 self._inline = False
625 620 self._generaldelta = False
626 621
627 622 elif fmt == REVLOGV1:
628 623 if flags & ~REVLOGV1_FLAGS:
629 624 raise error.RevlogError(
630 625 _(b'unknown flags (%#04x) in version %d revlog %s')
631 626 % (flags >> 16, fmt, self.indexfile)
632 627 )
633 628
634 629 self._inline = versionflags & FLAG_INLINE_DATA
635 630 self._generaldelta = versionflags & FLAG_GENERALDELTA
636 631
637 632 elif fmt == REVLOGV2:
638 633 if flags & ~REVLOGV2_FLAGS:
639 634 raise error.RevlogError(
640 635 _(b'unknown flags (%#04x) in version %d revlog %s')
641 636 % (flags >> 16, fmt, self.indexfile)
642 637 )
643 638
644 639 # There is a bug in the transaction handling when going from an
645 640 # inline revlog to a separate index and data file. Turn it off until
646 641 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
647 642 # See issue6485
648 643 self._inline = False
649 644 # generaldelta implied by version 2 revlogs.
650 645 self._generaldelta = True
651 646
652 647 else:
653 648 raise error.RevlogError(
654 649 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
655 650 )
656 651
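A minimal sketch of how the 32-bit header word decomposes, using the flag values from revlogutils.constants:

    REVLOGV1 = 1                   # low 16 bits: format version
    FLAG_INLINE_DATA = 1 << 16     # high 16 bits: feature flags
    FLAG_GENERALDELTA = 1 << 17

    versionflags = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
    assert versionflags & 0xFFFF == REVLOGV1         # fmt
    assert (versionflags & ~0xFFFF) >> 16 == 0b11    # flags, as shown in errors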
657 652 self.nodeconstants = sha1nodeconstants
658 653 self.nullid = self.nodeconstants.nullid
659 654
660 655 # sparse-revlog can't be on without general-delta (issue6056)
661 656 if not self._generaldelta:
662 657 self._sparserevlog = False
663 658
664 659 self._storedeltachains = True
665 660
666 661 devel_nodemap = (
667 662 self.nodemap_file
668 663 and opts.get(b'devel-force-nodemap', False)
669 664 and NodemapRevlogIO is not None
670 665 )
671 666
672 667 use_rust_index = False
673 668 if rustrevlog is not None:
674 669 if self.nodemap_file is not None:
675 670 use_rust_index = True
676 671 else:
677 672 use_rust_index = self.opener.options.get(b'rust.index')
678 673
679 674 self._io = revlogio()
680 675 if self.version == REVLOGV0:
681 676 self._io = revlogoldio()
682 677 elif fmt == REVLOGV2:
683 678 self._io = revlogv2io()
684 679 elif devel_nodemap:
685 680 self._io = NodemapRevlogIO()
686 681 elif use_rust_index:
687 682 self._io = rustrevlogio()
688 683 try:
689 684 d = self._io.parseindex(indexdata, self._inline)
690 685 index, _chunkcache = d
691 686 use_nodemap = (
692 687 not self._inline
693 688 and self.nodemap_file is not None
694 689 and util.safehasattr(index, 'update_nodemap_data')
695 690 )
696 691 if use_nodemap:
697 692 nodemap_data = nodemaputil.persisted_data(self)
698 693 if nodemap_data is not None:
699 694 docket = nodemap_data[0]
700 695 if (
701 696 len(d[0]) > docket.tip_rev
702 697 and d[0][docket.tip_rev][7] == docket.tip_node
703 698 ):
704 699 # no changelog tampering
705 700 self._nodemap_docket = docket
706 701 index.update_nodemap_data(*nodemap_data)
707 702 except (ValueError, IndexError):
708 703 raise error.RevlogError(
709 704 _(b"index %s is corrupted") % self.indexfile
710 705 )
711 706 self.index, self._chunkcache = d
712 707 if not self._chunkcache:
713 708 self._chunkclear()
714 709 # revnum -> (chain-length, sum-delta-length)
715 710 self._chaininfocache = util.lrucachedict(500)
716 711 # revlog header -> revlog compressor
717 712 self._decompressors = {}
718 713
719 714 @util.propertycache
720 715 def _compressor(self):
721 716 engine = util.compengines[self._compengine]
722 717 return engine.revlogcompressor(self._compengineopts)
723 718
724 719 def _indexfp(self, mode=b'r'):
725 720 """file object for the revlog's index file"""
726 721 args = {'mode': mode}
727 722 if mode != b'r':
728 723 args['checkambig'] = self._checkambig
729 724 if mode == b'w':
730 725 args['atomictemp'] = True
731 726 return self.opener(self.indexfile, **args)
732 727
733 728 def _datafp(self, mode=b'r'):
734 729 """file object for the revlog's data file"""
735 730 return self.opener(self.datafile, mode=mode)
736 731
737 732 @contextlib.contextmanager
738 733 def _datareadfp(self, existingfp=None):
739 734 """file object suitable to read data"""
740 735 # Use explicit file handle, if given.
741 736 if existingfp is not None:
742 737 yield existingfp
743 738
744 739 # Use a file handle being actively used for writes, if available.
745 740 # There is some danger to doing this because reads will seek the
746 741 # file. However, _writeentry() performs a SEEK_END before all writes,
747 742 # so we should be safe.
748 743 elif self._writinghandles:
749 744 if self._inline:
750 745 yield self._writinghandles[0]
751 746 else:
752 747 yield self._writinghandles[1]
753 748
754 749 # Otherwise open a new file handle.
755 750 else:
756 751 if self._inline:
757 752 func = self._indexfp
758 753 else:
759 754 func = self._datafp
760 755 with func() as fp:
761 756 yield fp
762 757
763 758 def tiprev(self):
764 759 return len(self.index) - 1
765 760
766 761 def tip(self):
767 762 return self.node(self.tiprev())
768 763
769 764 def __contains__(self, rev):
770 765 return 0 <= rev < len(self)
771 766
772 767 def __len__(self):
773 768 return len(self.index)
774 769
775 770 def __iter__(self):
776 771 return iter(pycompat.xrange(len(self)))
777 772
778 773 def revs(self, start=0, stop=None):
779 774 """iterate over all revs in this revlog (from start to stop)"""
780 775 return storageutil.iterrevs(len(self), start=start, stop=stop)
781 776
782 777 @property
783 778 def nodemap(self):
784 779 msg = (
785 780 b"revlog.nodemap is deprecated, "
786 781 b"use revlog.index.[has_node|rev|get_rev]"
787 782 )
788 783 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
789 784 return self.index.nodemap
790 785
791 786 @property
792 787 def _nodecache(self):
793 788 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
794 789 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
795 790 return self.index.nodemap
796 791
797 792 def hasnode(self, node):
798 793 try:
799 794 self.rev(node)
800 795 return True
801 796 except KeyError:
802 797 return False
803 798
804 799 def candelta(self, baserev, rev):
805 800 """whether two revisions (baserev, rev) can be delta-ed or not"""
806 801 # Disable delta if either rev requires a content-changing flag
807 802 # processor (ex. LFS). This is because such flag processor can alter
808 803 # the rawtext content that the delta will be based on, and two clients
809 804 # could have a same revlog node with different flags (i.e. different
810 805 # rawtext contents) and the delta could be incompatible.
811 806 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
812 807 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
813 808 ):
814 809 return False
815 810 return True
816 811
817 812 def update_caches(self, transaction):
818 813 if self.nodemap_file is not None:
819 814 if transaction is None:
820 815 nodemaputil.update_persistent_nodemap(self)
821 816 else:
822 817 nodemaputil.setup_persistent_nodemap(transaction, self)
823 818
824 819 def clearcaches(self):
825 820 self._revisioncache = None
826 821 self._chainbasecache.clear()
827 822 self._chunkcache = (0, b'')
828 823 self._pcache = {}
829 824 self._nodemap_docket = None
830 825 self.index.clearcaches()
831 826 # The python code is the one responsible for validating the docket, we
832 827 # end up having to refresh it here.
833 828 use_nodemap = (
834 829 not self._inline
835 830 and self.nodemap_file is not None
836 831 and util.safehasattr(self.index, 'update_nodemap_data')
837 832 )
838 833 if use_nodemap:
839 834 nodemap_data = nodemaputil.persisted_data(self)
840 835 if nodemap_data is not None:
841 836 self._nodemap_docket = nodemap_data[0]
842 837 self.index.update_nodemap_data(*nodemap_data)
843 838
844 839 def rev(self, node):
845 840 try:
846 841 return self.index.rev(node)
847 842 except TypeError:
848 843 raise
849 844 except error.RevlogError:
850 845 # parsers.c radix tree lookup failed
851 846 if node == wdirid or node in wdirfilenodeids:
852 847 raise error.WdirUnsupported
853 848 raise error.LookupError(node, self.indexfile, _(b'no node'))
854 849
855 850 # Accessors for index entries.
856 851
857 852 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
858 853 # are flags.
859 854 def start(self, rev):
860 855 return int(self.index[rev][0] >> 16)
861 856
862 857 def flags(self, rev):
863 858 return self.index[rev][0] & 0xFFFF
864 859
865 860 def length(self, rev):
866 861 return self.index[rev][1]
867 862
868 863 def sidedata_length(self, rev):
869 864 if self.version & 0xFFFF != REVLOGV2:
870 865 return 0
871 866 return self.index[rev][9]
872 867
873 868 def rawsize(self, rev):
874 869 """return the length of the uncompressed text for a given revision"""
875 870 l = self.index[rev][2]
876 871 if l >= 0:
877 872 return l
878 873
879 874 t = self.rawdata(rev)
880 875 return len(t)
881 876
882 877 def size(self, rev):
883 878 """length of non-raw text (processed by a "read" flag processor)"""
884 879 # fast path: if no "read" flag processor could change the content,
885 880 # size is rawsize. note: ELLIPSIS is known to not change the content.
886 881 flags = self.flags(rev)
887 882 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
888 883 return self.rawsize(rev)
889 884
890 885 return len(self.revision(rev, raw=False))
891 886
892 887 def chainbase(self, rev):
893 888 base = self._chainbasecache.get(rev)
894 889 if base is not None:
895 890 return base
896 891
897 892 index = self.index
898 893 iterrev = rev
899 894 base = index[iterrev][3]
900 895 while base != iterrev:
901 896 iterrev = base
902 897 base = index[iterrev][3]
903 898
904 899 self._chainbasecache[rev] = base
905 900 return base
906 901
907 902 def linkrev(self, rev):
908 903 return self.index[rev][4]
909 904
910 905 def parentrevs(self, rev):
911 906 try:
912 907 entry = self.index[rev]
913 908 except IndexError:
914 909 if rev == wdirrev:
915 910 raise error.WdirUnsupported
916 911 raise
917 912 if entry[5] == nullrev:
918 913 return entry[6], entry[5]
919 914 else:
920 915 return entry[5], entry[6]
921 916
922 917 # fast parentrevs(rev) where rev isn't filtered
923 918 _uncheckedparentrevs = parentrevs
924 919
925 920 def node(self, rev):
926 921 try:
927 922 return self.index[rev][7]
928 923 except IndexError:
929 924 if rev == wdirrev:
930 925 raise error.WdirUnsupported
931 926 raise
932 927
933 928 # Derived from index values.
934 929
935 930 def end(self, rev):
936 931 return self.start(rev) + self.length(rev)
937 932
938 933 def parents(self, node):
939 934 i = self.index
940 935 d = i[self.rev(node)]
941 936 # inline node() to avoid function call overhead
942 937 if d[5] == nullrev:
943 938 return i[d[6]][7], i[d[5]][7]
944 939 else:
945 940 return i[d[5]][7], i[d[6]][7]
946 941
947 942 def chainlen(self, rev):
948 943 return self._chaininfo(rev)[0]
949 944
950 945 def _chaininfo(self, rev):
951 946 chaininfocache = self._chaininfocache
952 947 if rev in chaininfocache:
953 948 return chaininfocache[rev]
954 949 index = self.index
955 950 generaldelta = self._generaldelta
956 951 iterrev = rev
957 952 e = index[iterrev]
958 953 clen = 0
959 954 compresseddeltalen = 0
960 955 while iterrev != e[3]:
961 956 clen += 1
962 957 compresseddeltalen += e[1]
963 958 if generaldelta:
964 959 iterrev = e[3]
965 960 else:
966 961 iterrev -= 1
967 962 if iterrev in chaininfocache:
968 963 t = chaininfocache[iterrev]
969 964 clen += t[0]
970 965 compresseddeltalen += t[1]
971 966 break
972 967 e = index[iterrev]
973 968 else:
974 969 # Add text length of base since decompressing that also takes
975 970 # work. For cache hits the length is already included.
976 971 compresseddeltalen += e[1]
977 972 r = (clen, compresseddeltalen)
978 973 chaininfocache[rev] = r
979 974 return r
980 975
981 976 def _deltachain(self, rev, stoprev=None):
982 977 """Obtain the delta chain for a revision.
983 978
984 979 ``stoprev`` specifies a revision to stop at. If not specified, we
985 980 stop at the base of the chain.
986 981
987 982 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
988 983 revs in ascending order and ``stopped`` is a bool indicating whether
989 984 ``stoprev`` was hit.
990 985 """
991 986 # Try C implementation.
992 987 try:
993 988 return self.index.deltachain(rev, stoprev, self._generaldelta)
994 989 except AttributeError:
995 990 pass
996 991
997 992 chain = []
998 993
999 994 # Alias to prevent attribute lookup in tight loop.
1000 995 index = self.index
1001 996 generaldelta = self._generaldelta
1002 997
1003 998 iterrev = rev
1004 999 e = index[iterrev]
1005 1000 while iterrev != e[3] and iterrev != stoprev:
1006 1001 chain.append(iterrev)
1007 1002 if generaldelta:
1008 1003 iterrev = e[3]
1009 1004 else:
1010 1005 iterrev -= 1
1011 1006 e = index[iterrev]
1012 1007
1013 1008 if iterrev == stoprev:
1014 1009 stopped = True
1015 1010 else:
1016 1011 chain.append(iterrev)
1017 1012 stopped = False
1018 1013
1019 1014 chain.reverse()
1020 1015 return chain, stopped
1021 1016
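A toy model of the pure-python fallback above, with hypothetical delta bases (rev 0 is a full snapshot, rev 2 deltas against rev 1):

    basemap = {0: 0, 1: 0, 2: 1}   # rev -> delta base rev

    def deltachain(rev):
        chain = []
        while basemap[rev] != rev:
            chain.append(rev)
            rev = basemap[rev]
        chain.append(rev)
        chain.reverse()
        return chain

    assert deltachain(2) == [0, 1, 2]  # apply deltas of 1 and 2 onto rev 0's text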
1022 1017 def ancestors(self, revs, stoprev=0, inclusive=False):
1023 1018 """Generate the ancestors of 'revs' in reverse revision order.
1024 1019 Does not generate revs lower than stoprev.
1025 1020
1026 1021 See the documentation for ancestor.lazyancestors for more details."""
1027 1022
1028 1023 # first, make sure start revisions aren't filtered
1029 1024 revs = list(revs)
1030 1025 checkrev = self.node
1031 1026 for r in revs:
1032 1027 checkrev(r)
1033 1028 # and we're sure ancestors aren't filtered as well
1034 1029
1035 1030 if rustancestor is not None:
1036 1031 lazyancestors = rustancestor.LazyAncestors
1037 1032 arg = self.index
1038 1033 else:
1039 1034 lazyancestors = ancestor.lazyancestors
1040 1035 arg = self._uncheckedparentrevs
1041 1036 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1042 1037
1043 1038 def descendants(self, revs):
1044 1039 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1045 1040
1046 1041 def findcommonmissing(self, common=None, heads=None):
1047 1042 """Return a tuple of the ancestors of common and the ancestors of heads
1048 1043 that are not ancestors of common. In revset terminology, we return the
1049 1044 tuple:
1050 1045
1051 1046 ::common, (::heads) - (::common)
1052 1047
1053 1048 The list is sorted by revision number, meaning it is
1054 1049 topologically sorted.
1055 1050
1056 1051 'heads' and 'common' are both lists of node IDs. If heads is
1057 1052 not supplied, uses all of the revlog's heads. If common is not
1058 1053 supplied, uses nullid."""
1059 1054 if common is None:
1060 1055 common = [nullid]
1061 1056 if heads is None:
1062 1057 heads = self.heads()
1063 1058
1064 1059 common = [self.rev(n) for n in common]
1065 1060 heads = [self.rev(n) for n in heads]
1066 1061
1067 1062 # we want the ancestors, but inclusive
1068 1063 class lazyset(object):
1069 1064 def __init__(self, lazyvalues):
1070 1065 self.addedvalues = set()
1071 1066 self.lazyvalues = lazyvalues
1072 1067
1073 1068 def __contains__(self, value):
1074 1069 return value in self.addedvalues or value in self.lazyvalues
1075 1070
1076 1071 def __iter__(self):
1077 1072 added = self.addedvalues
1078 1073 for r in added:
1079 1074 yield r
1080 1075 for r in self.lazyvalues:
1081 1076 if r not in added:
1082 1077 yield r
1083 1078
1084 1079 def add(self, value):
1085 1080 self.addedvalues.add(value)
1086 1081
1087 1082 def update(self, values):
1088 1083 self.addedvalues.update(values)
1089 1084
1090 1085 has = lazyset(self.ancestors(common))
1091 1086 has.add(nullrev)
1092 1087 has.update(common)
1093 1088
1094 1089 # take all ancestors from heads that aren't in has
1095 1090 missing = set()
1096 1091 visit = collections.deque(r for r in heads if r not in has)
1097 1092 while visit:
1098 1093 r = visit.popleft()
1099 1094 if r in missing:
1100 1095 continue
1101 1096 else:
1102 1097 missing.add(r)
1103 1098 for p in self.parentrevs(r):
1104 1099 if p not in has:
1105 1100 visit.append(p)
1106 1101 missing = list(missing)
1107 1102 missing.sort()
1108 1103 return has, [self.node(miss) for miss in missing]
1109 1104
1110 1105 def incrementalmissingrevs(self, common=None):
1111 1106 """Return an object that can be used to incrementally compute the
1112 1107 revision numbers of the ancestors of arbitrary sets that are not
1113 1108 ancestors of common. This is an ancestor.incrementalmissingancestors
1114 1109 object.
1115 1110
1116 1111 'common' is a list of revision numbers. If common is not supplied, uses
1117 1112 nullrev.
1118 1113 """
1119 1114 if common is None:
1120 1115 common = [nullrev]
1121 1116
1122 1117 if rustancestor is not None:
1123 1118 return rustancestor.MissingAncestors(self.index, common)
1124 1119 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1125 1120
1126 1121 def findmissingrevs(self, common=None, heads=None):
1127 1122 """Return the revision numbers of the ancestors of heads that
1128 1123 are not ancestors of common.
1129 1124
1130 1125 More specifically, return a list of revision numbers corresponding to
1131 1126 nodes N such that every N satisfies the following constraints:
1132 1127
1133 1128 1. N is an ancestor of some node in 'heads'
1134 1129 2. N is not an ancestor of any node in 'common'
1135 1130
1136 1131 The list is sorted by revision number, meaning it is
1137 1132 topologically sorted.
1138 1133
1139 1134 'heads' and 'common' are both lists of revision numbers. If heads is
1140 1135 not supplied, uses all of the revlog's heads. If common is not
1141 1136 supplied, uses nullid."""
1142 1137 if common is None:
1143 1138 common = [nullrev]
1144 1139 if heads is None:
1145 1140 heads = self.headrevs()
1146 1141
1147 1142 inc = self.incrementalmissingrevs(common=common)
1148 1143 return inc.missingancestors(heads)
1149 1144
1150 1145 def findmissing(self, common=None, heads=None):
1151 1146 """Return the ancestors of heads that are not ancestors of common.
1152 1147
1153 1148 More specifically, return a list of nodes N such that every N
1154 1149 satisfies the following constraints:
1155 1150
1156 1151 1. N is an ancestor of some node in 'heads'
1157 1152 2. N is not an ancestor of any node in 'common'
1158 1153
1159 1154 The list is sorted by revision number, meaning it is
1160 1155 topologically sorted.
1161 1156
1162 1157 'heads' and 'common' are both lists of node IDs. If heads is
1163 1158 not supplied, uses all of the revlog's heads. If common is not
1164 1159 supplied, uses nullid."""
1165 1160 if common is None:
1166 1161 common = [nullid]
1167 1162 if heads is None:
1168 1163 heads = self.heads()
1169 1164
1170 1165 common = [self.rev(n) for n in common]
1171 1166 heads = [self.rev(n) for n in heads]
1172 1167
1173 1168 inc = self.incrementalmissingrevs(common=common)
1174 1169 return [self.node(r) for r in inc.missingancestors(heads)]
1175 1170
1176 1171 def nodesbetween(self, roots=None, heads=None):
1177 1172 """Return a topological path from 'roots' to 'heads'.
1178 1173
1179 1174 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1180 1175 topologically sorted list of all nodes N that satisfy both of
1181 1176 these constraints:
1182 1177
1183 1178 1. N is a descendant of some node in 'roots'
1184 1179 2. N is an ancestor of some node in 'heads'
1185 1180
1186 1181 Every node is considered to be both a descendant and an ancestor
1187 1182 of itself, so every reachable node in 'roots' and 'heads' will be
1188 1183 included in 'nodes'.
1189 1184
1190 1185 'outroots' is the list of reachable nodes in 'roots', i.e., the
1191 1186 subset of 'roots' that is returned in 'nodes'. Likewise,
1192 1187 'outheads' is the subset of 'heads' that is also in 'nodes'.
1193 1188
1194 1189 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1195 1190 unspecified, uses nullid as the only root. If 'heads' is
1196 1191 unspecified, uses list of all of the revlog's heads."""
1197 1192 nonodes = ([], [], [])
1198 1193 if roots is not None:
1199 1194 roots = list(roots)
1200 1195 if not roots:
1201 1196 return nonodes
1202 1197 lowestrev = min([self.rev(n) for n in roots])
1203 1198 else:
1204 1199 roots = [nullid] # Everybody's a descendant of nullid
1205 1200 lowestrev = nullrev
1206 1201 if (lowestrev == nullrev) and (heads is None):
1207 1202 # We want _all_ the nodes!
1208 1203 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1209 1204 if heads is None:
1210 1205 # All nodes are ancestors, so the latest ancestor is the last
1211 1206 # node.
1212 1207 highestrev = len(self) - 1
1213 1208 # Set ancestors to None to signal that every node is an ancestor.
1214 1209 ancestors = None
1215 1210 # Set heads to an empty dictionary for later discovery of heads
1216 1211 heads = {}
1217 1212 else:
1218 1213 heads = list(heads)
1219 1214 if not heads:
1220 1215 return nonodes
1221 1216 ancestors = set()
1222 1217 # Turn heads into a dictionary so we can remove 'fake' heads.
1223 1218 # Also, later we will be using it to filter out the heads we can't
1224 1219 # find from roots.
1225 1220 heads = dict.fromkeys(heads, False)
1226 1221 # Start at the top and keep marking parents until we're done.
1227 1222 nodestotag = set(heads)
1228 1223 # Remember where the top was so we can use it as a limit later.
1229 1224 highestrev = max([self.rev(n) for n in nodestotag])
1230 1225 while nodestotag:
1231 1226 # grab a node to tag
1232 1227 n = nodestotag.pop()
1233 1228 # Never tag nullid
1234 1229 if n == nullid:
1235 1230 continue
1236 1231 # A node's revision number represents its place in a
1237 1232 # topologically sorted list of nodes.
1238 1233 r = self.rev(n)
1239 1234 if r >= lowestrev:
1240 1235 if n not in ancestors:
1241 1236 # If we are possibly a descendant of one of the roots
1242 1237 # and we haven't already been marked as an ancestor
1243 1238 ancestors.add(n) # Mark as ancestor
1244 1239 # Add non-nullid parents to list of nodes to tag.
1245 1240 nodestotag.update(
1246 1241 [p for p in self.parents(n) if p != nullid]
1247 1242 )
1248 1243 elif n in heads: # We've seen it before, is it a fake head?
1249 1244 # So it is, real heads should not be the ancestors of
1250 1245 # any other heads.
1251 1246 heads.pop(n)
1252 1247 if not ancestors:
1253 1248 return nonodes
1254 1249 # Now that we have our set of ancestors, we want to remove any
1255 1250 # roots that are not ancestors.
1256 1251
1257 1252 # If one of the roots was nullid, everything is included anyway.
1258 1253 if lowestrev > nullrev:
1259 1254 # But, since we weren't, let's recompute the lowest rev to not
1260 1255 # include roots that aren't ancestors.
1261 1256
1262 1257 # Filter out roots that aren't ancestors of heads
1263 1258 roots = [root for root in roots if root in ancestors]
1264 1259 # Recompute the lowest revision
1265 1260 if roots:
1266 1261 lowestrev = min([self.rev(root) for root in roots])
1267 1262 else:
1268 1263 # No more roots? Return empty list
1269 1264 return nonodes
1270 1265 else:
1271 1266 # We are descending from nullid, and don't need to care about
1272 1267 # any other roots.
1273 1268 lowestrev = nullrev
1274 1269 roots = [nullid]
1275 1270 # Transform our roots list into a set.
1276 1271 descendants = set(roots)
1277 1272 # Also, keep the original roots so we can filter out roots that aren't
1278 1273 # 'real' roots (i.e. are descended from other roots).
1279 1274 roots = descendants.copy()
1280 1275 # Our topologically sorted list of output nodes.
1281 1276 orderedout = []
1282 1277 # Don't start at nullid since we don't want nullid in our output list,
1283 1278 # and if nullid shows up in descendants, empty parents will look like
1284 1279 # they're descendants.
1285 1280 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1286 1281 n = self.node(r)
1287 1282 isdescendant = False
1288 1283 if lowestrev == nullrev: # Everybody is a descendant of nullid
1289 1284 isdescendant = True
1290 1285 elif n in descendants:
1291 1286 # n is already a descendant
1292 1287 isdescendant = True
1293 1288 # This check only needs to be done here because all the roots
1294 1289 # will start being marked as descendants before the loop.
1295 1290 if n in roots:
1296 1291 # If n was a root, check if it's a 'real' root.
1297 1292 p = tuple(self.parents(n))
1298 1293 # If any of its parents are descendants, it's not a root.
1299 1294 if (p[0] in descendants) or (p[1] in descendants):
1300 1295 roots.remove(n)
1301 1296 else:
1302 1297 p = tuple(self.parents(n))
1303 1298 # A node is a descendant if either of its parents are
1304 1299 # descendants. (We seeded the descendants set with the roots
1305 1300 # up there, remember?)
1306 1301 if (p[0] in descendants) or (p[1] in descendants):
1307 1302 descendants.add(n)
1308 1303 isdescendant = True
1309 1304 if isdescendant and ((ancestors is None) or (n in ancestors)):
1310 1305 # Only include nodes that are both descendants and ancestors.
1311 1306 orderedout.append(n)
1312 1307 if (ancestors is not None) and (n in heads):
1313 1308 # We're trying to figure out which heads are reachable
1314 1309 # from roots.
1315 1310 # Mark this head as having been reached
1316 1311 heads[n] = True
1317 1312 elif ancestors is None:
1318 1313 # Otherwise, we're trying to discover the heads.
1319 1314 # Assume this is a head because if it isn't, the next step
1320 1315 # will eventually remove it.
1321 1316 heads[n] = True
1322 1317 # But, obviously its parents aren't.
1323 1318 for p in self.parents(n):
1324 1319 heads.pop(p, None)
1325 1320 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1326 1321 roots = list(roots)
1327 1322 assert orderedout
1328 1323 assert roots
1329 1324 assert heads
1330 1325 return (orderedout, roots, heads)
1331 1326
1332 1327 def headrevs(self, revs=None):
1333 1328 if revs is None:
1334 1329 try:
1335 1330 return self.index.headrevs()
1336 1331 except AttributeError:
1337 1332 return self._headrevs()
1338 1333 if rustdagop is not None:
1339 1334 return rustdagop.headrevs(self.index, revs)
1340 1335 return dagop.headrevs(revs, self._uncheckedparentrevs)
1341 1336
1342 1337 def computephases(self, roots):
1343 1338 return self.index.computephasesmapsets(roots)
1344 1339
1345 1340 def _headrevs(self):
1346 1341 count = len(self)
1347 1342 if not count:
1348 1343 return [nullrev]
1349 1344 # we won't iterate over filtered revs, so nobody is a head at start
1350 1345 ishead = [0] * (count + 1)
1351 1346 index = self.index
1352 1347 for r in self:
1353 1348 ishead[r] = 1 # I may be a head
1354 1349 e = index[r]
1355 1350 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1356 1351 return [r for r, val in enumerate(ishead) if val]
1357 1352
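The marking pass in miniature, with hypothetical parent data: every rev starts as a candidate head and is cleared once it is seen as a parent:

    parentmap = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}  # rev -> (p1, p2)
    ishead = [1] * len(parentmap)
    for r, parents in parentmap.items():
        for p in parents:
            if p >= 0:
                ishead[p] = 0
    assert [r for r, v in enumerate(ishead) if v] == [1, 2]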
1358 1353 def heads(self, start=None, stop=None):
1359 1354 """return the list of all nodes that have no children
1360 1355
1361 1356 if start is specified, only heads that are descendants of
1362 1357 start will be returned
1363 1358 if stop is specified, it will consider all the revs from stop
1364 1359 as if they had no children
1365 1360 """
1366 1361 if start is None and stop is None:
1367 1362 if not len(self):
1368 1363 return [nullid]
1369 1364 return [self.node(r) for r in self.headrevs()]
1370 1365
1371 1366 if start is None:
1372 1367 start = nullrev
1373 1368 else:
1374 1369 start = self.rev(start)
1375 1370
1376 1371 stoprevs = {self.rev(n) for n in stop or []}
1377 1372
1378 1373 revs = dagop.headrevssubset(
1379 1374 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1380 1375 )
1381 1376
1382 1377 return [self.node(rev) for rev in revs]
1383 1378
1384 1379 def children(self, node):
1385 1380 """find the children of a given node"""
1386 1381 c = []
1387 1382 p = self.rev(node)
1388 1383 for r in self.revs(start=p + 1):
1389 1384 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1390 1385 if prevs:
1391 1386 for pr in prevs:
1392 1387 if pr == p:
1393 1388 c.append(self.node(r))
1394 1389 elif p == nullrev:
1395 1390 c.append(self.node(r))
1396 1391 return c
1397 1392
1398 1393 def commonancestorsheads(self, a, b):
1399 1394 """calculate all the heads of the common ancestors of nodes a and b"""
1400 1395 a, b = self.rev(a), self.rev(b)
1401 1396 ancs = self._commonancestorsheads(a, b)
1402 1397 return pycompat.maplist(self.node, ancs)
1403 1398
1404 1399 def _commonancestorsheads(self, *revs):
1405 1400 """calculate all the heads of the common ancestors of revs"""
1406 1401 try:
1407 1402 ancs = self.index.commonancestorsheads(*revs)
1408 1403 except (AttributeError, OverflowError): # C implementation failed
1409 1404 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1410 1405 return ancs
1411 1406
1412 1407 def isancestor(self, a, b):
1413 1408 """return True if node a is an ancestor of node b
1414 1409
1415 1410 A revision is considered an ancestor of itself."""
1416 1411 a, b = self.rev(a), self.rev(b)
1417 1412 return self.isancestorrev(a, b)
1418 1413
1419 1414 def isancestorrev(self, a, b):
1420 1415 """return True if revision a is an ancestor of revision b
1421 1416
1422 1417 A revision is considered an ancestor of itself.
1423 1418
1424 1419 The implementation of this is trivial but the use of
1425 1420 reachableroots is not."""
1426 1421 if a == nullrev:
1427 1422 return True
1428 1423 elif a == b:
1429 1424 return True
1430 1425 elif a > b:
1431 1426 return False
1432 1427 return bool(self.reachableroots(a, [b], [a], includepath=False))
1433 1428
1434 1429 def reachableroots(self, minroot, heads, roots, includepath=False):
1435 1430 """return (heads(::(<roots> and <roots>::<heads>)))
1436 1431
1437 1432 If includepath is True, return (<roots>::<heads>)."""
1438 1433 try:
1439 1434 return self.index.reachableroots2(
1440 1435 minroot, heads, roots, includepath
1441 1436 )
1442 1437 except AttributeError:
1443 1438 return dagop._reachablerootspure(
1444 1439 self.parentrevs, minroot, roots, heads, includepath
1445 1440 )
1446 1441
1447 1442 def ancestor(self, a, b):
1448 1443 """calculate the "best" common ancestor of nodes a and b"""
1449 1444
1450 1445 a, b = self.rev(a), self.rev(b)
1451 1446 try:
1452 1447 ancs = self.index.ancestors(a, b)
1453 1448 except (AttributeError, OverflowError):
1454 1449 ancs = ancestor.ancestors(self.parentrevs, a, b)
1455 1450 if ancs:
1456 1451 # choose a consistent winner when there's a tie
1457 1452 return min(map(self.node, ancs))
1458 1453 return nullid
1459 1454
1460 1455 def _match(self, id):
1461 1456 if isinstance(id, int):
1462 1457 # rev
1463 1458 return self.node(id)
1464 1459 if len(id) == 20:
1465 1460 # possibly a binary node
1466 1461 # odds of a binary node being all hex in ASCII are 1 in 10**25
1467 1462 try:
1468 1463 node = id
1469 1464 self.rev(node) # quick search the index
1470 1465 return node
1471 1466 except error.LookupError:
1472 1467 pass # may be partial hex id
1473 1468 try:
1474 1469 # str(rev)
1475 1470 rev = int(id)
1476 1471 if b"%d" % rev != id:
1477 1472 raise ValueError
1478 1473 if rev < 0:
1479 1474 rev = len(self) + rev
1480 1475 if rev < 0 or rev >= len(self):
1481 1476 raise ValueError
1482 1477 return self.node(rev)
1483 1478 except (ValueError, OverflowError):
1484 1479 pass
1485 1480 if len(id) == 40:
1486 1481 try:
1487 1482 # a full hex nodeid?
1488 1483 node = bin(id)
1489 1484 self.rev(node)
1490 1485 return node
1491 1486 except (TypeError, error.LookupError):
1492 1487 pass
1493 1488
1494 1489 def _partialmatch(self, id):
1495 1490 # we don't care about wdirfilenodeids as they should always be full hashes
1496 1491 maybewdir = wdirhex.startswith(id)
1497 1492 try:
1498 1493 partial = self.index.partialmatch(id)
1499 1494 if partial and self.hasnode(partial):
1500 1495 if maybewdir:
1501 1496 # single 'ff...' match in radix tree, ambiguous with wdir
1502 1497 raise error.RevlogError
1503 1498 return partial
1504 1499 if maybewdir:
1505 1500 # no 'ff...' match in radix tree, wdir identified
1506 1501 raise error.WdirUnsupported
1507 1502 return None
1508 1503 except error.RevlogError:
1509 1504 # parsers.c radix tree lookup gave multiple matches
1510 1505 # fast path: for unfiltered changelog, radix tree is accurate
1511 1506 if not getattr(self, 'filteredrevs', None):
1512 1507 raise error.AmbiguousPrefixLookupError(
1513 1508 id, self.indexfile, _(b'ambiguous identifier')
1514 1509 )
1515 1510 # fall through to slow path that filters hidden revisions
1516 1511 except (AttributeError, ValueError):
1517 1512 # we are pure python, or key was too short to search radix tree
1518 1513 pass
1519 1514
1520 1515 if id in self._pcache:
1521 1516 return self._pcache[id]
1522 1517
1523 1518 if len(id) <= 40:
1524 1519 try:
1525 1520 # hex(node)[:...]
1526 1521 l = len(id) // 2 # grab an even number of digits
1527 1522 prefix = bin(id[: l * 2])
1528 1523 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1529 1524 nl = [
1530 1525 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1531 1526 ]
1532 1527 if nullhex.startswith(id):
1533 1528 nl.append(nullid)
1534 1529 if len(nl) > 0:
1535 1530 if len(nl) == 1 and not maybewdir:
1536 1531 self._pcache[id] = nl[0]
1537 1532 return nl[0]
1538 1533 raise error.AmbiguousPrefixLookupError(
1539 1534 id, self.indexfile, _(b'ambiguous identifier')
1540 1535 )
1541 1536 if maybewdir:
1542 1537 raise error.WdirUnsupported
1543 1538 return None
1544 1539 except TypeError:
1545 1540 pass
1546 1541
1547 1542 def lookup(self, id):
1548 1543 """locate a node based on:
1549 1544 - revision number or str(revision number)
1550 1545 - nodeid or subset of hex nodeid
1551 1546 """
1552 1547 n = self._match(id)
1553 1548 if n is not None:
1554 1549 return n
1555 1550 n = self._partialmatch(id)
1556 1551 if n:
1557 1552 return n
1558 1553
1559 1554 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1560 1555
1561 1556 def shortest(self, node, minlength=1):
1562 1557 """Find the shortest unambiguous prefix that matches node."""
1563 1558
1564 1559 def isvalid(prefix):
1565 1560 try:
1566 1561 matchednode = self._partialmatch(prefix)
1567 1562 except error.AmbiguousPrefixLookupError:
1568 1563 return False
1569 1564 except error.WdirUnsupported:
1570 1565 # single 'ff...' match
1571 1566 return True
1572 1567 if matchednode is None:
1573 1568 raise error.LookupError(node, self.indexfile, _(b'no node'))
1574 1569 return True
1575 1570
1576 1571 def maybewdir(prefix):
1577 1572 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1578 1573
1579 1574 hexnode = hex(node)
1580 1575
1581 1576 def disambiguate(hexnode, minlength):
1582 1577 """Disambiguate against wdirid."""
1583 1578 for length in range(minlength, len(hexnode) + 1):
1584 1579 prefix = hexnode[:length]
1585 1580 if not maybewdir(prefix):
1586 1581 return prefix
1587 1582
1588 1583 if not getattr(self, 'filteredrevs', None):
1589 1584 try:
1590 1585 length = max(self.index.shortest(node), minlength)
1591 1586 return disambiguate(hexnode, length)
1592 1587 except error.RevlogError:
1593 1588 if node != wdirid:
1594 1589 raise error.LookupError(node, self.indexfile, _(b'no node'))
1595 1590 except AttributeError:
1596 1591 # Fall through to pure code
1597 1592 pass
1598 1593
1599 1594 if node == wdirid:
1600 1595 for length in range(minlength, len(hexnode) + 1):
1601 1596 prefix = hexnode[:length]
1602 1597 if isvalid(prefix):
1603 1598 return prefix
1604 1599
1605 1600 for length in range(minlength, len(hexnode) + 1):
1606 1601 prefix = hexnode[:length]
1607 1602 if isvalid(prefix):
1608 1603 return disambiguate(hexnode, length)
1609 1604
1610 1605 def cmp(self, node, text):
1611 1606 """compare text with a given file revision
1612 1607
1613 1608 returns True if text is different than what is stored.
1614 1609 """
1615 1610 p1, p2 = self.parents(node)
1616 1611 return storageutil.hashrevisionsha1(text, p1, p2) != node
1617 1612
1618 1613 def _cachesegment(self, offset, data):
1619 1614 """Add a segment to the revlog cache.
1620 1615
1621 1616 Accepts an absolute offset and the data that is at that location.
1622 1617 """
1623 1618 o, d = self._chunkcache
1624 1619 # try to add to existing cache
1625 1620 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1626 1621 self._chunkcache = o, d + data
1627 1622 else:
1628 1623 self._chunkcache = offset, data
1629 1624
1630 1625 def _readsegment(self, offset, length, df=None):
1631 1626 """Load a segment of raw data from the revlog.
1632 1627
1633 1628 Accepts an absolute offset, length to read, and an optional existing
1634 1629 file handle to read from.
1635 1630
1636 1631 If an existing file handle is passed, it will be seeked and the
1637 1632 original seek position will NOT be restored.
1638 1633
1639 1634 Returns a str or buffer of raw byte data.
1640 1635
1641 1636 Raises if the requested number of bytes could not be read.
1642 1637 """
1643 1638 # Cache data both forward and backward around the requested
1644 1639 # data, in a fixed size window. This helps speed up operations
1645 1640 # involving reading the revlog backwards.
1646 1641 cachesize = self._chunkcachesize
1647 1642 realoffset = offset & ~(cachesize - 1)
1648 1643 reallength = (
1649 1644 (offset + length + cachesize) & ~(cachesize - 1)
1650 1645 ) - realoffset
1651 1646 with self._datareadfp(df) as df:
1652 1647 df.seek(realoffset)
1653 1648 d = df.read(reallength)
1654 1649
1655 1650 self._cachesegment(realoffset, d)
1656 1651 if offset != realoffset or reallength != length:
1657 1652 startoffset = offset - realoffset
1658 1653 if len(d) - startoffset < length:
1659 1654 raise error.RevlogError(
1660 1655 _(
1661 1656 b'partial read of revlog %s; expected %d bytes from '
1662 1657 b'offset %d, got %d'
1663 1658 )
1664 1659 % (
1665 1660 self.indexfile if self._inline else self.datafile,
1666 1661 length,
1667 1662 realoffset,
1668 1663 len(d) - startoffset,
1669 1664 )
1670 1665 )
1671 1666
1672 1667 return util.buffer(d, startoffset, length)
1673 1668
1674 1669 if len(d) < length:
1675 1670 raise error.RevlogError(
1676 1671 _(
1677 1672 b'partial read of revlog %s; expected %d bytes from offset '
1678 1673 b'%d, got %d'
1679 1674 )
1680 1675 % (
1681 1676 self.indexfile if self._inline else self.datafile,
1682 1677 length,
1683 1678 offset,
1684 1679 len(d),
1685 1680 )
1686 1681 )
1687 1682
1688 1683 return d
1689 1684
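# A worked example of the cache-window alignment in _readsegment() above,
# as a standalone sketch (the 64KiB cache size is an assumption; the real
# value comes from self._chunkcachesize and must be a power of two):
cachesize = 65536
offset, length = 70000, 1000
realoffset = offset & ~(cachesize - 1)  # round down to a window boundary
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
assert (realoffset, reallength) == (65536, 65536)
# the requested range always falls inside the aligned window:
assert realoffset <= offset and offset + length <= realoffset + reallength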
1690 1685 def _getsegment(self, offset, length, df=None):
1691 1686 """Obtain a segment of raw data from the revlog.
1692 1687
1693 1688 Accepts an absolute offset, length of bytes to obtain, and an
1694 1689 optional file handle to the already-opened revlog. If the file
1695 1690 handle is used, its original seek position will not be preserved.
1696 1691
1697 1692 Requests for data may be returned from a cache.
1698 1693
1699 1694 Returns a str or a buffer instance of raw byte data.
1700 1695 """
1701 1696 o, d = self._chunkcache
1702 1697 l = len(d)
1703 1698
1704 1699 # is it in the cache?
1705 1700 cachestart = offset - o
1706 1701 cacheend = cachestart + length
1707 1702 if cachestart >= 0 and cacheend <= l:
1708 1703 if cachestart == 0 and cacheend == l:
1709 1704 return d # avoid a copy
1710 1705 return util.buffer(d, cachestart, cacheend - cachestart)
1711 1706
1712 1707 return self._readsegment(offset, length, df=df)
1713 1708
1714 1709 def _getsegmentforrevs(self, startrev, endrev, df=None):
1715 1710 """Obtain a segment of raw data corresponding to a range of revisions.
1716 1711
1717 1712 Accepts the start and end revisions and an optional already-open
1718 1713 file handle to be used for reading. If the file handle is read, its
1719 1714 seek position will not be preserved.
1720 1715
1721 1716 Requests for data may be satisfied by a cache.
1722 1717
1723 1718 Returns a 2-tuple of (offset, data) for the requested range of
1724 1719 revisions. Offset is the integer offset from the beginning of the
1725 1720 revlog and data is a str or buffer of the raw byte data.
1726 1721
1727 1722 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1728 1723 to determine where each revision's data begins and ends.
1729 1724 """
1730 1725 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1731 1726 # (functions are expensive).
1732 1727 index = self.index
1733 1728 istart = index[startrev]
1734 1729 start = int(istart[0] >> 16)
1735 1730 if startrev == endrev:
1736 1731 end = start + istart[1]
1737 1732 else:
1738 1733 iend = index[endrev]
1739 1734 end = int(iend[0] >> 16) + iend[1]
1740 1735
1741 1736 if self._inline:
1742 1737 start += (startrev + 1) * self._io.size
1743 1738 end += (endrev + 1) * self._io.size
1744 1739 length = end - start
1745 1740
1746 1741 return start, self._getsegment(start, length, df=df)
1747 1742
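# A toy illustration of the inline adjustment above: in an inline revlog
# the data chunk for revision r is preceded by (r + 1) index entries, so
# the "virtual" data offset must be shifted by that many entry sizes (the
# 64-byte v1 entry size used here is an assumption; the real value is
# self._io.size):
iosize = 64
startrev, data_offset = 2, 120  # offset "as if" the data lived in a .d file
physical_start = data_offset + (startrev + 1) * iosize
assert physical_start == 120 + 3 * 64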
1748 1743 def _chunk(self, rev, df=None):
1749 1744 """Obtain a single decompressed chunk for a revision.
1750 1745
1751 1746 Accepts an integer revision and an optional already-open file handle
1752 1747 to be used for reading. If used, the seek position of the file will not
1753 1748 be preserved.
1754 1749
1755 1750 Returns a str holding uncompressed data for the requested revision.
1756 1751 """
1757 1752 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1758 1753
1759 1754 def _chunks(self, revs, df=None, targetsize=None):
1760 1755 """Obtain decompressed chunks for the specified revisions.
1761 1756
1762 1757 Accepts an iterable of numeric revisions that are assumed to be in
1763 1758 ascending order. Also accepts an optional already-open file handle
1764 1759 to be used for reading. If used, the seek position of the file will
1765 1760 not be preserved.
1766 1761
1767 1762 This function is similar to calling ``self._chunk()`` multiple times,
1768 1763 but is faster.
1769 1764
1770 1765 Returns a list with decompressed data for each requested revision.
1771 1766 """
1772 1767 if not revs:
1773 1768 return []
1774 1769 start = self.start
1775 1770 length = self.length
1776 1771 inline = self._inline
1777 1772 iosize = self._io.size
1778 1773 buffer = util.buffer
1779 1774
1780 1775 l = []
1781 1776 ladd = l.append
1782 1777
1783 1778 if not self._withsparseread:
1784 1779 slicedchunks = (revs,)
1785 1780 else:
1786 1781 slicedchunks = deltautil.slicechunk(
1787 1782 self, revs, targetsize=targetsize
1788 1783 )
1789 1784
1790 1785 for revschunk in slicedchunks:
1791 1786 firstrev = revschunk[0]
1792 1787 # Skip trailing revisions with empty diff
1793 1788 for lastrev in revschunk[::-1]:
1794 1789 if length(lastrev) != 0:
1795 1790 break
1796 1791
1797 1792 try:
1798 1793 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1799 1794 except OverflowError:
1800 1795 # issue4215 - we can't cache a run of chunks greater than
1801 1796 # 2G on Windows
1802 1797 return [self._chunk(rev, df=df) for rev in revschunk]
1803 1798
1804 1799 decomp = self.decompress
1805 1800 for rev in revschunk:
1806 1801 chunkstart = start(rev)
1807 1802 if inline:
1808 1803 chunkstart += (rev + 1) * iosize
1809 1804 chunklength = length(rev)
1810 1805 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1811 1806
1812 1807 return l
1813 1808
1814 1809 def _chunkclear(self):
1815 1810 """Clear the raw chunk cache."""
1816 1811 self._chunkcache = (0, b'')
1817 1812
1818 1813 def deltaparent(self, rev):
1819 1814 """return deltaparent of the given revision"""
1820 1815 base = self.index[rev][3]
1821 1816 if base == rev:
1822 1817 return nullrev
1823 1818 elif self._generaldelta:
1824 1819 return base
1825 1820 else:
1826 1821 return rev - 1
1827 1822
1828 1823 def issnapshot(self, rev):
1829 1824 """tells whether rev is a snapshot"""
1830 1825 if not self._sparserevlog:
1831 1826 return self.deltaparent(rev) == nullrev
1832 1827 elif util.safehasattr(self.index, b'issnapshot'):
1833 1828 # directly assign the method to cache the testing and access
1834 1829 self.issnapshot = self.index.issnapshot
1835 1830 return self.issnapshot(rev)
1836 1831 if rev == nullrev:
1837 1832 return True
1838 1833 entry = self.index[rev]
1839 1834 base = entry[3]
1840 1835 if base == rev:
1841 1836 return True
1842 1837 if base == nullrev:
1843 1838 return True
1844 1839 p1 = entry[5]
1845 1840 p2 = entry[6]
1846 1841 if base == p1 or base == p2:
1847 1842 return False
1848 1843 return self.issnapshot(base)
1849 1844
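# A standalone sketch of the sparse-revlog snapshot test above (mirroring
# the pure-Python fallback; the toy tuples below only fill the slots the
# test reads: [3] = delta base, [5]/[6] = parent revs):
nullrev = -1
toy_index = [
    (0, 0, 0, nullrev, 0, nullrev, nullrev),  # rev 0: delta vs null -> snapshot
    (0, 0, 0, 0, 0, 0, nullrev),              # rev 1: delta vs its parent -> no
    (0, 0, 0, 0, 0, 1, nullrev),              # rev 2: delta vs non-parent rev 0
]

def toy_issnapshot(index, rev):
    if rev == nullrev:
        return True
    base = index[rev][3]
    if base == rev or base == nullrev:
        return True  # stored as a full (or null-based) snapshot
    if base in (index[rev][5], index[rev][6]):
        return False  # a regular delta against a parent
    return toy_issnapshot(index, base)  # intermediate snapshot chain

assert [toy_issnapshot(toy_index, r) for r in range(3)] == [True, False, True]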
1850 1845 def snapshotdepth(self, rev):
1851 1846 """number of snapshot in the chain before this one"""
1852 1847 if not self.issnapshot(rev):
1853 1848 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1854 1849 return len(self._deltachain(rev)[0]) - 1
1855 1850
1856 1851 def revdiff(self, rev1, rev2):
1857 1852 """return or calculate a delta between two revisions
1858 1853
1859 1854 The delta calculated is in binary form and is intended to be written to
1860 1855 revlog data directly. So this function needs raw revision data.
1861 1856 """
1862 1857 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1863 1858 return bytes(self._chunk(rev2))
1864 1859
1865 1860 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1866 1861
1867 1862 def _processflags(self, text, flags, operation, raw=False):
1868 1863 """deprecated entry point to access flag processors"""
1869 1864 msg = b'_processflag(...) use the specialized variant'
1870 1865 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1871 1866 if raw:
1872 1867 return text, flagutil.processflagsraw(self, text, flags)
1873 1868 elif operation == b'read':
1874 1869 return flagutil.processflagsread(self, text, flags)
1875 1870 else: # write operation
1876 1871 return flagutil.processflagswrite(self, text, flags)
1877 1872
1878 1873 def revision(self, nodeorrev, _df=None, raw=False):
1879 1874 """return an uncompressed revision of a given node or revision
1880 1875 number.
1881 1876
1882 1877 _df - an existing file handle to read from. (internal-only)
1883 1878 raw - an optional argument specifying if the revision data is to be
1884 1879 treated as raw data when applying flag transforms. 'raw' should be set
1885 1880 to True when generating changegroups or in debug commands.
1886 1881 """
1887 1882 if raw:
1888 1883 msg = (
1889 1884 b'revlog.revision(..., raw=True) is deprecated, '
1890 1885 b'use revlog.rawdata(...)'
1891 1886 )
1892 1887 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1893 1888 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1894 1889
1895 1890 def sidedata(self, nodeorrev, _df=None):
1896 1891 """a map of extra data related to the changeset but not part of the hash
1897 1892
1898 1893 This function currently returns a dictionary. However, a more advanced
1899 1894 mapping object will likely be used in the future for more
1900 1895 efficient/lazy code.
1901 1896 """
1902 1897 return self._revisiondata(nodeorrev, _df)[1]
1903 1898
1904 1899 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1905 1900 # deal with <nodeorrev> argument type
1906 1901 if isinstance(nodeorrev, int):
1907 1902 rev = nodeorrev
1908 1903 node = self.node(rev)
1909 1904 else:
1910 1905 node = nodeorrev
1911 1906 rev = None
1912 1907
1913 1908 # fast path the special `nullid` rev
1914 1909 if node == nullid:
1915 1910 return b"", {}
1916 1911
1917 1912 # ``rawtext`` is the text as stored inside the revlog. Might be the
1918 1913 # revision or might need to be processed to retrieve the revision.
1919 1914 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1920 1915
1921 1916 if self.version & 0xFFFF == REVLOGV2:
1922 1917 if rev is None:
1923 1918 rev = self.rev(node)
1924 1919 sidedata = self._sidedata(rev)
1925 1920 else:
1926 1921 sidedata = {}
1927 1922
1928 1923 if raw and validated:
1929 1924 # if we don't want to process the raw text and that raw
1930 1925 # text is cached, we can exit early.
1931 1926 return rawtext, sidedata
1932 1927 if rev is None:
1933 1928 rev = self.rev(node)
1934 1929 # the revlog's flag for this revision
1935 1930 # (usually alter its state or content)
1936 1931 flags = self.flags(rev)
1937 1932
1938 1933 if validated and flags == REVIDX_DEFAULT_FLAGS:
1939 1934 # no extra flags set, no flag processor runs, text = rawtext
1940 1935 return rawtext, sidedata
1941 1936
1942 1937 if raw:
1943 1938 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1944 1939 text = rawtext
1945 1940 else:
1946 1941 r = flagutil.processflagsread(self, rawtext, flags)
1947 1942 text, validatehash = r
1948 1943 if validatehash:
1949 1944 self.checkhash(text, node, rev=rev)
1950 1945 if not validated:
1951 1946 self._revisioncache = (node, rev, rawtext)
1952 1947
1953 1948 return text, sidedata
1954 1949
1955 1950 def _rawtext(self, node, rev, _df=None):
1956 1951 """return the possibly unvalidated rawtext for a revision
1957 1952
1958 1953 returns (rev, rawtext, validated)
1959 1954 """
1960 1955
1961 1956 # revision in the cache (could be useful to apply delta)
1962 1957 cachedrev = None
1963 1958 # An intermediate text to apply deltas to
1964 1959 basetext = None
1965 1960
1966 1961 # Check if we have the entry in cache
1967 1962 # The cache entry looks like (node, rev, rawtext)
1968 1963 if self._revisioncache:
1969 1964 if self._revisioncache[0] == node:
1970 1965 return (rev, self._revisioncache[2], True)
1971 1966 cachedrev = self._revisioncache[1]
1972 1967
1973 1968 if rev is None:
1974 1969 rev = self.rev(node)
1975 1970
1976 1971 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1977 1972 if stopped:
1978 1973 basetext = self._revisioncache[2]
1979 1974
1980 1975 # drop cache to save memory, the caller is expected to
1981 1976 # update self._revisioncache after validating the text
1982 1977 self._revisioncache = None
1983 1978
1984 1979 targetsize = None
1985 1980 rawsize = self.index[rev][2]
1986 1981 if 0 <= rawsize:
1987 1982 targetsize = 4 * rawsize
1988 1983
1989 1984 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1990 1985 if basetext is None:
1991 1986 basetext = bytes(bins[0])
1992 1987 bins = bins[1:]
1993 1988
1994 1989 rawtext = mdiff.patches(basetext, bins)
1995 1990 del basetext # let us have a chance to free memory early
1996 1991 return (rev, rawtext, False)
1997 1992
1998 1993 def _sidedata(self, rev):
1999 1994 """Return the sidedata for a given revision number."""
2000 1995 index_entry = self.index[rev]
2001 1996 sidedata_offset = index_entry[8]
2002 1997 sidedata_size = index_entry[9]
2003 1998
2004 1999 if self._inline:
2005 2000 sidedata_offset += self._io.size * (1 + rev)
2006 2001 if sidedata_size == 0:
2007 2002 return {}
2008 2003
2009 2004 segment = self._getsegment(sidedata_offset, sidedata_size)
2010 2005 sidedata = sidedatautil.deserialize_sidedata(segment)
2011 2006 return sidedata
2012 2007
2013 2008 def rawdata(self, nodeorrev, _df=None):
2014 2009 """return an uncompressed raw data of a given node or revision number.
2015 2010
2016 2011 _df - an existing file handle to read from. (internal-only)
2017 2012 """
2018 2013 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2019 2014
2020 2015 def hash(self, text, p1, p2):
2021 2016 """Compute a node hash.
2022 2017
2023 2018 Available as a function so that subclasses can replace the hash
2024 2019 as needed.
2025 2020 """
2026 2021 return storageutil.hashrevisionsha1(text, p1, p2)
2027 2022
2028 2023 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2029 2024 """Check node hash integrity.
2030 2025
2031 2026 Available as a function so that subclasses can extend hash mismatch
2032 2027 behaviors as needed.
2033 2028 """
2034 2029 try:
2035 2030 if p1 is None and p2 is None:
2036 2031 p1, p2 = self.parents(node)
2037 2032 if node != self.hash(text, p1, p2):
2038 2033 # Clear the revision cache on hash failure. The revision cache
2039 2034 # only stores the raw revision and clearing the cache does have
2040 2035 # the side-effect that we won't have a cache hit when the raw
2041 2036 # revision data is accessed. But this case should be rare and
2042 2037 # it is extra work to teach the cache about the hash
2043 2038 # verification state.
2044 2039 if self._revisioncache and self._revisioncache[0] == node:
2045 2040 self._revisioncache = None
2046 2041
2047 2042 revornode = rev
2048 2043 if revornode is None:
2049 2044 revornode = templatefilters.short(hex(node))
2050 2045 raise error.RevlogError(
2051 2046 _(b"integrity check failed on %s:%s")
2052 2047 % (self.indexfile, pycompat.bytestr(revornode))
2053 2048 )
2054 2049 except error.RevlogError:
2055 2050 if self._censorable and storageutil.iscensoredtext(text):
2056 2051 raise error.CensoredNodeError(self.indexfile, node, text)
2057 2052 raise
2058 2053
2059 2054 def _enforceinlinesize(self, tr, fp=None):
2060 2055 """Check if the revlog is too big for inline and convert if so.
2061 2056
2062 2057 This should be called after revisions are added to the revlog. If the
2063 2058 revlog has grown too large to be an inline revlog, it will convert it
2064 2059 to use multiple index and data files.
2065 2060 """
2066 2061 tiprev = len(self) - 1
2067 2062 if (
2068 2063 not self._inline
2069 2064 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2070 2065 ):
2071 2066 return
2072 2067
2073 2068 troffset = tr.findoffset(self.indexfile)
2074 2069 if troffset is None:
2075 2070 raise error.RevlogError(
2076 2071 _(b"%s not found in the transaction") % self.indexfile
2077 2072 )
2078 2073 trindex = 0
2079 2074 tr.add(self.datafile, 0)
2080 2075
2081 2076 if fp:
2082 2077 fp.flush()
2083 2078 fp.close()
2084 2079 # We can't use the cached file handle after close(). So prevent
2085 2080 # its usage.
2086 2081 self._writinghandles = None
2087 2082
2088 2083 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2089 2084 for r in self:
2090 2085 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2091 2086 if troffset <= self.start(r):
2092 2087 trindex = r
2093 2088
2094 2089 with self._indexfp(b'w') as fp:
2095 2090 self.version &= ~FLAG_INLINE_DATA
2096 2091 self._inline = False
2097 2092 io = self._io
2098 2093 for i in self:
2099 2094 e = io.packentry(self.index[i], self.node, self.version, i)
2100 2095 fp.write(e)
2101 2096
2102 2097 # the temp file replaces the real index when we exit the context
2103 2098 # manager
2104 2099
2105 2100 tr.replace(self.indexfile, trindex * self._io.size)
2106 2101 nodemaputil.setup_persistent_nodemap(tr, self)
2107 2102 self._chunkclear()
2108 2103
2109 2104 def _nodeduplicatecallback(self, transaction, node):
2110 2105 """called when trying to add a node already stored."""
2111 2106
2112 2107 def addrevision(
2113 2108 self,
2114 2109 text,
2115 2110 transaction,
2116 2111 link,
2117 2112 p1,
2118 2113 p2,
2119 2114 cachedelta=None,
2120 2115 node=None,
2121 2116 flags=REVIDX_DEFAULT_FLAGS,
2122 2117 deltacomputer=None,
2123 2118 sidedata=None,
2124 2119 ):
2125 2120 """add a revision to the log
2126 2121
2127 2122 text - the revision data to add
2128 2123 transaction - the transaction object used for rollback
2129 2124 link - the linkrev data to add
2130 2125 p1, p2 - the parent nodeids of the revision
2131 2126 cachedelta - an optional precomputed delta
2132 2127 node - nodeid of revision; typically node is not specified, and it is
2133 2128 computed by default as hash(text, p1, p2), however subclasses might
2134 2129 use different hashing method (and override checkhash() in such case)
2135 2130 flags - the known flags to set on the revision
2136 2131 deltacomputer - an optional deltacomputer instance shared between
2137 2132 multiple calls
2138 2133 """
2139 2134 if link == nullrev:
2140 2135 raise error.RevlogError(
2141 2136 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2142 2137 )
2143 2138
2144 2139 if sidedata is None:
2145 2140 sidedata = {}
2146 2141 elif not self.hassidedata:
2147 2142 raise error.ProgrammingError(
2148 2143 _(b"trying to add sidedata to a revlog who don't support them")
2149 2144 )
2150 2145
2151 2146 if flags:
2152 2147 node = node or self.hash(text, p1, p2)
2153 2148
2154 2149 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2155 2150
2156 2151 # If the flag processor modifies the revision data, ignore any provided
2157 2152 # cachedelta.
2158 2153 if rawtext != text:
2159 2154 cachedelta = None
2160 2155
2161 2156 if len(rawtext) > _maxentrysize:
2162 2157 raise error.RevlogError(
2163 2158 _(
2164 2159 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2165 2160 )
2166 2161 % (self.indexfile, len(rawtext))
2167 2162 )
2168 2163
2169 2164 node = node or self.hash(rawtext, p1, p2)
2170 2165 rev = self.index.get_rev(node)
2171 2166 if rev is not None:
2172 2167 return rev
2173 2168
2174 2169 if validatehash:
2175 2170 self.checkhash(rawtext, node, p1=p1, p2=p2)
2176 2171
2177 2172 return self.addrawrevision(
2178 2173 rawtext,
2179 2174 transaction,
2180 2175 link,
2181 2176 p1,
2182 2177 p2,
2183 2178 node,
2184 2179 flags,
2185 2180 cachedelta=cachedelta,
2186 2181 deltacomputer=deltacomputer,
2187 2182 sidedata=sidedata,
2188 2183 )
2189 2184
2190 2185 def addrawrevision(
2191 2186 self,
2192 2187 rawtext,
2193 2188 transaction,
2194 2189 link,
2195 2190 p1,
2196 2191 p2,
2197 2192 node,
2198 2193 flags,
2199 2194 cachedelta=None,
2200 2195 deltacomputer=None,
2201 2196 sidedata=None,
2202 2197 ):
2203 2198 """add a raw revision with known flags, node and parents
2204 2199 useful when reusing a revision not stored in this revlog (e.g. received
2205 2200 over the wire, or read from an external bundle).
2206 2201 """
2207 2202 dfh = None
2208 2203 if not self._inline:
2209 2204 dfh = self._datafp(b"a+")
2210 2205 ifh = self._indexfp(b"a+")
2211 2206 try:
2212 2207 return self._addrevision(
2213 2208 node,
2214 2209 rawtext,
2215 2210 transaction,
2216 2211 link,
2217 2212 p1,
2218 2213 p2,
2219 2214 flags,
2220 2215 cachedelta,
2221 2216 ifh,
2222 2217 dfh,
2223 2218 deltacomputer=deltacomputer,
2224 2219 sidedata=sidedata,
2225 2220 )
2226 2221 finally:
2227 2222 if dfh:
2228 2223 dfh.close()
2229 2224 ifh.close()
2230 2225
2231 2226 def compress(self, data):
2232 2227 """Generate a possibly-compressed representation of data."""
2233 2228 if not data:
2234 2229 return b'', data
2235 2230
2236 2231 compressed = self._compressor.compress(data)
2237 2232
2238 2233 if compressed:
2239 2234 # The revlog compressor added the header in the returned data.
2240 2235 return b'', compressed
2241 2236
2242 2237 if data[0:1] == b'\0':
2243 2238 return b'', data
2244 2239 return b'u', data
2245 2240
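# A minimal sketch of the chunk-header convention used by compress() and
# decompress(), assuming the default zlib engine (header b'x'); real
# revlogs route other header bytes through util.compengines:
import zlib

def toy_compress(data):
    if not data:
        return b'', data
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return b'', comp  # zlib output already carries the b'x' header
    if data[0:1] == b'\0':
        return b'', data  # a leading NUL already means "stored raw"
    return b'u', data  # prefix an explicit "uncompressed" marker

def toy_decompress(chunk):
    t = chunk[0:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if not chunk or t == b'\0':
        return chunk
    if t == b'u':
        return chunk[1:]
    raise ValueError('unknown compression header %r' % t)

payload = b'revlog chunk ' * 64
header, data = toy_compress(payload)
assert toy_decompress(header + data) == payload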
2246 2241 def decompress(self, data):
2247 2242 """Decompress a revlog chunk.
2248 2243
2249 2244 The chunk is expected to begin with a header identifying the
2250 2245 format type so it can be routed to an appropriate decompressor.
2251 2246 """
2252 2247 if not data:
2253 2248 return data
2254 2249
2255 2250 # Revlogs are read much more frequently than they are written and many
2256 2251 # chunks only take microseconds to decompress, so performance is
2257 2252 # important here.
2258 2253 #
2259 2254 # We can make a few assumptions about revlogs:
2260 2255 #
2261 2256 # 1) the majority of chunks will be compressed (as opposed to inline
2262 2257 # raw data).
2263 2258 # 2) decompressing *any* data will likely be at least 10x slower than
2264 2259 # returning raw inline data.
2265 2260 # 3) we want to prioritize common and officially supported compression
2266 2261 # engines
2267 2262 #
2268 2263 # It follows that we want to optimize for "decompress compressed data
2269 2264 # when encoded with common and officially supported compression engines"
2270 2265 # case over "raw data" and "data encoded by less common or non-official
2271 2266 # compression engines." That is why we have the inline lookup first
2272 2267 # followed by the compengines lookup.
2273 2268 #
2274 2269 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2275 2270 # compressed chunks. And this matters for changelog and manifest reads.
2276 2271 t = data[0:1]
2277 2272
2278 2273 if t == b'x':
2279 2274 try:
2280 2275 return _zlibdecompress(data)
2281 2276 except zlib.error as e:
2282 2277 raise error.RevlogError(
2283 2278 _(b'revlog decompress error: %s')
2284 2279 % stringutil.forcebytestr(e)
2285 2280 )
2286 2281 # '\0' is more common than 'u' so it goes first.
2287 2282 elif t == b'\0':
2288 2283 return data
2289 2284 elif t == b'u':
2290 2285 return util.buffer(data, 1)
2291 2286
2292 2287 try:
2293 2288 compressor = self._decompressors[t]
2294 2289 except KeyError:
2295 2290 try:
2296 2291 engine = util.compengines.forrevlogheader(t)
2297 2292 compressor = engine.revlogcompressor(self._compengineopts)
2298 2293 self._decompressors[t] = compressor
2299 2294 except KeyError:
2300 2295 raise error.RevlogError(
2301 2296 _(b'unknown compression type %s') % binascii.hexlify(t)
2302 2297 )
2303 2298
2304 2299 return compressor.decompress(data)
2305 2300
2306 2301 def _addrevision(
2307 2302 self,
2308 2303 node,
2309 2304 rawtext,
2310 2305 transaction,
2311 2306 link,
2312 2307 p1,
2313 2308 p2,
2314 2309 flags,
2315 2310 cachedelta,
2316 2311 ifh,
2317 2312 dfh,
2318 2313 alwayscache=False,
2319 2314 deltacomputer=None,
2320 2315 sidedata=None,
2321 2316 ):
2322 2317 """internal function to add revisions to the log
2323 2318
2324 2319 see addrevision for argument descriptions.
2325 2320
2326 2321 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2327 2322
2328 2323 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2329 2324 be used.
2330 2325
2331 2326 invariants:
2332 2327 - rawtext is optional (can be None); if not set, cachedelta must be set.
2333 2328 if both are set, they must correspond to each other.
2334 2329 """
2335 2330 if node == nullid:
2336 2331 raise error.RevlogError(
2337 2332 _(b"%s: attempt to add null revision") % self.indexfile
2338 2333 )
2339 2334 if node == wdirid or node in wdirfilenodeids:
2340 2335 raise error.RevlogError(
2341 2336 _(b"%s: attempt to add wdir revision") % self.indexfile
2342 2337 )
2343 2338
2344 2339 if self._inline:
2345 2340 fh = ifh
2346 2341 else:
2347 2342 fh = dfh
2348 2343
2349 2344 btext = [rawtext]
2350 2345
2351 2346 curr = len(self)
2352 2347 prev = curr - 1
2353 2348
2354 2349 offset = self._get_data_offset(prev)
2355 2350
2356 2351 if self._concurrencychecker:
2357 2352 if self._inline:
2358 2353 # offset is "as if" it were in the .d file, so we need to add on
2359 2354 # the size of the entry metadata.
2360 2355 self._concurrencychecker(
2361 2356 ifh, self.indexfile, offset + curr * self._io.size
2362 2357 )
2363 2358 else:
2364 2359 # Entries in the .i are a consistent size.
2365 2360 self._concurrencychecker(
2366 2361 ifh, self.indexfile, curr * self._io.size
2367 2362 )
2368 2363 self._concurrencychecker(dfh, self.datafile, offset)
2369 2364
2370 2365 p1r, p2r = self.rev(p1), self.rev(p2)
2371 2366
2372 2367 # full versions are inserted when the needed deltas
2373 2368 # become comparable to the uncompressed text
2374 2369 if rawtext is None:
2375 2370 # need rawtext size before it is changed by flag processors, which is
2376 2371 # the non-raw size. use revlog explicitly to avoid filelog's extra
2377 2372 # logic that might remove metadata size.
2378 2373 textlen = mdiff.patchedsize(
2379 2374 revlog.size(self, cachedelta[0]), cachedelta[1]
2380 2375 )
2381 2376 else:
2382 2377 textlen = len(rawtext)
2383 2378
2384 2379 if deltacomputer is None:
2385 2380 deltacomputer = deltautil.deltacomputer(self)
2386 2381
2387 2382 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2388 2383
2389 2384 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2390 2385
2391 2386 if sidedata:
2392 2387 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2393 2388 sidedata_offset = offset + deltainfo.deltalen
2394 2389 else:
2395 2390 serialized_sidedata = b""
2396 2391 # Don't store the offset if the sidedata is empty; that way we can
2397 2392 # easily detect empty sidedata, and it will be no different from
2398 2393 # sidedata we add manually.
2399 2394 sidedata_offset = 0
2400 2395
2401 2396 e = (
2402 2397 offset_type(offset, flags),
2403 2398 deltainfo.deltalen,
2404 2399 textlen,
2405 2400 deltainfo.base,
2406 2401 link,
2407 2402 p1r,
2408 2403 p2r,
2409 2404 node,
2410 2405 sidedata_offset,
2411 2406 len(serialized_sidedata),
2412 2407 )
2413 2408
2414 2409 if self.version & 0xFFFF != REVLOGV2:
2415 2410 e = e[:8]
2416 2411
2417 2412 self.index.append(e)
2418 2413 entry = self._io.packentry(e, self.node, self.version, curr)
2419 2414 self._writeentry(
2420 2415 transaction,
2421 2416 ifh,
2422 2417 dfh,
2423 2418 entry,
2424 2419 deltainfo.data,
2425 2420 link,
2426 2421 offset,
2427 2422 serialized_sidedata,
2428 2423 )
2429 2424
2430 2425 rawtext = btext[0]
2431 2426
2432 2427 if alwayscache and rawtext is None:
2433 2428 rawtext = deltacomputer.buildtext(revinfo, fh)
2434 2429
2435 2430 if type(rawtext) == bytes: # only accept immutable objects
2436 2431 self._revisioncache = (node, curr, rawtext)
2437 2432 self._chainbasecache[curr] = deltainfo.chainbase
2438 2433 return curr
2439 2434
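# offset_type() (defined earlier in this module) packs the byte offset
# and the 16-bit flag field into the first index field; readers above
# recover them with `entry[0] >> 16` and `entry[0] & 0xFFFF`. A
# standalone sketch of that packing:
def toy_offset_type(offset, flags):
    assert flags & ~0xFFFF == 0  # flags must fit in the low 16 bits
    return (offset << 16) | flags

packed = toy_offset_type(4096, 1)
assert packed >> 16 == 4096  # byte offset into the data file
assert packed & 0xFFFF == 1  # per-revision flags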
2440 2435 def _get_data_offset(self, prev):
2441 2436 """Returns the current offset in the (in-transaction) data file.
2442 2437 Versions < 2 of the revlog can compute this in O(1); revlog v2 needs a docket
2443 2438 file to store that information: since sidedata can be rewritten to the
2444 2439 end of the data file within a transaction, you can have cases where, for
2445 2440 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2446 2441 to `n - 1`'s sidedata being written after `n`'s data.
2447 2442
2448 2443 TODO cache this in a docket file before getting out of experimental."""
2449 2444 if self.version & 0xFFFF != REVLOGV2:
2450 2445 return self.end(prev)
2451 2446
2452 2447 offset = 0
2453 2448 for rev, entry in enumerate(self.index):
2454 2449 sidedata_end = entry[8] + entry[9]
2455 2450 # Sidedata for a previous rev has potentially been written after
2456 2451 # this rev's end, so take the max.
2457 2452 offset = max(self.end(rev), offset, sidedata_end)
2458 2453 return offset
2459 2454
2460 2455 def _writeentry(
2461 2456 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2462 2457 ):
2463 2458 # Files opened in a+ mode have inconsistent behavior on various
2464 2459 # platforms. Windows requires that a file positioning call be made
2465 2460 # when the file handle transitions between reads and writes. See
2466 2461 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2467 2462 # platforms, Python or the platform itself can be buggy. Some versions
2468 2463 # of Solaris have been observed to not append at the end of the file
2469 2464 # if the file was seeked to before the end. See issue4943 for more.
2470 2465 #
2471 2466 # We work around this issue by inserting a seek() before writing.
2472 2467 # Note: This is likely not necessary on Python 3. However, because
2473 2468 # the file handle is reused for reads and may be seeked there, we need
2474 2469 # to be careful before changing this.
2475 2470 ifh.seek(0, os.SEEK_END)
2476 2471 if dfh:
2477 2472 dfh.seek(0, os.SEEK_END)
2478 2473
2479 2474 curr = len(self) - 1
2480 2475 if not self._inline:
2481 2476 transaction.add(self.datafile, offset)
2482 2477 transaction.add(self.indexfile, curr * len(entry))
2483 2478 if data[0]:
2484 2479 dfh.write(data[0])
2485 2480 dfh.write(data[1])
2486 2481 if sidedata:
2487 2482 dfh.write(sidedata)
2488 2483 ifh.write(entry)
2489 2484 else:
2490 2485 offset += curr * self._io.size
2491 2486 transaction.add(self.indexfile, offset)
2492 2487 ifh.write(entry)
2493 2488 ifh.write(data[0])
2494 2489 ifh.write(data[1])
2495 2490 if sidedata:
2496 2491 ifh.write(sidedata)
2497 2492 self._enforceinlinesize(transaction, ifh)
2498 2493 nodemaputil.setup_persistent_nodemap(transaction, self)
2499 2494
2500 2495 def addgroup(
2501 2496 self,
2502 2497 deltas,
2503 2498 linkmapper,
2504 2499 transaction,
2505 2500 alwayscache=False,
2506 2501 addrevisioncb=None,
2507 2502 duplicaterevisioncb=None,
2508 2503 ):
2509 2504 """
2510 2505 add a delta group
2511 2506
2512 2507 Given a set of deltas, add them to the revision log. The
2513 2508 first delta is against its parent, which should be in our
2514 2509 log; the rest are against the previous delta.
2515 2510
2516 2511 If ``addrevisioncb`` is defined, it will be called with arguments of
2517 2512 this revlog and the node that was added.
2518 2513 """
2519 2514
2520 2515 if self._writinghandles:
2521 2516 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2522 2517
2523 2518 r = len(self)
2524 2519 end = 0
2525 2520 if r:
2526 2521 end = self.end(r - 1)
2527 2522 ifh = self._indexfp(b"a+")
2528 2523 isize = r * self._io.size
2529 2524 if self._inline:
2530 2525 transaction.add(self.indexfile, end + isize)
2531 2526 dfh = None
2532 2527 else:
2533 2528 transaction.add(self.indexfile, isize)
2534 2529 transaction.add(self.datafile, end)
2535 2530 dfh = self._datafp(b"a+")
2536 2531
2537 2532 def flush():
2538 2533 if dfh:
2539 2534 dfh.flush()
2540 2535 ifh.flush()
2541 2536
2542 2537 self._writinghandles = (ifh, dfh)
2543 2538 empty = True
2544 2539
2545 2540 try:
2546 2541 deltacomputer = deltautil.deltacomputer(self)
2547 2542 # loop through our set of deltas
2548 2543 for data in deltas:
2549 2544 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2550 2545 link = linkmapper(linknode)
2551 2546 flags = flags or REVIDX_DEFAULT_FLAGS
2552 2547
2553 2548 rev = self.index.get_rev(node)
2554 2549 if rev is not None:
2555 2550 # this can happen if two branches make the same change
2556 2551 self._nodeduplicatecallback(transaction, rev)
2557 2552 if duplicaterevisioncb:
2558 2553 duplicaterevisioncb(self, rev)
2559 2554 empty = False
2560 2555 continue
2561 2556
2562 2557 for p in (p1, p2):
2563 2558 if not self.index.has_node(p):
2564 2559 raise error.LookupError(
2565 2560 p, self.indexfile, _(b'unknown parent')
2566 2561 )
2567 2562
2568 2563 if not self.index.has_node(deltabase):
2569 2564 raise error.LookupError(
2570 2565 deltabase, self.indexfile, _(b'unknown delta base')
2571 2566 )
2572 2567
2573 2568 baserev = self.rev(deltabase)
2574 2569
2575 2570 if baserev != nullrev and self.iscensored(baserev):
2576 2571 # if base is censored, delta must be full replacement in a
2577 2572 # single patch operation
2578 2573 hlen = struct.calcsize(b">lll")
2579 2574 oldlen = self.rawsize(baserev)
2580 2575 newlen = len(delta) - hlen
2581 2576 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2582 2577 raise error.CensoredBaseError(
2583 2578 self.indexfile, self.node(baserev)
2584 2579 )
2585 2580
2586 2581 if not flags and self._peek_iscensored(baserev, delta, flush):
2587 2582 flags |= REVIDX_ISCENSORED
2588 2583
2589 2584 # We assume consumers of addrevisioncb will want to retrieve
2590 2585 # the added revision, which will require a call to
2591 2586 # revision(). revision() will fast path if there is a cache
2592 2587 # hit. So, we tell _addrevision() to always cache in this case.
2593 2588 # We're only using addgroup() in the context of changegroup
2594 2589 # generation so the revision data can always be handled as raw
2595 2590 # by the flagprocessor.
2596 2591 rev = self._addrevision(
2597 2592 node,
2598 2593 None,
2599 2594 transaction,
2600 2595 link,
2601 2596 p1,
2602 2597 p2,
2603 2598 flags,
2604 2599 (baserev, delta),
2605 2600 ifh,
2606 2601 dfh,
2607 2602 alwayscache=alwayscache,
2608 2603 deltacomputer=deltacomputer,
2609 2604 sidedata=sidedata,
2610 2605 )
2611 2606
2612 2607 if addrevisioncb:
2613 2608 addrevisioncb(self, rev)
2614 2609 empty = False
2615 2610
2616 2611 if not dfh and not self._inline:
2617 2612 # addrevision switched from inline to conventional
2618 2613 # reopen the index
2619 2614 ifh.close()
2620 2615 dfh = self._datafp(b"a+")
2621 2616 ifh = self._indexfp(b"a+")
2622 2617 self._writinghandles = (ifh, dfh)
2623 2618 finally:
2624 2619 self._writinghandles = None
2625 2620
2626 2621 if dfh:
2627 2622 dfh.close()
2628 2623 ifh.close()
2629 2624 return not empty
2630 2625
2631 2626 def iscensored(self, rev):
2632 2627 """Check if a file revision is censored."""
2633 2628 if not self._censorable:
2634 2629 return False
2635 2630
2636 2631 return self.flags(rev) & REVIDX_ISCENSORED
2637 2632
2638 2633 def _peek_iscensored(self, baserev, delta, flush):
2639 2634 """Quickly check if a delta produces a censored revision."""
2640 2635 if not self._censorable:
2641 2636 return False
2642 2637
2643 2638 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2644 2639
2645 2640 def getstrippoint(self, minlink):
2646 2641 """find the minimum rev that must be stripped to strip the linkrev
2647 2642
2648 2643 Returns a tuple containing the minimum rev and a set of all revs that
2649 2644 have linkrevs that will be broken by this strip.
2650 2645 """
2651 2646 return storageutil.resolvestripinfo(
2652 2647 minlink,
2653 2648 len(self) - 1,
2654 2649 self.headrevs(),
2655 2650 self.linkrev,
2656 2651 self.parentrevs,
2657 2652 )
2658 2653
2659 2654 def strip(self, minlink, transaction):
2660 2655 """truncate the revlog on the first revision with a linkrev >= minlink
2661 2656
2662 2657 This function is called when we're stripping revision minlink and
2663 2658 its descendants from the repository.
2664 2659
2665 2660 We have to remove all revisions with linkrev >= minlink, because
2666 2661 the equivalent changelog revisions will be renumbered after the
2667 2662 strip.
2668 2663
2669 2664 So we truncate the revlog on the first of these revisions, and
2670 2665 trust that the caller has saved the revisions that shouldn't be
2671 2666 removed and that it'll re-add them after this truncation.
2672 2667 """
2673 2668 if len(self) == 0:
2674 2669 return
2675 2670
2676 2671 rev, _ = self.getstrippoint(minlink)
2677 2672 if rev == len(self):
2678 2673 return
2679 2674
2680 2675 # first truncate the files on disk
2681 2676 end = self.start(rev)
2682 2677 if not self._inline:
2683 2678 transaction.add(self.datafile, end)
2684 2679 end = rev * self._io.size
2685 2680 else:
2686 2681 end += rev * self._io.size
2687 2682
2688 2683 transaction.add(self.indexfile, end)
2689 2684
2690 2685 # then reset internal state in memory to forget those revisions
2691 2686 self._revisioncache = None
2692 2687 self._chaininfocache = util.lrucachedict(500)
2693 2688 self._chunkclear()
2694 2689
2695 2690 del self.index[rev:-1]
2696 2691
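# A toy recomputation of the truncation offsets above (the 64-byte v1
# index entry size is an assumption; the real value is self._io.size).
# For a split revlog the .d file is cut at start(rev) and the .i file at
# rev * iosize; for an inline revlog index entries and data share the
# .i file, hence the combined offset:
iosize, rev = 64, 10
data_start = 4096  # stand-in for self.start(rev)
split_index_end = rev * iosize
inline_end = data_start + rev * iosize
assert (split_index_end, inline_end) == (640, 4736)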
2697 2692 def checksize(self):
2698 2693 """Check size of index and data files
2699 2694
2700 2695 return a (dd, di) tuple.
2701 2696 - dd: extra bytes for the "data" file
2702 2697 - di: extra bytes for the "index" file
2703 2698
2704 2699 A healthy revlog will return (0, 0).
2705 2700 """
2706 2701 expected = 0
2707 2702 if len(self):
2708 2703 expected = max(0, self.end(len(self) - 1))
2709 2704
2710 2705 try:
2711 2706 with self._datafp() as f:
2712 2707 f.seek(0, io.SEEK_END)
2713 2708 actual = f.tell()
2714 2709 dd = actual - expected
2715 2710 except IOError as inst:
2716 2711 if inst.errno != errno.ENOENT:
2717 2712 raise
2718 2713 dd = 0
2719 2714
2720 2715 try:
2721 2716 f = self.opener(self.indexfile)
2722 2717 f.seek(0, io.SEEK_END)
2723 2718 actual = f.tell()
2724 2719 f.close()
2725 2720 s = self._io.size
2726 2721 i = max(0, actual // s)
2727 2722 di = actual - (i * s)
2728 2723 if self._inline:
2729 2724 databytes = 0
2730 2725 for r in self:
2731 2726 databytes += max(0, self.length(r))
2732 2727 dd = 0
2733 2728 di = actual - len(self) * s - databytes
2734 2729 except IOError as inst:
2735 2730 if inst.errno != errno.ENOENT:
2736 2731 raise
2737 2732 di = 0
2738 2733
2739 2734 return (dd, di)
2740 2735
2741 2736 def files(self):
2742 2737 res = [self.indexfile]
2743 2738 if not self._inline:
2744 2739 res.append(self.datafile)
2745 2740 return res
2746 2741
2747 2742 def emitrevisions(
2748 2743 self,
2749 2744 nodes,
2750 2745 nodesorder=None,
2751 2746 revisiondata=False,
2752 2747 assumehaveparentrevisions=False,
2753 2748 deltamode=repository.CG_DELTAMODE_STD,
2754 2749 sidedata_helpers=None,
2755 2750 ):
2756 2751 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2757 2752 raise error.ProgrammingError(
2758 2753 b'unhandled value for nodesorder: %s' % nodesorder
2759 2754 )
2760 2755
2761 2756 if nodesorder is None and not self._generaldelta:
2762 2757 nodesorder = b'storage'
2763 2758
2764 2759 if (
2765 2760 not self._storedeltachains
2766 2761 and deltamode != repository.CG_DELTAMODE_PREV
2767 2762 ):
2768 2763 deltamode = repository.CG_DELTAMODE_FULL
2769 2764
2770 2765 return storageutil.emitrevisions(
2771 2766 self,
2772 2767 nodes,
2773 2768 nodesorder,
2774 2769 revlogrevisiondelta,
2775 2770 deltaparentfn=self.deltaparent,
2776 2771 candeltafn=self.candelta,
2777 2772 rawsizefn=self.rawsize,
2778 2773 revdifffn=self.revdiff,
2779 2774 flagsfn=self.flags,
2780 2775 deltamode=deltamode,
2781 2776 revisiondata=revisiondata,
2782 2777 assumehaveparentrevisions=assumehaveparentrevisions,
2783 2778 sidedata_helpers=sidedata_helpers,
2784 2779 )
2785 2780
2786 2781 DELTAREUSEALWAYS = b'always'
2787 2782 DELTAREUSESAMEREVS = b'samerevs'
2788 2783 DELTAREUSENEVER = b'never'
2789 2784
2790 2785 DELTAREUSEFULLADD = b'fulladd'
2791 2786
2792 2787 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2793 2788
2794 2789 def clone(
2795 2790 self,
2796 2791 tr,
2797 2792 destrevlog,
2798 2793 addrevisioncb=None,
2799 2794 deltareuse=DELTAREUSESAMEREVS,
2800 2795 forcedeltabothparents=None,
2801 2796 sidedatacompanion=None,
2802 2797 ):
2803 2798 """Copy this revlog to another, possibly with format changes.
2804 2799
2805 2800 The destination revlog will contain the same revisions and nodes.
2806 2801 However, it may not be bit-for-bit identical due to e.g. delta encoding
2807 2802 differences.
2808 2803
2809 2804 The ``deltareuse`` argument control how deltas from the existing revlog
2810 2805 are preserved in the destination revlog. The argument can have the
2811 2806 following values:
2812 2807
2813 2808 DELTAREUSEALWAYS
2814 2809 Deltas will always be reused (if possible), even if the destination
2815 2810 revlog would not select the same revisions for the delta. This is the
2816 2811 fastest mode of operation.
2817 2812 DELTAREUSESAMEREVS
2818 2813 Deltas will be reused if the destination revlog would pick the same
2819 2814 revisions for the delta. This mode strikes a balance between speed
2820 2815 and optimization.
2821 2816 DELTAREUSENEVER
2822 2817 Deltas will never be reused. This is the slowest mode of execution.
2823 2818 This mode can be used to recompute deltas (e.g. if the diff/delta
2824 2819 algorithm changes).
2825 2820 DELTAREUSEFULLADD
2826 2821 Revisions will be re-added as if they were new content. This is
2827 2822 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2828 2823 e.g. large file detection and handling.
2829 2824
2830 2825 Delta computation can be slow, so the choice of delta reuse policy can
2831 2826 significantly affect run time.
2832 2827
2833 2828 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2834 2829 two extremes. Deltas will be reused if they are appropriate. But if the
2835 2830 delta could choose a better revision, it will do so. This means if you
2836 2831 are converting a non-generaldelta revlog to a generaldelta revlog,
2837 2832 deltas will be recomputed if the delta's parent isn't a parent of the
2838 2833 revision.
2839 2834
2840 2835 In addition to the delta policy, the ``forcedeltabothparents``
2841 2836 argument controls whether to force computing deltas against both parents
2842 2837 for merges. If not set, the destination revlog's current setting is kept.
2843 2838
2844 2839 If not None, `sidedatacompanion` is a callable that accepts two
2845 2840 arguments:
2846 2841
2847 2842 (srcrevlog, rev)
2848 2843
2849 2844 and returns a quintet that controls changes to sidedata content from the
2850 2845 old revision to the new clone result:
2851 2846
2852 2847 (dropall, filterout, update, new_flags, dropped_flags)
2853 2848
2854 2849 * if `dropall` is True, all sidedata should be dropped
2855 2850 * `filterout` is a set of sidedata keys that should be dropped
2856 2851 * `update` is a mapping of additional/new key -> value
2857 2852 * `new_flags` is a bitfield of new flags that the revision should get
2858 2853 * `dropped_flags` is a bitfield of flags that the revision should no longer have
2859 2854 """
2860 2855 if deltareuse not in self.DELTAREUSEALL:
2861 2856 raise ValueError(
2862 2857 _(b'value for deltareuse invalid: %s') % deltareuse
2863 2858 )
2864 2859
2865 2860 if len(destrevlog):
2866 2861 raise ValueError(_(b'destination revlog is not empty'))
2867 2862
2868 2863 if getattr(self, 'filteredrevs', None):
2869 2864 raise ValueError(_(b'source revlog has filtered revisions'))
2870 2865 if getattr(destrevlog, 'filteredrevs', None):
2871 2866 raise ValueError(_(b'destination revlog has filtered revisions'))
2872 2867
2873 2868 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2874 2869 # if possible.
2875 2870 oldlazydelta = destrevlog._lazydelta
2876 2871 oldlazydeltabase = destrevlog._lazydeltabase
2877 2872 oldamd = destrevlog._deltabothparents
2878 2873
2879 2874 try:
2880 2875 if deltareuse == self.DELTAREUSEALWAYS:
2881 2876 destrevlog._lazydeltabase = True
2882 2877 destrevlog._lazydelta = True
2883 2878 elif deltareuse == self.DELTAREUSESAMEREVS:
2884 2879 destrevlog._lazydeltabase = False
2885 2880 destrevlog._lazydelta = True
2886 2881 elif deltareuse == self.DELTAREUSENEVER:
2887 2882 destrevlog._lazydeltabase = False
2888 2883 destrevlog._lazydelta = False
2889 2884
2890 2885 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2891 2886
2892 2887 self._clone(
2893 2888 tr,
2894 2889 destrevlog,
2895 2890 addrevisioncb,
2896 2891 deltareuse,
2897 2892 forcedeltabothparents,
2898 2893 sidedatacompanion,
2899 2894 )
2900 2895
2901 2896 finally:
2902 2897 destrevlog._lazydelta = oldlazydelta
2903 2898 destrevlog._lazydeltabase = oldlazydeltabase
2904 2899 destrevlog._deltabothparents = oldamd
2905 2900
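# A hypothetical usage sketch (the opener, the open transaction `tr` and
# the file names are assumptions, not part of this change): copy a revlog
# while recomputing every delta, e.g. after changing the delta algorithm:
#
#     src = revlog(opener, b'data/foo.i')
#     dst = revlog(opener, b'data/foo.i.new')
#     src.clone(tr, dst, deltareuse=revlog.DELTAREUSENEVER)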
2906 2901 def _clone(
2907 2902 self,
2908 2903 tr,
2909 2904 destrevlog,
2910 2905 addrevisioncb,
2911 2906 deltareuse,
2912 2907 forcedeltabothparents,
2913 2908 sidedatacompanion,
2914 2909 ):
2915 2910 """perform the core duty of `revlog.clone` after parameter processing"""
2916 2911 deltacomputer = deltautil.deltacomputer(destrevlog)
2917 2912 index = self.index
2918 2913 for rev in self:
2919 2914 entry = index[rev]
2920 2915
2921 2916 # Some classes override linkrev to take filtered revs into
2922 2917 # account. Use raw entry from index.
2923 2918 flags = entry[0] & 0xFFFF
2924 2919 linkrev = entry[4]
2925 2920 p1 = index[entry[5]][7]
2926 2921 p2 = index[entry[6]][7]
2927 2922 node = entry[7]
2928 2923
2929 2924 sidedataactions = (False, [], {}, 0, 0)
2930 2925 if sidedatacompanion is not None:
2931 2926 sidedataactions = sidedatacompanion(self, rev)
2932 2927
2933 2928 # (Possibly) reuse the delta from the revlog if allowed and
2934 2929 # the revlog chunk is a delta.
2935 2930 cachedelta = None
2936 2931 rawtext = None
2937 2932 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2938 2933 dropall = sidedataactions[0]
2939 2934 filterout = sidedataactions[1]
2940 2935 update = sidedataactions[2]
2941 2936 new_flags = sidedataactions[3]
2942 2937 dropped_flags = sidedataactions[4]
2943 2938 text, sidedata = self._revisiondata(rev)
2944 2939 if dropall:
2945 2940 sidedata = {}
2946 2941 for key in filterout:
2947 2942 sidedata.pop(key, None)
2948 2943 sidedata.update(update)
2949 2944 if not sidedata:
2950 2945 sidedata = None
2951 2946
2952 2947 flags |= new_flags
2953 2948 flags &= ~dropped_flags
2954 2949
2955 2950 destrevlog.addrevision(
2956 2951 text,
2957 2952 tr,
2958 2953 linkrev,
2959 2954 p1,
2960 2955 p2,
2961 2956 cachedelta=cachedelta,
2962 2957 node=node,
2963 2958 flags=flags,
2964 2959 deltacomputer=deltacomputer,
2965 2960 sidedata=sidedata,
2966 2961 )
2967 2962 else:
2968 2963 if destrevlog._lazydelta:
2969 2964 dp = self.deltaparent(rev)
2970 2965 if dp != nullrev:
2971 2966 cachedelta = (dp, bytes(self._chunk(rev)))
2972 2967
2973 2968 if not cachedelta:
2974 2969 rawtext = self.rawdata(rev)
2975 2970
2976 2971 ifh = destrevlog.opener(
2977 2972 destrevlog.indexfile, b'a+', checkambig=False
2978 2973 )
2979 2974 dfh = None
2980 2975 if not destrevlog._inline:
2981 2976 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2982 2977 try:
2983 2978 destrevlog._addrevision(
2984 2979 node,
2985 2980 rawtext,
2986 2981 tr,
2987 2982 linkrev,
2988 2983 p1,
2989 2984 p2,
2990 2985 flags,
2991 2986 cachedelta,
2992 2987 ifh,
2993 2988 dfh,
2994 2989 deltacomputer=deltacomputer,
2995 2990 )
2996 2991 finally:
2997 2992 if dfh:
2998 2993 dfh.close()
2999 2994 ifh.close()
3000 2995
3001 2996 if addrevisioncb:
3002 2997 addrevisioncb(self, rev, node)
3003 2998
3004 2999 def censorrevision(self, tr, censornode, tombstone=b''):
3005 3000 if (self.version & 0xFFFF) == REVLOGV0:
3006 3001 raise error.RevlogError(
3007 3002 _(b'cannot censor with version %d revlogs') % self.version
3008 3003 )
3009 3004
3010 3005 censorrev = self.rev(censornode)
3011 3006 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3012 3007
3013 3008 if len(tombstone) > self.rawsize(censorrev):
3014 3009 raise error.Abort(
3015 3010 _(b'censor tombstone must be no longer than censored data')
3016 3011 )
3017 3012
3018 3013 # Rewriting the revlog in place is hard. Our strategy for censoring is
3019 3014 # to create a new revlog, copy all revisions to it, then replace the
3020 3015 # revlogs on transaction close.
3021 3016
3022 3017 newindexfile = self.indexfile + b'.tmpcensored'
3023 3018 newdatafile = self.datafile + b'.tmpcensored'
3024 3019
3025 3020 # This is a bit dangerous. We could easily have a mismatch of state.
3026 3021 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
3027 3022 newrl.version = self.version
3028 3023 newrl._generaldelta = self._generaldelta
3029 3024 newrl._io = self._io
3030 3025
3031 3026 for rev in self.revs():
3032 3027 node = self.node(rev)
3033 3028 p1, p2 = self.parents(node)
3034 3029
3035 3030 if rev == censorrev:
3036 3031 newrl.addrawrevision(
3037 3032 tombstone,
3038 3033 tr,
3039 3034 self.linkrev(censorrev),
3040 3035 p1,
3041 3036 p2,
3042 3037 censornode,
3043 3038 REVIDX_ISCENSORED,
3044 3039 )
3045 3040
3046 3041 if newrl.deltaparent(rev) != nullrev:
3047 3042 raise error.Abort(
3048 3043 _(
3049 3044 b'censored revision stored as delta; '
3050 3045 b'cannot censor'
3051 3046 ),
3052 3047 hint=_(
3053 3048 b'censoring of revlogs is not '
3054 3049 b'fully implemented; please report '
3055 3050 b'this bug'
3056 3051 ),
3057 3052 )
3058 3053 continue
3059 3054
3060 3055 if self.iscensored(rev):
3061 3056 if self.deltaparent(rev) != nullrev:
3062 3057 raise error.Abort(
3063 3058 _(
3064 3059 b'cannot censor due to censored '
3065 3060 b'revision having delta stored'
3066 3061 )
3067 3062 )
3068 3063 rawtext = self._chunk(rev)
3069 3064 else:
3070 3065 rawtext = self.rawdata(rev)
3071 3066
3072 3067 newrl.addrawrevision(
3073 3068 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3074 3069 )
3075 3070
3076 3071 tr.addbackup(self.indexfile, location=b'store')
3077 3072 if not self._inline:
3078 3073 tr.addbackup(self.datafile, location=b'store')
3079 3074
3080 3075 self.opener.rename(newrl.indexfile, self.indexfile)
3081 3076 if not self._inline:
3082 3077 self.opener.rename(newrl.datafile, self.datafile)
3083 3078
3084 3079 self.clearcaches()
3085 3080 self._loadindex()
3086 3081
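# A hypothetical usage sketch (the revlog `rl`, the open transaction `tr`
# and the node are assumptions): replace one revision's content with a
# tombstone no longer than the censored data, as enforced above:
#
#     rl.censorrevision(tr, badnode, tombstone=b'removed for legal reasons')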
3087 3082 def verifyintegrity(self, state):
3088 3083 """Verifies the integrity of the revlog.
3089 3084
3090 3085 Yields ``revlogproblem`` instances describing problems that are
3091 3086 found.
3092 3087 """
3093 3088 dd, di = self.checksize()
3094 3089 if dd:
3095 3090 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3096 3091 if di:
3097 3092 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3098 3093
3099 3094 version = self.version & 0xFFFF
3100 3095
3101 3096 # The verifier tells us what version revlog we should be.
3102 3097 if version != state[b'expectedversion']:
3103 3098 yield revlogproblem(
3104 3099 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3105 3100 % (self.indexfile, version, state[b'expectedversion'])
3106 3101 )
3107 3102
3108 3103 state[b'skipread'] = set()
3109 3104 state[b'safe_renamed'] = set()
3110 3105
3111 3106 for rev in self:
3112 3107 node = self.node(rev)
3113 3108
3114 3109 # Verify contents. 4 cases to care about:
3115 3110 #
3116 3111 # common: the most common case
3117 3112 # rename: with a rename
3118 3113 # meta: file content starts with b'\1\n', the metadata
3119 3114 # header defined in filelog.py, but without a rename
3120 3115 # ext: content stored externally
3121 3116 #
3122 3117 # More formally, their differences are shown below:
3123 3118 #
3124 3119 # | common | rename | meta | ext
3125 3120 # -------------------------------------------------------
3126 3121 # flags() | 0 | 0 | 0 | not 0
3127 3122 # renamed() | False | True | False | ?
3128 3123 # rawtext[0:2]=='\1\n'| False | True | True | ?
3129 3124 #
3130 3125 # "rawtext" means the raw text stored in revlog data, which
3131 3126 # could be retrieved by "rawdata(rev)". "text"
3132 3127 # mentioned below is "revision(rev)".
3133 3128 #
3134 3129 # There are 3 different lengths stored physically:
3135 3130 # 1. L1: rawsize, stored in revlog index
3136 3131 # 2. L2: len(rawtext), stored in revlog data
3137 3132 # 3. L3: len(text), stored in revlog data if flags==0, or
3138 3133 # possibly somewhere else if flags!=0
3139 3134 #
3140 3135 # L1 should be equal to L2. L3 could be different from them.
3141 3136 # "text" may or may not affect commit hash depending on flag
3142 3137 # processors (see flagutil.addflagprocessor).
3143 3138 #
3144 3139 # | common | rename | meta | ext
3145 3140 # -------------------------------------------------
3146 3141 # rawsize() | L1 | L1 | L1 | L1
3147 3142 # size() | L1 | L2-LM | L1(*) | L1 (?)
3148 3143 # len(rawtext) | L2 | L2 | L2 | L2
3149 3144 # len(text) | L2 | L2 | L2 | L3
3150 3145 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3151 3146 #
3152 3147 # LM: length of metadata, depending on rawtext
3153 3148 # (*): not ideal, see comment in filelog.size
3154 3149 # (?): could be "- len(meta)" if the resolved content has
3155 3150 # rename metadata
3156 3151 #
3157 3152 # Checks needed to be done:
3158 3153 # 1. length check: L1 == L2, in all cases.
3159 3154 # 2. hash check: depending on flag processor, we may need to
3160 3155 # use either "text" (external), or "rawtext" (in revlog).
3161 3156
3162 3157 try:
3163 3158 skipflags = state.get(b'skipflags', 0)
3164 3159 if skipflags:
3165 3160 skipflags &= self.flags(rev)
3166 3161
3167 3162 _verify_revision(self, skipflags, state, node)
3168 3163
3169 3164 l1 = self.rawsize(rev)
3170 3165 l2 = len(self.rawdata(node))
3171 3166
3172 3167 if l1 != l2:
3173 3168 yield revlogproblem(
3174 3169 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3175 3170 node=node,
3176 3171 )
3177 3172
3178 3173 except error.CensoredNodeError:
3179 3174 if state[b'erroroncensored']:
3180 3175 yield revlogproblem(
3181 3176 error=_(b'censored file data'), node=node
3182 3177 )
3183 3178 state[b'skipread'].add(node)
3184 3179 except Exception as e:
3185 3180 yield revlogproblem(
3186 3181 error=_(b'unpacking %s: %s')
3187 3182 % (short(node), stringutil.forcebytestr(e)),
3188 3183 node=node,
3189 3184 )
3190 3185 state[b'skipread'].add(node)
3191 3186
3192 3187 def storageinfo(
3193 3188 self,
3194 3189 exclusivefiles=False,
3195 3190 sharedfiles=False,
3196 3191 revisionscount=False,
3197 3192 trackedsize=False,
3198 3193 storedsize=False,
3199 3194 ):
3200 3195 d = {}
3201 3196
3202 3197 if exclusivefiles:
3203 3198 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3204 3199 if not self._inline:
3205 3200 d[b'exclusivefiles'].append((self.opener, self.datafile))
3206 3201
3207 3202 if sharedfiles:
3208 3203 d[b'sharedfiles'] = []
3209 3204
3210 3205 if revisionscount:
3211 3206 d[b'revisionscount'] = len(self)
3212 3207
3213 3208 if trackedsize:
3214 3209 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3215 3210
3216 3211 if storedsize:
3217 3212 d[b'storedsize'] = sum(
3218 3213 self.opener.stat(path).st_size for path in self.files()
3219 3214 )
3220 3215
3221 3216 return d
3222 3217
3223 3218 def rewrite_sidedata(self, helpers, startrev, endrev):
3224 3219 if self.version & 0xFFFF != REVLOGV2:
3225 3220 return
3226 3221 # inline revlogs are not yet supported because they suffer from an issue when
3227 3222 # rewriting them (since it's not an append-only operation).
3228 3223 # See issue6485.
3229 3224 assert not self._inline
3230 3225 if not helpers[1] and not helpers[2]:
3231 3226 # Nothing to generate or remove
3232 3227 return
3233 3228
3234 3229 new_entries = []
3235 3230 # append the new sidedata
3236 3231 with self._datafp(b'a+') as fp:
3237 3232 # the handle may not be positioned at EOF right after open; seek explicitly (the bug may still exist, see revlog._writeentry)
3238 3233 fp.seek(0, os.SEEK_END)
3239 3234 current_offset = fp.tell()
3240 3235 for rev in range(startrev, endrev + 1):
3241 3236 entry = self.index[rev]
3242 3237 new_sidedata = storageutil.run_sidedata_helpers(
3243 3238 store=self,
3244 3239 sidedata_helpers=helpers,
3245 3240 sidedata={},
3246 3241 rev=rev,
3247 3242 )
3248 3243
3249 3244 serialized_sidedata = sidedatautil.serialize_sidedata(
3250 3245 new_sidedata
3251 3246 )
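# editorial note: index entry fields 8 and 9 hold the existing sidedata
# offset and length; a non-zero value means sidedata is already present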
3252 3247 if entry[8] != 0 or entry[9] != 0:
3253 3248 # rewriting entries that already have sidedata is not
3254 3249 # supported yet, because it introduces garbage data in the
3255 3250 # revlog.
3256 3251 msg = b"Rewriting existing sidedata is not supported yet"
3257 3252 raise error.Abort(msg)
3258 3253 entry = entry[:8]
3259 3254 entry += (current_offset, len(serialized_sidedata))
3260 3255
3261 3256 fp.write(serialized_sidedata)
3262 3257 new_entries.append(entry)
3263 3258 current_offset += len(serialized_sidedata)
3264 3259
3265 3260 # rewrite the new index entries
3266 3261 with self._indexfp(b'w+') as fp:
3267 3262 fp.seek(startrev * self._io.size)
3268 3263 for i, entry in enumerate(new_entries):
3269 3264 rev = startrev + i
3270 3265 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3271 3266 packed = self._io.packentry(entry, self.node, self.version, rev)
3272 3267 fp.write(packed)
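rewrite_sidedata above follows an append-then-patch pattern: new sidedata payloads are appended to the end of the data file, then the fixed-size index records are rewritten in place with each payload's offset and length. A minimal sketch of that pattern, using an illustrative record layout and hypothetical file names (this is not the revlog on-disk format):

    import struct

    # illustrative fixed-size record: 8-byte payload offset, 4-byte length
    RECORD = struct.Struct(b">QL")

    def append_then_patch(data_path, index_path, rev, payload):
        # phase 1: append the payload at the end of the data file
        with open(data_path, "ab") as fp:
            fp.seek(0, 2)  # append mode may not position at EOF everywhere
            offset = fp.tell()
            fp.write(payload)
        # phase 2: overwrite this rev's fixed-size index record in place
        with open(index_path, "r+b") as fp:
            fp.seek(rev * RECORD.size)
            fp.write(RECORD.pack(offset, len(payload)))

The two phases mirror the method above: the data write is append-only, while the index rewrite relies on every record having a fixed size, so record N is found at N * RECORD.size.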
@@ -1,65 +1,77 b''
1 1 # constants.py - constants used for revlog logic
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 import struct
13
12 14 from ..interfaces import repository
13 15
14 16 ### main revlog header
15 17
16 18 ## revlog version
17 19 REVLOGV0 = 0
18 20 REVLOGV1 = 1
19 21 # Dummy value until file format is finalized.
20 22 REVLOGV2 = 0xDEAD
21 23
22 24 ## global revlog header flags
23 25 # Shared across v1 and v2.
24 26 FLAG_INLINE_DATA = 1 << 16
25 27 # Only used by v1, implied by v2.
26 28 FLAG_GENERALDELTA = 1 << 17
27 29 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
28 30 REVLOG_DEFAULT_FORMAT = REVLOGV1
29 31 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
30 32 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
31 33 REVLOGV2_FLAGS = FLAG_INLINE_DATA
32 34
33 35 ### individual entry
34 36
37 ## index v0:
38 # 4 bytes: offset
39 # 4 bytes: compressed length
40 # 4 bytes: base rev
41 # 4 bytes: link rev
42 # 20 bytes: parent 1 nodeid
43 # 20 bytes: parent 2 nodeid
44 # 20 bytes: nodeid
45 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
46
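# [editorial sketch, not in the original] v0 entries are fixed-size
# (INDEX_ENTRY_V0.size == 76 bytes), so entry "rev" of the raw index
# bytes ("index_data" is a hypothetical name) unpacks as:
#
#     start = rev * INDEX_ENTRY_V0.size
#     offset, length, base, link, p1, p2, node = INDEX_ENTRY_V0.unpack(
#         index_data[start : start + INDEX_ENTRY_V0.size]
#     )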
35 47 # revlog index flags
36 48
37 49 # For historical reasons, revlog's internal flags were exposed via the
38 50 # wire protocol and are even exposed in parts of the storage APIs.
39 51
40 52 # revision has censor metadata, must be verified
41 53 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
42 54 # revision hash does not match data (narrowhg)
43 55 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
44 56 # revision data is stored externally
45 57 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
46 58 # revision data contains extra metadata not part of the official digest
47 59 REVIDX_SIDEDATA = repository.REVISION_FLAG_SIDEDATA
48 60 # revision changes files in a way that could affect copy tracing.
49 61 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
50 62 REVIDX_DEFAULT_FLAGS = 0
51 63 # stable order in which flags need to be processed and their processors applied
52 64 REVIDX_FLAGS_ORDER = [
53 65 REVIDX_ISCENSORED,
54 66 REVIDX_ELLIPSIS,
55 67 REVIDX_EXTSTORED,
56 68 REVIDX_SIDEDATA,
57 69 REVIDX_HASCOPIESINFO,
58 70 ]
59 71
60 72 # bitmask of flags that could cause rawdata content to change
61 73 REVIDX_RAWTEXT_CHANGING_FLAGS = (
62 74 REVIDX_ISCENSORED | REVIDX_EXTSTORED | REVIDX_SIDEDATA
63 75 )
64 76
65 77 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
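As a closing illustration (editorial, not part of the changeset): REVIDX_FLAGS_ORDER exists so every consumer walks a revision's flags in the same deterministic sequence. A minimal sketch, assuming revflags is the flag word taken from an index entry:

    def active_flags(revflags):
        # report the set flags in the one stable, agreed-upon order
        return [flag for flag in REVIDX_FLAGS_ORDER if revflags & flag]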