revlog: implement a "default compression" mode...
marmoute
r48029:ff9fd710 default
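
The diff below introduces a third chunk compression mode, COMP_MODE_DEFAULT: rather than prefixing every chunk with a one-byte compression header, a revlog can declare its compression engine once in the docket (`default_compression_header`) and store chunks headerless, with the new `_decompressor` property resolving that engine for readers. A minimal sketch of the resulting dispatch, assuming a hypothetical helper name (the actual chunk-reading dispatch lives further down the file, outside the lines shown here):

from mercurial.revlogutils.constants import (
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
)

def inflate_chunk(rl, data, comp_mode):
    """Sketch: turn an on-disk chunk back into raw (delta) bytes."""
    if comp_mode == COMP_MODE_PLAIN:
        # stored uncompressed, nothing to do
        return data
    if comp_mode == COMP_MODE_DEFAULT:
        # headerless chunk: use the engine declared once in the docket
        return rl._decompressor(data)
    # COMP_MODE_INLINE: the chunk's first byte names the engine
    return rl.decompress(data)
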
@@ -1,3364 +1,3384 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 COMP_MODE_DEFAULT,
38 39 COMP_MODE_INLINE,
39 40 COMP_MODE_PLAIN,
40 41 FEATURES_BY_VERSION,
41 42 FLAG_GENERALDELTA,
42 43 FLAG_INLINE_DATA,
43 44 INDEX_HEADER,
44 45 REVLOGV0,
45 46 REVLOGV1,
46 47 REVLOGV1_FLAGS,
47 48 REVLOGV2,
48 49 REVLOGV2_FLAGS,
49 50 REVLOG_DEFAULT_FLAGS,
50 51 REVLOG_DEFAULT_FORMAT,
51 52 REVLOG_DEFAULT_VERSION,
52 53 SUPPORTED_FLAGS,
53 54 )
54 55 from .revlogutils.flagutil import (
55 56 REVIDX_DEFAULT_FLAGS,
56 57 REVIDX_ELLIPSIS,
57 58 REVIDX_EXTSTORED,
58 59 REVIDX_FLAGS_ORDER,
59 60 REVIDX_HASCOPIESINFO,
60 61 REVIDX_ISCENSORED,
61 62 REVIDX_RAWTEXT_CHANGING_FLAGS,
62 63 )
63 64 from .thirdparty import attr
64 65 from . import (
65 66 ancestor,
66 67 dagop,
67 68 error,
68 69 mdiff,
69 70 policy,
70 71 pycompat,
71 72 templatefilters,
72 73 util,
73 74 )
74 75 from .interfaces import (
75 76 repository,
76 77 util as interfaceutil,
77 78 )
78 79 from .revlogutils import (
79 80 deltas as deltautil,
80 81 docket as docketutil,
81 82 flagutil,
82 83 nodemap as nodemaputil,
83 84 revlogv0,
84 85 sidedata as sidedatautil,
85 86 )
86 87 from .utils import (
87 88 storageutil,
88 89 stringutil,
89 90 )
90 91
91 92 # blanked usage of all the names to prevent pyflakes constraints
92 93 # We need these names available in the module for extensions.
93 94
94 95 REVLOGV0
95 96 REVLOGV1
96 97 REVLOGV2
97 98 FLAG_INLINE_DATA
98 99 FLAG_GENERALDELTA
99 100 REVLOG_DEFAULT_FLAGS
100 101 REVLOG_DEFAULT_FORMAT
101 102 REVLOG_DEFAULT_VERSION
102 103 REVLOGV1_FLAGS
103 104 REVLOGV2_FLAGS
104 105 REVIDX_ISCENSORED
105 106 REVIDX_ELLIPSIS
106 107 REVIDX_HASCOPIESINFO
107 108 REVIDX_EXTSTORED
108 109 REVIDX_DEFAULT_FLAGS
109 110 REVIDX_FLAGS_ORDER
110 111 REVIDX_RAWTEXT_CHANGING_FLAGS
111 112
112 113 parsers = policy.importmod('parsers')
113 114 rustancestor = policy.importrust('ancestor')
114 115 rustdagop = policy.importrust('dagop')
115 116 rustrevlog = policy.importrust('revlog')
116 117
117 118 # Aliased for performance.
118 119 _zlibdecompress = zlib.decompress
119 120
120 121 # max size of revlog with inline data
121 122 _maxinline = 131072
122 123 _chunksize = 1048576
123 124
124 125 # Flag processors for REVIDX_ELLIPSIS.
125 126 def ellipsisreadprocessor(rl, text):
126 127 return text, False
127 128
128 129
129 130 def ellipsiswriteprocessor(rl, text):
130 131 return text, False
131 132
132 133
133 134 def ellipsisrawprocessor(rl, text):
134 135 return False
135 136
136 137
137 138 ellipsisprocessor = (
138 139 ellipsisreadprocessor,
139 140 ellipsiswriteprocessor,
140 141 ellipsisrawprocessor,
141 142 )
142 143
143 144
144 145 def offset_type(offset, type):
145 146 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
146 147 raise ValueError(b'unknown revlog index flags')
147 148 return int(int(offset) << 16 | type)
148 149
149 150
150 151 def _verify_revision(rl, skipflags, state, node):
151 152 """Verify the integrity of the given revlog ``node`` while providing a hook
152 153 point for extensions to influence the operation."""
153 154 if skipflags:
154 155 state[b'skipread'].add(node)
155 156 else:
156 157 # Side-effect: read content and verify hash.
157 158 rl.revision(node)
158 159
159 160
160 161 # True if a fast implementation for persistent-nodemap is available
161 162 #
162 163 # We also consider we have a "fast" implementation in "pure" python because
163 164 # people using pure don't really have performance considerations (and a
164 165 # wheelbarrow of other slowness sources)
165 166 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
166 167 parsers, 'BaseIndexObject'
167 168 )
168 169
169 170
170 171 @attr.s(slots=True, frozen=True)
171 172 class _revisioninfo(object):
172 173 """Information about a revision that allows building its fulltext
173 174 node: expected hash of the revision
174 175 p1, p2: parent revs of the revision
175 176 btext: built text cache consisting of a one-element list
176 177 cachedelta: (baserev, uncompressed_delta) or None
177 178 flags: flags associated with the revision storage
178 179
179 180 One of btext[0] or cachedelta must be set.
180 181 """
181 182
182 183 node = attr.ib()
183 184 p1 = attr.ib()
184 185 p2 = attr.ib()
185 186 btext = attr.ib()
186 187 textlen = attr.ib()
187 188 cachedelta = attr.ib()
188 189 flags = attr.ib()
189 190
190 191
191 192 @interfaceutil.implementer(repository.irevisiondelta)
192 193 @attr.s(slots=True)
193 194 class revlogrevisiondelta(object):
194 195 node = attr.ib()
195 196 p1node = attr.ib()
196 197 p2node = attr.ib()
197 198 basenode = attr.ib()
198 199 flags = attr.ib()
199 200 baserevisionsize = attr.ib()
200 201 revision = attr.ib()
201 202 delta = attr.ib()
202 203 sidedata = attr.ib()
203 204 protocol_flags = attr.ib()
204 205 linknode = attr.ib(default=None)
205 206
206 207
207 208 @interfaceutil.implementer(repository.iverifyproblem)
208 209 @attr.s(frozen=True)
209 210 class revlogproblem(object):
210 211 warning = attr.ib(default=None)
211 212 error = attr.ib(default=None)
212 213 node = attr.ib(default=None)
213 214
214 215
215 216 def parse_index_v1(data, inline):
216 217 # call the C implementation to parse the index data
217 218 index, cache = parsers.parse_index2(data, inline)
218 219 return index, cache
219 220
220 221
221 222 def parse_index_v2(data, inline):
222 223 # call the C implementation to parse the index data
223 224 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
224 225 return index, cache
225 226
226 227
227 228 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
228 229
229 230 def parse_index_v1_nodemap(data, inline):
230 231 index, cache = parsers.parse_index_devel_nodemap(data, inline)
231 232 return index, cache
232 233
233 234
234 235 else:
235 236 parse_index_v1_nodemap = None
236 237
237 238
238 239 def parse_index_v1_mixed(data, inline):
239 240 index, cache = parse_index_v1(data, inline)
240 241 return rustrevlog.MixedIndex(index), cache
241 242
242 243
243 244 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
244 245 # signed integer)
245 246 _maxentrysize = 0x7FFFFFFF
246 247
247 248
248 249 class revlog(object):
249 250 """
250 251 the underlying revision storage object
251 252
252 253 A revlog consists of two parts, an index and the revision data.
253 254
254 255 The index is a file with a fixed record size containing
255 256 information on each revision, including its nodeid (hash), the
256 257 nodeids of its parents, the position and offset of its data within
257 258 the data file, and the revision it's based on. Finally, each entry
258 259 contains a linkrev entry that can serve as a pointer to external
259 260 data.
260 261
261 262 The revision data itself is a linear collection of data chunks.
262 263 Each chunk represents a revision and is usually represented as a
263 264 delta against the previous chunk. To bound lookup time, runs of
264 265 deltas are limited to about 2 times the length of the original
265 266 version data. This makes retrieval of a version proportional to
266 267 its size, or O(1) relative to the number of revisions.
267 268
268 269 Both pieces of the revlog are written to in an append-only
269 270 fashion, which means we never need to rewrite a file to insert or
270 271 remove data, and can use some simple techniques to avoid the need
271 272 for locking while reading.
272 273
273 274 If checkambig, indexfile is opened with checkambig=True at
274 275 writing, to avoid file stat ambiguity.
275 276
276 277 If mmaplargeindex is True, and an mmapindexthreshold is set, the
277 278 index will be mmapped rather than read if it is larger than the
278 279 configured threshold.
279 280
280 281 If censorable is True, the revlog can have censored revisions.
281 282
282 283 If `upperboundcomp` is not None, this is the expected maximal gain from
283 284 compression for the data content.
284 285
285 286 `concurrencychecker` is an optional function that receives 3 arguments: a
286 287 file handle, a filename, and an expected position. It should check whether
287 288 the current position in the file handle is valid, and log/warn/fail (by
288 289 raising).
289 290
290 291
291 292 Internal details
292 293 ----------------
293 294
294 295 A large part of the revlog logic deals with revisions' "index entries", tuple
295 296 objects that contain the same "items" whatever the revlog version.
296 297 Different versions will have different ways of storing these items (sometimes
297 298 not having them at all), but the tuple will always be the same. New fields
298 299 are usually added at the end to avoid breaking existing code that relies
299 300 on the existing order. The fields are defined as follows:
300 301
301 302 [0] offset:
302 303 The byte index of the start of revision data chunk.
303 304 That value is shifted up by 16 bits. Use "offset = field >> 16" to
304 305 retrieve it.
305 306
306 307 flags:
307 308 A flag field that carries special information or changes the behavior
308 309 of the revision. (see `REVIDX_*` constants for details)
309 310 The flag field only occupies the first 16 bits of this field,
310 311 use "flags = field & 0xFFFF" to retrieve the value.
311 312
312 313 [1] compressed length:
313 314 The size, in bytes, of the chunk on disk
314 315
315 316 [2] uncompressed length:
316 317 The size, in bytes, of the full revision once reconstructed.
317 318
318 319 [3] base rev:
319 320 Either the base of the revision delta chain (without general
320 321 delta), or the base of the delta (stored in the data chunk)
321 322 with general delta.
322 323
323 324 [4] link rev:
324 325 Changelog revision number of the changeset introducing this
325 326 revision.
326 327
327 328 [5] parent 1 rev:
328 329 Revision number of the first parent
329 330
330 331 [6] parent 2 rev:
331 332 Revision number of the second parent
332 333
333 334 [7] node id:
334 335 The node id of the current revision
335 336
336 337 [8] sidedata offset:
337 338 The byte index of the start of the revision's side-data chunk.
338 339
339 340 [9] sidedata chunk length:
340 341 The size, in bytes, of the revision's side-data chunk.
341 342
342 343 [10] data compression mode:
343 344 two bits that detail the way the data chunk is compressed on disk.
344 345 (see "COMP_MODE_*" constants for details). For revlog version 0 and
345 346 1 this will always be COMP_MODE_INLINE.
346 347
347 348 """
348 349
349 350 _flagserrorclass = error.RevlogError
350 351
351 352 def __init__(
352 353 self,
353 354 opener,
354 355 target,
355 356 radix,
356 357 postfix=None, # only exists for `tmpcensored` now
357 358 checkambig=False,
358 359 mmaplargeindex=False,
359 360 censorable=False,
360 361 upperboundcomp=None,
361 362 persistentnodemap=False,
362 363 concurrencychecker=None,
363 364 trypending=False,
364 365 ):
365 366 """
366 367 create a revlog object
367 368
368 369 opener is a function that abstracts the file opening operation
369 370 and can be used to implement COW semantics or the like.
370 371
371 372 `target`: a (KIND, ID) tuple that identifies the content stored in
372 373 this revlog. It helps the rest of the code to understand what the revlog
373 374 is about without having to resort to heuristics and index filename
374 375 analysis. Note that this must reliably be set by normal code, but
375 376 test, debug, or performance measurement code might not set it to an
376 377 accurate value.
377 378 """
378 379 self.upperboundcomp = upperboundcomp
379 380
380 381 self.radix = radix
381 382
382 383 self._docket_file = None
383 384 self._indexfile = None
384 385 self._datafile = None
385 386 self._nodemap_file = None
386 387 self.postfix = postfix
387 388 self._trypending = trypending
388 389 self.opener = opener
389 390 if persistentnodemap:
390 391 self._nodemap_file = nodemaputil.get_nodemap_file(self)
391 392
392 393 assert target[0] in ALL_KINDS
393 394 assert len(target) == 2
394 395 self.target = target
395 396 # When True, indexfile is opened with checkambig=True at writing, to
396 397 # avoid file stat ambiguity.
397 398 self._checkambig = checkambig
398 399 self._mmaplargeindex = mmaplargeindex
399 400 self._censorable = censorable
400 401 # 3-tuple of (node, rev, text) for a raw revision.
401 402 self._revisioncache = None
402 403 # Maps rev to chain base rev.
403 404 self._chainbasecache = util.lrucachedict(100)
404 405 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
405 406 self._chunkcache = (0, b'')
406 407 # How much data to read and cache into the raw revlog data cache.
407 408 self._chunkcachesize = 65536
408 409 self._maxchainlen = None
409 410 self._deltabothparents = True
410 411 self.index = None
411 412 self._docket = None
412 413 self._nodemap_docket = None
413 414 # Mapping of partial identifiers to full nodes.
414 415 self._pcache = {}
415 416 # Mapping of revision integer to full node.
416 417 self._compengine = b'zlib'
417 418 self._compengineopts = {}
418 419 self._maxdeltachainspan = -1
419 420 self._withsparseread = False
420 421 self._sparserevlog = False
421 422 self.hassidedata = False
422 423 self._srdensitythreshold = 0.50
423 424 self._srmingapsize = 262144
424 425
425 426 # Make copy of flag processors so each revlog instance can support
426 427 # custom flags.
427 428 self._flagprocessors = dict(flagutil.flagprocessors)
428 429
429 430 # 2-tuple of file handles being used for active writing.
430 431 self._writinghandles = None
431 432 # prevent nesting of addgroup
432 433 self._adding_group = None
433 434
434 435 self._loadindex()
435 436
436 437 self._concurrencychecker = concurrencychecker
437 438
438 439 def _init_opts(self):
439 440 """process options (from above/config) to setup associated default revlog mode
440 441
441 442 These values might be affected when actually reading on-disk information.
442 443
443 444 The relevant values are returned for use in _loadindex().
444 445
445 446 * newversionflags:
446 447 version header to use if we need to create a new revlog
447 448
448 449 * mmapindexthreshold:
449 450 minimal index size at which to start using mmap
450 451
451 452 * force_nodemap:
452 453 force the usage of a "development" version of the nodemap code
453 454 """
454 455 mmapindexthreshold = None
455 456 opts = self.opener.options
456 457
457 458 if b'revlogv2' in opts:
458 459 new_header = REVLOGV2 | FLAG_INLINE_DATA
459 460 elif b'revlogv1' in opts:
460 461 new_header = REVLOGV1 | FLAG_INLINE_DATA
461 462 if b'generaldelta' in opts:
462 463 new_header |= FLAG_GENERALDELTA
463 464 elif b'revlogv0' in self.opener.options:
464 465 new_header = REVLOGV0
465 466 else:
466 467 new_header = REVLOG_DEFAULT_VERSION
467 468
468 469 if b'chunkcachesize' in opts:
469 470 self._chunkcachesize = opts[b'chunkcachesize']
470 471 if b'maxchainlen' in opts:
471 472 self._maxchainlen = opts[b'maxchainlen']
472 473 if b'deltabothparents' in opts:
473 474 self._deltabothparents = opts[b'deltabothparents']
474 475 self._lazydelta = bool(opts.get(b'lazydelta', True))
475 476 self._lazydeltabase = False
476 477 if self._lazydelta:
477 478 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
478 479 if b'compengine' in opts:
479 480 self._compengine = opts[b'compengine']
480 481 if b'zlib.level' in opts:
481 482 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
482 483 if b'zstd.level' in opts:
483 484 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
484 485 if b'maxdeltachainspan' in opts:
485 486 self._maxdeltachainspan = opts[b'maxdeltachainspan']
486 487 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
487 488 mmapindexthreshold = opts[b'mmapindexthreshold']
488 489 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
489 490 withsparseread = bool(opts.get(b'with-sparse-read', False))
490 491 # sparse-revlog forces sparse-read
491 492 self._withsparseread = self._sparserevlog or withsparseread
492 493 if b'sparse-read-density-threshold' in opts:
493 494 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
494 495 if b'sparse-read-min-gap-size' in opts:
495 496 self._srmingapsize = opts[b'sparse-read-min-gap-size']
496 497 if opts.get(b'enableellipsis'):
497 498 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
498 499
499 500 # revlog v0 doesn't have flag processors
500 501 for flag, processor in pycompat.iteritems(
501 502 opts.get(b'flagprocessors', {})
502 503 ):
503 504 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
504 505
505 506 if self._chunkcachesize <= 0:
506 507 raise error.RevlogError(
507 508 _(b'revlog chunk cache size %r is not greater than 0')
508 509 % self._chunkcachesize
509 510 )
510 511 elif self._chunkcachesize & (self._chunkcachesize - 1):
511 512 raise error.RevlogError(
512 513 _(b'revlog chunk cache size %r is not a power of 2')
513 514 % self._chunkcachesize
514 515 )
515 516 force_nodemap = opts.get(b'devel-force-nodemap', False)
516 517 return new_header, mmapindexthreshold, force_nodemap
517 518
518 519 def _get_data(self, filepath, mmap_threshold, size=None):
519 520 """return a file content with or without mmap
520 521
521 522 If the file is missing, return the empty string"""
522 523 try:
523 524 with self.opener(filepath) as fp:
524 525 if mmap_threshold is not None:
525 526 file_size = self.opener.fstat(fp).st_size
526 527 if file_size >= mmap_threshold:
527 528 if size is not None:
528 529 # avoid potential mmap crash
529 530 size = min(file_size, size)
530 531 # TODO: should .close() to release resources without
531 532 # relying on Python GC
532 533 if size is None:
533 534 return util.buffer(util.mmapread(fp))
534 535 else:
535 536 return util.buffer(util.mmapread(fp, size))
536 537 if size is None:
537 538 return fp.read()
538 539 else:
539 540 return fp.read(size)
540 541 except IOError as inst:
541 542 if inst.errno != errno.ENOENT:
542 543 raise
543 544 return b''
544 545
545 546 def _loadindex(self):
546 547
547 548 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
548 549
549 550 if self.postfix is not None:
550 551 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
551 552 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
552 553 entry_point = b'%s.i.a' % self.radix
553 554 else:
554 555 entry_point = b'%s.i' % self.radix
555 556
556 557 entry_data = b''
557 558 self._initempty = True
558 559 entry_data = self._get_data(entry_point, mmapindexthreshold)
559 560 if len(entry_data) > 0:
560 561 header = INDEX_HEADER.unpack(entry_data[:4])[0]
561 562 self._initempty = False
562 563 else:
563 564 header = new_header
564 565
565 566 self._format_flags = header & ~0xFFFF
566 567 self._format_version = header & 0xFFFF
567 568
568 569 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
569 570 if supported_flags is None:
570 571 msg = _(b'unknown version (%d) in revlog %s')
571 572 msg %= (self._format_version, self.display_id)
572 573 raise error.RevlogError(msg)
573 574 elif self._format_flags & ~supported_flags:
574 575 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
575 576 display_flag = self._format_flags >> 16
576 577 msg %= (display_flag, self._format_version, self.display_id)
577 578 raise error.RevlogError(msg)
578 579
579 580 features = FEATURES_BY_VERSION[self._format_version]
580 581 self._inline = features[b'inline'](self._format_flags)
581 582 self._generaldelta = features[b'generaldelta'](self._format_flags)
582 583 self.hassidedata = features[b'sidedata']
583 584
584 585 if not features[b'docket']:
585 586 self._indexfile = entry_point
586 587 index_data = entry_data
587 588 else:
588 589 self._docket_file = entry_point
589 590 if self._initempty:
590 591 self._docket = docketutil.default_docket(self, header)
591 592 else:
592 593 self._docket = docketutil.parse_docket(
593 594 self, entry_data, use_pending=self._trypending
594 595 )
595 596 self._indexfile = self._docket.index_filepath()
596 597 index_data = b''
597 598 index_size = self._docket.index_end
598 599 if index_size > 0:
599 600 index_data = self._get_data(
600 601 self._indexfile, mmapindexthreshold, size=index_size
601 602 )
602 603 if len(index_data) < index_size:
603 604 msg = _(b'too few index data for %s: got %d, expected %d')
604 605 msg %= (self.display_id, len(index_data), index_size)
605 606 raise error.RevlogError(msg)
606 607
607 608 self._inline = False
608 609 # generaldelta is implied by version 2 revlogs.
609 610 self._generaldelta = True
610 611 # the logic for persistent nodemap will be dealt with within the
611 612 # main docket, so disable it for now.
612 613 self._nodemap_file = None
613 614
614 615 if self.postfix is None:
615 616 self._datafile = b'%s.d' % self.radix
616 617 else:
617 618 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
618 619
619 620 self.nodeconstants = sha1nodeconstants
620 621 self.nullid = self.nodeconstants.nullid
621 622
622 623 # sparse-revlog can't be on without general-delta (issue6056)
623 624 if not self._generaldelta:
624 625 self._sparserevlog = False
625 626
626 627 self._storedeltachains = True
627 628
628 629 devel_nodemap = (
629 630 self._nodemap_file
630 631 and force_nodemap
631 632 and parse_index_v1_nodemap is not None
632 633 )
633 634
634 635 use_rust_index = False
635 636 if rustrevlog is not None:
636 637 if self._nodemap_file is not None:
637 638 use_rust_index = True
638 639 else:
639 640 use_rust_index = self.opener.options.get(b'rust.index')
640 641
641 642 self._parse_index = parse_index_v1
642 643 if self._format_version == REVLOGV0:
643 644 self._parse_index = revlogv0.parse_index_v0
644 645 elif self._format_version == REVLOGV2:
645 646 self._parse_index = parse_index_v2
646 647 elif devel_nodemap:
647 648 self._parse_index = parse_index_v1_nodemap
648 649 elif use_rust_index:
649 650 self._parse_index = parse_index_v1_mixed
650 651 try:
651 652 d = self._parse_index(index_data, self._inline)
652 653 index, _chunkcache = d
653 654 use_nodemap = (
654 655 not self._inline
655 656 and self._nodemap_file is not None
656 657 and util.safehasattr(index, 'update_nodemap_data')
657 658 )
658 659 if use_nodemap:
659 660 nodemap_data = nodemaputil.persisted_data(self)
660 661 if nodemap_data is not None:
661 662 docket = nodemap_data[0]
662 663 if (
663 664 len(d[0]) > docket.tip_rev
664 665 and d[0][docket.tip_rev][7] == docket.tip_node
665 666 ):
666 667 # no changelog tampering
667 668 self._nodemap_docket = docket
668 669 index.update_nodemap_data(*nodemap_data)
669 670 except (ValueError, IndexError):
670 671 raise error.RevlogError(
671 672 _(b"index %s is corrupted") % self.display_id
672 673 )
673 674 self.index, self._chunkcache = d
674 675 if not self._chunkcache:
675 676 self._chunkclear()
676 677 # revnum -> (chain-length, sum-delta-length)
677 678 self._chaininfocache = util.lrucachedict(500)
678 679 # revlog header -> revlog compressor
679 680 self._decompressors = {}
680 681
681 682 @util.propertycache
682 683 def revlog_kind(self):
683 684 return self.target[0]
684 685
685 686 @util.propertycache
686 687 def display_id(self):
687 688 """The public facing "ID" of the revlog that we use in message"""
688 689 # Maybe we should build a user facing representation of
689 690 # revlog.target instead of using `self.radix`
690 691 return self.radix
691 692
692 693 def _get_decompressor(self, t):
693 694 try:
694 695 compressor = self._decompressors[t]
695 696 except KeyError:
696 697 try:
697 698 engine = util.compengines.forrevlogheader(t)
698 699 compressor = engine.revlogcompressor(self._compengineopts)
699 700 self._decompressors[t] = compressor
700 701 except KeyError:
701 702 raise error.RevlogError(
702 703 _(b'unknown compression type %s') % binascii.hexlify(t)
703 704 )
704 705 return compressor
705 706
706 707 @util.propertycache
707 708 def _compressor(self):
708 709 engine = util.compengines[self._compengine]
709 710 return engine.revlogcompressor(self._compengineopts)
710 711
712 @util.propertycache
713 def _decompressor(self):
714 """the default decompressor"""
715 if self._docket is None:
716 return None
717 t = self._docket.default_compression_header
718 c = self._get_decompressor(t)
719 return c.decompress
720
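# Usage sketch (illustrative): a chunk stored with COMP_MODE_DEFAULT
# carries no per-chunk compression header, so a reader would inflate it
# with the docket-declared engine exposed by `_decompressor` above:
#     text = self._decompressor(compressed_chunk)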
711 721 def _indexfp(self):
712 722 """file object for the revlog's index file"""
713 723 return self.opener(self._indexfile, mode=b"r")
714 724
715 725 def __index_write_fp(self):
716 726 # You should not use this directly; use `_writing` instead
717 727 try:
718 728 f = self.opener(
719 729 self._indexfile, mode=b"r+", checkambig=self._checkambig
720 730 )
721 731 if self._docket is None:
722 732 f.seek(0, os.SEEK_END)
723 733 else:
724 734 f.seek(self._docket.index_end, os.SEEK_SET)
725 735 return f
726 736 except IOError as inst:
727 737 if inst.errno != errno.ENOENT:
728 738 raise
729 739 return self.opener(
730 740 self._indexfile, mode=b"w+", checkambig=self._checkambig
731 741 )
732 742
733 743 def __index_new_fp(self):
734 744 # You should not use this unless you are upgrading from an inline revlog
735 745 return self.opener(
736 746 self._indexfile,
737 747 mode=b"w",
738 748 checkambig=self._checkambig,
739 749 atomictemp=True,
740 750 )
741 751
742 752 def _datafp(self, mode=b'r'):
743 753 """file object for the revlog's data file"""
744 754 return self.opener(self._datafile, mode=mode)
745 755
746 756 @contextlib.contextmanager
747 757 def _datareadfp(self, existingfp=None):
748 758 """file object suitable to read data"""
749 759 # Use explicit file handle, if given.
750 760 if existingfp is not None:
751 761 yield existingfp
752 762
753 763 # Use a file handle being actively used for writes, if available.
754 764 # There is some danger to doing this because reads will seek the
755 765 # file. However, _writeentry() performs a SEEK_END before all writes,
756 766 # so we should be safe.
757 767 elif self._writinghandles:
758 768 if self._inline:
759 769 yield self._writinghandles[0]
760 770 else:
761 771 yield self._writinghandles[1]
762 772
763 773 # Otherwise open a new file handle.
764 774 else:
765 775 if self._inline:
766 776 func = self._indexfp
767 777 else:
768 778 func = self._datafp
769 779 with func() as fp:
770 780 yield fp
771 781
772 782 def tiprev(self):
773 783 return len(self.index) - 1
774 784
775 785 def tip(self):
776 786 return self.node(self.tiprev())
777 787
778 788 def __contains__(self, rev):
779 789 return 0 <= rev < len(self)
780 790
781 791 def __len__(self):
782 792 return len(self.index)
783 793
784 794 def __iter__(self):
785 795 return iter(pycompat.xrange(len(self)))
786 796
787 797 def revs(self, start=0, stop=None):
788 798 """iterate over all rev in this revlog (from start to stop)"""
789 799 return storageutil.iterrevs(len(self), start=start, stop=stop)
790 800
791 801 @property
792 802 def nodemap(self):
793 803 msg = (
794 804 b"revlog.nodemap is deprecated, "
795 805 b"use revlog.index.[has_node|rev|get_rev]"
796 806 )
797 807 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
798 808 return self.index.nodemap
799 809
800 810 @property
801 811 def _nodecache(self):
802 812 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
803 813 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
804 814 return self.index.nodemap
805 815
806 816 def hasnode(self, node):
807 817 try:
808 818 self.rev(node)
809 819 return True
810 820 except KeyError:
811 821 return False
812 822
813 823 def candelta(self, baserev, rev):
814 824 """whether two revisions (baserev, rev) can be delta-ed or not"""
815 825 # Disable delta if either rev requires a content-changing flag
816 826 # processor (ex. LFS). This is because such a flag processor can alter
817 827 # the rawtext content that the delta will be based on, and two clients
818 828 # could have the same revlog node with different flags (i.e. different
819 829 # rawtext contents) and the delta could be incompatible.
820 830 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
821 831 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
822 832 ):
823 833 return False
824 834 return True
825 835
826 836 def update_caches(self, transaction):
827 837 if self._nodemap_file is not None:
828 838 if transaction is None:
829 839 nodemaputil.update_persistent_nodemap(self)
830 840 else:
831 841 nodemaputil.setup_persistent_nodemap(transaction, self)
832 842
833 843 def clearcaches(self):
834 844 self._revisioncache = None
835 845 self._chainbasecache.clear()
836 846 self._chunkcache = (0, b'')
837 847 self._pcache = {}
838 848 self._nodemap_docket = None
839 849 self.index.clearcaches()
840 850 # The python code is the one responsible for validating the docket; we
841 851 # end up having to refresh it here.
842 852 use_nodemap = (
843 853 not self._inline
844 854 and self._nodemap_file is not None
845 855 and util.safehasattr(self.index, 'update_nodemap_data')
846 856 )
847 857 if use_nodemap:
848 858 nodemap_data = nodemaputil.persisted_data(self)
849 859 if nodemap_data is not None:
850 860 self._nodemap_docket = nodemap_data[0]
851 861 self.index.update_nodemap_data(*nodemap_data)
852 862
853 863 def rev(self, node):
854 864 try:
855 865 return self.index.rev(node)
856 866 except TypeError:
857 867 raise
858 868 except error.RevlogError:
859 869 # parsers.c radix tree lookup failed
860 870 if (
861 871 node == self.nodeconstants.wdirid
862 872 or node in self.nodeconstants.wdirfilenodeids
863 873 ):
864 874 raise error.WdirUnsupported
865 875 raise error.LookupError(node, self.display_id, _(b'no node'))
866 876
867 877 # Accessors for index entries.
868 878
869 879 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
870 880 # are flags.
871 881 def start(self, rev):
872 882 return int(self.index[rev][0] >> 16)
873 883
874 884 def flags(self, rev):
875 885 return self.index[rev][0] & 0xFFFF
876 886
877 887 def length(self, rev):
878 888 return self.index[rev][1]
879 889
880 890 def sidedata_length(self, rev):
881 891 if not self.hassidedata:
882 892 return 0
883 893 return self.index[rev][9]
884 894
885 895 def rawsize(self, rev):
886 896 """return the length of the uncompressed text for a given revision"""
887 897 l = self.index[rev][2]
888 898 if l >= 0:
889 899 return l
890 900
891 901 t = self.rawdata(rev)
892 902 return len(t)
893 903
894 904 def size(self, rev):
895 905 """length of non-raw text (processed by a "read" flag processor)"""
896 906 # fast path: if no "read" flag processor could change the content,
897 907 # size is rawsize. note: ELLIPSIS is known to not change the content.
898 908 flags = self.flags(rev)
899 909 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
900 910 return self.rawsize(rev)
901 911
902 912 return len(self.revision(rev, raw=False))
903 913
904 914 def chainbase(self, rev):
905 915 base = self._chainbasecache.get(rev)
906 916 if base is not None:
907 917 return base
908 918
909 919 index = self.index
910 920 iterrev = rev
911 921 base = index[iterrev][3]
912 922 while base != iterrev:
913 923 iterrev = base
914 924 base = index[iterrev][3]
915 925
916 926 self._chainbasecache[rev] = base
917 927 return base
918 928
919 929 def linkrev(self, rev):
920 930 return self.index[rev][4]
921 931
922 932 def parentrevs(self, rev):
923 933 try:
924 934 entry = self.index[rev]
925 935 except IndexError:
926 936 if rev == wdirrev:
927 937 raise error.WdirUnsupported
928 938 raise
929 939 if entry[5] == nullrev:
930 940 return entry[6], entry[5]
931 941 else:
932 942 return entry[5], entry[6]
933 943
934 944 # fast parentrevs(rev) where rev isn't filtered
935 945 _uncheckedparentrevs = parentrevs
936 946
937 947 def node(self, rev):
938 948 try:
939 949 return self.index[rev][7]
940 950 except IndexError:
941 951 if rev == wdirrev:
942 952 raise error.WdirUnsupported
943 953 raise
944 954
945 955 # Derived from index values.
946 956
947 957 def end(self, rev):
948 958 return self.start(rev) + self.length(rev)
949 959
950 960 def parents(self, node):
951 961 i = self.index
952 962 d = i[self.rev(node)]
953 963 # inline node() to avoid function call overhead
954 964 if d[5] == self.nullid:
955 965 return i[d[6]][7], i[d[5]][7]
956 966 else:
957 967 return i[d[5]][7], i[d[6]][7]
958 968
959 969 def chainlen(self, rev):
960 970 return self._chaininfo(rev)[0]
961 971
962 972 def _chaininfo(self, rev):
963 973 chaininfocache = self._chaininfocache
964 974 if rev in chaininfocache:
965 975 return chaininfocache[rev]
966 976 index = self.index
967 977 generaldelta = self._generaldelta
968 978 iterrev = rev
969 979 e = index[iterrev]
970 980 clen = 0
971 981 compresseddeltalen = 0
972 982 while iterrev != e[3]:
973 983 clen += 1
974 984 compresseddeltalen += e[1]
975 985 if generaldelta:
976 986 iterrev = e[3]
977 987 else:
978 988 iterrev -= 1
979 989 if iterrev in chaininfocache:
980 990 t = chaininfocache[iterrev]
981 991 clen += t[0]
982 992 compresseddeltalen += t[1]
983 993 break
984 994 e = index[iterrev]
985 995 else:
986 996 # Add text length of base since decompressing that also takes
987 997 # work. For cache hits the length is already included.
988 998 compresseddeltalen += e[1]
989 999 r = (clen, compresseddeltalen)
990 1000 chaininfocache[rev] = r
991 1001 return r
992 1002
993 1003 def _deltachain(self, rev, stoprev=None):
994 1004 """Obtain the delta chain for a revision.
995 1005
996 1006 ``stoprev`` specifies a revision to stop at. If not specified, we
997 1007 stop at the base of the chain.
998 1008
999 1009 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1000 1010 revs in ascending order and ``stopped`` is a bool indicating whether
1001 1011 ``stoprev`` was hit.
1002 1012 """
1003 1013 # Try C implementation.
1004 1014 try:
1005 1015 return self.index.deltachain(rev, stoprev, self._generaldelta)
1006 1016 except AttributeError:
1007 1017 pass
1008 1018
1009 1019 chain = []
1010 1020
1011 1021 # Alias to prevent attribute lookup in tight loop.
1012 1022 index = self.index
1013 1023 generaldelta = self._generaldelta
1014 1024
1015 1025 iterrev = rev
1016 1026 e = index[iterrev]
1017 1027 while iterrev != e[3] and iterrev != stoprev:
1018 1028 chain.append(iterrev)
1019 1029 if generaldelta:
1020 1030 iterrev = e[3]
1021 1031 else:
1022 1032 iterrev -= 1
1023 1033 e = index[iterrev]
1024 1034
1025 1035 if iterrev == stoprev:
1026 1036 stopped = True
1027 1037 else:
1028 1038 chain.append(iterrev)
1029 1039 stopped = False
1030 1040
1031 1041 chain.reverse()
1032 1042 return chain, stopped
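# Example (illustrative): with general delta, if revision 7 deltas
# against 4, which deltas against the self-based revision 2, then:
#     self._deltachain(7)            # -> ([2, 4, 7], False)
#     self._deltachain(7, stoprev=4) # -> ([7], True)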
1033 1043
1034 1044 def ancestors(self, revs, stoprev=0, inclusive=False):
1035 1045 """Generate the ancestors of 'revs' in reverse revision order.
1036 1046 Does not generate revs lower than stoprev.
1037 1047
1038 1048 See the documentation for ancestor.lazyancestors for more details."""
1039 1049
1040 1050 # first, make sure start revisions aren't filtered
1041 1051 revs = list(revs)
1042 1052 checkrev = self.node
1043 1053 for r in revs:
1044 1054 checkrev(r)
1045 1055 # and we're sure ancestors aren't filtered either
1046 1056
1047 1057 if rustancestor is not None:
1048 1058 lazyancestors = rustancestor.LazyAncestors
1049 1059 arg = self.index
1050 1060 else:
1051 1061 lazyancestors = ancestor.lazyancestors
1052 1062 arg = self._uncheckedparentrevs
1053 1063 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1054 1064
1055 1065 def descendants(self, revs):
1056 1066 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1057 1067
1058 1068 def findcommonmissing(self, common=None, heads=None):
1059 1069 """Return a tuple of the ancestors of common and the ancestors of heads
1060 1070 that are not ancestors of common. In revset terminology, we return the
1061 1071 tuple:
1062 1072
1063 1073 ::common, (::heads) - (::common)
1064 1074
1065 1075 The list is sorted by revision number, meaning it is
1066 1076 topologically sorted.
1067 1077
1068 1078 'heads' and 'common' are both lists of node IDs. If heads is
1069 1079 not supplied, uses all of the revlog's heads. If common is not
1070 1080 supplied, uses nullid."""
1071 1081 if common is None:
1072 1082 common = [self.nullid]
1073 1083 if heads is None:
1074 1084 heads = self.heads()
1075 1085
1076 1086 common = [self.rev(n) for n in common]
1077 1087 heads = [self.rev(n) for n in heads]
1078 1088
1079 1089 # we want the ancestors, but inclusive
1080 1090 class lazyset(object):
1081 1091 def __init__(self, lazyvalues):
1082 1092 self.addedvalues = set()
1083 1093 self.lazyvalues = lazyvalues
1084 1094
1085 1095 def __contains__(self, value):
1086 1096 return value in self.addedvalues or value in self.lazyvalues
1087 1097
1088 1098 def __iter__(self):
1089 1099 added = self.addedvalues
1090 1100 for r in added:
1091 1101 yield r
1092 1102 for r in self.lazyvalues:
1093 1103 if r not in added:
1094 1104 yield r
1095 1105
1096 1106 def add(self, value):
1097 1107 self.addedvalues.add(value)
1098 1108
1099 1109 def update(self, values):
1100 1110 self.addedvalues.update(values)
1101 1111
1102 1112 has = lazyset(self.ancestors(common))
1103 1113 has.add(nullrev)
1104 1114 has.update(common)
1105 1115
1106 1116 # take all ancestors from heads that aren't in has
1107 1117 missing = set()
1108 1118 visit = collections.deque(r for r in heads if r not in has)
1109 1119 while visit:
1110 1120 r = visit.popleft()
1111 1121 if r in missing:
1112 1122 continue
1113 1123 else:
1114 1124 missing.add(r)
1115 1125 for p in self.parentrevs(r):
1116 1126 if p not in has:
1117 1127 visit.append(p)
1118 1128 missing = list(missing)
1119 1129 missing.sort()
1120 1130 return has, [self.node(miss) for miss in missing]
1121 1131
1122 1132 def incrementalmissingrevs(self, common=None):
1123 1133 """Return an object that can be used to incrementally compute the
1124 1134 revision numbers of the ancestors of arbitrary sets that are not
1125 1135 ancestors of common. This is an ancestor.incrementalmissingancestors
1126 1136 object.
1127 1137
1128 1138 'common' is a list of revision numbers. If common is not supplied, uses
1129 1139 nullrev.
1130 1140 """
1131 1141 if common is None:
1132 1142 common = [nullrev]
1133 1143
1134 1144 if rustancestor is not None:
1135 1145 return rustancestor.MissingAncestors(self.index, common)
1136 1146 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1137 1147
1138 1148 def findmissingrevs(self, common=None, heads=None):
1139 1149 """Return the revision numbers of the ancestors of heads that
1140 1150 are not ancestors of common.
1141 1151
1142 1152 More specifically, return a list of revision numbers corresponding to
1143 1153 nodes N such that every N satisfies the following constraints:
1144 1154
1145 1155 1. N is an ancestor of some node in 'heads'
1146 1156 2. N is not an ancestor of any node in 'common'
1147 1157
1148 1158 The list is sorted by revision number, meaning it is
1149 1159 topologically sorted.
1150 1160
1151 1161 'heads' and 'common' are both lists of revision numbers. If heads is
1152 1162 not supplied, uses all of the revlog's heads. If common is not
1153 1163 supplied, uses nullrev."""
1154 1164 if common is None:
1155 1165 common = [nullrev]
1156 1166 if heads is None:
1157 1167 heads = self.headrevs()
1158 1168
1159 1169 inc = self.incrementalmissingrevs(common=common)
1160 1170 return inc.missingancestors(heads)
1161 1171
1162 1172 def findmissing(self, common=None, heads=None):
1163 1173 """Return the ancestors of heads that are not ancestors of common.
1164 1174
1165 1175 More specifically, return a list of nodes N such that every N
1166 1176 satisfies the following constraints:
1167 1177
1168 1178 1. N is an ancestor of some node in 'heads'
1169 1179 2. N is not an ancestor of any node in 'common'
1170 1180
1171 1181 The list is sorted by revision number, meaning it is
1172 1182 topologically sorted.
1173 1183
1174 1184 'heads' and 'common' are both lists of node IDs. If heads is
1175 1185 not supplied, uses all of the revlog's heads. If common is not
1176 1186 supplied, uses nullid."""
1177 1187 if common is None:
1178 1188 common = [self.nullid]
1179 1189 if heads is None:
1180 1190 heads = self.heads()
1181 1191
1182 1192 common = [self.rev(n) for n in common]
1183 1193 heads = [self.rev(n) for n in heads]
1184 1194
1185 1195 inc = self.incrementalmissingrevs(common=common)
1186 1196 return [self.node(r) for r in inc.missingancestors(heads)]
1187 1197
1188 1198 def nodesbetween(self, roots=None, heads=None):
1189 1199 """Return a topological path from 'roots' to 'heads'.
1190 1200
1191 1201 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1192 1202 topologically sorted list of all nodes N that satisfy both of
1193 1203 these constraints:
1194 1204
1195 1205 1. N is a descendant of some node in 'roots'
1196 1206 2. N is an ancestor of some node in 'heads'
1197 1207
1198 1208 Every node is considered to be both a descendant and an ancestor
1199 1209 of itself, so every reachable node in 'roots' and 'heads' will be
1200 1210 included in 'nodes'.
1201 1211
1202 1212 'outroots' is the list of reachable nodes in 'roots', i.e., the
1203 1213 subset of 'roots' that is returned in 'nodes'. Likewise,
1204 1214 'outheads' is the subset of 'heads' that is also in 'nodes'.
1205 1215
1206 1216 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1207 1217 unspecified, uses nullid as the only root. If 'heads' is
1208 1218 unspecified, uses the list of all of the revlog's heads."""
1209 1219 nonodes = ([], [], [])
1210 1220 if roots is not None:
1211 1221 roots = list(roots)
1212 1222 if not roots:
1213 1223 return nonodes
1214 1224 lowestrev = min([self.rev(n) for n in roots])
1215 1225 else:
1216 1226 roots = [self.nullid] # Everybody's a descendant of nullid
1217 1227 lowestrev = nullrev
1218 1228 if (lowestrev == nullrev) and (heads is None):
1219 1229 # We want _all_ the nodes!
1220 1230 return (
1221 1231 [self.node(r) for r in self],
1222 1232 [self.nullid],
1223 1233 list(self.heads()),
1224 1234 )
1225 1235 if heads is None:
1226 1236 # All nodes are ancestors, so the latest ancestor is the last
1227 1237 # node.
1228 1238 highestrev = len(self) - 1
1229 1239 # Set ancestors to None to signal that every node is an ancestor.
1230 1240 ancestors = None
1231 1241 # Set heads to an empty dictionary for later discovery of heads
1232 1242 heads = {}
1233 1243 else:
1234 1244 heads = list(heads)
1235 1245 if not heads:
1236 1246 return nonodes
1237 1247 ancestors = set()
1238 1248 # Turn heads into a dictionary so we can remove 'fake' heads.
1239 1249 # Also, later we will be using it to filter out the heads we can't
1240 1250 # find from roots.
1241 1251 heads = dict.fromkeys(heads, False)
1242 1252 # Start at the top and keep marking parents until we're done.
1243 1253 nodestotag = set(heads)
1244 1254 # Remember where the top was so we can use it as a limit later.
1245 1255 highestrev = max([self.rev(n) for n in nodestotag])
1246 1256 while nodestotag:
1247 1257 # grab a node to tag
1248 1258 n = nodestotag.pop()
1249 1259 # Never tag nullid
1250 1260 if n == self.nullid:
1251 1261 continue
1252 1262 # A node's revision number represents its place in a
1253 1263 # topologically sorted list of nodes.
1254 1264 r = self.rev(n)
1255 1265 if r >= lowestrev:
1256 1266 if n not in ancestors:
1257 1267 # If we are possibly a descendant of one of the roots
1258 1268 # and we haven't already been marked as an ancestor
1259 1269 ancestors.add(n) # Mark as ancestor
1260 1270 # Add non-nullid parents to list of nodes to tag.
1261 1271 nodestotag.update(
1262 1272 [p for p in self.parents(n) if p != self.nullid]
1263 1273 )
1264 1274 elif n in heads: # We've seen it before, is it a fake head?
1265 1275 # So it is, real heads should not be the ancestors of
1266 1276 # any other heads.
1267 1277 heads.pop(n)
1268 1278 if not ancestors:
1269 1279 return nonodes
1270 1280 # Now that we have our set of ancestors, we want to remove any
1271 1281 # roots that are not ancestors.
1272 1282
1273 1283 # If one of the roots was nullid, everything is included anyway.
1274 1284 if lowestrev > nullrev:
1275 1285 # But, since we weren't, let's recompute the lowest rev to not
1276 1286 # include roots that aren't ancestors.
1277 1287
1278 1288 # Filter out roots that aren't ancestors of heads
1279 1289 roots = [root for root in roots if root in ancestors]
1280 1290 # Recompute the lowest revision
1281 1291 if roots:
1282 1292 lowestrev = min([self.rev(root) for root in roots])
1283 1293 else:
1284 1294 # No more roots? Return empty list
1285 1295 return nonodes
1286 1296 else:
1287 1297 # We are descending from nullid, and don't need to care about
1288 1298 # any other roots.
1289 1299 lowestrev = nullrev
1290 1300 roots = [self.nullid]
1291 1301 # Transform our roots list into a set.
1292 1302 descendants = set(roots)
1293 1303 # Also, keep the original roots so we can filter out roots that aren't
1294 1304 # 'real' roots (i.e. are descended from other roots).
1295 1305 roots = descendants.copy()
1296 1306 # Our topologically sorted list of output nodes.
1297 1307 orderedout = []
1298 1308 # Don't start at nullid since we don't want nullid in our output list,
1299 1309 # and if nullid shows up in descendants, empty parents will look like
1300 1310 # they're descendants.
1301 1311 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1302 1312 n = self.node(r)
1303 1313 isdescendant = False
1304 1314 if lowestrev == nullrev: # Everybody is a descendant of nullid
1305 1315 isdescendant = True
1306 1316 elif n in descendants:
1307 1317 # n is already a descendant
1308 1318 isdescendant = True
1309 1319 # This check only needs to be done here because all the roots
1310 1320 # will start being marked as descendants before the loop.
1311 1321 if n in roots:
1312 1322 # If n was a root, check if it's a 'real' root.
1313 1323 p = tuple(self.parents(n))
1314 1324 # If any of its parents are descendants, it's not a root.
1315 1325 if (p[0] in descendants) or (p[1] in descendants):
1316 1326 roots.remove(n)
1317 1327 else:
1318 1328 p = tuple(self.parents(n))
1319 1329 # A node is a descendant if either of its parents are
1320 1330 # descendants. (We seeded the descendants list with the roots
1321 1331 # up there, remember?)
1322 1332 if (p[0] in descendants) or (p[1] in descendants):
1323 1333 descendants.add(n)
1324 1334 isdescendant = True
1325 1335 if isdescendant and ((ancestors is None) or (n in ancestors)):
1326 1336 # Only include nodes that are both descendants and ancestors.
1327 1337 orderedout.append(n)
1328 1338 if (ancestors is not None) and (n in heads):
1329 1339 # We're trying to figure out which heads are reachable
1330 1340 # from roots.
1331 1341 # Mark this head as having been reached
1332 1342 heads[n] = True
1333 1343 elif ancestors is None:
1334 1344 # Otherwise, we're trying to discover the heads.
1335 1345 # Assume this is a head because if it isn't, the next step
1336 1346 # will eventually remove it.
1337 1347 heads[n] = True
1338 1348 # But, obviously its parents aren't.
1339 1349 for p in self.parents(n):
1340 1350 heads.pop(p, None)
1341 1351 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1342 1352 roots = list(roots)
1343 1353 assert orderedout
1344 1354 assert roots
1345 1355 assert heads
1346 1356 return (orderedout, roots, heads)
1347 1357
1348 1358 def headrevs(self, revs=None):
1349 1359 if revs is None:
1350 1360 try:
1351 1361 return self.index.headrevs()
1352 1362 except AttributeError:
1353 1363 return self._headrevs()
1354 1364 if rustdagop is not None:
1355 1365 return rustdagop.headrevs(self.index, revs)
1356 1366 return dagop.headrevs(revs, self._uncheckedparentrevs)
1357 1367
1358 1368 def computephases(self, roots):
1359 1369 return self.index.computephasesmapsets(roots)
1360 1370
1361 1371 def _headrevs(self):
1362 1372 count = len(self)
1363 1373 if not count:
1364 1374 return [nullrev]
1365 1375 # we won't iterate over filtered revs so nobody is a head at start
1366 1376 ishead = [0] * (count + 1)
1367 1377 index = self.index
1368 1378 for r in self:
1369 1379 ishead[r] = 1 # I may be a head
1370 1380 e = index[r]
1371 1381 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1372 1382 return [r for r, val in enumerate(ishead) if val]
1373 1383
1374 1384 def heads(self, start=None, stop=None):
1375 1385 """return the list of all nodes that have no children
1376 1386
1377 1387 if start is specified, only heads that are descendants of
1378 1388 start will be returned
1379 1389 if stop is specified, it will consider all the revs from stop
1380 1390 as if they had no children
1381 1391 """
1382 1392 if start is None and stop is None:
1383 1393 if not len(self):
1384 1394 return [self.nullid]
1385 1395 return [self.node(r) for r in self.headrevs()]
1386 1396
1387 1397 if start is None:
1388 1398 start = nullrev
1389 1399 else:
1390 1400 start = self.rev(start)
1391 1401
1392 1402 stoprevs = {self.rev(n) for n in stop or []}
1393 1403
1394 1404 revs = dagop.headrevssubset(
1395 1405 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1396 1406 )
1397 1407
1398 1408 return [self.node(rev) for rev in revs]
1399 1409
1400 1410 def children(self, node):
1401 1411 """find the children of a given node"""
1402 1412 c = []
1403 1413 p = self.rev(node)
1404 1414 for r in self.revs(start=p + 1):
1405 1415 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1406 1416 if prevs:
1407 1417 for pr in prevs:
1408 1418 if pr == p:
1409 1419 c.append(self.node(r))
1410 1420 elif p == nullrev:
1411 1421 c.append(self.node(r))
1412 1422 return c
1413 1423
1414 1424 def commonancestorsheads(self, a, b):
1415 1425 """calculate all the heads of the common ancestors of nodes a and b"""
1416 1426 a, b = self.rev(a), self.rev(b)
1417 1427 ancs = self._commonancestorsheads(a, b)
1418 1428 return pycompat.maplist(self.node, ancs)
1419 1429
1420 1430 def _commonancestorsheads(self, *revs):
1421 1431 """calculate all the heads of the common ancestors of revs"""
1422 1432 try:
1423 1433 ancs = self.index.commonancestorsheads(*revs)
1424 1434 except (AttributeError, OverflowError): # C implementation failed
1425 1435 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1426 1436 return ancs
1427 1437
1428 1438 def isancestor(self, a, b):
1429 1439 """return True if node a is an ancestor of node b
1430 1440
1431 1441 A revision is considered an ancestor of itself."""
1432 1442 a, b = self.rev(a), self.rev(b)
1433 1443 return self.isancestorrev(a, b)
1434 1444
1435 1445 def isancestorrev(self, a, b):
1436 1446 """return True if revision a is an ancestor of revision b
1437 1447
1438 1448 A revision is considered an ancestor of itself.
1439 1449
1440 1450 The implementation of this is trivial but the use of
1441 1451 reachableroots is not."""
1442 1452 if a == nullrev:
1443 1453 return True
1444 1454 elif a == b:
1445 1455 return True
1446 1456 elif a > b:
1447 1457 return False
1448 1458 return bool(self.reachableroots(a, [b], [a], includepath=False))
1449 1459
1450 1460 def reachableroots(self, minroot, heads, roots, includepath=False):
1451 1461 """return (heads(::(<roots> and <roots>::<heads>)))
1452 1462
1453 1463 If includepath is True, return (<roots>::<heads>)."""
1454 1464 try:
1455 1465 return self.index.reachableroots2(
1456 1466 minroot, heads, roots, includepath
1457 1467 )
1458 1468 except AttributeError:
1459 1469 return dagop._reachablerootspure(
1460 1470 self.parentrevs, minroot, roots, heads, includepath
1461 1471 )
1462 1472
1463 1473 def ancestor(self, a, b):
1464 1474 """calculate the "best" common ancestor of nodes a and b"""
1465 1475
1466 1476 a, b = self.rev(a), self.rev(b)
1467 1477 try:
1468 1478 ancs = self.index.ancestors(a, b)
1469 1479 except (AttributeError, OverflowError):
1470 1480 ancs = ancestor.ancestors(self.parentrevs, a, b)
1471 1481 if ancs:
1472 1482 # choose a consistent winner when there's a tie
1473 1483 return min(map(self.node, ancs))
1474 1484 return self.nullid
1475 1485
1476 1486 def _match(self, id):
1477 1487 if isinstance(id, int):
1478 1488 # rev
1479 1489 return self.node(id)
1480 1490 if len(id) == self.nodeconstants.nodelen:
1481 1491 # possibly a binary node
1482 1492 # odds of a binary node being all hex in ASCII are 1 in 10**25
1483 1493 try:
1484 1494 node = id
1485 1495 self.rev(node) # quick search the index
1486 1496 return node
1487 1497 except error.LookupError:
1488 1498 pass # may be partial hex id
1489 1499 try:
1490 1500 # str(rev)
1491 1501 rev = int(id)
1492 1502 if b"%d" % rev != id:
1493 1503 raise ValueError
1494 1504 if rev < 0:
1495 1505 rev = len(self) + rev
1496 1506 if rev < 0 or rev >= len(self):
1497 1507 raise ValueError
1498 1508 return self.node(rev)
1499 1509 except (ValueError, OverflowError):
1500 1510 pass
1501 1511 if len(id) == 2 * self.nodeconstants.nodelen:
1502 1512 try:
1503 1513 # a full hex nodeid?
1504 1514 node = bin(id)
1505 1515 self.rev(node)
1506 1516 return node
1507 1517 except (TypeError, error.LookupError):
1508 1518 pass
1509 1519
1510 1520 def _partialmatch(self, id):
1511 1521 # we don't care about wdirfilenodeids as they should always be full hashes
1512 1522 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1513 1523 try:
1514 1524 partial = self.index.partialmatch(id)
1515 1525 if partial and self.hasnode(partial):
1516 1526 if maybewdir:
1517 1527 # single 'ff...' match in radix tree, ambiguous with wdir
1518 1528 raise error.RevlogError
1519 1529 return partial
1520 1530 if maybewdir:
1521 1531 # no 'ff...' match in radix tree, wdir identified
1522 1532 raise error.WdirUnsupported
1523 1533 return None
1524 1534 except error.RevlogError:
1525 1535 # parsers.c radix tree lookup gave multiple matches
1526 1536 # fast path: for unfiltered changelog, radix tree is accurate
1527 1537 if not getattr(self, 'filteredrevs', None):
1528 1538 raise error.AmbiguousPrefixLookupError(
1529 1539 id, self.display_id, _(b'ambiguous identifier')
1530 1540 )
1531 1541 # fall through to slow path that filters hidden revisions
1532 1542 except (AttributeError, ValueError):
1533 1543 # we are pure python, or key was too short to search radix tree
1534 1544 pass
1535 1545
1536 1546 if id in self._pcache:
1537 1547 return self._pcache[id]
1538 1548
1539 1549 if len(id) <= 40:
1540 1550 try:
1541 1551 # hex(node)[:...]
1542 1552 l = len(id) // 2 # grab an even number of digits
1543 1553 prefix = bin(id[: l * 2])
1544 1554 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1545 1555 nl = [
1546 1556 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1547 1557 ]
1548 1558 if self.nodeconstants.nullhex.startswith(id):
1549 1559 nl.append(self.nullid)
1550 1560 if len(nl) > 0:
1551 1561 if len(nl) == 1 and not maybewdir:
1552 1562 self._pcache[id] = nl[0]
1553 1563 return nl[0]
1554 1564 raise error.AmbiguousPrefixLookupError(
1555 1565 id, self.display_id, _(b'ambiguous identifier')
1556 1566 )
1557 1567 if maybewdir:
1558 1568 raise error.WdirUnsupported
1559 1569 return None
1560 1570 except TypeError:
1561 1571 pass
1562 1572
1563 1573 def lookup(self, id):
1564 1574 """locate a node based on:
1565 1575 - revision number or str(revision number)
1566 1576 - nodeid or subset of hex nodeid
1567 1577 """
1568 1578 n = self._match(id)
1569 1579 if n is not None:
1570 1580 return n
1571 1581 n = self._partialmatch(id)
1572 1582 if n:
1573 1583 return n
1574 1584
1575 1585 raise error.LookupError(id, self.display_id, _(b'no match found'))
1576 1586
1577 1587 def shortest(self, node, minlength=1):
1578 1588 """Find the shortest unambiguous prefix that matches node."""
1579 1589
1580 1590 def isvalid(prefix):
1581 1591 try:
1582 1592 matchednode = self._partialmatch(prefix)
1583 1593 except error.AmbiguousPrefixLookupError:
1584 1594 return False
1585 1595 except error.WdirUnsupported:
1586 1596 # single 'ff...' match
1587 1597 return True
1588 1598 if matchednode is None:
1589 1599 raise error.LookupError(node, self.display_id, _(b'no node'))
1590 1600 return True
1591 1601
1592 1602 def maybewdir(prefix):
1593 1603 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1594 1604
1595 1605 hexnode = hex(node)
1596 1606
1597 1607 def disambiguate(hexnode, minlength):
1598 1608 """Disambiguate against wdirid."""
1599 1609 for length in range(minlength, len(hexnode) + 1):
1600 1610 prefix = hexnode[:length]
1601 1611 if not maybewdir(prefix):
1602 1612 return prefix
1603 1613
1604 1614 if not getattr(self, 'filteredrevs', None):
1605 1615 try:
1606 1616 length = max(self.index.shortest(node), minlength)
1607 1617 return disambiguate(hexnode, length)
1608 1618 except error.RevlogError:
1609 1619 if node != self.nodeconstants.wdirid:
1610 1620 raise error.LookupError(
1611 1621 node, self.display_id, _(b'no node')
1612 1622 )
1613 1623 except AttributeError:
1614 1624 # Fall through to pure code
1615 1625 pass
1616 1626
1617 1627 if node == self.nodeconstants.wdirid:
1618 1628 for length in range(minlength, len(hexnode) + 1):
1619 1629 prefix = hexnode[:length]
1620 1630 if isvalid(prefix):
1621 1631 return prefix
1622 1632
1623 1633 for length in range(minlength, len(hexnode) + 1):
1624 1634 prefix = hexnode[:length]
1625 1635 if isvalid(prefix):
1626 1636 return disambiguate(hexnode, length)
1627 1637
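# Illustrative sketch for ``shortest`` (``rl`` is an assumed revlog
# instance holding a full binary node ``n``; neither is defined here):
#
#   prefix = rl.shortest(n, minlength=4)
#   assert len(prefix) >= 4
#   assert rl._partialmatch(prefix) == n  # resolves back, unambiguously
#
# i.e. the result is the shortest hex prefix of at least ``minlength``
# digits that is ambiguous with neither another stored node nor the
# all-'f' working directory pseudo-id.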
1628 1638 def cmp(self, node, text):
1629 1639 """compare text with a given file revision
1630 1640
1631 1641 returns True if text is different from what is stored.
1632 1642 """
1633 1643 p1, p2 = self.parents(node)
1634 1644 return storageutil.hashrevisionsha1(text, p1, p2) != node
1635 1645
1636 1646 def _cachesegment(self, offset, data):
1637 1647 """Add a segment to the revlog cache.
1638 1648
1639 1649 Accepts an absolute offset and the data that is at that location.
1640 1650 """
1641 1651 o, d = self._chunkcache
1642 1652 # try to add to existing cache
1643 1653 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1644 1654 self._chunkcache = o, d + data
1645 1655 else:
1646 1656 self._chunkcache = offset, data
1647 1657
1648 1658 def _readsegment(self, offset, length, df=None):
1649 1659 """Load a segment of raw data from the revlog.
1650 1660
1651 1661 Accepts an absolute offset, length to read, and an optional existing
1652 1662 file handle to read from.
1653 1663
1654 1664 If an existing file handle is passed, it will be seeked and the
1655 1665 original seek position will NOT be restored.
1656 1666
1657 1667 Returns a str or buffer of raw byte data.
1658 1668
1659 1669 Raises if the requested number of bytes could not be read.
1660 1670 """
1661 1671 # Cache data both forward and backward around the requested
1662 1672 # data, in a fixed size window. This helps speed up operations
1663 1673 # involving reading the revlog backwards.
1664 1674 cachesize = self._chunkcachesize
1665 1675 realoffset = offset & ~(cachesize - 1)
1666 1676 reallength = (
1667 1677 (offset + length + cachesize) & ~(cachesize - 1)
1668 1678 ) - realoffset
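# Worked example of the alignment above, assuming cachesize == 65536
# (a 64 KiB window): a request for 100 bytes at offset 70000 becomes
#
#   realoffset = 70000 & ~(65536 - 1) == 65536
#   reallength = ((70000 + 100 + 65536) & ~(65536 - 1)) - 65536 == 65536
#
# so one aligned 64 KiB read serves the request and primes the cache
# for neighbouring reads in both directions.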
1669 1679 with self._datareadfp(df) as df:
1670 1680 df.seek(realoffset)
1671 1681 d = df.read(reallength)
1672 1682
1673 1683 self._cachesegment(realoffset, d)
1674 1684 if offset != realoffset or reallength != length:
1675 1685 startoffset = offset - realoffset
1676 1686 if len(d) - startoffset < length:
1677 1687 raise error.RevlogError(
1678 1688 _(
1679 1689 b'partial read of revlog %s; expected %d bytes from '
1680 1690 b'offset %d, got %d'
1681 1691 )
1682 1692 % (
1683 1693 self._indexfile if self._inline else self._datafile,
1684 1694 length,
1685 1695 offset,
1686 1696 len(d) - startoffset,
1687 1697 )
1688 1698 )
1689 1699
1690 1700 return util.buffer(d, startoffset, length)
1691 1701
1692 1702 if len(d) < length:
1693 1703 raise error.RevlogError(
1694 1704 _(
1695 1705 b'partial read of revlog %s; expected %d bytes from offset '
1696 1706 b'%d, got %d'
1697 1707 )
1698 1708 % (
1699 1709 self._indexfile if self._inline else self._datafile,
1700 1710 length,
1701 1711 offset,
1702 1712 len(d),
1703 1713 )
1704 1714 )
1705 1715
1706 1716 return d
1707 1717
1708 1718 def _getsegment(self, offset, length, df=None):
1709 1719 """Obtain a segment of raw data from the revlog.
1710 1720
1711 1721 Accepts an absolute offset, length of bytes to obtain, and an
1712 1722 optional file handle to the already-opened revlog. If the file
1713 1723 handle is used, its original seek position will not be preserved.
1714 1724
1715 1725 Requests for data may be returned from a cache.
1716 1726
1717 1727 Returns a str or a buffer instance of raw byte data.
1718 1728 """
1719 1729 o, d = self._chunkcache
1720 1730 l = len(d)
1721 1731
1722 1732 # is it in the cache?
1723 1733 cachestart = offset - o
1724 1734 cacheend = cachestart + length
1725 1735 if cachestart >= 0 and cacheend <= l:
1726 1736 if cachestart == 0 and cacheend == l:
1727 1737 return d # avoid a copy
1728 1738 return util.buffer(d, cachestart, cacheend - cachestart)
1729 1739
1730 1740 return self._readsegment(offset, length, df=df)
1731 1741
1732 1742 def _getsegmentforrevs(self, startrev, endrev, df=None):
1733 1743 """Obtain a segment of raw data corresponding to a range of revisions.
1734 1744
1735 1745 Accepts the start and end revisions and an optional already-open
1736 1746 file handle to be used for reading. If the file handle is used, its
1737 1747 seek position will not be preserved.
1738 1748
1739 1749 Requests for data may be satisfied by a cache.
1740 1750
1741 1751 Returns a 2-tuple of (offset, data) for the requested range of
1742 1752 revisions. Offset is the integer offset from the beginning of the
1743 1753 revlog and data is a str or buffer of the raw byte data.
1744 1754
1745 1755 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1746 1756 to determine where each revision's data begins and ends.
1747 1757 """
1748 1758 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1749 1759 # (functions are expensive).
1750 1760 index = self.index
1751 1761 istart = index[startrev]
1752 1762 start = int(istart[0] >> 16)
1753 1763 if startrev == endrev:
1754 1764 end = start + istart[1]
1755 1765 else:
1756 1766 iend = index[endrev]
1757 1767 end = int(iend[0] >> 16) + iend[1]
1758 1768
1759 1769 if self._inline:
1760 1770 start += (startrev + 1) * self.index.entry_size
1761 1771 end += (endrev + 1) * self.index.entry_size
1762 1772 length = end - start
1763 1773
1764 1774 return start, self._getsegment(start, length, df=df)
1765 1775
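# Worked example of the inline adjustment above: in an inline revlog,
# index entries and data chunks are interleaved in a single file, so
# rev r's data is pushed back by the (r + 1) index entries preceding
# it. Assuming 64-byte index entries and a 30-byte chunk for rev 0:
#
#   rev 0: logical start 0  -> physical 0 + 1 * 64 = 64
#   rev 1: logical start 30 -> physical 30 + 2 * 64 = 158
#
# which is exactly the ``(rev + 1) * entry_size`` correction.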
1766 1776 def _chunk(self, rev, df=None):
1767 1777 """Obtain a single decompressed chunk for a revision.
1768 1778
1769 1779 Accepts an integer revision and an optional already-open file handle
1770 1780 to be used for reading. If used, the seek position of the file will not
1771 1781 be preserved.
1772 1782
1773 1783 Returns a str holding uncompressed data for the requested revision.
1774 1784 """
1775 1785 compression_mode = self.index[rev][10]
1776 1786 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1777 1787 if compression_mode == COMP_MODE_PLAIN:
1778 1788 return data
1789 elif compression_mode == COMP_MODE_DEFAULT:
1790 return self._decompressor(data)
1779 1791 elif compression_mode == COMP_MODE_INLINE:
1780 1792 return self.decompress(data)
1781 1793 else:
1782 1794 msg = 'unknown compression mode %d'
1783 1795 msg %= compression_mode
1784 1796 raise error.RevlogError(msg)
1785 1797
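# Sketch of the three storage modes dispatched in _chunk above:
#
#   COMP_MODE_PLAIN: the chunk is stored as-is; return it untouched.
#   COMP_MODE_INLINE: the chunk carries its own header byte (b'u',
#       b'\0', b'x' or an engine-specific marker), so it is routed
#       through the generic decompress() further below.
#   COMP_MODE_DEFAULT: the index records that the chunk was produced
#       by the revlog's default engine (the one named in the docket),
#       so the cached self._decompressor is applied directly and the
#       per-chunk header dispatch is skipped.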
1786 1798 def _chunks(self, revs, df=None, targetsize=None):
1787 1799 """Obtain decompressed chunks for the specified revisions.
1788 1800
1789 1801 Accepts an iterable of numeric revisions that are assumed to be in
1790 1802 ascending order. Also accepts an optional already-open file handle
1791 1803 to be used for reading. If used, the seek position of the file will
1792 1804 not be preserved.
1793 1805
1794 1806 This function is similar to calling ``self._chunk()`` multiple times,
1795 1807 but is faster.
1796 1808
1797 1809 Returns a list with decompressed data for each requested revision.
1798 1810 """
1799 1811 if not revs:
1800 1812 return []
1801 1813 start = self.start
1802 1814 length = self.length
1803 1815 inline = self._inline
1804 1816 iosize = self.index.entry_size
1805 1817 buffer = util.buffer
1806 1818
1807 1819 l = []
1808 1820 ladd = l.append
1809 1821
1810 1822 if not self._withsparseread:
1811 1823 slicedchunks = (revs,)
1812 1824 else:
1813 1825 slicedchunks = deltautil.slicechunk(
1814 1826 self, revs, targetsize=targetsize
1815 1827 )
1816 1828
1817 1829 for revschunk in slicedchunks:
1818 1830 firstrev = revschunk[0]
1819 1831 # Skip trailing revisions with empty diff
1820 1832 for lastrev in revschunk[::-1]:
1821 1833 if length(lastrev) != 0:
1822 1834 break
1823 1835
1824 1836 try:
1825 1837 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1826 1838 except OverflowError:
1827 1839 # issue4215 - we can't cache a run of chunks greater than
1828 1840 # 2G on Windows
1829 1841 return [self._chunk(rev, df=df) for rev in revschunk]
1830 1842
1831 1843 decomp = self.decompress
1844 # self._decompressor might be None, but will not be used in that case
1845 def_decomp = self._decompressor
1832 1846 for rev in revschunk:
1833 1847 chunkstart = start(rev)
1834 1848 if inline:
1835 1849 chunkstart += (rev + 1) * iosize
1836 1850 chunklength = length(rev)
1837 1851 comp_mode = self.index[rev][10]
1838 1852 c = buffer(data, chunkstart - offset, chunklength)
1839 1853 if comp_mode == COMP_MODE_PLAIN:
1840 1854 ladd(c)
1841 1855 elif comp_mode == COMP_MODE_INLINE:
1842 1856 ladd(decomp(c))
1857 elif comp_mode == COMP_MODE_DEFAULT:
1858 ladd(def_decomp(c))
1843 1859 else:
1844 1860 msg = 'unknown compression mode %d'
1845 1861 msg %= comp_mode
1846 1862 raise error.RevlogError(msg)
1847 1863
1848 1864 return l
1849 1865
1850 1866 def _chunkclear(self):
1851 1867 """Clear the raw chunk cache."""
1852 1868 self._chunkcache = (0, b'')
1853 1869
1854 1870 def deltaparent(self, rev):
1855 1871 """return deltaparent of the given revision"""
1856 1872 base = self.index[rev][3]
1857 1873 if base == rev:
1858 1874 return nullrev
1859 1875 elif self._generaldelta:
1860 1876 return base
1861 1877 else:
1862 1878 return rev - 1
1863 1879
1864 1880 def issnapshot(self, rev):
1865 1881 """tells whether rev is a snapshot"""
1866 1882 if not self._sparserevlog:
1867 1883 return self.deltaparent(rev) == nullrev
1868 1884 elif util.safehasattr(self.index, b'issnapshot'):
1869 1885 # directly assign the method to cache both the attribute test and access
1870 1886 self.issnapshot = self.index.issnapshot
1871 1887 return self.issnapshot(rev)
1872 1888 if rev == nullrev:
1873 1889 return True
1874 1890 entry = self.index[rev]
1875 1891 base = entry[3]
1876 1892 if base == rev:
1877 1893 return True
1878 1894 if base == nullrev:
1879 1895 return True
1880 1896 p1 = entry[5]
1881 1897 p2 = entry[6]
1882 1898 if base == p1 or base == p2:
1883 1899 return False
1884 1900 return self.issnapshot(base)
1885 1901
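# Sketch of the sparse-revlog rule implemented above: a revision is a
# snapshot iff it is a full text (base == rev or base == nullrev) or
# an intermediate snapshot, i.e. a delta whose base is itself a
# snapshot but is not one of the revision's parents. For instance:
#
#   rev 0: full text                       -> snapshot (level 0)
#   rev 5: delta vs rev 0, 0 not a parent  -> snapshot (level 1)
#   rev 6: delta vs its parent, rev 5      -> plain delta, no snapshot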
1886 1902 def snapshotdepth(self, rev):
1887 1903 """number of snapshot in the chain before this one"""
1888 1904 if not self.issnapshot(rev):
1889 1905 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1890 1906 return len(self._deltachain(rev)[0]) - 1
1891 1907
1892 1908 def revdiff(self, rev1, rev2):
1893 1909 """return or calculate a delta between two revisions
1894 1910
1895 1911 The delta calculated is in binary form and is intended to be written to
1896 1912 revlog data directly. So this function needs raw revision data.
1897 1913 """
1898 1914 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1899 1915 return bytes(self._chunk(rev2))
1900 1916
1901 1917 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1902 1918
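# Sketch of the fast path in revdiff above (``rl`` is an assumed
# revlog instance): when rev2 is stored as a delta directly against
# rev1, the on-disk chunk already *is* the wanted binary delta, so
#
#   rl.revdiff(r1, r2) == bytes(rl._chunk(r2))
#
# with no patching or diffing; otherwise the delta is recomputed from
# the two raw texts with mdiff.textdiff.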
1903 1919 def _processflags(self, text, flags, operation, raw=False):
1904 1920 """deprecated entry point to access flag processors"""
1905 1921 msg = b'_processflag(...) use the specialized variant'
1906 1922 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1907 1923 if raw:
1908 1924 return text, flagutil.processflagsraw(self, text, flags)
1909 1925 elif operation == b'read':
1910 1926 return flagutil.processflagsread(self, text, flags)
1911 1927 else: # write operation
1912 1928 return flagutil.processflagswrite(self, text, flags)
1913 1929
1914 1930 def revision(self, nodeorrev, _df=None, raw=False):
1915 1931 """return an uncompressed revision of a given node or revision
1916 1932 number.
1917 1933
1918 1934 _df - an existing file handle to read from. (internal-only)
1919 1935 raw - an optional argument specifying if the revision data is to be
1920 1936 treated as raw data when applying flag transforms. 'raw' should be set
1921 1937 to True when generating changegroups or in debug commands.
1922 1938 """
1923 1939 if raw:
1924 1940 msg = (
1925 1941 b'revlog.revision(..., raw=True) is deprecated, '
1926 1942 b'use revlog.rawdata(...)'
1927 1943 )
1928 1944 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1929 1945 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1930 1946
1931 1947 def sidedata(self, nodeorrev, _df=None):
1932 1948 """a map of extra data related to the changeset but not part of the hash
1933 1949
1934 1950 This function currently returns a dictionary. However, a more advanced
1935 1951 mapping object will likely be used in the future for more
1936 1952 efficient/lazy code.
1937 1953 """
1938 1954 return self._revisiondata(nodeorrev, _df)[1]
1939 1955
1940 1956 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1941 1957 # deal with <nodeorrev> argument type
1942 1958 if isinstance(nodeorrev, int):
1943 1959 rev = nodeorrev
1944 1960 node = self.node(rev)
1945 1961 else:
1946 1962 node = nodeorrev
1947 1963 rev = None
1948 1964
1949 1965 # fast path the special `nullid` rev
1950 1966 if node == self.nullid:
1951 1967 return b"", {}
1952 1968
1953 1969 # ``rawtext`` is the text as stored inside the revlog. Might be the
1954 1970 # revision or might need to be processed to retrieve the revision.
1955 1971 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1956 1972
1957 1973 if self.hassidedata:
1958 1974 if rev is None:
1959 1975 rev = self.rev(node)
1960 1976 sidedata = self._sidedata(rev)
1961 1977 else:
1962 1978 sidedata = {}
1963 1979
1964 1980 if raw and validated:
1965 1981 # if we don't want to process the raw text and that raw
1966 1982 # text is cached, we can exit early.
1967 1983 return rawtext, sidedata
1968 1984 if rev is None:
1969 1985 rev = self.rev(node)
1970 1986 # the revlog's flags for this revision
1971 1987 # (they usually alter its state or content)
1972 1988 flags = self.flags(rev)
1973 1989
1974 1990 if validated and flags == REVIDX_DEFAULT_FLAGS:
1975 1991 # no extra flags set, no flag processor runs, text = rawtext
1976 1992 return rawtext, sidedata
1977 1993
1978 1994 if raw:
1979 1995 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1980 1996 text = rawtext
1981 1997 else:
1982 1998 r = flagutil.processflagsread(self, rawtext, flags)
1983 1999 text, validatehash = r
1984 2000 if validatehash:
1985 2001 self.checkhash(text, node, rev=rev)
1986 2002 if not validated:
1987 2003 self._revisioncache = (node, rev, rawtext)
1988 2004
1989 2005 return text, sidedata
1990 2006
1991 2007 def _rawtext(self, node, rev, _df=None):
1992 2008 """return the possibly unvalidated rawtext for a revision
1993 2009
1994 2010 returns (rev, rawtext, validated)
1995 2011 """
1996 2012
1997 2013 # revision in the cache (could be useful to apply delta)
1998 2014 cachedrev = None
1999 2015 # An intermediate text to apply deltas to
2000 2016 basetext = None
2001 2017
2002 2018 # Check if we have the entry in cache
2003 2019 # The cache entry looks like (node, rev, rawtext)
2004 2020 if self._revisioncache:
2005 2021 if self._revisioncache[0] == node:
2006 2022 return (rev, self._revisioncache[2], True)
2007 2023 cachedrev = self._revisioncache[1]
2008 2024
2009 2025 if rev is None:
2010 2026 rev = self.rev(node)
2011 2027
2012 2028 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2013 2029 if stopped:
2014 2030 basetext = self._revisioncache[2]
2015 2031
2016 2032 # drop cache to save memory, the caller is expected to
2017 2033 # update self._revisioncache after validating the text
2018 2034 self._revisioncache = None
2019 2035
2020 2036 targetsize = None
2021 2037 rawsize = self.index[rev][2]
2022 2038 if 0 <= rawsize:
2023 2039 targetsize = 4 * rawsize
2024 2040
2025 2041 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2026 2042 if basetext is None:
2027 2043 basetext = bytes(bins[0])
2028 2044 bins = bins[1:]
2029 2045
2030 2046 rawtext = mdiff.patches(basetext, bins)
2031 2047 del basetext # let us have a chance to free memory early
2032 2048 return (rev, rawtext, False)
2033 2049
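# Sketch of the reconstruction in _rawtext above: for a delta chain
# base -> d1 -> d2 ending at the requested revision,
#
#   chain == [base_rev, d1_rev, d2_rev]
#   bins == self._chunks(chain)     # [fulltext, delta1, delta2]
#   rawtext == mdiff.patches(bins[0], bins[1:])
#
# and when the chain walk stopped early on the cached revision, the
# cached rawtext stands in as the base text instead of bins[0].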
2034 2050 def _sidedata(self, rev):
2035 2051 """Return the sidedata for a given revision number."""
2036 2052 index_entry = self.index[rev]
2037 2053 sidedata_offset = index_entry[8]
2038 2054 sidedata_size = index_entry[9]
2039 2055
2040 2056 if self._inline:
2041 2057 sidedata_offset += self.index.entry_size * (1 + rev)
2042 2058 if sidedata_size == 0:
2043 2059 return {}
2044 2060
2045 2061 segment = self._getsegment(sidedata_offset, sidedata_size)
2046 2062 sidedata = sidedatautil.deserialize_sidedata(segment)
2047 2063 return sidedata
2048 2064
2049 2065 def rawdata(self, nodeorrev, _df=None):
2050 2066 """return an uncompressed raw data of a given node or revision number.
2051 2067
2052 2068 _df - an existing file handle to read from. (internal-only)
2053 2069 """
2054 2070 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2055 2071
2056 2072 def hash(self, text, p1, p2):
2057 2073 """Compute a node hash.
2058 2074
2059 2075 Available as a function so that subclasses can replace the hash
2060 2076 as needed.
2061 2077 """
2062 2078 return storageutil.hashrevisionsha1(text, p1, p2)
2063 2079
2064 2080 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2065 2081 """Check node hash integrity.
2066 2082
2067 2083 Available as a function so that subclasses can extend hash mismatch
2068 2084 behaviors as needed.
2069 2085 """
2070 2086 try:
2071 2087 if p1 is None and p2 is None:
2072 2088 p1, p2 = self.parents(node)
2073 2089 if node != self.hash(text, p1, p2):
2074 2090 # Clear the revision cache on hash failure. The revision cache
2075 2091 # only stores the raw revision and clearing the cache does have
2076 2092 # the side-effect that we won't have a cache hit when the raw
2077 2093 # revision data is accessed. But this case should be rare and
2078 2094 # it is extra work to teach the cache about the hash
2079 2095 # verification state.
2080 2096 if self._revisioncache and self._revisioncache[0] == node:
2081 2097 self._revisioncache = None
2082 2098
2083 2099 revornode = rev
2084 2100 if revornode is None:
2085 2101 revornode = templatefilters.short(hex(node))
2086 2102 raise error.RevlogError(
2087 2103 _(b"integrity check failed on %s:%s")
2088 2104 % (self.display_id, pycompat.bytestr(revornode))
2089 2105 )
2090 2106 except error.RevlogError:
2091 2107 if self._censorable and storageutil.iscensoredtext(text):
2092 2108 raise error.CensoredNodeError(self.display_id, node, text)
2093 2109 raise
2094 2110
2095 2111 def _enforceinlinesize(self, tr):
2096 2112 """Check if the revlog is too big for inline and convert if so.
2097 2113
2098 2114 This should be called after revisions are added to the revlog. If the
2099 2115 revlog has grown too large to be an inline revlog, it will convert it
2100 2116 to use multiple index and data files.
2101 2117 """
2102 2118 tiprev = len(self) - 1
2103 2119 total_size = self.start(tiprev) + self.length(tiprev)
2104 2120 if not self._inline or total_size < _maxinline:
2105 2121 return
2106 2122
2107 2123 troffset = tr.findoffset(self._indexfile)
2108 2124 if troffset is None:
2109 2125 raise error.RevlogError(
2110 2126 _(b"%s not found in the transaction") % self._indexfile
2111 2127 )
2112 2128 trindex = 0
2113 2129 tr.add(self._datafile, 0)
2114 2130
2115 2131 existing_handles = False
2116 2132 if self._writinghandles is not None:
2117 2133 existing_handles = True
2118 2134 fp = self._writinghandles[0]
2119 2135 fp.flush()
2120 2136 fp.close()
2121 2137 # We can't use the cached file handle after close(). So prevent
2122 2138 # its usage.
2123 2139 self._writinghandles = None
2124 2140
2125 2141 new_dfh = self._datafp(b'w+')
2126 2142 new_dfh.truncate(0) # drop any potentially existing data
2127 2143 try:
2128 2144 with self._indexfp() as read_ifh:
2129 2145 for r in self:
2130 2146 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2131 2147 if troffset <= self.start(r):
2132 2148 trindex = r
2133 2149 new_dfh.flush()
2134 2150
2135 2151 with self.__index_new_fp() as fp:
2136 2152 self._format_flags &= ~FLAG_INLINE_DATA
2137 2153 self._inline = False
2138 2154 for i in self:
2139 2155 e = self.index.entry_binary(i)
2140 2156 if i == 0 and self._docket is None:
2141 2157 header = self._format_flags | self._format_version
2142 2158 header = self.index.pack_header(header)
2143 2159 e = header + e
2144 2160 fp.write(e)
2145 2161 if self._docket is not None:
2146 2162 self._docket.index_end = fp.tell()
2147 2163 # the temp file replaces the real index when we exit the context
2148 2164 # manager
2149 2165
2150 2166 tr.replace(self._indexfile, trindex * self.index.entry_size)
2151 2167 nodemaputil.setup_persistent_nodemap(tr, self)
2152 2168 self._chunkclear()
2153 2169
2154 2170 if existing_handles:
2155 2171 # switched from inline to conventional; reopen the index
2156 2172 ifh = self.__index_write_fp()
2157 2173 self._writinghandles = (ifh, new_dfh)
2158 2174 new_dfh = None
2159 2175 finally:
2160 2176 if new_dfh is not None:
2161 2177 new_dfh.close()
2162 2178
2163 2179 def _nodeduplicatecallback(self, transaction, node):
2164 2180 """called when trying to add a node already stored."""
2165 2181
2166 2182 @contextlib.contextmanager
2167 2183 def _writing(self, transaction):
2168 2184 if self._trypending:
2169 2185 msg = b'try to write in a `trypending` revlog: %s'
2170 2186 msg %= self.display_id
2171 2187 raise error.ProgrammingError(msg)
2172 2188 if self._writinghandles is not None:
2173 2189 yield
2174 2190 else:
2175 2191 r = len(self)
2176 2192 dsize = 0
2177 2193 if r:
2178 2194 dsize = self.end(r - 1)
2179 2195 dfh = None
2180 2196 if not self._inline:
2181 2197 try:
2182 2198 dfh = self._datafp(b"r+")
2183 2199 if self._docket is None:
2184 2200 dfh.seek(0, os.SEEK_END)
2185 2201 else:
2186 2202 dfh.seek(self._docket.data_end, os.SEEK_SET)
2187 2203 except IOError as inst:
2188 2204 if inst.errno != errno.ENOENT:
2189 2205 raise
2190 2206 dfh = self._datafp(b"w+")
2191 2207 transaction.add(self._datafile, dsize)
2192 2208 try:
2193 2209 isize = r * self.index.entry_size
2194 2210 ifh = self.__index_write_fp()
2195 2211 if self._inline:
2196 2212 transaction.add(self._indexfile, dsize + isize)
2197 2213 else:
2198 2214 transaction.add(self._indexfile, isize)
2199 2215 try:
2200 2216 self._writinghandles = (ifh, dfh)
2201 2217 try:
2202 2218 yield
2203 2219 if self._docket is not None:
2204 2220 self._write_docket(transaction)
2205 2221 finally:
2206 2222 self._writinghandles = None
2207 2223 finally:
2208 2224 ifh.close()
2209 2225 finally:
2210 2226 if dfh is not None:
2211 2227 dfh.close()
2212 2228
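# Minimal usage sketch for the context manager above (``rl``, ``tr``
# and the revision data are assumed, not defined here):
#
#   with rl._writing(tr):
#       rl._addrevision(node, rawtext, tr, link, p1, p2, flags, None)
#
# the handles are opened once, positioned at the transaction-tracked
# end offsets, and closed again when the outermost context exits.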
2213 2229 def _write_docket(self, transaction):
2214 2230 """write the current docket on disk
2215 2231
2216 2232 Exists as a method to help the changelog implement transaction logic
2217 2233
2218 2234 We could also imagine using the same transaction logic for all revlogs
2219 2235 since dockets are cheap."""
2220 2236 self._docket.write(transaction)
2221 2237
2222 2238 def addrevision(
2223 2239 self,
2224 2240 text,
2225 2241 transaction,
2226 2242 link,
2227 2243 p1,
2228 2244 p2,
2229 2245 cachedelta=None,
2230 2246 node=None,
2231 2247 flags=REVIDX_DEFAULT_FLAGS,
2232 2248 deltacomputer=None,
2233 2249 sidedata=None,
2234 2250 ):
2235 2251 """add a revision to the log
2236 2252
2237 2253 text - the revision data to add
2238 2254 transaction - the transaction object used for rollback
2239 2255 link - the linkrev data to add
2240 2256 p1, p2 - the parent nodeids of the revision
2241 2257 cachedelta - an optional precomputed delta
2242 2258 node - nodeid of revision; typically node is not specified, and it is
2243 2259 computed by default as hash(text, p1, p2); however, subclasses might
2244 2260 use a different hashing method (and override checkhash() in that case)
2245 2261 flags - the known flags to set on the revision
2246 2262 deltacomputer - an optional deltacomputer instance shared between
2247 2263 multiple calls
2248 2264 """
2249 2265 if link == nullrev:
2250 2266 raise error.RevlogError(
2251 2267 _(b"attempted to add linkrev -1 to %s") % self.display_id
2252 2268 )
2253 2269
2254 2270 if sidedata is None:
2255 2271 sidedata = {}
2256 2272 elif sidedata and not self.hassidedata:
2257 2273 raise error.ProgrammingError(
2258 2274 _(b"trying to add sidedata to a revlog who don't support them")
2259 2275 )
2260 2276
2261 2277 if flags:
2262 2278 node = node or self.hash(text, p1, p2)
2263 2279
2264 2280 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2265 2281
2266 2282 # If the flag processor modifies the revision data, ignore any provided
2267 2283 # cachedelta.
2268 2284 if rawtext != text:
2269 2285 cachedelta = None
2270 2286
2271 2287 if len(rawtext) > _maxentrysize:
2272 2288 raise error.RevlogError(
2273 2289 _(
2274 2290 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2275 2291 )
2276 2292 % (self.display_id, len(rawtext))
2277 2293 )
2278 2294
2279 2295 node = node or self.hash(rawtext, p1, p2)
2280 2296 rev = self.index.get_rev(node)
2281 2297 if rev is not None:
2282 2298 return rev
2283 2299
2284 2300 if validatehash:
2285 2301 self.checkhash(rawtext, node, p1=p1, p2=p2)
2286 2302
2287 2303 return self.addrawrevision(
2288 2304 rawtext,
2289 2305 transaction,
2290 2306 link,
2291 2307 p1,
2292 2308 p2,
2293 2309 node,
2294 2310 flags,
2295 2311 cachedelta=cachedelta,
2296 2312 deltacomputer=deltacomputer,
2297 2313 sidedata=sidedata,
2298 2314 )
2299 2315
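# Minimal usage sketch for addrevision (``rl``, an open transaction
# ``tr`` and a link revision ``lr`` are assumed): appending a child
# of the current tip could look like
#
#   p1 = rl.node(len(rl) - 1)
#   rev = rl.addrevision(b'new content\n', tr, lr, p1, rl.nullid)
#
# with the node computed as hash(text, p1, p2) since none was passed.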
2300 2316 def addrawrevision(
2301 2317 self,
2302 2318 rawtext,
2303 2319 transaction,
2304 2320 link,
2305 2321 p1,
2306 2322 p2,
2307 2323 node,
2308 2324 flags,
2309 2325 cachedelta=None,
2310 2326 deltacomputer=None,
2311 2327 sidedata=None,
2312 2328 ):
2313 2329 """add a raw revision with known flags, node and parents
2314 2330 useful when reusing a revision not stored in this revlog (ex: received
2315 2331 over wire, or read from an external bundle).
2316 2332 """
2317 2333 with self._writing(transaction):
2318 2334 return self._addrevision(
2319 2335 node,
2320 2336 rawtext,
2321 2337 transaction,
2322 2338 link,
2323 2339 p1,
2324 2340 p2,
2325 2341 flags,
2326 2342 cachedelta,
2327 2343 deltacomputer=deltacomputer,
2328 2344 sidedata=sidedata,
2329 2345 )
2330 2346
2331 2347 def compress(self, data):
2332 2348 """Generate a possibly-compressed representation of data."""
2333 2349 if not data:
2334 2350 return b'', data
2335 2351
2336 2352 compressed = self._compressor.compress(data)
2337 2353
2338 2354 if compressed:
2339 2355 # The revlog compressor added the header in the returned data.
2340 2356 return b'', compressed
2341 2357
2342 2358 if data[0:1] == b'\0':
2343 2359 return b'', data
2344 2360 return b'u', data
2345 2361
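# Sketch of the (header, data) pairs compress above can return,
# assuming a zlib-backed revlog:
#
#   (b'', b'x\x9c...')   worthwhile compression; the engine header
#                        ('x' for zlib) is embedded in the data itself
#   (b'', b'\0...')      stored raw: the text already starts with NUL
#   (b'u', b'text...')   stored raw behind an explicit marker byte
#
# decompress() below routes on that first byte.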
2346 2362 def decompress(self, data):
2347 2363 """Decompress a revlog chunk.
2348 2364
2349 2365 The chunk is expected to begin with a header identifying the
2350 2366 format type so it can be routed to an appropriate decompressor.
2351 2367 """
2352 2368 if not data:
2353 2369 return data
2354 2370
2355 2371 # Revlogs are read much more frequently than they are written and many
2356 2372 # chunks only take microseconds to decompress, so performance is
2357 2373 # important here.
2358 2374 #
2359 2375 # We can make a few assumptions about revlogs:
2360 2376 #
2361 2377 # 1) the majority of chunks will be compressed (as opposed to inline
2362 2378 # raw data).
2363 2379 # 2) decompressing *any* data will likely be at least 10x slower than
2364 2380 # returning raw inline data.
2365 2381 # 3) we want to prioritize common and officially supported compression
2366 2382 # engines
2367 2383 #
2368 2384 # It follows that we want to optimize for "decompress compressed data
2369 2385 # when encoded with common and officially supported compression engines"
2370 2386 # case over "raw data" and "data encoded by less common or non-official
2371 2387 # compression engines." That is why we have the inline lookup first
2372 2388 # followed by the compengines lookup.
2373 2389 #
2374 2390 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2375 2391 # compressed chunks. And this matters for changelog and manifest reads.
2376 2392 t = data[0:1]
2377 2393
2378 2394 if t == b'x':
2379 2395 try:
2380 2396 return _zlibdecompress(data)
2381 2397 except zlib.error as e:
2382 2398 raise error.RevlogError(
2383 2399 _(b'revlog decompress error: %s')
2384 2400 % stringutil.forcebytestr(e)
2385 2401 )
2386 2402 # '\0' is more common than 'u' so it goes first.
2387 2403 elif t == b'\0':
2388 2404 return data
2389 2405 elif t == b'u':
2390 2406 return util.buffer(data, 1)
2391 2407
2392 2408 compressor = self._get_decompressor(t)
2393 2409
2394 2410 return compressor.decompress(data)
2395 2411
2396 2412 def _addrevision(
2397 2413 self,
2398 2414 node,
2399 2415 rawtext,
2400 2416 transaction,
2401 2417 link,
2402 2418 p1,
2403 2419 p2,
2404 2420 flags,
2405 2421 cachedelta,
2406 2422 alwayscache=False,
2407 2423 deltacomputer=None,
2408 2424 sidedata=None,
2409 2425 ):
2410 2426 """internal function to add revisions to the log
2411 2427
2412 2428 see addrevision for argument descriptions.
2413 2429
2414 2430 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2415 2431
2416 2432 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2417 2433 be used.
2418 2434
2419 2435 invariants:
2420 2436 - rawtext is optional (can be None); if not set, cachedelta must be set.
2421 2437 if both are set, they must correspond to each other.
2422 2438 """
2423 2439 if node == self.nullid:
2424 2440 raise error.RevlogError(
2425 2441 _(b"%s: attempt to add null revision") % self.display_id
2426 2442 )
2427 2443 if (
2428 2444 node == self.nodeconstants.wdirid
2429 2445 or node in self.nodeconstants.wdirfilenodeids
2430 2446 ):
2431 2447 raise error.RevlogError(
2432 2448 _(b"%s: attempt to add wdir revision") % self.display_id
2433 2449 )
2434 2450 if self._writinghandles is None:
2435 2451 msg = b'adding revision outside `revlog._writing` context'
2436 2452 raise error.ProgrammingError(msg)
2437 2453
2438 2454 if self._inline:
2439 2455 fh = self._writinghandles[0]
2440 2456 else:
2441 2457 fh = self._writinghandles[1]
2442 2458
2443 2459 btext = [rawtext]
2444 2460
2445 2461 curr = len(self)
2446 2462 prev = curr - 1
2447 2463
2448 2464 offset = self._get_data_offset(prev)
2449 2465
2450 2466 if self._concurrencychecker:
2451 2467 ifh, dfh = self._writinghandles
2452 2468 if self._inline:
2453 2469 # offset is "as if" it were in the .d file, so we need to add on
2454 2470 # the size of the entry metadata.
2455 2471 self._concurrencychecker(
2456 2472 ifh, self._indexfile, offset + curr * self.index.entry_size
2457 2473 )
2458 2474 else:
2459 2475 # Entries in the .i are a consistent size.
2460 2476 self._concurrencychecker(
2461 2477 ifh, self._indexfile, curr * self.index.entry_size
2462 2478 )
2463 2479 self._concurrencychecker(dfh, self._datafile, offset)
2464 2480
2465 2481 p1r, p2r = self.rev(p1), self.rev(p2)
2466 2482
2467 2483 # full versions are inserted when the needed deltas
2468 2484 # become comparable to the uncompressed text
2469 2485 if rawtext is None:
2470 2486 # need the rawtext size before it is changed by flag processors, which is
2471 2487 # the non-raw size. use revlog explicitly to avoid filelog's extra
2472 2488 # logic that might remove metadata size.
2473 2489 textlen = mdiff.patchedsize(
2474 2490 revlog.size(self, cachedelta[0]), cachedelta[1]
2475 2491 )
2476 2492 else:
2477 2493 textlen = len(rawtext)
2478 2494
2479 2495 if deltacomputer is None:
2480 2496 deltacomputer = deltautil.deltacomputer(self)
2481 2497
2482 2498 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2483 2499
2484 2500 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2485 2501
2486 2502 compression_mode = COMP_MODE_INLINE
2487 2503 if self._docket is not None:
2488 2504 h, d = deltainfo.data
2489 2505 if not h and not d:
2490 2506 # no data to store at all... declare it uncompressed
2491 2507 compression_mode = COMP_MODE_PLAIN
2492 elif not h and d[0:1] == b'\0':
2508 elif not h:
2509 t = d[0:1]
2510 if t == b'\0':
2493 2511 compression_mode = COMP_MODE_PLAIN
2512 elif t == self._docket.default_compression_header:
2513 compression_mode = COMP_MODE_DEFAULT
2494 2514 elif h == b'u':
2495 2515 # we have a more efficient way to declare uncompressed
2496 2516 h = b''
2497 2517 compression_mode = COMP_MODE_PLAIN
2498 2518 deltainfo = deltautil.drop_u_compression(deltainfo)
2499 2519
2500 2520 if sidedata and self.hassidedata:
2501 2521 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2502 2522 sidedata_offset = offset + deltainfo.deltalen
2503 2523 else:
2504 2524 serialized_sidedata = b""
2505 2525 # Don't store the offset if the sidedata is empty, that way
2506 2526 # we can easily detect empty sidedata, and it will be no different
2507 2527 # from sidedata we add manually.
2508 2528 sidedata_offset = 0
2509 2529
2510 2530 e = (
2511 2531 offset_type(offset, flags),
2512 2532 deltainfo.deltalen,
2513 2533 textlen,
2514 2534 deltainfo.base,
2515 2535 link,
2516 2536 p1r,
2517 2537 p2r,
2518 2538 node,
2519 2539 sidedata_offset,
2520 2540 len(serialized_sidedata),
2521 2541 compression_mode,
2522 2542 )
2523 2543
2524 2544 self.index.append(e)
2525 2545 entry = self.index.entry_binary(curr)
2526 2546 if curr == 0 and self._docket is None:
2527 2547 header = self._format_flags | self._format_version
2528 2548 header = self.index.pack_header(header)
2529 2549 entry = header + entry
2530 2550 self._writeentry(
2531 2551 transaction,
2532 2552 entry,
2533 2553 deltainfo.data,
2534 2554 link,
2535 2555 offset,
2536 2556 serialized_sidedata,
2537 2557 )
2538 2558
2539 2559 rawtext = btext[0]
2540 2560
2541 2561 if alwayscache and rawtext is None:
2542 2562 rawtext = deltacomputer.buildtext(revinfo, fh)
2543 2563
2544 2564 if type(rawtext) == bytes: # only accept immutable objects
2545 2565 self._revisioncache = (node, curr, rawtext)
2546 2566 self._chainbasecache[curr] = deltainfo.chainbase
2547 2567 return curr
2548 2568
2549 2569 def _get_data_offset(self, prev):
2550 2570 """Returns the current offset in the (in-transaction) data file.
2551 2571 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2552 2572 file to store that information: since sidedata can be rewritten to the
2553 2573 end of the data file within a transaction, you can have cases where, for
2554 2574 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2555 2575 to `n - 1`'s sidedata being written after `n`'s data.
2556 2576
2557 2577 TODO cache this in a docket file before getting out of experimental."""
2558 2578 if self._docket is None:
2559 2579 return self.end(prev)
2560 2580 else:
2561 2581 return self._docket.data_end
2562 2582
2563 2583 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2564 2584 # Files opened in a+ mode have inconsistent behavior on various
2565 2585 # platforms. Windows requires that a file positioning call be made
2566 2586 # when the file handle transitions between reads and writes. See
2567 2587 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2568 2588 # platforms, Python or the platform itself can be buggy. Some versions
2569 2589 # of Solaris have been observed to not append at the end of the file
2570 2590 # if the file was seeked to before the end. See issue4943 for more.
2571 2591 #
2572 2592 # We work around this issue by inserting a seek() before writing.
2573 2593 # Note: This is likely not necessary on Python 3. However, because
2574 2594 # the file handle is reused for reads and may be seeked there, we need
2575 2595 # to be careful before changing this.
2576 2596 if self._writinghandles is None:
2577 2597 msg = b'adding revision outside `revlog._writing` context'
2578 2598 raise error.ProgrammingError(msg)
2579 2599 ifh, dfh = self._writinghandles
2580 2600 if self._docket is None:
2581 2601 ifh.seek(0, os.SEEK_END)
2582 2602 else:
2583 2603 ifh.seek(self._docket.index_end, os.SEEK_SET)
2584 2604 if dfh:
2585 2605 if self._docket is None:
2586 2606 dfh.seek(0, os.SEEK_END)
2587 2607 else:
2588 2608 dfh.seek(self._docket.data_end, os.SEEK_SET)
2589 2609
2590 2610 curr = len(self) - 1
2591 2611 if not self._inline:
2592 2612 transaction.add(self._datafile, offset)
2593 2613 transaction.add(self._indexfile, curr * len(entry))
2594 2614 if data[0]:
2595 2615 dfh.write(data[0])
2596 2616 dfh.write(data[1])
2597 2617 if sidedata:
2598 2618 dfh.write(sidedata)
2599 2619 ifh.write(entry)
2600 2620 else:
2601 2621 offset += curr * self.index.entry_size
2602 2622 transaction.add(self._indexfile, offset)
2603 2623 ifh.write(entry)
2604 2624 ifh.write(data[0])
2605 2625 ifh.write(data[1])
2606 2626 if sidedata:
2607 2627 ifh.write(sidedata)
2608 2628 self._enforceinlinesize(transaction)
2609 2629 if self._docket is not None:
2610 2630 self._docket.index_end = self._writinghandles[0].tell()
2611 2631 self._docket.data_end = self._writinghandles[1].tell()
2612 2632
2613 2633 nodemaputil.setup_persistent_nodemap(transaction, self)
2614 2634
2615 2635 def addgroup(
2616 2636 self,
2617 2637 deltas,
2618 2638 linkmapper,
2619 2639 transaction,
2620 2640 alwayscache=False,
2621 2641 addrevisioncb=None,
2622 2642 duplicaterevisioncb=None,
2623 2643 ):
2624 2644 """
2625 2645 add a delta group
2626 2646
2627 2647 given a set of deltas, add them to the revision log. the
2628 2648 first delta is against its parent, which should be in our
2629 2649 log, the rest are against the previous delta.
2630 2650
2631 2651 If ``addrevisioncb`` is defined, it will be called with arguments of
2632 2652 this revlog and the node that was added.
2633 2653 """
2634 2654
2635 2655 if self._adding_group:
2636 2656 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2637 2657
2638 2658 self._adding_group = True
2639 2659 empty = True
2640 2660 try:
2641 2661 with self._writing(transaction):
2642 2662 deltacomputer = deltautil.deltacomputer(self)
2643 2663 # loop through our set of deltas
2644 2664 for data in deltas:
2645 2665 (
2646 2666 node,
2647 2667 p1,
2648 2668 p2,
2649 2669 linknode,
2650 2670 deltabase,
2651 2671 delta,
2652 2672 flags,
2653 2673 sidedata,
2654 2674 ) = data
2655 2675 link = linkmapper(linknode)
2656 2676 flags = flags or REVIDX_DEFAULT_FLAGS
2657 2677
2658 2678 rev = self.index.get_rev(node)
2659 2679 if rev is not None:
2660 2680 # this can happen if two branches make the same change
2661 2681 self._nodeduplicatecallback(transaction, rev)
2662 2682 if duplicaterevisioncb:
2663 2683 duplicaterevisioncb(self, rev)
2664 2684 empty = False
2665 2685 continue
2666 2686
2667 2687 for p in (p1, p2):
2668 2688 if not self.index.has_node(p):
2669 2689 raise error.LookupError(
2670 2690 p, self.radix, _(b'unknown parent')
2671 2691 )
2672 2692
2673 2693 if not self.index.has_node(deltabase):
2674 2694 raise error.LookupError(
2675 2695 deltabase, self.display_id, _(b'unknown delta base')
2676 2696 )
2677 2697
2678 2698 baserev = self.rev(deltabase)
2679 2699
2680 2700 if baserev != nullrev and self.iscensored(baserev):
2681 2701 # if base is censored, delta must be full replacement in a
2682 2702 # single patch operation
2683 2703 hlen = struct.calcsize(b">lll")
2684 2704 oldlen = self.rawsize(baserev)
2685 2705 newlen = len(delta) - hlen
2686 2706 if delta[:hlen] != mdiff.replacediffheader(
2687 2707 oldlen, newlen
2688 2708 ):
2689 2709 raise error.CensoredBaseError(
2690 2710 self.display_id, self.node(baserev)
2691 2711 )
2692 2712
2693 2713 if not flags and self._peek_iscensored(baserev, delta):
2694 2714 flags |= REVIDX_ISCENSORED
2695 2715
2696 2716 # We assume consumers of addrevisioncb will want to retrieve
2697 2717 # the added revision, which will require a call to
2698 2718 # revision(). revision() will fast path if there is a cache
2699 2719 # hit. So, we tell _addrevision() to always cache in this case.
2700 2720 # We're only using addgroup() in the context of changegroup
2701 2721 # generation so the revision data can always be handled as raw
2702 2722 # by the flagprocessor.
2703 2723 rev = self._addrevision(
2704 2724 node,
2705 2725 None,
2706 2726 transaction,
2707 2727 link,
2708 2728 p1,
2709 2729 p2,
2710 2730 flags,
2711 2731 (baserev, delta),
2712 2732 alwayscache=alwayscache,
2713 2733 deltacomputer=deltacomputer,
2714 2734 sidedata=sidedata,
2715 2735 )
2716 2736
2717 2737 if addrevisioncb:
2718 2738 addrevisioncb(self, rev)
2719 2739 empty = False
2720 2740 finally:
2721 2741 self._adding_group = False
2722 2742 return not empty
2723 2743
2724 2744 def iscensored(self, rev):
2725 2745 """Check if a file revision is censored."""
2726 2746 if not self._censorable:
2727 2747 return False
2728 2748
2729 2749 return self.flags(rev) & REVIDX_ISCENSORED
2730 2750
2731 2751 def _peek_iscensored(self, baserev, delta):
2732 2752 """Quickly check if a delta produces a censored revision."""
2733 2753 if not self._censorable:
2734 2754 return False
2735 2755
2736 2756 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2737 2757
2738 2758 def getstrippoint(self, minlink):
2739 2759 """find the minimum rev that must be stripped to strip the linkrev
2740 2760
2741 2761 Returns a tuple containing the minimum rev and a set of all revs that
2742 2762 have linkrevs that will be broken by this strip.
2743 2763 """
2744 2764 return storageutil.resolvestripinfo(
2745 2765 minlink,
2746 2766 len(self) - 1,
2747 2767 self.headrevs(),
2748 2768 self.linkrev,
2749 2769 self.parentrevs,
2750 2770 )
2751 2771
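# Worked example for getstrippoint (hypothetical linear history): if
# revs 0..9 exist and every rev's linkrev equals its own number, then
#
#   rl.getstrippoint(7) == (7, set())
#
# i.e. truncating at rev 7 drops revs 7..9 and leaves no surviving
# revision whose linkrev points at a stripped changelog entry.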
2752 2772 def strip(self, minlink, transaction):
2753 2773 """truncate the revlog on the first revision with a linkrev >= minlink
2754 2774
2755 2775 This function is called when we're stripping revision minlink and
2756 2776 its descendants from the repository.
2757 2777
2758 2778 We have to remove all revisions with linkrev >= minlink, because
2759 2779 the equivalent changelog revisions will be renumbered after the
2760 2780 strip.
2761 2781
2762 2782 So we truncate the revlog on the first of these revisions, and
2763 2783 trust that the caller has saved the revisions that shouldn't be
2764 2784 removed and that it'll re-add them after this truncation.
2765 2785 """
2766 2786 if len(self) == 0:
2767 2787 return
2768 2788
2769 2789 rev, _ = self.getstrippoint(minlink)
2770 2790 if rev == len(self):
2771 2791 return
2772 2792
2773 2793 # first truncate the files on disk
2774 2794 data_end = self.start(rev)
2775 2795 if not self._inline:
2776 2796 transaction.add(self._datafile, data_end)
2777 2797 end = rev * self.index.entry_size
2778 2798 else:
2779 2799 end = data_end + (rev * self.index.entry_size)
2780 2800
2781 2801 transaction.add(self._indexfile, end)
2782 2802 if self._docket is not None:
2783 2803 # XXX we could leverage the docket while stripping. However, it is
2784 2804 # not powerful enough at the time of this comment
2785 2805 self._docket.index_end = end
2786 2806 self._docket.data_end = data_end
2787 2807 self._docket.write(transaction, stripping=True)
2788 2808
2789 2809 # then reset internal state in memory to forget those revisions
2790 2810 self._revisioncache = None
2791 2811 self._chaininfocache = util.lrucachedict(500)
2792 2812 self._chunkclear()
2793 2813
2794 2814 del self.index[rev:-1]
2795 2815
2796 2816 def checksize(self):
2797 2817 """Check size of index and data files
2798 2818
2799 2819 return a (dd, di) tuple.
2800 2820 - dd: extra bytes for the "data" file
2801 2821 - di: extra bytes for the "index" file
2802 2822
2803 2823 A healthy revlog will return (0, 0).
2804 2824 """
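# Sketch of what "healthy" means here: for a non-inline revlog whose
# last revision ends at byte 1000 of the data file, the data file
# should be exactly 1000 bytes (dd == 0) and the index an exact
# multiple of entry_size (di == 0); positive values usually indicate
# trailing garbage from an interrupted transaction.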
2805 2825 expected = 0
2806 2826 if len(self):
2807 2827 expected = max(0, self.end(len(self) - 1))
2808 2828
2809 2829 try:
2810 2830 with self._datafp() as f:
2811 2831 f.seek(0, io.SEEK_END)
2812 2832 actual = f.tell()
2813 2833 dd = actual - expected
2814 2834 except IOError as inst:
2815 2835 if inst.errno != errno.ENOENT:
2816 2836 raise
2817 2837 dd = 0
2818 2838
2819 2839 try:
2820 2840 f = self.opener(self._indexfile)
2821 2841 f.seek(0, io.SEEK_END)
2822 2842 actual = f.tell()
2823 2843 f.close()
2824 2844 s = self.index.entry_size
2825 2845 i = max(0, actual // s)
2826 2846 di = actual - (i * s)
2827 2847 if self._inline:
2828 2848 databytes = 0
2829 2849 for r in self:
2830 2850 databytes += max(0, self.length(r))
2831 2851 dd = 0
2832 2852 di = actual - len(self) * s - databytes
2833 2853 except IOError as inst:
2834 2854 if inst.errno != errno.ENOENT:
2835 2855 raise
2836 2856 di = 0
2837 2857
2838 2858 return (dd, di)
2839 2859
2840 2860 def files(self):
2841 2861 res = [self._indexfile]
2842 2862 if not self._inline:
2843 2863 res.append(self._datafile)
2844 2864 return res
2845 2865
2846 2866 def emitrevisions(
2847 2867 self,
2848 2868 nodes,
2849 2869 nodesorder=None,
2850 2870 revisiondata=False,
2851 2871 assumehaveparentrevisions=False,
2852 2872 deltamode=repository.CG_DELTAMODE_STD,
2853 2873 sidedata_helpers=None,
2854 2874 ):
2855 2875 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2856 2876 raise error.ProgrammingError(
2857 2877 b'unhandled value for nodesorder: %s' % nodesorder
2858 2878 )
2859 2879
2860 2880 if nodesorder is None and not self._generaldelta:
2861 2881 nodesorder = b'storage'
2862 2882
2863 2883 if (
2864 2884 not self._storedeltachains
2865 2885 and deltamode != repository.CG_DELTAMODE_PREV
2866 2886 ):
2867 2887 deltamode = repository.CG_DELTAMODE_FULL
2868 2888
2869 2889 return storageutil.emitrevisions(
2870 2890 self,
2871 2891 nodes,
2872 2892 nodesorder,
2873 2893 revlogrevisiondelta,
2874 2894 deltaparentfn=self.deltaparent,
2875 2895 candeltafn=self.candelta,
2876 2896 rawsizefn=self.rawsize,
2877 2897 revdifffn=self.revdiff,
2878 2898 flagsfn=self.flags,
2879 2899 deltamode=deltamode,
2880 2900 revisiondata=revisiondata,
2881 2901 assumehaveparentrevisions=assumehaveparentrevisions,
2882 2902 sidedata_helpers=sidedata_helpers,
2883 2903 )
2884 2904
2885 2905 DELTAREUSEALWAYS = b'always'
2886 2906 DELTAREUSESAMEREVS = b'samerevs'
2887 2907 DELTAREUSENEVER = b'never'
2888 2908
2889 2909 DELTAREUSEFULLADD = b'fulladd'
2890 2910
2891 2911 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2892 2912
2893 2913 def clone(
2894 2914 self,
2895 2915 tr,
2896 2916 destrevlog,
2897 2917 addrevisioncb=None,
2898 2918 deltareuse=DELTAREUSESAMEREVS,
2899 2919 forcedeltabothparents=None,
2900 2920 sidedata_helpers=None,
2901 2921 ):
2902 2922 """Copy this revlog to another, possibly with format changes.
2903 2923
2904 2924 The destination revlog will contain the same revisions and nodes.
2905 2925 However, it may not be bit-for-bit identical due to e.g. delta encoding
2906 2926 differences.
2907 2927
2908 2928 The ``deltareuse`` argument controls how deltas from the existing revlog
2909 2929 are preserved in the destination revlog. The argument can have the
2910 2930 following values:
2911 2931
2912 2932 DELTAREUSEALWAYS
2913 2933 Deltas will always be reused (if possible), even if the destination
2914 2934 revlog would not select the same revisions for the delta. This is the
2915 2935 fastest mode of operation.
2916 2936 DELTAREUSESAMEREVS
2917 2937 Deltas will be reused if the destination revlog would pick the same
2918 2938 revisions for the delta. This mode strikes a balance between speed
2919 2939 and optimization.
2920 2940 DELTAREUSENEVER
2921 2941 Deltas will never be reused. This is the slowest mode of execution.
2922 2942 This mode can be used to recompute deltas (e.g. if the diff/delta
2923 2943 algorithm changes).
2924 2944 DELTAREUSEFULLADD
2925 2945 Revisions will be re-added as if they were new content. This is
2926 2946 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2927 2947 e.g. large file detection and handling.
2928 2948
2929 2949 Delta computation can be slow, so the choice of delta reuse policy can
2930 2950 significantly affect run time.
2931 2951
2932 2952 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2933 2953 two extremes. Deltas will be reused if they are appropriate. But if the
2934 2954 delta could choose a better revision, it will do so. This means if you
2935 2955 are converting a non-generaldelta revlog to a generaldelta revlog,
2936 2956 deltas will be recomputed if the delta's parent isn't a parent of the
2937 2957 revision.
2938 2958
2939 2959 In addition to the delta policy, the ``forcedeltabothparents``
2940 2960 argument controls whether to force computing deltas against both parents
2941 2961 for merges. If it is None, the current default is used.
2942 2962
2943 2963 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2944 2964 `sidedata_helpers`.
2945 2965 """
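# Minimal usage sketch (``src``, an empty ``dst`` revlog and an open
# transaction ``tr`` are assumed): a format upgrade that must
# recompute every delta would run
#
#   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
#
# while a plain copy keeping existing deltas wherever valid would
# pass DELTAREUSEALWAYS instead.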
2946 2966 if deltareuse not in self.DELTAREUSEALL:
2947 2967 raise ValueError(
2948 2968 _(b'value for deltareuse invalid: %s') % deltareuse
2949 2969 )
2950 2970
2951 2971 if len(destrevlog):
2952 2972 raise ValueError(_(b'destination revlog is not empty'))
2953 2973
2954 2974 if getattr(self, 'filteredrevs', None):
2955 2975 raise ValueError(_(b'source revlog has filtered revisions'))
2956 2976 if getattr(destrevlog, 'filteredrevs', None):
2957 2977 raise ValueError(_(b'destination revlog has filtered revisions'))
2958 2978
2959 2979 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2960 2980 # if possible.
2961 2981 oldlazydelta = destrevlog._lazydelta
2962 2982 oldlazydeltabase = destrevlog._lazydeltabase
2963 2983 oldamd = destrevlog._deltabothparents
2964 2984
2965 2985 try:
2966 2986 if deltareuse == self.DELTAREUSEALWAYS:
2967 2987 destrevlog._lazydeltabase = True
2968 2988 destrevlog._lazydelta = True
2969 2989 elif deltareuse == self.DELTAREUSESAMEREVS:
2970 2990 destrevlog._lazydeltabase = False
2971 2991 destrevlog._lazydelta = True
2972 2992 elif deltareuse == self.DELTAREUSENEVER:
2973 2993 destrevlog._lazydeltabase = False
2974 2994 destrevlog._lazydelta = False
2975 2995
2976 2996 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2977 2997
2978 2998 self._clone(
2979 2999 tr,
2980 3000 destrevlog,
2981 3001 addrevisioncb,
2982 3002 deltareuse,
2983 3003 forcedeltabothparents,
2984 3004 sidedata_helpers,
2985 3005 )
2986 3006
2987 3007 finally:
2988 3008 destrevlog._lazydelta = oldlazydelta
2989 3009 destrevlog._lazydeltabase = oldlazydeltabase
2990 3010 destrevlog._deltabothparents = oldamd
2991 3011
2992 3012 def _clone(
2993 3013 self,
2994 3014 tr,
2995 3015 destrevlog,
2996 3016 addrevisioncb,
2997 3017 deltareuse,
2998 3018 forcedeltabothparents,
2999 3019 sidedata_helpers,
3000 3020 ):
3001 3021 """perform the core duty of `revlog.clone` after parameter processing"""
3002 3022 deltacomputer = deltautil.deltacomputer(destrevlog)
3003 3023 index = self.index
3004 3024 for rev in self:
3005 3025 entry = index[rev]
3006 3026
3007 3027 # Some classes override linkrev to take filtered revs into
3008 3028 # account. Use raw entry from index.
3009 3029 flags = entry[0] & 0xFFFF
3010 3030 linkrev = entry[4]
3011 3031 p1 = index[entry[5]][7]
3012 3032 p2 = index[entry[6]][7]
3013 3033 node = entry[7]
3014 3034
3015 3035 # (Possibly) reuse the delta from the revlog if allowed and
3016 3036 # the revlog chunk is a delta.
3017 3037 cachedelta = None
3018 3038 rawtext = None
3019 3039 if deltareuse == self.DELTAREUSEFULLADD:
3020 3040 text, sidedata = self._revisiondata(rev)
3021 3041
3022 3042 if sidedata_helpers is not None:
3023 3043 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3024 3044 self, sidedata_helpers, sidedata, rev
3025 3045 )
3026 3046 flags = flags | new_flags[0] & ~new_flags[1]
3027 3047
3028 3048 destrevlog.addrevision(
3029 3049 text,
3030 3050 tr,
3031 3051 linkrev,
3032 3052 p1,
3033 3053 p2,
3034 3054 cachedelta=cachedelta,
3035 3055 node=node,
3036 3056 flags=flags,
3037 3057 deltacomputer=deltacomputer,
3038 3058 sidedata=sidedata,
3039 3059 )
3040 3060 else:
3041 3061 if destrevlog._lazydelta:
3042 3062 dp = self.deltaparent(rev)
3043 3063 if dp != nullrev:
3044 3064 cachedelta = (dp, bytes(self._chunk(rev)))
3045 3065
3046 3066 sidedata = None
3047 3067 if not cachedelta:
3048 3068 rawtext, sidedata = self._revisiondata(rev)
3049 3069 if sidedata is None:
3050 3070 sidedata = self.sidedata(rev)
3051 3071
3052 3072 if sidedata_helpers is not None:
3053 3073 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3054 3074 self, sidedata_helpers, sidedata, rev
3055 3075 )
3056 3076 flags = flags | new_flags[0] & ~new_flags[1]
3057 3077
3058 3078 with destrevlog._writing(tr):
3059 3079 destrevlog._addrevision(
3060 3080 node,
3061 3081 rawtext,
3062 3082 tr,
3063 3083 linkrev,
3064 3084 p1,
3065 3085 p2,
3066 3086 flags,
3067 3087 cachedelta,
3068 3088 deltacomputer=deltacomputer,
3069 3089 sidedata=sidedata,
3070 3090 )
3071 3091
3072 3092 if addrevisioncb:
3073 3093 addrevisioncb(self, rev, node)
3074 3094
3075 3095 def censorrevision(self, tr, censornode, tombstone=b''):
3076 3096 if self._format_version == REVLOGV0:
3077 3097 raise error.RevlogError(
3078 3098 _(b'cannot censor with version %d revlogs')
3079 3099 % self._format_version
3080 3100 )
3081 3101
3082 3102 censorrev = self.rev(censornode)
3083 3103 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3084 3104
3085 3105 if len(tombstone) > self.rawsize(censorrev):
3086 3106 raise error.Abort(
3087 3107 _(b'censor tombstone must be no longer than censored data')
3088 3108 )
3089 3109
3090 3110 # Rewriting the revlog in place is hard. Our strategy for censoring is
3091 3111 # to create a new revlog, copy all revisions to it, then replace the
3092 3112 # revlogs on transaction close.
3093 3113 #
3094 3114 # This is a bit dangerous. We could easily have a mismatch of state.
3095 3115 newrl = revlog(
3096 3116 self.opener,
3097 3117 target=self.target,
3098 3118 radix=self.radix,
3099 3119 postfix=b'tmpcensored',
3100 3120 censorable=True,
3101 3121 )
3102 3122 newrl._format_version = self._format_version
3103 3123 newrl._format_flags = self._format_flags
3104 3124 newrl._generaldelta = self._generaldelta
3105 3125 newrl._parse_index = self._parse_index
3106 3126
3107 3127 for rev in self.revs():
3108 3128 node = self.node(rev)
3109 3129 p1, p2 = self.parents(node)
3110 3130
3111 3131 if rev == censorrev:
3112 3132 newrl.addrawrevision(
3113 3133 tombstone,
3114 3134 tr,
3115 3135 self.linkrev(censorrev),
3116 3136 p1,
3117 3137 p2,
3118 3138 censornode,
3119 3139 REVIDX_ISCENSORED,
3120 3140 )
3121 3141
3122 3142 if newrl.deltaparent(rev) != nullrev:
3123 3143 raise error.Abort(
3124 3144 _(
3125 3145 b'censored revision stored as delta; '
3126 3146 b'cannot censor'
3127 3147 ),
3128 3148 hint=_(
3129 3149 b'censoring of revlogs is not '
3130 3150 b'fully implemented; please report '
3131 3151 b'this bug'
3132 3152 ),
3133 3153 )
3134 3154 continue
3135 3155
3136 3156 if self.iscensored(rev):
3137 3157 if self.deltaparent(rev) != nullrev:
3138 3158 raise error.Abort(
3139 3159 _(
3140 3160 b'cannot censor due to censored '
3141 3161 b'revision having delta stored'
3142 3162 )
3143 3163 )
3144 3164 rawtext = self._chunk(rev)
3145 3165 else:
3146 3166 rawtext = self.rawdata(rev)
3147 3167
3148 3168 newrl.addrawrevision(
3149 3169 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3150 3170 )
3151 3171
3152 3172 tr.addbackup(self._indexfile, location=b'store')
3153 3173 if not self._inline:
3154 3174 tr.addbackup(self._datafile, location=b'store')
3155 3175
3156 3176 self.opener.rename(newrl._indexfile, self._indexfile)
3157 3177 if not self._inline:
3158 3178 self.opener.rename(newrl._datafile, self._datafile)
3159 3179
3160 3180 self.clearcaches()
3161 3181 self._loadindex()
3162 3182
3163 3183 def verifyintegrity(self, state):
3164 3184 """Verifies the integrity of the revlog.
3165 3185
3166 3186 Yields ``revlogproblem`` instances describing problems that are
3167 3187 found.
3168 3188 """
3169 3189 dd, di = self.checksize()
3170 3190 if dd:
3171 3191 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3172 3192 if di:
3173 3193 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3174 3194
3175 3195 version = self._format_version
3176 3196
3177 3197 # The verifier tells us what version this revlog should be.
3178 3198 if version != state[b'expectedversion']:
3179 3199 yield revlogproblem(
3180 3200 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3181 3201 % (self.display_id, version, state[b'expectedversion'])
3182 3202 )
3183 3203
3184 3204 state[b'skipread'] = set()
3185 3205 state[b'safe_renamed'] = set()
3186 3206
3187 3207 for rev in self:
3188 3208 node = self.node(rev)
3189 3209
3190 3210 # Verify contents. 4 cases to care about:
3191 3211 #
3192 3212 # common: the most common case
3193 3213 # rename: with a rename
3194 3214 # meta: file content starts with b'\1\n', the metadata
3195 3215 # header defined in filelog.py, but without a rename
3196 3216 # ext: content stored externally
3197 3217 #
3198 3218 # More formally, their differences are shown below:
3199 3219 #
3200 3220 # | common | rename | meta | ext
3201 3221 # -------------------------------------------------------
3202 3222 # flags() | 0 | 0 | 0 | not 0
3203 3223 # renamed() | False | True | False | ?
3204 3224 # rawtext[0:2]=='\1\n'| False | True | True | ?
3205 3225 #
3206 3226 # "rawtext" means the raw text stored in revlog data, which
3207 3227 # could be retrieved by "rawdata(rev)". "text"
3208 3228 # mentioned below is "revision(rev)".
3209 3229 #
3210 3230 # There are 3 different lengths stored physically:
3211 3231 # 1. L1: rawsize, stored in revlog index
3212 3232 # 2. L2: len(rawtext), stored in revlog data
3213 3233 # 3. L3: len(text), stored in revlog data if flags==0, or
3214 3234 # possibly somewhere else if flags!=0
3215 3235 #
3216 3236 # L1 should be equal to L2. L3 could be different from them.
3217 3237 # "text" may or may not affect commit hash depending on flag
3218 3238 # processors (see flagutil.addflagprocessor).
3219 3239 #
3220 3240 # | common | rename | meta | ext
3221 3241 # -------------------------------------------------
3222 3242 # rawsize() | L1 | L1 | L1 | L1
3223 3243 # size() | L1 | L2-LM | L1(*) | L1 (?)
3224 3244 # len(rawtext) | L2 | L2 | L2 | L2
3225 3245 # len(text) | L2 | L2 | L2 | L3
3226 3246 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3227 3247 #
3228 3248 # LM: length of metadata, depending on rawtext
3229 3249 # (*): not ideal, see comment in filelog.size
3230 3250 # (?): could be "- len(meta)" if the resolved content has
3231 3251 # rename metadata
3232 3252 #
3233 3253 # Checks needed to be done:
3234 3254 # 1. length check: L1 == L2, in all cases.
3235 3255 # 2. hash check: depending on flag processor, we may need to
3236 3256 # use either "text" (external), or "rawtext" (in revlog).
3237 3257
3238 3258 try:
3239 3259 skipflags = state.get(b'skipflags', 0)
3240 3260 if skipflags:
3241 3261 skipflags &= self.flags(rev)
3242 3262
3243 3263 _verify_revision(self, skipflags, state, node)
3244 3264
3245 3265 l1 = self.rawsize(rev)
3246 3266 l2 = len(self.rawdata(node))
3247 3267
3248 3268 if l1 != l2:
3249 3269 yield revlogproblem(
3250 3270 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3251 3271 node=node,
3252 3272 )
3253 3273
3254 3274 except error.CensoredNodeError:
3255 3275 if state[b'erroroncensored']:
3256 3276 yield revlogproblem(
3257 3277 error=_(b'censored file data'), node=node
3258 3278 )
3259 3279 state[b'skipread'].add(node)
3260 3280 except Exception as e:
3261 3281 yield revlogproblem(
3262 3282 error=_(b'unpacking %s: %s')
3263 3283 % (short(node), stringutil.forcebytestr(e)),
3264 3284 node=node,
3265 3285 )
3266 3286 state[b'skipread'].add(node)
3267 3287
3268 3288 def storageinfo(
3269 3289 self,
3270 3290 exclusivefiles=False,
3271 3291 sharedfiles=False,
3272 3292 revisionscount=False,
3273 3293 trackedsize=False,
3274 3294 storedsize=False,
3275 3295 ):
3276 3296 d = {}
3277 3297
3278 3298 if exclusivefiles:
3279 3299 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3280 3300 if not self._inline:
3281 3301 d[b'exclusivefiles'].append((self.opener, self._datafile))
3282 3302
3283 3303 if sharedfiles:
3284 3304 d[b'sharedfiles'] = []
3285 3305
3286 3306 if revisionscount:
3287 3307 d[b'revisionscount'] = len(self)
3288 3308
3289 3309 if trackedsize:
3290 3310 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3291 3311
3292 3312 if storedsize:
3293 3313 d[b'storedsize'] = sum(
3294 3314 self.opener.stat(path).st_size for path in self.files()
3295 3315 )
3296 3316
3297 3317 return d
3298 3318
3299 3319 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3300 3320 if not self.hassidedata:
3301 3321 return
3302 3322 # revlog formats with sidedata support do not support inline
3303 3323 assert not self._inline
3304 3324 if not helpers[1] and not helpers[2]:
3305 3325 # Nothing to generate or remove
3306 3326 return
3307 3327
3308 3328 new_entries = []
3309 3329 # append the new sidedata
3310 3330 with self._writing(transaction):
3311 3331 ifh, dfh = self._writinghandles
3312 3332 if self._docket is not None:
3313 3333 dfh.seek(self._docket.data_end, os.SEEK_SET)
3314 3334 else:
3315 3335 dfh.seek(0, os.SEEK_END)
3316 3336
3317 3337 current_offset = dfh.tell()
3318 3338 for rev in range(startrev, endrev + 1):
3319 3339 entry = self.index[rev]
3320 3340 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3321 3341 store=self,
3322 3342 sidedata_helpers=helpers,
3323 3343 sidedata={},
3324 3344 rev=rev,
3325 3345 )
3326 3346
3327 3347 serialized_sidedata = sidedatautil.serialize_sidedata(
3328 3348 new_sidedata
3329 3349 )
3330 3350 if entry[8] != 0 or entry[9] != 0:
3331 3351 # rewriting entries that already have sidedata is not
3332 3352 # supported yet, because it introduces garbage data in the
3333 3353 # revlog.
3334 3354 msg = b"rewriting existing sidedata is not supported yet"
3335 3355 raise error.Abort(msg)
3336 3356
3337 3357 # Apply (potential) flags to add and to remove after running
3338 3358 # the sidedata helpers
3339 3359 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3340 3360 entry_update = (
3341 3361 current_offset,
3342 3362 len(serialized_sidedata),
3343 3363 new_offset_flags,
3344 3364 )
3345 3365
3346 3366 # the sidedata computation might have moved the file cursors around
3347 3367 dfh.seek(current_offset, os.SEEK_SET)
3348 3368 dfh.write(serialized_sidedata)
3349 3369 new_entries.append(entry_update)
3350 3370 current_offset += len(serialized_sidedata)
3351 3371 if self._docket is not None:
3352 3372 self._docket.data_end = dfh.tell()
3353 3373
3354 3374 # rewrite the new index entries
3355 3375 ifh.seek(startrev * self.index.entry_size)
3356 3376 for i, e in enumerate(new_entries):
3357 3377 rev = startrev + i
3358 3378 self.index.replace_sidedata_info(rev, *e)
3359 3379 packed = self.index.entry_binary(rev)
3360 3380 if rev == 0 and self._docket is None:
3361 3381 header = self._format_flags | self._format_version
3362 3382 header = self.index.pack_header(header)
3363 3383 packed = header + packed
3364 3384 ifh.write(packed)
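The flag bookkeeping in rewrite_sidedata() above hinges on the expression "entry[0] | flags[0] & ~flags[1]": the first field of an index entry packs the data offset in its high bits and the revision flags in its low 16 bits, while the sidedata helpers return a pair of (flags to add, flags to remove). A minimal standalone sketch of that bit manipulation, with illustrative values that are not part of this patch:

def apply_flag_updates(offset_flags, to_add, to_remove):
    # "&" binds tighter than "|" in Python, so this reads as
    # offset_flags | (to_add & ~to_remove), matching the expression in
    # rewrite_sidedata() above: bits already present are left untouched
    # and only the requested additions are masked by the removals.
    return offset_flags | to_add & ~to_remove

REVIDX_EXTSTORED = 1 << 13       # values mirroring the real flags
REVIDX_HASCOPIESINFO = 1 << 12

entry_offset_flags = (42 << 16) | REVIDX_EXTSTORED
updated = apply_flag_updates(
    entry_offset_flags,
    to_add=REVIDX_HASCOPIESINFO,
    to_remove=0,
)
assert updated == (42 << 16) | REVIDX_EXTSTORED | REVIDX_HASCOPIESINFO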
@@ -1,169 +1,179 b''
1 1 # constants.py - constants used for revlog logic.
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import struct
13 13
14 14 from ..interfaces import repository
15 15
16 16 ### Internal utility constants
17 17
18 18 KIND_CHANGELOG = 1001 # over 256 so it cannot be confused with a bytes value
19 19 KIND_MANIFESTLOG = 1002
20 20 KIND_FILELOG = 1003
21 21 KIND_OTHER = 1004
22 22
23 23 ALL_KINDS = {
24 24 KIND_CHANGELOG,
25 25 KIND_MANIFESTLOG,
26 26 KIND_FILELOG,
27 27 KIND_OTHER,
28 28 }
29 29
30 30 ### main revlog header
31 31
32 32 INDEX_HEADER = struct.Struct(b">I")
33 33
34 34 ## revlog version
35 35 REVLOGV0 = 0
36 36 REVLOGV1 = 1
37 37 # Dummy value until file format is finalized.
38 38 REVLOGV2 = 0xDEAD
39 39
40 40 ## global revlog header flags
41 41 # Shared across v1 and v2.
42 42 FLAG_INLINE_DATA = 1 << 16
43 43 # Only used by v1, implied by v2.
44 44 FLAG_GENERALDELTA = 1 << 17
45 45 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
46 46 REVLOG_DEFAULT_FORMAT = REVLOGV1
47 47 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
48 48 REVLOGV0_FLAGS = 0
49 49 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
50 50 REVLOGV2_FLAGS = FLAG_INLINE_DATA
51 51
52 52 ### individual entry
53 53
54 54 ## index v0:
55 55 # 4 bytes: offset
56 56 # 4 bytes: compressed length
57 57 # 4 bytes: base rev
58 58 # 4 bytes: link rev
59 59 # 20 bytes: parent 1 nodeid
60 60 # 20 bytes: parent 2 nodeid
61 61 # 20 bytes: nodeid
62 62 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
63 63
64 64 ## index v1
65 65 # 6 bytes: offset
66 66 # 2 bytes: flags
67 67 # 4 bytes: compressed length
68 68 # 4 bytes: uncompressed length
69 69 # 4 bytes: base rev
70 70 # 4 bytes: link rev
71 71 # 4 bytes: parent 1 rev
72 72 # 4 bytes: parent 2 rev
73 73 # 32 bytes: nodeid
74 74 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
75 75 assert INDEX_ENTRY_V1.size == 32 * 2
76 76
77 77 # 6 bytes: offset
78 78 # 2 bytes: flags
79 79 # 4 bytes: compressed length
80 80 # 4 bytes: uncompressed length
81 81 # 4 bytes: base rev
82 82 # 4 bytes: link rev
83 83 # 4 bytes: parent 1 rev
84 84 # 4 bytes: parent 2 rev
85 85 # 32 bytes: nodeid
86 86 # 8 bytes: sidedata offset
87 87 # 4 bytes: sidedata compressed length
88 88 # 1 bytes: compression mode (2 lower bit are data_compression_mode)
88 88 # 1 byte: compression mode (2 lower bits are data_compression_mode)
90 90 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
91 91 assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size
92 92
93 93 # revlog index flags
94 94
95 95 # For historical reasons, revlog's internal flags were exposed via the
96 96 # wire protocol and are even exposed in parts of the storage APIs.
97 97
98 98 # revision has censor metadata, must be verified
99 99 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
100 100 # revision hash does not match data (narrowhg)
101 101 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
102 102 # revision data is stored externally
103 103 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
104 104 # revision changes files in a way that could affect copy tracing.
105 105 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
106 106 REVIDX_DEFAULT_FLAGS = 0
107 107 # stable order in which flags need to be processed and their processors applied
108 108 REVIDX_FLAGS_ORDER = [
109 109 REVIDX_ISCENSORED,
110 110 REVIDX_ELLIPSIS,
111 111 REVIDX_EXTSTORED,
112 112 REVIDX_HASCOPIESINFO,
113 113 ]
114 114
115 115 # bitmark for flags that could cause rawdata content change
116 116 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
117 117
118 118 ## chunk compression mode constants:
119 119 # These constants are used in revlog version >=2 to denote the compression used
120 120 # for a chunk.
121 121
122 122 # Chunks use no compression; the data stored on disk can be used directly
123 123 # as the chunk value, without any header information prefixed.
124 124 COMP_MODE_PLAIN = 0
125 125
126 # Chunks use the "default compression" for the revlog (usually defined in
127 # the revlog docket). A header is still used.
128 #
129 # XXX: keeping a header is probably not useful and we should probably drop it.
130 #
131 # XXX: the value of allowing mixed types of compression in the revlog is
132 # unclear and we should consider making PLAIN/DEFAULT the only available
133 # modes for revlog v2, disallowing INLINE mode.
134 COMP_MODE_DEFAULT = 1
135
126 136 # Chunks use a compression mode stored "inline" at the start of the chunk
127 137 # itself. This is the mode always used for revlog versions "0" and "1".
128 138 COMP_MODE_INLINE = 2
129 139
130 140 SUPPORTED_FLAGS = {
131 141 REVLOGV0: REVLOGV0_FLAGS,
132 142 REVLOGV1: REVLOGV1_FLAGS,
133 143 REVLOGV2: REVLOGV2_FLAGS,
134 144 }
135 145
136 146 _no = lambda flags: False
137 147 _yes = lambda flags: True
138 148
139 149
140 150 def _from_flag(flag):
141 151 return lambda flags: bool(flags & flag)
142 152
143 153
144 154 FEATURES_BY_VERSION = {
145 155 REVLOGV0: {
146 156 b'inline': _no,
147 157 b'generaldelta': _no,
148 158 b'sidedata': False,
149 159 b'docket': False,
150 160 },
151 161 REVLOGV1: {
152 162 b'inline': _from_flag(FLAG_INLINE_DATA),
153 163 b'generaldelta': _from_flag(FLAG_GENERALDELTA),
154 164 b'sidedata': False,
155 165 b'docket': False,
156 166 },
157 167 REVLOGV2: {
158 168 # The point of inline-revlog is to reduce the number of files used in
159 169 # the store. Using a docket defeats this purpose, so we need other
160 170 # means to reduce the number of files for revlogv2.
161 171 b'inline': _no,
162 172 b'generaldelta': _yes,
163 173 b'sidedata': True,
164 174 b'docket': True,
165 175 },
166 176 }
167 177
168 178
169 179 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
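To make the three compression modes concrete, here is a hypothetical chunk reader dispatching on them. It assumes zlib is the engine named in the docket and is only an illustration, not Mercurial's actual decompression path:

import zlib

COMP_MODE_PLAIN = 0
COMP_MODE_DEFAULT = 1
COMP_MODE_INLINE = 2

def read_chunk(raw, comp_mode):
    """Return the uncompressed bytes of a chunk stored with comp_mode."""
    if comp_mode == COMP_MODE_PLAIN:
        # the bytes on disk are the chunk value itself, no header
        return raw
    if comp_mode == COMP_MODE_DEFAULT:
        # the engine is named once, in the docket; in this mode the chunk
        # still carries its per-chunk header (see the XXX above), so a
        # zlib chunk is simply a plain zlib stream
        return zlib.decompress(raw)
    if comp_mode == COMP_MODE_INLINE:
        # v0/v1 behaviour: the first byte identifies the engine
        if raw[:1] == b'u':
            return raw[1:]               # stored uncompressed
        if raw[:1] == b'x':
            return zlib.decompress(raw)  # zlib streams start with 'x'
        raise ValueError('unknown inline compression header')
    raise ValueError('unknown compression mode %d' % comp_mode)

data = zlib.compress(b'some revision data')
assert read_chunk(data, COMP_MODE_INLINE) == b'some revision data'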
@@ -1,167 +1,179 b''
1 1 # docket - code related to revlog "docket"
2 2 #
3 3 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 ### Revlog docket file
9 9 #
10 10 # The revlog is stored on disk using multiple files:
11 11 #
12 12 # * a small docket file, containing metadata and a pointer,
13 13 #
14 14 # * an index file, containing fixed width information about revisions,
15 15 #
16 16 # * a data file, containing variable width data for these revisions,
17 17
18 18 from __future__ import absolute_import
19 19
20 20 import struct
21 21
22 22 from .. import (
23 23 error,
24 util,
24 25 )
25 26
26 27 from . import (
27 28 constants,
28 29 )
29 30
30 31 # Docket format
31 32 #
32 33 # * 4 bytes: revlog version
33 34 # | This is mandatory as the docket must be compatible with the previous
34 35 # | revlog index header.
35 36 # * 8 bytes: size of index-data
36 37 # * 8 bytes: pending size of index-data
37 38 # * 8 bytes: size of data
38 39 # * 8 bytes: pending size of data
39 S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLL')
40 # * 1 byte: default compression header
41 S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLLc')
40 42
41 43
42 44 class RevlogDocket(object):
43 45 """metadata associated with revlog"""
44 46
45 47 def __init__(
46 48 self,
47 49 revlog,
48 50 use_pending=False,
49 51 version_header=None,
50 52 index_end=0,
51 53 pending_index_end=0,
52 54 data_end=0,
53 55 pending_data_end=0,
56 default_compression_header=None,
54 57 ):
55 58 self._version_header = version_header
56 59 self._read_only = bool(use_pending)
57 60 self._dirty = False
58 61 self._radix = revlog.radix
59 62 self._path = revlog._docket_file
60 63 self._opener = revlog.opener
61 64 # these asserts should be True as long as we have a single index filename
62 65 assert index_end <= pending_index_end
63 66 assert data_end <= pending_data_end
64 67 self._initial_index_end = index_end
65 68 self._pending_index_end = pending_index_end
66 69 self._initial_data_end = data_end
67 70 self._pending_data_end = pending_data_end
68 71 if use_pending:
69 72 self._index_end = self._pending_index_end
70 73 self._data_end = self._pending_data_end
71 74 else:
72 75 self._index_end = self._initial_index_end
73 76 self._data_end = self._initial_data_end
77 self.default_compression_header = default_compression_header
74 78
75 79 def index_filepath(self):
76 80 """file path to the current index file associated to this docket"""
77 81 # very simplistic version at first
78 82 return b"%s.idx" % self._radix
79 83
80 84 @property
81 85 def index_end(self):
82 86 return self._index_end
83 87
84 88 @index_end.setter
85 89 def index_end(self, new_size):
86 90 if new_size != self._index_end:
87 91 self._index_end = new_size
88 92 self._dirty = True
89 93
90 94 @property
91 95 def data_end(self):
92 96 return self._data_end
93 97
94 98 @data_end.setter
95 99 def data_end(self, new_size):
96 100 if new_size != self._data_end:
97 101 self._data_end = new_size
98 102 self._dirty = True
99 103
100 104 def write(self, transaction, pending=False, stripping=False):
101 105 """write the modification of disk if any
102 106
103 107 This make the new content visible to all process"""
104 108 if not self._dirty:
105 109 return False
106 110 else:
107 111 if self._read_only:
108 112 msg = b'writing read-only docket: %s'
109 113 msg %= self._path
110 114 raise error.ProgrammingError(msg)
111 115 if not stripping:
112 116 # XXX we could leverage the docket while stripping. However, it
113 117 # is not powerful enough at the time of this comment
114 118 transaction.addbackup(self._path, location=b'store')
115 119 with self._opener(self._path, mode=b'w', atomictemp=True) as f:
116 120 f.write(self._serialize(pending=pending))
117 121 # if pending, we still need to write the final data eventually
118 122 self._dirty = pending
119 123 return True
120 124
121 125 def _serialize(self, pending=False):
122 126 if pending:
123 127 official_index_end = self._initial_index_end
124 128 official_data_end = self._initial_data_end
125 129 else:
126 130 official_index_end = self._index_end
127 131 official_data_end = self._data_end
128 132
129 133 # this assert should be True as long as we have a single index filename
130 134 assert official_data_end <= self._data_end
131 135 data = (
132 136 self._version_header,
133 137 official_index_end,
134 138 self._index_end,
135 139 official_data_end,
136 140 self._data_end,
141 self.default_compression_header,
137 142 )
138 143 return S_HEADER.pack(*data)
139 144
140 145
141 146 def default_docket(revlog, version_header):
142 147 """given a revlog version a new docket object for the given revlog"""
143 148 if (version_header & 0xFFFF) != constants.REVLOGV2:
144 149 return None
145 docket = RevlogDocket(revlog, version_header=version_header)
150 comp = util.compengines[revlog._compengine].revlogheader()
151 docket = RevlogDocket(
152 revlog,
153 version_header=version_header,
154 default_compression_header=comp,
155 )
146 156 docket._dirty = True
147 157 return docket
148 158
149 159
150 160 def parse_docket(revlog, data, use_pending=False):
151 161 """given some docket data return a docket object for the given revlog"""
152 162 header = S_HEADER.unpack(data[: S_HEADER.size])
153 163 version_header = header[0]
154 164 index_size = header[1]
155 165 pending_index_size = header[2]
156 166 data_size = header[3]
157 167 pending_data_size = header[4]
168 default_compression_header = header[5]
158 169 docket = RevlogDocket(
159 170 revlog,
160 171 use_pending=use_pending,
161 172 version_header=version_header,
162 173 index_end=index_size,
163 174 pending_index_end=pending_index_size,
164 175 data_end=data_size,
165 176 pending_data_end=pending_data_size,
177 default_compression_header=default_compression_header,
166 178 )
167 179 return docket
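For reference, the docket serialization above boils down to a single struct round-trip. A self-contained sketch with made-up sizes; b'x' is what zlib's revlogheader() returns:

import struct

# same layout as S_HEADER above: the b">I" version header followed by
# 'LLLLc'; with the '>' byte-order prefix struct uses standard sizes,
# so each 'L' takes 4 bytes and 'c' takes 1 byte
S_HEADER = struct.Struct(">ILLLLc")

raw = S_HEADER.pack(
    0xDEAD,  # version header (REVLOGV2 in the low 16 bits)
    128,     # official index size
    128,     # pending index size
    4096,    # official data size
    4096,    # pending data size
    b'x',    # default compression header (zlib)
)
assert S_HEADER.size == len(raw) == 4 + 4 * 4 + 1

# parse_docket() above performs the reverse operation
(version_header,
 index_size,
 pending_index_size,
 data_size,
 pending_data_size,
 default_compression_header) = S_HEADER.unpack(raw)
assert default_compression_header == b'x'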