changelogv2: use a dedicated version number...
marmoute
r48040:921648d3 default
@@ -1,3442 +1,3445 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 CHANGELOGV2,
38 39 COMP_MODE_DEFAULT,
39 40 COMP_MODE_INLINE,
40 41 COMP_MODE_PLAIN,
41 42 FEATURES_BY_VERSION,
42 43 FLAG_GENERALDELTA,
43 44 FLAG_INLINE_DATA,
44 45 INDEX_HEADER,
45 46 KIND_CHANGELOG,
46 47 REVLOGV0,
47 48 REVLOGV1,
48 49 REVLOGV1_FLAGS,
49 50 REVLOGV2,
50 51 REVLOGV2_FLAGS,
51 52 REVLOG_DEFAULT_FLAGS,
52 53 REVLOG_DEFAULT_FORMAT,
53 54 REVLOG_DEFAULT_VERSION,
54 55 SUPPORTED_FLAGS,
55 56 )
56 57 from .revlogutils.flagutil import (
57 58 REVIDX_DEFAULT_FLAGS,
58 59 REVIDX_ELLIPSIS,
59 60 REVIDX_EXTSTORED,
60 61 REVIDX_FLAGS_ORDER,
61 62 REVIDX_HASCOPIESINFO,
62 63 REVIDX_ISCENSORED,
63 64 REVIDX_RAWTEXT_CHANGING_FLAGS,
64 65 )
65 66 from .thirdparty import attr
66 67 from . import (
67 68 ancestor,
68 69 dagop,
69 70 error,
70 71 mdiff,
71 72 policy,
72 73 pycompat,
73 74 templatefilters,
74 75 util,
75 76 )
76 77 from .interfaces import (
77 78 repository,
78 79 util as interfaceutil,
79 80 )
80 81 from .revlogutils import (
81 82 deltas as deltautil,
82 83 docket as docketutil,
83 84 flagutil,
84 85 nodemap as nodemaputil,
85 86 revlogv0,
86 87 sidedata as sidedatautil,
87 88 )
88 89 from .utils import (
89 90 storageutil,
90 91 stringutil,
91 92 )
92 93
93 94 # blanket usage of all the names to silence pyflakes unused-import warnings
94 95 # We need these names available in the module for extensions.
95 96
96 97 REVLOGV0
97 98 REVLOGV1
98 99 REVLOGV2
99 100 FLAG_INLINE_DATA
100 101 FLAG_GENERALDELTA
101 102 REVLOG_DEFAULT_FLAGS
102 103 REVLOG_DEFAULT_FORMAT
103 104 REVLOG_DEFAULT_VERSION
104 105 REVLOGV1_FLAGS
105 106 REVLOGV2_FLAGS
106 107 REVIDX_ISCENSORED
107 108 REVIDX_ELLIPSIS
108 109 REVIDX_HASCOPIESINFO
109 110 REVIDX_EXTSTORED
110 111 REVIDX_DEFAULT_FLAGS
111 112 REVIDX_FLAGS_ORDER
112 113 REVIDX_RAWTEXT_CHANGING_FLAGS
113 114
114 115 parsers = policy.importmod('parsers')
115 116 rustancestor = policy.importrust('ancestor')
116 117 rustdagop = policy.importrust('dagop')
117 118 rustrevlog = policy.importrust('revlog')
118 119
119 120 # Aliased for performance.
120 121 _zlibdecompress = zlib.decompress
121 122
122 123 # max size of revlog with inline data
123 124 _maxinline = 131072
124 125 _chunksize = 1048576
125 126
126 127 # Flag processors for REVIDX_ELLIPSIS.
127 128 def ellipsisreadprocessor(rl, text):
128 129 return text, False
129 130
130 131
131 132 def ellipsiswriteprocessor(rl, text):
132 133 return text, False
133 134
134 135
135 136 def ellipsisrawprocessor(rl, text):
136 137 return False
137 138
138 139
139 140 ellipsisprocessor = (
140 141 ellipsisreadprocessor,
141 142 ellipsiswriteprocessor,
142 143 ellipsisrawprocessor,
143 144 )
144 145
145 146
146 147 def offset_type(offset, type):
147 148 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
148 149 raise ValueError(b'unknown revlog index flags')
149 150 return int(int(offset) << 16 | type)
150 151
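offset_type() packs the byte offset and the 16-bit flag field of an index entry into a single integer; the inverse operations ("offset = field >> 16", "flags = field & 0xFFFF") are documented in the revlog class docstring below. A minimal standalone sketch of the round trip (the mask value here is a stand-in for flagutil.REVIDX_KNOWN_FLAGS, which is really the union of the defined REVIDX_* flags):

    KNOWN_FLAGS = 0xFFFF  # stand-in for flagutil.REVIDX_KNOWN_FLAGS

    def pack_offset_type(offset, flags):
        # mirrors offset_type() above: reject unknown flag bits, then pack
        if flags & ~KNOWN_FLAGS:
            raise ValueError('unknown revlog index flags')
        return (offset << 16) | flags

    field = pack_offset_type(4096, 0x0001)
    assert field >> 16 == 4096        # recover the offset
    assert field & 0xFFFF == 0x0001   # recover the flags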
151 152
152 153 def _verify_revision(rl, skipflags, state, node):
153 154 """Verify the integrity of the given revlog ``node`` while providing a hook
154 155 point for extensions to influence the operation."""
155 156 if skipflags:
156 157 state[b'skipread'].add(node)
157 158 else:
158 159 # Side-effect: read content and verify hash.
159 160 rl.revision(node)
160 161
161 162
162 163 # True if a fast implementation for persistent-nodemap is available
163 164 #
164 165 # We also consider the "pure" python implementation "fast" because
165 166 # people using it don't really have performance considerations (and a
166 167 # wheelbarrow of other slowness sources)
167 168 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
168 169 parsers, 'BaseIndexObject'
169 170 )
170 171
171 172
172 173 @attr.s(slots=True, frozen=True)
173 174 class _revisioninfo(object):
174 175 """Information about a revision that allows building its fulltext
175 176 node: expected hash of the revision
176 177 p1, p2: parent revs of the revision
177 178 btext: built text cache consisting of a one-element list
178 179 cachedelta: (baserev, uncompressed_delta) or None
179 180 flags: flags associated with the revision storage
180 181
181 182 One of btext[0] or cachedelta must be set.
182 183 """
183 184
184 185 node = attr.ib()
185 186 p1 = attr.ib()
186 187 p2 = attr.ib()
187 188 btext = attr.ib()
188 189 textlen = attr.ib()
189 190 cachedelta = attr.ib()
190 191 flags = attr.ib()
191 192
192 193
193 194 @interfaceutil.implementer(repository.irevisiondelta)
194 195 @attr.s(slots=True)
195 196 class revlogrevisiondelta(object):
196 197 node = attr.ib()
197 198 p1node = attr.ib()
198 199 p2node = attr.ib()
199 200 basenode = attr.ib()
200 201 flags = attr.ib()
201 202 baserevisionsize = attr.ib()
202 203 revision = attr.ib()
203 204 delta = attr.ib()
204 205 sidedata = attr.ib()
205 206 protocol_flags = attr.ib()
206 207 linknode = attr.ib(default=None)
207 208
208 209
209 210 @interfaceutil.implementer(repository.iverifyproblem)
210 211 @attr.s(frozen=True)
211 212 class revlogproblem(object):
212 213 warning = attr.ib(default=None)
213 214 error = attr.ib(default=None)
214 215 node = attr.ib(default=None)
215 216
216 217
217 218 def parse_index_v1(data, inline):
218 219 # call the C implementation to parse the index data
219 220 index, cache = parsers.parse_index2(data, inline)
220 221 return index, cache
221 222
222 223
223 224 def parse_index_v2(data, inline):
224 225 # call the C implementation to parse the index data
225 226 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
226 227 return index, cache
227 228
228 229
229 230 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
230 231
231 232 def parse_index_v1_nodemap(data, inline):
232 233 index, cache = parsers.parse_index_devel_nodemap(data, inline)
233 234 return index, cache
234 235
235 236
236 237 else:
237 238 parse_index_v1_nodemap = None
238 239
239 240
240 241 def parse_index_v1_mixed(data, inline):
241 242 index, cache = parse_index_v1(data, inline)
242 243 return rustrevlog.MixedIndex(index), cache
243 244
244 245
245 246 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
246 247 # signed integer)
247 248 _maxentrysize = 0x7FFFFFFF
248 249
249 250
250 251 class revlog(object):
251 252 """
252 253 the underlying revision storage object
253 254
254 255 A revlog consists of two parts, an index and the revision data.
255 256
256 257 The index is a file with a fixed record size containing
257 258 information on each revision, including its nodeid (hash), the
258 259 nodeids of its parents, the position and offset of its data within
259 260 the data file, and the revision it's based on. Finally, each entry
260 261 contains a linkrev entry that can serve as a pointer to external
261 262 data.
262 263
263 264 The revision data itself is a linear collection of data chunks.
264 265 Each chunk represents a revision and is usually represented as a
265 266 delta against the previous chunk. To bound lookup time, runs of
266 267 deltas are limited to about 2 times the length of the original
267 268 version data. This makes retrieval of a version proportional to
268 269 its size, or O(1) relative to the number of revisions.
269 270
270 271 Both pieces of the revlog are written to in an append-only
271 272 fashion, which means we never need to rewrite a file to insert or
272 273 remove data, and can use some simple techniques to avoid the need
273 274 for locking while reading.
274 275
275 276 If checkambig, indexfile is opened with checkambig=True at
276 277 writing, to avoid file stat ambiguity.
277 278
278 279 If mmaplargeindex is True, and an mmapindexthreshold is set, the
279 280 index will be mmapped rather than read if it is larger than the
280 281 configured threshold.
281 282
282 283 If censorable is True, the revlog can have censored revisions.
283 284
284 285 If `upperboundcomp` is not None, this is the expected maximal gain from
285 286 compression for the data content.
286 287
287 288 `concurrencychecker` is an optional function that receives 3 arguments: a
288 289 file handle, a filename, and an expected position. It should check whether
289 290 the current position in the file handle is valid, and log/warn/fail (by
290 291 raising).
291 292
292 293
293 294 Internal details
294 295 ----------------
295 296
296 297 A large part of the revlog logic deals with revisions' "index entries", tuple
297 298 objects that contain the same "items" whatever the revlog version.
298 299 Different versions will have different ways of storing these items (sometimes
299 300 not having them at all), but the tuple will always be the same. New fields
300 301 are usually added at the end to avoid breaking existing code that relies
301 302 on the existing order. The fields are defined as follows:
302 303
303 304 [0] offset:
304 305 The byte index of the start of revision data chunk.
305 306 That value is shifted up by 16 bits. Use "offset = field >> 16" to
306 307 retrieve it.
307 308
308 309 flags:
309 310 A flag field that carries special information or changes the behavior
310 311 of the revision. (see `REVIDX_*` constants for details)
311 312 The flag field only occupies the first 16 bits of this field;
312 313 use "flags = field & 0xFFFF" to retrieve the value.
313 314
314 315 [1] compressed length:
315 316 The size, in bytes, of the chunk on disk
316 317
317 318 [2] uncompressed length:
318 319 The size, in bytes, of the full revision once reconstructed.
319 320
320 321 [3] base rev:
321 322 Either the base of the revision delta chain (without general
322 323 delta), or the base of the delta (stored in the data chunk)
323 324 with general delta.
324 325
325 326 [4] link rev:
326 327 Changelog revision number of the changeset introducing this
327 328 revision.
328 329
329 330 [5] parent 1 rev:
330 331 Revision number of the first parent
331 332
332 333 [6] parent 2 rev:
333 334 Revision number of the second parent
334 335
335 336 [7] node id:
336 337 The node id of the current revision
337 338
338 339 [8] sidedata offset:
339 340 The byte index of the start of the revision's side-data chunk.
340 341
341 342 [9] sidedata chunk length:
342 343 The size, in bytes, of the revision's side-data chunk.
343 344
344 345 [10] data compression mode:
345 346 two bits that detail the way the data chunk is compressed on disk.
346 347 (see "COMP_MODE_*" constants for details). For revlog version 0 and
347 348 1 this will always be COMP_MODE_INLINE.
348 349
349 350 [11] side-data compression mode:
350 351 two bits that detail the way the sidedata chunk is compressed on disk.
351 352 (see "COMP_MODE_*" constants for details)
352 353 """
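To make the field list above concrete, here is a hedged, made-up example of one index entry tuple and how a consumer reads it (all values are illustrative only; nullrev is -1):

    entry = (
        (4096 << 16) | 0,  # [0] offset shifted by 16 bits, ORed with flags
        120,               # [1] compressed length on disk
        300,               # [2] uncompressed length
        3,                 # [3] base rev of the delta
        5,                 # [4] link rev
        4,                 # [5] parent 1 rev
        -1,                # [6] parent 2 rev (nullrev)
        b'\x00' * 20,      # [7] node id
        0,                 # [8] sidedata offset
        0,                 # [9] sidedata chunk length
        0,                 # [10] data compression mode (see COMP_MODE_*)
        0,                 # [11] side-data compression mode
    )
    offset = entry[0] >> 16    # 4096
    flags = entry[0] & 0xFFFF  # 0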
353 354
354 355 _flagserrorclass = error.RevlogError
355 356
356 357 def __init__(
357 358 self,
358 359 opener,
359 360 target,
360 361 radix,
361 362 postfix=None, # only exists for `tmpcensored` now
362 363 checkambig=False,
363 364 mmaplargeindex=False,
364 365 censorable=False,
365 366 upperboundcomp=None,
366 367 persistentnodemap=False,
367 368 concurrencychecker=None,
368 369 trypending=False,
369 370 ):
370 371 """
371 372 create a revlog object
372 373
373 374 opener is a function that abstracts the file opening operation
374 375 and can be used to implement COW semantics or the like.
375 376
376 377 `target`: a (KIND, ID) tuple that identifies the content stored in
377 378 this revlog. It helps the rest of the code to understand what the revlog
378 379 is about without having to resort to heuristics and index filename
379 380 analysis. Note that this must be reliably set by normal code, but
380 381 that test, debug, or performance measurement code might not set this to
381 382 an accurate value.
382 383 """
383 384 self.upperboundcomp = upperboundcomp
384 385
385 386 self.radix = radix
386 387
387 388 self._docket_file = None
388 389 self._indexfile = None
389 390 self._datafile = None
390 391 self._nodemap_file = None
391 392 self.postfix = postfix
392 393 self._trypending = trypending
393 394 self.opener = opener
394 395 if persistentnodemap:
395 396 self._nodemap_file = nodemaputil.get_nodemap_file(self)
396 397
397 398 assert target[0] in ALL_KINDS
398 399 assert len(target) == 2
399 400 self.target = target
400 401 # When True, indexfile is opened with checkambig=True at writing, to
401 402 # avoid file stat ambiguity.
402 403 self._checkambig = checkambig
403 404 self._mmaplargeindex = mmaplargeindex
404 405 self._censorable = censorable
405 406 # 3-tuple of (node, rev, text) for a raw revision.
406 407 self._revisioncache = None
407 408 # Maps rev to chain base rev.
408 409 self._chainbasecache = util.lrucachedict(100)
409 410 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
410 411 self._chunkcache = (0, b'')
411 412 # How much data to read and cache into the raw revlog data cache.
412 413 self._chunkcachesize = 65536
413 414 self._maxchainlen = None
414 415 self._deltabothparents = True
415 416 self.index = None
416 417 self._docket = None
417 418 self._nodemap_docket = None
418 419 # Mapping of partial identifiers to full nodes.
419 420 self._pcache = {}
420 421 # Mapping of revision integer to full node.
421 422 self._compengine = b'zlib'
422 423 self._compengineopts = {}
423 424 self._maxdeltachainspan = -1
424 425 self._withsparseread = False
425 426 self._sparserevlog = False
426 427 self.hassidedata = False
427 428 self._srdensitythreshold = 0.50
428 429 self._srmingapsize = 262144
429 430
430 431 # Make copy of flag processors so each revlog instance can support
431 432 # custom flags.
432 433 self._flagprocessors = dict(flagutil.flagprocessors)
433 434
434 435 # 2-tuple of file handles being used for active writing.
435 436 self._writinghandles = None
436 437 # prevent nesting of addgroup
437 438 self._adding_group = None
438 439
439 440 self._loadindex()
440 441
441 442 self._concurrencychecker = concurrencychecker
442 443
443 444 def _init_opts(self):
444 445 """process options (from above/config) to setup associated default revlog mode
445 446
446 447 These values might be affected when actually reading on-disk information.
447 448
448 449 The relevant values are returned for use in _loadindex().
449 450
450 451 * newversionflags:
451 452 version header to use if we need to create a new revlog
452 453
453 454 * mmapindexthreshold:
454 455 minimal index size at which to start using mmap
455 456
456 457 * force_nodemap:
457 458 force the usage of a "development" version of the nodemap code
458 459 """
459 460 mmapindexthreshold = None
460 461 opts = self.opener.options
461 462
462 463 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
463 new_header = REVLOGV2
464 new_header = CHANGELOGV2
464 465 elif b'revlogv2' in opts:
465 466 new_header = REVLOGV2
466 467 elif b'revlogv1' in opts:
467 468 new_header = REVLOGV1 | FLAG_INLINE_DATA
468 469 if b'generaldelta' in opts:
469 470 new_header |= FLAG_GENERALDELTA
470 471 elif b'revlogv0' in self.opener.options:
471 472 new_header = REVLOGV0
472 473 else:
473 474 new_header = REVLOG_DEFAULT_VERSION
474 475
475 476 if b'chunkcachesize' in opts:
476 477 self._chunkcachesize = opts[b'chunkcachesize']
477 478 if b'maxchainlen' in opts:
478 479 self._maxchainlen = opts[b'maxchainlen']
479 480 if b'deltabothparents' in opts:
480 481 self._deltabothparents = opts[b'deltabothparents']
481 482 self._lazydelta = bool(opts.get(b'lazydelta', True))
482 483 self._lazydeltabase = False
483 484 if self._lazydelta:
484 485 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
485 486 if b'compengine' in opts:
486 487 self._compengine = opts[b'compengine']
487 488 if b'zlib.level' in opts:
488 489 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
489 490 if b'zstd.level' in opts:
490 491 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
491 492 if b'maxdeltachainspan' in opts:
492 493 self._maxdeltachainspan = opts[b'maxdeltachainspan']
493 494 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
494 495 mmapindexthreshold = opts[b'mmapindexthreshold']
495 496 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
496 497 withsparseread = bool(opts.get(b'with-sparse-read', False))
497 498 # sparse-revlog forces sparse-read
498 499 self._withsparseread = self._sparserevlog or withsparseread
499 500 if b'sparse-read-density-threshold' in opts:
500 501 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
501 502 if b'sparse-read-min-gap-size' in opts:
502 503 self._srmingapsize = opts[b'sparse-read-min-gap-size']
503 504 if opts.get(b'enableellipsis'):
504 505 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
505 506
506 507 # revlog v0 doesn't have flag processors
507 508 for flag, processor in pycompat.iteritems(
508 509 opts.get(b'flagprocessors', {})
509 510 ):
510 511 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
511 512
512 513 if self._chunkcachesize <= 0:
513 514 raise error.RevlogError(
514 515 _(b'revlog chunk cache size %r is not greater than 0')
515 516 % self._chunkcachesize
516 517 )
517 518 elif self._chunkcachesize & (self._chunkcachesize - 1):
518 519 raise error.RevlogError(
519 520 _(b'revlog chunk cache size %r is not a power of 2')
520 521 % self._chunkcachesize
521 522 )
522 523 force_nodemap = opts.get(b'devel-force-nodemap', False)
523 524 return new_header, mmapindexthreshold, force_nodemap
524 525
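This version-header selection is the heart of the changeset: with the new dedicated CHANGELOGV2 constant, a changelog opted into the v2 features no longer borrows REVLOGV2's version number. A rough sketch of how such a header combines a 16-bit version with 16 bits of feature flags (the constant values shown are illustrative; the real definitions live in revlogutils.constants):

    REVLOGV1 = 1              # illustrative value
    FLAG_INLINE_DATA = 1 << 16
    FLAG_GENERALDELTA = 1 << 17

    new_header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA

    # _loadindex() later splits the header back apart:
    assert new_header & 0xFFFF == REVLOGV1                        # version
    assert new_header & ~0xFFFF == FLAG_INLINE_DATA | FLAG_GENERALDELTA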
525 526 def _get_data(self, filepath, mmap_threshold, size=None):
526 527 """return a file content with or without mmap
527 528
528 529 If the file is missing return the empty string"""
529 530 try:
530 531 with self.opener(filepath) as fp:
531 532 if mmap_threshold is not None:
532 533 file_size = self.opener.fstat(fp).st_size
533 534 if file_size >= mmap_threshold:
534 535 if size is not None:
535 536 # avoid potential mmap crash
536 537 size = min(file_size, size)
537 538 # TODO: should .close() to release resources without
538 539 # relying on Python GC
539 540 if size is None:
540 541 return util.buffer(util.mmapread(fp))
541 542 else:
542 543 return util.buffer(util.mmapread(fp, size))
543 544 if size is None:
544 545 return fp.read()
545 546 else:
546 547 return fp.read(size)
547 548 except IOError as inst:
548 549 if inst.errno != errno.ENOENT:
549 550 raise
550 551 return b''
551 552
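A standalone sketch of the size-based mmap decision made by _get_data(), using only the standard library: read the file normally when it is small, map it read-only once it crosses the threshold (a simplification of the real code, which goes through the vfs opener and util.mmapread):

    import mmap
    import os

    def read_maybe_mmap(path, threshold, size=None):
        with open(path, 'rb') as fp:
            file_size = os.fstat(fp.fileno()).st_size
            if threshold is not None and file_size >= threshold:
                if size is not None:
                    size = min(file_size, size)  # avoid mapping past EOF
                m = mmap.mmap(fp.fileno(), size or 0, access=mmap.ACCESS_READ)
                return memoryview(m)
            return fp.read() if size is None else fp.read(size)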
552 553 def _loadindex(self):
553 554
554 555 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
555 556
556 557 if self.postfix is not None:
557 558 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
558 559 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
559 560 entry_point = b'%s.i.a' % self.radix
560 561 else:
561 562 entry_point = b'%s.i' % self.radix
562 563
563 564 entry_data = b''
564 565 self._initempty = True
565 566 entry_data = self._get_data(entry_point, mmapindexthreshold)
566 567 if len(entry_data) > 0:
567 568 header = INDEX_HEADER.unpack(entry_data[:4])[0]
568 569 self._initempty = False
569 570 else:
570 571 header = new_header
571 572
572 573 self._format_flags = header & ~0xFFFF
573 574 self._format_version = header & 0xFFFF
574 575
575 576 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
576 577 if supported_flags is None:
577 578 msg = _(b'unknown version (%d) in revlog %s')
578 579 msg %= (self._format_version, self.display_id)
579 580 raise error.RevlogError(msg)
580 581 elif self._format_flags & ~supported_flags:
581 582 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
582 583 display_flag = self._format_flags >> 16
583 584 msg %= (display_flag, self._format_version, self.display_id)
584 585 raise error.RevlogError(msg)
585 586
586 587 features = FEATURES_BY_VERSION[self._format_version]
587 588 self._inline = features[b'inline'](self._format_flags)
588 589 self._generaldelta = features[b'generaldelta'](self._format_flags)
589 590 self.hassidedata = features[b'sidedata']
590 591
591 592 if not features[b'docket']:
592 593 self._indexfile = entry_point
593 594 index_data = entry_data
594 595 else:
595 596 self._docket_file = entry_point
596 597 if self._initempty:
597 598 self._docket = docketutil.default_docket(self, header)
598 599 else:
599 600 self._docket = docketutil.parse_docket(
600 601 self, entry_data, use_pending=self._trypending
601 602 )
602 603 self._indexfile = self._docket.index_filepath()
603 604 index_data = b''
604 605 index_size = self._docket.index_end
605 606 if index_size > 0:
606 607 index_data = self._get_data(
607 608 self._indexfile, mmapindexthreshold, size=index_size
608 609 )
609 610 if len(index_data) < index_size:
610 611 msg = _(b'not enough index data for %s: got %d, expected %d')
611 612 msg %= (self.display_id, len(index_data), index_size)
612 613 raise error.RevlogError(msg)
613 614
614 615 self._inline = False
615 616 # generaldelta implied by version 2 revlogs.
616 617 self._generaldelta = True
617 618 # the logic for persistent nodemap will be dealt with within the
618 619 # main docket, so disable it for now.
619 620 self._nodemap_file = None
620 621
621 622 if self.postfix is None:
622 623 self._datafile = b'%s.d' % self.radix
623 624 else:
624 625 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
625 626
626 627 self.nodeconstants = sha1nodeconstants
627 628 self.nullid = self.nodeconstants.nullid
628 629
629 630 # sparse-revlog can't be on without general-delta (issue6056)
630 631 if not self._generaldelta:
631 632 self._sparserevlog = False
632 633
633 634 self._storedeltachains = True
634 635
635 636 devel_nodemap = (
636 637 self._nodemap_file
637 638 and force_nodemap
638 639 and parse_index_v1_nodemap is not None
639 640 )
640 641
641 642 use_rust_index = False
642 643 if rustrevlog is not None:
643 644 if self._nodemap_file is not None:
644 645 use_rust_index = True
645 646 else:
646 647 use_rust_index = self.opener.options.get(b'rust.index')
647 648
648 649 self._parse_index = parse_index_v1
649 650 if self._format_version == REVLOGV0:
650 651 self._parse_index = revlogv0.parse_index_v0
651 652 elif self._format_version == REVLOGV2:
652 653 self._parse_index = parse_index_v2
654 elif self._format_version == CHANGELOGV2:
655 self._parse_index = parse_index_v2
653 656 elif devel_nodemap:
654 657 self._parse_index = parse_index_v1_nodemap
655 658 elif use_rust_index:
656 659 self._parse_index = parse_index_v1_mixed
657 660 try:
658 661 d = self._parse_index(index_data, self._inline)
659 662 index, _chunkcache = d
660 663 use_nodemap = (
661 664 not self._inline
662 665 and self._nodemap_file is not None
663 666 and util.safehasattr(index, 'update_nodemap_data')
664 667 )
665 668 if use_nodemap:
666 669 nodemap_data = nodemaputil.persisted_data(self)
667 670 if nodemap_data is not None:
668 671 docket = nodemap_data[0]
669 672 if (
670 673 len(d[0]) > docket.tip_rev
671 674 and d[0][docket.tip_rev][7] == docket.tip_node
672 675 ):
673 676 # no changelog tampering
674 677 self._nodemap_docket = docket
675 678 index.update_nodemap_data(*nodemap_data)
676 679 except (ValueError, IndexError):
677 680 raise error.RevlogError(
678 681 _(b"index %s is corrupted") % self.display_id
679 682 )
680 683 self.index, self._chunkcache = d
681 684 if not self._chunkcache:
682 685 self._chunkclear()
683 686 # revnum -> (chain-length, sum-delta-length)
684 687 self._chaininfocache = util.lrucachedict(500)
685 688 # revlog header -> revlog compressor
686 689 self._decompressors = {}
687 690
688 691 @util.propertycache
689 692 def revlog_kind(self):
690 693 return self.target[0]
691 694
692 695 @util.propertycache
693 696 def display_id(self):
694 697 """The public facing "ID" of the revlog that we use in message"""
695 698 # Maybe we should build a user facing representation of
696 699 # revlog.target instead of using `self.radix`
697 700 return self.radix
698 701
699 702 def _get_decompressor(self, t):
700 703 try:
701 704 compressor = self._decompressors[t]
702 705 except KeyError:
703 706 try:
704 707 engine = util.compengines.forrevlogheader(t)
705 708 compressor = engine.revlogcompressor(self._compengineopts)
706 709 self._decompressors[t] = compressor
707 710 except KeyError:
708 711 raise error.RevlogError(
709 712 _(b'unknown compression type %s') % binascii.hexlify(t)
710 713 )
711 714 return compressor
712 715
713 716 @util.propertycache
714 717 def _compressor(self):
715 718 engine = util.compengines[self._compengine]
716 719 return engine.revlogcompressor(self._compengineopts)
717 720
718 721 @util.propertycache
719 722 def _decompressor(self):
720 723 """the default decompressor"""
721 724 if self._docket is None:
722 725 return None
723 726 t = self._docket.default_compression_header
724 727 c = self._get_decompressor(t)
725 728 return c.decompress
726 729
727 730 def _indexfp(self):
728 731 """file object for the revlog's index file"""
729 732 return self.opener(self._indexfile, mode=b"r")
730 733
731 734 def __index_write_fp(self):
732 735 # You should not use this directly; use `_writing` instead
733 736 try:
734 737 f = self.opener(
735 738 self._indexfile, mode=b"r+", checkambig=self._checkambig
736 739 )
737 740 if self._docket is None:
738 741 f.seek(0, os.SEEK_END)
739 742 else:
740 743 f.seek(self._docket.index_end, os.SEEK_SET)
741 744 return f
742 745 except IOError as inst:
743 746 if inst.errno != errno.ENOENT:
744 747 raise
745 748 return self.opener(
746 749 self._indexfile, mode=b"w+", checkambig=self._checkambig
747 750 )
748 751
749 752 def __index_new_fp(self):
750 753 # You should not use this unless you are upgrading from an inline revlog
751 754 return self.opener(
752 755 self._indexfile,
753 756 mode=b"w",
754 757 checkambig=self._checkambig,
755 758 atomictemp=True,
756 759 )
757 760
758 761 def _datafp(self, mode=b'r'):
759 762 """file object for the revlog's data file"""
760 763 return self.opener(self._datafile, mode=mode)
761 764
762 765 @contextlib.contextmanager
763 766 def _datareadfp(self, existingfp=None):
764 767 """file object suitable to read data"""
765 768 # Use explicit file handle, if given.
766 769 if existingfp is not None:
767 770 yield existingfp
768 771
769 772 # Use a file handle being actively used for writes, if available.
770 773 # There is some danger in doing this because reads will seek the
771 774 # file. However, _writeentry() performs a SEEK_END before all writes,
772 775 # so we should be safe.
773 776 elif self._writinghandles:
774 777 if self._inline:
775 778 yield self._writinghandles[0]
776 779 else:
777 780 yield self._writinghandles[1]
778 781
779 782 # Otherwise open a new file handle.
780 783 else:
781 784 if self._inline:
782 785 func = self._indexfp
783 786 else:
784 787 func = self._datafp
785 788 with func() as fp:
786 789 yield fp
787 790
788 791 def tiprev(self):
789 792 return len(self.index) - 1
790 793
791 794 def tip(self):
792 795 return self.node(self.tiprev())
793 796
794 797 def __contains__(self, rev):
795 798 return 0 <= rev < len(self)
796 799
797 800 def __len__(self):
798 801 return len(self.index)
799 802
800 803 def __iter__(self):
801 804 return iter(pycompat.xrange(len(self)))
802 805
803 806 def revs(self, start=0, stop=None):
804 807 """iterate over all rev in this revlog (from start to stop)"""
805 808 return storageutil.iterrevs(len(self), start=start, stop=stop)
806 809
807 810 @property
808 811 def nodemap(self):
809 812 msg = (
810 813 b"revlog.nodemap is deprecated, "
811 814 b"use revlog.index.[has_node|rev|get_rev]"
812 815 )
813 816 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
814 817 return self.index.nodemap
815 818
816 819 @property
817 820 def _nodecache(self):
818 821 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
819 822 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
820 823 return self.index.nodemap
821 824
822 825 def hasnode(self, node):
823 826 try:
824 827 self.rev(node)
825 828 return True
826 829 except KeyError:
827 830 return False
828 831
829 832 def candelta(self, baserev, rev):
830 833 """whether two revisions (baserev, rev) can be delta-ed or not"""
831 834 # Disable delta if either rev requires a content-changing flag
832 835 # processor (ex. LFS). This is because such a flag processor can alter
833 836 # the rawtext content that the delta will be based on, and two clients
834 837 # could have the same revlog node with different flags (i.e. different
835 838 # rawtext contents) and the delta could be incompatible.
836 839 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
837 840 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
838 841 ):
839 842 return False
840 843 return True
841 844
842 845 def update_caches(self, transaction):
843 846 if self._nodemap_file is not None:
844 847 if transaction is None:
845 848 nodemaputil.update_persistent_nodemap(self)
846 849 else:
847 850 nodemaputil.setup_persistent_nodemap(transaction, self)
848 851
849 852 def clearcaches(self):
850 853 self._revisioncache = None
851 854 self._chainbasecache.clear()
852 855 self._chunkcache = (0, b'')
853 856 self._pcache = {}
854 857 self._nodemap_docket = None
855 858 self.index.clearcaches()
856 859 # The python code is the one responsible for validating the docket, so
857 860 # we end up having to refresh it here.
858 861 use_nodemap = (
859 862 not self._inline
860 863 and self._nodemap_file is not None
861 864 and util.safehasattr(self.index, 'update_nodemap_data')
862 865 )
863 866 if use_nodemap:
864 867 nodemap_data = nodemaputil.persisted_data(self)
865 868 if nodemap_data is not None:
866 869 self._nodemap_docket = nodemap_data[0]
867 870 self.index.update_nodemap_data(*nodemap_data)
868 871
869 872 def rev(self, node):
870 873 try:
871 874 return self.index.rev(node)
872 875 except TypeError:
873 876 raise
874 877 except error.RevlogError:
875 878 # parsers.c radix tree lookup failed
876 879 if (
877 880 node == self.nodeconstants.wdirid
878 881 or node in self.nodeconstants.wdirfilenodeids
879 882 ):
880 883 raise error.WdirUnsupported
881 884 raise error.LookupError(node, self.display_id, _(b'no node'))
882 885
883 886 # Accessors for index entries.
884 887
885 888 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
886 889 # are flags.
887 890 def start(self, rev):
888 891 return int(self.index[rev][0] >> 16)
889 892
890 893 def flags(self, rev):
891 894 return self.index[rev][0] & 0xFFFF
892 895
893 896 def length(self, rev):
894 897 return self.index[rev][1]
895 898
896 899 def sidedata_length(self, rev):
897 900 if not self.hassidedata:
898 901 return 0
899 902 return self.index[rev][9]
900 903
901 904 def rawsize(self, rev):
902 905 """return the length of the uncompressed text for a given revision"""
903 906 l = self.index[rev][2]
904 907 if l >= 0:
905 908 return l
906 909
907 910 t = self.rawdata(rev)
908 911 return len(t)
909 912
910 913 def size(self, rev):
911 914 """length of non-raw text (processed by a "read" flag processor)"""
912 915 # fast path: if no "read" flag processor could change the content,
913 916 # size is rawsize. note: ELLIPSIS is known to not change the content.
914 917 flags = self.flags(rev)
915 918 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
916 919 return self.rawsize(rev)
917 920
918 921 return len(self.revision(rev, raw=False))
919 922
920 923 def chainbase(self, rev):
921 924 base = self._chainbasecache.get(rev)
922 925 if base is not None:
923 926 return base
924 927
925 928 index = self.index
926 929 iterrev = rev
927 930 base = index[iterrev][3]
928 931 while base != iterrev:
929 932 iterrev = base
930 933 base = index[iterrev][3]
931 934
932 935 self._chainbasecache[rev] = base
933 936 return base
934 937
935 938 def linkrev(self, rev):
936 939 return self.index[rev][4]
937 940
938 941 def parentrevs(self, rev):
939 942 try:
940 943 entry = self.index[rev]
941 944 except IndexError:
942 945 if rev == wdirrev:
943 946 raise error.WdirUnsupported
944 947 raise
945 948 if entry[5] == nullrev:
946 949 return entry[6], entry[5]
947 950 else:
948 951 return entry[5], entry[6]
949 952
950 953 # fast parentrevs(rev) where rev isn't filtered
951 954 _uncheckedparentrevs = parentrevs
952 955
953 956 def node(self, rev):
954 957 try:
955 958 return self.index[rev][7]
956 959 except IndexError:
957 960 if rev == wdirrev:
958 961 raise error.WdirUnsupported
959 962 raise
960 963
961 964 # Derived from index values.
962 965
963 966 def end(self, rev):
964 967 return self.start(rev) + self.length(rev)
965 968
966 969 def parents(self, node):
967 970 i = self.index
968 971 d = i[self.rev(node)]
969 972 # inline node() to avoid function call overhead
970 973 if d[5] == self.nullid:
971 974 return i[d[6]][7], i[d[5]][7]
972 975 else:
973 976 return i[d[5]][7], i[d[6]][7]
974 977
975 978 def chainlen(self, rev):
976 979 return self._chaininfo(rev)[0]
977 980
978 981 def _chaininfo(self, rev):
979 982 chaininfocache = self._chaininfocache
980 983 if rev in chaininfocache:
981 984 return chaininfocache[rev]
982 985 index = self.index
983 986 generaldelta = self._generaldelta
984 987 iterrev = rev
985 988 e = index[iterrev]
986 989 clen = 0
987 990 compresseddeltalen = 0
988 991 while iterrev != e[3]:
989 992 clen += 1
990 993 compresseddeltalen += e[1]
991 994 if generaldelta:
992 995 iterrev = e[3]
993 996 else:
994 997 iterrev -= 1
995 998 if iterrev in chaininfocache:
996 999 t = chaininfocache[iterrev]
997 1000 clen += t[0]
998 1001 compresseddeltalen += t[1]
999 1002 break
1000 1003 e = index[iterrev]
1001 1004 else:
1002 1005 # Add text length of base since decompressing that also takes
1003 1006 # work. For cache hits the length is already included.
1004 1007 compresseddeltalen += e[1]
1005 1008 r = (clen, compresseddeltalen)
1006 1009 chaininfocache[rev] = r
1007 1010 return r
1008 1011
1009 1012 def _deltachain(self, rev, stoprev=None):
1010 1013 """Obtain the delta chain for a revision.
1011 1014
1012 1015 ``stoprev`` specifies a revision to stop at. If not specified, we
1013 1016 stop at the base of the chain.
1014 1017
1015 1018 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1016 1019 revs in ascending order and ``stopped`` is a bool indicating whether
1017 1020 ``stoprev`` was hit.
1018 1021 """
1019 1022 # Try C implementation.
1020 1023 try:
1021 1024 return self.index.deltachain(rev, stoprev, self._generaldelta)
1022 1025 except AttributeError:
1023 1026 pass
1024 1027
1025 1028 chain = []
1026 1029
1027 1030 # Alias to prevent attribute lookup in tight loop.
1028 1031 index = self.index
1029 1032 generaldelta = self._generaldelta
1030 1033
1031 1034 iterrev = rev
1032 1035 e = index[iterrev]
1033 1036 while iterrev != e[3] and iterrev != stoprev:
1034 1037 chain.append(iterrev)
1035 1038 if generaldelta:
1036 1039 iterrev = e[3]
1037 1040 else:
1038 1041 iterrev -= 1
1039 1042 e = index[iterrev]
1040 1043
1041 1044 if iterrev == stoprev:
1042 1045 stopped = True
1043 1046 else:
1044 1047 chain.append(iterrev)
1045 1048 stopped = False
1046 1049
1047 1050 chain.reverse()
1048 1051 return chain, stopped
1049 1052
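A toy rendering of the pure-Python fallback above, for the general-delta case only (field [3] of each index entry is the delta-base revision, and a revision that is its own base terminates the chain):

    base = {0: 0, 1: 0, 2: 1}  # rev -> delta-base rev

    def deltachain(rev):
        chain = []
        while base[rev] != rev:
            chain.append(rev)
            rev = base[rev]
        chain.append(rev)  # include the chain base itself
        chain.reverse()
        return chain

    assert deltachain(2) == [0, 1, 2]  # rev 2 deltas on 1, which deltas on 0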
1050 1053 def ancestors(self, revs, stoprev=0, inclusive=False):
1051 1054 """Generate the ancestors of 'revs' in reverse revision order.
1052 1055 Does not generate revs lower than stoprev.
1053 1056
1054 1057 See the documentation for ancestor.lazyancestors for more details."""
1055 1058
1056 1059 # first, make sure start revisions aren't filtered
1057 1060 revs = list(revs)
1058 1061 checkrev = self.node
1059 1062 for r in revs:
1060 1063 checkrev(r)
1061 1064 # and we're sure ancestors aren't filtered as well
1062 1065
1063 1066 if rustancestor is not None:
1064 1067 lazyancestors = rustancestor.LazyAncestors
1065 1068 arg = self.index
1066 1069 else:
1067 1070 lazyancestors = ancestor.lazyancestors
1068 1071 arg = self._uncheckedparentrevs
1069 1072 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1070 1073
1071 1074 def descendants(self, revs):
1072 1075 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1073 1076
1074 1077 def findcommonmissing(self, common=None, heads=None):
1075 1078 """Return a tuple of the ancestors of common and the ancestors of heads
1076 1079 that are not ancestors of common. In revset terminology, we return the
1077 1080 tuple:
1078 1081
1079 1082 ::common, (::heads) - (::common)
1080 1083
1081 1084 The list is sorted by revision number, meaning it is
1082 1085 topologically sorted.
1083 1086
1084 1087 'heads' and 'common' are both lists of node IDs. If heads is
1085 1088 not supplied, uses all of the revlog's heads. If common is not
1086 1089 supplied, uses nullid."""
1087 1090 if common is None:
1088 1091 common = [self.nullid]
1089 1092 if heads is None:
1090 1093 heads = self.heads()
1091 1094
1092 1095 common = [self.rev(n) for n in common]
1093 1096 heads = [self.rev(n) for n in heads]
1094 1097
1095 1098 # we want the ancestors, but inclusive
1096 1099 class lazyset(object):
1097 1100 def __init__(self, lazyvalues):
1098 1101 self.addedvalues = set()
1099 1102 self.lazyvalues = lazyvalues
1100 1103
1101 1104 def __contains__(self, value):
1102 1105 return value in self.addedvalues or value in self.lazyvalues
1103 1106
1104 1107 def __iter__(self):
1105 1108 added = self.addedvalues
1106 1109 for r in added:
1107 1110 yield r
1108 1111 for r in self.lazyvalues:
1109 1112 if r not in added:
1110 1113 yield r
1111 1114
1112 1115 def add(self, value):
1113 1116 self.addedvalues.add(value)
1114 1117
1115 1118 def update(self, values):
1116 1119 self.addedvalues.update(values)
1117 1120
1118 1121 has = lazyset(self.ancestors(common))
1119 1122 has.add(nullrev)
1120 1123 has.update(common)
1121 1124
1122 1125 # take all ancestors from heads that aren't in has
1123 1126 missing = set()
1124 1127 visit = collections.deque(r for r in heads if r not in has)
1125 1128 while visit:
1126 1129 r = visit.popleft()
1127 1130 if r in missing:
1128 1131 continue
1129 1132 else:
1130 1133 missing.add(r)
1131 1134 for p in self.parentrevs(r):
1132 1135 if p not in has:
1133 1136 visit.append(p)
1134 1137 missing = list(missing)
1135 1138 missing.sort()
1136 1139 return has, [self.node(miss) for miss in missing]
1137 1140
1138 1141 def incrementalmissingrevs(self, common=None):
1139 1142 """Return an object that can be used to incrementally compute the
1140 1143 revision numbers of the ancestors of arbitrary sets that are not
1141 1144 ancestors of common. This is an ancestor.incrementalmissingancestors
1142 1145 object.
1143 1146
1144 1147 'common' is a list of revision numbers. If common is not supplied, uses
1145 1148 nullrev.
1146 1149 """
1147 1150 if common is None:
1148 1151 common = [nullrev]
1149 1152
1150 1153 if rustancestor is not None:
1151 1154 return rustancestor.MissingAncestors(self.index, common)
1152 1155 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1153 1156
1154 1157 def findmissingrevs(self, common=None, heads=None):
1155 1158 """Return the revision numbers of the ancestors of heads that
1156 1159 are not ancestors of common.
1157 1160
1158 1161 More specifically, return a list of revision numbers corresponding to
1159 1162 nodes N such that every N satisfies the following constraints:
1160 1163
1161 1164 1. N is an ancestor of some node in 'heads'
1162 1165 2. N is not an ancestor of any node in 'common'
1163 1166
1164 1167 The list is sorted by revision number, meaning it is
1165 1168 topologically sorted.
1166 1169
1167 1170 'heads' and 'common' are both lists of revision numbers. If heads is
1168 1171 not supplied, uses all of the revlog's heads. If common is not
1169 1172 supplied, uses nullid."""
1170 1173 if common is None:
1171 1174 common = [nullrev]
1172 1175 if heads is None:
1173 1176 heads = self.headrevs()
1174 1177
1175 1178 inc = self.incrementalmissingrevs(common=common)
1176 1179 return inc.missingancestors(heads)
1177 1180
1178 1181 def findmissing(self, common=None, heads=None):
1179 1182 """Return the ancestors of heads that are not ancestors of common.
1180 1183
1181 1184 More specifically, return a list of nodes N such that every N
1182 1185 satisfies the following constraints:
1183 1186
1184 1187 1. N is an ancestor of some node in 'heads'
1185 1188 2. N is not an ancestor of any node in 'common'
1186 1189
1187 1190 The list is sorted by revision number, meaning it is
1188 1191 topologically sorted.
1189 1192
1190 1193 'heads' and 'common' are both lists of node IDs. If heads is
1191 1194 not supplied, uses all of the revlog's heads. If common is not
1192 1195 supplied, uses nullid."""
1193 1196 if common is None:
1194 1197 common = [self.nullid]
1195 1198 if heads is None:
1196 1199 heads = self.heads()
1197 1200
1198 1201 common = [self.rev(n) for n in common]
1199 1202 heads = [self.rev(n) for n in heads]
1200 1203
1201 1204 inc = self.incrementalmissingrevs(common=common)
1202 1205 return [self.node(r) for r in inc.missingancestors(heads)]
1203 1206
1204 1207 def nodesbetween(self, roots=None, heads=None):
1205 1208 """Return a topological path from 'roots' to 'heads'.
1206 1209
1207 1210 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1208 1211 topologically sorted list of all nodes N that satisfy both of
1209 1212 these constraints:
1210 1213
1211 1214 1. N is a descendant of some node in 'roots'
1212 1215 2. N is an ancestor of some node in 'heads'
1213 1216
1214 1217 Every node is considered to be both a descendant and an ancestor
1215 1218 of itself, so every reachable node in 'roots' and 'heads' will be
1216 1219 included in 'nodes'.
1217 1220
1218 1221 'outroots' is the list of reachable nodes in 'roots', i.e., the
1219 1222 subset of 'roots' that is returned in 'nodes'. Likewise,
1220 1223 'outheads' is the subset of 'heads' that is also in 'nodes'.
1221 1224
1222 1225 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1223 1226 unspecified, uses nullid as the only root. If 'heads' is
1224 1227 unspecified, uses list of all of the revlog's heads."""
1225 1228 nonodes = ([], [], [])
1226 1229 if roots is not None:
1227 1230 roots = list(roots)
1228 1231 if not roots:
1229 1232 return nonodes
1230 1233 lowestrev = min([self.rev(n) for n in roots])
1231 1234 else:
1232 1235 roots = [self.nullid] # Everybody's a descendant of nullid
1233 1236 lowestrev = nullrev
1234 1237 if (lowestrev == nullrev) and (heads is None):
1235 1238 # We want _all_ the nodes!
1236 1239 return (
1237 1240 [self.node(r) for r in self],
1238 1241 [self.nullid],
1239 1242 list(self.heads()),
1240 1243 )
1241 1244 if heads is None:
1242 1245 # All nodes are ancestors, so the latest ancestor is the last
1243 1246 # node.
1244 1247 highestrev = len(self) - 1
1245 1248 # Set ancestors to None to signal that every node is an ancestor.
1246 1249 ancestors = None
1247 1250 # Set heads to an empty dictionary for later discovery of heads
1248 1251 heads = {}
1249 1252 else:
1250 1253 heads = list(heads)
1251 1254 if not heads:
1252 1255 return nonodes
1253 1256 ancestors = set()
1254 1257 # Turn heads into a dictionary so we can remove 'fake' heads.
1255 1258 # Also, later we will be using it to filter out the heads we can't
1256 1259 # find from roots.
1257 1260 heads = dict.fromkeys(heads, False)
1258 1261 # Start at the top and keep marking parents until we're done.
1259 1262 nodestotag = set(heads)
1260 1263 # Remember where the top was so we can use it as a limit later.
1261 1264 highestrev = max([self.rev(n) for n in nodestotag])
1262 1265 while nodestotag:
1263 1266 # grab a node to tag
1264 1267 n = nodestotag.pop()
1265 1268 # Never tag nullid
1266 1269 if n == self.nullid:
1267 1270 continue
1268 1271 # A node's revision number represents its place in a
1269 1272 # topologically sorted list of nodes.
1270 1273 r = self.rev(n)
1271 1274 if r >= lowestrev:
1272 1275 if n not in ancestors:
1273 1276 # If we are possibly a descendant of one of the roots
1274 1277 # and we haven't already been marked as an ancestor
1275 1278 ancestors.add(n) # Mark as ancestor
1276 1279 # Add non-nullid parents to list of nodes to tag.
1277 1280 nodestotag.update(
1278 1281 [p for p in self.parents(n) if p != self.nullid]
1279 1282 )
1280 1283 elif n in heads: # We've seen it before, is it a fake head?
1281 1284 # So it is, real heads should not be the ancestors of
1282 1285 # any other heads.
1283 1286 heads.pop(n)
1284 1287 if not ancestors:
1285 1288 return nonodes
1286 1289 # Now that we have our set of ancestors, we want to remove any
1287 1290 # roots that are not ancestors.
1288 1291
1289 1292 # If one of the roots was nullid, everything is included anyway.
1290 1293 if lowestrev > nullrev:
1291 1294 # But, since we weren't, let's recompute the lowest rev to not
1292 1295 # include roots that aren't ancestors.
1293 1296
1294 1297 # Filter out roots that aren't ancestors of heads
1295 1298 roots = [root for root in roots if root in ancestors]
1296 1299 # Recompute the lowest revision
1297 1300 if roots:
1298 1301 lowestrev = min([self.rev(root) for root in roots])
1299 1302 else:
1300 1303 # No more roots? Return empty list
1301 1304 return nonodes
1302 1305 else:
1303 1306 # We are descending from nullid, and don't need to care about
1304 1307 # any other roots.
1305 1308 lowestrev = nullrev
1306 1309 roots = [self.nullid]
1307 1310 # Transform our roots list into a set.
1308 1311 descendants = set(roots)
1309 1312 # Also, keep the original roots so we can filter out roots that aren't
1310 1313 # 'real' roots (i.e. are descended from other roots).
1311 1314 roots = descendants.copy()
1312 1315 # Our topologically sorted list of output nodes.
1313 1316 orderedout = []
1314 1317 # Don't start at nullid since we don't want nullid in our output list,
1315 1318 # and if nullid shows up in descendants, empty parents will look like
1316 1319 # they're descendants.
1317 1320 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1318 1321 n = self.node(r)
1319 1322 isdescendant = False
1320 1323 if lowestrev == nullrev: # Everybody is a descendant of nullid
1321 1324 isdescendant = True
1322 1325 elif n in descendants:
1323 1326 # n is already a descendant
1324 1327 isdescendant = True
1325 1328 # This check only needs to be done here because all the roots
1326 1329 # will start being marked as descendants before the loop.
1327 1330 if n in roots:
1328 1331 # If n was a root, check if it's a 'real' root.
1329 1332 p = tuple(self.parents(n))
1330 1333 # If any of its parents are descendants, it's not a root.
1331 1334 if (p[0] in descendants) or (p[1] in descendants):
1332 1335 roots.remove(n)
1333 1336 else:
1334 1337 p = tuple(self.parents(n))
1335 1338 # A node is a descendant if either of its parents are
1336 1339 # descendants. (We seeded the descendants set with the roots
1337 1340 # up there, remember?)
1338 1341 if (p[0] in descendants) or (p[1] in descendants):
1339 1342 descendants.add(n)
1340 1343 isdescendant = True
1341 1344 if isdescendant and ((ancestors is None) or (n in ancestors)):
1342 1345 # Only include nodes that are both descendants and ancestors.
1343 1346 orderedout.append(n)
1344 1347 if (ancestors is not None) and (n in heads):
1345 1348 # We're trying to figure out which heads are reachable
1346 1349 # from roots.
1347 1350 # Mark this head as having been reached
1348 1351 heads[n] = True
1349 1352 elif ancestors is None:
1350 1353 # Otherwise, we're trying to discover the heads.
1351 1354 # Assume this is a head because if it isn't, the next step
1352 1355 # will eventually remove it.
1353 1356 heads[n] = True
1354 1357 # But, obviously its parents aren't.
1355 1358 for p in self.parents(n):
1356 1359 heads.pop(p, None)
1357 1360 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1358 1361 roots = list(roots)
1359 1362 assert orderedout
1360 1363 assert roots
1361 1364 assert heads
1362 1365 return (orderedout, roots, heads)
1363 1366
1364 1367 def headrevs(self, revs=None):
1365 1368 if revs is None:
1366 1369 try:
1367 1370 return self.index.headrevs()
1368 1371 except AttributeError:
1369 1372 return self._headrevs()
1370 1373 if rustdagop is not None:
1371 1374 return rustdagop.headrevs(self.index, revs)
1372 1375 return dagop.headrevs(revs, self._uncheckedparentrevs)
1373 1376
1374 1377 def computephases(self, roots):
1375 1378 return self.index.computephasesmapsets(roots)
1376 1379
1377 1380 def _headrevs(self):
1378 1381 count = len(self)
1379 1382 if not count:
1380 1383 return [nullrev]
1381 1384 # we won't iterate over filtered revs, so nobody is a head at the start
1382 1385 ishead = [0] * (count + 1)
1383 1386 index = self.index
1384 1387 for r in self:
1385 1388 ishead[r] = 1 # I may be a head
1386 1389 e = index[r]
1387 1390 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1388 1391 return [r for r, val in enumerate(ishead) if val]
1389 1392
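A reduced sketch of the marking pass above, with plain (p1, p2) parent pairs and nullrev as -1 (the real code also has to account for filtered revisions):

    parentrevs = [(-1, -1), (0, -1), (0, -1), (1, 2)]  # a small DAG
    ishead = [1] * len(parentrevs)          # every rev starts as a head
    for r, (p1, p2) in enumerate(parentrevs):
        if p1 >= 0:
            ishead[p1] = 0                  # a parent cannot be a head
        if p2 >= 0:
            ishead[p2] = 0
    assert [r for r, v in enumerate(ishead) if v] == [3]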
1390 1393 def heads(self, start=None, stop=None):
1391 1394 """return the list of all nodes that have no children
1392 1395
1393 1396 if start is specified, only heads that are descendants of
1394 1397 start will be returned
1395 1398 if stop is specified, it will consider all the revs from stop
1396 1399 as if they had no children
1397 1400 """
1398 1401 if start is None and stop is None:
1399 1402 if not len(self):
1400 1403 return [self.nullid]
1401 1404 return [self.node(r) for r in self.headrevs()]
1402 1405
1403 1406 if start is None:
1404 1407 start = nullrev
1405 1408 else:
1406 1409 start = self.rev(start)
1407 1410
1408 1411 stoprevs = {self.rev(n) for n in stop or []}
1409 1412
1410 1413 revs = dagop.headrevssubset(
1411 1414 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1412 1415 )
1413 1416
1414 1417 return [self.node(rev) for rev in revs]
1415 1418
1416 1419 def children(self, node):
1417 1420 """find the children of a given node"""
1418 1421 c = []
1419 1422 p = self.rev(node)
1420 1423 for r in self.revs(start=p + 1):
1421 1424 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1422 1425 if prevs:
1423 1426 for pr in prevs:
1424 1427 if pr == p:
1425 1428 c.append(self.node(r))
1426 1429 elif p == nullrev:
1427 1430 c.append(self.node(r))
1428 1431 return c
1429 1432
1430 1433 def commonancestorsheads(self, a, b):
1431 1434 """calculate all the heads of the common ancestors of nodes a and b"""
1432 1435 a, b = self.rev(a), self.rev(b)
1433 1436 ancs = self._commonancestorsheads(a, b)
1434 1437 return pycompat.maplist(self.node, ancs)
1435 1438
1436 1439 def _commonancestorsheads(self, *revs):
1437 1440 """calculate all the heads of the common ancestors of revs"""
1438 1441 try:
1439 1442 ancs = self.index.commonancestorsheads(*revs)
1440 1443 except (AttributeError, OverflowError): # C implementation failed
1441 1444 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1442 1445 return ancs
1443 1446
1444 1447 def isancestor(self, a, b):
1445 1448 """return True if node a is an ancestor of node b
1446 1449
1447 1450 A revision is considered an ancestor of itself."""
1448 1451 a, b = self.rev(a), self.rev(b)
1449 1452 return self.isancestorrev(a, b)
1450 1453
1451 1454 def isancestorrev(self, a, b):
1452 1455 """return True if revision a is an ancestor of revision b
1453 1456
1454 1457 A revision is considered an ancestor of itself.
1455 1458
1456 1459 The implementation of this is trivial but the use of
1457 1460 reachableroots is not."""
1458 1461 if a == nullrev:
1459 1462 return True
1460 1463 elif a == b:
1461 1464 return True
1462 1465 elif a > b:
1463 1466 return False
1464 1467 return bool(self.reachableroots(a, [b], [a], includepath=False))
1465 1468
1466 1469 def reachableroots(self, minroot, heads, roots, includepath=False):
1467 1470 """return (heads(::(<roots> and <roots>::<heads>)))
1468 1471
1469 1472 If includepath is True, return (<roots>::<heads>)."""
1470 1473 try:
1471 1474 return self.index.reachableroots2(
1472 1475 minroot, heads, roots, includepath
1473 1476 )
1474 1477 except AttributeError:
1475 1478 return dagop._reachablerootspure(
1476 1479 self.parentrevs, minroot, roots, heads, includepath
1477 1480 )
1478 1481
1479 1482 def ancestor(self, a, b):
1480 1483 """calculate the "best" common ancestor of nodes a and b"""
1481 1484
1482 1485 a, b = self.rev(a), self.rev(b)
1483 1486 try:
1484 1487 ancs = self.index.ancestors(a, b)
1485 1488 except (AttributeError, OverflowError):
1486 1489 ancs = ancestor.ancestors(self.parentrevs, a, b)
1487 1490 if ancs:
1488 1491 # choose a consistent winner when there's a tie
1489 1492 return min(map(self.node, ancs))
1490 1493 return self.nullid
1491 1494
1492 1495 def _match(self, id):
1493 1496 if isinstance(id, int):
1494 1497 # rev
1495 1498 return self.node(id)
1496 1499 if len(id) == self.nodeconstants.nodelen:
1497 1500 # possibly a binary node
1498 1501 # odds of a binary node being all hex in ASCII are 1 in 10**25
1499 1502 try:
1500 1503 node = id
1501 1504 self.rev(node) # quick search the index
1502 1505 return node
1503 1506 except error.LookupError:
1504 1507 pass # may be partial hex id
1505 1508 try:
1506 1509 # str(rev)
1507 1510 rev = int(id)
1508 1511 if b"%d" % rev != id:
1509 1512 raise ValueError
1510 1513 if rev < 0:
1511 1514 rev = len(self) + rev
1512 1515 if rev < 0 or rev >= len(self):
1513 1516 raise ValueError
1514 1517 return self.node(rev)
1515 1518 except (ValueError, OverflowError):
1516 1519 pass
1517 1520 if len(id) == 2 * self.nodeconstants.nodelen:
1518 1521 try:
1519 1522 # a full hex nodeid?
1520 1523 node = bin(id)
1521 1524 self.rev(node)
1522 1525 return node
1523 1526 except (TypeError, error.LookupError):
1524 1527 pass
1525 1528
1526 1529 def _partialmatch(self, id):
1527 1530 # we don't care about wdirfilenodeids as they should always be full hashes
1528 1531 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1529 1532 try:
1530 1533 partial = self.index.partialmatch(id)
1531 1534 if partial and self.hasnode(partial):
1532 1535 if maybewdir:
1533 1536 # single 'ff...' match in radix tree, ambiguous with wdir
1534 1537 raise error.RevlogError
1535 1538 return partial
1536 1539 if maybewdir:
1537 1540 # no 'ff...' match in radix tree, wdir identified
1538 1541 raise error.WdirUnsupported
1539 1542 return None
1540 1543 except error.RevlogError:
1541 1544 # parsers.c radix tree lookup gave multiple matches
1542 1545 # fast path: for unfiltered changelog, radix tree is accurate
1543 1546 if not getattr(self, 'filteredrevs', None):
1544 1547 raise error.AmbiguousPrefixLookupError(
1545 1548 id, self.display_id, _(b'ambiguous identifier')
1546 1549 )
1547 1550 # fall through to slow path that filters hidden revisions
1548 1551 except (AttributeError, ValueError):
1549 1552 # we are pure python, or key was too short to search radix tree
1550 1553 pass
1551 1554
1552 1555 if id in self._pcache:
1553 1556 return self._pcache[id]
1554 1557
1555 1558 if len(id) <= 40:
1556 1559 try:
1557 1560 # hex(node)[:...]
1558 1561 l = len(id) // 2 # grab an even number of digits
1559 1562 prefix = bin(id[: l * 2])
1560 1563 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1561 1564 nl = [
1562 1565 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1563 1566 ]
1564 1567 if self.nodeconstants.nullhex.startswith(id):
1565 1568 nl.append(self.nullid)
1566 1569 if len(nl) > 0:
1567 1570 if len(nl) == 1 and not maybewdir:
1568 1571 self._pcache[id] = nl[0]
1569 1572 return nl[0]
1570 1573 raise error.AmbiguousPrefixLookupError(
1571 1574 id, self.display_id, _(b'ambiguous identifier')
1572 1575 )
1573 1576 if maybewdir:
1574 1577 raise error.WdirUnsupported
1575 1578 return None
1576 1579 except TypeError:
1577 1580 pass
1578 1581
1579 1582 def lookup(self, id):
1580 1583 """locate a node based on:
1581 1584 - revision number or str(revision number)
1582 1585 - nodeid or subset of hex nodeid
1583 1586 """
1584 1587 n = self._match(id)
1585 1588 if n is not None:
1586 1589 return n
1587 1590 n = self._partialmatch(id)
1588 1591 if n:
1589 1592 return n
1590 1593
1591 1594 raise error.LookupError(id, self.display_id, _(b'no match found'))
1592 1595
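# Example identifiers accepted by lookup() above (illustrative only):
#   lookup(0) and lookup(b'0')   -> node of revision 0
#   lookup(node)                 -> node, via the index
#   lookup(hex(node)[:6])        -> node, via unambiguous prefix match
# an unresolvable identifier raises LookupError.
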
1593 1596 def shortest(self, node, minlength=1):
1594 1597 """Find the shortest unambiguous prefix that matches node."""
1595 1598
1596 1599 def isvalid(prefix):
1597 1600 try:
1598 1601 matchednode = self._partialmatch(prefix)
1599 1602 except error.AmbiguousPrefixLookupError:
1600 1603 return False
1601 1604 except error.WdirUnsupported:
1602 1605 # single 'ff...' match
1603 1606 return True
1604 1607 if matchednode is None:
1605 1608 raise error.LookupError(node, self.display_id, _(b'no node'))
1606 1609 return True
1607 1610
1608 1611 def maybewdir(prefix):
1609 1612 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1610 1613
1611 1614 hexnode = hex(node)
1612 1615
1613 1616 def disambiguate(hexnode, minlength):
1614 1617 """Disambiguate against wdirid."""
1615 1618 for length in range(minlength, len(hexnode) + 1):
1616 1619 prefix = hexnode[:length]
1617 1620 if not maybewdir(prefix):
1618 1621 return prefix
1619 1622
1620 1623 if not getattr(self, 'filteredrevs', None):
1621 1624 try:
1622 1625 length = max(self.index.shortest(node), minlength)
1623 1626 return disambiguate(hexnode, length)
1624 1627 except error.RevlogError:
1625 1628 if node != self.nodeconstants.wdirid:
1626 1629 raise error.LookupError(
1627 1630 node, self.display_id, _(b'no node')
1628 1631 )
1629 1632 except AttributeError:
1630 1633 # Fall through to pure code
1631 1634 pass
1632 1635
1633 1636 if node == self.nodeconstants.wdirid:
1634 1637 for length in range(minlength, len(hexnode) + 1):
1635 1638 prefix = hexnode[:length]
1636 1639 if isvalid(prefix):
1637 1640 return prefix
1638 1641
1639 1642 for length in range(minlength, len(hexnode) + 1):
1640 1643 prefix = hexnode[:length]
1641 1644 if isvalid(prefix):
1642 1645 return disambiguate(hexnode, length)
1643 1646
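# Worked example for shortest() above (hypothetical nodes): if the only
# two indexed nodes start with 1f0e... and 1f8a..., shortest() on the
# first returns b'1f0', the first prefix that is unambiguous and cannot
# be mistaken for the all-'f' wdir identifier.
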
1644 1647 def cmp(self, node, text):
1645 1648 """compare text with a given file revision
1646 1649
1647 1650 returns True if text is different than what is stored.
1648 1651 """
1649 1652 p1, p2 = self.parents(node)
1650 1653 return storageutil.hashrevisionsha1(text, p1, p2) != node
1651 1654
1652 1655 def _cachesegment(self, offset, data):
1653 1656 """Add a segment to the revlog cache.
1654 1657
1655 1658 Accepts an absolute offset and the data that is at that location.
1656 1659 """
1657 1660 o, d = self._chunkcache
1658 1661 # try to add to existing cache
1659 1662 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1660 1663 self._chunkcache = o, d + data
1661 1664 else:
1662 1665 self._chunkcache = offset, data
1663 1666
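# Extend-vs-replace behavior of _cachesegment() above (illustrative
# sizes): with the cache holding (0, <64KiB of data>),
#   _cachesegment(65536, seg)  extends the cache (contiguous append),
#   _cachesegment(200000, seg) replaces it (a gap lies in between).
# The cache is also replaced outright once it would reach _chunksize.
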
1664 1667 def _readsegment(self, offset, length, df=None):
1665 1668 """Load a segment of raw data from the revlog.
1666 1669
1667 1670 Accepts an absolute offset, length to read, and an optional existing
1668 1671 file handle to read from.
1669 1672
1670 1673 If an existing file handle is passed, it will be seeked and the
1671 1674 original seek position will NOT be restored.
1672 1675
1673 1676 Returns a str or buffer of raw byte data.
1674 1677
1675 1678 Raises if the requested number of bytes could not be read.
1676 1679 """
1677 1680 # Cache data both forward and backward around the requested
1678 1681 # data, in a fixed size window. This helps speed up operations
1679 1682 # involving reading the revlog backwards.
1680 1683 cachesize = self._chunkcachesize
1681 1684 realoffset = offset & ~(cachesize - 1)
1682 1685 reallength = (
1683 1686 (offset + length + cachesize) & ~(cachesize - 1)
1684 1687 ) - realoffset
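# Worked example of the rounding above (illustrative; the configured
# cache size must be a power of two for the masking to work, which
# revlog enforces elsewhere):
#   cachesize=65536, offset=70000, length=100
#   realoffset = 70000 & ~65535 = 65536
#   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 = 65536
# i.e. we read the whole 64KiB window containing the requested range.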
1685 1688 with self._datareadfp(df) as df:
1686 1689 df.seek(realoffset)
1687 1690 d = df.read(reallength)
1688 1691
1689 1692 self._cachesegment(realoffset, d)
1690 1693 if offset != realoffset or reallength != length:
1691 1694 startoffset = offset - realoffset
1692 1695 if len(d) - startoffset < length:
1693 1696 raise error.RevlogError(
1694 1697 _(
1695 1698 b'partial read of revlog %s; expected %d bytes from '
1696 1699 b'offset %d, got %d'
1697 1700 )
1698 1701 % (
1699 1702 self._indexfile if self._inline else self._datafile,
1700 1703 length,
1701 1704 offset,
1702 1705 len(d) - startoffset,
1703 1706 )
1704 1707 )
1705 1708
1706 1709 return util.buffer(d, startoffset, length)
1707 1710
1708 1711 if len(d) < length:
1709 1712 raise error.RevlogError(
1710 1713 _(
1711 1714 b'partial read of revlog %s; expected %d bytes from offset '
1712 1715 b'%d, got %d'
1713 1716 )
1714 1717 % (
1715 1718 self._indexfile if self._inline else self._datafile,
1716 1719 length,
1717 1720 offset,
1718 1721 len(d),
1719 1722 )
1720 1723 )
1721 1724
1722 1725 return d
1723 1726
1724 1727 def _getsegment(self, offset, length, df=None):
1725 1728 """Obtain a segment of raw data from the revlog.
1726 1729
1727 1730 Accepts an absolute offset, length of bytes to obtain, and an
1728 1731 optional file handle to the already-opened revlog. If the file
1729 1732 handle is used, its original seek position will not be preserved.
1730 1733
1731 1734 Requests for data may be returned from a cache.
1732 1735
1733 1736 Returns a str or a buffer instance of raw byte data.
1734 1737 """
1735 1738 o, d = self._chunkcache
1736 1739 l = len(d)
1737 1740
1738 1741 # is it in the cache?
1739 1742 cachestart = offset - o
1740 1743 cacheend = cachestart + length
1741 1744 if cachestart >= 0 and cacheend <= l:
1742 1745 if cachestart == 0 and cacheend == l:
1743 1746 return d # avoid a copy
1744 1747 return util.buffer(d, cachestart, cacheend - cachestart)
1745 1748
1746 1749 return self._readsegment(offset, length, df=df)
1747 1750
1748 1751 def _getsegmentforrevs(self, startrev, endrev, df=None):
1749 1752 """Obtain a segment of raw data corresponding to a range of revisions.
1750 1753
1751 1754 Accepts the start and end revisions and an optional already-open
1752 1755 file handle to be used for reading. If the file handle is used, its
1753 1756 seek position will not be preserved.
1754 1757
1755 1758 Requests for data may be satisfied by a cache.
1756 1759
1757 1760 Returns a 2-tuple of (offset, data) for the requested range of
1758 1761 revisions. Offset is the integer offset from the beginning of the
1759 1762 revlog and data is a str or buffer of the raw byte data.
1760 1763
1761 1764 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1762 1765 to determine where each revision's data begins and ends.
1763 1766 """
1764 1767 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1765 1768 # (functions are expensive).
1766 1769 index = self.index
1767 1770 istart = index[startrev]
1768 1771 start = int(istart[0] >> 16)
1769 1772 if startrev == endrev:
1770 1773 end = start + istart[1]
1771 1774 else:
1772 1775 iend = index[endrev]
1773 1776 end = int(iend[0] >> 16) + iend[1]
1774 1777
1775 1778 if self._inline:
1776 1779 start += (startrev + 1) * self.index.entry_size
1777 1780 end += (endrev + 1) * self.index.entry_size
1778 1781 length = end - start
1779 1782
1780 1783 return start, self._getsegment(start, length, df=df)
1781 1784
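# Sketch of how a caller consumes _getsegmentforrevs() above
# (illustrative; ignores the inline-index adjustment):
#   offset, data = self._getsegmentforrevs(lo, hi)
#   for rev in range(lo, hi + 1):
#       chunk = data[self.start(rev) - offset:][:self.length(rev)]
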
1782 1785 def _chunk(self, rev, df=None):
1783 1786 """Obtain a single decompressed chunk for a revision.
1784 1787
1785 1788 Accepts an integer revision and an optional already-open file handle
1786 1789 to be used for reading. If used, the seek position of the file will not
1787 1790 be preserved.
1788 1791
1789 1792 Returns a str holding uncompressed data for the requested revision.
1790 1793 """
1791 1794 compression_mode = self.index[rev][10]
1792 1795 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1793 1796 if compression_mode == COMP_MODE_PLAIN:
1794 1797 return data
1795 1798 elif compression_mode == COMP_MODE_DEFAULT:
1796 1799 return self._decompressor(data)
1797 1800 elif compression_mode == COMP_MODE_INLINE:
1798 1801 return self.decompress(data)
1799 1802 else:
1800 1803 msg = 'unknown compression mode %d'
1801 1804 msg %= compression_mode
1802 1805 raise error.RevlogError(msg)
1803 1806
1804 1807 def _chunks(self, revs, df=None, targetsize=None):
1805 1808 """Obtain decompressed chunks for the specified revisions.
1806 1809
1807 1810 Accepts an iterable of numeric revisions that are assumed to be in
1808 1811 ascending order. Also accepts an optional already-open file handle
1809 1812 to be used for reading. If used, the seek position of the file will
1810 1813 not be preserved.
1811 1814
1812 1815 This function is similar to calling ``self._chunk()`` multiple times,
1813 1816 but is faster.
1814 1817
1815 1818 Returns a list with decompressed data for each requested revision.
1816 1819 """
1817 1820 if not revs:
1818 1821 return []
1819 1822 start = self.start
1820 1823 length = self.length
1821 1824 inline = self._inline
1822 1825 iosize = self.index.entry_size
1823 1826 buffer = util.buffer
1824 1827
1825 1828 l = []
1826 1829 ladd = l.append
1827 1830
1828 1831 if not self._withsparseread:
1829 1832 slicedchunks = (revs,)
1830 1833 else:
1831 1834 slicedchunks = deltautil.slicechunk(
1832 1835 self, revs, targetsize=targetsize
1833 1836 )
1834 1837
1835 1838 for revschunk in slicedchunks:
1836 1839 firstrev = revschunk[0]
1837 1840 # Skip trailing revisions with empty diff
1838 1841 for lastrev in revschunk[::-1]:
1839 1842 if length(lastrev) != 0:
1840 1843 break
1841 1844
1842 1845 try:
1843 1846 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1844 1847 except OverflowError:
1845 1848 # issue4215 - we can't cache a run of chunks greater than
1846 1849 # 2G on Windows
1847 1850 return [self._chunk(rev, df=df) for rev in revschunk]
1848 1851
1849 1852 decomp = self.decompress
1850 1853 # self._decompressor might be None, but will not be used in that case
1851 1854 def_decomp = self._decompressor
1852 1855 for rev in revschunk:
1853 1856 chunkstart = start(rev)
1854 1857 if inline:
1855 1858 chunkstart += (rev + 1) * iosize
1856 1859 chunklength = length(rev)
1857 1860 comp_mode = self.index[rev][10]
1858 1861 c = buffer(data, chunkstart - offset, chunklength)
1859 1862 if comp_mode == COMP_MODE_PLAIN:
1860 1863 ladd(c)
1861 1864 elif comp_mode == COMP_MODE_INLINE:
1862 1865 ladd(decomp(c))
1863 1866 elif comp_mode == COMP_MODE_DEFAULT:
1864 1867 ladd(def_decomp(c))
1865 1868 else:
1866 1869 msg = 'unknown compression mode %d'
1867 1870 msg %= comp_mode
1868 1871 raise error.RevlogError(msg)
1869 1872
1870 1873 return l
1871 1874
1872 1875 def _chunkclear(self):
1873 1876 """Clear the raw chunk cache."""
1874 1877 self._chunkcache = (0, b'')
1875 1878
1876 1879 def deltaparent(self, rev):
1877 1880 """return deltaparent of the given revision"""
1878 1881 base = self.index[rev][3]
1879 1882 if base == rev:
1880 1883 return nullrev
1881 1884 elif self._generaldelta:
1882 1885 return base
1883 1886 else:
1884 1887 return rev - 1
1885 1888
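# deltaparent() above, by example (illustrative): with generaldelta the
# delta base rev is read straight from index entry[3]; without it, rev 7
# is always stored as a delta against rev 6, unless entry[3] equals the
# rev itself, which marks a full snapshot (nullrev is returned).
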
1886 1889 def issnapshot(self, rev):
1887 1890 """tells whether rev is a snapshot"""
1888 1891 if not self._sparserevlog:
1889 1892 return self.deltaparent(rev) == nullrev
1890 1893 elif util.safehasattr(self.index, b'issnapshot'):
1891 1894 # directly assign the method to cache both the capability test and access
1892 1895 self.issnapshot = self.index.issnapshot
1893 1896 return self.issnapshot(rev)
1894 1897 if rev == nullrev:
1895 1898 return True
1896 1899 entry = self.index[rev]
1897 1900 base = entry[3]
1898 1901 if base == rev:
1899 1902 return True
1900 1903 if base == nullrev:
1901 1904 return True
1902 1905 p1 = entry[5]
1903 1906 p2 = entry[6]
1904 1907 if base == p1 or base == p2:
1905 1908 return False
1906 1909 return self.issnapshot(base)
1907 1910
1908 1911 def snapshotdepth(self, rev):
1909 1912 """number of snapshot in the chain before this one"""
1910 1913 if not self.issnapshot(rev):
1911 1914 raise error.ProgrammingError(b'revision %d not a snapshot')
1912 1915 return len(self._deltachain(rev)[0]) - 1
1913 1916
1914 1917 def revdiff(self, rev1, rev2):
1915 1918 """return or calculate a delta between two revisions
1916 1919
1917 1920 The delta calculated is in binary form and is intended to be written to
1918 1921 revlog data directly. So this function needs raw revision data.
1919 1922 """
1920 1923 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1921 1924 return bytes(self._chunk(rev2))
1922 1925
1923 1926 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1924 1927
1925 1928 def _processflags(self, text, flags, operation, raw=False):
1926 1929 """deprecated entry point to access flag processors"""
1927 1930 msg = b'_processflags(...) is deprecated, use the specialized variant'
1928 1931 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1929 1932 if raw:
1930 1933 return text, flagutil.processflagsraw(self, text, flags)
1931 1934 elif operation == b'read':
1932 1935 return flagutil.processflagsread(self, text, flags)
1933 1936 else: # write operation
1934 1937 return flagutil.processflagswrite(self, text, flags)
1935 1938
1936 1939 def revision(self, nodeorrev, _df=None, raw=False):
1937 1940 """return an uncompressed revision of a given node or revision
1938 1941 number.
1939 1942
1940 1943 _df - an existing file handle to read from. (internal-only)
1941 1944 raw - an optional argument specifying if the revision data is to be
1942 1945 treated as raw data when applying flag transforms. 'raw' should be set
1943 1946 to True when generating changegroups or in debug commands.
1944 1947 """
1945 1948 if raw:
1946 1949 msg = (
1947 1950 b'revlog.revision(..., raw=True) is deprecated, '
1948 1951 b'use revlog.rawdata(...)'
1949 1952 )
1950 1953 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1951 1954 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1952 1955
1953 1956 def sidedata(self, nodeorrev, _df=None):
1954 1957 """a map of extra data related to the changeset but not part of the hash
1955 1958
1956 1959 This function currently returns a dictionary. However, a more
1957 1960 advanced mapping object will likely be used in the future for more
1958 1961 efficient/lazy code.
1959 1962 """
1960 1963 return self._revisiondata(nodeorrev, _df)[1]
1961 1964
1962 1965 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1963 1966 # deal with <nodeorrev> argument type
1964 1967 if isinstance(nodeorrev, int):
1965 1968 rev = nodeorrev
1966 1969 node = self.node(rev)
1967 1970 else:
1968 1971 node = nodeorrev
1969 1972 rev = None
1970 1973
1971 1974 # fast path the special `nullid` rev
1972 1975 if node == self.nullid:
1973 1976 return b"", {}
1974 1977
1975 1978 # ``rawtext`` is the text as stored inside the revlog. Might be the
1976 1979 # revision or might need to be processed to retrieve the revision.
1977 1980 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1978 1981
1979 1982 if self.hassidedata:
1980 1983 if rev is None:
1981 1984 rev = self.rev(node)
1982 1985 sidedata = self._sidedata(rev)
1983 1986 else:
1984 1987 sidedata = {}
1985 1988
1986 1989 if raw and validated:
1987 1990 # if we don't want to process the raw text and that raw
1988 1991 # text is cached, we can exit early.
1989 1992 return rawtext, sidedata
1990 1993 if rev is None:
1991 1994 rev = self.rev(node)
1992 1995 # the revlog's flags for this revision
1993 1996 # (they usually alter its state or content)
1994 1997 flags = self.flags(rev)
1995 1998
1996 1999 if validated and flags == REVIDX_DEFAULT_FLAGS:
1997 2000 # no extra flags set, no flag processor runs, text = rawtext
1998 2001 return rawtext, sidedata
1999 2002
2000 2003 if raw:
2001 2004 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2002 2005 text = rawtext
2003 2006 else:
2004 2007 r = flagutil.processflagsread(self, rawtext, flags)
2005 2008 text, validatehash = r
2006 2009 if validatehash:
2007 2010 self.checkhash(text, node, rev=rev)
2008 2011 if not validated:
2009 2012 self._revisioncache = (node, rev, rawtext)
2010 2013
2011 2014 return text, sidedata
2012 2015
2013 2016 def _rawtext(self, node, rev, _df=None):
2014 2017 """return the possibly unvalidated rawtext for a revision
2015 2018
2016 2019 returns (rev, rawtext, validated)
2017 2020 """
2018 2021
2019 2022 # revision in the cache (could be useful to apply delta)
2020 2023 cachedrev = None
2021 2024 # An intermediate text to apply deltas to
2022 2025 basetext = None
2023 2026
2024 2027 # Check if we have the entry in cache
2025 2028 # The cache entry looks like (node, rev, rawtext)
2026 2029 if self._revisioncache:
2027 2030 if self._revisioncache[0] == node:
2028 2031 return (rev, self._revisioncache[2], True)
2029 2032 cachedrev = self._revisioncache[1]
2030 2033
2031 2034 if rev is None:
2032 2035 rev = self.rev(node)
2033 2036
2034 2037 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2035 2038 if stopped:
2036 2039 basetext = self._revisioncache[2]
2037 2040
2038 2041 # drop cache to save memory, the caller is expected to
2039 2042 # update self._revisioncache after validating the text
2040 2043 self._revisioncache = None
2041 2044
2042 2045 targetsize = None
2043 2046 rawsize = self.index[rev][2]
2044 2047 if 0 <= rawsize:
2045 2048 targetsize = 4 * rawsize
2046 2049
2047 2050 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2048 2051 if basetext is None:
2049 2052 basetext = bytes(bins[0])
2050 2053 bins = bins[1:]
2051 2054
2052 2055 rawtext = mdiff.patches(basetext, bins)
2053 2056 del basetext # let us have a chance to free memory early
2054 2057 return (rev, rawtext, False)
2055 2058
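# Shape of the delta resolution in _rawtext() above (illustrative, when
# no cached base cuts the chain short): for chain == [2, 5, 9],
# _chunks() returns three decompressed chunks; chunk 2 is the full base
# text and mdiff.patches() folds the rev-5 and rev-9 deltas onto it to
# rebuild rev 9's rawtext.
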
2056 2059 def _sidedata(self, rev):
2057 2060 """Return the sidedata for a given revision number."""
2058 2061 index_entry = self.index[rev]
2059 2062 sidedata_offset = index_entry[8]
2060 2063 sidedata_size = index_entry[9]
2061 2064
2062 2065 if self._inline:
2063 2066 sidedata_offset += self.index.entry_size * (1 + rev)
2064 2067 if sidedata_size == 0:
2065 2068 return {}
2066 2069
2067 2070 comp_segment = self._getsegment(sidedata_offset, sidedata_size)
2068 2071 comp = self.index[rev][11]
2069 2072 if comp == COMP_MODE_PLAIN:
2070 2073 segment = comp_segment
2071 2074 elif comp == COMP_MODE_DEFAULT:
2072 2075 segment = self._decompressor(comp_segment)
2073 2076 elif comp == COMP_MODE_INLINE:
2074 2077 segment = self.decompress(comp_segment)
2075 2078 else:
2076 2079 msg = 'unknown compression mode %d'
2077 2080 msg %= comp
2078 2081 raise error.RevlogError(msg)
2079 2082
2080 2083 sidedata = sidedatautil.deserialize_sidedata(segment)
2081 2084 return sidedata
2082 2085
2083 2086 def rawdata(self, nodeorrev, _df=None):
2084 2087 """return an uncompressed raw data of a given node or revision number.
2085 2088
2086 2089 _df - an existing file handle to read from. (internal-only)
2087 2090 """
2088 2091 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2089 2092
2090 2093 def hash(self, text, p1, p2):
2091 2094 """Compute a node hash.
2092 2095
2093 2096 Available as a function so that subclasses can replace the hash
2094 2097 as needed.
2095 2098 """
2096 2099 return storageutil.hashrevisionsha1(text, p1, p2)
2097 2100
2098 2101 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2099 2102 """Check node hash integrity.
2100 2103
2101 2104 Available as a function so that subclasses can extend hash mismatch
2102 2105 behaviors as needed.
2103 2106 """
2104 2107 try:
2105 2108 if p1 is None and p2 is None:
2106 2109 p1, p2 = self.parents(node)
2107 2110 if node != self.hash(text, p1, p2):
2108 2111 # Clear the revision cache on hash failure. The revision cache
2109 2112 # only stores the raw revision and clearing the cache does have
2110 2113 # the side-effect that we won't have a cache hit when the raw
2111 2114 # revision data is accessed. But this case should be rare and
2112 2115 # it is extra work to teach the cache about the hash
2113 2116 # verification state.
2114 2117 if self._revisioncache and self._revisioncache[0] == node:
2115 2118 self._revisioncache = None
2116 2119
2117 2120 revornode = rev
2118 2121 if revornode is None:
2119 2122 revornode = templatefilters.short(hex(node))
2120 2123 raise error.RevlogError(
2121 2124 _(b"integrity check failed on %s:%s")
2122 2125 % (self.display_id, pycompat.bytestr(revornode))
2123 2126 )
2124 2127 except error.RevlogError:
2125 2128 if self._censorable and storageutil.iscensoredtext(text):
2126 2129 raise error.CensoredNodeError(self.display_id, node, text)
2127 2130 raise
2128 2131
2129 2132 def _enforceinlinesize(self, tr):
2130 2133 """Check if the revlog is too big for inline and convert if so.
2131 2134
2132 2135 This should be called after revisions are added to the revlog. If the
2133 2136 revlog has grown too large to be an inline revlog, this will convert it
2134 2137 to use separate index and data files.
2135 2138 """
2136 2139 tiprev = len(self) - 1
2137 2140 total_size = self.start(tiprev) + self.length(tiprev)
2138 2141 if not self._inline or total_size < _maxinline:
2139 2142 return
2140 2143
2141 2144 troffset = tr.findoffset(self._indexfile)
2142 2145 if troffset is None:
2143 2146 raise error.RevlogError(
2144 2147 _(b"%s not found in the transaction") % self._indexfile
2145 2148 )
2146 2149 trindex = 0
2147 2150 tr.add(self._datafile, 0)
2148 2151
2149 2152 existing_handles = False
2150 2153 if self._writinghandles is not None:
2151 2154 existing_handles = True
2152 2155 fp = self._writinghandles[0]
2153 2156 fp.flush()
2154 2157 fp.close()
2155 2158 # We can't use the cached file handle after close(). So prevent
2156 2159 # its usage.
2157 2160 self._writinghandles = None
2158 2161
2159 2162 new_dfh = self._datafp(b'w+')
2160 2163 new_dfh.truncate(0) # drop any potentially existing data
2161 2164 try:
2162 2165 with self._indexfp() as read_ifh:
2163 2166 for r in self:
2164 2167 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2165 2168 if troffset <= self.start(r):
2166 2169 trindex = r
2167 2170 new_dfh.flush()
2168 2171
2169 2172 with self.__index_new_fp() as fp:
2170 2173 self._format_flags &= ~FLAG_INLINE_DATA
2171 2174 self._inline = False
2172 2175 for i in self:
2173 2176 e = self.index.entry_binary(i)
2174 2177 if i == 0 and self._docket is None:
2175 2178 header = self._format_flags | self._format_version
2176 2179 header = self.index.pack_header(header)
2177 2180 e = header + e
2178 2181 fp.write(e)
2179 2182 if self._docket is not None:
2180 2183 self._docket.index_end = fp.tell()
2181 2184 # the temp file replaces the real index when we exit the context
2182 2185 # manager
2183 2186
2184 2187 tr.replace(self._indexfile, trindex * self.index.entry_size)
2185 2188 nodemaputil.setup_persistent_nodemap(tr, self)
2186 2189 self._chunkclear()
2187 2190
2188 2191 if existing_handles:
2189 2192 # switched from inline to conventional; reopen the index
2190 2193 ifh = self.__index_write_fp()
2191 2194 self._writinghandles = (ifh, new_dfh)
2192 2195 new_dfh = None
2193 2196 finally:
2194 2197 if new_dfh is not None:
2195 2198 new_dfh.close()
2196 2199
2197 2200 def _nodeduplicatecallback(self, transaction, node):
2198 2201 """called when trying to add a node already stored."""
2199 2202
2200 2203 @contextlib.contextmanager
2201 2204 def _writing(self, transaction):
2202 2205 if self._trypending:
2203 2206 msg = b'try to write in a `trypending` revlog: %s'
2204 2207 msg %= self.display_id
2205 2208 raise error.ProgrammingError(msg)
2206 2209 if self._writinghandles is not None:
2207 2210 yield
2208 2211 else:
2209 2212 r = len(self)
2210 2213 dsize = 0
2211 2214 if r:
2212 2215 dsize = self.end(r - 1)
2213 2216 dfh = None
2214 2217 if not self._inline:
2215 2218 try:
2216 2219 dfh = self._datafp(b"r+")
2217 2220 if self._docket is None:
2218 2221 dfh.seek(0, os.SEEK_END)
2219 2222 else:
2220 2223 dfh.seek(self._docket.data_end, os.SEEK_SET)
2221 2224 except IOError as inst:
2222 2225 if inst.errno != errno.ENOENT:
2223 2226 raise
2224 2227 dfh = self._datafp(b"w+")
2225 2228 transaction.add(self._datafile, dsize)
2226 2229 try:
2227 2230 isize = r * self.index.entry_size
2228 2231 ifh = self.__index_write_fp()
2229 2232 if self._inline:
2230 2233 transaction.add(self._indexfile, dsize + isize)
2231 2234 else:
2232 2235 transaction.add(self._indexfile, isize)
2233 2236 try:
2234 2237 self._writinghandles = (ifh, dfh)
2235 2238 try:
2236 2239 yield
2237 2240 if self._docket is not None:
2238 2241 self._write_docket(transaction)
2239 2242 finally:
2240 2243 self._writinghandles = None
2241 2244 finally:
2242 2245 ifh.close()
2243 2246 finally:
2244 2247 if dfh is not None:
2245 2248 dfh.close()
2246 2249
2247 2250 def _write_docket(self, transaction):
2248 2251 """write the current docket on disk
2249 2252
2250 2253 Exists as a method to help the changelog implement its transaction logic
2251 2254 
2252 2255 We could also imagine using the same transaction logic for all revlogs
2253 2256 since dockets are cheap."""
2254 2257 self._docket.write(transaction)
2255 2258
2256 2259 def addrevision(
2257 2260 self,
2258 2261 text,
2259 2262 transaction,
2260 2263 link,
2261 2264 p1,
2262 2265 p2,
2263 2266 cachedelta=None,
2264 2267 node=None,
2265 2268 flags=REVIDX_DEFAULT_FLAGS,
2266 2269 deltacomputer=None,
2267 2270 sidedata=None,
2268 2271 ):
2269 2272 """add a revision to the log
2270 2273
2271 2274 text - the revision data to add
2272 2275 transaction - the transaction object used for rollback
2273 2276 link - the linkrev data to add
2274 2277 p1, p2 - the parent nodeids of the revision
2275 2278 cachedelta - an optional precomputed delta
2276 2279 node - nodeid of revision; typically node is not specified, and it is
2277 2280 computed by default as hash(text, p1, p2), however subclasses might
2278 2281 use a different hashing method (and override checkhash() in that case)
2279 2282 flags - the known flags to set on the revision
2280 2283 deltacomputer - an optional deltacomputer instance shared between
2281 2284 multiple calls
2282 2285 """
2283 2286 if link == nullrev:
2284 2287 raise error.RevlogError(
2285 2288 _(b"attempted to add linkrev -1 to %s") % self.display_id
2286 2289 )
2287 2290
2288 2291 if sidedata is None:
2289 2292 sidedata = {}
2290 2293 elif sidedata and not self.hassidedata:
2291 2294 raise error.ProgrammingError(
2292 2295 _(b"trying to add sidedata to a revlog who don't support them")
2293 2296 )
2294 2297
2295 2298 if flags:
2296 2299 node = node or self.hash(text, p1, p2)
2297 2300
2298 2301 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2299 2302
2300 2303 # If the flag processor modifies the revision data, ignore any provided
2301 2304 # cachedelta.
2302 2305 if rawtext != text:
2303 2306 cachedelta = None
2304 2307
2305 2308 if len(rawtext) > _maxentrysize:
2306 2309 raise error.RevlogError(
2307 2310 _(
2308 2311 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2309 2312 )
2310 2313 % (self.display_id, len(rawtext))
2311 2314 )
2312 2315
2313 2316 node = node or self.hash(rawtext, p1, p2)
2314 2317 rev = self.index.get_rev(node)
2315 2318 if rev is not None:
2316 2319 return rev
2317 2320
2318 2321 if validatehash:
2319 2322 self.checkhash(rawtext, node, p1=p1, p2=p2)
2320 2323
2321 2324 return self.addrawrevision(
2322 2325 rawtext,
2323 2326 transaction,
2324 2327 link,
2325 2328 p1,
2326 2329 p2,
2327 2330 node,
2328 2331 flags,
2329 2332 cachedelta=cachedelta,
2330 2333 deltacomputer=deltacomputer,
2331 2334 sidedata=sidedata,
2332 2335 )
2333 2336
2334 2337 def addrawrevision(
2335 2338 self,
2336 2339 rawtext,
2337 2340 transaction,
2338 2341 link,
2339 2342 p1,
2340 2343 p2,
2341 2344 node,
2342 2345 flags,
2343 2346 cachedelta=None,
2344 2347 deltacomputer=None,
2345 2348 sidedata=None,
2346 2349 ):
2347 2350 """add a raw revision with known flags, node and parents
2348 2351 useful when reusing a revision not stored in this revlog (ex: received
2349 2352 over wire, or read from an external bundle).
2350 2353 """
2351 2354 with self._writing(transaction):
2352 2355 return self._addrevision(
2353 2356 node,
2354 2357 rawtext,
2355 2358 transaction,
2356 2359 link,
2357 2360 p1,
2358 2361 p2,
2359 2362 flags,
2360 2363 cachedelta,
2361 2364 deltacomputer=deltacomputer,
2362 2365 sidedata=sidedata,
2363 2366 )
2364 2367
2365 2368 def compress(self, data):
2366 2369 """Generate a possibly-compressed representation of data."""
2367 2370 if not data:
2368 2371 return b'', data
2369 2372
2370 2373 compressed = self._compressor.compress(data)
2371 2374
2372 2375 if compressed:
2373 2376 # The revlog compressor added the header in the returned data.
2374 2377 return b'', compressed
2375 2378
2376 2379 if data[0:1] == b'\0':
2377 2380 return b'', data
2378 2381 return b'u', data
2379 2382
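# Return shapes of compress() above (illustrative; zlib output shown,
# the actual engine depends on repository configuration):
#   (b'', b'x\x9c...')  the engine output carries its own header
#   (b'', b'\0...')     data starts with NUL, safe to store verbatim
#   (b'u', b'text')     literal data, b'u' marks it as uncompressed
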
2380 2383 def decompress(self, data):
2381 2384 """Decompress a revlog chunk.
2382 2385
2383 2386 The chunk is expected to begin with a header identifying the
2384 2387 format type so it can be routed to an appropriate decompressor.
2385 2388 """
2386 2389 if not data:
2387 2390 return data
2388 2391
2389 2392 # Revlogs are read much more frequently than they are written and many
2390 2393 # chunks only take microseconds to decompress, so performance is
2391 2394 # important here.
2392 2395 #
2393 2396 # We can make a few assumptions about revlogs:
2394 2397 #
2395 2398 # 1) the majority of chunks will be compressed (as opposed to inline
2396 2399 # raw data).
2397 2400 # 2) decompressing *any* data will likely be at least 10x slower than
2398 2401 # returning raw inline data.
2399 2402 # 3) we want to prioritize common and officially supported compression
2400 2403 # engines
2401 2404 #
2402 2405 # It follows that we want to optimize for the "decompress compressed data
2403 2406 # when encoded with common and officially supported compression engines"
2404 2407 # case over "raw data" and "data encoded by less common or non-official
2405 2408 # compression engines." That is why we have the inline lookup first
2406 2409 # followed by the compengines lookup.
2407 2410 #
2408 2411 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2409 2412 # compressed chunks. And this matters for changelog and manifest reads.
2410 2413 t = data[0:1]
2411 2414
2412 2415 if t == b'x':
2413 2416 try:
2414 2417 return _zlibdecompress(data)
2415 2418 except zlib.error as e:
2416 2419 raise error.RevlogError(
2417 2420 _(b'revlog decompress error: %s')
2418 2421 % stringutil.forcebytestr(e)
2419 2422 )
2420 2423 # '\0' is more common than 'u' so it goes first.
2421 2424 elif t == b'\0':
2422 2425 return data
2423 2426 elif t == b'u':
2424 2427 return util.buffer(data, 1)
2425 2428
2426 2429 compressor = self._get_decompressor(t)
2427 2430
2428 2431 return compressor.decompress(data)
2429 2432
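# Routing in decompress() above, keyed on the first byte (illustrative):
#   b'x...'  -> zlib.decompress(data)
#   b'\0...' -> returned as-is (NUL-led raw data)
#   b'u...'  -> buffer view skipping the leading b'u' marker
#   other    -> resolved through the registered compression engines
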
2430 2433 def _addrevision(
2431 2434 self,
2432 2435 node,
2433 2436 rawtext,
2434 2437 transaction,
2435 2438 link,
2436 2439 p1,
2437 2440 p2,
2438 2441 flags,
2439 2442 cachedelta,
2440 2443 alwayscache=False,
2441 2444 deltacomputer=None,
2442 2445 sidedata=None,
2443 2446 ):
2444 2447 """internal function to add revisions to the log
2445 2448
2446 2449 see addrevision for argument descriptions.
2447 2450
2448 2451 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2449 2452
2450 2453 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2451 2454 be used.
2452 2455
2453 2456 invariants:
2454 2457 - rawtext is optional (can be None); if not set, cachedelta must be set.
2455 2458 if both are set, they must correspond to each other.
2456 2459 """
2457 2460 if node == self.nullid:
2458 2461 raise error.RevlogError(
2459 2462 _(b"%s: attempt to add null revision") % self.display_id
2460 2463 )
2461 2464 if (
2462 2465 node == self.nodeconstants.wdirid
2463 2466 or node in self.nodeconstants.wdirfilenodeids
2464 2467 ):
2465 2468 raise error.RevlogError(
2466 2469 _(b"%s: attempt to add wdir revision") % self.display_id
2467 2470 )
2468 2471 if self._writinghandles is None:
2469 2472 msg = b'adding revision outside `revlog._writing` context'
2470 2473 raise error.ProgrammingError(msg)
2471 2474
2472 2475 if self._inline:
2473 2476 fh = self._writinghandles[0]
2474 2477 else:
2475 2478 fh = self._writinghandles[1]
2476 2479
2477 2480 btext = [rawtext]
2478 2481
2479 2482 curr = len(self)
2480 2483 prev = curr - 1
2481 2484
2482 2485 offset = self._get_data_offset(prev)
2483 2486
2484 2487 if self._concurrencychecker:
2485 2488 ifh, dfh = self._writinghandles
2486 2489 if self._inline:
2487 2490 # offset is "as if" it were in the .d file, so we need to add on
2488 2491 # the size of the entry metadata.
2489 2492 self._concurrencychecker(
2490 2493 ifh, self._indexfile, offset + curr * self.index.entry_size
2491 2494 )
2492 2495 else:
2493 2496 # Entries in the .i are a consistent size.
2494 2497 self._concurrencychecker(
2495 2498 ifh, self._indexfile, curr * self.index.entry_size
2496 2499 )
2497 2500 self._concurrencychecker(dfh, self._datafile, offset)
2498 2501
2499 2502 p1r, p2r = self.rev(p1), self.rev(p2)
2500 2503
2501 2504 # full versions are inserted when the needed deltas
2502 2505 # become comparable to the uncompressed text
2503 2506 if rawtext is None:
2504 2507 # need rawtext size, before changed by flag processors, which is
2505 2508 # the non-raw size. use revlog explicitly to avoid filelog's extra
2506 2509 # logic that might remove metadata size.
2507 2510 textlen = mdiff.patchedsize(
2508 2511 revlog.size(self, cachedelta[0]), cachedelta[1]
2509 2512 )
2510 2513 else:
2511 2514 textlen = len(rawtext)
2512 2515
2513 2516 if deltacomputer is None:
2514 2517 deltacomputer = deltautil.deltacomputer(self)
2515 2518
2516 2519 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2517 2520
2518 2521 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2519 2522
2520 2523 compression_mode = COMP_MODE_INLINE
2521 2524 if self._docket is not None:
2522 2525 h, d = deltainfo.data
2523 2526 if not h and not d:
2524 2527 # no data to store at all... declare it uncompressed
2525 2528 compression_mode = COMP_MODE_PLAIN
2526 2529 elif not h:
2527 2530 t = d[0:1]
2528 2531 if t == b'\0':
2529 2532 compression_mode = COMP_MODE_PLAIN
2530 2533 elif t == self._docket.default_compression_header:
2531 2534 compression_mode = COMP_MODE_DEFAULT
2532 2535 elif h == b'u':
2533 2536 # we have a more efficient way to declare uncompressed
2534 2537 h = b''
2535 2538 compression_mode = COMP_MODE_PLAIN
2536 2539 deltainfo = deltautil.drop_u_compression(deltainfo)
2537 2540
2538 2541 sidedata_compression_mode = COMP_MODE_INLINE
2539 2542 if sidedata and self.hassidedata:
2540 2543 sidedata_compression_mode = COMP_MODE_PLAIN
2541 2544 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2542 2545 sidedata_offset = offset + deltainfo.deltalen
2543 2546 h, comp_sidedata = self.compress(serialized_sidedata)
2544 2547 if (
2545 2548 h != b'u'
2546 2549 and comp_sidedata[0:1] != b'\0'
2547 2550 and len(comp_sidedata) < len(serialized_sidedata)
2548 2551 ):
2549 2552 assert not h
2550 2553 if (
2551 2554 comp_sidedata[0:1]
2552 2555 == self._docket.default_compression_header
2553 2556 ):
2554 2557 sidedata_compression_mode = COMP_MODE_DEFAULT
2555 2558 serialized_sidedata = comp_sidedata
2556 2559 else:
2557 2560 sidedata_compression_mode = COMP_MODE_INLINE
2558 2561 serialized_sidedata = comp_sidedata
2559 2562 else:
2560 2563 serialized_sidedata = b""
2561 2564 # Don't store the offset if the sidedata is empty, that way
2562 2565 # we can easily detect empty sidedata and it will be no different
2563 2566 # from sidedata we add manually.
2564 2567 sidedata_offset = 0
2565 2568
2566 2569 e = (
2567 2570 offset_type(offset, flags),
2568 2571 deltainfo.deltalen,
2569 2572 textlen,
2570 2573 deltainfo.base,
2571 2574 link,
2572 2575 p1r,
2573 2576 p2r,
2574 2577 node,
2575 2578 sidedata_offset,
2576 2579 len(serialized_sidedata),
2577 2580 compression_mode,
2578 2581 sidedata_compression_mode,
2579 2582 )
2580 2583
2581 2584 self.index.append(e)
2582 2585 entry = self.index.entry_binary(curr)
2583 2586 if curr == 0 and self._docket is None:
2584 2587 header = self._format_flags | self._format_version
2585 2588 header = self.index.pack_header(header)
2586 2589 entry = header + entry
2587 2590 self._writeentry(
2588 2591 transaction,
2589 2592 entry,
2590 2593 deltainfo.data,
2591 2594 link,
2592 2595 offset,
2593 2596 serialized_sidedata,
2594 2597 )
2595 2598
2596 2599 rawtext = btext[0]
2597 2600
2598 2601 if alwayscache and rawtext is None:
2599 2602 rawtext = deltacomputer.buildtext(revinfo, fh)
2600 2603
2601 2604 if type(rawtext) == bytes: # only accept immutable objects
2602 2605 self._revisioncache = (node, curr, rawtext)
2603 2606 self._chainbasecache[curr] = deltainfo.chainbase
2604 2607 return curr
2605 2608
2606 2609 def _get_data_offset(self, prev):
2607 2610 """Returns the current offset in the (in-transaction) data file.
2608 2611 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2609 2612 file to store that information: since sidedata can be rewritten to the
2610 2613 end of the data file within a transaction, you can have cases where, for
2611 2614 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2612 2615 to `n - 1`'s sidedata being written after `n`'s data.
2613 2616
2614 2617 TODO cache this in a docket file before getting out of experimental."""
2615 2618 if self._docket is None:
2616 2619 return self.end(prev)
2617 2620 else:
2618 2621 return self._docket.data_end
2619 2622
2620 2623 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2621 2624 # Files opened in a+ mode have inconsistent behavior on various
2622 2625 # platforms. Windows requires that a file positioning call be made
2623 2626 # when the file handle transitions between reads and writes. See
2624 2627 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2625 2628 # platforms, Python or the platform itself can be buggy. Some versions
2626 2629 # of Solaris have been observed to not append at the end of the file
2627 2630 # if the file was seeked to before the end. See issue4943 for more.
2628 2631 #
2629 2632 # We work around this issue by inserting a seek() before writing.
2630 2633 # Note: This is likely not necessary on Python 3. However, because
2631 2634 # the file handle is reused for reads and may be seeked there, we need
2632 2635 # to be careful before changing this.
2633 2636 if self._writinghandles is None:
2634 2637 msg = b'adding revision outside `revlog._writing` context'
2635 2638 raise error.ProgrammingError(msg)
2636 2639 ifh, dfh = self._writinghandles
2637 2640 if self._docket is None:
2638 2641 ifh.seek(0, os.SEEK_END)
2639 2642 else:
2640 2643 ifh.seek(self._docket.index_end, os.SEEK_SET)
2641 2644 if dfh:
2642 2645 if self._docket is None:
2643 2646 dfh.seek(0, os.SEEK_END)
2644 2647 else:
2645 2648 dfh.seek(self._docket.data_end, os.SEEK_SET)
2646 2649
2647 2650 curr = len(self) - 1
2648 2651 if not self._inline:
2649 2652 transaction.add(self._datafile, offset)
2650 2653 transaction.add(self._indexfile, curr * len(entry))
2651 2654 if data[0]:
2652 2655 dfh.write(data[0])
2653 2656 dfh.write(data[1])
2654 2657 if sidedata:
2655 2658 dfh.write(sidedata)
2656 2659 ifh.write(entry)
2657 2660 else:
2658 2661 offset += curr * self.index.entry_size
2659 2662 transaction.add(self._indexfile, offset)
2660 2663 ifh.write(entry)
2661 2664 ifh.write(data[0])
2662 2665 ifh.write(data[1])
2663 2666 if sidedata:
2664 2667 ifh.write(sidedata)
2665 2668 self._enforceinlinesize(transaction)
2666 2669 if self._docket is not None:
2667 2670 self._docket.index_end = self._writinghandles[0].tell()
2668 2671 self._docket.data_end = self._writinghandles[1].tell()
2669 2672
2670 2673 nodemaputil.setup_persistent_nodemap(transaction, self)
2671 2674
2672 2675 def addgroup(
2673 2676 self,
2674 2677 deltas,
2675 2678 linkmapper,
2676 2679 transaction,
2677 2680 alwayscache=False,
2678 2681 addrevisioncb=None,
2679 2682 duplicaterevisioncb=None,
2680 2683 ):
2681 2684 """
2682 2685 add a delta group
2683 2686
2684 2687 given a set of deltas, add them to the revision log. the
2685 2688 first delta is against its parent, which should be in our
2686 2689 log, the rest are against the previous delta.
2687 2690
2688 2691 If ``addrevisioncb`` is defined, it will be called with arguments of
2689 2692 this revlog and the node that was added.
2690 2693 """
2691 2694 """
2692 2695 if self._adding_group:
2693 2696 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2694 2697
2695 2698 self._adding_group = True
2696 2699 empty = True
2697 2700 try:
2698 2701 with self._writing(transaction):
2699 2702 deltacomputer = deltautil.deltacomputer(self)
2700 2703 # loop through our set of deltas
2701 2704 for data in deltas:
2702 2705 (
2703 2706 node,
2704 2707 p1,
2705 2708 p2,
2706 2709 linknode,
2707 2710 deltabase,
2708 2711 delta,
2709 2712 flags,
2710 2713 sidedata,
2711 2714 ) = data
2712 2715 link = linkmapper(linknode)
2713 2716 flags = flags or REVIDX_DEFAULT_FLAGS
2714 2717
2715 2718 rev = self.index.get_rev(node)
2716 2719 if rev is not None:
2717 2720 # this can happen if two branches make the same change
2718 2721 self._nodeduplicatecallback(transaction, rev)
2719 2722 if duplicaterevisioncb:
2720 2723 duplicaterevisioncb(self, rev)
2721 2724 empty = False
2722 2725 continue
2723 2726
2724 2727 for p in (p1, p2):
2725 2728 if not self.index.has_node(p):
2726 2729 raise error.LookupError(
2727 2730 p, self.radix, _(b'unknown parent')
2728 2731 )
2729 2732
2730 2733 if not self.index.has_node(deltabase):
2731 2734 raise error.LookupError(
2732 2735 deltabase, self.display_id, _(b'unknown delta base')
2733 2736 )
2734 2737
2735 2738 baserev = self.rev(deltabase)
2736 2739
2737 2740 if baserev != nullrev and self.iscensored(baserev):
2738 2741 # if base is censored, delta must be a full replacement in a
2739 2742 # single patch operation
2740 2743 hlen = struct.calcsize(b">lll")
2741 2744 oldlen = self.rawsize(baserev)
2742 2745 newlen = len(delta) - hlen
2743 2746 if delta[:hlen] != mdiff.replacediffheader(
2744 2747 oldlen, newlen
2745 2748 ):
2746 2749 raise error.CensoredBaseError(
2747 2750 self.display_id, self.node(baserev)
2748 2751 )
2749 2752
2750 2753 if not flags and self._peek_iscensored(baserev, delta):
2751 2754 flags |= REVIDX_ISCENSORED
2752 2755
2753 2756 # We assume consumers of addrevisioncb will want to retrieve
2754 2757 # the added revision, which will require a call to
2755 2758 # revision(). revision() will fast path if there is a cache
2756 2759 # hit. So, we tell _addrevision() to always cache in this case.
2757 2760 # We're only using addgroup() in the context of changegroup
2758 2761 # generation so the revision data can always be handled as raw
2759 2762 # by the flagprocessor.
2760 2763 rev = self._addrevision(
2761 2764 node,
2762 2765 None,
2763 2766 transaction,
2764 2767 link,
2765 2768 p1,
2766 2769 p2,
2767 2770 flags,
2768 2771 (baserev, delta),
2769 2772 alwayscache=alwayscache,
2770 2773 deltacomputer=deltacomputer,
2771 2774 sidedata=sidedata,
2772 2775 )
2773 2776
2774 2777 if addrevisioncb:
2775 2778 addrevisioncb(self, rev)
2776 2779 empty = False
2777 2780 finally:
2778 2781 self._adding_group = False
2779 2782 return not empty
2780 2783
2781 2784 def iscensored(self, rev):
2782 2785 """Check if a file revision is censored."""
2783 2786 if not self._censorable:
2784 2787 return False
2785 2788
2786 2789 return self.flags(rev) & REVIDX_ISCENSORED
2787 2790
2788 2791 def _peek_iscensored(self, baserev, delta):
2789 2792 """Quickly check if a delta produces a censored revision."""
2790 2793 if not self._censorable:
2791 2794 return False
2792 2795
2793 2796 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2794 2797
2795 2798 def getstrippoint(self, minlink):
2796 2799 """find the minimum rev that must be stripped to strip the linkrev
2797 2800
2798 2801 Returns a tuple containing the minimum rev and a set of all revs that
2799 2802 have linkrevs that will be broken by this strip.
2800 2803 """
2801 2804 return storageutil.resolvestripinfo(
2802 2805 minlink,
2803 2806 len(self) - 1,
2804 2807 self.headrevs(),
2805 2808 self.linkrev,
2806 2809 self.parentrevs,
2807 2810 )
2808 2811
2809 2812 def strip(self, minlink, transaction):
2810 2813 """truncate the revlog on the first revision with a linkrev >= minlink
2811 2814
2812 2815 This function is called when we're stripping revision minlink and
2813 2816 its descendants from the repository.
2814 2817
2815 2818 We have to remove all revisions with linkrev >= minlink, because
2816 2819 the equivalent changelog revisions will be renumbered after the
2817 2820 strip.
2818 2821
2819 2822 So we truncate the revlog on the first of these revisions, and
2820 2823 trust that the caller has saved the revisions that shouldn't be
2821 2824 removed and that it'll re-add them after this truncation.
2822 2825 """
2823 2826 if len(self) == 0:
2824 2827 return
2825 2828
2826 2829 rev, _ = self.getstrippoint(minlink)
2827 2830 if rev == len(self):
2828 2831 return
2829 2832
2830 2833 # first truncate the files on disk
2831 2834 data_end = self.start(rev)
2832 2835 if not self._inline:
2833 2836 transaction.add(self._datafile, data_end)
2834 2837 end = rev * self.index.entry_size
2835 2838 else:
2836 2839 end = data_end + (rev * self.index.entry_size)
2837 2840
2838 2841 transaction.add(self._indexfile, end)
2839 2842 if self._docket is not None:
2840 2843 # XXX we could leverage the docket while stripping. However, it is
2841 2844 # not powerful enough at the time of this comment
2842 2845 self._docket.index_end = end
2843 2846 self._docket.data_end = data_end
2844 2847 self._docket.write(transaction, stripping=True)
2845 2848
2846 2849 # then reset internal state in memory to forget those revisions
2847 2850 self._revisioncache = None
2848 2851 self._chaininfocache = util.lrucachedict(500)
2849 2852 self._chunkclear()
2850 2853
2851 2854 del self.index[rev:-1]
2852 2855
2853 2856 def checksize(self):
2854 2857 """Check size of index and data files
2855 2858
2856 2859 return a (dd, di) tuple.
2857 2860 - dd: extra bytes for the "data" file
2858 2861 - di: extra bytes for the "index" file
2859 2862
2860 2863 A healthy revlog will return (0, 0).
2861 2864 """
2862 2865 expected = 0
2863 2866 if len(self):
2864 2867 expected = max(0, self.end(len(self) - 1))
2865 2868
2866 2869 try:
2867 2870 with self._datafp() as f:
2868 2871 f.seek(0, io.SEEK_END)
2869 2872 actual = f.tell()
2870 2873 dd = actual - expected
2871 2874 except IOError as inst:
2872 2875 if inst.errno != errno.ENOENT:
2873 2876 raise
2874 2877 dd = 0
2875 2878
2876 2879 try:
2877 2880 f = self.opener(self._indexfile)
2878 2881 f.seek(0, io.SEEK_END)
2879 2882 actual = f.tell()
2880 2883 f.close()
2881 2884 s = self.index.entry_size
2882 2885 i = max(0, actual // s)
2883 2886 di = actual - (i * s)
2884 2887 if self._inline:
2885 2888 databytes = 0
2886 2889 for r in self:
2887 2890 databytes += max(0, self.length(r))
2888 2891 dd = 0
2889 2892 di = actual - len(self) * s - databytes
2890 2893 except IOError as inst:
2891 2894 if inst.errno != errno.ENOENT:
2892 2895 raise
2893 2896 di = 0
2894 2897
2895 2898 return (dd, di)
2896 2899
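# Worked example for checksize() above (illustrative, non-inline revlog
# with entry_size == 64): an index file of 3 * 64 + 10 == 202 bytes
# yields i == 3 and di == 10 stray trailing bytes; a healthy revlog
# reports (0, 0).
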
2897 2900 def files(self):
2898 2901 res = [self._indexfile]
2899 2902 if not self._inline:
2900 2903 res.append(self._datafile)
2901 2904 return res
2902 2905
2903 2906 def emitrevisions(
2904 2907 self,
2905 2908 nodes,
2906 2909 nodesorder=None,
2907 2910 revisiondata=False,
2908 2911 assumehaveparentrevisions=False,
2909 2912 deltamode=repository.CG_DELTAMODE_STD,
2910 2913 sidedata_helpers=None,
2911 2914 ):
2912 2915 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2913 2916 raise error.ProgrammingError(
2914 2917 b'unhandled value for nodesorder: %s' % nodesorder
2915 2918 )
2916 2919
2917 2920 if nodesorder is None and not self._generaldelta:
2918 2921 nodesorder = b'storage'
2919 2922
2920 2923 if (
2921 2924 not self._storedeltachains
2922 2925 and deltamode != repository.CG_DELTAMODE_PREV
2923 2926 ):
2924 2927 deltamode = repository.CG_DELTAMODE_FULL
2925 2928
2926 2929 return storageutil.emitrevisions(
2927 2930 self,
2928 2931 nodes,
2929 2932 nodesorder,
2930 2933 revlogrevisiondelta,
2931 2934 deltaparentfn=self.deltaparent,
2932 2935 candeltafn=self.candelta,
2933 2936 rawsizefn=self.rawsize,
2934 2937 revdifffn=self.revdiff,
2935 2938 flagsfn=self.flags,
2936 2939 deltamode=deltamode,
2937 2940 revisiondata=revisiondata,
2938 2941 assumehaveparentrevisions=assumehaveparentrevisions,
2939 2942 sidedata_helpers=sidedata_helpers,
2940 2943 )
2941 2944
2942 2945 DELTAREUSEALWAYS = b'always'
2943 2946 DELTAREUSESAMEREVS = b'samerevs'
2944 2947 DELTAREUSENEVER = b'never'
2945 2948
2946 2949 DELTAREUSEFULLADD = b'fulladd'
2947 2950
2948 2951 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2949 2952
2950 2953 def clone(
2951 2954 self,
2952 2955 tr,
2953 2956 destrevlog,
2954 2957 addrevisioncb=None,
2955 2958 deltareuse=DELTAREUSESAMEREVS,
2956 2959 forcedeltabothparents=None,
2957 2960 sidedata_helpers=None,
2958 2961 ):
2959 2962 """Copy this revlog to another, possibly with format changes.
2960 2963
2961 2964 The destination revlog will contain the same revisions and nodes.
2962 2965 However, it may not be bit-for-bit identical due to e.g. delta encoding
2963 2966 differences.
2964 2967
2965 2968 The ``deltareuse`` argument controls how deltas from the existing revlog
2966 2969 are preserved in the destination revlog. The argument can have the
2967 2970 following values:
2968 2971
2969 2972 DELTAREUSEALWAYS
2970 2973 Deltas will always be reused (if possible), even if the destination
2971 2974 revlog would not select the same revisions for the delta. This is the
2972 2975 fastest mode of operation.
2973 2976 DELTAREUSESAMEREVS
2974 2977 Deltas will be reused if the destination revlog would pick the same
2975 2978 revisions for the delta. This mode strikes a balance between speed
2976 2979 and optimization.
2977 2980 DELTAREUSENEVER
2978 2981 Deltas will never be reused. This is the slowest mode of execution.
2979 2982 This mode can be used to recompute deltas (e.g. if the diff/delta
2980 2983 algorithm changes).
2981 2984 DELTAREUSEFULLADD
2982 2985 Revisions will be re-added as if they were new content. This is
2983 2986 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2984 2987 e.g. large file detection and handling.
2985 2988
2986 2989 Delta computation can be slow, so the choice of delta reuse policy can
2987 2990 significantly affect run time.
2988 2991
2989 2992 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2990 2993 two extremes. Deltas will be reused if they are appropriate. But if the
2991 2994 delta could choose a better revision, it will do so. This means if you
2992 2995 are converting a non-generaldelta revlog to a generaldelta revlog,
2993 2996 deltas will be recomputed if the delta's parent isn't a parent of the
2994 2997 revision.
2995 2998
2996 2999 In addition to the delta policy, the ``forcedeltabothparents``
2997 3000 argument controls whether to force computing deltas against both parents
2998 3001 for merges. If not set, the current default is used.
2999 3002
3000 3003 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3001 3004 `sidedata_helpers`.
3002 3005 """
3003 3006 if deltareuse not in self.DELTAREUSEALL:
3004 3007 raise ValueError(
3005 3008 _(b'value for deltareuse invalid: %s') % deltareuse
3006 3009 )
3007 3010
3008 3011 if len(destrevlog):
3009 3012 raise ValueError(_(b'destination revlog is not empty'))
3010 3013
3011 3014 if getattr(self, 'filteredrevs', None):
3012 3015 raise ValueError(_(b'source revlog has filtered revisions'))
3013 3016 if getattr(destrevlog, 'filteredrevs', None):
3014 3017 raise ValueError(_(b'destination revlog has filtered revisions'))
3015 3018
3016 3019 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3017 3020 # if possible.
3018 3021 oldlazydelta = destrevlog._lazydelta
3019 3022 oldlazydeltabase = destrevlog._lazydeltabase
3020 3023 oldamd = destrevlog._deltabothparents
3021 3024
3022 3025 try:
3023 3026 if deltareuse == self.DELTAREUSEALWAYS:
3024 3027 destrevlog._lazydeltabase = True
3025 3028 destrevlog._lazydelta = True
3026 3029 elif deltareuse == self.DELTAREUSESAMEREVS:
3027 3030 destrevlog._lazydeltabase = False
3028 3031 destrevlog._lazydelta = True
3029 3032 elif deltareuse == self.DELTAREUSENEVER:
3030 3033 destrevlog._lazydeltabase = False
3031 3034 destrevlog._lazydelta = False
3032 3035
3033 3036 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3034 3037
3035 3038 self._clone(
3036 3039 tr,
3037 3040 destrevlog,
3038 3041 addrevisioncb,
3039 3042 deltareuse,
3040 3043 forcedeltabothparents,
3041 3044 sidedata_helpers,
3042 3045 )
3043 3046
3044 3047 finally:
3045 3048 destrevlog._lazydelta = oldlazydelta
3046 3049 destrevlog._lazydeltabase = oldlazydeltabase
3047 3050 destrevlog._deltabothparents = oldamd
3048 3051
3049 3052 def _clone(
3050 3053 self,
3051 3054 tr,
3052 3055 destrevlog,
3053 3056 addrevisioncb,
3054 3057 deltareuse,
3055 3058 forcedeltabothparents,
3056 3059 sidedata_helpers,
3057 3060 ):
3058 3061 """perform the core duty of `revlog.clone` after parameter processing"""
3059 3062 deltacomputer = deltautil.deltacomputer(destrevlog)
3060 3063 index = self.index
3061 3064 for rev in self:
3062 3065 entry = index[rev]
3063 3066
3064 3067 # Some classes override linkrev to take filtered revs into
3065 3068 # account. Use raw entry from index.
3066 3069 flags = entry[0] & 0xFFFF
3067 3070 linkrev = entry[4]
3068 3071 p1 = index[entry[5]][7]
3069 3072 p2 = index[entry[6]][7]
3070 3073 node = entry[7]
3071 3074
3072 3075 # (Possibly) reuse the delta from the revlog if allowed and
3073 3076 # the revlog chunk is a delta.
3074 3077 cachedelta = None
3075 3078 rawtext = None
3076 3079 if deltareuse == self.DELTAREUSEFULLADD:
3077 3080 text, sidedata = self._revisiondata(rev)
3078 3081
3079 3082 if sidedata_helpers is not None:
3080 3083 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3081 3084 self, sidedata_helpers, sidedata, rev
3082 3085 )
3083 3086 flags = flags | new_flags[0] & ~new_flags[1]
3084 3087
3085 3088 destrevlog.addrevision(
3086 3089 text,
3087 3090 tr,
3088 3091 linkrev,
3089 3092 p1,
3090 3093 p2,
3091 3094 cachedelta=cachedelta,
3092 3095 node=node,
3093 3096 flags=flags,
3094 3097 deltacomputer=deltacomputer,
3095 3098 sidedata=sidedata,
3096 3099 )
3097 3100 else:
3098 3101 if destrevlog._lazydelta:
3099 3102 dp = self.deltaparent(rev)
3100 3103 if dp != nullrev:
3101 3104 cachedelta = (dp, bytes(self._chunk(rev)))
3102 3105
3103 3106 sidedata = None
3104 3107 if not cachedelta:
3105 3108 rawtext, sidedata = self._revisiondata(rev)
3106 3109 if sidedata is None:
3107 3110 sidedata = self.sidedata(rev)
3108 3111
3109 3112 if sidedata_helpers is not None:
3110 3113 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3111 3114 self, sidedata_helpers, sidedata, rev
3112 3115 )
3113 3116 flags = flags | new_flags[0] & ~new_flags[1]
3114 3117
3115 3118 with destrevlog._writing(tr):
3116 3119 destrevlog._addrevision(
3117 3120 node,
3118 3121 rawtext,
3119 3122 tr,
3120 3123 linkrev,
3121 3124 p1,
3122 3125 p2,
3123 3126 flags,
3124 3127 cachedelta,
3125 3128 deltacomputer=deltacomputer,
3126 3129 sidedata=sidedata,
3127 3130 )
3128 3131
3129 3132 if addrevisioncb:
3130 3133 addrevisioncb(self, rev, node)
3131 3134
3132 3135 def censorrevision(self, tr, censornode, tombstone=b''):
3133 3136 if self._format_version == REVLOGV0:
3134 3137 raise error.RevlogError(
3135 3138 _(b'cannot censor with version %d revlogs')
3136 3139 % self._format_version
3137 3140 )
3138 3141
3139 3142 censorrev = self.rev(censornode)
3140 3143 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3141 3144
3142 3145 if len(tombstone) > self.rawsize(censorrev):
3143 3146 raise error.Abort(
3144 3147 _(b'censor tombstone must be no longer than censored data')
3145 3148 )
3146 3149
3147 3150 # Rewriting the revlog in place is hard. Our strategy for censoring is
3148 3151 # to create a new revlog, copy all revisions to it, then replace the
3149 3152 # revlogs on transaction close.
3150 3153 #
3151 3154 # This is a bit dangerous. We could easily have a mismatch of state.
3152 3155 newrl = revlog(
3153 3156 self.opener,
3154 3157 target=self.target,
3155 3158 radix=self.radix,
3156 3159 postfix=b'tmpcensored',
3157 3160 censorable=True,
3158 3161 )
3159 3162 newrl._format_version = self._format_version
3160 3163 newrl._format_flags = self._format_flags
3161 3164 newrl._generaldelta = self._generaldelta
3162 3165 newrl._parse_index = self._parse_index
3163 3166
3164 3167 for rev in self.revs():
3165 3168 node = self.node(rev)
3166 3169 p1, p2 = self.parents(node)
3167 3170
3168 3171 if rev == censorrev:
3169 3172 newrl.addrawrevision(
3170 3173 tombstone,
3171 3174 tr,
3172 3175 self.linkrev(censorrev),
3173 3176 p1,
3174 3177 p2,
3175 3178 censornode,
3176 3179 REVIDX_ISCENSORED,
3177 3180 )
3178 3181
3179 3182 if newrl.deltaparent(rev) != nullrev:
3180 3183 raise error.Abort(
3181 3184 _(
3182 3185 b'censored revision stored as delta; '
3183 3186 b'cannot censor'
3184 3187 ),
3185 3188 hint=_(
3186 3189 b'censoring of revlogs is not '
3187 3190 b'fully implemented; please report '
3188 3191 b'this bug'
3189 3192 ),
3190 3193 )
3191 3194 continue
3192 3195
3193 3196 if self.iscensored(rev):
3194 3197 if self.deltaparent(rev) != nullrev:
3195 3198 raise error.Abort(
3196 3199 _(
3197 3200 b'cannot censor due to censored '
3198 3201 b'revision having delta stored'
3199 3202 )
3200 3203 )
3201 3204 rawtext = self._chunk(rev)
3202 3205 else:
3203 3206 rawtext = self.rawdata(rev)
3204 3207
3205 3208 newrl.addrawrevision(
3206 3209 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3207 3210 )
3208 3211
3209 3212 tr.addbackup(self._indexfile, location=b'store')
3210 3213 if not self._inline:
3211 3214 tr.addbackup(self._datafile, location=b'store')
3212 3215
3213 3216 self.opener.rename(newrl._indexfile, self._indexfile)
3214 3217 if not self._inline:
3215 3218 self.opener.rename(newrl._datafile, self._datafile)
3216 3219
3217 3220 self.clearcaches()
3218 3221 self._loadindex()
3219 3222
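Since censorrevision replaces the index and data files wholesale, a hedged usage sketch (the repository, path, and revision below are hypothetical) would run it under a lock and transaction, so the backups registered above can restore the original revlog on abort:

    with repo.lock(), repo.transaction(b'censor') as tr:
        fl = repo.file(b'path/to/secret')  # filelogs are created censorable
        fl.censorrevision(tr, fl.node(5), tombstone=b'removed per policy')
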
3220 3223 def verifyintegrity(self, state):
3221 3224 """Verifies the integrity of the revlog.
3222 3225
3223 3226 Yields ``revlogproblem`` instances describing problems that are
3224 3227 found.
3225 3228 """
3226 3229 dd, di = self.checksize()
3227 3230 if dd:
3228 3231 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3229 3232 if di:
3230 3233 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3231 3234
3232 3235 version = self._format_version
3233 3236
3234 3237 # The verifier tells us what revlog version we should be.
3235 3238 if version != state[b'expectedversion']:
3236 3239 yield revlogproblem(
3237 3240 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3238 3241 % (self.display_id, version, state[b'expectedversion'])
3239 3242 )
3240 3243
3241 3244 state[b'skipread'] = set()
3242 3245 state[b'safe_renamed'] = set()
3243 3246
3244 3247 for rev in self:
3245 3248 node = self.node(rev)
3246 3249
3247 3250 # Verify contents. 4 cases to care about:
3248 3251 #
3249 3252 # common: the most common case
3250 3253 # rename: with a rename
3251 3254 # meta: file content starts with b'\1\n', the metadata
3252 3255 # header defined in filelog.py, but without a rename
3253 3256 # ext: content stored externally
3254 3257 #
3255 3258 # More formally, their differences are shown below:
3256 3259 #
3257 3260 # | common | rename | meta | ext
3258 3261 # -------------------------------------------------------
3259 3262 # flags() | 0 | 0 | 0 | not 0
3260 3263 # renamed() | False | True | False | ?
3261 3264 # rawtext[0:2]=='\1\n'| False | True | True | ?
3262 3265 #
3263 3266 # "rawtext" means the raw text stored in revlog data, which
3264 3267 # could be retrieved by "rawdata(rev)". "text"
3265 3268 # mentioned below is "revision(rev)".
3266 3269 #
3267 3270 # There are 3 different lengths stored physically:
3268 3271 # 1. L1: rawsize, stored in revlog index
3269 3272 # 2. L2: len(rawtext), stored in revlog data
3270 3273 # 3. L3: len(text), stored in revlog data if flags==0, or
3271 3274 # possibly somewhere else if flags!=0
3272 3275 #
3273 3276 # L1 should be equal to L2. L3 could be different from them.
3274 3277 # "text" may or may not affect commit hash depending on flag
3275 3278 # processors (see flagutil.addflagprocessor).
3276 3279 #
3277 3280 # | common | rename | meta | ext
3278 3281 # -------------------------------------------------
3279 3282 # rawsize() | L1 | L1 | L1 | L1
3280 3283 # size() | L1 | L2-LM | L1(*) | L1 (?)
3281 3284 # len(rawtext) | L2 | L2 | L2 | L2
3282 3285 # len(text) | L2 | L2 | L2 | L3
3283 3286 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3284 3287 #
3285 3288 # LM: length of metadata, depending on rawtext
3286 3289 # (*): not ideal, see comment in filelog.size
3287 3290 # (?): could be "- len(meta)" if the resolved content has
3288 3291 # rename metadata
3289 3292 #
3290 3293 # Checks needed to be done:
3291 3294 # 1. length check: L1 == L2, in all cases.
3292 3295 # 2. hash check: depending on flag processor, we may need to
3293 3296 # use either "text" (external), or "rawtext" (in revlog).
3294 3297
3295 3298 try:
3296 3299 skipflags = state.get(b'skipflags', 0)
3297 3300 if skipflags:
3298 3301 skipflags &= self.flags(rev)
3299 3302
3300 3303 _verify_revision(self, skipflags, state, node)
3301 3304
3302 3305 l1 = self.rawsize(rev)
3303 3306 l2 = len(self.rawdata(node))
3304 3307
3305 3308 if l1 != l2:
3306 3309 yield revlogproblem(
3307 3310 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3308 3311 node=node,
3309 3312 )
3310 3313
3311 3314 except error.CensoredNodeError:
3312 3315 if state[b'erroroncensored']:
3313 3316 yield revlogproblem(
3314 3317 error=_(b'censored file data'), node=node
3315 3318 )
3316 3319 state[b'skipread'].add(node)
3317 3320 except Exception as e:
3318 3321 yield revlogproblem(
3319 3322 error=_(b'unpacking %s: %s')
3320 3323 % (short(node), stringutil.forcebytestr(e)),
3321 3324 node=node,
3322 3325 )
3323 3326 state[b'skipread'].add(node)
3324 3327
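As a concrete instance of the "meta" column in the table above (a minimal sketch with made-up metadata), the rawtext carries a b'\1\n'-delimited header of length LM, so len(read()) comes out LM bytes shorter than len(rawtext):

    rawtext = b'\x01\ncopy: a.txt\n\x01\nactual file body'
    lm = rawtext.index(b'\x01\n', 2) + 2   # LM: length of the metadata header
    text = rawtext[lm:]                    # what read() would return
    assert len(text) == len(rawtext) - lm  # L2 - LM, as in the table
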
3325 3328 def storageinfo(
3326 3329 self,
3327 3330 exclusivefiles=False,
3328 3331 sharedfiles=False,
3329 3332 revisionscount=False,
3330 3333 trackedsize=False,
3331 3334 storedsize=False,
3332 3335 ):
3333 3336 d = {}
3334 3337
3335 3338 if exclusivefiles:
3336 3339 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3337 3340 if not self._inline:
3338 3341 d[b'exclusivefiles'].append((self.opener, self._datafile))
3339 3342
3340 3343 if sharedfiles:
3341 3344 d[b'sharedfiles'] = []
3342 3345
3343 3346 if revisionscount:
3344 3347 d[b'revisionscount'] = len(self)
3345 3348
3346 3349 if trackedsize:
3347 3350 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3348 3351
3349 3352 if storedsize:
3350 3353 d[b'storedsize'] = sum(
3351 3354 self.opener.stat(path).st_size for path in self.files()
3352 3355 )
3353 3356
3354 3357 return d
3355 3358
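A hedged usage sketch (rl stands for any revlog instance): callers request only the figures they need, since trackedsize walks every revision and storedsize stats every file on disk:

    info = rl.storageinfo(revisionscount=True, trackedsize=True)
    print(info[b'revisionscount'], info[b'trackedsize'])
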
3356 3359 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3357 3360 if not self.hassidedata:
3358 3361 return
3359 3362 # revlog formats with sidedata support do not support inline data
3360 3363 assert not self._inline
3361 3364 if not helpers[1] and not helpers[2]:
3362 3365 # Nothing to generate or remove
3363 3366 return
3364 3367
3365 3368 new_entries = []
3366 3369 # append the new sidedata
3367 3370 with self._writing(transaction):
3368 3371 ifh, dfh = self._writinghandles
3369 3372 if self._docket is not None:
3370 3373 dfh.seek(self._docket.data_end, os.SEEK_SET)
3371 3374 else:
3372 3375 dfh.seek(0, os.SEEK_END)
3373 3376
3374 3377 current_offset = dfh.tell()
3375 3378 for rev in range(startrev, endrev + 1):
3376 3379 entry = self.index[rev]
3377 3380 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3378 3381 store=self,
3379 3382 sidedata_helpers=helpers,
3380 3383 sidedata={},
3381 3384 rev=rev,
3382 3385 )
3383 3386
3384 3387 serialized_sidedata = sidedatautil.serialize_sidedata(
3385 3388 new_sidedata
3386 3389 )
3387 3390
3388 3391 sidedata_compression_mode = COMP_MODE_INLINE
3389 3392 if serialized_sidedata and self.hassidedata:
3390 3393 sidedata_compression_mode = COMP_MODE_PLAIN
3391 3394 h, comp_sidedata = self.compress(serialized_sidedata)
3392 3395 if (
3393 3396 h != b'u'
3394 3397 and comp_sidedata[0:1] != b'\0'
3395 3398 and len(comp_sidedata) < len(serialized_sidedata)
3396 3399 ):
3397 3400 assert not h
3398 3401 if (
3399 3402 comp_sidedata[0:1]
3400 3403 == self._docket.default_compression_header
3401 3404 ):
3402 3405 sidedata_compression_mode = COMP_MODE_DEFAULT
3403 3406 serialized_sidedata = comp_sidedata
3404 3407 else:
3405 3408 sidedata_compression_mode = COMP_MODE_INLINE
3406 3409 serialized_sidedata = comp_sidedata
3407 3410 if entry[8] != 0 or entry[9] != 0:
3408 3411 # rewriting entries that already have sidedata is not
3409 3412 # supported yet, because it introduces garbage data in the
3410 3413 # revlog.
3411 3414 msg = b"rewriting existing sidedata is not supported yet"
3412 3415 raise error.Abort(msg)
3413 3416
3414 3417 # Apply (potential) flags to add and to remove after running
3415 3418 # the sidedata helpers
3416 3419 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3417 3420 entry_update = (
3418 3421 current_offset,
3419 3422 len(serialized_sidedata),
3420 3423 new_offset_flags,
3421 3424 sidedata_compression_mode,
3422 3425 )
3423 3426
3424 3427 # the sidedata computation might have moved the file cursors around
3425 3428 dfh.seek(current_offset, os.SEEK_SET)
3426 3429 dfh.write(serialized_sidedata)
3427 3430 new_entries.append(entry_update)
3428 3431 current_offset += len(serialized_sidedata)
3429 3432 if self._docket is not None:
3430 3433 self._docket.data_end = dfh.tell()
3431 3434
3432 3435 # rewrite the new index entries
3433 3436 ifh.seek(startrev * self.index.entry_size)
3434 3437 for i, e in enumerate(new_entries):
3435 3438 rev = startrev + i
3436 3439 self.index.replace_sidedata_info(rev, *e)
3437 3440 packed = self.index.entry_binary(rev)
3438 3441 if rev == 0 and self._docket is None:
3439 3442 header = self._format_flags | self._format_version
3440 3443 header = self.index.pack_header(header)
3441 3444 packed = header + packed
3442 3445 ifh.write(packed)
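The compression-mode selection in the loop above condenses to the following hedged sketch (pick_sidedata_mode is a hypothetical helper, and the byte-wise comparisons use slices so they behave the same on Python 2 and 3):

    def pick_sidedata_mode(revlog, serialized):
        h, comp = revlog.compress(serialized)
        if h == b'u' or comp[0:1] == b'\0' or len(comp) >= len(serialized):
            return COMP_MODE_PLAIN, serialized   # store uncompressed
        if comp[0:1] == revlog._docket.default_compression_header:
            return COMP_MODE_DEFAULT, comp       # header implied by the docket
        return COMP_MODE_INLINE, comp            # header kept inside the chunk
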
@@ -1,179 +1,190 b''
1 1 # revlogdeltas.py - constants used for revlog logic.
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import struct
13 13
14 14 from ..interfaces import repository
15 15
16 16 ### Internal utility constants
17 17
18 18 KIND_CHANGELOG = 1001 # above 256 so it can never compare equal to a byte value
19 19 KIND_MANIFESTLOG = 1002
20 20 KIND_FILELOG = 1003
21 21 KIND_OTHER = 1004
22 22
23 23 ALL_KINDS = {
24 24 KIND_CHANGELOG,
25 25 KIND_MANIFESTLOG,
26 26 KIND_FILELOG,
27 27 KIND_OTHER,
28 28 }
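A hedged illustration of why the kinds live above 256: a revlog target pairs a kind with an identifier, and an int above the byte range can never collide with a single byte value (the tuple below is made up for illustration):

    target = (KIND_FILELOG, b'data/path/to/file')
    assert target[0] in ALL_KINDS
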
29 29
30 30 ### main revlog header
31 31
32 32 INDEX_HEADER = struct.Struct(b">I")
33 33
34 34 ## revlog version
35 35 REVLOGV0 = 0
36 36 REVLOGV1 = 1
37 37 # Dummy value until file format is finalized.
38 38 REVLOGV2 = 0xDEAD
39 # Dummy value until file format is finalized.
40 CHANGELOGV2 = 0xD34D
39 41
40 42 ## global revlog header flags
41 43 # Shared across v1 and v2.
42 44 FLAG_INLINE_DATA = 1 << 16
43 45 # Only used by v1, implied by v2.
44 46 FLAG_GENERALDELTA = 1 << 17
45 47 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
46 48 REVLOG_DEFAULT_FORMAT = REVLOGV1
47 49 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
48 50 REVLOGV0_FLAGS = 0
49 51 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
50 52 REVLOGV2_FLAGS = FLAG_INLINE_DATA
53 CHANGELOGV2_FLAGS = 0
51 54
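A small worked illustration of the header layout these constants describe: the low 16 bits of the on-disk header select the version, and the high 16 bits carry the per-version feature flags listed above:

    header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
    assert header & 0xFFFF == REVLOGV1
    assert header & ~0xFFFF == REVLOGV1_FLAGS
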
52 55 ### individual entry
53 56
54 57 ## index v0:
55 58 # 4 bytes: offset
56 59 # 4 bytes: compressed length
57 60 # 4 bytes: base rev
58 61 # 4 bytes: link rev
59 62 # 20 bytes: parent 1 nodeid
60 63 # 20 bytes: parent 2 nodeid
61 64 # 20 bytes: nodeid
62 65 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
63 66
64 67 ## index v1
65 68 # 6 bytes: offset
66 69 # 2 bytes: flags
67 70 # 4 bytes: compressed length
68 71 # 4 bytes: uncompressed length
69 72 # 4 bytes: base rev
70 73 # 4 bytes: link rev
71 74 # 4 bytes: parent 1 rev
72 75 # 4 bytes: parent 2 rev
73 76 # 32 bytes: nodeid
74 77 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
75 78 assert INDEX_ENTRY_V1.size == 32 * 2
76 79
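A hedged round-trip sketch for this v1 layout (all field values are made up): the leading Q packs the 6-byte offset and 2-byte flags into one big-endian 64-bit integer, which is why readers mask the first field with 0xFFFF to recover the flags:

    offset, flags = 0, 0
    packed = INDEX_ENTRY_V1.pack(
        (offset << 16) | flags,  # offset and flags share the uint64
        11, 11,                  # compressed / uncompressed lengths
        0, 0,                    # base rev, link rev
        -1, -1,                  # parent revs (nullrev)
        b'\0' * 20,              # nodeid; the 12x pads the entry to 64 bytes
    )
    assert len(packed) == INDEX_ENTRY_V1.size == 64
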
77 80 # 6 bytes: offset
78 81 # 2 bytes: flags
79 82 # 4 bytes: compressed length
80 83 # 4 bytes: uncompressed length
81 84 # 4 bytes: base rev
82 85 # 4 bytes: link rev
83 86 # 4 bytes: parent 1 rev
84 87 # 4 bytes: parent 2 rev
85 88 # 32 bytes: nodeid
86 89 # 8 bytes: sidedata offset
87 90 # 4 bytes: sidedata compressed length
88 91 # 1 byte: compression mode (2 lower bits are data_compression_mode)
89 92 # 19 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
90 93 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
91 94 assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size
92 95
93 96 # revlog index flags
94 97
95 98 # For historical reasons, revlog's internal flags were exposed via the
96 99 # wire protocol and are even exposed in parts of the storage APIs.
97 100
98 101 # revision has censor metadata, must be verified
99 102 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
100 103 # revision hash does not match data (narrowhg)
101 104 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
102 105 # revision data is stored externally
103 106 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
104 107 # revision changes files in a way that could affect copy tracing.
105 108 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
106 109 REVIDX_DEFAULT_FLAGS = 0
107 110 # stable order in which flags need to be processed and their processors applied
108 111 REVIDX_FLAGS_ORDER = [
109 112 REVIDX_ISCENSORED,
110 113 REVIDX_ELLIPSIS,
111 114 REVIDX_EXTSTORED,
112 115 REVIDX_HASCOPIESINFO,
113 116 ]
114 117
115 118 # bitmask of flags that could cause rawdata content change
116 119 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
117 120
118 121 ## chunk compression mode constants:
119 122 # These constants are used in revlog versions >= 2 to denote the compression
120 123 # for a chunk.
121 124
122 125 # Chunks use no compression; the data stored on disk can be used directly
123 126 # as the chunk value, without any header information prefixed.
124 127 COMP_MODE_PLAIN = 0
125 128
126 129 # Chunks use the "default compression" for the revlog (usually defined in the
127 130 # revlog docket). A header is still used.
128 131 #
129 132 # XXX: keeping a header is probably not useful and we should probably drop it.
130 133 #
131 134 # XXX: The value of allowing mixed types of compression in the revlog is unclear
132 135 # and we should consider making PLAIN/DEFAULT the only available mode for
133 136 # revlog v2, disallowing INLINE mode.
134 137 COMP_MODE_DEFAULT = 1
135 138
136 139 # Chunks use a compression mode stored "inline" at the start of the chunk
137 140 # itself. This is the mode always used for revlog versions "0" and "1".
138 141 COMP_MODE_INLINE = 2
139 142
140 143 SUPPORTED_FLAGS = {
141 144 REVLOGV0: REVLOGV0_FLAGS,
142 145 REVLOGV1: REVLOGV1_FLAGS,
143 146 REVLOGV2: REVLOGV2_FLAGS,
147 CHANGELOGV2: CHANGELOGV2_FLAGS,
144 148 }
145 149
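A hedged sketch of how a header might be validated against this table (check_header is a hypothetical helper, not the API revlog actually exposes):

    def check_header(header):
        version = header & 0xFFFF
        flags = header & ~0xFFFF
        allowed = SUPPORTED_FLAGS.get(version)
        if allowed is None:
            raise ValueError('unknown revlog version %d' % version)
        if flags & ~allowed:
            raise ValueError('unsupported flags 0x%x' % (flags & ~allowed))
        return version, flags
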
146 150 _no = lambda flags: False
147 151 _yes = lambda flags: True
148 152
149 153
150 154 def _from_flag(flag):
151 155 return lambda flags: bool(flags & flag)
152 156
153 157
154 158 FEATURES_BY_VERSION = {
155 159 REVLOGV0: {
156 160 b'inline': _no,
157 161 b'generaldelta': _no,
158 162 b'sidedata': False,
159 163 b'docket': False,
160 164 },
161 165 REVLOGV1: {
162 166 b'inline': _from_flag(FLAG_INLINE_DATA),
163 167 b'generaldelta': _from_flag(FLAG_GENERALDELTA),
164 168 b'sidedata': False,
165 169 b'docket': False,
166 170 },
167 171 REVLOGV2: {
168 172 # The point of inline-revlog is to reduce the number of files used in
169 173 # the store. Using a docket defeats this purpose, so we need other
170 174 # means to reduce the number of files for revlogv2.
171 175 b'inline': _no,
172 176 b'generaldelta': _yes,
173 177 b'sidedata': True,
174 178 b'docket': True,
175 179 },
180 CHANGELOGV2: {
181 b'inline': _no,
182 # General delta is useless for the changelog, since we never store deltas in it
183 b'generaldelta': _no,
184 b'sidedata': True,
185 b'docket': True,
186 },
176 187 }
177 188
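A hedged usage sketch for this table: each feature value is either a plain bool or a predicate taking the header flags, so callers probe it like this:

    features = FEATURES_BY_VERSION[REVLOGV1]
    inline = features[b'inline'](FLAG_INLINE_DATA)  # True, the flag is set
    sidedata = features[b'sidedata']                # plain bool, not callable
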
178 189
179 190 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
@@ -1,179 +1,180 b''
1 1 # docket - code related to revlog "docket"
2 2 #
3 3 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 ### Revlog docket file
9 9 #
10 10 # The revlog is stored on disk using multiple files:
11 11 #
12 12 # * a small docket file, containing metadata and a pointer,
13 13 #
14 14 # * an index file, containing fixed width information about revisions,
15 15 #
16 16 # * a data file, containing variable width data for these revisions,
17 17
18 18 from __future__ import absolute_import
19 19
20 20 import struct
21 21
22 22 from .. import (
23 23 error,
24 24 util,
25 25 )
26 26
27 27 from . import (
28 28 constants,
29 29 )
30 30
31 31 # Docket format
32 32 #
33 33 # * 4 bytes: revlog version
34 34 # | This is mandatory as docket must be compatible with the previous
35 35 # | revlog index header.
36 36 # * 8 bytes: size of index-data
37 37 # * 8 bytes: pending size of index-data
38 38 # * 8 bytes: size of data
39 39 # * 8 bytes: pending size of data
40 40 # * 1 bytes: default compression header
41 41 S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'QQQQc')
42 42
43 43
44 44 class RevlogDocket(object):
45 45 """metadata associated with revlog"""
46 46
47 47 def __init__(
48 48 self,
49 49 revlog,
50 50 use_pending=False,
51 51 version_header=None,
52 52 index_end=0,
53 53 pending_index_end=0,
54 54 data_end=0,
55 55 pending_data_end=0,
56 56 default_compression_header=None,
57 57 ):
58 58 self._version_header = version_header
59 59 self._read_only = bool(use_pending)
60 60 self._dirty = False
61 61 self._radix = revlog.radix
62 62 self._path = revlog._docket_file
63 63 self._opener = revlog.opener
64 64 # these asserts should hold as long as we have a single index filename
65 65 assert index_end <= pending_index_end
66 66 assert data_end <= pending_data_end
67 67 self._initial_index_end = index_end
68 68 self._pending_index_end = pending_index_end
69 69 self._initial_data_end = data_end
70 70 self._pending_data_end = pending_data_end
71 71 if use_pending:
72 72 self._index_end = self._pending_index_end
73 73 self._data_end = self._pending_data_end
74 74 else:
75 75 self._index_end = self._initial_index_end
76 76 self._data_end = self._initial_data_end
77 77 self.default_compression_header = default_compression_header
78 78
79 79 def index_filepath(self):
80 80 """file path to the current index file associated to this docket"""
81 81 # very simplistic version at first
82 82 return b"%s.idx" % self._radix
83 83
84 84 @property
85 85 def index_end(self):
86 86 return self._index_end
87 87
88 88 @index_end.setter
89 89 def index_end(self, new_size):
90 90 if new_size != self._index_end:
91 91 self._index_end = new_size
92 92 self._dirty = True
93 93
94 94 @property
95 95 def data_end(self):
96 96 return self._data_end
97 97
98 98 @data_end.setter
99 99 def data_end(self, new_size):
100 100 if new_size != self._data_end:
101 101 self._data_end = new_size
102 102 self._dirty = True
103 103
104 104 def write(self, transaction, pending=False, stripping=False):
105 105 """write the modification of disk if any
106 106
107 107 This make the new content visible to all process"""
108 108 if not self._dirty:
109 109 return False
110 110 else:
111 111 if self._read_only:
112 112 msg = b'writing read-only docket: %s'
113 113 msg %= self._path
114 114 raise error.ProgrammingError(msg)
115 115 if not stripping:
116 116 # XXX we could leverage the docket while stripping. However, it
117 117 # is not powerful enough at the time of this comment
118 118 transaction.addbackup(self._path, location=b'store')
119 119 with self._opener(self._path, mode=b'w', atomictemp=True) as f:
120 120 f.write(self._serialize(pending=pending))
121 121 # if pending, we still need to write the final data eventually
122 122 self._dirty = pending
123 123 return True
124 124
125 125 def _serialize(self, pending=False):
126 126 if pending:
127 127 official_index_end = self._initial_index_end
128 128 official_data_end = self._initial_data_end
129 129 else:
130 130 official_index_end = self._index_end
131 131 official_data_end = self._data_end
132 132
133 133 # this assert should hold as long as we have a single index filename
134 134 assert official_data_end <= self._data_end
135 135 data = (
136 136 self._version_header,
137 137 official_index_end,
138 138 self._index_end,
139 139 official_data_end,
140 140 self._data_end,
141 141 self.default_compression_header,
142 142 )
143 143 return S_HEADER.pack(*data)
144 144
145 145
146 146 def default_docket(revlog, version_header):
147 147 """given a revlog version a new docket object for the given revlog"""
148 if (version_header & 0xFFFF) != constants.REVLOGV2:
148 rl_version = version_header & 0xFFFF
149 if rl_version not in (constants.REVLOGV2, constants.CHANGELOGV2):
149 150 return None
150 151 comp = util.compengines[revlog._compengine].revlogheader()
151 152 docket = RevlogDocket(
152 153 revlog,
153 154 version_header=version_header,
154 155 default_compression_header=comp,
155 156 )
156 157 docket._dirty = True
157 158 return docket
158 159
159 160
160 161 def parse_docket(revlog, data, use_pending=False):
161 162 """given some docket data return a docket object for the given revlog"""
162 163 header = S_HEADER.unpack(data[: S_HEADER.size])
163 164 version_header = header[0]
164 165 index_size = header[1]
165 166 pending_index_size = header[2]
166 167 data_size = header[3]
167 168 pending_data_size = header[4]
168 169 default_compression_header = header[5]
169 170 docket = RevlogDocket(
170 171 revlog,
171 172 use_pending=use_pending,
172 173 version_header=version_header,
173 174 index_end=index_size,
174 175 pending_index_end=pending_index_size,
175 176 data_end=data_size,
176 177 pending_data_end=pending_data_size,
177 178 default_compression_header=default_compression_header,
178 179 )
179 180 return docket
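A hedged round-trip sketch tying _serialize and parse_docket together (docket and revlog stand for live objects): serializing with pending=True advertises the pre-transaction sizes as official, and only a reader that passes use_pending=True sees the in-flight ones:

    data = docket._serialize(pending=True)
    normal = parse_docket(revlog, data)
    staged = parse_docket(revlog, data, use_pending=True)
    assert normal.index_end <= staged.index_end
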