revlog: fix index computation during inline->non-inline transition...
Joerg Sonnenberger
r48064:21ed126b default
@@ -0,0 +1,5 b''
1 import os
2
3
4 def killme(ui, repo, hooktype, **kwargs):
5 os._exit(80)
@@ -0,0 +1,35 b''
1 Test correctness of revlog inline -> non-inline transition
2 ----------------------------------------------------------
3
4 Test offset computation to correctly factor in the index entries themselves.
5 The test repo has one small, one moderate, and one big change. The clone has
6 the small and moderate changes and will transition to non-inline storage when
7 adding the big change.
8
9 $ hg init troffset-computation --config format.revlog-compression=none
10 $ cd troffset-computation
11 $ printf '% 20d' '1' > file
12 $ hg commit -Aqm_
13 $ printf '% 1024d' '1' > file
14 $ hg commit -Aqm_
15 $ dd if=/dev/zero of=file bs=1k count=128 > /dev/null 2>&1
16 $ hg commit -Aqm_
17 $ cd ..
18
19 $ hg clone -r 1 troffset-computation troffset-computation-copy --config format.revlog-compression=none -q
20 $ cd troffset-computation-copy
21 $ cat > .hg/hgrc <<EOF
22 > [hooks]
23 > pretxnchangegroup = python:$TESTDIR/helper-killhook.py:killme
24 > EOF
25 #if chg
26 $ hg pull ../troffset-computation
27 pulling from ../troffset-computation
28 [255]
29 #else
30 $ hg pull ../troffset-computation
31 pulling from ../troffset-computation
32 [80]
33 #endif
34 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file | tail -1
35 data/file.i 128
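For context: the offset computation this test exercises comes from the way
inline revlogs interleave each index entry with its revision data in a single
".i" file. The physical position of a revision's data chunk is its logical
data offset plus the space taken by the index entries written up to and
including its own. A minimal sketch of that adjustment, assuming a fixed
per-revision entry size (the helper name is invented for illustration):

    # Inline layout: entry(0), data(0), entry(1), data(1), ...
    # The index records only the logical data offset, so the on-disk
    # position must also account for the interleaved index entries.
    def physical_offset(data_offset, rev, entry_size):
        # one index entry precedes each data chunk, including rev's own
        return data_offset + (rev + 1) * entry_size

Once the revlog transitions to non-inline storage this adjustment no longer
applies, and mixing the two conventions is what produced the bad journal
offset checked above.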
@@ -1,3454 +1,3454 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 CHANGELOGV2,
39 39 COMP_MODE_DEFAULT,
40 40 COMP_MODE_INLINE,
41 41 COMP_MODE_PLAIN,
42 42 FEATURES_BY_VERSION,
43 43 FLAG_GENERALDELTA,
44 44 FLAG_INLINE_DATA,
45 45 INDEX_HEADER,
46 46 KIND_CHANGELOG,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 SUPPORTED_FLAGS,
56 56 )
57 57 from .revlogutils.flagutil import (
58 58 REVIDX_DEFAULT_FLAGS,
59 59 REVIDX_ELLIPSIS,
60 60 REVIDX_EXTSTORED,
61 61 REVIDX_FLAGS_ORDER,
62 62 REVIDX_HASCOPIESINFO,
63 63 REVIDX_ISCENSORED,
64 64 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 65 )
66 66 from .thirdparty import attr
67 67 from . import (
68 68 ancestor,
69 69 dagop,
70 70 error,
71 71 mdiff,
72 72 policy,
73 73 pycompat,
74 74 templatefilters,
75 75 util,
76 76 )
77 77 from .interfaces import (
78 78 repository,
79 79 util as interfaceutil,
80 80 )
81 81 from .revlogutils import (
82 82 deltas as deltautil,
83 83 docket as docketutil,
84 84 flagutil,
85 85 nodemap as nodemaputil,
86 86 revlogv0,
87 87 sidedata as sidedatautil,
88 88 )
89 89 from .utils import (
90 90 storageutil,
91 91 stringutil,
92 92 )
93 93
94 94 # blanked usage of all the names to prevent pyflakes constraints
95 95 # We need these names available in the module for extensions.
96 96
97 97 REVLOGV0
98 98 REVLOGV1
99 99 REVLOGV2
100 100 FLAG_INLINE_DATA
101 101 FLAG_GENERALDELTA
102 102 REVLOG_DEFAULT_FLAGS
103 103 REVLOG_DEFAULT_FORMAT
104 104 REVLOG_DEFAULT_VERSION
105 105 REVLOGV1_FLAGS
106 106 REVLOGV2_FLAGS
107 107 REVIDX_ISCENSORED
108 108 REVIDX_ELLIPSIS
109 109 REVIDX_HASCOPIESINFO
110 110 REVIDX_EXTSTORED
111 111 REVIDX_DEFAULT_FLAGS
112 112 REVIDX_FLAGS_ORDER
113 113 REVIDX_RAWTEXT_CHANGING_FLAGS
114 114
115 115 parsers = policy.importmod('parsers')
116 116 rustancestor = policy.importrust('ancestor')
117 117 rustdagop = policy.importrust('dagop')
118 118 rustrevlog = policy.importrust('revlog')
119 119
120 120 # Aliased for performance.
121 121 _zlibdecompress = zlib.decompress
122 122
123 123 # max size of revlog with inline data
124 124 _maxinline = 131072
125 125 _chunksize = 1048576
126 126
127 127 # Flag processors for REVIDX_ELLIPSIS.
128 128 def ellipsisreadprocessor(rl, text):
129 129 return text, False
130 130
131 131
132 132 def ellipsiswriteprocessor(rl, text):
133 133 return text, False
134 134
135 135
136 136 def ellipsisrawprocessor(rl, text):
137 137 return False
138 138
139 139
140 140 ellipsisprocessor = (
141 141 ellipsisreadprocessor,
142 142 ellipsiswriteprocessor,
143 143 ellipsisrawprocessor,
144 144 )
145 145
146 146
147 147 def offset_type(offset, type):
148 148 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
149 149 raise ValueError(b'unknown revlog index flags')
150 150 return int(int(offset) << 16 | type)
151 151
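As a quick illustration, the packed field produced by offset_type() is the
same value later decoded by start() and flags() (numbers invented):

    field = offset_type(4096, 0)  # 4096 << 16 | 0
    offset = field >> 16          # 4096, as in revlog.start()
    flags = field & 0xFFFF        # 0, as in revlog.flags()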
152 152
153 153 def _verify_revision(rl, skipflags, state, node):
154 154 """Verify the integrity of the given revlog ``node`` while providing a hook
155 155 point for extensions to influence the operation."""
156 156 if skipflags:
157 157 state[b'skipread'].add(node)
158 158 else:
159 159 # Side-effect: read content and verify hash.
160 160 rl.revision(node)
161 161
162 162
163 163 # True if a fast implementation for persistent-nodemap is available
164 164 #
165 165 # We also consider we have a "fast" implementation in "pure" python because
166 166 # people using pure don't really have performance considerations (and a
167 167 # wheelbarrow of other slowness sources)
168 168 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
169 169 parsers, 'BaseIndexObject'
170 170 )
171 171
172 172
173 173 @attr.s(slots=True, frozen=True)
174 174 class _revisioninfo(object):
175 175 """Information about a revision that allows building its fulltext
176 176 node: expected hash of the revision
177 177 p1, p2: parent revs of the revision
178 178 btext: built text cache consisting of a one-element list
179 179 cachedelta: (baserev, uncompressed_delta) or None
180 180 flags: flags associated with the revision storage
181 181
182 182 One of btext[0] or cachedelta must be set.
183 183 """
184 184
185 185 node = attr.ib()
186 186 p1 = attr.ib()
187 187 p2 = attr.ib()
188 188 btext = attr.ib()
189 189 textlen = attr.ib()
190 190 cachedelta = attr.ib()
191 191 flags = attr.ib()
192 192
193 193
194 194 @interfaceutil.implementer(repository.irevisiondelta)
195 195 @attr.s(slots=True)
196 196 class revlogrevisiondelta(object):
197 197 node = attr.ib()
198 198 p1node = attr.ib()
199 199 p2node = attr.ib()
200 200 basenode = attr.ib()
201 201 flags = attr.ib()
202 202 baserevisionsize = attr.ib()
203 203 revision = attr.ib()
204 204 delta = attr.ib()
205 205 sidedata = attr.ib()
206 206 protocol_flags = attr.ib()
207 207 linknode = attr.ib(default=None)
208 208
209 209
210 210 @interfaceutil.implementer(repository.iverifyproblem)
211 211 @attr.s(frozen=True)
212 212 class revlogproblem(object):
213 213 warning = attr.ib(default=None)
214 214 error = attr.ib(default=None)
215 215 node = attr.ib(default=None)
216 216
217 217
218 218 def parse_index_v1(data, inline):
219 219 # call the C implementation to parse the index data
220 220 index, cache = parsers.parse_index2(data, inline)
221 221 return index, cache
222 222
223 223
224 224 def parse_index_v2(data, inline):
225 225 # call the C implementation to parse the index data
226 226 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
227 227 return index, cache
228 228
229 229
230 230 def parse_index_cl_v2(data, inline):
231 231 # call the C implementation to parse the index data
232 232 assert not inline
233 233 from .pure.parsers import parse_index_cl_v2
234 234
235 235 index, cache = parse_index_cl_v2(data)
236 236 return index, cache
237 237
238 238
239 239 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
240 240
241 241 def parse_index_v1_nodemap(data, inline):
242 242 index, cache = parsers.parse_index_devel_nodemap(data, inline)
243 243 return index, cache
244 244
245 245
246 246 else:
247 247 parse_index_v1_nodemap = None
248 248
249 249
250 250 def parse_index_v1_mixed(data, inline):
251 251 index, cache = parse_index_v1(data, inline)
252 252 return rustrevlog.MixedIndex(index), cache
253 253
254 254
255 255 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
256 256 # signed integer)
257 257 _maxentrysize = 0x7FFFFFFF
258 258
259 259
260 260 class revlog(object):
261 261 """
262 262 the underlying revision storage object
263 263
264 264 A revlog consists of two parts, an index and the revision data.
265 265
266 266 The index is a file with a fixed record size containing
267 267 information on each revision, including its nodeid (hash), the
268 268 nodeids of its parents, the position and offset of its data within
269 269 the data file, and the revision it's based on. Finally, each entry
270 270 contains a linkrev entry that can serve as a pointer to external
271 271 data.
272 272
273 273 The revision data itself is a linear collection of data chunks.
274 274 Each chunk represents a revision and is usually represented as a
275 275 delta against the previous chunk. To bound lookup time, runs of
276 276 deltas are limited to about 2 times the length of the original
277 277 version data. This makes retrieval of a version proportional to
278 278 its size, or O(1) relative to the number of revisions.
279 279
280 280 Both pieces of the revlog are written to in an append-only
281 281 fashion, which means we never need to rewrite a file to insert or
282 282 remove data, and can use some simple techniques to avoid the need
283 283 for locking while reading.
284 284
285 285 If checkambig, indexfile is opened with checkambig=True at
286 286 writing, to avoid file stat ambiguity.
287 287
288 288 If mmaplargeindex is True, and an mmapindexthreshold is set, the
289 289 index will be mmapped rather than read if it is larger than the
290 290 configured threshold.
291 291
292 292 If censorable is True, the revlog can have censored revisions.
293 293
294 294 If `upperboundcomp` is not None, this is the expected maximal gain from
295 295 compression for the data content.
296 296
297 297 `concurrencychecker` is an optional function that receives 3 arguments: a
298 298 file handle, a filename, and an expected position. It should check whether
299 299 the current position in the file handle is valid, and log/warn/fail (by
300 300 raising).
301 301
302 302
303 303 Internal details
304 304 ----------------
305 305
306 306 A large part of the revlog logic deals with revisions' "index entries", tuple
307 307 objects that contain the same "items" regardless of the revlog version.
308 308 Different versions will have different ways of storing these items (sometimes
309 309 not having them at all), but the tuple will always be the same. New fields
310 310 are usually added at the end to avoid breaking existing code that relies
311 311 on the existing order. The fields are defined as follows:
312 312
313 313 [0] offset:
314 314 The byte index of the start of revision data chunk.
315 315 That value is shifted up by 16 bits. Use "offset = field >> 16" to
316 316 retrieve it.
317 317
318 318 flags:
319 319 A flag field that carries special information or changes the behavior
320 320 of the revision. (see `REVIDX_*` constants for details)
321 321 The flag field only occupies the low 16 bits of this field,
322 322 use "flags = field & 0xFFFF" to retrieve the value.
323 323
324 324 [1] compressed length:
325 325 The size, in bytes, of the chunk on disk
326 326
327 327 [2] uncompressed length:
328 328 The size, in bytes, of the full revision once reconstructed.
329 329
330 330 [3] base rev:
331 331 Either the base of the revision delta chain (without general
332 332 delta), or the base of the delta (stored in the data chunk)
333 333 with general delta.
334 334
335 335 [4] link rev:
336 336 Changelog revision number of the changeset introducing this
337 337 revision.
338 338
339 339 [5] parent 1 rev:
340 340 Revision number of the first parent
341 341
342 342 [6] parent 2 rev:
343 343 Revision number of the second parent
344 344
345 345 [7] node id:
346 346 The node id of the current revision
347 347
348 348 [8] sidedata offset:
349 349 The byte index of the start of the revision's side-data chunk.
350 350
351 351 [9] sidedata chunk length:
352 352 The size, in bytes, of the revision's side-data chunk.
353 353
354 354 [10] data compression mode:
355 355 two bits that detail the way the data chunk is compressed on disk.
356 356 (see "COMP_MODE_*" constants for details). For revlog version 0 and
357 357 1 this will always be COMP_MODE_INLINE.
358 358
359 359 [11] side-data compression mode:
360 360 two bits that detail the way the sidedata chunk is compressed on disk.
361 361 (see "COMP_MODE_*" constants for details)
362 362 """
363 363
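As a hedged illustration of the entry layout documented above, an index
tuple can be unpacked positionally (variable names are invented here; only
the positions come from the field list):

    e = rl.index[rev]
    offset_flags, comp_len, raw_len, base_rev, link_rev = e[0:5]
    p1_rev, p2_rev, node_id, sd_offset, sd_len = e[5:10]
    data_comp_mode, sd_comp_mode = e[10:12]
    offset = offset_flags >> 16    # field [0], high bits
    flags = offset_flags & 0xFFFF  # field [0], low 16 bits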
364 364 _flagserrorclass = error.RevlogError
365 365
366 366 def __init__(
367 367 self,
368 368 opener,
369 369 target,
370 370 radix,
371 371 postfix=None, # only exist for `tmpcensored` now
372 372 checkambig=False,
373 373 mmaplargeindex=False,
374 374 censorable=False,
375 375 upperboundcomp=None,
376 376 persistentnodemap=False,
377 377 concurrencychecker=None,
378 378 trypending=False,
379 379 ):
380 380 """
381 381 create a revlog object
382 382
383 383 opener is a function that abstracts the file opening operation
384 384 and can be used to implement COW semantics or the like.
385 385
386 386 `target`: a (KIND, ID) tuple that identifies the content stored in
387 387 this revlog. It helps the rest of the code to understand what the revlog
388 388 is about without having to resort to heuristics and index filename
389 389 analysis. Note that this must reliably be set by normal code, but
390 390 that test, debug, or performance measurement code might not set this to
391 391 an accurate value.
392 392 """
393 393 self.upperboundcomp = upperboundcomp
394 394
395 395 self.radix = radix
396 396
397 397 self._docket_file = None
398 398 self._indexfile = None
399 399 self._datafile = None
400 400 self._nodemap_file = None
401 401 self.postfix = postfix
402 402 self._trypending = trypending
403 403 self.opener = opener
404 404 if persistentnodemap:
405 405 self._nodemap_file = nodemaputil.get_nodemap_file(self)
406 406
407 407 assert target[0] in ALL_KINDS
408 408 assert len(target) == 2
409 409 self.target = target
410 410 # When True, indexfile is opened with checkambig=True at writing, to
411 411 # avoid file stat ambiguity.
412 412 self._checkambig = checkambig
413 413 self._mmaplargeindex = mmaplargeindex
414 414 self._censorable = censorable
415 415 # 3-tuple of (node, rev, text) for a raw revision.
416 416 self._revisioncache = None
417 417 # Maps rev to chain base rev.
418 418 self._chainbasecache = util.lrucachedict(100)
419 419 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
420 420 self._chunkcache = (0, b'')
421 421 # How much data to read and cache into the raw revlog data cache.
422 422 self._chunkcachesize = 65536
423 423 self._maxchainlen = None
424 424 self._deltabothparents = True
425 425 self.index = None
426 426 self._docket = None
427 427 self._nodemap_docket = None
428 428 # Mapping of partial identifiers to full nodes.
429 429 self._pcache = {}
430 430 # Mapping of revision integer to full node.
431 431 self._compengine = b'zlib'
432 432 self._compengineopts = {}
433 433 self._maxdeltachainspan = -1
434 434 self._withsparseread = False
435 435 self._sparserevlog = False
436 436 self.hassidedata = False
437 437 self._srdensitythreshold = 0.50
438 438 self._srmingapsize = 262144
439 439
440 440 # Make copy of flag processors so each revlog instance can support
441 441 # custom flags.
442 442 self._flagprocessors = dict(flagutil.flagprocessors)
443 443
444 444 # 2-tuple of file handles being used for active writing.
445 445 self._writinghandles = None
446 446 # prevent nesting of addgroup
447 447 self._adding_group = None
448 448
449 449 self._loadindex()
450 450
451 451 self._concurrencychecker = concurrencychecker
452 452
453 453 def _init_opts(self):
454 454 """process options (from above/config) to setup associated default revlog mode
455 455
456 456 These values might be affected when actually reading on-disk information.
457 457
458 458 The relevant values are returned for use in _loadindex().
459 459
460 460 * newversionflags:
461 461 version header to use if we need to create a new revlog
462 462
463 463 * mmapindexthreshold:
464 464 minimal index size at which we start to use mmap
465 465
466 466 * force_nodemap:
467 467 force the usage of a "development" version of the nodemap code
468 468 """
469 469 mmapindexthreshold = None
470 470 opts = self.opener.options
471 471
472 472 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
473 473 new_header = CHANGELOGV2
474 474 elif b'revlogv2' in opts:
475 475 new_header = REVLOGV2
476 476 elif b'revlogv1' in opts:
477 477 new_header = REVLOGV1 | FLAG_INLINE_DATA
478 478 if b'generaldelta' in opts:
479 479 new_header |= FLAG_GENERALDELTA
480 480 elif b'revlogv0' in self.opener.options:
481 481 new_header = REVLOGV0
482 482 else:
483 483 new_header = REVLOG_DEFAULT_VERSION
484 484
485 485 if b'chunkcachesize' in opts:
486 486 self._chunkcachesize = opts[b'chunkcachesize']
487 487 if b'maxchainlen' in opts:
488 488 self._maxchainlen = opts[b'maxchainlen']
489 489 if b'deltabothparents' in opts:
490 490 self._deltabothparents = opts[b'deltabothparents']
491 491 self._lazydelta = bool(opts.get(b'lazydelta', True))
492 492 self._lazydeltabase = False
493 493 if self._lazydelta:
494 494 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
495 495 if b'compengine' in opts:
496 496 self._compengine = opts[b'compengine']
497 497 if b'zlib.level' in opts:
498 498 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
499 499 if b'zstd.level' in opts:
500 500 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
501 501 if b'maxdeltachainspan' in opts:
502 502 self._maxdeltachainspan = opts[b'maxdeltachainspan']
503 503 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
504 504 mmapindexthreshold = opts[b'mmapindexthreshold']
505 505 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
506 506 withsparseread = bool(opts.get(b'with-sparse-read', False))
507 507 # sparse-revlog forces sparse-read
508 508 self._withsparseread = self._sparserevlog or withsparseread
509 509 if b'sparse-read-density-threshold' in opts:
510 510 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
511 511 if b'sparse-read-min-gap-size' in opts:
512 512 self._srmingapsize = opts[b'sparse-read-min-gap-size']
513 513 if opts.get(b'enableellipsis'):
514 514 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
515 515
516 516 # revlog v0 doesn't have flag processors
517 517 for flag, processor in pycompat.iteritems(
518 518 opts.get(b'flagprocessors', {})
519 519 ):
520 520 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
521 521
522 522 if self._chunkcachesize <= 0:
523 523 raise error.RevlogError(
524 524 _(b'revlog chunk cache size %r is not greater than 0')
525 525 % self._chunkcachesize
526 526 )
527 527 elif self._chunkcachesize & (self._chunkcachesize - 1):
528 528 raise error.RevlogError(
529 529 _(b'revlog chunk cache size %r is not a power of 2')
530 530 % self._chunkcachesize
531 531 )
532 532 force_nodemap = opts.get(b'devel-force-nodemap', False)
533 533 return new_header, mmapindexthreshold, force_nodemap
534 534
535 535 def _get_data(self, filepath, mmap_threshold, size=None):
536 536 """return a file content with or without mmap
537 537
538 538 If the file is missing return the empty string"""
539 539 try:
540 540 with self.opener(filepath) as fp:
541 541 if mmap_threshold is not None:
542 542 file_size = self.opener.fstat(fp).st_size
543 543 if file_size >= mmap_threshold:
544 544 if size is not None:
545 545 # avoid potential mmap crash
546 546 size = min(file_size, size)
547 547 # TODO: should .close() to release resources without
548 548 # relying on Python GC
549 549 if size is None:
550 550 return util.buffer(util.mmapread(fp))
551 551 else:
552 552 return util.buffer(util.mmapread(fp, size))
553 553 if size is None:
554 554 return fp.read()
555 555 else:
556 556 return fp.read(size)
557 557 except IOError as inst:
558 558 if inst.errno != errno.ENOENT:
559 559 raise
560 560 return b''
561 561
562 562 def _loadindex(self):
563 563
564 564 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
565 565
566 566 if self.postfix is not None:
567 567 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
568 568 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
569 569 entry_point = b'%s.i.a' % self.radix
570 570 else:
571 571 entry_point = b'%s.i' % self.radix
572 572
573 573 entry_data = b''
574 574 self._initempty = True
575 575 entry_data = self._get_data(entry_point, mmapindexthreshold)
576 576 if len(entry_data) > 0:
577 577 header = INDEX_HEADER.unpack(entry_data[:4])[0]
578 578 self._initempty = False
579 579 else:
580 580 header = new_header
581 581
582 582 self._format_flags = header & ~0xFFFF
583 583 self._format_version = header & 0xFFFF
584 584
585 585 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
586 586 if supported_flags is None:
587 587 msg = _(b'unknown version (%d) in revlog %s')
588 588 msg %= (self._format_version, self.display_id)
589 589 raise error.RevlogError(msg)
590 590 elif self._format_flags & ~supported_flags:
591 591 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
592 592 display_flag = self._format_flags >> 16
593 593 msg %= (display_flag, self._format_version, self.display_id)
594 594 raise error.RevlogError(msg)
595 595
596 596 features = FEATURES_BY_VERSION[self._format_version]
597 597 self._inline = features[b'inline'](self._format_flags)
598 598 self._generaldelta = features[b'generaldelta'](self._format_flags)
599 599 self.hassidedata = features[b'sidedata']
600 600
601 601 if not features[b'docket']:
602 602 self._indexfile = entry_point
603 603 index_data = entry_data
604 604 else:
605 605 self._docket_file = entry_point
606 606 if self._initempty:
607 607 self._docket = docketutil.default_docket(self, header)
608 608 else:
609 609 self._docket = docketutil.parse_docket(
610 610 self, entry_data, use_pending=self._trypending
611 611 )
612 612 self._indexfile = self._docket.index_filepath()
613 613 index_data = b''
614 614 index_size = self._docket.index_end
615 615 if index_size > 0:
616 616 index_data = self._get_data(
617 617 self._indexfile, mmapindexthreshold, size=index_size
618 618 )
619 619 if len(index_data) < index_size:
620 620 msg = _(b'too few index data for %s: got %d, expected %d')
621 621 msg %= (self.display_id, len(index_data), index_size)
622 622 raise error.RevlogError(msg)
623 623
624 624 self._inline = False
625 625 # generaldelta implied by version 2 revlogs.
626 626 self._generaldelta = True
627 627 # the logic for persistent nodemap will be dealt with within the
628 628 # main docket, so disable it for now.
629 629 self._nodemap_file = None
630 630
631 631 if self.postfix is None:
632 632 self._datafile = b'%s.d' % self.radix
633 633 else:
634 634 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
635 635
636 636 self.nodeconstants = sha1nodeconstants
637 637 self.nullid = self.nodeconstants.nullid
638 638
639 639 # sparse-revlog can't be on without general-delta (issue6056)
640 640 if not self._generaldelta:
641 641 self._sparserevlog = False
642 642
643 643 self._storedeltachains = True
644 644
645 645 devel_nodemap = (
646 646 self._nodemap_file
647 647 and force_nodemap
648 648 and parse_index_v1_nodemap is not None
649 649 )
650 650
651 651 use_rust_index = False
652 652 if rustrevlog is not None:
653 653 if self._nodemap_file is not None:
654 654 use_rust_index = True
655 655 else:
656 656 use_rust_index = self.opener.options.get(b'rust.index')
657 657
658 658 self._parse_index = parse_index_v1
659 659 if self._format_version == REVLOGV0:
660 660 self._parse_index = revlogv0.parse_index_v0
661 661 elif self._format_version == REVLOGV2:
662 662 self._parse_index = parse_index_v2
663 663 elif self._format_version == CHANGELOGV2:
664 664 self._parse_index = parse_index_cl_v2
665 665 elif devel_nodemap:
666 666 self._parse_index = parse_index_v1_nodemap
667 667 elif use_rust_index:
668 668 self._parse_index = parse_index_v1_mixed
669 669 try:
670 670 d = self._parse_index(index_data, self._inline)
671 671 index, _chunkcache = d
672 672 use_nodemap = (
673 673 not self._inline
674 674 and self._nodemap_file is not None
675 675 and util.safehasattr(index, 'update_nodemap_data')
676 676 )
677 677 if use_nodemap:
678 678 nodemap_data = nodemaputil.persisted_data(self)
679 679 if nodemap_data is not None:
680 680 docket = nodemap_data[0]
681 681 if (
682 682 len(d[0]) > docket.tip_rev
683 683 and d[0][docket.tip_rev][7] == docket.tip_node
684 684 ):
685 685 # no changelog tampering
686 686 self._nodemap_docket = docket
687 687 index.update_nodemap_data(*nodemap_data)
688 688 except (ValueError, IndexError):
689 689 raise error.RevlogError(
690 690 _(b"index %s is corrupted") % self.display_id
691 691 )
692 692 self.index, self._chunkcache = d
693 693 if not self._chunkcache:
694 694 self._chunkclear()
695 695 # revnum -> (chain-length, sum-delta-length)
696 696 self._chaininfocache = util.lrucachedict(500)
697 697 # revlog header -> revlog compressor
698 698 self._decompressors = {}
699 699
700 700 @util.propertycache
701 701 def revlog_kind(self):
702 702 return self.target[0]
703 703
704 704 @util.propertycache
705 705 def display_id(self):
706 706 """The public facing "ID" of the revlog that we use in message"""
707 707 # Maybe we should build a user facing representation of
708 708 # revlog.target instead of using `self.radix`
709 709 return self.radix
710 710
711 711 def _get_decompressor(self, t):
712 712 try:
713 713 compressor = self._decompressors[t]
714 714 except KeyError:
715 715 try:
716 716 engine = util.compengines.forrevlogheader(t)
717 717 compressor = engine.revlogcompressor(self._compengineopts)
718 718 self._decompressors[t] = compressor
719 719 except KeyError:
720 720 raise error.RevlogError(
721 721 _(b'unknown compression type %s') % binascii.hexlify(t)
722 722 )
723 723 return compressor
724 724
725 725 @util.propertycache
726 726 def _compressor(self):
727 727 engine = util.compengines[self._compengine]
728 728 return engine.revlogcompressor(self._compengineopts)
729 729
730 730 @util.propertycache
731 731 def _decompressor(self):
732 732 """the default decompressor"""
733 733 if self._docket is None:
734 734 return None
735 735 t = self._docket.default_compression_header
736 736 c = self._get_decompressor(t)
737 737 return c.decompress
738 738
739 739 def _indexfp(self):
740 740 """file object for the revlog's index file"""
741 741 return self.opener(self._indexfile, mode=b"r")
742 742
743 743 def __index_write_fp(self):
744 744 # You should not use this directly; use `_writing` instead
745 745 try:
746 746 f = self.opener(
747 747 self._indexfile, mode=b"r+", checkambig=self._checkambig
748 748 )
749 749 if self._docket is None:
750 750 f.seek(0, os.SEEK_END)
751 751 else:
752 752 f.seek(self._docket.index_end, os.SEEK_SET)
753 753 return f
754 754 except IOError as inst:
755 755 if inst.errno != errno.ENOENT:
756 756 raise
757 757 return self.opener(
758 758 self._indexfile, mode=b"w+", checkambig=self._checkambig
759 759 )
760 760
761 761 def __index_new_fp(self):
762 762 # You should not use this unless you are upgrading from inline revlog
763 763 return self.opener(
764 764 self._indexfile,
765 765 mode=b"w",
766 766 checkambig=self._checkambig,
767 767 atomictemp=True,
768 768 )
769 769
770 770 def _datafp(self, mode=b'r'):
771 771 """file object for the revlog's data file"""
772 772 return self.opener(self._datafile, mode=mode)
773 773
774 774 @contextlib.contextmanager
775 775 def _datareadfp(self, existingfp=None):
776 776 """file object suitable to read data"""
777 777 # Use explicit file handle, if given.
778 778 if existingfp is not None:
779 779 yield existingfp
780 780
781 781 # Use a file handle being actively used for writes, if available.
782 782 # There is some danger to doing this because reads will seek the
783 783 # file. However, _writeentry() performs a SEEK_END before all writes,
784 784 # so we should be safe.
785 785 elif self._writinghandles:
786 786 if self._inline:
787 787 yield self._writinghandles[0]
788 788 else:
789 789 yield self._writinghandles[1]
790 790
791 791 # Otherwise open a new file handle.
792 792 else:
793 793 if self._inline:
794 794 func = self._indexfp
795 795 else:
796 796 func = self._datafp
797 797 with func() as fp:
798 798 yield fp
799 799
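A hedged usage sketch for the context manager above (rl, offset, and length
are assumed to be defined):

    with rl._datareadfp() as fp:
        fp.seek(offset)
        chunk = fp.read(length)  # raw bytes from the data (or inline) file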
800 800 def tiprev(self):
801 801 return len(self.index) - 1
802 802
803 803 def tip(self):
804 804 return self.node(self.tiprev())
805 805
806 806 def __contains__(self, rev):
807 807 return 0 <= rev < len(self)
808 808
809 809 def __len__(self):
810 810 return len(self.index)
811 811
812 812 def __iter__(self):
813 813 return iter(pycompat.xrange(len(self)))
814 814
815 815 def revs(self, start=0, stop=None):
816 816 """iterate over all rev in this revlog (from start to stop)"""
817 817 return storageutil.iterrevs(len(self), start=start, stop=stop)
818 818
819 819 @property
820 820 def nodemap(self):
821 821 msg = (
822 822 b"revlog.nodemap is deprecated, "
823 823 b"use revlog.index.[has_node|rev|get_rev]"
824 824 )
825 825 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
826 826 return self.index.nodemap
827 827
828 828 @property
829 829 def _nodecache(self):
830 830 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
831 831 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
832 832 return self.index.nodemap
833 833
834 834 def hasnode(self, node):
835 835 try:
836 836 self.rev(node)
837 837 return True
838 838 except KeyError:
839 839 return False
840 840
841 841 def candelta(self, baserev, rev):
842 842 """whether two revisions (baserev, rev) can be delta-ed or not"""
843 843 # Disable delta if either rev requires a content-changing flag
844 844 # processor (ex. LFS). This is because such a flag processor can alter
845 845 # the rawtext content that the delta will be based on, and two clients
846 846 # could have the same revlog node with different flags (i.e. different
847 847 # rawtext contents) and the delta could be incompatible.
848 848 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
849 849 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
850 850 ):
851 851 return False
852 852 return True
853 853
854 854 def update_caches(self, transaction):
855 855 if self._nodemap_file is not None:
856 856 if transaction is None:
857 857 nodemaputil.update_persistent_nodemap(self)
858 858 else:
859 859 nodemaputil.setup_persistent_nodemap(transaction, self)
860 860
861 861 def clearcaches(self):
862 862 self._revisioncache = None
863 863 self._chainbasecache.clear()
864 864 self._chunkcache = (0, b'')
865 865 self._pcache = {}
866 866 self._nodemap_docket = None
867 867 self.index.clearcaches()
868 868 # The python code is the one responsible for validating the docket; we
869 869 # end up having to refresh it here.
870 870 use_nodemap = (
871 871 not self._inline
872 872 and self._nodemap_file is not None
873 873 and util.safehasattr(self.index, 'update_nodemap_data')
874 874 )
875 875 if use_nodemap:
876 876 nodemap_data = nodemaputil.persisted_data(self)
877 877 if nodemap_data is not None:
878 878 self._nodemap_docket = nodemap_data[0]
879 879 self.index.update_nodemap_data(*nodemap_data)
880 880
881 881 def rev(self, node):
882 882 try:
883 883 return self.index.rev(node)
884 884 except TypeError:
885 885 raise
886 886 except error.RevlogError:
887 887 # parsers.c radix tree lookup failed
888 888 if (
889 889 node == self.nodeconstants.wdirid
890 890 or node in self.nodeconstants.wdirfilenodeids
891 891 ):
892 892 raise error.WdirUnsupported
893 893 raise error.LookupError(node, self.display_id, _(b'no node'))
894 894
895 895 # Accessors for index entries.
896 896
897 897 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
898 898 # are flags.
899 899 def start(self, rev):
900 900 return int(self.index[rev][0] >> 16)
901 901
902 902 def flags(self, rev):
903 903 return self.index[rev][0] & 0xFFFF
904 904
905 905 def length(self, rev):
906 906 return self.index[rev][1]
907 907
908 908 def sidedata_length(self, rev):
909 909 if not self.hassidedata:
910 910 return 0
911 911 return self.index[rev][9]
912 912
913 913 def rawsize(self, rev):
914 914 """return the length of the uncompressed text for a given revision"""
915 915 l = self.index[rev][2]
916 916 if l >= 0:
917 917 return l
918 918
919 919 t = self.rawdata(rev)
920 920 return len(t)
921 921
922 922 def size(self, rev):
923 923 """length of non-raw text (processed by a "read" flag processor)"""
924 924 # fast path: if no "read" flag processor could change the content,
925 925 # size is rawsize. note: ELLIPSIS is known to not change the content.
926 926 flags = self.flags(rev)
927 927 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
928 928 return self.rawsize(rev)
929 929
930 930 return len(self.revision(rev, raw=False))
931 931
932 932 def chainbase(self, rev):
933 933 base = self._chainbasecache.get(rev)
934 934 if base is not None:
935 935 return base
936 936
937 937 index = self.index
938 938 iterrev = rev
939 939 base = index[iterrev][3]
940 940 while base != iterrev:
941 941 iterrev = base
942 942 base = index[iterrev][3]
943 943
944 944 self._chainbasecache[rev] = base
945 945 return base
946 946
947 947 def linkrev(self, rev):
948 948 return self.index[rev][4]
949 949
950 950 def parentrevs(self, rev):
951 951 try:
952 952 entry = self.index[rev]
953 953 except IndexError:
954 954 if rev == wdirrev:
955 955 raise error.WdirUnsupported
956 956 raise
957 957 if entry[5] == nullrev:
958 958 return entry[6], entry[5]
959 959 else:
960 960 return entry[5], entry[6]
961 961
962 962 # fast parentrevs(rev) where rev isn't filtered
963 963 _uncheckedparentrevs = parentrevs
964 964
965 965 def node(self, rev):
966 966 try:
967 967 return self.index[rev][7]
968 968 except IndexError:
969 969 if rev == wdirrev:
970 970 raise error.WdirUnsupported
971 971 raise
972 972
973 973 # Derived from index values.
974 974
975 975 def end(self, rev):
976 976 return self.start(rev) + self.length(rev)
977 977
978 978 def parents(self, node):
979 979 i = self.index
980 980 d = i[self.rev(node)]
981 981 # inline node() to avoid function call overhead
982 982 if d[5] == self.nullid:
983 983 return i[d[6]][7], i[d[5]][7]
984 984 else:
985 985 return i[d[5]][7], i[d[6]][7]
986 986
987 987 def chainlen(self, rev):
988 988 return self._chaininfo(rev)[0]
989 989
990 990 def _chaininfo(self, rev):
991 991 chaininfocache = self._chaininfocache
992 992 if rev in chaininfocache:
993 993 return chaininfocache[rev]
994 994 index = self.index
995 995 generaldelta = self._generaldelta
996 996 iterrev = rev
997 997 e = index[iterrev]
998 998 clen = 0
999 999 compresseddeltalen = 0
1000 1000 while iterrev != e[3]:
1001 1001 clen += 1
1002 1002 compresseddeltalen += e[1]
1003 1003 if generaldelta:
1004 1004 iterrev = e[3]
1005 1005 else:
1006 1006 iterrev -= 1
1007 1007 if iterrev in chaininfocache:
1008 1008 t = chaininfocache[iterrev]
1009 1009 clen += t[0]
1010 1010 compresseddeltalen += t[1]
1011 1011 break
1012 1012 e = index[iterrev]
1013 1013 else:
1014 1014 # Add text length of base since decompressing that also takes
1015 1015 # work. For cache hits the length is already included.
1016 1016 compresseddeltalen += e[1]
1017 1017 r = (clen, compresseddeltalen)
1018 1018 chaininfocache[rev] = r
1019 1019 return r
1020 1020
1021 1021 def _deltachain(self, rev, stoprev=None):
1022 1022 """Obtain the delta chain for a revision.
1023 1023
1024 1024 ``stoprev`` specifies a revision to stop at. If not specified, we
1025 1025 stop at the base of the chain.
1026 1026
1027 1027 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1028 1028 revs in ascending order and ``stopped`` is a bool indicating whether
1029 1029 ``stoprev`` was hit.
1030 1030 """
1031 1031 # Try C implementation.
1032 1032 try:
1033 1033 return self.index.deltachain(rev, stoprev, self._generaldelta)
1034 1034 except AttributeError:
1035 1035 pass
1036 1036
1037 1037 chain = []
1038 1038
1039 1039 # Alias to prevent attribute lookup in tight loop.
1040 1040 index = self.index
1041 1041 generaldelta = self._generaldelta
1042 1042
1043 1043 iterrev = rev
1044 1044 e = index[iterrev]
1045 1045 while iterrev != e[3] and iterrev != stoprev:
1046 1046 chain.append(iterrev)
1047 1047 if generaldelta:
1048 1048 iterrev = e[3]
1049 1049 else:
1050 1050 iterrev -= 1
1051 1051 e = index[iterrev]
1052 1052
1053 1053 if iterrev == stoprev:
1054 1054 stopped = True
1055 1055 else:
1056 1056 chain.append(iterrev)
1057 1057 stopped = False
1058 1058
1059 1059 chain.reverse()
1060 1060 return chain, stopped
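A short usage sketch for this fallback (revision numbers invented):

    chain, stopped = rl._deltachain(5)            # down to the chain base
    assert not stopped                            # no stoprev, base reached
    chain, stopped = rl._deltachain(5, stoprev=2)
    # `chain` is in ascending order; `stopped` is True only if rev 2 was
    # actually hit before the chain base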
1061 1061
1062 1062 def ancestors(self, revs, stoprev=0, inclusive=False):
1063 1063 """Generate the ancestors of 'revs' in reverse revision order.
1064 1064 Does not generate revs lower than stoprev.
1065 1065
1066 1066 See the documentation for ancestor.lazyancestors for more details."""
1067 1067
1068 1068 # first, make sure start revisions aren't filtered
1069 1069 revs = list(revs)
1070 1070 checkrev = self.node
1071 1071 for r in revs:
1072 1072 checkrev(r)
1073 1073 # and we're sure ancestors aren't filtered as well
1074 1074
1075 1075 if rustancestor is not None and self.index.rust_ext_compat:
1076 1076 lazyancestors = rustancestor.LazyAncestors
1077 1077 arg = self.index
1078 1078 else:
1079 1079 lazyancestors = ancestor.lazyancestors
1080 1080 arg = self._uncheckedparentrevs
1081 1081 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
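Usage sketch (revision numbers invented):

    for r in rl.ancestors([5, 7], stoprev=2, inclusive=True):
        pass  # yields 5 and 7 themselves (inclusive=True), then their
              # ancestors in reverse revision order, skipping revs below 2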
1082 1082
1083 1083 def descendants(self, revs):
1084 1084 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1085 1085
1086 1086 def findcommonmissing(self, common=None, heads=None):
1087 1087 """Return a tuple of the ancestors of common and the ancestors of heads
1088 1088 that are not ancestors of common. In revset terminology, we return the
1089 1089 tuple:
1090 1090
1091 1091 ::common, (::heads) - (::common)
1092 1092
1093 1093 The list is sorted by revision number, meaning it is
1094 1094 topologically sorted.
1095 1095
1096 1096 'heads' and 'common' are both lists of node IDs. If heads is
1097 1097 not supplied, uses all of the revlog's heads. If common is not
1098 1098 supplied, uses nullid."""
1099 1099 if common is None:
1100 1100 common = [self.nullid]
1101 1101 if heads is None:
1102 1102 heads = self.heads()
1103 1103
1104 1104 common = [self.rev(n) for n in common]
1105 1105 heads = [self.rev(n) for n in heads]
1106 1106
1107 1107 # we want the ancestors, but inclusive
1108 1108 class lazyset(object):
1109 1109 def __init__(self, lazyvalues):
1110 1110 self.addedvalues = set()
1111 1111 self.lazyvalues = lazyvalues
1112 1112
1113 1113 def __contains__(self, value):
1114 1114 return value in self.addedvalues or value in self.lazyvalues
1115 1115
1116 1116 def __iter__(self):
1117 1117 added = self.addedvalues
1118 1118 for r in added:
1119 1119 yield r
1120 1120 for r in self.lazyvalues:
1121 1121 if not r in added:
1122 1122 yield r
1123 1123
1124 1124 def add(self, value):
1125 1125 self.addedvalues.add(value)
1126 1126
1127 1127 def update(self, values):
1128 1128 self.addedvalues.update(values)
1129 1129
1130 1130 has = lazyset(self.ancestors(common))
1131 1131 has.add(nullrev)
1132 1132 has.update(common)
1133 1133
1134 1134 # take all ancestors from heads that aren't in has
1135 1135 missing = set()
1136 1136 visit = collections.deque(r for r in heads if r not in has)
1137 1137 while visit:
1138 1138 r = visit.popleft()
1139 1139 if r in missing:
1140 1140 continue
1141 1141 else:
1142 1142 missing.add(r)
1143 1143 for p in self.parentrevs(r):
1144 1144 if p not in has:
1145 1145 visit.append(p)
1146 1146 missing = list(missing)
1147 1147 missing.sort()
1148 1148 return has, [self.node(miss) for miss in missing]
1149 1149
1150 1150 def incrementalmissingrevs(self, common=None):
1151 1151 """Return an object that can be used to incrementally compute the
1152 1152 revision numbers of the ancestors of arbitrary sets that are not
1153 1153 ancestors of common. This is an ancestor.incrementalmissingancestors
1154 1154 object.
1155 1155
1156 1156 'common' is a list of revision numbers. If common is not supplied, uses
1157 1157 nullrev.
1158 1158 """
1159 1159 if common is None:
1160 1160 common = [nullrev]
1161 1161
1162 1162 if rustancestor is not None and self.index.rust_ext_compat:
1163 1163 return rustancestor.MissingAncestors(self.index, common)
1164 1164 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1165 1165
1166 1166 def findmissingrevs(self, common=None, heads=None):
1167 1167 """Return the revision numbers of the ancestors of heads that
1168 1168 are not ancestors of common.
1169 1169
1170 1170 More specifically, return a list of revision numbers corresponding to
1171 1171 nodes N such that every N satisfies the following constraints:
1172 1172
1173 1173 1. N is an ancestor of some node in 'heads'
1174 1174 2. N is not an ancestor of any node in 'common'
1175 1175
1176 1176 The list is sorted by revision number, meaning it is
1177 1177 topologically sorted.
1178 1178
1179 1179 'heads' and 'common' are both lists of revision numbers. If heads is
1180 1180 not supplied, uses all of the revlog's heads. If common is not
1181 1181 supplied, uses nullid."""
1182 1182 if common is None:
1183 1183 common = [nullrev]
1184 1184 if heads is None:
1185 1185 heads = self.headrevs()
1186 1186
1187 1187 inc = self.incrementalmissingrevs(common=common)
1188 1188 return inc.missingancestors(heads)
1189 1189
1190 1190 def findmissing(self, common=None, heads=None):
1191 1191 """Return the ancestors of heads that are not ancestors of common.
1192 1192
1193 1193 More specifically, return a list of nodes N such that every N
1194 1194 satisfies the following constraints:
1195 1195
1196 1196 1. N is an ancestor of some node in 'heads'
1197 1197 2. N is not an ancestor of any node in 'common'
1198 1198
1199 1199 The list is sorted by revision number, meaning it is
1200 1200 topologically sorted.
1201 1201
1202 1202 'heads' and 'common' are both lists of node IDs. If heads is
1203 1203 not supplied, uses all of the revlog's heads. If common is not
1204 1204 supplied, uses nullid."""
1205 1205 if common is None:
1206 1206 common = [self.nullid]
1207 1207 if heads is None:
1208 1208 heads = self.heads()
1209 1209
1210 1210 common = [self.rev(n) for n in common]
1211 1211 heads = [self.rev(n) for n in heads]
1212 1212
1213 1213 inc = self.incrementalmissingrevs(common=common)
1214 1214 return [self.node(r) for r in inc.missingancestors(heads)]
1215 1215
1216 1216 def nodesbetween(self, roots=None, heads=None):
1217 1217 """Return a topological path from 'roots' to 'heads'.
1218 1218
1219 1219 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1220 1220 topologically sorted list of all nodes N that satisfy both of
1221 1221 these constraints:
1222 1222
1223 1223 1. N is a descendant of some node in 'roots'
1224 1224 2. N is an ancestor of some node in 'heads'
1225 1225
1226 1226 Every node is considered to be both a descendant and an ancestor
1227 1227 of itself, so every reachable node in 'roots' and 'heads' will be
1228 1228 included in 'nodes'.
1229 1229
1230 1230 'outroots' is the list of reachable nodes in 'roots', i.e., the
1231 1231 subset of 'roots' that is returned in 'nodes'. Likewise,
1232 1232 'outheads' is the subset of 'heads' that is also in 'nodes'.
1233 1233
1234 1234 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1235 1235 unspecified, uses nullid as the only root. If 'heads' is
1236 1236 unspecified, uses list of all of the revlog's heads."""
1237 1237 nonodes = ([], [], [])
1238 1238 if roots is not None:
1239 1239 roots = list(roots)
1240 1240 if not roots:
1241 1241 return nonodes
1242 1242 lowestrev = min([self.rev(n) for n in roots])
1243 1243 else:
1244 1244 roots = [self.nullid] # Everybody's a descendant of nullid
1245 1245 lowestrev = nullrev
1246 1246 if (lowestrev == nullrev) and (heads is None):
1247 1247 # We want _all_ the nodes!
1248 1248 return (
1249 1249 [self.node(r) for r in self],
1250 1250 [self.nullid],
1251 1251 list(self.heads()),
1252 1252 )
1253 1253 if heads is None:
1254 1254 # All nodes are ancestors, so the latest ancestor is the last
1255 1255 # node.
1256 1256 highestrev = len(self) - 1
1257 1257 # Set ancestors to None to signal that every node is an ancestor.
1258 1258 ancestors = None
1259 1259 # Set heads to an empty dictionary for later discovery of heads
1260 1260 heads = {}
1261 1261 else:
1262 1262 heads = list(heads)
1263 1263 if not heads:
1264 1264 return nonodes
1265 1265 ancestors = set()
1266 1266 # Turn heads into a dictionary so we can remove 'fake' heads.
1267 1267 # Also, later we will be using it to filter out the heads we can't
1268 1268 # find from roots.
1269 1269 heads = dict.fromkeys(heads, False)
1270 1270 # Start at the top and keep marking parents until we're done.
1271 1271 nodestotag = set(heads)
1272 1272 # Remember where the top was so we can use it as a limit later.
1273 1273 highestrev = max([self.rev(n) for n in nodestotag])
1274 1274 while nodestotag:
1275 1275 # grab a node to tag
1276 1276 n = nodestotag.pop()
1277 1277 # Never tag nullid
1278 1278 if n == self.nullid:
1279 1279 continue
1280 1280 # A node's revision number represents its place in a
1281 1281 # topologically sorted list of nodes.
1282 1282 r = self.rev(n)
1283 1283 if r >= lowestrev:
1284 1284 if n not in ancestors:
1285 1285 # If we are possibly a descendant of one of the roots
1286 1286 # and we haven't already been marked as an ancestor
1287 1287 ancestors.add(n) # Mark as ancestor
1288 1288 # Add non-nullid parents to list of nodes to tag.
1289 1289 nodestotag.update(
1290 1290 [p for p in self.parents(n) if p != self.nullid]
1291 1291 )
1292 1292 elif n in heads: # We've seen it before, is it a fake head?
1293 1293 # So it is, real heads should not be the ancestors of
1294 1294 # any other heads.
1295 1295 heads.pop(n)
1296 1296 if not ancestors:
1297 1297 return nonodes
1298 1298 # Now that we have our set of ancestors, we want to remove any
1299 1299 # roots that are not ancestors.
1300 1300
1301 1301 # If one of the roots was nullid, everything is included anyway.
1302 1302 if lowestrev > nullrev:
1303 1303 # But, since we weren't, let's recompute the lowest rev to not
1304 1304 # include roots that aren't ancestors.
1305 1305
1306 1306 # Filter out roots that aren't ancestors of heads
1307 1307 roots = [root for root in roots if root in ancestors]
1308 1308 # Recompute the lowest revision
1309 1309 if roots:
1310 1310 lowestrev = min([self.rev(root) for root in roots])
1311 1311 else:
1312 1312 # No more roots? Return empty list
1313 1313 return nonodes
1314 1314 else:
1315 1315 # We are descending from nullid, and don't need to care about
1316 1316 # any other roots.
1317 1317 lowestrev = nullrev
1318 1318 roots = [self.nullid]
1319 1319 # Transform our roots list into a set.
1320 1320 descendants = set(roots)
1321 1321 # Also, keep the original roots so we can filter out roots that aren't
1322 1322 # 'real' roots (i.e. are descended from other roots).
1323 1323 roots = descendants.copy()
1324 1324 # Our topologically sorted list of output nodes.
1325 1325 orderedout = []
1326 1326 # Don't start at nullid since we don't want nullid in our output list,
1327 1327 # and if nullid shows up in descendants, empty parents will look like
1328 1328 # they're descendants.
1329 1329 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1330 1330 n = self.node(r)
1331 1331 isdescendant = False
1332 1332 if lowestrev == nullrev: # Everybody is a descendant of nullid
1333 1333 isdescendant = True
1334 1334 elif n in descendants:
1335 1335 # n is already a descendant
1336 1336 isdescendant = True
1337 1337 # This check only needs to be done here because all the roots
1338 1338 # will start being marked as descendants before the loop.
1339 1339 if n in roots:
1340 1340 # If n was a root, check if it's a 'real' root.
1341 1341 p = tuple(self.parents(n))
1342 1342 # If any of its parents are descendants, it's not a root.
1343 1343 if (p[0] in descendants) or (p[1] in descendants):
1344 1344 roots.remove(n)
1345 1345 else:
1346 1346 p = tuple(self.parents(n))
1347 1347 # A node is a descendant if either of its parents are
1348 1348 # descendants. (We seeded the dependents list with the roots
1349 1349 # up there, remember?)
1350 1350 if (p[0] in descendants) or (p[1] in descendants):
1351 1351 descendants.add(n)
1352 1352 isdescendant = True
1353 1353 if isdescendant and ((ancestors is None) or (n in ancestors)):
1354 1354 # Only include nodes that are both descendants and ancestors.
1355 1355 orderedout.append(n)
1356 1356 if (ancestors is not None) and (n in heads):
1357 1357 # We're trying to figure out which heads are reachable
1358 1358 # from roots.
1359 1359 # Mark this head as having been reached
1360 1360 heads[n] = True
1361 1361 elif ancestors is None:
1362 1362 # Otherwise, we're trying to discover the heads.
1363 1363 # Assume this is a head because if it isn't, the next step
1364 1364 # will eventually remove it.
1365 1365 heads[n] = True
1366 1366 # But, obviously its parents aren't.
1367 1367 for p in self.parents(n):
1368 1368 heads.pop(p, None)
1369 1369 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1370 1370 roots = list(roots)
1371 1371 assert orderedout
1372 1372 assert roots
1373 1373 assert heads
1374 1374 return (orderedout, roots, heads)
1375 1375
1376 1376 def headrevs(self, revs=None):
1377 1377 if revs is None:
1378 1378 try:
1379 1379 return self.index.headrevs()
1380 1380 except AttributeError:
1381 1381 return self._headrevs()
1382 1382 if rustdagop is not None and self.index.rust_ext_compat:
1383 1383 return rustdagop.headrevs(self.index, revs)
1384 1384 return dagop.headrevs(revs, self._uncheckedparentrevs)
1385 1385
1386 1386 def computephases(self, roots):
1387 1387 return self.index.computephasesmapsets(roots)
1388 1388
1389 1389 def _headrevs(self):
1390 1390 count = len(self)
1391 1391 if not count:
1392 1392 return [nullrev]
1393 1393 # we won't iterate over filtered revs, so nobody is a head at start
1394 1394 ishead = [0] * (count + 1)
1395 1395 index = self.index
1396 1396 for r in self:
1397 1397 ishead[r] = 1 # I may be a head
1398 1398 e = index[r]
1399 1399 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1400 1400 return [r for r, val in enumerate(ishead) if val]
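A tiny worked example of the marking pass above, on a linear history
0 <- 1 <- 2 (only rev 2 is a head):

    # count == 3, ishead == [0, 0, 0, 0]; the extra slot absorbs
    # nullrev (-1) parents via negative indexing
    # r=0: ishead[0] = 1; parents (null, null) -> ishead[-1] = 0
    # r=1: ishead[1] = 1; parents (0, null)    -> ishead[0] = 0
    # r=2: ishead[2] = 1; parents (1, null)    -> ishead[1] = 0
    # final: [0, 0, 1, 0] -> headrevs == [2]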
1401 1401
1402 1402 def heads(self, start=None, stop=None):
1403 1403 """return the list of all nodes that have no children
1404 1404
1405 1405 if start is specified, only heads that are descendants of
1406 1406 start will be returned
1407 1407 if stop is specified, it will consider all the revs from stop
1408 1408 as if they had no children
1409 1409 """
1410 1410 if start is None and stop is None:
1411 1411 if not len(self):
1412 1412 return [self.nullid]
1413 1413 return [self.node(r) for r in self.headrevs()]
1414 1414
1415 1415 if start is None:
1416 1416 start = nullrev
1417 1417 else:
1418 1418 start = self.rev(start)
1419 1419
1420 1420 stoprevs = {self.rev(n) for n in stop or []}
1421 1421
1422 1422 revs = dagop.headrevssubset(
1423 1423 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1424 1424 )
1425 1425
1426 1426 return [self.node(rev) for rev in revs]
1427 1427
1428 1428 def children(self, node):
1429 1429 """find the children of a given node"""
1430 1430 c = []
1431 1431 p = self.rev(node)
1432 1432 for r in self.revs(start=p + 1):
1433 1433 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1434 1434 if prevs:
1435 1435 for pr in prevs:
1436 1436 if pr == p:
1437 1437 c.append(self.node(r))
1438 1438 elif p == nullrev:
1439 1439 c.append(self.node(r))
1440 1440 return c
1441 1441
1442 1442 def commonancestorsheads(self, a, b):
1443 1443 """calculate all the heads of the common ancestors of nodes a and b"""
1444 1444 a, b = self.rev(a), self.rev(b)
1445 1445 ancs = self._commonancestorsheads(a, b)
1446 1446 return pycompat.maplist(self.node, ancs)
1447 1447
1448 1448 def _commonancestorsheads(self, *revs):
1449 1449 """calculate all the heads of the common ancestors of revs"""
1450 1450 try:
1451 1451 ancs = self.index.commonancestorsheads(*revs)
1452 1452 except (AttributeError, OverflowError): # C implementation failed
1453 1453 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1454 1454 return ancs
1455 1455
1456 1456 def isancestor(self, a, b):
1457 1457 """return True if node a is an ancestor of node b
1458 1458
1459 1459 A revision is considered an ancestor of itself."""
1460 1460 a, b = self.rev(a), self.rev(b)
1461 1461 return self.isancestorrev(a, b)
1462 1462
1463 1463 def isancestorrev(self, a, b):
1464 1464 """return True if revision a is an ancestor of revision b
1465 1465
1466 1466 A revision is considered an ancestor of itself.
1467 1467
1468 1468 The implementation of this is trivial but the use of
1469 1469 reachableroots is not."""
1470 1470 if a == nullrev:
1471 1471 return True
1472 1472 elif a == b:
1473 1473 return True
1474 1474 elif a > b:
1475 1475 return False
1476 1476 return bool(self.reachableroots(a, [b], [a], includepath=False))
1477 1477
1478 1478 def reachableroots(self, minroot, heads, roots, includepath=False):
1479 1479 """return (heads(::(<roots> and <roots>::<heads>)))
1480 1480
1481 1481 If includepath is True, return (<roots>::<heads>)."""
1482 1482 try:
1483 1483 return self.index.reachableroots2(
1484 1484 minroot, heads, roots, includepath
1485 1485 )
1486 1486 except AttributeError:
1487 1487 return dagop._reachablerootspure(
1488 1488 self.parentrevs, minroot, roots, heads, includepath
1489 1489 )
1490 1490
1491 1491 def ancestor(self, a, b):
1492 1492 """calculate the "best" common ancestor of nodes a and b"""
1493 1493
1494 1494 a, b = self.rev(a), self.rev(b)
1495 1495 try:
1496 1496 ancs = self.index.ancestors(a, b)
1497 1497 except (AttributeError, OverflowError):
1498 1498 ancs = ancestor.ancestors(self.parentrevs, a, b)
1499 1499 if ancs:
1500 1500 # choose a consistent winner when there's a tie
1501 1501 return min(map(self.node, ancs))
1502 1502 return self.nullid
1503 1503
1504 1504 def _match(self, id):
1505 1505 if isinstance(id, int):
1506 1506 # rev
1507 1507 return self.node(id)
1508 1508 if len(id) == self.nodeconstants.nodelen:
1509 1509 # possibly a binary node
1510 1510 # odds of a binary node being all hex in ASCII are 1 in 10**25
1511 1511 try:
1512 1512 node = id
1513 1513 self.rev(node) # quick search the index
1514 1514 return node
1515 1515 except error.LookupError:
1516 1516 pass # may be partial hex id
1517 1517 try:
1518 1518 # str(rev)
1519 1519 rev = int(id)
1520 1520 if b"%d" % rev != id:
1521 1521 raise ValueError
1522 1522 if rev < 0:
1523 1523 rev = len(self) + rev
1524 1524 if rev < 0 or rev >= len(self):
1525 1525 raise ValueError
1526 1526 return self.node(rev)
1527 1527 except (ValueError, OverflowError):
1528 1528 pass
1529 1529 if len(id) == 2 * self.nodeconstants.nodelen:
1530 1530 try:
1531 1531 # a full hex nodeid?
1532 1532 node = bin(id)
1533 1533 self.rev(node)
1534 1534 return node
1535 1535 except (TypeError, error.LookupError):
1536 1536 pass
1537 1537
1538 1538 def _partialmatch(self, id):
1539 1539 # we don't care about wdirfilenodeids as they should always be full hashes
1540 1540 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1541 1541 try:
1542 1542 partial = self.index.partialmatch(id)
1543 1543 if partial and self.hasnode(partial):
1544 1544 if maybewdir:
1545 1545 # single 'ff...' match in radix tree, ambiguous with wdir
1546 1546 raise error.RevlogError
1547 1547 return partial
1548 1548 if maybewdir:
1549 1549 # no 'ff...' match in radix tree, wdir identified
1550 1550 raise error.WdirUnsupported
1551 1551 return None
1552 1552 except error.RevlogError:
1553 1553 # parsers.c radix tree lookup gave multiple matches
1554 1554 # fast path: for unfiltered changelog, radix tree is accurate
1555 1555 if not getattr(self, 'filteredrevs', None):
1556 1556 raise error.AmbiguousPrefixLookupError(
1557 1557 id, self.display_id, _(b'ambiguous identifier')
1558 1558 )
1559 1559 # fall through to slow path that filters hidden revisions
1560 1560 except (AttributeError, ValueError):
1561 1561 # we are pure python, or key was too short to search radix tree
1562 1562 pass
1563 1563
1564 1564 if id in self._pcache:
1565 1565 return self._pcache[id]
1566 1566
1567 1567 if len(id) <= 40:
1568 1568 try:
1569 1569 # hex(node)[:...]
1570 1570 l = len(id) // 2 # grab an even number of digits
1571 1571 prefix = bin(id[: l * 2])
1572 1572 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1573 1573 nl = [
1574 1574 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1575 1575 ]
1576 1576 if self.nodeconstants.nullhex.startswith(id):
1577 1577 nl.append(self.nullid)
1578 1578 if len(nl) > 0:
1579 1579 if len(nl) == 1 and not maybewdir:
1580 1580 self._pcache[id] = nl[0]
1581 1581 return nl[0]
1582 1582 raise error.AmbiguousPrefixLookupError(
1583 1583 id, self.display_id, _(b'ambiguous identifier')
1584 1584 )
1585 1585 if maybewdir:
1586 1586 raise error.WdirUnsupported
1587 1587 return None
1588 1588 except TypeError:
1589 1589 pass
1590 1590
1591 1591 def lookup(self, id):
1592 1592 """locate a node based on:
1593 1593 - revision number or str(revision number)
1594 1594 - nodeid or subset of hex nodeid
1595 1595 """
1596 1596 n = self._match(id)
1597 1597 if n is not None:
1598 1598 return n
1599 1599 n = self._partialmatch(id)
1600 1600 if n:
1601 1601 return n
1602 1602
1603 1603 raise error.LookupError(id, self.display_id, _(b'no match found'))
1604 1604
1605 1605 def shortest(self, node, minlength=1):
1606 1606 """Find the shortest unambiguous prefix that matches node."""
1607 1607
1608 1608 def isvalid(prefix):
1609 1609 try:
1610 1610 matchednode = self._partialmatch(prefix)
1611 1611 except error.AmbiguousPrefixLookupError:
1612 1612 return False
1613 1613 except error.WdirUnsupported:
1614 1614 # single 'ff...' match
1615 1615 return True
1616 1616 if matchednode is None:
1617 1617 raise error.LookupError(node, self.display_id, _(b'no node'))
1618 1618 return True
1619 1619
1620 1620 def maybewdir(prefix):
1621 1621 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1622 1622
1623 1623 hexnode = hex(node)
1624 1624
1625 1625 def disambiguate(hexnode, minlength):
1626 1626 """Disambiguate against wdirid."""
1627 1627 for length in range(minlength, len(hexnode) + 1):
1628 1628 prefix = hexnode[:length]
1629 1629 if not maybewdir(prefix):
1630 1630 return prefix
1631 1631
1632 1632 if not getattr(self, 'filteredrevs', None):
1633 1633 try:
1634 1634 length = max(self.index.shortest(node), minlength)
1635 1635 return disambiguate(hexnode, length)
1636 1636 except error.RevlogError:
1637 1637 if node != self.nodeconstants.wdirid:
1638 1638 raise error.LookupError(
1639 1639 node, self.display_id, _(b'no node')
1640 1640 )
1641 1641 except AttributeError:
1642 1642 # Fall through to pure code
1643 1643 pass
1644 1644
1645 1645 if node == self.nodeconstants.wdirid:
1646 1646 for length in range(minlength, len(hexnode) + 1):
1647 1647 prefix = hexnode[:length]
1648 1648 if isvalid(prefix):
1649 1649 return prefix
1650 1650
1651 1651 for length in range(minlength, len(hexnode) + 1):
1652 1652 prefix = hexnode[:length]
1653 1653 if isvalid(prefix):
1654 1654 return disambiguate(hexnode, length)
1655 1655
1656 1656 def cmp(self, node, text):
1657 1657 """compare text with a given file revision
1658 1658
1659 1659 returns True if text is different from what is stored.
1660 1660 """
1661 1661 p1, p2 = self.parents(node)
1662 1662 return storageutil.hashrevisionsha1(text, p1, p2) != node
1663 1663
1664 1664 def _cachesegment(self, offset, data):
1665 1665 """Add a segment to the revlog cache.
1666 1666
1667 1667 Accepts an absolute offset and the data that is at that location.
1668 1668 """
1669 1669 o, d = self._chunkcache
1670 1670 # try to add to existing cache
1671 1671 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1672 1672 self._chunkcache = o, d + data
1673 1673 else:
1674 1674 self._chunkcache = offset, data
1675 1675
1676 1676 def _readsegment(self, offset, length, df=None):
1677 1677 """Load a segment of raw data from the revlog.
1678 1678
1679 1679 Accepts an absolute offset, length to read, and an optional existing
1680 1680 file handle to read from.
1681 1681
1682 1682 If an existing file handle is passed, it will be seeked and the
1683 1683 original seek position will NOT be restored.
1684 1684
1685 1685 Returns a str or buffer of raw byte data.
1686 1686
1687 1687 Raises if the requested number of bytes could not be read.
1688 1688 """
1689 1689 # Cache data both forward and backward around the requested
1690 1690 # data, in a fixed size window. This helps speed up operations
1691 1691 # involving reading the revlog backwards.
1692 1692 cachesize = self._chunkcachesize
1693 1693 realoffset = offset & ~(cachesize - 1)
1694 1694 reallength = (
1695 1695 (offset + length + cachesize) & ~(cachesize - 1)
1696 1696 ) - realoffset
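# Note: the masking above assumes the chunk cache size is a power of
# two; the revlog configuration code rejects other values.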
1697 1697 with self._datareadfp(df) as df:
1698 1698 df.seek(realoffset)
1699 1699 d = df.read(reallength)
1700 1700
1701 1701 self._cachesegment(realoffset, d)
1702 1702 if offset != realoffset or reallength != length:
1703 1703 startoffset = offset - realoffset
1704 1704 if len(d) - startoffset < length:
1705 1705 raise error.RevlogError(
1706 1706 _(
1707 1707 b'partial read of revlog %s; expected %d bytes from '
1708 1708 b'offset %d, got %d'
1709 1709 )
1710 1710 % (
1711 1711 self._indexfile if self._inline else self._datafile,
1712 1712 length,
1713 1713 offset,
1714 1714 len(d) - startoffset,
1715 1715 )
1716 1716 )
1717 1717
1718 1718 return util.buffer(d, startoffset, length)
1719 1719
1720 1720 if len(d) < length:
1721 1721 raise error.RevlogError(
1722 1722 _(
1723 1723 b'partial read of revlog %s; expected %d bytes from offset '
1724 1724 b'%d, got %d'
1725 1725 )
1726 1726 % (
1727 1727 self._indexfile if self._inline else self._datafile,
1728 1728 length,
1729 1729 offset,
1730 1730 len(d),
1731 1731 )
1732 1732 )
1733 1733
1734 1734 return d
1735 1735
1736 1736 def _getsegment(self, offset, length, df=None):
1737 1737 """Obtain a segment of raw data from the revlog.
1738 1738
1739 1739 Accepts an absolute offset, length of bytes to obtain, and an
1740 1740 optional file handle to the already-opened revlog. If the file
1741 1741 handle is used, its original seek position will not be preserved.
1742 1742
1743 1743 Requests for data may be returned from a cache.
1744 1744
1745 1745 Returns a str or a buffer instance of raw byte data.
1746 1746 """
1747 1747 o, d = self._chunkcache
1748 1748 l = len(d)
1749 1749
1750 1750 # is it in the cache?
1751 1751 cachestart = offset - o
1752 1752 cacheend = cachestart + length
1753 1753 if cachestart >= 0 and cacheend <= l:
1754 1754 if cachestart == 0 and cacheend == l:
1755 1755 return d # avoid a copy
1756 1756 return util.buffer(d, cachestart, cacheend - cachestart)
1757 1757
1758 1758 return self._readsegment(offset, length, df=df)
1759 1759
1760 1760 def _getsegmentforrevs(self, startrev, endrev, df=None):
1761 1761 """Obtain a segment of raw data corresponding to a range of revisions.
1762 1762
1763 1763 Accepts the start and end revisions and an optional already-open
1764 1764 file handle to be used for reading. If the file handle is used, its
1765 1765 seek position will not be preserved.
1766 1766
1767 1767 Requests for data may be satisfied by a cache.
1768 1768
1769 1769 Returns a 2-tuple of (offset, data) for the requested range of
1770 1770 revisions. Offset is the integer offset from the beginning of the
1771 1771 revlog and data is a str or buffer of the raw byte data.
1772 1772
1773 1773 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1774 1774 to determine where each revision's data begins and ends.
1775 1775 """
1776 1776 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1777 1777 # (functions are expensive).
1778 1778 index = self.index
1779 1779 istart = index[startrev]
1780 1780 start = int(istart[0] >> 16)
1781 1781 if startrev == endrev:
1782 1782 end = start + istart[1]
1783 1783 else:
1784 1784 iend = index[endrev]
1785 1785 end = int(iend[0] >> 16) + iend[1]
1786 1786
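# In an inline revlog, each revision's data is preceded by its index
# entry, so raw file offsets must be shifted by one entry_size per
# revision up to and including the one being read.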
1787 1787 if self._inline:
1788 1788 start += (startrev + 1) * self.index.entry_size
1789 1789 end += (endrev + 1) * self.index.entry_size
1790 1790 length = end - start
1791 1791
1792 1792 return start, self._getsegment(start, length, df=df)
1793 1793
1794 1794 def _chunk(self, rev, df=None):
1795 1795 """Obtain a single decompressed chunk for a revision.
1796 1796
1797 1797 Accepts an integer revision and an optional already-open file handle
1798 1798 to be used for reading. If used, the seek position of the file will not
1799 1799 be preserved.
1800 1800
1801 1801 Returns a str holding uncompressed data for the requested revision.
1802 1802 """
1803 1803 compression_mode = self.index[rev][10]
1804 1804 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1805 1805 if compression_mode == COMP_MODE_PLAIN:
1806 1806 return data
1807 1807 elif compression_mode == COMP_MODE_DEFAULT:
1808 1808 return self._decompressor(data)
1809 1809 elif compression_mode == COMP_MODE_INLINE:
1810 1810 return self.decompress(data)
1811 1811 else:
1812 1812 msg = 'unknown compression mode %d'
1813 1813 msg %= compression_mode
1814 1814 raise error.RevlogError(msg)
1815 1815
1816 1816 def _chunks(self, revs, df=None, targetsize=None):
1817 1817 """Obtain decompressed chunks for the specified revisions.
1818 1818
1819 1819 Accepts an iterable of numeric revisions that are assumed to be in
1820 1820 ascending order. Also accepts an optional already-open file handle
1821 1821 to be used for reading. If used, the seek position of the file will
1822 1822 not be preserved.
1823 1823
1824 1824 This function is similar to calling ``self._chunk()`` multiple times,
1825 1825 but is faster.
1826 1826
1827 1827 Returns a list with decompressed data for each requested revision.
1828 1828 """
1829 1829 if not revs:
1830 1830 return []
1831 1831 start = self.start
1832 1832 length = self.length
1833 1833 inline = self._inline
1834 1834 iosize = self.index.entry_size
1835 1835 buffer = util.buffer
1836 1836
1837 1837 l = []
1838 1838 ladd = l.append
1839 1839
1840 1840 if not self._withsparseread:
1841 1841 slicedchunks = (revs,)
1842 1842 else:
1843 1843 slicedchunks = deltautil.slicechunk(
1844 1844 self, revs, targetsize=targetsize
1845 1845 )
1846 1846
1847 1847 for revschunk in slicedchunks:
1848 1848 firstrev = revschunk[0]
1849 1849 # Skip trailing revisions with empty diff
1850 1850 for lastrev in revschunk[::-1]:
1851 1851 if length(lastrev) != 0:
1852 1852 break
1853 1853
1854 1854 try:
1855 1855 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1856 1856 except OverflowError:
1857 1857 # issue4215 - we can't cache a run of chunks greater than
1858 1858 # 2G on Windows
1859 1859 return [self._chunk(rev, df=df) for rev in revschunk]
1860 1860
1861 1861 decomp = self.decompress
1862 1862 # self._decompressor might be None, but will not be used in that case
1863 1863 def_decomp = self._decompressor
1864 1864 for rev in revschunk:
1865 1865 chunkstart = start(rev)
1866 1866 if inline:
1867 1867 chunkstart += (rev + 1) * iosize
1868 1868 chunklength = length(rev)
1869 1869 comp_mode = self.index[rev][10]
1870 1870 c = buffer(data, chunkstart - offset, chunklength)
1871 1871 if comp_mode == COMP_MODE_PLAIN:
1872 1872 ladd(c)
1873 1873 elif comp_mode == COMP_MODE_INLINE:
1874 1874 ladd(decomp(c))
1875 1875 elif comp_mode == COMP_MODE_DEFAULT:
1876 1876 ladd(def_decomp(c))
1877 1877 else:
1878 1878 msg = 'unknown compression mode %d'
1879 1879 msg %= comp_mode
1880 1880 raise error.RevlogError(msg)
1881 1881
1882 1882 return l
1883 1883
1884 1884 def _chunkclear(self):
1885 1885 """Clear the raw chunk cache."""
1886 1886 self._chunkcache = (0, b'')
1887 1887
1888 1888 def deltaparent(self, rev):
1889 1889 """return deltaparent of the given revision"""
1890 1890 base = self.index[rev][3]
1891 1891 if base == rev:
1892 1892 return nullrev
1893 1893 elif self._generaldelta:
1894 1894 return base
1895 1895 else:
1896 1896 return rev - 1
1897 1897
1898 1898 def issnapshot(self, rev):
1899 1899 """tells whether rev is a snapshot"""
1900 1900 if not self._sparserevlog:
1901 1901 return self.deltaparent(rev) == nullrev
1902 1902 elif util.safehasattr(self.index, b'issnapshot'):
1903 1903 # directly assign the method to cache the testing and access
1904 1904 self.issnapshot = self.index.issnapshot
1905 1905 return self.issnapshot(rev)
1906 1906 if rev == nullrev:
1907 1907 return True
1908 1908 entry = self.index[rev]
1909 1909 base = entry[3]
1910 1910 if base == rev:
1911 1911 return True
1912 1912 if base == nullrev:
1913 1913 return True
1914 1914 p1 = entry[5]
1915 1915 p2 = entry[6]
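# A delta against one of the revision's own parents is an ordinary
# delta chain link; only a delta against a non-parent base can be an
# intermediate snapshot, and then only if that base is itself a
# snapshot.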
1916 1916 if base == p1 or base == p2:
1917 1917 return False
1918 1918 return self.issnapshot(base)
1919 1919
1920 1920 def snapshotdepth(self, rev):
1921 1921 """number of snapshot in the chain before this one"""
1922 1922 if not self.issnapshot(rev):
1923 1923 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1924 1924 return len(self._deltachain(rev)[0]) - 1
1925 1925
1926 1926 def revdiff(self, rev1, rev2):
1927 1927 """return or calculate a delta between two revisions
1928 1928
1929 1929 The delta calculated is in binary form and is intended to be written to
1930 1930 revlog data directly. So this function needs raw revision data.
1931 1931 """
1932 1932 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1933 1933 return bytes(self._chunk(rev2))
1934 1934
1935 1935 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1936 1936
1937 1937 def _processflags(self, text, flags, operation, raw=False):
1938 1938 """deprecated entry point to access flag processors"""
1939 1939 msg = b'_processflag(...) use the specialized variant'
1940 1940 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1941 1941 if raw:
1942 1942 return text, flagutil.processflagsraw(self, text, flags)
1943 1943 elif operation == b'read':
1944 1944 return flagutil.processflagsread(self, text, flags)
1945 1945 else: # write operation
1946 1946 return flagutil.processflagswrite(self, text, flags)
1947 1947
1948 1948 def revision(self, nodeorrev, _df=None, raw=False):
1949 1949 """return an uncompressed revision of a given node or revision
1950 1950 number.
1951 1951
1952 1952 _df - an existing file handle to read from. (internal-only)
1953 1953 raw - an optional argument specifying if the revision data is to be
1954 1954 treated as raw data when applying flag transforms. 'raw' should be set
1955 1955 to True when generating changegroups or in debug commands.
1956 1956 """
1957 1957 if raw:
1958 1958 msg = (
1959 1959 b'revlog.revision(..., raw=True) is deprecated, '
1960 1960 b'use revlog.rawdata(...)'
1961 1961 )
1962 1962 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1963 1963 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1964 1964
1965 1965 def sidedata(self, nodeorrev, _df=None):
1966 1966 """a map of extra data related to the changeset but not part of the hash
1967 1967
1968 1968 This function currently returns a dictionary. However, a more
1969 1969 advanced mapping object will likely be used in the future for more
1970 1970 efficient/lazy code.
1971 1971 """
1972 1972 return self._revisiondata(nodeorrev, _df)[1]
1973 1973
1974 1974 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1975 1975 # deal with <nodeorrev> argument type
1976 1976 if isinstance(nodeorrev, int):
1977 1977 rev = nodeorrev
1978 1978 node = self.node(rev)
1979 1979 else:
1980 1980 node = nodeorrev
1981 1981 rev = None
1982 1982
1983 1983 # fast path the special `nullid` rev
1984 1984 if node == self.nullid:
1985 1985 return b"", {}
1986 1986
1987 1987 # ``rawtext`` is the text as stored inside the revlog. Might be the
1988 1988 # revision or might need to be processed to retrieve the revision.
1989 1989 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1990 1990
1991 1991 if self.hassidedata:
1992 1992 if rev is None:
1993 1993 rev = self.rev(node)
1994 1994 sidedata = self._sidedata(rev)
1995 1995 else:
1996 1996 sidedata = {}
1997 1997
1998 1998 if raw and validated:
1999 1999 # if we don't want to process the raw text and that raw
2000 2000 # text is cached, we can exit early.
2001 2001 return rawtext, sidedata
2002 2002 if rev is None:
2003 2003 rev = self.rev(node)
2004 2004 # the revlog's flag for this revision
2005 2005 # (usually alter its state or content)
2006 2006 flags = self.flags(rev)
2007 2007
2008 2008 if validated and flags == REVIDX_DEFAULT_FLAGS:
2009 2009 # no extra flags set, no flag processor runs, text = rawtext
2010 2010 return rawtext, sidedata
2011 2011
2012 2012 if raw:
2013 2013 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2014 2014 text = rawtext
2015 2015 else:
2016 2016 r = flagutil.processflagsread(self, rawtext, flags)
2017 2017 text, validatehash = r
2018 2018 if validatehash:
2019 2019 self.checkhash(text, node, rev=rev)
2020 2020 if not validated:
2021 2021 self._revisioncache = (node, rev, rawtext)
2022 2022
2023 2023 return text, sidedata
2024 2024
2025 2025 def _rawtext(self, node, rev, _df=None):
2026 2026 """return the possibly unvalidated rawtext for a revision
2027 2027
2028 2028 returns (rev, rawtext, validated)
2029 2029 """
2030 2030
2031 2031 # revision in the cache (could be useful to apply delta)
2032 2032 cachedrev = None
2033 2033 # An intermediate text to apply deltas to
2034 2034 basetext = None
2035 2035
2036 2036 # Check if we have the entry in cache
2037 2037 # The cache entry looks like (node, rev, rawtext)
2038 2038 if self._revisioncache:
2039 2039 if self._revisioncache[0] == node:
2040 2040 return (rev, self._revisioncache[2], True)
2041 2041 cachedrev = self._revisioncache[1]
2042 2042
2043 2043 if rev is None:
2044 2044 rev = self.rev(node)
2045 2045
2046 2046 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2047 2047 if stopped:
2048 2048 basetext = self._revisioncache[2]
2049 2049
2050 2050 # drop the cache to save memory; the caller is expected to
2051 2051 # update self._revisioncache after validating the text
2052 2052 self._revisioncache = None
2053 2053
2054 2054 targetsize = None
2055 2055 rawsize = self.index[rev][2]
2056 2056 if 0 <= rawsize:
2057 2057 targetsize = 4 * rawsize
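# heuristic: when slicing the delta chain into read segments, avoid
# reading (much) more than four times the expanded text size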
2058 2058
2059 2059 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2060 2060 if basetext is None:
2061 2061 basetext = bytes(bins[0])
2062 2062 bins = bins[1:]
2063 2063
2064 2064 rawtext = mdiff.patches(basetext, bins)
2065 2065 del basetext # let us have a chance to free memory early
2066 2066 return (rev, rawtext, False)
2067 2067
2068 2068 def _sidedata(self, rev):
2069 2069 """Return the sidedata for a given revision number."""
2070 2070 index_entry = self.index[rev]
2071 2071 sidedata_offset = index_entry[8]
2072 2072 sidedata_size = index_entry[9]
2073 2073
2074 2074 if self._inline:
2075 2075 sidedata_offset += self.index.entry_size * (1 + rev)
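# same interleaving adjustment as for revision data: inline revlogs
# store one index entry before each revision's data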
2076 2076 if sidedata_size == 0:
2077 2077 return {}
2078 2078
2079 2079 comp_segment = self._getsegment(sidedata_offset, sidedata_size)
2080 2080 comp = self.index[rev][11]
2081 2081 if comp == COMP_MODE_PLAIN:
2082 2082 segment = comp_segment
2083 2083 elif comp == COMP_MODE_DEFAULT:
2084 2084 segment = self._decompressor(comp_segment)
2085 2085 elif comp == COMP_MODE_INLINE:
2086 2086 segment = self.decompress(comp_segment)
2087 2087 else:
2088 2088 msg = 'unknown compression mode %d'
2089 2089 msg %= comp
2090 2090 raise error.RevlogError(msg)
2091 2091
2092 2092 sidedata = sidedatautil.deserialize_sidedata(segment)
2093 2093 return sidedata
2094 2094
2095 2095 def rawdata(self, nodeorrev, _df=None):
2096 2096 """return an uncompressed raw data of a given node or revision number.
2097 2097
2098 2098 _df - an existing file handle to read from. (internal-only)
2099 2099 """
2100 2100 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2101 2101
2102 2102 def hash(self, text, p1, p2):
2103 2103 """Compute a node hash.
2104 2104
2105 2105 Available as a function so that subclasses can replace the hash
2106 2106 as needed.
2107 2107 """
2108 2108 return storageutil.hashrevisionsha1(text, p1, p2)
2109 2109
2110 2110 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2111 2111 """Check node hash integrity.
2112 2112
2113 2113 Available as a function so that subclasses can extend hash mismatch
2114 2114 behaviors as needed.
2115 2115 """
2116 2116 try:
2117 2117 if p1 is None and p2 is None:
2118 2118 p1, p2 = self.parents(node)
2119 2119 if node != self.hash(text, p1, p2):
2120 2120 # Clear the revision cache on hash failure. The revision cache
2121 2121 # only stores the raw revision and clearing the cache does have
2122 2122 # the side-effect that we won't have a cache hit when the raw
2123 2123 # revision data is accessed. But this case should be rare and
2124 2124 # it is extra work to teach the cache about the hash
2125 2125 # verification state.
2126 2126 if self._revisioncache and self._revisioncache[0] == node:
2127 2127 self._revisioncache = None
2128 2128
2129 2129 revornode = rev
2130 2130 if revornode is None:
2131 2131 revornode = templatefilters.short(hex(node))
2132 2132 raise error.RevlogError(
2133 2133 _(b"integrity check failed on %s:%s")
2134 2134 % (self.display_id, pycompat.bytestr(revornode))
2135 2135 )
2136 2136 except error.RevlogError:
2137 2137 if self._censorable and storageutil.iscensoredtext(text):
2138 2138 raise error.CensoredNodeError(self.display_id, node, text)
2139 2139 raise
2140 2140
2141 2141 def _enforceinlinesize(self, tr):
2142 2142 """Check if the revlog is too big for inline and convert if so.
2143 2143
2144 2144 This should be called after revisions are added to the revlog. If the
2145 2145 revlog has grown too large to be an inline revlog, it will convert it
2146 2146 to use multiple index and data files.
2147 2147 """
2148 2148 tiprev = len(self) - 1
2149 2149 total_size = self.start(tiprev) + self.length(tiprev)
2150 2150 if not self._inline or total_size < _maxinline:
2151 2151 return
2152 2152
2153 2153 troffset = tr.findoffset(self._indexfile)
2154 2154 if troffset is None:
2155 2155 raise error.RevlogError(
2156 2156 _(b"%s not found in the transaction") % self._indexfile
2157 2157 )
2158 2158 trindex = 0
2159 2159 tr.add(self._datafile, 0)
2160 2160
2161 2161 existing_handles = False
2162 2162 if self._writinghandles is not None:
2163 2163 existing_handles = True
2164 2164 fp = self._writinghandles[0]
2165 2165 fp.flush()
2166 2166 fp.close()
2167 2167 # We can't use the cached file handle after close(). So prevent
2168 2168 # its usage.
2169 2169 self._writinghandles = None
2170 2170
2171 2171 new_dfh = self._datafp(b'w+')
2172 2172 new_dfh.truncate(0) # drop any potentially existing data
2173 2173 try:
2174 2174 with self._indexfp() as read_ifh:
2175 2175 for r in self:
2176 2176 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
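# troffset is an offset into the inline file, where revision r's
# index entry sits at start(r) + r * entry_size (its data follows
# the entry); comparing against the bare data offset start(r) would
# be off by r index entries.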
2177 if troffset <= self.start(r):
2177 if troffset <= self.start(r) + r * self.index.entry_size:
2178 2178 trindex = r
2179 2179 new_dfh.flush()
2180 2180
2181 2181 with self.__index_new_fp() as fp:
2182 2182 self._format_flags &= ~FLAG_INLINE_DATA
2183 2183 self._inline = False
2184 2184 for i in self:
2185 2185 e = self.index.entry_binary(i)
2186 2186 if i == 0 and self._docket is None:
2187 2187 header = self._format_flags | self._format_version
2188 2188 header = self.index.pack_header(header)
2189 2189 e = header + e
2190 2190 fp.write(e)
2191 2191 if self._docket is not None:
2192 2192 self._docket.index_end = fp.tell()
2193 2193 # the temp file replaces the real index when we exit the context
2194 2194 # manager
2195 2195
2196 2196 tr.replace(self._indexfile, trindex * self.index.entry_size)
2197 2197 nodemaputil.setup_persistent_nodemap(tr, self)
2198 2198 self._chunkclear()
2199 2199
2200 2200 if existing_handles:
2201 2201 # switched from inline to conventional; reopen the index
2202 2202 ifh = self.__index_write_fp()
2203 2203 self._writinghandles = (ifh, new_dfh)
2204 2204 new_dfh = None
2205 2205 finally:
2206 2206 if new_dfh is not None:
2207 2207 new_dfh.close()
2208 2208
2209 2209 def _nodeduplicatecallback(self, transaction, node):
2210 2210 """called when trying to add a node already stored."""
2211 2211
2212 2212 @contextlib.contextmanager
2213 2213 def _writing(self, transaction):
2214 2214 if self._trypending:
2215 2215 msg = b'try to write in a `trypending` revlog: %s'
2216 2216 msg %= self.display_id
2217 2217 raise error.ProgrammingError(msg)
2218 2218 if self._writinghandles is not None:
2219 2219 yield
2220 2220 else:
2221 2221 r = len(self)
2222 2222 dsize = 0
2223 2223 if r:
2224 2224 dsize = self.end(r - 1)
2225 2225 dfh = None
2226 2226 if not self._inline:
2227 2227 try:
2228 2228 dfh = self._datafp(b"r+")
2229 2229 if self._docket is None:
2230 2230 dfh.seek(0, os.SEEK_END)
2231 2231 else:
2232 2232 dfh.seek(self._docket.data_end, os.SEEK_SET)
2233 2233 except IOError as inst:
2234 2234 if inst.errno != errno.ENOENT:
2235 2235 raise
2236 2236 dfh = self._datafp(b"w+")
2237 2237 transaction.add(self._datafile, dsize)
2238 2238 try:
2239 2239 isize = r * self.index.entry_size
2240 2240 ifh = self.__index_write_fp()
2241 2241 if self._inline:
2242 2242 transaction.add(self._indexfile, dsize + isize)
2243 2243 else:
2244 2244 transaction.add(self._indexfile, isize)
2245 2245 try:
2246 2246 self._writinghandles = (ifh, dfh)
2247 2247 try:
2248 2248 yield
2249 2249 if self._docket is not None:
2250 2250 self._write_docket(transaction)
2251 2251 finally:
2252 2252 self._writinghandles = None
2253 2253 finally:
2254 2254 ifh.close()
2255 2255 finally:
2256 2256 if dfh is not None:
2257 2257 dfh.close()
2258 2258
2259 2259 def _write_docket(self, transaction):
2260 2260 """write the current docket on disk
2261 2261
2262 2262 Exists as a method to help the changelog implement its transaction logic.
2263 2263
2264 2264 We could also imagine using the same transaction logic for all revlogs
2265 2265 since dockets are cheap."""
2266 2266 self._docket.write(transaction)
2267 2267
2268 2268 def addrevision(
2269 2269 self,
2270 2270 text,
2271 2271 transaction,
2272 2272 link,
2273 2273 p1,
2274 2274 p2,
2275 2275 cachedelta=None,
2276 2276 node=None,
2277 2277 flags=REVIDX_DEFAULT_FLAGS,
2278 2278 deltacomputer=None,
2279 2279 sidedata=None,
2280 2280 ):
2281 2281 """add a revision to the log
2282 2282
2283 2283 text - the revision data to add
2284 2284 transaction - the transaction object used for rollback
2285 2285 link - the linkrev data to add
2286 2286 p1, p2 - the parent nodeids of the revision
2287 2287 cachedelta - an optional precomputed delta
2288 2288 node - nodeid of revision; typically node is not specified, and it is
2289 2289 computed by default as hash(text, p1, p2); however, subclasses might
2290 2290 use a different hashing method (and override checkhash() in that case)
2291 2291 flags - the known flags to set on the revision
2292 2292 deltacomputer - an optional deltacomputer instance shared between
2293 2293 multiple calls
2294 2294 """
2295 2295 if link == nullrev:
2296 2296 raise error.RevlogError(
2297 2297 _(b"attempted to add linkrev -1 to %s") % self.display_id
2298 2298 )
2299 2299
2300 2300 if sidedata is None:
2301 2301 sidedata = {}
2302 2302 elif sidedata and not self.hassidedata:
2303 2303 raise error.ProgrammingError(
2304 2304 _(b"trying to add sidedata to a revlog who don't support them")
2305 2305 )
2306 2306
2307 2307 if flags:
2308 2308 node = node or self.hash(text, p1, p2)
2309 2309
2310 2310 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2311 2311
2312 2312 # If the flag processor modifies the revision data, ignore any provided
2313 2313 # cachedelta.
2314 2314 if rawtext != text:
2315 2315 cachedelta = None
2316 2316
2317 2317 if len(rawtext) > _maxentrysize:
2318 2318 raise error.RevlogError(
2319 2319 _(
2320 2320 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2321 2321 )
2322 2322 % (self.display_id, len(rawtext))
2323 2323 )
2324 2324
2325 2325 node = node or self.hash(rawtext, p1, p2)
2326 2326 rev = self.index.get_rev(node)
2327 2327 if rev is not None:
2328 2328 return rev
2329 2329
2330 2330 if validatehash:
2331 2331 self.checkhash(rawtext, node, p1=p1, p2=p2)
2332 2332
2333 2333 return self.addrawrevision(
2334 2334 rawtext,
2335 2335 transaction,
2336 2336 link,
2337 2337 p1,
2338 2338 p2,
2339 2339 node,
2340 2340 flags,
2341 2341 cachedelta=cachedelta,
2342 2342 deltacomputer=deltacomputer,
2343 2343 sidedata=sidedata,
2344 2344 )
2345 2345
2346 2346 def addrawrevision(
2347 2347 self,
2348 2348 rawtext,
2349 2349 transaction,
2350 2350 link,
2351 2351 p1,
2352 2352 p2,
2353 2353 node,
2354 2354 flags,
2355 2355 cachedelta=None,
2356 2356 deltacomputer=None,
2357 2357 sidedata=None,
2358 2358 ):
2359 2359 """add a raw revision with known flags, node and parents
2360 2360 useful when reusing a revision not stored in this revlog (e.g. received
2361 2361 over the wire, or read from an external bundle).
2362 2362 """
2363 2363 with self._writing(transaction):
2364 2364 return self._addrevision(
2365 2365 node,
2366 2366 rawtext,
2367 2367 transaction,
2368 2368 link,
2369 2369 p1,
2370 2370 p2,
2371 2371 flags,
2372 2372 cachedelta,
2373 2373 deltacomputer=deltacomputer,
2374 2374 sidedata=sidedata,
2375 2375 )
2376 2376
2377 2377 def compress(self, data):
2378 2378 """Generate a possibly-compressed representation of data."""
2379 2379 if not data:
2380 2380 return b'', data
2381 2381
2382 2382 compressed = self._compressor.compress(data)
2383 2383
2384 2384 if compressed:
2385 2385 # The revlog compressor added the header in the returned data.
2386 2386 return b'', compressed
2387 2387
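# A chunk starting with NUL cannot be mistaken for a compression
# header, so it can be stored without a marker; anything else gets
# the b'u' prefix that decompress() treats as "uncompressed".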
2388 2388 if data[0:1] == b'\0':
2389 2389 return b'', data
2390 2390 return b'u', data
2391 2391
2392 2392 def decompress(self, data):
2393 2393 """Decompress a revlog chunk.
2394 2394
2395 2395 The chunk is expected to begin with a header identifying the
2396 2396 format type so it can be routed to an appropriate decompressor.
2397 2397 """
2398 2398 if not data:
2399 2399 return data
2400 2400
2401 2401 # Revlogs are read much more frequently than they are written and many
2402 2402 # chunks only take microseconds to decompress, so performance is
2403 2403 # important here.
2404 2404 #
2405 2405 # We can make a few assumptions about revlogs:
2406 2406 #
2407 2407 # 1) the majority of chunks will be compressed (as opposed to inline
2408 2408 # raw data).
2409 2409 # 2) decompressing *any* data will likely be at least 10x slower than
2410 2410 # returning raw inline data.
2411 2411 # 3) we want to prioritize common and officially supported compression
2412 2412 # engines
2413 2413 #
2414 2414 # It follows that we want to optimize for "decompress compressed data
2415 2415 # when encoded with common and officially supported compression engines"
2416 2416 # case over "raw data" and "data encoded by less common or non-official
2417 2417 # compression engines." That is why we have the inline lookup first
2418 2418 # followed by the compengines lookup.
2419 2419 #
2420 2420 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2421 2421 # compressed chunks. And this matters for changelog and manifest reads.
2422 2422 t = data[0:1]
2423 2423
2424 2424 if t == b'x':
2425 2425 try:
2426 2426 return _zlibdecompress(data)
2427 2427 except zlib.error as e:
2428 2428 raise error.RevlogError(
2429 2429 _(b'revlog decompress error: %s')
2430 2430 % stringutil.forcebytestr(e)
2431 2431 )
2432 2432 # '\0' is more common than 'u' so it goes first.
2433 2433 elif t == b'\0':
2434 2434 return data
2435 2435 elif t == b'u':
2436 2436 return util.buffer(data, 1)
2437 2437
2438 2438 compressor = self._get_decompressor(t)
2439 2439
2440 2440 return compressor.decompress(data)
2441 2441
2442 2442 def _addrevision(
2443 2443 self,
2444 2444 node,
2445 2445 rawtext,
2446 2446 transaction,
2447 2447 link,
2448 2448 p1,
2449 2449 p2,
2450 2450 flags,
2451 2451 cachedelta,
2452 2452 alwayscache=False,
2453 2453 deltacomputer=None,
2454 2454 sidedata=None,
2455 2455 ):
2456 2456 """internal function to add revisions to the log
2457 2457
2458 2458 see addrevision for argument descriptions.
2459 2459
2460 2460 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2461 2461
2462 2462 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2463 2463 be used.
2464 2464
2465 2465 invariants:
2466 2466 - rawtext is optional (can be None); if not set, cachedelta must be set.
2467 2467 if both are set, they must correspond to each other.
2468 2468 """
2469 2469 if node == self.nullid:
2470 2470 raise error.RevlogError(
2471 2471 _(b"%s: attempt to add null revision") % self.display_id
2472 2472 )
2473 2473 if (
2474 2474 node == self.nodeconstants.wdirid
2475 2475 or node in self.nodeconstants.wdirfilenodeids
2476 2476 ):
2477 2477 raise error.RevlogError(
2478 2478 _(b"%s: attempt to add wdir revision") % self.display_id
2479 2479 )
2480 2480 if self._writinghandles is None:
2481 2481 msg = b'adding revision outside `revlog._writing` context'
2482 2482 raise error.ProgrammingError(msg)
2483 2483
2484 2484 if self._inline:
2485 2485 fh = self._writinghandles[0]
2486 2486 else:
2487 2487 fh = self._writinghandles[1]
2488 2488
2489 2489 btext = [rawtext]
2490 2490
2491 2491 curr = len(self)
2492 2492 prev = curr - 1
2493 2493
2494 2494 offset = self._get_data_offset(prev)
2495 2495
2496 2496 if self._concurrencychecker:
2497 2497 ifh, dfh = self._writinghandles
2498 2498 if self._inline:
2499 2499 # offset is "as if" it were in the .d file, so we need to add on
2500 2500 # the size of the entry metadata.
2501 2501 self._concurrencychecker(
2502 2502 ifh, self._indexfile, offset + curr * self.index.entry_size
2503 2503 )
2504 2504 else:
2505 2505 # Entries in the .i are a consistent size.
2506 2506 self._concurrencychecker(
2507 2507 ifh, self._indexfile, curr * self.index.entry_size
2508 2508 )
2509 2509 self._concurrencychecker(dfh, self._datafile, offset)
2510 2510
2511 2511 p1r, p2r = self.rev(p1), self.rev(p2)
2512 2512
2513 2513 # full versions are inserted when the needed deltas
2514 2514 # become comparable to the uncompressed text
2515 2515 if rawtext is None:
2516 2516 # need the rawtext size before it is changed by flag processors,
2517 2517 # i.e. the non-raw size. use revlog explicitly to avoid filelog's
2518 2518 # extra logic that might remove metadata size.
2519 2519 textlen = mdiff.patchedsize(
2520 2520 revlog.size(self, cachedelta[0]), cachedelta[1]
2521 2521 )
2522 2522 else:
2523 2523 textlen = len(rawtext)
2524 2524
2525 2525 if deltacomputer is None:
2526 2526 deltacomputer = deltautil.deltacomputer(self)
2527 2527
2528 2528 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2529 2529
2530 2530 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2531 2531
2532 2532 compression_mode = COMP_MODE_INLINE
2533 2533 if self._docket is not None:
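# revlog formats with a docket record an explicit compression mode in
# each index entry, which lets us omit the per-chunk header whenever
# the docket already names the compression engine.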
2534 2534 h, d = deltainfo.data
2535 2535 if not h and not d:
2536 2536 # no data to store at all... declare it uncompressed
2537 2537 compression_mode = COMP_MODE_PLAIN
2538 2538 elif not h:
2539 2539 t = d[0:1]
2540 2540 if t == b'\0':
2541 2541 compression_mode = COMP_MODE_PLAIN
2542 2542 elif t == self._docket.default_compression_header:
2543 2543 compression_mode = COMP_MODE_DEFAULT
2544 2544 elif h == b'u':
2545 2545 # we have a more efficient way to declare uncompressed
2546 2546 h = b''
2547 2547 compression_mode = COMP_MODE_PLAIN
2548 2548 deltainfo = deltautil.drop_u_compression(deltainfo)
2549 2549
2550 2550 sidedata_compression_mode = COMP_MODE_INLINE
2551 2551 if sidedata and self.hassidedata:
2552 2552 sidedata_compression_mode = COMP_MODE_PLAIN
2553 2553 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2554 2554 sidedata_offset = offset + deltainfo.deltalen
2555 2555 h, comp_sidedata = self.compress(serialized_sidedata)
2556 2556 if (
2557 2557 h != b'u'
2558 2558 and comp_sidedata[0:1] != b'\0'
2559 2559 and len(comp_sidedata) < len(serialized_sidedata)
2560 2560 ):
2561 2561 assert not h
2562 2562 if (
2563 2563 comp_sidedata[0:1]
2564 2564 == self._docket.default_compression_header
2565 2565 ):
2566 2566 sidedata_compression_mode = COMP_MODE_DEFAULT
2567 2567 serialized_sidedata = comp_sidedata
2568 2568 else:
2569 2569 sidedata_compression_mode = COMP_MODE_INLINE
2570 2570 serialized_sidedata = comp_sidedata
2571 2571 else:
2572 2572 serialized_sidedata = b""
2573 2573 # Don't store the offset if the sidedata is empty; that way
2574 2574 # we can easily detect empty sidedata, and it will be no different
2575 2575 # from sidedata we add manually.
2576 2576 sidedata_offset = 0
2577 2577
2578 2578 e = (
2579 2579 offset_type(offset, flags),
2580 2580 deltainfo.deltalen,
2581 2581 textlen,
2582 2582 deltainfo.base,
2583 2583 link,
2584 2584 p1r,
2585 2585 p2r,
2586 2586 node,
2587 2587 sidedata_offset,
2588 2588 len(serialized_sidedata),
2589 2589 compression_mode,
2590 2590 sidedata_compression_mode,
2591 2591 )
2592 2592
2593 2593 self.index.append(e)
2594 2594 entry = self.index.entry_binary(curr)
2595 2595 if curr == 0 and self._docket is None:
2596 2596 header = self._format_flags | self._format_version
2597 2597 header = self.index.pack_header(header)
2598 2598 entry = header + entry
2599 2599 self._writeentry(
2600 2600 transaction,
2601 2601 entry,
2602 2602 deltainfo.data,
2603 2603 link,
2604 2604 offset,
2605 2605 serialized_sidedata,
2606 2606 )
2607 2607
2608 2608 rawtext = btext[0]
2609 2609
2610 2610 if alwayscache and rawtext is None:
2611 2611 rawtext = deltacomputer.buildtext(revinfo, fh)
2612 2612
2613 2613 if type(rawtext) == bytes: # only accept immutable objects
2614 2614 self._revisioncache = (node, curr, rawtext)
2615 2615 self._chainbasecache[curr] = deltainfo.chainbase
2616 2616 return curr
2617 2617
2618 2618 def _get_data_offset(self, prev):
2619 2619 """Returns the current offset in the (in-transaction) data file.
2620 2620 Versions < 2 of the revlog can get this in O(1), while revlog v2 needs a docket
2621 2621 file to store that information: since sidedata can be rewritten to the
2622 2622 end of the data file within a transaction, you can have cases where, for
2623 2623 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2624 2624 to `n - 1`'s sidedata being written after `n`'s data.
2625 2625
2626 2626 TODO cache this in a docket file before getting out of experimental."""
2627 2627 if self._docket is None:
2628 2628 return self.end(prev)
2629 2629 else:
2630 2630 return self._docket.data_end
2631 2631
2632 2632 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2633 2633 # Files opened in a+ mode have inconsistent behavior on various
2634 2634 # platforms. Windows requires that a file positioning call be made
2635 2635 # when the file handle transitions between reads and writes. See
2636 2636 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2637 2637 # platforms, Python or the platform itself can be buggy. Some versions
2638 2638 # of Solaris have been observed to not append at the end of the file
2639 2639 # if the file was seeked to before the end. See issue4943 for more.
2640 2640 #
2641 2641 # We work around this issue by inserting a seek() before writing.
2642 2642 # Note: This is likely not necessary on Python 3. However, because
2643 2643 # the file handle is reused for reads and may be seeked there, we need
2644 2644 # to be careful before changing this.
2645 2645 if self._writinghandles is None:
2646 2646 msg = b'adding revision outside `revlog._writing` context'
2647 2647 raise error.ProgrammingError(msg)
2648 2648 ifh, dfh = self._writinghandles
2649 2649 if self._docket is None:
2650 2650 ifh.seek(0, os.SEEK_END)
2651 2651 else:
2652 2652 ifh.seek(self._docket.index_end, os.SEEK_SET)
2653 2653 if dfh:
2654 2654 if self._docket is None:
2655 2655 dfh.seek(0, os.SEEK_END)
2656 2656 else:
2657 2657 dfh.seek(self._docket.data_end, os.SEEK_SET)
2658 2658
2659 2659 curr = len(self) - 1
2660 2660 if not self._inline:
2661 2661 transaction.add(self._datafile, offset)
2662 2662 transaction.add(self._indexfile, curr * len(entry))
2663 2663 if data[0]:
2664 2664 dfh.write(data[0])
2665 2665 dfh.write(data[1])
2666 2666 if sidedata:
2667 2667 dfh.write(sidedata)
2668 2668 ifh.write(entry)
2669 2669 else:
2670 2670 offset += curr * self.index.entry_size
2671 2671 transaction.add(self._indexfile, offset)
2672 2672 ifh.write(entry)
2673 2673 ifh.write(data[0])
2674 2674 ifh.write(data[1])
2675 2675 if sidedata:
2676 2676 ifh.write(sidedata)
2677 2677 self._enforceinlinesize(transaction)
2678 2678 if self._docket is not None:
2679 2679 self._docket.index_end = self._writinghandles[0].tell()
2680 2680 self._docket.data_end = self._writinghandles[1].tell()
2681 2681
2682 2682 nodemaputil.setup_persistent_nodemap(transaction, self)
2683 2683
2684 2684 def addgroup(
2685 2685 self,
2686 2686 deltas,
2687 2687 linkmapper,
2688 2688 transaction,
2689 2689 alwayscache=False,
2690 2690 addrevisioncb=None,
2691 2691 duplicaterevisioncb=None,
2692 2692 ):
2693 2693 """
2694 2694 add a delta group
2695 2695
2696 2696 given a set of deltas, add them to the revision log. the
2697 2697 first delta is against its parent, which should be in our
2698 2698 log, the rest are against the previous delta.
2699 2699
2700 2700 If ``addrevisioncb`` is defined, it will be called with arguments of
2701 2701 this revlog and the node that was added.
2702 2702 """
2703 2703
2704 2704 if self._adding_group:
2705 2705 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2706 2706
2707 2707 self._adding_group = True
2708 2708 empty = True
2709 2709 try:
2710 2710 with self._writing(transaction):
2711 2711 deltacomputer = deltautil.deltacomputer(self)
2712 2712 # loop through our set of deltas
2713 2713 for data in deltas:
2714 2714 (
2715 2715 node,
2716 2716 p1,
2717 2717 p2,
2718 2718 linknode,
2719 2719 deltabase,
2720 2720 delta,
2721 2721 flags,
2722 2722 sidedata,
2723 2723 ) = data
2724 2724 link = linkmapper(linknode)
2725 2725 flags = flags or REVIDX_DEFAULT_FLAGS
2726 2726
2727 2727 rev = self.index.get_rev(node)
2728 2728 if rev is not None:
2729 2729 # this can happen if two branches make the same change
2730 2730 self._nodeduplicatecallback(transaction, rev)
2731 2731 if duplicaterevisioncb:
2732 2732 duplicaterevisioncb(self, rev)
2733 2733 empty = False
2734 2734 continue
2735 2735
2736 2736 for p in (p1, p2):
2737 2737 if not self.index.has_node(p):
2738 2738 raise error.LookupError(
2739 2739 p, self.radix, _(b'unknown parent')
2740 2740 )
2741 2741
2742 2742 if not self.index.has_node(deltabase):
2743 2743 raise error.LookupError(
2744 2744 deltabase, self.display_id, _(b'unknown delta base')
2745 2745 )
2746 2746
2747 2747 baserev = self.rev(deltabase)
2748 2748
2749 2749 if baserev != nullrev and self.iscensored(baserev):
2750 2750 # if base is censored, delta must be full replacement in a
2751 2751 # single patch operation
2752 2752 hlen = struct.calcsize(b">lll")
2753 2753 oldlen = self.rawsize(baserev)
2754 2754 newlen = len(delta) - hlen
2755 2755 if delta[:hlen] != mdiff.replacediffheader(
2756 2756 oldlen, newlen
2757 2757 ):
2758 2758 raise error.CensoredBaseError(
2759 2759 self.display_id, self.node(baserev)
2760 2760 )
2761 2761
2762 2762 if not flags and self._peek_iscensored(baserev, delta):
2763 2763 flags |= REVIDX_ISCENSORED
2764 2764
2765 2765 # We assume consumers of addrevisioncb will want to retrieve
2766 2766 # the added revision, which will require a call to
2767 2767 # revision(). revision() will fast path if there is a cache
2768 2768 # hit. So, we tell _addrevision() to always cache in this case.
2769 2769 # We're only using addgroup() in the context of changegroup
2770 2770 # generation so the revision data can always be handled as raw
2771 2771 # by the flagprocessor.
2772 2772 rev = self._addrevision(
2773 2773 node,
2774 2774 None,
2775 2775 transaction,
2776 2776 link,
2777 2777 p1,
2778 2778 p2,
2779 2779 flags,
2780 2780 (baserev, delta),
2781 2781 alwayscache=alwayscache,
2782 2782 deltacomputer=deltacomputer,
2783 2783 sidedata=sidedata,
2784 2784 )
2785 2785
2786 2786 if addrevisioncb:
2787 2787 addrevisioncb(self, rev)
2788 2788 empty = False
2789 2789 finally:
2790 2790 self._adding_group = False
2791 2791 return not empty
2792 2792
2793 2793 def iscensored(self, rev):
2794 2794 """Check if a file revision is censored."""
2795 2795 if not self._censorable:
2796 2796 return False
2797 2797
2798 2798 return self.flags(rev) & REVIDX_ISCENSORED
2799 2799
2800 2800 def _peek_iscensored(self, baserev, delta):
2801 2801 """Quickly check if a delta produces a censored revision."""
2802 2802 if not self._censorable:
2803 2803 return False
2804 2804
2805 2805 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2806 2806
2807 2807 def getstrippoint(self, minlink):
2808 2808 """find the minimum rev that must be stripped to strip the linkrev
2809 2809
2810 2810 Returns a tuple containing the minimum rev and a set of all revs that
2811 2811 have linkrevs that will be broken by this strip.
2812 2812 """
2813 2813 return storageutil.resolvestripinfo(
2814 2814 minlink,
2815 2815 len(self) - 1,
2816 2816 self.headrevs(),
2817 2817 self.linkrev,
2818 2818 self.parentrevs,
2819 2819 )
2820 2820
2821 2821 def strip(self, minlink, transaction):
2822 2822 """truncate the revlog on the first revision with a linkrev >= minlink
2823 2823
2824 2824 This function is called when we're stripping revision minlink and
2825 2825 its descendants from the repository.
2826 2826
2827 2827 We have to remove all revisions with linkrev >= minlink, because
2828 2828 the equivalent changelog revisions will be renumbered after the
2829 2829 strip.
2830 2830
2831 2831 So we truncate the revlog on the first of these revisions, and
2832 2832 trust that the caller has saved the revisions that shouldn't be
2833 2833 removed and that it'll re-add them after this truncation.
2834 2834 """
2835 2835 if len(self) == 0:
2836 2836 return
2837 2837
2838 2838 rev, _ = self.getstrippoint(minlink)
2839 2839 if rev == len(self):
2840 2840 return
2841 2841
2842 2842 # first truncate the files on disk
2843 2843 data_end = self.start(rev)
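# an inline index interleaves one index entry with each revision's
# data, so its truncation point covers the surviving data plus rev
# entries; a separate index is just rev fixed-size entries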
2844 2844 if not self._inline:
2845 2845 transaction.add(self._datafile, data_end)
2846 2846 end = rev * self.index.entry_size
2847 2847 else:
2848 2848 end = data_end + (rev * self.index.entry_size)
2849 2849
2850 2850 transaction.add(self._indexfile, end)
2851 2851 if self._docket is not None:
2852 2852 # XXX we could leverage the docket while stripping. However, it is
2853 2853 # not powerful enough at the time of this comment
2854 2854 self._docket.index_end = end
2855 2855 self._docket.data_end = data_end
2856 2856 self._docket.write(transaction, stripping=True)
2857 2857
2858 2858 # then reset internal state in memory to forget those revisions
2859 2859 self._revisioncache = None
2860 2860 self._chaininfocache = util.lrucachedict(500)
2861 2861 self._chunkclear()
2862 2862
2863 2863 del self.index[rev:-1]
2864 2864
2865 2865 def checksize(self):
2866 2866 """Check size of index and data files
2867 2867
2868 2868 return a (dd, di) tuple.
2869 2869 - dd: extra bytes for the "data" file
2870 2870 - di: extra bytes for the "index" file
2871 2871
2872 2872 A healthy revlog will return (0, 0).
2873 2873 """
2874 2874 expected = 0
2875 2875 if len(self):
2876 2876 expected = max(0, self.end(len(self) - 1))
2877 2877
2878 2878 try:
2879 2879 with self._datafp() as f:
2880 2880 f.seek(0, io.SEEK_END)
2881 2881 actual = f.tell()
2882 2882 dd = actual - expected
2883 2883 except IOError as inst:
2884 2884 if inst.errno != errno.ENOENT:
2885 2885 raise
2886 2886 dd = 0
2887 2887
2888 2888 try:
2889 2889 f = self.opener(self._indexfile)
2890 2890 f.seek(0, io.SEEK_END)
2891 2891 actual = f.tell()
2892 2892 f.close()
2893 2893 s = self.index.entry_size
2894 2894 i = max(0, actual // s)
2895 2895 di = actual - (i * s)
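# inline revlogs keep the data inside the index file, so the expected
# size there is one index entry plus the stored data per revision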
2896 2896 if self._inline:
2897 2897 databytes = 0
2898 2898 for r in self:
2899 2899 databytes += max(0, self.length(r))
2900 2900 dd = 0
2901 2901 di = actual - len(self) * s - databytes
2902 2902 except IOError as inst:
2903 2903 if inst.errno != errno.ENOENT:
2904 2904 raise
2905 2905 di = 0
2906 2906
2907 2907 return (dd, di)
2908 2908
2909 2909 def files(self):
2910 2910 res = [self._indexfile]
2911 2911 if not self._inline:
2912 2912 res.append(self._datafile)
2913 2913 return res
2914 2914
2915 2915 def emitrevisions(
2916 2916 self,
2917 2917 nodes,
2918 2918 nodesorder=None,
2919 2919 revisiondata=False,
2920 2920 assumehaveparentrevisions=False,
2921 2921 deltamode=repository.CG_DELTAMODE_STD,
2922 2922 sidedata_helpers=None,
2923 2923 ):
2924 2924 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2925 2925 raise error.ProgrammingError(
2926 2926 b'unhandled value for nodesorder: %s' % nodesorder
2927 2927 )
2928 2928
2929 2929 if nodesorder is None and not self._generaldelta:
2930 2930 nodesorder = b'storage'
2931 2931
2932 2932 if (
2933 2933 not self._storedeltachains
2934 2934 and deltamode != repository.CG_DELTAMODE_PREV
2935 2935 ):
2936 2936 deltamode = repository.CG_DELTAMODE_FULL
2937 2937
2938 2938 return storageutil.emitrevisions(
2939 2939 self,
2940 2940 nodes,
2941 2941 nodesorder,
2942 2942 revlogrevisiondelta,
2943 2943 deltaparentfn=self.deltaparent,
2944 2944 candeltafn=self.candelta,
2945 2945 rawsizefn=self.rawsize,
2946 2946 revdifffn=self.revdiff,
2947 2947 flagsfn=self.flags,
2948 2948 deltamode=deltamode,
2949 2949 revisiondata=revisiondata,
2950 2950 assumehaveparentrevisions=assumehaveparentrevisions,
2951 2951 sidedata_helpers=sidedata_helpers,
2952 2952 )
2953 2953
2954 2954 DELTAREUSEALWAYS = b'always'
2955 2955 DELTAREUSESAMEREVS = b'samerevs'
2956 2956 DELTAREUSENEVER = b'never'
2957 2957
2958 2958 DELTAREUSEFULLADD = b'fulladd'
2959 2959
2960 2960 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2961 2961
2962 2962 def clone(
2963 2963 self,
2964 2964 tr,
2965 2965 destrevlog,
2966 2966 addrevisioncb=None,
2967 2967 deltareuse=DELTAREUSESAMEREVS,
2968 2968 forcedeltabothparents=None,
2969 2969 sidedata_helpers=None,
2970 2970 ):
2971 2971 """Copy this revlog to another, possibly with format changes.
2972 2972
2973 2973 The destination revlog will contain the same revisions and nodes.
2974 2974 However, it may not be bit-for-bit identical due to e.g. delta encoding
2975 2975 differences.
2976 2976
2977 2977 The ``deltareuse`` argument controls how deltas from the existing revlog
2978 2978 are preserved in the destination revlog. The argument can have the
2979 2979 following values:
2980 2980
2981 2981 DELTAREUSEALWAYS
2982 2982 Deltas will always be reused (if possible), even if the destination
2983 2983 revlog would not select the same revisions for the delta. This is the
2984 2984 fastest mode of operation.
2985 2985 DELTAREUSESAMEREVS
2986 2986 Deltas will be reused if the destination revlog would pick the same
2987 2987 revisions for the delta. This mode strikes a balance between speed
2988 2988 and optimization.
2989 2989 DELTAREUSENEVER
2990 2990 Deltas will never be reused. This is the slowest mode of execution.
2991 2991 This mode can be used to recompute deltas (e.g. if the diff/delta
2992 2992 algorithm changes).
2993 2993 DELTAREUSEFULLADD
2994 2994 Revisions will be re-added as if they were new content. This is
2995 2995 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2996 2996 e.g. large file detection and handling.
2997 2997
2998 2998 Delta computation can be slow, so the choice of delta reuse policy can
2999 2999 significantly affect run time.
3000 3000
3001 3001 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3002 3002 two extremes. Deltas will be reused if they are appropriate. But if the
3003 3003 delta could choose a better revision, it will do so. This means if you
3004 3004 are converting a non-generaldelta revlog to a generaldelta revlog,
3005 3005 deltas will be recomputed if the delta's parent isn't a parent of the
3006 3006 revision.
3007 3007
3008 3008 In addition to the delta policy, the ``forcedeltabothparents``
3009 3009 argument controls whether to force compute deltas against both parents
3010 3010 for merges. By default, the destination revlog's current setting is used.
3011 3011
3012 3012 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3013 3013 `sidedata_helpers`.
3014 3014 """
3015 3015 if deltareuse not in self.DELTAREUSEALL:
3016 3016 raise ValueError(
3017 3017 _(b'value for deltareuse invalid: %s') % deltareuse
3018 3018 )
3019 3019
3020 3020 if len(destrevlog):
3021 3021 raise ValueError(_(b'destination revlog is not empty'))
3022 3022
3023 3023 if getattr(self, 'filteredrevs', None):
3024 3024 raise ValueError(_(b'source revlog has filtered revisions'))
3025 3025 if getattr(destrevlog, 'filteredrevs', None):
3026 3026 raise ValueError(_(b'destination revlog has filtered revisions'))
3027 3027
3028 3028 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3029 3029 # if possible.
3030 3030 oldlazydelta = destrevlog._lazydelta
3031 3031 oldlazydeltabase = destrevlog._lazydeltabase
3032 3032 oldamd = destrevlog._deltabothparents
3033 3033
3034 3034 try:
3035 3035 if deltareuse == self.DELTAREUSEALWAYS:
3036 3036 destrevlog._lazydeltabase = True
3037 3037 destrevlog._lazydelta = True
3038 3038 elif deltareuse == self.DELTAREUSESAMEREVS:
3039 3039 destrevlog._lazydeltabase = False
3040 3040 destrevlog._lazydelta = True
3041 3041 elif deltareuse == self.DELTAREUSENEVER:
3042 3042 destrevlog._lazydeltabase = False
3043 3043 destrevlog._lazydelta = False
3044 3044
3045 3045 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3046 3046
3047 3047 self._clone(
3048 3048 tr,
3049 3049 destrevlog,
3050 3050 addrevisioncb,
3051 3051 deltareuse,
3052 3052 forcedeltabothparents,
3053 3053 sidedata_helpers,
3054 3054 )
3055 3055
3056 3056 finally:
3057 3057 destrevlog._lazydelta = oldlazydelta
3058 3058 destrevlog._lazydeltabase = oldlazydeltabase
3059 3059 destrevlog._deltabothparents = oldamd
3060 3060
3061 3061 def _clone(
3062 3062 self,
3063 3063 tr,
3064 3064 destrevlog,
3065 3065 addrevisioncb,
3066 3066 deltareuse,
3067 3067 forcedeltabothparents,
3068 3068 sidedata_helpers,
3069 3069 ):
3070 3070 """perform the core duty of `revlog.clone` after parameter processing"""
3071 3071 deltacomputer = deltautil.deltacomputer(destrevlog)
3072 3072 index = self.index
3073 3073 for rev in self:
3074 3074 entry = index[rev]
3075 3075
3076 3076 # Some classes override linkrev to take filtered revs into
3077 3077 # account. Use raw entry from index.
3078 3078 flags = entry[0] & 0xFFFF
3079 3079 linkrev = entry[4]
3080 3080 p1 = index[entry[5]][7]
3081 3081 p2 = index[entry[6]][7]
3082 3082 node = entry[7]
3083 3083
3084 3084 # (Possibly) reuse the delta from the revlog if allowed and
3085 3085 # the revlog chunk is a delta.
3086 3086 cachedelta = None
3087 3087 rawtext = None
3088 3088 if deltareuse == self.DELTAREUSEFULLADD:
3089 3089 text, sidedata = self._revisiondata(rev)
3090 3090
3091 3091 if sidedata_helpers is not None:
3092 3092 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3093 3093 self, sidedata_helpers, sidedata, rev
3094 3094 )
3095 3095 flags = flags | new_flags[0] & ~new_flags[1]
3096 3096
3097 3097 destrevlog.addrevision(
3098 3098 text,
3099 3099 tr,
3100 3100 linkrev,
3101 3101 p1,
3102 3102 p2,
3103 3103 cachedelta=cachedelta,
3104 3104 node=node,
3105 3105 flags=flags,
3106 3106 deltacomputer=deltacomputer,
3107 3107 sidedata=sidedata,
3108 3108 )
3109 3109 else:
3110 3110 if destrevlog._lazydelta:
3111 3111 dp = self.deltaparent(rev)
3112 3112 if dp != nullrev:
3113 3113 cachedelta = (dp, bytes(self._chunk(rev)))
3114 3114
3115 3115 sidedata = None
3116 3116 if not cachedelta:
3117 3117 rawtext, sidedata = self._revisiondata(rev)
3118 3118 if sidedata is None:
3119 3119 sidedata = self.sidedata(rev)
3120 3120
3121 3121 if sidedata_helpers is not None:
3122 3122 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3123 3123 self, sidedata_helpers, sidedata, rev
3124 3124 )
3125 3125 flags = flags | new_flags[0] & ~new_flags[1]
3126 3126
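                # _writing opens the destination revlog's file handles for
                # appending for the duration of the block.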
3127 3127 with destrevlog._writing(tr):
3128 3128 destrevlog._addrevision(
3129 3129 node,
3130 3130 rawtext,
3131 3131 tr,
3132 3132 linkrev,
3133 3133 p1,
3134 3134 p2,
3135 3135 flags,
3136 3136 cachedelta,
3137 3137 deltacomputer=deltacomputer,
3138 3138 sidedata=sidedata,
3139 3139 )
3140 3140
3141 3141 if addrevisioncb:
3142 3142 addrevisioncb(self, rev, node)
3143 3143
3144 3144 def censorrevision(self, tr, censornode, tombstone=b''):
3145 3145 if self._format_version == REVLOGV0:
3146 3146 raise error.RevlogError(
3147 3147 _(b'cannot censor with version %d revlogs')
3148 3148 % self._format_version
3149 3149 )
3150 3150
3151 3151 censorrev = self.rev(censornode)
3152 3152 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3153 3153
3154 3154 if len(tombstone) > self.rawsize(censorrev):
3155 3155 raise error.Abort(
3156 3156 _(b'censor tombstone must be no longer than censored data')
3157 3157 )
3158 3158
3159 3159 # Rewriting the revlog in place is hard. Our strategy for censoring is
3160 3160 # to create a new revlog, copy all revisions to it, then replace the
3161 3161 # revlogs on transaction close.
3162 3162 #
3163 3163 # This is a bit dangerous. We could easily have a mismatch of state.
3164 3164 newrl = revlog(
3165 3165 self.opener,
3166 3166 target=self.target,
3167 3167 radix=self.radix,
3168 3168 postfix=b'tmpcensored',
3169 3169 censorable=True,
3170 3170 )
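        # mirror the source revlog's format settings so the rewritten
        # entries stay byte-compatible with the original.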
3171 3171 newrl._format_version = self._format_version
3172 3172 newrl._format_flags = self._format_flags
3173 3173 newrl._generaldelta = self._generaldelta
3174 3174 newrl._parse_index = self._parse_index
3175 3175
3176 3176 for rev in self.revs():
3177 3177 node = self.node(rev)
3178 3178 p1, p2 = self.parents(node)
3179 3179
3180 3180 if rev == censorrev:
3181 3181 newrl.addrawrevision(
3182 3182 tombstone,
3183 3183 tr,
3184 3184 self.linkrev(censorrev),
3185 3185 p1,
3186 3186 p2,
3187 3187 censornode,
3188 3188 REVIDX_ISCENSORED,
3189 3189 )
3190 3190
3191 3191 if newrl.deltaparent(rev) != nullrev:
3192 3192 raise error.Abort(
3193 3193 _(
3194 3194 b'censored revision stored as delta; '
3195 3195 b'cannot censor'
3196 3196 ),
3197 3197 hint=_(
3198 3198 b'censoring of revlogs is not '
3199 3199 b'fully implemented; please report '
3200 3200 b'this bug'
3201 3201 ),
3202 3202 )
3203 3203 continue
3204 3204
3205 3205 if self.iscensored(rev):
3206 3206 if self.deltaparent(rev) != nullrev:
3207 3207 raise error.Abort(
3208 3208 _(
3209 3209 b'cannot censor due to censored '
3210 3210 b'revision having delta stored'
3211 3211 )
3212 3212 )
3213 3213 rawtext = self._chunk(rev)
3214 3214 else:
3215 3215 rawtext = self.rawdata(rev)
3216 3216
3217 3217 newrl.addrawrevision(
3218 3218 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3219 3219 )
3220 3220
3221 3221 tr.addbackup(self._indexfile, location=b'store')
3222 3222 if not self._inline:
3223 3223 tr.addbackup(self._datafile, location=b'store')
3224 3224
3225 3225 self.opener.rename(newrl._indexfile, self._indexfile)
3226 3226 if not self._inline:
3227 3227 self.opener.rename(newrl._datafile, self._datafile)
3228 3228
3229 3229 self.clearcaches()
3230 3230 self._loadindex()
3231 3231
3232 3232 def verifyintegrity(self, state):
3233 3233 """Verifies the integrity of the revlog.
3234 3234
3235 3235 Yields ``revlogproblem`` instances describing problems that are
3236 3236 found.
3237 3237 """
3238 3238 dd, di = self.checksize()
3239 3239 if dd:
3240 3240 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3241 3241 if di:
3242 3242 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3243 3243
3244 3244 version = self._format_version
3245 3245
3246 3246 # The verifier tells us what revlog version we should be.
3247 3247 if version != state[b'expectedversion']:
3248 3248 yield revlogproblem(
3249 3249 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3250 3250 % (self.display_id, version, state[b'expectedversion'])
3251 3251 )
3252 3252
3253 3253 state[b'skipread'] = set()
3254 3254 state[b'safe_renamed'] = set()
3255 3255
3256 3256 for rev in self:
3257 3257 node = self.node(rev)
3258 3258
3259 3259 # Verify contents. 4 cases to care about:
3260 3260 #
3261 3261 # common: the most common case
3262 3262 # rename: with a rename
3263 3263 # meta: file content starts with b'\1\n', the metadata
3264 3264 # header defined in filelog.py, but without a rename
3265 3265 # ext: content stored externally
3266 3266 #
3267 3267 # More formally, their differences are shown below:
3268 3268 #
3269 3269 # | common | rename | meta | ext
3270 3270 # -------------------------------------------------------
3271 3271 # flags() | 0 | 0 | 0 | not 0
3272 3272 # renamed() | False | True | False | ?
3273 3273 # rawtext[0:2]=='\1\n'| False | True | True | ?
3274 3274 #
3275 3275 # "rawtext" means the raw text stored in revlog data, which
3276 3276 # could be retrieved by "rawdata(rev)". "text"
3277 3277 # mentioned below is "revision(rev)".
3278 3278 #
3279 3279 # There are 3 different lengths stored physically:
3280 3280 # 1. L1: rawsize, stored in revlog index
3281 3281 # 2. L2: len(rawtext), stored in revlog data
3282 3282 # 3. L3: len(text), stored in revlog data if flags==0, or
3283 3283 # possibly somewhere else if flags!=0
3284 3284 #
3285 3285 # L1 should be equal to L2. L3 could be different from them.
3286 3286 # "text" may or may not affect commit hash depending on flag
3287 3287 # processors (see flagutil.addflagprocessor).
3288 3288 #
3289 3289 # | common | rename | meta | ext
3290 3290 # -------------------------------------------------
3291 3291 # rawsize() | L1 | L1 | L1 | L1
3292 3292 # size() | L1 | L2-LM | L1(*) | L1 (?)
3293 3293 # len(rawtext) | L2 | L2 | L2 | L2
3294 3294 # len(text) | L2 | L2 | L2 | L3
3295 3295 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3296 3296 #
3297 3297 # LM: length of metadata, depending on rawtext
3298 3298 # (*): not ideal, see comment in filelog.size
3299 3299 # (?): could be "- len(meta)" if the resolved content has
3300 3300 # rename metadata
3301 3301 #
3302 3302 # Checks needed to be done:
3303 3303 # 1. length check: L1 == L2, in all cases.
3304 3304 # 2. hash check: depending on flag processor, we may need to
3305 3305 # use either "text" (external), or "rawtext" (in revlog).
3306 3306
3307 3307 try:
3308 3308 skipflags = state.get(b'skipflags', 0)
3309 3309 if skipflags:
3310 3310 skipflags &= self.flags(rev)
3311 3311
3312 3312 _verify_revision(self, skipflags, state, node)
3313 3313
3314 3314 l1 = self.rawsize(rev)
3315 3315 l2 = len(self.rawdata(node))
3316 3316
3317 3317 if l1 != l2:
3318 3318 yield revlogproblem(
3319 3319 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3320 3320 node=node,
3321 3321 )
3322 3322
3323 3323 except error.CensoredNodeError:
3324 3324 if state[b'erroroncensored']:
3325 3325 yield revlogproblem(
3326 3326 error=_(b'censored file data'), node=node
3327 3327 )
3328 3328 state[b'skipread'].add(node)
3329 3329 except Exception as e:
3330 3330 yield revlogproblem(
3331 3331 error=_(b'unpacking %s: %s')
3332 3332 % (short(node), stringutil.forcebytestr(e)),
3333 3333 node=node,
3334 3334 )
3335 3335 state[b'skipread'].add(node)
3336 3336
3337 3337 def storageinfo(
3338 3338 self,
3339 3339 exclusivefiles=False,
3340 3340 sharedfiles=False,
3341 3341 revisionscount=False,
3342 3342 trackedsize=False,
3343 3343 storedsize=False,
3344 3344 ):
3345 3345 d = {}
3346 3346
3347 3347 if exclusivefiles:
3348 3348 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3349 3349 if not self._inline:
3350 3350 d[b'exclusivefiles'].append((self.opener, self._datafile))
3351 3351
3352 3352 if sharedfiles:
3353 3353 d[b'sharedfiles'] = []
3354 3354
3355 3355 if revisionscount:
3356 3356 d[b'revisionscount'] = len(self)
3357 3357
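        # trackedsize is the sum of the uncompressed revision sizes;
        # storedsize below is the actual number of bytes on disk.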
3358 3358 if trackedsize:
3359 3359 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3360 3360
3361 3361 if storedsize:
3362 3362 d[b'storedsize'] = sum(
3363 3363 self.opener.stat(path).st_size for path in self.files()
3364 3364 )
3365 3365
3366 3366 return d
3367 3367
3368 3368 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3369 3369 if not self.hassidedata:
3370 3370 return
3371 3371 # revlog formats with sidedata support do not support inline
3372 3372 assert not self._inline
3373 3373 if not helpers[1] and not helpers[2]:
3374 3374 # Nothing to generate or remove
3375 3375 return
3376 3376
3377 3377 new_entries = []
3378 3378 # append the new sidedata
3379 3379 with self._writing(transaction):
3380 3380 ifh, dfh = self._writinghandles
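            # ifh/dfh are the index and data file handles; the new sidedata
            # is appended at the current end of the data file.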
3381 3381 if self._docket is not None:
3382 3382 dfh.seek(self._docket.data_end, os.SEEK_SET)
3383 3383 else:
3384 3384 dfh.seek(0, os.SEEK_END)
3385 3385
3386 3386 current_offset = dfh.tell()
3387 3387 for rev in range(startrev, endrev + 1):
3388 3388 entry = self.index[rev]
3389 3389 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3390 3390 store=self,
3391 3391 sidedata_helpers=helpers,
3392 3392 sidedata={},
3393 3393 rev=rev,
3394 3394 )
3395 3395
3396 3396 serialized_sidedata = sidedatautil.serialize_sidedata(
3397 3397 new_sidedata
3398 3398 )
3399 3399
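                # pick a storage mode for the sidedata blob: keep it plain
                # unless compression actually shrinks it, in which case
                # record either the docket's default compression header or
                # an inline one.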
3400 3400 sidedata_compression_mode = COMP_MODE_INLINE
3401 3401 if serialized_sidedata and self.hassidedata:
3402 3402 sidedata_compression_mode = COMP_MODE_PLAIN
3403 3403 h, comp_sidedata = self.compress(serialized_sidedata)
3404 3404 if (
3405 3405 h != b'u'
3406 3406 and comp_sidedata[0] != b'\0'
3407 3407 and len(comp_sidedata) < len(serialized_sidedata)
3408 3408 ):
3409 3409 assert not h
3410 3410 if (
3411 3411 comp_sidedata[0]
3412 3412 == self._docket.default_compression_header
3413 3413 ):
3414 3414 sidedata_compression_mode = COMP_MODE_DEFAULT
3415 3415 serialized_sidedata = comp_sidedata
3416 3416 else:
3417 3417 sidedata_compression_mode = COMP_MODE_INLINE
3418 3418 serialized_sidedata = comp_sidedata
3419 3419 if entry[8] != 0 or entry[9] != 0:
3420 3420 # rewriting entries that already have sidedata is not
3421 3421 # supported yet, because it introduces garbage data in the
3422 3422 # revlog.
3423 3423 msg = b"rewriting existing sidedata is not supported yet"
3424 3424 raise error.Abort(msg)
3425 3425
3426 3426 # Apply (potential) flags to add and to remove after running
3427 3427 # the sidedata helpers
3428 3428 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3429 3429 entry_update = (
3430 3430 current_offset,
3431 3431 len(serialized_sidedata),
3432 3432 new_offset_flags,
3433 3433 sidedata_compression_mode,
3434 3434 )
3435 3435
3436 3436 # the sidedata computation might have moved the file cursors around
3437 3437 dfh.seek(current_offset, os.SEEK_SET)
3438 3438 dfh.write(serialized_sidedata)
3439 3439 new_entries.append(entry_update)
3440 3440 current_offset += len(serialized_sidedata)
3441 3441 if self._docket is not None:
3442 3442 self._docket.data_end = dfh.tell()
3443 3443
3444 3444 # rewrite the new index entries
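        # index entries are fixed-size, so startrev * entry_size addresses
        # the slot of the first rewritten entry directly.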
3445 3445 ifh.seek(startrev * self.index.entry_size)
3446 3446 for i, e in enumerate(new_entries):
3447 3447 rev = startrev + i
3448 3448 self.index.replace_sidedata_info(rev, *e)
3449 3449 packed = self.index.entry_binary(rev)
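            # without a docket, the revlog version header lives in the
            # first index entry, so it must be re-prepended when rewriting
            # rev 0.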
3450 3450 if rev == 0 and self._docket is None:
3451 3451 header = self._format_flags | self._format_version
3452 3452 header = self.index.pack_header(header)
3453 3453 packed = header + packed
3454 3454 ifh.write(packed)