revlog: fix the naming scheme use by split temporary file...
marmoute -
r51707:4a3a9d96 stable
@@ -1,3527 +1,3530 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 CHANGELOGV2,
39 39 COMP_MODE_DEFAULT,
40 40 COMP_MODE_INLINE,
41 41 COMP_MODE_PLAIN,
42 42 DELTA_BASE_REUSE_NO,
43 43 DELTA_BASE_REUSE_TRY,
44 44 ENTRY_RANK,
45 45 FEATURES_BY_VERSION,
46 46 FLAG_GENERALDELTA,
47 47 FLAG_INLINE_DATA,
48 48 INDEX_HEADER,
49 49 KIND_CHANGELOG,
50 50 KIND_FILELOG,
51 51 RANK_UNKNOWN,
52 52 REVLOGV0,
53 53 REVLOGV1,
54 54 REVLOGV1_FLAGS,
55 55 REVLOGV2,
56 56 REVLOGV2_FLAGS,
57 57 REVLOG_DEFAULT_FLAGS,
58 58 REVLOG_DEFAULT_FORMAT,
59 59 REVLOG_DEFAULT_VERSION,
60 60 SUPPORTED_FLAGS,
61 61 )
62 62 from .revlogutils.flagutil import (
63 63 REVIDX_DEFAULT_FLAGS,
64 64 REVIDX_ELLIPSIS,
65 65 REVIDX_EXTSTORED,
66 66 REVIDX_FLAGS_ORDER,
67 67 REVIDX_HASCOPIESINFO,
68 68 REVIDX_ISCENSORED,
69 69 REVIDX_RAWTEXT_CHANGING_FLAGS,
70 70 )
71 71 from .thirdparty import attr
72 72 from . import (
73 73 ancestor,
74 74 dagop,
75 75 error,
76 76 mdiff,
77 77 policy,
78 78 pycompat,
79 79 revlogutils,
80 80 templatefilters,
81 81 util,
82 82 )
83 83 from .interfaces import (
84 84 repository,
85 85 util as interfaceutil,
86 86 )
87 87 from .revlogutils import (
88 88 deltas as deltautil,
89 89 docket as docketutil,
90 90 flagutil,
91 91 nodemap as nodemaputil,
92 92 randomaccessfile,
93 93 revlogv0,
94 94 rewrite,
95 95 sidedata as sidedatautil,
96 96 )
97 97 from .utils import (
98 98 storageutil,
99 99 stringutil,
100 100 )
101 101
102 102 # blanket usage of all the names to prevent pyflakes complaints
103 103 # We need these names available in the module for extensions.
104 104
105 105 REVLOGV0
106 106 REVLOGV1
107 107 REVLOGV2
108 108 CHANGELOGV2
109 109 FLAG_INLINE_DATA
110 110 FLAG_GENERALDELTA
111 111 REVLOG_DEFAULT_FLAGS
112 112 REVLOG_DEFAULT_FORMAT
113 113 REVLOG_DEFAULT_VERSION
114 114 REVLOGV1_FLAGS
115 115 REVLOGV2_FLAGS
116 116 REVIDX_ISCENSORED
117 117 REVIDX_ELLIPSIS
118 118 REVIDX_HASCOPIESINFO
119 119 REVIDX_EXTSTORED
120 120 REVIDX_DEFAULT_FLAGS
121 121 REVIDX_FLAGS_ORDER
122 122 REVIDX_RAWTEXT_CHANGING_FLAGS
123 123
124 124 parsers = policy.importmod('parsers')
125 125 rustancestor = policy.importrust('ancestor')
126 126 rustdagop = policy.importrust('dagop')
127 127 rustrevlog = policy.importrust('revlog')
128 128
129 129 # Aliased for performance.
130 130 _zlibdecompress = zlib.decompress
131 131
132 132 # max size of inline data embedded into a revlog
133 133 _maxinline = 131072
134 134
135 135 # Flag processors for REVIDX_ELLIPSIS.
136 136 def ellipsisreadprocessor(rl, text):
137 137 return text, False
138 138
139 139
140 140 def ellipsiswriteprocessor(rl, text):
141 141 return text, False
142 142
143 143
144 144 def ellipsisrawprocessor(rl, text):
145 145 return False
146 146
147 147
148 148 ellipsisprocessor = (
149 149 ellipsisreadprocessor,
150 150 ellipsiswriteprocessor,
151 151 ellipsisrawprocessor,
152 152 )
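
For orientation, the three callbacks above follow the general flag-processor contract used by flagutil: a processor is a (read, write, raw) tuple, where read and write return a (text, validatehash) pair and raw reports whether the stored rawtext can be hash-checked as-is. A minimal sketch of another no-op processor built on that assumption (all names below are illustrative, not part of this module):

def _noop_readprocessor(rl, text):
    # return the stored text unchanged and skip hash validation
    return text, False


def _noop_writeprocessor(rl, text):
    return text, False


def _noop_rawprocessor(rl, text):
    # the rawtext is not safe to validate as stored
    return False


noopprocessor = (
    _noop_readprocessor,
    _noop_writeprocessor,
    _noop_rawprocessor,
)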
153 153
154 154
155 155 def _verify_revision(rl, skipflags, state, node):
156 156 """Verify the integrity of the given revlog ``node`` while providing a hook
157 157 point for extensions to influence the operation."""
158 158 if skipflags:
159 159 state[b'skipread'].add(node)
160 160 else:
161 161 # Side-effect: read content and verify hash.
162 162 rl.revision(node)
163 163
164 164
165 165 # True if a fast implementation for persistent-nodemap is available
166 166 #
167 167 # We also consider we have a "fast" implementation in "pure" python because
168 168 # people using pure don't really have performance considerations (and a
169 169 # wheelbarrow of other slowness sources)
170 170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
171 171 parsers, 'BaseIndexObject'
172 172 )
173 173
174 174
175 175 @interfaceutil.implementer(repository.irevisiondelta)
176 176 @attr.s(slots=True)
177 177 class revlogrevisiondelta:
178 178 node = attr.ib()
179 179 p1node = attr.ib()
180 180 p2node = attr.ib()
181 181 basenode = attr.ib()
182 182 flags = attr.ib()
183 183 baserevisionsize = attr.ib()
184 184 revision = attr.ib()
185 185 delta = attr.ib()
186 186 sidedata = attr.ib()
187 187 protocol_flags = attr.ib()
188 188 linknode = attr.ib(default=None)
189 189
190 190
191 191 @interfaceutil.implementer(repository.iverifyproblem)
192 192 @attr.s(frozen=True)
193 193 class revlogproblem:
194 194 warning = attr.ib(default=None)
195 195 error = attr.ib(default=None)
196 196 node = attr.ib(default=None)
197 197
198 198
199 199 def parse_index_v1(data, inline):
200 200 # call the C implementation to parse the index data
201 201 index, cache = parsers.parse_index2(data, inline)
202 202 return index, cache
203 203
204 204
205 205 def parse_index_v2(data, inline):
206 206 # call the C implementation to parse the index data
207 207 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
208 208 return index, cache
209 209
210 210
211 211 def parse_index_cl_v2(data, inline):
212 212 # call the C implementation to parse the index data
213 213 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
214 214 return index, cache
215 215
216 216
217 217 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
218 218
219 219 def parse_index_v1_nodemap(data, inline):
220 220 index, cache = parsers.parse_index_devel_nodemap(data, inline)
221 221 return index, cache
222 222
223 223
224 224 else:
225 225 parse_index_v1_nodemap = None
226 226
227 227
228 228 def parse_index_v1_mixed(data, inline):
229 229 index, cache = parse_index_v1(data, inline)
230 230 return rustrevlog.MixedIndex(index), cache
231 231
232 232
233 233 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
234 234 # signed integer)
235 235 _maxentrysize = 0x7FFFFFFF
236 236
237 237 FILE_TOO_SHORT_MSG = _(
238 238 b'cannot read from revlog %s;'
239 239 b' expected %d bytes from offset %d, data size is %d'
240 240 )
241 241
242 242 hexdigits = b'0123456789abcdefABCDEF'
243 243
244 244
245 245 class revlog:
246 246 """
247 247 the underlying revision storage object
248 248
249 249 A revlog consists of two parts, an index and the revision data.
250 250
251 251 The index is a file with a fixed record size containing
252 252 information on each revision, including its nodeid (hash), the
253 253 nodeids of its parents, the position and offset of its data within
254 254 the data file, and the revision it's based on. Finally, each entry
255 255 contains a linkrev entry that can serve as a pointer to external
256 256 data.
257 257
258 258 The revision data itself is a linear collection of data chunks.
259 259 Each chunk represents a revision and is usually represented as a
260 260 delta against the previous chunk. To bound lookup time, runs of
261 261 deltas are limited to about 2 times the length of the original
262 262 version data. This makes retrieval of a version proportional to
263 263 its size, or O(1) relative to the number of revisions.
264 264
265 265 Both pieces of the revlog are written to in an append-only
266 266 fashion, which means we never need to rewrite a file to insert or
267 267 remove data, and can use some simple techniques to avoid the need
268 268 for locking while reading.
269 269
270 270 If checkambig, indexfile is opened with checkambig=True at
271 271 writing, to avoid file stat ambiguity.
272 272
273 273 If mmaplargeindex is True, and an mmapindexthreshold is set, the
274 274 index will be mmapped rather than read if it is larger than the
275 275 configured threshold.
276 276
277 277 If censorable is True, the revlog can have censored revisions.
278 278
279 279 If `upperboundcomp` is not None, this is the expected maximal gain from
280 280 compression for the data content.
281 281
282 282 `concurrencychecker` is an optional function that receives 3 arguments: a
283 283 file handle, a filename, and an expected position. It should check whether
284 284 the current position in the file handle is valid, and log/warn/fail (by
285 285 raising).
286 286
287 287 See mercurial/revlogutils/constants.py for details about the content of an
288 288 index entry.
289 289 """
290 290
291 291 _flagserrorclass = error.RevlogError
292 292
293 293 @staticmethod
294 294 def is_inline_index(header_bytes):
295 295 header = INDEX_HEADER.unpack(header_bytes)[0]
296 296
297 297 _format_flags = header & ~0xFFFF
298 298 _format_version = header & 0xFFFF
299 299
300 300 features = FEATURES_BY_VERSION[_format_version]
301 301 return features[b'inline'](_format_flags)
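
To make the bit layout above concrete: the revlog version lives in the low 16 bits of the header and the feature flags in the high bits. A small self-contained sketch, assuming INDEX_HEADER is a big-endian 32-bit struct and that FLAG_INLINE_DATA == 1 << 16 and REVLOGV1 == 1, as in the constants module imported above:

import struct

# hypothetical header bytes: revlog v1 with the inline-data flag set
header_bytes = struct.pack(">I", (1 << 16) | 1)

header = struct.unpack(">I", header_bytes)[0]
format_flags = header & ~0xFFFF   # 0x00010000 -> inline data
format_version = header & 0xFFFF  # 1 -> REVLOGV1
assert format_flags == (1 << 16) and format_version == 1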
302 302
303 303 def __init__(
304 304 self,
305 305 opener,
306 306 target,
307 307 radix,
308 308 postfix=None, # only exists for `tmpcensored` now
309 309 checkambig=False,
310 310 mmaplargeindex=False,
311 311 censorable=False,
312 312 upperboundcomp=None,
313 313 persistentnodemap=False,
314 314 concurrencychecker=None,
315 315 trypending=False,
316 316 try_split=False,
317 317 canonical_parent_order=True,
318 318 ):
319 319 """
320 320 create a revlog object
321 321
322 322 opener is a function that abstracts the file opening operation
323 323 and can be used to implement COW semantics or the like.
324 324
325 325 `target`: a (KIND, ID) tuple that identifies the content stored in
326 326 this revlog. It helps the rest of the code to understand what the revlog
327 327 is about without having to resort to heuristics and index filename
328 328 analysis. Note that this must be reliably set by normal code, but
329 329 that test, debug, or performance measurement code might not set this to
330 330 an accurate value.
331 331 """
332 332 self.upperboundcomp = upperboundcomp
333 333
334 334 self.radix = radix
335 335
336 336 self._docket_file = None
337 337 self._indexfile = None
338 338 self._datafile = None
339 339 self._sidedatafile = None
340 340 self._nodemap_file = None
341 341 self.postfix = postfix
342 342 self._trypending = trypending
343 343 self._try_split = try_split
344 344 self.opener = opener
345 345 if persistentnodemap:
346 346 self._nodemap_file = nodemaputil.get_nodemap_file(self)
347 347
348 348 assert target[0] in ALL_KINDS
349 349 assert len(target) == 2
350 350 self.target = target
351 351 # When True, indexfile is opened with checkambig=True at writing, to
352 352 # avoid file stat ambiguity.
353 353 self._checkambig = checkambig
354 354 self._mmaplargeindex = mmaplargeindex
355 355 self._censorable = censorable
356 356 # 3-tuple of (node, rev, text) for a raw revision.
357 357 self._revisioncache = None
358 358 # Maps rev to chain base rev.
359 359 self._chainbasecache = util.lrucachedict(100)
360 360 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
361 361 self._chunkcache = (0, b'')
362 362 # How much data to read and cache into the raw revlog data cache.
363 363 self._chunkcachesize = 65536
364 364 self._maxchainlen = None
365 365 self._deltabothparents = True
366 366 self._candidate_group_chunk_size = 0
367 367 self._debug_delta = False
368 368 self.index = None
369 369 self._docket = None
370 370 self._nodemap_docket = None
371 371 # Mapping of partial identifiers to full nodes.
372 372 self._pcache = {}
373 373 # Mapping of revision integer to full node.
374 374 self._compengine = b'zlib'
375 375 self._compengineopts = {}
376 376 self._maxdeltachainspan = -1
377 377 self._withsparseread = False
378 378 self._sparserevlog = False
379 379 self.hassidedata = False
380 380 self._srdensitythreshold = 0.50
381 381 self._srmingapsize = 262144
382 382
383 383 # other optional features
384 384
385 385 # might remove rank configuration once the computation has no impact
386 386 self._compute_rank = False
387 387
388 388 # Make copy of flag processors so each revlog instance can support
389 389 # custom flags.
390 390 self._flagprocessors = dict(flagutil.flagprocessors)
391 391
392 392 # 3-tuple of file handles being used for active writing.
393 393 self._writinghandles = None
394 394 # prevent nesting of addgroup
395 395 self._adding_group = None
396 396
397 397 self._loadindex()
398 398
399 399 self._concurrencychecker = concurrencychecker
400 400
401 401 # parent order is supposed to be semantically irrelevant, so we
402 402 # normally resort parents to ensure that the first parent is non-null,
403 403 # if there is a non-null parent at all.
404 404 # filelog abuses the parent order as a flag to mark some instances of
405 405 # meta-encoded files, so allow it to disable this behavior.
406 406 self.canonical_parent_order = canonical_parent_order
407 407
408 408 def _init_opts(self):
409 409 """process options (from above/config) to setup associated default revlog mode
410 410
411 411 These values might be affected when actually reading on-disk information.
412 412
413 413 The relevant values are returned for use in _loadindex().
414 414
415 415 * newversionflags:
416 416 version header to use if we need to create a new revlog
417 417
418 418 * mmapindexthreshold:
419 419 minimal index size at which to start using mmap
420 420
421 421 * force_nodemap:
422 422 force the usage of a "development" version of the nodemap code
423 423 """
424 424 mmapindexthreshold = None
425 425 opts = self.opener.options
426 426
427 427 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
428 428 new_header = CHANGELOGV2
429 429 self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
430 430 elif b'revlogv2' in opts:
431 431 new_header = REVLOGV2
432 432 elif b'revlogv1' in opts:
433 433 new_header = REVLOGV1 | FLAG_INLINE_DATA
434 434 if b'generaldelta' in opts:
435 435 new_header |= FLAG_GENERALDELTA
436 436 elif b'revlogv0' in self.opener.options:
437 437 new_header = REVLOGV0
438 438 else:
439 439 new_header = REVLOG_DEFAULT_VERSION
440 440
441 441 if b'chunkcachesize' in opts:
442 442 self._chunkcachesize = opts[b'chunkcachesize']
443 443 if b'maxchainlen' in opts:
444 444 self._maxchainlen = opts[b'maxchainlen']
445 445 if b'deltabothparents' in opts:
446 446 self._deltabothparents = opts[b'deltabothparents']
447 447 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
448 448 if dps_cgds:
449 449 self._candidate_group_chunk_size = dps_cgds
450 450 self._lazydelta = bool(opts.get(b'lazydelta', True))
451 451 self._lazydeltabase = False
452 452 if self._lazydelta:
453 453 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
454 454 if b'debug-delta' in opts:
455 455 self._debug_delta = opts[b'debug-delta']
456 456 if b'compengine' in opts:
457 457 self._compengine = opts[b'compengine']
458 458 if b'zlib.level' in opts:
459 459 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
460 460 if b'zstd.level' in opts:
461 461 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
462 462 if b'maxdeltachainspan' in opts:
463 463 self._maxdeltachainspan = opts[b'maxdeltachainspan']
464 464 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
465 465 mmapindexthreshold = opts[b'mmapindexthreshold']
466 466 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
467 467 withsparseread = bool(opts.get(b'with-sparse-read', False))
468 468 # sparse-revlog forces sparse-read
469 469 self._withsparseread = self._sparserevlog or withsparseread
470 470 if b'sparse-read-density-threshold' in opts:
471 471 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
472 472 if b'sparse-read-min-gap-size' in opts:
473 473 self._srmingapsize = opts[b'sparse-read-min-gap-size']
474 474 if opts.get(b'enableellipsis'):
475 475 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
476 476
477 477 # revlog v0 doesn't have flag processors
478 478 for flag, processor in opts.get(b'flagprocessors', {}).items():
479 479 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
480 480
481 481 if self._chunkcachesize <= 0:
482 482 raise error.RevlogError(
483 483 _(b'revlog chunk cache size %r is not greater than 0')
484 484 % self._chunkcachesize
485 485 )
486 486 elif self._chunkcachesize & (self._chunkcachesize - 1):
487 487 raise error.RevlogError(
488 488 _(b'revlog chunk cache size %r is not a power of 2')
489 489 % self._chunkcachesize
490 490 )
491 491 force_nodemap = opts.get(b'devel-force-nodemap', False)
492 492 return new_header, mmapindexthreshold, force_nodemap
493 493
494 494 def _get_data(self, filepath, mmap_threshold, size=None):
495 495 """return a file content with or without mmap
496 496
497 497 If the file is missing return the empty string"""
498 498 try:
499 499 with self.opener(filepath) as fp:
500 500 if mmap_threshold is not None:
501 501 file_size = self.opener.fstat(fp).st_size
502 502 if file_size >= mmap_threshold:
503 503 if size is not None:
504 504 # avoid potential mmap crash
505 505 size = min(file_size, size)
506 506 # TODO: should .close() to release resources without
507 507 # relying on Python GC
508 508 if size is None:
509 509 return util.buffer(util.mmapread(fp))
510 510 else:
511 511 return util.buffer(util.mmapread(fp, size))
512 512 if size is None:
513 513 return fp.read()
514 514 else:
515 515 return fp.read(size)
516 516 except FileNotFoundError:
517 517 return b''
518 518
519 519 def get_streams(self, max_linkrev, force_inline=False):
520 520 n = len(self)
521 521 index = self.index
522 522 while n > 0:
523 523 linkrev = index[n - 1][4]
524 524 if linkrev < max_linkrev:
525 525 break
526 526 # note: this loop will rarely go through multiple iterations, since
527 527 # it only traverses commits created during the current streaming
528 528 # pull operation.
529 529 #
530 530 # If this becomes a problem, using a binary search should cap the
531 531 # runtime of this.
532 532 n = n - 1
533 533 if n == 0:
534 534 # no data to send
535 535 return []
536 536 index_size = n * index.entry_size
537 537 data_size = self.end(n - 1)
538 538
539 539 # XXX we might have been split (or stripped) since the object was
540 540 # initialized. We need to close this race too, by having a way to
541 541 # pre-open the files we feed to the revlog and never closing them before
542 542 # we are done streaming.
543 543
544 544 if self._inline:
545 545
546 546 def get_stream():
547 547 with self._indexfp() as fp:
548 548 yield None
549 549 size = index_size + data_size
550 550 if size <= 65536:
551 551 yield fp.read(size)
552 552 else:
553 553 yield from util.filechunkiter(fp, limit=size)
554 554
555 555 inline_stream = get_stream()
556 556 next(inline_stream)
557 557 return [
558 558 (self._indexfile, inline_stream, index_size + data_size),
559 559 ]
560 560 elif force_inline:
561 561
562 562 def get_stream():
563 563 with self._datafp() as fp_d:
564 564 yield None
565 565
566 566 for rev in range(n):
567 567 idx = self.index.entry_binary(rev)
568 568 if rev == 0 and self._docket is None:
569 569 # re-inject the inline flag
570 570 header = self._format_flags
571 571 header |= self._format_version
572 572 header |= FLAG_INLINE_DATA
573 573 header = self.index.pack_header(header)
574 574 idx = header + idx
575 575 yield idx
576 576 yield self._getsegmentforrevs(rev, rev, df=fp_d)[1]
577 577
578 578 inline_stream = get_stream()
579 579 next(inline_stream)
580 580 return [
581 581 (self._indexfile, inline_stream, index_size + data_size),
582 582 ]
583 583 else:
584 584
585 585 def get_index_stream():
586 586 with self._indexfp() as fp:
587 587 yield None
588 588 if index_size <= 65536:
589 589 yield fp.read(index_size)
590 590 else:
591 591 yield from util.filechunkiter(fp, limit=index_size)
592 592
593 593 def get_data_stream():
594 594 with self._datafp() as fp:
595 595 yield None
596 596 if data_size <= 65536:
597 597 yield fp.read(data_size)
598 598 else:
599 599 yield from util.filechunkiter(fp, limit=data_size)
600 600
601 601 index_stream = get_index_stream()
602 602 next(index_stream)
603 603 data_stream = get_data_stream()
604 604 next(data_stream)
605 605 return [
606 606 (self._datafile, data_stream, data_size),
607 607 (self._indexfile, index_stream, index_size),
608 608 ]
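
Each triple returned by get_streams() is (file name, primed generator, expected byte count); the generator has already opened the underlying file, so consuming it streams the bytes out. A hedged consumer sketch, with ``rl`` and ``dest_opener`` as illustrative placeholders:

for name, stream, expected_size in rl.get_streams(max_linkrev=rl.tiprev()):
    written = 0
    with dest_opener(name, b'wb') as out:
        for chunk in stream:
            out.write(chunk)
            written += len(chunk)
    # the advertised size should match what was actually streamed
    assert written == expected_size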
609 609
610 610 def _loadindex(self, docket=None):
611 611
612 612 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
613 613
614 614 if self.postfix is not None:
615 615 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
616 616 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
617 617 entry_point = b'%s.i.a' % self.radix
618 618 elif self._try_split and self.opener.exists(self._split_index_file):
619 619 entry_point = self._split_index_file
620 620 else:
621 621 entry_point = b'%s.i' % self.radix
622 622
623 623 if docket is not None:
624 624 self._docket = docket
625 625 self._docket_file = entry_point
626 626 else:
627 627 self._initempty = True
628 628 entry_data = self._get_data(entry_point, mmapindexthreshold)
629 629 if len(entry_data) > 0:
630 630 header = INDEX_HEADER.unpack(entry_data[:4])[0]
631 631 self._initempty = False
632 632 else:
633 633 header = new_header
634 634
635 635 self._format_flags = header & ~0xFFFF
636 636 self._format_version = header & 0xFFFF
637 637
638 638 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
639 639 if supported_flags is None:
640 640 msg = _(b'unknown version (%d) in revlog %s')
641 641 msg %= (self._format_version, self.display_id)
642 642 raise error.RevlogError(msg)
643 643 elif self._format_flags & ~supported_flags:
644 644 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
645 645 display_flag = self._format_flags >> 16
646 646 msg %= (display_flag, self._format_version, self.display_id)
647 647 raise error.RevlogError(msg)
648 648
649 649 features = FEATURES_BY_VERSION[self._format_version]
650 650 self._inline = features[b'inline'](self._format_flags)
651 651 self._generaldelta = features[b'generaldelta'](self._format_flags)
652 652 self.hassidedata = features[b'sidedata']
653 653
654 654 if not features[b'docket']:
655 655 self._indexfile = entry_point
656 656 index_data = entry_data
657 657 else:
658 658 self._docket_file = entry_point
659 659 if self._initempty:
660 660 self._docket = docketutil.default_docket(self, header)
661 661 else:
662 662 self._docket = docketutil.parse_docket(
663 663 self, entry_data, use_pending=self._trypending
664 664 )
665 665
666 666 if self._docket is not None:
667 667 self._indexfile = self._docket.index_filepath()
668 668 index_data = b''
669 669 index_size = self._docket.index_end
670 670 if index_size > 0:
671 671 index_data = self._get_data(
672 672 self._indexfile, mmapindexthreshold, size=index_size
673 673 )
674 674 if len(index_data) < index_size:
675 675 msg = _(b'too few index data for %s: got %d, expected %d')
676 676 msg %= (self.display_id, len(index_data), index_size)
677 677 raise error.RevlogError(msg)
678 678
679 679 self._inline = False
680 680 # generaldelta implied by version 2 revlogs.
681 681 self._generaldelta = True
682 682 # the logic for persistent nodemap will be dealt with within the
683 683 # main docket, so disable it for now.
684 684 self._nodemap_file = None
685 685
686 686 if self._docket is not None:
687 687 self._datafile = self._docket.data_filepath()
688 688 self._sidedatafile = self._docket.sidedata_filepath()
689 689 elif self.postfix is None:
690 690 self._datafile = b'%s.d' % self.radix
691 691 else:
692 692 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
693 693
694 694 self.nodeconstants = sha1nodeconstants
695 695 self.nullid = self.nodeconstants.nullid
696 696
697 697 # sparse-revlog can't be on without general-delta (issue6056)
698 698 if not self._generaldelta:
699 699 self._sparserevlog = False
700 700
701 701 self._storedeltachains = True
702 702
703 703 devel_nodemap = (
704 704 self._nodemap_file
705 705 and force_nodemap
706 706 and parse_index_v1_nodemap is not None
707 707 )
708 708
709 709 use_rust_index = False
710 710 if rustrevlog is not None:
711 711 if self._nodemap_file is not None:
712 712 use_rust_index = True
713 713 else:
714 714 use_rust_index = self.opener.options.get(b'rust.index')
715 715
716 716 self._parse_index = parse_index_v1
717 717 if self._format_version == REVLOGV0:
718 718 self._parse_index = revlogv0.parse_index_v0
719 719 elif self._format_version == REVLOGV2:
720 720 self._parse_index = parse_index_v2
721 721 elif self._format_version == CHANGELOGV2:
722 722 self._parse_index = parse_index_cl_v2
723 723 elif devel_nodemap:
724 724 self._parse_index = parse_index_v1_nodemap
725 725 elif use_rust_index:
726 726 self._parse_index = parse_index_v1_mixed
727 727 try:
728 728 d = self._parse_index(index_data, self._inline)
729 729 index, chunkcache = d
730 730 use_nodemap = (
731 731 not self._inline
732 732 and self._nodemap_file is not None
733 733 and util.safehasattr(index, 'update_nodemap_data')
734 734 )
735 735 if use_nodemap:
736 736 nodemap_data = nodemaputil.persisted_data(self)
737 737 if nodemap_data is not None:
738 738 docket = nodemap_data[0]
739 739 if (
740 740 len(d[0]) > docket.tip_rev
741 741 and d[0][docket.tip_rev][7] == docket.tip_node
742 742 ):
743 743 # no changelog tampering
744 744 self._nodemap_docket = docket
745 745 index.update_nodemap_data(*nodemap_data)
746 746 except (ValueError, IndexError):
747 747 raise error.RevlogError(
748 748 _(b"index %s is corrupted") % self.display_id
749 749 )
750 750 self.index = index
751 751 self._segmentfile = randomaccessfile.randomaccessfile(
752 752 self.opener,
753 753 (self._indexfile if self._inline else self._datafile),
754 754 self._chunkcachesize,
755 755 chunkcache,
756 756 )
757 757 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
758 758 self.opener,
759 759 self._sidedatafile,
760 760 self._chunkcachesize,
761 761 )
762 762 # revnum -> (chain-length, sum-delta-length)
763 763 self._chaininfocache = util.lrucachedict(500)
764 764 # revlog header -> revlog compressor
765 765 self._decompressors = {}
766 766
767 767 def get_revlog(self):
768 768 """simple function to mirror API of other not-really-revlog API"""
769 769 return self
770 770
771 771 @util.propertycache
772 772 def revlog_kind(self):
773 773 return self.target[0]
774 774
775 775 @util.propertycache
776 776 def display_id(self):
777 777 """The public facing "ID" of the revlog that we use in message"""
778 778 if self.revlog_kind == KIND_FILELOG:
779 779 # Reference the file without the "data/" prefix, so it is familiar
780 780 # to the user.
781 781 return self.target[1]
782 782 else:
783 783 return self.radix
784 784
785 785 def _get_decompressor(self, t):
786 786 try:
787 787 compressor = self._decompressors[t]
788 788 except KeyError:
789 789 try:
790 790 engine = util.compengines.forrevlogheader(t)
791 791 compressor = engine.revlogcompressor(self._compengineopts)
792 792 self._decompressors[t] = compressor
793 793 except KeyError:
794 794 raise error.RevlogError(
795 795 _(b'unknown compression type %s') % binascii.hexlify(t)
796 796 )
797 797 return compressor
798 798
799 799 @util.propertycache
800 800 def _compressor(self):
801 801 engine = util.compengines[self._compengine]
802 802 return engine.revlogcompressor(self._compengineopts)
803 803
804 804 @util.propertycache
805 805 def _decompressor(self):
806 806 """the default decompressor"""
807 807 if self._docket is None:
808 808 return None
809 809 t = self._docket.default_compression_header
810 810 c = self._get_decompressor(t)
811 811 return c.decompress
812 812
813 813 def _indexfp(self):
814 814 """file object for the revlog's index file"""
815 815 return self.opener(self._indexfile, mode=b"r")
816 816
817 817 def __index_write_fp(self):
818 818 # You should not use this directly and use `_writing` instead
819 819 try:
820 820 f = self.opener(
821 821 self._indexfile, mode=b"r+", checkambig=self._checkambig
822 822 )
823 823 if self._docket is None:
824 824 f.seek(0, os.SEEK_END)
825 825 else:
826 826 f.seek(self._docket.index_end, os.SEEK_SET)
827 827 return f
828 828 except FileNotFoundError:
829 829 return self.opener(
830 830 self._indexfile, mode=b"w+", checkambig=self._checkambig
831 831 )
832 832
833 833 def __index_new_fp(self):
834 834 # You should not use this unless you are upgrading from inline revlog
835 835 return self.opener(
836 836 self._indexfile,
837 837 mode=b"w",
838 838 checkambig=self._checkambig,
839 839 atomictemp=True,
840 840 )
841 841
842 842 def _datafp(self, mode=b'r'):
843 843 """file object for the revlog's data file"""
844 844 return self.opener(self._datafile, mode=mode)
845 845
846 846 @contextlib.contextmanager
847 847 def _sidedatareadfp(self):
848 848 """file object suitable to read sidedata"""
849 849 if self._writinghandles:
850 850 yield self._writinghandles[2]
851 851 else:
852 852 with self.opener(self._sidedatafile) as fp:
853 853 yield fp
854 854
855 855 def tiprev(self):
856 856 return len(self.index) - 1
857 857
858 858 def tip(self):
859 859 return self.node(self.tiprev())
860 860
861 861 def __contains__(self, rev):
862 862 return 0 <= rev < len(self)
863 863
864 864 def __len__(self):
865 865 return len(self.index)
866 866
867 867 def __iter__(self):
868 868 return iter(range(len(self)))
869 869
870 870 def revs(self, start=0, stop=None):
871 871 """iterate over all rev in this revlog (from start to stop)"""
872 872 return storageutil.iterrevs(len(self), start=start, stop=stop)
873 873
874 874 def hasnode(self, node):
875 875 try:
876 876 self.rev(node)
877 877 return True
878 878 except KeyError:
879 879 return False
880 880
881 881 def candelta(self, baserev, rev):
882 882 """whether two revisions (baserev, rev) can be delta-ed or not"""
883 883 # Disable delta if either rev requires a content-changing flag
884 884 # processor (ex. LFS). This is because such a flag processor can alter
885 885 # the rawtext content that the delta will be based on, and two clients
886 886 # could have the same revlog node with different flags (i.e. different
887 887 # rawtext contents) and the delta could be incompatible.
888 888 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
889 889 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
890 890 ):
891 891 return False
892 892 return True
893 893
894 894 def update_caches(self, transaction):
895 895 if self._nodemap_file is not None:
896 896 if transaction is None:
897 897 nodemaputil.update_persistent_nodemap(self)
898 898 else:
899 899 nodemaputil.setup_persistent_nodemap(transaction, self)
900 900
901 901 def clearcaches(self):
902 902 self._revisioncache = None
903 903 self._chainbasecache.clear()
904 904 self._segmentfile.clear_cache()
905 905 self._segmentfile_sidedata.clear_cache()
906 906 self._pcache = {}
907 907 self._nodemap_docket = None
908 908 self.index.clearcaches()
909 909 # The python code is the one responsible for validating the docket, so we
910 910 # end up having to refresh it here.
911 911 use_nodemap = (
912 912 not self._inline
913 913 and self._nodemap_file is not None
914 914 and util.safehasattr(self.index, 'update_nodemap_data')
915 915 )
916 916 if use_nodemap:
917 917 nodemap_data = nodemaputil.persisted_data(self)
918 918 if nodemap_data is not None:
919 919 self._nodemap_docket = nodemap_data[0]
920 920 self.index.update_nodemap_data(*nodemap_data)
921 921
922 922 def rev(self, node):
923 923 try:
924 924 return self.index.rev(node)
925 925 except TypeError:
926 926 raise
927 927 except error.RevlogError:
928 928 # parsers.c radix tree lookup failed
929 929 if (
930 930 node == self.nodeconstants.wdirid
931 931 or node in self.nodeconstants.wdirfilenodeids
932 932 ):
933 933 raise error.WdirUnsupported
934 934 raise error.LookupError(node, self.display_id, _(b'no node'))
935 935
936 936 # Accessors for index entries.
937 937
938 938 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
939 939 # are flags.
940 940 def start(self, rev):
941 941 return int(self.index[rev][0] >> 16)
942 942
943 943 def sidedata_cut_off(self, rev):
944 944 sd_cut_off = self.index[rev][8]
945 945 if sd_cut_off != 0:
946 946 return sd_cut_off
947 947 # This is some annoying dance, because entries without sidedata
948 948 # currently use 0 as their offset (instead of previous-offset +
949 949 # previous-size).
950 950 #
951 951 # We should reconsider this sidedata → 0 sidedata_offset policy.
952 952 # In the meantime, we need this.
953 953 while 0 <= rev:
954 954 e = self.index[rev]
955 955 if e[9] != 0:
956 956 return e[8] + e[9]
957 957 rev -= 1
958 958 return 0
959 959
960 960 def flags(self, rev):
961 961 return self.index[rev][0] & 0xFFFF
962 962
963 963 def length(self, rev):
964 964 return self.index[rev][1]
965 965
966 966 def sidedata_length(self, rev):
967 967 if not self.hassidedata:
968 968 return 0
969 969 return self.index[rev][9]
970 970
971 971 def rawsize(self, rev):
972 972 """return the length of the uncompressed text for a given revision"""
973 973 l = self.index[rev][2]
974 974 if l >= 0:
975 975 return l
976 976
977 977 t = self.rawdata(rev)
978 978 return len(t)
979 979
980 980 def size(self, rev):
981 981 """length of non-raw text (processed by a "read" flag processor)"""
982 982 # fast path: if no "read" flag processor could change the content,
983 983 # size is rawsize. note: ELLIPSIS is known to not change the content.
984 984 flags = self.flags(rev)
985 985 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
986 986 return self.rawsize(rev)
987 987
988 988 return len(self.revision(rev))
989 989
990 990 def fast_rank(self, rev):
991 991 """Return the rank of a revision if already known, or None otherwise.
992 992
993 993 The rank of a revision is the size of the sub-graph it defines as a
994 994 head. Equivalently, the rank of a revision `r` is the size of the set
995 995 `ancestors(r)`, `r` included.
996 996
997 997 This method returns the rank retrieved from the revlog in constant
998 998 time. It makes no attempt at computing unknown values for versions of
999 999 the revlog which do not persist the rank.
1000 1000 """
1001 1001 rank = self.index[rev][ENTRY_RANK]
1002 1002 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1003 1003 return None
1004 1004 if rev == nullrev:
1005 1005 return 0 # convention
1006 1006 return rank
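
When fast_rank() returns None, the same quantity can still be computed the slow way from parent pointers. A minimal sketch under the definition above (size of ancestors(rev) with rev included), using the nullrev imported at the top of this file and an illustrative revlog ``rl``:

def slow_rank(rl, rev):
    """Compute len(ancestors(rev)), rev included, by walking parents."""
    seen = set()
    stack = [rev]
    while stack:
        r = stack.pop()
        if r == nullrev or r in seen:
            continue
        seen.add(r)
        stack.extend(rl.parentrevs(r))
    return len(seen)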
1007 1007
1008 1008 def chainbase(self, rev):
1009 1009 base = self._chainbasecache.get(rev)
1010 1010 if base is not None:
1011 1011 return base
1012 1012
1013 1013 index = self.index
1014 1014 iterrev = rev
1015 1015 base = index[iterrev][3]
1016 1016 while base != iterrev:
1017 1017 iterrev = base
1018 1018 base = index[iterrev][3]
1019 1019
1020 1020 self._chainbasecache[rev] = base
1021 1021 return base
1022 1022
1023 1023 def linkrev(self, rev):
1024 1024 return self.index[rev][4]
1025 1025
1026 1026 def parentrevs(self, rev):
1027 1027 try:
1028 1028 entry = self.index[rev]
1029 1029 except IndexError:
1030 1030 if rev == wdirrev:
1031 1031 raise error.WdirUnsupported
1032 1032 raise
1033 1033
1034 1034 if self.canonical_parent_order and entry[5] == nullrev:
1035 1035 return entry[6], entry[5]
1036 1036 else:
1037 1037 return entry[5], entry[6]
1038 1038
1039 1039 # fast parentrevs(rev) where rev isn't filtered
1040 1040 _uncheckedparentrevs = parentrevs
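
To make the parent swap above concrete, a hedged sketch of what callers observe (``rl`` and ``some_rev`` are placeholders):

p1, p2 = rl.parentrevs(some_rev)
# with canonical_parent_order (the default), an entry stored as
# (nullrev, 3) is reported as (3, nullrev), so p1 is non-null whenever any
# parent is; filelogs pass canonical_parent_order=False to keep the stored
# order, which they use to mark metadata-encoded entries.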
1041 1041
1042 1042 def node(self, rev):
1043 1043 try:
1044 1044 return self.index[rev][7]
1045 1045 except IndexError:
1046 1046 if rev == wdirrev:
1047 1047 raise error.WdirUnsupported
1048 1048 raise
1049 1049
1050 1050 # Derived from index values.
1051 1051
1052 1052 def end(self, rev):
1053 1053 return self.start(rev) + self.length(rev)
1054 1054
1055 1055 def parents(self, node):
1056 1056 i = self.index
1057 1057 d = i[self.rev(node)]
1058 1058 # inline node() to avoid function call overhead
1059 1059 if self.canonical_parent_order and d[5] == self.nullid:
1060 1060 return i[d[6]][7], i[d[5]][7]
1061 1061 else:
1062 1062 return i[d[5]][7], i[d[6]][7]
1063 1063
1064 1064 def chainlen(self, rev):
1065 1065 return self._chaininfo(rev)[0]
1066 1066
1067 1067 def _chaininfo(self, rev):
1068 1068 chaininfocache = self._chaininfocache
1069 1069 if rev in chaininfocache:
1070 1070 return chaininfocache[rev]
1071 1071 index = self.index
1072 1072 generaldelta = self._generaldelta
1073 1073 iterrev = rev
1074 1074 e = index[iterrev]
1075 1075 clen = 0
1076 1076 compresseddeltalen = 0
1077 1077 while iterrev != e[3]:
1078 1078 clen += 1
1079 1079 compresseddeltalen += e[1]
1080 1080 if generaldelta:
1081 1081 iterrev = e[3]
1082 1082 else:
1083 1083 iterrev -= 1
1084 1084 if iterrev in chaininfocache:
1085 1085 t = chaininfocache[iterrev]
1086 1086 clen += t[0]
1087 1087 compresseddeltalen += t[1]
1088 1088 break
1089 1089 e = index[iterrev]
1090 1090 else:
1091 1091 # Add text length of base since decompressing that also takes
1092 1092 # work. For cache hits the length is already included.
1093 1093 compresseddeltalen += e[1]
1094 1094 r = (clen, compresseddeltalen)
1095 1095 chaininfocache[rev] = r
1096 1096 return r
1097 1097
1098 1098 def _deltachain(self, rev, stoprev=None):
1099 1099 """Obtain the delta chain for a revision.
1100 1100
1101 1101 ``stoprev`` specifies a revision to stop at. If not specified, we
1102 1102 stop at the base of the chain.
1103 1103
1104 1104 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1105 1105 revs in ascending order and ``stopped`` is a bool indicating whether
1106 1106 ``stoprev`` was hit.
1107 1107 """
1108 1108 # Try C implementation.
1109 1109 try:
1110 1110 return self.index.deltachain(rev, stoprev, self._generaldelta)
1111 1111 except AttributeError:
1112 1112 pass
1113 1113
1114 1114 chain = []
1115 1115
1116 1116 # Alias to prevent attribute lookup in tight loop.
1117 1117 index = self.index
1118 1118 generaldelta = self._generaldelta
1119 1119
1120 1120 iterrev = rev
1121 1121 e = index[iterrev]
1122 1122 while iterrev != e[3] and iterrev != stoprev:
1123 1123 chain.append(iterrev)
1124 1124 if generaldelta:
1125 1125 iterrev = e[3]
1126 1126 else:
1127 1127 iterrev -= 1
1128 1128 e = index[iterrev]
1129 1129
1130 1130 if iterrev == stoprev:
1131 1131 stopped = True
1132 1132 else:
1133 1133 chain.append(iterrev)
1134 1134 stopped = False
1135 1135
1136 1136 chain.reverse()
1137 1137 return chain, stopped
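
A hedged reading of the return value (revision numbers below are made up): the chain comes back in ascending order, and when ``stopped`` is False its first element is the base revision whose stored data is a full text rather than a delta.

chain, stopped = rl._deltachain(7, stoprev=None)
# e.g. chain == [2, 5, 7]: rebuilding rev 7 means taking the full text
# stored for rev 2 and applying the deltas stored for revs 5 and 7 in order.
if stopped:
    # stoprev was hit; the text for chain[0] must be obtained elsewhere
    pass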
1138 1138
1139 1139 def ancestors(self, revs, stoprev=0, inclusive=False):
1140 1140 """Generate the ancestors of 'revs' in reverse revision order.
1141 1141 Does not generate revs lower than stoprev.
1142 1142
1143 1143 See the documentation for ancestor.lazyancestors for more details."""
1144 1144
1145 1145 # first, make sure start revisions aren't filtered
1146 1146 revs = list(revs)
1147 1147 checkrev = self.node
1148 1148 for r in revs:
1149 1149 checkrev(r)
1150 1150 # and we're sure ancestors aren't filtered as well
1151 1151
1152 1152 if rustancestor is not None and self.index.rust_ext_compat:
1153 1153 lazyancestors = rustancestor.LazyAncestors
1154 1154 arg = self.index
1155 1155 else:
1156 1156 lazyancestors = ancestor.lazyancestors
1157 1157 arg = self._uncheckedparentrevs
1158 1158 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1159 1159
1160 1160 def descendants(self, revs):
1161 1161 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1162 1162
1163 1163 def findcommonmissing(self, common=None, heads=None):
1164 1164 """Return a tuple of the ancestors of common and the ancestors of heads
1165 1165 that are not ancestors of common. In revset terminology, we return the
1166 1166 tuple:
1167 1167
1168 1168 ::common, (::heads) - (::common)
1169 1169
1170 1170 The list is sorted by revision number, meaning it is
1171 1171 topologically sorted.
1172 1172
1173 1173 'heads' and 'common' are both lists of node IDs. If heads is
1174 1174 not supplied, uses all of the revlog's heads. If common is not
1175 1175 supplied, uses nullid."""
1176 1176 if common is None:
1177 1177 common = [self.nullid]
1178 1178 if heads is None:
1179 1179 heads = self.heads()
1180 1180
1181 1181 common = [self.rev(n) for n in common]
1182 1182 heads = [self.rev(n) for n in heads]
1183 1183
1184 1184 # we want the ancestors, but inclusive
1185 1185 class lazyset:
1186 1186 def __init__(self, lazyvalues):
1187 1187 self.addedvalues = set()
1188 1188 self.lazyvalues = lazyvalues
1189 1189
1190 1190 def __contains__(self, value):
1191 1191 return value in self.addedvalues or value in self.lazyvalues
1192 1192
1193 1193 def __iter__(self):
1194 1194 added = self.addedvalues
1195 1195 for r in added:
1196 1196 yield r
1197 1197 for r in self.lazyvalues:
1198 1198 if r not in added:
1199 1199 yield r
1200 1200
1201 1201 def add(self, value):
1202 1202 self.addedvalues.add(value)
1203 1203
1204 1204 def update(self, values):
1205 1205 self.addedvalues.update(values)
1206 1206
1207 1207 has = lazyset(self.ancestors(common))
1208 1208 has.add(nullrev)
1209 1209 has.update(common)
1210 1210
1211 1211 # take all ancestors from heads that aren't in has
1212 1212 missing = set()
1213 1213 visit = collections.deque(r for r in heads if r not in has)
1214 1214 while visit:
1215 1215 r = visit.popleft()
1216 1216 if r in missing:
1217 1217 continue
1218 1218 else:
1219 1219 missing.add(r)
1220 1220 for p in self.parentrevs(r):
1221 1221 if p not in has:
1222 1222 visit.append(p)
1223 1223 missing = list(missing)
1224 1224 missing.sort()
1225 1225 return has, [self.node(miss) for miss in missing]
1226 1226
1227 1227 def incrementalmissingrevs(self, common=None):
1228 1228 """Return an object that can be used to incrementally compute the
1229 1229 revision numbers of the ancestors of arbitrary sets that are not
1230 1230 ancestors of common. This is an ancestor.incrementalmissingancestors
1231 1231 object.
1232 1232
1233 1233 'common' is a list of revision numbers. If common is not supplied, uses
1234 1234 nullrev.
1235 1235 """
1236 1236 if common is None:
1237 1237 common = [nullrev]
1238 1238
1239 1239 if rustancestor is not None and self.index.rust_ext_compat:
1240 1240 return rustancestor.MissingAncestors(self.index, common)
1241 1241 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1242 1242
1243 1243 def findmissingrevs(self, common=None, heads=None):
1244 1244 """Return the revision numbers of the ancestors of heads that
1245 1245 are not ancestors of common.
1246 1246
1247 1247 More specifically, return a list of revision numbers corresponding to
1248 1248 nodes N such that every N satisfies the following constraints:
1249 1249
1250 1250 1. N is an ancestor of some node in 'heads'
1251 1251 2. N is not an ancestor of any node in 'common'
1252 1252
1253 1253 The list is sorted by revision number, meaning it is
1254 1254 topologically sorted.
1255 1255
1256 1256 'heads' and 'common' are both lists of revision numbers. If heads is
1257 1257 not supplied, uses all of the revlog's heads. If common is not
1258 1258 supplied, uses nullid."""
1259 1259 if common is None:
1260 1260 common = [nullrev]
1261 1261 if heads is None:
1262 1262 heads = self.headrevs()
1263 1263
1264 1264 inc = self.incrementalmissingrevs(common=common)
1265 1265 return inc.missingancestors(heads)
1266 1266
1267 1267 def findmissing(self, common=None, heads=None):
1268 1268 """Return the ancestors of heads that are not ancestors of common.
1269 1269
1270 1270 More specifically, return a list of nodes N such that every N
1271 1271 satisfies the following constraints:
1272 1272
1273 1273 1. N is an ancestor of some node in 'heads'
1274 1274 2. N is not an ancestor of any node in 'common'
1275 1275
1276 1276 The list is sorted by revision number, meaning it is
1277 1277 topologically sorted.
1278 1278
1279 1279 'heads' and 'common' are both lists of node IDs. If heads is
1280 1280 not supplied, uses all of the revlog's heads. If common is not
1281 1281 supplied, uses nullid."""
1282 1282 if common is None:
1283 1283 common = [self.nullid]
1284 1284 if heads is None:
1285 1285 heads = self.heads()
1286 1286
1287 1287 common = [self.rev(n) for n in common]
1288 1288 heads = [self.rev(n) for n in heads]
1289 1289
1290 1290 inc = self.incrementalmissingrevs(common=common)
1291 1291 return [self.node(r) for r in inc.missingancestors(heads)]
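
A hedged usage sketch of the two variants above, with ``common_nodes`` and ``remote_heads`` as illustrative inputs:

# nodes that are ancestors of remote_heads but not of anything in common_nodes
missing_nodes = rl.findmissing(common=common_nodes, heads=remote_heads)

# the same query expressed in revision numbers
missing_revs = rl.findmissingrevs(
    common=[rl.rev(n) for n in common_nodes],
    heads=[rl.rev(n) for n in remote_heads],
)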
1292 1292
1293 1293 def nodesbetween(self, roots=None, heads=None):
1294 1294 """Return a topological path from 'roots' to 'heads'.
1295 1295
1296 1296 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1297 1297 topologically sorted list of all nodes N that satisfy both of
1298 1298 these constraints:
1299 1299
1300 1300 1. N is a descendant of some node in 'roots'
1301 1301 2. N is an ancestor of some node in 'heads'
1302 1302
1303 1303 Every node is considered to be both a descendant and an ancestor
1304 1304 of itself, so every reachable node in 'roots' and 'heads' will be
1305 1305 included in 'nodes'.
1306 1306
1307 1307 'outroots' is the list of reachable nodes in 'roots', i.e., the
1308 1308 subset of 'roots' that is returned in 'nodes'. Likewise,
1309 1309 'outheads' is the subset of 'heads' that is also in 'nodes'.
1310 1310
1311 1311 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1312 1312 unspecified, uses nullid as the only root. If 'heads' is
1313 1313 unspecified, uses list of all of the revlog's heads."""
1314 1314 nonodes = ([], [], [])
1315 1315 if roots is not None:
1316 1316 roots = list(roots)
1317 1317 if not roots:
1318 1318 return nonodes
1319 1319 lowestrev = min([self.rev(n) for n in roots])
1320 1320 else:
1321 1321 roots = [self.nullid] # Everybody's a descendant of nullid
1322 1322 lowestrev = nullrev
1323 1323 if (lowestrev == nullrev) and (heads is None):
1324 1324 # We want _all_ the nodes!
1325 1325 return (
1326 1326 [self.node(r) for r in self],
1327 1327 [self.nullid],
1328 1328 list(self.heads()),
1329 1329 )
1330 1330 if heads is None:
1331 1331 # All nodes are ancestors, so the latest ancestor is the last
1332 1332 # node.
1333 1333 highestrev = len(self) - 1
1334 1334 # Set ancestors to None to signal that every node is an ancestor.
1335 1335 ancestors = None
1336 1336 # Set heads to an empty dictionary for later discovery of heads
1337 1337 heads = {}
1338 1338 else:
1339 1339 heads = list(heads)
1340 1340 if not heads:
1341 1341 return nonodes
1342 1342 ancestors = set()
1343 1343 # Turn heads into a dictionary so we can remove 'fake' heads.
1344 1344 # Also, later we will be using it to filter out the heads we can't
1345 1345 # find from roots.
1346 1346 heads = dict.fromkeys(heads, False)
1347 1347 # Start at the top and keep marking parents until we're done.
1348 1348 nodestotag = set(heads)
1349 1349 # Remember where the top was so we can use it as a limit later.
1350 1350 highestrev = max([self.rev(n) for n in nodestotag])
1351 1351 while nodestotag:
1352 1352 # grab a node to tag
1353 1353 n = nodestotag.pop()
1354 1354 # Never tag nullid
1355 1355 if n == self.nullid:
1356 1356 continue
1357 1357 # A node's revision number represents its place in a
1358 1358 # topologically sorted list of nodes.
1359 1359 r = self.rev(n)
1360 1360 if r >= lowestrev:
1361 1361 if n not in ancestors:
1362 1362 # If we are possibly a descendant of one of the roots
1363 1363 # and we haven't already been marked as an ancestor
1364 1364 ancestors.add(n) # Mark as ancestor
1365 1365 # Add non-nullid parents to list of nodes to tag.
1366 1366 nodestotag.update(
1367 1367 [p for p in self.parents(n) if p != self.nullid]
1368 1368 )
1369 1369 elif n in heads: # We've seen it before, is it a fake head?
1370 1370 # So it is, real heads should not be the ancestors of
1371 1371 # any other heads.
1372 1372 heads.pop(n)
1373 1373 if not ancestors:
1374 1374 return nonodes
1375 1375 # Now that we have our set of ancestors, we want to remove any
1376 1376 # roots that are not ancestors.
1377 1377
1378 1378 # If one of the roots was nullid, everything is included anyway.
1379 1379 if lowestrev > nullrev:
1380 1380 # But, since we weren't, let's recompute the lowest rev to not
1381 1381 # include roots that aren't ancestors.
1382 1382
1383 1383 # Filter out roots that aren't ancestors of heads
1384 1384 roots = [root for root in roots if root in ancestors]
1385 1385 # Recompute the lowest revision
1386 1386 if roots:
1387 1387 lowestrev = min([self.rev(root) for root in roots])
1388 1388 else:
1389 1389 # No more roots? Return empty list
1390 1390 return nonodes
1391 1391 else:
1392 1392 # We are descending from nullid, and don't need to care about
1393 1393 # any other roots.
1394 1394 lowestrev = nullrev
1395 1395 roots = [self.nullid]
1396 1396 # Transform our roots list into a set.
1397 1397 descendants = set(roots)
1398 1398 # Also, keep the original roots so we can filter out roots that aren't
1399 1399 # 'real' roots (i.e. are descended from other roots).
1400 1400 roots = descendants.copy()
1401 1401 # Our topologically sorted list of output nodes.
1402 1402 orderedout = []
1403 1403 # Don't start at nullid since we don't want nullid in our output list,
1404 1404 # and if nullid shows up in descendants, empty parents will look like
1405 1405 # they're descendants.
1406 1406 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1407 1407 n = self.node(r)
1408 1408 isdescendant = False
1409 1409 if lowestrev == nullrev: # Everybody is a descendant of nullid
1410 1410 isdescendant = True
1411 1411 elif n in descendants:
1412 1412 # n is already a descendant
1413 1413 isdescendant = True
1414 1414 # This check only needs to be done here because all the roots
1415 1415 # will start being marked as descendants before the loop.
1416 1416 if n in roots:
1417 1417 # If n was a root, check if it's a 'real' root.
1418 1418 p = tuple(self.parents(n))
1419 1419 # If any of its parents are descendants, it's not a root.
1420 1420 if (p[0] in descendants) or (p[1] in descendants):
1421 1421 roots.remove(n)
1422 1422 else:
1423 1423 p = tuple(self.parents(n))
1424 1424 # A node is a descendant if either of its parents are
1425 1425 # descendants. (We seeded the descendants set with the roots
1426 1426 # up there, remember?)
1427 1427 if (p[0] in descendants) or (p[1] in descendants):
1428 1428 descendants.add(n)
1429 1429 isdescendant = True
1430 1430 if isdescendant and ((ancestors is None) or (n in ancestors)):
1431 1431 # Only include nodes that are both descendants and ancestors.
1432 1432 orderedout.append(n)
1433 1433 if (ancestors is not None) and (n in heads):
1434 1434 # We're trying to figure out which heads are reachable
1435 1435 # from roots.
1436 1436 # Mark this head as having been reached
1437 1437 heads[n] = True
1438 1438 elif ancestors is None:
1439 1439 # Otherwise, we're trying to discover the heads.
1440 1440 # Assume this is a head because if it isn't, the next step
1441 1441 # will eventually remove it.
1442 1442 heads[n] = True
1443 1443 # But, obviously its parents aren't.
1444 1444 for p in self.parents(n):
1445 1445 heads.pop(p, None)
1446 1446 heads = [head for head, flag in heads.items() if flag]
1447 1447 roots = list(roots)
1448 1448 assert orderedout
1449 1449 assert roots
1450 1450 assert heads
1451 1451 return (orderedout, roots, heads)
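
A hedged call sketch (``base_node`` and ``tip_node`` are placeholders): nodes that are both descendants of a root and ancestors of a head come back topologically sorted, together with the subsets of roots and heads that were actually reachable.

nodes, outroots, outheads = rl.nodesbetween(roots=[base_node], heads=[tip_node])
# ``nodes`` includes base_node and tip_node themselves whenever tip_node
# actually descends from base_node; otherwise the result can be empty.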
1452 1452
1453 1453 def headrevs(self, revs=None):
1454 1454 if revs is None:
1455 1455 try:
1456 1456 return self.index.headrevs()
1457 1457 except AttributeError:
1458 1458 return self._headrevs()
1459 1459 if rustdagop is not None and self.index.rust_ext_compat:
1460 1460 return rustdagop.headrevs(self.index, revs)
1461 1461 return dagop.headrevs(revs, self._uncheckedparentrevs)
1462 1462
1463 1463 def computephases(self, roots):
1464 1464 return self.index.computephasesmapsets(roots)
1465 1465
1466 1466 def _headrevs(self):
1467 1467 count = len(self)
1468 1468 if not count:
1469 1469 return [nullrev]
1470 1470 # we won't iterate over filtered revs, so nobody is a head at start
1471 1471 ishead = [0] * (count + 1)
1472 1472 index = self.index
1473 1473 for r in self:
1474 1474 ishead[r] = 1 # I may be a head
1475 1475 e = index[r]
1476 1476 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1477 1477 return [r for r, val in enumerate(ishead) if val]
1478 1478
1479 1479 def heads(self, start=None, stop=None):
1480 1480 """return the list of all nodes that have no children
1481 1481
1482 1482 if start is specified, only heads that are descendants of
1483 1483 start will be returned
1484 1484 if stop is specified, it will consider all the revs from stop
1485 1485 as if they had no children
1486 1486 """
1487 1487 if start is None and stop is None:
1488 1488 if not len(self):
1489 1489 return [self.nullid]
1490 1490 return [self.node(r) for r in self.headrevs()]
1491 1491
1492 1492 if start is None:
1493 1493 start = nullrev
1494 1494 else:
1495 1495 start = self.rev(start)
1496 1496
1497 1497 stoprevs = {self.rev(n) for n in stop or []}
1498 1498
1499 1499 revs = dagop.headrevssubset(
1500 1500 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1501 1501 )
1502 1502
1503 1503 return [self.node(rev) for rev in revs]
1504 1504
1505 1505 def children(self, node):
1506 1506 """find the children of a given node"""
1507 1507 c = []
1508 1508 p = self.rev(node)
1509 1509 for r in self.revs(start=p + 1):
1510 1510 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1511 1511 if prevs:
1512 1512 for pr in prevs:
1513 1513 if pr == p:
1514 1514 c.append(self.node(r))
1515 1515 elif p == nullrev:
1516 1516 c.append(self.node(r))
1517 1517 return c
1518 1518
1519 1519 def commonancestorsheads(self, a, b):
1520 1520 """calculate all the heads of the common ancestors of nodes a and b"""
1521 1521 a, b = self.rev(a), self.rev(b)
1522 1522 ancs = self._commonancestorsheads(a, b)
1523 1523 return pycompat.maplist(self.node, ancs)
1524 1524
1525 1525 def _commonancestorsheads(self, *revs):
1526 1526 """calculate all the heads of the common ancestors of revs"""
1527 1527 try:
1528 1528 ancs = self.index.commonancestorsheads(*revs)
1529 1529 except (AttributeError, OverflowError): # C implementation failed
1530 1530 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1531 1531 return ancs
1532 1532
1533 1533 def isancestor(self, a, b):
1534 1534 """return True if node a is an ancestor of node b
1535 1535
1536 1536 A revision is considered an ancestor of itself."""
1537 1537 a, b = self.rev(a), self.rev(b)
1538 1538 return self.isancestorrev(a, b)
1539 1539
1540 1540 def isancestorrev(self, a, b):
1541 1541 """return True if revision a is an ancestor of revision b
1542 1542
1543 1543 A revision is considered an ancestor of itself.
1544 1544
1545 1545 The implementation of this is trivial but the use of
1546 1546 reachableroots is not."""
1547 1547 if a == nullrev:
1548 1548 return True
1549 1549 elif a == b:
1550 1550 return True
1551 1551 elif a > b:
1552 1552 return False
1553 1553 return bool(self.reachableroots(a, [b], [a], includepath=False))
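
To illustrate the fast paths and the reachableroots() fallback above (revision numbers are hypothetical):

assert rl.isancestorrev(4, 4)        # every revision is its own ancestor
assert rl.isancestorrev(nullrev, 4)  # nullrev is an ancestor of everything
# otherwise the question reduces to: does any path lead from rev 2 up to rev 9?
connected = bool(rl.reachableroots(2, [9], [2], includepath=False))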
1554 1554
1555 1555 def reachableroots(self, minroot, heads, roots, includepath=False):
1556 1556 """return (heads(::(<roots> and <roots>::<heads>)))
1557 1557
1558 1558 If includepath is True, return (<roots>::<heads>)."""
1559 1559 try:
1560 1560 return self.index.reachableroots2(
1561 1561 minroot, heads, roots, includepath
1562 1562 )
1563 1563 except AttributeError:
1564 1564 return dagop._reachablerootspure(
1565 1565 self.parentrevs, minroot, roots, heads, includepath
1566 1566 )
1567 1567
1568 1568 def ancestor(self, a, b):
1569 1569 """calculate the "best" common ancestor of nodes a and b"""
1570 1570
1571 1571 a, b = self.rev(a), self.rev(b)
1572 1572 try:
1573 1573 ancs = self.index.ancestors(a, b)
1574 1574 except (AttributeError, OverflowError):
1575 1575 ancs = ancestor.ancestors(self.parentrevs, a, b)
1576 1576 if ancs:
1577 1577 # choose a consistent winner when there's a tie
1578 1578 return min(map(self.node, ancs))
1579 1579 return self.nullid
1580 1580
1581 1581 def _match(self, id):
1582 1582 if isinstance(id, int):
1583 1583 # rev
1584 1584 return self.node(id)
1585 1585 if len(id) == self.nodeconstants.nodelen:
1586 1586 # possibly a binary node
1587 1587 # odds of a binary node being all hex in ASCII are 1 in 10**25
1588 1588 try:
1589 1589 node = id
1590 1590 self.rev(node) # quick search the index
1591 1591 return node
1592 1592 except error.LookupError:
1593 1593 pass # may be partial hex id
1594 1594 try:
1595 1595 # str(rev)
1596 1596 rev = int(id)
1597 1597 if b"%d" % rev != id:
1598 1598 raise ValueError
1599 1599 if rev < 0:
1600 1600 rev = len(self) + rev
1601 1601 if rev < 0 or rev >= len(self):
1602 1602 raise ValueError
1603 1603 return self.node(rev)
1604 1604 except (ValueError, OverflowError):
1605 1605 pass
1606 1606 if len(id) == 2 * self.nodeconstants.nodelen:
1607 1607 try:
1608 1608 # a full hex nodeid?
1609 1609 node = bin(id)
1610 1610 self.rev(node)
1611 1611 return node
1612 1612 except (binascii.Error, error.LookupError):
1613 1613 pass
1614 1614
1615 1615 def _partialmatch(self, id):
1616 1616 # we don't care about wdirfilenodeids as they should always be full hashes
1617 1617 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1618 1618 ambiguous = False
1619 1619 try:
1620 1620 partial = self.index.partialmatch(id)
1621 1621 if partial and self.hasnode(partial):
1622 1622 if maybewdir:
1623 1623 # single 'ff...' match in radix tree, ambiguous with wdir
1624 1624 ambiguous = True
1625 1625 else:
1626 1626 return partial
1627 1627 elif maybewdir:
1628 1628 # no 'ff...' match in radix tree, wdir identified
1629 1629 raise error.WdirUnsupported
1630 1630 else:
1631 1631 return None
1632 1632 except error.RevlogError:
1633 1633 # parsers.c radix tree lookup gave multiple matches
1634 1634 # fast path: for unfiltered changelog, radix tree is accurate
1635 1635 if not getattr(self, 'filteredrevs', None):
1636 1636 ambiguous = True
1637 1637 # fall through to slow path that filters hidden revisions
1638 1638 except (AttributeError, ValueError):
1639 1639 # we are pure python, or key is not hex
1640 1640 pass
1641 1641 if ambiguous:
1642 1642 raise error.AmbiguousPrefixLookupError(
1643 1643 id, self.display_id, _(b'ambiguous identifier')
1644 1644 )
1645 1645
1646 1646 if id in self._pcache:
1647 1647 return self._pcache[id]
1648 1648
1649 1649 if len(id) <= 40:
1650 1650 # hex(node)[:...]
1651 1651 l = len(id) // 2 * 2 # grab an even number of digits
1652 1652 try:
1653 1653 # we're dropping the last digit, so let's check that it's hex,
1654 1654 # to avoid the expensive computation below if it's not
1655 1655 if len(id) % 2 > 0:
1656 1656 if not (id[-1] in hexdigits):
1657 1657 return None
1658 1658 prefix = bin(id[:l])
1659 1659 except binascii.Error:
1660 1660 pass
1661 1661 else:
1662 1662 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1663 1663 nl = [
1664 1664 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1665 1665 ]
1666 1666 if self.nodeconstants.nullhex.startswith(id):
1667 1667 nl.append(self.nullid)
1668 1668 if len(nl) > 0:
1669 1669 if len(nl) == 1 and not maybewdir:
1670 1670 self._pcache[id] = nl[0]
1671 1671 return nl[0]
1672 1672 raise error.AmbiguousPrefixLookupError(
1673 1673 id, self.display_id, _(b'ambiguous identifier')
1674 1674 )
1675 1675 if maybewdir:
1676 1676 raise error.WdirUnsupported
1677 1677 return None
1678 1678
1679 1679 def lookup(self, id):
1680 1680 """locate a node based on:
1681 1681 - revision number or str(revision number)
1682 1682 - nodeid or subset of hex nodeid
1683 1683 """
1684 1684 n = self._match(id)
1685 1685 if n is not None:
1686 1686 return n
1687 1687 n = self._partialmatch(id)
1688 1688 if n:
1689 1689 return n
1690 1690
1691 1691 raise error.LookupError(id, self.display_id, _(b'no match found'))
1692 1692
1693 1693 def shortest(self, node, minlength=1):
1694 1694 """Find the shortest unambiguous prefix that matches node."""
1695 1695
1696 1696 def isvalid(prefix):
1697 1697 try:
1698 1698 matchednode = self._partialmatch(prefix)
1699 1699 except error.AmbiguousPrefixLookupError:
1700 1700 return False
1701 1701 except error.WdirUnsupported:
1702 1702 # single 'ff...' match
1703 1703 return True
1704 1704 if matchednode is None:
1705 1705 raise error.LookupError(node, self.display_id, _(b'no node'))
1706 1706 return True
1707 1707
1708 1708 def maybewdir(prefix):
1709 1709 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1710 1710
1711 1711 hexnode = hex(node)
1712 1712
1713 1713 def disambiguate(hexnode, minlength):
1714 1714 """Disambiguate against wdirid."""
1715 1715 for length in range(minlength, len(hexnode) + 1):
1716 1716 prefix = hexnode[:length]
1717 1717 if not maybewdir(prefix):
1718 1718 return prefix
1719 1719
1720 1720 if not getattr(self, 'filteredrevs', None):
1721 1721 try:
1722 1722 length = max(self.index.shortest(node), minlength)
1723 1723 return disambiguate(hexnode, length)
1724 1724 except error.RevlogError:
1725 1725 if node != self.nodeconstants.wdirid:
1726 1726 raise error.LookupError(
1727 1727 node, self.display_id, _(b'no node')
1728 1728 )
1729 1729 except AttributeError:
1730 1730 # Fall through to pure code
1731 1731 pass
1732 1732
1733 1733 if node == self.nodeconstants.wdirid:
1734 1734 for length in range(minlength, len(hexnode) + 1):
1735 1735 prefix = hexnode[:length]
1736 1736 if isvalid(prefix):
1737 1737 return prefix
1738 1738
1739 1739 for length in range(minlength, len(hexnode) + 1):
1740 1740 prefix = hexnode[:length]
1741 1741 if isvalid(prefix):
1742 1742 return disambiguate(hexnode, length)
1743 1743
1744 1744 def cmp(self, node, text):
1745 1745 """compare text with a given file revision
1746 1746
1747 1747 returns True if text is different than what is stored.
1748 1748 """
1749 1749 p1, p2 = self.parents(node)
1750 1750 return storageutil.hashrevisionsha1(text, p1, p2) != node
1751 1751
1752 1752 def _getsegmentforrevs(self, startrev, endrev, df=None):
1753 1753 """Obtain a segment of raw data corresponding to a range of revisions.
1754 1754
1755 1755 Accepts the start and end revisions and an optional already-open
1756 1756 file handle to be used for reading. If the file handle is used, its
1757 1757 seek position will not be preserved.
1758 1758
1759 1759 Requests for data may be satisfied by a cache.
1760 1760
1761 1761 Returns a 2-tuple of (offset, data) for the requested range of
1762 1762 revisions. Offset is the integer offset from the beginning of the
1763 1763 revlog and data is a str or buffer of the raw byte data.
1764 1764
1765 1765 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1766 1766 to determine where each revision's data begins and ends.
1767 1767 """
1768 1768 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1769 1769 # (functions are expensive).
1770 1770 index = self.index
1771 1771 istart = index[startrev]
1772 1772 start = int(istart[0] >> 16)
1773 1773 if startrev == endrev:
1774 1774 end = start + istart[1]
1775 1775 else:
1776 1776 iend = index[endrev]
1777 1777 end = int(iend[0] >> 16) + iend[1]
1778 1778
1779 1779 if self._inline:
1780 1780 start += (startrev + 1) * self.index.entry_size
1781 1781 end += (endrev + 1) * self.index.entry_size
1782 1782 length = end - start
1783 1783
1784 1784 return start, self._segmentfile.read_chunk(start, length, df)
1785 1785
1786 1786 def _chunk(self, rev, df=None):
1787 1787 """Obtain a single decompressed chunk for a revision.
1788 1788
1789 1789 Accepts an integer revision and an optional already-open file handle
1790 1790 to be used for reading. If used, the seek position of the file will not
1791 1791 be preserved.
1792 1792
1793 1793 Returns a str holding uncompressed data for the requested revision.
1794 1794 """
1795 1795 compression_mode = self.index[rev][10]
1796 1796 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1797 1797 if compression_mode == COMP_MODE_PLAIN:
1798 1798 return data
1799 1799 elif compression_mode == COMP_MODE_DEFAULT:
1800 1800 return self._decompressor(data)
1801 1801 elif compression_mode == COMP_MODE_INLINE:
1802 1802 return self.decompress(data)
1803 1803 else:
1804 1804 msg = b'unknown compression mode %d'
1805 1805 msg %= compression_mode
1806 1806 raise error.RevlogError(msg)
1807 1807
1808 1808 def _chunks(self, revs, df=None, targetsize=None):
1809 1809 """Obtain decompressed chunks for the specified revisions.
1810 1810
1811 1811 Accepts an iterable of numeric revisions that are assumed to be in
1812 1812 ascending order. Also accepts an optional already-open file handle
1813 1813 to be used for reading. If used, the seek position of the file will
1814 1814 not be preserved.
1815 1815
1816 1816 This function is similar to calling ``self._chunk()`` multiple times,
1817 1817 but is faster.
1818 1818
1819 1819 Returns a list with decompressed data for each requested revision.
1820 1820 """
1821 1821 if not revs:
1822 1822 return []
1823 1823 start = self.start
1824 1824 length = self.length
1825 1825 inline = self._inline
1826 1826 iosize = self.index.entry_size
1827 1827 buffer = util.buffer
1828 1828
1829 1829 l = []
1830 1830 ladd = l.append
1831 1831
1832 1832 if not self._withsparseread:
1833 1833 slicedchunks = (revs,)
1834 1834 else:
1835 1835 slicedchunks = deltautil.slicechunk(
1836 1836 self, revs, targetsize=targetsize
1837 1837 )
1838 1838
1839 1839 for revschunk in slicedchunks:
1840 1840 firstrev = revschunk[0]
1841 1841 # Skip trailing revisions with empty diff
1842 1842 for lastrev in revschunk[::-1]:
1843 1843 if length(lastrev) != 0:
1844 1844 break
1845 1845
1846 1846 try:
1847 1847 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1848 1848 except OverflowError:
1849 1849 # issue4215 - we can't cache a run of chunks greater than
1850 1850 # 2G on Windows
1851 1851 return [self._chunk(rev, df=df) for rev in revschunk]
1852 1852
1853 1853 decomp = self.decompress
1854 1854 # self._decompressor might be None, but will not be used in that case
1855 1855 def_decomp = self._decompressor
1856 1856 for rev in revschunk:
1857 1857 chunkstart = start(rev)
1858 1858 if inline:
1859 1859 chunkstart += (rev + 1) * iosize
1860 1860 chunklength = length(rev)
1861 1861 comp_mode = self.index[rev][10]
1862 1862 c = buffer(data, chunkstart - offset, chunklength)
1863 1863 if comp_mode == COMP_MODE_PLAIN:
1864 1864 ladd(c)
1865 1865 elif comp_mode == COMP_MODE_INLINE:
1866 1866 ladd(decomp(c))
1867 1867 elif comp_mode == COMP_MODE_DEFAULT:
1868 1868 ladd(def_decomp(c))
1869 1869 else:
1870 1870 msg = b'unknown compression mode %d'
1871 1871 msg %= comp_mode
1872 1872 raise error.RevlogError(msg)
1873 1873
1874 1874 return l
1875 1875
1876 1876 def deltaparent(self, rev):
1877 1877 """return deltaparent of the given revision"""
1878 1878 base = self.index[rev][3]
1879 1879 if base == rev:
1880 1880 return nullrev
1881 1881 elif self._generaldelta:
1882 1882 return base
1883 1883 else:
1884 1884 return rev - 1
1885 1885
1886 1886 def issnapshot(self, rev):
1887 1887 """tells whether rev is a snapshot"""
1888 1888 if not self._sparserevlog:
1889 1889 return self.deltaparent(rev) == nullrev
1890 1890 elif util.safehasattr(self.index, 'issnapshot'):
1891 1891 # directly assign the method to cache the testing and access
1892 1892 self.issnapshot = self.index.issnapshot
1893 1893 return self.issnapshot(rev)
1894 1894 if rev == nullrev:
1895 1895 return True
1896 1896 entry = self.index[rev]
1897 1897 base = entry[3]
1898 1898 if base == rev:
1899 1899 return True
1900 1900 if base == nullrev:
1901 1901 return True
1902 1902 p1 = entry[5]
1903 1903 while self.length(p1) == 0:
1904 1904 b = self.deltaparent(p1)
1905 1905 if b == p1:
1906 1906 break
1907 1907 p1 = b
1908 1908 p2 = entry[6]
1909 1909 while self.length(p2) == 0:
1910 1910 b = self.deltaparent(p2)
1911 1911 if b == p2:
1912 1912 break
1913 1913 p2 = b
1914 1914 if base == p1 or base == p2:
1915 1915 return False
1916 1916 return self.issnapshot(base)
1917 1917
1918 1918 def snapshotdepth(self, rev):
1919 1919 """number of snapshot in the chain before this one"""
1920 1920 if not self.issnapshot(rev):
1921 1921 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1922 1922 return len(self._deltachain(rev)[0]) - 1
1923 1923
1924 1924 def revdiff(self, rev1, rev2):
1925 1925 """return or calculate a delta between two revisions
1926 1926
1927 1927 The delta calculated is in binary form and is intended to be written to
1928 1928 revlog data directly. So this function needs raw revision data.
1929 1929 """
1930 1930 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1931 1931 return bytes(self._chunk(rev2))
1932 1932
1933 1933 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1934 1934
1935 1935 def revision(self, nodeorrev, _df=None):
1936 1936 """return an uncompressed revision of a given node or revision
1937 1937 number.
1938 1938
1939 1939 _df - an existing file handle to read from. (internal-only)
1940 1940 """
1941 1941 return self._revisiondata(nodeorrev, _df)
1942 1942
1943 1943 def sidedata(self, nodeorrev, _df=None):
1944 1944 """a map of extra data related to the changeset but not part of the hash
1945 1945
1946 1946 This function currently returns a dictionary. However, a more advanced
1947 1947 mapping object will likely be used in the future for more
1948 1948 efficient/lazy code.
1949 1949 """
1950 1950 # deal with <nodeorrev> argument type
1951 1951 if isinstance(nodeorrev, int):
1952 1952 rev = nodeorrev
1953 1953 else:
1954 1954 rev = self.rev(nodeorrev)
1955 1955 return self._sidedata(rev)
1956 1956
1957 1957 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1958 1958 # deal with <nodeorrev> argument type
1959 1959 if isinstance(nodeorrev, int):
1960 1960 rev = nodeorrev
1961 1961 node = self.node(rev)
1962 1962 else:
1963 1963 node = nodeorrev
1964 1964 rev = None
1965 1965
1966 1966 # fast path the special `nullid` rev
1967 1967 if node == self.nullid:
1968 1968 return b""
1969 1969
1970 1970 # ``rawtext`` is the text as stored inside the revlog. Might be the
1971 1971 # revision or might need to be processed to retrieve the revision.
1972 1972 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1973 1973
1974 1974 if raw and validated:
1975 1975 # if we don't want to process the raw text and that raw
1976 1976 # text is cached, we can exit early.
1977 1977 return rawtext
1978 1978 if rev is None:
1979 1979 rev = self.rev(node)
1980 1980 # the revlog's flags for this revision
1981 1981 # (they usually alter its state or content)
1982 1982 flags = self.flags(rev)
1983 1983
1984 1984 if validated and flags == REVIDX_DEFAULT_FLAGS:
1985 1985 # no extra flags set, no flag processor runs, text = rawtext
1986 1986 return rawtext
1987 1987
1988 1988 if raw:
1989 1989 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1990 1990 text = rawtext
1991 1991 else:
1992 1992 r = flagutil.processflagsread(self, rawtext, flags)
1993 1993 text, validatehash = r
1994 1994 if validatehash:
1995 1995 self.checkhash(text, node, rev=rev)
1996 1996 if not validated:
1997 1997 self._revisioncache = (node, rev, rawtext)
1998 1998
1999 1999 return text
2000 2000
2001 2001 def _rawtext(self, node, rev, _df=None):
2002 2002 """return the possibly unvalidated rawtext for a revision
2003 2003
2004 2004 returns (rev, rawtext, validated)
2005 2005 """
2006 2006
2007 2007 # revision in the cache (could be useful to apply delta)
2008 2008 cachedrev = None
2009 2009 # An intermediate text to apply deltas to
2010 2010 basetext = None
2011 2011
2012 2012 # Check if we have the entry in cache
2013 2013 # The cache entry looks like (node, rev, rawtext)
2014 2014 if self._revisioncache:
2015 2015 if self._revisioncache[0] == node:
2016 2016 return (rev, self._revisioncache[2], True)
2017 2017 cachedrev = self._revisioncache[1]
2018 2018
2019 2019 if rev is None:
2020 2020 rev = self.rev(node)
2021 2021
2022 2022 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2023 2023 if stopped:
2024 2024 basetext = self._revisioncache[2]
2025 2025
2026 2026 # drop cache to save memory, the caller is expected to
2027 2027 # update self._revisioncache after validating the text
2028 2028 self._revisioncache = None
2029 2029
2030 2030 targetsize = None
2031 2031 rawsize = self.index[rev][2]
2032 2032 if 0 <= rawsize:
2033 2033 targetsize = 4 * rawsize
2034 2034
2035 2035 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2036 2036 if basetext is None:
2037 2037 basetext = bytes(bins[0])
2038 2038 bins = bins[1:]
2039 2039
2040 2040 rawtext = mdiff.patches(basetext, bins)
2041 2041 del basetext # let us have a chance to free memory early
2042 2042 return (rev, rawtext, False)
2043 2043
2044 2044 def _sidedata(self, rev):
2045 2045 """Return the sidedata for a given revision number."""
2046 2046 index_entry = self.index[rev]
2047 2047 sidedata_offset = index_entry[8]
2048 2048 sidedata_size = index_entry[9]
2049 2049
2050 2050 if self._inline:
2051 2051 sidedata_offset += self.index.entry_size * (1 + rev)
2052 2052 if sidedata_size == 0:
2053 2053 return {}
2054 2054
2055 2055 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2056 2056 filename = self._sidedatafile
2057 2057 end = self._docket.sidedata_end
2058 2058 offset = sidedata_offset
2059 2059 length = sidedata_size
2060 2060 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2061 2061 raise error.RevlogError(m)
2062 2062
2063 2063 comp_segment = self._segmentfile_sidedata.read_chunk(
2064 2064 sidedata_offset, sidedata_size
2065 2065 )
2066 2066
2067 2067 comp = self.index[rev][11]
2068 2068 if comp == COMP_MODE_PLAIN:
2069 2069 segment = comp_segment
2070 2070 elif comp == COMP_MODE_DEFAULT:
2071 2071 segment = self._decompressor(comp_segment)
2072 2072 elif comp == COMP_MODE_INLINE:
2073 2073 segment = self.decompress(comp_segment)
2074 2074 else:
2075 2075 msg = b'unknown compression mode %d'
2076 2076 msg %= comp
2077 2077 raise error.RevlogError(msg)
2078 2078
2079 2079 sidedata = sidedatautil.deserialize_sidedata(segment)
2080 2080 return sidedata
2081 2081
2082 2082 def rawdata(self, nodeorrev, _df=None):
2083 2083 """return an uncompressed raw data of a given node or revision number.
2084 2084
2085 2085 _df - an existing file handle to read from. (internal-only)
2086 2086 """
2087 2087 return self._revisiondata(nodeorrev, _df, raw=True)
2088 2088
2089 2089 def hash(self, text, p1, p2):
2090 2090 """Compute a node hash.
2091 2091
2092 2092 Available as a function so that subclasses can replace the hash
2093 2093 as needed.
2094 2094 """
2095 2095 return storageutil.hashrevisionsha1(text, p1, p2)
2096 2096
2097 2097 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2098 2098 """Check node hash integrity.
2099 2099
2100 2100 Available as a function so that subclasses can extend hash mismatch
2101 2101 behaviors as needed.
2102 2102 """
2103 2103 try:
2104 2104 if p1 is None and p2 is None:
2105 2105 p1, p2 = self.parents(node)
2106 2106 if node != self.hash(text, p1, p2):
2107 2107 # Clear the revision cache on hash failure. The revision cache
2108 2108 # only stores the raw revision and clearing the cache does have
2109 2109 # the side-effect that we won't have a cache hit when the raw
2110 2110 # revision data is accessed. But this case should be rare and
2111 2111 # it is extra work to teach the cache about the hash
2112 2112 # verification state.
2113 2113 if self._revisioncache and self._revisioncache[0] == node:
2114 2114 self._revisioncache = None
2115 2115
2116 2116 revornode = rev
2117 2117 if revornode is None:
2118 2118 revornode = templatefilters.short(hex(node))
2119 2119 raise error.RevlogError(
2120 2120 _(b"integrity check failed on %s:%s")
2121 2121 % (self.display_id, pycompat.bytestr(revornode))
2122 2122 )
2123 2123 except error.RevlogError:
2124 2124 if self._censorable and storageutil.iscensoredtext(text):
2125 2125 raise error.CensoredNodeError(self.display_id, node, text)
2126 2126 raise
2127 2127
2128 2128 @property
2129 2129 def _split_index_file(self):
2130 2130 """the path where to expect the index of an ongoing splitting operation
2131 2131
2132 2132 The file will only exist if a splitting operation is in progress, but
2133 2133 it is always expected at the same location."""
2134 parts = os.path.split(self.radix)
2134 parts = self.radix.split(b'/')
2135 2135 if len(parts) > 1:
2136 2136 # adds a '-s' suffix to the ``data/`` or ``meta/`` base directory
2137 2137 head = parts[0] + b'-s'
2138 return os.path.join(head, *parts[1:])
2138 mids = parts[1:-1]
2139 tail = parts[-1] + b'.i'
2140 pieces = [head] + mids + [tail]
2141 return b'/'.join(pieces)
2139 2142 else:
2140 2143 # the revlog is stored at the root of the store (changelog or
2141 2144 # manifest), no risk of collision.
2142 2145 return self.radix + b'.i.s'
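# Illustrative example (added for explanation, not part of the original
# source): for a filelog with radix b'data/some/dir/file', the temporary
# split index is expected at b'data-s/some/dir/file.i', while a store-root
# revlog such as one with radix b'00manifest' uses b'00manifest.i.s'.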
2143 2146
2144 2147 def _enforceinlinesize(self, tr, side_write=True):
2145 2148 """Check if the revlog is too big for inline and convert if so.
2146 2149
2147 2150 This should be called after revisions are added to the revlog. If the
2148 2151 revlog has grown too large to be an inline revlog, it will convert it
2149 2152 to use multiple index and data files.
2150 2153 """
2151 2154 tiprev = len(self) - 1
2152 2155 total_size = self.start(tiprev) + self.length(tiprev)
2153 2156 if not self._inline or total_size < _maxinline:
2154 2157 return
2155 2158
2156 2159 troffset = tr.findoffset(self._indexfile)
2157 2160 if troffset is None:
2158 2161 raise error.RevlogError(
2159 2162 _(b"%s not found in the transaction") % self._indexfile
2160 2163 )
2161 2164 if troffset:
2162 2165 tr.addbackup(self._indexfile, for_offset=True)
2163 2166 tr.add(self._datafile, 0)
2164 2167
2165 2168 existing_handles = False
2166 2169 if self._writinghandles is not None:
2167 2170 existing_handles = True
2168 2171 fp = self._writinghandles[0]
2169 2172 fp.flush()
2170 2173 fp.close()
2171 2174 # We can't use the cached file handle after close(). So prevent
2172 2175 # its usage.
2173 2176 self._writinghandles = None
2174 2177 self._segmentfile.writing_handle = None
2175 2178 # No need to deal with sidedata writing handle as it is only
2176 2179 # relevant with revlog-v2 which is never inline, not reaching
2177 2180 # this code
2178 2181 if side_write:
2179 2182 old_index_file_path = self._indexfile
2180 2183 new_index_file_path = self._split_index_file
2181 2184 opener = self.opener
2182 2185 weak_self = weakref.ref(self)
2183 2186
2184 2187 # the "split" index replace the real index when the transaction is finalized
2185 2188 def finalize_callback(tr):
2186 2189 opener.rename(
2187 2190 new_index_file_path,
2188 2191 old_index_file_path,
2189 2192 checkambig=True,
2190 2193 )
2191 2194 maybe_self = weak_self()
2192 2195 if maybe_self is not None:
2193 2196 maybe_self._indexfile = old_index_file_path
2194 2197
2195 2198 def abort_callback(tr):
2196 2199 maybe_self = weak_self()
2197 2200 if maybe_self is not None:
2198 2201 maybe_self._indexfile = old_index_file_path
2199 2202
2200 2203 tr.registertmp(new_index_file_path)
2201 2204 if self.target[1] is not None:
2202 2205 callback_id = b'000-revlog-split-%d-%s' % self.target
2203 2206 else:
2204 2207 callback_id = b'000-revlog-split-%d' % self.target[0]
2205 2208 tr.addfinalize(callback_id, finalize_callback)
2206 2209 tr.addabort(callback_id, abort_callback)
2207 2210
2208 2211 new_dfh = self._datafp(b'w+')
2209 2212 new_dfh.truncate(0) # drop any potentially existing data
2210 2213 try:
2211 2214 with self._indexfp() as read_ifh:
2212 2215 for r in self:
2213 2216 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2214 2217 new_dfh.flush()
2215 2218
2216 2219 if side_write:
2217 2220 self._indexfile = new_index_file_path
2218 2221 with self.__index_new_fp() as fp:
2219 2222 self._format_flags &= ~FLAG_INLINE_DATA
2220 2223 self._inline = False
2221 2224 for i in self:
2222 2225 e = self.index.entry_binary(i)
2223 2226 if i == 0 and self._docket is None:
2224 2227 header = self._format_flags | self._format_version
2225 2228 header = self.index.pack_header(header)
2226 2229 e = header + e
2227 2230 fp.write(e)
2228 2231 if self._docket is not None:
2229 2232 self._docket.index_end = fp.tell()
2230 2233
2231 2234 # If we don't use side-write, the temp file replaces the real
2232 2235 # index when we exit the context manager
2233 2236
2234 2237 nodemaputil.setup_persistent_nodemap(tr, self)
2235 2238 self._segmentfile = randomaccessfile.randomaccessfile(
2236 2239 self.opener,
2237 2240 self._datafile,
2238 2241 self._chunkcachesize,
2239 2242 )
2240 2243
2241 2244 if existing_handles:
2242 2245 # switched from inline to conventional; reopen the index
2243 2246 ifh = self.__index_write_fp()
2244 2247 self._writinghandles = (ifh, new_dfh, None)
2245 2248 self._segmentfile.writing_handle = new_dfh
2246 2249 new_dfh = None
2247 2250 # No need to deal with sidedata writing handle as it is only
2248 2251 # relevant with revlog-v2 which is never inline, not reaching
2249 2252 # this code
2250 2253 finally:
2251 2254 if new_dfh is not None:
2252 2255 new_dfh.close()
2253 2256
2254 2257 def _nodeduplicatecallback(self, transaction, node):
2255 2258 """called when trying to add a node already stored."""
2256 2259
2257 2260 @contextlib.contextmanager
2258 2261 def reading(self):
2259 2262 """Context manager that keeps data and sidedata files open for reading"""
2260 2263 with self._segmentfile.reading():
2261 2264 with self._segmentfile_sidedata.reading():
2262 2265 yield
2263 2266
2264 2267 @contextlib.contextmanager
2265 2268 def _writing(self, transaction):
2266 2269 if self._trypending:
2267 2270 msg = b'attempt to write to a `trypending` revlog: %s'
2268 2271 msg %= self.display_id
2269 2272 raise error.ProgrammingError(msg)
2270 2273 if self._writinghandles is not None:
2271 2274 yield
2272 2275 else:
2273 2276 ifh = dfh = sdfh = None
2274 2277 try:
2275 2278 r = len(self)
2276 2279 # opening the data file.
2277 2280 dsize = 0
2278 2281 if r:
2279 2282 dsize = self.end(r - 1)
2280 2283 dfh = None
2281 2284 if not self._inline:
2282 2285 try:
2283 2286 dfh = self._datafp(b"r+")
2284 2287 if self._docket is None:
2285 2288 dfh.seek(0, os.SEEK_END)
2286 2289 else:
2287 2290 dfh.seek(self._docket.data_end, os.SEEK_SET)
2288 2291 except FileNotFoundError:
2289 2292 dfh = self._datafp(b"w+")
2290 2293 transaction.add(self._datafile, dsize)
2291 2294 if self._sidedatafile is not None:
2292 2295 # revlog-v2 does not inline, help Pytype
2293 2296 assert dfh is not None
2294 2297 try:
2295 2298 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2296 2299 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2297 2300 except FileNotFoundError:
2298 2301 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2299 2302 transaction.add(
2300 2303 self._sidedatafile, self._docket.sidedata_end
2301 2304 )
2302 2305
2303 2306 # opening the index file.
2304 2307 isize = r * self.index.entry_size
2305 2308 ifh = self.__index_write_fp()
2306 2309 if self._inline:
2307 2310 transaction.add(self._indexfile, dsize + isize)
2308 2311 else:
2309 2312 transaction.add(self._indexfile, isize)
2310 2313 # exposing all file handles for writing.
2311 2314 self._writinghandles = (ifh, dfh, sdfh)
2312 2315 self._segmentfile.writing_handle = ifh if self._inline else dfh
2313 2316 self._segmentfile_sidedata.writing_handle = sdfh
2314 2317 yield
2315 2318 if self._docket is not None:
2316 2319 self._write_docket(transaction)
2317 2320 finally:
2318 2321 self._writinghandles = None
2319 2322 self._segmentfile.writing_handle = None
2320 2323 self._segmentfile_sidedata.writing_handle = None
2321 2324 if dfh is not None:
2322 2325 dfh.close()
2323 2326 if sdfh is not None:
2324 2327 sdfh.close()
2325 2328 # closing the index file last to avoid exposing references to
2326 2329 # potentially unflushed data content.
2327 2330 if ifh is not None:
2328 2331 ifh.close()
2329 2332
2330 2333 def _write_docket(self, transaction):
2331 2334 """write the current docket on disk
2332 2335
2333 2336 Exists as a method to help the changelog implement its transaction logic
2334 2337
2335 2338 We could also imagine using the same transaction logic for all revlogs
2336 2339 since dockets are cheap.
2337 2340 self._docket.write(transaction)
2338 2341
2339 2342 def addrevision(
2340 2343 self,
2341 2344 text,
2342 2345 transaction,
2343 2346 link,
2344 2347 p1,
2345 2348 p2,
2346 2349 cachedelta=None,
2347 2350 node=None,
2348 2351 flags=REVIDX_DEFAULT_FLAGS,
2349 2352 deltacomputer=None,
2350 2353 sidedata=None,
2351 2354 ):
2352 2355 """add a revision to the log
2353 2356
2354 2357 text - the revision data to add
2355 2358 transaction - the transaction object used for rollback
2356 2359 link - the linkrev data to add
2357 2360 p1, p2 - the parent nodeids of the revision
2358 2361 cachedelta - an optional precomputed delta
2359 2362 node - nodeid of revision; typically node is not specified, and it is
2360 2363 computed by default as hash(text, p1, p2), however subclasses might
2361 2364 use a different hashing method (and override checkhash() in such a case)
2362 2365 flags - the known flags to set on the revision
2363 2366 deltacomputer - an optional deltacomputer instance shared between
2364 2367 multiple calls
2365 2368 """
2366 2369 if link == nullrev:
2367 2370 raise error.RevlogError(
2368 2371 _(b"attempted to add linkrev -1 to %s") % self.display_id
2369 2372 )
2370 2373
2371 2374 if sidedata is None:
2372 2375 sidedata = {}
2373 2376 elif sidedata and not self.hassidedata:
2374 2377 raise error.ProgrammingError(
2375 2378 _(b"trying to add sidedata to a revlog who don't support them")
2376 2379 )
2377 2380
2378 2381 if flags:
2379 2382 node = node or self.hash(text, p1, p2)
2380 2383
2381 2384 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2382 2385
2383 2386 # If the flag processor modifies the revision data, ignore any provided
2384 2387 # cachedelta.
2385 2388 if rawtext != text:
2386 2389 cachedelta = None
2387 2390
2388 2391 if len(rawtext) > _maxentrysize:
2389 2392 raise error.RevlogError(
2390 2393 _(
2391 2394 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2392 2395 )
2393 2396 % (self.display_id, len(rawtext))
2394 2397 )
2395 2398
2396 2399 node = node or self.hash(rawtext, p1, p2)
2397 2400 rev = self.index.get_rev(node)
2398 2401 if rev is not None:
2399 2402 return rev
2400 2403
2401 2404 if validatehash:
2402 2405 self.checkhash(rawtext, node, p1=p1, p2=p2)
2403 2406
2404 2407 return self.addrawrevision(
2405 2408 rawtext,
2406 2409 transaction,
2407 2410 link,
2408 2411 p1,
2409 2412 p2,
2410 2413 node,
2411 2414 flags,
2412 2415 cachedelta=cachedelta,
2413 2416 deltacomputer=deltacomputer,
2414 2417 sidedata=sidedata,
2415 2418 )
2416 2419
2417 2420 def addrawrevision(
2418 2421 self,
2419 2422 rawtext,
2420 2423 transaction,
2421 2424 link,
2422 2425 p1,
2423 2426 p2,
2424 2427 node,
2425 2428 flags,
2426 2429 cachedelta=None,
2427 2430 deltacomputer=None,
2428 2431 sidedata=None,
2429 2432 ):
2430 2433 """add a raw revision with known flags, node and parents
2431 2434 useful when reusing a revision not stored in this revlog (ex: received
2432 2435 over the wire, or read from an external bundle).
2433 2436 """
2434 2437 with self._writing(transaction):
2435 2438 return self._addrevision(
2436 2439 node,
2437 2440 rawtext,
2438 2441 transaction,
2439 2442 link,
2440 2443 p1,
2441 2444 p2,
2442 2445 flags,
2443 2446 cachedelta,
2444 2447 deltacomputer=deltacomputer,
2445 2448 sidedata=sidedata,
2446 2449 )
2447 2450
2448 2451 def compress(self, data):
2449 2452 """Generate a possibly-compressed representation of data."""
2450 2453 if not data:
2451 2454 return b'', data
2452 2455
2453 2456 compressed = self._compressor.compress(data)
2454 2457
2455 2458 if compressed:
2456 2459 # The revlog compressor added the header in the returned data.
2457 2460 return b'', compressed
2458 2461
2459 2462 if data[0:1] == b'\0':
2460 2463 return b'', data
2461 2464 return b'u', data
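# Illustrative sketch (added for explanation, not part of the original source;
# assumes the default zlib engine):
#
#   compress(b'')                 -> (b'', b'')            # empty chunk
#   compress(highly_repetitive)   -> (b'', b'x\x9c...')    # zlib output carries its own 'x' header
#   compress(b'short text')       -> (b'u', b'short text') # too small to benefit, flagged 'u'
#   compress(b'\0raw-marked ...') -> (b'', b'\0raw-marked ...')
#                                    # a leading NUL already marks raw data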
2462 2465
2463 2466 def decompress(self, data):
2464 2467 """Decompress a revlog chunk.
2465 2468
2466 2469 The chunk is expected to begin with a header identifying the
2467 2470 format type so it can be routed to an appropriate decompressor.
2468 2471 """
2469 2472 if not data:
2470 2473 return data
2471 2474
2472 2475 # Revlogs are read much more frequently than they are written and many
2473 2476 # chunks only take microseconds to decompress, so performance is
2474 2477 # important here.
2475 2478 #
2476 2479 # We can make a few assumptions about revlogs:
2477 2480 #
2478 2481 # 1) the majority of chunks will be compressed (as opposed to inline
2479 2482 # raw data).
2480 2483 # 2) decompressing *any* data will likely be at least 10x slower than
2481 2484 # returning raw inline data.
2482 2485 # 3) we want to prioritize common and officially supported compression
2483 2486 # engines
2484 2487 #
2485 2488 # It follows that we want to optimize for "decompress compressed data
2486 2489 # when encoded with common and officially supported compression engines"
2487 2490 # case over "raw data" and "data encoded by less common or non-official
2488 2491 # compression engines." That is why we have the inline lookup first
2489 2492 # followed by the compengines lookup.
2490 2493 #
2491 2494 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2492 2495 # compressed chunks. And this matters for changelog and manifest reads.
2493 2496 t = data[0:1]
2494 2497
2495 2498 if t == b'x':
2496 2499 try:
2497 2500 return _zlibdecompress(data)
2498 2501 except zlib.error as e:
2499 2502 raise error.RevlogError(
2500 2503 _(b'revlog decompress error: %s')
2501 2504 % stringutil.forcebytestr(e)
2502 2505 )
2503 2506 # '\0' is more common than 'u' so it goes first.
2504 2507 elif t == b'\0':
2505 2508 return data
2506 2509 elif t == b'u':
2507 2510 return util.buffer(data, 1)
2508 2511
2509 2512 compressor = self._get_decompressor(t)
2510 2513
2511 2514 return compressor.decompress(data)
2512 2515
2513 2516 def _addrevision(
2514 2517 self,
2515 2518 node,
2516 2519 rawtext,
2517 2520 transaction,
2518 2521 link,
2519 2522 p1,
2520 2523 p2,
2521 2524 flags,
2522 2525 cachedelta,
2523 2526 alwayscache=False,
2524 2527 deltacomputer=None,
2525 2528 sidedata=None,
2526 2529 ):
2527 2530 """internal function to add revisions to the log
2528 2531
2529 2532 see addrevision for argument descriptions.
2530 2533
2531 2534 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2532 2535
2533 2536 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2534 2537 be used.
2535 2538
2536 2539 invariants:
2537 2540 - rawtext is optional (can be None); if not set, cachedelta must be set.
2538 2541 if both are set, they must correspond to each other.
2539 2542 """
2540 2543 if node == self.nullid:
2541 2544 raise error.RevlogError(
2542 2545 _(b"%s: attempt to add null revision") % self.display_id
2543 2546 )
2544 2547 if (
2545 2548 node == self.nodeconstants.wdirid
2546 2549 or node in self.nodeconstants.wdirfilenodeids
2547 2550 ):
2548 2551 raise error.RevlogError(
2549 2552 _(b"%s: attempt to add wdir revision") % self.display_id
2550 2553 )
2551 2554 if self._writinghandles is None:
2552 2555 msg = b'adding revision outside `revlog._writing` context'
2553 2556 raise error.ProgrammingError(msg)
2554 2557
2555 2558 if self._inline:
2556 2559 fh = self._writinghandles[0]
2557 2560 else:
2558 2561 fh = self._writinghandles[1]
2559 2562
2560 2563 btext = [rawtext]
2561 2564
2562 2565 curr = len(self)
2563 2566 prev = curr - 1
2564 2567
2565 2568 offset = self._get_data_offset(prev)
2566 2569
2567 2570 if self._concurrencychecker:
2568 2571 ifh, dfh, sdfh = self._writinghandles
2569 2572 # XXX no checking for the sidedata file
2570 2573 if self._inline:
2571 2574 # offset is "as if" it were in the .d file, so we need to add on
2572 2575 # the size of the entry metadata.
2573 2576 self._concurrencychecker(
2574 2577 ifh, self._indexfile, offset + curr * self.index.entry_size
2575 2578 )
2576 2579 else:
2577 2580 # Entries in the .i are a consistent size.
2578 2581 self._concurrencychecker(
2579 2582 ifh, self._indexfile, curr * self.index.entry_size
2580 2583 )
2581 2584 self._concurrencychecker(dfh, self._datafile, offset)
2582 2585
2583 2586 p1r, p2r = self.rev(p1), self.rev(p2)
2584 2587
2585 2588 # full versions are inserted when the needed deltas
2586 2589 # become comparable to the uncompressed text
2587 2590 if rawtext is None:
2588 2591 # need rawtext size, before changed by flag processors, which is
2589 2592 # the non-raw size. use revlog explicitly to avoid filelog's extra
2590 2593 # logic that might remove metadata size.
2591 2594 textlen = mdiff.patchedsize(
2592 2595 revlog.size(self, cachedelta[0]), cachedelta[1]
2593 2596 )
2594 2597 else:
2595 2598 textlen = len(rawtext)
2596 2599
2597 2600 if deltacomputer is None:
2598 2601 write_debug = None
2599 2602 if self._debug_delta:
2600 2603 write_debug = transaction._report
2601 2604 deltacomputer = deltautil.deltacomputer(
2602 2605 self, write_debug=write_debug
2603 2606 )
2604 2607
2605 2608 if cachedelta is not None and len(cachedelta) == 2:
2606 2609 # If the cached delta has no information about how it should be
2607 2610 # reused, add the default reuse instruction according to the
2608 2611 # revlog's configuration.
2609 2612 if self._generaldelta and self._lazydeltabase:
2610 2613 delta_base_reuse = DELTA_BASE_REUSE_TRY
2611 2614 else:
2612 2615 delta_base_reuse = DELTA_BASE_REUSE_NO
2613 2616 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2614 2617
2615 2618 revinfo = revlogutils.revisioninfo(
2616 2619 node,
2617 2620 p1,
2618 2621 p2,
2619 2622 btext,
2620 2623 textlen,
2621 2624 cachedelta,
2622 2625 flags,
2623 2626 )
2624 2627
2625 2628 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2626 2629
2627 2630 compression_mode = COMP_MODE_INLINE
2628 2631 if self._docket is not None:
2629 2632 default_comp = self._docket.default_compression_header
2630 2633 r = deltautil.delta_compression(default_comp, deltainfo)
2631 2634 compression_mode, deltainfo = r
2632 2635
2633 2636 sidedata_compression_mode = COMP_MODE_INLINE
2634 2637 if sidedata and self.hassidedata:
2635 2638 sidedata_compression_mode = COMP_MODE_PLAIN
2636 2639 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2637 2640 sidedata_offset = self._docket.sidedata_end
2638 2641 h, comp_sidedata = self.compress(serialized_sidedata)
2639 2642 if (
2640 2643 h != b'u'
2641 2644 and comp_sidedata[0:1] != b'\0'
2642 2645 and len(comp_sidedata) < len(serialized_sidedata)
2643 2646 ):
2644 2647 assert not h
2645 2648 if (
2646 2649 comp_sidedata[0:1]
2647 2650 == self._docket.default_compression_header
2648 2651 ):
2649 2652 sidedata_compression_mode = COMP_MODE_DEFAULT
2650 2653 serialized_sidedata = comp_sidedata
2651 2654 else:
2652 2655 sidedata_compression_mode = COMP_MODE_INLINE
2653 2656 serialized_sidedata = comp_sidedata
2654 2657 else:
2655 2658 serialized_sidedata = b""
2656 2659 # Don't store the offset if the sidedata is empty; that way
2657 2660 # we can easily detect empty sidedata, and it will be no different
2658 2661 # from sidedata we add manually.
2659 2662 sidedata_offset = 0
2660 2663
2661 2664 rank = RANK_UNKNOWN
2662 2665 if self._compute_rank:
2663 2666 if (p1r, p2r) == (nullrev, nullrev):
2664 2667 rank = 1
2665 2668 elif p1r != nullrev and p2r == nullrev:
2666 2669 rank = 1 + self.fast_rank(p1r)
2667 2670 elif p1r == nullrev and p2r != nullrev:
2668 2671 rank = 1 + self.fast_rank(p2r)
2669 2672 else: # merge node
2670 2673 if rustdagop is not None and self.index.rust_ext_compat:
2671 2674 rank = rustdagop.rank(self.index, p1r, p2r)
2672 2675 else:
2673 2676 pmin, pmax = sorted((p1r, p2r))
2674 2677 rank = 1 + self.fast_rank(pmax)
2675 2678 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2676 2679
2677 2680 e = revlogutils.entry(
2678 2681 flags=flags,
2679 2682 data_offset=offset,
2680 2683 data_compressed_length=deltainfo.deltalen,
2681 2684 data_uncompressed_length=textlen,
2682 2685 data_compression_mode=compression_mode,
2683 2686 data_delta_base=deltainfo.base,
2684 2687 link_rev=link,
2685 2688 parent_rev_1=p1r,
2686 2689 parent_rev_2=p2r,
2687 2690 node_id=node,
2688 2691 sidedata_offset=sidedata_offset,
2689 2692 sidedata_compressed_length=len(serialized_sidedata),
2690 2693 sidedata_compression_mode=sidedata_compression_mode,
2691 2694 rank=rank,
2692 2695 )
2693 2696
2694 2697 self.index.append(e)
2695 2698 entry = self.index.entry_binary(curr)
2696 2699 if curr == 0 and self._docket is None:
2697 2700 header = self._format_flags | self._format_version
2698 2701 header = self.index.pack_header(header)
2699 2702 entry = header + entry
2700 2703 self._writeentry(
2701 2704 transaction,
2702 2705 entry,
2703 2706 deltainfo.data,
2704 2707 link,
2705 2708 offset,
2706 2709 serialized_sidedata,
2707 2710 sidedata_offset,
2708 2711 )
2709 2712
2710 2713 rawtext = btext[0]
2711 2714
2712 2715 if alwayscache and rawtext is None:
2713 2716 rawtext = deltacomputer.buildtext(revinfo, fh)
2714 2717
2715 2718 if type(rawtext) == bytes: # only accept immutable objects
2716 2719 self._revisioncache = (node, curr, rawtext)
2717 2720 self._chainbasecache[curr] = deltainfo.chainbase
2718 2721 return curr
2719 2722
2720 2723 def _get_data_offset(self, prev):
2721 2724 """Returns the current offset in the (in-transaction) data file.
2722 2725 Versions < 2 of the revlog can get this in O(1), while revlog v2 needs a docket
2723 2726 file to store that information: since sidedata can be rewritten to the
2724 2727 end of the data file within a transaction, you can have cases where, for
2725 2728 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2726 2729 to `n - 1`'s sidedata being written after `n`'s data.
2727 2730
2728 2731 TODO cache this in a docket file before getting out of experimental."""
2729 2732 if self._docket is None:
2730 2733 return self.end(prev)
2731 2734 else:
2732 2735 return self._docket.data_end
2733 2736
2734 2737 def _writeentry(
2735 2738 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2736 2739 ):
2737 2740 # Files opened in a+ mode have inconsistent behavior on various
2738 2741 # platforms. Windows requires that a file positioning call be made
2739 2742 # when the file handle transitions between reads and writes. See
2740 2743 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2741 2744 # platforms, Python or the platform itself can be buggy. Some versions
2742 2745 # of Solaris have been observed to not append at the end of the file
2743 2746 # if the file was seeked to before the end. See issue4943 for more.
2744 2747 #
2745 2748 # We work around this issue by inserting a seek() before writing.
2746 2749 # Note: This is likely not necessary on Python 3. However, because
2747 2750 # the file handle is reused for reads and may be seeked there, we need
2748 2751 # to be careful before changing this.
2749 2752 if self._writinghandles is None:
2750 2753 msg = b'adding revision outside `revlog._writing` context'
2751 2754 raise error.ProgrammingError(msg)
2752 2755 ifh, dfh, sdfh = self._writinghandles
2753 2756 if self._docket is None:
2754 2757 ifh.seek(0, os.SEEK_END)
2755 2758 else:
2756 2759 ifh.seek(self._docket.index_end, os.SEEK_SET)
2757 2760 if dfh:
2758 2761 if self._docket is None:
2759 2762 dfh.seek(0, os.SEEK_END)
2760 2763 else:
2761 2764 dfh.seek(self._docket.data_end, os.SEEK_SET)
2762 2765 if sdfh:
2763 2766 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2764 2767
2765 2768 curr = len(self) - 1
2766 2769 if not self._inline:
2767 2770 transaction.add(self._datafile, offset)
2768 2771 if self._sidedatafile:
2769 2772 transaction.add(self._sidedatafile, sidedata_offset)
2770 2773 transaction.add(self._indexfile, curr * len(entry))
2771 2774 if data[0]:
2772 2775 dfh.write(data[0])
2773 2776 dfh.write(data[1])
2774 2777 if sidedata:
2775 2778 sdfh.write(sidedata)
2776 2779 ifh.write(entry)
2777 2780 else:
2778 2781 offset += curr * self.index.entry_size
2779 2782 transaction.add(self._indexfile, offset)
2780 2783 ifh.write(entry)
2781 2784 ifh.write(data[0])
2782 2785 ifh.write(data[1])
2783 2786 assert not sidedata
2784 2787 self._enforceinlinesize(transaction)
2785 2788 if self._docket is not None:
2786 2789 # revlog-v2 always has 3 writing handles, help Pytype
2787 2790 wh1 = self._writinghandles[0]
2788 2791 wh2 = self._writinghandles[1]
2789 2792 wh3 = self._writinghandles[2]
2790 2793 assert wh1 is not None
2791 2794 assert wh2 is not None
2792 2795 assert wh3 is not None
2793 2796 self._docket.index_end = wh1.tell()
2794 2797 self._docket.data_end = wh2.tell()
2795 2798 self._docket.sidedata_end = wh3.tell()
2796 2799
2797 2800 nodemaputil.setup_persistent_nodemap(transaction, self)
2798 2801
2799 2802 def addgroup(
2800 2803 self,
2801 2804 deltas,
2802 2805 linkmapper,
2803 2806 transaction,
2804 2807 alwayscache=False,
2805 2808 addrevisioncb=None,
2806 2809 duplicaterevisioncb=None,
2807 2810 debug_info=None,
2808 2811 delta_base_reuse_policy=None,
2809 2812 ):
2810 2813 """
2811 2814 add a delta group
2812 2815
2813 2816 Given a set of deltas, add them to the revision log. The
2814 2817 first delta is against its parent, which should be in our
2815 2818 log; the rest are against the previous delta.
2816 2819
2817 2820 If ``addrevisioncb`` is defined, it will be called with arguments of
2818 2821 this revlog and the node that was added.
2819 2822 """
2820 2823
2821 2824 if self._adding_group:
2822 2825 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2823 2826
2824 2827 # read the default delta-base reuse policy from revlog config if the
2825 2828 # group did not specify one.
2826 2829 if delta_base_reuse_policy is None:
2827 2830 if self._generaldelta and self._lazydeltabase:
2828 2831 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2829 2832 else:
2830 2833 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2831 2834
2832 2835 self._adding_group = True
2833 2836 empty = True
2834 2837 try:
2835 2838 with self._writing(transaction):
2836 2839 write_debug = None
2837 2840 if self._debug_delta:
2838 2841 write_debug = transaction._report
2839 2842 deltacomputer = deltautil.deltacomputer(
2840 2843 self,
2841 2844 write_debug=write_debug,
2842 2845 debug_info=debug_info,
2843 2846 )
2844 2847 # loop through our set of deltas
2845 2848 for data in deltas:
2846 2849 (
2847 2850 node,
2848 2851 p1,
2849 2852 p2,
2850 2853 linknode,
2851 2854 deltabase,
2852 2855 delta,
2853 2856 flags,
2854 2857 sidedata,
2855 2858 ) = data
2856 2859 link = linkmapper(linknode)
2857 2860 flags = flags or REVIDX_DEFAULT_FLAGS
2858 2861
2859 2862 rev = self.index.get_rev(node)
2860 2863 if rev is not None:
2861 2864 # this can happen if two branches make the same change
2862 2865 self._nodeduplicatecallback(transaction, rev)
2863 2866 if duplicaterevisioncb:
2864 2867 duplicaterevisioncb(self, rev)
2865 2868 empty = False
2866 2869 continue
2867 2870
2868 2871 for p in (p1, p2):
2869 2872 if not self.index.has_node(p):
2870 2873 raise error.LookupError(
2871 2874 p, self.radix, _(b'unknown parent')
2872 2875 )
2873 2876
2874 2877 if not self.index.has_node(deltabase):
2875 2878 raise error.LookupError(
2876 2879 deltabase, self.display_id, _(b'unknown delta base')
2877 2880 )
2878 2881
2879 2882 baserev = self.rev(deltabase)
2880 2883
2881 2884 if baserev != nullrev and self.iscensored(baserev):
2882 2885 # if base is censored, delta must be full replacement in a
2883 2886 # single patch operation
2884 2887 hlen = struct.calcsize(b">lll")
2885 2888 oldlen = self.rawsize(baserev)
2886 2889 newlen = len(delta) - hlen
2887 2890 if delta[:hlen] != mdiff.replacediffheader(
2888 2891 oldlen, newlen
2889 2892 ):
2890 2893 raise error.CensoredBaseError(
2891 2894 self.display_id, self.node(baserev)
2892 2895 )
2893 2896
2894 2897 if not flags and self._peek_iscensored(baserev, delta):
2895 2898 flags |= REVIDX_ISCENSORED
2896 2899
2897 2900 # We assume consumers of addrevisioncb will want to retrieve
2898 2901 # the added revision, which will require a call to
2899 2902 # revision(). revision() will fast path if there is a cache
2900 2903 # hit. So, we tell _addrevision() to always cache in this case.
2901 2904 # We're only using addgroup() in the context of changegroup
2902 2905 # generation so the revision data can always be handled as raw
2903 2906 # by the flagprocessor.
2904 2907 rev = self._addrevision(
2905 2908 node,
2906 2909 None,
2907 2910 transaction,
2908 2911 link,
2909 2912 p1,
2910 2913 p2,
2911 2914 flags,
2912 2915 (baserev, delta, delta_base_reuse_policy),
2913 2916 alwayscache=alwayscache,
2914 2917 deltacomputer=deltacomputer,
2915 2918 sidedata=sidedata,
2916 2919 )
2917 2920
2918 2921 if addrevisioncb:
2919 2922 addrevisioncb(self, rev)
2920 2923 empty = False
2921 2924 finally:
2922 2925 self._adding_group = False
2923 2926 return not empty
2924 2927
2925 2928 def iscensored(self, rev):
2926 2929 """Check if a file revision is censored."""
2927 2930 if not self._censorable:
2928 2931 return False
2929 2932
2930 2933 return self.flags(rev) & REVIDX_ISCENSORED
2931 2934
2932 2935 def _peek_iscensored(self, baserev, delta):
2933 2936 """Quickly check if a delta produces a censored revision."""
2934 2937 if not self._censorable:
2935 2938 return False
2936 2939
2937 2940 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2938 2941
2939 2942 def getstrippoint(self, minlink):
2940 2943 """find the minimum rev that must be stripped to strip the linkrev
2941 2944
2942 2945 Returns a tuple containing the minimum rev and a set of all revs that
2943 2946 have linkrevs that will be broken by this strip.
2944 2947 """
2945 2948 return storageutil.resolvestripinfo(
2946 2949 minlink,
2947 2950 len(self) - 1,
2948 2951 self.headrevs(),
2949 2952 self.linkrev,
2950 2953 self.parentrevs,
2951 2954 )
2952 2955
2953 2956 def strip(self, minlink, transaction):
2954 2957 """truncate the revlog on the first revision with a linkrev >= minlink
2955 2958
2956 2959 This function is called when we're stripping revision minlink and
2957 2960 its descendants from the repository.
2958 2961
2959 2962 We have to remove all revisions with linkrev >= minlink, because
2960 2963 the equivalent changelog revisions will be renumbered after the
2961 2964 strip.
2962 2965
2963 2966 So we truncate the revlog on the first of these revisions, and
2964 2967 trust that the caller has saved the revisions that shouldn't be
2965 2968 removed and that it'll re-add them after this truncation.
2966 2969 """
2967 2970 if len(self) == 0:
2968 2971 return
2969 2972
2970 2973 rev, _ = self.getstrippoint(minlink)
2971 2974 if rev == len(self):
2972 2975 return
2973 2976
2974 2977 # first truncate the files on disk
2975 2978 data_end = self.start(rev)
2976 2979 if not self._inline:
2977 2980 transaction.add(self._datafile, data_end)
2978 2981 end = rev * self.index.entry_size
2979 2982 else:
2980 2983 end = data_end + (rev * self.index.entry_size)
2981 2984
2982 2985 if self._sidedatafile:
2983 2986 sidedata_end = self.sidedata_cut_off(rev)
2984 2987 transaction.add(self._sidedatafile, sidedata_end)
2985 2988
2986 2989 transaction.add(self._indexfile, end)
2987 2990 if self._docket is not None:
2988 2991 # XXX we could leverage the docket while stripping. However, it is
2989 2992 # not powerful enough at the time of this comment.
2990 2993 self._docket.index_end = end
2991 2994 self._docket.data_end = data_end
2992 2995 self._docket.sidedata_end = sidedata_end
2993 2996 self._docket.write(transaction, stripping=True)
2994 2997
2995 2998 # then reset internal state in memory to forget those revisions
2996 2999 self._revisioncache = None
2997 3000 self._chaininfocache = util.lrucachedict(500)
2998 3001 self._segmentfile.clear_cache()
2999 3002 self._segmentfile_sidedata.clear_cache()
3000 3003
3001 3004 del self.index[rev:-1]
3002 3005
3003 3006 def checksize(self):
3004 3007 """Check size of index and data files
3005 3008
3006 3009 return a (dd, di) tuple.
3007 3010 - dd: extra bytes for the "data" file
3008 3011 - di: extra bytes for the "index" file
3009 3012
3010 3013 A healthy revlog will return (0, 0).
3011 3014 """
3012 3015 expected = 0
3013 3016 if len(self):
3014 3017 expected = max(0, self.end(len(self) - 1))
3015 3018
3016 3019 try:
3017 3020 with self._datafp() as f:
3018 3021 f.seek(0, io.SEEK_END)
3019 3022 actual = f.tell()
3020 3023 dd = actual - expected
3021 3024 except FileNotFoundError:
3022 3025 dd = 0
3023 3026
3024 3027 try:
3025 3028 f = self.opener(self._indexfile)
3026 3029 f.seek(0, io.SEEK_END)
3027 3030 actual = f.tell()
3028 3031 f.close()
3029 3032 s = self.index.entry_size
3030 3033 i = max(0, actual // s)
3031 3034 di = actual - (i * s)
3032 3035 if self._inline:
3033 3036 databytes = 0
3034 3037 for r in self:
3035 3038 databytes += max(0, self.length(r))
3036 3039 dd = 0
3037 3040 di = actual - len(self) * s - databytes
3038 3041 except FileNotFoundError:
3039 3042 di = 0
3040 3043
3041 3044 return (dd, di)
3042 3045
3043 3046 def files(self):
3044 3047 res = [self._indexfile]
3045 3048 if self._docket_file is None:
3046 3049 if not self._inline:
3047 3050 res.append(self._datafile)
3048 3051 else:
3049 3052 res.append(self._docket_file)
3050 3053 res.extend(self._docket.old_index_filepaths(include_empty=False))
3051 3054 if self._docket.data_end:
3052 3055 res.append(self._datafile)
3053 3056 res.extend(self._docket.old_data_filepaths(include_empty=False))
3054 3057 if self._docket.sidedata_end:
3055 3058 res.append(self._sidedatafile)
3056 3059 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3057 3060 return res
3058 3061
3059 3062 def emitrevisions(
3060 3063 self,
3061 3064 nodes,
3062 3065 nodesorder=None,
3063 3066 revisiondata=False,
3064 3067 assumehaveparentrevisions=False,
3065 3068 deltamode=repository.CG_DELTAMODE_STD,
3066 3069 sidedata_helpers=None,
3067 3070 debug_info=None,
3068 3071 ):
3069 3072 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3070 3073 raise error.ProgrammingError(
3071 3074 b'unhandled value for nodesorder: %s' % nodesorder
3072 3075 )
3073 3076
3074 3077 if nodesorder is None and not self._generaldelta:
3075 3078 nodesorder = b'storage'
3076 3079
3077 3080 if (
3078 3081 not self._storedeltachains
3079 3082 and deltamode != repository.CG_DELTAMODE_PREV
3080 3083 ):
3081 3084 deltamode = repository.CG_DELTAMODE_FULL
3082 3085
3083 3086 return storageutil.emitrevisions(
3084 3087 self,
3085 3088 nodes,
3086 3089 nodesorder,
3087 3090 revlogrevisiondelta,
3088 3091 deltaparentfn=self.deltaparent,
3089 3092 candeltafn=self.candelta,
3090 3093 rawsizefn=self.rawsize,
3091 3094 revdifffn=self.revdiff,
3092 3095 flagsfn=self.flags,
3093 3096 deltamode=deltamode,
3094 3097 revisiondata=revisiondata,
3095 3098 assumehaveparentrevisions=assumehaveparentrevisions,
3096 3099 sidedata_helpers=sidedata_helpers,
3097 3100 debug_info=debug_info,
3098 3101 )
3099 3102
3100 3103 DELTAREUSEALWAYS = b'always'
3101 3104 DELTAREUSESAMEREVS = b'samerevs'
3102 3105 DELTAREUSENEVER = b'never'
3103 3106
3104 3107 DELTAREUSEFULLADD = b'fulladd'
3105 3108
3106 3109 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3107 3110
3108 3111 def clone(
3109 3112 self,
3110 3113 tr,
3111 3114 destrevlog,
3112 3115 addrevisioncb=None,
3113 3116 deltareuse=DELTAREUSESAMEREVS,
3114 3117 forcedeltabothparents=None,
3115 3118 sidedata_helpers=None,
3116 3119 ):
3117 3120 """Copy this revlog to another, possibly with format changes.
3118 3121
3119 3122 The destination revlog will contain the same revisions and nodes.
3120 3123 However, it may not be bit-for-bit identical due to e.g. delta encoding
3121 3124 differences.
3122 3125
3123 3126 The ``deltareuse`` argument controls how deltas from the existing revlog
3124 3127 are preserved in the destination revlog. The argument can have the
3125 3128 following values:
3126 3129
3127 3130 DELTAREUSEALWAYS
3128 3131 Deltas will always be reused (if possible), even if the destination
3129 3132 revlog would not select the same revisions for the delta. This is the
3130 3133 fastest mode of operation.
3131 3134 DELTAREUSESAMEREVS
3132 3135 Deltas will be reused if the destination revlog would pick the same
3133 3136 revisions for the delta. This mode strikes a balance between speed
3134 3137 and optimization.
3135 3138 DELTAREUSENEVER
3136 3139 Deltas will never be reused. This is the slowest mode of execution.
3137 3140 This mode can be used to recompute deltas (e.g. if the diff/delta
3138 3141 algorithm changes).
3139 3142 DELTAREUSEFULLADD
3140 3143 Revisions will be re-added as if they were new content. This is
3141 3144 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3142 3145 e.g. large file detection and handling.
3143 3146
3144 3147 Delta computation can be slow, so the choice of delta reuse policy can
3145 3148 significantly affect run time.
3146 3149
3147 3150 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3148 3151 two extremes. Deltas will be reused if they are appropriate. But if the
3149 3152 delta could choose a better revision, it will do so. This means if you
3150 3153 are converting a non-generaldelta revlog to a generaldelta revlog,
3151 3154 deltas will be recomputed if the delta's parent isn't a parent of the
3152 3155 revision.
3153 3156
3154 3157 In addition to the delta policy, the ``forcedeltabothparents`` argument
3155 3158 controls whether deltas are force-computed against both parents for
3156 3159 merges. If it is None, the destination revlog's existing setting is kept.
3157 3160
3158 3161 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3159 3162 `sidedata_helpers`.
3160 3163 """
3161 3164 if deltareuse not in self.DELTAREUSEALL:
3162 3165 raise ValueError(
3163 3166 _(b'value for deltareuse invalid: %s') % deltareuse
3164 3167 )
3165 3168
3166 3169 if len(destrevlog):
3167 3170 raise ValueError(_(b'destination revlog is not empty'))
3168 3171
3169 3172 if getattr(self, 'filteredrevs', None):
3170 3173 raise ValueError(_(b'source revlog has filtered revisions'))
3171 3174 if getattr(destrevlog, 'filteredrevs', None):
3172 3175 raise ValueError(_(b'destination revlog has filtered revisions'))
3173 3176
3174 3177 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3175 3178 # if possible.
3176 3179 oldlazydelta = destrevlog._lazydelta
3177 3180 oldlazydeltabase = destrevlog._lazydeltabase
3178 3181 oldamd = destrevlog._deltabothparents
3179 3182
3180 3183 try:
3181 3184 if deltareuse == self.DELTAREUSEALWAYS:
3182 3185 destrevlog._lazydeltabase = True
3183 3186 destrevlog._lazydelta = True
3184 3187 elif deltareuse == self.DELTAREUSESAMEREVS:
3185 3188 destrevlog._lazydeltabase = False
3186 3189 destrevlog._lazydelta = True
3187 3190 elif deltareuse == self.DELTAREUSENEVER:
3188 3191 destrevlog._lazydeltabase = False
3189 3192 destrevlog._lazydelta = False
3190 3193
3191 3194 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3192 3195
3193 3196 self._clone(
3194 3197 tr,
3195 3198 destrevlog,
3196 3199 addrevisioncb,
3197 3200 deltareuse,
3198 3201 forcedeltabothparents,
3199 3202 sidedata_helpers,
3200 3203 )
3201 3204
3202 3205 finally:
3203 3206 destrevlog._lazydelta = oldlazydelta
3204 3207 destrevlog._lazydeltabase = oldlazydeltabase
3205 3208 destrevlog._deltabothparents = oldamd
3206 3209
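
The ``deltareuse`` policies described in the docstring above are easiest to see from the caller's side. The following is a minimal, purely illustrative sketch assuming ``src_revlog``, ``dest_revlog`` and ``tr`` are an existing source revlog, an empty destination revlog and an open transaction supplied by the caller; the helper name is hypothetical:

    def copy_with_fresh_deltas(src_revlog, dest_revlog, tr):
        # Hypothetical helper: copy every revision while forcing delta
        # recomputation (the slowest but most thorough policy).
        copied = []

        def progress(rl, rev, node):
            # addrevisioncb is invoked once per revision added to the
            # destination revlog.
            copied.append(rev)

        src_revlog.clone(
            tr,
            dest_revlog,
            addrevisioncb=progress,
            deltareuse=src_revlog.DELTAREUSENEVER,
        )
        return len(copied)
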
3207 3210 def _clone(
3208 3211 self,
3209 3212 tr,
3210 3213 destrevlog,
3211 3214 addrevisioncb,
3212 3215 deltareuse,
3213 3216 forcedeltabothparents,
3214 3217 sidedata_helpers,
3215 3218 ):
3216 3219 """perform the core duty of `revlog.clone` after parameter processing"""
3217 3220 write_debug = None
3218 3221 if self._debug_delta:
3219 3222 write_debug = tr._report
3220 3223 deltacomputer = deltautil.deltacomputer(
3221 3224 destrevlog,
3222 3225 write_debug=write_debug,
3223 3226 )
3224 3227 index = self.index
3225 3228 for rev in self:
3226 3229 entry = index[rev]
3227 3230
3228 3231 # Some classes override linkrev to take filtered revs into
3229 3232 # account. Use raw entry from index.
3230 3233 flags = entry[0] & 0xFFFF
3231 3234 linkrev = entry[4]
3232 3235 p1 = index[entry[5]][7]
3233 3236 p2 = index[entry[6]][7]
3234 3237 node = entry[7]
3235 3238
3236 3239 # (Possibly) reuse the delta from the revlog if allowed and
3237 3240 # the revlog chunk is a delta.
3238 3241 cachedelta = None
3239 3242 rawtext = None
3240 3243 if deltareuse == self.DELTAREUSEFULLADD:
3241 3244 text = self._revisiondata(rev)
3242 3245 sidedata = self.sidedata(rev)
3243 3246
3244 3247 if sidedata_helpers is not None:
3245 3248 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3246 3249 self, sidedata_helpers, sidedata, rev
3247 3250 )
3248 3251 flags = flags | new_flags[0] & ~new_flags[1]
3249 3252
3250 3253 destrevlog.addrevision(
3251 3254 text,
3252 3255 tr,
3253 3256 linkrev,
3254 3257 p1,
3255 3258 p2,
3256 3259 cachedelta=cachedelta,
3257 3260 node=node,
3258 3261 flags=flags,
3259 3262 deltacomputer=deltacomputer,
3260 3263 sidedata=sidedata,
3261 3264 )
3262 3265 else:
3263 3266 if destrevlog._lazydelta:
3264 3267 dp = self.deltaparent(rev)
3265 3268 if dp != nullrev:
3266 3269 cachedelta = (dp, bytes(self._chunk(rev)))
3267 3270
3268 3271 sidedata = None
3269 3272 if not cachedelta:
3270 3273 rawtext = self._revisiondata(rev)
3271 3274 sidedata = self.sidedata(rev)
3272 3275 if sidedata is None:
3273 3276 sidedata = self.sidedata(rev)
3274 3277
3275 3278 if sidedata_helpers is not None:
3276 3279 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3277 3280 self, sidedata_helpers, sidedata, rev
3278 3281 )
3279 3282 flags = flags | new_flags[0] & ~new_flags[1]
3280 3283
3281 3284 with destrevlog._writing(tr):
3282 3285 destrevlog._addrevision(
3283 3286 node,
3284 3287 rawtext,
3285 3288 tr,
3286 3289 linkrev,
3287 3290 p1,
3288 3291 p2,
3289 3292 flags,
3290 3293 cachedelta,
3291 3294 deltacomputer=deltacomputer,
3292 3295 sidedata=sidedata,
3293 3296 )
3294 3297
3295 3298 if addrevisioncb:
3296 3299 addrevisioncb(self, rev, node)
3297 3300
3298 3301 def censorrevision(self, tr, censornode, tombstone=b''):
3299 3302 if self._format_version == REVLOGV0:
3300 3303 raise error.RevlogError(
3301 3304 _(b'cannot censor with version %d revlogs')
3302 3305 % self._format_version
3303 3306 )
3304 3307 elif self._format_version == REVLOGV1:
3305 3308 rewrite.v1_censor(self, tr, censornode, tombstone)
3306 3309 else:
3307 3310 rewrite.v2_censor(self, tr, censornode, tombstone)
3308 3311
3309 3312 def verifyintegrity(self, state):
3310 3313 """Verifies the integrity of the revlog.
3311 3314
3312 3315 Yields ``revlogproblem`` instances describing problems that are
3313 3316 found.
3314 3317 """
3315 3318 dd, di = self.checksize()
3316 3319 if dd:
3317 3320 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3318 3321 if di:
3319 3322 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3320 3323
3321 3324 version = self._format_version
3322 3325
3323 3326 # The verifier tells us what version revlog we should be.
3324 3327 if version != state[b'expectedversion']:
3325 3328 yield revlogproblem(
3326 3329 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3327 3330 % (self.display_id, version, state[b'expectedversion'])
3328 3331 )
3329 3332
3330 3333 state[b'skipread'] = set()
3331 3334 state[b'safe_renamed'] = set()
3332 3335
3333 3336 for rev in self:
3334 3337 node = self.node(rev)
3335 3338
3336 3339 # Verify contents. 4 cases to care about:
3337 3340 #
3338 3341 # common: the most common case
3339 3342 # rename: with a rename
3340 3343 # meta: file content starts with b'\1\n', the metadata
3341 3344 # header defined in filelog.py, but without a rename
3342 3345 # ext: content stored externally
3343 3346 #
3344 3347 # More formally, their differences are shown below:
3345 3348 #
3346 3349 # | common | rename | meta | ext
3347 3350 # -------------------------------------------------------
3348 3351 # flags() | 0 | 0 | 0 | not 0
3349 3352 # renamed() | False | True | False | ?
3350 3353 # rawtext[0:2]=='\1\n'| False | True | True | ?
3351 3354 #
3352 3355 # "rawtext" means the raw text stored in revlog data, which
3353 3356 # could be retrieved by "rawdata(rev)". "text"
3354 3357 # mentioned below is "revision(rev)".
3355 3358 #
3356 3359 # There are 3 different lengths stored physically:
3357 3360 # 1. L1: rawsize, stored in revlog index
3358 3361 # 2. L2: len(rawtext), stored in revlog data
3359 3362 # 3. L3: len(text), stored in revlog data if flags==0, or
3360 3363 # possibly somewhere else if flags!=0
3361 3364 #
3362 3365 # L1 should be equal to L2. L3 could be different from them.
3363 3366 # "text" may or may not affect commit hash depending on flag
3364 3367 # processors (see flagutil.addflagprocessor).
3365 3368 #
3366 3369 # | common | rename | meta | ext
3367 3370 # -------------------------------------------------
3368 3371 # rawsize() | L1 | L1 | L1 | L1
3369 3372 # size() | L1 | L2-LM | L1(*) | L1 (?)
3370 3373 # len(rawtext) | L2 | L2 | L2 | L2
3371 3374 # len(text) | L2 | L2 | L2 | L3
3372 3375 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3373 3376 #
3374 3377 # LM: length of metadata, depending on rawtext
3375 3378 # (*): not ideal, see comment in filelog.size
3376 3379 # (?): could be "- len(meta)" if the resolved content has
3377 3380 # rename metadata
3378 3381 #
3379 3382 # Checks needed to be done:
3380 3383 # 1. length check: L1 == L2, in all cases.
3381 3384 # 2. hash check: depending on flag processor, we may need to
3382 3385 # use either "text" (external), or "rawtext" (in revlog).
3383 3386
3384 3387 try:
3385 3388 skipflags = state.get(b'skipflags', 0)
3386 3389 if skipflags:
3387 3390 skipflags &= self.flags(rev)
3388 3391
3389 3392 _verify_revision(self, skipflags, state, node)
3390 3393
3391 3394 l1 = self.rawsize(rev)
3392 3395 l2 = len(self.rawdata(node))
3393 3396
3394 3397 if l1 != l2:
3395 3398 yield revlogproblem(
3396 3399 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3397 3400 node=node,
3398 3401 )
3399 3402
3400 3403 except error.CensoredNodeError:
3401 3404 if state[b'erroroncensored']:
3402 3405 yield revlogproblem(
3403 3406 error=_(b'censored file data'), node=node
3404 3407 )
3405 3408 state[b'skipread'].add(node)
3406 3409 except Exception as e:
3407 3410 yield revlogproblem(
3408 3411 error=_(b'unpacking %s: %s')
3409 3412 % (short(node), stringutil.forcebytestr(e)),
3410 3413 node=node,
3411 3414 )
3412 3415 state[b'skipread'].add(node)
3413 3416
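
A rough usage sketch for ``verifyintegrity`` above (illustrative only; the real caller is Mercurial's verify machinery, which builds a richer ``state`` mapping). The two keys below are the ones this method reads from the caller; ``collect_revlog_problems`` and its return value are assumptions for the example:

    def collect_revlog_problems(rl):
        # Hypothetical helper: gather human-readable problem messages.
        state = {
            # accept whatever format version the revlog already uses
            b'expectedversion': rl._format_version,
            # report censored data as an error instead of skipping it
            b'erroroncensored': True,
        }
        messages = []
        for problem in rl.verifyintegrity(state):
            messages.append(problem.error or problem.warning)
        return messages
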
3414 3417 def storageinfo(
3415 3418 self,
3416 3419 exclusivefiles=False,
3417 3420 sharedfiles=False,
3418 3421 revisionscount=False,
3419 3422 trackedsize=False,
3420 3423 storedsize=False,
3421 3424 ):
3422 3425 d = {}
3423 3426
3424 3427 if exclusivefiles:
3425 3428 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3426 3429 if not self._inline:
3427 3430 d[b'exclusivefiles'].append((self.opener, self._datafile))
3428 3431
3429 3432 if sharedfiles:
3430 3433 d[b'sharedfiles'] = []
3431 3434
3432 3435 if revisionscount:
3433 3436 d[b'revisionscount'] = len(self)
3434 3437
3435 3438 if trackedsize:
3436 3439 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3437 3440
3438 3441 if storedsize:
3439 3442 d[b'storedsize'] = sum(
3440 3443 self.opener.stat(path).st_size for path in self.files()
3441 3444 )
3442 3445
3443 3446 return d
3444 3447
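
For illustration, a minimal sketch of querying ``storageinfo`` from a caller's perspective; ``rl`` is assumed to be an existing revlog instance and the helper name is hypothetical:

    def storage_summary(rl):
        # Hypothetical helper: compare tracked (uncompressed) size with
        # the bytes actually stored on disk.
        info = rl.storageinfo(
            revisionscount=True,
            trackedsize=True,
            storedsize=True,
        )
        return {
            b'revisions': info[b'revisionscount'],
            b'tracked': info[b'trackedsize'],
            b'stored': info[b'storedsize'],
        }
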
3445 3448 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3446 3449 if not self.hassidedata:
3447 3450 return
3448 3451 # revlog formats with sidedata support do not support inline data
3449 3452 assert not self._inline
3450 3453 if not helpers[1] and not helpers[2]:
3451 3454 # Nothing to generate or remove
3452 3455 return
3453 3456
3454 3457 new_entries = []
3455 3458 # append the new sidedata
3456 3459 with self._writing(transaction):
3457 3460 ifh, dfh, sdfh = self._writinghandles
3458 3461 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3459 3462
3460 3463 current_offset = sdfh.tell()
3461 3464 for rev in range(startrev, endrev + 1):
3462 3465 entry = self.index[rev]
3463 3466 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3464 3467 store=self,
3465 3468 sidedata_helpers=helpers,
3466 3469 sidedata={},
3467 3470 rev=rev,
3468 3471 )
3469 3472
3470 3473 serialized_sidedata = sidedatautil.serialize_sidedata(
3471 3474 new_sidedata
3472 3475 )
3473 3476
3474 3477 sidedata_compression_mode = COMP_MODE_INLINE
3475 3478 if serialized_sidedata and self.hassidedata:
3476 3479 sidedata_compression_mode = COMP_MODE_PLAIN
3477 3480 h, comp_sidedata = self.compress(serialized_sidedata)
3478 3481 if (
3479 3482 h != b'u'
3480 3483 and comp_sidedata[0] != b'\0'
3481 3484 and len(comp_sidedata) < len(serialized_sidedata)
3482 3485 ):
3483 3486 assert not h
3484 3487 if (
3485 3488 comp_sidedata[0]
3486 3489 == self._docket.default_compression_header
3487 3490 ):
3488 3491 sidedata_compression_mode = COMP_MODE_DEFAULT
3489 3492 serialized_sidedata = comp_sidedata
3490 3493 else:
3491 3494 sidedata_compression_mode = COMP_MODE_INLINE
3492 3495 serialized_sidedata = comp_sidedata
3493 3496 if entry[8] != 0 or entry[9] != 0:
3494 3497 # rewriting entries that already have sidedata is not
3495 3498 # supported yet, because it introduces garbage data in the
3496 3499 # revlog.
3497 3500 msg = b"rewriting existing sidedata is not supported yet"
3498 3501 raise error.Abort(msg)
3499 3502
3500 3503 # Apply (potential) flags to add and to remove after running
3501 3504 # the sidedata helpers
3502 3505 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3503 3506 entry_update = (
3504 3507 current_offset,
3505 3508 len(serialized_sidedata),
3506 3509 new_offset_flags,
3507 3510 sidedata_compression_mode,
3508 3511 )
3509 3512
3510 3513 # the sidedata computation might have moved the file cursors around
3511 3514 sdfh.seek(current_offset, os.SEEK_SET)
3512 3515 sdfh.write(serialized_sidedata)
3513 3516 new_entries.append(entry_update)
3514 3517 current_offset += len(serialized_sidedata)
3515 3518 self._docket.sidedata_end = sdfh.tell()
3516 3519
3517 3520 # rewrite the new index entries
3518 3521 ifh.seek(startrev * self.index.entry_size)
3519 3522 for i, e in enumerate(new_entries):
3520 3523 rev = startrev + i
3521 3524 self.index.replace_sidedata_info(rev, *e)
3522 3525 packed = self.index.entry_binary(rev)
3523 3526 if rev == 0 and self._docket is None:
3524 3527 header = self._format_flags | self._format_version
3525 3528 header = self.index.pack_header(header)
3526 3529 packed = header + packed
3527 3530 ifh.write(packed)
@@ -1,519 +1,519 b''
1 1 Test correctness of revlog inline -> non-inline transition
2 2 ----------------------------------------------------------
3 3
4 4 We test various file lengths and naming patterns, as these have created issues
5 5 in the past.
6 6
7 7 Helper extensions to intercept renames and kill the process
8 8
9 9 $ cat > $TESTTMP/intercept_before_rename.py << EOF
10 10 > import os
11 11 > import signal
12 12 > from mercurial import extensions, util
13 13 >
14 14 > def extsetup(ui):
15 15 > def rename(orig, src, dest, *args, **kwargs):
16 16 > path = util.normpath(dest)
17 17 > if path.endswith(b'data/file.i'):
18 18 > os.kill(os.getpid(), signal.SIGKILL)
19 19 > return orig(src, dest, *args, **kwargs)
20 20 > extensions.wrapfunction(util, 'rename', rename)
21 21 > EOF
22 22
23 23 $ cat > $TESTTMP/intercept_after_rename.py << EOF
24 24 > import os
25 25 > import signal
26 26 > from mercurial import extensions, util
27 27 >
28 28 > def extsetup(ui):
29 29 > def close(orig, *args, **kwargs):
30 30 > path = util.normpath(args[0]._atomictempfile__name)
31 31 > r = orig(*args, **kwargs)
32 32 > if path.endswith(b'/.hg/store/data/file.i'):
33 33 > os.kill(os.getpid(), signal.SIGKILL)
34 34 > return r
35 35 > extensions.wrapfunction(util.atomictempfile, 'close', close)
36 36 > def extsetup(ui):
37 37 > def rename(orig, src, dest, *args, **kwargs):
38 38 > path = util.normpath(dest)
39 39 > r = orig(src, dest, *args, **kwargs)
40 40 > if path.endswith(b'data/file.i'):
41 41 > os.kill(os.getpid(), signal.SIGKILL)
42 42 > return r
43 43 > extensions.wrapfunction(util, 'rename', rename)
44 44 > EOF
45 45
46 46 $ cat > $TESTTMP/killme.py << EOF
47 47 > import os
48 48 > import signal
49 49 >
50 50 > def killme(ui, repo, hooktype, **kwargs):
51 51 > os.kill(os.getpid(), signal.SIGKILL)
52 52 > EOF
53 53
54 54 $ cat > $TESTTMP/reader_wait_split.py << EOF
55 55 > import os
56 56 > import signal
57 57 > from mercurial import extensions, revlog, testing
58 58 > def _wait_post_load(orig, self, *args, **kwargs):
59 59 > wait = b'data/file' in self.radix
60 60 > if wait:
61 61 > testing.wait_file(b"$TESTTMP/writer-revlog-split")
62 62 > r = orig(self, *args, **kwargs)
63 63 > if wait:
64 64 > testing.write_file(b"$TESTTMP/reader-index-read")
65 65 > testing.wait_file(b"$TESTTMP/writer-revlog-unsplit")
66 66 > return r
67 67 >
68 68 > def extsetup(ui):
69 69 > extensions.wrapfunction(revlog.revlog, '_loadindex', _wait_post_load)
70 70 > EOF
71 71
72 72 setup a repository for tests
73 73 ----------------------------
74 74
75 75 $ cat >> $HGRCPATH << EOF
76 76 > [format]
77 77 > revlog-compression=none
78 78 > EOF
79 79
80 80 $ hg init troffset-computation
81 81 $ cd troffset-computation
82 82 $ files="
83 83 > file
84 84 > Directory_With,Special%Char/Complex_File.babar
85 85 > foo/bar/babar_celeste/foo
86 86 > 1234567890/1234567890/1234567890/1234567890/1234567890/1234567890/1234567890/1234567890/1234567890/1234567890/f
87 87 > some_dir/sub_dir/foo_bar
88 88 > some_dir/sub_dir/foo_bar.i.s/tutu
89 89 > "
90 90 $ for f in $files; do
91 91 > mkdir -p `dirname $f`
92 92 > done
93 93 $ for f in $files; do
94 94 > printf '%20d' '1' > $f
95 95 > done
96 96 $ hg commit -Aqma
97 97 $ for f in $files; do
98 98 > printf '%1024d' '1' > $f
99 99 > done
100 100 $ hg commit -Aqmb
101 101 $ for f in $files; do
102 102 > printf '%20d' '1' > $f
103 103 > done
104 104 $ hg commit -Aqmc
105 105 $ for f in $files; do
106 106 > dd if=/dev/zero of=$f bs=1k count=128 > /dev/null 2>&1
107 107 > done
108 108 $ hg commit -AqmD --traceback
109 109 $ for f in $files; do
110 110 > dd if=/dev/zero of=$f bs=1k count=132 > /dev/null 2>&1
111 111 > done
112 112 $ hg commit -AqmD --traceback
113 113
114 114 Reference size:
115 115 $ f -s file
116 116 file: size=135168
117 117 $ f -s .hg/store/data*/file*
118 118 .hg/store/data/file.d: size=267307
119 119 .hg/store/data/file.i: size=320
120 120
121 121 $ cd ..
122 122
123 123 Test a successful pull
124 124 =====================
125 125
126 126 Make sure everything goes through as expected if we don't trigger any crash
127 127
128 128 $ hg clone --quiet --rev 1 troffset-computation troffset-success
129 129 $ cd troffset-success
130 130
131 131 Reference size:
132 132 $ f -s file
133 133 file: size=1024
134 134 $ f -s .hg/store/data/file*
135 135 .hg/store/data/file.i: size=1174
136 136
137 137 $ hg pull ../troffset-computation
138 138 pulling from ../troffset-computation
139 139 searching for changes
140 140 adding changesets
141 141 adding manifests
142 142 adding file changes
143 143 added 3 changesets with 18 changes to 6 files
144 144 new changesets c99a94cae9b1:64874a3b0160
145 145 (run 'hg update' to get a working copy)
146 146
147 147
148 148 The inline revlog has been replaced
149 149
150 150 $ f -s .hg/store/data/file*
151 151 .hg/store/data/file.d: size=267307
152 152 .hg/store/data/file.i: size=320
153 153
154 154
155 155 $ hg verify -q
156 156 $ cd ..
157 157
158 158
159 159 Test a hard crash after the file was split but before the transaction was committed
160 160 ===================================================================================
161 161
162 162 Test offset computation to correctly factor in the index entries themselves.
163 163 Also test that the new data file has the correct size if the transaction is aborted
164 164 after the index has been replaced.
165 165
166 166 Test repo has commits a, b, c, D, where D is large (grows the revlog enough that it
167 167 transitions to non-inline storage). The clone initially has changes a, b
168 168 and will transition to non-inline storage when adding c, D.
169 169
170 170 If the transaction adding c, D is rolled back, then we don't undo the revlog split,
171 171 but truncate the index and the data to remove both c and D.
172 172
173 173
174 174 $ hg clone --quiet --rev 1 troffset-computation troffset-computation-copy
175 175 $ cd troffset-computation-copy
176 176
177 177 Reference size:
178 178 $ f -s file
179 179 file: size=1024
180 180 $ f -s .hg/store/data*/file*
181 181 .hg/store/data/file.i: size=1174
182 182
183 183 $ cat > .hg/hgrc <<EOF
184 184 > [hooks]
185 185 > pretxnchangegroup = python:$TESTTMP/killme.py:killme
186 186 > EOF
187 187 #if chg
188 188 $ hg pull ../troffset-computation
189 189 pulling from ../troffset-computation
190 190 [255]
191 191 #else
192 192 $ hg pull ../troffset-computation
193 193 pulling from ../troffset-computation
194 194 *Killed* (glob)
195 195 [137]
196 196 #endif
197 197
198 198
199 199 The inline revlog still exists, but a split version exists next to it
200 200
201 201 $ cat .hg/store/journal | tr '\0' ' ' | grep '\.s'
202 202 data/some_dir/sub_dir/foo_bar.i.s/tutu.i 1174
203 203 data/some_dir/sub_dir/foo_bar.i.s/tutu.d 0
204 204 $ f -s .hg/store/data*/file*
205 .hg/store/data-s/file: size=320
205 .hg/store/data-s/file.i: size=320
206 206 .hg/store/data/file.d: size=267307
207 207 .hg/store/data/file.i: size=132395
208 208 $ f -s .hg/store/data*/foo*/bar*/babar__celeste*/foo*
209 .hg/store/data/foo/bar/babar__celeste-s/foo: size=320
209 .hg/store/data-s/foo/bar/babar__celeste/foo.i: size=320
210 210 .hg/store/data/foo/bar/babar__celeste/foo.d: size=267307
211 211 .hg/store/data/foo/bar/babar__celeste/foo.i: size=132395
212 212
213 213
214 214 The first file.i entry should match the "Reference size" above.
215 215 The first file.d entry is the temporary record during the split.
216 216
217 217 A "temporary file" entry exists for the split index.
218 218
219 219 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
220 220 data/file.i 1174
221 221 data/file.d 0
222 222 $ cat .hg/store/journal.backupfiles | tr -s '\000' ' ' | tr -s '\00' ' '| grep 'data.*/file'
223 223 data/file.i data/journal.backup.file.i.bck 0
224 data-s/file 0
224 data-s/file.i 0
225 225
226 226 recover rolls the split back; the fncache is still valid
227 227
228 228 $ hg recover
229 229 rolling back interrupted transaction
230 230 (verify step skipped, run `hg verify` to check your repository content)
231 231 $ f -s .hg/store/data*/file*
232 232 .hg/store/data/file.i: size=1174
233 233 $ hg tip
234 234 changeset: 1:64b04c8dc267
235 235 tag: tip
236 236 user: test
237 237 date: Thu Jan 01 00:00:00 1970 +0000
238 238 summary: b
239 239
240 240 $ hg verify -q
241 241 $ hg debugrebuildfncache --only-data
242 242 fncache already up to date
243 243 $ hg verify -q
244 244 $ cd ..
245 245
246 246 Test a hard crash right before the index is moved into place
247 247 ===========================================================
248 248
249 249 Now retry the procedure but intercept the rename of the index and check that
250 250 the journal does not contain the new index size. This demonstrates the edge case
251 251 where the data file is left as garbage.
252 252
253 253 $ hg clone --quiet --rev 1 troffset-computation troffset-computation-copy2
254 254 $ cd troffset-computation-copy2
255 255
256 256 Reference size:
257 257 $ f -s file
258 258 file: size=1024
259 259 $ f -s .hg/store/data*/file*
260 260 .hg/store/data/file.i: size=1174
261 261
262 262 $ cat > .hg/hgrc <<EOF
263 263 > [extensions]
264 264 > intercept_rename = $TESTTMP/intercept_before_rename.py
265 265 > EOF
266 266 #if chg
267 267 $ hg pull ../troffset-computation
268 268 pulling from ../troffset-computation
269 269 searching for changes
270 270 adding changesets
271 271 adding manifests
272 272 adding file changes
273 273 [255]
274 274 #else
275 275 $ hg pull ../troffset-computation
276 276 pulling from ../troffset-computation
277 277 searching for changes
278 278 adding changesets
279 279 adding manifests
280 280 adding file changes
281 281 *Killed* (glob)
282 282 [137]
283 283 #endif
284 284
285 285 The inline revlog still exists, but a split version exists next to it
286 286
287 287 $ f -s .hg/store/data*/file*
288 .hg/store/data-s/file: size=320
288 .hg/store/data-s/file.i: size=320
289 289 .hg/store/data/file.d: size=267307
290 290 .hg/store/data/file.i: size=132395
291 291
292 292 $ cat .hg/store/journal | tr -s '\000' ' ' | grep 'data.*/file'
293 293 data/file.i 1174
294 294 data/file.d 0
295 295
296 296 recover rolls the split back; the fncache is still valid
297 297
298 298 $ hg recover
299 299 rolling back interrupted transaction
300 300 (verify step skipped, run `hg verify` to check your repository content)
301 301 $ f -s .hg/store/data*/file*
302 302 .hg/store/data/file.i: size=1174
303 303 $ hg tip
304 304 changeset: 1:64b04c8dc267
305 305 tag: tip
306 306 user: test
307 307 date: Thu Jan 01 00:00:00 1970 +0000
308 308 summary: b
309 309
310 310 $ hg verify -q
311 311 $ cd ..
312 312
313 313 Test a hard crash right after the index is moved into place
314 314 ===========================================================
315 315
316 316 Now retry the procedure but intercept the rename of the index.
317 317
318 318 $ hg clone --quiet --rev 1 troffset-computation troffset-computation-crash-after-rename
319 319 $ cd troffset-computation-crash-after-rename
320 320
321 321 Reference size:
322 322 $ f -s file
323 323 file: size=1024
324 324 $ f -s .hg/store/data*/file*
325 325 .hg/store/data/file.i: size=1174
326 326
327 327 $ cat > .hg/hgrc <<EOF
328 328 > [extensions]
329 329 > intercept_rename = $TESTTMP/intercept_after_rename.py
330 330 > EOF
331 331 #if chg
332 332 $ hg pull ../troffset-computation
333 333 pulling from ../troffset-computation
334 334 searching for changes
335 335 adding changesets
336 336 adding manifests
337 337 adding file changes
338 338 [255]
339 339 #else
340 340 $ hg pull ../troffset-computation
341 341 pulling from ../troffset-computation
342 342 searching for changes
343 343 adding changesets
344 344 adding manifests
345 345 adding file changes
346 346 *Killed* (glob)
347 347 [137]
348 348 #endif
349 349
350 350 The inline revlog was overwritten on disk
351 351
352 352 $ f -s .hg/store/data*/file*
353 353 .hg/store/data/file.d: size=267307
354 354 .hg/store/data/file.i: size=320
355 355
356 356 $ cat .hg/store/journal | tr -s '\000' ' ' | grep 'data.*/file'
357 357 data/file.i 1174
358 358 data/file.d 0
359 359
360 360 recover rolls the split back; the fncache is still valid
361 361
362 362 $ hg recover
363 363 rolling back interrupted transaction
364 364 (verify step skipped, run `hg verify` to check your repository content)
365 365 $ f -s .hg/store/data*/file*
366 366 .hg/store/data/file.i: size=1174
367 367 $ hg tip
368 368 changeset: 1:64b04c8dc267
369 369 tag: tip
370 370 user: test
371 371 date: Thu Jan 01 00:00:00 1970 +0000
372 372 summary: b
373 373
374 374 $ hg verify -q
375 375 $ cd ..
376 376
377 377 Have the transaction roll itself back without any hard crash
378 378 ===========================================================
379 379
380 380
381 381 Repeat the original test but let hg roll back the transaction.
382 382
383 383 $ hg clone --quiet --rev 1 troffset-computation troffset-computation-copy-rb
384 384 $ cd troffset-computation-copy-rb
385 385 $ cat > .hg/hgrc <<EOF
386 386 > [hooks]
387 387 > pretxnchangegroup = false
388 388 > EOF
389 389 $ hg pull ../troffset-computation
390 390 pulling from ../troffset-computation
391 391 searching for changes
392 392 adding changesets
393 393 adding manifests
394 394 adding file changes
395 395 transaction abort!
396 396 rollback completed
397 397 abort: pretxnchangegroup hook exited with status 1
398 398 [40]
399 399
400 400 The split was rolled back
401 401
402 402 $ f -s .hg/store/data*/file*
403 403 .hg/store/data/file.d: size=0
404 404 .hg/store/data/file.i: size=1174
405 405
406 406 $ hg tip
407 407 changeset: 1:64b04c8dc267
408 408 tag: tip
409 409 user: test
410 410 date: Thu Jan 01 00:00:00 1970 +0000
411 411 summary: b
412 412
413 413 $ hg verify -q
414 414
415 415 $ cat > .hg/hgrc <<EOF
416 416 > [hooks]
417 417 > EOF
418 418 $ hg pull ../troffset-computation
419 419 pulling from ../troffset-computation
420 420 searching for changes
421 421 adding changesets
422 422 adding manifests
423 423 adding file changes
424 424 added 3 changesets with 18 changes to 6 files
425 425 new changesets c99a94cae9b1:64874a3b0160
426 426 (run 'hg update' to get a working copy)
427 427
428 428 $ f -s .hg/store/data*/file*
429 429 .hg/store/data/file.d: size=267307
430 430 .hg/store/data/file.i: size=320
431 431 $ hg verify -q
432 432
433 433 $ cd ..
434 434
435 435 Read race
436 436 =========
437 437
438 438 We check that a client that starts reading a revlog (its index) after the
439 439 split and finishes reading (the data) after the rollback is fine
440 440
441 441 $ hg clone --quiet --rev 1 troffset-computation troffset-computation-race
442 442 $ cd troffset-computation-race
443 443 $ cat > .hg/hgrc <<EOF
444 444 > [hooks]
445 445 > pretxnchangegroup=$RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/reader-index-read $TESTTMP/writer-revlog-split
446 446 > pretxnclose = false
447 447 > EOF
448 448
449 449 start a reader
450 450
451 451 $ hg cat --rev 0 file \
452 452 > --config "extensions.wait_read=$TESTTMP/reader_wait_split.py" \
453 453 > 2> $TESTTMP/reader.stderr \
454 454 > > $TESTTMP/reader.stdout &
455 455
456 456 Do a failed pull in parallel
457 457
458 458 $ hg pull ../troffset-computation
459 459 pulling from ../troffset-computation
460 460 searching for changes
461 461 adding changesets
462 462 adding manifests
463 463 adding file changes
464 464 transaction abort!
465 465 rollback completed
466 466 abort: pretxnclose hook exited with status 1
467 467 [40]
468 468 $ touch $TESTTMP/writer-revlog-unsplit
469 469 $ wait
470 470
471 471 The reader should be fine
472 472 $ cat $TESTTMP/reader.stderr
473 473 $ cat $TESTTMP/reader.stdout
474 474 1 (no-eol)
475 475
476 476 $ hg verify -q
477 477
478 478 $ cd ..
479 479
480 480 pending hooks
481 481 =============
482 482
483 483 We check that hooks properly see the inside of the transaction, while other processes don't.
484 484
485 485 $ hg clone --quiet --rev 1 troffset-computation troffset-computation-hooks
486 486 $ cd troffset-computation-hooks
487 487 $ cat > .hg/hgrc <<EOF
488 488 > [hooks]
489 489 > pretxnclose.01-echo = hg cat -r 'max(all())' file | f --size
490 490 > pretxnclose.02-echo = $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/hook-done $TESTTMP/hook-tr-ready
491 491 > pretxnclose.03-abort = false
492 492 > EOF
493 493
494 494 $ (
495 495 > $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/hook-tr-ready;\
496 496 > hg cat -r 'max(all())' file | f --size;\
497 497 > touch $TESTTMP/hook-done
498 498 > ) >stdout 2>stderr &
499 499
500 500 $ hg pull ../troffset-computation
501 501 pulling from ../troffset-computation
502 502 searching for changes
503 503 adding changesets
504 504 adding manifests
505 505 adding file changes
506 506 size=135168
507 507 transaction abort!
508 508 rollback completed
509 509 abort: pretxnclose.03-abort hook exited with status 1
510 510 [40]
511 511
512 512 $ cat stdout
513 513 size=1024
514 514 $ cat stderr
515 515
516 516 $ hg verify -q
517 517
518 518
519 519 $ cd ..