censor: accept censored revision during upgrade...
marmoute
r52006:2dec2365 stable
@@ -1,3533 +1,3537 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 CHANGELOGV2,
39 39 COMP_MODE_DEFAULT,
40 40 COMP_MODE_INLINE,
41 41 COMP_MODE_PLAIN,
42 42 DELTA_BASE_REUSE_NO,
43 43 DELTA_BASE_REUSE_TRY,
44 44 ENTRY_RANK,
45 45 FEATURES_BY_VERSION,
46 46 FLAG_GENERALDELTA,
47 47 FLAG_INLINE_DATA,
48 48 INDEX_HEADER,
49 49 KIND_CHANGELOG,
50 50 KIND_FILELOG,
51 51 RANK_UNKNOWN,
52 52 REVLOGV0,
53 53 REVLOGV1,
54 54 REVLOGV1_FLAGS,
55 55 REVLOGV2,
56 56 REVLOGV2_FLAGS,
57 57 REVLOG_DEFAULT_FLAGS,
58 58 REVLOG_DEFAULT_FORMAT,
59 59 REVLOG_DEFAULT_VERSION,
60 60 SUPPORTED_FLAGS,
61 61 )
62 62 from .revlogutils.flagutil import (
63 63 REVIDX_DEFAULT_FLAGS,
64 64 REVIDX_ELLIPSIS,
65 65 REVIDX_EXTSTORED,
66 66 REVIDX_FLAGS_ORDER,
67 67 REVIDX_HASCOPIESINFO,
68 68 REVIDX_ISCENSORED,
69 69 REVIDX_RAWTEXT_CHANGING_FLAGS,
70 70 )
71 71 from .thirdparty import attr
72 72 from . import (
73 73 ancestor,
74 74 dagop,
75 75 error,
76 76 mdiff,
77 77 policy,
78 78 pycompat,
79 79 revlogutils,
80 80 templatefilters,
81 81 util,
82 82 )
83 83 from .interfaces import (
84 84 repository,
85 85 util as interfaceutil,
86 86 )
87 87 from .revlogutils import (
88 88 deltas as deltautil,
89 89 docket as docketutil,
90 90 flagutil,
91 91 nodemap as nodemaputil,
92 92 randomaccessfile,
93 93 revlogv0,
94 94 rewrite,
95 95 sidedata as sidedatautil,
96 96 )
97 97 from .utils import (
98 98 storageutil,
99 99 stringutil,
100 100 )
101 101
102 102 # blanked usage of all the names to prevent pyflakes constraints
103 103 # We need these names available in the module for extensions.
104 104
105 105 REVLOGV0
106 106 REVLOGV1
107 107 REVLOGV2
108 108 CHANGELOGV2
109 109 FLAG_INLINE_DATA
110 110 FLAG_GENERALDELTA
111 111 REVLOG_DEFAULT_FLAGS
112 112 REVLOG_DEFAULT_FORMAT
113 113 REVLOG_DEFAULT_VERSION
114 114 REVLOGV1_FLAGS
115 115 REVLOGV2_FLAGS
116 116 REVIDX_ISCENSORED
117 117 REVIDX_ELLIPSIS
118 118 REVIDX_HASCOPIESINFO
119 119 REVIDX_EXTSTORED
120 120 REVIDX_DEFAULT_FLAGS
121 121 REVIDX_FLAGS_ORDER
122 122 REVIDX_RAWTEXT_CHANGING_FLAGS
123 123
124 124 parsers = policy.importmod('parsers')
125 125 rustancestor = policy.importrust('ancestor')
126 126 rustdagop = policy.importrust('dagop')
127 127 rustrevlog = policy.importrust('revlog')
128 128
129 129 # Aliased for performance.
130 130 _zlibdecompress = zlib.decompress
131 131
132 132 # max size of inline data embedded into a revlog
133 133 _maxinline = 131072
134 134
135 135 # Flag processors for REVIDX_ELLIPSIS.
136 136 def ellipsisreadprocessor(rl, text):
137 137 return text, False
138 138
139 139
140 140 def ellipsiswriteprocessor(rl, text):
141 141 return text, False
142 142
143 143
144 144 def ellipsisrawprocessor(rl, text):
145 145 return False
146 146
147 147
148 148 ellipsisprocessor = (
149 149 ellipsisreadprocessor,
150 150 ellipsiswriteprocessor,
151 151 ellipsisrawprocessor,
152 152 )
153 153
154 154
155 155 def _verify_revision(rl, skipflags, state, node):
156 156 """Verify the integrity of the given revlog ``node`` while providing a hook
157 157 point for extensions to influence the operation."""
158 158 if skipflags:
159 159 state[b'skipread'].add(node)
160 160 else:
161 161 # Side-effect: read content and verify hash.
162 162 rl.revision(node)
163 163
164 164
165 165 # True if a fast implementation for persistent-nodemap is available
166 166 #
167 167 # We also consider we have a "fast" implementation in "pure" python because
168 168 # people using pure don't really have performance considerations (and a
169 169 # wheelbarrow of other slowness sources)
170 170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
171 171 parsers, 'BaseIndexObject'
172 172 )
173 173
174 174
175 175 @interfaceutil.implementer(repository.irevisiondelta)
176 176 @attr.s(slots=True)
177 177 class revlogrevisiondelta:
178 178 node = attr.ib()
179 179 p1node = attr.ib()
180 180 p2node = attr.ib()
181 181 basenode = attr.ib()
182 182 flags = attr.ib()
183 183 baserevisionsize = attr.ib()
184 184 revision = attr.ib()
185 185 delta = attr.ib()
186 186 sidedata = attr.ib()
187 187 protocol_flags = attr.ib()
188 188 linknode = attr.ib(default=None)
189 189
190 190
191 191 @interfaceutil.implementer(repository.iverifyproblem)
192 192 @attr.s(frozen=True)
193 193 class revlogproblem:
194 194 warning = attr.ib(default=None)
195 195 error = attr.ib(default=None)
196 196 node = attr.ib(default=None)
197 197
198 198
199 199 def parse_index_v1(data, inline):
200 200 # call the C implementation to parse the index data
201 201 index, cache = parsers.parse_index2(data, inline)
202 202 return index, cache
203 203
204 204
205 205 def parse_index_v2(data, inline):
206 206 # call the C implementation to parse the index data
207 207 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
208 208 return index, cache
209 209
210 210
211 211 def parse_index_cl_v2(data, inline):
212 212 # call the C implementation to parse the index data
213 213 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
214 214 return index, cache
215 215
216 216
217 217 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
218 218
219 219 def parse_index_v1_nodemap(data, inline):
220 220 index, cache = parsers.parse_index_devel_nodemap(data, inline)
221 221 return index, cache
222 222
223 223
224 224 else:
225 225 parse_index_v1_nodemap = None
226 226
227 227
228 228 def parse_index_v1_mixed(data, inline):
229 229 index, cache = parse_index_v1(data, inline)
230 230 return rustrevlog.MixedIndex(index), cache
231 231
232 232
233 233 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
234 234 # signed integer)
235 235 _maxentrysize = 0x7FFFFFFF
236 236
237 237 FILE_TOO_SHORT_MSG = _(
238 238 b'cannot read from revlog %s;'
239 239 b' expected %d bytes from offset %d, data size is %d'
240 240 )
241 241
242 242 hexdigits = b'0123456789abcdefABCDEF'
243 243
244 244
245 245 class revlog:
246 246 """
247 247 the underlying revision storage object
248 248
249 249 A revlog consists of two parts, an index and the revision data.
250 250
251 251 The index is a file with a fixed record size containing
252 252 information on each revision, including its nodeid (hash), the
253 253 nodeids of its parents, the position and offset of its data within
254 254 the data file, and the revision it's based on. Finally, each entry
255 255 contains a linkrev entry that can serve as a pointer to external
256 256 data.
257 257
258 258 The revision data itself is a linear collection of data chunks.
259 259 Each chunk represents a revision and is usually represented as a
260 260 delta against the previous chunk. To bound lookup time, runs of
261 261 deltas are limited to about 2 times the length of the original
262 262 version data. This makes retrieval of a version proportional to
263 263 its size, or O(1) relative to the number of revisions.
264 264
265 265 Both pieces of the revlog are written to in an append-only
266 266 fashion, which means we never need to rewrite a file to insert or
267 267 remove data, and can use some simple techniques to avoid the need
268 268 for locking while reading.
269 269
270 270 If checkambig, indexfile is opened with checkambig=True at
271 271 writing, to avoid file stat ambiguity.
272 272
273 273 If mmaplargeindex is True, and an mmapindexthreshold is set, the
274 274 index will be mmapped rather than read if it is larger than the
275 275 configured threshold.
276 276
277 277 If censorable is True, the revlog can have censored revisions.
278 278
279 279 If `upperboundcomp` is not None, this is the expected maximal gain from
280 280 compression for the data content.
281 281
282 282 `concurrencychecker` is an optional function that receives 3 arguments: a
283 283 file handle, a filename, and an expected position. It should check whether
284 284 the current position in the file handle is valid, and log/warn/fail (by
285 285 raising).
286 286
287 287 See mercurial/revlogutils/constants.py for details about the content of an
288 288 index entry.
289 289 """
290 290
291 291 _flagserrorclass = error.RevlogError
292 292
293 293 @staticmethod
294 294 def is_inline_index(header_bytes):
295 295 if len(header_bytes) == 0:
296 296 return True
297 297
298 298 header = INDEX_HEADER.unpack(header_bytes)[0]
299 299
300 300 _format_flags = header & ~0xFFFF
301 301 _format_version = header & 0xFFFF
302 302
303 303 features = FEATURES_BY_VERSION[_format_version]
304 304 return features[b'inline'](_format_flags)
305 305
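# A minimal sketch of the header layout decoded above (illustrative, not
# part of the upstream source): the 4-byte header stores feature flags in
# the high 16 bits and the format version in the low 16 bits. Assuming a
# v1 inline + generaldelta revlog:
#
#   >>> header = 0x00030001    # FLAG_INLINE_DATA | FLAG_GENERALDELTA | REVLOGV1
#   >>> hex(header & ~0xFFFF)  # format flags
#   '0x30000'
#   >>> header & 0xFFFF        # format version
#   1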
306 306 def __init__(
307 307 self,
308 308 opener,
309 309 target,
310 310 radix,
311 311 postfix=None, # only exists for `tmpcensored` now
312 312 checkambig=False,
313 313 mmaplargeindex=False,
314 314 censorable=False,
315 315 upperboundcomp=None,
316 316 persistentnodemap=False,
317 317 concurrencychecker=None,
318 318 trypending=False,
319 319 try_split=False,
320 320 canonical_parent_order=True,
321 321 ):
322 322 """
323 323 create a revlog object
324 324
325 325 opener is a function that abstracts the file opening operation
326 326 and can be used to implement COW semantics or the like.
327 327
328 328 `target`: a (KIND, ID) tuple that identifies the content stored in
329 329 this revlog. It helps the rest of the code to understand what the revlog
330 330 is about without having to resort to heuristics and index filename
331 331 analysis. Note that this must reliably be set by normal code, but
332 332 that test, debug, or performance measurement code might not set this to an
333 333 accurate value.
334 334 """
335 335 self.upperboundcomp = upperboundcomp
336 336
337 337 self.radix = radix
338 338
339 339 self._docket_file = None
340 340 self._indexfile = None
341 341 self._datafile = None
342 342 self._sidedatafile = None
343 343 self._nodemap_file = None
344 344 self.postfix = postfix
345 345 self._trypending = trypending
346 346 self._try_split = try_split
347 347 self.opener = opener
348 348 if persistentnodemap:
349 349 self._nodemap_file = nodemaputil.get_nodemap_file(self)
350 350
351 351 assert target[0] in ALL_KINDS
352 352 assert len(target) == 2
353 353 self.target = target
354 354 # When True, indexfile is opened with checkambig=True at writing, to
355 355 # avoid file stat ambiguity.
356 356 self._checkambig = checkambig
357 357 self._mmaplargeindex = mmaplargeindex
358 358 self._censorable = censorable
359 359 # 3-tuple of (node, rev, text) for a raw revision.
360 360 self._revisioncache = None
361 361 # Maps rev to chain base rev.
362 362 self._chainbasecache = util.lrucachedict(100)
363 363 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
364 364 self._chunkcache = (0, b'')
365 365 # How much data to read and cache into the raw revlog data cache.
366 366 self._chunkcachesize = 65536
367 367 self._maxchainlen = None
368 368 self._deltabothparents = True
369 369 self._candidate_group_chunk_size = 0
370 370 self._debug_delta = False
371 371 self.index = None
372 372 self._docket = None
373 373 self._nodemap_docket = None
374 374 # Mapping of partial identifiers to full nodes.
375 375 self._pcache = {}
376 376 # Mapping of revision integer to full node.
377 377 self._compengine = b'zlib'
378 378 self._compengineopts = {}
379 379 self._maxdeltachainspan = -1
380 380 self._withsparseread = False
381 381 self._sparserevlog = False
382 382 self.hassidedata = False
383 383 self._srdensitythreshold = 0.50
384 384 self._srmingapsize = 262144
385 385
386 386 # other optional features
387 387
388 388 # might remove rank configuration once the computation has no impact
389 389 self._compute_rank = False
390 390
391 391 # Make copy of flag processors so each revlog instance can support
392 392 # custom flags.
393 393 self._flagprocessors = dict(flagutil.flagprocessors)
394 394
395 395 # 3-tuple of file handles being used for active writing.
396 396 self._writinghandles = None
397 397 # prevent nesting of addgroup
398 398 self._adding_group = None
399 399
400 400 self._loadindex()
401 401
402 402 self._concurrencychecker = concurrencychecker
403 403
404 404 # parent order is supposed to be semantically irrelevant, so we
405 405 # normally resort parents to ensure that the first parent is non-null,
406 406 # if there is a non-null parent at all.
407 407 # filelog abuses the parent order as a flag to mark some instances of
408 408 # meta-encoded files, so allow it to disable this behavior.
409 409 self.canonical_parent_order = canonical_parent_order
410 410
411 411 def _init_opts(self):
412 412 """process options (from above/config) to setup associated default revlog mode
413 413
414 414 These values might be affected when actually reading the on-disk information.
415 415
416 416 The relevant values are returned for use in _loadindex().
417 417
418 418 * newversionflags:
419 419 version header to use if we need to create a new revlog
420 420
421 421 * mmapindexthreshold:
422 422 minimal index size at which to start using mmap
423 423
424 424 * force_nodemap:
425 425 force the usage of a "development" version of the nodemap code
426 426 """
427 427 mmapindexthreshold = None
428 428 opts = self.opener.options
429 429
430 430 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
431 431 new_header = CHANGELOGV2
432 432 self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
433 433 elif b'revlogv2' in opts:
434 434 new_header = REVLOGV2
435 435 elif b'revlogv1' in opts:
436 436 new_header = REVLOGV1 | FLAG_INLINE_DATA
437 437 if b'generaldelta' in opts:
438 438 new_header |= FLAG_GENERALDELTA
439 439 elif b'revlogv0' in self.opener.options:
440 440 new_header = REVLOGV0
441 441 else:
442 442 new_header = REVLOG_DEFAULT_VERSION
443 443
444 444 if b'chunkcachesize' in opts:
445 445 self._chunkcachesize = opts[b'chunkcachesize']
446 446 if b'maxchainlen' in opts:
447 447 self._maxchainlen = opts[b'maxchainlen']
448 448 if b'deltabothparents' in opts:
449 449 self._deltabothparents = opts[b'deltabothparents']
450 450 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
451 451 if dps_cgds:
452 452 self._candidate_group_chunk_size = dps_cgds
453 453 self._lazydelta = bool(opts.get(b'lazydelta', True))
454 454 self._lazydeltabase = False
455 455 if self._lazydelta:
456 456 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
457 457 if b'debug-delta' in opts:
458 458 self._debug_delta = opts[b'debug-delta']
459 459 if b'compengine' in opts:
460 460 self._compengine = opts[b'compengine']
461 461 if b'zlib.level' in opts:
462 462 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
463 463 if b'zstd.level' in opts:
464 464 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
465 465 if b'maxdeltachainspan' in opts:
466 466 self._maxdeltachainspan = opts[b'maxdeltachainspan']
467 467 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
468 468 mmapindexthreshold = opts[b'mmapindexthreshold']
469 469 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
470 470 withsparseread = bool(opts.get(b'with-sparse-read', False))
471 471 # sparse-revlog forces sparse-read
472 472 self._withsparseread = self._sparserevlog or withsparseread
473 473 if b'sparse-read-density-threshold' in opts:
474 474 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
475 475 if b'sparse-read-min-gap-size' in opts:
476 476 self._srmingapsize = opts[b'sparse-read-min-gap-size']
477 477 if opts.get(b'enableellipsis'):
478 478 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
479 479
480 480 # revlog v0 doesn't have flag processors
481 481 for flag, processor in opts.get(b'flagprocessors', {}).items():
482 482 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
483 483
484 484 if self._chunkcachesize <= 0:
485 485 raise error.RevlogError(
486 486 _(b'revlog chunk cache size %r is not greater than 0')
487 487 % self._chunkcachesize
488 488 )
489 489 elif self._chunkcachesize & (self._chunkcachesize - 1):
490 490 raise error.RevlogError(
491 491 _(b'revlog chunk cache size %r is not a power of 2')
492 492 % self._chunkcachesize
493 493 )
494 494 force_nodemap = opts.get(b'devel-force-nodemap', False)
495 495 return new_header, mmapindexthreshold, force_nodemap
496 496
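# Aside on the ``_chunkcachesize`` validation above (an illustrative
# sketch, not upstream code): ``x & (x - 1)`` clears the lowest set bit,
# so it is zero exactly when ``x`` is a power of two (for x > 0):
#
#   >>> 65536 & (65536 - 1)   # power of two: accepted
#   0
#   >>> 65537 & (65537 - 1)   # not a power of two: rejected
#   65536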
497 497 def _get_data(self, filepath, mmap_threshold, size=None):
498 498 """return a file content with or without mmap
499 499
500 500 If the file is missing return the empty string"""
501 501 try:
502 502 with self.opener(filepath) as fp:
503 503 if mmap_threshold is not None:
504 504 file_size = self.opener.fstat(fp).st_size
505 505 if file_size >= mmap_threshold:
506 506 if size is not None:
507 507 # avoid potential mmap crash
508 508 size = min(file_size, size)
509 509 # TODO: should .close() to release resources without
510 510 # relying on Python GC
511 511 if size is None:
512 512 return util.buffer(util.mmapread(fp))
513 513 else:
514 514 return util.buffer(util.mmapread(fp, size))
515 515 if size is None:
516 516 return fp.read()
517 517 else:
518 518 return fp.read(size)
519 519 except FileNotFoundError:
520 520 return b''
521 521
522 522 def get_streams(self, max_linkrev, force_inline=False):
523 523 n = len(self)
524 524 index = self.index
525 525 while n > 0:
526 526 linkrev = index[n - 1][4]
527 527 if linkrev < max_linkrev:
528 528 break
529 529 # note: this loop will rarely go through multiple iterations, since
530 530 # it only traverses commits created during the current streaming
531 531 # pull operation.
532 532 #
533 533 # If this becomes a problem, using a binary search should cap the
534 534 # runtime of this.
535 535 n = n - 1
536 536 if n == 0:
537 537 # no data to send
538 538 return []
539 539 index_size = n * index.entry_size
540 540 data_size = self.end(n - 1)
541 541
542 542 # XXX we might have been split (or stripped) since the object
543 543 # initialization. We need to close this race too, by having a way to
544 544 # pre-open the files we feed to the revlog and never closing them before
545 545 # we are done streaming.
546 546
547 547 if self._inline:
548 548
549 549 def get_stream():
550 550 with self._indexfp() as fp:
551 551 yield None
552 552 size = index_size + data_size
553 553 if size <= 65536:
554 554 yield fp.read(size)
555 555 else:
556 556 yield from util.filechunkiter(fp, limit=size)
557 557
558 558 inline_stream = get_stream()
559 559 next(inline_stream)
560 560 return [
561 561 (self._indexfile, inline_stream, index_size + data_size),
562 562 ]
563 563 elif force_inline:
564 564
565 565 def get_stream():
566 566 with self._datafp() as fp_d:
567 567 yield None
568 568
569 569 for rev in range(n):
570 570 idx = self.index.entry_binary(rev)
571 571 if rev == 0 and self._docket is None:
572 572 # re-inject the inline flag
573 573 header = self._format_flags
574 574 header |= self._format_version
575 575 header |= FLAG_INLINE_DATA
576 576 header = self.index.pack_header(header)
577 577 idx = header + idx
578 578 yield idx
579 579 yield self._getsegmentforrevs(rev, rev, df=fp_d)[1]
580 580
581 581 inline_stream = get_stream()
582 582 next(inline_stream)
583 583 return [
584 584 (self._indexfile, inline_stream, index_size + data_size),
585 585 ]
586 586 else:
587 587
588 588 def get_index_stream():
589 589 with self._indexfp() as fp:
590 590 yield None
591 591 if index_size <= 65536:
592 592 yield fp.read(index_size)
593 593 else:
594 594 yield from util.filechunkiter(fp, limit=index_size)
595 595
596 596 def get_data_stream():
597 597 with self._datafp() as fp:
598 598 yield None
599 599 if data_size <= 65536:
600 600 yield fp.read(data_size)
601 601 else:
602 602 yield from util.filechunkiter(fp, limit=data_size)
603 603
604 604 index_stream = get_index_stream()
605 605 next(index_stream)
606 606 data_stream = get_data_stream()
607 607 next(data_stream)
608 608 return [
609 609 (self._datafile, data_stream, data_size),
610 610 (self._indexfile, index_stream, index_size),
611 611 ]
612 612
613 613 def _loadindex(self, docket=None):
614 614
615 615 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
616 616
617 617 if self.postfix is not None:
618 618 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
619 619 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
620 620 entry_point = b'%s.i.a' % self.radix
621 621 elif self._try_split and self.opener.exists(self._split_index_file):
622 622 entry_point = self._split_index_file
623 623 else:
624 624 entry_point = b'%s.i' % self.radix
625 625
626 626 if docket is not None:
627 627 self._docket = docket
628 628 self._docket_file = entry_point
629 629 else:
630 630 self._initempty = True
631 631 entry_data = self._get_data(entry_point, mmapindexthreshold)
632 632 if len(entry_data) > 0:
633 633 header = INDEX_HEADER.unpack(entry_data[:4])[0]
634 634 self._initempty = False
635 635 else:
636 636 header = new_header
637 637
638 638 self._format_flags = header & ~0xFFFF
639 639 self._format_version = header & 0xFFFF
640 640
641 641 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
642 642 if supported_flags is None:
643 643 msg = _(b'unknown version (%d) in revlog %s')
644 644 msg %= (self._format_version, self.display_id)
645 645 raise error.RevlogError(msg)
646 646 elif self._format_flags & ~supported_flags:
647 647 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
648 648 display_flag = self._format_flags >> 16
649 649 msg %= (display_flag, self._format_version, self.display_id)
650 650 raise error.RevlogError(msg)
651 651
652 652 features = FEATURES_BY_VERSION[self._format_version]
653 653 self._inline = features[b'inline'](self._format_flags)
654 654 self._generaldelta = features[b'generaldelta'](self._format_flags)
655 655 self.hassidedata = features[b'sidedata']
656 656
657 657 if not features[b'docket']:
658 658 self._indexfile = entry_point
659 659 index_data = entry_data
660 660 else:
661 661 self._docket_file = entry_point
662 662 if self._initempty:
663 663 self._docket = docketutil.default_docket(self, header)
664 664 else:
665 665 self._docket = docketutil.parse_docket(
666 666 self, entry_data, use_pending=self._trypending
667 667 )
668 668
669 669 if self._docket is not None:
670 670 self._indexfile = self._docket.index_filepath()
671 671 index_data = b''
672 672 index_size = self._docket.index_end
673 673 if index_size > 0:
674 674 index_data = self._get_data(
675 675 self._indexfile, mmapindexthreshold, size=index_size
676 676 )
677 677 if len(index_data) < index_size:
678 678 msg = _(b'too few index data for %s: got %d, expected %d')
679 679 msg %= (self.display_id, len(index_data), index_size)
680 680 raise error.RevlogError(msg)
681 681
682 682 self._inline = False
683 683 # generaldelta implied by version 2 revlogs.
684 684 self._generaldelta = True
685 685 # the logic for persistent nodemap will be dealt with within the
686 686 # main docket, so disable it for now.
687 687 self._nodemap_file = None
688 688
689 689 if self._docket is not None:
690 690 self._datafile = self._docket.data_filepath()
691 691 self._sidedatafile = self._docket.sidedata_filepath()
692 692 elif self.postfix is None:
693 693 self._datafile = b'%s.d' % self.radix
694 694 else:
695 695 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
696 696
697 697 self.nodeconstants = sha1nodeconstants
698 698 self.nullid = self.nodeconstants.nullid
699 699
700 700 # sparse-revlog can't be on without general-delta (issue6056)
701 701 if not self._generaldelta:
702 702 self._sparserevlog = False
703 703
704 704 self._storedeltachains = True
705 705
706 706 devel_nodemap = (
707 707 self._nodemap_file
708 708 and force_nodemap
709 709 and parse_index_v1_nodemap is not None
710 710 )
711 711
712 712 use_rust_index = False
713 713 if rustrevlog is not None:
714 714 if self._nodemap_file is not None:
715 715 use_rust_index = True
716 716 else:
717 717 use_rust_index = self.opener.options.get(b'rust.index')
718 718
719 719 self._parse_index = parse_index_v1
720 720 if self._format_version == REVLOGV0:
721 721 self._parse_index = revlogv0.parse_index_v0
722 722 elif self._format_version == REVLOGV2:
723 723 self._parse_index = parse_index_v2
724 724 elif self._format_version == CHANGELOGV2:
725 725 self._parse_index = parse_index_cl_v2
726 726 elif devel_nodemap:
727 727 self._parse_index = parse_index_v1_nodemap
728 728 elif use_rust_index:
729 729 self._parse_index = parse_index_v1_mixed
730 730 try:
731 731 d = self._parse_index(index_data, self._inline)
732 732 index, chunkcache = d
733 733 use_nodemap = (
734 734 not self._inline
735 735 and self._nodemap_file is not None
736 736 and util.safehasattr(index, 'update_nodemap_data')
737 737 )
738 738 if use_nodemap:
739 739 nodemap_data = nodemaputil.persisted_data(self)
740 740 if nodemap_data is not None:
741 741 docket = nodemap_data[0]
742 742 if (
743 743 len(d[0]) > docket.tip_rev
744 744 and d[0][docket.tip_rev][7] == docket.tip_node
745 745 ):
746 746 # no changelog tampering
747 747 self._nodemap_docket = docket
748 748 index.update_nodemap_data(*nodemap_data)
749 749 except (ValueError, IndexError):
750 750 raise error.RevlogError(
751 751 _(b"index %s is corrupted") % self.display_id
752 752 )
753 753 self.index = index
754 754 self._segmentfile = randomaccessfile.randomaccessfile(
755 755 self.opener,
756 756 (self._indexfile if self._inline else self._datafile),
757 757 self._chunkcachesize,
758 758 chunkcache,
759 759 )
760 760 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
761 761 self.opener,
762 762 self._sidedatafile,
763 763 self._chunkcachesize,
764 764 )
765 765 # revnum -> (chain-length, sum-delta-length)
766 766 self._chaininfocache = util.lrucachedict(500)
767 767 # revlog header -> revlog compressor
768 768 self._decompressors = {}
769 769
770 770 def get_revlog(self):
771 771 """simple function to mirror API of other not-really-revlog API"""
772 772 return self
773 773
774 774 @util.propertycache
775 775 def revlog_kind(self):
776 776 return self.target[0]
777 777
778 778 @util.propertycache
779 779 def display_id(self):
780 780 """The public facing "ID" of the revlog that we use in message"""
781 781 if self.revlog_kind == KIND_FILELOG:
782 782 # Reference the file without the "data/" prefix, so it is familiar
783 783 # to the user.
784 784 return self.target[1]
785 785 else:
786 786 return self.radix
787 787
788 788 def _get_decompressor(self, t):
789 789 try:
790 790 compressor = self._decompressors[t]
791 791 except KeyError:
792 792 try:
793 793 engine = util.compengines.forrevlogheader(t)
794 794 compressor = engine.revlogcompressor(self._compengineopts)
795 795 self._decompressors[t] = compressor
796 796 except KeyError:
797 797 raise error.RevlogError(
798 798 _(b'unknown compression type %s') % binascii.hexlify(t)
799 799 )
800 800 return compressor
801 801
802 802 @util.propertycache
803 803 def _compressor(self):
804 804 engine = util.compengines[self._compengine]
805 805 return engine.revlogcompressor(self._compengineopts)
806 806
807 807 @util.propertycache
808 808 def _decompressor(self):
809 809 """the default decompressor"""
810 810 if self._docket is None:
811 811 return None
812 812 t = self._docket.default_compression_header
813 813 c = self._get_decompressor(t)
814 814 return c.decompress
815 815
816 816 def _indexfp(self):
817 817 """file object for the revlog's index file"""
818 818 return self.opener(self._indexfile, mode=b"r")
819 819
820 820 def __index_write_fp(self):
821 821 # You should not use this directly; use `_writing` instead
822 822 try:
823 823 f = self.opener(
824 824 self._indexfile, mode=b"r+", checkambig=self._checkambig
825 825 )
826 826 if self._docket is None:
827 827 f.seek(0, os.SEEK_END)
828 828 else:
829 829 f.seek(self._docket.index_end, os.SEEK_SET)
830 830 return f
831 831 except FileNotFoundError:
832 832 return self.opener(
833 833 self._indexfile, mode=b"w+", checkambig=self._checkambig
834 834 )
835 835
836 836 def __index_new_fp(self):
837 837 # You should not use this unless you are upgrading from an inline revlog
838 838 return self.opener(
839 839 self._indexfile,
840 840 mode=b"w",
841 841 checkambig=self._checkambig,
842 842 atomictemp=True,
843 843 )
844 844
845 845 def _datafp(self, mode=b'r'):
846 846 """file object for the revlog's data file"""
847 847 return self.opener(self._datafile, mode=mode)
848 848
849 849 @contextlib.contextmanager
850 850 def _sidedatareadfp(self):
851 851 """file object suitable to read sidedata"""
852 852 if self._writinghandles:
853 853 yield self._writinghandles[2]
854 854 else:
855 855 with self.opener(self._sidedatafile) as fp:
856 856 yield fp
857 857
858 858 def tiprev(self):
859 859 return len(self.index) - 1
860 860
861 861 def tip(self):
862 862 return self.node(self.tiprev())
863 863
864 864 def __contains__(self, rev):
865 865 return 0 <= rev < len(self)
866 866
867 867 def __len__(self):
868 868 return len(self.index)
869 869
870 870 def __iter__(self):
871 871 return iter(range(len(self)))
872 872
873 873 def revs(self, start=0, stop=None):
874 874 """iterate over all rev in this revlog (from start to stop)"""
875 875 return storageutil.iterrevs(len(self), start=start, stop=stop)
876 876
877 877 def hasnode(self, node):
878 878 try:
879 879 self.rev(node)
880 880 return True
881 881 except KeyError:
882 882 return False
883 883
884 884 def candelta(self, baserev, rev):
885 885 """whether two revisions (baserev, rev) can be delta-ed or not"""
886 886 # Disable delta if either rev requires a content-changing flag
887 887 # processor (ex. LFS). This is because such flag processor can alter
888 888 # the rawtext content that the delta will be based on, and two clients
890 890 # could have the same revlog node with different flags (i.e. different
890 890 # rawtext contents) and the delta could be incompatible.
891 891 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
892 892 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
893 893 ):
894 894 return False
895 895 return True
896 896
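# Hedged sketch of the flag test above (the flag names are upstream
# constants imported at the top of this module; the scenario is
# hypothetical): a censored revision carries a rawtext-changing flag, so
# it can never take part in a delta:
#
#   >>> bool(0 & REVIDX_RAWTEXT_CHANGING_FLAGS)                  # plain rev
#   False
#   >>> bool(REVIDX_ISCENSORED & REVIDX_RAWTEXT_CHANGING_FLAGS)  # censored
#   True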
897 897 def update_caches(self, transaction):
898 898 if self._nodemap_file is not None:
899 899 if transaction is None:
900 900 nodemaputil.update_persistent_nodemap(self)
901 901 else:
902 902 nodemaputil.setup_persistent_nodemap(transaction, self)
903 903
904 904 def clearcaches(self):
905 905 self._revisioncache = None
906 906 self._chainbasecache.clear()
907 907 self._segmentfile.clear_cache()
908 908 self._segmentfile_sidedata.clear_cache()
909 909 self._pcache = {}
910 910 self._nodemap_docket = None
911 911 self.index.clearcaches()
912 912 # The python code is the one responsible for validating the docket, so we
913 913 # end up having to refresh it here.
914 914 use_nodemap = (
915 915 not self._inline
916 916 and self._nodemap_file is not None
917 917 and util.safehasattr(self.index, 'update_nodemap_data')
918 918 )
919 919 if use_nodemap:
920 920 nodemap_data = nodemaputil.persisted_data(self)
921 921 if nodemap_data is not None:
922 922 self._nodemap_docket = nodemap_data[0]
923 923 self.index.update_nodemap_data(*nodemap_data)
924 924
925 925 def rev(self, node):
926 926 try:
927 927 return self.index.rev(node)
928 928 except TypeError:
929 929 raise
930 930 except error.RevlogError:
931 931 # parsers.c radix tree lookup failed
932 932 if (
933 933 node == self.nodeconstants.wdirid
934 934 or node in self.nodeconstants.wdirfilenodeids
935 935 ):
936 936 raise error.WdirUnsupported
937 937 raise error.LookupError(node, self.display_id, _(b'no node'))
938 938
939 939 # Accessors for index entries.
940 940
941 941 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
942 942 # are flags.
943 943 def start(self, rev):
944 944 return int(self.index[rev][0] >> 16)
945 945
946 946 def sidedata_cut_off(self, rev):
947 947 sd_cut_off = self.index[rev][8]
948 948 if sd_cut_off != 0:
949 949 return sd_cut_off
950 950 # This is some annoying dance, because entries without sidedata
951 951 # currently use 0 as their offset (instead of previous-offset +
952 952 # previous-size).
953 953 #
954 954 # We should reconsider this sidedata → 0 sidedata_offset policy.
955 955 # In the meantime, we need this.
956 956 while 0 <= rev:
957 957 e = self.index[rev]
958 958 if e[9] != 0:
959 959 return e[8] + e[9]
960 960 rev -= 1
961 961 return 0
962 962
963 963 def flags(self, rev):
964 964 return self.index[rev][0] & 0xFFFF
965 965
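# Illustrative sketch of the packing used by start() and flags() above
# (hypothetical values, not upstream code): the first index field packs a
# 48-bit data offset and 16 bits of flags into one integer:
#
#   >>> offset_flags = (1024 << 16) | 0x0001
#   >>> offset_flags >> 16     # start(): byte offset into the data file
#   1024
#   >>> offset_flags & 0xFFFF  # flags()
#   1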
966 966 def length(self, rev):
967 967 return self.index[rev][1]
968 968
969 969 def sidedata_length(self, rev):
970 970 if not self.hassidedata:
971 971 return 0
972 972 return self.index[rev][9]
973 973
974 974 def rawsize(self, rev):
975 975 """return the length of the uncompressed text for a given revision"""
976 976 l = self.index[rev][2]
977 977 if l >= 0:
978 978 return l
979 979
980 980 t = self.rawdata(rev)
981 981 return len(t)
982 982
983 983 def size(self, rev):
984 984 """length of non-raw text (processed by a "read" flag processor)"""
985 985 # fast path: if no "read" flag processor could change the content,
986 986 # size is rawsize. note: ELLIPSIS is known to not change the content.
987 987 flags = self.flags(rev)
988 988 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
989 989 return self.rawsize(rev)
990 990
991 991 return len(self.revision(rev))
992 992
993 993 def fast_rank(self, rev):
994 994 """Return the rank of a revision if already known, or None otherwise.
995 995
996 996 The rank of a revision is the size of the sub-graph it defines as a
997 997 head. Equivalently, the rank of a revision `r` is the size of the set
998 998 `ancestors(r)`, `r` included.
999 999
1000 1000 This method returns the rank retrieved from the revlog in constant
1001 1001 time. It makes no attempt at computing unknown values for versions of
1002 1002 the revlog which do not persist the rank.
1003 1003 """
1004 1004 rank = self.index[rev][ENTRY_RANK]
1005 1005 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1006 1006 return None
1007 1007 if rev == nullrev:
1008 1008 return 0 # convention
1009 1009 return rank
1010 1010
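# Worked example (hypothetical changelog ``cl`` using CHANGELOGV2, which
# persists ranks): in a linear history 0 <- 1 <- 2, ancestors(2) is
# {0, 1, 2} with revision 2 included, so:
#
#   >>> cl.fast_rank(2)
#   3
#
# On a revlog version that does not store ranks, this returns None.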
1011 1011 def chainbase(self, rev):
1012 1012 base = self._chainbasecache.get(rev)
1013 1013 if base is not None:
1014 1014 return base
1015 1015
1016 1016 index = self.index
1017 1017 iterrev = rev
1018 1018 base = index[iterrev][3]
1019 1019 while base != iterrev:
1020 1020 iterrev = base
1021 1021 base = index[iterrev][3]
1022 1022
1023 1023 self._chainbasecache[rev] = base
1024 1024 return base
1025 1025
1026 1026 def linkrev(self, rev):
1027 1027 return self.index[rev][4]
1028 1028
1029 1029 def parentrevs(self, rev):
1030 1030 try:
1031 1031 entry = self.index[rev]
1032 1032 except IndexError:
1033 1033 if rev == wdirrev:
1034 1034 raise error.WdirUnsupported
1035 1035 raise
1036 1036
1037 1037 if self.canonical_parent_order and entry[5] == nullrev:
1038 1038 return entry[6], entry[5]
1039 1039 else:
1040 1040 return entry[5], entry[6]
1041 1041
1042 1042 # fast parentrevs(rev) where rev isn't filtered
1043 1043 _uncheckedparentrevs = parentrevs
1044 1044
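# Illustrative sketch of the canonical ordering above (hypothetical index
# contents): if a revision is stored with parents (nullrev, 5), then
# parentrevs() reports (5, nullrev), so the non-null parent always comes
# first; filelogs that encode metadata in the parent order opt out with
# canonical_parent_order=False.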
1045 1045 def node(self, rev):
1046 1046 try:
1047 1047 return self.index[rev][7]
1048 1048 except IndexError:
1049 1049 if rev == wdirrev:
1050 1050 raise error.WdirUnsupported
1051 1051 raise
1052 1052
1053 1053 # Derived from index values.
1054 1054
1055 1055 def end(self, rev):
1056 1056 return self.start(rev) + self.length(rev)
1057 1057
1058 1058 def parents(self, node):
1059 1059 i = self.index
1060 1060 d = i[self.rev(node)]
1061 1061 # inline node() to avoid function call overhead
1062 1062 if self.canonical_parent_order and d[5] == self.nullid:
1063 1063 return i[d[6]][7], i[d[5]][7]
1064 1064 else:
1065 1065 return i[d[5]][7], i[d[6]][7]
1066 1066
1067 1067 def chainlen(self, rev):
1068 1068 return self._chaininfo(rev)[0]
1069 1069
1070 1070 def _chaininfo(self, rev):
1071 1071 chaininfocache = self._chaininfocache
1072 1072 if rev in chaininfocache:
1073 1073 return chaininfocache[rev]
1074 1074 index = self.index
1075 1075 generaldelta = self._generaldelta
1076 1076 iterrev = rev
1077 1077 e = index[iterrev]
1078 1078 clen = 0
1079 1079 compresseddeltalen = 0
1080 1080 while iterrev != e[3]:
1081 1081 clen += 1
1082 1082 compresseddeltalen += e[1]
1083 1083 if generaldelta:
1084 1084 iterrev = e[3]
1085 1085 else:
1086 1086 iterrev -= 1
1087 1087 if iterrev in chaininfocache:
1088 1088 t = chaininfocache[iterrev]
1089 1089 clen += t[0]
1090 1090 compresseddeltalen += t[1]
1091 1091 break
1092 1092 e = index[iterrev]
1093 1093 else:
1094 1094 # Add text length of base since decompressing that also takes
1095 1095 # work. For cache hits the length is already included.
1096 1096 compresseddeltalen += e[1]
1097 1097 r = (clen, compresseddeltalen)
1098 1098 chaininfocache[rev] = r
1099 1099 return r
1100 1100
1101 1101 def _deltachain(self, rev, stoprev=None):
1102 1102 """Obtain the delta chain for a revision.
1103 1103
1104 1104 ``stoprev`` specifies a revision to stop at. If not specified, we
1105 1105 stop at the base of the chain.
1106 1106
1107 1107 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1108 1108 revs in ascending order and ``stopped`` is a bool indicating whether
1109 1109 ``stoprev`` was hit.
1110 1110 """
1111 1111 # Try C implementation.
1112 1112 try:
1113 1113 return self.index.deltachain(rev, stoprev, self._generaldelta)
1114 1114 except AttributeError:
1115 1115 pass
1116 1116
1117 1117 chain = []
1118 1118
1119 1119 # Alias to prevent attribute lookup in tight loop.
1120 1120 index = self.index
1121 1121 generaldelta = self._generaldelta
1122 1122
1123 1123 iterrev = rev
1124 1124 e = index[iterrev]
1125 1125 while iterrev != e[3] and iterrev != stoprev:
1126 1126 chain.append(iterrev)
1127 1127 if generaldelta:
1128 1128 iterrev = e[3]
1129 1129 else:
1130 1130 iterrev -= 1
1131 1131 e = index[iterrev]
1132 1132
1133 1133 if iterrev == stoprev:
1134 1134 stopped = True
1135 1135 else:
1136 1136 chain.append(iterrev)
1137 1137 stopped = False
1138 1138
1139 1139 chain.reverse()
1140 1140 return chain, stopped
1141 1141
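# Worked example for the pure-python fallback above (hypothetical
# generaldelta revlog ``rl`` where revision 5 deltas against 4, 4 against
# 2, and 2 is a full snapshot, i.e. its delta base equals itself):
#
#   >>> rl._deltachain(5)
#   ([2, 4, 5], False)        # ascending chain, base included
#   >>> rl._deltachain(5, stoprev=4)
#   ([5], True)               # walk stopped at ``stoprev``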
1142 1142 def ancestors(self, revs, stoprev=0, inclusive=False):
1143 1143 """Generate the ancestors of 'revs' in reverse revision order.
1144 1144 Does not generate revs lower than stoprev.
1145 1145
1146 1146 See the documentation for ancestor.lazyancestors for more details."""
1147 1147
1148 1148 # first, make sure start revisions aren't filtered
1149 1149 revs = list(revs)
1150 1150 checkrev = self.node
1151 1151 for r in revs:
1152 1152 checkrev(r)
1153 1153 # and we're sure ancestors aren't filtered as well
1154 1154
1155 1155 if rustancestor is not None and self.index.rust_ext_compat:
1156 1156 lazyancestors = rustancestor.LazyAncestors
1157 1157 arg = self.index
1158 1158 else:
1159 1159 lazyancestors = ancestor.lazyancestors
1160 1160 arg = self._uncheckedparentrevs
1161 1161 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1162 1162
1163 1163 def descendants(self, revs):
1164 1164 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1165 1165
1166 1166 def findcommonmissing(self, common=None, heads=None):
1167 1167 """Return a tuple of the ancestors of common and the ancestors of heads
1168 1168 that are not ancestors of common. In revset terminology, we return the
1169 1169 tuple:
1170 1170
1171 1171 ::common, (::heads) - (::common)
1172 1172
1173 1173 The list is sorted by revision number, meaning it is
1174 1174 topologically sorted.
1175 1175
1176 1176 'heads' and 'common' are both lists of node IDs. If heads is
1177 1177 not supplied, uses all of the revlog's heads. If common is not
1178 1178 supplied, uses nullid."""
1179 1179 if common is None:
1180 1180 common = [self.nullid]
1181 1181 if heads is None:
1182 1182 heads = self.heads()
1183 1183
1184 1184 common = [self.rev(n) for n in common]
1185 1185 heads = [self.rev(n) for n in heads]
1186 1186
1187 1187 # we want the ancestors, but inclusive
1188 1188 class lazyset:
1189 1189 def __init__(self, lazyvalues):
1190 1190 self.addedvalues = set()
1191 1191 self.lazyvalues = lazyvalues
1192 1192
1193 1193 def __contains__(self, value):
1194 1194 return value in self.addedvalues or value in self.lazyvalues
1195 1195
1196 1196 def __iter__(self):
1197 1197 added = self.addedvalues
1198 1198 for r in added:
1199 1199 yield r
1200 1200 for r in self.lazyvalues:
1201 1201 if r not in added:
1202 1202 yield r
1203 1203
1204 1204 def add(self, value):
1205 1205 self.addedvalues.add(value)
1206 1206
1207 1207 def update(self, values):
1208 1208 self.addedvalues.update(values)
1209 1209
1210 1210 has = lazyset(self.ancestors(common))
1211 1211 has.add(nullrev)
1212 1212 has.update(common)
1213 1213
1214 1214 # take all ancestors from heads that aren't in has
1215 1215 missing = set()
1216 1216 visit = collections.deque(r for r in heads if r not in has)
1217 1217 while visit:
1218 1218 r = visit.popleft()
1219 1219 if r in missing:
1220 1220 continue
1221 1221 else:
1222 1222 missing.add(r)
1223 1223 for p in self.parentrevs(r):
1224 1224 if p not in has:
1225 1225 visit.append(p)
1226 1226 missing = list(missing)
1227 1227 missing.sort()
1228 1228 return has, [self.node(miss) for miss in missing]
1229 1229
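# Usage sketch (hypothetical nodes ``c`` and ``h`` on a revlog ``rl``):
# the return value pairs a lazy membership set over ::c with the sorted
# missing nodes:
#
#   >>> has, missing = rl.findcommonmissing([c], [h])
#   >>> # ``has`` lazily answers "is this rev an ancestor of common?";
#   >>> # ``missing`` lists the nodes of (::h) - (::c), sorted by rev.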
1230 1230 def incrementalmissingrevs(self, common=None):
1231 1231 """Return an object that can be used to incrementally compute the
1232 1232 revision numbers of the ancestors of arbitrary sets that are not
1233 1233 ancestors of common. This is an ancestor.incrementalmissingancestors
1234 1234 object.
1235 1235
1236 1236 'common' is a list of revision numbers. If common is not supplied, uses
1237 1237 nullrev.
1238 1238 """
1239 1239 if common is None:
1240 1240 common = [nullrev]
1241 1241
1242 1242 if rustancestor is not None and self.index.rust_ext_compat:
1243 1243 return rustancestor.MissingAncestors(self.index, common)
1244 1244 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1245 1245
1246 1246 def findmissingrevs(self, common=None, heads=None):
1247 1247 """Return the revision numbers of the ancestors of heads that
1248 1248 are not ancestors of common.
1249 1249
1250 1250 More specifically, return a list of revision numbers corresponding to
1251 1251 nodes N such that every N satisfies the following constraints:
1252 1252
1253 1253 1. N is an ancestor of some node in 'heads'
1254 1254 2. N is not an ancestor of any node in 'common'
1255 1255
1256 1256 The list is sorted by revision number, meaning it is
1257 1257 topologically sorted.
1258 1258
1259 1259 'heads' and 'common' are both lists of revision numbers. If heads is
1260 1260 not supplied, uses all of the revlog's heads. If common is not
1261 1261 supplied, uses nullid."""
1262 1262 if common is None:
1263 1263 common = [nullrev]
1264 1264 if heads is None:
1265 1265 heads = self.headrevs()
1266 1266
1267 1267 inc = self.incrementalmissingrevs(common=common)
1268 1268 return inc.missingancestors(heads)
1269 1269
1270 1270 def findmissing(self, common=None, heads=None):
1271 1271 """Return the ancestors of heads that are not ancestors of common.
1272 1272
1273 1273 More specifically, return a list of nodes N such that every N
1274 1274 satisfies the following constraints:
1275 1275
1276 1276 1. N is an ancestor of some node in 'heads'
1277 1277 2. N is not an ancestor of any node in 'common'
1278 1278
1279 1279 The list is sorted by revision number, meaning it is
1280 1280 topologically sorted.
1281 1281
1282 1282 'heads' and 'common' are both lists of node IDs. If heads is
1283 1283 not supplied, uses all of the revlog's heads. If common is not
1284 1284 supplied, uses nullid."""
1285 1285 if common is None:
1286 1286 common = [self.nullid]
1287 1287 if heads is None:
1288 1288 heads = self.heads()
1289 1289
1290 1290 common = [self.rev(n) for n in common]
1291 1291 heads = [self.rev(n) for n in heads]
1292 1292
1293 1293 inc = self.incrementalmissingrevs(common=common)
1294 1294 return [self.node(r) for r in inc.missingancestors(heads)]
1295 1295
1296 1296 def nodesbetween(self, roots=None, heads=None):
1297 1297 """Return a topological path from 'roots' to 'heads'.
1298 1298
1299 1299 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1300 1300 topologically sorted list of all nodes N that satisfy both of
1301 1301 these constraints:
1302 1302
1303 1303 1. N is a descendant of some node in 'roots'
1304 1304 2. N is an ancestor of some node in 'heads'
1305 1305
1306 1306 Every node is considered to be both a descendant and an ancestor
1307 1307 of itself, so every reachable node in 'roots' and 'heads' will be
1308 1308 included in 'nodes'.
1309 1309
1310 1310 'outroots' is the list of reachable nodes in 'roots', i.e., the
1311 1311 subset of 'roots' that is returned in 'nodes'. Likewise,
1312 1312 'outheads' is the subset of 'heads' that is also in 'nodes'.
1313 1313
1314 1314 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1315 1315 unspecified, uses nullid as the only root. If 'heads' is
1316 1316 unspecified, uses the list of all of the revlog's heads."""
1317 1317 nonodes = ([], [], [])
1318 1318 if roots is not None:
1319 1319 roots = list(roots)
1320 1320 if not roots:
1321 1321 return nonodes
1322 1322 lowestrev = min([self.rev(n) for n in roots])
1323 1323 else:
1324 1324 roots = [self.nullid] # Everybody's a descendant of nullid
1325 1325 lowestrev = nullrev
1326 1326 if (lowestrev == nullrev) and (heads is None):
1327 1327 # We want _all_ the nodes!
1328 1328 return (
1329 1329 [self.node(r) for r in self],
1330 1330 [self.nullid],
1331 1331 list(self.heads()),
1332 1332 )
1333 1333 if heads is None:
1334 1334 # All nodes are ancestors, so the latest ancestor is the last
1335 1335 # node.
1336 1336 highestrev = len(self) - 1
1337 1337 # Set ancestors to None to signal that every node is an ancestor.
1338 1338 ancestors = None
1339 1339 # Set heads to an empty dictionary for later discovery of heads
1340 1340 heads = {}
1341 1341 else:
1342 1342 heads = list(heads)
1343 1343 if not heads:
1344 1344 return nonodes
1345 1345 ancestors = set()
1346 1346 # Turn heads into a dictionary so we can remove 'fake' heads.
1347 1347 # Also, later we will be using it to filter out the heads we can't
1348 1348 # find from roots.
1349 1349 heads = dict.fromkeys(heads, False)
1350 1350 # Start at the top and keep marking parents until we're done.
1351 1351 nodestotag = set(heads)
1352 1352 # Remember where the top was so we can use it as a limit later.
1353 1353 highestrev = max([self.rev(n) for n in nodestotag])
1354 1354 while nodestotag:
1355 1355 # grab a node to tag
1356 1356 n = nodestotag.pop()
1357 1357 # Never tag nullid
1358 1358 if n == self.nullid:
1359 1359 continue
1360 1360 # A node's revision number represents its place in a
1361 1361 # topologically sorted list of nodes.
1362 1362 r = self.rev(n)
1363 1363 if r >= lowestrev:
1364 1364 if n not in ancestors:
1365 1365 # If we are possibly a descendant of one of the roots
1366 1366 # and we haven't already been marked as an ancestor
1367 1367 ancestors.add(n) # Mark as ancestor
1368 1368 # Add non-nullid parents to list of nodes to tag.
1369 1369 nodestotag.update(
1370 1370 [p for p in self.parents(n) if p != self.nullid]
1371 1371 )
1372 1372 elif n in heads: # We've seen it before, is it a fake head?
1373 1373 # So it is, real heads should not be the ancestors of
1374 1374 # any other heads.
1375 1375 heads.pop(n)
1376 1376 if not ancestors:
1377 1377 return nonodes
1378 1378 # Now that we have our set of ancestors, we want to remove any
1379 1379 # roots that are not ancestors.
1380 1380
1381 1381 # If one of the roots was nullid, everything is included anyway.
1382 1382 if lowestrev > nullrev:
1383 1383 # But, since we weren't, let's recompute the lowest rev to not
1384 1384 # include roots that aren't ancestors.
1385 1385
1386 1386 # Filter out roots that aren't ancestors of heads
1387 1387 roots = [root for root in roots if root in ancestors]
1388 1388 # Recompute the lowest revision
1389 1389 if roots:
1390 1390 lowestrev = min([self.rev(root) for root in roots])
1391 1391 else:
1392 1392 # No more roots? Return empty list
1393 1393 return nonodes
1394 1394 else:
1395 1395 # We are descending from nullid, and don't need to care about
1396 1396 # any other roots.
1397 1397 lowestrev = nullrev
1398 1398 roots = [self.nullid]
1399 1399 # Transform our roots list into a set.
1400 1400 descendants = set(roots)
1401 1401 # Also, keep the original roots so we can filter out roots that aren't
1402 1402 # 'real' roots (i.e. are descended from other roots).
1403 1403 roots = descendants.copy()
1404 1404 # Our topologically sorted list of output nodes.
1405 1405 orderedout = []
1406 1406 # Don't start at nullid since we don't want nullid in our output list,
1407 1407 # and if nullid shows up in descendants, empty parents will look like
1408 1408 # they're descendants.
1409 1409 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1410 1410 n = self.node(r)
1411 1411 isdescendant = False
1412 1412 if lowestrev == nullrev: # Everybody is a descendant of nullid
1413 1413 isdescendant = True
1414 1414 elif n in descendants:
1415 1415 # n is already a descendant
1416 1416 isdescendant = True
1417 1417 # This check only needs to be done here because all the roots
1418 1418 # will start being marked as descendants before the loop.
1419 1419 if n in roots:
1420 1420 # If n was a root, check if it's a 'real' root.
1421 1421 p = tuple(self.parents(n))
1422 1422 # If any of its parents are descendants, it's not a root.
1423 1423 if (p[0] in descendants) or (p[1] in descendants):
1424 1424 roots.remove(n)
1425 1425 else:
1426 1426 p = tuple(self.parents(n))
1427 1427 # A node is a descendant if either of its parents are
1428 1428 # descendants. (We seeded the descendants set with the roots
1429 1429 # up there, remember?)
1430 1430 if (p[0] in descendants) or (p[1] in descendants):
1431 1431 descendants.add(n)
1432 1432 isdescendant = True
1433 1433 if isdescendant and ((ancestors is None) or (n in ancestors)):
1434 1434 # Only include nodes that are both descendants and ancestors.
1435 1435 orderedout.append(n)
1436 1436 if (ancestors is not None) and (n in heads):
1437 1437 # We're trying to figure out which heads are reachable
1438 1438 # from roots.
1439 1439 # Mark this head as having been reached
1440 1440 heads[n] = True
1441 1441 elif ancestors is None:
1442 1442 # Otherwise, we're trying to discover the heads.
1443 1443 # Assume this is a head because if it isn't, the next step
1444 1444 # will eventually remove it.
1445 1445 heads[n] = True
1446 1446 # But, obviously its parents aren't.
1447 1447 for p in self.parents(n):
1448 1448 heads.pop(p, None)
1449 1449 heads = [head for head, flag in heads.items() if flag]
1450 1450 roots = list(roots)
1451 1451 assert orderedout
1452 1452 assert roots
1453 1453 assert heads
1454 1454 return (orderedout, roots, heads)
1455 1455
1456 1456 def headrevs(self, revs=None):
1457 1457 if revs is None:
1458 1458 try:
1459 1459 return self.index.headrevs()
1460 1460 except AttributeError:
1461 1461 return self._headrevs()
1462 1462 if rustdagop is not None and self.index.rust_ext_compat:
1463 1463 return rustdagop.headrevs(self.index, revs)
1464 1464 return dagop.headrevs(revs, self._uncheckedparentrevs)
1465 1465
1466 1466 def computephases(self, roots):
1467 1467 return self.index.computephasesmapsets(roots)
1468 1468
1469 1469 def _headrevs(self):
1470 1470 count = len(self)
1471 1471 if not count:
1472 1472 return [nullrev]
1473 1473 # we won't iter over filtered revs so nobody is a head at start
1474 1474 ishead = [0] * (count + 1)
1475 1475 index = self.index
1476 1476 for r in self:
1477 1477 ishead[r] = 1 # I may be a head
1478 1478 e = index[r]
1479 1479 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1480 1480 return [r for r, val in enumerate(ishead) if val]
1481 1481
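# Worked example (hypothetical graph): with revisions 0 <- 1 <- 2 and
# 1 <- 3, every revision starts as a candidate head and each index entry
# clears its parents, so after the loop only 2 and 3 survive:
#
#   >>> rl._headrevs()   # hypothetical revlog ``rl``
#   [2, 3]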
1482 1482 def heads(self, start=None, stop=None):
1483 1483 """return the list of all nodes that have no children
1484 1484
1485 1485 if start is specified, only heads that are descendants of
1486 1486 start will be returned
1487 1487 if stop is specified, it will consider all the revs from stop
1488 1488 as if they had no children
1489 1489 """
1490 1490 if start is None and stop is None:
1491 1491 if not len(self):
1492 1492 return [self.nullid]
1493 1493 return [self.node(r) for r in self.headrevs()]
1494 1494
1495 1495 if start is None:
1496 1496 start = nullrev
1497 1497 else:
1498 1498 start = self.rev(start)
1499 1499
1500 1500 stoprevs = {self.rev(n) for n in stop or []}
1501 1501
1502 1502 revs = dagop.headrevssubset(
1503 1503 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1504 1504 )
1505 1505
1506 1506 return [self.node(rev) for rev in revs]
1507 1507
1508 1508 def children(self, node):
1509 1509 """find the children of a given node"""
1510 1510 c = []
1511 1511 p = self.rev(node)
1512 1512 for r in self.revs(start=p + 1):
1513 1513 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1514 1514 if prevs:
1515 1515 for pr in prevs:
1516 1516 if pr == p:
1517 1517 c.append(self.node(r))
1518 1518 elif p == nullrev:
1519 1519 c.append(self.node(r))
1520 1520 return c
1521 1521
1522 1522 def commonancestorsheads(self, a, b):
1523 1523 """calculate all the heads of the common ancestors of nodes a and b"""
1524 1524 a, b = self.rev(a), self.rev(b)
1525 1525 ancs = self._commonancestorsheads(a, b)
1526 1526 return pycompat.maplist(self.node, ancs)
1527 1527
1528 1528 def _commonancestorsheads(self, *revs):
1529 1529 """calculate all the heads of the common ancestors of revs"""
1530 1530 try:
1531 1531 ancs = self.index.commonancestorsheads(*revs)
1532 1532 except (AttributeError, OverflowError): # C implementation failed
1533 1533 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1534 1534 return ancs
1535 1535
1536 1536 def isancestor(self, a, b):
1537 1537 """return True if node a is an ancestor of node b
1538 1538
1539 1539 A revision is considered an ancestor of itself."""
1540 1540 a, b = self.rev(a), self.rev(b)
1541 1541 return self.isancestorrev(a, b)
1542 1542
1543 1543 def isancestorrev(self, a, b):
1544 1544 """return True if revision a is an ancestor of revision b
1545 1545
1546 1546 A revision is considered an ancestor of itself.
1547 1547
1548 1548 The implementation of this is trivial but the use of
1549 1549 reachableroots is not."""
1550 1550 if a == nullrev:
1551 1551 return True
1552 1552 elif a == b:
1553 1553 return True
1554 1554 elif a > b:
1555 1555 return False
1556 1556 return bool(self.reachableroots(a, [b], [a], includepath=False))
1557 1557
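# Sketch of the shortcuts above (hypothetical revlog ``rl``): nullrev
# precedes everything, a revision is its own ancestor, and since a
# parent's number is always smaller than its child's, a > b rules out
# ancestry without touching reachableroots:
#
#   >>> rl.isancestorrev(nullrev, 7)
#   True
#   >>> rl.isancestorrev(7, 7)
#   True
#   >>> rl.isancestorrev(7, 3)
#   False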
1558 1558 def reachableroots(self, minroot, heads, roots, includepath=False):
1559 1559 """return (heads(::(<roots> and <roots>::<heads>)))
1560 1560
1561 1561 If includepath is True, return (<roots>::<heads>)."""
1562 1562 try:
1563 1563 return self.index.reachableroots2(
1564 1564 minroot, heads, roots, includepath
1565 1565 )
1566 1566 except AttributeError:
1567 1567 return dagop._reachablerootspure(
1568 1568 self.parentrevs, minroot, roots, heads, includepath
1569 1569 )
1570 1570
1571 1571 def ancestor(self, a, b):
1572 1572 """calculate the "best" common ancestor of nodes a and b"""
1573 1573
1574 1574 a, b = self.rev(a), self.rev(b)
1575 1575 try:
1576 1576 ancs = self.index.ancestors(a, b)
1577 1577 except (AttributeError, OverflowError):
1578 1578 ancs = ancestor.ancestors(self.parentrevs, a, b)
1579 1579 if ancs:
1580 1580 # choose a consistent winner when there's a tie
1581 1581 return min(map(self.node, ancs))
1582 1582 return self.nullid
1583 1583
1584 1584 def _match(self, id):
1585 1585 if isinstance(id, int):
1586 1586 # rev
1587 1587 return self.node(id)
1588 1588 if len(id) == self.nodeconstants.nodelen:
1589 1589 # possibly a binary node
1590 1590 # odds of a binary node being all hex in ASCII are 1 in 10**25
1591 1591 try:
1592 1592 node = id
1593 1593 self.rev(node) # quick search the index
1594 1594 return node
1595 1595 except error.LookupError:
1596 1596 pass # may be partial hex id
1597 1597 try:
1598 1598 # str(rev)
1599 1599 rev = int(id)
1600 1600 if b"%d" % rev != id:
1601 1601 raise ValueError
1602 1602 if rev < 0:
1603 1603 rev = len(self) + rev
1604 1604 if rev < 0 or rev >= len(self):
1605 1605 raise ValueError
1606 1606 return self.node(rev)
1607 1607 except (ValueError, OverflowError):
1608 1608 pass
1609 1609 if len(id) == 2 * self.nodeconstants.nodelen:
1610 1610 try:
1611 1611 # a full hex nodeid?
1612 1612 node = bin(id)
1613 1613 self.rev(node)
1614 1614 return node
1615 1615 except (binascii.Error, error.LookupError):
1616 1616 pass
1617 1617
1618 1618 def _partialmatch(self, id):
1619 1619 # we don't care about wdirfilenodeids as they should always be full hashes
1620 1620 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1621 1621 ambiguous = False
1622 1622 try:
1623 1623 partial = self.index.partialmatch(id)
1624 1624 if partial and self.hasnode(partial):
1625 1625 if maybewdir:
1626 1626 # single 'ff...' match in radix tree, ambiguous with wdir
1627 1627 ambiguous = True
1628 1628 else:
1629 1629 return partial
1630 1630 elif maybewdir:
1631 1631 # no 'ff...' match in radix tree, wdir identified
1632 1632 raise error.WdirUnsupported
1633 1633 else:
1634 1634 return None
1635 1635 except error.RevlogError:
1636 1636 # parsers.c radix tree lookup gave multiple matches
1637 1637 # fast path: for unfiltered changelog, radix tree is accurate
1638 1638 if not getattr(self, 'filteredrevs', None):
1639 1639 ambiguous = True
1640 1640 # fall through to slow path that filters hidden revisions
1641 1641 except (AttributeError, ValueError):
1642 1642 # we are pure python, or key is not hex
1643 1643 pass
1644 1644 if ambiguous:
1645 1645 raise error.AmbiguousPrefixLookupError(
1646 1646 id, self.display_id, _(b'ambiguous identifier')
1647 1647 )
1648 1648
1649 1649 if id in self._pcache:
1650 1650 return self._pcache[id]
1651 1651
1652 1652 if len(id) <= 40:
1653 1653 # hex(node)[:...]
1654 1654 l = len(id) // 2 * 2 # grab an even number of digits
1655 1655 try:
1656 1656 # we're dropping the last digit, so let's check that it's hex,
1657 1657 # to avoid the expensive computation below if it's not
1658 1658 if len(id) % 2 > 0:
1659 1659 if not (id[-1] in hexdigits):
1660 1660 return None
1661 1661 prefix = bin(id[:l])
1662 1662 except binascii.Error:
1663 1663 pass
1664 1664 else:
1665 1665 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1666 1666 nl = [
1667 1667 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1668 1668 ]
1669 1669 if self.nodeconstants.nullhex.startswith(id):
1670 1670 nl.append(self.nullid)
1671 1671 if len(nl) > 0:
1672 1672 if len(nl) == 1 and not maybewdir:
1673 1673 self._pcache[id] = nl[0]
1674 1674 return nl[0]
1675 1675 raise error.AmbiguousPrefixLookupError(
1676 1676 id, self.display_id, _(b'ambiguous identifier')
1677 1677 )
1678 1678 if maybewdir:
1679 1679 raise error.WdirUnsupported
1680 1680 return None
1681 1681
1682 1682 def lookup(self, id):
1683 1683 """locate a node based on:
1684 1684 - revision number or str(revision number)
1685 1685 - nodeid or subset of hex nodeid
1686 1686 """
1687 1687 n = self._match(id)
1688 1688 if n is not None:
1689 1689 return n
1690 1690 n = self._partialmatch(id)
1691 1691 if n:
1692 1692 return n
1693 1693
1694 1694 raise error.LookupError(id, self.display_id, _(b'no match found'))
1695 1695
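# Usage sketch for lookup() (hypothetical identifiers; `rl` is an open
# revlog):
#
#     node = rl.lookup(b'0')         # revision number, as bytes
#     node = rl.lookup(full_node)    # full binary node id
#     node = rl.lookup(b'1f0e3d')    # unambiguous hex prefix
#
# An ambiguous prefix raises error.AmbiguousPrefixLookupError; no match
# at all raises error.LookupError as above.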
1696 1696 def shortest(self, node, minlength=1):
1697 1697 """Find the shortest unambiguous prefix that matches node."""
1698 1698
1699 1699 def isvalid(prefix):
1700 1700 try:
1701 1701 matchednode = self._partialmatch(prefix)
1702 1702 except error.AmbiguousPrefixLookupError:
1703 1703 return False
1704 1704 except error.WdirUnsupported:
1705 1705 # single 'ff...' match
1706 1706 return True
1707 1707 if matchednode is None:
1708 1708 raise error.LookupError(node, self.display_id, _(b'no node'))
1709 1709 return True
1710 1710
1711 1711 def maybewdir(prefix):
1712 1712 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1713 1713
1714 1714 hexnode = hex(node)
1715 1715
1716 1716 def disambiguate(hexnode, minlength):
1717 1717 """Disambiguate against wdirid."""
1718 1718 for length in range(minlength, len(hexnode) + 1):
1719 1719 prefix = hexnode[:length]
1720 1720 if not maybewdir(prefix):
1721 1721 return prefix
1722 1722
1723 1723 if not getattr(self, 'filteredrevs', None):
1724 1724 try:
1725 1725 length = max(self.index.shortest(node), minlength)
1726 1726 return disambiguate(hexnode, length)
1727 1727 except error.RevlogError:
1728 1728 if node != self.nodeconstants.wdirid:
1729 1729 raise error.LookupError(
1730 1730 node, self.display_id, _(b'no node')
1731 1731 )
1732 1732 except AttributeError:
1733 1733 # Fall through to pure code
1734 1734 pass
1735 1735
1736 1736 if node == self.nodeconstants.wdirid:
1737 1737 for length in range(minlength, len(hexnode) + 1):
1738 1738 prefix = hexnode[:length]
1739 1739 if isvalid(prefix):
1740 1740 return prefix
1741 1741
1742 1742 for length in range(minlength, len(hexnode) + 1):
1743 1743 prefix = hexnode[:length]
1744 1744 if isvalid(prefix):
1745 1745 return disambiguate(hexnode, length)
1746 1746
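# Sketch of the round-trip contract (hypothetical caller code):
#
#     prefix = rl.shortest(node, minlength=4)
#     assert rl.lookup(prefix) == node
#
# The returned prefix is unambiguous at the time of the call, including
# against the synthetic wdir id (prefixes made only of 'f' characters).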
1747 1747 def cmp(self, node, text):
1748 1748 """compare text with a given file revision
1749 1749
1750 1750 returns True if text is different from what is stored.
1751 1751 """
1752 1752 p1, p2 = self.parents(node)
1753 1753 return storageutil.hashrevisionsha1(text, p1, p2) != node
1754 1754
1755 1755 def _getsegmentforrevs(self, startrev, endrev, df=None):
1756 1756 """Obtain a segment of raw data corresponding to a range of revisions.
1757 1757
1758 1758 Accepts the start and end revisions and an optional already-open
1759 1759 file handle to be used for reading. If the file handle is read, its
1760 1760 seek position will not be preserved.
1761 1761
1762 1762 Requests for data may be satisfied by a cache.
1763 1763
1764 1764 Returns a 2-tuple of (offset, data) for the requested range of
1765 1765 revisions. Offset is the integer offset from the beginning of the
1766 1766 revlog and data is a str or buffer of the raw byte data.
1767 1767
1768 1768 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1769 1769 to determine where each revision's data begins and ends.
1770 1770 """
1771 1771 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1772 1772 # (functions are expensive).
1773 1773 index = self.index
1774 1774 istart = index[startrev]
1775 1775 start = int(istart[0] >> 16)
1776 1776 if startrev == endrev:
1777 1777 end = start + istart[1]
1778 1778 else:
1779 1779 iend = index[endrev]
1780 1780 end = int(iend[0] >> 16) + iend[1]
1781 1781
1782 1782 if self._inline:
1783 1783 start += (startrev + 1) * self.index.entry_size
1784 1784 end += (endrev + 1) * self.index.entry_size
1785 1785 length = end - start
1786 1786
1787 1787 return start, self._segmentfile.read_chunk(start, length, df)
1788 1788
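# Sketch of how a caller slices individual revisions out of the returned
# segment, mirroring what _chunks() below does (local variable names are
# illustrative):
#
#     offset, data = rl._getsegmentforrevs(firstrev, lastrev)
#     for rev in range(firstrev, lastrev + 1):
#         chunkstart = rl.start(rev)
#         if rl._inline:
#             chunkstart += (rev + 1) * rl.index.entry_size
#         chunk = util.buffer(data, chunkstart - offset, rl.length(rev))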
1789 1789 def _chunk(self, rev, df=None):
1790 1790 """Obtain a single decompressed chunk for a revision.
1791 1791
1792 1792 Accepts an integer revision and an optional already-open file handle
1793 1793 to be used for reading. If used, the seek position of the file will not
1794 1794 be preserved.
1795 1795
1796 1796 Returns a str holding uncompressed data for the requested revision.
1797 1797 """
1798 1798 compression_mode = self.index[rev][10]
1799 1799 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1800 1800 if compression_mode == COMP_MODE_PLAIN:
1801 1801 return data
1802 1802 elif compression_mode == COMP_MODE_DEFAULT:
1803 1803 return self._decompressor(data)
1804 1804 elif compression_mode == COMP_MODE_INLINE:
1805 1805 return self.decompress(data)
1806 1806 else:
1807 1807 msg = b'unknown compression mode %d'
1808 1808 msg %= compression_mode
1809 1809 raise error.RevlogError(msg)
1810 1810
1811 1811 def _chunks(self, revs, df=None, targetsize=None):
1812 1812 """Obtain decompressed chunks for the specified revisions.
1813 1813
1814 1814 Accepts an iterable of numeric revisions that are assumed to be in
1815 1815 ascending order. Also accepts an optional already-open file handle
1816 1816 to be used for reading. If used, the seek position of the file will
1817 1817 not be preserved.
1818 1818
1819 1819 This function is similar to calling ``self._chunk()`` multiple times,
1820 1820 but is faster.
1821 1821
1822 1822 Returns a list with decompressed data for each requested revision.
1823 1823 """
1824 1824 if not revs:
1825 1825 return []
1826 1826 start = self.start
1827 1827 length = self.length
1828 1828 inline = self._inline
1829 1829 iosize = self.index.entry_size
1830 1830 buffer = util.buffer
1831 1831
1832 1832 l = []
1833 1833 ladd = l.append
1834 1834
1835 1835 if not self._withsparseread:
1836 1836 slicedchunks = (revs,)
1837 1837 else:
1838 1838 slicedchunks = deltautil.slicechunk(
1839 1839 self, revs, targetsize=targetsize
1840 1840 )
1841 1841
1842 1842 for revschunk in slicedchunks:
1843 1843 firstrev = revschunk[0]
1844 1844 # Skip trailing revisions with empty diff
1845 1845 for lastrev in revschunk[::-1]:
1846 1846 if length(lastrev) != 0:
1847 1847 break
1848 1848
1849 1849 try:
1850 1850 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1851 1851 except OverflowError:
1852 1852 # issue4215 - we can't cache a run of chunks greater than
1853 1853 # 2G on Windows
1854 1854 return [self._chunk(rev, df=df) for rev in revschunk]
1855 1855
1856 1856 decomp = self.decompress
1857 1857 # self._decompressor might be None, but will not be used in that case
1858 1858 def_decomp = self._decompressor
1859 1859 for rev in revschunk:
1860 1860 chunkstart = start(rev)
1861 1861 if inline:
1862 1862 chunkstart += (rev + 1) * iosize
1863 1863 chunklength = length(rev)
1864 1864 comp_mode = self.index[rev][10]
1865 1865 c = buffer(data, chunkstart - offset, chunklength)
1866 1866 if comp_mode == COMP_MODE_PLAIN:
1867 1867 ladd(c)
1868 1868 elif comp_mode == COMP_MODE_INLINE:
1869 1869 ladd(decomp(c))
1870 1870 elif comp_mode == COMP_MODE_DEFAULT:
1871 1871 ladd(def_decomp(c))
1872 1872 else:
1873 1873 msg = b'unknown compression mode %d'
1874 1874 msg %= comp_mode
1875 1875 raise error.RevlogError(msg)
1876 1876
1877 1877 return l
1878 1878
1879 1879 def deltaparent(self, rev):
1880 1880 """return deltaparent of the given revision"""
1881 1881 base = self.index[rev][3]
1882 1882 if base == rev:
1883 1883 return nullrev
1884 1884 elif self._generaldelta:
1885 1885 return base
1886 1886 else:
1887 1887 return rev - 1
1888 1888
1889 1889 def issnapshot(self, rev):
1890 1890 """tells whether rev is a snapshot"""
1891 1891 if not self._sparserevlog:
1892 1892 return self.deltaparent(rev) == nullrev
1893 1893 elif util.safehasattr(self.index, 'issnapshot'):
1894 1894 # directly assign the method so later calls skip the attribute test and lookup
1895 1895 self.issnapshot = self.index.issnapshot
1896 1896 return self.issnapshot(rev)
1897 1897 if rev == nullrev:
1898 1898 return True
1899 1899 entry = self.index[rev]
1900 1900 base = entry[3]
1901 1901 if base == rev:
1902 1902 return True
1903 1903 if base == nullrev:
1904 1904 return True
1905 1905 p1 = entry[5]
1906 1906 while self.length(p1) == 0:
1907 1907 b = self.deltaparent(p1)
1908 1908 if b == p1:
1909 1909 break
1910 1910 p1 = b
1911 1911 p2 = entry[6]
1912 1912 while self.length(p2) == 0:
1913 1913 b = self.deltaparent(p2)
1914 1914 if b == p2:
1915 1915 break
1916 1916 p2 = b
1917 1917 if base == p1 or base == p2:
1918 1918 return False
1919 1919 return self.issnapshot(base)
1920 1920
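# Summary sketch of the logic above: under sparse-revlog, a revision is
# a snapshot iff it stores full text (delta base is nullrev or itself)
# or is an intermediate snapshot whose delta base is neither parent and
# is itself a snapshot. Hypothetical caller code:
#
#     if rl.issnapshot(rev):
#         depth = rl.snapshotdepth(rev)  # snapshots earlier in the chain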
1921 1921 def snapshotdepth(self, rev):
1922 1922 """number of snapshot in the chain before this one"""
1923 1923 if not self.issnapshot(rev):
1924 1924 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1925 1925 return len(self._deltachain(rev)[0]) - 1
1926 1926
1927 1927 def revdiff(self, rev1, rev2):
1928 1928 """return or calculate a delta between two revisions
1929 1929
1930 1930 The delta calculated is in binary form and is intended to be written to
1931 1931 revlog data directly. So this function needs raw revision data.
1932 1932 """
1933 1933 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1934 1934 return bytes(self._chunk(rev2))
1935 1935
1936 1936 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1937 1937
1938 1938 def revision(self, nodeorrev, _df=None):
1939 1939 """return an uncompressed revision of a given node or revision
1940 1940 number.
1941 1941
1942 1942 _df - an existing file handle to read from. (internal-only)
1943 1943 """
1944 1944 return self._revisiondata(nodeorrev, _df)
1945 1945
1946 1946 def sidedata(self, nodeorrev, _df=None):
1947 1947 """a map of extra data related to the changeset but not part of the hash
1948 1948
1949 1949 This function currently returns a dictionary. However, a more advanced
1950 1950 mapping object will likely be used in the future for more
1951 1951 efficient/lazy code.
1952 1952 """
1953 1953 # deal with <nodeorrev> argument type
1954 1954 if isinstance(nodeorrev, int):
1955 1955 rev = nodeorrev
1956 1956 else:
1957 1957 rev = self.rev(nodeorrev)
1958 1958 return self._sidedata(rev)
1959 1959
1960 1960 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1961 1961 # deal with <nodeorrev> argument type
1962 1962 if isinstance(nodeorrev, int):
1963 1963 rev = nodeorrev
1964 1964 node = self.node(rev)
1965 1965 else:
1966 1966 node = nodeorrev
1967 1967 rev = None
1968 1968
1969 1969 # fast path the special `nullid` rev
1970 1970 if node == self.nullid:
1971 1971 return b""
1972 1972
1973 1973 # ``rawtext`` is the text as stored inside the revlog. Might be the
1974 1974 # revision or might need to be processed to retrieve the revision.
1975 1975 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1976 1976
1977 1977 if raw and validated:
1978 1978 # if we don't want to process the raw text and that raw
1979 1979 # text is cached, we can exit early.
1980 1980 return rawtext
1981 1981 if rev is None:
1982 1982 rev = self.rev(node)
1983 1983 # the revlog's flags for this revision
1984 1984 # (they usually alter its state or content)
1985 1985 flags = self.flags(rev)
1986 1986
1987 1987 if validated and flags == REVIDX_DEFAULT_FLAGS:
1988 1988 # no extra flags set, no flag processor runs, text = rawtext
1989 1989 return rawtext
1990 1990
1991 1991 if raw:
1992 1992 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1993 1993 text = rawtext
1994 1994 else:
1995 1995 r = flagutil.processflagsread(self, rawtext, flags)
1996 1996 text, validatehash = r
1997 1997 if validatehash:
1998 1998 self.checkhash(text, node, rev=rev)
1999 1999 if not validated:
2000 2000 self._revisioncache = (node, rev, rawtext)
2001 2001
2002 2002 return text
2003 2003
2004 2004 def _rawtext(self, node, rev, _df=None):
2005 2005 """return the possibly unvalidated rawtext for a revision
2006 2006
2007 2007 returns (rev, rawtext, validated)
2008 2008 """
2009 2009
2010 2010 # revision in the cache (could be useful to apply delta)
2011 2011 cachedrev = None
2012 2012 # An intermediate text to apply deltas to
2013 2013 basetext = None
2014 2014
2015 2015 # Check if we have the entry in cache
2016 2016 # The cache entry looks like (node, rev, rawtext)
2017 2017 if self._revisioncache:
2018 2018 if self._revisioncache[0] == node:
2019 2019 return (rev, self._revisioncache[2], True)
2020 2020 cachedrev = self._revisioncache[1]
2021 2021
2022 2022 if rev is None:
2023 2023 rev = self.rev(node)
2024 2024
2025 2025 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2026 2026 if stopped:
2027 2027 basetext = self._revisioncache[2]
2028 2028
2029 2029 # drop the cache to save memory; the caller is expected to
2030 2030 # update self._revisioncache after validating the text
2031 2031 self._revisioncache = None
2032 2032
2033 2033 targetsize = None
2034 2034 rawsize = self.index[rev][2]
2035 2035 if 0 <= rawsize:
2036 2036 targetsize = 4 * rawsize
2037 2037
2038 2038 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2039 2039 if basetext is None:
2040 2040 basetext = bytes(bins[0])
2041 2041 bins = bins[1:]
2042 2042
2043 2043 rawtext = mdiff.patches(basetext, bins)
2044 2044 del basetext # let us have a chance to free memory early
2045 2045 return (rev, rawtext, False)
2046 2046
2047 2047 def _sidedata(self, rev):
2048 2048 """Return the sidedata for a given revision number."""
2049 2049 index_entry = self.index[rev]
2050 2050 sidedata_offset = index_entry[8]
2051 2051 sidedata_size = index_entry[9]
2052 2052
2053 2053 if self._inline:
2054 2054 sidedata_offset += self.index.entry_size * (1 + rev)
2055 2055 if sidedata_size == 0:
2056 2056 return {}
2057 2057
2058 2058 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2059 2059 filename = self._sidedatafile
2060 2060 end = self._docket.sidedata_end
2061 2061 offset = sidedata_offset
2062 2062 length = sidedata_size
2063 2063 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2064 2064 raise error.RevlogError(m)
2065 2065
2066 2066 comp_segment = self._segmentfile_sidedata.read_chunk(
2067 2067 sidedata_offset, sidedata_size
2068 2068 )
2069 2069
2070 2070 comp = self.index[rev][11]
2071 2071 if comp == COMP_MODE_PLAIN:
2072 2072 segment = comp_segment
2073 2073 elif comp == COMP_MODE_DEFAULT:
2074 2074 segment = self._decompressor(comp_segment)
2075 2075 elif comp == COMP_MODE_INLINE:
2076 2076 segment = self.decompress(comp_segment)
2077 2077 else:
2078 2078 msg = b'unknown compression mode %d'
2079 2079 msg %= comp
2080 2080 raise error.RevlogError(msg)
2081 2081
2082 2082 sidedata = sidedatautil.deserialize_sidedata(segment)
2083 2083 return sidedata
2084 2084
2085 2085 def rawdata(self, nodeorrev, _df=None):
2086 2086 """return an uncompressed raw data of a given node or revision number.
2087 2087
2088 2088 _df - an existing file handle to read from. (internal-only)
2089 2089 """
2090 2090 return self._revisiondata(nodeorrev, _df, raw=True)
2091 2091
2092 2092 def hash(self, text, p1, p2):
2093 2093 """Compute a node hash.
2094 2094
2095 2095 Available as a function so that subclasses can replace the hash
2096 2096 as needed.
2097 2097 """
2098 2098 return storageutil.hashrevisionsha1(text, p1, p2)
2099 2099
2100 2100 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2101 2101 """Check node hash integrity.
2102 2102
2103 2103 Available as a function so that subclasses can extend hash mismatch
2104 2104 behaviors as needed.
2105 2105 """
2106 2106 try:
2107 2107 if p1 is None and p2 is None:
2108 2108 p1, p2 = self.parents(node)
2109 2109 if node != self.hash(text, p1, p2):
2110 2110 # Clear the revision cache on hash failure. The revision cache
2111 2111 # only stores the raw revision and clearing the cache does have
2112 2112 # the side-effect that we won't have a cache hit when the raw
2113 2113 # revision data is accessed. But this case should be rare and
2114 2114 # it is extra work to teach the cache about the hash
2115 2115 # verification state.
2116 2116 if self._revisioncache and self._revisioncache[0] == node:
2117 2117 self._revisioncache = None
2118 2118
2119 2119 revornode = rev
2120 2120 if revornode is None:
2121 2121 revornode = templatefilters.short(hex(node))
2122 2122 raise error.RevlogError(
2123 2123 _(b"integrity check failed on %s:%s")
2124 2124 % (self.display_id, pycompat.bytestr(revornode))
2125 2125 )
2126 2126 except error.RevlogError:
2127 2127 if self._censorable and storageutil.iscensoredtext(text):
2128 2128 raise error.CensoredNodeError(self.display_id, node, text)
2129 2129 raise
2130 2130
2131 2131 @property
2132 2132 def _split_index_file(self):
2133 2133 """the path where to expect the index of an ongoing splitting operation
2134 2134
2135 2135 The file will only exist if a splitting operation is in progress, but
2136 2136 it is always expected at the same location."""
2137 2137 parts = self.radix.split(b'/')
2138 2138 if len(parts) > 1:
2139 2139 # adds a '-s' suffix to the ``data/`` or ``meta/`` base
2140 2140 head = parts[0] + b'-s'
2141 2141 mids = parts[1:-1]
2142 2142 tail = parts[-1] + b'.i'
2143 2143 pieces = [head] + mids + [tail]
2144 2144 return b'/'.join(pieces)
2145 2145 else:
2146 2146 # the revlog is stored at the root of the store (changelog or
2147 2147 # manifest), no risk of collision.
2148 2148 return self.radix + b'.i.s'
2149 2149
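# Illustration of the path rewriting above (example values only; the
# radix never includes the '.i' extension):
#
#     radix b'data/foo'    -> b'data-s/foo.i'      # '-s' on the base
#     radix b'00changelog' -> b'00changelog.i.s'   # revlog at store root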
2150 2150 def _enforceinlinesize(self, tr, side_write=True):
2151 2151 """Check if the revlog is too big for inline and convert if so.
2152 2152
2153 2153 This should be called after revisions are added to the revlog. If the
2154 2154 revlog has grown too large to be an inline revlog, it will convert it
2155 2155 to use multiple index and data files.
2156 2156 """
2157 2157 tiprev = len(self) - 1
2158 2158 total_size = self.start(tiprev) + self.length(tiprev)
2159 2159 if not self._inline or total_size < _maxinline:
2160 2160 return
2161 2161
2162 2162 troffset = tr.findoffset(self._indexfile)
2163 2163 if troffset is None:
2164 2164 raise error.RevlogError(
2165 2165 _(b"%s not found in the transaction") % self._indexfile
2166 2166 )
2167 2167 if troffset:
2168 2168 tr.addbackup(self._indexfile, for_offset=True)
2169 2169 tr.add(self._datafile, 0)
2170 2170
2171 2171 existing_handles = False
2172 2172 if self._writinghandles is not None:
2173 2173 existing_handles = True
2174 2174 fp = self._writinghandles[0]
2175 2175 fp.flush()
2176 2176 fp.close()
2177 2177 # We can't use the cached file handle after close(). So prevent
2178 2178 # its usage.
2179 2179 self._writinghandles = None
2180 2180 self._segmentfile.writing_handle = None
2181 2181 # No need to deal with the sidedata writing handle as it is only
2182 2182 # relevant with revlog-v2, which is never inline and thus never
2183 2183 # reaches this code
2184 2184 if side_write:
2185 2185 old_index_file_path = self._indexfile
2186 2186 new_index_file_path = self._split_index_file
2187 2187 opener = self.opener
2188 2188 weak_self = weakref.ref(self)
2189 2189
2190 2190 # the "split" index replace the real index when the transaction is finalized
2191 2191 def finalize_callback(tr):
2192 2192 opener.rename(
2193 2193 new_index_file_path,
2194 2194 old_index_file_path,
2195 2195 checkambig=True,
2196 2196 )
2197 2197 maybe_self = weak_self()
2198 2198 if maybe_self is not None:
2199 2199 maybe_self._indexfile = old_index_file_path
2200 2200
2201 2201 def abort_callback(tr):
2202 2202 maybe_self = weak_self()
2203 2203 if maybe_self is not None:
2204 2204 maybe_self._indexfile = old_index_file_path
2205 2205
2206 2206 tr.registertmp(new_index_file_path)
2207 2207 if self.target[1] is not None:
2208 2208 callback_id = b'000-revlog-split-%d-%s' % self.target
2209 2209 else:
2210 2210 callback_id = b'000-revlog-split-%d' % self.target[0]
2211 2211 tr.addfinalize(callback_id, finalize_callback)
2212 2212 tr.addabort(callback_id, abort_callback)
2213 2213
2214 2214 new_dfh = self._datafp(b'w+')
2215 2215 new_dfh.truncate(0) # drop any potentially existing data
2216 2216 try:
2217 2217 with self._indexfp() as read_ifh:
2218 2218 for r in self:
2219 2219 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2220 2220 new_dfh.flush()
2221 2221
2222 2222 if side_write:
2223 2223 self._indexfile = new_index_file_path
2224 2224 with self.__index_new_fp() as fp:
2225 2225 self._format_flags &= ~FLAG_INLINE_DATA
2226 2226 self._inline = False
2227 2227 for i in self:
2228 2228 e = self.index.entry_binary(i)
2229 2229 if i == 0 and self._docket is None:
2230 2230 header = self._format_flags | self._format_version
2231 2231 header = self.index.pack_header(header)
2232 2232 e = header + e
2233 2233 fp.write(e)
2234 2234 if self._docket is not None:
2235 2235 self._docket.index_end = fp.tell()
2236 2236
2237 2237 # If we don't use side-write, the temp file replaces the real
2238 2238 # index when we exit the context manager
2239 2239
2240 2240 nodemaputil.setup_persistent_nodemap(tr, self)
2241 2241 self._segmentfile = randomaccessfile.randomaccessfile(
2242 2242 self.opener,
2243 2243 self._datafile,
2244 2244 self._chunkcachesize,
2245 2245 )
2246 2246
2247 2247 if existing_handles:
2248 2248 # switched from inline to conventional; reopen the index
2249 2249 ifh = self.__index_write_fp()
2250 2250 self._writinghandles = (ifh, new_dfh, None)
2251 2251 self._segmentfile.writing_handle = new_dfh
2252 2252 new_dfh = None
2253 2253 # No need to deal with the sidedata writing handle as it is only
2254 2254 # relevant with revlog-v2, which is never inline and thus never
2255 2255 # reaches this code
2256 2256 finally:
2257 2257 if new_dfh is not None:
2258 2258 new_dfh.close()
2259 2259
2260 2260 def _nodeduplicatecallback(self, transaction, node):
2261 2261 """called when trying to add a node already stored."""
2262 2262
2263 2263 @contextlib.contextmanager
2264 2264 def reading(self):
2265 2265 """Context manager that keeps data and sidedata files open for reading"""
2266 2266 with self._segmentfile.reading():
2267 2267 with self._segmentfile_sidedata.reading():
2268 2268 yield
2269 2269
2270 2270 @contextlib.contextmanager
2271 2271 def _writing(self, transaction):
2272 2272 if self._trypending:
2273 2273 msg = b'try to write in a `trypending` revlog: %s'
2274 2274 msg %= self.display_id
2275 2275 raise error.ProgrammingError(msg)
2276 2276 if self._writinghandles is not None:
2277 2277 yield
2278 2278 else:
2279 2279 ifh = dfh = sdfh = None
2280 2280 try:
2281 2281 r = len(self)
2282 2282 # opening the data file.
2283 2283 dsize = 0
2284 2284 if r:
2285 2285 dsize = self.end(r - 1)
2286 2286 dfh = None
2287 2287 if not self._inline:
2288 2288 try:
2289 2289 dfh = self._datafp(b"r+")
2290 2290 if self._docket is None:
2291 2291 dfh.seek(0, os.SEEK_END)
2292 2292 else:
2293 2293 dfh.seek(self._docket.data_end, os.SEEK_SET)
2294 2294 except FileNotFoundError:
2295 2295 dfh = self._datafp(b"w+")
2296 2296 transaction.add(self._datafile, dsize)
2297 2297 if self._sidedatafile is not None:
2298 2298 # revlog-v2 does not inline, help Pytype
2299 2299 assert dfh is not None
2300 2300 try:
2301 2301 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2302 2302 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2303 2303 except FileNotFoundError:
2304 2304 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2305 2305 transaction.add(
2306 2306 self._sidedatafile, self._docket.sidedata_end
2307 2307 )
2308 2308
2309 2309 # opening the index file.
2310 2310 isize = r * self.index.entry_size
2311 2311 ifh = self.__index_write_fp()
2312 2312 if self._inline:
2313 2313 transaction.add(self._indexfile, dsize + isize)
2314 2314 else:
2315 2315 transaction.add(self._indexfile, isize)
2316 2316 # exposing all file handle for writing.
2317 2317 self._writinghandles = (ifh, dfh, sdfh)
2318 2318 self._segmentfile.writing_handle = ifh if self._inline else dfh
2319 2319 self._segmentfile_sidedata.writing_handle = sdfh
2320 2320 yield
2321 2321 if self._docket is not None:
2322 2322 self._write_docket(transaction)
2323 2323 finally:
2324 2324 self._writinghandles = None
2325 2325 self._segmentfile.writing_handle = None
2326 2326 self._segmentfile_sidedata.writing_handle = None
2327 2327 if dfh is not None:
2328 2328 dfh.close()
2329 2329 if sdfh is not None:
2330 2330 sdfh.close()
2331 2331 # close the index file last to avoid exposing references to
2332 2332 # potentially unflushed data content.
2333 2333 if ifh is not None:
2334 2334 ifh.close()
2335 2335
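# Usage sketch: write paths wrap their work in the context manager
# above, which opens and later closes the index/data/sidedata handles
# (see addrawrevision() below):
#
#     with self._writing(transaction):
#         self._addrevision(...)
#
# Nested use is cheap: when handles are already open, _writing() simply
# yields.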
2336 2336 def _write_docket(self, transaction):
2337 2337 """write the current docket on disk
2338 2338
2339 2339 Exists as a method to help the changelog implement transaction logic.
2340 2340
2341 2341 We could also imagine using the same transaction logic for all revlogs
2342 2342 since dockets are cheap."""
2343 2343 self._docket.write(transaction)
2344 2344
2345 2345 def addrevision(
2346 2346 self,
2347 2347 text,
2348 2348 transaction,
2349 2349 link,
2350 2350 p1,
2351 2351 p2,
2352 2352 cachedelta=None,
2353 2353 node=None,
2354 2354 flags=REVIDX_DEFAULT_FLAGS,
2355 2355 deltacomputer=None,
2356 2356 sidedata=None,
2357 2357 ):
2358 2358 """add a revision to the log
2359 2359
2360 2360 text - the revision data to add
2361 2361 transaction - the transaction object used for rollback
2362 2362 link - the linkrev data to add
2363 2363 p1, p2 - the parent nodeids of the revision
2364 2364 cachedelta - an optional precomputed delta
2365 2365 node - nodeid of revision; typically node is not specified, and it is
2366 2366 computed by default as hash(text, p1, p2); however, subclasses might
2367 2367 use a different hashing method (and override checkhash() in that case)
2368 2368 flags - the known flags to set on the revision
2369 2369 deltacomputer - an optional deltacomputer instance shared between
2370 2370 multiple calls
2371 2371 """
2372 2372 if link == nullrev:
2373 2373 raise error.RevlogError(
2374 2374 _(b"attempted to add linkrev -1 to %s") % self.display_id
2375 2375 )
2376 2376
2377 2377 if sidedata is None:
2378 2378 sidedata = {}
2379 2379 elif sidedata and not self.hassidedata:
2380 2380 raise error.ProgrammingError(
2381 2381 _(b"trying to add sidedata to a revlog who don't support them")
2382 2382 )
2383 2383
2384 2384 if flags:
2385 2385 node = node or self.hash(text, p1, p2)
2386 2386
2387 2387 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2388 2388
2389 2389 # If the flag processor modifies the revision data, ignore any provided
2390 2390 # cachedelta.
2391 2391 if rawtext != text:
2392 2392 cachedelta = None
2393 2393
2394 2394 if len(rawtext) > _maxentrysize:
2395 2395 raise error.RevlogError(
2396 2396 _(
2397 2397 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2398 2398 )
2399 2399 % (self.display_id, len(rawtext))
2400 2400 )
2401 2401
2402 2402 node = node or self.hash(rawtext, p1, p2)
2403 2403 rev = self.index.get_rev(node)
2404 2404 if rev is not None:
2405 2405 return rev
2406 2406
2407 2407 if validatehash:
2408 2408 self.checkhash(rawtext, node, p1=p1, p2=p2)
2409 2409
2410 2410 return self.addrawrevision(
2411 2411 rawtext,
2412 2412 transaction,
2413 2413 link,
2414 2414 p1,
2415 2415 p2,
2416 2416 node,
2417 2417 flags,
2418 2418 cachedelta=cachedelta,
2419 2419 deltacomputer=deltacomputer,
2420 2420 sidedata=sidedata,
2421 2421 )
2422 2422
2423 2423 def addrawrevision(
2424 2424 self,
2425 2425 rawtext,
2426 2426 transaction,
2427 2427 link,
2428 2428 p1,
2429 2429 p2,
2430 2430 node,
2431 2431 flags,
2432 2432 cachedelta=None,
2433 2433 deltacomputer=None,
2434 2434 sidedata=None,
2435 2435 ):
2436 2436 """add a raw revision with known flags, node and parents
2437 2437 useful when reusing a revision not stored in this revlog (e.g. received
2438 2438 over the wire, or read from an external bundle).
2439 2439 """
2440 2440 with self._writing(transaction):
2441 2441 return self._addrevision(
2442 2442 node,
2443 2443 rawtext,
2444 2444 transaction,
2445 2445 link,
2446 2446 p1,
2447 2447 p2,
2448 2448 flags,
2449 2449 cachedelta,
2450 2450 deltacomputer=deltacomputer,
2451 2451 sidedata=sidedata,
2452 2452 )
2453 2453
2454 2454 def compress(self, data):
2455 2455 """Generate a possibly-compressed representation of data."""
2456 2456 if not data:
2457 2457 return b'', data
2458 2458
2459 2459 compressed = self._compressor.compress(data)
2460 2460
2461 2461 if compressed:
2462 2462 # The revlog compressor added the header in the returned data.
2463 2463 return b'', compressed
2464 2464
2465 2465 if data[0:1] == b'\0':
2466 2466 return b'', data
2467 2467 return b'u', data
2468 2468
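# Sketch of the (header, data) contract above (hypothetical input):
#
#     h, d = rl.compress(b'some revision text')
#     # h == b''  -> d is engine-compressed (carries its own header) or
#     #              raw data that already starts with b'\0'
#     # h == b'u' -> d is stored uncompressed; the b'u' marker must be
#     #              written in front of it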
2469 2469 def decompress(self, data):
2470 2470 """Decompress a revlog chunk.
2471 2471
2472 2472 The chunk is expected to begin with a header identifying the
2473 2473 format type so it can be routed to an appropriate decompressor.
2474 2474 """
2475 2475 if not data:
2476 2476 return data
2477 2477
2478 2478 # Revlogs are read much more frequently than they are written and many
2479 2479 # chunks only take microseconds to decompress, so performance is
2480 2480 # important here.
2481 2481 #
2482 2482 # We can make a few assumptions about revlogs:
2483 2483 #
2484 2484 # 1) the majority of chunks will be compressed (as opposed to inline
2485 2485 # raw data).
2486 2486 # 2) decompressing *any* data will likely be at least 10x slower than
2487 2487 # returning raw inline data.
2488 2488 # 3) we want to prioritize common and officially supported compression
2489 2489 # engines
2490 2490 #
2491 2491 # It follows that we want to optimize for "decompress compressed data
2492 2492 # when encoded with common and officially supported compression engines"
2493 2493 # case over "raw data" and "data encoded by less common or non-official
2494 2494 # compression engines." That is why we have the inline lookup first
2495 2495 # followed by the compengines lookup.
2496 2496 #
2497 2497 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2498 2498 # compressed chunks. And this matters for changelog and manifest reads.
2499 2499 t = data[0:1]
2500 2500
2501 2501 if t == b'x':
2502 2502 try:
2503 2503 return _zlibdecompress(data)
2504 2504 except zlib.error as e:
2505 2505 raise error.RevlogError(
2506 2506 _(b'revlog decompress error: %s')
2507 2507 % stringutil.forcebytestr(e)
2508 2508 )
2509 2509 # '\0' is more common than 'u' so it goes first.
2510 2510 elif t == b'\0':
2511 2511 return data
2512 2512 elif t == b'u':
2513 2513 return util.buffer(data, 1)
2514 2514
2515 2515 compressor = self._get_decompressor(t)
2516 2516
2517 2517 return compressor.decompress(data)
2518 2518
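# Dispatch summary for the header byte examined above:
#
#     b'x'   -> zlib-compressed chunk
#     b'\0'  -> raw data, returned as-is
#     b'u'   -> uncompressed data, header byte stripped
#     other  -> routed to the matching compression engine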
2519 2519 def _addrevision(
2520 2520 self,
2521 2521 node,
2522 2522 rawtext,
2523 2523 transaction,
2524 2524 link,
2525 2525 p1,
2526 2526 p2,
2527 2527 flags,
2528 2528 cachedelta,
2529 2529 alwayscache=False,
2530 2530 deltacomputer=None,
2531 2531 sidedata=None,
2532 2532 ):
2533 2533 """internal function to add revisions to the log
2534 2534
2535 2535 see addrevision for argument descriptions.
2536 2536
2537 2537 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2538 2538
2539 2539 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2540 2540 be used.
2541 2541
2542 2542 invariants:
2543 2543 - rawtext is optional (can be None); if not set, cachedelta must be set.
2544 2544 if both are set, they must correspond to each other.
2545 2545 """
2546 2546 if node == self.nullid:
2547 2547 raise error.RevlogError(
2548 2548 _(b"%s: attempt to add null revision") % self.display_id
2549 2549 )
2550 2550 if (
2551 2551 node == self.nodeconstants.wdirid
2552 2552 or node in self.nodeconstants.wdirfilenodeids
2553 2553 ):
2554 2554 raise error.RevlogError(
2555 2555 _(b"%s: attempt to add wdir revision") % self.display_id
2556 2556 )
2557 2557 if self._writinghandles is None:
2558 2558 msg = b'adding revision outside `revlog._writing` context'
2559 2559 raise error.ProgrammingError(msg)
2560 2560
2561 2561 if self._inline:
2562 2562 fh = self._writinghandles[0]
2563 2563 else:
2564 2564 fh = self._writinghandles[1]
2565 2565
2566 2566 btext = [rawtext]
2567 2567
2568 2568 curr = len(self)
2569 2569 prev = curr - 1
2570 2570
2571 2571 offset = self._get_data_offset(prev)
2572 2572
2573 2573 if self._concurrencychecker:
2574 2574 ifh, dfh, sdfh = self._writinghandles
2575 2575 # XXX no checking for the sidedata file
2576 2576 if self._inline:
2577 2577 # offset is "as if" it were in the .d file, so we need to add on
2578 2578 # the size of the entry metadata.
2579 2579 self._concurrencychecker(
2580 2580 ifh, self._indexfile, offset + curr * self.index.entry_size
2581 2581 )
2582 2582 else:
2583 2583 # Entries in the .i are a consistent size.
2584 2584 self._concurrencychecker(
2585 2585 ifh, self._indexfile, curr * self.index.entry_size
2586 2586 )
2587 2587 self._concurrencychecker(dfh, self._datafile, offset)
2588 2588
2589 2589 p1r, p2r = self.rev(p1), self.rev(p2)
2590 2590
2591 2591 # full versions are inserted when the needed deltas
2592 2592 # become comparable to the uncompressed text
2593 2593 if rawtext is None:
2594 2594 # we need the rawtext size before it was changed by flag processors,
2595 2595 # which is the non-raw size. Use revlog explicitly to avoid filelog's
2596 2596 # extra logic that might remove metadata size.
2597 2597 textlen = mdiff.patchedsize(
2598 2598 revlog.size(self, cachedelta[0]), cachedelta[1]
2599 2599 )
2600 2600 else:
2601 2601 textlen = len(rawtext)
2602 2602
2603 2603 if deltacomputer is None:
2604 2604 write_debug = None
2605 2605 if self._debug_delta:
2606 2606 write_debug = transaction._report
2607 2607 deltacomputer = deltautil.deltacomputer(
2608 2608 self, write_debug=write_debug
2609 2609 )
2610 2610
2611 2611 if cachedelta is not None and len(cachedelta) == 2:
2612 2612 # If the cached delta has no information about how it should be
2613 2613 # reused, add the default reuse instruction according to the
2614 2614 # revlog's configuration.
2615 2615 if self._generaldelta and self._lazydeltabase:
2616 2616 delta_base_reuse = DELTA_BASE_REUSE_TRY
2617 2617 else:
2618 2618 delta_base_reuse = DELTA_BASE_REUSE_NO
2619 2619 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2620 2620
2621 2621 revinfo = revlogutils.revisioninfo(
2622 2622 node,
2623 2623 p1,
2624 2624 p2,
2625 2625 btext,
2626 2626 textlen,
2627 2627 cachedelta,
2628 2628 flags,
2629 2629 )
2630 2630
2631 2631 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2632 2632
2633 2633 compression_mode = COMP_MODE_INLINE
2634 2634 if self._docket is not None:
2635 2635 default_comp = self._docket.default_compression_header
2636 2636 r = deltautil.delta_compression(default_comp, deltainfo)
2637 2637 compression_mode, deltainfo = r
2638 2638
2639 2639 sidedata_compression_mode = COMP_MODE_INLINE
2640 2640 if sidedata and self.hassidedata:
2641 2641 sidedata_compression_mode = COMP_MODE_PLAIN
2642 2642 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2643 2643 sidedata_offset = self._docket.sidedata_end
2644 2644 h, comp_sidedata = self.compress(serialized_sidedata)
2645 2645 if (
2646 2646 h != b'u'
2647 2647 and comp_sidedata[0:1] != b'\0'
2648 2648 and len(comp_sidedata) < len(serialized_sidedata)
2649 2649 ):
2650 2650 assert not h
2651 2651 if (
2652 2652 comp_sidedata[0:1]
2653 2653 == self._docket.default_compression_header
2654 2654 ):
2655 2655 sidedata_compression_mode = COMP_MODE_DEFAULT
2656 2656 serialized_sidedata = comp_sidedata
2657 2657 else:
2658 2658 sidedata_compression_mode = COMP_MODE_INLINE
2659 2659 serialized_sidedata = comp_sidedata
2660 2660 else:
2661 2661 serialized_sidedata = b""
2662 2662 # Don't store the offset if the sidedata is empty; that way
2663 2663 # we can easily detect empty sidedata, and it will be no different
2664 2664 # from sidedata we add manually.
2665 2665 sidedata_offset = 0
2666 2666
2667 2667 rank = RANK_UNKNOWN
2668 2668 if self._compute_rank:
2669 2669 if (p1r, p2r) == (nullrev, nullrev):
2670 2670 rank = 1
2671 2671 elif p1r != nullrev and p2r == nullrev:
2672 2672 rank = 1 + self.fast_rank(p1r)
2673 2673 elif p1r == nullrev and p2r != nullrev:
2674 2674 rank = 1 + self.fast_rank(p2r)
2675 2675 else: # merge node
2676 2676 if rustdagop is not None and self.index.rust_ext_compat:
2677 2677 rank = rustdagop.rank(self.index, p1r, p2r)
2678 2678 else:
2679 2679 pmin, pmax = sorted((p1r, p2r))
2680 2680 rank = 1 + self.fast_rank(pmax)
2681 2681 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2682 2682
2683 2683 e = revlogutils.entry(
2684 2684 flags=flags,
2685 2685 data_offset=offset,
2686 2686 data_compressed_length=deltainfo.deltalen,
2687 2687 data_uncompressed_length=textlen,
2688 2688 data_compression_mode=compression_mode,
2689 2689 data_delta_base=deltainfo.base,
2690 2690 link_rev=link,
2691 2691 parent_rev_1=p1r,
2692 2692 parent_rev_2=p2r,
2693 2693 node_id=node,
2694 2694 sidedata_offset=sidedata_offset,
2695 2695 sidedata_compressed_length=len(serialized_sidedata),
2696 2696 sidedata_compression_mode=sidedata_compression_mode,
2697 2697 rank=rank,
2698 2698 )
2699 2699
2700 2700 self.index.append(e)
2701 2701 entry = self.index.entry_binary(curr)
2702 2702 if curr == 0 and self._docket is None:
2703 2703 header = self._format_flags | self._format_version
2704 2704 header = self.index.pack_header(header)
2705 2705 entry = header + entry
2706 2706 self._writeentry(
2707 2707 transaction,
2708 2708 entry,
2709 2709 deltainfo.data,
2710 2710 link,
2711 2711 offset,
2712 2712 serialized_sidedata,
2713 2713 sidedata_offset,
2714 2714 )
2715 2715
2716 2716 rawtext = btext[0]
2717 2717
2718 2718 if alwayscache and rawtext is None:
2719 2719 rawtext = deltacomputer.buildtext(revinfo, fh)
2720 2720
2721 2721 if type(rawtext) == bytes: # only accept immutable objects
2722 2722 self._revisioncache = (node, curr, rawtext)
2723 2723 self._chainbasecache[curr] = deltainfo.chainbase
2724 2724 return curr
2725 2725
2726 2726 def _get_data_offset(self, prev):
2727 2727 """Returns the current offset in the (in-transaction) data file.
2728 2728 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2729 2729 file to store that information: since sidedata can be rewritten to the
2730 2730 end of the data file within a transaction, you can have cases where, for
2731 2731 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2732 2732 to `n - 1`'s sidedata being written after `n`'s data.
2733 2733
2734 2734 TODO cache this in a docket file before getting out of experimental."""
2735 2735 if self._docket is None:
2736 2736 return self.end(prev)
2737 2737 else:
2738 2738 return self._docket.data_end
2739 2739
2740 2740 def _writeentry(
2741 2741 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2742 2742 ):
2743 2743 # Files opened in a+ mode have inconsistent behavior on various
2744 2744 # platforms. Windows requires that a file positioning call be made
2745 2745 # when the file handle transitions between reads and writes. See
2746 2746 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2747 2747 # platforms, Python or the platform itself can be buggy. Some versions
2748 2748 # of Solaris have been observed to not append at the end of the file
2749 2749 # if the file was seeked to before the end. See issue4943 for more.
2750 2750 #
2751 2751 # We work around this issue by inserting a seek() before writing.
2752 2752 # Note: This is likely not necessary on Python 3. However, because
2753 2753 # the file handle is reused for reads and may be seeked there, we need
2754 2754 # to be careful before changing this.
2755 2755 if self._writinghandles is None:
2756 2756 msg = b'adding revision outside `revlog._writing` context'
2757 2757 raise error.ProgrammingError(msg)
2758 2758 ifh, dfh, sdfh = self._writinghandles
2759 2759 if self._docket is None:
2760 2760 ifh.seek(0, os.SEEK_END)
2761 2761 else:
2762 2762 ifh.seek(self._docket.index_end, os.SEEK_SET)
2763 2763 if dfh:
2764 2764 if self._docket is None:
2765 2765 dfh.seek(0, os.SEEK_END)
2766 2766 else:
2767 2767 dfh.seek(self._docket.data_end, os.SEEK_SET)
2768 2768 if sdfh:
2769 2769 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2770 2770
2771 2771 curr = len(self) - 1
2772 2772 if not self._inline:
2773 2773 transaction.add(self._datafile, offset)
2774 2774 if self._sidedatafile:
2775 2775 transaction.add(self._sidedatafile, sidedata_offset)
2776 2776 transaction.add(self._indexfile, curr * len(entry))
2777 2777 if data[0]:
2778 2778 dfh.write(data[0])
2779 2779 dfh.write(data[1])
2780 2780 if sidedata:
2781 2781 sdfh.write(sidedata)
2782 2782 ifh.write(entry)
2783 2783 else:
2784 2784 offset += curr * self.index.entry_size
2785 2785 transaction.add(self._indexfile, offset)
2786 2786 ifh.write(entry)
2787 2787 ifh.write(data[0])
2788 2788 ifh.write(data[1])
2789 2789 assert not sidedata
2790 2790 self._enforceinlinesize(transaction)
2791 2791 if self._docket is not None:
2792 2792 # revlog-v2 always has 3 writing handles, help Pytype
2793 2793 wh1 = self._writinghandles[0]
2794 2794 wh2 = self._writinghandles[1]
2795 2795 wh3 = self._writinghandles[2]
2796 2796 assert wh1 is not None
2797 2797 assert wh2 is not None
2798 2798 assert wh3 is not None
2799 2799 self._docket.index_end = wh1.tell()
2800 2800 self._docket.data_end = wh2.tell()
2801 2801 self._docket.sidedata_end = wh3.tell()
2802 2802
2803 2803 nodemaputil.setup_persistent_nodemap(transaction, self)
2804 2804
2805 2805 def addgroup(
2806 2806 self,
2807 2807 deltas,
2808 2808 linkmapper,
2809 2809 transaction,
2810 2810 alwayscache=False,
2811 2811 addrevisioncb=None,
2812 2812 duplicaterevisioncb=None,
2813 2813 debug_info=None,
2814 2814 delta_base_reuse_policy=None,
2815 2815 ):
2816 2816 """
2817 2817 add a delta group
2818 2818
2819 2819 given a set of deltas, add them to the revision log. the
2820 2820 first delta is against its parent, which should be in our
2821 2821 log; the rest are against the previous delta.
2822 2822
2823 2823 If ``addrevisioncb`` is defined, it will be called with arguments of
2824 2824 this revlog and the node that was added.
2825 2825 """
2826 2826
2827 2827 if self._adding_group:
2828 2828 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2829 2829
2830 2830 # read the default delta-base reuse policy from revlog config if the
2831 2831 # group did not specify one.
2832 2832 if delta_base_reuse_policy is None:
2833 2833 if self._generaldelta and self._lazydeltabase:
2834 2834 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2835 2835 else:
2836 2836 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2837 2837
2838 2838 self._adding_group = True
2839 2839 empty = True
2840 2840 try:
2841 2841 with self._writing(transaction):
2842 2842 write_debug = None
2843 2843 if self._debug_delta:
2844 2844 write_debug = transaction._report
2845 2845 deltacomputer = deltautil.deltacomputer(
2846 2846 self,
2847 2847 write_debug=write_debug,
2848 2848 debug_info=debug_info,
2849 2849 )
2850 2850 # loop through our set of deltas
2851 2851 for data in deltas:
2852 2852 (
2853 2853 node,
2854 2854 p1,
2855 2855 p2,
2856 2856 linknode,
2857 2857 deltabase,
2858 2858 delta,
2859 2859 flags,
2860 2860 sidedata,
2861 2861 ) = data
2862 2862 link = linkmapper(linknode)
2863 2863 flags = flags or REVIDX_DEFAULT_FLAGS
2864 2864
2865 2865 rev = self.index.get_rev(node)
2866 2866 if rev is not None:
2867 2867 # this can happen if two branches make the same change
2868 2868 self._nodeduplicatecallback(transaction, rev)
2869 2869 if duplicaterevisioncb:
2870 2870 duplicaterevisioncb(self, rev)
2871 2871 empty = False
2872 2872 continue
2873 2873
2874 2874 for p in (p1, p2):
2875 2875 if not self.index.has_node(p):
2876 2876 raise error.LookupError(
2877 2877 p, self.radix, _(b'unknown parent')
2878 2878 )
2879 2879
2880 2880 if not self.index.has_node(deltabase):
2881 2881 raise error.LookupError(
2882 2882 deltabase, self.display_id, _(b'unknown delta base')
2883 2883 )
2884 2884
2885 2885 baserev = self.rev(deltabase)
2886 2886
2887 2887 if baserev != nullrev and self.iscensored(baserev):
2888 2888 # if base is censored, delta must be full replacement in a
2889 2889 # single patch operation
2890 2890 hlen = struct.calcsize(b">lll")
2891 2891 oldlen = self.rawsize(baserev)
2892 2892 newlen = len(delta) - hlen
2893 2893 if delta[:hlen] != mdiff.replacediffheader(
2894 2894 oldlen, newlen
2895 2895 ):
2896 2896 raise error.CensoredBaseError(
2897 2897 self.display_id, self.node(baserev)
2898 2898 )
2899 2899
2900 2900 if not flags and self._peek_iscensored(baserev, delta):
2901 2901 flags |= REVIDX_ISCENSORED
2902 2902
2903 2903 # We assume consumers of addrevisioncb will want to retrieve
2904 2904 # the added revision, which will require a call to
2905 2905 # revision(). revision() will fast path if there is a cache
2906 2906 # hit. So, we tell _addrevision() to always cache in this case.
2907 2907 # We're only using addgroup() in the context of changegroup
2908 2908 # generation so the revision data can always be handled as raw
2909 2909 # by the flagprocessor.
2910 2910 rev = self._addrevision(
2911 2911 node,
2912 2912 None,
2913 2913 transaction,
2914 2914 link,
2915 2915 p1,
2916 2916 p2,
2917 2917 flags,
2918 2918 (baserev, delta, delta_base_reuse_policy),
2919 2919 alwayscache=alwayscache,
2920 2920 deltacomputer=deltacomputer,
2921 2921 sidedata=sidedata,
2922 2922 )
2923 2923
2924 2924 if addrevisioncb:
2925 2925 addrevisioncb(self, rev)
2926 2926 empty = False
2927 2927 finally:
2928 2928 self._adding_group = False
2929 2929 return not empty
2930 2930
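# Shape of each item in ``deltas`` as unpacked above (illustrative
# names):
#
#     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
#
# ``deltabase`` must already exist in the revlog, otherwise a
# LookupError(b'unknown delta base') is raised before anything is
# written.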
2931 2931 def iscensored(self, rev):
2932 2932 """Check if a file revision is censored."""
2933 2933 if not self._censorable:
2934 2934 return False
2935 2935
2936 2936 return self.flags(rev) & REVIDX_ISCENSORED
2937 2937
2938 2938 def _peek_iscensored(self, baserev, delta):
2939 2939 """Quickly check if a delta produces a censored revision."""
2940 2940 if not self._censorable:
2941 2941 return False
2942 2942
2943 2943 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2944 2944
2945 2945 def getstrippoint(self, minlink):
2946 2946 """find the minimum rev that must be stripped to strip the linkrev
2947 2947
2948 2948 Returns a tuple containing the minimum rev and a set of all revs that
2949 2949 have linkrevs that will be broken by this strip.
2950 2950 """
2951 2951 return storageutil.resolvestripinfo(
2952 2952 minlink,
2953 2953 len(self) - 1,
2954 2954 self.headrevs(),
2955 2955 self.linkrev,
2956 2956 self.parentrevs,
2957 2957 )
2958 2958
2959 2959 def strip(self, minlink, transaction):
2960 2960 """truncate the revlog on the first revision with a linkrev >= minlink
2961 2961
2962 2962 This function is called when we're stripping revision minlink and
2963 2963 its descendants from the repository.
2964 2964
2965 2965 We have to remove all revisions with linkrev >= minlink, because
2966 2966 the equivalent changelog revisions will be renumbered after the
2967 2967 strip.
2968 2968
2969 2969 So we truncate the revlog on the first of these revisions, and
2970 2970 trust that the caller has saved the revisions that shouldn't be
2971 2971 removed and that it'll re-add them after this truncation.
2972 2972 """
2973 2973 if len(self) == 0:
2974 2974 return
2975 2975
2976 2976 rev, _ = self.getstrippoint(minlink)
2977 2977 if rev == len(self):
2978 2978 return
2979 2979
2980 2980 # first truncate the files on disk
2981 2981 data_end = self.start(rev)
2982 2982 if not self._inline:
2983 2983 transaction.add(self._datafile, data_end)
2984 2984 end = rev * self.index.entry_size
2985 2985 else:
2986 2986 end = data_end + (rev * self.index.entry_size)
2987 2987
2988 2988 if self._sidedatafile:
2989 2989 sidedata_end = self.sidedata_cut_off(rev)
2990 2990 transaction.add(self._sidedatafile, sidedata_end)
2991 2991
2992 2992 transaction.add(self._indexfile, end)
2993 2993 if self._docket is not None:
2994 2994 # XXX we could leverage the docket while stripping. However, it is
2995 2995 # not powerful enough at the time of this comment.
2996 2996 self._docket.index_end = end
2997 2997 self._docket.data_end = data_end
2998 2998 self._docket.sidedata_end = sidedata_end
2999 2999 self._docket.write(transaction, stripping=True)
3000 3000
3001 3001 # then reset internal state in memory to forget those revisions
3002 3002 self._revisioncache = None
3003 3003 self._chaininfocache = util.lrucachedict(500)
3004 3004 self._segmentfile.clear_cache()
3005 3005 self._segmentfile_sidedata.clear_cache()
3006 3006
3007 3007 del self.index[rev:-1]
3008 3008
3009 3009 def checksize(self):
3010 3010 """Check size of index and data files
3011 3011
3012 3012 return a (dd, di) tuple.
3013 3013 - dd: extra bytes for the "data" file
3014 3014 - di: extra bytes for the "index" file
3015 3015
3016 3016 A healthy revlog will return (0, 0).
3017 3017 """
3018 3018 expected = 0
3019 3019 if len(self):
3020 3020 expected = max(0, self.end(len(self) - 1))
3021 3021
3022 3022 try:
3023 3023 with self._datafp() as f:
3024 3024 f.seek(0, io.SEEK_END)
3025 3025 actual = f.tell()
3026 3026 dd = actual - expected
3027 3027 except FileNotFoundError:
3028 3028 dd = 0
3029 3029
3030 3030 try:
3031 3031 f = self.opener(self._indexfile)
3032 3032 f.seek(0, io.SEEK_END)
3033 3033 actual = f.tell()
3034 3034 f.close()
3035 3035 s = self.index.entry_size
3036 3036 i = max(0, actual // s)
3037 3037 di = actual - (i * s)
3038 3038 if self._inline:
3039 3039 databytes = 0
3040 3040 for r in self:
3041 3041 databytes += max(0, self.length(r))
3042 3042 dd = 0
3043 3043 di = actual - len(self) * s - databytes
3044 3044 except FileNotFoundError:
3045 3045 di = 0
3046 3046
3047 3047 return (dd, di)
3048 3048
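# Interpretation sketch for the (dd, di) tuple above (hypothetical
# caller code):
#
#     dd, di = rl.checksize()
#     # (0, 0) -> index and data files have exactly the expected sizes
#     # dd != 0 -> the data file size differs by dd bytes from what the
#     #            index describes
#     # di != 0 -> the index file carries di unexpected trailing bytes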
3049 3049 def files(self):
3050 3050 res = [self._indexfile]
3051 3051 if self._docket_file is None:
3052 3052 if not self._inline:
3053 3053 res.append(self._datafile)
3054 3054 else:
3055 3055 res.append(self._docket_file)
3056 3056 res.extend(self._docket.old_index_filepaths(include_empty=False))
3057 3057 if self._docket.data_end:
3058 3058 res.append(self._datafile)
3059 3059 res.extend(self._docket.old_data_filepaths(include_empty=False))
3060 3060 if self._docket.sidedata_end:
3061 3061 res.append(self._sidedatafile)
3062 3062 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3063 3063 return res
3064 3064
3065 3065 def emitrevisions(
3066 3066 self,
3067 3067 nodes,
3068 3068 nodesorder=None,
3069 3069 revisiondata=False,
3070 3070 assumehaveparentrevisions=False,
3071 3071 deltamode=repository.CG_DELTAMODE_STD,
3072 3072 sidedata_helpers=None,
3073 3073 debug_info=None,
3074 3074 ):
3075 3075 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3076 3076 raise error.ProgrammingError(
3077 3077 b'unhandled value for nodesorder: %s' % nodesorder
3078 3078 )
3079 3079
3080 3080 if nodesorder is None and not self._generaldelta:
3081 3081 nodesorder = b'storage'
3082 3082
3083 3083 if (
3084 3084 not self._storedeltachains
3085 3085 and deltamode != repository.CG_DELTAMODE_PREV
3086 3086 ):
3087 3087 deltamode = repository.CG_DELTAMODE_FULL
3088 3088
3089 3089 return storageutil.emitrevisions(
3090 3090 self,
3091 3091 nodes,
3092 3092 nodesorder,
3093 3093 revlogrevisiondelta,
3094 3094 deltaparentfn=self.deltaparent,
3095 3095 candeltafn=self.candelta,
3096 3096 rawsizefn=self.rawsize,
3097 3097 revdifffn=self.revdiff,
3098 3098 flagsfn=self.flags,
3099 3099 deltamode=deltamode,
3100 3100 revisiondata=revisiondata,
3101 3101 assumehaveparentrevisions=assumehaveparentrevisions,
3102 3102 sidedata_helpers=sidedata_helpers,
3103 3103 debug_info=debug_info,
3104 3104 )
3105 3105
3106 3106 DELTAREUSEALWAYS = b'always'
3107 3107 DELTAREUSESAMEREVS = b'samerevs'
3108 3108 DELTAREUSENEVER = b'never'
3109 3109
3110 3110 DELTAREUSEFULLADD = b'fulladd'
3111 3111
3112 3112 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3113 3113
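# Usage sketch for clone() below (hypothetical revlogs `src` and `dst`
# and an open transaction `tr`):
#
#     src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)
#
# The DELTAREUSE* constants above select how aggressively existing
# deltas are carried over; see the clone() docstring for the trade-offs.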
3114 3114 def clone(
3115 3115 self,
3116 3116 tr,
3117 3117 destrevlog,
3118 3118 addrevisioncb=None,
3119 3119 deltareuse=DELTAREUSESAMEREVS,
3120 3120 forcedeltabothparents=None,
3121 3121 sidedata_helpers=None,
3122 3122 ):
3123 3123 """Copy this revlog to another, possibly with format changes.
3124 3124
3125 3125 The destination revlog will contain the same revisions and nodes.
3126 3126 However, it may not be bit-for-bit identical due to e.g. delta encoding
3127 3127 differences.
3128 3128
3129 3129 The ``deltareuse`` argument controls how deltas from the existing revlog
3130 3130 are preserved in the destination revlog. The argument can have the
3131 3131 following values:
3132 3132
3133 3133 DELTAREUSEALWAYS
3134 3134 Deltas will always be reused (if possible), even if the destination
3135 3135 revlog would not select the same revisions for the delta. This is the
3136 3136 fastest mode of operation.
3137 3137 DELTAREUSESAMEREVS
3138 3138 Deltas will be reused if the destination revlog would pick the same
3139 3139 revisions for the delta. This mode strikes a balance between speed
3140 3140 and optimization.
3141 3141 DELTAREUSENEVER
3142 3142 Deltas will never be reused. This is the slowest mode of execution.
3143 3143 This mode can be used to recompute deltas (e.g. if the diff/delta
3144 3144 algorithm changes).
3145 3145 DELTAREUSEFULLADD
3146 3146 Revisions will be re-added as if they were new content. This is
3147 3147 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3148 3148 e.g. large file detection and handling.
3149 3149
3150 3150 Delta computation can be slow, so the choice of delta reuse policy can
3151 3151 significantly affect run time.
3152 3152
3153 3153 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3154 3154 two extremes. Deltas will be reused when appropriate. But if the
3155 3155 destination could pick a better delta base, it will do so. This means if you
3156 3156 are converting a non-generaldelta revlog to a generaldelta revlog,
3157 3157 deltas will be recomputed if the delta's parent isn't a parent of the
3158 3158 revision.
3159 3159
3160 3160 In addition to the delta policy, the ``forcedeltabothparents``
3161 3161 argument controls whether to force computing deltas against both parents
3162 3162 for merges. If not set, the destination revlog's current setting is kept.
3163 3163
3164 3164 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3165 3165 `sidedata_helpers`.
3166 3166 """
3167 3167 if deltareuse not in self.DELTAREUSEALL:
3168 3168 raise ValueError(
3169 3169 _(b'value for deltareuse invalid: %s') % deltareuse
3170 3170 )
3171 3171
3172 3172 if len(destrevlog):
3173 3173 raise ValueError(_(b'destination revlog is not empty'))
3174 3174
3175 3175 if getattr(self, 'filteredrevs', None):
3176 3176 raise ValueError(_(b'source revlog has filtered revisions'))
3177 3177 if getattr(destrevlog, 'filteredrevs', None):
3178 3178 raise ValueError(_(b'destination revlog has filtered revisions'))
3179 3179
3180 3180 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3181 3181 # if possible.
3182 3182 oldlazydelta = destrevlog._lazydelta
3183 3183 oldlazydeltabase = destrevlog._lazydeltabase
3184 3184 oldamd = destrevlog._deltabothparents
3185 3185
3186 3186 try:
3187 3187 if deltareuse == self.DELTAREUSEALWAYS:
3188 3188 destrevlog._lazydeltabase = True
3189 3189 destrevlog._lazydelta = True
3190 3190 elif deltareuse == self.DELTAREUSESAMEREVS:
3191 3191 destrevlog._lazydeltabase = False
3192 3192 destrevlog._lazydelta = True
3193 3193 elif deltareuse == self.DELTAREUSENEVER:
3194 3194 destrevlog._lazydeltabase = False
3195 3195 destrevlog._lazydelta = False
3196 3196
3197 3197 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3198 3198
3199 3199 self._clone(
3200 3200 tr,
3201 3201 destrevlog,
3202 3202 addrevisioncb,
3203 3203 deltareuse,
3204 3204 forcedeltabothparents,
3205 3205 sidedata_helpers,
3206 3206 )
3207 3207
3208 3208 finally:
3209 3209 destrevlog._lazydelta = oldlazydelta
3210 3210 destrevlog._lazydeltabase = oldlazydeltabase
3211 3211 destrevlog._deltabothparents = oldamd
3212 3212
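A usage sketch for the policies documented above (assuming `src` and `dst` are open revlogs, `dst` empty, and `tr` an active transaction; none of these names come from this file):

# Sketch: copy `src` into the empty revlog `dst`, recomputing every
# delta, e.g. after a change to the delta algorithm.
src.clone(
    tr,
    dst,
    deltareuse=src.DELTAREUSENEVER,
)
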
3213 3213 def _clone(
3214 3214 self,
3215 3215 tr,
3216 3216 destrevlog,
3217 3217 addrevisioncb,
3218 3218 deltareuse,
3219 3219 forcedeltabothparents,
3220 3220 sidedata_helpers,
3221 3221 ):
3222 3222 """perform the core duty of `revlog.clone` after parameter processing"""
3223 3223 write_debug = None
3224 3224 if self._debug_delta:
3225 3225 write_debug = tr._report
3226 3226 deltacomputer = deltautil.deltacomputer(
3227 3227 destrevlog,
3228 3228 write_debug=write_debug,
3229 3229 )
3230 3230 index = self.index
3231 3231 for rev in self:
3232 3232 entry = index[rev]
3233 3233
3234 3234 # Some classes override linkrev to take filtered revs into
3235 3235 # account. Use raw entry from index.
3236 3236 flags = entry[0] & 0xFFFF
3237 3237 linkrev = entry[4]
3238 3238 p1 = index[entry[5]][7]
3239 3239 p2 = index[entry[6]][7]
3240 3240 node = entry[7]
3241 3241
3242 3242 # (Possibly) reuse the delta from the revlog if allowed and
3243 3243 # the revlog chunk is a delta.
3244 3244 cachedelta = None
3245 3245 rawtext = None
3246 3246 if deltareuse == self.DELTAREUSEFULLADD:
3247 3247 text = self._revisiondata(rev)
3248 3248 sidedata = self.sidedata(rev)
3249 3249
3250 3250 if sidedata_helpers is not None:
3251 3251 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3252 3252 self, sidedata_helpers, sidedata, rev
3253 3253 )
3254 3254 flags = flags | new_flags[0] & ~new_flags[1]
3255 3255
3256 3256 destrevlog.addrevision(
3257 3257 text,
3258 3258 tr,
3259 3259 linkrev,
3260 3260 p1,
3261 3261 p2,
3262 3262 cachedelta=cachedelta,
3263 3263 node=node,
3264 3264 flags=flags,
3265 3265 deltacomputer=deltacomputer,
3266 3266 sidedata=sidedata,
3267 3267 )
3268 3268 else:
3269 3269 if destrevlog._lazydelta:
3270 3270 dp = self.deltaparent(rev)
3271 3271 if dp != nullrev:
3272 3272 cachedelta = (dp, bytes(self._chunk(rev)))
3273 3273
3274 3274 sidedata = None
3275 3275 if not cachedelta:
3276 rawtext = self._revisiondata(rev)
3276 try:
3277 rawtext = self._revisiondata(rev)
3278 except error.CensoredNodeError as censored:
3279 assert flags & REVIDX_ISCENSORED
3280 rawtext = censored.tombstone
3277 3281 sidedata = self.sidedata(rev)
3278 3282 if sidedata is None:
3279 3283 sidedata = self.sidedata(rev)
3280 3284
3281 3285 if sidedata_helpers is not None:
3282 3286 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3283 3287 self, sidedata_helpers, sidedata, rev
3284 3288 )
3285 3289 flags = flags | new_flags[0] & ~new_flags[1]
3286 3290
3287 3291 with destrevlog._writing(tr):
3288 3292 destrevlog._addrevision(
3289 3293 node,
3290 3294 rawtext,
3291 3295 tr,
3292 3296 linkrev,
3293 3297 p1,
3294 3298 p2,
3295 3299 flags,
3296 3300 cachedelta,
3297 3301 deltacomputer=deltacomputer,
3298 3302 sidedata=sidedata,
3299 3303 )
3300 3304
3301 3305 if addrevisioncb:
3302 3306 addrevisioncb(self, rev, node)
3303 3307
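The try/except added above is the heart of this change: when no cached delta is available and the raw text turns out to be censored, the tombstone is carried over instead of aborting the clone. The same defensive pattern, pulled out as a hypothetical helper (`read_raw_or_tombstone` is illustrative only, not part of revlog.py):

# Sketch: read a possibly-censored revision, falling back to its
# tombstone, mirroring the fallback added in _clone above.
def read_raw_or_tombstone(rl, rev):
    try:
        return rl._revisiondata(rev)
    except error.CensoredNodeError as censored:
        # only revisions flagged as censored may raise this
        assert rl.flags(rev) & REVIDX_ISCENSORED
        return censored.tombstone
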
3304 3308 def censorrevision(self, tr, censornode, tombstone=b''):
3305 3309 if self._format_version == REVLOGV0:
3306 3310 raise error.RevlogError(
3307 3311 _(b'cannot censor with version %d revlogs')
3308 3312 % self._format_version
3309 3313 )
3310 3314 elif self._format_version == REVLOGV1:
3311 3315 rewrite.v1_censor(self, tr, censornode, tombstone)
3312 3316 else:
3313 3317 rewrite.v2_censor(self, tr, censornode, tombstone)
3314 3318
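A hedged usage sketch (`repo` and `node` are assumptions; filelogs forward censorrevision() to their underlying revlog):

# Sketch: censor one file revision inside a transaction.
with repo.transaction(b'censor') as tr:
    fl = repo.file(b'target')
    fl.censorrevision(tr, node, tombstone=b'removed password')
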
3315 3319 def verifyintegrity(self, state):
3316 3320 """Verifies the integrity of the revlog.
3317 3321
3318 3322 Yields ``revlogproblem`` instances describing problems that are
3319 3323 found.
3320 3324 """
3321 3325 dd, di = self.checksize()
3322 3326 if dd:
3323 3327 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3324 3328 if di:
3325 3329 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3326 3330
3327 3331 version = self._format_version
3328 3332
3329 3333 # The verifier tells us what version revlog we should be.
3330 3334 if version != state[b'expectedversion']:
3331 3335 yield revlogproblem(
3332 3336 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3333 3337 % (self.display_id, version, state[b'expectedversion'])
3334 3338 )
3335 3339
3336 3340 state[b'skipread'] = set()
3337 3341 state[b'safe_renamed'] = set()
3338 3342
3339 3343 for rev in self:
3340 3344 node = self.node(rev)
3341 3345
3342 3346 # Verify contents. 4 cases to care about:
3343 3347 #
3344 3348 # common: the most common case
3345 3349 # rename: with a rename
3346 3350 # meta: file content starts with b'\1\n', the metadata
3347 3351 # header defined in filelog.py, but without a rename
3348 3352 # ext: content stored externally
3349 3353 #
3350 3354 # More formally, their differences are shown below:
3351 3355 #
3352 3356 # | common | rename | meta | ext
3353 3357 # -------------------------------------------------------
3354 3358 # flags() | 0 | 0 | 0 | not 0
3355 3359 # renamed() | False | True | False | ?
3356 3360 # rawtext[0:2]=='\1\n'| False | True | True | ?
3357 3361 #
3358 3362 # "rawtext" means the raw text stored in revlog data, which
3359 3363 # could be retrieved by "rawdata(rev)". "text"
3360 3364 # mentioned below is "revision(rev)".
3361 3365 #
3362 3366 # There are 3 different lengths stored physically:
3363 3367 # 1. L1: rawsize, stored in revlog index
3364 3368 # 2. L2: len(rawtext), stored in revlog data
3365 3369 # 3. L3: len(text), stored in revlog data if flags==0, or
3366 3370 # possibly somewhere else if flags!=0
3367 3371 #
3368 3372 # L1 should be equal to L2. L3 could be different from them.
3369 3373 # "text" may or may not affect commit hash depending on flag
3370 3374 # processors (see flagutil.addflagprocessor).
3371 3375 #
3372 3376 # | common | rename | meta | ext
3373 3377 # -------------------------------------------------
3374 3378 # rawsize() | L1 | L1 | L1 | L1
3375 3379 # size() | L1 | L2-LM | L1(*) | L1 (?)
3376 3380 # len(rawtext) | L2 | L2 | L2 | L2
3377 3381 # len(text) | L2 | L2 | L2 | L3
3378 3382 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3379 3383 #
3380 3384 # LM: length of metadata, depending on rawtext
3381 3385 # (*): not ideal, see comment in filelog.size
3382 3386 # (?): could be "- len(meta)" if the resolved content has
3383 3387 # rename metadata
3384 3388 #
3385 3389 # Checks needed to be done:
3386 3390 # 1. length check: L1 == L2, in all cases.
3387 3391 # 2. hash check: depending on flag processor, we may need to
3388 3392 # use either "text" (external), or "rawtext" (in revlog).
3389 3393
3390 3394 try:
3391 3395 skipflags = state.get(b'skipflags', 0)
3392 3396 if skipflags:
3393 3397 skipflags &= self.flags(rev)
3394 3398
3395 3399 _verify_revision(self, skipflags, state, node)
3396 3400
3397 3401 l1 = self.rawsize(rev)
3398 3402 l2 = len(self.rawdata(node))
3399 3403
3400 3404 if l1 != l2:
3401 3405 yield revlogproblem(
3402 3406 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3403 3407 node=node,
3404 3408 )
3405 3409
3406 3410 except error.CensoredNodeError:
3407 3411 if state[b'erroroncensored']:
3408 3412 yield revlogproblem(
3409 3413 error=_(b'censored file data'), node=node
3410 3414 )
3411 3415 state[b'skipread'].add(node)
3412 3416 except Exception as e:
3413 3417 yield revlogproblem(
3414 3418 error=_(b'unpacking %s: %s')
3415 3419 % (short(node), stringutil.forcebytestr(e)),
3416 3420 node=node,
3417 3421 )
3418 3422 state[b'skipread'].add(node)
3419 3423
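A consumption sketch (`rl` is assumed; the `state` keys shown are the ones this method reads or seeds):

# Sketch: run the integrity checks and report any problems found.
state = {
    b'expectedversion': rl._format_version,
    b'erroroncensored': True,
}
for problem in rl.verifyintegrity(state):
    if problem.error:
        print(b'error: %s' % problem.error)
    elif problem.warning:
        print(b'warning: %s' % problem.warning)
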
3420 3424 def storageinfo(
3421 3425 self,
3422 3426 exclusivefiles=False,
3423 3427 sharedfiles=False,
3424 3428 revisionscount=False,
3425 3429 trackedsize=False,
3426 3430 storedsize=False,
3427 3431 ):
3428 3432 d = {}
3429 3433
3430 3434 if exclusivefiles:
3431 3435 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3432 3436 if not self._inline:
3433 3437 d[b'exclusivefiles'].append((self.opener, self._datafile))
3434 3438
3435 3439 if sharedfiles:
3436 3440 d[b'sharedfiles'] = []
3437 3441
3438 3442 if revisionscount:
3439 3443 d[b'revisionscount'] = len(self)
3440 3444
3441 3445 if trackedsize:
3442 3446 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3443 3447
3444 3448 if storedsize:
3445 3449 d[b'storedsize'] = sum(
3446 3450 self.opener.stat(path).st_size for path in self.files()
3447 3451 )
3448 3452
3449 3453 return d
3450 3454
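Each figure is opt-in because some are expensive to compute; a sketch (`rl` assumed):

# Sketch: ask only for the aggregate numbers we need.
info = rl.storageinfo(revisionscount=True, storedsize=True)
# e.g. info == {b'revisionscount': 14, b'storedsize': 12345}
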
3451 3455 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3452 3456 if not self.hassidedata:
3453 3457 return
3454 3458 # revlog formats with sidedata support do not support inline storage
3455 3459 assert not self._inline
3456 3460 if not helpers[1] and not helpers[2]:
3457 3461 # Nothing to generate or remove
3458 3462 return
3459 3463
3460 3464 new_entries = []
3461 3465 # append the new sidedata
3462 3466 with self._writing(transaction):
3463 3467 ifh, dfh, sdfh = self._writinghandles
3464 3468 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3465 3469
3466 3470 current_offset = sdfh.tell()
3467 3471 for rev in range(startrev, endrev + 1):
3468 3472 entry = self.index[rev]
3469 3473 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3470 3474 store=self,
3471 3475 sidedata_helpers=helpers,
3472 3476 sidedata={},
3473 3477 rev=rev,
3474 3478 )
3475 3479
3476 3480 serialized_sidedata = sidedatautil.serialize_sidedata(
3477 3481 new_sidedata
3478 3482 )
3479 3483
3480 3484 sidedata_compression_mode = COMP_MODE_INLINE
3481 3485 if serialized_sidedata and self.hassidedata:
3482 3486 sidedata_compression_mode = COMP_MODE_PLAIN
3483 3487 h, comp_sidedata = self.compress(serialized_sidedata)
3484 3488 if (
3485 3489 h != b'u'
3486 3490 and comp_sidedata[0:1] != b'\0'
3487 3491 and len(comp_sidedata) < len(serialized_sidedata)
3488 3492 ):
3489 3493 assert not h
3490 3494 if (
3491 3495 comp_sidedata[0:1]
3492 3496 == self._docket.default_compression_header
3493 3497 ):
3494 3498 sidedata_compression_mode = COMP_MODE_DEFAULT
3495 3499 serialized_sidedata = comp_sidedata
3496 3500 else:
3497 3501 sidedata_compression_mode = COMP_MODE_INLINE
3498 3502 serialized_sidedata = comp_sidedata
3499 3503 if entry[8] != 0 or entry[9] != 0:
3500 3504 # rewriting entries that already have sidedata is not
3501 3505 # supported yet, because it introduces garbage data in the
3502 3506 # revlog.
3503 3507 msg = b"rewriting existing sidedata is not supported yet"
3504 3508 raise error.Abort(msg)
3505 3509
3506 3510 # Apply (potential) flags to add and to remove after running
3507 3511 # the sidedata helpers
3508 3512 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3509 3513 entry_update = (
3510 3514 current_offset,
3511 3515 len(serialized_sidedata),
3512 3516 new_offset_flags,
3513 3517 sidedata_compression_mode,
3514 3518 )
3515 3519
3516 3520 # the sidedata computation might have moved the file cursors around
3517 3521 sdfh.seek(current_offset, os.SEEK_SET)
3518 3522 sdfh.write(serialized_sidedata)
3519 3523 new_entries.append(entry_update)
3520 3524 current_offset += len(serialized_sidedata)
3521 3525 self._docket.sidedata_end = sdfh.tell()
3522 3526
3523 3527 # rewrite the new index entries
3524 3528 ifh.seek(startrev * self.index.entry_size)
3525 3529 for i, e in enumerate(new_entries):
3526 3530 rev = startrev + i
3527 3531 self.index.replace_sidedata_info(rev, *e)
3528 3532 packed = self.index.entry_binary(rev)
3529 3533 if rev == 0 and self._docket is None:
3530 3534 header = self._format_flags | self._format_version
3531 3535 header = self.index.pack_header(header)
3532 3536 packed = header + packed
3533 3537 ifh.write(packed)
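The compression handling above boils down to: store sidedata compressed only when compression actually wins, and record whether the stored bytes use the docket's default compressor or embed their own header. That decision as a hypothetical standalone helper (`pick_sidedata_mode` is illustrative, not part of revlog.py):

# Sketch mirroring the mode selection in rewrite_sidedata().
def pick_sidedata_mode(rl, data):
    h, comp = rl.compress(data)
    if h != b'u' and comp[0:1] != b'\0' and len(comp) < len(data):
        if comp[0:1] == rl._docket.default_compression_header:
            return COMP_MODE_DEFAULT, comp  # header implied by the docket
        return COMP_MODE_INLINE, comp  # data embeds its own header
    return COMP_MODE_PLAIN, data  # compression did not help; store plain
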
@@ -1,611 +1,603 b''
1 1 #require no-reposimplestore
2 2 #testcases revlogv1 revlogv2
3 3
4 4 #if revlogv2
5 5
6 6 $ cat >> $HGRCPATH <<EOF
7 7 > [experimental]
8 8 > revlogv2=enable-unstable-format-and-corrupt-my-data
9 9 > EOF
10 10
11 11 #endif
12 12
13 13 $ cp $HGRCPATH $HGRCPATH.orig
14 14
15 15 Create repo with unimpeachable content
16 16
17 17 $ hg init r
18 18 $ cd r
19 19 $ echo 'Initially untainted file' > target
20 20 $ echo 'Normal file here' > bystander
21 21 $ hg add target bystander
22 22 $ hg ci -m init
23 23
24 24 Clone repo so we can test pull later
25 25
26 26 $ cd ..
27 27 $ hg clone r rpull
28 28 updating to branch default
29 29 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
30 30 $ cd r
31 31
32 32 Introduce content which will ultimately require censorship. Name the first
33 33 censored node C1, second C2, and so on
34 34
35 35 $ echo 'Tainted file' > target
36 36 $ echo 'Passwords: hunter2' >> target
37 37 $ hg ci -m taint target
38 38 $ C1=`hg id --debug -i`
39 39
40 40 $ echo 'hunter3' >> target
41 41 $ echo 'Normal file v2' > bystander
42 42 $ hg ci -m moretaint target bystander
43 43 $ C2=`hg id --debug -i`
44 44
45 45 Add new sanitized versions to correct our mistake. Name the first head H1,
46 46 the second head H2, and so on
47 47
48 48 $ echo 'Tainted file is now sanitized' > target
49 49 $ hg ci -m sanitized target
50 50 $ H1=`hg id --debug -i`
51 51
52 52 $ hg update -r $C2
53 53 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
54 54 $ echo 'Tainted file now super sanitized' > target
55 55 $ hg ci -m 'super sanitized' target
56 56 created new head
57 57 $ H2=`hg id --debug -i`
58 58
59 59 Verify target contents before censorship at each revision
60 60
61 61 $ hg cat -r $H1 target | head -n 10
62 62 Tainted file is now sanitized
63 63 $ hg cat -r $H2 target | head -n 10
64 64 Tainted file now super sanitized
65 65 $ hg cat -r $C2 target | head -n 10
66 66 Tainted file
67 67 Passwords: hunter2
68 68 hunter3
69 69 $ hg cat -r $C1 target | head -n 10
70 70 Tainted file
71 71 Passwords: hunter2
72 72 $ hg cat -r 0 target | head -n 10
73 73 Initially untainted file
74 74
75 75 Censor revision with 2 offenses
76 76
77 77 (this also tests file pattern matching: path relative to cwd case)
78 78
79 79 $ mkdir -p foo/bar/baz
80 80 $ hg --config extensions.censor= --cwd foo/bar/baz censor -r $C2 -t "remove password" ../../../target
81 81 $ hg cat -r $H1 target | head -n 10
82 82 Tainted file is now sanitized
83 83 $ hg cat -r $H2 target | head -n 10
84 84 Tainted file now super sanitized
85 85 $ hg cat -r $C2 target | head -n 10
86 86 abort: censored node: 1e0247a9a4b7
87 87 (set censor.policy to ignore errors)
88 88 $ hg cat -r $C1 target | head -n 10
89 89 Tainted file
90 90 Passwords: hunter2
91 91 $ hg cat -r 0 target | head -n 10
92 92 Initially untainted file
93 93
94 94 Censor revision with 1 offense
95 95
96 96 (this also tests file pattern matching: with 'path:' scheme)
97 97
98 98 $ hg --config extensions.censor= --cwd foo/bar/baz censor -r $C1 path:target
99 99 $ hg cat -r $H1 target | head -n 10
100 100 Tainted file is now sanitized
101 101 $ hg cat -r $H2 target | head -n 10
102 102 Tainted file now super sanitized
103 103 $ hg cat -r $C2 target | head -n 10
104 104 abort: censored node: 1e0247a9a4b7
105 105 (set censor.policy to ignore errors)
106 106 $ hg cat -r $C1 target | head -n 10
107 107 abort: censored node: 613bc869fceb
108 108 (set censor.policy to ignore errors)
109 109 $ hg cat -r 0 target | head -n 10
110 110 Initially untainted file
111 111
112 112 Can only check out target at uncensored revisions; -X is a workaround for --all
113 113
114 114 $ hg revert -r $C2 target | head -n 10
115 115 abort: censored node: 1e0247a9a4b7
116 116 (set censor.policy to ignore errors)
117 117 $ hg revert -r $C1 target | head -n 10
118 118 abort: censored node: 613bc869fceb
119 119 (set censor.policy to ignore errors)
120 120 $ hg revert -r $C1 --all
121 121 reverting bystander
122 122 reverting target
123 123 abort: censored node: 613bc869fceb
124 124 (set censor.policy to ignore errors)
125 125 [255]
126 126 $ hg revert -r $C1 --all -X target
127 127 $ cat target | head -n 10
128 128 Tainted file now super sanitized
129 129 $ hg revert -r 0 --all
130 130 reverting target
131 131 $ cat target | head -n 10
132 132 Initially untainted file
133 133 $ hg revert -r $H2 --all
134 134 reverting bystander
135 135 reverting target
136 136 $ cat target | head -n 10
137 137 Tainted file now super sanitized
138 138
139 139 Uncensored file can be viewed at any revision
140 140
141 141 $ hg cat -r $H1 bystander | head -n 10
142 142 Normal file v2
143 143 $ hg cat -r $C2 bystander | head -n 10
144 144 Normal file v2
145 145 $ hg cat -r $C1 bystander | head -n 10
146 146 Normal file here
147 147 $ hg cat -r 0 bystander | head -n 10
148 148 Normal file here
149 149
150 150 Can update to children of censored revision
151 151
152 152 $ hg update -r $H1
153 153 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
154 154 $ cat target | head -n 10
155 155 Tainted file is now sanitized
156 156 $ hg update -r $H2
157 157 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
158 158 $ cat target | head -n 10
159 159 Tainted file now super sanitized
160 160
161 161 Set censor policy to abort in trusted $HGRC so hg verify fails
162 162
163 163 $ cp $HGRCPATH.orig $HGRCPATH
164 164 $ cat >> $HGRCPATH <<EOF
165 165 > [censor]
166 166 > policy = abort
167 167 > EOF
168 168
169 169 Repo fails verification due to censorship
170 170
171 171 $ hg verify
172 172 checking changesets
173 173 checking manifests
174 174 crosschecking files in changesets and manifests
175 175 checking files
176 176 target@1: censored file data
177 177 target@2: censored file data
178 178 not checking dirstate because of previous errors
179 179 checked 5 changesets with 7 changes to 2 files
180 180 2 integrity errors encountered!
181 181 (first damaged changeset appears to be 1)
182 182 [1]
183 183
184 184 Cannot update to revision with censored data
185 185
186 186 $ hg update -r $C2
187 187 abort: censored node: 1e0247a9a4b7
188 188 (set censor.policy to ignore errors)
189 189 [255]
190 190 $ hg update -r $C1
191 191 abort: censored node: 613bc869fceb
192 192 (set censor.policy to ignore errors)
193 193 [255]
194 194 $ hg update -r 0
195 195 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
196 196 $ hg update -r $H2
197 197 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
198 198
199 199 Set censor policy to ignore in trusted $HGRC so hg verify passes
200 200
201 201 $ cp $HGRCPATH.orig $HGRCPATH
202 202 $ cat >> $HGRCPATH <<EOF
203 203 > [censor]
204 204 > policy = ignore
205 205 > EOF
206 206
207 207 Repo passes verification with warnings with explicit config
208 208
209 209 $ hg verify -q
210 210
211 211 May update to revision with censored data with explicit config
212 212
213 213 $ hg update -r $C2
214 214 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
215 215 $ cat target | head -n 10
216 216 $ hg update -r $C1
217 217 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
218 218 $ cat target | head -n 10
219 219 $ hg update -r 0
220 220 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
221 221 $ cat target | head -n 10
222 222 Initially untainted file
223 223 $ hg update -r $H2
224 224 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
225 225 $ cat target | head -n 10
226 226 Tainted file now super sanitized
227 227
228 228 Can merge in revision with censored data. Test requires one branch of history
229 229 with the file censored, but we can't censor at a head, so advance H1.
230 230
231 231 $ hg update -r $H1
232 232 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
233 233 $ C3=$H1
234 234 $ echo 'advanced head H1' > target
235 235 $ hg ci -m 'advance head H1' target
236 236 $ H1=`hg id --debug -i`
237 237 $ hg --config extensions.censor= censor -r $C3 target
238 238 $ hg update -r $H2
239 239 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
240 240 $ hg merge -r $C3
241 241 merging target
242 242 0 files updated, 1 files merged, 0 files removed, 0 files unresolved
243 243 (branch merge, don't forget to commit)
244 244
245 245 Revisions present in repository heads may not be censored
246 246
247 247 $ hg update -C -r $H2
248 248 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
249 249 $ hg --config extensions.censor= censor -r $H2 target
250 250 abort: cannot censor file in heads (78a8fc215e79)
251 251 (clean/delete and commit first)
252 252 [255]
253 253 $ echo 'twiddling thumbs' > bystander
254 254 $ hg ci -m 'bystander commit'
255 255 $ H2=`hg id --debug -i`
256 256 $ hg --config extensions.censor= censor -r "$H2^" target
257 257 abort: cannot censor file in heads (efbe78065929)
258 258 (clean/delete and commit first)
259 259 [255]
260 260
261 261 Cannot censor working directory
262 262
263 263 $ echo 'seriously no passwords' > target
264 264 $ hg ci -m 'extend second head arbitrarily' target
265 265 $ H2=`hg id --debug -i`
266 266 $ hg update -r "$H2^"
267 267 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
268 268 $ hg --config extensions.censor= censor -r . target
269 269 abort: cannot censor working directory
270 270 (clean/delete/update first)
271 271 [255]
272 272 $ hg update -r $H2
273 273 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
274 274
275 275 Can re-add file after being deleted + censored
276 276
277 277 $ C4=$H2
278 278 $ hg rm target
279 279 $ hg ci -m 'delete target so it may be censored'
280 280 $ H2=`hg id --debug -i`
281 281 $ hg --config extensions.censor= censor -r $C4 target
282 282 $ hg cat -r $C4 target | head -n 10
283 283 $ hg cat -r "$H2^^" target | head -n 10
284 284 Tainted file now super sanitized
285 285 $ echo 'fresh start' > target
286 286 $ hg add target
287 287 $ hg ci -m reincarnated target
288 288 $ H2=`hg id --debug -i`
289 289 $ hg cat -r $H2 target | head -n 10
290 290 fresh start
291 291 $ hg cat -r "$H2^" target | head -n 10
292 292 target: no such file in rev 452ec1762369
293 293 $ hg cat -r $C4 target | head -n 10
294 294 $ hg cat -r "$H2^^^" target | head -n 10
295 295 Tainted file now super sanitized
296 296
297 297 Can censor after revlog has expanded to no longer permit inline storage
298 298
299 299 $ for x in `"$PYTHON" $TESTDIR/seq.py 0 50000`
300 300 > do
301 301 > echo "Password: hunter$x" >> target
302 302 > done
303 303 $ hg ci -m 'add 100k passwords'
304 304 $ H2=`hg id --debug -i`
305 305 $ C5=$H2
306 306 $ hg revert -r "$H2^" target
307 307 $ hg ci -m 'cleaned 100k passwords'
308 308 $ H2=`hg id --debug -i`
309 309 $ hg --config extensions.censor= censor -r $C5 target
310 310 $ hg cat -r $C5 target | head -n 10
311 311 $ hg cat -r $H2 target | head -n 10
312 312 fresh start
313 313
314 314 Repo with censored nodes can be cloned and cloned nodes are censored
315 315
316 316 $ cd ..
317 317 $ hg clone r rclone
318 318 updating to branch default
319 319 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
320 320 $ cd rclone
321 321 $ hg cat -r $H1 target | head -n 10
322 322 advanced head H1
323 323 $ hg cat -r $H2~5 target | head -n 10
324 324 Tainted file now super sanitized
325 325 $ hg cat -r $C2 target | head -n 10
326 326 $ hg cat -r $C1 target | head -n 10
327 327 $ hg cat -r 0 target | head -n 10
328 328 Initially untainted file
329 329 $ hg verify -q
330 330
331 331 Repo cloned before tainted content introduced can pull censored nodes
332 332
333 333 $ cd ../rpull
334 334 $ hg cat -r tip target | head -n 10
335 335 Initially untainted file
336 336 $ hg verify -q
337 337 $ hg pull -r $H1 -r $H2
338 338 pulling from $TESTTMP/r
339 339 searching for changes
340 340 adding changesets
341 341 adding manifests
342 342 adding file changes
343 343 added 11 changesets with 11 changes to 2 files (+1 heads)
344 344 new changesets 186fb27560c3:683e4645fded
345 345 (run 'hg heads' to see heads, 'hg merge' to merge)
346 346 $ hg update 4
347 347 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
348 348 $ cat target | head -n 10
349 349 Tainted file now super sanitized
350 350 $ hg cat -r $H1 target | head -n 10
351 351 advanced head H1
352 352 $ hg cat -r $H2~5 target | head -n 10
353 353 Tainted file now super sanitized
354 354 $ hg cat -r $C2 target | head -n 10
355 355 $ hg cat -r $C1 target | head -n 10
356 356 $ hg cat -r 0 target | head -n 10
357 357 Initially untainted file
358 358 $ hg verify -q
359 359
360 360 Censored nodes can be pushed if they censor previously unexchanged nodes
361 361
362 362 $ echo 'Passwords: hunter2hunter2' > target
363 363 $ hg ci -m 're-add password from clone' target
364 364 created new head
365 365 $ H3=`hg id --debug -i`
366 366 $ REV=$H3
367 367 $ echo 'Re-sanitized; nothing to see here' > target
368 368 $ hg ci -m 're-sanitized' target
369 369 $ H2=`hg id --debug -i`
370 370 $ CLEANREV=$H2
371 371 $ hg cat -r $REV target | head -n 10
372 372 Passwords: hunter2hunter2
373 373 $ hg --config extensions.censor= censor -r $REV target
374 374 $ hg cat -r $REV target | head -n 10
375 375 $ hg cat -r $CLEANREV target | head -n 10
376 376 Re-sanitized; nothing to see here
377 377 $ hg push -f -r $H2
378 378 pushing to $TESTTMP/r
379 379 searching for changes
380 380 adding changesets
381 381 adding manifests
382 382 adding file changes
383 383 added 2 changesets with 2 changes to 1 files (+1 heads)
384 384
385 385 $ cd ../r
386 386 $ hg cat -r $REV target | head -n 10
387 387 $ hg cat -r $CLEANREV target | head -n 10
388 388 Re-sanitized; nothing to see here
389 389 $ hg update $CLEANREV
390 390 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
391 391 $ cat target | head -n 10
392 392 Re-sanitized; nothing to see here
393 393
394 394 Censored nodes can be bundled up and unbundled in another repo
395 395
396 396 $ hg bundle --base 0 ../pwbundle
397 397 13 changesets found
398 398 $ cd ../rclone
399 399 $ hg unbundle ../pwbundle
400 400 adding changesets
401 401 adding manifests
402 402 adding file changes
403 403 added 2 changesets with 2 changes to 2 files (+1 heads)
404 404 new changesets 075be80ac777:dcbaf17bf3a1 (2 drafts)
405 405 (run 'hg heads .' to see heads, 'hg merge' to merge)
406 406 $ hg cat -r $REV target | head -n 10
407 407 $ hg cat -r $CLEANREV target | head -n 10
408 408 Re-sanitized; nothing to see here
409 409 $ hg update $CLEANREV
410 410 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
411 411 $ cat target | head -n 10
412 412 Re-sanitized; nothing to see here
413 413 $ hg verify -q
414 414
415 415 Grepping only warns, doesn't error out
416 416
417 417 $ cd ../rpull
418 418 $ hg grep 'Normal file'
419 419 bystander:Normal file v2
420 420 $ hg grep nothing
421 421 target:Re-sanitized; nothing to see here
422 422 $ hg grep --diff 'Normal file'
423 423 cannot search in censored file: target:7
424 424 cannot search in censored file: target:10
425 425 cannot search in censored file: target:12
426 426 bystander:6:-:Normal file v2
427 427 cannot search in censored file: target:1
428 428 cannot search in censored file: target:2
429 429 cannot search in censored file: target:3
430 430 bystander:2:-:Normal file here
431 431 bystander:2:+:Normal file v2
432 432 bystander:0:+:Normal file here
433 433 $ hg grep --diff nothing
434 434 cannot search in censored file: target:7
435 435 cannot search in censored file: target:10
436 436 cannot search in censored file: target:12
437 437 target:13:+:Re-sanitized; nothing to see here
438 438 cannot search in censored file: target:1
439 439 cannot search in censored file: target:2
440 440 cannot search in censored file: target:3
441 441
442 442 Censored nodes can be imported on top of censored nodes, consecutively
443 443
444 444 $ hg init ../rimport
445 445 $ hg bundle --base 1 ../rimport/splitbundle
446 446 12 changesets found
447 447 $ cd ../rimport
448 448 $ hg pull -r $H1 -r $H2 ../r
449 449 pulling from ../r
450 450 adding changesets
451 451 adding manifests
452 452 adding file changes
453 453 added 8 changesets with 10 changes to 2 files (+1 heads)
454 454 new changesets e97f55b2665a:dcbaf17bf3a1
455 455 (run 'hg heads' to see heads, 'hg merge' to merge)
456 456 $ hg unbundle splitbundle
457 457 adding changesets
458 458 adding manifests
459 459 adding file changes
460 460 added 6 changesets with 5 changes to 2 files (+1 heads)
461 461 new changesets efbe78065929:683e4645fded (6 drafts)
462 462 (run 'hg heads .' to see heads, 'hg merge' to merge)
463 463 $ hg update $H2
464 464 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
465 465 $ cat target | head -n 10
466 466 Re-sanitized; nothing to see here
467 467 $ hg verify -q
468 468 $ cd ../r
469 469
470 470 Can import bundle where first revision of a file is censored
471 471
472 472 $ hg init ../rinit
473 473 $ hg --config extensions.censor= censor -r 0 target
474 474 $ hg bundle -r 0 --base null ../rinit/initbundle
475 475 1 changesets found
476 476 $ cd ../rinit
477 477 $ hg unbundle initbundle
478 478 adding changesets
479 479 adding manifests
480 480 adding file changes
481 481 added 1 changesets with 2 changes to 2 files
482 482 new changesets e97f55b2665a (1 drafts)
483 483 (run 'hg update' to get a working copy)
484 484 $ hg cat -r 0 target | head -n 10
485 485
486 486 #if revlogv2
487 487
488 488 Testing a feature that does not work in revlog v1
489 489 =================================================
490 490
491 491 Censoring a revision that is used as delta base
492 492 -----------------------------------------------
493 493
494 494 $ cd ..
495 495 $ hg init censor-with-delta
496 496 $ cd censor-with-delta
497 497 $ echo root > target
498 498 $ hg add target
499 499 $ hg commit -m root
500 500 $ B0=`hg id --debug -i`
501 501 $ for x in `"$PYTHON" $TESTDIR/seq.py 0 50000`
502 502 > do
503 503 > echo "Password: hunter$x" >> target
504 504 > done
505 505 $ hg ci -m 'write a long file'
506 506 $ B1=`hg id --debug -i`
507 507 $ echo 'small change (should create a delta)' >> target
508 508 $ hg ci -m 'create a delta over the password'
509 509 (should show that the last revision is a delta, not a snapshot)
510 510 $ B2=`hg id --debug -i`
511 511
512 512 Make sure the last revision is a delta against the revision we will censor
513 513
514 514 $ hg debugdeltachain target -T '{rev} {chainid} {chainlen} {prevrev}\n'
515 515 0 1 1 -1
516 516 1 2 1 -1
517 517 2 2 2 1
518 518
519 519 Censor the file
520 520
521 521 $ hg cat -r $B1 target | wc -l
522 522 *50002 (re)
523 523 $ hg --config extensions.censor= censor -r $B1 target
524 524 $ hg cat -r $B1 target | wc -l
525 525 *0 (re)
526 526
527 527 Check the child revision is fine
528 528
529 529 $ hg cat -r $B2 target | wc -l
530 530 *50003 (re)
531 531
532 532 #endif
533 533
534 534 Testing repository upgrade with censored revisions
535 535 ==================================================
536 536
537 537 $ cd ../rclone
538 538
539 539 With the "abort" policy
540 540 =======================
541 541
542 542 $ hg verify --config censor.policy=ignore
543 543 checking changesets
544 544 checking manifests
545 545 crosschecking files in changesets and manifests
546 546 checking files
547 547 checking dirstate
548 548 checked 14 changesets with 15 changes to 2 files
549 549 $ hg debugupgraderepo --run --quiet \
550 550 > --optimize re-delta-parent \
551 551 > --config censor.policy=abort
552 552 upgrade will perform the following actions:
553 553
554 554 requirements
555 555 preserved: * (glob)
556 556
557 557 optimisations: re-delta-parent
558 558
559 559 processed revlogs:
560 560 - all-filelogs
561 561 - changelog
562 562 - manifest
563 563
564 transaction abort!
565 rollback completed
566 abort: file censored target:613bc869fceb
567 [255]
568 564 $ hg verify --config censor.policy=ignore
569 565 checking changesets
570 566 checking manifests
571 567 crosschecking files in changesets and manifests
572 568 checking files
573 569 checking dirstate
574 570 checked 14 changesets with 15 changes to 2 files
575 571
576 572 With the "ignore" policy
577 573 ========================
578 574
579 575 $ hg verify --config censor.policy=ignore
580 576 checking changesets
581 577 checking manifests
582 578 crosschecking files in changesets and manifests
583 579 checking files
584 580 checking dirstate
585 581 checked 14 changesets with 15 changes to 2 files
586 582 $ hg debugupgraderepo --run --quiet \
587 583 > --optimize re-delta-parent \
588 584 > --config censor.policy=ignore
589 585 upgrade will perform the following actions:
590 586
591 587 requirements
592 588 preserved: * (glob)
593 589
594 590 optimisations: re-delta-parent
595 591
596 592 processed revlogs:
597 593 - all-filelogs
598 594 - changelog
599 595 - manifest
600 596
601 transaction abort!
602 rollback completed
603 abort: file censored target:613bc869fceb
604 [255]
605 597 $ hg verify --config censor.policy=ignore
606 598 checking changesets
607 599 checking manifests
608 600 crosschecking files in changesets and manifests
609 601 checking files
610 602 checking dirstate
611 603 checked 14 changesets with 15 changes to 2 files