stream-clone: smoothly detect and handle a case where a revlog is split...
marmoute
r51534:54604240 default
@@ -1,3478 +1,3501 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 CHANGELOGV2,
39 39 COMP_MODE_DEFAULT,
40 40 COMP_MODE_INLINE,
41 41 COMP_MODE_PLAIN,
42 42 DELTA_BASE_REUSE_NO,
43 43 DELTA_BASE_REUSE_TRY,
44 44 ENTRY_RANK,
45 45 FEATURES_BY_VERSION,
46 46 FLAG_GENERALDELTA,
47 47 FLAG_INLINE_DATA,
48 48 INDEX_HEADER,
49 49 KIND_CHANGELOG,
50 50 KIND_FILELOG,
51 51 RANK_UNKNOWN,
52 52 REVLOGV0,
53 53 REVLOGV1,
54 54 REVLOGV1_FLAGS,
55 55 REVLOGV2,
56 56 REVLOGV2_FLAGS,
57 57 REVLOG_DEFAULT_FLAGS,
58 58 REVLOG_DEFAULT_FORMAT,
59 59 REVLOG_DEFAULT_VERSION,
60 60 SUPPORTED_FLAGS,
61 61 )
62 62 from .revlogutils.flagutil import (
63 63 REVIDX_DEFAULT_FLAGS,
64 64 REVIDX_ELLIPSIS,
65 65 REVIDX_EXTSTORED,
66 66 REVIDX_FLAGS_ORDER,
67 67 REVIDX_HASCOPIESINFO,
68 68 REVIDX_ISCENSORED,
69 69 REVIDX_RAWTEXT_CHANGING_FLAGS,
70 70 )
71 71 from .thirdparty import attr
72 72 from . import (
73 73 ancestor,
74 74 dagop,
75 75 error,
76 76 mdiff,
77 77 policy,
78 78 pycompat,
79 79 revlogutils,
80 80 templatefilters,
81 81 util,
82 82 )
83 83 from .interfaces import (
84 84 repository,
85 85 util as interfaceutil,
86 86 )
87 87 from .revlogutils import (
88 88 deltas as deltautil,
89 89 docket as docketutil,
90 90 flagutil,
91 91 nodemap as nodemaputil,
92 92 randomaccessfile,
93 93 revlogv0,
94 94 rewrite,
95 95 sidedata as sidedatautil,
96 96 )
97 97 from .utils import (
98 98 storageutil,
99 99 stringutil,
100 100 )
101 101
102 102 # blanked usage of all the names to prevent pyflakes constraints
103 103 # We need these names available in the module for extensions.
104 104
105 105 REVLOGV0
106 106 REVLOGV1
107 107 REVLOGV2
108 108 CHANGELOGV2
109 109 FLAG_INLINE_DATA
110 110 FLAG_GENERALDELTA
111 111 REVLOG_DEFAULT_FLAGS
112 112 REVLOG_DEFAULT_FORMAT
113 113 REVLOG_DEFAULT_VERSION
114 114 REVLOGV1_FLAGS
115 115 REVLOGV2_FLAGS
116 116 REVIDX_ISCENSORED
117 117 REVIDX_ELLIPSIS
118 118 REVIDX_HASCOPIESINFO
119 119 REVIDX_EXTSTORED
120 120 REVIDX_DEFAULT_FLAGS
121 121 REVIDX_FLAGS_ORDER
122 122 REVIDX_RAWTEXT_CHANGING_FLAGS
123 123
124 124 parsers = policy.importmod('parsers')
125 125 rustancestor = policy.importrust('ancestor')
126 126 rustdagop = policy.importrust('dagop')
127 127 rustrevlog = policy.importrust('revlog')
128 128
129 129 # Aliased for performance.
130 130 _zlibdecompress = zlib.decompress
131 131
132 132 # max size of inline data embedded into a revlog
133 133 _maxinline = 131072
134 134
135 135 # Flag processors for REVIDX_ELLIPSIS.
136 136 def ellipsisreadprocessor(rl, text):
137 137 return text, False
138 138
139 139
140 140 def ellipsiswriteprocessor(rl, text):
141 141 return text, False
142 142
143 143
144 144 def ellipsisrawprocessor(rl, text):
145 145 return False
146 146
147 147
148 148 ellipsisprocessor = (
149 149 ellipsisreadprocessor,
150 150 ellipsiswriteprocessor,
151 151 ellipsisrawprocessor,
152 152 )
153 153
154 154
155 155 def _verify_revision(rl, skipflags, state, node):
156 156 """Verify the integrity of the given revlog ``node`` while providing a hook
157 157 point for extensions to influence the operation."""
158 158 if skipflags:
159 159 state[b'skipread'].add(node)
160 160 else:
161 161 # Side-effect: read content and verify hash.
162 162 rl.revision(node)
163 163
164 164
165 165 # True if a fast implementation for persistent-nodemap is available
166 166 #
167 167 # We also consider that we have a "fast" implementation in "pure" python because
168 168 # people using pure don't really have performance considerations (and a
169 169 # wheelbarrow of other slowness sources)
170 170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
171 171 parsers, 'BaseIndexObject'
172 172 )
173 173
174 174
175 175 @interfaceutil.implementer(repository.irevisiondelta)
176 176 @attr.s(slots=True)
177 177 class revlogrevisiondelta:
178 178 node = attr.ib()
179 179 p1node = attr.ib()
180 180 p2node = attr.ib()
181 181 basenode = attr.ib()
182 182 flags = attr.ib()
183 183 baserevisionsize = attr.ib()
184 184 revision = attr.ib()
185 185 delta = attr.ib()
186 186 sidedata = attr.ib()
187 187 protocol_flags = attr.ib()
188 188 linknode = attr.ib(default=None)
189 189
190 190
191 191 @interfaceutil.implementer(repository.iverifyproblem)
192 192 @attr.s(frozen=True)
193 193 class revlogproblem:
194 194 warning = attr.ib(default=None)
195 195 error = attr.ib(default=None)
196 196 node = attr.ib(default=None)
197 197
198 198
199 199 def parse_index_v1(data, inline):
200 200 # call the C implementation to parse the index data
201 201 index, cache = parsers.parse_index2(data, inline)
202 202 return index, cache
203 203
204 204
205 205 def parse_index_v2(data, inline):
206 206 # call the C implementation to parse the index data
207 207 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
208 208 return index, cache
209 209
210 210
211 211 def parse_index_cl_v2(data, inline):
212 212 # call the C implementation to parse the index data
213 213 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
214 214 return index, cache
215 215
216 216
217 217 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
218 218
219 219 def parse_index_v1_nodemap(data, inline):
220 220 index, cache = parsers.parse_index_devel_nodemap(data, inline)
221 221 return index, cache
222 222
223 223
224 224 else:
225 225 parse_index_v1_nodemap = None
226 226
227 227
228 228 def parse_index_v1_mixed(data, inline):
229 229 index, cache = parse_index_v1(data, inline)
230 230 return rustrevlog.MixedIndex(index), cache
231 231
232 232
233 233 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
234 234 # signed integer)
235 235 _maxentrysize = 0x7FFFFFFF
236 236
237 237 FILE_TOO_SHORT_MSG = _(
238 238 b'cannot read from revlog %s;'
239 239 b' expected %d bytes from offset %d, data size is %d'
240 240 )
241 241
242 242 hexdigits = b'0123456789abcdefABCDEF'
243 243
244 244
245 245 class revlog:
246 246 """
247 247 the underlying revision storage object
248 248
249 249 A revlog consists of two parts, an index and the revision data.
250 250
251 251 The index is a file with a fixed record size containing
252 252 information on each revision, including its nodeid (hash), the
253 253 nodeids of its parents, the position and offset of its data within
254 254 the data file, and the revision it's based on. Finally, each entry
255 255 contains a linkrev entry that can serve as a pointer to external
256 256 data.
257 257
258 258 The revision data itself is a linear collection of data chunks.
259 259 Each chunk represents a revision and is usually represented as a
260 260 delta against the previous chunk. To bound lookup time, runs of
261 261 deltas are limited to about 2 times the length of the original
262 262 version data. This makes retrieval of a version proportional to
263 263 its size, or O(1) relative to the number of revisions.
264 264
265 265 Both pieces of the revlog are written to in an append-only
266 266 fashion, which means we never need to rewrite a file to insert or
267 267 remove data, and can use some simple techniques to avoid the need
268 268 for locking while reading.
269 269
270 270 If checkambig, indexfile is opened with checkambig=True at
271 271 writing, to avoid file stat ambiguity.
272 272
273 273 If mmaplargeindex is True, and an mmapindexthreshold is set, the
274 274 index will be mmapped rather than read if it is larger than the
275 275 configured threshold.
276 276
277 277 If censorable is True, the revlog can have censored revisions.
278 278
279 279 If `upperboundcomp` is not None, this is the expected maximal gain from
280 280 compression for the data content.
281 281
282 282 `concurrencychecker` is an optional function that receives 3 arguments: a
283 283 file handle, a filename, and an expected position. It should check whether
284 284 the current position in the file handle is valid, and log/warn/fail (by
285 285 raising).
286 286
287 287 See mercurial/revlogutils/constants.py for details about the content of an
288 288 index entry.
289 289 """
290 290
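A minimal usage sketch of the class described above (hypothetical caller, not part of revlog.py): it assumes a vfs-style `opener` and a filelog-like target/radix pair, whose exact values are repository-specific.

    rl = revlog(opener, target=(KIND_FILELOG, b'foo.txt'), radix=b'data/foo.txt')
    tip_node = rl.tip()            # node id of the most recent revision
    text = rl.revision(tip_node)   # revision text rebuilt from its delta chain
    p1, p2 = rl.parents(tip_node)  # parent node ids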
291 291 _flagserrorclass = error.RevlogError
292 292
293 293 def __init__(
294 294 self,
295 295 opener,
296 296 target,
297 297 radix,
298 298 postfix=None, # only exist for `tmpcensored` now
299 299 checkambig=False,
300 300 mmaplargeindex=False,
301 301 censorable=False,
302 302 upperboundcomp=None,
303 303 persistentnodemap=False,
304 304 concurrencychecker=None,
305 305 trypending=False,
306 306 try_split=False,
307 307 canonical_parent_order=True,
308 308 ):
309 309 """
310 310 create a revlog object
311 311
312 312 opener is a function that abstracts the file opening operation
313 313 and can be used to implement COW semantics or the like.
314 314
315 315 `target`: a (KIND, ID) tuple that identifies the content stored in
316 316 this revlog. It helps the rest of the code to understand what the revlog
317 317 is about without having to resort to heuristics and index filename
318 318 analysis. Note that this must reliably be set by normal code, but
319 319 that test, debug, or performance measurement code might not set this to
320 320 an accurate value.
321 321 """
322 322 self.upperboundcomp = upperboundcomp
323 323
324 324 self.radix = radix
325 325
326 326 self._docket_file = None
327 327 self._indexfile = None
328 328 self._datafile = None
329 329 self._sidedatafile = None
330 330 self._nodemap_file = None
331 331 self.postfix = postfix
332 332 self._trypending = trypending
333 333 self._try_split = try_split
334 334 self.opener = opener
335 335 if persistentnodemap:
336 336 self._nodemap_file = nodemaputil.get_nodemap_file(self)
337 337
338 338 assert target[0] in ALL_KINDS
339 339 assert len(target) == 2
340 340 self.target = target
341 341 # When True, indexfile is opened with checkambig=True at writing, to
342 342 # avoid file stat ambiguity.
343 343 self._checkambig = checkambig
344 344 self._mmaplargeindex = mmaplargeindex
345 345 self._censorable = censorable
346 346 # 3-tuple of (node, rev, text) for a raw revision.
347 347 self._revisioncache = None
348 348 # Maps rev to chain base rev.
349 349 self._chainbasecache = util.lrucachedict(100)
350 350 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
351 351 self._chunkcache = (0, b'')
352 352 # How much data to read and cache into the raw revlog data cache.
353 353 self._chunkcachesize = 65536
354 354 self._maxchainlen = None
355 355 self._deltabothparents = True
356 356 self._candidate_group_chunk_size = 0
357 357 self._debug_delta = False
358 358 self.index = None
359 359 self._docket = None
360 360 self._nodemap_docket = None
361 361 # Mapping of partial identifiers to full nodes.
362 362 self._pcache = {}
363 363 # Mapping of revision integer to full node.
364 364 self._compengine = b'zlib'
365 365 self._compengineopts = {}
366 366 self._maxdeltachainspan = -1
367 367 self._withsparseread = False
368 368 self._sparserevlog = False
369 369 self.hassidedata = False
370 370 self._srdensitythreshold = 0.50
371 371 self._srmingapsize = 262144
372 372
373 373 # other optional features
374 374
375 375 # might remove rank configuration once the computation has no impact
376 376 self._compute_rank = False
377 377
378 378 # Make copy of flag processors so each revlog instance can support
379 379 # custom flags.
380 380 self._flagprocessors = dict(flagutil.flagprocessors)
381 381
382 382 # 3-tuple of file handles being used for active writing.
383 383 self._writinghandles = None
384 384 # prevent nesting of addgroup
385 385 self._adding_group = None
386 386
387 387 self._loadindex()
388 388
389 389 self._concurrencychecker = concurrencychecker
390 390
391 391 # parent order is supposed to be semantically irrelevant, so we
392 392 # normally re-sort parents to ensure that the first parent is non-null,
393 393 # if there is a non-null parent at all.
394 394 # filelog abuses the parent order as a flag to mark some instances of
395 395 # meta-encoded files, so allow it to disable this behavior.
396 396 self.canonical_parent_order = canonical_parent_order
397 397
398 398 def _init_opts(self):
399 399 """process options (from above/config) to setup associated default revlog mode
400 400
401 401 These values might be affected when actually reading on disk information.
402 402
403 403 The relevant values are returned for use in _loadindex().
404 404
405 405 * newversionflags:
406 406 version header to use if we need to create a new revlog
407 407
408 408 * mmapindexthreshold:
409 409 minimal index size at which to start using mmap
410 410
411 411 * force_nodemap:
412 412 force the usage of a "development" version of the nodemap code
413 413 """
414 414 mmapindexthreshold = None
415 415 opts = self.opener.options
416 416
417 417 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
418 418 new_header = CHANGELOGV2
419 419 self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
420 420 elif b'revlogv2' in opts:
421 421 new_header = REVLOGV2
422 422 elif b'revlogv1' in opts:
423 423 new_header = REVLOGV1 | FLAG_INLINE_DATA
424 424 if b'generaldelta' in opts:
425 425 new_header |= FLAG_GENERALDELTA
426 426 elif b'revlogv0' in self.opener.options:
427 427 new_header = REVLOGV0
428 428 else:
429 429 new_header = REVLOG_DEFAULT_VERSION
430 430
431 431 if b'chunkcachesize' in opts:
432 432 self._chunkcachesize = opts[b'chunkcachesize']
433 433 if b'maxchainlen' in opts:
434 434 self._maxchainlen = opts[b'maxchainlen']
435 435 if b'deltabothparents' in opts:
436 436 self._deltabothparents = opts[b'deltabothparents']
437 437 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
438 438 if dps_cgds:
439 439 self._candidate_group_chunk_size = dps_cgds
440 440 self._lazydelta = bool(opts.get(b'lazydelta', True))
441 441 self._lazydeltabase = False
442 442 if self._lazydelta:
443 443 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
444 444 if b'debug-delta' in opts:
445 445 self._debug_delta = opts[b'debug-delta']
446 446 if b'compengine' in opts:
447 447 self._compengine = opts[b'compengine']
448 448 if b'zlib.level' in opts:
449 449 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
450 450 if b'zstd.level' in opts:
451 451 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
452 452 if b'maxdeltachainspan' in opts:
453 453 self._maxdeltachainspan = opts[b'maxdeltachainspan']
454 454 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
455 455 mmapindexthreshold = opts[b'mmapindexthreshold']
456 456 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
457 457 withsparseread = bool(opts.get(b'with-sparse-read', False))
458 458 # sparse-revlog forces sparse-read
459 459 self._withsparseread = self._sparserevlog or withsparseread
460 460 if b'sparse-read-density-threshold' in opts:
461 461 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
462 462 if b'sparse-read-min-gap-size' in opts:
463 463 self._srmingapsize = opts[b'sparse-read-min-gap-size']
464 464 if opts.get(b'enableellipsis'):
465 465 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
466 466
467 467 # revlog v0 doesn't have flag processors
468 468 for flag, processor in opts.get(b'flagprocessors', {}).items():
469 469 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
470 470
471 471 if self._chunkcachesize <= 0:
472 472 raise error.RevlogError(
473 473 _(b'revlog chunk cache size %r is not greater than 0')
474 474 % self._chunkcachesize
475 475 )
476 476 elif self._chunkcachesize & (self._chunkcachesize - 1):
477 477 raise error.RevlogError(
478 478 _(b'revlog chunk cache size %r is not a power of 2')
479 479 % self._chunkcachesize
480 480 )
481 481 force_nodemap = opts.get(b'devel-force-nodemap', False)
482 482 return new_header, mmapindexthreshold, force_nodemap
483 483
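A hedged sketch of the options dict that `_init_opts()` above consumes; the keys come from the method body, but the values are invented for illustration and would normally come from repository configuration:

    opener.options = {
        b'revlogv1': True,               # write new revlogs with the REVLOGV1 header
        b'generaldelta': True,           # allow deltas against either parent
        b'sparse-revlog': True,          # also forces sparse-read
        b'chunkcachesize': 65536,        # must be a power of two
        b'mmapindexthreshold': 1 << 20,  # only honored when mmaplargeindex is set
    }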
484 484 def _get_data(self, filepath, mmap_threshold, size=None):
485 485 """return a file content with or without mmap
486 486
487 487 If the file is missing return the empty string"""
488 488 try:
489 489 with self.opener(filepath) as fp:
490 490 if mmap_threshold is not None:
491 491 file_size = self.opener.fstat(fp).st_size
492 492 if file_size >= mmap_threshold:
493 493 if size is not None:
494 494 # avoid potential mmap crash
495 495 size = min(file_size, size)
496 496 # TODO: should .close() to release resources without
497 497 # relying on Python GC
498 498 if size is None:
499 499 return util.buffer(util.mmapread(fp))
500 500 else:
501 501 return util.buffer(util.mmapread(fp, size))
502 502 if size is None:
503 503 return fp.read()
504 504 else:
505 505 return fp.read(size)
506 506 except FileNotFoundError:
507 507 return b''
508 508
509 def get_streams(self, max_linkrev):
509 def get_streams(self, max_linkrev, force_inline=False):
510 510 n = len(self)
511 511 index = self.index
512 512 while n > 0:
513 513 linkrev = index[n - 1][4]
514 514 if linkrev < max_linkrev:
515 515 break
516 516 # note: this loop will rarely go through multiple iterations, since
517 517 # it only traverses commits created during the current streaming
518 518 # pull operation.
519 519 #
520 520 # If this becomes a problem, using a binary search should cap the
521 521 # runtime of this.
522 522 n = n - 1
523 523 if n == 0:
524 524 # no data to send
525 525 return []
526 526 index_size = n * index.entry_size
527 527 data_size = self.end(n - 1)
528 528
529 529 # XXX we might have been split (or stripped) since the object
530 530 # initialization. We need to close this race too, by having a way to
531 531 # pre-open the files we feed to the revlog and never closing them before
532 532 # we are done streaming.
533 533
534 534 if self._inline:
535 535
536 536 def get_stream():
537 537 with self._indexfp() as fp:
538 538 yield None
539 539 size = index_size + data_size
540 540 if size <= 65536:
541 541 yield fp.read(size)
542 542 else:
543 543 yield from util.filechunkiter(fp, limit=size)
544 544
545 545 inline_stream = get_stream()
546 546 next(inline_stream)
547 547 return [
548 548 (self._indexfile, inline_stream, index_size + data_size),
549 549 ]
550 elif force_inline:
551
552 def get_stream():
553 with self._datafp() as fp_d:
554 yield None
555
556 for rev in range(n):
557 idx = self.index.entry_binary(rev)
558 if rev == 0 and self._docket is None:
559 # re-inject the inline flag
560 header = self._format_flags
561 header |= self._format_version
562 header |= FLAG_INLINE_DATA
563 header = self.index.pack_header(header)
564 idx = header + idx
565 yield idx
566 yield self._getsegmentforrevs(rev, rev, df=fp_d)[1]
567
568 inline_stream = get_stream()
569 next(inline_stream)
570 return [
571 (self._indexfile, inline_stream, index_size + data_size),
572 ]
550 573 else:
551 574
552 575 def get_index_stream():
553 576 with self._indexfp() as fp:
554 577 yield None
555 578 if index_size <= 65536:
556 579 yield fp.read(index_size)
557 580 else:
558 581 yield from util.filechunkiter(fp, limit=index_size)
559 582
560 583 def get_data_stream():
561 584 with self._datafp() as fp:
562 585 yield None
563 586 if data_size <= 65536:
564 587 yield fp.read(data_size)
565 588 else:
566 589 yield from util.filechunkiter(fp, limit=data_size)
567 590
568 591 index_stream = get_index_stream()
569 592 next(index_stream)
570 593 data_stream = get_data_stream()
571 594 next(data_stream)
572 595 return [
573 596 (self._datafile, data_stream, data_size),
574 597 (self._indexfile, index_stream, index_size),
575 598 ]
576 599
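A hypothetical consumer of `get_streams()` above, e.g. stream-clone code asking for a split revlog to be re-emitted as a single inline file; `rl` and `max_linkrev` are assumed to exist:

    for name, stream, size in rl.get_streams(max_linkrev, force_inline=True):
        sent = 0
        for chunk in stream:  # the priming `None` was already consumed via next()
            sent += len(chunk)
        assert sent == size   # each entry advertises its exact byte count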
577 600 def _loadindex(self, docket=None):
578 601
579 602 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
580 603
581 604 if self.postfix is not None:
582 605 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
583 606 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
584 607 entry_point = b'%s.i.a' % self.radix
585 608 elif self._try_split and self.opener.exists(b'%s.i.s' % self.radix):
586 609 entry_point = b'%s.i.s' % self.radix
587 610 else:
588 611 entry_point = b'%s.i' % self.radix
589 612
590 613 if docket is not None:
591 614 self._docket = docket
592 615 self._docket_file = entry_point
593 616 else:
594 617 self._initempty = True
595 618 entry_data = self._get_data(entry_point, mmapindexthreshold)
596 619 if len(entry_data) > 0:
597 620 header = INDEX_HEADER.unpack(entry_data[:4])[0]
598 621 self._initempty = False
599 622 else:
600 623 header = new_header
601 624
602 625 self._format_flags = header & ~0xFFFF
603 626 self._format_version = header & 0xFFFF
604 627
605 628 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
606 629 if supported_flags is None:
607 630 msg = _(b'unknown version (%d) in revlog %s')
608 631 msg %= (self._format_version, self.display_id)
609 632 raise error.RevlogError(msg)
610 633 elif self._format_flags & ~supported_flags:
611 634 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
612 635 display_flag = self._format_flags >> 16
613 636 msg %= (display_flag, self._format_version, self.display_id)
614 637 raise error.RevlogError(msg)
615 638
616 639 features = FEATURES_BY_VERSION[self._format_version]
617 640 self._inline = features[b'inline'](self._format_flags)
618 641 self._generaldelta = features[b'generaldelta'](self._format_flags)
619 642 self.hassidedata = features[b'sidedata']
620 643
621 644 if not features[b'docket']:
622 645 self._indexfile = entry_point
623 646 index_data = entry_data
624 647 else:
625 648 self._docket_file = entry_point
626 649 if self._initempty:
627 650 self._docket = docketutil.default_docket(self, header)
628 651 else:
629 652 self._docket = docketutil.parse_docket(
630 653 self, entry_data, use_pending=self._trypending
631 654 )
632 655
633 656 if self._docket is not None:
634 657 self._indexfile = self._docket.index_filepath()
635 658 index_data = b''
636 659 index_size = self._docket.index_end
637 660 if index_size > 0:
638 661 index_data = self._get_data(
639 662 self._indexfile, mmapindexthreshold, size=index_size
640 663 )
641 664 if len(index_data) < index_size:
642 665 msg = _(b'too few index data for %s: got %d, expected %d')
643 666 msg %= (self.display_id, len(index_data), index_size)
644 667 raise error.RevlogError(msg)
645 668
646 669 self._inline = False
647 670 # generaldelta implied by version 2 revlogs.
648 671 self._generaldelta = True
649 672 # the logic for persistent nodemap will be dealt with within the
650 673 # main docket, so disable it for now.
651 674 self._nodemap_file = None
652 675
653 676 if self._docket is not None:
654 677 self._datafile = self._docket.data_filepath()
655 678 self._sidedatafile = self._docket.sidedata_filepath()
656 679 elif self.postfix is None:
657 680 self._datafile = b'%s.d' % self.radix
658 681 else:
659 682 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
660 683
661 684 self.nodeconstants = sha1nodeconstants
662 685 self.nullid = self.nodeconstants.nullid
663 686
664 687 # sparse-revlog can't be on without general-delta (issue6056)
665 688 if not self._generaldelta:
666 689 self._sparserevlog = False
667 690
668 691 self._storedeltachains = True
669 692
670 693 devel_nodemap = (
671 694 self._nodemap_file
672 695 and force_nodemap
673 696 and parse_index_v1_nodemap is not None
674 697 )
675 698
676 699 use_rust_index = False
677 700 if rustrevlog is not None:
678 701 if self._nodemap_file is not None:
679 702 use_rust_index = True
680 703 else:
681 704 use_rust_index = self.opener.options.get(b'rust.index')
682 705
683 706 self._parse_index = parse_index_v1
684 707 if self._format_version == REVLOGV0:
685 708 self._parse_index = revlogv0.parse_index_v0
686 709 elif self._format_version == REVLOGV2:
687 710 self._parse_index = parse_index_v2
688 711 elif self._format_version == CHANGELOGV2:
689 712 self._parse_index = parse_index_cl_v2
690 713 elif devel_nodemap:
691 714 self._parse_index = parse_index_v1_nodemap
692 715 elif use_rust_index:
693 716 self._parse_index = parse_index_v1_mixed
694 717 try:
695 718 d = self._parse_index(index_data, self._inline)
696 719 index, chunkcache = d
697 720 use_nodemap = (
698 721 not self._inline
699 722 and self._nodemap_file is not None
700 723 and util.safehasattr(index, 'update_nodemap_data')
701 724 )
702 725 if use_nodemap:
703 726 nodemap_data = nodemaputil.persisted_data(self)
704 727 if nodemap_data is not None:
705 728 docket = nodemap_data[0]
706 729 if (
707 730 len(d[0]) > docket.tip_rev
708 731 and d[0][docket.tip_rev][7] == docket.tip_node
709 732 ):
710 733 # no changelog tampering
711 734 self._nodemap_docket = docket
712 735 index.update_nodemap_data(*nodemap_data)
713 736 except (ValueError, IndexError):
714 737 raise error.RevlogError(
715 738 _(b"index %s is corrupted") % self.display_id
716 739 )
717 740 self.index = index
718 741 self._segmentfile = randomaccessfile.randomaccessfile(
719 742 self.opener,
720 743 (self._indexfile if self._inline else self._datafile),
721 744 self._chunkcachesize,
722 745 chunkcache,
723 746 )
724 747 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
725 748 self.opener,
726 749 self._sidedatafile,
727 750 self._chunkcachesize,
728 751 )
729 752 # revnum -> (chain-length, sum-delta-length)
730 753 self._chaininfocache = util.lrucachedict(500)
731 754 # revlog header -> revlog compressor
732 755 self._decompressors = {}
733 756
734 757 def get_revlog(self):
735 758 """simple function to mirror API of other not-really-revlog API"""
736 759 return self
737 760
738 761 @util.propertycache
739 762 def revlog_kind(self):
740 763 return self.target[0]
741 764
742 765 @util.propertycache
743 766 def display_id(self):
744 767 """The public facing "ID" of the revlog that we use in message"""
745 768 if self.revlog_kind == KIND_FILELOG:
746 769 # Reference the file without the "data/" prefix, so it is familiar
747 770 # to the user.
748 771 return self.target[1]
749 772 else:
750 773 return self.radix
751 774
752 775 def _get_decompressor(self, t):
753 776 try:
754 777 compressor = self._decompressors[t]
755 778 except KeyError:
756 779 try:
757 780 engine = util.compengines.forrevlogheader(t)
758 781 compressor = engine.revlogcompressor(self._compengineopts)
759 782 self._decompressors[t] = compressor
760 783 except KeyError:
761 784 raise error.RevlogError(
762 785 _(b'unknown compression type %s') % binascii.hexlify(t)
763 786 )
764 787 return compressor
765 788
766 789 @util.propertycache
767 790 def _compressor(self):
768 791 engine = util.compengines[self._compengine]
769 792 return engine.revlogcompressor(self._compengineopts)
770 793
771 794 @util.propertycache
772 795 def _decompressor(self):
773 796 """the default decompressor"""
774 797 if self._docket is None:
775 798 return None
776 799 t = self._docket.default_compression_header
777 800 c = self._get_decompressor(t)
778 801 return c.decompress
779 802
780 803 def _indexfp(self):
781 804 """file object for the revlog's index file"""
782 805 return self.opener(self._indexfile, mode=b"r")
783 806
784 807 def __index_write_fp(self):
785 808 # You should not use this directly; use `_writing` instead
786 809 try:
787 810 f = self.opener(
788 811 self._indexfile, mode=b"r+", checkambig=self._checkambig
789 812 )
790 813 if self._docket is None:
791 814 f.seek(0, os.SEEK_END)
792 815 else:
793 816 f.seek(self._docket.index_end, os.SEEK_SET)
794 817 return f
795 818 except FileNotFoundError:
796 819 return self.opener(
797 820 self._indexfile, mode=b"w+", checkambig=self._checkambig
798 821 )
799 822
800 823 def __index_new_fp(self):
801 824 # You should not use this unless you are upgrading from inline revlog
802 825 return self.opener(
803 826 self._indexfile,
804 827 mode=b"w",
805 828 checkambig=self._checkambig,
806 829 atomictemp=True,
807 830 )
808 831
809 832 def _datafp(self, mode=b'r'):
810 833 """file object for the revlog's data file"""
811 834 return self.opener(self._datafile, mode=mode)
812 835
813 836 @contextlib.contextmanager
814 837 def _sidedatareadfp(self):
815 838 """file object suitable to read sidedata"""
816 839 if self._writinghandles:
817 840 yield self._writinghandles[2]
818 841 else:
819 842 with self.opener(self._sidedatafile) as fp:
820 843 yield fp
821 844
822 845 def tiprev(self):
823 846 return len(self.index) - 1
824 847
825 848 def tip(self):
826 849 return self.node(self.tiprev())
827 850
828 851 def __contains__(self, rev):
829 852 return 0 <= rev < len(self)
830 853
831 854 def __len__(self):
832 855 return len(self.index)
833 856
834 857 def __iter__(self):
835 858 return iter(range(len(self)))
836 859
837 860 def revs(self, start=0, stop=None):
838 861 """iterate over all rev in this revlog (from start to stop)"""
839 862 return storageutil.iterrevs(len(self), start=start, stop=stop)
840 863
841 864 def hasnode(self, node):
842 865 try:
843 866 self.rev(node)
844 867 return True
845 868 except KeyError:
846 869 return False
847 870
848 871 def candelta(self, baserev, rev):
849 872 """whether two revisions (baserev, rev) can be delta-ed or not"""
850 873 # Disable delta if either rev requires a content-changing flag
851 874 # processor (ex. LFS). This is because such flag processor can alter
852 875 # the rawtext content that the delta will be based on, and two clients
853 876 # could have the same revlog node with different flags (i.e. different
854 877 # rawtext contents) and the delta could be incompatible.
855 878 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
856 879 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
857 880 ):
858 881 return False
859 882 return True
860 883
861 884 def update_caches(self, transaction):
862 885 if self._nodemap_file is not None:
863 886 if transaction is None:
864 887 nodemaputil.update_persistent_nodemap(self)
865 888 else:
866 889 nodemaputil.setup_persistent_nodemap(transaction, self)
867 890
868 891 def clearcaches(self):
869 892 self._revisioncache = None
870 893 self._chainbasecache.clear()
871 894 self._segmentfile.clear_cache()
872 895 self._segmentfile_sidedata.clear_cache()
873 896 self._pcache = {}
874 897 self._nodemap_docket = None
875 898 self.index.clearcaches()
876 899 # The python code is the one responsible for validating the docket, so we
877 900 # end up having to refresh it here.
878 901 use_nodemap = (
879 902 not self._inline
880 903 and self._nodemap_file is not None
881 904 and util.safehasattr(self.index, 'update_nodemap_data')
882 905 )
883 906 if use_nodemap:
884 907 nodemap_data = nodemaputil.persisted_data(self)
885 908 if nodemap_data is not None:
886 909 self._nodemap_docket = nodemap_data[0]
887 910 self.index.update_nodemap_data(*nodemap_data)
888 911
889 912 def rev(self, node):
890 913 try:
891 914 return self.index.rev(node)
892 915 except TypeError:
893 916 raise
894 917 except error.RevlogError:
895 918 # parsers.c radix tree lookup failed
896 919 if (
897 920 node == self.nodeconstants.wdirid
898 921 or node in self.nodeconstants.wdirfilenodeids
899 922 ):
900 923 raise error.WdirUnsupported
901 924 raise error.LookupError(node, self.display_id, _(b'no node'))
902 925
903 926 # Accessors for index entries.
904 927
905 928 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
906 929 # are flags.
907 930 def start(self, rev):
908 931 return int(self.index[rev][0] >> 16)
909 932
910 933 def sidedata_cut_off(self, rev):
911 934 sd_cut_off = self.index[rev][8]
912 935 if sd_cut_off != 0:
913 936 return sd_cut_off
914 937 # This is some annoying dance, because entries without sidedata
915 938 # currently use 0 as their offset. (instead of previous-offset +
916 939 # previous-size)
917 940 #
918 941 # We should reconsider this sidedata → 0 sidedata_offset policy.
919 942 # In the meantime, we need this.
920 943 while 0 <= rev:
921 944 e = self.index[rev]
922 945 if e[9] != 0:
923 946 return e[8] + e[9]
924 947 rev -= 1
925 948 return 0
926 949
927 950 def flags(self, rev):
928 951 return self.index[rev][0] & 0xFFFF
929 952
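`start()` and `flags()` above unpack the packed 8-byte field described in the comment before `start()`; a small worked example with made-up values:

    packed = (1234 << 16) | 0x0001  # 6 bytes of offset, 2 bytes of flags
    offset = packed >> 16           # 1234: byte position in the data file
    flags = packed & 0xFFFF         # 0x0001: per-revision flag bits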
930 953 def length(self, rev):
931 954 return self.index[rev][1]
932 955
933 956 def sidedata_length(self, rev):
934 957 if not self.hassidedata:
935 958 return 0
936 959 return self.index[rev][9]
937 960
938 961 def rawsize(self, rev):
939 962 """return the length of the uncompressed text for a given revision"""
940 963 l = self.index[rev][2]
941 964 if l >= 0:
942 965 return l
943 966
944 967 t = self.rawdata(rev)
945 968 return len(t)
946 969
947 970 def size(self, rev):
948 971 """length of non-raw text (processed by a "read" flag processor)"""
949 972 # fast path: if no "read" flag processor could change the content,
950 973 # size is rawsize. note: ELLIPSIS is known to not change the content.
951 974 flags = self.flags(rev)
952 975 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
953 976 return self.rawsize(rev)
954 977
955 978 return len(self.revision(rev))
956 979
957 980 def fast_rank(self, rev):
958 981 """Return the rank of a revision if already known, or None otherwise.
959 982
960 983 The rank of a revision is the size of the sub-graph it defines as a
961 984 head. Equivalently, the rank of a revision `r` is the size of the set
962 985 `ancestors(r)`, `r` included.
963 986
964 987 This method returns the rank retrieved from the revlog in constant
965 988 time. It makes no attempt at computing unknown values for versions of
966 989 the revlog which do not persist the rank.
967 990 """
968 991 rank = self.index[rev][ENTRY_RANK]
969 992 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
970 993 return None
971 994 if rev == nullrev:
972 995 return 0 # convention
973 996 return rank
974 997
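A worked illustration of the rank notion documented in `fast_rank()` above, using a hypothetical three-revision linear history; only formats that persist the rank (CHANGELOGV2) return a value:

    # history: 0 <- 1 <- 2, each revision's sole parent being the previous one
    # ancestors(2) == {0, 1, 2}, so the rank of revision 2 is 3
    # rl.fast_rank(2) -> 3 on a changelog-v2 revlog, None otherwise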
975 998 def chainbase(self, rev):
976 999 base = self._chainbasecache.get(rev)
977 1000 if base is not None:
978 1001 return base
979 1002
980 1003 index = self.index
981 1004 iterrev = rev
982 1005 base = index[iterrev][3]
983 1006 while base != iterrev:
984 1007 iterrev = base
985 1008 base = index[iterrev][3]
986 1009
987 1010 self._chainbasecache[rev] = base
988 1011 return base
989 1012
990 1013 def linkrev(self, rev):
991 1014 return self.index[rev][4]
992 1015
993 1016 def parentrevs(self, rev):
994 1017 try:
995 1018 entry = self.index[rev]
996 1019 except IndexError:
997 1020 if rev == wdirrev:
998 1021 raise error.WdirUnsupported
999 1022 raise
1000 1023
1001 1024 if self.canonical_parent_order and entry[5] == nullrev:
1002 1025 return entry[6], entry[5]
1003 1026 else:
1004 1027 return entry[5], entry[6]
1005 1028
1006 1029 # fast parentrevs(rev) where rev isn't filtered
1007 1030 _uncheckedparentrevs = parentrevs
1008 1031
1009 1032 def node(self, rev):
1010 1033 try:
1011 1034 return self.index[rev][7]
1012 1035 except IndexError:
1013 1036 if rev == wdirrev:
1014 1037 raise error.WdirUnsupported
1015 1038 raise
1016 1039
1017 1040 # Derived from index values.
1018 1041
1019 1042 def end(self, rev):
1020 1043 return self.start(rev) + self.length(rev)
1021 1044
1022 1045 def parents(self, node):
1023 1046 i = self.index
1024 1047 d = i[self.rev(node)]
1025 1048 # inline node() to avoid function call overhead
1026 1049 if self.canonical_parent_order and d[5] == self.nullid:
1027 1050 return i[d[6]][7], i[d[5]][7]
1028 1051 else:
1029 1052 return i[d[5]][7], i[d[6]][7]
1030 1053
1031 1054 def chainlen(self, rev):
1032 1055 return self._chaininfo(rev)[0]
1033 1056
1034 1057 def _chaininfo(self, rev):
1035 1058 chaininfocache = self._chaininfocache
1036 1059 if rev in chaininfocache:
1037 1060 return chaininfocache[rev]
1038 1061 index = self.index
1039 1062 generaldelta = self._generaldelta
1040 1063 iterrev = rev
1041 1064 e = index[iterrev]
1042 1065 clen = 0
1043 1066 compresseddeltalen = 0
1044 1067 while iterrev != e[3]:
1045 1068 clen += 1
1046 1069 compresseddeltalen += e[1]
1047 1070 if generaldelta:
1048 1071 iterrev = e[3]
1049 1072 else:
1050 1073 iterrev -= 1
1051 1074 if iterrev in chaininfocache:
1052 1075 t = chaininfocache[iterrev]
1053 1076 clen += t[0]
1054 1077 compresseddeltalen += t[1]
1055 1078 break
1056 1079 e = index[iterrev]
1057 1080 else:
1058 1081 # Add text length of base since decompressing that also takes
1059 1082 # work. For cache hits the length is already included.
1060 1083 compresseddeltalen += e[1]
1061 1084 r = (clen, compresseddeltalen)
1062 1085 chaininfocache[rev] = r
1063 1086 return r
1064 1087
1065 1088 def _deltachain(self, rev, stoprev=None):
1066 1089 """Obtain the delta chain for a revision.
1067 1090
1068 1091 ``stoprev`` specifies a revision to stop at. If not specified, we
1069 1092 stop at the base of the chain.
1070 1093
1071 1094 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1072 1095 revs in ascending order and ``stopped`` is a bool indicating whether
1073 1096 ``stoprev`` was hit.
1074 1097 """
1075 1098 # Try C implementation.
1076 1099 try:
1077 1100 return self.index.deltachain(rev, stoprev, self._generaldelta)
1078 1101 except AttributeError:
1079 1102 pass
1080 1103
1081 1104 chain = []
1082 1105
1083 1106 # Alias to prevent attribute lookup in tight loop.
1084 1107 index = self.index
1085 1108 generaldelta = self._generaldelta
1086 1109
1087 1110 iterrev = rev
1088 1111 e = index[iterrev]
1089 1112 while iterrev != e[3] and iterrev != stoprev:
1090 1113 chain.append(iterrev)
1091 1114 if generaldelta:
1092 1115 iterrev = e[3]
1093 1116 else:
1094 1117 iterrev -= 1
1095 1118 e = index[iterrev]
1096 1119
1097 1120 if iterrev == stoprev:
1098 1121 stopped = True
1099 1122 else:
1100 1123 chain.append(iterrev)
1101 1124 stopped = False
1102 1125
1103 1126 chain.reverse()
1104 1127 return chain, stopped
1105 1128
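A hedged usage sketch for `_deltachain()` above, assuming `rl` is a revlog and `rev` a valid revision; it only illustrates the shape of the return value:

    chain, stopped = rl._deltachain(rev)
    # `chain` lists revision numbers in ascending order and ends with `rev`;
    # when `stopped` is False, chain[0] is the full-snapshot base and
    # len(chain) - 1 deltas are applied on top of it to rebuild `rev`.
    assert chain[-1] == rev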
1106 1129 def ancestors(self, revs, stoprev=0, inclusive=False):
1107 1130 """Generate the ancestors of 'revs' in reverse revision order.
1108 1131 Does not generate revs lower than stoprev.
1109 1132
1110 1133 See the documentation for ancestor.lazyancestors for more details."""
1111 1134
1112 1135 # first, make sure start revisions aren't filtered
1113 1136 revs = list(revs)
1114 1137 checkrev = self.node
1115 1138 for r in revs:
1116 1139 checkrev(r)
1117 1140 # and we're sure ancestors aren't filtered as well
1118 1141
1119 1142 if rustancestor is not None and self.index.rust_ext_compat:
1120 1143 lazyancestors = rustancestor.LazyAncestors
1121 1144 arg = self.index
1122 1145 else:
1123 1146 lazyancestors = ancestor.lazyancestors
1124 1147 arg = self._uncheckedparentrevs
1125 1148 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1126 1149
1127 1150 def descendants(self, revs):
1128 1151 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1129 1152
1130 1153 def findcommonmissing(self, common=None, heads=None):
1131 1154 """Return a tuple of the ancestors of common and the ancestors of heads
1132 1155 that are not ancestors of common. In revset terminology, we return the
1133 1156 tuple:
1134 1157
1135 1158 ::common, (::heads) - (::common)
1136 1159
1137 1160 The list is sorted by revision number, meaning it is
1138 1161 topologically sorted.
1139 1162
1140 1163 'heads' and 'common' are both lists of node IDs. If heads is
1141 1164 not supplied, uses all of the revlog's heads. If common is not
1142 1165 supplied, uses nullid."""
1143 1166 if common is None:
1144 1167 common = [self.nullid]
1145 1168 if heads is None:
1146 1169 heads = self.heads()
1147 1170
1148 1171 common = [self.rev(n) for n in common]
1149 1172 heads = [self.rev(n) for n in heads]
1150 1173
1151 1174 # we want the ancestors, but inclusive
1152 1175 class lazyset:
1153 1176 def __init__(self, lazyvalues):
1154 1177 self.addedvalues = set()
1155 1178 self.lazyvalues = lazyvalues
1156 1179
1157 1180 def __contains__(self, value):
1158 1181 return value in self.addedvalues or value in self.lazyvalues
1159 1182
1160 1183 def __iter__(self):
1161 1184 added = self.addedvalues
1162 1185 for r in added:
1163 1186 yield r
1164 1187 for r in self.lazyvalues:
1165 1188 if not r in added:
1166 1189 yield r
1167 1190
1168 1191 def add(self, value):
1169 1192 self.addedvalues.add(value)
1170 1193
1171 1194 def update(self, values):
1172 1195 self.addedvalues.update(values)
1173 1196
1174 1197 has = lazyset(self.ancestors(common))
1175 1198 has.add(nullrev)
1176 1199 has.update(common)
1177 1200
1178 1201 # take all ancestors from heads that aren't in has
1179 1202 missing = set()
1180 1203 visit = collections.deque(r for r in heads if r not in has)
1181 1204 while visit:
1182 1205 r = visit.popleft()
1183 1206 if r in missing:
1184 1207 continue
1185 1208 else:
1186 1209 missing.add(r)
1187 1210 for p in self.parentrevs(r):
1188 1211 if p not in has:
1189 1212 visit.append(p)
1190 1213 missing = list(missing)
1191 1214 missing.sort()
1192 1215 return has, [self.node(miss) for miss in missing]
1193 1216
1194 1217 def incrementalmissingrevs(self, common=None):
1195 1218 """Return an object that can be used to incrementally compute the
1196 1219 revision numbers of the ancestors of arbitrary sets that are not
1197 1220 ancestors of common. This is an ancestor.incrementalmissingancestors
1198 1221 object.
1199 1222
1200 1223 'common' is a list of revision numbers. If common is not supplied, uses
1201 1224 nullrev.
1202 1225 """
1203 1226 if common is None:
1204 1227 common = [nullrev]
1205 1228
1206 1229 if rustancestor is not None and self.index.rust_ext_compat:
1207 1230 return rustancestor.MissingAncestors(self.index, common)
1208 1231 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1209 1232
1210 1233 def findmissingrevs(self, common=None, heads=None):
1211 1234 """Return the revision numbers of the ancestors of heads that
1212 1235 are not ancestors of common.
1213 1236
1214 1237 More specifically, return a list of revision numbers corresponding to
1215 1238 nodes N such that every N satisfies the following constraints:
1216 1239
1217 1240 1. N is an ancestor of some node in 'heads'
1218 1241 2. N is not an ancestor of any node in 'common'
1219 1242
1220 1243 The list is sorted by revision number, meaning it is
1221 1244 topologically sorted.
1222 1245
1223 1246 'heads' and 'common' are both lists of revision numbers. If heads is
1224 1247 not supplied, uses all of the revlog's heads. If common is not
1225 1248 supplied, uses nullid."""
1226 1249 if common is None:
1227 1250 common = [nullrev]
1228 1251 if heads is None:
1229 1252 heads = self.headrevs()
1230 1253
1231 1254 inc = self.incrementalmissingrevs(common=common)
1232 1255 return inc.missingancestors(heads)
1233 1256
1234 1257 def findmissing(self, common=None, heads=None):
1235 1258 """Return the ancestors of heads that are not ancestors of common.
1236 1259
1237 1260 More specifically, return a list of nodes N such that every N
1238 1261 satisfies the following constraints:
1239 1262
1240 1263 1. N is an ancestor of some node in 'heads'
1241 1264 2. N is not an ancestor of any node in 'common'
1242 1265
1243 1266 The list is sorted by revision number, meaning it is
1244 1267 topologically sorted.
1245 1268
1246 1269 'heads' and 'common' are both lists of node IDs. If heads is
1247 1270 not supplied, uses all of the revlog's heads. If common is not
1248 1271 supplied, uses nullid."""
1249 1272 if common is None:
1250 1273 common = [self.nullid]
1251 1274 if heads is None:
1252 1275 heads = self.heads()
1253 1276
1254 1277 common = [self.rev(n) for n in common]
1255 1278 heads = [self.rev(n) for n in heads]
1256 1279
1257 1280 inc = self.incrementalmissingrevs(common=common)
1258 1281 return [self.node(r) for r in inc.missingancestors(heads)]
1259 1282
1260 1283 def nodesbetween(self, roots=None, heads=None):
1261 1284 """Return a topological path from 'roots' to 'heads'.
1262 1285
1263 1286 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1264 1287 topologically sorted list of all nodes N that satisfy both of
1265 1288 these constraints:
1266 1289
1267 1290 1. N is a descendant of some node in 'roots'
1268 1291 2. N is an ancestor of some node in 'heads'
1269 1292
1270 1293 Every node is considered to be both a descendant and an ancestor
1271 1294 of itself, so every reachable node in 'roots' and 'heads' will be
1272 1295 included in 'nodes'.
1273 1296
1274 1297 'outroots' is the list of reachable nodes in 'roots', i.e., the
1275 1298 subset of 'roots' that is returned in 'nodes'. Likewise,
1276 1299 'outheads' is the subset of 'heads' that is also in 'nodes'.
1277 1300
1278 1301 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1279 1302 unspecified, uses nullid as the only root. If 'heads' is
1280 1303 unspecified, uses list of all of the revlog's heads."""
1281 1304 nonodes = ([], [], [])
1282 1305 if roots is not None:
1283 1306 roots = list(roots)
1284 1307 if not roots:
1285 1308 return nonodes
1286 1309 lowestrev = min([self.rev(n) for n in roots])
1287 1310 else:
1288 1311 roots = [self.nullid] # Everybody's a descendant of nullid
1289 1312 lowestrev = nullrev
1290 1313 if (lowestrev == nullrev) and (heads is None):
1291 1314 # We want _all_ the nodes!
1292 1315 return (
1293 1316 [self.node(r) for r in self],
1294 1317 [self.nullid],
1295 1318 list(self.heads()),
1296 1319 )
1297 1320 if heads is None:
1298 1321 # All nodes are ancestors, so the latest ancestor is the last
1299 1322 # node.
1300 1323 highestrev = len(self) - 1
1301 1324 # Set ancestors to None to signal that every node is an ancestor.
1302 1325 ancestors = None
1303 1326 # Set heads to an empty dictionary for later discovery of heads
1304 1327 heads = {}
1305 1328 else:
1306 1329 heads = list(heads)
1307 1330 if not heads:
1308 1331 return nonodes
1309 1332 ancestors = set()
1310 1333 # Turn heads into a dictionary so we can remove 'fake' heads.
1311 1334 # Also, later we will be using it to filter out the heads we can't
1312 1335 # find from roots.
1313 1336 heads = dict.fromkeys(heads, False)
1314 1337 # Start at the top and keep marking parents until we're done.
1315 1338 nodestotag = set(heads)
1316 1339 # Remember where the top was so we can use it as a limit later.
1317 1340 highestrev = max([self.rev(n) for n in nodestotag])
1318 1341 while nodestotag:
1319 1342 # grab a node to tag
1320 1343 n = nodestotag.pop()
1321 1344 # Never tag nullid
1322 1345 if n == self.nullid:
1323 1346 continue
1324 1347 # A node's revision number represents its place in a
1325 1348 # topologically sorted list of nodes.
1326 1349 r = self.rev(n)
1327 1350 if r >= lowestrev:
1328 1351 if n not in ancestors:
1329 1352 # If we are possibly a descendant of one of the roots
1330 1353 # and we haven't already been marked as an ancestor
1331 1354 ancestors.add(n) # Mark as ancestor
1332 1355 # Add non-nullid parents to list of nodes to tag.
1333 1356 nodestotag.update(
1334 1357 [p for p in self.parents(n) if p != self.nullid]
1335 1358 )
1336 1359 elif n in heads: # We've seen it before, is it a fake head?
1337 1360 # So it is, real heads should not be the ancestors of
1338 1361 # any other heads.
1339 1362 heads.pop(n)
1340 1363 if not ancestors:
1341 1364 return nonodes
1342 1365 # Now that we have our set of ancestors, we want to remove any
1343 1366 # roots that are not ancestors.
1344 1367
1345 1368 # If one of the roots was nullid, everything is included anyway.
1346 1369 if lowestrev > nullrev:
1347 1370 # But, since we weren't, let's recompute the lowest rev to not
1348 1371 # include roots that aren't ancestors.
1349 1372
1350 1373 # Filter out roots that aren't ancestors of heads
1351 1374 roots = [root for root in roots if root in ancestors]
1352 1375 # Recompute the lowest revision
1353 1376 if roots:
1354 1377 lowestrev = min([self.rev(root) for root in roots])
1355 1378 else:
1356 1379 # No more roots? Return empty list
1357 1380 return nonodes
1358 1381 else:
1359 1382 # We are descending from nullid, and don't need to care about
1360 1383 # any other roots.
1361 1384 lowestrev = nullrev
1362 1385 roots = [self.nullid]
1363 1386 # Transform our roots list into a set.
1364 1387 descendants = set(roots)
1365 1388 # Also, keep the original roots so we can filter out roots that aren't
1366 1389 # 'real' roots (i.e. are descended from other roots).
1367 1390 roots = descendants.copy()
1368 1391 # Our topologically sorted list of output nodes.
1369 1392 orderedout = []
1370 1393 # Don't start at nullid since we don't want nullid in our output list,
1371 1394 # and if nullid shows up in descendants, empty parents will look like
1372 1395 # they're descendants.
1373 1396 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1374 1397 n = self.node(r)
1375 1398 isdescendant = False
1376 1399 if lowestrev == nullrev: # Everybody is a descendant of nullid
1377 1400 isdescendant = True
1378 1401 elif n in descendants:
1379 1402 # n is already a descendant
1380 1403 isdescendant = True
1381 1404 # This check only needs to be done here because all the roots
1382 1405 # will start being marked as descendants before the loop.
1383 1406 if n in roots:
1384 1407 # If n was a root, check if it's a 'real' root.
1385 1408 p = tuple(self.parents(n))
1386 1409 # If any of its parents are descendants, it's not a root.
1387 1410 if (p[0] in descendants) or (p[1] in descendants):
1388 1411 roots.remove(n)
1389 1412 else:
1390 1413 p = tuple(self.parents(n))
1391 1414 # A node is a descendant if either of its parents are
1392 1415 # descendants. (We seeded the dependents list with the roots
1393 1416 # up there, remember?)
1394 1417 if (p[0] in descendants) or (p[1] in descendants):
1395 1418 descendants.add(n)
1396 1419 isdescendant = True
1397 1420 if isdescendant and ((ancestors is None) or (n in ancestors)):
1398 1421 # Only include nodes that are both descendants and ancestors.
1399 1422 orderedout.append(n)
1400 1423 if (ancestors is not None) and (n in heads):
1401 1424 # We're trying to figure out which heads are reachable
1402 1425 # from roots.
1403 1426 # Mark this head as having been reached
1404 1427 heads[n] = True
1405 1428 elif ancestors is None:
1406 1429 # Otherwise, we're trying to discover the heads.
1407 1430 # Assume this is a head because if it isn't, the next step
1408 1431 # will eventually remove it.
1409 1432 heads[n] = True
1410 1433 # But, obviously its parents aren't.
1411 1434 for p in self.parents(n):
1412 1435 heads.pop(p, None)
1413 1436 heads = [head for head, flag in heads.items() if flag]
1414 1437 roots = list(roots)
1415 1438 assert orderedout
1416 1439 assert roots
1417 1440 assert heads
1418 1441 return (orderedout, roots, heads)
1419 1442
1420 1443 def headrevs(self, revs=None):
1421 1444 if revs is None:
1422 1445 try:
1423 1446 return self.index.headrevs()
1424 1447 except AttributeError:
1425 1448 return self._headrevs()
1426 1449 if rustdagop is not None and self.index.rust_ext_compat:
1427 1450 return rustdagop.headrevs(self.index, revs)
1428 1451 return dagop.headrevs(revs, self._uncheckedparentrevs)
1429 1452
1430 1453 def computephases(self, roots):
1431 1454 return self.index.computephasesmapsets(roots)
1432 1455
1433 1456 def _headrevs(self):
1434 1457 count = len(self)
1435 1458 if not count:
1436 1459 return [nullrev]
1437 1460 # we won't iter over filtered revs, so nobody is a head at start
1438 1461 ishead = [0] * (count + 1)
1439 1462 index = self.index
1440 1463 for r in self:
1441 1464 ishead[r] = 1 # I may be a head
1442 1465 e = index[r]
1443 1466 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1444 1467 return [r for r, val in enumerate(ishead) if val]
1445 1468
1446 1469 def heads(self, start=None, stop=None):
1447 1470 """return the list of all nodes that have no children
1448 1471
1449 1472 if start is specified, only heads that are descendants of
1450 1473 start will be returned
1451 1474 if stop is specified, it will consider all the revs from stop
1452 1475 as if they had no children
1453 1476 """
1454 1477 if start is None and stop is None:
1455 1478 if not len(self):
1456 1479 return [self.nullid]
1457 1480 return [self.node(r) for r in self.headrevs()]
1458 1481
1459 1482 if start is None:
1460 1483 start = nullrev
1461 1484 else:
1462 1485 start = self.rev(start)
1463 1486
1464 1487 stoprevs = {self.rev(n) for n in stop or []}
1465 1488
1466 1489 revs = dagop.headrevssubset(
1467 1490 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1468 1491 )
1469 1492
1470 1493 return [self.node(rev) for rev in revs]
1471 1494
1472 1495 def children(self, node):
1473 1496 """find the children of a given node"""
1474 1497 c = []
1475 1498 p = self.rev(node)
1476 1499 for r in self.revs(start=p + 1):
1477 1500 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1478 1501 if prevs:
1479 1502 for pr in prevs:
1480 1503 if pr == p:
1481 1504 c.append(self.node(r))
1482 1505 elif p == nullrev:
1483 1506 c.append(self.node(r))
1484 1507 return c
1485 1508
1486 1509 def commonancestorsheads(self, a, b):
1487 1510 """calculate all the heads of the common ancestors of nodes a and b"""
1488 1511 a, b = self.rev(a), self.rev(b)
1489 1512 ancs = self._commonancestorsheads(a, b)
1490 1513 return pycompat.maplist(self.node, ancs)
1491 1514
1492 1515 def _commonancestorsheads(self, *revs):
1493 1516 """calculate all the heads of the common ancestors of revs"""
1494 1517 try:
1495 1518 ancs = self.index.commonancestorsheads(*revs)
1496 1519 except (AttributeError, OverflowError): # C implementation failed
1497 1520 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1498 1521 return ancs
1499 1522
1500 1523 def isancestor(self, a, b):
1501 1524 """return True if node a is an ancestor of node b
1502 1525
1503 1526 A revision is considered an ancestor of itself."""
1504 1527 a, b = self.rev(a), self.rev(b)
1505 1528 return self.isancestorrev(a, b)
1506 1529
1507 1530 def isancestorrev(self, a, b):
1508 1531 """return True if revision a is an ancestor of revision b
1509 1532
1510 1533 A revision is considered an ancestor of itself.
1511 1534
1512 1535 The implementation of this is trivial but the use of
1513 1536 reachableroots is not."""
1514 1537 if a == nullrev:
1515 1538 return True
1516 1539 elif a == b:
1517 1540 return True
1518 1541 elif a > b:
1519 1542 return False
1520 1543 return bool(self.reachableroots(a, [b], [a], includepath=False))
1521 1544
1522 1545 def reachableroots(self, minroot, heads, roots, includepath=False):
1523 1546 """return (heads(::(<roots> and <roots>::<heads>)))
1524 1547
1525 1548 If includepath is True, return (<roots>::<heads>)."""
1526 1549 try:
1527 1550 return self.index.reachableroots2(
1528 1551 minroot, heads, roots, includepath
1529 1552 )
1530 1553 except AttributeError:
1531 1554 return dagop._reachablerootspure(
1532 1555 self.parentrevs, minroot, roots, heads, includepath
1533 1556 )
1534 1557
1535 1558 def ancestor(self, a, b):
1536 1559 """calculate the "best" common ancestor of nodes a and b"""
1537 1560
1538 1561 a, b = self.rev(a), self.rev(b)
1539 1562 try:
1540 1563 ancs = self.index.ancestors(a, b)
1541 1564 except (AttributeError, OverflowError):
1542 1565 ancs = ancestor.ancestors(self.parentrevs, a, b)
1543 1566 if ancs:
1544 1567 # choose a consistent winner when there's a tie
1545 1568 return min(map(self.node, ancs))
1546 1569 return self.nullid
1547 1570
1548 1571 def _match(self, id):
1549 1572 if isinstance(id, int):
1550 1573 # rev
1551 1574 return self.node(id)
1552 1575 if len(id) == self.nodeconstants.nodelen:
1553 1576 # possibly a binary node
1554 1577 # odds of a binary node being all hex in ASCII are 1 in 10**25
1555 1578 try:
1556 1579 node = id
1557 1580 self.rev(node) # quick search the index
1558 1581 return node
1559 1582 except error.LookupError:
1560 1583 pass # may be partial hex id
1561 1584 try:
1562 1585 # str(rev)
1563 1586 rev = int(id)
1564 1587 if b"%d" % rev != id:
1565 1588 raise ValueError
1566 1589 if rev < 0:
1567 1590 rev = len(self) + rev
1568 1591 if rev < 0 or rev >= len(self):
1569 1592 raise ValueError
1570 1593 return self.node(rev)
1571 1594 except (ValueError, OverflowError):
1572 1595 pass
1573 1596 if len(id) == 2 * self.nodeconstants.nodelen:
1574 1597 try:
1575 1598 # a full hex nodeid?
1576 1599 node = bin(id)
1577 1600 self.rev(node)
1578 1601 return node
1579 1602 except (binascii.Error, error.LookupError):
1580 1603 pass
1581 1604
1582 1605 def _partialmatch(self, id):
1583 1606 # we don't care about wdirfilenodeids as they should always be full hashes
1584 1607 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1585 1608 ambiguous = False
1586 1609 try:
1587 1610 partial = self.index.partialmatch(id)
1588 1611 if partial and self.hasnode(partial):
1589 1612 if maybewdir:
1590 1613 # single 'ff...' match in radix tree, ambiguous with wdir
1591 1614 ambiguous = True
1592 1615 else:
1593 1616 return partial
1594 1617 elif maybewdir:
1595 1618 # no 'ff...' match in radix tree, wdir identified
1596 1619 raise error.WdirUnsupported
1597 1620 else:
1598 1621 return None
1599 1622 except error.RevlogError:
1600 1623 # parsers.c radix tree lookup gave multiple matches
1601 1624 # fast path: for unfiltered changelog, radix tree is accurate
1602 1625 if not getattr(self, 'filteredrevs', None):
1603 1626 ambiguous = True
1604 1627 # fall through to slow path that filters hidden revisions
1605 1628 except (AttributeError, ValueError):
1606 1629 # we are pure python, or key is not hex
1607 1630 pass
1608 1631 if ambiguous:
1609 1632 raise error.AmbiguousPrefixLookupError(
1610 1633 id, self.display_id, _(b'ambiguous identifier')
1611 1634 )
1612 1635
1613 1636 if id in self._pcache:
1614 1637 return self._pcache[id]
1615 1638
1616 1639 if len(id) <= 40:
1617 1640 # hex(node)[:...]
1618 1641 l = len(id) // 2 * 2 # grab an even number of digits
1619 1642 try:
1620 1643 # we're dropping the last digit, so let's check that it's hex,
1621 1644 # to avoid the expensive computation below if it's not
1622 1645 if len(id) % 2 > 0:
1623 1646 if not (id[-1] in hexdigits):
1624 1647 return None
1625 1648 prefix = bin(id[:l])
1626 1649 except binascii.Error:
1627 1650 pass
1628 1651 else:
1629 1652 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1630 1653 nl = [
1631 1654 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1632 1655 ]
1633 1656 if self.nodeconstants.nullhex.startswith(id):
1634 1657 nl.append(self.nullid)
1635 1658 if len(nl) > 0:
1636 1659 if len(nl) == 1 and not maybewdir:
1637 1660 self._pcache[id] = nl[0]
1638 1661 return nl[0]
1639 1662 raise error.AmbiguousPrefixLookupError(
1640 1663 id, self.display_id, _(b'ambiguous identifier')
1641 1664 )
1642 1665 if maybewdir:
1643 1666 raise error.WdirUnsupported
1644 1667 return None
1645 1668
1646 1669 def lookup(self, id):
1647 1670 """locate a node based on:
1648 1671 - revision number or str(revision number)
1649 1672 - nodeid or subset of hex nodeid
1650 1673 """
1651 1674 n = self._match(id)
1652 1675 if n is not None:
1653 1676 return n
1654 1677 n = self._partialmatch(id)
1655 1678 if n:
1656 1679 return n
1657 1680
1658 1681 raise error.LookupError(id, self.display_id, _(b'no match found'))
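# Editor's note: illustrative usage, not part of the original module; `rl`
# and the identifier values below are assumptions:
#
#     rl.lookup(0)            # revision number as an int
#     rl.lookup(b'0')         # revision number as a byte string
#     rl.lookup(fullnode)     # full binary nodeid
#     rl.lookup(b'1f0e7c')    # unambiguous hex prefix of a nodeid
#
# Each call returns the full binary nodeid, or raises error.LookupError when
# nothing matches.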
1659 1682
1660 1683 def shortest(self, node, minlength=1):
1661 1684 """Find the shortest unambiguous prefix that matches node."""
1662 1685
1663 1686 def isvalid(prefix):
1664 1687 try:
1665 1688 matchednode = self._partialmatch(prefix)
1666 1689 except error.AmbiguousPrefixLookupError:
1667 1690 return False
1668 1691 except error.WdirUnsupported:
1669 1692 # single 'ff...' match
1670 1693 return True
1671 1694 if matchednode is None:
1672 1695 raise error.LookupError(node, self.display_id, _(b'no node'))
1673 1696 return True
1674 1697
1675 1698 def maybewdir(prefix):
1676 1699 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1677 1700
1678 1701 hexnode = hex(node)
1679 1702
1680 1703 def disambiguate(hexnode, minlength):
1681 1704 """Disambiguate against wdirid."""
1682 1705 for length in range(minlength, len(hexnode) + 1):
1683 1706 prefix = hexnode[:length]
1684 1707 if not maybewdir(prefix):
1685 1708 return prefix
1686 1709
1687 1710 if not getattr(self, 'filteredrevs', None):
1688 1711 try:
1689 1712 length = max(self.index.shortest(node), minlength)
1690 1713 return disambiguate(hexnode, length)
1691 1714 except error.RevlogError:
1692 1715 if node != self.nodeconstants.wdirid:
1693 1716 raise error.LookupError(
1694 1717 node, self.display_id, _(b'no node')
1695 1718 )
1696 1719 except AttributeError:
1697 1720 # Fall through to pure code
1698 1721 pass
1699 1722
1700 1723 if node == self.nodeconstants.wdirid:
1701 1724 for length in range(minlength, len(hexnode) + 1):
1702 1725 prefix = hexnode[:length]
1703 1726 if isvalid(prefix):
1704 1727 return prefix
1705 1728
1706 1729 for length in range(minlength, len(hexnode) + 1):
1707 1730 prefix = hexnode[:length]
1708 1731 if isvalid(prefix):
1709 1732 return disambiguate(hexnode, length)
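# Editor's note: illustrative usage (a sketch, not part of the original
# module); `rl` and `node` are assumptions:
#
#     prefix = rl.shortest(node)          # e.g. b'1f0e', as short as remains
#                                         # unambiguous in this revlog
#     assert rl.lookup(prefix) == node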
1710 1733
1711 1734 def cmp(self, node, text):
1712 1735 """compare text with a given file revision
1713 1736
1714 1737 returns True if text is different than what is stored.
1715 1738 """
1716 1739 p1, p2 = self.parents(node)
1717 1740 return storageutil.hashrevisionsha1(text, p1, p2) != node
1718 1741
1719 1742 def _getsegmentforrevs(self, startrev, endrev, df=None):
1720 1743 """Obtain a segment of raw data corresponding to a range of revisions.
1721 1744
1722 1745 Accepts the start and end revisions and an optional already-open
1723 1746 file handle to be used for reading. If the file handle is read, its
1724 1747 seek position will not be preserved.
1725 1748
1726 1749 Requests for data may be satisfied by a cache.
1727 1750
1728 1751 Returns a 2-tuple of (offset, data) for the requested range of
1729 1752 revisions. Offset is the integer offset from the beginning of the
1730 1753 revlog and data is a str or buffer of the raw byte data.
1731 1754
1732 1755 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1733 1756 to determine where each revision's data begins and ends.
1734 1757 """
1735 1758 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1736 1759 # (functions are expensive).
1737 1760 index = self.index
1738 1761 istart = index[startrev]
1739 1762 start = int(istart[0] >> 16)
1740 1763 if startrev == endrev:
1741 1764 end = start + istart[1]
1742 1765 else:
1743 1766 iend = index[endrev]
1744 1767 end = int(iend[0] >> 16) + iend[1]
1745 1768
1746 1769 if self._inline:
1747 1770 start += (startrev + 1) * self.index.entry_size
1748 1771 end += (endrev + 1) * self.index.entry_size
1749 1772 length = end - start
1750 1773
1751 1774 return start, self._segmentfile.read_chunk(start, length, df)
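# Editor's note: a sketch of how callers are expected to carve individual
# revisions out of the returned segment (the optimized version lives in
# _chunks below); not part of the original module:
#
#     offset, data = rl._getsegmentforrevs(firstrev, lastrev)
#     for rev in range(firstrev, lastrev + 1):
#         chunkstart = rl.start(rev)
#         if rl._inline:
#             chunkstart += (rev + 1) * rl.index.entry_size
#         chunk = util.buffer(data, chunkstart - offset, rl.length(rev))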
1752 1775
1753 1776 def _chunk(self, rev, df=None):
1754 1777 """Obtain a single decompressed chunk for a revision.
1755 1778
1756 1779 Accepts an integer revision and an optional already-open file handle
1757 1780 to be used for reading. If used, the seek position of the file will not
1758 1781 be preserved.
1759 1782
1760 1783 Returns a str holding uncompressed data for the requested revision.
1761 1784 """
1762 1785 compression_mode = self.index[rev][10]
1763 1786 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1764 1787 if compression_mode == COMP_MODE_PLAIN:
1765 1788 return data
1766 1789 elif compression_mode == COMP_MODE_DEFAULT:
1767 1790 return self._decompressor(data)
1768 1791 elif compression_mode == COMP_MODE_INLINE:
1769 1792 return self.decompress(data)
1770 1793 else:
1771 1794 msg = b'unknown compression mode %d'
1772 1795 msg %= compression_mode
1773 1796 raise error.RevlogError(msg)
1774 1797
1775 1798 def _chunks(self, revs, df=None, targetsize=None):
1776 1799 """Obtain decompressed chunks for the specified revisions.
1777 1800
1778 1801 Accepts an iterable of numeric revisions that are assumed to be in
1779 1802 ascending order. Also accepts an optional already-open file handle
1780 1803 to be used for reading. If used, the seek position of the file will
1781 1804 not be preserved.
1782 1805
1783 1806 This function is similar to calling ``self._chunk()`` multiple times,
1784 1807 but is faster.
1785 1808
1786 1809 Returns a list with decompressed data for each requested revision.
1787 1810 """
1788 1811 if not revs:
1789 1812 return []
1790 1813 start = self.start
1791 1814 length = self.length
1792 1815 inline = self._inline
1793 1816 iosize = self.index.entry_size
1794 1817 buffer = util.buffer
1795 1818
1796 1819 l = []
1797 1820 ladd = l.append
1798 1821
1799 1822 if not self._withsparseread:
1800 1823 slicedchunks = (revs,)
1801 1824 else:
1802 1825 slicedchunks = deltautil.slicechunk(
1803 1826 self, revs, targetsize=targetsize
1804 1827 )
1805 1828
1806 1829 for revschunk in slicedchunks:
1807 1830 firstrev = revschunk[0]
1808 1831 # Skip trailing revisions with empty diff
1809 1832 for lastrev in revschunk[::-1]:
1810 1833 if length(lastrev) != 0:
1811 1834 break
1812 1835
1813 1836 try:
1814 1837 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1815 1838 except OverflowError:
1816 1839 # issue4215 - we can't cache a run of chunks greater than
1817 1840 # 2G on Windows
1818 1841 return [self._chunk(rev, df=df) for rev in revschunk]
1819 1842
1820 1843 decomp = self.decompress
1821 1844 # self._decompressor might be None, but will not be used in that case
1822 1845 def_decomp = self._decompressor
1823 1846 for rev in revschunk:
1824 1847 chunkstart = start(rev)
1825 1848 if inline:
1826 1849 chunkstart += (rev + 1) * iosize
1827 1850 chunklength = length(rev)
1828 1851 comp_mode = self.index[rev][10]
1829 1852 c = buffer(data, chunkstart - offset, chunklength)
1830 1853 if comp_mode == COMP_MODE_PLAIN:
1831 1854 ladd(c)
1832 1855 elif comp_mode == COMP_MODE_INLINE:
1833 1856 ladd(decomp(c))
1834 1857 elif comp_mode == COMP_MODE_DEFAULT:
1835 1858 ladd(def_decomp(c))
1836 1859 else:
1837 1860 msg = b'unknown compression mode %d'
1838 1861 msg %= comp_mode
1839 1862 raise error.RevlogError(msg)
1840 1863
1841 1864 return l
1842 1865
1843 1866 def deltaparent(self, rev):
1844 1867 """return deltaparent of the given revision"""
1845 1868 base = self.index[rev][3]
1846 1869 if base == rev:
1847 1870 return nullrev
1848 1871 elif self._generaldelta:
1849 1872 return base
1850 1873 else:
1851 1874 return rev - 1
1852 1875
1853 1876 def issnapshot(self, rev):
1854 1877 """tells whether rev is a snapshot"""
1855 1878 if not self._sparserevlog:
1856 1879 return self.deltaparent(rev) == nullrev
1857 1880 elif util.safehasattr(self.index, 'issnapshot'):
1858 1881 # directly assign the method to cache the testing and access
1859 1882 self.issnapshot = self.index.issnapshot
1860 1883 return self.issnapshot(rev)
1861 1884 if rev == nullrev:
1862 1885 return True
1863 1886 entry = self.index[rev]
1864 1887 base = entry[3]
1865 1888 if base == rev:
1866 1889 return True
1867 1890 if base == nullrev:
1868 1891 return True
1869 1892 p1 = entry[5]
1870 1893 while self.length(p1) == 0:
1871 1894 b = self.deltaparent(p1)
1872 1895 if b == p1:
1873 1896 break
1874 1897 p1 = b
1875 1898 p2 = entry[6]
1876 1899 while self.length(p2) == 0:
1877 1900 b = self.deltaparent(p2)
1878 1901 if b == p2:
1879 1902 break
1880 1903 p2 = b
1881 1904 if base == p1 or base == p2:
1882 1905 return False
1883 1906 return self.issnapshot(base)
1884 1907
1885 1908 def snapshotdepth(self, rev):
1886 1909 """number of snapshots in the chain before this one"""
1887 1910 if not self.issnapshot(rev):
1888 1911 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1889 1912 return len(self._deltachain(rev)[0]) - 1
1890 1913
1891 1914 def revdiff(self, rev1, rev2):
1892 1915 """return or calculate a delta between two revisions
1893 1916
1894 1917 The delta calculated is in binary form and is intended to be written to
1895 1918 revlog data directly. So this function needs raw revision data.
1896 1919 """
1897 1920 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1898 1921 return bytes(self._chunk(rev2))
1899 1922
1900 1923 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1901 1924
1902 1925 def revision(self, nodeorrev, _df=None):
1903 1926 """return an uncompressed revision of a given node or revision
1904 1927 number.
1905 1928
1906 1929 _df - an existing file handle to read from. (internal-only)
1907 1930 """
1908 1931 return self._revisiondata(nodeorrev, _df)
1909 1932
1910 1933 def sidedata(self, nodeorrev, _df=None):
1911 1934 """a map of extra data related to the changeset but not part of the hash
1912 1935
1913 1936 This function currently returns a dictionary. However, a more advanced
1914 1937 mapping object will likely be used in the future for more
1915 1938 efficient/lazy code.
1916 1939 """
1917 1940 # deal with <nodeorrev> argument type
1918 1941 if isinstance(nodeorrev, int):
1919 1942 rev = nodeorrev
1920 1943 else:
1921 1944 rev = self.rev(nodeorrev)
1922 1945 return self._sidedata(rev)
1923 1946
1924 1947 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1925 1948 # deal with <nodeorrev> argument type
1926 1949 if isinstance(nodeorrev, int):
1927 1950 rev = nodeorrev
1928 1951 node = self.node(rev)
1929 1952 else:
1930 1953 node = nodeorrev
1931 1954 rev = None
1932 1955
1933 1956 # fast path the special `nullid` rev
1934 1957 if node == self.nullid:
1935 1958 return b""
1936 1959
1937 1960 # ``rawtext`` is the text as stored inside the revlog. Might be the
1938 1961 # revision or might need to be processed to retrieve the revision.
1939 1962 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1940 1963
1941 1964 if raw and validated:
1942 1965 # if we don't want to process the raw text and that raw
1943 1966 # text is cached, we can exit early.
1944 1967 return rawtext
1945 1968 if rev is None:
1946 1969 rev = self.rev(node)
1947 1970 # the revlog's flag for this revision
1948 1971 # (usually alter its state or content)
1949 1972 flags = self.flags(rev)
1950 1973
1951 1974 if validated and flags == REVIDX_DEFAULT_FLAGS:
1952 1975 # no extra flags set, no flag processor runs, text = rawtext
1953 1976 return rawtext
1954 1977
1955 1978 if raw:
1956 1979 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1957 1980 text = rawtext
1958 1981 else:
1959 1982 r = flagutil.processflagsread(self, rawtext, flags)
1960 1983 text, validatehash = r
1961 1984 if validatehash:
1962 1985 self.checkhash(text, node, rev=rev)
1963 1986 if not validated:
1964 1987 self._revisioncache = (node, rev, rawtext)
1965 1988
1966 1989 return text
1967 1990
1968 1991 def _rawtext(self, node, rev, _df=None):
1969 1992 """return the possibly unvalidated rawtext for a revision
1970 1993
1971 1994 returns (rev, rawtext, validated)
1972 1995 """
1973 1996
1974 1997 # revision in the cache (could be useful to apply delta)
1975 1998 cachedrev = None
1976 1999 # An intermediate text to apply deltas to
1977 2000 basetext = None
1978 2001
1979 2002 # Check if we have the entry in cache
1980 2003 # The cache entry looks like (node, rev, rawtext)
1981 2004 if self._revisioncache:
1982 2005 if self._revisioncache[0] == node:
1983 2006 return (rev, self._revisioncache[2], True)
1984 2007 cachedrev = self._revisioncache[1]
1985 2008
1986 2009 if rev is None:
1987 2010 rev = self.rev(node)
1988 2011
1989 2012 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1990 2013 if stopped:
1991 2014 basetext = self._revisioncache[2]
1992 2015
1993 2016 # drop cache to save memory, the caller is expected to
1994 2017 # update self._revisioncache after validating the text
1995 2018 self._revisioncache = None
1996 2019
1997 2020 targetsize = None
1998 2021 rawsize = self.index[rev][2]
1999 2022 if 0 <= rawsize:
2000 2023 targetsize = 4 * rawsize
2001 2024
2002 2025 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2003 2026 if basetext is None:
2004 2027 basetext = bytes(bins[0])
2005 2028 bins = bins[1:]
2006 2029
2007 2030 rawtext = mdiff.patches(basetext, bins)
2008 2031 del basetext # let us have a chance to free memory early
2009 2032 return (rev, rawtext, False)
2010 2033
2011 2034 def _sidedata(self, rev):
2012 2035 """Return the sidedata for a given revision number."""
2013 2036 index_entry = self.index[rev]
2014 2037 sidedata_offset = index_entry[8]
2015 2038 sidedata_size = index_entry[9]
2016 2039
2017 2040 if self._inline:
2018 2041 sidedata_offset += self.index.entry_size * (1 + rev)
2019 2042 if sidedata_size == 0:
2020 2043 return {}
2021 2044
2022 2045 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2023 2046 filename = self._sidedatafile
2024 2047 end = self._docket.sidedata_end
2025 2048 offset = sidedata_offset
2026 2049 length = sidedata_size
2027 2050 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2028 2051 raise error.RevlogError(m)
2029 2052
2030 2053 comp_segment = self._segmentfile_sidedata.read_chunk(
2031 2054 sidedata_offset, sidedata_size
2032 2055 )
2033 2056
2034 2057 comp = self.index[rev][11]
2035 2058 if comp == COMP_MODE_PLAIN:
2036 2059 segment = comp_segment
2037 2060 elif comp == COMP_MODE_DEFAULT:
2038 2061 segment = self._decompressor(comp_segment)
2039 2062 elif comp == COMP_MODE_INLINE:
2040 2063 segment = self.decompress(comp_segment)
2041 2064 else:
2042 2065 msg = b'unknown compression mode %d'
2043 2066 msg %= comp
2044 2067 raise error.RevlogError(msg)
2045 2068
2046 2069 sidedata = sidedatautil.deserialize_sidedata(segment)
2047 2070 return sidedata
2048 2071
2049 2072 def rawdata(self, nodeorrev, _df=None):
2050 2073 """return the uncompressed raw data of a given node or revision number.
2051 2074
2052 2075 _df - an existing file handle to read from. (internal-only)
2053 2076 """
2054 2077 return self._revisiondata(nodeorrev, _df, raw=True)
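# Editor's note: illustrative contrast between the two accessors above (not
# part of the original module); `rl` and `node` are assumptions:
#
#     text = rl.revision(node)   # flag processors applied (e.g. censorship)
#     raw = rl.rawdata(node)     # the rawtext exactly as stored in the revlog
#
# For revisions without special flags the two results are identical.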
2055 2078
2056 2079 def hash(self, text, p1, p2):
2057 2080 """Compute a node hash.
2058 2081
2059 2082 Available as a function so that subclasses can replace the hash
2060 2083 as needed.
2061 2084 """
2062 2085 return storageutil.hashrevisionsha1(text, p1, p2)
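# Editor's note: for reference only, the default implementation above is
# believed to compute the classic revlog SHA-1, roughly the sketch below
# (not part of the original module):
#
#     import hashlib
#
#     def sketch_hash(text, p1, p2):
#         s = hashlib.sha1(min(p1, p2))
#         s.update(max(p1, p2))
#         s.update(text)
#         return s.digest()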
2063 2086
2064 2087 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2065 2088 """Check node hash integrity.
2066 2089
2067 2090 Available as a function so that subclasses can extend hash mismatch
2068 2091 behaviors as needed.
2069 2092 """
2070 2093 try:
2071 2094 if p1 is None and p2 is None:
2072 2095 p1, p2 = self.parents(node)
2073 2096 if node != self.hash(text, p1, p2):
2074 2097 # Clear the revision cache on hash failure. The revision cache
2075 2098 # only stores the raw revision and clearing the cache does have
2076 2099 # the side-effect that we won't have a cache hit when the raw
2077 2100 # revision data is accessed. But this case should be rare and
2078 2101 # it is extra work to teach the cache about the hash
2079 2102 # verification state.
2080 2103 if self._revisioncache and self._revisioncache[0] == node:
2081 2104 self._revisioncache = None
2082 2105
2083 2106 revornode = rev
2084 2107 if revornode is None:
2085 2108 revornode = templatefilters.short(hex(node))
2086 2109 raise error.RevlogError(
2087 2110 _(b"integrity check failed on %s:%s")
2088 2111 % (self.display_id, pycompat.bytestr(revornode))
2089 2112 )
2090 2113 except error.RevlogError:
2091 2114 if self._censorable and storageutil.iscensoredtext(text):
2092 2115 raise error.CensoredNodeError(self.display_id, node, text)
2093 2116 raise
2094 2117
2095 2118 def _enforceinlinesize(self, tr, side_write=True):
2096 2119 """Check if the revlog is too big for inline and convert if so.
2097 2120
2098 2121 This should be called after revisions are added to the revlog. If the
2099 2122 revlog has grown too large to be an inline revlog, it will convert it
2100 2123 to use multiple index and data files.
2101 2124 """
2102 2125 tiprev = len(self) - 1
2103 2126 total_size = self.start(tiprev) + self.length(tiprev)
2104 2127 if not self._inline or total_size < _maxinline:
2105 2128 return
2106 2129
2107 2130 troffset = tr.findoffset(self._indexfile)
2108 2131 if troffset is None:
2109 2132 raise error.RevlogError(
2110 2133 _(b"%s not found in the transaction") % self._indexfile
2111 2134 )
2112 2135 if troffset:
2113 2136 tr.addbackup(self._indexfile, for_offset=True)
2114 2137 tr.add(self._datafile, 0)
2115 2138
2116 2139 existing_handles = False
2117 2140 if self._writinghandles is not None:
2118 2141 existing_handles = True
2119 2142 fp = self._writinghandles[0]
2120 2143 fp.flush()
2121 2144 fp.close()
2122 2145 # We can't use the cached file handle after close(). So prevent
2123 2146 # its usage.
2124 2147 self._writinghandles = None
2125 2148 self._segmentfile.writing_handle = None
2126 2149 # No need to deal with sidedata writing handle as it is only
2127 2150 # relevant with revlog-v2 which is never inline, not reaching
2128 2151 # this code
2129 2152 if side_write:
2130 2153 old_index_file_path = self._indexfile
2131 2154 new_index_file_path = self._indexfile + b'.s'
2132 2155 opener = self.opener
2133 2156 weak_self = weakref.ref(self)
2134 2157
2135 2158 # the "split" index replaces the real index when the transaction is finalized
2136 2159 def finalize_callback(tr):
2137 2160 opener.rename(
2138 2161 new_index_file_path,
2139 2162 old_index_file_path,
2140 2163 checkambig=True,
2141 2164 )
2142 2165 maybe_self = weak_self()
2143 2166 if maybe_self is not None:
2144 2167 maybe_self._indexfile = old_index_file_path
2145 2168
2146 2169 def abort_callback(tr):
2147 2170 maybe_self = weak_self()
2148 2171 if maybe_self is not None:
2149 2172 maybe_self._indexfile = old_index_file_path
2150 2173
2151 2174 tr.registertmp(new_index_file_path)
2152 2175 if self.target[1] is not None:
2153 2176 callback_id = b'000-revlog-split-%d-%s' % self.target
2154 2177 else:
2155 2178 callback_id = b'000-revlog-split-%d' % self.target[0]
2156 2179 tr.addfinalize(callback_id, finalize_callback)
2157 2180 tr.addabort(callback_id, abort_callback)
2158 2181
2159 2182 new_dfh = self._datafp(b'w+')
2160 2183 new_dfh.truncate(0) # drop any potentially existing data
2161 2184 try:
2162 2185 with self._indexfp() as read_ifh:
2163 2186 for r in self:
2164 2187 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2165 2188 new_dfh.flush()
2166 2189
2167 2190 if side_write:
2168 2191 self._indexfile = new_index_file_path
2169 2192 with self.__index_new_fp() as fp:
2170 2193 self._format_flags &= ~FLAG_INLINE_DATA
2171 2194 self._inline = False
2172 2195 for i in self:
2173 2196 e = self.index.entry_binary(i)
2174 2197 if i == 0 and self._docket is None:
2175 2198 header = self._format_flags | self._format_version
2176 2199 header = self.index.pack_header(header)
2177 2200 e = header + e
2178 2201 fp.write(e)
2179 2202 if self._docket is not None:
2180 2203 self._docket.index_end = fp.tell()
2181 2204
2182 2205 # If we don't use side-write, the temp file replaces the real
2183 2206 # index when we exit the context manager
2184 2207
2185 2208 nodemaputil.setup_persistent_nodemap(tr, self)
2186 2209 self._segmentfile = randomaccessfile.randomaccessfile(
2187 2210 self.opener,
2188 2211 self._datafile,
2189 2212 self._chunkcachesize,
2190 2213 )
2191 2214
2192 2215 if existing_handles:
2193 2216 # switched from inline to conventional reopen the index
2194 2217 ifh = self.__index_write_fp()
2195 2218 self._writinghandles = (ifh, new_dfh, None)
2196 2219 self._segmentfile.writing_handle = new_dfh
2197 2220 new_dfh = None
2198 2221 # No need to deal with sidedata writing handle as it is only
2199 2222 # relevant with revlog-v2 which is never inline, not reaching
2200 2223 # this code
2201 2224 finally:
2202 2225 if new_dfh is not None:
2203 2226 new_dfh.close()
2204 2227
2205 2228 def _nodeduplicatecallback(self, transaction, node):
2206 2229 """called when trying to add a node already stored."""
2207 2230
2208 2231 @contextlib.contextmanager
2209 2232 def reading(self):
2210 2233 """Context manager that keeps data and sidedata files open for reading"""
2211 2234 with self._segmentfile.reading():
2212 2235 with self._segmentfile_sidedata.reading():
2213 2236 yield
2214 2237
2215 2238 @contextlib.contextmanager
2216 2239 def _writing(self, transaction):
2217 2240 if self._trypending:
2218 2241 msg = b'try to write in a `trypending` revlog: %s'
2219 2242 msg %= self.display_id
2220 2243 raise error.ProgrammingError(msg)
2221 2244 if self._writinghandles is not None:
2222 2245 yield
2223 2246 else:
2224 2247 ifh = dfh = sdfh = None
2225 2248 try:
2226 2249 r = len(self)
2227 2250 # opening the data file.
2228 2251 dsize = 0
2229 2252 if r:
2230 2253 dsize = self.end(r - 1)
2231 2254 dfh = None
2232 2255 if not self._inline:
2233 2256 try:
2234 2257 dfh = self._datafp(b"r+")
2235 2258 if self._docket is None:
2236 2259 dfh.seek(0, os.SEEK_END)
2237 2260 else:
2238 2261 dfh.seek(self._docket.data_end, os.SEEK_SET)
2239 2262 except FileNotFoundError:
2240 2263 dfh = self._datafp(b"w+")
2241 2264 transaction.add(self._datafile, dsize)
2242 2265 if self._sidedatafile is not None:
2243 2266 # revlog-v2 does not inline, help Pytype
2244 2267 assert dfh is not None
2245 2268 try:
2246 2269 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2247 2270 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2248 2271 except FileNotFoundError:
2249 2272 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2250 2273 transaction.add(
2251 2274 self._sidedatafile, self._docket.sidedata_end
2252 2275 )
2253 2276
2254 2277 # opening the index file.
2255 2278 isize = r * self.index.entry_size
2256 2279 ifh = self.__index_write_fp()
2257 2280 if self._inline:
2258 2281 transaction.add(self._indexfile, dsize + isize)
2259 2282 else:
2260 2283 transaction.add(self._indexfile, isize)
2261 2284 # exposing all file handle for writing.
2262 2285 self._writinghandles = (ifh, dfh, sdfh)
2263 2286 self._segmentfile.writing_handle = ifh if self._inline else dfh
2264 2287 self._segmentfile_sidedata.writing_handle = sdfh
2265 2288 yield
2266 2289 if self._docket is not None:
2267 2290 self._write_docket(transaction)
2268 2291 finally:
2269 2292 self._writinghandles = None
2270 2293 self._segmentfile.writing_handle = None
2271 2294 self._segmentfile_sidedata.writing_handle = None
2272 2295 if dfh is not None:
2273 2296 dfh.close()
2274 2297 if sdfh is not None:
2275 2298 sdfh.close()
2276 2299 # closing the index file last to avoid exposing references to
2277 2300 # potentially unflushed data content.
2278 2301 if ifh is not None:
2279 2302 ifh.close()
2280 2303
2281 2304 def _write_docket(self, transaction):
2282 2305 """write the current docket on disk
2283 2306
2284 2307 Exists as a method to help the changelog implement transaction logic
2285 2308
2286 2309 We could also imagine using the same transaction logic for all revlogs
2287 2310 since dockets are cheap."""
2288 2311 self._docket.write(transaction)
2289 2312
2290 2313 def addrevision(
2291 2314 self,
2292 2315 text,
2293 2316 transaction,
2294 2317 link,
2295 2318 p1,
2296 2319 p2,
2297 2320 cachedelta=None,
2298 2321 node=None,
2299 2322 flags=REVIDX_DEFAULT_FLAGS,
2300 2323 deltacomputer=None,
2301 2324 sidedata=None,
2302 2325 ):
2303 2326 """add a revision to the log
2304 2327
2305 2328 text - the revision data to add
2306 2329 transaction - the transaction object used for rollback
2307 2330 link - the linkrev data to add
2308 2331 p1, p2 - the parent nodeids of the revision
2309 2332 cachedelta - an optional precomputed delta
2310 2333 node - nodeid of revision; typically node is not specified, and it is
2311 2334 computed by default as hash(text, p1, p2), however subclasses might
2312 2335 use a different hashing method (and override checkhash() in such a case)
2313 2336 flags - the known flags to set on the revision
2314 2337 deltacomputer - an optional deltacomputer instance shared between
2315 2338 multiple calls
2316 2339 """
2317 2340 if link == nullrev:
2318 2341 raise error.RevlogError(
2319 2342 _(b"attempted to add linkrev -1 to %s") % self.display_id
2320 2343 )
2321 2344
2322 2345 if sidedata is None:
2323 2346 sidedata = {}
2324 2347 elif sidedata and not self.hassidedata:
2325 2348 raise error.ProgrammingError(
2326 2349 _(b"trying to add sidedata to a revlog that doesn't support them")
2327 2350 )
2328 2351
2329 2352 if flags:
2330 2353 node = node or self.hash(text, p1, p2)
2331 2354
2332 2355 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2333 2356
2334 2357 # If the flag processor modifies the revision data, ignore any provided
2335 2358 # cachedelta.
2336 2359 if rawtext != text:
2337 2360 cachedelta = None
2338 2361
2339 2362 if len(rawtext) > _maxentrysize:
2340 2363 raise error.RevlogError(
2341 2364 _(
2342 2365 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2343 2366 )
2344 2367 % (self.display_id, len(rawtext))
2345 2368 )
2346 2369
2347 2370 node = node or self.hash(rawtext, p1, p2)
2348 2371 rev = self.index.get_rev(node)
2349 2372 if rev is not None:
2350 2373 return rev
2351 2374
2352 2375 if validatehash:
2353 2376 self.checkhash(rawtext, node, p1=p1, p2=p2)
2354 2377
2355 2378 return self.addrawrevision(
2356 2379 rawtext,
2357 2380 transaction,
2358 2381 link,
2359 2382 p1,
2360 2383 p2,
2361 2384 node,
2362 2385 flags,
2363 2386 cachedelta=cachedelta,
2364 2387 deltacomputer=deltacomputer,
2365 2388 sidedata=sidedata,
2366 2389 )
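# Editor's note: a minimal illustrative call (not part of the original
# module); `rl`, `tr`, `linkrev` and `p1node` are assumptions:
#
#     rev = rl.addrevision(
#         b'new file content',   # text
#         tr,                    # an open transaction
#         linkrev,               # linkrev pointing at the changelog revision
#         p1node,                # first parent nodeid
#         rl.nullid,             # no second parent
#     )
#     node = rl.node(rev)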
2367 2390
2368 2391 def addrawrevision(
2369 2392 self,
2370 2393 rawtext,
2371 2394 transaction,
2372 2395 link,
2373 2396 p1,
2374 2397 p2,
2375 2398 node,
2376 2399 flags,
2377 2400 cachedelta=None,
2378 2401 deltacomputer=None,
2379 2402 sidedata=None,
2380 2403 ):
2381 2404 """add a raw revision with known flags, node and parents
2382 2405 useful when reusing a revision not stored in this revlog (ex: received
2383 2406 over wire, or read from an external bundle).
2384 2407 """
2385 2408 with self._writing(transaction):
2386 2409 return self._addrevision(
2387 2410 node,
2388 2411 rawtext,
2389 2412 transaction,
2390 2413 link,
2391 2414 p1,
2392 2415 p2,
2393 2416 flags,
2394 2417 cachedelta,
2395 2418 deltacomputer=deltacomputer,
2396 2419 sidedata=sidedata,
2397 2420 )
2398 2421
2399 2422 def compress(self, data):
2400 2423 """Generate a possibly-compressed representation of data."""
2401 2424 if not data:
2402 2425 return b'', data
2403 2426
2404 2427 compressed = self._compressor.compress(data)
2405 2428
2406 2429 if compressed:
2407 2430 # The revlog compressor added the header in the returned data.
2408 2431 return b'', compressed
2409 2432
2410 2433 if data[0:1] == b'\0':
2411 2434 return b'', data
2412 2435 return b'u', data
2413 2436
2414 2437 def decompress(self, data):
2415 2438 """Decompress a revlog chunk.
2416 2439
2417 2440 The chunk is expected to begin with a header identifying the
2418 2441 format type so it can be routed to an appropriate decompressor.
2419 2442 """
2420 2443 if not data:
2421 2444 return data
2422 2445
2423 2446 # Revlogs are read much more frequently than they are written and many
2424 2447 # chunks only take microseconds to decompress, so performance is
2425 2448 # important here.
2426 2449 #
2427 2450 # We can make a few assumptions about revlogs:
2428 2451 #
2429 2452 # 1) the majority of chunks will be compressed (as opposed to inline
2430 2453 # raw data).
2431 2454 # 2) decompressing *any* data will likely be at least 10x slower than
2432 2455 # returning raw inline data.
2433 2456 # 3) we want to prioritize common and officially supported compression
2434 2457 # engines
2435 2458 #
2436 2459 # It follows that we want to optimize for "decompress compressed data
2437 2460 # when encoded with common and officially supported compression engines"
2438 2461 # case over "raw data" and "data encoded by less common or non-official
2439 2462 # compression engines." That is why we have the inline lookup first
2440 2463 # followed by the compengines lookup.
2441 2464 #
2442 2465 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2443 2466 # compressed chunks. And this matters for changelog and manifest reads.
2444 2467 t = data[0:1]
2445 2468
2446 2469 if t == b'x':
2447 2470 try:
2448 2471 return _zlibdecompress(data)
2449 2472 except zlib.error as e:
2450 2473 raise error.RevlogError(
2451 2474 _(b'revlog decompress error: %s')
2452 2475 % stringutil.forcebytestr(e)
2453 2476 )
2454 2477 # '\0' is more common than 'u' so it goes first.
2455 2478 elif t == b'\0':
2456 2479 return data
2457 2480 elif t == b'u':
2458 2481 return util.buffer(data, 1)
2459 2482
2460 2483 compressor = self._get_decompressor(t)
2461 2484
2462 2485 return compressor.decompress(data)
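# Editor's note: illustrative round trip of the chunk header convention used
# by compress()/decompress() above (a sketch, not part of the original
# module); `rl` and `rawchunk` are assumptions:
#
#     h, packed = rl.compress(rawchunk)
#     stored = h + packed                 # b'u' + raw data, or compressed
#                                         # data carrying its own header
#     assert bytes(rl.decompress(stored)) == rawchunk
#
# The first byte of a stored chunk selects the path: b'\0' is literal/empty
# data, b'u' marks uncompressed data, b'x' is zlib, and anything else is
# resolved through the configured compression engines.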
2463 2486
2464 2487 def _addrevision(
2465 2488 self,
2466 2489 node,
2467 2490 rawtext,
2468 2491 transaction,
2469 2492 link,
2470 2493 p1,
2471 2494 p2,
2472 2495 flags,
2473 2496 cachedelta,
2474 2497 alwayscache=False,
2475 2498 deltacomputer=None,
2476 2499 sidedata=None,
2477 2500 ):
2478 2501 """internal function to add revisions to the log
2479 2502
2480 2503 see addrevision for argument descriptions.
2481 2504
2482 2505 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2483 2506
2484 2507 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2485 2508 be used.
2486 2509
2487 2510 invariants:
2488 2511 - rawtext is optional (can be None); if not set, cachedelta must be set.
2489 2512 if both are set, they must correspond to each other.
2490 2513 """
2491 2514 if node == self.nullid:
2492 2515 raise error.RevlogError(
2493 2516 _(b"%s: attempt to add null revision") % self.display_id
2494 2517 )
2495 2518 if (
2496 2519 node == self.nodeconstants.wdirid
2497 2520 or node in self.nodeconstants.wdirfilenodeids
2498 2521 ):
2499 2522 raise error.RevlogError(
2500 2523 _(b"%s: attempt to add wdir revision") % self.display_id
2501 2524 )
2502 2525 if self._writinghandles is None:
2503 2526 msg = b'adding revision outside `revlog._writing` context'
2504 2527 raise error.ProgrammingError(msg)
2505 2528
2506 2529 if self._inline:
2507 2530 fh = self._writinghandles[0]
2508 2531 else:
2509 2532 fh = self._writinghandles[1]
2510 2533
2511 2534 btext = [rawtext]
2512 2535
2513 2536 curr = len(self)
2514 2537 prev = curr - 1
2515 2538
2516 2539 offset = self._get_data_offset(prev)
2517 2540
2518 2541 if self._concurrencychecker:
2519 2542 ifh, dfh, sdfh = self._writinghandles
2520 2543 # XXX no checking for the sidedata file
2521 2544 if self._inline:
2522 2545 # offset is "as if" it were in the .d file, so we need to add on
2523 2546 # the size of the entry metadata.
2524 2547 self._concurrencychecker(
2525 2548 ifh, self._indexfile, offset + curr * self.index.entry_size
2526 2549 )
2527 2550 else:
2528 2551 # Entries in the .i are a consistent size.
2529 2552 self._concurrencychecker(
2530 2553 ifh, self._indexfile, curr * self.index.entry_size
2531 2554 )
2532 2555 self._concurrencychecker(dfh, self._datafile, offset)
2533 2556
2534 2557 p1r, p2r = self.rev(p1), self.rev(p2)
2535 2558
2536 2559 # full versions are inserted when the needed deltas
2537 2560 # become comparable to the uncompressed text
2538 2561 if rawtext is None:
2539 2562 # need rawtext size, before changed by flag processors, which is
2540 2563 # the non-raw size. use revlog explicitly to avoid filelog's extra
2541 2564 # logic that might remove metadata size.
2542 2565 textlen = mdiff.patchedsize(
2543 2566 revlog.size(self, cachedelta[0]), cachedelta[1]
2544 2567 )
2545 2568 else:
2546 2569 textlen = len(rawtext)
2547 2570
2548 2571 if deltacomputer is None:
2549 2572 write_debug = None
2550 2573 if self._debug_delta:
2551 2574 write_debug = transaction._report
2552 2575 deltacomputer = deltautil.deltacomputer(
2553 2576 self, write_debug=write_debug
2554 2577 )
2555 2578
2556 2579 if cachedelta is not None and len(cachedelta) == 2:
2557 2580 # If the cached delta has no information about how it should be
2558 2581 # reused, add the default reuse instruction according to the
2559 2582 # revlog's configuration.
2560 2583 if self._generaldelta and self._lazydeltabase:
2561 2584 delta_base_reuse = DELTA_BASE_REUSE_TRY
2562 2585 else:
2563 2586 delta_base_reuse = DELTA_BASE_REUSE_NO
2564 2587 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2565 2588
2566 2589 revinfo = revlogutils.revisioninfo(
2567 2590 node,
2568 2591 p1,
2569 2592 p2,
2570 2593 btext,
2571 2594 textlen,
2572 2595 cachedelta,
2573 2596 flags,
2574 2597 )
2575 2598
2576 2599 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2577 2600
2578 2601 compression_mode = COMP_MODE_INLINE
2579 2602 if self._docket is not None:
2580 2603 default_comp = self._docket.default_compression_header
2581 2604 r = deltautil.delta_compression(default_comp, deltainfo)
2582 2605 compression_mode, deltainfo = r
2583 2606
2584 2607 sidedata_compression_mode = COMP_MODE_INLINE
2585 2608 if sidedata and self.hassidedata:
2586 2609 sidedata_compression_mode = COMP_MODE_PLAIN
2587 2610 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2588 2611 sidedata_offset = self._docket.sidedata_end
2589 2612 h, comp_sidedata = self.compress(serialized_sidedata)
2590 2613 if (
2591 2614 h != b'u'
2592 2615 and comp_sidedata[0:1] != b'\0'
2593 2616 and len(comp_sidedata) < len(serialized_sidedata)
2594 2617 ):
2595 2618 assert not h
2596 2619 if (
2597 2620 comp_sidedata[0:1]
2598 2621 == self._docket.default_compression_header
2599 2622 ):
2600 2623 sidedata_compression_mode = COMP_MODE_DEFAULT
2601 2624 serialized_sidedata = comp_sidedata
2602 2625 else:
2603 2626 sidedata_compression_mode = COMP_MODE_INLINE
2604 2627 serialized_sidedata = comp_sidedata
2605 2628 else:
2606 2629 serialized_sidedata = b""
2607 2630 # Don't store the offset if the sidedata is empty, that way
2608 2631 # we can easily detect empty sidedata and it will be no different
2609 2632 # from the ones we add manually.
2610 2633 sidedata_offset = 0
2611 2634
2612 2635 rank = RANK_UNKNOWN
2613 2636 if self._compute_rank:
2614 2637 if (p1r, p2r) == (nullrev, nullrev):
2615 2638 rank = 1
2616 2639 elif p1r != nullrev and p2r == nullrev:
2617 2640 rank = 1 + self.fast_rank(p1r)
2618 2641 elif p1r == nullrev and p2r != nullrev:
2619 2642 rank = 1 + self.fast_rank(p2r)
2620 2643 else: # merge node
2621 2644 if rustdagop is not None and self.index.rust_ext_compat:
2622 2645 rank = rustdagop.rank(self.index, p1r, p2r)
2623 2646 else:
2624 2647 pmin, pmax = sorted((p1r, p2r))
2625 2648 rank = 1 + self.fast_rank(pmax)
2626 2649 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2627 2650
2628 2651 e = revlogutils.entry(
2629 2652 flags=flags,
2630 2653 data_offset=offset,
2631 2654 data_compressed_length=deltainfo.deltalen,
2632 2655 data_uncompressed_length=textlen,
2633 2656 data_compression_mode=compression_mode,
2634 2657 data_delta_base=deltainfo.base,
2635 2658 link_rev=link,
2636 2659 parent_rev_1=p1r,
2637 2660 parent_rev_2=p2r,
2638 2661 node_id=node,
2639 2662 sidedata_offset=sidedata_offset,
2640 2663 sidedata_compressed_length=len(serialized_sidedata),
2641 2664 sidedata_compression_mode=sidedata_compression_mode,
2642 2665 rank=rank,
2643 2666 )
2644 2667
2645 2668 self.index.append(e)
2646 2669 entry = self.index.entry_binary(curr)
2647 2670 if curr == 0 and self._docket is None:
2648 2671 header = self._format_flags | self._format_version
2649 2672 header = self.index.pack_header(header)
2650 2673 entry = header + entry
2651 2674 self._writeentry(
2652 2675 transaction,
2653 2676 entry,
2654 2677 deltainfo.data,
2655 2678 link,
2656 2679 offset,
2657 2680 serialized_sidedata,
2658 2681 sidedata_offset,
2659 2682 )
2660 2683
2661 2684 rawtext = btext[0]
2662 2685
2663 2686 if alwayscache and rawtext is None:
2664 2687 rawtext = deltacomputer.buildtext(revinfo, fh)
2665 2688
2666 2689 if type(rawtext) == bytes: # only accept immutable objects
2667 2690 self._revisioncache = (node, curr, rawtext)
2668 2691 self._chainbasecache[curr] = deltainfo.chainbase
2669 2692 return curr
2670 2693
2671 2694 def _get_data_offset(self, prev):
2672 2695 """Returns the current offset in the (in-transaction) data file.
2673 2696 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2674 2697 file to store that information: since sidedata can be rewritten to the
2675 2698 end of the data file within a transaction, you can have cases where, for
2676 2699 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2677 2700 to `n - 1`'s sidedata being written after `n`'s data.
2678 2701
2679 2702 TODO cache this in a docket file before getting out of experimental."""
2680 2703 if self._docket is None:
2681 2704 return self.end(prev)
2682 2705 else:
2683 2706 return self._docket.data_end
2684 2707
2685 2708 def _writeentry(
2686 2709 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2687 2710 ):
2688 2711 # Files opened in a+ mode have inconsistent behavior on various
2689 2712 # platforms. Windows requires that a file positioning call be made
2690 2713 # when the file handle transitions between reads and writes. See
2691 2714 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2692 2715 # platforms, Python or the platform itself can be buggy. Some versions
2693 2716 # of Solaris have been observed to not append at the end of the file
2694 2717 # if the file was seeked to before the end. See issue4943 for more.
2695 2718 #
2696 2719 # We work around this issue by inserting a seek() before writing.
2697 2720 # Note: This is likely not necessary on Python 3. However, because
2698 2721 # the file handle is reused for reads and may be seeked there, we need
2699 2722 # to be careful before changing this.
2700 2723 if self._writinghandles is None:
2701 2724 msg = b'adding revision outside `revlog._writing` context'
2702 2725 raise error.ProgrammingError(msg)
2703 2726 ifh, dfh, sdfh = self._writinghandles
2704 2727 if self._docket is None:
2705 2728 ifh.seek(0, os.SEEK_END)
2706 2729 else:
2707 2730 ifh.seek(self._docket.index_end, os.SEEK_SET)
2708 2731 if dfh:
2709 2732 if self._docket is None:
2710 2733 dfh.seek(0, os.SEEK_END)
2711 2734 else:
2712 2735 dfh.seek(self._docket.data_end, os.SEEK_SET)
2713 2736 if sdfh:
2714 2737 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2715 2738
2716 2739 curr = len(self) - 1
2717 2740 if not self._inline:
2718 2741 transaction.add(self._datafile, offset)
2719 2742 if self._sidedatafile:
2720 2743 transaction.add(self._sidedatafile, sidedata_offset)
2721 2744 transaction.add(self._indexfile, curr * len(entry))
2722 2745 if data[0]:
2723 2746 dfh.write(data[0])
2724 2747 dfh.write(data[1])
2725 2748 if sidedata:
2726 2749 sdfh.write(sidedata)
2727 2750 ifh.write(entry)
2728 2751 else:
2729 2752 offset += curr * self.index.entry_size
2730 2753 transaction.add(self._indexfile, offset)
2731 2754 ifh.write(entry)
2732 2755 ifh.write(data[0])
2733 2756 ifh.write(data[1])
2734 2757 assert not sidedata
2735 2758 self._enforceinlinesize(transaction)
2736 2759 if self._docket is not None:
2737 2760 # revlog-v2 always has 3 writing handles, help Pytype
2738 2761 wh1 = self._writinghandles[0]
2739 2762 wh2 = self._writinghandles[1]
2740 2763 wh3 = self._writinghandles[2]
2741 2764 assert wh1 is not None
2742 2765 assert wh2 is not None
2743 2766 assert wh3 is not None
2744 2767 self._docket.index_end = wh1.tell()
2745 2768 self._docket.data_end = wh2.tell()
2746 2769 self._docket.sidedata_end = wh3.tell()
2747 2770
2748 2771 nodemaputil.setup_persistent_nodemap(transaction, self)
2749 2772
2750 2773 def addgroup(
2751 2774 self,
2752 2775 deltas,
2753 2776 linkmapper,
2754 2777 transaction,
2755 2778 alwayscache=False,
2756 2779 addrevisioncb=None,
2757 2780 duplicaterevisioncb=None,
2758 2781 debug_info=None,
2759 2782 delta_base_reuse_policy=None,
2760 2783 ):
2761 2784 """
2762 2785 add a delta group
2763 2786
2764 2787 given a set of deltas, add them to the revision log. the
2765 2788 first delta is against its parent, which should be in our
2766 2789 log, the rest are against the previous delta.
2767 2790
2768 2791 If ``addrevisioncb`` is defined, it will be called with arguments of
2769 2792 this revlog and the node that was added.
2770 2793 """
2771 2794
2772 2795 if self._adding_group:
2773 2796 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2774 2797
2775 2798 # read the default delta-base reuse policy from revlog config if the
2776 2799 # group did not specify one.
2777 2800 if delta_base_reuse_policy is None:
2778 2801 if self._generaldelta and self._lazydeltabase:
2779 2802 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2780 2803 else:
2781 2804 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2782 2805
2783 2806 self._adding_group = True
2784 2807 empty = True
2785 2808 try:
2786 2809 with self._writing(transaction):
2787 2810 write_debug = None
2788 2811 if self._debug_delta:
2789 2812 write_debug = transaction._report
2790 2813 deltacomputer = deltautil.deltacomputer(
2791 2814 self,
2792 2815 write_debug=write_debug,
2793 2816 debug_info=debug_info,
2794 2817 )
2795 2818 # loop through our set of deltas
2796 2819 for data in deltas:
2797 2820 (
2798 2821 node,
2799 2822 p1,
2800 2823 p2,
2801 2824 linknode,
2802 2825 deltabase,
2803 2826 delta,
2804 2827 flags,
2805 2828 sidedata,
2806 2829 ) = data
2807 2830 link = linkmapper(linknode)
2808 2831 flags = flags or REVIDX_DEFAULT_FLAGS
2809 2832
2810 2833 rev = self.index.get_rev(node)
2811 2834 if rev is not None:
2812 2835 # this can happen if two branches make the same change
2813 2836 self._nodeduplicatecallback(transaction, rev)
2814 2837 if duplicaterevisioncb:
2815 2838 duplicaterevisioncb(self, rev)
2816 2839 empty = False
2817 2840 continue
2818 2841
2819 2842 for p in (p1, p2):
2820 2843 if not self.index.has_node(p):
2821 2844 raise error.LookupError(
2822 2845 p, self.radix, _(b'unknown parent')
2823 2846 )
2824 2847
2825 2848 if not self.index.has_node(deltabase):
2826 2849 raise error.LookupError(
2827 2850 deltabase, self.display_id, _(b'unknown delta base')
2828 2851 )
2829 2852
2830 2853 baserev = self.rev(deltabase)
2831 2854
2832 2855 if baserev != nullrev and self.iscensored(baserev):
2833 2856 # if base is censored, delta must be full replacement in a
2834 2857 # single patch operation
2835 2858 hlen = struct.calcsize(b">lll")
2836 2859 oldlen = self.rawsize(baserev)
2837 2860 newlen = len(delta) - hlen
2838 2861 if delta[:hlen] != mdiff.replacediffheader(
2839 2862 oldlen, newlen
2840 2863 ):
2841 2864 raise error.CensoredBaseError(
2842 2865 self.display_id, self.node(baserev)
2843 2866 )
2844 2867
2845 2868 if not flags and self._peek_iscensored(baserev, delta):
2846 2869 flags |= REVIDX_ISCENSORED
2847 2870
2848 2871 # We assume consumers of addrevisioncb will want to retrieve
2849 2872 # the added revision, which will require a call to
2850 2873 # revision(). revision() will fast path if there is a cache
2851 2874 # hit. So, we tell _addrevision() to always cache in this case.
2852 2875 # We're only using addgroup() in the context of changegroup
2853 2876 # generation so the revision data can always be handled as raw
2854 2877 # by the flagprocessor.
2855 2878 rev = self._addrevision(
2856 2879 node,
2857 2880 None,
2858 2881 transaction,
2859 2882 link,
2860 2883 p1,
2861 2884 p2,
2862 2885 flags,
2863 2886 (baserev, delta, delta_base_reuse_policy),
2864 2887 alwayscache=alwayscache,
2865 2888 deltacomputer=deltacomputer,
2866 2889 sidedata=sidedata,
2867 2890 )
2868 2891
2869 2892 if addrevisioncb:
2870 2893 addrevisioncb(self, rev)
2871 2894 empty = False
2872 2895 finally:
2873 2896 self._adding_group = False
2874 2897 return not empty
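# Editor's note: shape of one entry of the ``deltas`` iterable consumed by
# addgroup(), as unpacked in the loop above (illustrative only):
#
#     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
#
# ``delta`` is a binary patch against the revision ``deltabase``, and
# ``linkmapper(linknode)`` provides the linkrev stored in the index entry.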
2875 2898
2876 2899 def iscensored(self, rev):
2877 2900 """Check if a file revision is censored."""
2878 2901 if not self._censorable:
2879 2902 return False
2880 2903
2881 2904 return self.flags(rev) & REVIDX_ISCENSORED
2882 2905
2883 2906 def _peek_iscensored(self, baserev, delta):
2884 2907 """Quickly check if a delta produces a censored revision."""
2885 2908 if not self._censorable:
2886 2909 return False
2887 2910
2888 2911 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2889 2912
2890 2913 def getstrippoint(self, minlink):
2891 2914 """find the minimum rev that must be stripped to strip the linkrev
2892 2915
2893 2916 Returns a tuple containing the minimum rev and a set of all revs that
2894 2917 have linkrevs that will be broken by this strip.
2895 2918 """
2896 2919 return storageutil.resolvestripinfo(
2897 2920 minlink,
2898 2921 len(self) - 1,
2899 2922 self.headrevs(),
2900 2923 self.linkrev,
2901 2924 self.parentrevs,
2902 2925 )
2903 2926
2904 2927 def strip(self, minlink, transaction):
2905 2928 """truncate the revlog on the first revision with a linkrev >= minlink
2906 2929
2907 2930 This function is called when we're stripping revision minlink and
2908 2931 its descendants from the repository.
2909 2932
2910 2933 We have to remove all revisions with linkrev >= minlink, because
2911 2934 the equivalent changelog revisions will be renumbered after the
2912 2935 strip.
2913 2936
2914 2937 So we truncate the revlog on the first of these revisions, and
2915 2938 trust that the caller has saved the revisions that shouldn't be
2916 2939 removed and that it'll re-add them after this truncation.
2917 2940 """
2918 2941 if len(self) == 0:
2919 2942 return
2920 2943
2921 2944 rev, _ = self.getstrippoint(minlink)
2922 2945 if rev == len(self):
2923 2946 return
2924 2947
2925 2948 # first truncate the files on disk
2926 2949 data_end = self.start(rev)
2927 2950 if not self._inline:
2928 2951 transaction.add(self._datafile, data_end)
2929 2952 end = rev * self.index.entry_size
2930 2953 else:
2931 2954 end = data_end + (rev * self.index.entry_size)
2932 2955
2933 2956 if self._sidedatafile:
2934 2957 sidedata_end = self.sidedata_cut_off(rev)
2935 2958 transaction.add(self._sidedatafile, sidedata_end)
2936 2959
2937 2960 transaction.add(self._indexfile, end)
2938 2961 if self._docket is not None:
2939 2962 # XXX we could leverage the docket while stripping. However it is
2940 2963 # not powerful enough at the time of this comment
2941 2964 self._docket.index_end = end
2942 2965 self._docket.data_end = data_end
2943 2966 self._docket.sidedata_end = sidedata_end
2944 2967 self._docket.write(transaction, stripping=True)
2945 2968
2946 2969 # then reset internal state in memory to forget those revisions
2947 2970 self._revisioncache = None
2948 2971 self._chaininfocache = util.lrucachedict(500)
2949 2972 self._segmentfile.clear_cache()
2950 2973 self._segmentfile_sidedata.clear_cache()
2951 2974
2952 2975 del self.index[rev:-1]
2953 2976
2954 2977 def checksize(self):
2955 2978 """Check size of index and data files
2956 2979
2957 2980 return a (dd, di) tuple.
2958 2981 - dd: extra bytes for the "data" file
2959 2982 - di: extra bytes for the "index" file
2960 2983
2961 2984 A healthy revlog will return (0, 0).
2962 2985 """
2963 2986 expected = 0
2964 2987 if len(self):
2965 2988 expected = max(0, self.end(len(self) - 1))
2966 2989
2967 2990 try:
2968 2991 with self._datafp() as f:
2969 2992 f.seek(0, io.SEEK_END)
2970 2993 actual = f.tell()
2971 2994 dd = actual - expected
2972 2995 except FileNotFoundError:
2973 2996 dd = 0
2974 2997
2975 2998 try:
2976 2999 f = self.opener(self._indexfile)
2977 3000 f.seek(0, io.SEEK_END)
2978 3001 actual = f.tell()
2979 3002 f.close()
2980 3003 s = self.index.entry_size
2981 3004 i = max(0, actual // s)
2982 3005 di = actual - (i * s)
2983 3006 if self._inline:
2984 3007 databytes = 0
2985 3008 for r in self:
2986 3009 databytes += max(0, self.length(r))
2987 3010 dd = 0
2988 3011 di = actual - len(self) * s - databytes
2989 3012 except FileNotFoundError:
2990 3013 di = 0
2991 3014
2992 3015 return (dd, di)
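# Editor's note: illustrative interpretation (not part of the original
# module); `rl` is an assumption:
#
#     dd, di = rl.checksize()
#     # dd: stray bytes trailing the data file, di: stray bytes trailing the
#     # index file; a healthy revlog reports (0, 0).
#     assert (dd, di) == (0, 0), 'revlog has trailing garbage'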
2993 3016
2994 3017 def files(self):
2995 3018 res = [self._indexfile]
2996 3019 if self._docket_file is None:
2997 3020 if not self._inline:
2998 3021 res.append(self._datafile)
2999 3022 else:
3000 3023 res.append(self._docket_file)
3001 3024 res.extend(self._docket.old_index_filepaths(include_empty=False))
3002 3025 if self._docket.data_end:
3003 3026 res.append(self._datafile)
3004 3027 res.extend(self._docket.old_data_filepaths(include_empty=False))
3005 3028 if self._docket.sidedata_end:
3006 3029 res.append(self._sidedatafile)
3007 3030 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3008 3031 return res
3009 3032
3010 3033 def emitrevisions(
3011 3034 self,
3012 3035 nodes,
3013 3036 nodesorder=None,
3014 3037 revisiondata=False,
3015 3038 assumehaveparentrevisions=False,
3016 3039 deltamode=repository.CG_DELTAMODE_STD,
3017 3040 sidedata_helpers=None,
3018 3041 debug_info=None,
3019 3042 ):
3020 3043 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3021 3044 raise error.ProgrammingError(
3022 3045 b'unhandled value for nodesorder: %s' % nodesorder
3023 3046 )
3024 3047
3025 3048 if nodesorder is None and not self._generaldelta:
3026 3049 nodesorder = b'storage'
3027 3050
3028 3051 if (
3029 3052 not self._storedeltachains
3030 3053 and deltamode != repository.CG_DELTAMODE_PREV
3031 3054 ):
3032 3055 deltamode = repository.CG_DELTAMODE_FULL
3033 3056
3034 3057 return storageutil.emitrevisions(
3035 3058 self,
3036 3059 nodes,
3037 3060 nodesorder,
3038 3061 revlogrevisiondelta,
3039 3062 deltaparentfn=self.deltaparent,
3040 3063 candeltafn=self.candelta,
3041 3064 rawsizefn=self.rawsize,
3042 3065 revdifffn=self.revdiff,
3043 3066 flagsfn=self.flags,
3044 3067 deltamode=deltamode,
3045 3068 revisiondata=revisiondata,
3046 3069 assumehaveparentrevisions=assumehaveparentrevisions,
3047 3070 sidedata_helpers=sidedata_helpers,
3048 3071 debug_info=debug_info,
3049 3072 )
3050 3073
3051 3074 DELTAREUSEALWAYS = b'always'
3052 3075 DELTAREUSESAMEREVS = b'samerevs'
3053 3076 DELTAREUSENEVER = b'never'
3054 3077
3055 3078 DELTAREUSEFULLADD = b'fulladd'
3056 3079
3057 3080 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3058 3081
3059 3082 def clone(
3060 3083 self,
3061 3084 tr,
3062 3085 destrevlog,
3063 3086 addrevisioncb=None,
3064 3087 deltareuse=DELTAREUSESAMEREVS,
3065 3088 forcedeltabothparents=None,
3066 3089 sidedata_helpers=None,
3067 3090 ):
3068 3091 """Copy this revlog to another, possibly with format changes.
3069 3092
3070 3093 The destination revlog will contain the same revisions and nodes.
3071 3094 However, it may not be bit-for-bit identical due to e.g. delta encoding
3072 3095 differences.
3073 3096
3074 3097 The ``deltareuse`` argument controls how deltas from the existing revlog
3075 3098 are preserved in the destination revlog. The argument can have the
3076 3099 following values:
3077 3100
3078 3101 DELTAREUSEALWAYS
3079 3102 Deltas will always be reused (if possible), even if the destination
3080 3103 revlog would not select the same revisions for the delta. This is the
3081 3104 fastest mode of operation.
3082 3105 DELTAREUSESAMEREVS
3083 3106 Deltas will be reused if the destination revlog would pick the same
3084 3107 revisions for the delta. This mode strikes a balance between speed
3085 3108 and optimization.
3086 3109 DELTAREUSENEVER
3087 3110 Deltas will never be reused. This is the slowest mode of execution.
3088 3111 This mode can be used to recompute deltas (e.g. if the diff/delta
3089 3112 algorithm changes).
3090 3113 DELTAREUSEFULLADD
3091 3114 Revisions will be re-added as if they were new content. This is
3092 3115 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3093 3116 e.g. large file detection and handling.
3094 3117
3095 3118 Delta computation can be slow, so the choice of delta reuse policy can
3096 3119 significantly affect run time.
3097 3120
3098 3121 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3099 3122 two extremes. Deltas will be reused if they are appropriate. But if the
3100 3123 delta could choose a better revision, it will do so. This means if you
3101 3124 are converting a non-generaldelta revlog to a generaldelta revlog,
3102 3125 deltas will be recomputed if the delta's parent isn't a parent of the
3103 3126 revision.
3104 3127
3105 3128 In addition to the delta policy, the ``forcedeltabothparents``
3106 3129 argument controls whether to force computing deltas against both parents
3107 3130 for merges. If unset, the destination revlog's existing setting is kept.
3108 3131
3109 3132 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3110 3133 `sidedata_helpers`.
3111 3134 """
3112 3135 if deltareuse not in self.DELTAREUSEALL:
3113 3136 raise ValueError(
3114 3137 _(b'value for deltareuse invalid: %s') % deltareuse
3115 3138 )
3116 3139
3117 3140 if len(destrevlog):
3118 3141 raise ValueError(_(b'destination revlog is not empty'))
3119 3142
3120 3143 if getattr(self, 'filteredrevs', None):
3121 3144 raise ValueError(_(b'source revlog has filtered revisions'))
3122 3145 if getattr(destrevlog, 'filteredrevs', None):
3123 3146 raise ValueError(_(b'destination revlog has filtered revisions'))
3124 3147
3125 3148 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3126 3149 # if possible.
3127 3150 oldlazydelta = destrevlog._lazydelta
3128 3151 oldlazydeltabase = destrevlog._lazydeltabase
3129 3152 oldamd = destrevlog._deltabothparents
3130 3153
3131 3154 try:
3132 3155 if deltareuse == self.DELTAREUSEALWAYS:
3133 3156 destrevlog._lazydeltabase = True
3134 3157 destrevlog._lazydelta = True
3135 3158 elif deltareuse == self.DELTAREUSESAMEREVS:
3136 3159 destrevlog._lazydeltabase = False
3137 3160 destrevlog._lazydelta = True
3138 3161 elif deltareuse == self.DELTAREUSENEVER:
3139 3162 destrevlog._lazydeltabase = False
3140 3163 destrevlog._lazydelta = False
3141 3164
3142 3165 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3143 3166
3144 3167 self._clone(
3145 3168 tr,
3146 3169 destrevlog,
3147 3170 addrevisioncb,
3148 3171 deltareuse,
3149 3172 forcedeltabothparents,
3150 3173 sidedata_helpers,
3151 3174 )
3152 3175
3153 3176 finally:
3154 3177 destrevlog._lazydelta = oldlazydelta
3155 3178 destrevlog._lazydeltabase = oldlazydeltabase
3156 3179 destrevlog._deltabothparents = oldamd
3157 3180
3158 3181 def _clone(
3159 3182 self,
3160 3183 tr,
3161 3184 destrevlog,
3162 3185 addrevisioncb,
3163 3186 deltareuse,
3164 3187 forcedeltabothparents,
3165 3188 sidedata_helpers,
3166 3189 ):
3167 3190 """perform the core duty of `revlog.clone` after parameter processing"""
3168 3191 write_debug = None
3169 3192 if self._debug_delta:
3170 3193 write_debug = tr._report
3171 3194 deltacomputer = deltautil.deltacomputer(
3172 3195 destrevlog,
3173 3196 write_debug=write_debug,
3174 3197 )
3175 3198 index = self.index
3176 3199 for rev in self:
3177 3200 entry = index[rev]
3178 3201
3179 3202 # Some classes override linkrev to take filtered revs into
3180 3203 # account. Use raw entry from index.
3181 3204 flags = entry[0] & 0xFFFF
3182 3205 linkrev = entry[4]
3183 3206 p1 = index[entry[5]][7]
3184 3207 p2 = index[entry[6]][7]
3185 3208 node = entry[7]
3186 3209
3187 3210 # (Possibly) reuse the delta from the revlog if allowed and
3188 3211 # the revlog chunk is a delta.
3189 3212 cachedelta = None
3190 3213 rawtext = None
3191 3214 if deltareuse == self.DELTAREUSEFULLADD:
3192 3215 text = self._revisiondata(rev)
3193 3216 sidedata = self.sidedata(rev)
3194 3217
3195 3218 if sidedata_helpers is not None:
3196 3219 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3197 3220 self, sidedata_helpers, sidedata, rev
3198 3221 )
3199 3222 flags = flags | new_flags[0] & ~new_flags[1]
3200 3223
3201 3224 destrevlog.addrevision(
3202 3225 text,
3203 3226 tr,
3204 3227 linkrev,
3205 3228 p1,
3206 3229 p2,
3207 3230 cachedelta=cachedelta,
3208 3231 node=node,
3209 3232 flags=flags,
3210 3233 deltacomputer=deltacomputer,
3211 3234 sidedata=sidedata,
3212 3235 )
3213 3236 else:
3214 3237 if destrevlog._lazydelta:
3215 3238 dp = self.deltaparent(rev)
3216 3239 if dp != nullrev:
3217 3240 cachedelta = (dp, bytes(self._chunk(rev)))
3218 3241
3219 3242 sidedata = None
3220 3243 if not cachedelta:
3221 3244 rawtext = self._revisiondata(rev)
3222 3245 sidedata = self.sidedata(rev)
3223 3246 if sidedata is None:
3224 3247 sidedata = self.sidedata(rev)
3225 3248
3226 3249 if sidedata_helpers is not None:
3227 3250 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3228 3251 self, sidedata_helpers, sidedata, rev
3229 3252 )
3230 3253 flags = flags | new_flags[0] & ~new_flags[1]
3231 3254
3232 3255 with destrevlog._writing(tr):
3233 3256 destrevlog._addrevision(
3234 3257 node,
3235 3258 rawtext,
3236 3259 tr,
3237 3260 linkrev,
3238 3261 p1,
3239 3262 p2,
3240 3263 flags,
3241 3264 cachedelta,
3242 3265 deltacomputer=deltacomputer,
3243 3266 sidedata=sidedata,
3244 3267 )
3245 3268
3246 3269 if addrevisioncb:
3247 3270 addrevisioncb(self, rev, node)
3248 3271
3249 3272 def censorrevision(self, tr, censornode, tombstone=b''):
3250 3273 if self._format_version == REVLOGV0:
3251 3274 raise error.RevlogError(
3252 3275 _(b'cannot censor with version %d revlogs')
3253 3276 % self._format_version
3254 3277 )
3255 3278 elif self._format_version == REVLOGV1:
3256 3279 rewrite.v1_censor(self, tr, censornode, tombstone)
3257 3280 else:
3258 3281 rewrite.v2_censor(self, tr, censornode, tombstone)
3259 3282
3260 3283 def verifyintegrity(self, state):
3261 3284 """Verifies the integrity of the revlog.
3262 3285
3263 3286 Yields ``revlogproblem`` instances describing problems that are
3264 3287 found.
3265 3288 """
3266 3289 dd, di = self.checksize()
3267 3290 if dd:
3268 3291 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3269 3292 if di:
3270 3293 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3271 3294
3272 3295 version = self._format_version
3273 3296
3274 3297 # The verifier tells us what version revlog we should be.
3275 3298 if version != state[b'expectedversion']:
3276 3299 yield revlogproblem(
3277 3300 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3278 3301 % (self.display_id, version, state[b'expectedversion'])
3279 3302 )
3280 3303
3281 3304 state[b'skipread'] = set()
3282 3305 state[b'safe_renamed'] = set()
3283 3306
3284 3307 for rev in self:
3285 3308 node = self.node(rev)
3286 3309
3287 3310 # Verify contents. 4 cases to care about:
3288 3311 #
3289 3312 # common: the most common case
3290 3313 # rename: with a rename
3291 3314 # meta: file content starts with b'\1\n', the metadata
3292 3315 # header defined in filelog.py, but without a rename
3293 3316 # ext: content stored externally
3294 3317 #
3295 3318 # More formally, their differences are shown below:
3296 3319 #
3297 3320 # | common | rename | meta | ext
3298 3321 # -------------------------------------------------------
3299 3322 # flags() | 0 | 0 | 0 | not 0
3300 3323 # renamed() | False | True | False | ?
3301 3324 # rawtext[0:2]=='\1\n'| False | True | True | ?
3302 3325 #
3303 3326 # "rawtext" means the raw text stored in revlog data, which
3304 3327 # could be retrieved by "rawdata(rev)". "text"
3305 3328 # mentioned below is "revision(rev)".
3306 3329 #
3307 3330 # There are 3 different lengths stored physically:
3308 3331 # 1. L1: rawsize, stored in revlog index
3309 3332 # 2. L2: len(rawtext), stored in revlog data
3310 3333 # 3. L3: len(text), stored in revlog data if flags==0, or
3311 3334 # possibly somewhere else if flags!=0
3312 3335 #
3313 3336 # L1 should be equal to L2. L3 could be different from them.
3314 3337 # "text" may or may not affect commit hash depending on flag
3315 3338 # processors (see flagutil.addflagprocessor).
3316 3339 #
3317 3340 # | common | rename | meta | ext
3318 3341 # -------------------------------------------------
3319 3342 # rawsize() | L1 | L1 | L1 | L1
3320 3343 # size() | L1 | L2-LM | L1(*) | L1 (?)
3321 3344 # len(rawtext) | L2 | L2 | L2 | L2
3322 3345 # len(text) | L2 | L2 | L2 | L3
3323 3346 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3324 3347 #
3325 3348 # LM: length of metadata, depending on rawtext
3326 3349 # (*): not ideal, see comment in filelog.size
3327 3350 # (?): could be "- len(meta)" if the resolved content has
3328 3351 # rename metadata
3329 3352 #
3330 3353 # Checks needed to be done:
3331 3354 # 1. length check: L1 == L2, in all cases.
3332 3355 # 2. hash check: depending on flag processor, we may need to
3333 3356 # use either "text" (external), or "rawtext" (in revlog).
3334 3357
3335 3358 try:
3336 3359 skipflags = state.get(b'skipflags', 0)
3337 3360 if skipflags:
3338 3361 skipflags &= self.flags(rev)
3339 3362
3340 3363 _verify_revision(self, skipflags, state, node)
3341 3364
3342 3365 l1 = self.rawsize(rev)
3343 3366 l2 = len(self.rawdata(node))
3344 3367
3345 3368 if l1 != l2:
3346 3369 yield revlogproblem(
3347 3370 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3348 3371 node=node,
3349 3372 )
3350 3373
3351 3374 except error.CensoredNodeError:
3352 3375 if state[b'erroroncensored']:
3353 3376 yield revlogproblem(
3354 3377 error=_(b'censored file data'), node=node
3355 3378 )
3356 3379 state[b'skipread'].add(node)
3357 3380 except Exception as e:
3358 3381 yield revlogproblem(
3359 3382 error=_(b'unpacking %s: %s')
3360 3383 % (short(node), stringutil.forcebytestr(e)),
3361 3384 node=node,
3362 3385 )
3363 3386 state[b'skipread'].add(node)
3364 3387
3365 3388 def storageinfo(
3366 3389 self,
3367 3390 exclusivefiles=False,
3368 3391 sharedfiles=False,
3369 3392 revisionscount=False,
3370 3393 trackedsize=False,
3371 3394 storedsize=False,
3372 3395 ):
3373 3396 d = {}
3374 3397
3375 3398 if exclusivefiles:
3376 3399 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3377 3400 if not self._inline:
3378 3401 d[b'exclusivefiles'].append((self.opener, self._datafile))
3379 3402
3380 3403 if sharedfiles:
3381 3404 d[b'sharedfiles'] = []
3382 3405
3383 3406 if revisionscount:
3384 3407 d[b'revisionscount'] = len(self)
3385 3408
3386 3409 if trackedsize:
3387 3410 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3388 3411
3389 3412 if storedsize:
3390 3413 d[b'storedsize'] = sum(
3391 3414 self.opener.stat(path).st_size for path in self.files()
3392 3415 )
3393 3416
3394 3417 return d
3395 3418
3396 3419 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3397 3420 if not self.hassidedata:
3398 3421 return
3399 3422         # revlog formats with sidedata support do not support inline storage
3400 3423 assert not self._inline
3401 3424 if not helpers[1] and not helpers[2]:
3402 3425 # Nothing to generate or remove
3403 3426 return
3404 3427
3405 3428 new_entries = []
3406 3429 # append the new sidedata
3407 3430 with self._writing(transaction):
3408 3431 ifh, dfh, sdfh = self._writinghandles
3409 3432 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3410 3433
3411 3434 current_offset = sdfh.tell()
3412 3435 for rev in range(startrev, endrev + 1):
3413 3436 entry = self.index[rev]
3414 3437 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3415 3438 store=self,
3416 3439 sidedata_helpers=helpers,
3417 3440 sidedata={},
3418 3441 rev=rev,
3419 3442 )
3420 3443
3421 3444 serialized_sidedata = sidedatautil.serialize_sidedata(
3422 3445 new_sidedata
3423 3446 )
3424 3447
3425 3448 sidedata_compression_mode = COMP_MODE_INLINE
3426 3449 if serialized_sidedata and self.hassidedata:
3427 3450 sidedata_compression_mode = COMP_MODE_PLAIN
3428 3451 h, comp_sidedata = self.compress(serialized_sidedata)
3429 3452 if (
3430 3453 h != b'u'
3431 3454 and comp_sidedata[0] != b'\0'
3432 3455 and len(comp_sidedata) < len(serialized_sidedata)
3433 3456 ):
3434 3457 assert not h
3435 3458 if (
3436 3459 comp_sidedata[0]
3437 3460 == self._docket.default_compression_header
3438 3461 ):
3439 3462 sidedata_compression_mode = COMP_MODE_DEFAULT
3440 3463 serialized_sidedata = comp_sidedata
3441 3464 else:
3442 3465 sidedata_compression_mode = COMP_MODE_INLINE
3443 3466 serialized_sidedata = comp_sidedata
3444 3467 if entry[8] != 0 or entry[9] != 0:
3445 3468 # rewriting entries that already have sidedata is not
3446 3469 # supported yet, because it introduces garbage data in the
3447 3470 # revlog.
3448 3471 msg = b"rewriting existing sidedata is not supported yet"
3449 3472 raise error.Abort(msg)
3450 3473
3451 3474 # Apply (potential) flags to add and to remove after running
3452 3475 # the sidedata helpers
3453 3476 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3454 3477 entry_update = (
3455 3478 current_offset,
3456 3479 len(serialized_sidedata),
3457 3480 new_offset_flags,
3458 3481 sidedata_compression_mode,
3459 3482 )
3460 3483
3461 3484                 # the sidedata computation might have moved the file cursors around
3462 3485 sdfh.seek(current_offset, os.SEEK_SET)
3463 3486 sdfh.write(serialized_sidedata)
3464 3487 new_entries.append(entry_update)
3465 3488 current_offset += len(serialized_sidedata)
3466 3489 self._docket.sidedata_end = sdfh.tell()
3467 3490
3468 3491 # rewrite the new index entries
3469 3492 ifh.seek(startrev * self.index.entry_size)
3470 3493 for i, e in enumerate(new_entries):
3471 3494 rev = startrev + i
3472 3495 self.index.replace_sidedata_info(rev, *e)
3473 3496 packed = self.index.entry_binary(rev)
3474 3497 if rev == 0 and self._docket is None:
3475 3498 header = self._format_flags | self._format_version
3476 3499 header = self.index.pack_header(header)
3477 3500 packed = header + packed
3478 3501 ifh.write(packed)
@@ -1,1216 +1,1219 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator, List
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 filelog,
23 23 manifest,
24 24 policy,
25 25 pycompat,
26 26 util,
27 27 vfs as vfsmod,
28 28 )
29 29 from .utils import hashutil
30 30
31 31 parsers = policy.importmod('parsers')
32 32 # how many bytes should be read from fncache in one read
33 33 # This is done to prevent loading large fncache files into memory
34 34 fncache_chunksize = 10 ** 6
35 35
36 36
37 37 def _match_tracked_entry(entry, matcher):
38 38 """parses a fncache entry and returns whether the entry is tracking a path
39 39 matched by matcher or not.
40 40
41 41 If matcher is None, returns True"""
42 42
43 43 if matcher is None:
44 44 return True
45 45 if entry.is_filelog:
46 46 return matcher(entry.target_id)
47 47 elif entry.is_manifestlog:
48 48 return matcher.visitdir(entry.target_id.rstrip(b'/'))
49 49 raise error.ProgrammingError(b"cannot process entry %r" % entry)
50 50
51 51
52 52 # This avoids a collision between a file named foo and a dir named
53 53 # foo.i or foo.d
54 54 def _encodedir(path):
55 55 """
56 56 >>> _encodedir(b'data/foo.i')
57 57 'data/foo.i'
58 58 >>> _encodedir(b'data/foo.i/bla.i')
59 59 'data/foo.i.hg/bla.i'
60 60 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 61 'data/foo.i.hg.hg/bla.i'
62 62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 64 """
65 65 return (
66 66 path.replace(b".hg/", b".hg.hg/")
67 67 .replace(b".i/", b".i.hg/")
68 68 .replace(b".d/", b".d.hg/")
69 69 )
70 70
71 71
72 72 encodedir = getattr(parsers, 'encodedir', _encodedir)
73 73
74 74
75 75 def decodedir(path):
76 76 """
77 77 >>> decodedir(b'data/foo.i')
78 78 'data/foo.i'
79 79 >>> decodedir(b'data/foo.i.hg/bla.i')
80 80 'data/foo.i/bla.i'
81 81 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
82 82 'data/foo.i.hg/bla.i'
83 83 """
84 84 if b".hg/" not in path:
85 85 return path
86 86 return (
87 87 path.replace(b".d.hg/", b".d/")
88 88 .replace(b".i.hg/", b".i/")
89 89 .replace(b".hg.hg/", b".hg/")
90 90 )
91 91
92 92
93 93 def _reserved():
94 94 """characters that are problematic for filesystems
95 95
96 96 * ascii escapes (0..31)
97 97 * ascii hi (126..255)
98 98 * windows specials
99 99
100 100 these characters will be escaped by encodefunctions
101 101 """
102 102 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 103 for x in range(32):
104 104 yield x
105 105 for x in range(126, 256):
106 106 yield x
107 107 for x in winreserved:
108 108 yield x
109 109
110 110
111 111 def _buildencodefun():
112 112 """
113 113 >>> enc, dec = _buildencodefun()
114 114
115 115 >>> enc(b'nothing/special.txt')
116 116 'nothing/special.txt'
117 117 >>> dec(b'nothing/special.txt')
118 118 'nothing/special.txt'
119 119
120 120 >>> enc(b'HELLO')
121 121 '_h_e_l_l_o'
122 122 >>> dec(b'_h_e_l_l_o')
123 123 'HELLO'
124 124
125 125 >>> enc(b'hello:world?')
126 126 'hello~3aworld~3f'
127 127 >>> dec(b'hello~3aworld~3f')
128 128 'hello:world?'
129 129
130 130 >>> enc(b'the\\x07quick\\xADshot')
131 131 'the~07quick~adshot'
132 132 >>> dec(b'the~07quick~adshot')
133 133 'the\\x07quick\\xadshot'
134 134 """
135 135 e = b'_'
136 136 xchr = pycompat.bytechr
137 137 asciistr = list(map(xchr, range(127)))
138 138 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
139 139
140 140 cmap = {x: x for x in asciistr}
141 141 for x in _reserved():
142 142 cmap[xchr(x)] = b"~%02x" % x
143 143 for x in capitals + [ord(e)]:
144 144 cmap[xchr(x)] = e + xchr(x).lower()
145 145
146 146 dmap = {}
147 147 for k, v in cmap.items():
148 148 dmap[v] = k
149 149
150 150 def decode(s):
151 151 i = 0
152 152 while i < len(s):
153 153 for l in range(1, 4):
154 154 try:
155 155 yield dmap[s[i : i + l]]
156 156 i += l
157 157 break
158 158 except KeyError:
159 159 pass
160 160 else:
161 161 raise KeyError
162 162
163 163 return (
164 164 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
165 165 lambda s: b''.join(list(decode(s))),
166 166 )
167 167
168 168
169 169 _encodefname, _decodefname = _buildencodefun()
170 170
171 171
172 172 def encodefilename(s):
173 173 """
174 174 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
175 175 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
176 176 """
177 177 return _encodefname(encodedir(s))
178 178
179 179
180 180 def decodefilename(s):
181 181 """
182 182 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
183 183 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
184 184 """
185 185 return decodedir(_decodefname(s))
186 186
187 187
188 188 def _buildlowerencodefun():
189 189 """
190 190 >>> f = _buildlowerencodefun()
191 191 >>> f(b'nothing/special.txt')
192 192 'nothing/special.txt'
193 193 >>> f(b'HELLO')
194 194 'hello'
195 195 >>> f(b'hello:world?')
196 196 'hello~3aworld~3f'
197 197 >>> f(b'the\\x07quick\\xADshot')
198 198 'the~07quick~adshot'
199 199 """
200 200 xchr = pycompat.bytechr
201 201 cmap = {xchr(x): xchr(x) for x in range(127)}
202 202 for x in _reserved():
203 203 cmap[xchr(x)] = b"~%02x" % x
204 204 for x in range(ord(b"A"), ord(b"Z") + 1):
205 205 cmap[xchr(x)] = xchr(x).lower()
206 206
207 207 def lowerencode(s):
208 208 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
209 209
210 210 return lowerencode
211 211
212 212
213 213 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
214 214
215 215 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
216 216 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
217 217 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
218 218
219 219
220 220 def _auxencode(path, dotencode):
221 221 """
222 222 Encodes filenames containing names reserved by Windows or which end in
223 223 period or space. Does not touch other single reserved characters c.
224 224 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
225 225 Additionally encodes space or period at the beginning, if dotencode is
226 226 True. Parameter path is assumed to be all lowercase.
227 227 A segment only needs encoding if a reserved name appears as a
228 228 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
229 229 doesn't need encoding.
230 230
231 231 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
232 232 >>> _auxencode(s.split(b'/'), True)
233 233 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
234 234 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
235 235 >>> _auxencode(s.split(b'/'), False)
236 236 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
237 237 >>> _auxencode([b'foo. '], True)
238 238 ['foo.~20']
239 239 >>> _auxencode([b' .foo'], True)
240 240 ['~20.foo']
241 241 """
242 242 for i, n in enumerate(path):
243 243 if not n:
244 244 continue
245 245 if dotencode and n[0] in b'. ':
246 246 n = b"~%02x" % ord(n[0:1]) + n[1:]
247 247 path[i] = n
248 248 else:
249 249 l = n.find(b'.')
250 250 if l == -1:
251 251 l = len(n)
252 252 if (l == 3 and n[:3] in _winres3) or (
253 253 l == 4
254 254 and n[3:4] <= b'9'
255 255 and n[3:4] >= b'1'
256 256 and n[:3] in _winres4
257 257 ):
258 258 # encode third letter ('aux' -> 'au~78')
259 259 ec = b"~%02x" % ord(n[2:3])
260 260 n = n[0:2] + ec + n[3:]
261 261 path[i] = n
262 262 if n[-1] in b'. ':
263 263 # encode last period or space ('foo...' -> 'foo..~2e')
264 264 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
265 265 return path
266 266
267 267
268 268 _maxstorepathlen = 120
269 269 _dirprefixlen = 8
270 270 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
271 271
272 272
273 273 def _hashencode(path, dotencode):
274 274 digest = hex(hashutil.sha1(path).digest())
275 275 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
276 276 parts = _auxencode(le, dotencode)
277 277 basename = parts[-1]
278 278 _root, ext = os.path.splitext(basename)
279 279 sdirs = []
280 280 sdirslen = 0
281 281 for p in parts[:-1]:
282 282 d = p[:_dirprefixlen]
283 283 if d[-1] in b'. ':
284 284 # Windows can't access dirs ending in period or space
285 285 d = d[:-1] + b'_'
286 286 if sdirslen == 0:
287 287 t = len(d)
288 288 else:
289 289 t = sdirslen + 1 + len(d)
290 290 if t > _maxshortdirslen:
291 291 break
292 292 sdirs.append(d)
293 293 sdirslen = t
294 294 dirs = b'/'.join(sdirs)
295 295 if len(dirs) > 0:
296 296 dirs += b'/'
297 297 res = b'dh/' + dirs + digest + ext
298 298 spaceleft = _maxstorepathlen - len(res)
299 299 if spaceleft > 0:
300 300 filler = basename[:spaceleft]
301 301 res = b'dh/' + dirs + filler + digest + ext
302 302 return res
303 303
304 304
305 305 def _hybridencode(path, dotencode):
306 306 """encodes path with a length limit
307 307
308 308 Encodes all paths that begin with 'data/', according to the following.
309 309
310 310 Default encoding (reversible):
311 311
312 312 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
313 313 characters are encoded as '~xx', where xx is the two digit hex code
314 314 of the character (see encodefilename).
315 315 Relevant path components consisting of Windows reserved filenames are
316 316 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
317 317
318 318 Hashed encoding (not reversible):
319 319
320 320 If the default-encoded path is longer than _maxstorepathlen, a
321 321 non-reversible hybrid hashing of the path is done instead.
322 322 This encoding uses up to _dirprefixlen characters of all directory
323 323 levels of the lowerencoded path, but not more levels than can fit into
324 324 _maxshortdirslen.
325 325 Then follows the filler followed by the sha digest of the full path.
326 326 The filler is the beginning of the basename of the lowerencoded path
327 327 (the basename is everything after the last path separator). The filler
328 328 is as long as possible, filling in characters from the basename until
329 329 the encoded path has _maxstorepathlen characters (or all chars of the
330 330 basename have been taken).
331 331 The extension (e.g. '.i' or '.d') is preserved.
332 332
333 333 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
334 334 encoding was used.
335 335 """
336 336 path = encodedir(path)
337 337 ef = _encodefname(path).split(b'/')
338 338 res = b'/'.join(_auxencode(ef, dotencode))
339 339 if len(res) > _maxstorepathlen:
340 340 res = _hashencode(path, dotencode)
341 341 return res
342 342
343 343
344 344 def _pathencode(path):
345 345 de = encodedir(path)
346 346 if len(path) > _maxstorepathlen:
347 347 return _hashencode(de, True)
348 348 ef = _encodefname(de).split(b'/')
349 349 res = b'/'.join(_auxencode(ef, True))
350 350 if len(res) > _maxstorepathlen:
351 351 return _hashencode(de, True)
352 352 return res
353 353
354 354
355 355 _pathencode = getattr(parsers, 'pathencode', _pathencode)
356 356
357 357
358 358 def _plainhybridencode(f):
359 359 return _hybridencode(f, False)
360 360
361 361
362 362 def _calcmode(vfs):
363 363 try:
364 364 # files in .hg/ will be created using this mode
365 365 mode = vfs.stat().st_mode
366 366 # avoid some useless chmods
367 367 if (0o777 & ~util.umask) == (0o777 & mode):
368 368 mode = None
369 369 except OSError:
370 370 mode = None
371 371 return mode
372 372
373 373
374 374 _data = [
375 375 b'bookmarks',
376 376 b'narrowspec',
377 377 b'data',
378 378 b'meta',
379 379 b'00manifest.d',
380 380 b'00manifest.i',
381 381 b'00changelog.d',
382 382 b'00changelog.i',
383 383 b'phaseroots',
384 384 b'obsstore',
385 385 b'requires',
386 386 ]
387 387
388 388 REVLOG_FILES_MAIN_EXT = (b'.i',)
389 389 REVLOG_FILES_OTHER_EXT = (
390 390 b'.idx',
391 391 b'.d',
392 392 b'.dat',
393 393 b'.n',
394 394 b'.nd',
395 395 b'.sda',
396 396 )
397 397 # file extensions that also use a `-SOMELONGIDHASH.ext` form
398 398 REVLOG_FILES_LONG_EXT = (
399 399 b'.nd',
400 400 b'.idx',
401 401 b'.dat',
402 402 b'.sda',
403 403 )
404 404 # files that are "volatile" and might change between listing and streaming
405 405 #
406 406 # note: the ".nd" files are nodemap data and won't "change" but they might be
407 407 # deleted.
408 408 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
409 409
410 410 # some exception to the above matching
411 411 #
412 412 # XXX This is currently not in use because of issue6542
413 413 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
414 414
415 415
416 416 def is_revlog(f, kind, st):
417 417 if kind != stat.S_IFREG:
418 418 return None
419 419 return revlog_type(f)
420 420
421 421
422 422 def revlog_type(f):
423 423     # XXX we need to filter `undo.` files created by the transaction here;
424 424     # however, being naive about it also filters revlogs for `undo.*` files,
425 425     # leading to issue6542. So we no longer use EXCLUDED.
426 426 if f.endswith(REVLOG_FILES_MAIN_EXT):
427 427 return FILEFLAGS_REVLOG_MAIN
428 428 elif f.endswith(REVLOG_FILES_OTHER_EXT):
429 429 t = FILETYPE_FILELOG_OTHER
430 430 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
431 431 t |= FILEFLAGS_VOLATILE
432 432 return t
433 433 return None
434 434
435 435
436 436 # the file is part of changelog data
437 437 FILEFLAGS_CHANGELOG = 1 << 13
438 438 # the file is part of manifest data
439 439 FILEFLAGS_MANIFESTLOG = 1 << 12
440 440 # the file is part of filelog data
441 441 FILEFLAGS_FILELOG = 1 << 11
442 442 # file that are not directly part of a revlog
443 443 FILEFLAGS_OTHER = 1 << 10
444 444
445 445 # the main entry point for a revlog
446 446 FILEFLAGS_REVLOG_MAIN = 1 << 1
447 447 # a secondary file for a revlog
448 448 FILEFLAGS_REVLOG_OTHER = 1 << 0
449 449
450 450 # files that are "volatile" and might change between listing and streaming
451 451 FILEFLAGS_VOLATILE = 1 << 20
452 452
453 453 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
454 454 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
455 455 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
456 456 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
457 457 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
458 458 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
459 459 FILETYPE_OTHER = FILEFLAGS_OTHER
460 460
461 461
462 462 @attr.s(slots=True)
463 463 class StoreFile:
464 464 """a file matching a store entry"""
465 465
466 466 unencoded_path = attr.ib()
467 467 _file_size = attr.ib(default=None)
468 468 is_volatile = attr.ib(default=False)
469 469
470 470 def file_size(self, vfs):
471 471 if self._file_size is None:
472 472 if vfs is None:
473 473 msg = b"calling vfs-less file_size without prior call: %s"
474 474 msg %= self.unencoded_path
475 475 raise error.ProgrammingError(msg)
476 476 try:
477 477 self._file_size = vfs.stat(self.unencoded_path).st_size
478 478 except FileNotFoundError:
479 479 self._file_size = 0
480 480 return self._file_size
481 481
482 482 def get_stream(self, vfs, copies):
483 483 """return data "stream" information for this file
484 484
485 485 (unencoded_file_path, content_iterator, content_size)
486 486 """
487 487 size = self.file_size(None)
488 488
489 489 def get_stream():
490 490 actual_path = copies[vfs.join(self.unencoded_path)]
491 491 with open(actual_path, 'rb') as fp:
492 492 yield None # ready to stream
493 493 if size <= 65536:
494 494 yield fp.read(size)
495 495 else:
496 496 yield from util.filechunkiter(fp, limit=size)
497 497
498 498 s = get_stream()
499 499 next(s)
500 500 return (self.unencoded_path, s, size)
501 501
502 502
503 503 @attr.s(slots=True, init=False)
504 504 class BaseStoreEntry:
505 505 """An entry in the store
506 506
507 507     This is returned by `store.walk` and represents some data in the store."""
508 508
509 509 def files(self) -> List[StoreFile]:
510 510 raise NotImplementedError
511 511
512 512 def get_streams(
513 513 self,
514 514 repo=None,
515 515 vfs=None,
516 516 copies=None,
517 517 max_changeset=None,
518 518 ):
519 519         """return a list of data streams associated with the files of this entry
520 520
521 521 return [(unencoded_file_path, content_iterator, content_size), …]
522 522 """
523 523 assert vfs is not None
524 524 return [f.get_stream(vfs, copies) for f in self.files()]
525 525
526 526
527 527 @attr.s(slots=True, init=False)
528 528 class SimpleStoreEntry(BaseStoreEntry):
529 529 """A generic entry in the store"""
530 530
531 531 is_revlog = False
532 532
533 533 _entry_path = attr.ib()
534 534 _is_volatile = attr.ib(default=False)
535 535 _file_size = attr.ib(default=None)
536 536 _files = attr.ib(default=None)
537 537
538 538 def __init__(
539 539 self,
540 540 entry_path,
541 541 is_volatile=False,
542 542 file_size=None,
543 543 ):
544 544 super().__init__()
545 545 self._entry_path = entry_path
546 546 self._is_volatile = is_volatile
547 547 self._file_size = file_size
548 548 self._files = None
549 549
550 550 def files(self) -> List[StoreFile]:
551 551 if self._files is None:
552 552 self._files = [
553 553 StoreFile(
554 554 unencoded_path=self._entry_path,
555 555 file_size=self._file_size,
556 556 is_volatile=self._is_volatile,
557 557 )
558 558 ]
559 559 return self._files
560 560
561 561
562 562 @attr.s(slots=True, init=False)
563 563 class RevlogStoreEntry(BaseStoreEntry):
564 564 """A revlog entry in the store"""
565 565
566 566 is_revlog = True
567 567
568 568 revlog_type = attr.ib(default=None)
569 569 target_id = attr.ib(default=None)
570 570 _path_prefix = attr.ib(default=None)
571 571 _details = attr.ib(default=None)
572 572 _files = attr.ib(default=None)
573 573
574 574 def __init__(
575 575 self,
576 576 revlog_type,
577 577 path_prefix,
578 578 target_id,
579 579 details,
580 580 ):
581 581 super().__init__()
582 582 self.revlog_type = revlog_type
583 583 self.target_id = target_id
584 584 self._path_prefix = path_prefix
585 585 assert b'.i' in details, (path_prefix, details)
586 586 self._details = details
587 587 self._files = None
588 588
589 589 @property
590 590 def is_changelog(self):
591 591 return self.revlog_type & FILEFLAGS_CHANGELOG
592 592
593 593 @property
594 594 def is_manifestlog(self):
595 595 return self.revlog_type & FILEFLAGS_MANIFESTLOG
596 596
597 597 @property
598 598 def is_filelog(self):
599 599 return self.revlog_type & FILEFLAGS_FILELOG
600 600
601 601 def main_file_path(self):
602 602 """unencoded path of the main revlog file"""
603 603 return self._path_prefix + b'.i'
604 604
605 605 def files(self) -> List[StoreFile]:
606 606 if self._files is None:
607 607 self._files = []
608 608 for ext in sorted(self._details, key=_ext_key):
609 609 path = self._path_prefix + ext
610 610 data = self._details[ext]
611 611 self._files.append(StoreFile(unencoded_path=path, **data))
612 612 return self._files
613 613
614 614 def get_streams(
615 615 self,
616 616 repo=None,
617 617 vfs=None,
618 618 copies=None,
619 619 max_changeset=None,
620 620 ):
621 621 if repo is None or max_changeset is None:
622 622 return super().get_streams(
623 623 repo=repo,
624 624 vfs=vfs,
625 625 copies=copies,
626 626 max_changeset=max_changeset,
627 627 )
628 628 if any(k.endswith(b'.idx') for k in self._details.keys()):
629 629             # This uses revlog-v2; ignore for now
630 630 return super().get_streams(
631 631 repo=repo,
632 632 vfs=vfs,
633 633 copies=copies,
634 634 max_changeset=max_changeset,
635 635 )
636 636 name_to_ext = {}
637 637 for ext in self._details.keys():
638 638 name_to_ext[self._path_prefix + ext] = ext
639 639 name_to_size = {}
640 640 for f in self.files():
641 641 name_to_size[f.unencoded_path] = f.file_size(None)
642 642 stream = [
643 643 f.get_stream(vfs, copies)
644 644 for f in self.files()
645 645 if name_to_ext[f.unencoded_path] not in (b'.d', b'.i')
646 646 ]
647 647
648 is_inline = b'.d' not in self._details
649
648 650 rl = self.get_revlog_instance(repo).get_revlog()
649 rl_stream = rl.get_streams(max_changeset)
651 rl_stream = rl.get_streams(max_changeset, force_inline=is_inline)
652
650 653 for name, s, size in rl_stream:
651 654 if name_to_size.get(name, 0) != size:
652 655 msg = _(b"expected %d bytes but %d provided for %s")
653 656 msg %= name_to_size.get(name, 0), size, name
654 657 raise error.Abort(msg)
655 658 stream.extend(rl_stream)
656 659 files = self.files()
657 660 assert len(stream) == len(files), (
658 661 stream,
659 662 files,
660 663 self._path_prefix,
661 664 self.target_id,
662 665 )
663 666 return stream
664 667
665 668 def get_revlog_instance(self, repo):
666 669 """Obtain a revlog instance from this store entry
667 670
668 671 An instance of the appropriate class is returned.
669 672 """
670 673 if self.is_changelog:
671 674 return changelog.changelog(repo.svfs)
672 675 elif self.is_manifestlog:
673 676 mandir = self.target_id
674 677 return manifest.manifestrevlog(
675 678 repo.nodeconstants, repo.svfs, tree=mandir
676 679 )
677 680 else:
678 681 return filelog.filelog(repo.svfs, self.target_id)
679 682
680 683
681 684 def _gather_revlog(files_data):
682 685 """group files per revlog prefix
683 686
684 687     This returns a two-level nested structure: the top-level key is the revlog
685 688     prefix without extension, and the second level maps every file "suffix" seen
686 689     for that revlog to arbitrary file data. It is returned as a sorted list of items.
687 690 """
688 691 revlogs = collections.defaultdict(dict)
689 692 for u, value in files_data:
690 693 name, ext = _split_revlog_ext(u)
691 694 revlogs[name][ext] = value
692 695 return sorted(revlogs.items())
693 696
694 697
695 698 def _split_revlog_ext(filename):
696 699 """split the revlog file prefix from the variable extension"""
697 700 if filename.endswith(REVLOG_FILES_LONG_EXT):
698 701 char = b'-'
699 702 else:
700 703 char = b'.'
701 704 idx = filename.rfind(char)
702 705 return filename[:idx], filename[idx:]
703 706
704 707
705 708 def _ext_key(ext):
706 709 """a key to order revlog suffix
707 710
708 711     important to issue .i after other entries."""
709 712 # the only important part of this order is to keep the `.i` last.
710 713 if ext.endswith(b'.n'):
711 714 return (0, ext)
712 715 elif ext.endswith(b'.nd'):
713 716 return (10, ext)
714 717 elif ext.endswith(b'.d'):
715 718 return (20, ext)
716 719 elif ext.endswith(b'.i'):
717 720 return (50, ext)
718 721 else:
719 722 return (40, ext)
720 723
721 724
722 725 class basicstore:
723 726 '''base class for local repository stores'''
724 727
725 728 def __init__(self, path, vfstype):
726 729 vfs = vfstype(path)
727 730 self.path = vfs.base
728 731 self.createmode = _calcmode(vfs)
729 732 vfs.createmode = self.createmode
730 733 self.rawvfs = vfs
731 734 self.vfs = vfsmod.filtervfs(vfs, encodedir)
732 735 self.opener = self.vfs
733 736
734 737 def join(self, f):
735 738 return self.path + b'/' + encodedir(f)
736 739
737 740 def _walk(self, relpath, recurse, undecodable=None):
738 741 '''yields (revlog_type, unencoded, size)'''
739 742 path = self.path
740 743 if relpath:
741 744 path += b'/' + relpath
742 745 striplen = len(self.path) + 1
743 746 l = []
744 747 if self.rawvfs.isdir(path):
745 748 visit = [path]
746 749 readdir = self.rawvfs.readdir
747 750 while visit:
748 751 p = visit.pop()
749 752 for f, kind, st in readdir(p, stat=True):
750 753 fp = p + b'/' + f
751 754 rl_type = is_revlog(f, kind, st)
752 755 if rl_type is not None:
753 756 n = util.pconvert(fp[striplen:])
754 757 l.append((decodedir(n), (rl_type, st.st_size)))
755 758 elif kind == stat.S_IFDIR and recurse:
756 759 visit.append(fp)
757 760
758 761 l.sort()
759 762 return l
760 763
761 764 def changelog(self, trypending, concurrencychecker=None):
762 765 return changelog.changelog(
763 766 self.vfs,
764 767 trypending=trypending,
765 768 concurrencychecker=concurrencychecker,
766 769 )
767 770
768 771 def manifestlog(self, repo, storenarrowmatch):
769 772 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
770 773 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
771 774
772 775 def data_entries(
773 776 self, matcher=None, undecodable=None
774 777 ) -> Generator[BaseStoreEntry, None, None]:
775 778 """Like walk, but excluding the changelog and root manifest.
776 779
777 780         When [undecodable] is None, revlog names that can't be
778 781 decoded cause an exception. When it is provided, it should
779 782 be a list and the filenames that can't be decoded are added
780 783 to it instead. This is very rarely needed."""
781 784 dirs = [
782 785 (b'data', FILEFLAGS_FILELOG, False),
783 786 (b'meta', FILEFLAGS_MANIFESTLOG, True),
784 787 ]
785 788 for base_dir, rl_type, strip_filename in dirs:
786 789 files = self._walk(base_dir, True, undecodable=undecodable)
787 790 files = (f for f in files if f[1][0] is not None)
788 791 for revlog, details in _gather_revlog(files):
789 792 file_details = {}
790 793 revlog_target_id = revlog.split(b'/', 1)[1]
791 794 if strip_filename and b'/' in revlog:
792 795 revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
793 796 revlog_target_id += b'/'
794 797 for ext, (t, s) in sorted(details.items()):
795 798 file_details[ext] = {
796 799 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
797 800 'file_size': s,
798 801 }
799 802 yield RevlogStoreEntry(
800 803 path_prefix=revlog,
801 804 revlog_type=rl_type,
802 805 target_id=revlog_target_id,
803 806 details=file_details,
804 807 )
805 808
806 809 def top_entries(
807 810 self, phase=False, obsolescence=False
808 811 ) -> Generator[BaseStoreEntry, None, None]:
809 812 if phase and self.vfs.exists(b'phaseroots'):
810 813 yield SimpleStoreEntry(
811 814 entry_path=b'phaseroots',
812 815 is_volatile=True,
813 816 )
814 817
815 818 if obsolescence and self.vfs.exists(b'obsstore'):
816 819 # XXX if we had the file size it could be non-volatile
817 820 yield SimpleStoreEntry(
818 821 entry_path=b'obsstore',
819 822 is_volatile=True,
820 823 )
821 824
822 825 files = reversed(self._walk(b'', False))
823 826
824 827 changelogs = collections.defaultdict(dict)
825 828 manifestlogs = collections.defaultdict(dict)
826 829
827 830 for u, (t, s) in files:
828 831 if u.startswith(b'00changelog'):
829 832 name, ext = _split_revlog_ext(u)
830 833 changelogs[name][ext] = (t, s)
831 834 elif u.startswith(b'00manifest'):
832 835 name, ext = _split_revlog_ext(u)
833 836 manifestlogs[name][ext] = (t, s)
834 837 else:
835 838 yield SimpleStoreEntry(
836 839 entry_path=u,
837 840 is_volatile=bool(t & FILEFLAGS_VOLATILE),
838 841 file_size=s,
839 842 )
840 843 # yield manifest before changelog
841 844 top_rl = [
842 845 (manifestlogs, FILEFLAGS_MANIFESTLOG),
843 846 (changelogs, FILEFLAGS_CHANGELOG),
844 847 ]
845 848 assert len(manifestlogs) <= 1
846 849 assert len(changelogs) <= 1
847 850 for data, revlog_type in top_rl:
848 851 for revlog, details in sorted(data.items()):
849 852 file_details = {}
850 853 for ext, (t, s) in details.items():
851 854 file_details[ext] = {
852 855 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
853 856 'file_size': s,
854 857 }
855 858 yield RevlogStoreEntry(
856 859 path_prefix=revlog,
857 860 revlog_type=revlog_type,
858 861 target_id=b'',
859 862 details=file_details,
860 863 )
861 864
862 865 def walk(
863 866 self, matcher=None, phase=False, obsolescence=False
864 867 ) -> Generator[BaseStoreEntry, None, None]:
865 868 """return files related to data storage (ie: revlogs)
866 869
867 870         yields instances of BaseStoreEntry subclasses
868 871 
869 872         if a matcher is passed, only storage files for tracked paths that
870 873         match the matcher are yielded
871 874 """
872 875 # yield data files first
873 876 for x in self.data_entries(matcher):
874 877 yield x
875 878 for x in self.top_entries(phase=phase, obsolescence=obsolescence):
876 879 yield x
877 880
878 881 def copylist(self):
879 882 return _data
880 883
881 884 def write(self, tr):
882 885 pass
883 886
884 887 def invalidatecaches(self):
885 888 pass
886 889
887 890 def markremoved(self, fn):
888 891 pass
889 892
890 893 def __contains__(self, path):
891 894 '''Checks if the store contains path'''
892 895 path = b"/".join((b"data", path))
893 896 # file?
894 897 if self.vfs.exists(path + b".i"):
895 898 return True
896 899 # dir?
897 900 if not path.endswith(b"/"):
898 901 path = path + b"/"
899 902 return self.vfs.exists(path)
900 903
901 904
902 905 class encodedstore(basicstore):
903 906 def __init__(self, path, vfstype):
904 907 vfs = vfstype(path + b'/store')
905 908 self.path = vfs.base
906 909 self.createmode = _calcmode(vfs)
907 910 vfs.createmode = self.createmode
908 911 self.rawvfs = vfs
909 912 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
910 913 self.opener = self.vfs
911 914
912 915 def _walk(self, relpath, recurse, undecodable=None):
913 916 old = super()._walk(relpath, recurse)
914 917 new = []
915 918 for f1, value in old:
916 919 try:
917 920 f2 = decodefilename(f1)
918 921 except KeyError:
919 922 if undecodable is None:
920 923 msg = _(b'undecodable revlog name %s') % f1
921 924 raise error.StorageError(msg)
922 925 else:
923 926 undecodable.append(f1)
924 927 continue
925 928 new.append((f2, value))
926 929 return new
927 930
928 931 def data_entries(
929 932 self, matcher=None, undecodable=None
930 933 ) -> Generator[BaseStoreEntry, None, None]:
931 934 entries = super(encodedstore, self).data_entries(
932 935 undecodable=undecodable
933 936 )
934 937 for entry in entries:
935 938 if _match_tracked_entry(entry, matcher):
936 939 yield entry
937 940
938 941 def join(self, f):
939 942 return self.path + b'/' + encodefilename(f)
940 943
941 944 def copylist(self):
942 945 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
943 946
944 947
945 948 class fncache:
946 949 # the filename used to be partially encoded
947 950 # hence the encodedir/decodedir dance
948 951 def __init__(self, vfs):
949 952 self.vfs = vfs
950 953 self._ignores = set()
951 954 self.entries = None
952 955 self._dirty = False
953 956 # set of new additions to fncache
954 957 self.addls = set()
955 958
956 959 def ensureloaded(self, warn=None):
957 960 """read the fncache file if not already read.
958 961
959 962 If the file on disk is corrupted, raise. If warn is provided,
960 963 warn and keep going instead."""
961 964 if self.entries is None:
962 965 self._load(warn)
963 966
964 967 def _load(self, warn=None):
965 968 '''fill the entries from the fncache file'''
966 969 self._dirty = False
967 970 try:
968 971 fp = self.vfs(b'fncache', mode=b'rb')
969 972 except IOError:
970 973 # skip nonexistent file
971 974 self.entries = set()
972 975 return
973 976
974 977 self.entries = set()
975 978 chunk = b''
976 979 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
977 980 chunk += c
978 981 try:
979 982 p = chunk.rindex(b'\n')
980 983 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
981 984 chunk = chunk[p + 1 :]
982 985 except ValueError:
983 986 # substring '\n' not found, maybe the entry is bigger than the
984 987 # chunksize, so let's keep iterating
985 988 pass
986 989
987 990 if chunk:
988 991 msg = _(b"fncache does not ends with a newline")
989 992 if warn:
990 993 warn(msg + b'\n')
991 994 else:
992 995 raise error.Abort(
993 996 msg,
994 997 hint=_(
995 998 b"use 'hg debugrebuildfncache' to "
996 999 b"rebuild the fncache"
997 1000 ),
998 1001 )
999 1002 self._checkentries(fp, warn)
1000 1003 fp.close()
1001 1004
1002 1005 def _checkentries(self, fp, warn):
1003 1006 """make sure there is no empty string in entries"""
1004 1007 if b'' in self.entries:
1005 1008 fp.seek(0)
1006 1009 for n, line in enumerate(fp):
1007 1010 if not line.rstrip(b'\n'):
1008 1011 t = _(b'invalid entry in fncache, line %d') % (n + 1)
1009 1012 if warn:
1010 1013 warn(t + b'\n')
1011 1014 else:
1012 1015 raise error.Abort(t)
1013 1016
1014 1017 def write(self, tr):
1015 1018 if self._dirty:
1016 1019 assert self.entries is not None
1017 1020 self.entries = self.entries | self.addls
1018 1021 self.addls = set()
1019 1022 tr.addbackup(b'fncache')
1020 1023 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
1021 1024 if self.entries:
1022 1025 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
1023 1026 fp.close()
1024 1027 self._dirty = False
1025 1028 if self.addls:
1026 1029 # if we have just new entries, let's append them to the fncache
1027 1030 tr.addbackup(b'fncache')
1028 1031 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
1029 1032 if self.addls:
1030 1033 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
1031 1034 fp.close()
1032 1035 self.entries = None
1033 1036 self.addls = set()
1034 1037
1035 1038 def addignore(self, fn):
1036 1039 self._ignores.add(fn)
1037 1040
1038 1041 def add(self, fn):
1039 1042 if fn in self._ignores:
1040 1043 return
1041 1044 if self.entries is None:
1042 1045 self._load()
1043 1046 if fn not in self.entries:
1044 1047 self.addls.add(fn)
1045 1048
1046 1049 def remove(self, fn):
1047 1050 if self.entries is None:
1048 1051 self._load()
1049 1052 if fn in self.addls:
1050 1053 self.addls.remove(fn)
1051 1054 return
1052 1055 try:
1053 1056 self.entries.remove(fn)
1054 1057 self._dirty = True
1055 1058 except KeyError:
1056 1059 pass
1057 1060
1058 1061 def __contains__(self, fn):
1059 1062 if fn in self.addls:
1060 1063 return True
1061 1064 if self.entries is None:
1062 1065 self._load()
1063 1066 return fn in self.entries
1064 1067
1065 1068 def __iter__(self):
1066 1069 if self.entries is None:
1067 1070 self._load()
1068 1071 return iter(self.entries | self.addls)
1069 1072
1070 1073
1071 1074 class _fncachevfs(vfsmod.proxyvfs):
1072 1075 def __init__(self, vfs, fnc, encode):
1073 1076 vfsmod.proxyvfs.__init__(self, vfs)
1074 1077 self.fncache = fnc
1075 1078 self.encode = encode
1076 1079
1077 1080 def __call__(self, path, mode=b'r', *args, **kw):
1078 1081 encoded = self.encode(path)
1079 1082 if (
1080 1083 mode not in (b'r', b'rb')
1081 1084 and (path.startswith(b'data/') or path.startswith(b'meta/'))
1082 1085 and revlog_type(path) is not None
1083 1086 ):
1084 1087 # do not trigger a fncache load when adding a file that already is
1085 1088 # known to exist.
1086 1089 notload = self.fncache.entries is None and self.vfs.exists(encoded)
1087 1090 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
1088 1091 # when appending to an existing file, if the file has size zero,
1089 1092 # it should be considered as missing. Such zero-size files are
1090 1093 # the result of truncation when a transaction is aborted.
1091 1094 notload = False
1092 1095 if not notload:
1093 1096 self.fncache.add(path)
1094 1097 return self.vfs(encoded, mode, *args, **kw)
1095 1098
1096 1099 def join(self, path):
1097 1100 if path:
1098 1101 return self.vfs.join(self.encode(path))
1099 1102 else:
1100 1103 return self.vfs.join(path)
1101 1104
1102 1105 def register_file(self, path):
1103 1106         """generic hook point to let fncache steer its stew"""
1104 1107 if path.startswith(b'data/') or path.startswith(b'meta/'):
1105 1108 self.fncache.add(path)
1106 1109
1107 1110
1108 1111 class fncachestore(basicstore):
1109 1112 def __init__(self, path, vfstype, dotencode):
1110 1113 if dotencode:
1111 1114 encode = _pathencode
1112 1115 else:
1113 1116 encode = _plainhybridencode
1114 1117 self.encode = encode
1115 1118 vfs = vfstype(path + b'/store')
1116 1119 self.path = vfs.base
1117 1120 self.pathsep = self.path + b'/'
1118 1121 self.createmode = _calcmode(vfs)
1119 1122 vfs.createmode = self.createmode
1120 1123 self.rawvfs = vfs
1121 1124 fnc = fncache(vfs)
1122 1125 self.fncache = fnc
1123 1126 self.vfs = _fncachevfs(vfs, fnc, encode)
1124 1127 self.opener = self.vfs
1125 1128
1126 1129 def join(self, f):
1127 1130 return self.pathsep + self.encode(f)
1128 1131
1129 1132 def getsize(self, path):
1130 1133 return self.rawvfs.stat(path).st_size
1131 1134
1132 1135 def data_entries(
1133 1136 self, matcher=None, undecodable=None
1134 1137 ) -> Generator[BaseStoreEntry, None, None]:
1135 1138 files = ((f, revlog_type(f)) for f in self.fncache)
1136 1139         # Note: all files in fncache should be revlog related; however, the
1137 1140         # fncache might contain such files added by previous versions of
1138 1141         # Mercurial.
1139 1142 files = (f for f in files if f[1] is not None)
1140 1143 by_revlog = _gather_revlog(files)
1141 1144 for revlog, details in by_revlog:
1142 1145 file_details = {}
1143 1146 if revlog.startswith(b'data/'):
1144 1147 rl_type = FILEFLAGS_FILELOG
1145 1148 revlog_target_id = revlog.split(b'/', 1)[1]
1146 1149 elif revlog.startswith(b'meta/'):
1147 1150 rl_type = FILEFLAGS_MANIFESTLOG
1148 1151 # drop the initial directory and the `00manifest` file part
1149 1152 tmp = revlog.split(b'/', 1)[1]
1150 1153 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1151 1154 else:
1152 1155 # unreachable
1153 1156 assert False, revlog
1154 1157 for ext, t in details.items():
1155 1158 file_details[ext] = {
1156 1159 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
1157 1160 }
1158 1161 entry = RevlogStoreEntry(
1159 1162 path_prefix=revlog,
1160 1163 revlog_type=rl_type,
1161 1164 target_id=revlog_target_id,
1162 1165 details=file_details,
1163 1166 )
1164 1167 if _match_tracked_entry(entry, matcher):
1165 1168 yield entry
1166 1169
1167 1170 def copylist(self):
1168 1171 d = (
1169 1172 b'bookmarks',
1170 1173 b'narrowspec',
1171 1174 b'data',
1172 1175 b'meta',
1173 1176 b'dh',
1174 1177 b'fncache',
1175 1178 b'phaseroots',
1176 1179 b'obsstore',
1177 1180 b'00manifest.d',
1178 1181 b'00manifest.i',
1179 1182 b'00changelog.d',
1180 1183 b'00changelog.i',
1181 1184 b'requires',
1182 1185 )
1183 1186 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1184 1187
1185 1188 def write(self, tr):
1186 1189 self.fncache.write(tr)
1187 1190
1188 1191 def invalidatecaches(self):
1189 1192 self.fncache.entries = None
1190 1193 self.fncache.addls = set()
1191 1194
1192 1195 def markremoved(self, fn):
1193 1196 self.fncache.remove(fn)
1194 1197
1195 1198 def _exists(self, f):
1196 1199 ef = self.encode(f)
1197 1200 try:
1198 1201 self.getsize(ef)
1199 1202 return True
1200 1203 except FileNotFoundError:
1201 1204 return False
1202 1205
1203 1206 def __contains__(self, path):
1204 1207 '''Checks if the store contains path'''
1205 1208 path = b"/".join((b"data", path))
1206 1209 # check for files (exact match)
1207 1210 e = path + b'.i'
1208 1211 if e in self.fncache and self._exists(e):
1209 1212 return True
1210 1213 # now check for directories (prefix match)
1211 1214 if not path.endswith(b'/'):
1212 1215 path += b'/'
1213 1216 for e in self.fncache:
1214 1217 if e.startswith(path) and self._exists(e):
1215 1218 return True
1216 1219 return False
@@ -1,150 +1,175 b''
1 1 Test stream cloning while a revlog split happens
2 2 ------------------------------------------------
3 3
4 4 #testcases stream-bundle2-v2 stream-bundle2-v3
5 5
6 6 #if stream-bundle2-v3
7 7 $ cat << EOF >> $HGRCPATH
8 8 > [experimental]
9 9 > stream-v3 = yes
10 10 > EOF
11 11 #endif
12 12
13 13 setup a repository for tests
14 14 ----------------------------
15 15
16 16 $ cat >> $HGRCPATH << EOF
17 17 > [format]
18 18 > # skip compression to make it easy to trigger a split
19 19 > revlog-compression=none
20 > [phases]
21 > publish=no
20 22 > EOF
21 23
22 24 $ hg init server
23 25 $ cd server
24 26 $ file="some-file"
25 27 $ printf '%20d' '1' > $file
26 28 $ hg commit -Aqma
27 29 $ printf '%1024d' '1' > $file
28 30 $ hg commit -Aqmb
29 31 $ printf '%20d' '1' > $file
30 32 $ hg commit -Aqmc
31 33
32 34 check the revlog is inline
33 35
34 36 $ f -s .hg/store/data/some-file*
35 37 .hg/store/data/some-file.i: size=1259
36 38 $ hg debug-revlog-index some-file
37 39 rev linkrev nodeid p1-nodeid p2-nodeid
38 40 0 0 ed70cecbc103 000000000000 000000000000
39 41 1 1 7241018db64c ed70cecbc103 000000000000
40 42 2 2 fa1120531cc1 7241018db64c 000000000000
41 43 $ cd ..
42 44
43 45 setup synchronisation files
44 46
45 47 $ HG_TEST_STREAM_WALKED_FILE_1="$TESTTMP/sync_file_walked_1"
46 48 $ export HG_TEST_STREAM_WALKED_FILE_1
47 49 $ HG_TEST_STREAM_WALKED_FILE_2="$TESTTMP/sync_file_walked_2"
48 50 $ export HG_TEST_STREAM_WALKED_FILE_2
49 51 $ HG_TEST_STREAM_WALKED_FILE_3="$TESTTMP/sync_file_walked_3"
50 52 $ export HG_TEST_STREAM_WALKED_FILE_3
51 53
52 54
53 55 Test stream-clone raced by a revlog-split
54 56 =========================================
55 57
56 58 Test stream-clone where the file is split right after the lock section is done
57 59
58 60 Start the server
59 61
60 62 $ hg serve -R server \
61 63 > -p $HGPORT1 -d --error errors.log --pid-file=hg.pid \
62 64 > --config extensions.stream_steps="$RUNTESTDIR/testlib/ext-stream-clone-steps.py"
63 65 $ cat hg.pid >> $DAEMON_PIDS
64 66
65 67 Start a client doing a streaming clone
66 68
67 $ (hg clone -q --stream -U http://localhost:$HGPORT1 clone-while-split > client.log 2>&1; touch "$HG_TEST_STREAM_WALKED_FILE_3") &
69 $ ( \
70 > hg clone --debug --stream -U http://localhost:$HGPORT1 \
71 > clone-while-split > client.log 2>&1; \
72 > touch "$HG_TEST_STREAM_WALKED_FILE_3" \
73 > ) &
68 74
69 75 Wait for the server to be done collecting data
70 76
71 77 $ $RUNTESTDIR/testlib/wait-on-file 10 $HG_TEST_STREAM_WALKED_FILE_1
72 78
73 79 trigger a split
74 80
75 81 $ dd if=/dev/zero of=server/$file bs=1k count=128 > /dev/null 2>&1
76 82 $ hg -R server ci -m "triggering a split" --config ui.timeout.warn=-1
77 83
78 84 unlock the stream generation
79 85
80 86 $ touch $HG_TEST_STREAM_WALKED_FILE_2
81 87
82 88 wait for the client to be done cloning.
83 89
84 90 $ $RUNTESTDIR/testlib/wait-on-file 10 $HG_TEST_STREAM_WALKED_FILE_3
85 91
86 92 Check everything is fine
87 93
88 94 $ cat client.log
89 remote: abort: unexpected error: expected 0 bytes but 1067 provided for data/some-file.d (known-bad-output !)
90 abort: pull failed on remote (known-bad-output !)
95 using http://localhost:$HGPORT1/
96 sending capabilities command
97 query 1; heads
98 sending batch command
99 streaming all changes
100 sending getbundle command
101 bundle2-input-bundle: with-transaction
102 bundle2-input-part: "stream2" (params: 3 mandatory) supported (stream-bundle2-v2 !)
103 bundle2-input-part: "stream3-exp" (params: 3 mandatory) supported (stream-bundle2-v3 !)
104 applying stream bundle
105 7 files to transfer, 2.11 KB of data
106 adding [s] data/some-file.i (1.23 KB)
107 adding [s] phaseroots (43 bytes)
108 adding [s] 00manifest.i (348 bytes)
109 adding [s] 00changelog.i (381 bytes)
110 adding [c] branch2-served (94 bytes)
111 adding [c] rbc-names-v1 (7 bytes)
112 adding [c] rbc-revs-v1 (24 bytes)
113 updating the branch cache
114 transferred 2.11 KB in * seconds (* */sec) (glob)
115 bundle2-input-part: total payload size 2268
116 bundle2-input-part: "listkeys" (params: 1 mandatory) supported
117 bundle2-input-bundle: 2 parts total
118 checking for updated bookmarks
119 updating the branch cache
120 (sent 3 HTTP requests and * bytes; received * bytes in responses) (glob)
91 121 $ tail -2 errors.log
92 mercurial.error.Abort: expected 0 bytes but 1067 provided for data/some-file.d (known-bad-output !)
93 (known-bad-output !)
94 122 $ hg -R clone-while-split verify
95 checking changesets (missing-correct-output !)
96 checking manifests (missing-correct-output !)
97 crosschecking files in changesets and manifests (missing-correct-output !)
98 checking files (missing-correct-output !)
99 checking dirstate (missing-correct-output !)
100 checked 3 changesets with 3 changes to 1 files (missing-correct-output !)
101 abort: repository clone-while-split not found (known-bad-output !)
102 [255]
123 checking changesets
124 checking manifests
125 crosschecking files in changesets and manifests
126 checking files
127 checking dirstate
128 checked 3 changesets with 3 changes to 1 files
103 129 $ hg -R clone-while-split tip
104 changeset: 2:dbd9854c38a6 (missing-correct-output !)
105 tag: tip (missing-correct-output !)
106 user: test (missing-correct-output !)
107 date: Thu Jan 01 00:00:00 1970 +0000 (missing-correct-output !)
108 summary: c (missing-correct-output !)
109 (missing-correct-output !)
110 abort: repository clone-while-split not found (known-bad-output !)
111 [255]
130 changeset: 2:dbd9854c38a6
131 tag: tip
132 user: test
133 date: Thu Jan 01 00:00:00 1970 +0000
134 summary: c
135
112 136 $ hg -R clone-while-split debug-revlog-index some-file
113 rev linkrev nodeid p1-nodeid p2-nodeid (missing-correct-output !)
114 0 0 ed70cecbc103 000000000000 000000000000 (missing-correct-output !)
115 1 1 7241018db64c ed70cecbc103 000000000000 (missing-correct-output !)
116 2 2 fa1120531cc1 7241018db64c 000000000000 (missing-correct-output !)
117 abort: repository clone-while-split not found (known-bad-output !)
118 [255]
137 rev linkrev nodeid p1-nodeid p2-nodeid
138 0 0 ed70cecbc103 000000000000 000000000000
139 1 1 7241018db64c ed70cecbc103 000000000000
140 2 2 fa1120531cc1 7241018db64c 000000000000
141 $ hg -R server phase --rev 'all()'
142 0: draft
143 1: draft
144 2: draft
145 3: draft
146 $ hg -R clone-while-split phase --rev 'all()'
147 0: draft
148 1: draft
149 2: draft
119 150
120 151 subsequent pull works
121 152
122 153 $ hg -R clone-while-split pull
123 pulling from http://localhost:$HGPORT1/ (missing-correct-output !)
124 searching for changes (missing-correct-output !)
125 adding changesets (missing-correct-output !)
126 adding manifests (missing-correct-output !)
127 adding file changes (missing-correct-output !)
128 added 1 changesets with 1 changes to 1 files (missing-correct-output !)
129 new changesets df05c6cb1406 (missing-correct-output !)
130 (run 'hg update' to get a working copy) (missing-correct-output !)
131 abort: repository clone-while-split not found (known-bad-output !)
132 [255]
154 pulling from http://localhost:$HGPORT1/
155 searching for changes
156 adding changesets
157 adding manifests
158 adding file changes
159 added 1 changesets with 1 changes to 1 files
160 new changesets df05c6cb1406 (1 drafts)
161 (run 'hg update' to get a working copy)
133 162
134 163 $ hg -R clone-while-split debug-revlog-index some-file
135 rev linkrev nodeid p1-nodeid p2-nodeid (missing-correct-output !)
136 0 0 ed70cecbc103 000000000000 000000000000 (missing-correct-output !)
137 1 1 7241018db64c ed70cecbc103 000000000000 (missing-correct-output !)
138 2 2 fa1120531cc1 7241018db64c 000000000000 (missing-correct-output !)
139 3 3 a631378adaa3 fa1120531cc1 000000000000 (missing-correct-output !)
140 abort: repository clone-while-split not found (known-bad-output !)
141 [255]
164 rev linkrev nodeid p1-nodeid p2-nodeid
165 0 0 ed70cecbc103 000000000000 000000000000
166 1 1 7241018db64c ed70cecbc103 000000000000
167 2 2 fa1120531cc1 7241018db64c 000000000000
168 3 3 a631378adaa3 fa1120531cc1 000000000000
142 169 $ hg -R clone-while-split verify
143 checking changesets (missing-correct-output !)
144 checking manifests (missing-correct-output !)
145 crosschecking files in changesets and manifests (missing-correct-output !)
146 checking files (missing-correct-output !)
147 checking dirstate (missing-correct-output !)
148 checked 4 changesets with 4 changes to 1 files (missing-correct-output !)
149 abort: repository clone-while-split not found (known-bad-output !)
150 [255]
170 checking changesets
171 checking manifests
172 crosschecking files in changesets and manifests
173 checking files
174 checking dirstate
175 checked 4 changesets with 4 changes to 1 files