stream-clone: avoid opening a revlog in case we do not need it...
Arseniy Alekseyev
r51565:3b563954 default
@@ -1,3501 +1,3511 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 CHANGELOGV2,
39 39 COMP_MODE_DEFAULT,
40 40 COMP_MODE_INLINE,
41 41 COMP_MODE_PLAIN,
42 42 DELTA_BASE_REUSE_NO,
43 43 DELTA_BASE_REUSE_TRY,
44 44 ENTRY_RANK,
45 45 FEATURES_BY_VERSION,
46 46 FLAG_GENERALDELTA,
47 47 FLAG_INLINE_DATA,
48 48 INDEX_HEADER,
49 49 KIND_CHANGELOG,
50 50 KIND_FILELOG,
51 51 RANK_UNKNOWN,
52 52 REVLOGV0,
53 53 REVLOGV1,
54 54 REVLOGV1_FLAGS,
55 55 REVLOGV2,
56 56 REVLOGV2_FLAGS,
57 57 REVLOG_DEFAULT_FLAGS,
58 58 REVLOG_DEFAULT_FORMAT,
59 59 REVLOG_DEFAULT_VERSION,
60 60 SUPPORTED_FLAGS,
61 61 )
62 62 from .revlogutils.flagutil import (
63 63 REVIDX_DEFAULT_FLAGS,
64 64 REVIDX_ELLIPSIS,
65 65 REVIDX_EXTSTORED,
66 66 REVIDX_FLAGS_ORDER,
67 67 REVIDX_HASCOPIESINFO,
68 68 REVIDX_ISCENSORED,
69 69 REVIDX_RAWTEXT_CHANGING_FLAGS,
70 70 )
71 71 from .thirdparty import attr
72 72 from . import (
73 73 ancestor,
74 74 dagop,
75 75 error,
76 76 mdiff,
77 77 policy,
78 78 pycompat,
79 79 revlogutils,
80 80 templatefilters,
81 81 util,
82 82 )
83 83 from .interfaces import (
84 84 repository,
85 85 util as interfaceutil,
86 86 )
87 87 from .revlogutils import (
88 88 deltas as deltautil,
89 89 docket as docketutil,
90 90 flagutil,
91 91 nodemap as nodemaputil,
92 92 randomaccessfile,
93 93 revlogv0,
94 94 rewrite,
95 95 sidedata as sidedatautil,
96 96 )
97 97 from .utils import (
98 98 storageutil,
99 99 stringutil,
100 100 )
101 101
102 102 # blanked usage of all the names to prevent pyflakes constraints
103 103 # We need these names available in the module for extensions.
104 104
105 105 REVLOGV0
106 106 REVLOGV1
107 107 REVLOGV2
108 108 CHANGELOGV2
109 109 FLAG_INLINE_DATA
110 110 FLAG_GENERALDELTA
111 111 REVLOG_DEFAULT_FLAGS
112 112 REVLOG_DEFAULT_FORMAT
113 113 REVLOG_DEFAULT_VERSION
114 114 REVLOGV1_FLAGS
115 115 REVLOGV2_FLAGS
116 116 REVIDX_ISCENSORED
117 117 REVIDX_ELLIPSIS
118 118 REVIDX_HASCOPIESINFO
119 119 REVIDX_EXTSTORED
120 120 REVIDX_DEFAULT_FLAGS
121 121 REVIDX_FLAGS_ORDER
122 122 REVIDX_RAWTEXT_CHANGING_FLAGS
123 123
124 124 parsers = policy.importmod('parsers')
125 125 rustancestor = policy.importrust('ancestor')
126 126 rustdagop = policy.importrust('dagop')
127 127 rustrevlog = policy.importrust('revlog')
128 128
129 129 # Aliased for performance.
130 130 _zlibdecompress = zlib.decompress
131 131
132 132 # max size of inline data embedded into a revlog
133 133 _maxinline = 131072
134 134
135 135 # Flag processors for REVIDX_ELLIPSIS.
136 136 def ellipsisreadprocessor(rl, text):
137 137 return text, False
138 138
139 139
140 140 def ellipsiswriteprocessor(rl, text):
141 141 return text, False
142 142
143 143
144 144 def ellipsisrawprocessor(rl, text):
145 145 return False
146 146
147 147
148 148 ellipsisprocessor = (
149 149 ellipsisreadprocessor,
150 150 ellipsiswriteprocessor,
151 151 ellipsisrawprocessor,
152 152 )
153 153
154 154
155 155 def _verify_revision(rl, skipflags, state, node):
156 156 """Verify the integrity of the given revlog ``node`` while providing a hook
157 157 point for extensions to influence the operation."""
158 158 if skipflags:
159 159 state[b'skipread'].add(node)
160 160 else:
161 161 # Side-effect: read content and verify hash.
162 162 rl.revision(node)
163 163
164 164
165 165 # True if a fast implementation for persistent-nodemap is available
166 166 #
167 167 # We also consider the "pure" python implementation to be "fast" because
168 168 # people using pure don't really have performance considerations (and a
169 169 # wheelbarrow of other slowness sources)
170 170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
171 171 parsers, 'BaseIndexObject'
172 172 )
173 173
174 174
175 175 @interfaceutil.implementer(repository.irevisiondelta)
176 176 @attr.s(slots=True)
177 177 class revlogrevisiondelta:
178 178 node = attr.ib()
179 179 p1node = attr.ib()
180 180 p2node = attr.ib()
181 181 basenode = attr.ib()
182 182 flags = attr.ib()
183 183 baserevisionsize = attr.ib()
184 184 revision = attr.ib()
185 185 delta = attr.ib()
186 186 sidedata = attr.ib()
187 187 protocol_flags = attr.ib()
188 188 linknode = attr.ib(default=None)
189 189
190 190
191 191 @interfaceutil.implementer(repository.iverifyproblem)
192 192 @attr.s(frozen=True)
193 193 class revlogproblem:
194 194 warning = attr.ib(default=None)
195 195 error = attr.ib(default=None)
196 196 node = attr.ib(default=None)
197 197
198 198
199 199 def parse_index_v1(data, inline):
200 200 # call the C implementation to parse the index data
201 201 index, cache = parsers.parse_index2(data, inline)
202 202 return index, cache
203 203
204 204
205 205 def parse_index_v2(data, inline):
206 206 # call the C implementation to parse the index data
207 207 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
208 208 return index, cache
209 209
210 210
211 211 def parse_index_cl_v2(data, inline):
212 212 # call the C implementation to parse the index data
213 213 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
214 214 return index, cache
215 215
216 216
217 217 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
218 218
219 219 def parse_index_v1_nodemap(data, inline):
220 220 index, cache = parsers.parse_index_devel_nodemap(data, inline)
221 221 return index, cache
222 222
223 223
224 224 else:
225 225 parse_index_v1_nodemap = None
226 226
227 227
228 228 def parse_index_v1_mixed(data, inline):
229 229 index, cache = parse_index_v1(data, inline)
230 230 return rustrevlog.MixedIndex(index), cache
231 231
232 232
233 233 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
234 234 # signed integer)
235 235 _maxentrysize = 0x7FFFFFFF
236 236
237 237 FILE_TOO_SHORT_MSG = _(
238 238 b'cannot read from revlog %s;'
239 239 b' expected %d bytes from offset %d, data size is %d'
240 240 )
241 241
242 242 hexdigits = b'0123456789abcdefABCDEF'
243 243
244 244
245 245 class revlog:
246 246 """
247 247 the underlying revision storage object
248 248
249 249 A revlog consists of two parts, an index and the revision data.
250 250
251 251 The index is a file with a fixed record size containing
252 252 information on each revision, including its nodeid (hash), the
253 253 nodeids of its parents, the position and offset of its data within
254 254 the data file, and the revision it's based on. Finally, each entry
255 255 contains a linkrev entry that can serve as a pointer to external
256 256 data.
257 257
258 258 The revision data itself is a linear collection of data chunks.
259 259 Each chunk represents a revision and is usually represented as a
260 260 delta against the previous chunk. To bound lookup time, runs of
261 261 deltas are limited to about 2 times the length of the original
262 262 version data. This makes retrieval of a version proportional to
263 263 its size, or O(1) relative to the number of revisions.
264 264
265 265 Both pieces of the revlog are written to in an append-only
266 266 fashion, which means we never need to rewrite a file to insert or
267 267 remove data, and can use some simple techniques to avoid the need
268 268 for locking while reading.
269 269
270 270 If checkambig, indexfile is opened with checkambig=True at
271 271 writing, to avoid file stat ambiguity.
272 272
273 273 If mmaplargeindex is True, and an mmapindexthreshold is set, the
274 274 index will be mmapped rather than read if it is larger than the
275 275 configured threshold.
276 276
277 277 If censorable is True, the revlog can have censored revisions.
278 278
279 279 If `upperboundcomp` is not None, this is the expected maximal gain from
280 280 compression for the data content.
281 281
282 282 `concurrencychecker` is an optional function that receives 3 arguments: a
283 283 file handle, a filename, and an expected position. It should check whether
284 284 the current position in the file handle is valid, and log/warn/fail (by
285 285 raising).
286 286
287 287 See mercurial/revlogutils/constants.py for details about the content of an
288 288 index entry.
289 289 """
290 290
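# A minimal usage sketch (illustrative only): it assumes a vfs-style `opener`
# and an existing filelog, and the `target` / `radix` values are made up, not
# taken from this change.
#
#     rl = revlog(opener, target=(KIND_FILELOG, b'foo.txt'), radix=b'data/foo.txt')
#     tip_text = rl.revision(rl.tip())   # full text of the tip revision
#     p1, p2 = rl.parents(rl.tip())      # parent node ids of the tip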
291 291 _flagserrorclass = error.RevlogError
292 292
293 @staticmethod
294 def is_inline_index(header_bytes):
295 header = INDEX_HEADER.unpack(header_bytes)[0]
296
297 _format_flags = header & ~0xFFFF
298 _format_version = header & 0xFFFF
299
300 features = FEATURES_BY_VERSION[_format_version]
301 return features[b'inline'](_format_flags)
302
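# Hedged sketch of how a caller (e.g. stream-clone code, per the commit
# message) might use this helper to learn whether a separate ".d" data file
# exists without instantiating a revlog; the `vfs` handle and path are
# hypothetical:
#
#     with vfs(b'data/foo.txt.i') as fp:
#         header_bytes = fp.read(INDEX_HEADER.size)
#     if revlog.is_inline_index(header_bytes):
#         pass  # revision data is stored inline, inside the ".i" file
#     else:
#         pass  # a separate ".d" file holds the revision data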
293 303 def __init__(
294 304 self,
295 305 opener,
296 306 target,
297 307 radix,
298 308 postfix=None, # only exist for `tmpcensored` now
299 309 checkambig=False,
300 310 mmaplargeindex=False,
301 311 censorable=False,
302 312 upperboundcomp=None,
303 313 persistentnodemap=False,
304 314 concurrencychecker=None,
305 315 trypending=False,
306 316 try_split=False,
307 317 canonical_parent_order=True,
308 318 ):
309 319 """
310 320 create a revlog object
311 321
312 322 opener is a function that abstracts the file opening operation
313 323 and can be used to implement COW semantics or the like.
314 324
315 325 `target`: a (KIND, ID) tuple that identifies the content stored in
316 326 this revlog. It helps the rest of the code to understand what the revlog
317 327 is about without having to resort to heuristics and index filename
318 328 analysis. Note that this must reliably be set by normal code, but
319 329 that test, debug, or performance measurement code might not set it to
320 330 an accurate value.
321 331 """
322 332 self.upperboundcomp = upperboundcomp
323 333
324 334 self.radix = radix
325 335
326 336 self._docket_file = None
327 337 self._indexfile = None
328 338 self._datafile = None
329 339 self._sidedatafile = None
330 340 self._nodemap_file = None
331 341 self.postfix = postfix
332 342 self._trypending = trypending
333 343 self._try_split = try_split
334 344 self.opener = opener
335 345 if persistentnodemap:
336 346 self._nodemap_file = nodemaputil.get_nodemap_file(self)
337 347
338 348 assert target[0] in ALL_KINDS
339 349 assert len(target) == 2
340 350 self.target = target
341 351 # When True, indexfile is opened with checkambig=True at writing, to
342 352 # avoid file stat ambiguity.
343 353 self._checkambig = checkambig
344 354 self._mmaplargeindex = mmaplargeindex
345 355 self._censorable = censorable
346 356 # 3-tuple of (node, rev, text) for a raw revision.
347 357 self._revisioncache = None
348 358 # Maps rev to chain base rev.
349 359 self._chainbasecache = util.lrucachedict(100)
350 360 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
351 361 self._chunkcache = (0, b'')
352 362 # How much data to read and cache into the raw revlog data cache.
353 363 self._chunkcachesize = 65536
354 364 self._maxchainlen = None
355 365 self._deltabothparents = True
356 366 self._candidate_group_chunk_size = 0
357 367 self._debug_delta = False
358 368 self.index = None
359 369 self._docket = None
360 370 self._nodemap_docket = None
361 371 # Mapping of partial identifiers to full nodes.
362 372 self._pcache = {}
363 373 # Mapping of revision integer to full node.
364 374 self._compengine = b'zlib'
365 375 self._compengineopts = {}
366 376 self._maxdeltachainspan = -1
367 377 self._withsparseread = False
368 378 self._sparserevlog = False
369 379 self.hassidedata = False
370 380 self._srdensitythreshold = 0.50
371 381 self._srmingapsize = 262144
372 382
373 383 # other optional features
374 384
375 385 # might remove rank configuration once the computation has no impact
376 386 self._compute_rank = False
377 387
378 388 # Make copy of flag processors so each revlog instance can support
379 389 # custom flags.
380 390 self._flagprocessors = dict(flagutil.flagprocessors)
381 391
382 392 # 3-tuple of file handles being used for active writing.
383 393 self._writinghandles = None
384 394 # prevent nesting of addgroup
385 395 self._adding_group = None
386 396
387 397 self._loadindex()
388 398
389 399 self._concurrencychecker = concurrencychecker
390 400
391 401 # parent order is supposed to be semantically irrelevant, so we
392 402 # normally re-sort parents to ensure that the first parent is non-null,
393 403 # if there is a non-null parent at all.
394 404 # filelog abuses the parent order as a flag to mark some instances of
395 405 # meta-encoded files, so allow it to disable this behavior.
396 406 self.canonical_parent_order = canonical_parent_order
397 407
398 408 def _init_opts(self):
399 409 """process options (from above/config) to setup associated default revlog mode
400 410
401 411 These values might be affected when actually reading on-disk information.
402 412
403 413 The relevant values are returned for use in _loadindex().
404 414
405 415 * newversionflags:
406 416 version header to use if we need to create a new revlog
407 417
408 418 * mmapindexthreshold:
409 419 minimal index size at which to start using mmap
410 420
411 421 * force_nodemap:
412 422 force the usage of a "development" version of the nodemap code
413 423 """
414 424 mmapindexthreshold = None
415 425 opts = self.opener.options
416 426
417 427 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
418 428 new_header = CHANGELOGV2
419 429 self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
420 430 elif b'revlogv2' in opts:
421 431 new_header = REVLOGV2
422 432 elif b'revlogv1' in opts:
423 433 new_header = REVLOGV1 | FLAG_INLINE_DATA
424 434 if b'generaldelta' in opts:
425 435 new_header |= FLAG_GENERALDELTA
426 436 elif b'revlogv0' in self.opener.options:
427 437 new_header = REVLOGV0
428 438 else:
429 439 new_header = REVLOG_DEFAULT_VERSION
430 440
431 441 if b'chunkcachesize' in opts:
432 442 self._chunkcachesize = opts[b'chunkcachesize']
433 443 if b'maxchainlen' in opts:
434 444 self._maxchainlen = opts[b'maxchainlen']
435 445 if b'deltabothparents' in opts:
436 446 self._deltabothparents = opts[b'deltabothparents']
437 447 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
438 448 if dps_cgds:
439 449 self._candidate_group_chunk_size = dps_cgds
440 450 self._lazydelta = bool(opts.get(b'lazydelta', True))
441 451 self._lazydeltabase = False
442 452 if self._lazydelta:
443 453 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
444 454 if b'debug-delta' in opts:
445 455 self._debug_delta = opts[b'debug-delta']
446 456 if b'compengine' in opts:
447 457 self._compengine = opts[b'compengine']
448 458 if b'zlib.level' in opts:
449 459 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
450 460 if b'zstd.level' in opts:
451 461 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
452 462 if b'maxdeltachainspan' in opts:
453 463 self._maxdeltachainspan = opts[b'maxdeltachainspan']
454 464 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
455 465 mmapindexthreshold = opts[b'mmapindexthreshold']
456 466 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
457 467 withsparseread = bool(opts.get(b'with-sparse-read', False))
458 468 # sparse-revlog forces sparse-read
459 469 self._withsparseread = self._sparserevlog or withsparseread
460 470 if b'sparse-read-density-threshold' in opts:
461 471 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
462 472 if b'sparse-read-min-gap-size' in opts:
463 473 self._srmingapsize = opts[b'sparse-read-min-gap-size']
464 474 if opts.get(b'enableellipsis'):
465 475 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
466 476
467 477 # revlog v0 doesn't have flag processors
468 478 for flag, processor in opts.get(b'flagprocessors', {}).items():
469 479 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
470 480
471 481 if self._chunkcachesize <= 0:
472 482 raise error.RevlogError(
473 483 _(b'revlog chunk cache size %r is not greater than 0')
474 484 % self._chunkcachesize
475 485 )
476 486 elif self._chunkcachesize & (self._chunkcachesize - 1):
477 487 raise error.RevlogError(
478 488 _(b'revlog chunk cache size %r is not a power of 2')
479 489 % self._chunkcachesize
480 490 )
481 491 force_nodemap = opts.get(b'devel-force-nodemap', False)
482 492 return new_header, mmapindexthreshold, force_nodemap
483 493
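# Illustrative configuration (values are made up): the opener's options dict
# drives the attributes set above, e.g.
#
#     opener.options = {
#         b'revlogv1': True,
#         b'generaldelta': True,
#         b'sparse-revlog': True,
#         b'chunkcachesize': 65536,
#     }
#
# would make new revlogs use REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
# as their header and enable sparse reading (sparse-revlog forces it).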
484 494 def _get_data(self, filepath, mmap_threshold, size=None):
485 495 """return a file content with or without mmap
486 496
487 497 If the file is missing return the empty string"""
488 498 try:
489 499 with self.opener(filepath) as fp:
490 500 if mmap_threshold is not None:
491 501 file_size = self.opener.fstat(fp).st_size
492 502 if file_size >= mmap_threshold:
493 503 if size is not None:
494 504 # avoid potential mmap crash
495 505 size = min(file_size, size)
496 506 # TODO: should .close() to release resources without
497 507 # relying on Python GC
498 508 if size is None:
499 509 return util.buffer(util.mmapread(fp))
500 510 else:
501 511 return util.buffer(util.mmapread(fp, size))
502 512 if size is None:
503 513 return fp.read()
504 514 else:
505 515 return fp.read(size)
506 516 except FileNotFoundError:
507 517 return b''
508 518
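# Illustrative call (the threshold is made up): read the index file, mmapping
# it only when it is at least 1 MiB on disk; a missing file yields b''.
#
#     index_data = self._get_data(self._indexfile, 1024 * 1024)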
509 519 def get_streams(self, max_linkrev, force_inline=False):
510 520 n = len(self)
511 521 index = self.index
512 522 while n > 0:
513 523 linkrev = index[n - 1][4]
514 524 if linkrev < max_linkrev:
515 525 break
516 526 # note: this loop will rarely go through multiple iterations, since
517 527 # it only traverses commits created during the current streaming
518 528 # pull operation.
519 529 #
520 530 # If this becomes a problem, using a binary search should cap the
521 531 # runtime of this.
522 532 n = n - 1
523 533 if n == 0:
524 534 # no data to send
525 535 return []
526 536 index_size = n * index.entry_size
527 537 data_size = self.end(n - 1)
528 538
529 539 # XXX we might have been split (or stripped) since the object
530 540 # initialization. We need to close this race too, by having a way to
531 541 # pre-open the files we feed to the revlog and never closing them before
532 542 # we are done streaming.
533 543
534 544 if self._inline:
535 545
536 546 def get_stream():
537 547 with self._indexfp() as fp:
538 548 yield None
539 549 size = index_size + data_size
540 550 if size <= 65536:
541 551 yield fp.read(size)
542 552 else:
543 553 yield from util.filechunkiter(fp, limit=size)
544 554
545 555 inline_stream = get_stream()
546 556 next(inline_stream)
547 557 return [
548 558 (self._indexfile, inline_stream, index_size + data_size),
549 559 ]
550 560 elif force_inline:
551 561
552 562 def get_stream():
553 563 with self._datafp() as fp_d:
554 564 yield None
555 565
556 566 for rev in range(n):
557 567 idx = self.index.entry_binary(rev)
558 568 if rev == 0 and self._docket is None:
559 569 # re-inject the inline flag
560 570 header = self._format_flags
561 571 header |= self._format_version
562 572 header |= FLAG_INLINE_DATA
563 573 header = self.index.pack_header(header)
564 574 idx = header + idx
565 575 yield idx
566 576 yield self._getsegmentforrevs(rev, rev, df=fp_d)[1]
567 577
568 578 inline_stream = get_stream()
569 579 next(inline_stream)
570 580 return [
571 581 (self._indexfile, inline_stream, index_size + data_size),
572 582 ]
573 583 else:
574 584
575 585 def get_index_stream():
576 586 with self._indexfp() as fp:
577 587 yield None
578 588 if index_size <= 65536:
579 589 yield fp.read(index_size)
580 590 else:
581 591 yield from util.filechunkiter(fp, limit=index_size)
582 592
583 593 def get_data_stream():
584 594 with self._datafp() as fp:
585 595 yield None
586 596 if data_size <= 65536:
587 597 yield fp.read(data_size)
588 598 else:
589 599 yield from util.filechunkiter(fp, limit=data_size)
590 600
591 601 index_stream = get_index_stream()
592 602 next(index_stream)
593 603 data_stream = get_data_stream()
594 604 next(data_stream)
595 605 return [
596 606 (self._datafile, data_stream, data_size),
597 607 (self._indexfile, index_stream, index_size),
598 608 ]
599 609
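# Illustrative consumption of the returned streams (the writer loop is
# hypothetical, not part of this change); each entry is a (filename, stream,
# size) tuple and the stream should yield exactly `size` bytes in total:
#
#     for name, stream, size in rl.get_streams(max_linkrev):
#         written = 0
#         for chunk in stream:
#             written += len(chunk)
#         assert written == size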
600 610 def _loadindex(self, docket=None):
601 611
602 612 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
603 613
604 614 if self.postfix is not None:
605 615 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
606 616 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
607 617 entry_point = b'%s.i.a' % self.radix
608 618 elif self._try_split and self.opener.exists(b'%s.i.s' % self.radix):
609 619 entry_point = b'%s.i.s' % self.radix
610 620 else:
611 621 entry_point = b'%s.i' % self.radix
612 622
613 623 if docket is not None:
614 624 self._docket = docket
615 625 self._docket_file = entry_point
616 626 else:
617 627 self._initempty = True
618 628 entry_data = self._get_data(entry_point, mmapindexthreshold)
619 629 if len(entry_data) > 0:
620 630 header = INDEX_HEADER.unpack(entry_data[:4])[0]
621 631 self._initempty = False
622 632 else:
623 633 header = new_header
624 634
625 635 self._format_flags = header & ~0xFFFF
626 636 self._format_version = header & 0xFFFF
627 637
628 638 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
629 639 if supported_flags is None:
630 640 msg = _(b'unknown version (%d) in revlog %s')
631 641 msg %= (self._format_version, self.display_id)
632 642 raise error.RevlogError(msg)
633 643 elif self._format_flags & ~supported_flags:
634 644 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
635 645 display_flag = self._format_flags >> 16
636 646 msg %= (display_flag, self._format_version, self.display_id)
637 647 raise error.RevlogError(msg)
638 648
639 649 features = FEATURES_BY_VERSION[self._format_version]
640 650 self._inline = features[b'inline'](self._format_flags)
641 651 self._generaldelta = features[b'generaldelta'](self._format_flags)
642 652 self.hassidedata = features[b'sidedata']
643 653
644 654 if not features[b'docket']:
645 655 self._indexfile = entry_point
646 656 index_data = entry_data
647 657 else:
648 658 self._docket_file = entry_point
649 659 if self._initempty:
650 660 self._docket = docketutil.default_docket(self, header)
651 661 else:
652 662 self._docket = docketutil.parse_docket(
653 663 self, entry_data, use_pending=self._trypending
654 664 )
655 665
656 666 if self._docket is not None:
657 667 self._indexfile = self._docket.index_filepath()
658 668 index_data = b''
659 669 index_size = self._docket.index_end
660 670 if index_size > 0:
661 671 index_data = self._get_data(
662 672 self._indexfile, mmapindexthreshold, size=index_size
663 673 )
664 674 if len(index_data) < index_size:
665 675 msg = _(b'too few index data for %s: got %d, expected %d')
666 676 msg %= (self.display_id, len(index_data), index_size)
667 677 raise error.RevlogError(msg)
668 678
669 679 self._inline = False
670 680 # generaldelta implied by version 2 revlogs.
671 681 self._generaldelta = True
672 682 # the logic for persistent nodemap will be dealt with within the
673 683 # main docket, so disable it for now.
674 684 self._nodemap_file = None
675 685
676 686 if self._docket is not None:
677 687 self._datafile = self._docket.data_filepath()
678 688 self._sidedatafile = self._docket.sidedata_filepath()
679 689 elif self.postfix is None:
680 690 self._datafile = b'%s.d' % self.radix
681 691 else:
682 692 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
683 693
684 694 self.nodeconstants = sha1nodeconstants
685 695 self.nullid = self.nodeconstants.nullid
686 696
687 697 # sparse-revlog can't be on without general-delta (issue6056)
688 698 if not self._generaldelta:
689 699 self._sparserevlog = False
690 700
691 701 self._storedeltachains = True
692 702
693 703 devel_nodemap = (
694 704 self._nodemap_file
695 705 and force_nodemap
696 706 and parse_index_v1_nodemap is not None
697 707 )
698 708
699 709 use_rust_index = False
700 710 if rustrevlog is not None:
701 711 if self._nodemap_file is not None:
702 712 use_rust_index = True
703 713 else:
704 714 use_rust_index = self.opener.options.get(b'rust.index')
705 715
706 716 self._parse_index = parse_index_v1
707 717 if self._format_version == REVLOGV0:
708 718 self._parse_index = revlogv0.parse_index_v0
709 719 elif self._format_version == REVLOGV2:
710 720 self._parse_index = parse_index_v2
711 721 elif self._format_version == CHANGELOGV2:
712 722 self._parse_index = parse_index_cl_v2
713 723 elif devel_nodemap:
714 724 self._parse_index = parse_index_v1_nodemap
715 725 elif use_rust_index:
716 726 self._parse_index = parse_index_v1_mixed
717 727 try:
718 728 d = self._parse_index(index_data, self._inline)
719 729 index, chunkcache = d
720 730 use_nodemap = (
721 731 not self._inline
722 732 and self._nodemap_file is not None
723 733 and util.safehasattr(index, 'update_nodemap_data')
724 734 )
725 735 if use_nodemap:
726 736 nodemap_data = nodemaputil.persisted_data(self)
727 737 if nodemap_data is not None:
728 738 docket = nodemap_data[0]
729 739 if (
730 740 len(d[0]) > docket.tip_rev
731 741 and d[0][docket.tip_rev][7] == docket.tip_node
732 742 ):
733 743 # no changelog tampering
734 744 self._nodemap_docket = docket
735 745 index.update_nodemap_data(*nodemap_data)
736 746 except (ValueError, IndexError):
737 747 raise error.RevlogError(
738 748 _(b"index %s is corrupted") % self.display_id
739 749 )
740 750 self.index = index
741 751 self._segmentfile = randomaccessfile.randomaccessfile(
742 752 self.opener,
743 753 (self._indexfile if self._inline else self._datafile),
744 754 self._chunkcachesize,
745 755 chunkcache,
746 756 )
747 757 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
748 758 self.opener,
749 759 self._sidedatafile,
750 760 self._chunkcachesize,
751 761 )
752 762 # revnum -> (chain-length, sum-delta-length)
753 763 self._chaininfocache = util.lrucachedict(500)
754 764 # revlog header -> revlog compressor
755 765 self._decompressors = {}
756 766
757 767 def get_revlog(self):
758 768 """simple function to mirror API of other not-really-revlog API"""
759 769 return self
760 770
761 771 @util.propertycache
762 772 def revlog_kind(self):
763 773 return self.target[0]
764 774
765 775 @util.propertycache
766 776 def display_id(self):
767 777 """The public facing "ID" of the revlog that we use in message"""
768 778 if self.revlog_kind == KIND_FILELOG:
769 779 # Reference the file without the "data/" prefix, so it is familiar
770 780 # to the user.
771 781 return self.target[1]
772 782 else:
773 783 return self.radix
774 784
775 785 def _get_decompressor(self, t):
776 786 try:
777 787 compressor = self._decompressors[t]
778 788 except KeyError:
779 789 try:
780 790 engine = util.compengines.forrevlogheader(t)
781 791 compressor = engine.revlogcompressor(self._compengineopts)
782 792 self._decompressors[t] = compressor
783 793 except KeyError:
784 794 raise error.RevlogError(
785 795 _(b'unknown compression type %s') % binascii.hexlify(t)
786 796 )
787 797 return compressor
788 798
789 799 @util.propertycache
790 800 def _compressor(self):
791 801 engine = util.compengines[self._compengine]
792 802 return engine.revlogcompressor(self._compengineopts)
793 803
794 804 @util.propertycache
795 805 def _decompressor(self):
796 806 """the default decompressor"""
797 807 if self._docket is None:
798 808 return None
799 809 t = self._docket.default_compression_header
800 810 c = self._get_decompressor(t)
801 811 return c.decompress
802 812
803 813 def _indexfp(self):
804 814 """file object for the revlog's index file"""
805 815 return self.opener(self._indexfile, mode=b"r")
806 816
807 817 def __index_write_fp(self):
808 818 # You should not use this directly and use `_writing` instead
809 819 try:
810 820 f = self.opener(
811 821 self._indexfile, mode=b"r+", checkambig=self._checkambig
812 822 )
813 823 if self._docket is None:
814 824 f.seek(0, os.SEEK_END)
815 825 else:
816 826 f.seek(self._docket.index_end, os.SEEK_SET)
817 827 return f
818 828 except FileNotFoundError:
819 829 return self.opener(
820 830 self._indexfile, mode=b"w+", checkambig=self._checkambig
821 831 )
822 832
823 833 def __index_new_fp(self):
824 834 # You should not use this unless you are upgrading from inline revlog
825 835 return self.opener(
826 836 self._indexfile,
827 837 mode=b"w",
828 838 checkambig=self._checkambig,
829 839 atomictemp=True,
830 840 )
831 841
832 842 def _datafp(self, mode=b'r'):
833 843 """file object for the revlog's data file"""
834 844 return self.opener(self._datafile, mode=mode)
835 845
836 846 @contextlib.contextmanager
837 847 def _sidedatareadfp(self):
838 848 """file object suitable to read sidedata"""
839 849 if self._writinghandles:
840 850 yield self._writinghandles[2]
841 851 else:
842 852 with self.opener(self._sidedatafile) as fp:
843 853 yield fp
844 854
845 855 def tiprev(self):
846 856 return len(self.index) - 1
847 857
848 858 def tip(self):
849 859 return self.node(self.tiprev())
850 860
851 861 def __contains__(self, rev):
852 862 return 0 <= rev < len(self)
853 863
854 864 def __len__(self):
855 865 return len(self.index)
856 866
857 867 def __iter__(self):
858 868 return iter(range(len(self)))
859 869
860 870 def revs(self, start=0, stop=None):
861 871 """iterate over all rev in this revlog (from start to stop)"""
862 872 return storageutil.iterrevs(len(self), start=start, stop=stop)
863 873
864 874 def hasnode(self, node):
865 875 try:
866 876 self.rev(node)
867 877 return True
868 878 except KeyError:
869 879 return False
870 880
871 881 def candelta(self, baserev, rev):
872 882 """whether two revisions (baserev, rev) can be delta-ed or not"""
873 883 # Disable delta if either rev requires a content-changing flag
874 884 # processor (ex. LFS). This is because such a flag processor can alter
875 885 # the rawtext content that the delta will be based on, and two clients
876 886 # could have the same revlog node with different flags (i.e. different
877 887 # rawtext contents) and the delta could be incompatible.
878 888 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
879 889 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
880 890 ):
881 891 return False
882 892 return True
883 893
884 894 def update_caches(self, transaction):
885 895 if self._nodemap_file is not None:
886 896 if transaction is None:
887 897 nodemaputil.update_persistent_nodemap(self)
888 898 else:
889 899 nodemaputil.setup_persistent_nodemap(transaction, self)
890 900
891 901 def clearcaches(self):
892 902 self._revisioncache = None
893 903 self._chainbasecache.clear()
894 904 self._segmentfile.clear_cache()
895 905 self._segmentfile_sidedata.clear_cache()
896 906 self._pcache = {}
897 907 self._nodemap_docket = None
898 908 self.index.clearcaches()
899 909 # The python code is the one responsible for validating the docket, so
900 910 # we end up having to refresh it here.
901 911 use_nodemap = (
902 912 not self._inline
903 913 and self._nodemap_file is not None
904 914 and util.safehasattr(self.index, 'update_nodemap_data')
905 915 )
906 916 if use_nodemap:
907 917 nodemap_data = nodemaputil.persisted_data(self)
908 918 if nodemap_data is not None:
909 919 self._nodemap_docket = nodemap_data[0]
910 920 self.index.update_nodemap_data(*nodemap_data)
911 921
912 922 def rev(self, node):
913 923 try:
914 924 return self.index.rev(node)
915 925 except TypeError:
916 926 raise
917 927 except error.RevlogError:
918 928 # parsers.c radix tree lookup failed
919 929 if (
920 930 node == self.nodeconstants.wdirid
921 931 or node in self.nodeconstants.wdirfilenodeids
922 932 ):
923 933 raise error.WdirUnsupported
924 934 raise error.LookupError(node, self.display_id, _(b'no node'))
925 935
926 936 # Accessors for index entries.
927 937
928 938 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
929 939 # are flags.
930 940 def start(self, rev):
931 941 return int(self.index[rev][0] >> 16)
932 942
933 943 def sidedata_cut_off(self, rev):
934 944 sd_cut_off = self.index[rev][8]
935 945 if sd_cut_off != 0:
936 946 return sd_cut_off
937 947 # This is some annoying dance, because entries without sidedata
938 948 # currently use 0 as their offset. (instead of previous-offset +
939 949 # previous-size)
940 950 #
941 951 # We should reconsider this "sidedata -> 0 sidedata_offset" policy.
942 952 # In the meantime, we need this.
943 953 while 0 <= rev:
944 954 e = self.index[rev]
945 955 if e[9] != 0:
946 956 return e[8] + e[9]
947 957 rev -= 1
948 958 return 0
949 959
950 960 def flags(self, rev):
951 961 return self.index[rev][0] & 0xFFFF
952 962
953 963 def length(self, rev):
954 964 return self.index[rev][1]
955 965
956 966 def sidedata_length(self, rev):
957 967 if not self.hassidedata:
958 968 return 0
959 969 return self.index[rev][9]
960 970
961 971 def rawsize(self, rev):
962 972 """return the length of the uncompressed text for a given revision"""
963 973 l = self.index[rev][2]
964 974 if l >= 0:
965 975 return l
966 976
967 977 t = self.rawdata(rev)
968 978 return len(t)
969 979
970 980 def size(self, rev):
971 981 """length of non-raw text (processed by a "read" flag processor)"""
972 982 # fast path: if no "read" flag processor could change the content,
973 983 # size is rawsize. note: ELLIPSIS is known to not change the content.
974 984 flags = self.flags(rev)
975 985 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
976 986 return self.rawsize(rev)
977 987
978 988 return len(self.revision(rev))
979 989
980 990 def fast_rank(self, rev):
981 991 """Return the rank of a revision if already known, or None otherwise.
982 992
983 993 The rank of a revision is the size of the sub-graph it defines as a
984 994 head. Equivalently, the rank of a revision `r` is the size of the set
985 995 `ancestors(r)`, `r` included.
986 996
987 997 This method returns the rank retrieved from the revlog in constant
988 998 time. It makes no attempt at computing unknown values for versions of
989 999 the revlog which do not persist the rank.
990 1000 """
991 1001 rank = self.index[rev][ENTRY_RANK]
992 1002 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
993 1003 return None
994 1004 if rev == nullrev:
995 1005 return 0 # convention
996 1006 return rank
997 1007
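# Worked example (illustrative): in a strictly linear history 0 <- 1 <- 2,
# ancestors(2) including rev 2 itself is {0, 1, 2}, so a CHANGELOGV2 revlog
# that persisted the value would report fast_rank(2) == 3; any other format,
# or an entry whose rank was not stored, makes this method return None.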
998 1008 def chainbase(self, rev):
999 1009 base = self._chainbasecache.get(rev)
1000 1010 if base is not None:
1001 1011 return base
1002 1012
1003 1013 index = self.index
1004 1014 iterrev = rev
1005 1015 base = index[iterrev][3]
1006 1016 while base != iterrev:
1007 1017 iterrev = base
1008 1018 base = index[iterrev][3]
1009 1019
1010 1020 self._chainbasecache[rev] = base
1011 1021 return base
1012 1022
1013 1023 def linkrev(self, rev):
1014 1024 return self.index[rev][4]
1015 1025
1016 1026 def parentrevs(self, rev):
1017 1027 try:
1018 1028 entry = self.index[rev]
1019 1029 except IndexError:
1020 1030 if rev == wdirrev:
1021 1031 raise error.WdirUnsupported
1022 1032 raise
1023 1033
1024 1034 if self.canonical_parent_order and entry[5] == nullrev:
1025 1035 return entry[6], entry[5]
1026 1036 else:
1027 1037 return entry[5], entry[6]
1028 1038
1029 1039 # fast parentrevs(rev) where rev isn't filtered
1030 1040 _uncheckedparentrevs = parentrevs
1031 1041
1032 1042 def node(self, rev):
1033 1043 try:
1034 1044 return self.index[rev][7]
1035 1045 except IndexError:
1036 1046 if rev == wdirrev:
1037 1047 raise error.WdirUnsupported
1038 1048 raise
1039 1049
1040 1050 # Derived from index values.
1041 1051
1042 1052 def end(self, rev):
1043 1053 return self.start(rev) + self.length(rev)
1044 1054
1045 1055 def parents(self, node):
1046 1056 i = self.index
1047 1057 d = i[self.rev(node)]
1048 1058 # inline node() to avoid function call overhead
1049 1059 if self.canonical_parent_order and d[5] == self.nullid:
1050 1060 return i[d[6]][7], i[d[5]][7]
1051 1061 else:
1052 1062 return i[d[5]][7], i[d[6]][7]
1053 1063
1054 1064 def chainlen(self, rev):
1055 1065 return self._chaininfo(rev)[0]
1056 1066
1057 1067 def _chaininfo(self, rev):
1058 1068 chaininfocache = self._chaininfocache
1059 1069 if rev in chaininfocache:
1060 1070 return chaininfocache[rev]
1061 1071 index = self.index
1062 1072 generaldelta = self._generaldelta
1063 1073 iterrev = rev
1064 1074 e = index[iterrev]
1065 1075 clen = 0
1066 1076 compresseddeltalen = 0
1067 1077 while iterrev != e[3]:
1068 1078 clen += 1
1069 1079 compresseddeltalen += e[1]
1070 1080 if generaldelta:
1071 1081 iterrev = e[3]
1072 1082 else:
1073 1083 iterrev -= 1
1074 1084 if iterrev in chaininfocache:
1075 1085 t = chaininfocache[iterrev]
1076 1086 clen += t[0]
1077 1087 compresseddeltalen += t[1]
1078 1088 break
1079 1089 e = index[iterrev]
1080 1090 else:
1081 1091 # Add text length of base since decompressing that also takes
1082 1092 # work. For cache hits the length is already included.
1083 1093 compresseddeltalen += e[1]
1084 1094 r = (clen, compresseddeltalen)
1085 1095 chaininfocache[rev] = r
1086 1096 return r
1087 1097
1088 1098 def _deltachain(self, rev, stoprev=None):
1089 1099 """Obtain the delta chain for a revision.
1090 1100
1091 1101 ``stoprev`` specifies a revision to stop at. If not specified, we
1092 1102 stop at the base of the chain.
1093 1103
1094 1104 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1095 1105 revs in ascending order and ``stopped`` is a bool indicating whether
1096 1106 ``stoprev`` was hit.
1097 1107 """
1098 1108 # Try C implementation.
1099 1109 try:
1100 1110 return self.index.deltachain(rev, stoprev, self._generaldelta)
1101 1111 except AttributeError:
1102 1112 pass
1103 1113
1104 1114 chain = []
1105 1115
1106 1116 # Alias to prevent attribute lookup in tight loop.
1107 1117 index = self.index
1108 1118 generaldelta = self._generaldelta
1109 1119
1110 1120 iterrev = rev
1111 1121 e = index[iterrev]
1112 1122 while iterrev != e[3] and iterrev != stoprev:
1113 1123 chain.append(iterrev)
1114 1124 if generaldelta:
1115 1125 iterrev = e[3]
1116 1126 else:
1117 1127 iterrev -= 1
1118 1128 e = index[iterrev]
1119 1129
1120 1130 if iterrev == stoprev:
1121 1131 stopped = True
1122 1132 else:
1123 1133 chain.append(iterrev)
1124 1134 stopped = False
1125 1135
1126 1136 chain.reverse()
1127 1137 return chain, stopped
1128 1138
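# Simplified sketch of what the chain is for (the actual reconstruction path
# lives elsewhere in this class and in mdiff); `stored_fulltext` and
# `stored_delta` are made-up helpers standing in for reading raw chunks:
#
#     chain, stopped = self._deltachain(rev)
#     # when not stopped, chain[0] is a full snapshot and later revs are deltas
#     text = stored_fulltext(chain[0])
#     for r in chain[1:]:
#         text = mdiff.patch(text, stored_delta(r))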
1129 1139 def ancestors(self, revs, stoprev=0, inclusive=False):
1130 1140 """Generate the ancestors of 'revs' in reverse revision order.
1131 1141 Does not generate revs lower than stoprev.
1132 1142
1133 1143 See the documentation for ancestor.lazyancestors for more details."""
1134 1144
1135 1145 # first, make sure start revisions aren't filtered
1136 1146 revs = list(revs)
1137 1147 checkrev = self.node
1138 1148 for r in revs:
1139 1149 checkrev(r)
1140 1150 # and we're sure ancestors aren't filtered as well
1141 1151
1142 1152 if rustancestor is not None and self.index.rust_ext_compat:
1143 1153 lazyancestors = rustancestor.LazyAncestors
1144 1154 arg = self.index
1145 1155 else:
1146 1156 lazyancestors = ancestor.lazyancestors
1147 1157 arg = self._uncheckedparentrevs
1148 1158 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1149 1159
1150 1160 def descendants(self, revs):
1151 1161 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1152 1162
1153 1163 def findcommonmissing(self, common=None, heads=None):
1154 1164 """Return a tuple of the ancestors of common and the ancestors of heads
1155 1165 that are not ancestors of common. In revset terminology, we return the
1156 1166 tuple:
1157 1167
1158 1168 ::common, (::heads) - (::common)
1159 1169
1160 1170 The list is sorted by revision number, meaning it is
1161 1171 topologically sorted.
1162 1172
1163 1173 'heads' and 'common' are both lists of node IDs. If heads is
1164 1174 not supplied, uses all of the revlog's heads. If common is not
1165 1175 supplied, uses nullid."""
1166 1176 if common is None:
1167 1177 common = [self.nullid]
1168 1178 if heads is None:
1169 1179 heads = self.heads()
1170 1180
1171 1181 common = [self.rev(n) for n in common]
1172 1182 heads = [self.rev(n) for n in heads]
1173 1183
1174 1184 # we want the ancestors, but inclusive
1175 1185 class lazyset:
1176 1186 def __init__(self, lazyvalues):
1177 1187 self.addedvalues = set()
1178 1188 self.lazyvalues = lazyvalues
1179 1189
1180 1190 def __contains__(self, value):
1181 1191 return value in self.addedvalues or value in self.lazyvalues
1182 1192
1183 1193 def __iter__(self):
1184 1194 added = self.addedvalues
1185 1195 for r in added:
1186 1196 yield r
1187 1197 for r in self.lazyvalues:
1188 1198 if not r in added:
1189 1199 yield r
1190 1200
1191 1201 def add(self, value):
1192 1202 self.addedvalues.add(value)
1193 1203
1194 1204 def update(self, values):
1195 1205 self.addedvalues.update(values)
1196 1206
1197 1207 has = lazyset(self.ancestors(common))
1198 1208 has.add(nullrev)
1199 1209 has.update(common)
1200 1210
1201 1211 # take all ancestors from heads that aren't in has
1202 1212 missing = set()
1203 1213 visit = collections.deque(r for r in heads if r not in has)
1204 1214 while visit:
1205 1215 r = visit.popleft()
1206 1216 if r in missing:
1207 1217 continue
1208 1218 else:
1209 1219 missing.add(r)
1210 1220 for p in self.parentrevs(r):
1211 1221 if p not in has:
1212 1222 visit.append(p)
1213 1223 missing = list(missing)
1214 1224 missing.sort()
1215 1225 return has, [self.node(miss) for miss in missing]
1216 1226
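# Worked example (illustrative): with a linear history 0 <- 1 <- 2 <- 3,
# common=[node(1)] and heads=[node(3)], `has` covers {null, 0, 1} and the
# returned missing list is [node(2), node(3)], sorted by revision number.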
1217 1227 def incrementalmissingrevs(self, common=None):
1218 1228 """Return an object that can be used to incrementally compute the
1219 1229 revision numbers of the ancestors of arbitrary sets that are not
1220 1230 ancestors of common. This is an ancestor.incrementalmissingancestors
1221 1231 object.
1222 1232
1223 1233 'common' is a list of revision numbers. If common is not supplied, uses
1224 1234 nullrev.
1225 1235 """
1226 1236 if common is None:
1227 1237 common = [nullrev]
1228 1238
1229 1239 if rustancestor is not None and self.index.rust_ext_compat:
1230 1240 return rustancestor.MissingAncestors(self.index, common)
1231 1241 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1232 1242
1233 1243 def findmissingrevs(self, common=None, heads=None):
1234 1244 """Return the revision numbers of the ancestors of heads that
1235 1245 are not ancestors of common.
1236 1246
1237 1247 More specifically, return a list of revision numbers corresponding to
1238 1248 nodes N such that every N satisfies the following constraints:
1239 1249
1240 1250 1. N is an ancestor of some node in 'heads'
1241 1251 2. N is not an ancestor of any node in 'common'
1242 1252
1243 1253 The list is sorted by revision number, meaning it is
1244 1254 topologically sorted.
1245 1255
1246 1256 'heads' and 'common' are both lists of revision numbers. If heads is
1247 1257 not supplied, uses all of the revlog's heads. If common is not
1248 1258 supplied, uses nullid."""
1249 1259 if common is None:
1250 1260 common = [nullrev]
1251 1261 if heads is None:
1252 1262 heads = self.headrevs()
1253 1263
1254 1264 inc = self.incrementalmissingrevs(common=common)
1255 1265 return inc.missingancestors(heads)
1256 1266
1257 1267 def findmissing(self, common=None, heads=None):
1258 1268 """Return the ancestors of heads that are not ancestors of common.
1259 1269
1260 1270 More specifically, return a list of nodes N such that every N
1261 1271 satisfies the following constraints:
1262 1272
1263 1273 1. N is an ancestor of some node in 'heads'
1264 1274 2. N is not an ancestor of any node in 'common'
1265 1275
1266 1276 The list is sorted by revision number, meaning it is
1267 1277 topologically sorted.
1268 1278
1269 1279 'heads' and 'common' are both lists of node IDs. If heads is
1270 1280 not supplied, uses all of the revlog's heads. If common is not
1271 1281 supplied, uses nullid."""
1272 1282 if common is None:
1273 1283 common = [self.nullid]
1274 1284 if heads is None:
1275 1285 heads = self.heads()
1276 1286
1277 1287 common = [self.rev(n) for n in common]
1278 1288 heads = [self.rev(n) for n in heads]
1279 1289
1280 1290 inc = self.incrementalmissingrevs(common=common)
1281 1291 return [self.node(r) for r in inc.missingancestors(heads)]
1282 1292
1283 1293 def nodesbetween(self, roots=None, heads=None):
1284 1294 """Return a topological path from 'roots' to 'heads'.
1285 1295
1286 1296 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1287 1297 topologically sorted list of all nodes N that satisfy both of
1288 1298 these constraints:
1289 1299
1290 1300 1. N is a descendant of some node in 'roots'
1291 1301 2. N is an ancestor of some node in 'heads'
1292 1302
1293 1303 Every node is considered to be both a descendant and an ancestor
1294 1304 of itself, so every reachable node in 'roots' and 'heads' will be
1295 1305 included in 'nodes'.
1296 1306
1297 1307 'outroots' is the list of reachable nodes in 'roots', i.e., the
1298 1308 subset of 'roots' that is returned in 'nodes'. Likewise,
1299 1309 'outheads' is the subset of 'heads' that is also in 'nodes'.
1300 1310
1301 1311 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1302 1312 unspecified, uses nullid as the only root. If 'heads' is
1303 1313 unspecified, uses list of all of the revlog's heads."""
1304 1314 nonodes = ([], [], [])
1305 1315 if roots is not None:
1306 1316 roots = list(roots)
1307 1317 if not roots:
1308 1318 return nonodes
1309 1319 lowestrev = min([self.rev(n) for n in roots])
1310 1320 else:
1311 1321 roots = [self.nullid] # Everybody's a descendant of nullid
1312 1322 lowestrev = nullrev
1313 1323 if (lowestrev == nullrev) and (heads is None):
1314 1324 # We want _all_ the nodes!
1315 1325 return (
1316 1326 [self.node(r) for r in self],
1317 1327 [self.nullid],
1318 1328 list(self.heads()),
1319 1329 )
1320 1330 if heads is None:
1321 1331 # All nodes are ancestors, so the latest ancestor is the last
1322 1332 # node.
1323 1333 highestrev = len(self) - 1
1324 1334 # Set ancestors to None to signal that every node is an ancestor.
1325 1335 ancestors = None
1326 1336 # Set heads to an empty dictionary for later discovery of heads
1327 1337 heads = {}
1328 1338 else:
1329 1339 heads = list(heads)
1330 1340 if not heads:
1331 1341 return nonodes
1332 1342 ancestors = set()
1333 1343 # Turn heads into a dictionary so we can remove 'fake' heads.
1334 1344 # Also, later we will be using it to filter out the heads we can't
1335 1345 # find from roots.
1336 1346 heads = dict.fromkeys(heads, False)
1337 1347 # Start at the top and keep marking parents until we're done.
1338 1348 nodestotag = set(heads)
1339 1349 # Remember where the top was so we can use it as a limit later.
1340 1350 highestrev = max([self.rev(n) for n in nodestotag])
1341 1351 while nodestotag:
1342 1352 # grab a node to tag
1343 1353 n = nodestotag.pop()
1344 1354 # Never tag nullid
1345 1355 if n == self.nullid:
1346 1356 continue
1347 1357 # A node's revision number represents its place in a
1348 1358 # topologically sorted list of nodes.
1349 1359 r = self.rev(n)
1350 1360 if r >= lowestrev:
1351 1361 if n not in ancestors:
1352 1362 # If we are possibly a descendant of one of the roots
1353 1363 # and we haven't already been marked as an ancestor
1354 1364 ancestors.add(n) # Mark as ancestor
1355 1365 # Add non-nullid parents to list of nodes to tag.
1356 1366 nodestotag.update(
1357 1367 [p for p in self.parents(n) if p != self.nullid]
1358 1368 )
1359 1369 elif n in heads: # We've seen it before, is it a fake head?
1360 1370 # So it is, real heads should not be the ancestors of
1361 1371 # any other heads.
1362 1372 heads.pop(n)
1363 1373 if not ancestors:
1364 1374 return nonodes
1365 1375 # Now that we have our set of ancestors, we want to remove any
1366 1376 # roots that are not ancestors.
1367 1377
1368 1378 # If one of the roots was nullid, everything is included anyway.
1369 1379 if lowestrev > nullrev:
1370 1380 # But, since we weren't, let's recompute the lowest rev to not
1371 1381 # include roots that aren't ancestors.
1372 1382
1373 1383 # Filter out roots that aren't ancestors of heads
1374 1384 roots = [root for root in roots if root in ancestors]
1375 1385 # Recompute the lowest revision
1376 1386 if roots:
1377 1387 lowestrev = min([self.rev(root) for root in roots])
1378 1388 else:
1379 1389 # No more roots? Return empty list
1380 1390 return nonodes
1381 1391 else:
1382 1392 # We are descending from nullid, and don't need to care about
1383 1393 # any other roots.
1384 1394 lowestrev = nullrev
1385 1395 roots = [self.nullid]
1386 1396 # Transform our roots list into a set.
1387 1397 descendants = set(roots)
1388 1398 # Also, keep the original roots so we can filter out roots that aren't
1389 1399 # 'real' roots (i.e. are descended from other roots).
1390 1400 roots = descendants.copy()
1391 1401 # Our topologically sorted list of output nodes.
1392 1402 orderedout = []
1393 1403 # Don't start at nullid since we don't want nullid in our output list,
1394 1404 # and if nullid shows up in descendants, empty parents will look like
1395 1405 # they're descendants.
1396 1406 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1397 1407 n = self.node(r)
1398 1408 isdescendant = False
1399 1409 if lowestrev == nullrev: # Everybody is a descendant of nullid
1400 1410 isdescendant = True
1401 1411 elif n in descendants:
1402 1412 # n is already a descendant
1403 1413 isdescendant = True
1404 1414 # This check only needs to be done here because all the roots
1405 1415 # will start being marked as descendants before the loop.
1406 1416 if n in roots:
1407 1417 # If n was a root, check if it's a 'real' root.
1408 1418 p = tuple(self.parents(n))
1409 1419 # If any of its parents are descendants, it's not a root.
1410 1420 if (p[0] in descendants) or (p[1] in descendants):
1411 1421 roots.remove(n)
1412 1422 else:
1413 1423 p = tuple(self.parents(n))
1414 1424 # A node is a descendant if either of its parents are
1415 1425 # descendants. (We seeded the descendants set with the roots
1416 1426 # up there, remember?)
1417 1427 if (p[0] in descendants) or (p[1] in descendants):
1418 1428 descendants.add(n)
1419 1429 isdescendant = True
1420 1430 if isdescendant and ((ancestors is None) or (n in ancestors)):
1421 1431 # Only include nodes that are both descendants and ancestors.
1422 1432 orderedout.append(n)
1423 1433 if (ancestors is not None) and (n in heads):
1424 1434 # We're trying to figure out which heads are reachable
1425 1435 # from roots.
1426 1436 # Mark this head as having been reached
1427 1437 heads[n] = True
1428 1438 elif ancestors is None:
1429 1439 # Otherwise, we're trying to discover the heads.
1430 1440 # Assume this is a head because if it isn't, the next step
1431 1441 # will eventually remove it.
1432 1442 heads[n] = True
1433 1443 # But, obviously its parents aren't.
1434 1444 for p in self.parents(n):
1435 1445 heads.pop(p, None)
1436 1446 heads = [head for head, flag in heads.items() if flag]
1437 1447 roots = list(roots)
1438 1448 assert orderedout
1439 1449 assert roots
1440 1450 assert heads
1441 1451 return (orderedout, roots, heads)
1442 1452
1443 1453 def headrevs(self, revs=None):
1444 1454 if revs is None:
1445 1455 try:
1446 1456 return self.index.headrevs()
1447 1457 except AttributeError:
1448 1458 return self._headrevs()
1449 1459 if rustdagop is not None and self.index.rust_ext_compat:
1450 1460 return rustdagop.headrevs(self.index, revs)
1451 1461 return dagop.headrevs(revs, self._uncheckedparentrevs)
1452 1462
1453 1463 def computephases(self, roots):
1454 1464 return self.index.computephasesmapsets(roots)
1455 1465
1456 1466 def _headrevs(self):
1457 1467 count = len(self)
1458 1468 if not count:
1459 1469 return [nullrev]
1460 1470 # we won't iterate over filtered revs, so nobody is a head at the start
1461 1471 ishead = [0] * (count + 1)
1462 1472 index = self.index
1463 1473 for r in self:
1464 1474 ishead[r] = 1 # I may be a head
1465 1475 e = index[r]
1466 1476 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1467 1477 return [r for r, val in enumerate(ishead) if val]
1468 1478
1469 1479 def heads(self, start=None, stop=None):
1470 1480 """return the list of all nodes that have no children
1471 1481
1472 1482 if start is specified, only heads that are descendants of
1473 1483 start will be returned
1474 1484 if stop is specified, it will consider all the revs from stop
1475 1485 as if they had no children
1476 1486 """
1477 1487 if start is None and stop is None:
1478 1488 if not len(self):
1479 1489 return [self.nullid]
1480 1490 return [self.node(r) for r in self.headrevs()]
1481 1491
1482 1492 if start is None:
1483 1493 start = nullrev
1484 1494 else:
1485 1495 start = self.rev(start)
1486 1496
1487 1497 stoprevs = {self.rev(n) for n in stop or []}
1488 1498
1489 1499 revs = dagop.headrevssubset(
1490 1500 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1491 1501 )
1492 1502
1493 1503 return [self.node(rev) for rev in revs]
1494 1504
1495 1505 def children(self, node):
1496 1506 """find the children of a given node"""
1497 1507 c = []
1498 1508 p = self.rev(node)
1499 1509 for r in self.revs(start=p + 1):
1500 1510 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1501 1511 if prevs:
1502 1512 for pr in prevs:
1503 1513 if pr == p:
1504 1514 c.append(self.node(r))
1505 1515 elif p == nullrev:
1506 1516 c.append(self.node(r))
1507 1517 return c
1508 1518
1509 1519 def commonancestorsheads(self, a, b):
1510 1520 """calculate all the heads of the common ancestors of nodes a and b"""
1511 1521 a, b = self.rev(a), self.rev(b)
1512 1522 ancs = self._commonancestorsheads(a, b)
1513 1523 return pycompat.maplist(self.node, ancs)
1514 1524
1515 1525 def _commonancestorsheads(self, *revs):
1516 1526 """calculate all the heads of the common ancestors of revs"""
1517 1527 try:
1518 1528 ancs = self.index.commonancestorsheads(*revs)
1519 1529 except (AttributeError, OverflowError): # C implementation failed
1520 1530 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1521 1531 return ancs
1522 1532
1523 1533 def isancestor(self, a, b):
1524 1534 """return True if node a is an ancestor of node b
1525 1535
1526 1536 A revision is considered an ancestor of itself."""
1527 1537 a, b = self.rev(a), self.rev(b)
1528 1538 return self.isancestorrev(a, b)
1529 1539
1530 1540 def isancestorrev(self, a, b):
1531 1541 """return True if revision a is an ancestor of revision b
1532 1542
1533 1543 A revision is considered an ancestor of itself.
1534 1544
1535 1545 The implementation of this is trivial but the use of
1536 1546 reachableroots is not."""
1537 1547 if a == nullrev:
1538 1548 return True
1539 1549 elif a == b:
1540 1550 return True
1541 1551 elif a > b:
1542 1552 return False
1543 1553 return bool(self.reachableroots(a, [b], [a], includepath=False))
1544 1554
1545 1555 def reachableroots(self, minroot, heads, roots, includepath=False):
1546 1556 """return (heads(::(<roots> and <roots>::<heads>)))
1547 1557
1548 1558 If includepath is True, return (<roots>::<heads>)."""
1549 1559 try:
1550 1560 return self.index.reachableroots2(
1551 1561 minroot, heads, roots, includepath
1552 1562 )
1553 1563 except AttributeError:
1554 1564 return dagop._reachablerootspure(
1555 1565 self.parentrevs, minroot, roots, heads, includepath
1556 1566 )
1557 1567
1558 1568 def ancestor(self, a, b):
1559 1569 """calculate the "best" common ancestor of nodes a and b"""
1560 1570
1561 1571 a, b = self.rev(a), self.rev(b)
1562 1572 try:
1563 1573 ancs = self.index.ancestors(a, b)
1564 1574 except (AttributeError, OverflowError):
1565 1575 ancs = ancestor.ancestors(self.parentrevs, a, b)
1566 1576 if ancs:
1567 1577 # choose a consistent winner when there's a tie
1568 1578 return min(map(self.node, ancs))
1569 1579 return self.nullid
1570 1580
1571 1581 def _match(self, id):
1572 1582 if isinstance(id, int):
1573 1583 # rev
1574 1584 return self.node(id)
1575 1585 if len(id) == self.nodeconstants.nodelen:
1576 1586 # possibly a binary node
1577 1587 # odds of a binary node being all hex in ASCII are 1 in 10**25
1578 1588 try:
1579 1589 node = id
1580 1590 self.rev(node) # quick search the index
1581 1591 return node
1582 1592 except error.LookupError:
1583 1593 pass # may be partial hex id
1584 1594 try:
1585 1595 # str(rev)
1586 1596 rev = int(id)
1587 1597 if b"%d" % rev != id:
1588 1598 raise ValueError
1589 1599 if rev < 0:
1590 1600 rev = len(self) + rev
1591 1601 if rev < 0 or rev >= len(self):
1592 1602 raise ValueError
1593 1603 return self.node(rev)
1594 1604 except (ValueError, OverflowError):
1595 1605 pass
1596 1606 if len(id) == 2 * self.nodeconstants.nodelen:
1597 1607 try:
1598 1608 # a full hex nodeid?
1599 1609 node = bin(id)
1600 1610 self.rev(node)
1601 1611 return node
1602 1612 except (binascii.Error, error.LookupError):
1603 1613 pass
1604 1614
1605 1615 def _partialmatch(self, id):
1606 1616 # we don't care about wdirfilenodeids as they should always be full hashes
1607 1617 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1608 1618 ambiguous = False
1609 1619 try:
1610 1620 partial = self.index.partialmatch(id)
1611 1621 if partial and self.hasnode(partial):
1612 1622 if maybewdir:
1613 1623 # single 'ff...' match in radix tree, ambiguous with wdir
1614 1624 ambiguous = True
1615 1625 else:
1616 1626 return partial
1617 1627 elif maybewdir:
1618 1628 # no 'ff...' match in radix tree, wdir identified
1619 1629 raise error.WdirUnsupported
1620 1630 else:
1621 1631 return None
1622 1632 except error.RevlogError:
1623 1633 # parsers.c radix tree lookup gave multiple matches
1624 1634 # fast path: for unfiltered changelog, radix tree is accurate
1625 1635 if not getattr(self, 'filteredrevs', None):
1626 1636 ambiguous = True
1627 1637 # fall through to slow path that filters hidden revisions
1628 1638 except (AttributeError, ValueError):
1629 1639 # we are pure python, or key is not hex
1630 1640 pass
1631 1641 if ambiguous:
1632 1642 raise error.AmbiguousPrefixLookupError(
1633 1643 id, self.display_id, _(b'ambiguous identifier')
1634 1644 )
1635 1645
1636 1646 if id in self._pcache:
1637 1647 return self._pcache[id]
1638 1648
1639 1649 if len(id) <= 40:
1640 1650 # hex(node)[:...]
1641 1651 l = len(id) // 2 * 2 # grab an even number of digits
1642 1652 try:
1643 1653 # we're dropping the last digit, so let's check that it's hex,
1644 1654 # to avoid the expensive computation below if it's not
1645 1655 if len(id) % 2 > 0:
1646 1656 if not (id[-1] in hexdigits):
1647 1657 return None
1648 1658 prefix = bin(id[:l])
1649 1659 except binascii.Error:
1650 1660 pass
1651 1661 else:
1652 1662 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1653 1663 nl = [
1654 1664 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1655 1665 ]
1656 1666 if self.nodeconstants.nullhex.startswith(id):
1657 1667 nl.append(self.nullid)
1658 1668 if len(nl) > 0:
1659 1669 if len(nl) == 1 and not maybewdir:
1660 1670 self._pcache[id] = nl[0]
1661 1671 return nl[0]
1662 1672 raise error.AmbiguousPrefixLookupError(
1663 1673 id, self.display_id, _(b'ambiguous identifier')
1664 1674 )
1665 1675 if maybewdir:
1666 1676 raise error.WdirUnsupported
1667 1677 return None
1668 1678
1669 1679 def lookup(self, id):
1670 1680 """locate a node based on:
1671 1681 - revision number or str(revision number)
1672 1682 - nodeid or subset of hex nodeid
1673 1683 """
1674 1684 n = self._match(id)
1675 1685 if n is not None:
1676 1686 return n
1677 1687 n = self._partialmatch(id)
1678 1688 if n:
1679 1689 return n
1680 1690
1681 1691 raise error.LookupError(id, self.display_id, _(b'no match found'))
1682 1692
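# Editor's note: an illustrative sketch, not part of revlog.py. lookup()
# accepts a revision number, its string form, a binary node id or a hex
# prefix, and raises error.LookupError (the error module is already imported
# by this file) when nothing matches.
def _example_resolve(rlog, identifier):
    """Resolve ``identifier`` to a binary node id, or None when unknown."""
    try:
        return rlog.lookup(identifier)
    except error.LookupError:
        return None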
1683 1693 def shortest(self, node, minlength=1):
1684 1694 """Find the shortest unambiguous prefix that matches node."""
1685 1695
1686 1696 def isvalid(prefix):
1687 1697 try:
1688 1698 matchednode = self._partialmatch(prefix)
1689 1699 except error.AmbiguousPrefixLookupError:
1690 1700 return False
1691 1701 except error.WdirUnsupported:
1692 1702 # single 'ff...' match
1693 1703 return True
1694 1704 if matchednode is None:
1695 1705 raise error.LookupError(node, self.display_id, _(b'no node'))
1696 1706 return True
1697 1707
1698 1708 def maybewdir(prefix):
1699 1709 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1700 1710
1701 1711 hexnode = hex(node)
1702 1712
1703 1713 def disambiguate(hexnode, minlength):
1704 1714 """Disambiguate against wdirid."""
1705 1715 for length in range(minlength, len(hexnode) + 1):
1706 1716 prefix = hexnode[:length]
1707 1717 if not maybewdir(prefix):
1708 1718 return prefix
1709 1719
1710 1720 if not getattr(self, 'filteredrevs', None):
1711 1721 try:
1712 1722 length = max(self.index.shortest(node), minlength)
1713 1723 return disambiguate(hexnode, length)
1714 1724 except error.RevlogError:
1715 1725 if node != self.nodeconstants.wdirid:
1716 1726 raise error.LookupError(
1717 1727 node, self.display_id, _(b'no node')
1718 1728 )
1719 1729 except AttributeError:
1720 1730 # Fall through to pure code
1721 1731 pass
1722 1732
1723 1733 if node == self.nodeconstants.wdirid:
1724 1734 for length in range(minlength, len(hexnode) + 1):
1725 1735 prefix = hexnode[:length]
1726 1736 if isvalid(prefix):
1727 1737 return prefix
1728 1738
1729 1739 for length in range(minlength, len(hexnode) + 1):
1730 1740 prefix = hexnode[:length]
1731 1741 if isvalid(prefix):
1732 1742 return disambiguate(hexnode, length)
1733 1743
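# Editor's note: an illustrative sketch, not part of revlog.py, of the
# contract of shortest(): for a node stored in the revlog, the returned
# prefix resolves back to the same node through lookup().
def _example_short_prefix(rlog, node, minlength=4):
    prefix = rlog.shortest(node, minlength)
    assert rlog.lookup(prefix) == node
    return prefix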
1734 1744 def cmp(self, node, text):
1735 1745 """compare text with a given file revision
1736 1746
1737 1747 returns True if text is different than what is stored.
1738 1748 """
1739 1749 p1, p2 = self.parents(node)
1740 1750 return storageutil.hashrevisionsha1(text, p1, p2) != node
1741 1751
1742 1752 def _getsegmentforrevs(self, startrev, endrev, df=None):
1743 1753 """Obtain a segment of raw data corresponding to a range of revisions.
1744 1754
1745 1755 Accepts the start and end revisions and an optional already-open
1746 1756 file handle to be used for reading. If the file handle is read, its
1747 1757 seek position will not be preserved.
1748 1758
1749 1759 Requests for data may be satisfied by a cache.
1750 1760
1751 1761 Returns a 2-tuple of (offset, data) for the requested range of
1752 1762 revisions. Offset is the integer offset from the beginning of the
1753 1763 revlog and data is a str or buffer of the raw byte data.
1754 1764
1755 1765 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1756 1766 to determine where each revision's data begins and ends.
1757 1767 """
1758 1768 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1759 1769 # (functions are expensive).
1760 1770 index = self.index
1761 1771 istart = index[startrev]
1762 1772 start = int(istart[0] >> 16)
1763 1773 if startrev == endrev:
1764 1774 end = start + istart[1]
1765 1775 else:
1766 1776 iend = index[endrev]
1767 1777 end = int(iend[0] >> 16) + iend[1]
1768 1778
1769 1779 if self._inline:
1770 1780 start += (startrev + 1) * self.index.entry_size
1771 1781 end += (endrev + 1) * self.index.entry_size
1772 1782 length = end - start
1773 1783
1774 1784 return start, self._segmentfile.read_chunk(start, length, df)
1775 1785
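# Editor's note: an illustrative sketch, not part of revlog.py, of how a
# caller slices the (offset, data) pair from _getsegmentforrevs() into
# per-revision compressed chunks, mirroring what _chunks() does below.
def _example_raw_chunks(rlog, startrev, endrev):
    offset, data = rlog._getsegmentforrevs(startrev, endrev)
    chunks = []
    for rev in range(startrev, endrev + 1):
        chunkstart = rlog.start(rev)
        if rlog._inline:
            # inline revlogs interleave index entries with the data
            chunkstart += (rev + 1) * rlog.index.entry_size
        pos = chunkstart - offset
        chunks.append(data[pos : pos + rlog.length(rev)])
    return chunks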
1776 1786 def _chunk(self, rev, df=None):
1777 1787 """Obtain a single decompressed chunk for a revision.
1778 1788
1779 1789 Accepts an integer revision and an optional already-open file handle
1780 1790 to be used for reading. If used, the seek position of the file will not
1781 1791 be preserved.
1782 1792
1783 1793 Returns a str holding uncompressed data for the requested revision.
1784 1794 """
1785 1795 compression_mode = self.index[rev][10]
1786 1796 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1787 1797 if compression_mode == COMP_MODE_PLAIN:
1788 1798 return data
1789 1799 elif compression_mode == COMP_MODE_DEFAULT:
1790 1800 return self._decompressor(data)
1791 1801 elif compression_mode == COMP_MODE_INLINE:
1792 1802 return self.decompress(data)
1793 1803 else:
1794 1804 msg = b'unknown compression mode %d'
1795 1805 msg %= compression_mode
1796 1806 raise error.RevlogError(msg)
1797 1807
1798 1808 def _chunks(self, revs, df=None, targetsize=None):
1799 1809 """Obtain decompressed chunks for the specified revisions.
1800 1810
1801 1811 Accepts an iterable of numeric revisions that are assumed to be in
1802 1812 ascending order. Also accepts an optional already-open file handle
1803 1813 to be used for reading. If used, the seek position of the file will
1804 1814 not be preserved.
1805 1815
1806 1816 This function is similar to calling ``self._chunk()`` multiple times,
1807 1817 but is faster.
1808 1818
1809 1819 Returns a list with decompressed data for each requested revision.
1810 1820 """
1811 1821 if not revs:
1812 1822 return []
1813 1823 start = self.start
1814 1824 length = self.length
1815 1825 inline = self._inline
1816 1826 iosize = self.index.entry_size
1817 1827 buffer = util.buffer
1818 1828
1819 1829 l = []
1820 1830 ladd = l.append
1821 1831
1822 1832 if not self._withsparseread:
1823 1833 slicedchunks = (revs,)
1824 1834 else:
1825 1835 slicedchunks = deltautil.slicechunk(
1826 1836 self, revs, targetsize=targetsize
1827 1837 )
1828 1838
1829 1839 for revschunk in slicedchunks:
1830 1840 firstrev = revschunk[0]
1831 1841 # Skip trailing revisions with empty diff
1832 1842 for lastrev in revschunk[::-1]:
1833 1843 if length(lastrev) != 0:
1834 1844 break
1835 1845
1836 1846 try:
1837 1847 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1838 1848 except OverflowError:
1839 1849 # issue4215 - we can't cache a run of chunks greater than
1840 1850 # 2G on Windows
1841 1851 return [self._chunk(rev, df=df) for rev in revschunk]
1842 1852
1843 1853 decomp = self.decompress
1844 1854 # self._decompressor might be None, but will not be used in that case
1845 1855 def_decomp = self._decompressor
1846 1856 for rev in revschunk:
1847 1857 chunkstart = start(rev)
1848 1858 if inline:
1849 1859 chunkstart += (rev + 1) * iosize
1850 1860 chunklength = length(rev)
1851 1861 comp_mode = self.index[rev][10]
1852 1862 c = buffer(data, chunkstart - offset, chunklength)
1853 1863 if comp_mode == COMP_MODE_PLAIN:
1854 1864 ladd(c)
1855 1865 elif comp_mode == COMP_MODE_INLINE:
1856 1866 ladd(decomp(c))
1857 1867 elif comp_mode == COMP_MODE_DEFAULT:
1858 1868 ladd(def_decomp(c))
1859 1869 else:
1860 1870 msg = b'unknown compression mode %d'
1861 1871 msg %= comp_mode
1862 1872 raise error.RevlogError(msg)
1863 1873
1864 1874 return l
1865 1875
1866 1876 def deltaparent(self, rev):
1867 1877 """return deltaparent of the given revision"""
1868 1878 base = self.index[rev][3]
1869 1879 if base == rev:
1870 1880 return nullrev
1871 1881 elif self._generaldelta:
1872 1882 return base
1873 1883 else:
1874 1884 return rev - 1
1875 1885
1876 1886 def issnapshot(self, rev):
1877 1887 """tells whether rev is a snapshot"""
1878 1888 if not self._sparserevlog:
1879 1889 return self.deltaparent(rev) == nullrev
1880 1890 elif util.safehasattr(self.index, 'issnapshot'):
1881 1891 # directly assign the method to cache the testing and access
1882 1892 self.issnapshot = self.index.issnapshot
1883 1893 return self.issnapshot(rev)
1884 1894 if rev == nullrev:
1885 1895 return True
1886 1896 entry = self.index[rev]
1887 1897 base = entry[3]
1888 1898 if base == rev:
1889 1899 return True
1890 1900 if base == nullrev:
1891 1901 return True
1892 1902 p1 = entry[5]
1893 1903 while self.length(p1) == 0:
1894 1904 b = self.deltaparent(p1)
1895 1905 if b == p1:
1896 1906 break
1897 1907 p1 = b
1898 1908 p2 = entry[6]
1899 1909 while self.length(p2) == 0:
1900 1910 b = self.deltaparent(p2)
1901 1911 if b == p2:
1902 1912 break
1903 1913 p2 = b
1904 1914 if base == p1 or base == p2:
1905 1915 return False
1906 1916 return self.issnapshot(base)
1907 1917
1908 1918 def snapshotdepth(self, rev):
1909 1919 """number of snapshot in the chain before this one"""
1910 1920 if not self.issnapshot(rev):
1911 1921 raise error.ProgrammingError(b'revision %d not a snapshot')
1912 1922 return len(self._deltachain(rev)[0]) - 1
1913 1923
1914 1924 def revdiff(self, rev1, rev2):
1915 1925 """return or calculate a delta between two revisions
1916 1926
1917 1927 The delta calculated is in binary form and is intended to be written to
1918 1928 revlog data directly. So this function needs raw revision data.
1919 1929 """
1920 1930 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1921 1931 return bytes(self._chunk(rev2))
1922 1932
1923 1933 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1924 1934
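# Editor's note: an illustrative sketch, not part of revlog.py. The delta
# returned by revdiff() is a binary patch over *raw* revision data, so
# applying it to rawdata(rev1) should rebuild rawdata(rev2) (assuming
# mdiff.patch, the single-delta sibling of the mdiff.patches call used
# elsewhere in this file, is available).
def _example_check_revdiff(rlog, rev1, rev2):
    delta = rlog.revdiff(rev1, rev2)
    return bytes(mdiff.patch(rlog.rawdata(rev1), delta)) == rlog.rawdata(rev2)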
1925 1935 def revision(self, nodeorrev, _df=None):
1926 1936 """return an uncompressed revision of a given node or revision
1927 1937 number.
1928 1938
1929 1939 _df - an existing file handle to read from. (internal-only)
1930 1940 """
1931 1941 return self._revisiondata(nodeorrev, _df)
1932 1942
1933 1943 def sidedata(self, nodeorrev, _df=None):
1934 1944 """a map of extra data related to the changeset but not part of the hash
1935 1945
1936 1946 This function currently returns a dictionary. However, a more advanced
1937 1947 mapping object will likely be used in the future for more
1938 1948 efficient/lazy code.
1939 1949 """
1940 1950 # deal with <nodeorrev> argument type
1941 1951 if isinstance(nodeorrev, int):
1942 1952 rev = nodeorrev
1943 1953 else:
1944 1954 rev = self.rev(nodeorrev)
1945 1955 return self._sidedata(rev)
1946 1956
1947 1957 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1948 1958 # deal with <nodeorrev> argument type
1949 1959 if isinstance(nodeorrev, int):
1950 1960 rev = nodeorrev
1951 1961 node = self.node(rev)
1952 1962 else:
1953 1963 node = nodeorrev
1954 1964 rev = None
1955 1965
1956 1966 # fast path the special `nullid` rev
1957 1967 if node == self.nullid:
1958 1968 return b""
1959 1969
1960 1970 # ``rawtext`` is the text as stored inside the revlog. Might be the
1961 1971 # revision or might need to be processed to retrieve the revision.
1962 1972 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1963 1973
1964 1974 if raw and validated:
1965 1975 # if we don't want to process the raw text and that raw
1966 1976 # text is cached, we can exit early.
1967 1977 return rawtext
1968 1978 if rev is None:
1969 1979 rev = self.rev(node)
1970 1980 # the revlog's flag for this revision
1971 1981 # (usually alter its state or content)
1972 1982 flags = self.flags(rev)
1973 1983
1974 1984 if validated and flags == REVIDX_DEFAULT_FLAGS:
1975 1985 # no extra flags set, no flag processor runs, text = rawtext
1976 1986 return rawtext
1977 1987
1978 1988 if raw:
1979 1989 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1980 1990 text = rawtext
1981 1991 else:
1982 1992 r = flagutil.processflagsread(self, rawtext, flags)
1983 1993 text, validatehash = r
1984 1994 if validatehash:
1985 1995 self.checkhash(text, node, rev=rev)
1986 1996 if not validated:
1987 1997 self._revisioncache = (node, rev, rawtext)
1988 1998
1989 1999 return text
1990 2000
1991 2001 def _rawtext(self, node, rev, _df=None):
1992 2002 """return the possibly unvalidated rawtext for a revision
1993 2003
1994 2004 returns (rev, rawtext, validated)
1995 2005 """
1996 2006
1997 2007 # revision in the cache (could be useful to apply delta)
1998 2008 cachedrev = None
1999 2009 # An intermediate text to apply deltas to
2000 2010 basetext = None
2001 2011
2002 2012 # Check if we have the entry in cache
2003 2013 # The cache entry looks like (node, rev, rawtext)
2004 2014 if self._revisioncache:
2005 2015 if self._revisioncache[0] == node:
2006 2016 return (rev, self._revisioncache[2], True)
2007 2017 cachedrev = self._revisioncache[1]
2008 2018
2009 2019 if rev is None:
2010 2020 rev = self.rev(node)
2011 2021
2012 2022 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2013 2023 if stopped:
2014 2024 basetext = self._revisioncache[2]
2015 2025
2016 2026 # drop cache to save memory, the caller is expected to
2017 2027 # update self._revisioncache after validating the text
2018 2028 self._revisioncache = None
2019 2029
2020 2030 targetsize = None
2021 2031 rawsize = self.index[rev][2]
2022 2032 if 0 <= rawsize:
2023 2033 targetsize = 4 * rawsize
2024 2034
2025 2035 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2026 2036 if basetext is None:
2027 2037 basetext = bytes(bins[0])
2028 2038 bins = bins[1:]
2029 2039
2030 2040 rawtext = mdiff.patches(basetext, bins)
2031 2041 del basetext # let us have a chance to free memory early
2032 2042 return (rev, rawtext, False)
2033 2043
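# Editor's note: an illustrative sketch, not part of revlog.py, spelling out
# the reconstruction performed by _rawtext() above: take the full text at the
# base of the delta chain, then apply each stored delta in order (assuming
# mdiff.patch applies a single binary delta).
def _example_rebuild_rawtext(rlog, rev):
    chain, _stopped = rlog._deltachain(rev)
    text = bytes(rlog._chunk(chain[0]))  # the chain base is stored as full text
    for deltarev in chain[1:]:
        text = mdiff.patch(text, rlog._chunk(deltarev))
    return text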
2034 2044 def _sidedata(self, rev):
2035 2045 """Return the sidedata for a given revision number."""
2036 2046 index_entry = self.index[rev]
2037 2047 sidedata_offset = index_entry[8]
2038 2048 sidedata_size = index_entry[9]
2039 2049
2040 2050 if self._inline:
2041 2051 sidedata_offset += self.index.entry_size * (1 + rev)
2042 2052 if sidedata_size == 0:
2043 2053 return {}
2044 2054
2045 2055 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2046 2056 filename = self._sidedatafile
2047 2057 end = self._docket.sidedata_end
2048 2058 offset = sidedata_offset
2049 2059 length = sidedata_size
2050 2060 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2051 2061 raise error.RevlogError(m)
2052 2062
2053 2063 comp_segment = self._segmentfile_sidedata.read_chunk(
2054 2064 sidedata_offset, sidedata_size
2055 2065 )
2056 2066
2057 2067 comp = self.index[rev][11]
2058 2068 if comp == COMP_MODE_PLAIN:
2059 2069 segment = comp_segment
2060 2070 elif comp == COMP_MODE_DEFAULT:
2061 2071 segment = self._decompressor(comp_segment)
2062 2072 elif comp == COMP_MODE_INLINE:
2063 2073 segment = self.decompress(comp_segment)
2064 2074 else:
2065 2075 msg = b'unknown compression mode %d'
2066 2076 msg %= comp
2067 2077 raise error.RevlogError(msg)
2068 2078
2069 2079 sidedata = sidedatautil.deserialize_sidedata(segment)
2070 2080 return sidedata
2071 2081
2072 2082 def rawdata(self, nodeorrev, _df=None):
2073 2083 """return an uncompressed raw data of a given node or revision number.
2074 2084
2075 2085 _df - an existing file handle to read from. (internal-only)
2076 2086 """
2077 2087 return self._revisiondata(nodeorrev, _df, raw=True)
2078 2088
2079 2089 def hash(self, text, p1, p2):
2080 2090 """Compute a node hash.
2081 2091
2082 2092 Available as a function so that subclasses can replace the hash
2083 2093 as needed.
2084 2094 """
2085 2095 return storageutil.hashrevisionsha1(text, p1, p2)
2086 2096
2087 2097 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2088 2098 """Check node hash integrity.
2089 2099
2090 2100 Available as a function so that subclasses can extend hash mismatch
2091 2101 behaviors as needed.
2092 2102 """
2093 2103 try:
2094 2104 if p1 is None and p2 is None:
2095 2105 p1, p2 = self.parents(node)
2096 2106 if node != self.hash(text, p1, p2):
2097 2107 # Clear the revision cache on hash failure. The revision cache
2098 2108 # only stores the raw revision and clearing the cache does have
2099 2109 # the side-effect that we won't have a cache hit when the raw
2100 2110 # revision data is accessed. But this case should be rare and
2101 2111 # it is extra work to teach the cache about the hash
2102 2112 # verification state.
2103 2113 if self._revisioncache and self._revisioncache[0] == node:
2104 2114 self._revisioncache = None
2105 2115
2106 2116 revornode = rev
2107 2117 if revornode is None:
2108 2118 revornode = templatefilters.short(hex(node))
2109 2119 raise error.RevlogError(
2110 2120 _(b"integrity check failed on %s:%s")
2111 2121 % (self.display_id, pycompat.bytestr(revornode))
2112 2122 )
2113 2123 except error.RevlogError:
2114 2124 if self._censorable and storageutil.iscensoredtext(text):
2115 2125 raise error.CensoredNodeError(self.display_id, node, text)
2116 2126 raise
2117 2127
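# Editor's note: an illustrative sketch, not part of revlog.py. A node id is
# the SHA-1 of the raw text keyed by its two parents, so integrity can be
# re-checked from stored data alone (for revisions without special flags,
# which is what checkhash() generalizes).
def _example_verify(rlog, rev):
    node = rlog.node(rev)
    p1, p2 = rlog.parents(node)
    return rlog.hash(rlog.rawdata(rev), p1, p2) == node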
2118 2128 def _enforceinlinesize(self, tr, side_write=True):
2119 2129 """Check if the revlog is too big for inline and convert if so.
2120 2130
2121 2131 This should be called after revisions are added to the revlog. If the
2122 2132 revlog has grown too large to be an inline revlog, it will convert it
2123 2133 to use multiple index and data files.
2124 2134 """
2125 2135 tiprev = len(self) - 1
2126 2136 total_size = self.start(tiprev) + self.length(tiprev)
2127 2137 if not self._inline or total_size < _maxinline:
2128 2138 return
2129 2139
2130 2140 troffset = tr.findoffset(self._indexfile)
2131 2141 if troffset is None:
2132 2142 raise error.RevlogError(
2133 2143 _(b"%s not found in the transaction") % self._indexfile
2134 2144 )
2135 2145 if troffset:
2136 2146 tr.addbackup(self._indexfile, for_offset=True)
2137 2147 tr.add(self._datafile, 0)
2138 2148
2139 2149 existing_handles = False
2140 2150 if self._writinghandles is not None:
2141 2151 existing_handles = True
2142 2152 fp = self._writinghandles[0]
2143 2153 fp.flush()
2144 2154 fp.close()
2145 2155 # We can't use the cached file handle after close(). So prevent
2146 2156 # its usage.
2147 2157 self._writinghandles = None
2148 2158 self._segmentfile.writing_handle = None
2149 2159 # No need to deal with the sidedata writing handle as it is only
2150 2160 # relevant to revlog-v2, which is never inline and so never
2151 2161 # reaches this code
2152 2162 if side_write:
2153 2163 old_index_file_path = self._indexfile
2154 2164 new_index_file_path = self._indexfile + b'.s'
2155 2165 opener = self.opener
2156 2166 weak_self = weakref.ref(self)
2157 2167
2158 2168 # the "split" index replaces the real index when the transaction is finalized
2159 2169 def finalize_callback(tr):
2160 2170 opener.rename(
2161 2171 new_index_file_path,
2162 2172 old_index_file_path,
2163 2173 checkambig=True,
2164 2174 )
2165 2175 maybe_self = weak_self()
2166 2176 if maybe_self is not None:
2167 2177 maybe_self._indexfile = old_index_file_path
2168 2178
2169 2179 def abort_callback(tr):
2170 2180 maybe_self = weak_self()
2171 2181 if maybe_self is not None:
2172 2182 maybe_self._indexfile = old_index_file_path
2173 2183
2174 2184 tr.registertmp(new_index_file_path)
2175 2185 if self.target[1] is not None:
2176 2186 callback_id = b'000-revlog-split-%d-%s' % self.target
2177 2187 else:
2178 2188 callback_id = b'000-revlog-split-%d' % self.target[0]
2179 2189 tr.addfinalize(callback_id, finalize_callback)
2180 2190 tr.addabort(callback_id, abort_callback)
2181 2191
2182 2192 new_dfh = self._datafp(b'w+')
2183 2193 new_dfh.truncate(0) # drop any potentially existing data
2184 2194 try:
2185 2195 with self._indexfp() as read_ifh:
2186 2196 for r in self:
2187 2197 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2188 2198 new_dfh.flush()
2189 2199
2190 2200 if side_write:
2191 2201 self._indexfile = new_index_file_path
2192 2202 with self.__index_new_fp() as fp:
2193 2203 self._format_flags &= ~FLAG_INLINE_DATA
2194 2204 self._inline = False
2195 2205 for i in self:
2196 2206 e = self.index.entry_binary(i)
2197 2207 if i == 0 and self._docket is None:
2198 2208 header = self._format_flags | self._format_version
2199 2209 header = self.index.pack_header(header)
2200 2210 e = header + e
2201 2211 fp.write(e)
2202 2212 if self._docket is not None:
2203 2213 self._docket.index_end = fp.tell()
2204 2214
2205 2215 # If we don't use side-write, the temp file replaces the real
2206 2216 # index when we exit the context manager
2207 2217
2208 2218 nodemaputil.setup_persistent_nodemap(tr, self)
2209 2219 self._segmentfile = randomaccessfile.randomaccessfile(
2210 2220 self.opener,
2211 2221 self._datafile,
2212 2222 self._chunkcachesize,
2213 2223 )
2214 2224
2215 2225 if existing_handles:
2216 2226 # switched from inline to conventional; reopen the index
2217 2227 ifh = self.__index_write_fp()
2218 2228 self._writinghandles = (ifh, new_dfh, None)
2219 2229 self._segmentfile.writing_handle = new_dfh
2220 2230 new_dfh = None
2221 2231 # No need to deal with the sidedata writing handle as it is only
2222 2232 # relevant to revlog-v2, which is never inline and so never
2223 2233 # reaches this code
2224 2234 finally:
2225 2235 if new_dfh is not None:
2226 2236 new_dfh.close()
2227 2237
2228 2238 def _nodeduplicatecallback(self, transaction, node):
2229 2239 """called when trying to add a node already stored."""
2230 2240
2231 2241 @contextlib.contextmanager
2232 2242 def reading(self):
2233 2243 """Context manager that keeps data and sidedata files open for reading"""
2234 2244 with self._segmentfile.reading():
2235 2245 with self._segmentfile_sidedata.reading():
2236 2246 yield
2237 2247
2238 2248 @contextlib.contextmanager
2239 2249 def _writing(self, transaction):
2240 2250 if self._trypending:
2241 2251 msg = b'try to write in a `trypending` revlog: %s'
2242 2252 msg %= self.display_id
2243 2253 raise error.ProgrammingError(msg)
2244 2254 if self._writinghandles is not None:
2245 2255 yield
2246 2256 else:
2247 2257 ifh = dfh = sdfh = None
2248 2258 try:
2249 2259 r = len(self)
2250 2260 # opening the data file.
2251 2261 dsize = 0
2252 2262 if r:
2253 2263 dsize = self.end(r - 1)
2254 2264 dfh = None
2255 2265 if not self._inline:
2256 2266 try:
2257 2267 dfh = self._datafp(b"r+")
2258 2268 if self._docket is None:
2259 2269 dfh.seek(0, os.SEEK_END)
2260 2270 else:
2261 2271 dfh.seek(self._docket.data_end, os.SEEK_SET)
2262 2272 except FileNotFoundError:
2263 2273 dfh = self._datafp(b"w+")
2264 2274 transaction.add(self._datafile, dsize)
2265 2275 if self._sidedatafile is not None:
2266 2276 # revlog-v2 does not inline, help Pytype
2267 2277 assert dfh is not None
2268 2278 try:
2269 2279 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2270 2280 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2271 2281 except FileNotFoundError:
2272 2282 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2273 2283 transaction.add(
2274 2284 self._sidedatafile, self._docket.sidedata_end
2275 2285 )
2276 2286
2277 2287 # opening the index file.
2278 2288 isize = r * self.index.entry_size
2279 2289 ifh = self.__index_write_fp()
2280 2290 if self._inline:
2281 2291 transaction.add(self._indexfile, dsize + isize)
2282 2292 else:
2283 2293 transaction.add(self._indexfile, isize)
2284 2294 # exposing all file handle for writing.
2285 2295 self._writinghandles = (ifh, dfh, sdfh)
2286 2296 self._segmentfile.writing_handle = ifh if self._inline else dfh
2287 2297 self._segmentfile_sidedata.writing_handle = sdfh
2288 2298 yield
2289 2299 if self._docket is not None:
2290 2300 self._write_docket(transaction)
2291 2301 finally:
2292 2302 self._writinghandles = None
2293 2303 self._segmentfile.writing_handle = None
2294 2304 self._segmentfile_sidedata.writing_handle = None
2295 2305 if dfh is not None:
2296 2306 dfh.close()
2297 2307 if sdfh is not None:
2298 2308 sdfh.close()
2299 2309 # closing the index file last to avoid exposing references to
2300 2310 # potentially unflushed data content.
2301 2311 if ifh is not None:
2302 2312 ifh.close()
2303 2313
2304 2314 def _write_docket(self, transaction):
2305 2315 """write the current docket on disk
2306 2316
2307 2317 Exists as a method to help the changelog implement transaction logic
2308 2318
2309 2319 We could also imagine using the same transaction logic for all revlogs
2310 2320 since dockets are cheap."""
2311 2321 self._docket.write(transaction)
2312 2322
2313 2323 def addrevision(
2314 2324 self,
2315 2325 text,
2316 2326 transaction,
2317 2327 link,
2318 2328 p1,
2319 2329 p2,
2320 2330 cachedelta=None,
2321 2331 node=None,
2322 2332 flags=REVIDX_DEFAULT_FLAGS,
2323 2333 deltacomputer=None,
2324 2334 sidedata=None,
2325 2335 ):
2326 2336 """add a revision to the log
2327 2337
2328 2338 text - the revision data to add
2329 2339 transaction - the transaction object used for rollback
2330 2340 link - the linkrev data to add
2331 2341 p1, p2 - the parent nodeids of the revision
2332 2342 cachedelta - an optional precomputed delta
2333 2343 node - nodeid of revision; typically node is not specified, and it is
2334 2344 computed by default as hash(text, p1, p2), however subclasses might
2335 2345 use a different hashing method (and override checkhash() in that case)
2336 2346 flags - the known flags to set on the revision
2337 2347 deltacomputer - an optional deltacomputer instance shared between
2338 2348 multiple calls
2339 2349 """
2340 2350 if link == nullrev:
2341 2351 raise error.RevlogError(
2342 2352 _(b"attempted to add linkrev -1 to %s") % self.display_id
2343 2353 )
2344 2354
2345 2355 if sidedata is None:
2346 2356 sidedata = {}
2347 2357 elif sidedata and not self.hassidedata:
2348 2358 raise error.ProgrammingError(
2349 2359 _(b"trying to add sidedata to a revlog who don't support them")
2350 2360 )
2351 2361
2352 2362 if flags:
2353 2363 node = node or self.hash(text, p1, p2)
2354 2364
2355 2365 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2356 2366
2357 2367 # If the flag processor modifies the revision data, ignore any provided
2358 2368 # cachedelta.
2359 2369 if rawtext != text:
2360 2370 cachedelta = None
2361 2371
2362 2372 if len(rawtext) > _maxentrysize:
2363 2373 raise error.RevlogError(
2364 2374 _(
2365 2375 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2366 2376 )
2367 2377 % (self.display_id, len(rawtext))
2368 2378 )
2369 2379
2370 2380 node = node or self.hash(rawtext, p1, p2)
2371 2381 rev = self.index.get_rev(node)
2372 2382 if rev is not None:
2373 2383 return rev
2374 2384
2375 2385 if validatehash:
2376 2386 self.checkhash(rawtext, node, p1=p1, p2=p2)
2377 2387
2378 2388 return self.addrawrevision(
2379 2389 rawtext,
2380 2390 transaction,
2381 2391 link,
2382 2392 p1,
2383 2393 p2,
2384 2394 node,
2385 2395 flags,
2386 2396 cachedelta=cachedelta,
2387 2397 deltacomputer=deltacomputer,
2388 2398 sidedata=sidedata,
2389 2399 )
2390 2400
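# Editor's note: an illustrative sketch, not part of revlog.py, of a minimal
# addrevision() call. ``tr`` is assumed to be an open transaction, ``parent``
# an existing node (or rlog.nullid for a root revision), and ``linkrev`` a
# valid changelog revision (it must not be nullrev).
def _example_append(rlog, tr, text, parent, linkrev):
    return rlog.addrevision(text, tr, linkrev, parent, rlog.nullid)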
2391 2401 def addrawrevision(
2392 2402 self,
2393 2403 rawtext,
2394 2404 transaction,
2395 2405 link,
2396 2406 p1,
2397 2407 p2,
2398 2408 node,
2399 2409 flags,
2400 2410 cachedelta=None,
2401 2411 deltacomputer=None,
2402 2412 sidedata=None,
2403 2413 ):
2404 2414 """add a raw revision with known flags, node and parents
2405 2415 useful when reusing a revision not stored in this revlog (ex: received
2406 2416 over wire, or read from an external bundle).
2407 2417 """
2408 2418 with self._writing(transaction):
2409 2419 return self._addrevision(
2410 2420 node,
2411 2421 rawtext,
2412 2422 transaction,
2413 2423 link,
2414 2424 p1,
2415 2425 p2,
2416 2426 flags,
2417 2427 cachedelta,
2418 2428 deltacomputer=deltacomputer,
2419 2429 sidedata=sidedata,
2420 2430 )
2421 2431
2422 2432 def compress(self, data):
2423 2433 """Generate a possibly-compressed representation of data."""
2424 2434 if not data:
2425 2435 return b'', data
2426 2436
2427 2437 compressed = self._compressor.compress(data)
2428 2438
2429 2439 if compressed:
2430 2440 # The revlog compressor added the header in the returned data.
2431 2441 return b'', compressed
2432 2442
2433 2443 if data[0:1] == b'\0':
2434 2444 return b'', data
2435 2445 return b'u', data
2436 2446
2437 2447 def decompress(self, data):
2438 2448 """Decompress a revlog chunk.
2439 2449
2440 2450 The chunk is expected to begin with a header identifying the
2441 2451 format type so it can be routed to an appropriate decompressor.
2442 2452 """
2443 2453 if not data:
2444 2454 return data
2445 2455
2446 2456 # Revlogs are read much more frequently than they are written and many
2447 2457 # chunks only take microseconds to decompress, so performance is
2448 2458 # important here.
2449 2459 #
2450 2460 # We can make a few assumptions about revlogs:
2451 2461 #
2452 2462 # 1) the majority of chunks will be compressed (as opposed to inline
2453 2463 # raw data).
2454 2464 # 2) decompressing *any* data will likely be at least 10x slower than
2455 2465 # returning raw inline data.
2456 2466 # 3) we want to prioritize common and officially supported compression
2457 2467 # engines
2458 2468 #
2459 2469 # It follows that we want to optimize for "decompress compressed data
2460 2470 # when encoded with common and officially supported compression engines"
2461 2471 # case over "raw data" and "data encoded by less common or non-official
2462 2472 # compression engines." That is why we have the inline lookup first
2463 2473 # followed by the compengines lookup.
2464 2474 #
2465 2475 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2466 2476 # compressed chunks. And this matters for changelog and manifest reads.
2467 2477 t = data[0:1]
2468 2478
2469 2479 if t == b'x':
2470 2480 try:
2471 2481 return _zlibdecompress(data)
2472 2482 except zlib.error as e:
2473 2483 raise error.RevlogError(
2474 2484 _(b'revlog decompress error: %s')
2475 2485 % stringutil.forcebytestr(e)
2476 2486 )
2477 2487 # '\0' is more common than 'u' so it goes first.
2478 2488 elif t == b'\0':
2479 2489 return data
2480 2490 elif t == b'u':
2481 2491 return util.buffer(data, 1)
2482 2492
2483 2493 compressor = self._get_decompressor(t)
2484 2494
2485 2495 return compressor.decompress(data)
2486 2496
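# Editor's note: an illustrative sketch, not part of revlog.py, of the
# one-byte header convention shared by compress() and decompress() above:
# b'u' marks uncompressed data, a leading NUL byte is stored as-is, and
# anything else selects a compression engine. The round trip is lossless.
def _example_compression_roundtrip(rlog, data):
    header, packed = rlog.compress(data)
    return bytes(rlog.decompress(header + packed)) == data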
2487 2497 def _addrevision(
2488 2498 self,
2489 2499 node,
2490 2500 rawtext,
2491 2501 transaction,
2492 2502 link,
2493 2503 p1,
2494 2504 p2,
2495 2505 flags,
2496 2506 cachedelta,
2497 2507 alwayscache=False,
2498 2508 deltacomputer=None,
2499 2509 sidedata=None,
2500 2510 ):
2501 2511 """internal function to add revisions to the log
2502 2512
2503 2513 see addrevision for argument descriptions.
2504 2514
2505 2515 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2506 2516
2507 2517 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2508 2518 be used.
2509 2519
2510 2520 invariants:
2511 2521 - rawtext is optional (can be None); if not set, cachedelta must be set.
2512 2522 if both are set, they must correspond to each other.
2513 2523 """
2514 2524 if node == self.nullid:
2515 2525 raise error.RevlogError(
2516 2526 _(b"%s: attempt to add null revision") % self.display_id
2517 2527 )
2518 2528 if (
2519 2529 node == self.nodeconstants.wdirid
2520 2530 or node in self.nodeconstants.wdirfilenodeids
2521 2531 ):
2522 2532 raise error.RevlogError(
2523 2533 _(b"%s: attempt to add wdir revision") % self.display_id
2524 2534 )
2525 2535 if self._writinghandles is None:
2526 2536 msg = b'adding revision outside `revlog._writing` context'
2527 2537 raise error.ProgrammingError(msg)
2528 2538
2529 2539 if self._inline:
2530 2540 fh = self._writinghandles[0]
2531 2541 else:
2532 2542 fh = self._writinghandles[1]
2533 2543
2534 2544 btext = [rawtext]
2535 2545
2536 2546 curr = len(self)
2537 2547 prev = curr - 1
2538 2548
2539 2549 offset = self._get_data_offset(prev)
2540 2550
2541 2551 if self._concurrencychecker:
2542 2552 ifh, dfh, sdfh = self._writinghandles
2543 2553 # XXX no checking for the sidedata file
2544 2554 if self._inline:
2545 2555 # offset is "as if" it were in the .d file, so we need to add on
2546 2556 # the size of the entry metadata.
2547 2557 self._concurrencychecker(
2548 2558 ifh, self._indexfile, offset + curr * self.index.entry_size
2549 2559 )
2550 2560 else:
2551 2561 # Entries in the .i are a consistent size.
2552 2562 self._concurrencychecker(
2553 2563 ifh, self._indexfile, curr * self.index.entry_size
2554 2564 )
2555 2565 self._concurrencychecker(dfh, self._datafile, offset)
2556 2566
2557 2567 p1r, p2r = self.rev(p1), self.rev(p2)
2558 2568
2559 2569 # full versions are inserted when the needed deltas
2560 2570 # become comparable to the uncompressed text
2561 2571 if rawtext is None:
2562 2572 # need the rawtext size before it is changed by flag processors, which is
2563 2573 # the non-raw size. use revlog explicitly to avoid filelog's extra
2564 2574 # logic that might remove metadata size.
2565 2575 textlen = mdiff.patchedsize(
2566 2576 revlog.size(self, cachedelta[0]), cachedelta[1]
2567 2577 )
2568 2578 else:
2569 2579 textlen = len(rawtext)
2570 2580
2571 2581 if deltacomputer is None:
2572 2582 write_debug = None
2573 2583 if self._debug_delta:
2574 2584 write_debug = transaction._report
2575 2585 deltacomputer = deltautil.deltacomputer(
2576 2586 self, write_debug=write_debug
2577 2587 )
2578 2588
2579 2589 if cachedelta is not None and len(cachedelta) == 2:
2580 2590 # If the cached delta has no information about how it should be
2581 2591 # reused, add the default reuse instruction according to the
2582 2592 # revlog's configuration.
2583 2593 if self._generaldelta and self._lazydeltabase:
2584 2594 delta_base_reuse = DELTA_BASE_REUSE_TRY
2585 2595 else:
2586 2596 delta_base_reuse = DELTA_BASE_REUSE_NO
2587 2597 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2588 2598
2589 2599 revinfo = revlogutils.revisioninfo(
2590 2600 node,
2591 2601 p1,
2592 2602 p2,
2593 2603 btext,
2594 2604 textlen,
2595 2605 cachedelta,
2596 2606 flags,
2597 2607 )
2598 2608
2599 2609 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2600 2610
2601 2611 compression_mode = COMP_MODE_INLINE
2602 2612 if self._docket is not None:
2603 2613 default_comp = self._docket.default_compression_header
2604 2614 r = deltautil.delta_compression(default_comp, deltainfo)
2605 2615 compression_mode, deltainfo = r
2606 2616
2607 2617 sidedata_compression_mode = COMP_MODE_INLINE
2608 2618 if sidedata and self.hassidedata:
2609 2619 sidedata_compression_mode = COMP_MODE_PLAIN
2610 2620 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2611 2621 sidedata_offset = self._docket.sidedata_end
2612 2622 h, comp_sidedata = self.compress(serialized_sidedata)
2613 2623 if (
2614 2624 h != b'u'
2615 2625 and comp_sidedata[0:1] != b'\0'
2616 2626 and len(comp_sidedata) < len(serialized_sidedata)
2617 2627 ):
2618 2628 assert not h
2619 2629 if (
2620 2630 comp_sidedata[0:1]
2621 2631 == self._docket.default_compression_header
2622 2632 ):
2623 2633 sidedata_compression_mode = COMP_MODE_DEFAULT
2624 2634 serialized_sidedata = comp_sidedata
2625 2635 else:
2626 2636 sidedata_compression_mode = COMP_MODE_INLINE
2627 2637 serialized_sidedata = comp_sidedata
2628 2638 else:
2629 2639 serialized_sidedata = b""
2630 2640 # Don't store the offset if the sidedata is empty; that way
2631 2641 # we can easily detect empty sidedata and it will be no different
2632 2642 # from sidedata we manually add.
2633 2643 sidedata_offset = 0
2634 2644
2635 2645 rank = RANK_UNKNOWN
2636 2646 if self._compute_rank:
2637 2647 if (p1r, p2r) == (nullrev, nullrev):
2638 2648 rank = 1
2639 2649 elif p1r != nullrev and p2r == nullrev:
2640 2650 rank = 1 + self.fast_rank(p1r)
2641 2651 elif p1r == nullrev and p2r != nullrev:
2642 2652 rank = 1 + self.fast_rank(p2r)
2643 2653 else: # merge node
2644 2654 if rustdagop is not None and self.index.rust_ext_compat:
2645 2655 rank = rustdagop.rank(self.index, p1r, p2r)
2646 2656 else:
2647 2657 pmin, pmax = sorted((p1r, p2r))
2648 2658 rank = 1 + self.fast_rank(pmax)
2649 2659 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2650 2660
2651 2661 e = revlogutils.entry(
2652 2662 flags=flags,
2653 2663 data_offset=offset,
2654 2664 data_compressed_length=deltainfo.deltalen,
2655 2665 data_uncompressed_length=textlen,
2656 2666 data_compression_mode=compression_mode,
2657 2667 data_delta_base=deltainfo.base,
2658 2668 link_rev=link,
2659 2669 parent_rev_1=p1r,
2660 2670 parent_rev_2=p2r,
2661 2671 node_id=node,
2662 2672 sidedata_offset=sidedata_offset,
2663 2673 sidedata_compressed_length=len(serialized_sidedata),
2664 2674 sidedata_compression_mode=sidedata_compression_mode,
2665 2675 rank=rank,
2666 2676 )
2667 2677
2668 2678 self.index.append(e)
2669 2679 entry = self.index.entry_binary(curr)
2670 2680 if curr == 0 and self._docket is None:
2671 2681 header = self._format_flags | self._format_version
2672 2682 header = self.index.pack_header(header)
2673 2683 entry = header + entry
2674 2684 self._writeentry(
2675 2685 transaction,
2676 2686 entry,
2677 2687 deltainfo.data,
2678 2688 link,
2679 2689 offset,
2680 2690 serialized_sidedata,
2681 2691 sidedata_offset,
2682 2692 )
2683 2693
2684 2694 rawtext = btext[0]
2685 2695
2686 2696 if alwayscache and rawtext is None:
2687 2697 rawtext = deltacomputer.buildtext(revinfo, fh)
2688 2698
2689 2699 if type(rawtext) == bytes: # only accept immutable objects
2690 2700 self._revisioncache = (node, curr, rawtext)
2691 2701 self._chainbasecache[curr] = deltainfo.chainbase
2692 2702 return curr
2693 2703
2694 2704 def _get_data_offset(self, prev):
2695 2705 """Returns the current offset in the (in-transaction) data file.
2696 2706 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2697 2707 file to store that information: since sidedata can be rewritten to the
2698 2708 end of the data file within a transaction, you can have cases where, for
2699 2709 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2700 2710 to `n - 1`'s sidedata being written after `n`'s data.
2701 2711
2702 2712 TODO cache this in a docket file before getting out of experimental."""
2703 2713 if self._docket is None:
2704 2714 return self.end(prev)
2705 2715 else:
2706 2716 return self._docket.data_end
2707 2717
2708 2718 def _writeentry(
2709 2719 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2710 2720 ):
2711 2721 # Files opened in a+ mode have inconsistent behavior on various
2712 2722 # platforms. Windows requires that a file positioning call be made
2713 2723 # when the file handle transitions between reads and writes. See
2714 2724 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2715 2725 # platforms, Python or the platform itself can be buggy. Some versions
2716 2726 # of Solaris have been observed to not append at the end of the file
2717 2727 # if the file was seeked to before the end. See issue4943 for more.
2718 2728 #
2719 2729 # We work around this issue by inserting a seek() before writing.
2720 2730 # Note: This is likely not necessary on Python 3. However, because
2721 2731 # the file handle is reused for reads and may be seeked there, we need
2722 2732 # to be careful before changing this.
2723 2733 if self._writinghandles is None:
2724 2734 msg = b'adding revision outside `revlog._writing` context'
2725 2735 raise error.ProgrammingError(msg)
2726 2736 ifh, dfh, sdfh = self._writinghandles
2727 2737 if self._docket is None:
2728 2738 ifh.seek(0, os.SEEK_END)
2729 2739 else:
2730 2740 ifh.seek(self._docket.index_end, os.SEEK_SET)
2731 2741 if dfh:
2732 2742 if self._docket is None:
2733 2743 dfh.seek(0, os.SEEK_END)
2734 2744 else:
2735 2745 dfh.seek(self._docket.data_end, os.SEEK_SET)
2736 2746 if sdfh:
2737 2747 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2738 2748
2739 2749 curr = len(self) - 1
2740 2750 if not self._inline:
2741 2751 transaction.add(self._datafile, offset)
2742 2752 if self._sidedatafile:
2743 2753 transaction.add(self._sidedatafile, sidedata_offset)
2744 2754 transaction.add(self._indexfile, curr * len(entry))
2745 2755 if data[0]:
2746 2756 dfh.write(data[0])
2747 2757 dfh.write(data[1])
2748 2758 if sidedata:
2749 2759 sdfh.write(sidedata)
2750 2760 ifh.write(entry)
2751 2761 else:
2752 2762 offset += curr * self.index.entry_size
2753 2763 transaction.add(self._indexfile, offset)
2754 2764 ifh.write(entry)
2755 2765 ifh.write(data[0])
2756 2766 ifh.write(data[1])
2757 2767 assert not sidedata
2758 2768 self._enforceinlinesize(transaction)
2759 2769 if self._docket is not None:
2760 2770 # revlog-v2 always has 3 writing handles, help Pytype
2761 2771 wh1 = self._writinghandles[0]
2762 2772 wh2 = self._writinghandles[1]
2763 2773 wh3 = self._writinghandles[2]
2764 2774 assert wh1 is not None
2765 2775 assert wh2 is not None
2766 2776 assert wh3 is not None
2767 2777 self._docket.index_end = wh1.tell()
2768 2778 self._docket.data_end = wh2.tell()
2769 2779 self._docket.sidedata_end = wh3.tell()
2770 2780
2771 2781 nodemaputil.setup_persistent_nodemap(transaction, self)
2772 2782
2773 2783 def addgroup(
2774 2784 self,
2775 2785 deltas,
2776 2786 linkmapper,
2777 2787 transaction,
2778 2788 alwayscache=False,
2779 2789 addrevisioncb=None,
2780 2790 duplicaterevisioncb=None,
2781 2791 debug_info=None,
2782 2792 delta_base_reuse_policy=None,
2783 2793 ):
2784 2794 """
2785 2795 add a delta group
2786 2796
2787 2797 given a set of deltas, add them to the revision log. the
2788 2798 first delta is against its parent, which should be in our
2789 2799 log, the rest are against the previous delta.
2790 2800
2791 2801 If ``addrevisioncb`` is defined, it will be called with arguments of
2792 2802 this revlog and the node that was added.
2793 2803 """
2794 2804
2795 2805 if self._adding_group:
2796 2806 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2797 2807
2798 2808 # read the default delta-base reuse policy from revlog config if the
2799 2809 # group did not specify one.
2800 2810 if delta_base_reuse_policy is None:
2801 2811 if self._generaldelta and self._lazydeltabase:
2802 2812 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2803 2813 else:
2804 2814 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2805 2815
2806 2816 self._adding_group = True
2807 2817 empty = True
2808 2818 try:
2809 2819 with self._writing(transaction):
2810 2820 write_debug = None
2811 2821 if self._debug_delta:
2812 2822 write_debug = transaction._report
2813 2823 deltacomputer = deltautil.deltacomputer(
2814 2824 self,
2815 2825 write_debug=write_debug,
2816 2826 debug_info=debug_info,
2817 2827 )
2818 2828 # loop through our set of deltas
2819 2829 for data in deltas:
2820 2830 (
2821 2831 node,
2822 2832 p1,
2823 2833 p2,
2824 2834 linknode,
2825 2835 deltabase,
2826 2836 delta,
2827 2837 flags,
2828 2838 sidedata,
2829 2839 ) = data
2830 2840 link = linkmapper(linknode)
2831 2841 flags = flags or REVIDX_DEFAULT_FLAGS
2832 2842
2833 2843 rev = self.index.get_rev(node)
2834 2844 if rev is not None:
2835 2845 # this can happen if two branches make the same change
2836 2846 self._nodeduplicatecallback(transaction, rev)
2837 2847 if duplicaterevisioncb:
2838 2848 duplicaterevisioncb(self, rev)
2839 2849 empty = False
2840 2850 continue
2841 2851
2842 2852 for p in (p1, p2):
2843 2853 if not self.index.has_node(p):
2844 2854 raise error.LookupError(
2845 2855 p, self.radix, _(b'unknown parent')
2846 2856 )
2847 2857
2848 2858 if not self.index.has_node(deltabase):
2849 2859 raise error.LookupError(
2850 2860 deltabase, self.display_id, _(b'unknown delta base')
2851 2861 )
2852 2862
2853 2863 baserev = self.rev(deltabase)
2854 2864
2855 2865 if baserev != nullrev and self.iscensored(baserev):
2856 2866 # if base is censored, delta must be full replacement in a
2857 2867 # single patch operation
2858 2868 hlen = struct.calcsize(b">lll")
2859 2869 oldlen = self.rawsize(baserev)
2860 2870 newlen = len(delta) - hlen
2861 2871 if delta[:hlen] != mdiff.replacediffheader(
2862 2872 oldlen, newlen
2863 2873 ):
2864 2874 raise error.CensoredBaseError(
2865 2875 self.display_id, self.node(baserev)
2866 2876 )
2867 2877
2868 2878 if not flags and self._peek_iscensored(baserev, delta):
2869 2879 flags |= REVIDX_ISCENSORED
2870 2880
2871 2881 # We assume consumers of addrevisioncb will want to retrieve
2872 2882 # the added revision, which will require a call to
2873 2883 # revision(). revision() will fast path if there is a cache
2874 2884 # hit. So, we tell _addrevision() to always cache in this case.
2875 2885 # We're only using addgroup() in the context of changegroup
2876 2886 # generation so the revision data can always be handled as raw
2877 2887 # by the flagprocessor.
2878 2888 rev = self._addrevision(
2879 2889 node,
2880 2890 None,
2881 2891 transaction,
2882 2892 link,
2883 2893 p1,
2884 2894 p2,
2885 2895 flags,
2886 2896 (baserev, delta, delta_base_reuse_policy),
2887 2897 alwayscache=alwayscache,
2888 2898 deltacomputer=deltacomputer,
2889 2899 sidedata=sidedata,
2890 2900 )
2891 2901
2892 2902 if addrevisioncb:
2893 2903 addrevisioncb(self, rev)
2894 2904 empty = False
2895 2905 finally:
2896 2906 self._adding_group = False
2897 2907 return not empty
2898 2908
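# Editor's note: an illustrative sketch, not part of revlog.py. addgroup()
# consumes an iterable of 8-tuples (node, p1, p2, linknode, deltabase, delta,
# flags, sidedata) as unpacked in the loop above; ``linkmapper`` translates
# link nodes into changelog revisions. Producing real changegroup deltas is
# the exchange layer's job; this only exercises the calling convention.
def _example_apply_group(rlog, tr, deltas, linkmapper):
    return rlog.addgroup(deltas, linkmapper, tr)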
2899 2909 def iscensored(self, rev):
2900 2910 """Check if a file revision is censored."""
2901 2911 if not self._censorable:
2902 2912 return False
2903 2913
2904 2914 return self.flags(rev) & REVIDX_ISCENSORED
2905 2915
2906 2916 def _peek_iscensored(self, baserev, delta):
2907 2917 """Quickly check if a delta produces a censored revision."""
2908 2918 if not self._censorable:
2909 2919 return False
2910 2920
2911 2921 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2912 2922
2913 2923 def getstrippoint(self, minlink):
2914 2924 """find the minimum rev that must be stripped to strip the linkrev
2915 2925
2916 2926 Returns a tuple containing the minimum rev and a set of all revs that
2917 2927 have linkrevs that will be broken by this strip.
2918 2928 """
2919 2929 return storageutil.resolvestripinfo(
2920 2930 minlink,
2921 2931 len(self) - 1,
2922 2932 self.headrevs(),
2923 2933 self.linkrev,
2924 2934 self.parentrevs,
2925 2935 )
2926 2936
2927 2937 def strip(self, minlink, transaction):
2928 2938 """truncate the revlog on the first revision with a linkrev >= minlink
2929 2939
2930 2940 This function is called when we're stripping revision minlink and
2931 2941 its descendants from the repository.
2932 2942
2933 2943 We have to remove all revisions with linkrev >= minlink, because
2934 2944 the equivalent changelog revisions will be renumbered after the
2935 2945 strip.
2936 2946
2937 2947 So we truncate the revlog on the first of these revisions, and
2938 2948 trust that the caller has saved the revisions that shouldn't be
2939 2949 removed and that it'll re-add them after this truncation.
2940 2950 """
2941 2951 if len(self) == 0:
2942 2952 return
2943 2953
2944 2954 rev, _ = self.getstrippoint(minlink)
2945 2955 if rev == len(self):
2946 2956 return
2947 2957
2948 2958 # first truncate the files on disk
2949 2959 data_end = self.start(rev)
2950 2960 if not self._inline:
2951 2961 transaction.add(self._datafile, data_end)
2952 2962 end = rev * self.index.entry_size
2953 2963 else:
2954 2964 end = data_end + (rev * self.index.entry_size)
2955 2965
2956 2966 if self._sidedatafile:
2957 2967 sidedata_end = self.sidedata_cut_off(rev)
2958 2968 transaction.add(self._sidedatafile, sidedata_end)
2959 2969
2960 2970 transaction.add(self._indexfile, end)
2961 2971 if self._docket is not None:
2962 2972 # XXX we could leverage the docket while stripping. However it is
2963 2973 # not powerful enough at the time of this comment
2964 2974 self._docket.index_end = end
2965 2975 self._docket.data_end = data_end
2966 2976 self._docket.sidedata_end = sidedata_end
2967 2977 self._docket.write(transaction, stripping=True)
2968 2978
2969 2979 # then reset internal state in memory to forget those revisions
2970 2980 self._revisioncache = None
2971 2981 self._chaininfocache = util.lrucachedict(500)
2972 2982 self._segmentfile.clear_cache()
2973 2983 self._segmentfile_sidedata.clear_cache()
2974 2984
2975 2985 del self.index[rev:-1]
2976 2986
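# Editor's note: an illustrative sketch, not part of revlog.py, pairing
# getstrippoint() with strip() as the docstrings above describe: learn which
# linkrevs would be broken, then truncate if there is anything to remove.
def _example_strip(rlog, tr, minlink):
    rev, broken = rlog.getstrippoint(minlink)
    if rev < len(rlog):
        rlog.strip(minlink, tr)
    return broken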
2977 2987 def checksize(self):
2978 2988 """Check size of index and data files
2979 2989
2980 2990 return a (dd, di) tuple.
2981 2991 - dd: extra bytes for the "data" file
2982 2992 - di: extra bytes for the "index" file
2983 2993
2984 2994 A healthy revlog will return (0, 0).
2985 2995 """
2986 2996 expected = 0
2987 2997 if len(self):
2988 2998 expected = max(0, self.end(len(self) - 1))
2989 2999
2990 3000 try:
2991 3001 with self._datafp() as f:
2992 3002 f.seek(0, io.SEEK_END)
2993 3003 actual = f.tell()
2994 3004 dd = actual - expected
2995 3005 except FileNotFoundError:
2996 3006 dd = 0
2997 3007
2998 3008 try:
2999 3009 f = self.opener(self._indexfile)
3000 3010 f.seek(0, io.SEEK_END)
3001 3011 actual = f.tell()
3002 3012 f.close()
3003 3013 s = self.index.entry_size
3004 3014 i = max(0, actual // s)
3005 3015 di = actual - (i * s)
3006 3016 if self._inline:
3007 3017 databytes = 0
3008 3018 for r in self:
3009 3019 databytes += max(0, self.length(r))
3010 3020 dd = 0
3011 3021 di = actual - len(self) * s - databytes
3012 3022 except FileNotFoundError:
3013 3023 di = 0
3014 3024
3015 3025 return (dd, di)
3016 3026
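# Editor's note: an illustrative sketch, not part of revlog.py, interpreting
# checksize(): a healthy revlog reports (0, 0); anything else means the data
# or index file is longer or shorter than what the index describes.
def _example_is_healthy(rlog):
    dd, di = rlog.checksize()
    return dd == 0 and di == 0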
3017 3027 def files(self):
3018 3028 res = [self._indexfile]
3019 3029 if self._docket_file is None:
3020 3030 if not self._inline:
3021 3031 res.append(self._datafile)
3022 3032 else:
3023 3033 res.append(self._docket_file)
3024 3034 res.extend(self._docket.old_index_filepaths(include_empty=False))
3025 3035 if self._docket.data_end:
3026 3036 res.append(self._datafile)
3027 3037 res.extend(self._docket.old_data_filepaths(include_empty=False))
3028 3038 if self._docket.sidedata_end:
3029 3039 res.append(self._sidedatafile)
3030 3040 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3031 3041 return res
3032 3042
3033 3043 def emitrevisions(
3034 3044 self,
3035 3045 nodes,
3036 3046 nodesorder=None,
3037 3047 revisiondata=False,
3038 3048 assumehaveparentrevisions=False,
3039 3049 deltamode=repository.CG_DELTAMODE_STD,
3040 3050 sidedata_helpers=None,
3041 3051 debug_info=None,
3042 3052 ):
3043 3053 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3044 3054 raise error.ProgrammingError(
3045 3055 b'unhandled value for nodesorder: %s' % nodesorder
3046 3056 )
3047 3057
3048 3058 if nodesorder is None and not self._generaldelta:
3049 3059 nodesorder = b'storage'
3050 3060
3051 3061 if (
3052 3062 not self._storedeltachains
3053 3063 and deltamode != repository.CG_DELTAMODE_PREV
3054 3064 ):
3055 3065 deltamode = repository.CG_DELTAMODE_FULL
3056 3066
3057 3067 return storageutil.emitrevisions(
3058 3068 self,
3059 3069 nodes,
3060 3070 nodesorder,
3061 3071 revlogrevisiondelta,
3062 3072 deltaparentfn=self.deltaparent,
3063 3073 candeltafn=self.candelta,
3064 3074 rawsizefn=self.rawsize,
3065 3075 revdifffn=self.revdiff,
3066 3076 flagsfn=self.flags,
3067 3077 deltamode=deltamode,
3068 3078 revisiondata=revisiondata,
3069 3079 assumehaveparentrevisions=assumehaveparentrevisions,
3070 3080 sidedata_helpers=sidedata_helpers,
3071 3081 debug_info=debug_info,
3072 3082 )
3073 3083
3074 3084 DELTAREUSEALWAYS = b'always'
3075 3085 DELTAREUSESAMEREVS = b'samerevs'
3076 3086 DELTAREUSENEVER = b'never'
3077 3087
3078 3088 DELTAREUSEFULLADD = b'fulladd'
3079 3089
3080 3090 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3081 3091
3082 3092 def clone(
3083 3093 self,
3084 3094 tr,
3085 3095 destrevlog,
3086 3096 addrevisioncb=None,
3087 3097 deltareuse=DELTAREUSESAMEREVS,
3088 3098 forcedeltabothparents=None,
3089 3099 sidedata_helpers=None,
3090 3100 ):
3091 3101 """Copy this revlog to another, possibly with format changes.
3092 3102
3093 3103 The destination revlog will contain the same revisions and nodes.
3094 3104 However, it may not be bit-for-bit identical due to e.g. delta encoding
3095 3105 differences.
3096 3106
3097 3107 The ``deltareuse`` argument controls how deltas from the existing revlog
3098 3108 are preserved in the destination revlog. The argument can have the
3099 3109 following values:
3100 3110
3101 3111 DELTAREUSEALWAYS
3102 3112 Deltas will always be reused (if possible), even if the destination
3103 3113 revlog would not select the same revisions for the delta. This is the
3104 3114 fastest mode of operation.
3105 3115 DELTAREUSESAMEREVS
3106 3116 Deltas will be reused if the destination revlog would pick the same
3107 3117 revisions for the delta. This mode strikes a balance between speed
3108 3118 and optimization.
3109 3119 DELTAREUSENEVER
3110 3120 Deltas will never be reused. This is the slowest mode of execution.
3111 3121 This mode can be used to recompute deltas (e.g. if the diff/delta
3112 3122 algorithm changes).
3113 3123 DELTAREUSEFULLADD
3114 3124 Revisions will be re-added as if they were new content. This is
3115 3125 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3116 3126 e.g. large file detection and handling.
3117 3127
3118 3128 Delta computation can be slow, so the choice of delta reuse policy can
3119 3129 significantly affect run time.
3120 3130
3121 3131 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3122 3132 two extremes. Deltas will be reused if they are appropriate. But if the
3123 3133 delta could choose a better revision, it will do so. This means if you
3124 3134 are converting a non-generaldelta revlog to a generaldelta revlog,
3125 3135 deltas will be recomputed if the delta's parent isn't a parent of the
3126 3136 revision.
3127 3137
3128 3138 In addition to the delta policy, the ``forcedeltabothparents``
3129 3139 argument controls whether to force compute deltas against both parents
3130 3140 for merges. If unset, the destination revlog's existing setting is kept.
3131 3141
3132 3142 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3133 3143 `sidedata_helpers`.
3134 3144 """
3135 3145 if deltareuse not in self.DELTAREUSEALL:
3136 3146 raise ValueError(
3137 3147 _(b'value for deltareuse invalid: %s') % deltareuse
3138 3148 )
3139 3149
3140 3150 if len(destrevlog):
3141 3151 raise ValueError(_(b'destination revlog is not empty'))
3142 3152
3143 3153 if getattr(self, 'filteredrevs', None):
3144 3154 raise ValueError(_(b'source revlog has filtered revisions'))
3145 3155 if getattr(destrevlog, 'filteredrevs', None):
3146 3156 raise ValueError(_(b'destination revlog has filtered revisions'))
3147 3157
3148 3158 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3149 3159 # if possible.
3150 3160 oldlazydelta = destrevlog._lazydelta
3151 3161 oldlazydeltabase = destrevlog._lazydeltabase
3152 3162 oldamd = destrevlog._deltabothparents
3153 3163
3154 3164 try:
3155 3165 if deltareuse == self.DELTAREUSEALWAYS:
3156 3166 destrevlog._lazydeltabase = True
3157 3167 destrevlog._lazydelta = True
3158 3168 elif deltareuse == self.DELTAREUSESAMEREVS:
3159 3169 destrevlog._lazydeltabase = False
3160 3170 destrevlog._lazydelta = True
3161 3171 elif deltareuse == self.DELTAREUSENEVER:
3162 3172 destrevlog._lazydeltabase = False
3163 3173 destrevlog._lazydelta = False
3164 3174
3165 3175 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3166 3176
3167 3177 self._clone(
3168 3178 tr,
3169 3179 destrevlog,
3170 3180 addrevisioncb,
3171 3181 deltareuse,
3172 3182 forcedeltabothparents,
3173 3183 sidedata_helpers,
3174 3184 )
3175 3185
3176 3186 finally:
3177 3187 destrevlog._lazydelta = oldlazydelta
3178 3188 destrevlog._lazydeltabase = oldlazydeltabase
3179 3189 destrevlog._deltabothparents = oldamd
3180 3190
3181 3191 def _clone(
3182 3192 self,
3183 3193 tr,
3184 3194 destrevlog,
3185 3195 addrevisioncb,
3186 3196 deltareuse,
3187 3197 forcedeltabothparents,
3188 3198 sidedata_helpers,
3189 3199 ):
3190 3200 """perform the core duty of `revlog.clone` after parameter processing"""
3191 3201 write_debug = None
3192 3202 if self._debug_delta:
3193 3203 write_debug = tr._report
3194 3204 deltacomputer = deltautil.deltacomputer(
3195 3205 destrevlog,
3196 3206 write_debug=write_debug,
3197 3207 )
3198 3208 index = self.index
3199 3209 for rev in self:
3200 3210 entry = index[rev]
3201 3211
3202 3212 # Some classes override linkrev to take filtered revs into
3203 3213 # account. Use raw entry from index.
3204 3214 flags = entry[0] & 0xFFFF
3205 3215 linkrev = entry[4]
3206 3216 p1 = index[entry[5]][7]
3207 3217 p2 = index[entry[6]][7]
3208 3218 node = entry[7]
3209 3219
3210 3220 # (Possibly) reuse the delta from the revlog if allowed and
3211 3221 # the revlog chunk is a delta.
3212 3222 cachedelta = None
3213 3223 rawtext = None
3214 3224 if deltareuse == self.DELTAREUSEFULLADD:
3215 3225 text = self._revisiondata(rev)
3216 3226 sidedata = self.sidedata(rev)
3217 3227
3218 3228 if sidedata_helpers is not None:
3219 3229 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3220 3230 self, sidedata_helpers, sidedata, rev
3221 3231 )
3222 3232 flags = flags | new_flags[0] & ~new_flags[1]
3223 3233
3224 3234 destrevlog.addrevision(
3225 3235 text,
3226 3236 tr,
3227 3237 linkrev,
3228 3238 p1,
3229 3239 p2,
3230 3240 cachedelta=cachedelta,
3231 3241 node=node,
3232 3242 flags=flags,
3233 3243 deltacomputer=deltacomputer,
3234 3244 sidedata=sidedata,
3235 3245 )
3236 3246 else:
3237 3247 if destrevlog._lazydelta:
3238 3248 dp = self.deltaparent(rev)
3239 3249 if dp != nullrev:
3240 3250 cachedelta = (dp, bytes(self._chunk(rev)))
3241 3251
3242 3252 sidedata = None
3243 3253 if not cachedelta:
3244 3254 rawtext = self._revisiondata(rev)
3245 3255 sidedata = self.sidedata(rev)
3246 3256 if sidedata is None:
3247 3257 sidedata = self.sidedata(rev)
3248 3258
3249 3259 if sidedata_helpers is not None:
3250 3260 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3251 3261 self, sidedata_helpers, sidedata, rev
3252 3262 )
3253 3263 flags = flags | new_flags[0] & ~new_flags[1]
3254 3264
3255 3265 with destrevlog._writing(tr):
3256 3266 destrevlog._addrevision(
3257 3267 node,
3258 3268 rawtext,
3259 3269 tr,
3260 3270 linkrev,
3261 3271 p1,
3262 3272 p2,
3263 3273 flags,
3264 3274 cachedelta,
3265 3275 deltacomputer=deltacomputer,
3266 3276 sidedata=sidedata,
3267 3277 )
3268 3278
3269 3279 if addrevisioncb:
3270 3280 addrevisioncb(self, rev, node)
3271 3281
3272 3282 def censorrevision(self, tr, censornode, tombstone=b''):
3273 3283 if self._format_version == REVLOGV0:
3274 3284 raise error.RevlogError(
3275 3285 _(b'cannot censor with version %d revlogs')
3276 3286 % self._format_version
3277 3287 )
3278 3288 elif self._format_version == REVLOGV1:
3279 3289 rewrite.v1_censor(self, tr, censornode, tombstone)
3280 3290 else:
3281 3291 rewrite.v2_censor(self, tr, censornode, tombstone)
3282 3292
3283 3293 def verifyintegrity(self, state):
3284 3294 """Verifies the integrity of the revlog.
3285 3295
3286 3296 Yields ``revlogproblem`` instances describing problems that are
3287 3297 found.
3288 3298 """
3289 3299 dd, di = self.checksize()
3290 3300 if dd:
3291 3301 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3292 3302 if di:
3293 3303 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3294 3304
3295 3305 version = self._format_version
3296 3306
3297 3307 # The verifier tells us what version revlog we should be.
3298 3308 if version != state[b'expectedversion']:
3299 3309 yield revlogproblem(
3300 3310 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3301 3311 % (self.display_id, version, state[b'expectedversion'])
3302 3312 )
3303 3313
3304 3314 state[b'skipread'] = set()
3305 3315 state[b'safe_renamed'] = set()
3306 3316
3307 3317 for rev in self:
3308 3318 node = self.node(rev)
3309 3319
3310 3320 # Verify contents. 4 cases to care about:
3311 3321 #
3312 3322 # common: the most common case
3313 3323 # rename: with a rename
3314 3324 # meta: file content starts with b'\1\n', the metadata
3315 3325 # header defined in filelog.py, but without a rename
3316 3326 # ext: content stored externally
3317 3327 #
3318 3328 # More formally, their differences are shown below:
3319 3329 #
3320 3330 # | common | rename | meta | ext
3321 3331 # -------------------------------------------------------
3322 3332 # flags() | 0 | 0 | 0 | not 0
3323 3333 # renamed() | False | True | False | ?
3324 3334 # rawtext[0:2]=='\1\n'| False | True | True | ?
3325 3335 #
3326 3336 # "rawtext" means the raw text stored in revlog data, which
3327 3337 # could be retrieved by "rawdata(rev)". "text"
3328 3338 # mentioned below is "revision(rev)".
3329 3339 #
3330 3340 # There are 3 different lengths stored physically:
3331 3341 # 1. L1: rawsize, stored in revlog index
3332 3342 # 2. L2: len(rawtext), stored in revlog data
3333 3343 # 3. L3: len(text), stored in revlog data if flags==0, or
3334 3344 # possibly somewhere else if flags!=0
3335 3345 #
3336 3346 # L1 should be equal to L2. L3 could be different from them.
3337 3347 # "text" may or may not affect commit hash depending on flag
3338 3348 # processors (see flagutil.addflagprocessor).
3339 3349 #
3340 3350 # | common | rename | meta | ext
3341 3351 # -------------------------------------------------
3342 3352 # rawsize() | L1 | L1 | L1 | L1
3343 3353 # size() | L1 | L2-LM | L1(*) | L1 (?)
3344 3354 # len(rawtext) | L2 | L2 | L2 | L2
3345 3355 # len(text) | L2 | L2 | L2 | L3
3346 3356 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3347 3357 #
3348 3358 # LM: length of metadata, depending on rawtext
3349 3359 # (*): not ideal, see comment in filelog.size
3350 3360 # (?): could be "- len(meta)" if the resolved content has
3351 3361 # rename metadata
3352 3362 #
3353 3363 # Checks needed to be done:
3354 3364 # 1. length check: L1 == L2, in all cases.
3355 3365 # 2. hash check: depending on flag processor, we may need to
3356 3366 # use either "text" (external), or "rawtext" (in revlog).
3357 3367
3358 3368 try:
3359 3369 skipflags = state.get(b'skipflags', 0)
3360 3370 if skipflags:
3361 3371 skipflags &= self.flags(rev)
3362 3372
3363 3373 _verify_revision(self, skipflags, state, node)
3364 3374
3365 3375 l1 = self.rawsize(rev)
3366 3376 l2 = len(self.rawdata(node))
3367 3377
3368 3378 if l1 != l2:
3369 3379 yield revlogproblem(
3370 3380 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3371 3381 node=node,
3372 3382 )
3373 3383
3374 3384 except error.CensoredNodeError:
3375 3385 if state[b'erroroncensored']:
3376 3386 yield revlogproblem(
3377 3387 error=_(b'censored file data'), node=node
3378 3388 )
3379 3389 state[b'skipread'].add(node)
3380 3390 except Exception as e:
3381 3391 yield revlogproblem(
3382 3392 error=_(b'unpacking %s: %s')
3383 3393 % (short(node), stringutil.forcebytestr(e)),
3384 3394 node=node,
3385 3395 )
3386 3396 state[b'skipread'].add(node)
3387 3397
3388 3398 def storageinfo(
3389 3399 self,
3390 3400 exclusivefiles=False,
3391 3401 sharedfiles=False,
3392 3402 revisionscount=False,
3393 3403 trackedsize=False,
3394 3404 storedsize=False,
3395 3405 ):
3396 3406 d = {}
3397 3407
3398 3408 if exclusivefiles:
3399 3409 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3400 3410 if not self._inline:
3401 3411 d[b'exclusivefiles'].append((self.opener, self._datafile))
3402 3412
3403 3413 if sharedfiles:
3404 3414 d[b'sharedfiles'] = []
3405 3415
3406 3416 if revisionscount:
3407 3417 d[b'revisionscount'] = len(self)
3408 3418
3409 3419 if trackedsize:
3410 3420 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3411 3421
3412 3422 if storedsize:
3413 3423 d[b'storedsize'] = sum(
3414 3424 self.opener.stat(path).st_size for path in self.files()
3415 3425 )
3416 3426
3417 3427 return d
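A short usage sketch for the method above; `rl` is a hypothetical revlog instance (illustration only).

    # Ask the revlog how many revisions it holds and how much it stores on disk.
    info = rl.storageinfo(revisionscount=True, storedsize=True)
    print(info[b'revisionscount'], info[b'storedsize'])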
3418 3428
3419 3429 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3420 3430 if not self.hassidedata:
3421 3431 return
3422 3432 # revlog formats with sidedata support do not support inline
3423 3433 assert not self._inline
3424 3434 if not helpers[1] and not helpers[2]:
3425 3435 # Nothing to generate or remove
3426 3436 return
3427 3437
3428 3438 new_entries = []
3429 3439 # append the new sidedata
3430 3440 with self._writing(transaction):
3431 3441 ifh, dfh, sdfh = self._writinghandles
3432 3442 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3433 3443
3434 3444 current_offset = sdfh.tell()
3435 3445 for rev in range(startrev, endrev + 1):
3436 3446 entry = self.index[rev]
3437 3447 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3438 3448 store=self,
3439 3449 sidedata_helpers=helpers,
3440 3450 sidedata={},
3441 3451 rev=rev,
3442 3452 )
3443 3453
3444 3454 serialized_sidedata = sidedatautil.serialize_sidedata(
3445 3455 new_sidedata
3446 3456 )
3447 3457
3448 3458 sidedata_compression_mode = COMP_MODE_INLINE
3449 3459 if serialized_sidedata and self.hassidedata:
3450 3460 sidedata_compression_mode = COMP_MODE_PLAIN
3451 3461 h, comp_sidedata = self.compress(serialized_sidedata)
3452 3462 if (
3453 3463 h != b'u'
3454 3464 and comp_sidedata[0] != b'\0'
3455 3465 and len(comp_sidedata) < len(serialized_sidedata)
3456 3466 ):
3457 3467 assert not h
3458 3468 if (
3459 3469 comp_sidedata[0]
3460 3470 == self._docket.default_compression_header
3461 3471 ):
3462 3472 sidedata_compression_mode = COMP_MODE_DEFAULT
3463 3473 serialized_sidedata = comp_sidedata
3464 3474 else:
3465 3475 sidedata_compression_mode = COMP_MODE_INLINE
3466 3476 serialized_sidedata = comp_sidedata
3467 3477 if entry[8] != 0 or entry[9] != 0:
3468 3478 # rewriting entries that already have sidedata is not
3469 3479 # supported yet, because it introduces garbage data in the
3470 3480 # revlog.
3471 3481 msg = b"rewriting existing sidedata is not supported yet"
3472 3482 raise error.Abort(msg)
3473 3483
3474 3484 # Apply (potential) flags to add and to remove after running
3475 3485 # the sidedata helpers
3476 3486 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3477 3487 entry_update = (
3478 3488 current_offset,
3479 3489 len(serialized_sidedata),
3480 3490 new_offset_flags,
3481 3491 sidedata_compression_mode,
3482 3492 )
3483 3493
3484 3494 # the sidedata computation might have moved the file cursors around
3485 3495 sdfh.seek(current_offset, os.SEEK_SET)
3486 3496 sdfh.write(serialized_sidedata)
3487 3497 new_entries.append(entry_update)
3488 3498 current_offset += len(serialized_sidedata)
3489 3499 self._docket.sidedata_end = sdfh.tell()
3490 3500
3491 3501 # rewrite the new index entries
3492 3502 ifh.seek(startrev * self.index.entry_size)
3493 3503 for i, e in enumerate(new_entries):
3494 3504 rev = startrev + i
3495 3505 self.index.replace_sidedata_info(rev, *e)
3496 3506 packed = self.index.entry_binary(rev)
3497 3507 if rev == 0 and self._docket is None:
3498 3508 header = self._format_flags | self._format_version
3499 3509 header = self.index.pack_header(header)
3500 3510 packed = header + packed
3501 3511 ifh.write(packed)
@@ -1,1200 +1,1230 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator, List
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 from .revlogutils.constants import (
20 INDEX_HEADER,
21 )
19 22 from . import (
20 23 changelog,
21 24 error,
22 25 filelog,
23 26 manifest,
24 27 policy,
25 28 pycompat,
29 revlog as revlogmod,
26 30 util,
27 31 vfs as vfsmod,
28 32 )
29 33 from .utils import hashutil
30 34
31 35 parsers = policy.importmod('parsers')
32 36 # how much bytes should be read from fncache in one read
33 37 # It is done to prevent loading large fncache files into memory
34 38 fncache_chunksize = 10 ** 6
35 39
36 40
37 41 def _match_tracked_entry(entry, matcher):
38 42 """parses a fncache entry and returns whether the entry is tracking a path
39 43 matched by matcher or not.
40 44
41 45 If matcher is None, returns True"""
42 46
43 47 if matcher is None:
44 48 return True
45 49 if entry.is_filelog:
46 50 return matcher(entry.target_id)
47 51 elif entry.is_manifestlog:
48 52 return matcher.visitdir(entry.target_id.rstrip(b'/'))
49 53 raise error.ProgrammingError(b"cannot process entry %r" % entry)
50 54
51 55
52 56 # This avoids a collision between a file named foo and a dir named
53 57 # foo.i or foo.d
54 58 def _encodedir(path):
55 59 """
56 60 >>> _encodedir(b'data/foo.i')
57 61 'data/foo.i'
58 62 >>> _encodedir(b'data/foo.i/bla.i')
59 63 'data/foo.i.hg/bla.i'
60 64 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 65 'data/foo.i.hg.hg/bla.i'
62 66 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 67 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 68 """
65 69 return (
66 70 path.replace(b".hg/", b".hg.hg/")
67 71 .replace(b".i/", b".i.hg/")
68 72 .replace(b".d/", b".d.hg/")
69 73 )
70 74
71 75
72 76 encodedir = getattr(parsers, 'encodedir', _encodedir)
73 77
74 78
75 79 def decodedir(path):
76 80 """
77 81 >>> decodedir(b'data/foo.i')
78 82 'data/foo.i'
79 83 >>> decodedir(b'data/foo.i.hg/bla.i')
80 84 'data/foo.i/bla.i'
81 85 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
82 86 'data/foo.i.hg/bla.i'
83 87 """
84 88 if b".hg/" not in path:
85 89 return path
86 90 return (
87 91 path.replace(b".d.hg/", b".d/")
88 92 .replace(b".i.hg/", b".i/")
89 93 .replace(b".hg.hg/", b".hg/")
90 94 )
91 95
92 96
93 97 def _reserved():
94 98 """characters that are problematic for filesystems
95 99
96 100 * ascii escapes (0..31)
97 101 * ascii hi (126..255)
98 102 * windows specials
99 103
100 104 these characters will be escaped by the encode functions
101 105 """
102 106 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 107 for x in range(32):
104 108 yield x
105 109 for x in range(126, 256):
106 110 yield x
107 111 for x in winreserved:
108 112 yield x
109 113
110 114
111 115 def _buildencodefun():
112 116 """
113 117 >>> enc, dec = _buildencodefun()
114 118
115 119 >>> enc(b'nothing/special.txt')
116 120 'nothing/special.txt'
117 121 >>> dec(b'nothing/special.txt')
118 122 'nothing/special.txt'
119 123
120 124 >>> enc(b'HELLO')
121 125 '_h_e_l_l_o'
122 126 >>> dec(b'_h_e_l_l_o')
123 127 'HELLO'
124 128
125 129 >>> enc(b'hello:world?')
126 130 'hello~3aworld~3f'
127 131 >>> dec(b'hello~3aworld~3f')
128 132 'hello:world?'
129 133
130 134 >>> enc(b'the\\x07quick\\xADshot')
131 135 'the~07quick~adshot'
132 136 >>> dec(b'the~07quick~adshot')
133 137 'the\\x07quick\\xadshot'
134 138 """
135 139 e = b'_'
136 140 xchr = pycompat.bytechr
137 141 asciistr = list(map(xchr, range(127)))
138 142 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
139 143
140 144 cmap = {x: x for x in asciistr}
141 145 for x in _reserved():
142 146 cmap[xchr(x)] = b"~%02x" % x
143 147 for x in capitals + [ord(e)]:
144 148 cmap[xchr(x)] = e + xchr(x).lower()
145 149
146 150 dmap = {}
147 151 for k, v in cmap.items():
148 152 dmap[v] = k
149 153
150 154 def decode(s):
151 155 i = 0
152 156 while i < len(s):
153 157 for l in range(1, 4):
154 158 try:
155 159 yield dmap[s[i : i + l]]
156 160 i += l
157 161 break
158 162 except KeyError:
159 163 pass
160 164 else:
161 165 raise KeyError
162 166
163 167 return (
164 168 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
165 169 lambda s: b''.join(list(decode(s))),
166 170 )
167 171
168 172
169 173 _encodefname, _decodefname = _buildencodefun()
170 174
171 175
172 176 def encodefilename(s):
173 177 """
174 178 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
175 179 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
176 180 """
177 181 return _encodefname(encodedir(s))
178 182
179 183
180 184 def decodefilename(s):
181 185 """
182 186 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
183 187 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
184 188 """
185 189 return decodedir(_decodefname(s))
186 190
187 191
188 192 def _buildlowerencodefun():
189 193 """
190 194 >>> f = _buildlowerencodefun()
191 195 >>> f(b'nothing/special.txt')
192 196 'nothing/special.txt'
193 197 >>> f(b'HELLO')
194 198 'hello'
195 199 >>> f(b'hello:world?')
196 200 'hello~3aworld~3f'
197 201 >>> f(b'the\\x07quick\\xADshot')
198 202 'the~07quick~adshot'
199 203 """
200 204 xchr = pycompat.bytechr
201 205 cmap = {xchr(x): xchr(x) for x in range(127)}
202 206 for x in _reserved():
203 207 cmap[xchr(x)] = b"~%02x" % x
204 208 for x in range(ord(b"A"), ord(b"Z") + 1):
205 209 cmap[xchr(x)] = xchr(x).lower()
206 210
207 211 def lowerencode(s):
208 212 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
209 213
210 214 return lowerencode
211 215
212 216
213 217 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
214 218
215 219 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
216 220 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
217 221 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
218 222
219 223
220 224 def _auxencode(path, dotencode):
221 225 """
222 226 Encodes filenames containing names reserved by Windows or which end in
223 227 period or space. Does not touch other single reserved characters c.
224 228 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
225 229 Additionally encodes space or period at the beginning, if dotencode is
226 230 True. Parameter path is assumed to be all lowercase.
227 231 A segment only needs encoding if a reserved name appears as a
228 232 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
229 233 doesn't need encoding.
230 234
231 235 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
232 236 >>> _auxencode(s.split(b'/'), True)
233 237 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
234 238 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
235 239 >>> _auxencode(s.split(b'/'), False)
236 240 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
237 241 >>> _auxencode([b'foo. '], True)
238 242 ['foo.~20']
239 243 >>> _auxencode([b' .foo'], True)
240 244 ['~20.foo']
241 245 """
242 246 for i, n in enumerate(path):
243 247 if not n:
244 248 continue
245 249 if dotencode and n[0] in b'. ':
246 250 n = b"~%02x" % ord(n[0:1]) + n[1:]
247 251 path[i] = n
248 252 else:
249 253 l = n.find(b'.')
250 254 if l == -1:
251 255 l = len(n)
252 256 if (l == 3 and n[:3] in _winres3) or (
253 257 l == 4
254 258 and n[3:4] <= b'9'
255 259 and n[3:4] >= b'1'
256 260 and n[:3] in _winres4
257 261 ):
258 262 # encode third letter ('aux' -> 'au~78')
259 263 ec = b"~%02x" % ord(n[2:3])
260 264 n = n[0:2] + ec + n[3:]
261 265 path[i] = n
262 266 if n[-1] in b'. ':
263 267 # encode last period or space ('foo...' -> 'foo..~2e')
264 268 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
265 269 return path
266 270
267 271
268 272 _maxstorepathlen = 120
269 273 _dirprefixlen = 8
270 274 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
271 275
272 276
273 277 def _hashencode(path, dotencode):
274 278 digest = hex(hashutil.sha1(path).digest())
275 279 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
276 280 parts = _auxencode(le, dotencode)
277 281 basename = parts[-1]
278 282 _root, ext = os.path.splitext(basename)
279 283 sdirs = []
280 284 sdirslen = 0
281 285 for p in parts[:-1]:
282 286 d = p[:_dirprefixlen]
283 287 if d[-1] in b'. ':
284 288 # Windows can't access dirs ending in period or space
285 289 d = d[:-1] + b'_'
286 290 if sdirslen == 0:
287 291 t = len(d)
288 292 else:
289 293 t = sdirslen + 1 + len(d)
290 294 if t > _maxshortdirslen:
291 295 break
292 296 sdirs.append(d)
293 297 sdirslen = t
294 298 dirs = b'/'.join(sdirs)
295 299 if len(dirs) > 0:
296 300 dirs += b'/'
297 301 res = b'dh/' + dirs + digest + ext
298 302 spaceleft = _maxstorepathlen - len(res)
299 303 if spaceleft > 0:
300 304 filler = basename[:spaceleft]
301 305 res = b'dh/' + dirs + filler + digest + ext
302 306 return res
303 307
304 308
305 309 def _hybridencode(path, dotencode):
306 310 """encodes path with a length limit
307 311
308 312 Encodes all paths that begin with 'data/', according to the following.
309 313
310 314 Default encoding (reversible):
311 315
312 316 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
313 317 characters are encoded as '~xx', where xx is the two digit hex code
314 318 of the character (see encodefilename).
315 319 Relevant path components consisting of Windows reserved filenames are
316 320 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
317 321
318 322 Hashed encoding (not reversible):
319 323
320 324 If the default-encoded path is longer than _maxstorepathlen, a
321 325 non-reversible hybrid hashing of the path is done instead.
322 326 This encoding uses up to _dirprefixlen characters of all directory
323 327 levels of the lowerencoded path, but not more levels than can fit into
324 328 _maxshortdirslen.
325 329 Then follows the filler followed by the sha digest of the full path.
326 330 The filler is the beginning of the basename of the lowerencoded path
327 331 (the basename is everything after the last path separator). The filler
328 332 is as long as possible, filling in characters from the basename until
329 333 the encoded path has _maxstorepathlen characters (or all chars of the
330 334 basename have been taken).
331 335 The extension (e.g. '.i' or '.d') is preserved.
332 336
333 337 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
334 338 encoding was used.
335 339 """
336 340 path = encodedir(path)
337 341 ef = _encodefname(path).split(b'/')
338 342 res = b'/'.join(_auxencode(ef, dotencode))
339 343 if len(res) > _maxstorepathlen:
340 344 res = _hashencode(path, dotencode)
341 345 return res
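A minimal illustration of the two regimes described in the docstring above, run against the module-level helper with made-up paths (illustration only).

    # Short paths keep the reversible default encoding; paths whose encoded
    # form exceeds _maxstorepathlen (120) fall back to the hashed 'dh/' form.
    short = _hybridencode(b'data/Some.File.txt', True)
    long_ = _hybridencode(b'data/' + b'x' * 200 + b'.i', True)
    assert not short.startswith(b'dh/')   # reversible, '_x' style escapes
    assert long_.startswith(b'dh/')       # non-reversible hashed form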
342 346
343 347
344 348 def _pathencode(path):
345 349 de = encodedir(path)
346 350 if len(path) > _maxstorepathlen:
347 351 return _hashencode(de, True)
348 352 ef = _encodefname(de).split(b'/')
349 353 res = b'/'.join(_auxencode(ef, True))
350 354 if len(res) > _maxstorepathlen:
351 355 return _hashencode(de, True)
352 356 return res
353 357
354 358
355 359 _pathencode = getattr(parsers, 'pathencode', _pathencode)
356 360
357 361
358 362 def _plainhybridencode(f):
359 363 return _hybridencode(f, False)
360 364
361 365
362 366 def _calcmode(vfs):
363 367 try:
364 368 # files in .hg/ will be created using this mode
365 369 mode = vfs.stat().st_mode
366 370 # avoid some useless chmods
367 371 if (0o777 & ~util.umask) == (0o777 & mode):
368 372 mode = None
369 373 except OSError:
370 374 mode = None
371 375 return mode
372 376
373 377
374 378 _data = [
375 379 b'bookmarks',
376 380 b'narrowspec',
377 381 b'data',
378 382 b'meta',
379 383 b'00manifest.d',
380 384 b'00manifest.i',
381 385 b'00changelog.d',
382 386 b'00changelog.i',
383 387 b'phaseroots',
384 388 b'obsstore',
385 389 b'requires',
386 390 ]
387 391
388 392 REVLOG_FILES_EXT = (
389 393 b'.i',
390 394 b'.idx',
391 395 b'.d',
392 396 b'.dat',
393 397 b'.n',
394 398 b'.nd',
395 399 b'.sda',
396 400 )
397 401 # file extension that also use a `-SOMELONGIDHASH.ext` form
398 402 REVLOG_FILES_LONG_EXT = (
399 403 b'.nd',
400 404 b'.idx',
401 405 b'.dat',
402 406 b'.sda',
403 407 )
404 408 # files that are "volatile" and might change between listing and streaming
405 409 #
406 410 # note: the ".nd" file are nodemap data and won't "change" but they might be
407 411 # deleted.
408 412 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
409 413
410 414 # some exception to the above matching
411 415 #
412 416 # XXX This is currently not in use because of issue6542
413 417 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
414 418
415 419
416 420 def is_revlog(f, kind, st):
417 421 if kind != stat.S_IFREG:
418 422 return False
419 423 if f.endswith(REVLOG_FILES_EXT):
420 424 return True
421 425 return False
422 426
423 427
424 428 def is_revlog_file(f):
425 429 if f.endswith(REVLOG_FILES_EXT):
426 430 return True
427 431 return False
428 432
429 433
430 434 # the file is part of changelog data
431 435 FILEFLAGS_CHANGELOG = 1 << 13
432 436 # the file is part of manifest data
433 437 FILEFLAGS_MANIFESTLOG = 1 << 12
434 438 # the file is part of filelog data
435 439 FILEFLAGS_FILELOG = 1 << 11
436 440 # files that are not directly part of a revlog
437 441 FILEFLAGS_OTHER = 1 << 10
438 442
439 443 # the main entry point for a revlog
440 444 FILEFLAGS_REVLOG_MAIN = 1 << 1
441 445 # a secondary file for a revlog
442 446 FILEFLAGS_REVLOG_OTHER = 1 << 0
443 447
444 448 # files that are "volatile" and might change between listing and streaming
445 449 FILEFLAGS_VOLATILE = 1 << 20
446 450
447 451 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
448 452 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
449 453 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
450 454 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
451 455 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
452 456 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
453 457 FILETYPE_OTHER = FILEFLAGS_OTHER
454 458
455 459
456 460 @attr.s(slots=True)
457 461 class StoreFile:
458 462 """a file matching a store entry"""
459 463
460 464 unencoded_path = attr.ib()
461 465 _file_size = attr.ib(default=None)
462 466 is_volatile = attr.ib(default=False)
463 467
464 468 def file_size(self, vfs):
465 469 if self._file_size is None:
466 470 if vfs is None:
467 471 msg = b"calling vfs-less file_size without prior call: %s"
468 472 msg %= self.unencoded_path
469 473 raise error.ProgrammingError(msg)
470 474 try:
471 475 self._file_size = vfs.stat(self.unencoded_path).st_size
472 476 except FileNotFoundError:
473 477 self._file_size = 0
474 478 return self._file_size
475 479
476 480 def get_stream(self, vfs, copies):
477 481 """return data "stream" information for this file
478 482
479 483 (unencoded_file_path, content_iterator, content_size)
480 484 """
481 485 size = self.file_size(None)
482 486
483 487 def get_stream():
484 488 actual_path = copies[vfs.join(self.unencoded_path)]
485 489 with open(actual_path, 'rb') as fp:
486 490 yield None # ready to stream
487 491 if size <= 65536:
488 492 yield fp.read(size)
489 493 else:
490 494 yield from util.filechunkiter(fp, limit=size)
491 495
492 496 s = get_stream()
493 497 next(s)
494 498 return (self.unencoded_path, s, size)
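A small sketch of how a caller might consume the "primed generator" returned above: the first next() (already performed inside get_stream) opened the file and yielded None, so iterating the returned stream only yields content chunks. The `consume` helper below is hypothetical, for illustration only.

    def consume(stream_info):
        path, content_iter, size = stream_info
        data = b''.join(content_iter)  # chunks only; the None sentinel is gone
        assert len(data) == size
        return path, data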
495 499
496 500
497 501 @attr.s(slots=True, init=False)
498 502 class BaseStoreEntry:
499 503 """An entry in the store
500 504
501 505 This is returned by `store.walk` and represent some data in the store."""
502 506
503 507 def files(self) -> List[StoreFile]:
504 508 raise NotImplementedError
505 509
506 510 def get_streams(
507 511 self,
508 512 repo=None,
509 513 vfs=None,
510 514 copies=None,
511 515 max_changeset=None,
512 516 ):
513 517 """return a list of data stream associated to files for this entry
514 518
515 519 return [(unencoded_file_path, content_iterator, content_size), …]
516 520 """
517 521 assert vfs is not None
518 522 return [f.get_stream(vfs, copies) for f in self.files()]
519 523
520 524
521 525 @attr.s(slots=True, init=False)
522 526 class SimpleStoreEntry(BaseStoreEntry):
523 527 """A generic entry in the store"""
524 528
525 529 is_revlog = False
526 530
527 531 _entry_path = attr.ib()
528 532 _is_volatile = attr.ib(default=False)
529 533 _file_size = attr.ib(default=None)
530 534 _files = attr.ib(default=None)
531 535
532 536 def __init__(
533 537 self,
534 538 entry_path,
535 539 is_volatile=False,
536 540 file_size=None,
537 541 ):
538 542 super().__init__()
539 543 self._entry_path = entry_path
540 544 self._is_volatile = is_volatile
541 545 self._file_size = file_size
542 546 self._files = None
543 547
544 548 def files(self) -> List[StoreFile]:
545 549 if self._files is None:
546 550 self._files = [
547 551 StoreFile(
548 552 unencoded_path=self._entry_path,
549 553 file_size=self._file_size,
550 554 is_volatile=self._is_volatile,
551 555 )
552 556 ]
553 557 return self._files
554 558
555 559
556 560 @attr.s(slots=True, init=False)
557 561 class RevlogStoreEntry(BaseStoreEntry):
558 562 """A revlog entry in the store"""
559 563
560 564 is_revlog = True
561 565
562 566 revlog_type = attr.ib(default=None)
563 567 target_id = attr.ib(default=None)
564 568 _path_prefix = attr.ib(default=None)
565 569 _details = attr.ib(default=None)
566 570 _files = attr.ib(default=None)
567 571
568 572 def __init__(
569 573 self,
570 574 revlog_type,
571 575 path_prefix,
572 576 target_id,
573 577 details,
574 578 ):
575 579 super().__init__()
576 580 self.revlog_type = revlog_type
577 581 self.target_id = target_id
578 582 self._path_prefix = path_prefix
579 583 assert b'.i' in details, (path_prefix, details)
580 584 self._details = details
581 585 self._files = None
582 586
583 587 @property
584 588 def is_changelog(self):
585 589 return self.revlog_type & FILEFLAGS_CHANGELOG
586 590
587 591 @property
588 592 def is_manifestlog(self):
589 593 return self.revlog_type & FILEFLAGS_MANIFESTLOG
590 594
591 595 @property
592 596 def is_filelog(self):
593 597 return self.revlog_type & FILEFLAGS_FILELOG
594 598
595 599 def main_file_path(self):
596 600 """unencoded path of the main revlog file"""
597 601 return self._path_prefix + b'.i'
598 602
599 603 def files(self) -> List[StoreFile]:
600 604 if self._files is None:
601 605 self._files = []
602 606 for ext in sorted(self._details, key=_ext_key):
603 607 path = self._path_prefix + ext
604 608 file_size = self._details[ext]
605 609 # files that are "volatile" and might change between
606 610 # listing and streaming
607 611 #
608 612 # note: the ".nd" files are nodemap data and won't "change"
609 613 # but they might be deleted.
610 614 volatile = ext.endswith(REVLOG_FILES_VOLATILE_EXT)
611 615 f = StoreFile(path, file_size, volatile)
612 616 self._files.append(f)
613 617 return self._files
614 618
615 619 def get_streams(
616 620 self,
617 621 repo=None,
618 622 vfs=None,
619 623 copies=None,
620 624 max_changeset=None,
621 625 ):
622 if repo is None or max_changeset is None:
623 return super().get_streams(
624 repo=repo,
625 vfs=vfs,
626 copies=copies,
627 max_changeset=max_changeset,
628 )
629 if any(k.endswith(b'.idx') for k in self._details.keys()):
626 if (
627 repo is None
628 or max_changeset is None
630 629 # This uses revlog-v2, ignore for now
630 or any(k.endswith(b'.idx') for k in self._details.keys())
631 # This is not inline, no race expected
632 or b'.d' in self._details
633 ):
631 634 return super().get_streams(
632 635 repo=repo,
633 636 vfs=vfs,
634 637 copies=copies,
635 638 max_changeset=max_changeset,
636 639 )
637 name_to_ext = {}
638 for ext in self._details.keys():
639 name_to_ext[self._path_prefix + ext] = ext
640
640 641 name_to_size = {}
641 642 for f in self.files():
642 643 name_to_size[f.unencoded_path] = f.file_size(None)
644
643 645 stream = [
644 646 f.get_stream(vfs, copies)
645 647 for f in self.files()
646 if name_to_ext[f.unencoded_path] not in (b'.d', b'.i')
648 if not f.unencoded_path.endswith(b'.i')
647 649 ]
648 650
649 is_inline = b'.d' not in self._details
651 index_path = self._path_prefix + b'.i'
650 652
651 rl = self.get_revlog_instance(repo).get_revlog()
652 rl_stream = rl.get_streams(max_changeset, force_inline=is_inline)
653 index_file = None
654 try:
655 index_file = vfs(index_path)
656 header = index_file.read(INDEX_HEADER.size)
657 if revlogmod.revlog.is_inline_index(header):
658 size = name_to_size[index_path]
653 659
654 for name, s, size in rl_stream:
655 if name_to_size.get(name, 0) != size:
656 msg = _(b"expected %d bytes but %d provided for %s")
657 msg %= name_to_size.get(name, 0), size, name
658 raise error.Abort(msg)
659 stream.extend(rl_stream)
660 # no split underneath, just return the stream
661 def get_stream():
662 fp = index_file
663 try:
664 fp.seek(0)
665 yield None
666 if size <= 65536:
667 yield fp.read(size)
668 else:
669 yield from util.filechunkiter(fp, limit=size)
670 finally:
671 fp.close()
672
673 s = get_stream()
674 next(s)
675 index_file = None
676 stream.append((index_path, s, size))
677 else:
678 rl = self.get_revlog_instance(repo).get_revlog()
679 rl_stream = rl.get_streams(max_changeset, force_inline=True)
680 for name, s, size in rl_stream:
681 if name_to_size.get(name, 0) != size:
682 msg = _(b"expected %d bytes but %d provided for %s")
683 msg %= name_to_size.get(name, 0), size, name
684 raise error.Abort(msg)
685 stream.extend(rl_stream)
686 finally:
687 if index_file is not None:
688 index_file.close()
689
660 690 files = self.files()
661 691 assert len(stream) == len(files), (
662 692 stream,
663 693 files,
664 694 self._path_prefix,
665 695 self.target_id,
666 696 )
667 697 return stream
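A standalone sketch of the header check performed above, deciding whether a revlog index stores its data inline without constructing a full revlog instance; `index_path` is a hypothetical plain filesystem path and the call pattern mirrors the one used in the code above (illustration only).

    from mercurial import revlog as revlogmod
    from mercurial.revlogutils.constants import INDEX_HEADER

    def index_is_inline(index_path):
        # Peek at the first few bytes of the '.i' file; the format flags in
        # the header tell whether revision data is embedded in the index.
        with open(index_path, 'rb') as fp:
            header = fp.read(INDEX_HEADER.size)
        return revlogmod.revlog.is_inline_index(header)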
668 698
669 699 def get_revlog_instance(self, repo):
670 700 """Obtain a revlog instance from this store entry
671 701
672 702 An instance of the appropriate class is returned.
673 703 """
674 704 if self.is_changelog:
675 705 return changelog.changelog(repo.svfs)
676 706 elif self.is_manifestlog:
677 707 mandir = self.target_id
678 708 return manifest.manifestrevlog(
679 709 repo.nodeconstants, repo.svfs, tree=mandir
680 710 )
681 711 else:
682 712 return filelog.filelog(repo.svfs, self.target_id)
683 713
684 714
685 715 def _gather_revlog(files_data):
686 716 """group files per revlog prefix
687 717
688 718 This returns a two-level nested dict. The top level key is the revlog prefix
689 719 without extension; the second level maps every file "suffix" that was
690 720 seen for this revlog to arbitrary file data as its value.
691 721 """
692 722 revlogs = collections.defaultdict(dict)
693 723 for u, value in files_data:
694 724 name, ext = _split_revlog_ext(u)
695 725 revlogs[name][ext] = value
696 726 return sorted(revlogs.items())
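A tiny illustration of the grouping performed above, with made-up file data (illustration only).

    # Two files sharing the 'data/foo' prefix end up under one revlog entry.
    grouped = _gather_revlog([(b'data/foo.i', 10), (b'data/foo.d', 20)])
    assert grouped == [(b'data/foo', {b'.i': 10, b'.d': 20})]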
697 727
698 728
699 729 def _split_revlog_ext(filename):
700 730 """split the revlog file prefix from the variable extension"""
701 731 if filename.endswith(REVLOG_FILES_LONG_EXT):
702 732 char = b'-'
703 733 else:
704 734 char = b'.'
705 735 idx = filename.rfind(char)
706 736 return filename[:idx], filename[idx:]
707 737
708 738
709 739 def _ext_key(ext):
710 740 """a key to order revlog suffix
711 741
712 742 important to issue .i after other entry."""
713 743 # the only important part of this order is to keep the `.i` last.
714 744 if ext.endswith(b'.n'):
715 745 return (0, ext)
716 746 elif ext.endswith(b'.nd'):
717 747 return (10, ext)
718 748 elif ext.endswith(b'.d'):
719 749 return (20, ext)
720 750 elif ext.endswith(b'.i'):
721 751 return (50, ext)
722 752 else:
723 753 return (40, ext)
724 754
725 755
726 756 class basicstore:
727 757 '''base class for local repository stores'''
728 758
729 759 def __init__(self, path, vfstype):
730 760 vfs = vfstype(path)
731 761 self.path = vfs.base
732 762 self.createmode = _calcmode(vfs)
733 763 vfs.createmode = self.createmode
734 764 self.rawvfs = vfs
735 765 self.vfs = vfsmod.filtervfs(vfs, encodedir)
736 766 self.opener = self.vfs
737 767
738 768 def join(self, f):
739 769 return self.path + b'/' + encodedir(f)
740 770
741 771 def _walk(self, relpath, recurse, undecodable=None):
742 772 '''returns a sorted list of (unencoded, size) pairs'''
743 773 path = self.path
744 774 if relpath:
745 775 path += b'/' + relpath
746 776 striplen = len(self.path) + 1
747 777 l = []
748 778 if self.rawvfs.isdir(path):
749 779 visit = [path]
750 780 readdir = self.rawvfs.readdir
751 781 while visit:
752 782 p = visit.pop()
753 783 for f, kind, st in readdir(p, stat=True):
754 784 fp = p + b'/' + f
755 785 if is_revlog(f, kind, st):
756 786 n = util.pconvert(fp[striplen:])
757 787 l.append((decodedir(n), st.st_size))
758 788 elif kind == stat.S_IFDIR and recurse:
759 789 visit.append(fp)
760 790
761 791 l.sort()
762 792 return l
763 793
764 794 def changelog(self, trypending, concurrencychecker=None):
765 795 return changelog.changelog(
766 796 self.vfs,
767 797 trypending=trypending,
768 798 concurrencychecker=concurrencychecker,
769 799 )
770 800
771 801 def manifestlog(self, repo, storenarrowmatch):
772 802 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
773 803 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
774 804
775 805 def data_entries(
776 806 self, matcher=None, undecodable=None
777 807 ) -> Generator[BaseStoreEntry, None, None]:
778 808 """Like walk, but excluding the changelog and root manifest.
779 809
780 810 When [undecodable] is None, revlog names that can't be
781 811 decoded cause an exception. When it is provided, it should
782 812 be a list and the filenames that can't be decoded are added
783 813 to it instead. This is very rarely needed."""
784 814 dirs = [
785 815 (b'data', FILEFLAGS_FILELOG, False),
786 816 (b'meta', FILEFLAGS_MANIFESTLOG, True),
787 817 ]
788 818 for base_dir, rl_type, strip_filename in dirs:
789 819 files = self._walk(base_dir, True, undecodable=undecodable)
790 820 for revlog, details in _gather_revlog(files):
791 821 revlog_target_id = revlog.split(b'/', 1)[1]
792 822 if strip_filename and b'/' in revlog:
793 823 revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
794 824 revlog_target_id += b'/'
795 825 yield RevlogStoreEntry(
796 826 path_prefix=revlog,
797 827 revlog_type=rl_type,
798 828 target_id=revlog_target_id,
799 829 details=details,
800 830 )
801 831
802 832 def top_entries(
803 833 self, phase=False, obsolescence=False
804 834 ) -> Generator[BaseStoreEntry, None, None]:
805 835 if phase and self.vfs.exists(b'phaseroots'):
806 836 yield SimpleStoreEntry(
807 837 entry_path=b'phaseroots',
808 838 is_volatile=True,
809 839 )
810 840
811 841 if obsolescence and self.vfs.exists(b'obsstore'):
812 842 # XXX if we had the file size it could be non-volatile
813 843 yield SimpleStoreEntry(
814 844 entry_path=b'obsstore',
815 845 is_volatile=True,
816 846 )
817 847
818 848 files = reversed(self._walk(b'', False))
819 849
820 850 changelogs = collections.defaultdict(dict)
821 851 manifestlogs = collections.defaultdict(dict)
822 852
823 853 for u, s in files:
824 854 if u.startswith(b'00changelog'):
825 855 name, ext = _split_revlog_ext(u)
826 856 changelogs[name][ext] = s
827 857 elif u.startswith(b'00manifest'):
828 858 name, ext = _split_revlog_ext(u)
829 859 manifestlogs[name][ext] = s
830 860 else:
831 861 yield SimpleStoreEntry(
832 862 entry_path=u,
833 863 is_volatile=False,
834 864 file_size=s,
835 865 )
836 866 # yield manifest before changelog
837 867 top_rl = [
838 868 (manifestlogs, FILEFLAGS_MANIFESTLOG),
839 869 (changelogs, FILEFLAGS_CHANGELOG),
840 870 ]
841 871 assert len(manifestlogs) <= 1
842 872 assert len(changelogs) <= 1
843 873 for data, revlog_type in top_rl:
844 874 for revlog, details in sorted(data.items()):
845 875 yield RevlogStoreEntry(
846 876 path_prefix=revlog,
847 877 revlog_type=revlog_type,
848 878 target_id=b'',
849 879 details=details,
850 880 )
851 881
852 882 def walk(
853 883 self, matcher=None, phase=False, obsolescence=False
854 884 ) -> Generator[BaseStoreEntry, None, None]:
855 885 """return files related to data storage (ie: revlogs)
856 886
857 887 yields instance from BaseStoreEntry subclasses
858 888
859 889 if a matcher is passed, storage files of only those tracked paths
860 890 are passed with matches the matcher
861 891 """
862 892 # yield data files first
863 893 for x in self.data_entries(matcher):
864 894 yield x
865 895 for x in self.top_entries(phase=phase, obsolescence=obsolescence):
866 896 yield x
867 897
868 898 def copylist(self):
869 899 return _data
870 900
871 901 def write(self, tr):
872 902 pass
873 903
874 904 def invalidatecaches(self):
875 905 pass
876 906
877 907 def markremoved(self, fn):
878 908 pass
879 909
880 910 def __contains__(self, path):
881 911 '''Checks if the store contains path'''
882 912 path = b"/".join((b"data", path))
883 913 # file?
884 914 if self.vfs.exists(path + b".i"):
885 915 return True
886 916 # dir?
887 917 if not path.endswith(b"/"):
888 918 path = path + b"/"
889 919 return self.vfs.exists(path)
890 920
891 921
892 922 class encodedstore(basicstore):
893 923 def __init__(self, path, vfstype):
894 924 vfs = vfstype(path + b'/store')
895 925 self.path = vfs.base
896 926 self.createmode = _calcmode(vfs)
897 927 vfs.createmode = self.createmode
898 928 self.rawvfs = vfs
899 929 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
900 930 self.opener = self.vfs
901 931
902 932 def _walk(self, relpath, recurse, undecodable=None):
903 933 old = super()._walk(relpath, recurse)
904 934 new = []
905 935 for f1, value in old:
906 936 try:
907 937 f2 = decodefilename(f1)
908 938 except KeyError:
909 939 if undecodable is None:
910 940 msg = _(b'undecodable revlog name %s') % f1
911 941 raise error.StorageError(msg)
912 942 else:
913 943 undecodable.append(f1)
914 944 continue
915 945 new.append((f2, value))
916 946 return new
917 947
918 948 def data_entries(
919 949 self, matcher=None, undecodable=None
920 950 ) -> Generator[BaseStoreEntry, None, None]:
921 951 entries = super(encodedstore, self).data_entries(
922 952 undecodable=undecodable
923 953 )
924 954 for entry in entries:
925 955 if _match_tracked_entry(entry, matcher):
926 956 yield entry
927 957
928 958 def join(self, f):
929 959 return self.path + b'/' + encodefilename(f)
930 960
931 961 def copylist(self):
932 962 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
933 963
934 964
935 965 class fncache:
936 966 # the filename used to be partially encoded
937 967 # hence the encodedir/decodedir dance
938 968 def __init__(self, vfs):
939 969 self.vfs = vfs
940 970 self._ignores = set()
941 971 self.entries = None
942 972 self._dirty = False
943 973 # set of new additions to fncache
944 974 self.addls = set()
945 975
946 976 def ensureloaded(self, warn=None):
947 977 """read the fncache file if not already read.
948 978
949 979 If the file on disk is corrupted, raise. If warn is provided,
950 980 warn and keep going instead."""
951 981 if self.entries is None:
952 982 self._load(warn)
953 983
954 984 def _load(self, warn=None):
955 985 '''fill the entries from the fncache file'''
956 986 self._dirty = False
957 987 try:
958 988 fp = self.vfs(b'fncache', mode=b'rb')
959 989 except IOError:
960 990 # skip nonexistent file
961 991 self.entries = set()
962 992 return
963 993
964 994 self.entries = set()
965 995 chunk = b''
966 996 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
967 997 chunk += c
968 998 try:
969 999 p = chunk.rindex(b'\n')
970 1000 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
971 1001 chunk = chunk[p + 1 :]
972 1002 except ValueError:
973 1003 # substring '\n' not found, maybe the entry is bigger than the
974 1004 # chunksize, so let's keep iterating
975 1005 pass
976 1006
977 1007 if chunk:
978 1008 msg = _(b"fncache does not ends with a newline")
979 1009 if warn:
980 1010 warn(msg + b'\n')
981 1011 else:
982 1012 raise error.Abort(
983 1013 msg,
984 1014 hint=_(
985 1015 b"use 'hg debugrebuildfncache' to "
986 1016 b"rebuild the fncache"
987 1017 ),
988 1018 )
989 1019 self._checkentries(fp, warn)
990 1020 fp.close()
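A standalone sketch of the chunked-read pattern used by _load() above: entries are read in fixed-size chunks so a very large fncache never has to be held in memory as one string. The `read_entries` helper is hypothetical and skips the decodedir step, illustration only.

    import functools

    def read_entries(fp, chunksize=fncache_chunksize):
        entries = set()
        chunk = b''
        for c in iter(functools.partial(fp.read, chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                entries.update(chunk[: p + 1].splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # no newline in this chunk yet; the entry spans several chunks
                pass
        # a non-empty leftover means the file did not end with a newline
        return entries, chunk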
991 1021
992 1022 def _checkentries(self, fp, warn):
993 1023 """make sure there is no empty string in entries"""
994 1024 if b'' in self.entries:
995 1025 fp.seek(0)
996 1026 for n, line in enumerate(fp):
997 1027 if not line.rstrip(b'\n'):
998 1028 t = _(b'invalid entry in fncache, line %d') % (n + 1)
999 1029 if warn:
1000 1030 warn(t + b'\n')
1001 1031 else:
1002 1032 raise error.Abort(t)
1003 1033
1004 1034 def write(self, tr):
1005 1035 if self._dirty:
1006 1036 assert self.entries is not None
1007 1037 self.entries = self.entries | self.addls
1008 1038 self.addls = set()
1009 1039 tr.addbackup(b'fncache')
1010 1040 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
1011 1041 if self.entries:
1012 1042 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
1013 1043 fp.close()
1014 1044 self._dirty = False
1015 1045 if self.addls:
1016 1046 # if we have just new entries, let's append them to the fncache
1017 1047 tr.addbackup(b'fncache')
1018 1048 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
1019 1049 if self.addls:
1020 1050 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
1021 1051 fp.close()
1022 1052 self.entries = None
1023 1053 self.addls = set()
1024 1054
1025 1055 def addignore(self, fn):
1026 1056 self._ignores.add(fn)
1027 1057
1028 1058 def add(self, fn):
1029 1059 if fn in self._ignores:
1030 1060 return
1031 1061 if self.entries is None:
1032 1062 self._load()
1033 1063 if fn not in self.entries:
1034 1064 self.addls.add(fn)
1035 1065
1036 1066 def remove(self, fn):
1037 1067 if self.entries is None:
1038 1068 self._load()
1039 1069 if fn in self.addls:
1040 1070 self.addls.remove(fn)
1041 1071 return
1042 1072 try:
1043 1073 self.entries.remove(fn)
1044 1074 self._dirty = True
1045 1075 except KeyError:
1046 1076 pass
1047 1077
1048 1078 def __contains__(self, fn):
1049 1079 if fn in self.addls:
1050 1080 return True
1051 1081 if self.entries is None:
1052 1082 self._load()
1053 1083 return fn in self.entries
1054 1084
1055 1085 def __iter__(self):
1056 1086 if self.entries is None:
1057 1087 self._load()
1058 1088 return iter(self.entries | self.addls)
1059 1089
1060 1090
1061 1091 class _fncachevfs(vfsmod.proxyvfs):
1062 1092 def __init__(self, vfs, fnc, encode):
1063 1093 vfsmod.proxyvfs.__init__(self, vfs)
1064 1094 self.fncache = fnc
1065 1095 self.encode = encode
1066 1096
1067 1097 def __call__(self, path, mode=b'r', *args, **kw):
1068 1098 encoded = self.encode(path)
1069 1099 if (
1070 1100 mode not in (b'r', b'rb')
1071 1101 and (path.startswith(b'data/') or path.startswith(b'meta/'))
1072 1102 and is_revlog_file(path)
1073 1103 ):
1074 1104 # do not trigger a fncache load when adding a file that already is
1075 1105 # known to exist.
1076 1106 notload = self.fncache.entries is None and self.vfs.exists(encoded)
1077 1107 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
1078 1108 # when appending to an existing file, if the file has size zero,
1079 1109 # it should be considered as missing. Such zero-size files are
1080 1110 # the result of truncation when a transaction is aborted.
1081 1111 notload = False
1082 1112 if not notload:
1083 1113 self.fncache.add(path)
1084 1114 return self.vfs(encoded, mode, *args, **kw)
1085 1115
1086 1116 def join(self, path):
1087 1117 if path:
1088 1118 return self.vfs.join(self.encode(path))
1089 1119 else:
1090 1120 return self.vfs.join(path)
1091 1121
1092 1122 def register_file(self, path):
1093 1123 """generic hook point to lets fncache steer its stew"""
1094 1124 if path.startswith(b'data/') or path.startswith(b'meta/'):
1095 1125 self.fncache.add(path)
1096 1126
1097 1127
1098 1128 class fncachestore(basicstore):
1099 1129 def __init__(self, path, vfstype, dotencode):
1100 1130 if dotencode:
1101 1131 encode = _pathencode
1102 1132 else:
1103 1133 encode = _plainhybridencode
1104 1134 self.encode = encode
1105 1135 vfs = vfstype(path + b'/store')
1106 1136 self.path = vfs.base
1107 1137 self.pathsep = self.path + b'/'
1108 1138 self.createmode = _calcmode(vfs)
1109 1139 vfs.createmode = self.createmode
1110 1140 self.rawvfs = vfs
1111 1141 fnc = fncache(vfs)
1112 1142 self.fncache = fnc
1113 1143 self.vfs = _fncachevfs(vfs, fnc, encode)
1114 1144 self.opener = self.vfs
1115 1145
1116 1146 def join(self, f):
1117 1147 return self.pathsep + self.encode(f)
1118 1148
1119 1149 def getsize(self, path):
1120 1150 return self.rawvfs.stat(path).st_size
1121 1151
1122 1152 def data_entries(
1123 1153 self, matcher=None, undecodable=None
1124 1154 ) -> Generator[BaseStoreEntry, None, None]:
1125 1155 # Note: all files in fncache should be revlog related. However, the
1126 1156 # fncache might contain such files added by previous versions of
1127 1157 # Mercurial.
1128 1158 files = ((f, None) for f in self.fncache if is_revlog_file(f))
1129 1159 by_revlog = _gather_revlog(files)
1130 1160 for revlog, details in by_revlog:
1131 1161 if revlog.startswith(b'data/'):
1132 1162 rl_type = FILEFLAGS_FILELOG
1133 1163 revlog_target_id = revlog.split(b'/', 1)[1]
1134 1164 elif revlog.startswith(b'meta/'):
1135 1165 rl_type = FILEFLAGS_MANIFESTLOG
1136 1166 # drop the initial directory and the `00manifest` file part
1137 1167 tmp = revlog.split(b'/', 1)[1]
1138 1168 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1139 1169 else:
1140 1170 # unreachable
1141 1171 assert False, revlog
1142 1172 entry = RevlogStoreEntry(
1143 1173 path_prefix=revlog,
1144 1174 revlog_type=rl_type,
1145 1175 target_id=revlog_target_id,
1146 1176 details=details,
1147 1177 )
1148 1178 if _match_tracked_entry(entry, matcher):
1149 1179 yield entry
1150 1180
1151 1181 def copylist(self):
1152 1182 d = (
1153 1183 b'bookmarks',
1154 1184 b'narrowspec',
1155 1185 b'data',
1156 1186 b'meta',
1157 1187 b'dh',
1158 1188 b'fncache',
1159 1189 b'phaseroots',
1160 1190 b'obsstore',
1161 1191 b'00manifest.d',
1162 1192 b'00manifest.i',
1163 1193 b'00changelog.d',
1164 1194 b'00changelog.i',
1165 1195 b'requires',
1166 1196 )
1167 1197 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1168 1198
1169 1199 def write(self, tr):
1170 1200 self.fncache.write(tr)
1171 1201
1172 1202 def invalidatecaches(self):
1173 1203 self.fncache.entries = None
1174 1204 self.fncache.addls = set()
1175 1205
1176 1206 def markremoved(self, fn):
1177 1207 self.fncache.remove(fn)
1178 1208
1179 1209 def _exists(self, f):
1180 1210 ef = self.encode(f)
1181 1211 try:
1182 1212 self.getsize(ef)
1183 1213 return True
1184 1214 except FileNotFoundError:
1185 1215 return False
1186 1216
1187 1217 def __contains__(self, path):
1188 1218 '''Checks if the store contains path'''
1189 1219 path = b"/".join((b"data", path))
1190 1220 # check for files (exact match)
1191 1221 e = path + b'.i'
1192 1222 if e in self.fncache and self._exists(e):
1193 1223 return True
1194 1224 # now check for directories (prefix match)
1195 1225 if not path.endswith(b'/'):
1196 1226 path += b'/'
1197 1227 for e in self.fncache:
1198 1228 if e.startswith(path) and self._exists(e):
1199 1229 return True
1200 1230 return False
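A short usage sketch for the membership check above; `store` is a hypothetical fncachestore instance (illustration only).

    # True if 'data/foo/bar.i' is a tracked file, or if some tracked file
    # lives under the 'data/foo/bar/' directory prefix.
    present = b'foo/bar' in store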