revlogv2: use a unique filename for data...
marmoute - r48115:0a3fa41f default
@@ -1,3466 +1,3468 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 CHANGELOGV2,
39 39 COMP_MODE_DEFAULT,
40 40 COMP_MODE_INLINE,
41 41 COMP_MODE_PLAIN,
42 42 FEATURES_BY_VERSION,
43 43 FLAG_GENERALDELTA,
44 44 FLAG_INLINE_DATA,
45 45 INDEX_HEADER,
46 46 KIND_CHANGELOG,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 SUPPORTED_FLAGS,
56 56 )
57 57 from .revlogutils.flagutil import (
58 58 REVIDX_DEFAULT_FLAGS,
59 59 REVIDX_ELLIPSIS,
60 60 REVIDX_EXTSTORED,
61 61 REVIDX_FLAGS_ORDER,
62 62 REVIDX_HASCOPIESINFO,
63 63 REVIDX_ISCENSORED,
64 64 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 65 )
66 66 from .thirdparty import attr
67 67 from . import (
68 68 ancestor,
69 69 dagop,
70 70 error,
71 71 mdiff,
72 72 policy,
73 73 pycompat,
74 74 templatefilters,
75 75 util,
76 76 )
77 77 from .interfaces import (
78 78 repository,
79 79 util as interfaceutil,
80 80 )
81 81 from .revlogutils import (
82 82 deltas as deltautil,
83 83 docket as docketutil,
84 84 flagutil,
85 85 nodemap as nodemaputil,
86 86 revlogv0,
87 87 sidedata as sidedatautil,
88 88 )
89 89 from .utils import (
90 90 storageutil,
91 91 stringutil,
92 92 )
93 93
94 94 # blanked usage of all the names to prevent pyflakes constraints
95 95 # We need these names available in the module for extensions.
96 96
97 97 REVLOGV0
98 98 REVLOGV1
99 99 REVLOGV2
100 100 FLAG_INLINE_DATA
101 101 FLAG_GENERALDELTA
102 102 REVLOG_DEFAULT_FLAGS
103 103 REVLOG_DEFAULT_FORMAT
104 104 REVLOG_DEFAULT_VERSION
105 105 REVLOGV1_FLAGS
106 106 REVLOGV2_FLAGS
107 107 REVIDX_ISCENSORED
108 108 REVIDX_ELLIPSIS
109 109 REVIDX_HASCOPIESINFO
110 110 REVIDX_EXTSTORED
111 111 REVIDX_DEFAULT_FLAGS
112 112 REVIDX_FLAGS_ORDER
113 113 REVIDX_RAWTEXT_CHANGING_FLAGS
114 114
115 115 parsers = policy.importmod('parsers')
116 116 rustancestor = policy.importrust('ancestor')
117 117 rustdagop = policy.importrust('dagop')
118 118 rustrevlog = policy.importrust('revlog')
119 119
120 120 # Aliased for performance.
121 121 _zlibdecompress = zlib.decompress
122 122
123 123 # max size of revlog with inline data
124 124 _maxinline = 131072
125 125 _chunksize = 1048576
126 126
127 127 # Flag processors for REVIDX_ELLIPSIS.
128 128 def ellipsisreadprocessor(rl, text):
129 129 return text, False
130 130
131 131
132 132 def ellipsiswriteprocessor(rl, text):
133 133 return text, False
134 134
135 135
136 136 def ellipsisrawprocessor(rl, text):
137 137 return False
138 138
139 139
140 140 ellipsisprocessor = (
141 141 ellipsisreadprocessor,
142 142 ellipsiswriteprocessor,
143 143 ellipsisrawprocessor,
144 144 )
145 145
146 146
147 147 def offset_type(offset, type):
148 148 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
149 149 raise ValueError(b'unknown revlog index flags')
150 150 return int(int(offset) << 16 | type)
151 151
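# Illustrative sketch (not part of the upstream file): how a value packed by
# offset_type() above can be unpacked again, matching the index-entry layout
# documented in the `revlog` class docstring further down.
#
#   packed = offset_type(4096, REVIDX_ISCENSORED)
#   offset = packed >> 16      # -> 4096
#   flags = packed & 0xFFFF    # -> REVIDX_ISCENSORED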
152 152
153 153 def _verify_revision(rl, skipflags, state, node):
154 154 """Verify the integrity of the given revlog ``node`` while providing a hook
155 155 point for extensions to influence the operation."""
156 156 if skipflags:
157 157 state[b'skipread'].add(node)
158 158 else:
159 159 # Side-effect: read content and verify hash.
160 160 rl.revision(node)
161 161
162 162
163 163 # True if a fast implementation for persistent-nodemap is available
164 164 #
165 165 # We also consider that we have a "fast" implementation in "pure" python because
166 166 # people using pure don't really have performance considerations (and a
167 167 # wheelbarrow of other sources of slowness)
168 168 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
169 169 parsers, 'BaseIndexObject'
170 170 )
171 171
172 172
173 173 @attr.s(slots=True, frozen=True)
174 174 class _revisioninfo(object):
175 175 """Information about a revision that allows building its fulltext
176 176 node: expected hash of the revision
177 177 p1, p2: parent revs of the revision
178 178 btext: built text cache consisting of a one-element list
179 179 cachedelta: (baserev, uncompressed_delta) or None
180 180 flags: flags associated with the revision storage
181 181
182 182 One of btext[0] or cachedelta must be set.
183 183 """
184 184
185 185 node = attr.ib()
186 186 p1 = attr.ib()
187 187 p2 = attr.ib()
188 188 btext = attr.ib()
189 189 textlen = attr.ib()
190 190 cachedelta = attr.ib()
191 191 flags = attr.ib()
192 192
193 193
194 194 @interfaceutil.implementer(repository.irevisiondelta)
195 195 @attr.s(slots=True)
196 196 class revlogrevisiondelta(object):
197 197 node = attr.ib()
198 198 p1node = attr.ib()
199 199 p2node = attr.ib()
200 200 basenode = attr.ib()
201 201 flags = attr.ib()
202 202 baserevisionsize = attr.ib()
203 203 revision = attr.ib()
204 204 delta = attr.ib()
205 205 sidedata = attr.ib()
206 206 protocol_flags = attr.ib()
207 207 linknode = attr.ib(default=None)
208 208
209 209
210 210 @interfaceutil.implementer(repository.iverifyproblem)
211 211 @attr.s(frozen=True)
212 212 class revlogproblem(object):
213 213 warning = attr.ib(default=None)
214 214 error = attr.ib(default=None)
215 215 node = attr.ib(default=None)
216 216
217 217
218 218 def parse_index_v1(data, inline):
219 219 # call the C implementation to parse the index data
220 220 index, cache = parsers.parse_index2(data, inline)
221 221 return index, cache
222 222
223 223
224 224 def parse_index_v2(data, inline):
225 225 # call the C implementation to parse the index data
226 226 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
227 227 return index, cache
228 228
229 229
230 230 def parse_index_cl_v2(data, inline):
231 231 # call the C implementation to parse the index data
232 232 assert not inline
233 233 from .pure.parsers import parse_index_cl_v2
234 234
235 235 index, cache = parse_index_cl_v2(data)
236 236 return index, cache
237 237
238 238
239 239 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
240 240
241 241 def parse_index_v1_nodemap(data, inline):
242 242 index, cache = parsers.parse_index_devel_nodemap(data, inline)
243 243 return index, cache
244 244
245 245
246 246 else:
247 247 parse_index_v1_nodemap = None
248 248
249 249
250 250 def parse_index_v1_mixed(data, inline):
251 251 index, cache = parse_index_v1(data, inline)
252 252 return rustrevlog.MixedIndex(index), cache
253 253
254 254
255 255 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
256 256 # signed integer)
257 257 _maxentrysize = 0x7FFFFFFF
258 258
259 259
260 260 class revlog(object):
261 261 """
262 262 the underlying revision storage object
263 263
264 264 A revlog consists of two parts, an index and the revision data.
265 265
266 266 The index is a file with a fixed record size containing
267 267 information on each revision, including its nodeid (hash), the
268 268 nodeids of its parents, the position and offset of its data within
269 269 the data file, and the revision it's based on. Finally, each entry
270 270 contains a linkrev entry that can serve as a pointer to external
271 271 data.
272 272
273 273 The revision data itself is a linear collection of data chunks.
274 274 Each chunk represents a revision and is usually represented as a
275 275 delta against the previous chunk. To bound lookup time, runs of
276 276 deltas are limited to about 2 times the length of the original
277 277 version data. This makes retrieval of a version proportional to
278 278 its size, or O(1) relative to the number of revisions.
279 279
280 280 Both pieces of the revlog are written to in an append-only
281 281 fashion, which means we never need to rewrite a file to insert or
282 282 remove data, and can use some simple techniques to avoid the need
283 283 for locking while reading.
284 284
285 285 If checkambig, indexfile is opened with checkambig=True at
286 286 writing, to avoid file stat ambiguity.
287 287
288 288 If mmaplargeindex is True, and an mmapindexthreshold is set, the
289 289 index will be mmapped rather than read if it is larger than the
290 290 configured threshold.
291 291
292 292 If censorable is True, the revlog can have censored revisions.
293 293
294 294 If `upperboundcomp` is not None, this is the expected maximal gain from
295 295 compression for the data content.
296 296
297 297 `concurrencychecker` is an optional function that receives 3 arguments: a
298 298 file handle, a filename, and an expected position. It should check whether
299 299 the current position in the file handle is valid, and log/warn/fail (by
300 300 raising).
301 301
302 302
303 303 Internal details
304 304 ----------------
305 305
306 306 A large part of the revlog logic deals with revisions' "index entries", tuple
307 307 objects that contain the same "items" regardless of the revlog version.
308 308 Different versions will have different ways of storing these items (sometimes
309 309 not having them at all), but the tuple will always be the same. New fields
310 310 are usually added at the end to avoid breaking existing code that relies
311 311 on the existing order. The fields are defined as follows:
312 312
313 313 [0] offset:
314 314 The byte index of the start of revision data chunk.
315 315 That value is shifted up by 16 bits. Use "offset = field >> 16" to
316 316 retrieve it.
317 317
318 318 flags:
319 319 A flag field that carries special information or changes the behavior
320 320 of the revision. (see `REVIDX_*` constants for details)
321 321 The flag field only occupies the lower 16 bits of this field,
322 322 use "flags = field & 0xFFFF" to retrieve the value.
323 323
324 324 [1] compressed length:
325 325 The size, in bytes, of the chunk on disk
326 326
327 327 [2] uncompressed length:
328 328 The size, in bytes, of the full revision once reconstructed.
329 329
330 330 [3] base rev:
331 331 Either the base of the revision delta chain (without general
332 332 delta), or the base of the delta (stored in the data chunk)
333 333 with general delta.
334 334
335 335 [4] link rev:
336 336 Changelog revision number of the changeset introducing this
337 337 revision.
338 338
339 339 [5] parent 1 rev:
340 340 Revision number of the first parent
341 341
342 342 [6] parent 2 rev:
343 343 Revision number of the second parent
344 344
345 345 [7] node id:
346 346 The node id of the current revision
347 347
348 348 [8] sidedata offset:
349 349 The byte index of the start of the revision's side-data chunk.
350 350
351 351 [9] sidedata chunk length:
352 352 The size, in bytes, of the revision's side-data chunk.
353 353
354 354 [10] data compression mode:
355 355 two bits that detail the way the data chunk is compressed on disk.
356 356 (see "COMP_MODE_*" constants for details). For revlog version 0 and
357 357 1 this will always be COMP_MODE_INLINE.
358 358
359 359 [11] side-data compression mode:
360 360 two bits that detail the way the sidedata chunk is compressed on disk.
361 361 (see "COMP_MODE_*" constants for details)
362 362 """
363 363
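# Minimal sketch (illustrative only; `rl` stands for a revlog instance): how
# the index-entry fields documented above map onto the accessor methods
# defined later in this class.
#
#   entry = rl.index[rev]
#   entry[0] >> 16     # == rl.start(rev)
#   entry[0] & 0xFFFF  # == rl.flags(rev)
#   entry[1]           # == rl.length(rev)
#   entry[4]           # == rl.linkrev(rev)
#   entry[7]           # == rl.node(rev)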
364 364 _flagserrorclass = error.RevlogError
365 365
366 366 def __init__(
367 367 self,
368 368 opener,
369 369 target,
370 370 radix,
371 371 postfix=None, # only exist for `tmpcensored` now
372 372 checkambig=False,
373 373 mmaplargeindex=False,
374 374 censorable=False,
375 375 upperboundcomp=None,
376 376 persistentnodemap=False,
377 377 concurrencychecker=None,
378 378 trypending=False,
379 379 ):
380 380 """
381 381 create a revlog object
382 382
383 383 opener is a function that abstracts the file opening operation
384 384 and can be used to implement COW semantics or the like.
385 385
386 386 `target`: a (KIND, ID) tuple that identifies the content stored in
387 387 this revlog. It helps the rest of the code understand what the revlog
388 388 is about without having to resort to heuristics and index filename
389 389 analysis. Note that this must reliably be set by normal code, but
390 390 test, debug, or performance measurement code might not set it to an
391 391 accurate value.
392 392 """
393 393 self.upperboundcomp = upperboundcomp
394 394
395 395 self.radix = radix
396 396
397 397 self._docket_file = None
398 398 self._indexfile = None
399 399 self._datafile = None
400 400 self._nodemap_file = None
401 401 self.postfix = postfix
402 402 self._trypending = trypending
403 403 self.opener = opener
404 404 if persistentnodemap:
405 405 self._nodemap_file = nodemaputil.get_nodemap_file(self)
406 406
407 407 assert target[0] in ALL_KINDS
408 408 assert len(target) == 2
409 409 self.target = target
410 410 # When True, indexfile is opened with checkambig=True at writing, to
411 411 # avoid file stat ambiguity.
412 412 self._checkambig = checkambig
413 413 self._mmaplargeindex = mmaplargeindex
414 414 self._censorable = censorable
415 415 # 3-tuple of (node, rev, text) for a raw revision.
416 416 self._revisioncache = None
417 417 # Maps rev to chain base rev.
418 418 self._chainbasecache = util.lrucachedict(100)
419 419 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
420 420 self._chunkcache = (0, b'')
421 421 # How much data to read and cache into the raw revlog data cache.
422 422 self._chunkcachesize = 65536
423 423 self._maxchainlen = None
424 424 self._deltabothparents = True
425 425 self.index = None
426 426 self._docket = None
427 427 self._nodemap_docket = None
428 428 # Mapping of partial identifiers to full nodes.
429 429 self._pcache = {}
430 430 # Mapping of revision integer to full node.
431 431 self._compengine = b'zlib'
432 432 self._compengineopts = {}
433 433 self._maxdeltachainspan = -1
434 434 self._withsparseread = False
435 435 self._sparserevlog = False
436 436 self.hassidedata = False
437 437 self._srdensitythreshold = 0.50
438 438 self._srmingapsize = 262144
439 439
440 440 # Make copy of flag processors so each revlog instance can support
441 441 # custom flags.
442 442 self._flagprocessors = dict(flagutil.flagprocessors)
443 443
444 444 # 2-tuple of file handles being used for active writing.
445 445 self._writinghandles = None
446 446 # prevent nesting of addgroup
447 447 self._adding_group = None
448 448
449 449 self._loadindex()
450 450
451 451 self._concurrencychecker = concurrencychecker
452 452
453 453 def _init_opts(self):
454 454 """process options (from above/config) to setup associated default revlog mode
455 455
456 456 These values might be affected when actually reading on-disk information.
457 457
458 458 The relevant values are returned for use in _loadindex().
459 459
460 460 * newversionflags:
461 461 version header to use if we need to create a new revlog
462 462
463 463 * mmapindexthreshold:
464 464 minimal index size at which to start using mmap
465 465
466 466 * force_nodemap:
467 467 force the usage of a "development" version of the nodemap code
468 468 """
469 469 mmapindexthreshold = None
470 470 opts = self.opener.options
471 471
472 472 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
473 473 new_header = CHANGELOGV2
474 474 elif b'revlogv2' in opts:
475 475 new_header = REVLOGV2
476 476 elif b'revlogv1' in opts:
477 477 new_header = REVLOGV1 | FLAG_INLINE_DATA
478 478 if b'generaldelta' in opts:
479 479 new_header |= FLAG_GENERALDELTA
480 480 elif b'revlogv0' in self.opener.options:
481 481 new_header = REVLOGV0
482 482 else:
483 483 new_header = REVLOG_DEFAULT_VERSION
484 484
485 485 if b'chunkcachesize' in opts:
486 486 self._chunkcachesize = opts[b'chunkcachesize']
487 487 if b'maxchainlen' in opts:
488 488 self._maxchainlen = opts[b'maxchainlen']
489 489 if b'deltabothparents' in opts:
490 490 self._deltabothparents = opts[b'deltabothparents']
491 491 self._lazydelta = bool(opts.get(b'lazydelta', True))
492 492 self._lazydeltabase = False
493 493 if self._lazydelta:
494 494 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
495 495 if b'compengine' in opts:
496 496 self._compengine = opts[b'compengine']
497 497 if b'zlib.level' in opts:
498 498 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
499 499 if b'zstd.level' in opts:
500 500 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
501 501 if b'maxdeltachainspan' in opts:
502 502 self._maxdeltachainspan = opts[b'maxdeltachainspan']
503 503 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
504 504 mmapindexthreshold = opts[b'mmapindexthreshold']
505 505 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
506 506 withsparseread = bool(opts.get(b'with-sparse-read', False))
507 507 # sparse-revlog forces sparse-read
508 508 self._withsparseread = self._sparserevlog or withsparseread
509 509 if b'sparse-read-density-threshold' in opts:
510 510 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
511 511 if b'sparse-read-min-gap-size' in opts:
512 512 self._srmingapsize = opts[b'sparse-read-min-gap-size']
513 513 if opts.get(b'enableellipsis'):
514 514 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
515 515
516 516 # revlog v0 doesn't have flag processors
517 517 for flag, processor in pycompat.iteritems(
518 518 opts.get(b'flagprocessors', {})
519 519 ):
520 520 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
521 521
522 522 if self._chunkcachesize <= 0:
523 523 raise error.RevlogError(
524 524 _(b'revlog chunk cache size %r is not greater than 0')
525 525 % self._chunkcachesize
526 526 )
527 527 elif self._chunkcachesize & (self._chunkcachesize - 1):
528 528 raise error.RevlogError(
529 529 _(b'revlog chunk cache size %r is not a power of 2')
530 530 % self._chunkcachesize
531 531 )
532 532 force_nodemap = opts.get(b'devel-force-nodemap', False)
533 533 return new_header, mmapindexthreshold, force_nodemap
534 534
535 535 def _get_data(self, filepath, mmap_threshold, size=None):
536 536 """return a file content with or without mmap
537 537
538 538 If the file is missing, return the empty string"""
539 539 try:
540 540 with self.opener(filepath) as fp:
541 541 if mmap_threshold is not None:
542 542 file_size = self.opener.fstat(fp).st_size
543 543 if file_size >= mmap_threshold:
544 544 if size is not None:
545 545 # avoid potential mmap crash
546 546 size = min(file_size, size)
547 547 # TODO: should call .close() to release resources without
548 548 # relying on Python GC
549 549 if size is None:
550 550 return util.buffer(util.mmapread(fp))
551 551 else:
552 552 return util.buffer(util.mmapread(fp, size))
553 553 if size is None:
554 554 return fp.read()
555 555 else:
556 556 return fp.read(size)
557 557 except IOError as inst:
558 558 if inst.errno != errno.ENOENT:
559 559 raise
560 560 return b''
561 561
562 562 def _loadindex(self):
563 563
564 564 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
565 565
566 566 if self.postfix is not None:
567 567 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
568 568 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
569 569 entry_point = b'%s.i.a' % self.radix
570 570 else:
571 571 entry_point = b'%s.i' % self.radix
572 572
573 573 entry_data = b''
574 574 self._initempty = True
575 575 entry_data = self._get_data(entry_point, mmapindexthreshold)
576 576 if len(entry_data) > 0:
577 577 header = INDEX_HEADER.unpack(entry_data[:4])[0]
578 578 self._initempty = False
579 579 else:
580 580 header = new_header
581 581
582 582 self._format_flags = header & ~0xFFFF
583 583 self._format_version = header & 0xFFFF
584 584
585 585 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
586 586 if supported_flags is None:
587 587 msg = _(b'unknown version (%d) in revlog %s')
588 588 msg %= (self._format_version, self.display_id)
589 589 raise error.RevlogError(msg)
590 590 elif self._format_flags & ~supported_flags:
591 591 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
592 592 display_flag = self._format_flags >> 16
593 593 msg %= (display_flag, self._format_version, self.display_id)
594 594 raise error.RevlogError(msg)
595 595
596 596 features = FEATURES_BY_VERSION[self._format_version]
597 597 self._inline = features[b'inline'](self._format_flags)
598 598 self._generaldelta = features[b'generaldelta'](self._format_flags)
599 599 self.hassidedata = features[b'sidedata']
600 600
601 601 if not features[b'docket']:
602 602 self._indexfile = entry_point
603 603 index_data = entry_data
604 604 else:
605 605 self._docket_file = entry_point
606 606 if self._initempty:
607 607 self._docket = docketutil.default_docket(self, header)
608 608 else:
609 609 self._docket = docketutil.parse_docket(
610 610 self, entry_data, use_pending=self._trypending
611 611 )
612 612 self._indexfile = self._docket.index_filepath()
613 613 index_data = b''
614 614 index_size = self._docket.index_end
615 615 if index_size > 0:
616 616 index_data = self._get_data(
617 617 self._indexfile, mmapindexthreshold, size=index_size
618 618 )
619 619 if len(index_data) < index_size:
620 620 msg = _(b'too few index data for %s: got %d, expected %d')
621 621 msg %= (self.display_id, len(index_data), index_size)
622 622 raise error.RevlogError(msg)
623 623
624 624 self._inline = False
625 625 # generaldelta implied by version 2 revlogs.
626 626 self._generaldelta = True
627 627 # the logic for persistent nodemap will be dealt with within the
628 628 # main docket, so disable it for now.
629 629 self._nodemap_file = None
630 630
631 if self.postfix is None:
631 if self._docket is not None:
632 self._datafile = self._docket.data_filepath()
633 elif self.postfix is None:
632 634 self._datafile = b'%s.d' % self.radix
633 635 else:
634 636 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
635 637
636 638 self.nodeconstants = sha1nodeconstants
637 639 self.nullid = self.nodeconstants.nullid
638 640
639 641 # sparse-revlog can't be on without general-delta (issue6056)
640 642 if not self._generaldelta:
641 643 self._sparserevlog = False
642 644
643 645 self._storedeltachains = True
644 646
645 647 devel_nodemap = (
646 648 self._nodemap_file
647 649 and force_nodemap
648 650 and parse_index_v1_nodemap is not None
649 651 )
650 652
651 653 use_rust_index = False
652 654 if rustrevlog is not None:
653 655 if self._nodemap_file is not None:
654 656 use_rust_index = True
655 657 else:
656 658 use_rust_index = self.opener.options.get(b'rust.index')
657 659
658 660 self._parse_index = parse_index_v1
659 661 if self._format_version == REVLOGV0:
660 662 self._parse_index = revlogv0.parse_index_v0
661 663 elif self._format_version == REVLOGV2:
662 664 self._parse_index = parse_index_v2
663 665 elif self._format_version == CHANGELOGV2:
664 666 self._parse_index = parse_index_cl_v2
665 667 elif devel_nodemap:
666 668 self._parse_index = parse_index_v1_nodemap
667 669 elif use_rust_index:
668 670 self._parse_index = parse_index_v1_mixed
669 671 try:
670 672 d = self._parse_index(index_data, self._inline)
671 673 index, _chunkcache = d
672 674 use_nodemap = (
673 675 not self._inline
674 676 and self._nodemap_file is not None
675 677 and util.safehasattr(index, 'update_nodemap_data')
676 678 )
677 679 if use_nodemap:
678 680 nodemap_data = nodemaputil.persisted_data(self)
679 681 if nodemap_data is not None:
680 682 docket = nodemap_data[0]
681 683 if (
682 684 len(d[0]) > docket.tip_rev
683 685 and d[0][docket.tip_rev][7] == docket.tip_node
684 686 ):
685 687 # no changelog tampering
686 688 self._nodemap_docket = docket
687 689 index.update_nodemap_data(*nodemap_data)
688 690 except (ValueError, IndexError):
689 691 raise error.RevlogError(
690 692 _(b"index %s is corrupted") % self.display_id
691 693 )
692 694 self.index, self._chunkcache = d
693 695 if not self._chunkcache:
694 696 self._chunkclear()
695 697 # revnum -> (chain-length, sum-delta-length)
696 698 self._chaininfocache = util.lrucachedict(500)
697 699 # revlog header -> revlog compressor
698 700 self._decompressors = {}
699 701
700 702 @util.propertycache
701 703 def revlog_kind(self):
702 704 return self.target[0]
703 705
704 706 @util.propertycache
705 707 def display_id(self):
706 708 """The public facing "ID" of the revlog that we use in message"""
707 709 # Maybe we should build a user facing representation of
708 710 # revlog.target instead of using `self.radix`
709 711 return self.radix
710 712
711 713 def _get_decompressor(self, t):
712 714 try:
713 715 compressor = self._decompressors[t]
714 716 except KeyError:
715 717 try:
716 718 engine = util.compengines.forrevlogheader(t)
717 719 compressor = engine.revlogcompressor(self._compengineopts)
718 720 self._decompressors[t] = compressor
719 721 except KeyError:
720 722 raise error.RevlogError(
721 723 _(b'unknown compression type %s') % binascii.hexlify(t)
722 724 )
723 725 return compressor
724 726
725 727 @util.propertycache
726 728 def _compressor(self):
727 729 engine = util.compengines[self._compengine]
728 730 return engine.revlogcompressor(self._compengineopts)
729 731
730 732 @util.propertycache
731 733 def _decompressor(self):
732 734 """the default decompressor"""
733 735 if self._docket is None:
734 736 return None
735 737 t = self._docket.default_compression_header
736 738 c = self._get_decompressor(t)
737 739 return c.decompress
738 740
739 741 def _indexfp(self):
740 742 """file object for the revlog's index file"""
741 743 return self.opener(self._indexfile, mode=b"r")
742 744
743 745 def __index_write_fp(self):
744 746 # You should not use this directly; use `_writing` instead
745 747 try:
746 748 f = self.opener(
747 749 self._indexfile, mode=b"r+", checkambig=self._checkambig
748 750 )
749 751 if self._docket is None:
750 752 f.seek(0, os.SEEK_END)
751 753 else:
752 754 f.seek(self._docket.index_end, os.SEEK_SET)
753 755 return f
754 756 except IOError as inst:
755 757 if inst.errno != errno.ENOENT:
756 758 raise
757 759 return self.opener(
758 760 self._indexfile, mode=b"w+", checkambig=self._checkambig
759 761 )
760 762
761 763 def __index_new_fp(self):
762 764 # You should not use this unless you are upgrading from inline revlog
763 765 return self.opener(
764 766 self._indexfile,
765 767 mode=b"w",
766 768 checkambig=self._checkambig,
767 769 atomictemp=True,
768 770 )
769 771
770 772 def _datafp(self, mode=b'r'):
771 773 """file object for the revlog's data file"""
772 774 return self.opener(self._datafile, mode=mode)
773 775
774 776 @contextlib.contextmanager
775 777 def _datareadfp(self, existingfp=None):
776 778 """file object suitable to read data"""
777 779 # Use explicit file handle, if given.
778 780 if existingfp is not None:
779 781 yield existingfp
780 782
781 783 # Use a file handle being actively used for writes, if available.
782 784 # There is some danger in doing this because reads will seek the
783 785 # file. However, _writeentry() performs a SEEK_END before all writes,
784 786 # so we should be safe.
785 787 elif self._writinghandles:
786 788 if self._inline:
787 789 yield self._writinghandles[0]
788 790 else:
789 791 yield self._writinghandles[1]
790 792
791 793 # Otherwise open a new file handle.
792 794 else:
793 795 if self._inline:
794 796 func = self._indexfp
795 797 else:
796 798 func = self._datafp
797 799 with func() as fp:
798 800 yield fp
799 801
800 802 def tiprev(self):
801 803 return len(self.index) - 1
802 804
803 805 def tip(self):
804 806 return self.node(self.tiprev())
805 807
806 808 def __contains__(self, rev):
807 809 return 0 <= rev < len(self)
808 810
809 811 def __len__(self):
810 812 return len(self.index)
811 813
812 814 def __iter__(self):
813 815 return iter(pycompat.xrange(len(self)))
814 816
815 817 def revs(self, start=0, stop=None):
816 818 """iterate over all rev in this revlog (from start to stop)"""
817 819 return storageutil.iterrevs(len(self), start=start, stop=stop)
818 820
819 821 @property
820 822 def nodemap(self):
821 823 msg = (
822 824 b"revlog.nodemap is deprecated, "
823 825 b"use revlog.index.[has_node|rev|get_rev]"
824 826 )
825 827 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
826 828 return self.index.nodemap
827 829
828 830 @property
829 831 def _nodecache(self):
830 832 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
831 833 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
832 834 return self.index.nodemap
833 835
834 836 def hasnode(self, node):
835 837 try:
836 838 self.rev(node)
837 839 return True
838 840 except KeyError:
839 841 return False
840 842
841 843 def candelta(self, baserev, rev):
842 844 """whether two revisions (baserev, rev) can be delta-ed or not"""
843 845 # Disable delta if either rev requires a content-changing flag
844 846 # processor (ex. LFS). This is because such a flag processor can alter
845 847 # the rawtext content that the delta will be based on, and two clients
846 848 # could have the same revlog node with different flags (i.e. different
847 849 # rawtext contents) and the delta could be incompatible.
848 850 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
849 851 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
850 852 ):
851 853 return False
852 854 return True
853 855
854 856 def update_caches(self, transaction):
855 857 if self._nodemap_file is not None:
856 858 if transaction is None:
857 859 nodemaputil.update_persistent_nodemap(self)
858 860 else:
859 861 nodemaputil.setup_persistent_nodemap(transaction, self)
860 862
861 863 def clearcaches(self):
862 864 self._revisioncache = None
863 865 self._chainbasecache.clear()
864 866 self._chunkcache = (0, b'')
865 867 self._pcache = {}
866 868 self._nodemap_docket = None
867 869 self.index.clearcaches()
868 870 # The python code is the one responsible for validating the docket, so we
869 871 # end up having to refresh it here.
870 872 use_nodemap = (
871 873 not self._inline
872 874 and self._nodemap_file is not None
873 875 and util.safehasattr(self.index, 'update_nodemap_data')
874 876 )
875 877 if use_nodemap:
876 878 nodemap_data = nodemaputil.persisted_data(self)
877 879 if nodemap_data is not None:
878 880 self._nodemap_docket = nodemap_data[0]
879 881 self.index.update_nodemap_data(*nodemap_data)
880 882
881 883 def rev(self, node):
882 884 try:
883 885 return self.index.rev(node)
884 886 except TypeError:
885 887 raise
886 888 except error.RevlogError:
887 889 # parsers.c radix tree lookup failed
888 890 if (
889 891 node == self.nodeconstants.wdirid
890 892 or node in self.nodeconstants.wdirfilenodeids
891 893 ):
892 894 raise error.WdirUnsupported
893 895 raise error.LookupError(node, self.display_id, _(b'no node'))
894 896
895 897 # Accessors for index entries.
896 898
897 899 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
898 900 # are flags.
899 901 def start(self, rev):
900 902 return int(self.index[rev][0] >> 16)
901 903
902 904 def flags(self, rev):
903 905 return self.index[rev][0] & 0xFFFF
904 906
905 907 def length(self, rev):
906 908 return self.index[rev][1]
907 909
908 910 def sidedata_length(self, rev):
909 911 if not self.hassidedata:
910 912 return 0
911 913 return self.index[rev][9]
912 914
913 915 def rawsize(self, rev):
914 916 """return the length of the uncompressed text for a given revision"""
915 917 l = self.index[rev][2]
916 918 if l >= 0:
917 919 return l
918 920
919 921 t = self.rawdata(rev)
920 922 return len(t)
921 923
922 924 def size(self, rev):
923 925 """length of non-raw text (processed by a "read" flag processor)"""
924 926 # fast path: if no "read" flag processor could change the content,
925 927 # size is rawsize. note: ELLIPSIS is known to not change the content.
926 928 flags = self.flags(rev)
927 929 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
928 930 return self.rawsize(rev)
929 931
930 932 return len(self.revision(rev, raw=False))
931 933
932 934 def chainbase(self, rev):
933 935 base = self._chainbasecache.get(rev)
934 936 if base is not None:
935 937 return base
936 938
937 939 index = self.index
938 940 iterrev = rev
939 941 base = index[iterrev][3]
940 942 while base != iterrev:
941 943 iterrev = base
942 944 base = index[iterrev][3]
943 945
944 946 self._chainbasecache[rev] = base
945 947 return base
946 948
947 949 def linkrev(self, rev):
948 950 return self.index[rev][4]
949 951
950 952 def parentrevs(self, rev):
951 953 try:
952 954 entry = self.index[rev]
953 955 except IndexError:
954 956 if rev == wdirrev:
955 957 raise error.WdirUnsupported
956 958 raise
957 959 if entry[5] == nullrev:
958 960 return entry[6], entry[5]
959 961 else:
960 962 return entry[5], entry[6]
961 963
962 964 # fast parentrevs(rev) where rev isn't filtered
963 965 _uncheckedparentrevs = parentrevs
964 966
965 967 def node(self, rev):
966 968 try:
967 969 return self.index[rev][7]
968 970 except IndexError:
969 971 if rev == wdirrev:
970 972 raise error.WdirUnsupported
971 973 raise
972 974
973 975 # Derived from index values.
974 976
975 977 def end(self, rev):
976 978 return self.start(rev) + self.length(rev)
977 979
978 980 def parents(self, node):
979 981 i = self.index
980 982 d = i[self.rev(node)]
981 983 # inline node() to avoid function call overhead
982 984 if d[5] == self.nullid:
983 985 return i[d[6]][7], i[d[5]][7]
984 986 else:
985 987 return i[d[5]][7], i[d[6]][7]
986 988
987 989 def chainlen(self, rev):
988 990 return self._chaininfo(rev)[0]
989 991
990 992 def _chaininfo(self, rev):
991 993 chaininfocache = self._chaininfocache
992 994 if rev in chaininfocache:
993 995 return chaininfocache[rev]
994 996 index = self.index
995 997 generaldelta = self._generaldelta
996 998 iterrev = rev
997 999 e = index[iterrev]
998 1000 clen = 0
999 1001 compresseddeltalen = 0
1000 1002 while iterrev != e[3]:
1001 1003 clen += 1
1002 1004 compresseddeltalen += e[1]
1003 1005 if generaldelta:
1004 1006 iterrev = e[3]
1005 1007 else:
1006 1008 iterrev -= 1
1007 1009 if iterrev in chaininfocache:
1008 1010 t = chaininfocache[iterrev]
1009 1011 clen += t[0]
1010 1012 compresseddeltalen += t[1]
1011 1013 break
1012 1014 e = index[iterrev]
1013 1015 else:
1014 1016 # Add text length of base since decompressing that also takes
1015 1017 # work. For cache hits the length is already included.
1016 1018 compresseddeltalen += e[1]
1017 1019 r = (clen, compresseddeltalen)
1018 1020 chaininfocache[rev] = r
1019 1021 return r
1020 1022
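# Illustrative use (not part of the upstream file; `rl` is a revlog instance):
# _chaininfo() returns the (chain-length, sum-of-compressed-delta-lengths)
# pair whose first element chainlen() above exposes.
#
#   clen, delta_bytes = rl._chaininfo(rev)
#   assert clen == rl.chainlen(rev)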
1021 1023 def _deltachain(self, rev, stoprev=None):
1022 1024 """Obtain the delta chain for a revision.
1023 1025
1024 1026 ``stoprev`` specifies a revision to stop at. If not specified, we
1025 1027 stop at the base of the chain.
1026 1028
1027 1029 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1028 1030 revs in ascending order and ``stopped`` is a bool indicating whether
1029 1031 ``stoprev`` was hit.
1030 1032 """
1031 1033 # Try C implementation.
1032 1034 try:
1033 1035 return self.index.deltachain(rev, stoprev, self._generaldelta)
1034 1036 except AttributeError:
1035 1037 pass
1036 1038
1037 1039 chain = []
1038 1040
1039 1041 # Alias to prevent attribute lookup in tight loop.
1040 1042 index = self.index
1041 1043 generaldelta = self._generaldelta
1042 1044
1043 1045 iterrev = rev
1044 1046 e = index[iterrev]
1045 1047 while iterrev != e[3] and iterrev != stoprev:
1046 1048 chain.append(iterrev)
1047 1049 if generaldelta:
1048 1050 iterrev = e[3]
1049 1051 else:
1050 1052 iterrev -= 1
1051 1053 e = index[iterrev]
1052 1054
1053 1055 if iterrev == stoprev:
1054 1056 stopped = True
1055 1057 else:
1056 1058 chain.append(iterrev)
1057 1059 stopped = False
1058 1060
1059 1061 chain.reverse()
1060 1062 return chain, stopped
1061 1063
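# Illustrative sketch (assuming `rl` is a revlog instance) of consuming the
# pair returned by _deltachain() above:
#
#   chain, stopped = rl._deltachain(rev)
#   # `chain` lists revisions from the delta-chain base up to `rev` in
#   # ascending order; `stopped` tells whether the optional `stoprev`
#   # cut the walk short before reaching the base.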
1062 1064 def ancestors(self, revs, stoprev=0, inclusive=False):
1063 1065 """Generate the ancestors of 'revs' in reverse revision order.
1064 1066 Does not generate revs lower than stoprev.
1065 1067
1066 1068 See the documentation for ancestor.lazyancestors for more details."""
1067 1069
1068 1070 # first, make sure start revisions aren't filtered
1069 1071 revs = list(revs)
1070 1072 checkrev = self.node
1071 1073 for r in revs:
1072 1074 checkrev(r)
1073 1075 # and we're sure ancestors aren't filtered as well
1074 1076
1075 1077 if rustancestor is not None and self.index.rust_ext_compat:
1076 1078 lazyancestors = rustancestor.LazyAncestors
1077 1079 arg = self.index
1078 1080 else:
1079 1081 lazyancestors = ancestor.lazyancestors
1080 1082 arg = self._uncheckedparentrevs
1081 1083 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1082 1084
1083 1085 def descendants(self, revs):
1084 1086 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1085 1087
1086 1088 def findcommonmissing(self, common=None, heads=None):
1087 1089 """Return a tuple of the ancestors of common and the ancestors of heads
1088 1090 that are not ancestors of common. In revset terminology, we return the
1089 1091 tuple:
1090 1092
1091 1093 ::common, (::heads) - (::common)
1092 1094
1093 1095 The list is sorted by revision number, meaning it is
1094 1096 topologically sorted.
1095 1097
1096 1098 'heads' and 'common' are both lists of node IDs. If heads is
1097 1099 not supplied, uses all of the revlog's heads. If common is not
1098 1100 supplied, uses nullid."""
1099 1101 if common is None:
1100 1102 common = [self.nullid]
1101 1103 if heads is None:
1102 1104 heads = self.heads()
1103 1105
1104 1106 common = [self.rev(n) for n in common]
1105 1107 heads = [self.rev(n) for n in heads]
1106 1108
1107 1109 # we want the ancestors, but inclusive
1108 1110 class lazyset(object):
1109 1111 def __init__(self, lazyvalues):
1110 1112 self.addedvalues = set()
1111 1113 self.lazyvalues = lazyvalues
1112 1114
1113 1115 def __contains__(self, value):
1114 1116 return value in self.addedvalues or value in self.lazyvalues
1115 1117
1116 1118 def __iter__(self):
1117 1119 added = self.addedvalues
1118 1120 for r in added:
1119 1121 yield r
1120 1122 for r in self.lazyvalues:
1121 1123 if r not in added:
1122 1124 yield r
1123 1125
1124 1126 def add(self, value):
1125 1127 self.addedvalues.add(value)
1126 1128
1127 1129 def update(self, values):
1128 1130 self.addedvalues.update(values)
1129 1131
1130 1132 has = lazyset(self.ancestors(common))
1131 1133 has.add(nullrev)
1132 1134 has.update(common)
1133 1135
1134 1136 # take all ancestors from heads that aren't in has
1135 1137 missing = set()
1136 1138 visit = collections.deque(r for r in heads if r not in has)
1137 1139 while visit:
1138 1140 r = visit.popleft()
1139 1141 if r in missing:
1140 1142 continue
1141 1143 else:
1142 1144 missing.add(r)
1143 1145 for p in self.parentrevs(r):
1144 1146 if p not in has:
1145 1147 visit.append(p)
1146 1148 missing = list(missing)
1147 1149 missing.sort()
1148 1150 return has, [self.node(miss) for miss in missing]
1149 1151
1150 1152 def incrementalmissingrevs(self, common=None):
1151 1153 """Return an object that can be used to incrementally compute the
1152 1154 revision numbers of the ancestors of arbitrary sets that are not
1153 1155 ancestors of common. This is an ancestor.incrementalmissingancestors
1154 1156 object.
1155 1157
1156 1158 'common' is a list of revision numbers. If common is not supplied, uses
1157 1159 nullrev.
1158 1160 """
1159 1161 if common is None:
1160 1162 common = [nullrev]
1161 1163
1162 1164 if rustancestor is not None and self.index.rust_ext_compat:
1163 1165 return rustancestor.MissingAncestors(self.index, common)
1164 1166 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1165 1167
1166 1168 def findmissingrevs(self, common=None, heads=None):
1167 1169 """Return the revision numbers of the ancestors of heads that
1168 1170 are not ancestors of common.
1169 1171
1170 1172 More specifically, return a list of revision numbers corresponding to
1171 1173 nodes N such that every N satisfies the following constraints:
1172 1174
1173 1175 1. N is an ancestor of some node in 'heads'
1174 1176 2. N is not an ancestor of any node in 'common'
1175 1177
1176 1178 The list is sorted by revision number, meaning it is
1177 1179 topologically sorted.
1178 1180
1179 1181 'heads' and 'common' are both lists of revision numbers. If heads is
1180 1182 not supplied, uses all of the revlog's heads. If common is not
1181 1183 supplied, uses nullid."""
1182 1184 if common is None:
1183 1185 common = [nullrev]
1184 1186 if heads is None:
1185 1187 heads = self.headrevs()
1186 1188
1187 1189 inc = self.incrementalmissingrevs(common=common)
1188 1190 return inc.missingancestors(heads)
1189 1191
1190 1192 def findmissing(self, common=None, heads=None):
1191 1193 """Return the ancestors of heads that are not ancestors of common.
1192 1194
1193 1195 More specifically, return a list of nodes N such that every N
1194 1196 satisfies the following constraints:
1195 1197
1196 1198 1. N is an ancestor of some node in 'heads'
1197 1199 2. N is not an ancestor of any node in 'common'
1198 1200
1199 1201 The list is sorted by revision number, meaning it is
1200 1202 topologically sorted.
1201 1203
1202 1204 'heads' and 'common' are both lists of node IDs. If heads is
1203 1205 not supplied, uses all of the revlog's heads. If common is not
1204 1206 supplied, uses nullid."""
1205 1207 if common is None:
1206 1208 common = [self.nullid]
1207 1209 if heads is None:
1208 1210 heads = self.heads()
1209 1211
1210 1212 common = [self.rev(n) for n in common]
1211 1213 heads = [self.rev(n) for n in heads]
1212 1214
1213 1215 inc = self.incrementalmissingrevs(common=common)
1214 1216 return [self.node(r) for r in inc.missingancestors(heads)]
1215 1217
1216 1218 def nodesbetween(self, roots=None, heads=None):
1217 1219 """Return a topological path from 'roots' to 'heads'.
1218 1220
1219 1221 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1220 1222 topologically sorted list of all nodes N that satisfy both of
1221 1223 these constraints:
1222 1224
1223 1225 1. N is a descendant of some node in 'roots'
1224 1226 2. N is an ancestor of some node in 'heads'
1225 1227
1226 1228 Every node is considered to be both a descendant and an ancestor
1227 1229 of itself, so every reachable node in 'roots' and 'heads' will be
1228 1230 included in 'nodes'.
1229 1231
1230 1232 'outroots' is the list of reachable nodes in 'roots', i.e., the
1231 1233 subset of 'roots' that is returned in 'nodes'. Likewise,
1232 1234 'outheads' is the subset of 'heads' that is also in 'nodes'.
1233 1235
1234 1236 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1235 1237 unspecified, uses nullid as the only root. If 'heads' is
1236 1238 unspecified, uses list of all of the revlog's heads."""
1237 1239 nonodes = ([], [], [])
1238 1240 if roots is not None:
1239 1241 roots = list(roots)
1240 1242 if not roots:
1241 1243 return nonodes
1242 1244 lowestrev = min([self.rev(n) for n in roots])
1243 1245 else:
1244 1246 roots = [self.nullid] # Everybody's a descendant of nullid
1245 1247 lowestrev = nullrev
1246 1248 if (lowestrev == nullrev) and (heads is None):
1247 1249 # We want _all_ the nodes!
1248 1250 return (
1249 1251 [self.node(r) for r in self],
1250 1252 [self.nullid],
1251 1253 list(self.heads()),
1252 1254 )
1253 1255 if heads is None:
1254 1256 # All nodes are ancestors, so the latest ancestor is the last
1255 1257 # node.
1256 1258 highestrev = len(self) - 1
1257 1259 # Set ancestors to None to signal that every node is an ancestor.
1258 1260 ancestors = None
1259 1261 # Set heads to an empty dictionary for later discovery of heads
1260 1262 heads = {}
1261 1263 else:
1262 1264 heads = list(heads)
1263 1265 if not heads:
1264 1266 return nonodes
1265 1267 ancestors = set()
1266 1268 # Turn heads into a dictionary so we can remove 'fake' heads.
1267 1269 # Also, later we will be using it to filter out the heads we can't
1268 1270 # find from roots.
1269 1271 heads = dict.fromkeys(heads, False)
1270 1272 # Start at the top and keep marking parents until we're done.
1271 1273 nodestotag = set(heads)
1272 1274 # Remember where the top was so we can use it as a limit later.
1273 1275 highestrev = max([self.rev(n) for n in nodestotag])
1274 1276 while nodestotag:
1275 1277 # grab a node to tag
1276 1278 n = nodestotag.pop()
1277 1279 # Never tag nullid
1278 1280 if n == self.nullid:
1279 1281 continue
1280 1282 # A node's revision number represents its place in a
1281 1283 # topologically sorted list of nodes.
1282 1284 r = self.rev(n)
1283 1285 if r >= lowestrev:
1284 1286 if n not in ancestors:
1285 1287 # If we are possibly a descendant of one of the roots
1286 1288 # and we haven't already been marked as an ancestor
1287 1289 ancestors.add(n) # Mark as ancestor
1288 1290 # Add non-nullid parents to list of nodes to tag.
1289 1291 nodestotag.update(
1290 1292 [p for p in self.parents(n) if p != self.nullid]
1291 1293 )
1292 1294 elif n in heads: # We've seen it before, is it a fake head?
1293 1295 # So it is, real heads should not be the ancestors of
1294 1296 # any other heads.
1295 1297 heads.pop(n)
1296 1298 if not ancestors:
1297 1299 return nonodes
1298 1300 # Now that we have our set of ancestors, we want to remove any
1299 1301 # roots that are not ancestors.
1300 1302
1301 1303 # If one of the roots was nullid, everything is included anyway.
1302 1304 if lowestrev > nullrev:
1303 1305 # But, since we weren't, let's recompute the lowest rev to not
1304 1306 # include roots that aren't ancestors.
1305 1307
1306 1308 # Filter out roots that aren't ancestors of heads
1307 1309 roots = [root for root in roots if root in ancestors]
1308 1310 # Recompute the lowest revision
1309 1311 if roots:
1310 1312 lowestrev = min([self.rev(root) for root in roots])
1311 1313 else:
1312 1314 # No more roots? Return empty list
1313 1315 return nonodes
1314 1316 else:
1315 1317 # We are descending from nullid, and don't need to care about
1316 1318 # any other roots.
1317 1319 lowestrev = nullrev
1318 1320 roots = [self.nullid]
1319 1321 # Transform our roots list into a set.
1320 1322 descendants = set(roots)
1321 1323 # Also, keep the original roots so we can filter out roots that aren't
1322 1324 # 'real' roots (i.e. are descended from other roots).
1323 1325 roots = descendants.copy()
1324 1326 # Our topologically sorted list of output nodes.
1325 1327 orderedout = []
1326 1328 # Don't start at nullid since we don't want nullid in our output list,
1327 1329 # and if nullid shows up in descendants, empty parents will look like
1328 1330 # they're descendants.
1329 1331 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1330 1332 n = self.node(r)
1331 1333 isdescendant = False
1332 1334 if lowestrev == nullrev: # Everybody is a descendant of nullid
1333 1335 isdescendant = True
1334 1336 elif n in descendants:
1335 1337 # n is already a descendant
1336 1338 isdescendant = True
1337 1339 # This check only needs to be done here because all the roots
1338 1340 # will start being marked as descendants before the loop.
1339 1341 if n in roots:
1340 1342 # If n was a root, check if it's a 'real' root.
1341 1343 p = tuple(self.parents(n))
1342 1344 # If any of its parents are descendants, it's not a root.
1343 1345 if (p[0] in descendants) or (p[1] in descendants):
1344 1346 roots.remove(n)
1345 1347 else:
1346 1348 p = tuple(self.parents(n))
1347 1349 # A node is a descendant if either of its parents is a
1348 1350 # descendant. (We seeded the descendants set with the roots
1349 1351 # up there, remember?)
1350 1352 if (p[0] in descendants) or (p[1] in descendants):
1351 1353 descendants.add(n)
1352 1354 isdescendant = True
1353 1355 if isdescendant and ((ancestors is None) or (n in ancestors)):
1354 1356 # Only include nodes that are both descendants and ancestors.
1355 1357 orderedout.append(n)
1356 1358 if (ancestors is not None) and (n in heads):
1357 1359 # We're trying to figure out which heads are reachable
1358 1360 # from roots.
1359 1361 # Mark this head as having been reached
1360 1362 heads[n] = True
1361 1363 elif ancestors is None:
1362 1364 # Otherwise, we're trying to discover the heads.
1363 1365 # Assume this is a head because if it isn't, the next step
1364 1366 # will eventually remove it.
1365 1367 heads[n] = True
1366 1368 # But, obviously its parents aren't.
1367 1369 for p in self.parents(n):
1368 1370 heads.pop(p, None)
1369 1371 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1370 1372 roots = list(roots)
1371 1373 assert orderedout
1372 1374 assert roots
1373 1375 assert heads
1374 1376 return (orderedout, roots, heads)
1375 1377
1376 1378 def headrevs(self, revs=None):
1377 1379 if revs is None:
1378 1380 try:
1379 1381 return self.index.headrevs()
1380 1382 except AttributeError:
1381 1383 return self._headrevs()
1382 1384 if rustdagop is not None and self.index.rust_ext_compat:
1383 1385 return rustdagop.headrevs(self.index, revs)
1384 1386 return dagop.headrevs(revs, self._uncheckedparentrevs)
1385 1387
1386 1388 def computephases(self, roots):
1387 1389 return self.index.computephasesmapsets(roots)
1388 1390
1389 1391 def _headrevs(self):
1390 1392 count = len(self)
1391 1393 if not count:
1392 1394 return [nullrev]
1393 1395 # we won't iterate over filtered revs so nobody is a head at start
1394 1396 ishead = [0] * (count + 1)
1395 1397 index = self.index
1396 1398 for r in self:
1397 1399 ishead[r] = 1 # I may be a head
1398 1400 e = index[r]
1399 1401 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1400 1402 return [r for r, val in enumerate(ishead) if val]
1401 1403
1402 1404 def heads(self, start=None, stop=None):
1403 1405 """return the list of all nodes that have no children
1404 1406
1405 1407 if start is specified, only heads that are descendants of
1406 1408 start will be returned
1407 1409 if stop is specified, it will consider all the revs from stop
1408 1410 as if they had no children
1409 1411 """
1410 1412 if start is None and stop is None:
1411 1413 if not len(self):
1412 1414 return [self.nullid]
1413 1415 return [self.node(r) for r in self.headrevs()]
1414 1416
1415 1417 if start is None:
1416 1418 start = nullrev
1417 1419 else:
1418 1420 start = self.rev(start)
1419 1421
1420 1422 stoprevs = {self.rev(n) for n in stop or []}
1421 1423
1422 1424 revs = dagop.headrevssubset(
1423 1425 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1424 1426 )
1425 1427
1426 1428 return [self.node(rev) for rev in revs]
1427 1429
1428 1430 def children(self, node):
1429 1431 """find the children of a given node"""
1430 1432 c = []
1431 1433 p = self.rev(node)
1432 1434 for r in self.revs(start=p + 1):
1433 1435 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1434 1436 if prevs:
1435 1437 for pr in prevs:
1436 1438 if pr == p:
1437 1439 c.append(self.node(r))
1438 1440 elif p == nullrev:
1439 1441 c.append(self.node(r))
1440 1442 return c
1441 1443
1442 1444 def commonancestorsheads(self, a, b):
1443 1445 """calculate all the heads of the common ancestors of nodes a and b"""
1444 1446 a, b = self.rev(a), self.rev(b)
1445 1447 ancs = self._commonancestorsheads(a, b)
1446 1448 return pycompat.maplist(self.node, ancs)
1447 1449
1448 1450 def _commonancestorsheads(self, *revs):
1449 1451 """calculate all the heads of the common ancestors of revs"""
1450 1452 try:
1451 1453 ancs = self.index.commonancestorsheads(*revs)
1452 1454 except (AttributeError, OverflowError): # C implementation failed
1453 1455 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1454 1456 return ancs
1455 1457
1456 1458 def isancestor(self, a, b):
1457 1459 """return True if node a is an ancestor of node b
1458 1460
1459 1461 A revision is considered an ancestor of itself."""
1460 1462 a, b = self.rev(a), self.rev(b)
1461 1463 return self.isancestorrev(a, b)
1462 1464
1463 1465 def isancestorrev(self, a, b):
1464 1466 """return True if revision a is an ancestor of revision b
1465 1467
1466 1468 A revision is considered an ancestor of itself.
1467 1469
1468 1470 The implementation of this is trivial but the use of
1469 1471 reachableroots is not."""
1470 1472 if a == nullrev:
1471 1473 return True
1472 1474 elif a == b:
1473 1475 return True
1474 1476 elif a > b:
1475 1477 return False
1476 1478 return bool(self.reachableroots(a, [b], [a], includepath=False))
1477 1479
1478 1480 def reachableroots(self, minroot, heads, roots, includepath=False):
1479 1481 """return (heads(::(<roots> and <roots>::<heads>)))
1480 1482
1481 1483 If includepath is True, return (<roots>::<heads>)."""
1482 1484 try:
1483 1485 return self.index.reachableroots2(
1484 1486 minroot, heads, roots, includepath
1485 1487 )
1486 1488 except AttributeError:
1487 1489 return dagop._reachablerootspure(
1488 1490 self.parentrevs, minroot, roots, heads, includepath
1489 1491 )
1490 1492
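# Illustrative note (not part of the upstream file; `rl` is a revlog
# instance): isancestorrev() above is expressed in terms of reachableroots(),
# e.g.
#
#   # "is revision a an ancestor of revision b?"
#   bool(rl.reachableroots(a, [b], [a], includepath=False))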
1491 1493 def ancestor(self, a, b):
1492 1494 """calculate the "best" common ancestor of nodes a and b"""
1493 1495
1494 1496 a, b = self.rev(a), self.rev(b)
1495 1497 try:
1496 1498 ancs = self.index.ancestors(a, b)
1497 1499 except (AttributeError, OverflowError):
1498 1500 ancs = ancestor.ancestors(self.parentrevs, a, b)
1499 1501 if ancs:
1500 1502 # choose a consistent winner when there's a tie
1501 1503 return min(map(self.node, ancs))
1502 1504 return self.nullid
1503 1505
1504 1506 def _match(self, id):
1505 1507 if isinstance(id, int):
1506 1508 # rev
1507 1509 return self.node(id)
1508 1510 if len(id) == self.nodeconstants.nodelen:
1509 1511 # possibly a binary node
1510 1512 # odds of a binary node being all hex in ASCII are 1 in 10**25
1511 1513 try:
1512 1514 node = id
1513 1515 self.rev(node) # quick search the index
1514 1516 return node
1515 1517 except error.LookupError:
1516 1518 pass # may be partial hex id
1517 1519 try:
1518 1520 # str(rev)
1519 1521 rev = int(id)
1520 1522 if b"%d" % rev != id:
1521 1523 raise ValueError
1522 1524 if rev < 0:
1523 1525 rev = len(self) + rev
1524 1526 if rev < 0 or rev >= len(self):
1525 1527 raise ValueError
1526 1528 return self.node(rev)
1527 1529 except (ValueError, OverflowError):
1528 1530 pass
1529 1531 if len(id) == 2 * self.nodeconstants.nodelen:
1530 1532 try:
1531 1533 # a full hex nodeid?
1532 1534 node = bin(id)
1533 1535 self.rev(node)
1534 1536 return node
1535 1537 except (TypeError, error.LookupError):
1536 1538 pass
1537 1539
1538 1540 def _partialmatch(self, id):
1539 1541 # we don't care about wdirfilenodeids as they should always be full hashes
1540 1542 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1541 1543 ambiguous = False
1542 1544 try:
1543 1545 partial = self.index.partialmatch(id)
1544 1546 if partial and self.hasnode(partial):
1545 1547 if maybewdir:
1546 1548 # single 'ff...' match in radix tree, ambiguous with wdir
1547 1549 ambiguous = True
1548 1550 else:
1549 1551 return partial
1550 1552 elif maybewdir:
1551 1553 # no 'ff...' match in radix tree, wdir identified
1552 1554 raise error.WdirUnsupported
1553 1555 else:
1554 1556 return None
1555 1557 except error.RevlogError:
1556 1558 # parsers.c radix tree lookup gave multiple matches
1557 1559 # fast path: for unfiltered changelog, radix tree is accurate
1558 1560 if not getattr(self, 'filteredrevs', None):
1559 1561 ambiguous = True
1560 1562 # fall through to slow path that filters hidden revisions
1561 1563 except (AttributeError, ValueError):
1562 1564 # we are pure python, or key was too short to search radix tree
1563 1565 pass
1564 1566 if ambiguous:
1565 1567 raise error.AmbiguousPrefixLookupError(
1566 1568 id, self.display_id, _(b'ambiguous identifier')
1567 1569 )
1568 1570
1569 1571 if id in self._pcache:
1570 1572 return self._pcache[id]
1571 1573
1572 1574 if len(id) <= 40:
1573 1575 try:
1574 1576 # hex(node)[:...]
1575 1577 l = len(id) // 2 # grab an even number of digits
1576 1578 prefix = bin(id[: l * 2])
1577 1579 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1578 1580 nl = [
1579 1581 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1580 1582 ]
1581 1583 if self.nodeconstants.nullhex.startswith(id):
1582 1584 nl.append(self.nullid)
1583 1585 if len(nl) > 0:
1584 1586 if len(nl) == 1 and not maybewdir:
1585 1587 self._pcache[id] = nl[0]
1586 1588 return nl[0]
1587 1589 raise error.AmbiguousPrefixLookupError(
1588 1590 id, self.display_id, _(b'ambiguous identifier')
1589 1591 )
1590 1592 if maybewdir:
1591 1593 raise error.WdirUnsupported
1592 1594 return None
1593 1595 except TypeError:
1594 1596 pass
1595 1597
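# A minimal, self-contained sketch (not the revlog code path) of the prefix
# matching idea used by _partialmatch: scan hex node ids for a prefix and
# flag ambiguity. The node list and helper below are illustrative only; the
# real lookup goes through the C/Rust radix tree when it is available.
def match_prefix(hexnodes, prefix):
    candidates = [n for n in hexnodes if n.startswith(prefix)]
    if not candidates:
        return None
    if len(candidates) > 1:
        raise ValueError('ambiguous identifier: %s' % prefix)
    return candidates[0]

nodes = ['a1b2c3d4', 'a1f90000', '77e5a1b2']
assert match_prefix(nodes, 'a1b2') == 'a1b2c3d4'  # unique prefix
assert match_prefix(nodes, 'ffff') is None        # no match at all
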
1596 1598 def lookup(self, id):
1597 1599 """locate a node based on:
1598 1600 - revision number or str(revision number)
1599 1601 - nodeid or subset of hex nodeid
1600 1602 """
1601 1603 n = self._match(id)
1602 1604 if n is not None:
1603 1605 return n
1604 1606 n = self._partialmatch(id)
1605 1607 if n:
1606 1608 return n
1607 1609
1608 1610 raise error.LookupError(id, self.display_id, _(b'no match found'))
1609 1611
1610 1612 def shortest(self, node, minlength=1):
1611 1613 """Find the shortest unambiguous prefix that matches node."""
1612 1614
1613 1615 def isvalid(prefix):
1614 1616 try:
1615 1617 matchednode = self._partialmatch(prefix)
1616 1618 except error.AmbiguousPrefixLookupError:
1617 1619 return False
1618 1620 except error.WdirUnsupported:
1619 1621 # single 'ff...' match
1620 1622 return True
1621 1623 if matchednode is None:
1622 1624 raise error.LookupError(node, self.display_id, _(b'no node'))
1623 1625 return True
1624 1626
1625 1627 def maybewdir(prefix):
1626 1628 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1627 1629
1628 1630 hexnode = hex(node)
1629 1631
1630 1632 def disambiguate(hexnode, minlength):
1631 1633 """Disambiguate against wdirid."""
1632 1634 for length in range(minlength, len(hexnode) + 1):
1633 1635 prefix = hexnode[:length]
1634 1636 if not maybewdir(prefix):
1635 1637 return prefix
1636 1638
1637 1639 if not getattr(self, 'filteredrevs', None):
1638 1640 try:
1639 1641 length = max(self.index.shortest(node), minlength)
1640 1642 return disambiguate(hexnode, length)
1641 1643 except error.RevlogError:
1642 1644 if node != self.nodeconstants.wdirid:
1643 1645 raise error.LookupError(
1644 1646 node, self.display_id, _(b'no node')
1645 1647 )
1646 1648 except AttributeError:
1647 1649 # Fall through to pure code
1648 1650 pass
1649 1651
1650 1652 if node == self.nodeconstants.wdirid:
1651 1653 for length in range(minlength, len(hexnode) + 1):
1652 1654 prefix = hexnode[:length]
1653 1655 if isvalid(prefix):
1654 1656 return prefix
1655 1657
1656 1658 for length in range(minlength, len(hexnode) + 1):
1657 1659 prefix = hexnode[:length]
1658 1660 if isvalid(prefix):
1659 1661 return disambiguate(hexnode, length)
1660 1662
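# A standalone illustration (hedged; simplified from the logic above) of
# finding the shortest prefix of ``node`` that no other node id shares.
# It ignores the wdir special case that ``shortest`` has to handle.
def shortest_unique_prefix(hexnodes, node, minlength=1):
    others = [n for n in hexnodes if n != node]
    for length in range(minlength, len(node) + 1):
        prefix = node[:length]
        if not any(o.startswith(prefix) for o in others):
            return prefix
    return node

nodes = ['a1b2c3d4', 'a1f90000', '77e5a1b2']
assert shortest_unique_prefix(nodes, 'a1b2c3d4') == 'a1b'
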
1661 1663 def cmp(self, node, text):
1662 1664 """compare text with a given file revision
1663 1665
1664 1666 returns True if text is different than what is stored.
1665 1667 """
1666 1668 p1, p2 = self.parents(node)
1667 1669 return storageutil.hashrevisionsha1(text, p1, p2) != node
1668 1670
1669 1671 def _cachesegment(self, offset, data):
1670 1672 """Add a segment to the revlog cache.
1671 1673
1672 1674 Accepts an absolute offset and the data that is at that location.
1673 1675 """
1674 1676 o, d = self._chunkcache
1675 1677 # try to add to existing cache
1676 1678 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1677 1679 self._chunkcache = o, d + data
1678 1680 else:
1679 1681 self._chunkcache = offset, data
1680 1682
1681 1683 def _readsegment(self, offset, length, df=None):
1682 1684 """Load a segment of raw data from the revlog.
1683 1685
1684 1686 Accepts an absolute offset, length to read, and an optional existing
1685 1687 file handle to read from.
1686 1688
1687 1689 If an existing file handle is passed, it will be seeked and the
1688 1690 original seek position will NOT be restored.
1689 1691
1690 1692 Returns a str or buffer of raw byte data.
1691 1693
1692 1694 Raises if the requested number of bytes could not be read.
1693 1695 """
1694 1696 # Cache data both forward and backward around the requested
1695 1697 # data, in a fixed size window. This helps speed up operations
1696 1698 # involving reading the revlog backwards.
1697 1699 cachesize = self._chunkcachesize
1698 1700 realoffset = offset & ~(cachesize - 1)
1699 1701 reallength = (
1700 1702 (offset + length + cachesize) & ~(cachesize - 1)
1701 1703 ) - realoffset
1702 1704 with self._datareadfp(df) as df:
1703 1705 df.seek(realoffset)
1704 1706 d = df.read(reallength)
1705 1707
1706 1708 self._cachesegment(realoffset, d)
1707 1709 if offset != realoffset or reallength != length:
1708 1710 startoffset = offset - realoffset
1709 1711 if len(d) - startoffset < length:
1710 1712 raise error.RevlogError(
1711 1713 _(
1712 1714 b'partial read of revlog %s; expected %d bytes from '
1713 1715 b'offset %d, got %d'
1714 1716 )
1715 1717 % (
1716 1718 self._indexfile if self._inline else self._datafile,
1717 1719 length,
1718 1720 offset,
1719 1721 len(d) - startoffset,
1720 1722 )
1721 1723 )
1722 1724
1723 1725 return util.buffer(d, startoffset, length)
1724 1726
1725 1727 if len(d) < length:
1726 1728 raise error.RevlogError(
1727 1729 _(
1728 1730 b'partial read of revlog %s; expected %d bytes from offset '
1729 1731 b'%d, got %d'
1730 1732 )
1731 1733 % (
1732 1734 self._indexfile if self._inline else self._datafile,
1733 1735 length,
1734 1736 offset,
1735 1737 len(d),
1736 1738 )
1737 1739 )
1738 1740
1739 1741 return d
1740 1742
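# A tiny standalone demonstration of the cache-window alignment arithmetic
# in _readsegment above (the numbers are made up; the masking only works
# because the cache size is a power of two).
cachesize = 65536
offset, length = 70000, 100
realoffset = offset & ~(cachesize - 1)
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
assert realoffset == 65536                          # rounded down to the window start
assert reallength == 65536                          # one full 64 KiB window is read
assert realoffset + reallength >= offset + length   # request fully covered
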
1741 1743 def _getsegment(self, offset, length, df=None):
1742 1744 """Obtain a segment of raw data from the revlog.
1743 1745
1744 1746 Accepts an absolute offset, length of bytes to obtain, and an
1745 1747 optional file handle to the already-opened revlog. If the file
1746 1748 handle is used, its original seek position will not be preserved.
1747 1749
1748 1750 Requests for data may be returned from a cache.
1749 1751
1750 1752 Returns a str or a buffer instance of raw byte data.
1751 1753 """
1752 1754 o, d = self._chunkcache
1753 1755 l = len(d)
1754 1756
1755 1757 # is it in the cache?
1756 1758 cachestart = offset - o
1757 1759 cacheend = cachestart + length
1758 1760 if cachestart >= 0 and cacheend <= l:
1759 1761 if cachestart == 0 and cacheend == l:
1760 1762 return d # avoid a copy
1761 1763 return util.buffer(d, cachestart, cacheend - cachestart)
1762 1764
1763 1765 return self._readsegment(offset, length, df=df)
1764 1766
1765 1767 def _getsegmentforrevs(self, startrev, endrev, df=None):
1766 1768 """Obtain a segment of raw data corresponding to a range of revisions.
1767 1769
1768 1770 Accepts the start and end revisions and an optional already-open
1769 1771 file handle to be used for reading. If the file handle is read, its
1770 1772 seek position will not be preserved.
1771 1773
1772 1774 Requests for data may be satisfied by a cache.
1773 1775
1774 1776 Returns a 2-tuple of (offset, data) for the requested range of
1775 1777 revisions. Offset is the integer offset from the beginning of the
1776 1778 revlog and data is a str or buffer of the raw byte data.
1777 1779
1778 1780 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1779 1781 to determine where each revision's data begins and ends.
1780 1782 """
1781 1783 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1782 1784 # (functions are expensive).
1783 1785 index = self.index
1784 1786 istart = index[startrev]
1785 1787 start = int(istart[0] >> 16)
1786 1788 if startrev == endrev:
1787 1789 end = start + istart[1]
1788 1790 else:
1789 1791 iend = index[endrev]
1790 1792 end = int(iend[0] >> 16) + iend[1]
1791 1793
1792 1794 if self._inline:
1793 1795 start += (startrev + 1) * self.index.entry_size
1794 1796 end += (endrev + 1) * self.index.entry_size
1795 1797 length = end - start
1796 1798
1797 1799 return start, self._getsegment(start, length, df=df)
1798 1800
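# A worked example (made-up numbers) of the inline adjustment above: in an
# inline revlog the index entries and data chunks are interleaved in the .i
# file, so a chunk's physical position is its logical data offset plus the
# size of the index entries that precede it.
entry_size = 64          # assumed index entry size, for illustration only
rev = 2
logical_start = 300      # what self.start(rev) would return
physical_start = logical_start + (rev + 1) * entry_size
assert physical_start == 492  # 300 + 3 * 64
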
1799 1801 def _chunk(self, rev, df=None):
1800 1802 """Obtain a single decompressed chunk for a revision.
1801 1803
1802 1804 Accepts an integer revision and an optional already-open file handle
1803 1805 to be used for reading. If used, the seek position of the file will not
1804 1806 be preserved.
1805 1807
1806 1808 Returns a str holding uncompressed data for the requested revision.
1807 1809 """
1808 1810 compression_mode = self.index[rev][10]
1809 1811 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1810 1812 if compression_mode == COMP_MODE_PLAIN:
1811 1813 return data
1812 1814 elif compression_mode == COMP_MODE_DEFAULT:
1813 1815 return self._decompressor(data)
1814 1816 elif compression_mode == COMP_MODE_INLINE:
1815 1817 return self.decompress(data)
1816 1818 else:
1817 1819 msg = 'unknown compression mode %d'
1818 1820 msg %= compression_mode
1819 1821 raise error.RevlogError(msg)
1820 1822
1821 1823 def _chunks(self, revs, df=None, targetsize=None):
1822 1824 """Obtain decompressed chunks for the specified revisions.
1823 1825
1824 1826 Accepts an iterable of numeric revisions that are assumed to be in
1825 1827 ascending order. Also accepts an optional already-open file handle
1826 1828 to be used for reading. If used, the seek position of the file will
1827 1829 not be preserved.
1828 1830
1829 1831 This function is similar to calling ``self._chunk()`` multiple times,
1830 1832 but is faster.
1831 1833
1832 1834 Returns a list with decompressed data for each requested revision.
1833 1835 """
1834 1836 if not revs:
1835 1837 return []
1836 1838 start = self.start
1837 1839 length = self.length
1838 1840 inline = self._inline
1839 1841 iosize = self.index.entry_size
1840 1842 buffer = util.buffer
1841 1843
1842 1844 l = []
1843 1845 ladd = l.append
1844 1846
1845 1847 if not self._withsparseread:
1846 1848 slicedchunks = (revs,)
1847 1849 else:
1848 1850 slicedchunks = deltautil.slicechunk(
1849 1851 self, revs, targetsize=targetsize
1850 1852 )
1851 1853
1852 1854 for revschunk in slicedchunks:
1853 1855 firstrev = revschunk[0]
1854 1856 # Skip trailing revisions with empty diff
1855 1857 for lastrev in revschunk[::-1]:
1856 1858 if length(lastrev) != 0:
1857 1859 break
1858 1860
1859 1861 try:
1860 1862 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1861 1863 except OverflowError:
1862 1864 # issue4215 - we can't cache a run of chunks greater than
1863 1865 # 2G on Windows
1864 1866 return [self._chunk(rev, df=df) for rev in revschunk]
1865 1867
1866 1868 decomp = self.decompress
1867 1869 # self._decompressor might be None, but will not be used in that case
1868 1870 def_decomp = self._decompressor
1869 1871 for rev in revschunk:
1870 1872 chunkstart = start(rev)
1871 1873 if inline:
1872 1874 chunkstart += (rev + 1) * iosize
1873 1875 chunklength = length(rev)
1874 1876 comp_mode = self.index[rev][10]
1875 1877 c = buffer(data, chunkstart - offset, chunklength)
1876 1878 if comp_mode == COMP_MODE_PLAIN:
1877 1879 ladd(c)
1878 1880 elif comp_mode == COMP_MODE_INLINE:
1879 1881 ladd(decomp(c))
1880 1882 elif comp_mode == COMP_MODE_DEFAULT:
1881 1883 ladd(def_decomp(c))
1882 1884 else:
1883 1885 msg = 'unknown compression mode %d'
1884 1886 msg %= comp_mode
1885 1887 raise error.RevlogError(msg)
1886 1888
1887 1889 return l
1888 1890
1889 1891 def _chunkclear(self):
1890 1892 """Clear the raw chunk cache."""
1891 1893 self._chunkcache = (0, b'')
1892 1894
1893 1895 def deltaparent(self, rev):
1894 1896 """return deltaparent of the given revision"""
1895 1897 base = self.index[rev][3]
1896 1898 if base == rev:
1897 1899 return nullrev
1898 1900 elif self._generaldelta:
1899 1901 return base
1900 1902 else:
1901 1903 return rev - 1
1902 1904
1903 1905 def issnapshot(self, rev):
1904 1906 """tells whether rev is a snapshot"""
1905 1907 if not self._sparserevlog:
1906 1908 return self.deltaparent(rev) == nullrev
1907 1909 elif util.safehasattr(self.index, b'issnapshot'):
1908 1910 # directly assign the method to cache the testing and access
1909 1911 self.issnapshot = self.index.issnapshot
1910 1912 return self.issnapshot(rev)
1911 1913 if rev == nullrev:
1912 1914 return True
1913 1915 entry = self.index[rev]
1914 1916 base = entry[3]
1915 1917 if base == rev:
1916 1918 return True
1917 1919 if base == nullrev:
1918 1920 return True
1919 1921 p1 = entry[5]
1920 1922 p2 = entry[6]
1921 1923 if base == p1 or base == p2:
1922 1924 return False
1923 1925 return self.issnapshot(base)
1924 1926
1925 1927 def snapshotdepth(self, rev):
1926 1928 """number of snapshot in the chain before this one"""
1927 1929 if not self.issnapshot(rev):
1928 1930 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1929 1931 return len(self._deltachain(rev)[0]) - 1
1930 1932
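# A toy, standalone model (not the revlog implementation) of walking a delta
# chain back to its snapshot: ``bases[rev]`` maps a revision to its delta
# base, and a revision whose base is itself (or -1) is a full snapshot.
def toy_deltachain(bases, rev):
    chain = []
    while True:
        chain.append(rev)
        base = bases[rev]
        if base == rev or base == -1:
            break
        rev = base
    chain.reverse()
    return chain

bases = {0: 0, 1: 0, 2: 1, 3: 2}  # rev 0 is a snapshot, revs 1-3 are deltas
assert toy_deltachain(bases, 3) == [0, 1, 2, 3]
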
1931 1933 def revdiff(self, rev1, rev2):
1932 1934 """return or calculate a delta between two revisions
1933 1935
1934 1936 The delta calculated is in binary form and is intended to be written to
1935 1937 revlog data directly. So this function needs raw revision data.
1936 1938 """
1937 1939 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1938 1940 return bytes(self._chunk(rev2))
1939 1941
1940 1942 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1941 1943
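# A standalone sketch of the binary delta format assumed here for
# mdiff.textdiff / mdiff.patches: a sequence of hunks, each a big-endian
# ``>lll`` header (start, end, length of new data) followed by the
# replacement bytes. This is an illustrative re-implementation, not the
# mpatch code the revlog actually uses.
import struct

def toy_apply_delta(base, delta):
    out, last, pos = [], 0, 0
    while pos < len(delta):
        start, end, length = struct.unpack_from(b'>lll', delta, pos)
        pos += 12
        out.append(base[last:start])             # unchanged prefix
        out.append(delta[pos:pos + length])      # replacement bytes
        pos += length
        last = end
    out.append(base[last:])                      # unchanged suffix
    return b''.join(out)

base = b'the quick brown fox'
delta = struct.pack(b'>lll', 4, 9, 4) + b'slow'  # replace bytes 4..9
assert toy_apply_delta(base, delta) == b'the slow brown fox'
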
1942 1944 def _processflags(self, text, flags, operation, raw=False):
1943 1945 """deprecated entry point to access flag processors"""
1944 1946 msg = b'_processflag(...) use the specialized variant'
1945 1947 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1946 1948 if raw:
1947 1949 return text, flagutil.processflagsraw(self, text, flags)
1948 1950 elif operation == b'read':
1949 1951 return flagutil.processflagsread(self, text, flags)
1950 1952 else: # write operation
1951 1953 return flagutil.processflagswrite(self, text, flags)
1952 1954
1953 1955 def revision(self, nodeorrev, _df=None, raw=False):
1954 1956 """return an uncompressed revision of a given node or revision
1955 1957 number.
1956 1958
1957 1959 _df - an existing file handle to read from. (internal-only)
1958 1960 raw - an optional argument specifying if the revision data is to be
1959 1961 treated as raw data when applying flag transforms. 'raw' should be set
1960 1962 to True when generating changegroups or in debug commands.
1961 1963 """
1962 1964 if raw:
1963 1965 msg = (
1964 1966 b'revlog.revision(..., raw=True) is deprecated, '
1965 1967 b'use revlog.rawdata(...)'
1966 1968 )
1967 1969 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1968 1970 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1969 1971
1970 1972 def sidedata(self, nodeorrev, _df=None):
1971 1973 """a map of extra data related to the changeset but not part of the hash
1972 1974
1973 1975 This function currently returns a dictionary. However, a more advanced
1974 1976 mapping object will likely be used in the future for more
1975 1977 efficient/lazy code.
1976 1978 """
1977 1979 return self._revisiondata(nodeorrev, _df)[1]
1978 1980
1979 1981 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1980 1982 # deal with <nodeorrev> argument type
1981 1983 if isinstance(nodeorrev, int):
1982 1984 rev = nodeorrev
1983 1985 node = self.node(rev)
1984 1986 else:
1985 1987 node = nodeorrev
1986 1988 rev = None
1987 1989
1988 1990 # fast path the special `nullid` rev
1989 1991 if node == self.nullid:
1990 1992 return b"", {}
1991 1993
1992 1994 # ``rawtext`` is the text as stored inside the revlog. Might be the
1993 1995 # revision or might need to be processed to retrieve the revision.
1994 1996 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1995 1997
1996 1998 if self.hassidedata:
1997 1999 if rev is None:
1998 2000 rev = self.rev(node)
1999 2001 sidedata = self._sidedata(rev)
2000 2002 else:
2001 2003 sidedata = {}
2002 2004
2003 2005 if raw and validated:
2004 2006 # if we don't want to process the raw text and the raw
2005 2007 # text is cached, we can exit early.
2006 2008 return rawtext, sidedata
2007 2009 if rev is None:
2008 2010 rev = self.rev(node)
2009 2011 # the revlog's flag for this revision
2010 2012 # (usually alter its state or content)
2011 2013 flags = self.flags(rev)
2012 2014
2013 2015 if validated and flags == REVIDX_DEFAULT_FLAGS:
2014 2016 # no extra flags set, no flag processor runs, text = rawtext
2015 2017 return rawtext, sidedata
2016 2018
2017 2019 if raw:
2018 2020 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2019 2021 text = rawtext
2020 2022 else:
2021 2023 r = flagutil.processflagsread(self, rawtext, flags)
2022 2024 text, validatehash = r
2023 2025 if validatehash:
2024 2026 self.checkhash(text, node, rev=rev)
2025 2027 if not validated:
2026 2028 self._revisioncache = (node, rev, rawtext)
2027 2029
2028 2030 return text, sidedata
2029 2031
2030 2032 def _rawtext(self, node, rev, _df=None):
2031 2033 """return the possibly unvalidated rawtext for a revision
2032 2034
2033 2035 returns (rev, rawtext, validated)
2034 2036 """
2035 2037
2036 2038 # revision in the cache (could be useful to apply delta)
2037 2039 cachedrev = None
2038 2040 # An intermediate text to apply deltas to
2039 2041 basetext = None
2040 2042
2041 2043 # Check if we have the entry in cache
2042 2044 # The cache entry looks like (node, rev, rawtext)
2043 2045 if self._revisioncache:
2044 2046 if self._revisioncache[0] == node:
2045 2047 return (rev, self._revisioncache[2], True)
2046 2048 cachedrev = self._revisioncache[1]
2047 2049
2048 2050 if rev is None:
2049 2051 rev = self.rev(node)
2050 2052
2051 2053 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2052 2054 if stopped:
2053 2055 basetext = self._revisioncache[2]
2054 2056
2055 2057 # drop cache to save memory, the caller is expected to
2056 2058 # update self._revisioncache after validating the text
2057 2059 self._revisioncache = None
2058 2060
2059 2061 targetsize = None
2060 2062 rawsize = self.index[rev][2]
2061 2063 if 0 <= rawsize:
2062 2064 targetsize = 4 * rawsize
2063 2065
2064 2066 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2065 2067 if basetext is None:
2066 2068 basetext = bytes(bins[0])
2067 2069 bins = bins[1:]
2068 2070
2069 2071 rawtext = mdiff.patches(basetext, bins)
2070 2072 del basetext # let us have a chance to free memory early
2071 2073 return (rev, rawtext, False)
2072 2074
2073 2075 def _sidedata(self, rev):
2074 2076 """Return the sidedata for a given revision number."""
2075 2077 index_entry = self.index[rev]
2076 2078 sidedata_offset = index_entry[8]
2077 2079 sidedata_size = index_entry[9]
2078 2080
2079 2081 if self._inline:
2080 2082 sidedata_offset += self.index.entry_size * (1 + rev)
2081 2083 if sidedata_size == 0:
2082 2084 return {}
2083 2085
2084 2086 comp_segment = self._getsegment(sidedata_offset, sidedata_size)
2085 2087 comp = self.index[rev][11]
2086 2088 if comp == COMP_MODE_PLAIN:
2087 2089 segment = comp_segment
2088 2090 elif comp == COMP_MODE_DEFAULT:
2089 2091 segment = self._decompressor(comp_segment)
2090 2092 elif comp == COMP_MODE_INLINE:
2091 2093 segment = self.decompress(comp_segment)
2092 2094 else:
2093 2095 msg = 'unknown compression mode %d'
2094 2096 msg %= comp
2095 2097 raise error.RevlogError(msg)
2096 2098
2097 2099 sidedata = sidedatautil.deserialize_sidedata(segment)
2098 2100 return sidedata
2099 2101
2100 2102 def rawdata(self, nodeorrev, _df=None):
2101 2103 """return an uncompressed raw data of a given node or revision number.
2102 2104
2103 2105 _df - an existing file handle to read from. (internal-only)
2104 2106 """
2105 2107 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2106 2108
2107 2109 def hash(self, text, p1, p2):
2108 2110 """Compute a node hash.
2109 2111
2110 2112 Available as a function so that subclasses can replace the hash
2111 2113 as needed.
2112 2114 """
2113 2115 return storageutil.hashrevisionsha1(text, p1, p2)
2114 2116
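# A rough standalone equivalent (assumed, for illustration) of the node hash
# computed by storageutil.hashrevisionsha1: sha1 over the two parent nodes in
# sorted order followed by the text, so the result does not depend on the
# order in which the parents are passed.
import hashlib

def toy_hashrevision(text, p1, p2):
    a, b = sorted((p1, p2))
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

p1 = hashlib.sha1(b'parent one').digest()
p2 = hashlib.sha1(b'parent two').digest()
assert toy_hashrevision(b'content', p1, p2) == toy_hashrevision(b'content', p2, p1)
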
2115 2117 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2116 2118 """Check node hash integrity.
2117 2119
2118 2120 Available as a function so that subclasses can extend hash mismatch
2119 2121 behaviors as needed.
2120 2122 """
2121 2123 try:
2122 2124 if p1 is None and p2 is None:
2123 2125 p1, p2 = self.parents(node)
2124 2126 if node != self.hash(text, p1, p2):
2125 2127 # Clear the revision cache on hash failure. The revision cache
2126 2128 # only stores the raw revision and clearing the cache does have
2127 2129 # the side-effect that we won't have a cache hit when the raw
2128 2130 # revision data is accessed. But this case should be rare and
2129 2131 # it is extra work to teach the cache about the hash
2130 2132 # verification state.
2131 2133 if self._revisioncache and self._revisioncache[0] == node:
2132 2134 self._revisioncache = None
2133 2135
2134 2136 revornode = rev
2135 2137 if revornode is None:
2136 2138 revornode = templatefilters.short(hex(node))
2137 2139 raise error.RevlogError(
2138 2140 _(b"integrity check failed on %s:%s")
2139 2141 % (self.display_id, pycompat.bytestr(revornode))
2140 2142 )
2141 2143 except error.RevlogError:
2142 2144 if self._censorable and storageutil.iscensoredtext(text):
2143 2145 raise error.CensoredNodeError(self.display_id, node, text)
2144 2146 raise
2145 2147
2146 2148 def _enforceinlinesize(self, tr):
2147 2149 """Check if the revlog is too big for inline and convert if so.
2148 2150
2149 2151 This should be called after revisions are added to the revlog. If the
2150 2152 revlog has grown too large to be an inline revlog, it will convert it
2151 2153 to use multiple index and data files.
2152 2154 """
2153 2155 tiprev = len(self) - 1
2154 2156 total_size = self.start(tiprev) + self.length(tiprev)
2155 2157 if not self._inline or total_size < _maxinline:
2156 2158 return
2157 2159
2158 2160 troffset = tr.findoffset(self._indexfile)
2159 2161 if troffset is None:
2160 2162 raise error.RevlogError(
2161 2163 _(b"%s not found in the transaction") % self._indexfile
2162 2164 )
2163 2165 trindex = 0
2164 2166 tr.add(self._datafile, 0)
2165 2167
2166 2168 existing_handles = False
2167 2169 if self._writinghandles is not None:
2168 2170 existing_handles = True
2169 2171 fp = self._writinghandles[0]
2170 2172 fp.flush()
2171 2173 fp.close()
2172 2174 # We can't use the cached file handle after close(). So prevent
2173 2175 # its usage.
2174 2176 self._writinghandles = None
2175 2177
2176 2178 new_dfh = self._datafp(b'w+')
2177 2179 new_dfh.truncate(0) # drop any potentially existing data
2178 2180 try:
2179 2181 with self._indexfp() as read_ifh:
2180 2182 for r in self:
2181 2183 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2182 2184 if troffset <= self.start(r) + r * self.index.entry_size:
2183 2185 trindex = r
2184 2186 new_dfh.flush()
2185 2187
2186 2188 with self.__index_new_fp() as fp:
2187 2189 self._format_flags &= ~FLAG_INLINE_DATA
2188 2190 self._inline = False
2189 2191 for i in self:
2190 2192 e = self.index.entry_binary(i)
2191 2193 if i == 0 and self._docket is None:
2192 2194 header = self._format_flags | self._format_version
2193 2195 header = self.index.pack_header(header)
2194 2196 e = header + e
2195 2197 fp.write(e)
2196 2198 if self._docket is not None:
2197 2199 self._docket.index_end = fp.tell()
2198 2200
2199 2201 # There is a small transactional race here. If the rename of
2200 2202 # the index fails, we should remove the datafile. It is more
2201 2203 # important to ensure that the data file is not truncated
2202 2204 # when the index is replaced as otherwise data is lost.
2203 2205 tr.replace(self._datafile, self.start(trindex))
2204 2206
2205 2207 # the temp file replace the real index when we exit the context
2206 2208 # manager
2207 2209
2208 2210 tr.replace(self._indexfile, trindex * self.index.entry_size)
2209 2211 nodemaputil.setup_persistent_nodemap(tr, self)
2210 2212 self._chunkclear()
2211 2213
2212 2214 if existing_handles:
2213 2215 # switched from inline to conventional reopen the index
2214 2216 ifh = self.__index_write_fp()
2215 2217 self._writinghandles = (ifh, new_dfh)
2216 2218 new_dfh = None
2217 2219 finally:
2218 2220 if new_dfh is not None:
2219 2221 new_dfh.close()
2220 2222
2221 2223 def _nodeduplicatecallback(self, transaction, node):
2222 2224 """called when trying to add a node already stored."""
2223 2225
2224 2226 @contextlib.contextmanager
2225 2227 def _writing(self, transaction):
2226 2228 if self._trypending:
2227 2229 msg = b'try to write in a `trypending` revlog: %s'
2228 2230 msg %= self.display_id
2229 2231 raise error.ProgrammingError(msg)
2230 2232 if self._writinghandles is not None:
2231 2233 yield
2232 2234 else:
2233 2235 r = len(self)
2234 2236 dsize = 0
2235 2237 if r:
2236 2238 dsize = self.end(r - 1)
2237 2239 dfh = None
2238 2240 if not self._inline:
2239 2241 try:
2240 2242 dfh = self._datafp(b"r+")
2241 2243 if self._docket is None:
2242 2244 dfh.seek(0, os.SEEK_END)
2243 2245 else:
2244 2246 dfh.seek(self._docket.data_end, os.SEEK_SET)
2245 2247 except IOError as inst:
2246 2248 if inst.errno != errno.ENOENT:
2247 2249 raise
2248 2250 dfh = self._datafp(b"w+")
2249 2251 transaction.add(self._datafile, dsize)
2250 2252 try:
2251 2253 isize = r * self.index.entry_size
2252 2254 ifh = self.__index_write_fp()
2253 2255 if self._inline:
2254 2256 transaction.add(self._indexfile, dsize + isize)
2255 2257 else:
2256 2258 transaction.add(self._indexfile, isize)
2257 2259 try:
2258 2260 self._writinghandles = (ifh, dfh)
2259 2261 try:
2260 2262 yield
2261 2263 if self._docket is not None:
2262 2264 self._write_docket(transaction)
2263 2265 finally:
2264 2266 self._writinghandles = None
2265 2267 finally:
2266 2268 ifh.close()
2267 2269 finally:
2268 2270 if dfh is not None:
2269 2271 dfh.close()
2270 2272
2271 2273 def _write_docket(self, transaction):
2272 2274 """write the current docket on disk
2273 2275
2274 2276 Exists as a method to help the changelog implement transaction logic
2275 2277
2276 2278 We could also imagine using the same transaction logic for all revlogs
2277 2279 since dockets are cheap."""
2278 2280 self._docket.write(transaction)
2279 2281
2280 2282 def addrevision(
2281 2283 self,
2282 2284 text,
2283 2285 transaction,
2284 2286 link,
2285 2287 p1,
2286 2288 p2,
2287 2289 cachedelta=None,
2288 2290 node=None,
2289 2291 flags=REVIDX_DEFAULT_FLAGS,
2290 2292 deltacomputer=None,
2291 2293 sidedata=None,
2292 2294 ):
2293 2295 """add a revision to the log
2294 2296
2295 2297 text - the revision data to add
2296 2298 transaction - the transaction object used for rollback
2297 2299 link - the linkrev data to add
2298 2300 p1, p2 - the parent nodeids of the revision
2299 2301 cachedelta - an optional precomputed delta
2300 2302 node - nodeid of revision; typically node is not specified, and it is
2301 2303 computed by default as hash(text, p1, p2), however subclasses might
2302 2304 use a different hashing method (and override checkhash() in such a case)
2303 2305 flags - the known flags to set on the revision
2304 2306 deltacomputer - an optional deltacomputer instance shared between
2305 2307 multiple calls
2306 2308 """
2307 2309 if link == nullrev:
2308 2310 raise error.RevlogError(
2309 2311 _(b"attempted to add linkrev -1 to %s") % self.display_id
2310 2312 )
2311 2313
2312 2314 if sidedata is None:
2313 2315 sidedata = {}
2314 2316 elif sidedata and not self.hassidedata:
2315 2317 raise error.ProgrammingError(
2316 2318 _(b"trying to add sidedata to a revlog who don't support them")
2317 2319 )
2318 2320
2319 2321 if flags:
2320 2322 node = node or self.hash(text, p1, p2)
2321 2323
2322 2324 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2323 2325
2324 2326 # If the flag processor modifies the revision data, ignore any provided
2325 2327 # cachedelta.
2326 2328 if rawtext != text:
2327 2329 cachedelta = None
2328 2330
2329 2331 if len(rawtext) > _maxentrysize:
2330 2332 raise error.RevlogError(
2331 2333 _(
2332 2334 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2333 2335 )
2334 2336 % (self.display_id, len(rawtext))
2335 2337 )
2336 2338
2337 2339 node = node or self.hash(rawtext, p1, p2)
2338 2340 rev = self.index.get_rev(node)
2339 2341 if rev is not None:
2340 2342 return rev
2341 2343
2342 2344 if validatehash:
2343 2345 self.checkhash(rawtext, node, p1=p1, p2=p2)
2344 2346
2345 2347 return self.addrawrevision(
2346 2348 rawtext,
2347 2349 transaction,
2348 2350 link,
2349 2351 p1,
2350 2352 p2,
2351 2353 node,
2352 2354 flags,
2353 2355 cachedelta=cachedelta,
2354 2356 deltacomputer=deltacomputer,
2355 2357 sidedata=sidedata,
2356 2358 )
2357 2359
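# A hedged usage sketch (not from the source) of adding a single revision.
# The transaction, parent nodes and link revision are assumed to come from
# the surrounding repository machinery; only the call shape is shown.
def append_one_revision(rl, tr, text, p1, p2, linkrev):
    # addrevision hashes ``text`` with its parents, returns the existing
    # revision number if that node is already stored, and otherwise writes
    # a new delta (or full text) inside the given transaction.
    return rl.addrevision(text, tr, linkrev, p1, p2)
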
2358 2360 def addrawrevision(
2359 2361 self,
2360 2362 rawtext,
2361 2363 transaction,
2362 2364 link,
2363 2365 p1,
2364 2366 p2,
2365 2367 node,
2366 2368 flags,
2367 2369 cachedelta=None,
2368 2370 deltacomputer=None,
2369 2371 sidedata=None,
2370 2372 ):
2371 2373 """add a raw revision with known flags, node and parents
2372 2374 useful when reusing a revision not stored in this revlog (ex: received
2373 2375 over wire, or read from an external bundle).
2374 2376 """
2375 2377 with self._writing(transaction):
2376 2378 return self._addrevision(
2377 2379 node,
2378 2380 rawtext,
2379 2381 transaction,
2380 2382 link,
2381 2383 p1,
2382 2384 p2,
2383 2385 flags,
2384 2386 cachedelta,
2385 2387 deltacomputer=deltacomputer,
2386 2388 sidedata=sidedata,
2387 2389 )
2388 2390
2389 2391 def compress(self, data):
2390 2392 """Generate a possibly-compressed representation of data."""
2391 2393 if not data:
2392 2394 return b'', data
2393 2395
2394 2396 compressed = self._compressor.compress(data)
2395 2397
2396 2398 if compressed:
2397 2399 # The revlog compressor added the header in the returned data.
2398 2400 return b'', compressed
2399 2401
2400 2402 if data[0:1] == b'\0':
2401 2403 return b'', data
2402 2404 return b'u', data
2403 2405
2404 2406 def decompress(self, data):
2405 2407 """Decompress a revlog chunk.
2406 2408
2407 2409 The chunk is expected to begin with a header identifying the
2408 2410 format type so it can be routed to an appropriate decompressor.
2409 2411 """
2410 2412 if not data:
2411 2413 return data
2412 2414
2413 2415 # Revlogs are read much more frequently than they are written and many
2414 2416 # chunks only take microseconds to decompress, so performance is
2415 2417 # important here.
2416 2418 #
2417 2419 # We can make a few assumptions about revlogs:
2418 2420 #
2419 2421 # 1) the majority of chunks will be compressed (as opposed to inline
2420 2422 # raw data).
2421 2423 # 2) decompressing *any* data will likely be at least 10x slower than
2422 2424 # returning raw inline data.
2423 2425 # 3) we want to prioritize common and officially supported compression
2424 2426 # engines
2425 2427 #
2426 2428 # It follows that we want to optimize for "decompress compressed data
2427 2429 # when encoded with common and officially supported compression engines"
2428 2430 # case over "raw data" and "data encoded by less common or non-official
2429 2431 # compression engines." That is why we have the inline lookup first
2430 2432 # followed by the compengines lookup.
2431 2433 #
2432 2434 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2433 2435 # compressed chunks. And this matters for changelog and manifest reads.
2434 2436 t = data[0:1]
2435 2437
2436 2438 if t == b'x':
2437 2439 try:
2438 2440 return _zlibdecompress(data)
2439 2441 except zlib.error as e:
2440 2442 raise error.RevlogError(
2441 2443 _(b'revlog decompress error: %s')
2442 2444 % stringutil.forcebytestr(e)
2443 2445 )
2444 2446 # '\0' is more common than 'u' so it goes first.
2445 2447 elif t == b'\0':
2446 2448 return data
2447 2449 elif t == b'u':
2448 2450 return util.buffer(data, 1)
2449 2451
2450 2452 compressor = self._get_decompressor(t)
2451 2453
2452 2454 return compressor.decompress(data)
2453 2455
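# A standalone sketch of the one-byte chunk header convention handled above,
# using only the stdlib. It mirrors the routing logic for the common cases;
# the real code also dispatches to pluggable compression engines.
import zlib

def toy_decompress(chunk):
    header = chunk[0:1]
    if header == b'x':           # zlib streams start with 0x78 ('x')
        return zlib.decompress(chunk)
    if header == b'u':           # explicitly stored uncompressed
        return chunk[1:]
    if not chunk or header == b'\0':
        return chunk             # empty chunk or plain data
    raise ValueError('unknown chunk header: %r' % header)

assert toy_decompress(zlib.compress(b'some revision text')) == b'some revision text'
assert toy_decompress(b'ukept verbatim') == b'kept verbatim'
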
2454 2456 def _addrevision(
2455 2457 self,
2456 2458 node,
2457 2459 rawtext,
2458 2460 transaction,
2459 2461 link,
2460 2462 p1,
2461 2463 p2,
2462 2464 flags,
2463 2465 cachedelta,
2464 2466 alwayscache=False,
2465 2467 deltacomputer=None,
2466 2468 sidedata=None,
2467 2469 ):
2468 2470 """internal function to add revisions to the log
2469 2471
2470 2472 see addrevision for argument descriptions.
2471 2473
2472 2474 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2473 2475
2474 2476 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2475 2477 be used.
2476 2478
2477 2479 invariants:
2478 2480 - rawtext is optional (can be None); if not set, cachedelta must be set.
2479 2481 if both are set, they must correspond to each other.
2480 2482 """
2481 2483 if node == self.nullid:
2482 2484 raise error.RevlogError(
2483 2485 _(b"%s: attempt to add null revision") % self.display_id
2484 2486 )
2485 2487 if (
2486 2488 node == self.nodeconstants.wdirid
2487 2489 or node in self.nodeconstants.wdirfilenodeids
2488 2490 ):
2489 2491 raise error.RevlogError(
2490 2492 _(b"%s: attempt to add wdir revision") % self.display_id
2491 2493 )
2492 2494 if self._writinghandles is None:
2493 2495 msg = b'adding revision outside `revlog._writing` context'
2494 2496 raise error.ProgrammingError(msg)
2495 2497
2496 2498 if self._inline:
2497 2499 fh = self._writinghandles[0]
2498 2500 else:
2499 2501 fh = self._writinghandles[1]
2500 2502
2501 2503 btext = [rawtext]
2502 2504
2503 2505 curr = len(self)
2504 2506 prev = curr - 1
2505 2507
2506 2508 offset = self._get_data_offset(prev)
2507 2509
2508 2510 if self._concurrencychecker:
2509 2511 ifh, dfh = self._writinghandles
2510 2512 if self._inline:
2511 2513 # offset is "as if" it were in the .d file, so we need to add on
2512 2514 # the size of the entry metadata.
2513 2515 self._concurrencychecker(
2514 2516 ifh, self._indexfile, offset + curr * self.index.entry_size
2515 2517 )
2516 2518 else:
2517 2519 # Entries in the .i are a consistent size.
2518 2520 self._concurrencychecker(
2519 2521 ifh, self._indexfile, curr * self.index.entry_size
2520 2522 )
2521 2523 self._concurrencychecker(dfh, self._datafile, offset)
2522 2524
2523 2525 p1r, p2r = self.rev(p1), self.rev(p2)
2524 2526
2525 2527 # full versions are inserted when the needed deltas
2526 2528 # become comparable to the uncompressed text
2527 2529 if rawtext is None:
2528 2530 # need the rawtext size before it is changed by flag processors, which is
2529 2531 # the non-raw size. use revlog explicitly to avoid filelog's extra
2530 2532 # logic that might remove metadata size.
2531 2533 textlen = mdiff.patchedsize(
2532 2534 revlog.size(self, cachedelta[0]), cachedelta[1]
2533 2535 )
2534 2536 else:
2535 2537 textlen = len(rawtext)
2536 2538
2537 2539 if deltacomputer is None:
2538 2540 deltacomputer = deltautil.deltacomputer(self)
2539 2541
2540 2542 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2541 2543
2542 2544 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2543 2545
2544 2546 compression_mode = COMP_MODE_INLINE
2545 2547 if self._docket is not None:
2546 2548 h, d = deltainfo.data
2547 2549 if not h and not d:
2548 2550 # no data to store at all... declare them uncompressed
2549 2551 compression_mode = COMP_MODE_PLAIN
2550 2552 elif not h:
2551 2553 t = d[0:1]
2552 2554 if t == b'\0':
2553 2555 compression_mode = COMP_MODE_PLAIN
2554 2556 elif t == self._docket.default_compression_header:
2555 2557 compression_mode = COMP_MODE_DEFAULT
2556 2558 elif h == b'u':
2557 2559 # we have a more efficient way to declare uncompressed
2558 2560 h = b''
2559 2561 compression_mode = COMP_MODE_PLAIN
2560 2562 deltainfo = deltautil.drop_u_compression(deltainfo)
2561 2563
2562 2564 sidedata_compression_mode = COMP_MODE_INLINE
2563 2565 if sidedata and self.hassidedata:
2564 2566 sidedata_compression_mode = COMP_MODE_PLAIN
2565 2567 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2566 2568 sidedata_offset = offset + deltainfo.deltalen
2567 2569 h, comp_sidedata = self.compress(serialized_sidedata)
2568 2570 if (
2569 2571 h != b'u'
2570 2572 and comp_sidedata[0:1] != b'\0'
2571 2573 and len(comp_sidedata) < len(serialized_sidedata)
2572 2574 ):
2573 2575 assert not h
2574 2576 if (
2575 2577 comp_sidedata[0:1]
2576 2578 == self._docket.default_compression_header
2577 2579 ):
2578 2580 sidedata_compression_mode = COMP_MODE_DEFAULT
2579 2581 serialized_sidedata = comp_sidedata
2580 2582 else:
2581 2583 sidedata_compression_mode = COMP_MODE_INLINE
2582 2584 serialized_sidedata = comp_sidedata
2583 2585 else:
2584 2586 serialized_sidedata = b""
2585 2587 # Don't store the offset if the sidedata is empty, that way
2586 2588 # we can easily detect empty sidedata, and they will be no different
2587 2589 # from ones we add manually.
2588 2590 sidedata_offset = 0
2589 2591
2590 2592 e = (
2591 2593 offset_type(offset, flags),
2592 2594 deltainfo.deltalen,
2593 2595 textlen,
2594 2596 deltainfo.base,
2595 2597 link,
2596 2598 p1r,
2597 2599 p2r,
2598 2600 node,
2599 2601 sidedata_offset,
2600 2602 len(serialized_sidedata),
2601 2603 compression_mode,
2602 2604 sidedata_compression_mode,
2603 2605 )
2604 2606
2605 2607 self.index.append(e)
2606 2608 entry = self.index.entry_binary(curr)
2607 2609 if curr == 0 and self._docket is None:
2608 2610 header = self._format_flags | self._format_version
2609 2611 header = self.index.pack_header(header)
2610 2612 entry = header + entry
2611 2613 self._writeentry(
2612 2614 transaction,
2613 2615 entry,
2614 2616 deltainfo.data,
2615 2617 link,
2616 2618 offset,
2617 2619 serialized_sidedata,
2618 2620 )
2619 2621
2620 2622 rawtext = btext[0]
2621 2623
2622 2624 if alwayscache and rawtext is None:
2623 2625 rawtext = deltacomputer.buildtext(revinfo, fh)
2624 2626
2625 2627 if type(rawtext) == bytes: # only accept immutable objects
2626 2628 self._revisioncache = (node, curr, rawtext)
2627 2629 self._chainbasecache[curr] = deltainfo.chainbase
2628 2630 return curr
2629 2631
2630 2632 def _get_data_offset(self, prev):
2631 2633 """Returns the current offset in the (in-transaction) data file.
2632 2634 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2633 2635 file to store that information: since sidedata can be rewritten to the
2634 2636 end of the data file within a transaction, you can have cases where, for
2635 2637 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2636 2638 to `n - 1`'s sidedata being written after `n`'s data.
2637 2639
2638 2640 TODO cache this in a docket file before getting out of experimental."""
2639 2641 if self._docket is None:
2640 2642 return self.end(prev)
2641 2643 else:
2642 2644 return self._docket.data_end
2643 2645
2644 2646 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2645 2647 # Files opened in a+ mode have inconsistent behavior on various
2646 2648 # platforms. Windows requires that a file positioning call be made
2647 2649 # when the file handle transitions between reads and writes. See
2648 2650 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2649 2651 # platforms, Python or the platform itself can be buggy. Some versions
2650 2652 # of Solaris have been observed to not append at the end of the file
2651 2653 # if the file was seeked to before the end. See issue4943 for more.
2652 2654 #
2653 2655 # We work around this issue by inserting a seek() before writing.
2654 2656 # Note: This is likely not necessary on Python 3. However, because
2655 2657 # the file handle is reused for reads and may be seeked there, we need
2656 2658 # to be careful before changing this.
2657 2659 if self._writinghandles is None:
2658 2660 msg = b'adding revision outside `revlog._writing` context'
2659 2661 raise error.ProgrammingError(msg)
2660 2662 ifh, dfh = self._writinghandles
2661 2663 if self._docket is None:
2662 2664 ifh.seek(0, os.SEEK_END)
2663 2665 else:
2664 2666 ifh.seek(self._docket.index_end, os.SEEK_SET)
2665 2667 if dfh:
2666 2668 if self._docket is None:
2667 2669 dfh.seek(0, os.SEEK_END)
2668 2670 else:
2669 2671 dfh.seek(self._docket.data_end, os.SEEK_SET)
2670 2672
2671 2673 curr = len(self) - 1
2672 2674 if not self._inline:
2673 2675 transaction.add(self._datafile, offset)
2674 2676 transaction.add(self._indexfile, curr * len(entry))
2675 2677 if data[0]:
2676 2678 dfh.write(data[0])
2677 2679 dfh.write(data[1])
2678 2680 if sidedata:
2679 2681 dfh.write(sidedata)
2680 2682 ifh.write(entry)
2681 2683 else:
2682 2684 offset += curr * self.index.entry_size
2683 2685 transaction.add(self._indexfile, offset)
2684 2686 ifh.write(entry)
2685 2687 ifh.write(data[0])
2686 2688 ifh.write(data[1])
2687 2689 if sidedata:
2688 2690 ifh.write(sidedata)
2689 2691 self._enforceinlinesize(transaction)
2690 2692 if self._docket is not None:
2691 2693 self._docket.index_end = self._writinghandles[0].tell()
2692 2694 self._docket.data_end = self._writinghandles[1].tell()
2693 2695
2694 2696 nodemaputil.setup_persistent_nodemap(transaction, self)
2695 2697
2696 2698 def addgroup(
2697 2699 self,
2698 2700 deltas,
2699 2701 linkmapper,
2700 2702 transaction,
2701 2703 alwayscache=False,
2702 2704 addrevisioncb=None,
2703 2705 duplicaterevisioncb=None,
2704 2706 ):
2705 2707 """
2706 2708 add a delta group
2707 2709
2708 2710 given a set of deltas, add them to the revision log. the
2709 2711 first delta is against its parent, which should be in our
2710 2712 log, the rest are against the previous delta.
2711 2713
2712 2714 If ``addrevisioncb`` is defined, it will be called with arguments of
2713 2715 this revlog and the node that was added.
2714 2716 """
2715 2717
2716 2718 if self._adding_group:
2717 2719 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2718 2720
2719 2721 self._adding_group = True
2720 2722 empty = True
2721 2723 try:
2722 2724 with self._writing(transaction):
2723 2725 deltacomputer = deltautil.deltacomputer(self)
2724 2726 # loop through our set of deltas
2725 2727 for data in deltas:
2726 2728 (
2727 2729 node,
2728 2730 p1,
2729 2731 p2,
2730 2732 linknode,
2731 2733 deltabase,
2732 2734 delta,
2733 2735 flags,
2734 2736 sidedata,
2735 2737 ) = data
2736 2738 link = linkmapper(linknode)
2737 2739 flags = flags or REVIDX_DEFAULT_FLAGS
2738 2740
2739 2741 rev = self.index.get_rev(node)
2740 2742 if rev is not None:
2741 2743 # this can happen if two branches make the same change
2742 2744 self._nodeduplicatecallback(transaction, rev)
2743 2745 if duplicaterevisioncb:
2744 2746 duplicaterevisioncb(self, rev)
2745 2747 empty = False
2746 2748 continue
2747 2749
2748 2750 for p in (p1, p2):
2749 2751 if not self.index.has_node(p):
2750 2752 raise error.LookupError(
2751 2753 p, self.radix, _(b'unknown parent')
2752 2754 )
2753 2755
2754 2756 if not self.index.has_node(deltabase):
2755 2757 raise error.LookupError(
2756 2758 deltabase, self.display_id, _(b'unknown delta base')
2757 2759 )
2758 2760
2759 2761 baserev = self.rev(deltabase)
2760 2762
2761 2763 if baserev != nullrev and self.iscensored(baserev):
2762 2764 # if base is censored, delta must be full replacement in a
2763 2765 # single patch operation
2764 2766 hlen = struct.calcsize(b">lll")
2765 2767 oldlen = self.rawsize(baserev)
2766 2768 newlen = len(delta) - hlen
2767 2769 if delta[:hlen] != mdiff.replacediffheader(
2768 2770 oldlen, newlen
2769 2771 ):
2770 2772 raise error.CensoredBaseError(
2771 2773 self.display_id, self.node(baserev)
2772 2774 )
2773 2775
2774 2776 if not flags and self._peek_iscensored(baserev, delta):
2775 2777 flags |= REVIDX_ISCENSORED
2776 2778
2777 2779 # We assume consumers of addrevisioncb will want to retrieve
2778 2780 # the added revision, which will require a call to
2779 2781 # revision(). revision() will fast path if there is a cache
2780 2782 # hit. So, we tell _addrevision() to always cache in this case.
2781 2783 # We're only using addgroup() in the context of changegroup
2782 2784 # generation so the revision data can always be handled as raw
2783 2785 # by the flagprocessor.
2784 2786 rev = self._addrevision(
2785 2787 node,
2786 2788 None,
2787 2789 transaction,
2788 2790 link,
2789 2791 p1,
2790 2792 p2,
2791 2793 flags,
2792 2794 (baserev, delta),
2793 2795 alwayscache=alwayscache,
2794 2796 deltacomputer=deltacomputer,
2795 2797 sidedata=sidedata,
2796 2798 )
2797 2799
2798 2800 if addrevisioncb:
2799 2801 addrevisioncb(self, rev)
2800 2802 empty = False
2801 2803 finally:
2802 2804 self._adding_group = False
2803 2805 return not empty
2804 2806
2805 2807 def iscensored(self, rev):
2806 2808 """Check if a file revision is censored."""
2807 2809 if not self._censorable:
2808 2810 return False
2809 2811
2810 2812 return self.flags(rev) & REVIDX_ISCENSORED
2811 2813
2812 2814 def _peek_iscensored(self, baserev, delta):
2813 2815 """Quickly check if a delta produces a censored revision."""
2814 2816 if not self._censorable:
2815 2817 return False
2816 2818
2817 2819 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2818 2820
2819 2821 def getstrippoint(self, minlink):
2820 2822 """find the minimum rev that must be stripped to strip the linkrev
2821 2823
2822 2824 Returns a tuple containing the minimum rev and a set of all revs that
2823 2825 have linkrevs that will be broken by this strip.
2824 2826 """
2825 2827 return storageutil.resolvestripinfo(
2826 2828 minlink,
2827 2829 len(self) - 1,
2828 2830 self.headrevs(),
2829 2831 self.linkrev,
2830 2832 self.parentrevs,
2831 2833 )
2832 2834
2833 2835 def strip(self, minlink, transaction):
2834 2836 """truncate the revlog on the first revision with a linkrev >= minlink
2835 2837
2836 2838 This function is called when we're stripping revision minlink and
2837 2839 its descendants from the repository.
2838 2840
2839 2841 We have to remove all revisions with linkrev >= minlink, because
2840 2842 the equivalent changelog revisions will be renumbered after the
2841 2843 strip.
2842 2844
2843 2845 So we truncate the revlog on the first of these revisions, and
2844 2846 trust that the caller has saved the revisions that shouldn't be
2845 2847 removed and that it'll re-add them after this truncation.
2846 2848 """
2847 2849 if len(self) == 0:
2848 2850 return
2849 2851
2850 2852 rev, _ = self.getstrippoint(minlink)
2851 2853 if rev == len(self):
2852 2854 return
2853 2855
2854 2856 # first truncate the files on disk
2855 2857 data_end = self.start(rev)
2856 2858 if not self._inline:
2857 2859 transaction.add(self._datafile, data_end)
2858 2860 end = rev * self.index.entry_size
2859 2861 else:
2860 2862 end = data_end + (rev * self.index.entry_size)
2861 2863
2862 2864 transaction.add(self._indexfile, end)
2863 2865 if self._docket is not None:
2864 2866 # XXX we could leverage the docket while stripping. However it is
2865 2867 # not powerful enough at the time of this comment
2866 2868 self._docket.index_end = end
2867 2869 self._docket.data_end = data_end
2868 2870 self._docket.write(transaction, stripping=True)
2869 2871
2870 2872 # then reset internal state in memory to forget those revisions
2871 2873 self._revisioncache = None
2872 2874 self._chaininfocache = util.lrucachedict(500)
2873 2875 self._chunkclear()
2874 2876
2875 2877 del self.index[rev:-1]
2876 2878
2877 2879 def checksize(self):
2878 2880 """Check size of index and data files
2879 2881
2880 2882 return a (dd, di) tuple.
2881 2883 - dd: extra bytes for the "data" file
2882 2884 - di: extra bytes for the "index" file
2883 2885
2884 2886 A healthy revlog will return (0, 0).
2885 2887 """
2886 2888 expected = 0
2887 2889 if len(self):
2888 2890 expected = max(0, self.end(len(self) - 1))
2889 2891
2890 2892 try:
2891 2893 with self._datafp() as f:
2892 2894 f.seek(0, io.SEEK_END)
2893 2895 actual = f.tell()
2894 2896 dd = actual - expected
2895 2897 except IOError as inst:
2896 2898 if inst.errno != errno.ENOENT:
2897 2899 raise
2898 2900 dd = 0
2899 2901
2900 2902 try:
2901 2903 f = self.opener(self._indexfile)
2902 2904 f.seek(0, io.SEEK_END)
2903 2905 actual = f.tell()
2904 2906 f.close()
2905 2907 s = self.index.entry_size
2906 2908 i = max(0, actual // s)
2907 2909 di = actual - (i * s)
2908 2910 if self._inline:
2909 2911 databytes = 0
2910 2912 for r in self:
2911 2913 databytes += max(0, self.length(r))
2912 2914 dd = 0
2913 2915 di = actual - len(self) * s - databytes
2914 2916 except IOError as inst:
2915 2917 if inst.errno != errno.ENOENT:
2916 2918 raise
2917 2919 di = 0
2918 2920
2919 2921 return (dd, di)
2920 2922
2921 2923 def files(self):
2922 2924 res = [self._indexfile]
2923 2925 if not self._inline:
2924 2926 res.append(self._datafile)
2925 2927 return res
2926 2928
2927 2929 def emitrevisions(
2928 2930 self,
2929 2931 nodes,
2930 2932 nodesorder=None,
2931 2933 revisiondata=False,
2932 2934 assumehaveparentrevisions=False,
2933 2935 deltamode=repository.CG_DELTAMODE_STD,
2934 2936 sidedata_helpers=None,
2935 2937 ):
2936 2938 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2937 2939 raise error.ProgrammingError(
2938 2940 b'unhandled value for nodesorder: %s' % nodesorder
2939 2941 )
2940 2942
2941 2943 if nodesorder is None and not self._generaldelta:
2942 2944 nodesorder = b'storage'
2943 2945
2944 2946 if (
2945 2947 not self._storedeltachains
2946 2948 and deltamode != repository.CG_DELTAMODE_PREV
2947 2949 ):
2948 2950 deltamode = repository.CG_DELTAMODE_FULL
2949 2951
2950 2952 return storageutil.emitrevisions(
2951 2953 self,
2952 2954 nodes,
2953 2955 nodesorder,
2954 2956 revlogrevisiondelta,
2955 2957 deltaparentfn=self.deltaparent,
2956 2958 candeltafn=self.candelta,
2957 2959 rawsizefn=self.rawsize,
2958 2960 revdifffn=self.revdiff,
2959 2961 flagsfn=self.flags,
2960 2962 deltamode=deltamode,
2961 2963 revisiondata=revisiondata,
2962 2964 assumehaveparentrevisions=assumehaveparentrevisions,
2963 2965 sidedata_helpers=sidedata_helpers,
2964 2966 )
2965 2967
2966 2968 DELTAREUSEALWAYS = b'always'
2967 2969 DELTAREUSESAMEREVS = b'samerevs'
2968 2970 DELTAREUSENEVER = b'never'
2969 2971
2970 2972 DELTAREUSEFULLADD = b'fulladd'
2971 2973
2972 2974 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2973 2975
2974 2976 def clone(
2975 2977 self,
2976 2978 tr,
2977 2979 destrevlog,
2978 2980 addrevisioncb=None,
2979 2981 deltareuse=DELTAREUSESAMEREVS,
2980 2982 forcedeltabothparents=None,
2981 2983 sidedata_helpers=None,
2982 2984 ):
2983 2985 """Copy this revlog to another, possibly with format changes.
2984 2986
2985 2987 The destination revlog will contain the same revisions and nodes.
2986 2988 However, it may not be bit-for-bit identical due to e.g. delta encoding
2987 2989 differences.
2988 2990
2989 2991 The ``deltareuse`` argument control how deltas from the existing revlog
2990 2992 are preserved in the destination revlog. The argument can have the
2991 2993 following values:
2992 2994
2993 2995 DELTAREUSEALWAYS
2994 2996 Deltas will always be reused (if possible), even if the destination
2995 2997 revlog would not select the same revisions for the delta. This is the
2996 2998 fastest mode of operation.
2997 2999 DELTAREUSESAMEREVS
2998 3000 Deltas will be reused if the destination revlog would pick the same
2999 3001 revisions for the delta. This mode strikes a balance between speed
3000 3002 and optimization.
3001 3003 DELTAREUSENEVER
3002 3004 Deltas will never be reused. This is the slowest mode of execution.
3003 3005 This mode can be used to recompute deltas (e.g. if the diff/delta
3004 3006 algorithm changes).
3005 3007 DELTAREUSEFULLADD
3006 3008 Revisions will be re-added as if they were new content. This is
3007 3009 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3008 3010 e.g. large file detection and handling.
3009 3011
3010 3012 Delta computation can be slow, so the choice of delta reuse policy can
3011 3013 significantly affect run time.
3012 3014
3013 3015 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3014 3016 two extremes. Deltas will be reused if they are appropriate. But if the
3015 3017 delta could choose a better revision, it will do so. This means if you
3016 3018 are converting a non-generaldelta revlog to a generaldelta revlog,
3017 3019 deltas will be recomputed if the delta's parent isn't a parent of the
3018 3020 revision.
3019 3021
3020 3022 In addition to the delta policy, the ``forcedeltabothparents``
3021 3023 argument controls whether to force computing deltas against both parents
3022 3024 for merges. By default, the destination revlog's current setting is used.
3023 3025
3024 3026 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3025 3027 `sidedata_helpers`.
3026 3028 """
3027 3029 if deltareuse not in self.DELTAREUSEALL:
3028 3030 raise ValueError(
3029 3031 _(b'value for deltareuse invalid: %s') % deltareuse
3030 3032 )
3031 3033
3032 3034 if len(destrevlog):
3033 3035 raise ValueError(_(b'destination revlog is not empty'))
3034 3036
3035 3037 if getattr(self, 'filteredrevs', None):
3036 3038 raise ValueError(_(b'source revlog has filtered revisions'))
3037 3039 if getattr(destrevlog, 'filteredrevs', None):
3038 3040 raise ValueError(_(b'destination revlog has filtered revisions'))
3039 3041
3040 3042 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3041 3043 # if possible.
3042 3044 oldlazydelta = destrevlog._lazydelta
3043 3045 oldlazydeltabase = destrevlog._lazydeltabase
3044 3046 oldamd = destrevlog._deltabothparents
3045 3047
3046 3048 try:
3047 3049 if deltareuse == self.DELTAREUSEALWAYS:
3048 3050 destrevlog._lazydeltabase = True
3049 3051 destrevlog._lazydelta = True
3050 3052 elif deltareuse == self.DELTAREUSESAMEREVS:
3051 3053 destrevlog._lazydeltabase = False
3052 3054 destrevlog._lazydelta = True
3053 3055 elif deltareuse == self.DELTAREUSENEVER:
3054 3056 destrevlog._lazydeltabase = False
3055 3057 destrevlog._lazydelta = False
3056 3058
3057 3059 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3058 3060
3059 3061 self._clone(
3060 3062 tr,
3061 3063 destrevlog,
3062 3064 addrevisioncb,
3063 3065 deltareuse,
3064 3066 forcedeltabothparents,
3065 3067 sidedata_helpers,
3066 3068 )
3067 3069
3068 3070 finally:
3069 3071 destrevlog._lazydelta = oldlazydelta
3070 3072 destrevlog._lazydeltabase = oldlazydeltabase
3071 3073 destrevlog._deltabothparents = oldamd
3072 3074
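# A hedged usage sketch (not from the source) of driving ``clone`` with an
# explicit delta-reuse policy; the transaction and destination revlog are
# assumed to exist already.
def copy_with_recomputed_deltas(src_revlog, dest_revlog, tr):
    # DELTAREUSENEVER forces every delta to be recomputed against the
    # destination's own policy -- the slowest option, but useful when the
    # delta algorithm or storage format has changed.
    src_revlog.clone(tr, dest_revlog, deltareuse=src_revlog.DELTAREUSENEVER)
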
3073 3075 def _clone(
3074 3076 self,
3075 3077 tr,
3076 3078 destrevlog,
3077 3079 addrevisioncb,
3078 3080 deltareuse,
3079 3081 forcedeltabothparents,
3080 3082 sidedata_helpers,
3081 3083 ):
3082 3084 """perform the core duty of `revlog.clone` after parameter processing"""
3083 3085 deltacomputer = deltautil.deltacomputer(destrevlog)
3084 3086 index = self.index
3085 3087 for rev in self:
3086 3088 entry = index[rev]
3087 3089
3088 3090 # Some classes override linkrev to take filtered revs into
3089 3091 # account. Use raw entry from index.
3090 3092 flags = entry[0] & 0xFFFF
3091 3093 linkrev = entry[4]
3092 3094 p1 = index[entry[5]][7]
3093 3095 p2 = index[entry[6]][7]
3094 3096 node = entry[7]
3095 3097
3096 3098 # (Possibly) reuse the delta from the revlog if allowed and
3097 3099 # the revlog chunk is a delta.
3098 3100 cachedelta = None
3099 3101 rawtext = None
3100 3102 if deltareuse == self.DELTAREUSEFULLADD:
3101 3103 text, sidedata = self._revisiondata(rev)
3102 3104
3103 3105 if sidedata_helpers is not None:
3104 3106 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3105 3107 self, sidedata_helpers, sidedata, rev
3106 3108 )
3107 3109 flags = flags | new_flags[0] & ~new_flags[1]
3108 3110
3109 3111 destrevlog.addrevision(
3110 3112 text,
3111 3113 tr,
3112 3114 linkrev,
3113 3115 p1,
3114 3116 p2,
3115 3117 cachedelta=cachedelta,
3116 3118 node=node,
3117 3119 flags=flags,
3118 3120 deltacomputer=deltacomputer,
3119 3121 sidedata=sidedata,
3120 3122 )
3121 3123 else:
3122 3124 if destrevlog._lazydelta:
3123 3125 dp = self.deltaparent(rev)
3124 3126 if dp != nullrev:
3125 3127 cachedelta = (dp, bytes(self._chunk(rev)))
3126 3128
3127 3129 sidedata = None
3128 3130 if not cachedelta:
3129 3131 rawtext, sidedata = self._revisiondata(rev)
3130 3132 if sidedata is None:
3131 3133 sidedata = self.sidedata(rev)
3132 3134
3133 3135 if sidedata_helpers is not None:
3134 3136 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3135 3137 self, sidedata_helpers, sidedata, rev
3136 3138 )
3137 3139 flags = flags | new_flags[0] & ~new_flags[1]
3138 3140
3139 3141 with destrevlog._writing(tr):
3140 3142 destrevlog._addrevision(
3141 3143 node,
3142 3144 rawtext,
3143 3145 tr,
3144 3146 linkrev,
3145 3147 p1,
3146 3148 p2,
3147 3149 flags,
3148 3150 cachedelta,
3149 3151 deltacomputer=deltacomputer,
3150 3152 sidedata=sidedata,
3151 3153 )
3152 3154
3153 3155 if addrevisioncb:
3154 3156 addrevisioncb(self, rev, node)
3155 3157
3156 3158 def censorrevision(self, tr, censornode, tombstone=b''):
3157 3159 if self._format_version == REVLOGV0:
3158 3160 raise error.RevlogError(
3159 3161 _(b'cannot censor with version %d revlogs')
3160 3162 % self._format_version
3161 3163 )
3162 3164
3163 3165 censorrev = self.rev(censornode)
3164 3166 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3165 3167
3166 3168 if len(tombstone) > self.rawsize(censorrev):
3167 3169 raise error.Abort(
3168 3170 _(b'censor tombstone must be no longer than censored data')
3169 3171 )
3170 3172
3171 3173 # Rewriting the revlog in place is hard. Our strategy for censoring is
3172 3174 # to create a new revlog, copy all revisions to it, then replace the
3173 3175 # revlogs on transaction close.
3174 3176 #
3175 3177 # This is a bit dangerous. We could easily have a mismatch of state.
3176 3178 newrl = revlog(
3177 3179 self.opener,
3178 3180 target=self.target,
3179 3181 radix=self.radix,
3180 3182 postfix=b'tmpcensored',
3181 3183 censorable=True,
3182 3184 )
3183 3185 newrl._format_version = self._format_version
3184 3186 newrl._format_flags = self._format_flags
3185 3187 newrl._generaldelta = self._generaldelta
3186 3188 newrl._parse_index = self._parse_index
3187 3189
3188 3190 for rev in self.revs():
3189 3191 node = self.node(rev)
3190 3192 p1, p2 = self.parents(node)
3191 3193
3192 3194 if rev == censorrev:
3193 3195 newrl.addrawrevision(
3194 3196 tombstone,
3195 3197 tr,
3196 3198 self.linkrev(censorrev),
3197 3199 p1,
3198 3200 p2,
3199 3201 censornode,
3200 3202 REVIDX_ISCENSORED,
3201 3203 )
3202 3204
3203 3205 if newrl.deltaparent(rev) != nullrev:
3204 3206 raise error.Abort(
3205 3207 _(
3206 3208 b'censored revision stored as delta; '
3207 3209 b'cannot censor'
3208 3210 ),
3209 3211 hint=_(
3210 3212 b'censoring of revlogs is not '
3211 3213 b'fully implemented; please report '
3212 3214 b'this bug'
3213 3215 ),
3214 3216 )
3215 3217 continue
3216 3218
3217 3219 if self.iscensored(rev):
3218 3220 if self.deltaparent(rev) != nullrev:
3219 3221 raise error.Abort(
3220 3222 _(
3221 3223 b'cannot censor due to censored '
3222 3224 b'revision having delta stored'
3223 3225 )
3224 3226 )
3225 3227 rawtext = self._chunk(rev)
3226 3228 else:
3227 3229 rawtext = self.rawdata(rev)
3228 3230
3229 3231 newrl.addrawrevision(
3230 3232 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3231 3233 )
3232 3234
3233 3235 tr.addbackup(self._indexfile, location=b'store')
3234 3236 if not self._inline:
3235 3237 tr.addbackup(self._datafile, location=b'store')
3236 3238
3237 3239 self.opener.rename(newrl._indexfile, self._indexfile)
3238 3240 if not self._inline:
3239 3241 self.opener.rename(newrl._datafile, self._datafile)
3240 3242
3241 3243 self.clearcaches()
3242 3244 self._loadindex()
3243 3245
3244 3246 def verifyintegrity(self, state):
3245 3247 """Verifies the integrity of the revlog.
3246 3248
3247 3249 Yields ``revlogproblem`` instances describing problems that are
3248 3250 found.
3249 3251 """
3250 3252 dd, di = self.checksize()
3251 3253 if dd:
3252 3254 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3253 3255 if di:
3254 3256 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3255 3257
3256 3258 version = self._format_version
3257 3259
3258 3260 # The verifier tells us what version revlog we should be.
3259 3261 if version != state[b'expectedversion']:
3260 3262 yield revlogproblem(
3261 3263 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3262 3264 % (self.display_id, version, state[b'expectedversion'])
3263 3265 )
3264 3266
3265 3267 state[b'skipread'] = set()
3266 3268 state[b'safe_renamed'] = set()
3267 3269
3268 3270 for rev in self:
3269 3271 node = self.node(rev)
3270 3272
3271 3273 # Verify contents. 4 cases to care about:
3272 3274 #
3273 3275 # common: the most common case
3274 3276 # rename: with a rename
3275 3277 # meta: file content starts with b'\1\n', the metadata
3276 3278 # header defined in filelog.py, but without a rename
3277 3279 # ext: content stored externally
3278 3280 #
3279 3281 # More formally, their differences are shown below:
3280 3282 #
3281 3283 # | common | rename | meta | ext
3282 3284 # -------------------------------------------------------
3283 3285 # flags() | 0 | 0 | 0 | not 0
3284 3286 # renamed() | False | True | False | ?
3285 3287 # rawtext[0:2]=='\1\n'| False | True | True | ?
3286 3288 #
3287 3289 # "rawtext" means the raw text stored in revlog data, which
3288 3290 # could be retrieved by "rawdata(rev)". "text"
3289 3291 # mentioned below is "revision(rev)".
3290 3292 #
3291 3293 # There are 3 different lengths stored physically:
3292 3294 # 1. L1: rawsize, stored in revlog index
3293 3295 # 2. L2: len(rawtext), stored in revlog data
3294 3296 # 3. L3: len(text), stored in revlog data if flags==0, or
3295 3297 # possibly somewhere else if flags!=0
3296 3298 #
3297 3299 # L1 should be equal to L2. L3 could be different from them.
3298 3300 # "text" may or may not affect commit hash depending on flag
3299 3301 # processors (see flagutil.addflagprocessor).
3300 3302 #
3301 3303 # | common | rename | meta | ext
3302 3304 # -------------------------------------------------
3303 3305 # rawsize() | L1 | L1 | L1 | L1
3304 3306 # size() | L1 | L2-LM | L1(*) | L1 (?)
3305 3307 # len(rawtext) | L2 | L2 | L2 | L2
3306 3308 # len(text) | L2 | L2 | L2 | L3
3307 3309 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3308 3310 #
3309 3311 # LM: length of metadata, depending on rawtext
3310 3312 # (*): not ideal, see comment in filelog.size
3311 3313 # (?): could be "- len(meta)" if the resolved content has
3312 3314 # rename metadata
3313 3315 #
3314 3316 # Checks needed to be done:
3315 3317 # 1. length check: L1 == L2, in all cases.
3316 3318 # 2. hash check: depending on flag processor, we may need to
3317 3319 # use either "text" (external), or "rawtext" (in revlog).
3318 3320
3319 3321 try:
3320 3322 skipflags = state.get(b'skipflags', 0)
3321 3323 if skipflags:
3322 3324 skipflags &= self.flags(rev)
3323 3325
3324 3326 _verify_revision(self, skipflags, state, node)
3325 3327
3326 3328 l1 = self.rawsize(rev)
3327 3329 l2 = len(self.rawdata(node))
3328 3330
3329 3331 if l1 != l2:
3330 3332 yield revlogproblem(
3331 3333 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3332 3334 node=node,
3333 3335 )
3334 3336
3335 3337 except error.CensoredNodeError:
3336 3338 if state[b'erroroncensored']:
3337 3339 yield revlogproblem(
3338 3340 error=_(b'censored file data'), node=node
3339 3341 )
3340 3342 state[b'skipread'].add(node)
3341 3343 except Exception as e:
3342 3344 yield revlogproblem(
3343 3345 error=_(b'unpacking %s: %s')
3344 3346 % (short(node), stringutil.forcebytestr(e)),
3345 3347 node=node,
3346 3348 )
3347 3349 state[b'skipread'].add(node)
3348 3350
3349 3351 def storageinfo(
3350 3352 self,
3351 3353 exclusivefiles=False,
3352 3354 sharedfiles=False,
3353 3355 revisionscount=False,
3354 3356 trackedsize=False,
3355 3357 storedsize=False,
3356 3358 ):
3357 3359 d = {}
3358 3360
3359 3361 if exclusivefiles:
3360 3362 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3361 3363 if not self._inline:
3362 3364 d[b'exclusivefiles'].append((self.opener, self._datafile))
3363 3365
3364 3366 if sharedfiles:
3365 3367 d[b'sharedfiles'] = []
3366 3368
3367 3369 if revisionscount:
3368 3370 d[b'revisionscount'] = len(self)
3369 3371
3370 3372 if trackedsize:
3371 3373 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3372 3374
3373 3375 if storedsize:
3374 3376 d[b'storedsize'] = sum(
3375 3377 self.opener.stat(path).st_size for path in self.files()
3376 3378 )
3377 3379
3378 3380 return d
3379 3381
3380 3382 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3381 3383 if not self.hassidedata:
3382 3384 return
3383 3385 # revlog formats with sidedata support do not support inline
3384 3386 assert not self._inline
3385 3387 if not helpers[1] and not helpers[2]:
3386 3388 # Nothing to generate or remove
3387 3389 return
3388 3390
3389 3391 new_entries = []
3390 3392 # append the new sidedata
3391 3393 with self._writing(transaction):
3392 3394 ifh, dfh = self._writinghandles
3393 3395 if self._docket is not None:
3394 3396 dfh.seek(self._docket.data_end, os.SEEK_SET)
3395 3397 else:
3396 3398 dfh.seek(0, os.SEEK_END)
3397 3399
3398 3400 current_offset = dfh.tell()
3399 3401 for rev in range(startrev, endrev + 1):
3400 3402 entry = self.index[rev]
3401 3403 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3402 3404 store=self,
3403 3405 sidedata_helpers=helpers,
3404 3406 sidedata={},
3405 3407 rev=rev,
3406 3408 )
3407 3409
3408 3410 serialized_sidedata = sidedatautil.serialize_sidedata(
3409 3411 new_sidedata
3410 3412 )
3411 3413
3412 3414 sidedata_compression_mode = COMP_MODE_INLINE
3413 3415 if serialized_sidedata and self.hassidedata:
3414 3416 sidedata_compression_mode = COMP_MODE_PLAIN
3415 3417 h, comp_sidedata = self.compress(serialized_sidedata)
3416 3418 if (
3417 3419 h != b'u'
3418 3420 and comp_sidedata[0] != b'\0'
3419 3421 and len(comp_sidedata) < len(serialized_sidedata)
3420 3422 ):
3421 3423 assert not h
3422 3424 if (
3423 3425 comp_sidedata[0]
3424 3426 == self._docket.default_compression_header
3425 3427 ):
3426 3428 sidedata_compression_mode = COMP_MODE_DEFAULT
3427 3429 serialized_sidedata = comp_sidedata
3428 3430 else:
3429 3431 sidedata_compression_mode = COMP_MODE_INLINE
3430 3432 serialized_sidedata = comp_sidedata
3431 3433 if entry[8] != 0 or entry[9] != 0:
3432 3434 # rewriting entries that already have sidedata is not
3433 3435 # supported yet, because it introduces garbage data in the
3434 3436 # revlog.
3435 3437 msg = b"rewriting existing sidedata is not supported yet"
3436 3438 raise error.Abort(msg)
3437 3439
3438 3440 # Apply (potential) flags to add and to remove after running
3439 3441 # the sidedata helpers
3440 3442 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3441 3443 entry_update = (
3442 3444 current_offset,
3443 3445 len(serialized_sidedata),
3444 3446 new_offset_flags,
3445 3447 sidedata_compression_mode,
3446 3448 )
3447 3449
3448 3450 # the sidedata computation might have moved the file cursors around
3449 3451 dfh.seek(current_offset, os.SEEK_SET)
3450 3452 dfh.write(serialized_sidedata)
3451 3453 new_entries.append(entry_update)
3452 3454 current_offset += len(serialized_sidedata)
3453 3455 if self._docket is not None:
3454 3456 self._docket.data_end = dfh.tell()
3455 3457
3456 3458 # rewrite the new index entries
3457 3459 ifh.seek(startrev * self.index.entry_size)
3458 3460 for i, e in enumerate(new_entries):
3459 3461 rev = startrev + i
3460 3462 self.index.replace_sidedata_info(rev, *e)
3461 3463 packed = self.index.entry_binary(rev)
3462 3464 if rev == 0 and self._docket is None:
3463 3465 header = self._format_flags | self._format_version
3464 3466 header = self.index.pack_header(header)
3465 3467 packed = header + packed
3466 3468 ifh.write(packed)
@@ -1,249 +1,265 b''
1 1 # docket - code related to revlog "docket"
2 2 #
3 3 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 ### Revlog docket file
9 9 #
10 10 # The revlog is stored on disk using multiple files:
11 11 #
12 12 # * a small docket file, containing metadata and a pointer,
13 13 #
14 14 # * an index file, containing fixed width information about revisions,
15 15 #
16 16 # * a data file, containing variable width data for these revisions,
17 17
18 18 from __future__ import absolute_import
19 19
20 20 import errno
21 21 import os
22 22 import random
23 23 import struct
24 24
25 25 from .. import (
26 26 encoding,
27 27 error,
28 28 node,
29 29 pycompat,
30 30 util,
31 31 )
32 32
33 33 from . import (
34 34 constants,
35 35 )
36 36
37 37
38 38 def make_uid(id_size=8):
39 39 """return a new unique identifier.
40 40
41 41 The identifier is random and composed of ascii characters."""
42 42 # since we "hex" the result, we need half the number of bytes to have a final
43 43 # uuid of size id_size
44 44 return node.hex(os.urandom(id_size // 2))
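# For instance (illustration only): with the default id_size of 8, the uid is
# the hexlification of four random bytes, e.g. something like b'6b8ab34b'.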
45 45
46 46
47 47 # some special test logic to avoid annoying random output in the tests
48 48 stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE')
49 49
50 50 if stable_docket_file:
51 51
52 52 def make_uid(id_size=8):
53 53 try:
54 54 with open(stable_docket_file, mode='rb') as f:
55 55 seed = f.read().strip()
56 56 except IOError as inst:
57 57 if inst.errno != errno.ENOENT:
58 58 raise
59 59 seed = b'04' # chosen by a fair dice roll. guaranteed to be random
60 60 if pycompat.ispy3:
61 61 iter_seed = iter(seed)
62 62 else:
63 63 iter_seed = (ord(c) for c in seed)
64 64 # some basic circular sum hashing on 64 bits
65 65 int_seed = 0
66 66 low_mask = int('1' * 35, 2)
67 67 for i in iter_seed:
68 68 high_part = int_seed >> 35
69 69 low_part = (int_seed & low_mask) << 28
70 70 int_seed = high_part + low_part + i
71 71 r = random.Random()
72 72 if pycompat.ispy3:
73 73 r.seed(int_seed, version=1)
74 74 else:
75 75 r.seed(int_seed)
76 76 # once we drop python 3.8 support we can simply use r.randbytes
77 77 raw = r.getrandbits(id_size * 4)
78 78 assert id_size == 8
79 79 p = struct.pack('>L', raw)
80 80 new = node.hex(p)
81 81 with open(stable_docket_file, 'wb') as f:
82 82 f.write(new)
83 83 return new
84 84
85 85
86 86 # Docket format
87 87 #
88 88 # * 4 bytes: revlog version
89 89 # | This is mandatory as the docket must be compatible with the previous
90 90 # | revlog index header.
91 91 # * 1 byte: size of index uuid
92 # * 1 byte: size of data uuid
92 93 # * 8 bytes: size of index-data
93 94 # * 8 bytes: pending size of index-data
94 95 # * 8 bytes: size of data
95 96 # * 8 bytes: pending size of data
96 97 # * 1 byte: default compression header
97 S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'BLLLLc')
98 S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'BBLLLLc')
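# Editor's sketch (illustration only, not part of this patch): how a docket
# body with the new data-uuid field could be assembled by hand.  All values
# and the helper name are hypothetical; the real logic lives in
# RevlogDocket._serialize and parse_docket below.
def _example_docket_bytes():
    index_uuid = make_uid()
    data_uuid = make_uid()
    header = S_HEADER.pack(
        constants.REVLOGV2,  # version header (format flags omitted here)
        len(index_uuid),  # 1 byte: size of index uuid
        len(data_uuid),  # 1 byte: size of data uuid (the new field)
        0,  # size of index-data
        0,  # pending size of index-data
        0,  # size of data
        0,  # pending size of data
        b'x',  # hypothetical one-byte default compression header
    )
    # the two uuids follow the fixed-width header, index uuid first
    return header + index_uuid + data_uuid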
98 99
99 100
100 101 class RevlogDocket(object):
101 102 """metadata associated with revlog"""
102 103
103 104 def __init__(
104 105 self,
105 106 revlog,
106 107 use_pending=False,
107 108 version_header=None,
108 109 index_uuid=None,
110 data_uuid=None,
109 111 index_end=0,
110 112 pending_index_end=0,
111 113 data_end=0,
112 114 pending_data_end=0,
113 115 default_compression_header=None,
114 116 ):
115 117 self._version_header = version_header
116 118 self._read_only = bool(use_pending)
117 119 self._dirty = False
118 120 self._radix = revlog.radix
119 121 self._path = revlog._docket_file
120 122 self._opener = revlog.opener
121 123 self._index_uuid = index_uuid
124 self._data_uuid = data_uuid
122 125 # these asserts should be True as long as we have a single index filename
123 126 assert index_end <= pending_index_end
124 127 assert data_end <= pending_data_end
125 128 self._initial_index_end = index_end
126 129 self._pending_index_end = pending_index_end
127 130 self._initial_data_end = data_end
128 131 self._pending_data_end = pending_data_end
129 132 if use_pending:
130 133 self._index_end = self._pending_index_end
131 134 self._data_end = self._pending_data_end
132 135 else:
133 136 self._index_end = self._initial_index_end
134 137 self._data_end = self._initial_data_end
135 138 self.default_compression_header = default_compression_header
136 139
137 140 def index_filepath(self):
138 141 """file path to the current index file associated to this docket"""
139 142 # very simplistic version at first
140 143 if self._index_uuid is None:
141 144 self._index_uuid = make_uid()
142 145 return b"%s-%s.idx" % (self._radix, self._index_uuid)
143 146
147 def data_filepath(self):
148 """file path to the current index file associated to this docket"""
149 # very simplistic version at first
150 if self._data_uuid is None:
151 self._data_uuid = make_uid()
152 return b"%s-%s.dat" % (self._radix, self._data_uuid)
153
144 154 @property
145 155 def index_end(self):
146 156 return self._index_end
147 157
148 158 @index_end.setter
149 159 def index_end(self, new_size):
150 160 if new_size != self._index_end:
151 161 self._index_end = new_size
152 162 self._dirty = True
153 163
154 164 @property
155 165 def data_end(self):
156 166 return self._data_end
157 167
158 168 @data_end.setter
159 169 def data_end(self, new_size):
160 170 if new_size != self._data_end:
161 171 self._data_end = new_size
162 172 self._dirty = True
163 173
164 174 def write(self, transaction, pending=False, stripping=False):
165 175 """write the modification of disk if any
166 176
167 177 This makes the new content visible to all processes"""
168 178 if not self._dirty:
169 179 return False
170 180 else:
171 181 if self._read_only:
172 182 msg = b'writing read-only docket: %s'
173 183 msg %= self._path
174 184 raise error.ProgrammingError(msg)
175 185 if not stripping:
176 186 # XXX we could leverage the docket while stripping. However, it
177 187 # is not powerful enough at the time of this comment
178 188 transaction.addbackup(self._path, location=b'store')
179 189 with self._opener(self._path, mode=b'w', atomictemp=True) as f:
180 190 f.write(self._serialize(pending=pending))
181 191 # if pending, we still need to write the final data eventually
182 192 self._dirty = pending
183 193 return True
184 194
185 195 def _serialize(self, pending=False):
186 196 if pending:
187 197 official_index_end = self._initial_index_end
188 198 official_data_end = self._initial_data_end
189 199 else:
190 200 official_index_end = self._index_end
191 201 official_data_end = self._data_end
192 202
193 203 # this assert should be True as long as we have a single index filename
194 204 assert official_data_end <= self._data_end
195 205 data = (
196 206 self._version_header,
197 207 len(self._index_uuid),
208 len(self._data_uuid),
198 209 official_index_end,
199 210 self._index_end,
200 211 official_data_end,
201 212 self._data_end,
202 213 self.default_compression_header,
203 214 )
204 215 s = []
205 216 s.append(S_HEADER.pack(*data))
206 217 s.append(self._index_uuid)
218 s.append(self._data_uuid)
207 219 return b''.join(s)
208 220
209 221
210 222 def default_docket(revlog, version_header):
211 223 """given a revlog version a new docket object for the given revlog"""
212 224 rl_version = version_header & 0xFFFF
213 225 if rl_version not in (constants.REVLOGV2, constants.CHANGELOGV2):
214 226 return None
215 227 comp = util.compengines[revlog._compengine].revlogheader()
216 228 docket = RevlogDocket(
217 229 revlog,
218 230 version_header=version_header,
219 231 default_compression_header=comp,
220 232 )
221 233 docket._dirty = True
222 234 return docket
223 235
224 236
225 237 def parse_docket(revlog, data, use_pending=False):
226 238 """given some docket data return a docket object for the given revlog"""
227 239 header = S_HEADER.unpack(data[: S_HEADER.size])
228 240 offset = S_HEADER.size
229 241 version_header = header[0]
230 242 index_uuid_size = header[1]
231 243 index_uuid = data[offset : offset + index_uuid_size]
232 244 offset += index_uuid_size
233 index_size = header[2]
234 pending_index_size = header[3]
235 data_size = header[4]
236 pending_data_size = header[5]
237 default_compression_header = header[6]
245 data_uuid_size = header[2]
246 data_uuid = data[offset : offset + data_uuid_size]
247 offset += data_uuid_size
248 index_size = header[3]
249 pending_index_size = header[4]
250 data_size = header[5]
251 pending_data_size = header[6]
252 default_compression_header = header[7]
238 253 docket = RevlogDocket(
239 254 revlog,
240 255 use_pending=use_pending,
241 256 version_header=version_header,
242 257 index_uuid=index_uuid,
258 data_uuid=data_uuid,
243 259 index_end=index_size,
244 260 pending_index_end=pending_index_size,
245 261 data_end=data_size,
246 262 pending_data_end=pending_data_size,
247 263 default_compression_header=default_compression_header,
248 264 )
249 265 return docket
@@ -1,814 +1,823 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import functools
12 12 import os
13 13 import re
14 14 import stat
15 15
16 16 from .i18n import _
17 17 from .pycompat import getattr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 manifest,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 28 from .utils import hashutil
29 29
30 30 parsers = policy.importmod('parsers')
31 31 # how many bytes should be read from fncache in one read
32 32 # It is done to prevent loading large fncache files into memory
33 33 fncache_chunksize = 10 ** 6
34 34
35 35
36 36 def _matchtrackedpath(path, matcher):
37 37 """parses a fncache entry and returns whether the entry is tracking a path
38 38 matched by matcher or not.
39 39
40 40 If matcher is None, returns True"""
41 41
42 42 if matcher is None:
43 43 return True
44 44 path = decodedir(path)
45 45 if path.startswith(b'data/'):
46 46 return matcher(path[len(b'data/') : -len(b'.i')])
47 47 elif path.startswith(b'meta/'):
48 48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
49 49
50 50 raise error.ProgrammingError(b"cannot decode path %s" % path)
51 51
52 52
53 53 # This avoids a collision between a file named foo and a dir named
54 54 # foo.i or foo.d
55 55 def _encodedir(path):
56 56 """
57 57 >>> _encodedir(b'data/foo.i')
58 58 'data/foo.i'
59 59 >>> _encodedir(b'data/foo.i/bla.i')
60 60 'data/foo.i.hg/bla.i'
61 61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 62 'data/foo.i.hg.hg/bla.i'
63 63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 65 """
66 66 return (
67 67 path.replace(b".hg/", b".hg.hg/")
68 68 .replace(b".i/", b".i.hg/")
69 69 .replace(b".d/", b".d.hg/")
70 70 )
71 71
72 72
73 73 encodedir = getattr(parsers, 'encodedir', _encodedir)
74 74
75 75
76 76 def decodedir(path):
77 77 """
78 78 >>> decodedir(b'data/foo.i')
79 79 'data/foo.i'
80 80 >>> decodedir(b'data/foo.i.hg/bla.i')
81 81 'data/foo.i/bla.i'
82 82 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
83 83 'data/foo.i.hg/bla.i'
84 84 """
85 85 if b".hg/" not in path:
86 86 return path
87 87 return (
88 88 path.replace(b".d.hg/", b".d/")
89 89 .replace(b".i.hg/", b".i/")
90 90 .replace(b".hg.hg/", b".hg/")
91 91 )
92 92
93 93
94 94 def _reserved():
95 95 """characters that are problematic for filesystems
96 96
97 97 * ascii escapes (0..31)
98 98 * ascii hi (126..255)
99 99 * windows specials
100 100
101 101 these characters will be escaped by encodefunctions
102 102 """
103 103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 104 for x in range(32):
105 105 yield x
106 106 for x in range(126, 256):
107 107 yield x
108 108 for x in winreserved:
109 109 yield x
110 110
111 111
112 112 def _buildencodefun():
113 113 """
114 114 >>> enc, dec = _buildencodefun()
115 115
116 116 >>> enc(b'nothing/special.txt')
117 117 'nothing/special.txt'
118 118 >>> dec(b'nothing/special.txt')
119 119 'nothing/special.txt'
120 120
121 121 >>> enc(b'HELLO')
122 122 '_h_e_l_l_o'
123 123 >>> dec(b'_h_e_l_l_o')
124 124 'HELLO'
125 125
126 126 >>> enc(b'hello:world?')
127 127 'hello~3aworld~3f'
128 128 >>> dec(b'hello~3aworld~3f')
129 129 'hello:world?'
130 130
131 131 >>> enc(b'the\\x07quick\\xADshot')
132 132 'the~07quick~adshot'
133 133 >>> dec(b'the~07quick~adshot')
134 134 'the\\x07quick\\xadshot'
135 135 """
136 136 e = b'_'
137 137 xchr = pycompat.bytechr
138 138 asciistr = list(map(xchr, range(127)))
139 139 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
140 140
141 141 cmap = {x: x for x in asciistr}
142 142 for x in _reserved():
143 143 cmap[xchr(x)] = b"~%02x" % x
144 144 for x in capitals + [ord(e)]:
145 145 cmap[xchr(x)] = e + xchr(x).lower()
146 146
147 147 dmap = {}
148 148 for k, v in pycompat.iteritems(cmap):
149 149 dmap[v] = k
150 150
151 151 def decode(s):
152 152 i = 0
153 153 while i < len(s):
154 154 for l in pycompat.xrange(1, 4):
155 155 try:
156 156 yield dmap[s[i : i + l]]
157 157 i += l
158 158 break
159 159 except KeyError:
160 160 pass
161 161 else:
162 162 raise KeyError
163 163
164 164 return (
165 165 lambda s: b''.join(
166 166 [cmap[s[c : c + 1]] for c in pycompat.xrange(len(s))]
167 167 ),
168 168 lambda s: b''.join(list(decode(s))),
169 169 )
170 170
171 171
172 172 _encodefname, _decodefname = _buildencodefun()
173 173
174 174
175 175 def encodefilename(s):
176 176 """
177 177 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
178 178 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
179 179 """
180 180 return _encodefname(encodedir(s))
181 181
182 182
183 183 def decodefilename(s):
184 184 """
185 185 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
186 186 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
187 187 """
188 188 return decodedir(_decodefname(s))
189 189
190 190
191 191 def _buildlowerencodefun():
192 192 """
193 193 >>> f = _buildlowerencodefun()
194 194 >>> f(b'nothing/special.txt')
195 195 'nothing/special.txt'
196 196 >>> f(b'HELLO')
197 197 'hello'
198 198 >>> f(b'hello:world?')
199 199 'hello~3aworld~3f'
200 200 >>> f(b'the\\x07quick\\xADshot')
201 201 'the~07quick~adshot'
202 202 """
203 203 xchr = pycompat.bytechr
204 204 cmap = {xchr(x): xchr(x) for x in pycompat.xrange(127)}
205 205 for x in _reserved():
206 206 cmap[xchr(x)] = b"~%02x" % x
207 207 for x in range(ord(b"A"), ord(b"Z") + 1):
208 208 cmap[xchr(x)] = xchr(x).lower()
209 209
210 210 def lowerencode(s):
211 211 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
212 212
213 213 return lowerencode
214 214
215 215
216 216 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
217 217
218 218 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
219 219 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
220 220 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
221 221
222 222
223 223 def _auxencode(path, dotencode):
224 224 """
225 225 Encodes filenames containing names reserved by Windows or which end in
226 226 period or space. Does not touch other single reserved characters c.
227 227 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
228 228 Additionally encodes space or period at the beginning, if dotencode is
229 229 True. Parameter path is assumed to be all lowercase.
230 230 A segment only needs encoding if a reserved name appears as a
231 231 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
232 232 doesn't need encoding.
233 233
234 234 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
235 235 >>> _auxencode(s.split(b'/'), True)
236 236 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
237 237 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
238 238 >>> _auxencode(s.split(b'/'), False)
239 239 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
240 240 >>> _auxencode([b'foo. '], True)
241 241 ['foo.~20']
242 242 >>> _auxencode([b' .foo'], True)
243 243 ['~20.foo']
244 244 """
245 245 for i, n in enumerate(path):
246 246 if not n:
247 247 continue
248 248 if dotencode and n[0] in b'. ':
249 249 n = b"~%02x" % ord(n[0:1]) + n[1:]
250 250 path[i] = n
251 251 else:
252 252 l = n.find(b'.')
253 253 if l == -1:
254 254 l = len(n)
255 255 if (l == 3 and n[:3] in _winres3) or (
256 256 l == 4
257 257 and n[3:4] <= b'9'
258 258 and n[3:4] >= b'1'
259 259 and n[:3] in _winres4
260 260 ):
261 261 # encode third letter ('aux' -> 'au~78')
262 262 ec = b"~%02x" % ord(n[2:3])
263 263 n = n[0:2] + ec + n[3:]
264 264 path[i] = n
265 265 if n[-1] in b'. ':
266 266 # encode last period or space ('foo...' -> 'foo..~2e')
267 267 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
268 268 return path
269 269
270 270
271 271 _maxstorepathlen = 120
272 272 _dirprefixlen = 8
273 273 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
274 274
275 275
276 276 def _hashencode(path, dotencode):
277 277 digest = hex(hashutil.sha1(path).digest())
278 278 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
279 279 parts = _auxencode(le, dotencode)
280 280 basename = parts[-1]
281 281 _root, ext = os.path.splitext(basename)
282 282 sdirs = []
283 283 sdirslen = 0
284 284 for p in parts[:-1]:
285 285 d = p[:_dirprefixlen]
286 286 if d[-1] in b'. ':
287 287 # Windows can't access dirs ending in period or space
288 288 d = d[:-1] + b'_'
289 289 if sdirslen == 0:
290 290 t = len(d)
291 291 else:
292 292 t = sdirslen + 1 + len(d)
293 293 if t > _maxshortdirslen:
294 294 break
295 295 sdirs.append(d)
296 296 sdirslen = t
297 297 dirs = b'/'.join(sdirs)
298 298 if len(dirs) > 0:
299 299 dirs += b'/'
300 300 res = b'dh/' + dirs + digest + ext
301 301 spaceleft = _maxstorepathlen - len(res)
302 302 if spaceleft > 0:
303 303 filler = basename[:spaceleft]
304 304 res = b'dh/' + dirs + filler + digest + ext
305 305 return res
306 306
307 307
308 308 def _hybridencode(path, dotencode):
309 309 """encodes path with a length limit
310 310
311 311 Encodes all paths that begin with 'data/', according to the following.
312 312
313 313 Default encoding (reversible):
314 314
315 315 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
316 316 characters are encoded as '~xx', where xx is the two digit hex code
317 317 of the character (see encodefilename).
318 318 Relevant path components consisting of Windows reserved filenames are
319 319 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
320 320
321 321 Hashed encoding (not reversible):
322 322
323 323 If the default-encoded path is longer than _maxstorepathlen, a
324 324 non-reversible hybrid hashing of the path is done instead.
325 325 This encoding uses up to _dirprefixlen characters of all directory
326 326 levels of the lowerencoded path, but not more levels than can fit into
327 327 _maxshortdirslen.
328 328 Then follows the filler followed by the sha digest of the full path.
329 329 The filler is the beginning of the basename of the lowerencoded path
330 330 (the basename is everything after the last path separator). The filler
331 331 is as long as possible, filling in characters from the basename until
332 332 the encoded path has _maxstorepathlen characters (or all chars of the
333 333 basename have been taken).
334 334 The extension (e.g. '.i' or '.d') is preserved.
335 335
336 336 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
337 337 encoding was used.
338 338 """
339 339 path = encodedir(path)
340 340 ef = _encodefname(path).split(b'/')
341 341 res = b'/'.join(_auxencode(ef, dotencode))
342 342 if len(res) > _maxstorepathlen:
343 343 res = _hashencode(path, dotencode)
344 344 return res
345 345
346 346
347 347 def _pathencode(path):
348 348 de = encodedir(path)
349 349 if len(path) > _maxstorepathlen:
350 350 return _hashencode(de, True)
351 351 ef = _encodefname(de).split(b'/')
352 352 res = b'/'.join(_auxencode(ef, True))
353 353 if len(res) > _maxstorepathlen:
354 354 return _hashencode(de, True)
355 355 return res
356 356
357 357
358 358 _pathencode = getattr(parsers, 'pathencode', _pathencode)
359 359
360 360
361 361 def _plainhybridencode(f):
362 362 return _hybridencode(f, False)
363 363
364 364
365 365 def _calcmode(vfs):
366 366 try:
367 367 # files in .hg/ will be created using this mode
368 368 mode = vfs.stat().st_mode
369 369 # avoid some useless chmods
370 370 if (0o777 & ~util.umask) == (0o777 & mode):
371 371 mode = None
372 372 except OSError:
373 373 mode = None
374 374 return mode
375 375
376 376
377 377 _data = [
378 378 b'bookmarks',
379 379 b'narrowspec',
380 380 b'data',
381 381 b'meta',
382 382 b'00manifest.d',
383 383 b'00manifest.i',
384 384 b'00changelog.d',
385 385 b'00changelog.i',
386 386 b'phaseroots',
387 387 b'obsstore',
388 388 b'requires',
389 389 ]
390 390
391 391 REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
392 REVLOG_FILES_OTHER_EXT = (b'.idx', b'.d', b'.n', b'.nd', b'd.tmpcensored')
392 REVLOG_FILES_OTHER_EXT = (
393 b'.idx',
394 b'.d',
395 b'.dat',
396 b'.n',
397 b'.nd',
398 b'd.tmpcensored',
399 )
393 400 # files that are "volatile" and might change between listing and streaming
394 401 #
394 401 # note: the ".nd" files are nodemap data and won't "change" but they might be
396 403 # deleted.
397 404 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
398 405
399 406 # some exception to the above matching
400 407 EXCLUDED = re.compile(b'.*undo\.[^/]+\.(nd?|i)$')
401 408
402 409
403 410 def is_revlog(f, kind, st):
404 411 if kind != stat.S_IFREG:
405 412 return None
406 413 return revlog_type(f)
407 414
408 415
409 416 def revlog_type(f):
410 417 if f.endswith(REVLOG_FILES_MAIN_EXT) and EXCLUDED.match(f) is None:
411 418 return FILEFLAGS_REVLOG_MAIN
412 419 elif f.endswith(REVLOG_FILES_OTHER_EXT) and EXCLUDED.match(f) is None:
413 420 t = FILETYPE_FILELOG_OTHER
414 421 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
415 422 t |= FILEFLAGS_VOLATILE
416 423 return t
424 return None
417 425
418 426
419 427 # the file is part of changelog data
420 428 FILEFLAGS_CHANGELOG = 1 << 13
421 429 # the file is part of manifest data
422 430 FILEFLAGS_MANIFESTLOG = 1 << 12
423 431 # the file is part of filelog data
424 432 FILEFLAGS_FILELOG = 1 << 11
425 433 # files that are not directly part of a revlog
426 434 FILEFLAGS_OTHER = 1 << 10
427 435
428 436 # the main entry point for a revlog
429 437 FILEFLAGS_REVLOG_MAIN = 1 << 1
430 438 # a secondary file for a revlog
431 439 FILEFLAGS_REVLOG_OTHER = 1 << 0
432 440
433 441 # files that are "volatile" and might change between listing and streaming
434 442 FILEFLAGS_VOLATILE = 1 << 20
435 443
436 444 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
437 445 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
438 446 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
439 447 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
440 448 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
441 449 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
442 450 FILETYPE_OTHER = FILEFLAGS_OTHER
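# Editor's illustration (not part of this patch): with b'.dat' added to
# REVLOG_FILES_OTHER_EXT, the uniquely named data files are picked up as
# secondary revlog files, alongside the docket and the b'.idx' index files.
# The helper name and the filenames below are hypothetical examples.
def _example_revlog_type_classification():
    assert revlog_type(b'data/foo.i') == FILEFLAGS_REVLOG_MAIN
    assert revlog_type(b'data/foo-6b8ab34b.dat') == FILETYPE_FILELOG_OTHER
    assert revlog_type(b'data/foo.txt') is None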
443 451
444 452
445 453 class basicstore(object):
446 454 '''base class for local repository stores'''
447 455
448 456 def __init__(self, path, vfstype):
449 457 vfs = vfstype(path)
450 458 self.path = vfs.base
451 459 self.createmode = _calcmode(vfs)
452 460 vfs.createmode = self.createmode
453 461 self.rawvfs = vfs
454 462 self.vfs = vfsmod.filtervfs(vfs, encodedir)
455 463 self.opener = self.vfs
456 464
457 465 def join(self, f):
458 466 return self.path + b'/' + encodedir(f)
459 467
460 468 def _walk(self, relpath, recurse):
461 469 '''yields (unencoded, encoded, size)'''
462 470 path = self.path
463 471 if relpath:
464 472 path += b'/' + relpath
465 473 striplen = len(self.path) + 1
466 474 l = []
467 475 if self.rawvfs.isdir(path):
468 476 visit = [path]
469 477 readdir = self.rawvfs.readdir
470 478 while visit:
471 479 p = visit.pop()
472 480 for f, kind, st in readdir(p, stat=True):
473 481 fp = p + b'/' + f
474 482 rl_type = is_revlog(f, kind, st)
475 483 if rl_type is not None:
476 484 n = util.pconvert(fp[striplen:])
477 485 l.append((rl_type, decodedir(n), n, st.st_size))
478 486 elif kind == stat.S_IFDIR and recurse:
479 487 visit.append(fp)
480 488 l.sort()
481 489 return l
482 490
483 491 def changelog(self, trypending, concurrencychecker=None):
484 492 return changelog.changelog(
485 493 self.vfs,
486 494 trypending=trypending,
487 495 concurrencychecker=concurrencychecker,
488 496 )
489 497
490 498 def manifestlog(self, repo, storenarrowmatch):
491 499 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
492 500 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
493 501
494 502 def datafiles(self, matcher=None):
495 503 files = self._walk(b'data', True) + self._walk(b'meta', True)
496 504 for (t, u, e, s) in files:
497 505 yield (FILEFLAGS_FILELOG | t, u, e, s)
498 506
499 507 def topfiles(self):
500 508 # yield manifest before changelog
501 509 files = reversed(self._walk(b'', False))
502 510 for (t, u, e, s) in files:
503 511 if u.startswith(b'00changelog'):
504 512 yield (FILEFLAGS_CHANGELOG | t, u, e, s)
505 513 elif u.startswith(b'00manifest'):
506 514 yield (FILEFLAGS_MANIFESTLOG | t, u, e, s)
507 515 else:
508 516 yield (FILETYPE_OTHER | t, u, e, s)
509 517
510 518 def walk(self, matcher=None):
511 519 """return file related to data storage (ie: revlogs)
512 520
513 521 yields (file_type, unencoded, encoded, size)
514 522
515 523 if a matcher is passed, only the storage files of tracked paths
516 524 matching the matcher are yielded
517 525 """
518 526 # yield data files first
519 527 for x in self.datafiles(matcher):
520 528 yield x
521 529 for x in self.topfiles():
522 530 yield x
523 531
524 532 def copylist(self):
525 533 return _data
526 534
527 535 def write(self, tr):
528 536 pass
529 537
530 538 def invalidatecaches(self):
531 539 pass
532 540
533 541 def markremoved(self, fn):
534 542 pass
535 543
536 544 def __contains__(self, path):
537 545 '''Checks if the store contains path'''
538 546 path = b"/".join((b"data", path))
539 547 # file?
540 548 if self.vfs.exists(path + b".i"):
541 549 return True
542 550 # dir?
543 551 if not path.endswith(b"/"):
544 552 path = path + b"/"
545 553 return self.vfs.exists(path)
546 554
547 555
548 556 class encodedstore(basicstore):
549 557 def __init__(self, path, vfstype):
550 558 vfs = vfstype(path + b'/store')
551 559 self.path = vfs.base
552 560 self.createmode = _calcmode(vfs)
553 561 vfs.createmode = self.createmode
554 562 self.rawvfs = vfs
555 563 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
556 564 self.opener = self.vfs
557 565
558 566 def datafiles(self, matcher=None):
559 567 for t, a, b, size in super(encodedstore, self).datafiles():
560 568 try:
561 569 a = decodefilename(a)
562 570 except KeyError:
563 571 a = None
564 572 if a is not None and not _matchtrackedpath(a, matcher):
565 573 continue
566 574 yield t, a, b, size
567 575
568 576 def join(self, f):
569 577 return self.path + b'/' + encodefilename(f)
570 578
571 579 def copylist(self):
572 580 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
573 581
574 582
575 583 class fncache(object):
576 584 # the filename used to be partially encoded
577 585 # hence the encodedir/decodedir dance
578 586 def __init__(self, vfs):
579 587 self.vfs = vfs
580 588 self.entries = None
581 589 self._dirty = False
582 590 # set of new additions to fncache
583 591 self.addls = set()
584 592
585 593 def ensureloaded(self, warn=None):
586 594 """read the fncache file if not already read.
587 595
588 596 If the file on disk is corrupted, raise. If warn is provided,
589 597 warn and keep going instead."""
590 598 if self.entries is None:
591 599 self._load(warn)
592 600
593 601 def _load(self, warn=None):
594 602 '''fill the entries from the fncache file'''
595 603 self._dirty = False
596 604 try:
597 605 fp = self.vfs(b'fncache', mode=b'rb')
598 606 except IOError:
599 607 # skip nonexistent file
600 608 self.entries = set()
601 609 return
602 610
603 611 self.entries = set()
604 612 chunk = b''
605 613 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
606 614 chunk += c
607 615 try:
608 616 p = chunk.rindex(b'\n')
609 617 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
610 618 chunk = chunk[p + 1 :]
611 619 except ValueError:
612 620 # substring '\n' not found, maybe the entry is bigger than the
613 621 # chunksize, so let's keep iterating
614 622 pass
615 623
616 624 if chunk:
617 625 msg = _(b"fncache does not end with a newline")
618 626 if warn:
619 627 warn(msg + b'\n')
620 628 else:
621 629 raise error.Abort(
622 630 msg,
623 631 hint=_(
624 632 b"use 'hg debugrebuildfncache' to "
625 633 b"rebuild the fncache"
626 634 ),
627 635 )
628 636 self._checkentries(fp, warn)
629 637 fp.close()
630 638
631 639 def _checkentries(self, fp, warn):
632 640 """make sure there is no empty string in entries"""
633 641 if b'' in self.entries:
634 642 fp.seek(0)
635 643 for n, line in enumerate(util.iterfile(fp)):
636 644 if not line.rstrip(b'\n'):
637 645 t = _(b'invalid entry in fncache, line %d') % (n + 1)
638 646 if warn:
639 647 warn(t + b'\n')
640 648 else:
641 649 raise error.Abort(t)
642 650
643 651 def write(self, tr):
644 652 if self._dirty:
645 653 assert self.entries is not None
646 654 self.entries = self.entries | self.addls
647 655 self.addls = set()
648 656 tr.addbackup(b'fncache')
649 657 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
650 658 if self.entries:
651 659 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
652 660 fp.close()
653 661 self._dirty = False
654 662 if self.addls:
655 663 # if we have just new entries, let's append them to the fncache
656 664 tr.addbackup(b'fncache')
657 665 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
658 666 if self.addls:
659 667 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
660 668 fp.close()
661 669 self.entries = None
662 670 self.addls = set()
663 671
664 672 def add(self, fn):
665 673 if self.entries is None:
666 674 self._load()
667 675 if fn not in self.entries:
668 676 self.addls.add(fn)
669 677
670 678 def remove(self, fn):
671 679 if self.entries is None:
672 680 self._load()
673 681 if fn in self.addls:
674 682 self.addls.remove(fn)
675 683 return
676 684 try:
677 685 self.entries.remove(fn)
678 686 self._dirty = True
679 687 except KeyError:
680 688 pass
681 689
682 690 def __contains__(self, fn):
683 691 if fn in self.addls:
684 692 return True
685 693 if self.entries is None:
686 694 self._load()
687 695 return fn in self.entries
688 696
689 697 def __iter__(self):
690 698 if self.entries is None:
691 699 self._load()
692 700 return iter(self.entries | self.addls)
693 701
694 702
695 703 class _fncachevfs(vfsmod.proxyvfs):
696 704 def __init__(self, vfs, fnc, encode):
697 705 vfsmod.proxyvfs.__init__(self, vfs)
698 706 self.fncache = fnc
699 707 self.encode = encode
700 708
701 709 def __call__(self, path, mode=b'r', *args, **kw):
702 710 encoded = self.encode(path)
703 711 if mode not in (b'r', b'rb') and (
704 712 path.startswith(b'data/') or path.startswith(b'meta/')
705 713 ):
706 714 # do not trigger a fncache load when adding a file that already is
707 715 # known to exist.
708 716 notload = self.fncache.entries is None and self.vfs.exists(encoded)
709 717 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
710 718 # when appending to an existing file, if the file has size zero,
711 719 # it should be considered as missing. Such zero-size files are
712 720 # the result of truncation when a transaction is aborted.
713 721 notload = False
714 722 if not notload:
715 723 self.fncache.add(path)
716 724 return self.vfs(encoded, mode, *args, **kw)
717 725
718 726 def join(self, path):
719 727 if path:
720 728 return self.vfs.join(self.encode(path))
721 729 else:
722 730 return self.vfs.join(path)
723 731
724 732
725 733 class fncachestore(basicstore):
726 734 def __init__(self, path, vfstype, dotencode):
727 735 if dotencode:
728 736 encode = _pathencode
729 737 else:
730 738 encode = _plainhybridencode
731 739 self.encode = encode
732 740 vfs = vfstype(path + b'/store')
733 741 self.path = vfs.base
734 742 self.pathsep = self.path + b'/'
735 743 self.createmode = _calcmode(vfs)
736 744 vfs.createmode = self.createmode
737 745 self.rawvfs = vfs
738 746 fnc = fncache(vfs)
739 747 self.fncache = fnc
740 748 self.vfs = _fncachevfs(vfs, fnc, encode)
741 749 self.opener = self.vfs
742 750
743 751 def join(self, f):
744 752 return self.pathsep + self.encode(f)
745 753
746 754 def getsize(self, path):
747 755 return self.rawvfs.stat(path).st_size
748 756
749 757 def datafiles(self, matcher=None):
750 758 for f in sorted(self.fncache):
751 759 if not _matchtrackedpath(f, matcher):
752 760 continue
753 761 ef = self.encode(f)
754 762 try:
755 763 t = revlog_type(f)
764 assert t is not None, f
756 765 t |= FILEFLAGS_FILELOG
757 766 yield t, f, ef, self.getsize(ef)
758 767 except OSError as err:
759 768 if err.errno != errno.ENOENT:
760 769 raise
761 770
762 771 def copylist(self):
763 772 d = (
764 773 b'bookmarks',
765 774 b'narrowspec',
766 775 b'data',
767 776 b'meta',
768 777 b'dh',
769 778 b'fncache',
770 779 b'phaseroots',
771 780 b'obsstore',
772 781 b'00manifest.d',
773 782 b'00manifest.i',
774 783 b'00changelog.d',
775 784 b'00changelog.i',
776 785 b'requires',
777 786 )
778 787 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
779 788
780 789 def write(self, tr):
781 790 self.fncache.write(tr)
782 791
783 792 def invalidatecaches(self):
784 793 self.fncache.entries = None
785 794 self.fncache.addls = set()
786 795
787 796 def markremoved(self, fn):
788 797 self.fncache.remove(fn)
789 798
790 799 def _exists(self, f):
791 800 ef = self.encode(f)
792 801 try:
793 802 self.getsize(ef)
794 803 return True
795 804 except OSError as err:
796 805 if err.errno != errno.ENOENT:
797 806 raise
798 807 # nonexistent entry
799 808 return False
800 809
801 810 def __contains__(self, path):
802 811 '''Checks if the store contains path'''
803 812 path = b"/".join((b"data", path))
804 813 # check for files (exact match)
805 814 e = path + b'.i'
806 815 if e in self.fncache and self._exists(e):
807 816 return True
808 817 # now check for directories (prefix match)
809 818 if not path.endswith(b'/'):
810 819 path += b'/'
811 820 for e in self.fncache:
812 821 if e.startswith(path) and self._exists(e):
813 822 return True
814 823 return False
@@ -1,85 +1,86 b''
1 1 #require reporevlogstore
2 2
3 3 A repo with unknown revlogv2 requirement string cannot be opened
4 4
5 5 $ hg init invalidreq
6 6 $ cd invalidreq
7 7 $ echo exp-revlogv2.unknown >> .hg/requires
8 8 $ hg log
9 9 abort: repository requires features unknown to this Mercurial: exp-revlogv2.unknown
10 10 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
11 11 [255]
12 12 $ cd ..
13 13
14 14 Can create and open repo with revlog v2 requirement
15 15
16 16 $ cat >> $HGRCPATH << EOF
17 17 > [experimental]
18 18 > revlogv2 = enable-unstable-format-and-corrupt-my-data
19 19 > EOF
20 20
21 21 $ hg init empty-repo
22 22 $ cd empty-repo
23 23 $ cat .hg/requires
24 24 dotencode
25 25 exp-dirstate-v2 (dirstate-v2 !)
26 26 exp-revlogv2.2
27 27 fncache
28 28 generaldelta
29 29 persistent-nodemap (rust !)
30 30 revlog-compression-zstd (zstd !)
31 31 sparserevlog
32 32 store
33 33
34 34 $ hg log
35 35
36 36 Unknown flags to revlog are rejected
37 37
38 38 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
39 39 ... fh.write(b'\xff\x00\xde\xad') and None
40 40
41 41 $ hg log
42 42 abort: unknown flags (0xff00) in version 57005 revlog 00changelog
43 43 [50]
44 44
45 45 $ cd ..
46 46
47 47 Writing a simple revlog v2 works
48 48
49 49 $ hg init simple
50 50 $ cd simple
51 51 $ touch foo
52 52 $ hg -q commit -A -m initial
53 53
54 54 $ hg log
55 55 changeset: 0:96ee1d7354c4
56 56 tag: tip
57 57 user: test
58 58 date: Thu Jan 01 00:00:00 1970 +0000
59 59 summary: initial
60 60
61
61 62 Header written as expected
62 63
63 64 $ f --hexdump --bytes 4 .hg/store/00changelog.i
64 65 .hg/store/00changelog.i:
65 66 0000: 00 00 de ad |....|
66 67
67 68 $ f --hexdump --bytes 4 .hg/store/data/foo.i
68 69 .hg/store/data/foo.i:
69 70 0000: 00 00 de ad |....|
70 71
71 72 The expected files are generated
72 73 --------------------------------
73 74
74 75 We should have:
75 76 - a docket
76 77 - a index file with a unique name
77 78 - a data file
78 79
79 80 $ ls .hg/store/00changelog* .hg/store/00manifest*
80 .hg/store/00changelog-b870a51b.idx
81 .hg/store/00changelog.d
81 .hg/store/00changelog-6b8ab34b.dat
82 .hg/store/00changelog-88698448.idx
82 83 .hg/store/00changelog.i
83 .hg/store/00manifest-88698448.idx
84 .hg/store/00manifest.d
84 .hg/store/00manifest-1335303a.dat
85 .hg/store/00manifest-b875dfc5.idx
85 86 .hg/store/00manifest.i