revlog: simplify the try nesting in the `_writing` context...
marmoute
r48118:1844a2e3 default
@@ -1,3468 +1,3470 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 CHANGELOGV2,
39 39 COMP_MODE_DEFAULT,
40 40 COMP_MODE_INLINE,
41 41 COMP_MODE_PLAIN,
42 42 FEATURES_BY_VERSION,
43 43 FLAG_GENERALDELTA,
44 44 FLAG_INLINE_DATA,
45 45 INDEX_HEADER,
46 46 KIND_CHANGELOG,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 SUPPORTED_FLAGS,
56 56 )
57 57 from .revlogutils.flagutil import (
58 58 REVIDX_DEFAULT_FLAGS,
59 59 REVIDX_ELLIPSIS,
60 60 REVIDX_EXTSTORED,
61 61 REVIDX_FLAGS_ORDER,
62 62 REVIDX_HASCOPIESINFO,
63 63 REVIDX_ISCENSORED,
64 64 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 65 )
66 66 from .thirdparty import attr
67 67 from . import (
68 68 ancestor,
69 69 dagop,
70 70 error,
71 71 mdiff,
72 72 policy,
73 73 pycompat,
74 74 templatefilters,
75 75 util,
76 76 )
77 77 from .interfaces import (
78 78 repository,
79 79 util as interfaceutil,
80 80 )
81 81 from .revlogutils import (
82 82 deltas as deltautil,
83 83 docket as docketutil,
84 84 flagutil,
85 85 nodemap as nodemaputil,
86 86 revlogv0,
87 87 sidedata as sidedatautil,
88 88 )
89 89 from .utils import (
90 90 storageutil,
91 91 stringutil,
92 92 )
93 93
94 94 # blanked usage of all the names to prevent pyflakes constraints
95 95 # We need these names available in the module for extensions.
96 96
97 97 REVLOGV0
98 98 REVLOGV1
99 99 REVLOGV2
100 100 FLAG_INLINE_DATA
101 101 FLAG_GENERALDELTA
102 102 REVLOG_DEFAULT_FLAGS
103 103 REVLOG_DEFAULT_FORMAT
104 104 REVLOG_DEFAULT_VERSION
105 105 REVLOGV1_FLAGS
106 106 REVLOGV2_FLAGS
107 107 REVIDX_ISCENSORED
108 108 REVIDX_ELLIPSIS
109 109 REVIDX_HASCOPIESINFO
110 110 REVIDX_EXTSTORED
111 111 REVIDX_DEFAULT_FLAGS
112 112 REVIDX_FLAGS_ORDER
113 113 REVIDX_RAWTEXT_CHANGING_FLAGS
114 114
115 115 parsers = policy.importmod('parsers')
116 116 rustancestor = policy.importrust('ancestor')
117 117 rustdagop = policy.importrust('dagop')
118 118 rustrevlog = policy.importrust('revlog')
119 119
120 120 # Aliased for performance.
121 121 _zlibdecompress = zlib.decompress
122 122
123 123 # max size of revlog with inline data
124 124 _maxinline = 131072
125 125 _chunksize = 1048576
126 126
127 127 # Flag processors for REVIDX_ELLIPSIS.
128 128 def ellipsisreadprocessor(rl, text):
129 129 return text, False
130 130
131 131
132 132 def ellipsiswriteprocessor(rl, text):
133 133 return text, False
134 134
135 135
136 136 def ellipsisrawprocessor(rl, text):
137 137 return False
138 138
139 139
140 140 ellipsisprocessor = (
141 141 ellipsisreadprocessor,
142 142 ellipsiswriteprocessor,
143 143 ellipsisrawprocessor,
144 144 )
145 145
146 146
147 147 def offset_type(offset, type):
148 148 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
149 149 raise ValueError(b'unknown revlog index flags')
150 150 return int(int(offset) << 16 | type)
151 151
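# A minimal sketch of the packed-field round-trip (doctest-style,
# illustrative values only): the byte offset lives in the high bits and
# the 16-bit flag field in the low bits, matching the accessors
# documented on the revlog class below.
#
#   >>> field = offset_type(4096, 0)
#   >>> field >> 16        # recover the byte offset
#   4096
#   >>> field & 0xFFFF     # recover the 16-bit flag field
#   0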
152 152
153 153 def _verify_revision(rl, skipflags, state, node):
154 154 """Verify the integrity of the given revlog ``node`` while providing a hook
155 155 point for extensions to influence the operation."""
156 156 if skipflags:
157 157 state[b'skipread'].add(node)
158 158 else:
159 159 # Side-effect: read content and verify hash.
160 160 rl.revision(node)
161 161
162 162
163 163 # True if a fast implementation for persistent-nodemap is available
164 164 #
165 165 # We also consider the "pure" python implementation to be "fast" because
166 166 # people using pure don't really have performance considerations (and a
167 167 # wheelbarrow of other slowness sources)
168 168 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
169 169 parsers, 'BaseIndexObject'
170 170 )
171 171
172 172
173 173 @attr.s(slots=True, frozen=True)
174 174 class _revisioninfo(object):
175 175 """Information about a revision that allows building its fulltext
176 176 node: expected hash of the revision
177 177 p1, p2: parent revs of the revision
178 178 btext: built text cache consisting of a one-element list
179 179 cachedelta: (baserev, uncompressed_delta) or None
180 180 flags: flags associated to the revision storage
181 181
182 182 One of btext[0] or cachedelta must be set.
183 183 """
184 184
185 185 node = attr.ib()
186 186 p1 = attr.ib()
187 187 p2 = attr.ib()
188 188 btext = attr.ib()
189 189 textlen = attr.ib()
190 190 cachedelta = attr.ib()
191 191 flags = attr.ib()
192 192
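# A hedged construction sketch (all names and values hypothetical):
# callers fill in either the built-text cell or a cached delta, never
# neither, e.g.:
#
#   _revisioninfo(node=expected_hash, p1=p1rev, p2=p2rev,
#                 btext=[None], textlen=42,
#                 cachedelta=(baserev, delta_bytes), flags=0)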
193 193
194 194 @interfaceutil.implementer(repository.irevisiondelta)
195 195 @attr.s(slots=True)
196 196 class revlogrevisiondelta(object):
197 197 node = attr.ib()
198 198 p1node = attr.ib()
199 199 p2node = attr.ib()
200 200 basenode = attr.ib()
201 201 flags = attr.ib()
202 202 baserevisionsize = attr.ib()
203 203 revision = attr.ib()
204 204 delta = attr.ib()
205 205 sidedata = attr.ib()
206 206 protocol_flags = attr.ib()
207 207 linknode = attr.ib(default=None)
208 208
209 209
210 210 @interfaceutil.implementer(repository.iverifyproblem)
211 211 @attr.s(frozen=True)
212 212 class revlogproblem(object):
213 213 warning = attr.ib(default=None)
214 214 error = attr.ib(default=None)
215 215 node = attr.ib(default=None)
216 216
217 217
218 218 def parse_index_v1(data, inline):
219 219 # call the C implementation to parse the index data
220 220 index, cache = parsers.parse_index2(data, inline)
221 221 return index, cache
222 222
223 223
224 224 def parse_index_v2(data, inline):
225 225 # call the C implementation to parse the index data
226 226 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
227 227 return index, cache
228 228
229 229
230 230 def parse_index_cl_v2(data, inline):
231 231 # call the C implementation to parse the index data
232 232 assert not inline
233 233 from .pure.parsers import parse_index_cl_v2
234 234
235 235 index, cache = parse_index_cl_v2(data)
236 236 return index, cache
237 237
238 238
239 239 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
240 240
241 241 def parse_index_v1_nodemap(data, inline):
242 242 index, cache = parsers.parse_index_devel_nodemap(data, inline)
243 243 return index, cache
244 244
245 245
246 246 else:
247 247 parse_index_v1_nodemap = None
248 248
249 249
250 250 def parse_index_v1_mixed(data, inline):
251 251 index, cache = parse_index_v1(data, inline)
252 252 return rustrevlog.MixedIndex(index), cache
253 253
254 254
255 255 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
256 256 # signed integer)
257 257 _maxentrysize = 0x7FFFFFFF
258 258
259 259
260 260 class revlog(object):
261 261 """
262 262 the underlying revision storage object
263 263
264 264 A revlog consists of two parts, an index and the revision data.
265 265
266 266 The index is a file with a fixed record size containing
267 267 information on each revision, including its nodeid (hash), the
268 268 nodeids of its parents, the position and offset of its data within
269 269 the data file, and the revision it's based on. Finally, each entry
270 270 contains a linkrev entry that can serve as a pointer to external
271 271 data.
272 272
273 273 The revision data itself is a linear collection of data chunks.
274 274 Each chunk represents a revision and is usually represented as a
275 275 delta against the previous chunk. To bound lookup time, runs of
276 276 deltas are limited to about 2 times the length of the original
277 277 version data. This makes retrieval of a version proportional to
278 278 its size, or O(1) relative to the number of revisions.
279 279
280 280 Both pieces of the revlog are written to in an append-only
281 281 fashion, which means we never need to rewrite a file to insert or
282 282 remove data, and can use some simple techniques to avoid the need
283 283 for locking while reading.
284 284
285 285 If checkambig, indexfile is opened with checkambig=True at
286 286 writing, to avoid file stat ambiguity.
287 287
288 288 If mmaplargeindex is True, and an mmapindexthreshold is set, the
289 289 index will be mmapped rather than read if it is larger than the
290 290 configured threshold.
291 291
292 292 If censorable is True, the revlog can have censored revisions.
293 293
294 294 If `upperboundcomp` is not None, this is the expected maximal gain from
295 295 compression for the data content.
296 296
297 297 `concurrencychecker` is an optional function that receives 3 arguments: a
298 298 file handle, a filename, and an expected position. It should check whether
299 299 the current position in the file handle is valid, and log/warn/fail (by
300 300 raising).
301 301
302 302
303 303 Internal details
304 304 ----------------
305 305
306 306 A large part of the revlog logic deals with revisions' "index entries", tuple
307 307 objects that contain the same "items" whatever the revlog version.
308 308 Different versions will have different ways of storing these items (sometimes
309 309 not having them at all), but the tuple will always be the same. New fields
310 310 are usually added at the end to avoid breaking existing code that relies
311 311 on the existing order. The fields are defined as follows:
312 312
313 313 [0] offset:
314 314 The byte index of the start of revision data chunk.
315 315 That value is shifted up by 16 bits. Use "offset = field >> 16" to
316 316 retrieve it.
317 317
318 318 flags:
319 319 A flag field that carries special information or changes the behavior
320 320 of the revision. (see `REVIDX_*` constants for details)
321 321 The flag field only occupies the first 16 bits of this field,
322 322 use "flags = field & 0xFFFF" to retrieve the value.
323 323
324 324 [1] compressed length:
325 325 The size, in bytes, of the chunk on disk
326 326
327 327 [2] uncompressed length:
328 328 The size, in bytes, of the full revision once reconstructed.
329 329
330 330 [3] base rev:
331 331 Either the base of the revision delta chain (without general
332 332 delta), or the base of the delta (stored in the data chunk)
333 333 with general delta.
334 334
335 335 [4] link rev:
336 336 Changelog revision number of the changeset introducing this
337 337 revision.
338 338
339 339 [5] parent 1 rev:
340 340 Revision number of the first parent
341 341
342 342 [6] parent 2 rev:
343 343 Revision number of the second parent
344 344
345 345 [7] node id:
346 346 The node id of the current revision
347 347
348 348 [8] sidedata offset:
349 349 The byte index of the start of the revision's side-data chunk.
350 350
351 351 [9] sidedata chunk length:
352 352 The size, in bytes, of the revision's side-data chunk.
353 353
354 354 [10] data compression mode:
355 355 two bits that detail the way the data chunk is compressed on disk.
356 356 (see "COMP_MODE_*" constants for details). For revlog version 0 and
357 357 1 this will always be COMP_MODE_INLINE.
358 358
359 359 [11] side-data compression mode:
360 360 two bits that detail the way the sidedata chunk is compressed on disk.
361 361 (see "COMP_MODE_*" constants for details)
362 362 """
363 363
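# Illustrative decoding of one index entry tuple `e` in the layout above
# (mirrors the accessor methods defined later in this class):
#
#   start = e[0] >> 16       # [0] byte offset, high bits
#   flags = e[0] & 0xFFFF    # [0] flag field, low 16 bits
#   comp_len, raw_len = e[1], e[2]
#   base, linkrev, p1, p2, node = e[3], e[4], e[5], e[6], e[7]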
364 364 _flagserrorclass = error.RevlogError
365 365
366 366 def __init__(
367 367 self,
368 368 opener,
369 369 target,
370 370 radix,
371 371 postfix=None, # only exist for `tmpcensored` now
372 372 checkambig=False,
373 373 mmaplargeindex=False,
374 374 censorable=False,
375 375 upperboundcomp=None,
376 376 persistentnodemap=False,
377 377 concurrencychecker=None,
378 378 trypending=False,
379 379 ):
380 380 """
381 381 create a revlog object
382 382
383 383 opener is a function that abstracts the file opening operation
384 384 and can be used to implement COW semantics or the like.
385 385
386 386 `target`: a (KIND, ID) tuple that identifies the content stored in
387 387 this revlog. It helps the rest of the code to understand what the revlog
388 388 is about without having to resort to heuristics and index filename
389 389 analysis. Note that this must reliably be set by normal code, but
390 390 that test, debug, or performance measurement code might not set it to an
391 391 accurate value.
392 392 """
393 393 self.upperboundcomp = upperboundcomp
394 394
395 395 self.radix = radix
396 396
397 397 self._docket_file = None
398 398 self._indexfile = None
399 399 self._datafile = None
400 400 self._nodemap_file = None
401 401 self.postfix = postfix
402 402 self._trypending = trypending
403 403 self.opener = opener
404 404 if persistentnodemap:
405 405 self._nodemap_file = nodemaputil.get_nodemap_file(self)
406 406
407 407 assert target[0] in ALL_KINDS
408 408 assert len(target) == 2
409 409 self.target = target
410 410 # When True, indexfile is opened with checkambig=True at writing, to
411 411 # avoid file stat ambiguity.
412 412 self._checkambig = checkambig
413 413 self._mmaplargeindex = mmaplargeindex
414 414 self._censorable = censorable
415 415 # 3-tuple of (node, rev, text) for a raw revision.
416 416 self._revisioncache = None
417 417 # Maps rev to chain base rev.
418 418 self._chainbasecache = util.lrucachedict(100)
419 419 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
420 420 self._chunkcache = (0, b'')
421 421 # How much data to read and cache into the raw revlog data cache.
422 422 self._chunkcachesize = 65536
423 423 self._maxchainlen = None
424 424 self._deltabothparents = True
425 425 self.index = None
426 426 self._docket = None
427 427 self._nodemap_docket = None
428 428 # Mapping of partial identifiers to full nodes.
429 429 self._pcache = {}
430 430 # Mapping of revision integer to full node.
431 431 self._compengine = b'zlib'
432 432 self._compengineopts = {}
433 433 self._maxdeltachainspan = -1
434 434 self._withsparseread = False
435 435 self._sparserevlog = False
436 436 self.hassidedata = False
437 437 self._srdensitythreshold = 0.50
438 438 self._srmingapsize = 262144
439 439
440 440 # Make copy of flag processors so each revlog instance can support
441 441 # custom flags.
442 442 self._flagprocessors = dict(flagutil.flagprocessors)
443 443
444 444 # 2-tuple of file handles being used for active writing.
445 445 self._writinghandles = None
446 446 # prevent nesting of addgroup
447 447 self._adding_group = None
448 448
449 449 self._loadindex()
450 450
451 451 self._concurrencychecker = concurrencychecker
452 452
453 453 def _init_opts(self):
454 454 """process options (from above/config) to setup associated default revlog mode
455 455
456 456 These values might be affected when actually reading on disk information.
457 457
458 458 The relevant values are returned for use in _loadindex().
459 459
460 460 * newversionflags:
461 461 version header to use if we need to create a new revlog
462 462
463 463 * mmapindexthreshold:
464 464 minimal index size at which to start using mmap
465 465
466 466 * force_nodemap:
467 467 force the usage of a "development" version of the nodemap code
468 468 """
469 469 mmapindexthreshold = None
470 470 opts = self.opener.options
471 471
472 472 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
473 473 new_header = CHANGELOGV2
474 474 elif b'revlogv2' in opts:
475 475 new_header = REVLOGV2
476 476 elif b'revlogv1' in opts:
477 477 new_header = REVLOGV1 | FLAG_INLINE_DATA
478 478 if b'generaldelta' in opts:
479 479 new_header |= FLAG_GENERALDELTA
480 480 elif b'revlogv0' in self.opener.options:
481 481 new_header = REVLOGV0
482 482 else:
483 483 new_header = REVLOG_DEFAULT_VERSION
484 484
485 485 if b'chunkcachesize' in opts:
486 486 self._chunkcachesize = opts[b'chunkcachesize']
487 487 if b'maxchainlen' in opts:
488 488 self._maxchainlen = opts[b'maxchainlen']
489 489 if b'deltabothparents' in opts:
490 490 self._deltabothparents = opts[b'deltabothparents']
491 491 self._lazydelta = bool(opts.get(b'lazydelta', True))
492 492 self._lazydeltabase = False
493 493 if self._lazydelta:
494 494 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
495 495 if b'compengine' in opts:
496 496 self._compengine = opts[b'compengine']
497 497 if b'zlib.level' in opts:
498 498 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
499 499 if b'zstd.level' in opts:
500 500 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
501 501 if b'maxdeltachainspan' in opts:
502 502 self._maxdeltachainspan = opts[b'maxdeltachainspan']
503 503 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
504 504 mmapindexthreshold = opts[b'mmapindexthreshold']
505 505 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
506 506 withsparseread = bool(opts.get(b'with-sparse-read', False))
507 507 # sparse-revlog forces sparse-read
508 508 self._withsparseread = self._sparserevlog or withsparseread
509 509 if b'sparse-read-density-threshold' in opts:
510 510 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
511 511 if b'sparse-read-min-gap-size' in opts:
512 512 self._srmingapsize = opts[b'sparse-read-min-gap-size']
513 513 if opts.get(b'enableellipsis'):
514 514 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
515 515
516 516 # revlog v0 doesn't have flag processors
517 517 for flag, processor in pycompat.iteritems(
518 518 opts.get(b'flagprocessors', {})
519 519 ):
520 520 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
521 521
522 522 if self._chunkcachesize <= 0:
523 523 raise error.RevlogError(
524 524 _(b'revlog chunk cache size %r is not greater than 0')
525 525 % self._chunkcachesize
526 526 )
527 527 elif self._chunkcachesize & (self._chunkcachesize - 1):
528 528 raise error.RevlogError(
529 529 _(b'revlog chunk cache size %r is not a power of 2')
530 530 % self._chunkcachesize
531 531 )
532 532 force_nodemap = opts.get(b'devel-force-nodemap', False)
533 533 return new_header, mmapindexthreshold, force_nodemap
534 534
535 535 def _get_data(self, filepath, mmap_threshold, size=None):
536 536 """return a file content with or without mmap
537 537
538 538 If the file is missing return the empty string"""
539 539 try:
540 540 with self.opener(filepath) as fp:
541 541 if mmap_threshold is not None:
542 542 file_size = self.opener.fstat(fp).st_size
543 543 if file_size >= mmap_threshold:
544 544 if size is not None:
545 545 # avoid potential mmap crash
546 546 size = min(file_size, size)
547 547 # TODO: should .close() to release resources without
548 548 # relying on Python GC
549 549 if size is None:
550 550 return util.buffer(util.mmapread(fp))
551 551 else:
552 552 return util.buffer(util.mmapread(fp, size))
553 553 if size is None:
554 554 return fp.read()
555 555 else:
556 556 return fp.read(size)
557 557 except IOError as inst:
558 558 if inst.errno != errno.ENOENT:
559 559 raise
560 560 return b''
561 561
562 562 def _loadindex(self):
563 563
564 564 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
565 565
566 566 if self.postfix is not None:
567 567 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
568 568 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
569 569 entry_point = b'%s.i.a' % self.radix
570 570 else:
571 571 entry_point = b'%s.i' % self.radix
572 572
573 573 entry_data = b''
574 574 self._initempty = True
575 575 entry_data = self._get_data(entry_point, mmapindexthreshold)
576 576 if len(entry_data) > 0:
577 577 header = INDEX_HEADER.unpack(entry_data[:4])[0]
578 578 self._initempty = False
579 579 else:
580 580 header = new_header
581 581
582 582 self._format_flags = header & ~0xFFFF
583 583 self._format_version = header & 0xFFFF
584 584
585 585 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
586 586 if supported_flags is None:
587 587 msg = _(b'unknown version (%d) in revlog %s')
588 588 msg %= (self._format_version, self.display_id)
589 589 raise error.RevlogError(msg)
590 590 elif self._format_flags & ~supported_flags:
591 591 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
592 592 display_flag = self._format_flags >> 16
593 593 msg %= (display_flag, self._format_version, self.display_id)
594 594 raise error.RevlogError(msg)
595 595
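# For example (illustrative header value): a classic inline revlogv1
# index starts with the 4-byte header 0x00010001, so _format_version is
# REVLOGV1 (1) and _format_flags carries FLAG_INLINE_DATA (0x10000) in
# its upper 16 bits.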
596 596 features = FEATURES_BY_VERSION[self._format_version]
597 597 self._inline = features[b'inline'](self._format_flags)
598 598 self._generaldelta = features[b'generaldelta'](self._format_flags)
599 599 self.hassidedata = features[b'sidedata']
600 600
601 601 if not features[b'docket']:
602 602 self._indexfile = entry_point
603 603 index_data = entry_data
604 604 else:
605 605 self._docket_file = entry_point
606 606 if self._initempty:
607 607 self._docket = docketutil.default_docket(self, header)
608 608 else:
609 609 self._docket = docketutil.parse_docket(
610 610 self, entry_data, use_pending=self._trypending
611 611 )
612 612 self._indexfile = self._docket.index_filepath()
613 613 index_data = b''
614 614 index_size = self._docket.index_end
615 615 if index_size > 0:
616 616 index_data = self._get_data(
617 617 self._indexfile, mmapindexthreshold, size=index_size
618 618 )
619 619 if len(index_data) < index_size:
620 620 msg = _(b'too few index data for %s: got %d, expected %d')
621 621 msg %= (self.display_id, len(index_data), index_size)
622 622 raise error.RevlogError(msg)
623 623
624 624 self._inline = False
625 625 # generaldelta implied by version 2 revlogs.
626 626 self._generaldelta = True
627 627 # the logic for persistent nodemap will be dealt with within the
628 628 # main docket, so disable it for now.
629 629 self._nodemap_file = None
630 630
631 631 if self._docket is not None:
632 632 self._datafile = self._docket.data_filepath()
633 633 elif self.postfix is None:
634 634 self._datafile = b'%s.d' % self.radix
635 635 else:
636 636 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
637 637
638 638 self.nodeconstants = sha1nodeconstants
639 639 self.nullid = self.nodeconstants.nullid
640 640
641 641 # sparse-revlog can't be on without general-delta (issue6056)
642 642 if not self._generaldelta:
643 643 self._sparserevlog = False
644 644
645 645 self._storedeltachains = True
646 646
647 647 devel_nodemap = (
648 648 self._nodemap_file
649 649 and force_nodemap
650 650 and parse_index_v1_nodemap is not None
651 651 )
652 652
653 653 use_rust_index = False
654 654 if rustrevlog is not None:
655 655 if self._nodemap_file is not None:
656 656 use_rust_index = True
657 657 else:
658 658 use_rust_index = self.opener.options.get(b'rust.index')
659 659
660 660 self._parse_index = parse_index_v1
661 661 if self._format_version == REVLOGV0:
662 662 self._parse_index = revlogv0.parse_index_v0
663 663 elif self._format_version == REVLOGV2:
664 664 self._parse_index = parse_index_v2
665 665 elif self._format_version == CHANGELOGV2:
666 666 self._parse_index = parse_index_cl_v2
667 667 elif devel_nodemap:
668 668 self._parse_index = parse_index_v1_nodemap
669 669 elif use_rust_index:
670 670 self._parse_index = parse_index_v1_mixed
671 671 try:
672 672 d = self._parse_index(index_data, self._inline)
673 673 index, _chunkcache = d
674 674 use_nodemap = (
675 675 not self._inline
676 676 and self._nodemap_file is not None
677 677 and util.safehasattr(index, 'update_nodemap_data')
678 678 )
679 679 if use_nodemap:
680 680 nodemap_data = nodemaputil.persisted_data(self)
681 681 if nodemap_data is not None:
682 682 docket = nodemap_data[0]
683 683 if (
684 684 len(d[0]) > docket.tip_rev
685 685 and d[0][docket.tip_rev][7] == docket.tip_node
686 686 ):
687 687 # no changelog tampering
688 688 self._nodemap_docket = docket
689 689 index.update_nodemap_data(*nodemap_data)
690 690 except (ValueError, IndexError):
691 691 raise error.RevlogError(
692 692 _(b"index %s is corrupted") % self.display_id
693 693 )
694 694 self.index, self._chunkcache = d
695 695 if not self._chunkcache:
696 696 self._chunkclear()
697 697 # revnum -> (chain-length, sum-delta-length)
698 698 self._chaininfocache = util.lrucachedict(500)
699 699 # revlog header -> revlog compressor
700 700 self._decompressors = {}
701 701
702 702 @util.propertycache
703 703 def revlog_kind(self):
704 704 return self.target[0]
705 705
706 706 @util.propertycache
707 707 def display_id(self):
708 708 """The public facing "ID" of the revlog that we use in message"""
709 709 # Maybe we should build a user facing representation of
710 710 # revlog.target instead of using `self.radix`
711 711 return self.radix
712 712
713 713 def _get_decompressor(self, t):
714 714 try:
715 715 compressor = self._decompressors[t]
716 716 except KeyError:
717 717 try:
718 718 engine = util.compengines.forrevlogheader(t)
719 719 compressor = engine.revlogcompressor(self._compengineopts)
720 720 self._decompressors[t] = compressor
721 721 except KeyError:
722 722 raise error.RevlogError(
723 723 _(b'unknown compression type %s') % binascii.hexlify(t)
724 724 )
725 725 return compressor
726 726
727 727 @util.propertycache
728 728 def _compressor(self):
729 729 engine = util.compengines[self._compengine]
730 730 return engine.revlogcompressor(self._compengineopts)
731 731
732 732 @util.propertycache
733 733 def _decompressor(self):
734 734 """the default decompressor"""
735 735 if self._docket is None:
736 736 return None
737 737 t = self._docket.default_compression_header
738 738 c = self._get_decompressor(t)
739 739 return c.decompress
740 740
741 741 def _indexfp(self):
742 742 """file object for the revlog's index file"""
743 743 return self.opener(self._indexfile, mode=b"r")
744 744
745 745 def __index_write_fp(self):
746 746 # You should not use this directly; use `_writing` instead
747 747 try:
748 748 f = self.opener(
749 749 self._indexfile, mode=b"r+", checkambig=self._checkambig
750 750 )
751 751 if self._docket is None:
752 752 f.seek(0, os.SEEK_END)
753 753 else:
754 754 f.seek(self._docket.index_end, os.SEEK_SET)
755 755 return f
756 756 except IOError as inst:
757 757 if inst.errno != errno.ENOENT:
758 758 raise
759 759 return self.opener(
760 760 self._indexfile, mode=b"w+", checkambig=self._checkambig
761 761 )
762 762
763 763 def __index_new_fp(self):
764 764 # You should not use this unless you are upgrading from inline revlog
765 765 return self.opener(
766 766 self._indexfile,
767 767 mode=b"w",
768 768 checkambig=self._checkambig,
769 769 atomictemp=True,
770 770 )
771 771
772 772 def _datafp(self, mode=b'r'):
773 773 """file object for the revlog's data file"""
774 774 return self.opener(self._datafile, mode=mode)
775 775
776 776 @contextlib.contextmanager
777 777 def _datareadfp(self, existingfp=None):
778 778 """file object suitable to read data"""
779 779 # Use explicit file handle, if given.
780 780 if existingfp is not None:
781 781 yield existingfp
782 782
783 783 # Use a file handle being actively used for writes, if available.
784 784 # There is some danger to doing this because reads will seek the
785 785 # file. However, _writeentry() performs a SEEK_END before all writes,
786 786 # so we should be safe.
787 787 elif self._writinghandles:
788 788 if self._inline:
789 789 yield self._writinghandles[0]
790 790 else:
791 791 yield self._writinghandles[1]
792 792
793 793 # Otherwise open a new file handle.
794 794 else:
795 795 if self._inline:
796 796 func = self._indexfp
797 797 else:
798 798 func = self._datafp
799 799 with func() as fp:
800 800 yield fp
801 801
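# A short summary of the precedence above: an explicitly passed handle
# wins, then a live writing handle (the index handle for inline revlogs,
# the data handle otherwise), and only then a freshly opened file.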
802 802 def tiprev(self):
803 803 return len(self.index) - 1
804 804
805 805 def tip(self):
806 806 return self.node(self.tiprev())
807 807
808 808 def __contains__(self, rev):
809 809 return 0 <= rev < len(self)
810 810
811 811 def __len__(self):
812 812 return len(self.index)
813 813
814 814 def __iter__(self):
815 815 return iter(pycompat.xrange(len(self)))
816 816
817 817 def revs(self, start=0, stop=None):
818 818 """iterate over all rev in this revlog (from start to stop)"""
819 819 return storageutil.iterrevs(len(self), start=start, stop=stop)
820 820
821 821 @property
822 822 def nodemap(self):
823 823 msg = (
824 824 b"revlog.nodemap is deprecated, "
825 825 b"use revlog.index.[has_node|rev|get_rev]"
826 826 )
827 827 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
828 828 return self.index.nodemap
829 829
830 830 @property
831 831 def _nodecache(self):
832 832 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
833 833 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
834 834 return self.index.nodemap
835 835
836 836 def hasnode(self, node):
837 837 try:
838 838 self.rev(node)
839 839 return True
840 840 except KeyError:
841 841 return False
842 842
843 843 def candelta(self, baserev, rev):
844 844 """whether two revisions (baserev, rev) can be delta-ed or not"""
845 845 # Disable delta if either rev requires a content-changing flag
846 846 # processor (ex. LFS). This is because such a flag processor can alter
847 847 # the rawtext content that the delta will be based on, and two clients
848 848 # could have the same revlog node with different flags (i.e. different
849 849 # rawtext contents) and the delta could be incompatible.
850 850 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
851 851 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
852 852 ):
853 853 return False
854 854 return True
855 855
856 856 def update_caches(self, transaction):
857 857 if self._nodemap_file is not None:
858 858 if transaction is None:
859 859 nodemaputil.update_persistent_nodemap(self)
860 860 else:
861 861 nodemaputil.setup_persistent_nodemap(transaction, self)
862 862
863 863 def clearcaches(self):
864 864 self._revisioncache = None
865 865 self._chainbasecache.clear()
866 866 self._chunkcache = (0, b'')
867 867 self._pcache = {}
868 868 self._nodemap_docket = None
869 869 self.index.clearcaches()
870 870 # The python code is the one responsible for validating the docket, so we
871 871 # end up having to refresh it here.
872 872 use_nodemap = (
873 873 not self._inline
874 874 and self._nodemap_file is not None
875 875 and util.safehasattr(self.index, 'update_nodemap_data')
876 876 )
877 877 if use_nodemap:
878 878 nodemap_data = nodemaputil.persisted_data(self)
879 879 if nodemap_data is not None:
880 880 self._nodemap_docket = nodemap_data[0]
881 881 self.index.update_nodemap_data(*nodemap_data)
882 882
883 883 def rev(self, node):
884 884 try:
885 885 return self.index.rev(node)
886 886 except TypeError:
887 887 raise
888 888 except error.RevlogError:
889 889 # parsers.c radix tree lookup failed
890 890 if (
891 891 node == self.nodeconstants.wdirid
892 892 or node in self.nodeconstants.wdirfilenodeids
893 893 ):
894 894 raise error.WdirUnsupported
895 895 raise error.LookupError(node, self.display_id, _(b'no node'))
896 896
897 897 # Accessors for index entries.
898 898
899 899 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
900 900 # are flags.
901 901 def start(self, rev):
902 902 return int(self.index[rev][0] >> 16)
903 903
904 904 def flags(self, rev):
905 905 return self.index[rev][0] & 0xFFFF
906 906
907 907 def length(self, rev):
908 908 return self.index[rev][1]
909 909
910 910 def sidedata_length(self, rev):
911 911 if not self.hassidedata:
912 912 return 0
913 913 return self.index[rev][9]
914 914
915 915 def rawsize(self, rev):
916 916 """return the length of the uncompressed text for a given revision"""
917 917 l = self.index[rev][2]
918 918 if l >= 0:
919 919 return l
920 920
921 921 t = self.rawdata(rev)
922 922 return len(t)
923 923
924 924 def size(self, rev):
925 925 """length of non-raw text (processed by a "read" flag processor)"""
926 926 # fast path: if no "read" flag processor could change the content,
927 927 # size is rawsize. note: ELLIPSIS is known to not change the content.
928 928 flags = self.flags(rev)
929 929 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
930 930 return self.rawsize(rev)
931 931
932 932 return len(self.revision(rev, raw=False))
933 933
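# Sketch: because REVIDX_ELLIPSIS is XORed out of the known-flags mask
# above, an ellipsis-only revision still takes the rawsize() fast path,
# while e.g. a censored or extstored revision needs a full read.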
934 934 def chainbase(self, rev):
935 935 base = self._chainbasecache.get(rev)
936 936 if base is not None:
937 937 return base
938 938
939 939 index = self.index
940 940 iterrev = rev
941 941 base = index[iterrev][3]
942 942 while base != iterrev:
943 943 iterrev = base
944 944 base = index[iterrev][3]
945 945
946 946 self._chainbasecache[rev] = base
947 947 return base
948 948
949 949 def linkrev(self, rev):
950 950 return self.index[rev][4]
951 951
952 952 def parentrevs(self, rev):
953 953 try:
954 954 entry = self.index[rev]
955 955 except IndexError:
956 956 if rev == wdirrev:
957 957 raise error.WdirUnsupported
958 958 raise
959 959 if entry[5] == nullrev:
960 960 return entry[6], entry[5]
961 961 else:
962 962 return entry[5], entry[6]
963 963
964 964 # fast parentrevs(rev) where rev isn't filtered
965 965 _uncheckedparentrevs = parentrevs
966 966
967 967 def node(self, rev):
968 968 try:
969 969 return self.index[rev][7]
970 970 except IndexError:
971 971 if rev == wdirrev:
972 972 raise error.WdirUnsupported
973 973 raise
974 974
975 975 # Derived from index values.
976 976
977 977 def end(self, rev):
978 978 return self.start(rev) + self.length(rev)
979 979
980 980 def parents(self, node):
981 981 i = self.index
982 982 d = i[self.rev(node)]
983 983 # inline node() to avoid function call overhead
984 984 if d[5] == self.nullid:
985 985 return i[d[6]][7], i[d[5]][7]
986 986 else:
987 987 return i[d[5]][7], i[d[6]][7]
988 988
989 989 def chainlen(self, rev):
990 990 return self._chaininfo(rev)[0]
991 991
992 992 def _chaininfo(self, rev):
993 993 chaininfocache = self._chaininfocache
994 994 if rev in chaininfocache:
995 995 return chaininfocache[rev]
996 996 index = self.index
997 997 generaldelta = self._generaldelta
998 998 iterrev = rev
999 999 e = index[iterrev]
1000 1000 clen = 0
1001 1001 compresseddeltalen = 0
1002 1002 while iterrev != e[3]:
1003 1003 clen += 1
1004 1004 compresseddeltalen += e[1]
1005 1005 if generaldelta:
1006 1006 iterrev = e[3]
1007 1007 else:
1008 1008 iterrev -= 1
1009 1009 if iterrev in chaininfocache:
1010 1010 t = chaininfocache[iterrev]
1011 1011 clen += t[0]
1012 1012 compresseddeltalen += t[1]
1013 1013 break
1014 1014 e = index[iterrev]
1015 1015 else:
1016 1016 # Add text length of base since decompressing that also takes
1017 1017 # work. For cache hits the length is already included.
1018 1018 compresseddeltalen += e[1]
1019 1019 r = (clen, compresseddeltalen)
1020 1020 chaininfocache[rev] = r
1021 1021 return r
1022 1022
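# Worked sketch (illustrative revs): for a generaldelta chain
# 2 <- 3 <- 4, where rev 2 is stored whole and the on-disk sizes are
# s2, s3, s4, _chaininfo(4) returns (2, s2 + s3 + s4): two deltas on top
# of the base, plus the base itself since decompressing it also costs.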
1023 1023 def _deltachain(self, rev, stoprev=None):
1024 1024 """Obtain the delta chain for a revision.
1025 1025
1026 1026 ``stoprev`` specifies a revision to stop at. If not specified, we
1027 1027 stop at the base of the chain.
1028 1028
1029 1029 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1030 1030 revs in ascending order and ``stopped`` is a bool indicating whether
1031 1031 ``stoprev`` was hit.
1032 1032 """
1033 1033 # Try C implementation.
1034 1034 try:
1035 1035 return self.index.deltachain(rev, stoprev, self._generaldelta)
1036 1036 except AttributeError:
1037 1037 pass
1038 1038
1039 1039 chain = []
1040 1040
1041 1041 # Alias to prevent attribute lookup in tight loop.
1042 1042 index = self.index
1043 1043 generaldelta = self._generaldelta
1044 1044
1045 1045 iterrev = rev
1046 1046 e = index[iterrev]
1047 1047 while iterrev != e[3] and iterrev != stoprev:
1048 1048 chain.append(iterrev)
1049 1049 if generaldelta:
1050 1050 iterrev = e[3]
1051 1051 else:
1052 1052 iterrev -= 1
1053 1053 e = index[iterrev]
1054 1054
1055 1055 if iterrev == stoprev:
1056 1056 stopped = True
1057 1057 else:
1058 1058 chain.append(iterrev)
1059 1059 stopped = False
1060 1060
1061 1061 chain.reverse()
1062 1062 return chain, stopped
1063 1063
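# Usage sketch (illustrative revs): with rev 4 as its own delta base and
# rev 5 deltaing against it, _deltachain(5) -> ([4, 5], False), while
# _deltachain(5, stoprev=4) -> ([5], True) because the walk stopped
# before reaching the base.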
1064 1064 def ancestors(self, revs, stoprev=0, inclusive=False):
1065 1065 """Generate the ancestors of 'revs' in reverse revision order.
1066 1066 Does not generate revs lower than stoprev.
1067 1067
1068 1068 See the documentation for ancestor.lazyancestors for more details."""
1069 1069
1070 1070 # first, make sure start revisions aren't filtered
1071 1071 revs = list(revs)
1072 1072 checkrev = self.node
1073 1073 for r in revs:
1074 1074 checkrev(r)
1075 1075 # and we're sure ancestors aren't filtered as well
1076 1076
1077 1077 if rustancestor is not None and self.index.rust_ext_compat:
1078 1078 lazyancestors = rustancestor.LazyAncestors
1079 1079 arg = self.index
1080 1080 else:
1081 1081 lazyancestors = ancestor.lazyancestors
1082 1082 arg = self._uncheckedparentrevs
1083 1083 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1084 1084
1085 1085 def descendants(self, revs):
1086 1086 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1087 1087
1088 1088 def findcommonmissing(self, common=None, heads=None):
1089 1089 """Return a tuple of the ancestors of common and the ancestors of heads
1090 1090 that are not ancestors of common. In revset terminology, we return the
1091 1091 tuple:
1092 1092
1093 1093 ::common, (::heads) - (::common)
1094 1094
1095 1095 The list is sorted by revision number, meaning it is
1096 1096 topologically sorted.
1097 1097
1098 1098 'heads' and 'common' are both lists of node IDs. If heads is
1099 1099 not supplied, uses all of the revlog's heads. If common is not
1100 1100 supplied, uses nullid."""
1101 1101 if common is None:
1102 1102 common = [self.nullid]
1103 1103 if heads is None:
1104 1104 heads = self.heads()
1105 1105
1106 1106 common = [self.rev(n) for n in common]
1107 1107 heads = [self.rev(n) for n in heads]
1108 1108
1109 1109 # we want the ancestors, but inclusive
1110 1110 class lazyset(object):
1111 1111 def __init__(self, lazyvalues):
1112 1112 self.addedvalues = set()
1113 1113 self.lazyvalues = lazyvalues
1114 1114
1115 1115 def __contains__(self, value):
1116 1116 return value in self.addedvalues or value in self.lazyvalues
1117 1117
1118 1118 def __iter__(self):
1119 1119 added = self.addedvalues
1120 1120 for r in added:
1121 1121 yield r
1122 1122 for r in self.lazyvalues:
1123 1123 if r not in added:
1124 1124 yield r
1125 1125
1126 1126 def add(self, value):
1127 1127 self.addedvalues.add(value)
1128 1128
1129 1129 def update(self, values):
1130 1130 self.addedvalues.update(values)
1131 1131
1132 1132 has = lazyset(self.ancestors(common))
1133 1133 has.add(nullrev)
1134 1134 has.update(common)
1135 1135
1136 1136 # take all ancestors from heads that aren't in has
1137 1137 missing = set()
1138 1138 visit = collections.deque(r for r in heads if r not in has)
1139 1139 while visit:
1140 1140 r = visit.popleft()
1141 1141 if r in missing:
1142 1142 continue
1143 1143 else:
1144 1144 missing.add(r)
1145 1145 for p in self.parentrevs(r):
1146 1146 if p not in has:
1147 1147 visit.append(p)
1148 1148 missing = list(missing)
1149 1149 missing.sort()
1150 1150 return has, [self.node(miss) for miss in missing]
1151 1151
1152 1152 def incrementalmissingrevs(self, common=None):
1153 1153 """Return an object that can be used to incrementally compute the
1154 1154 revision numbers of the ancestors of arbitrary sets that are not
1155 1155 ancestors of common. This is an ancestor.incrementalmissingancestors
1156 1156 object.
1157 1157
1158 1158 'common' is a list of revision numbers. If common is not supplied, uses
1159 1159 nullrev.
1160 1160 """
1161 1161 if common is None:
1162 1162 common = [nullrev]
1163 1163
1164 1164 if rustancestor is not None and self.index.rust_ext_compat:
1165 1165 return rustancestor.MissingAncestors(self.index, common)
1166 1166 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1167 1167
1168 1168 def findmissingrevs(self, common=None, heads=None):
1169 1169 """Return the revision numbers of the ancestors of heads that
1170 1170 are not ancestors of common.
1171 1171
1172 1172 More specifically, return a list of revision numbers corresponding to
1173 1173 nodes N such that every N satisfies the following constraints:
1174 1174
1175 1175 1. N is an ancestor of some node in 'heads'
1176 1176 2. N is not an ancestor of any node in 'common'
1177 1177
1178 1178 The list is sorted by revision number, meaning it is
1179 1179 topologically sorted.
1180 1180
1181 1181 'heads' and 'common' are both lists of revision numbers. If heads is
1182 1182 not supplied, uses all of the revlog's heads. If common is not
1183 1183 supplied, uses nullid."""
1184 1184 if common is None:
1185 1185 common = [nullrev]
1186 1186 if heads is None:
1187 1187 heads = self.headrevs()
1188 1188
1189 1189 inc = self.incrementalmissingrevs(common=common)
1190 1190 return inc.missingancestors(heads)
1191 1191
1192 1192 def findmissing(self, common=None, heads=None):
1193 1193 """Return the ancestors of heads that are not ancestors of common.
1194 1194
1195 1195 More specifically, return a list of nodes N such that every N
1196 1196 satisfies the following constraints:
1197 1197
1198 1198 1. N is an ancestor of some node in 'heads'
1199 1199 2. N is not an ancestor of any node in 'common'
1200 1200
1201 1201 The list is sorted by revision number, meaning it is
1202 1202 topologically sorted.
1203 1203
1204 1204 'heads' and 'common' are both lists of node IDs. If heads is
1205 1205 not supplied, uses all of the revlog's heads. If common is not
1206 1206 supplied, uses nullid."""
1207 1207 if common is None:
1208 1208 common = [self.nullid]
1209 1209 if heads is None:
1210 1210 heads = self.heads()
1211 1211
1212 1212 common = [self.rev(n) for n in common]
1213 1213 heads = [self.rev(n) for n in heads]
1214 1214
1215 1215 inc = self.incrementalmissingrevs(common=common)
1216 1216 return [self.node(r) for r in inc.missingancestors(heads)]
1217 1217
1218 1218 def nodesbetween(self, roots=None, heads=None):
1219 1219 """Return a topological path from 'roots' to 'heads'.
1220 1220
1221 1221 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1222 1222 topologically sorted list of all nodes N that satisfy both of
1223 1223 these constraints:
1224 1224
1225 1225 1. N is a descendant of some node in 'roots'
1226 1226 2. N is an ancestor of some node in 'heads'
1227 1227
1228 1228 Every node is considered to be both a descendant and an ancestor
1229 1229 of itself, so every reachable node in 'roots' and 'heads' will be
1230 1230 included in 'nodes'.
1231 1231
1232 1232 'outroots' is the list of reachable nodes in 'roots', i.e., the
1233 1233 subset of 'roots' that is returned in 'nodes'. Likewise,
1234 1234 'outheads' is the subset of 'heads' that is also in 'nodes'.
1235 1235
1236 1236 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1237 1237 unspecified, uses nullid as the only root. If 'heads' is
1238 1238 unspecified, uses list of all of the revlog's heads."""
1239 1239 nonodes = ([], [], [])
1240 1240 if roots is not None:
1241 1241 roots = list(roots)
1242 1242 if not roots:
1243 1243 return nonodes
1244 1244 lowestrev = min([self.rev(n) for n in roots])
1245 1245 else:
1246 1246 roots = [self.nullid] # Everybody's a descendant of nullid
1247 1247 lowestrev = nullrev
1248 1248 if (lowestrev == nullrev) and (heads is None):
1249 1249 # We want _all_ the nodes!
1250 1250 return (
1251 1251 [self.node(r) for r in self],
1252 1252 [self.nullid],
1253 1253 list(self.heads()),
1254 1254 )
1255 1255 if heads is None:
1256 1256 # All nodes are ancestors, so the latest ancestor is the last
1257 1257 # node.
1258 1258 highestrev = len(self) - 1
1259 1259 # Set ancestors to None to signal that every node is an ancestor.
1260 1260 ancestors = None
1261 1261 # Set heads to an empty dictionary for later discovery of heads
1262 1262 heads = {}
1263 1263 else:
1264 1264 heads = list(heads)
1265 1265 if not heads:
1266 1266 return nonodes
1267 1267 ancestors = set()
1268 1268 # Turn heads into a dictionary so we can remove 'fake' heads.
1269 1269 # Also, later we will be using it to filter out the heads we can't
1270 1270 # find from roots.
1271 1271 heads = dict.fromkeys(heads, False)
1272 1272 # Start at the top and keep marking parents until we're done.
1273 1273 nodestotag = set(heads)
1274 1274 # Remember where the top was so we can use it as a limit later.
1275 1275 highestrev = max([self.rev(n) for n in nodestotag])
1276 1276 while nodestotag:
1277 1277 # grab a node to tag
1278 1278 n = nodestotag.pop()
1279 1279 # Never tag nullid
1280 1280 if n == self.nullid:
1281 1281 continue
1282 1282 # A node's revision number represents its place in a
1283 1283 # topologically sorted list of nodes.
1284 1284 r = self.rev(n)
1285 1285 if r >= lowestrev:
1286 1286 if n not in ancestors:
1287 1287 # If we are possibly a descendant of one of the roots
1288 1288 # and we haven't already been marked as an ancestor
1289 1289 ancestors.add(n) # Mark as ancestor
1290 1290 # Add non-nullid parents to list of nodes to tag.
1291 1291 nodestotag.update(
1292 1292 [p for p in self.parents(n) if p != self.nullid]
1293 1293 )
1294 1294 elif n in heads: # We've seen it before, is it a fake head?
1295 1295 # So it is, real heads should not be the ancestors of
1296 1296 # any other heads.
1297 1297 heads.pop(n)
1298 1298 if not ancestors:
1299 1299 return nonodes
1300 1300 # Now that we have our set of ancestors, we want to remove any
1301 1301 # roots that are not ancestors.
1302 1302
1303 1303 # If one of the roots was nullid, everything is included anyway.
1304 1304 if lowestrev > nullrev:
1305 1305 # But, since we weren't, let's recompute the lowest rev to not
1306 1306 # include roots that aren't ancestors.
1307 1307
1308 1308 # Filter out roots that aren't ancestors of heads
1309 1309 roots = [root for root in roots if root in ancestors]
1310 1310 # Recompute the lowest revision
1311 1311 if roots:
1312 1312 lowestrev = min([self.rev(root) for root in roots])
1313 1313 else:
1314 1314 # No more roots? Return empty list
1315 1315 return nonodes
1316 1316 else:
1317 1317 # We are descending from nullid, and don't need to care about
1318 1318 # any other roots.
1319 1319 lowestrev = nullrev
1320 1320 roots = [self.nullid]
1321 1321 # Transform our roots list into a set.
1322 1322 descendants = set(roots)
1323 1323 # Also, keep the original roots so we can filter out roots that aren't
1324 1324 # 'real' roots (i.e. are descended from other roots).
1325 1325 roots = descendants.copy()
1326 1326 # Our topologically sorted list of output nodes.
1327 1327 orderedout = []
1328 1328 # Don't start at nullid since we don't want nullid in our output list,
1329 1329 # and if nullid shows up in descendants, empty parents will look like
1330 1330 # they're descendants.
1331 1331 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1332 1332 n = self.node(r)
1333 1333 isdescendant = False
1334 1334 if lowestrev == nullrev: # Everybody is a descendant of nullid
1335 1335 isdescendant = True
1336 1336 elif n in descendants:
1337 1337 # n is already a descendant
1338 1338 isdescendant = True
1339 1339 # This check only needs to be done here because all the roots
1340 1340 # will start being marked as descendants before the loop.
1341 1341 if n in roots:
1342 1342 # If n was a root, check if it's a 'real' root.
1343 1343 p = tuple(self.parents(n))
1344 1344 # If any of its parents are descendants, it's not a root.
1345 1345 if (p[0] in descendants) or (p[1] in descendants):
1346 1346 roots.remove(n)
1347 1347 else:
1348 1348 p = tuple(self.parents(n))
1349 1349 # A node is a descendant if either of its parents are
1350 1350 # descendants. (We seeded the descendants set with the roots
1351 1351 # up there, remember?)
1352 1352 if (p[0] in descendants) or (p[1] in descendants):
1353 1353 descendants.add(n)
1354 1354 isdescendant = True
1355 1355 if isdescendant and ((ancestors is None) or (n in ancestors)):
1356 1356 # Only include nodes that are both descendants and ancestors.
1357 1357 orderedout.append(n)
1358 1358 if (ancestors is not None) and (n in heads):
1359 1359 # We're trying to figure out which heads are reachable
1360 1360 # from roots.
1361 1361 # Mark this head as having been reached
1362 1362 heads[n] = True
1363 1363 elif ancestors is None:
1364 1364 # Otherwise, we're trying to discover the heads.
1365 1365 # Assume this is a head because if it isn't, the next step
1366 1366 # will eventually remove it.
1367 1367 heads[n] = True
1368 1368 # But, obviously its parents aren't.
1369 1369 for p in self.parents(n):
1370 1370 heads.pop(p, None)
1371 1371 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1372 1372 roots = list(roots)
1373 1373 assert orderedout
1374 1374 assert roots
1375 1375 assert heads
1376 1376 return (orderedout, roots, heads)
1377 1377
1378 1378 def headrevs(self, revs=None):
1379 1379 if revs is None:
1380 1380 try:
1381 1381 return self.index.headrevs()
1382 1382 except AttributeError:
1383 1383 return self._headrevs()
1384 1384 if rustdagop is not None and self.index.rust_ext_compat:
1385 1385 return rustdagop.headrevs(self.index, revs)
1386 1386 return dagop.headrevs(revs, self._uncheckedparentrevs)
1387 1387
1388 1388 def computephases(self, roots):
1389 1389 return self.index.computephasesmapsets(roots)
1390 1390
1391 1391 def _headrevs(self):
1392 1392 count = len(self)
1393 1393 if not count:
1394 1394 return [nullrev]
1395 1395 # we won't iterate over filtered revs, so nobody is a head at start
1396 1396 ishead = [0] * (count + 1)
1397 1397 index = self.index
1398 1398 for r in self:
1399 1399 ishead[r] = 1 # I may be a head
1400 1400 e = index[r]
1401 1401 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1402 1402 return [r for r, val in enumerate(ishead) if val]
1403 1403
1404 1404 def heads(self, start=None, stop=None):
1405 1405 """return the list of all nodes that have no children
1406 1406
1407 1407 if start is specified, only heads that are descendants of
1408 1408 start will be returned
1409 1409 if stop is specified, it will consider all the revs from stop
1410 1410 as if they had no children
1411 1411 """
1412 1412 if start is None and stop is None:
1413 1413 if not len(self):
1414 1414 return [self.nullid]
1415 1415 return [self.node(r) for r in self.headrevs()]
1416 1416
1417 1417 if start is None:
1418 1418 start = nullrev
1419 1419 else:
1420 1420 start = self.rev(start)
1421 1421
1422 1422 stoprevs = {self.rev(n) for n in stop or []}
1423 1423
1424 1424 revs = dagop.headrevssubset(
1425 1425 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1426 1426 )
1427 1427
1428 1428 return [self.node(rev) for rev in revs]
1429 1429
1430 1430 def children(self, node):
1431 1431 """find the children of a given node"""
1432 1432 c = []
1433 1433 p = self.rev(node)
1434 1434 for r in self.revs(start=p + 1):
1435 1435 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1436 1436 if prevs:
1437 1437 for pr in prevs:
1438 1438 if pr == p:
1439 1439 c.append(self.node(r))
1440 1440 elif p == nullrev:
1441 1441 c.append(self.node(r))
1442 1442 return c
1443 1443
1444 1444 def commonancestorsheads(self, a, b):
1445 1445 """calculate all the heads of the common ancestors of nodes a and b"""
1446 1446 a, b = self.rev(a), self.rev(b)
1447 1447 ancs = self._commonancestorsheads(a, b)
1448 1448 return pycompat.maplist(self.node, ancs)
1449 1449
1450 1450 def _commonancestorsheads(self, *revs):
1451 1451 """calculate all the heads of the common ancestors of revs"""
1452 1452 try:
1453 1453 ancs = self.index.commonancestorsheads(*revs)
1454 1454 except (AttributeError, OverflowError): # C implementation failed
1455 1455 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1456 1456 return ancs
1457 1457
1458 1458 def isancestor(self, a, b):
1459 1459 """return True if node a is an ancestor of node b
1460 1460
1461 1461 A revision is considered an ancestor of itself."""
1462 1462 a, b = self.rev(a), self.rev(b)
1463 1463 return self.isancestorrev(a, b)
1464 1464
1465 1465 def isancestorrev(self, a, b):
1466 1466 """return True if revision a is an ancestor of revision b
1467 1467
1468 1468 A revision is considered an ancestor of itself.
1469 1469
1470 1470 The implementation of this is trivial but the use of
1471 1471 reachableroots is not."""
1472 1472 if a == nullrev:
1473 1473 return True
1474 1474 elif a == b:
1475 1475 return True
1476 1476 elif a > b:
1477 1477 return False
1478 1478 return bool(self.reachableroots(a, [b], [a], includepath=False))
1479 1479
1480 1480 def reachableroots(self, minroot, heads, roots, includepath=False):
1481 1481 """return (heads(::(<roots> and <roots>::<heads>)))
1482 1482
1483 1483 If includepath is True, return (<roots>::<heads>)."""
1484 1484 try:
1485 1485 return self.index.reachableroots2(
1486 1486 minroot, heads, roots, includepath
1487 1487 )
1488 1488 except AttributeError:
1489 1489 return dagop._reachablerootspure(
1490 1490 self.parentrevs, minroot, roots, heads, includepath
1491 1491 )
1492 1492
1493 1493 def ancestor(self, a, b):
1494 1494 """calculate the "best" common ancestor of nodes a and b"""
1495 1495
1496 1496 a, b = self.rev(a), self.rev(b)
1497 1497 try:
1498 1498 ancs = self.index.ancestors(a, b)
1499 1499 except (AttributeError, OverflowError):
1500 1500 ancs = ancestor.ancestors(self.parentrevs, a, b)
1501 1501 if ancs:
1502 1502 # choose a consistent winner when there's a tie
1503 1503 return min(map(self.node, ancs))
1504 1504 return self.nullid
1505 1505
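# Sketch: on a criss-cross merge, index.ancestors() can return several
# equally good heads; taking min() over their nodes makes the pick
# deterministic across runs and implementations.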
1506 1506 def _match(self, id):
1507 1507 if isinstance(id, int):
1508 1508 # rev
1509 1509 return self.node(id)
1510 1510 if len(id) == self.nodeconstants.nodelen:
1511 1511 # possibly a binary node
1512 1512 # odds of a binary node being all hex in ASCII are 1 in 10**25
1513 1513 try:
1514 1514 node = id
1515 1515 self.rev(node) # quick search the index
1516 1516 return node
1517 1517 except error.LookupError:
1518 1518 pass # may be partial hex id
1519 1519 try:
1520 1520 # str(rev)
1521 1521 rev = int(id)
1522 1522 if b"%d" % rev != id:
1523 1523 raise ValueError
1524 1524 if rev < 0:
1525 1525 rev = len(self) + rev
1526 1526 if rev < 0 or rev >= len(self):
1527 1527 raise ValueError
1528 1528 return self.node(rev)
1529 1529 except (ValueError, OverflowError):
1530 1530 pass
1531 1531 if len(id) == 2 * self.nodeconstants.nodelen:
1532 1532 try:
1533 1533 # a full hex nodeid?
1534 1534 node = bin(id)
1535 1535 self.rev(node)
1536 1536 return node
1537 1537 except (TypeError, error.LookupError):
1538 1538 pass
1539 1539
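# Resolution-order sketch (illustrative inputs): an int is taken as a
# rev; a 20-byte string is tried as a binary nodeid; b"10" falls through
# to the str(rev) branch; a 40-byte string is tried as a full hex
# nodeid; any miss returns None so _partialmatch() can take over.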
1540 1540 def _partialmatch(self, id):
1541 1541 # we don't care about wdirfilenodeids as they should always be full hashes
1542 1542 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1543 1543 ambiguous = False
1544 1544 try:
1545 1545 partial = self.index.partialmatch(id)
1546 1546 if partial and self.hasnode(partial):
1547 1547 if maybewdir:
1548 1548 # single 'ff...' match in radix tree, ambiguous with wdir
1549 1549 ambiguous = True
1550 1550 else:
1551 1551 return partial
1552 1552 elif maybewdir:
1553 1553 # no 'ff...' match in radix tree, wdir identified
1554 1554 raise error.WdirUnsupported
1555 1555 else:
1556 1556 return None
1557 1557 except error.RevlogError:
1558 1558 # parsers.c radix tree lookup gave multiple matches
1559 1559 # fast path: for unfiltered changelog, radix tree is accurate
1560 1560 if not getattr(self, 'filteredrevs', None):
1561 1561 ambiguous = True
1562 1562 # fall through to slow path that filters hidden revisions
1563 1563 except (AttributeError, ValueError):
1564 1564 # we are pure python, or key was too short to search radix tree
1565 1565 pass
1566 1566 if ambiguous:
1567 1567 raise error.AmbiguousPrefixLookupError(
1568 1568 id, self.display_id, _(b'ambiguous identifier')
1569 1569 )
1570 1570
1571 1571 if id in self._pcache:
1572 1572 return self._pcache[id]
1573 1573
1574 1574 if len(id) <= 40:
1575 1575 try:
1576 1576 # hex(node)[:...]
1577 1577 l = len(id) // 2 # grab an even number of digits
1578 1578 prefix = bin(id[: l * 2])
1579 1579 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1580 1580 nl = [
1581 1581 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1582 1582 ]
1583 1583 if self.nodeconstants.nullhex.startswith(id):
1584 1584 nl.append(self.nullid)
1585 1585 if len(nl) > 0:
1586 1586 if len(nl) == 1 and not maybewdir:
1587 1587 self._pcache[id] = nl[0]
1588 1588 return nl[0]
1589 1589 raise error.AmbiguousPrefixLookupError(
1590 1590 id, self.display_id, _(b'ambiguous identifier')
1591 1591 )
1592 1592 if maybewdir:
1593 1593 raise error.WdirUnsupported
1594 1594 return None
1595 1595 except TypeError:
1596 1596 pass
1597 1597
1598 1598 def lookup(self, id):
1599 1599 """locate a node based on:
1600 1600 - revision number or str(revision number)
1601 1601 - nodeid or subset of hex nodeid
1602 1602 """
1603 1603 n = self._match(id)
1604 1604 if n is not None:
1605 1605 return n
1606 1606 n = self._partialmatch(id)
1607 1607 if n:
1608 1608 return n
1609 1609
1610 1610 raise error.LookupError(id, self.display_id, _(b'no match found'))
1611 1611
1612 1612 def shortest(self, node, minlength=1):
1613 1613 """Find the shortest unambiguous prefix that matches node."""
1614 1614
1615 1615 def isvalid(prefix):
1616 1616 try:
1617 1617 matchednode = self._partialmatch(prefix)
1618 1618 except error.AmbiguousPrefixLookupError:
1619 1619 return False
1620 1620 except error.WdirUnsupported:
1621 1621 # single 'ff...' match
1622 1622 return True
1623 1623 if matchednode is None:
1624 1624 raise error.LookupError(node, self.display_id, _(b'no node'))
1625 1625 return True
1626 1626
1627 1627 def maybewdir(prefix):
1628 1628 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1629 1629
1630 1630 hexnode = hex(node)
1631 1631
1632 1632 def disambiguate(hexnode, minlength):
1633 1633 """Disambiguate against wdirid."""
1634 1634 for length in range(minlength, len(hexnode) + 1):
1635 1635 prefix = hexnode[:length]
1636 1636 if not maybewdir(prefix):
1637 1637 return prefix
1638 1638
1639 1639 if not getattr(self, 'filteredrevs', None):
1640 1640 try:
1641 1641 length = max(self.index.shortest(node), minlength)
1642 1642 return disambiguate(hexnode, length)
1643 1643 except error.RevlogError:
1644 1644 if node != self.nodeconstants.wdirid:
1645 1645 raise error.LookupError(
1646 1646 node, self.display_id, _(b'no node')
1647 1647 )
1648 1648 except AttributeError:
1649 1649 # Fall through to pure code
1650 1650 pass
1651 1651
1652 1652 if node == self.nodeconstants.wdirid:
1653 1653 for length in range(minlength, len(hexnode) + 1):
1654 1654 prefix = hexnode[:length]
1655 1655 if isvalid(prefix):
1656 1656 return prefix
1657 1657
1658 1658 for length in range(minlength, len(hexnode) + 1):
1659 1659 prefix = hexnode[:length]
1660 1660 if isvalid(prefix):
1661 1661 return disambiguate(hexnode, length)
1662 1662
1663 1663 def cmp(self, node, text):
1664 1664 """compare text with a given file revision
1665 1665
1666 1666 returns True if text is different than what is stored.
1667 1667 """
1668 1668 p1, p2 = self.parents(node)
1669 1669 return storageutil.hashrevisionsha1(text, p1, p2) != node
1670 1670
1671 1671 def _cachesegment(self, offset, data):
1672 1672 """Add a segment to the revlog cache.
1673 1673
1674 1674 Accepts an absolute offset and the data that is at that location.
1675 1675 """
1676 1676 o, d = self._chunkcache
1677 1677 # try to add to existing cache
1678 1678 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1679 1679 self._chunkcache = o, d + data
1680 1680 else:
1681 1681 self._chunkcache = offset, data
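# Worked example of the rule above (hypothetical offsets): with the cache
# holding (o=0, len(d)=4096), _cachesegment(4096, data) extends the window
# in place, while _cachesegment(10000, data) replaces it outright.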
1682 1682
1683 1683 def _readsegment(self, offset, length, df=None):
1684 1684 """Load a segment of raw data from the revlog.
1685 1685
1686 1686 Accepts an absolute offset, length to read, and an optional existing
1687 1687 file handle to read from.
1688 1688
1689 1689 If an existing file handle is passed, it will be seeked and the
1690 1690 original seek position will NOT be restored.
1691 1691
1692 1692 Returns a str or buffer of raw byte data.
1693 1693
1694 1694 Raises if the requested number of bytes could not be read.
1695 1695 """
1696 1696 # Cache data both forward and backward around the requested
1697 1697 # data, in a fixed size window. This helps speed up operations
1698 1698 # involving reading the revlog backwards.
1699 1699 cachesize = self._chunkcachesize
1700 1700 realoffset = offset & ~(cachesize - 1)
1701 1701 reallength = (
1702 1702 (offset + length + cachesize) & ~(cachesize - 1)
1703 1703 ) - realoffset
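# Worked example (assuming a cachesize of 65536, the usual default):
# offset=70000, length=100 gives
#   realoffset = 70000 & ~65535 = 65536
#   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 = 65536
# i.e. one aligned 64KiB window around the request is read and cached.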
1704 1704 with self._datareadfp(df) as df:
1705 1705 df.seek(realoffset)
1706 1706 d = df.read(reallength)
1707 1707
1708 1708 self._cachesegment(realoffset, d)
1709 1709 if offset != realoffset or reallength != length:
1710 1710 startoffset = offset - realoffset
1711 1711 if len(d) - startoffset < length:
1712 1712 raise error.RevlogError(
1713 1713 _(
1714 1714 b'partial read of revlog %s; expected %d bytes from '
1715 1715 b'offset %d, got %d'
1716 1716 )
1717 1717 % (
1718 1718 self._indexfile if self._inline else self._datafile,
1719 1719 length,
1720 1720 offset,
1721 1721 len(d) - startoffset,
1722 1722 )
1723 1723 )
1724 1724
1725 1725 return util.buffer(d, startoffset, length)
1726 1726
1727 1727 if len(d) < length:
1728 1728 raise error.RevlogError(
1729 1729 _(
1730 1730 b'partial read of revlog %s; expected %d bytes from offset '
1731 1731 b'%d, got %d'
1732 1732 )
1733 1733 % (
1734 1734 self._indexfile if self._inline else self._datafile,
1735 1735 length,
1736 1736 offset,
1737 1737 len(d),
1738 1738 )
1739 1739 )
1740 1740
1741 1741 return d
1742 1742
1743 1743 def _getsegment(self, offset, length, df=None):
1744 1744 """Obtain a segment of raw data from the revlog.
1745 1745
1746 1746 Accepts an absolute offset, length of bytes to obtain, and an
1747 1747 optional file handle to the already-opened revlog. If the file
1748 1748 handle is used, its original seek position will not be preserved.
1749 1749
1750 1750 Requests for data may be returned from a cache.
1751 1751
1752 1752 Returns a str or a buffer instance of raw byte data.
1753 1753 """
1754 1754 o, d = self._chunkcache
1755 1755 l = len(d)
1756 1756
1757 1757 # is it in the cache?
1758 1758 cachestart = offset - o
1759 1759 cacheend = cachestart + length
1760 1760 if cachestart >= 0 and cacheend <= l:
1761 1761 if cachestart == 0 and cacheend == l:
1762 1762 return d # avoid a copy
1763 1763 return util.buffer(d, cachestart, cacheend - cachestart)
1764 1764
1765 1765 return self._readsegment(offset, length, df=df)
1766 1766
1767 1767 def _getsegmentforrevs(self, startrev, endrev, df=None):
1768 1768 """Obtain a segment of raw data corresponding to a range of revisions.
1769 1769
1770 1770 Accepts the start and end revisions and an optional already-open
1771 1771 file handle to be used for reading. If the file handle is used, its
1772 1772 seek position will not be preserved.
1773 1773
1774 1774 Requests for data may be satisfied by a cache.
1775 1775
1776 1776 Returns a 2-tuple of (offset, data) for the requested range of
1777 1777 revisions. Offset is the integer offset from the beginning of the
1778 1778 revlog and data is a str or buffer of the raw byte data.
1779 1779
1780 1780 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1781 1781 to determine where each revision's data begins and ends.
1782 1782 """
1783 1783 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1784 1784 # (functions are expensive).
1785 1785 index = self.index
1786 1786 istart = index[startrev]
1787 1787 start = int(istart[0] >> 16)
1788 1788 if startrev == endrev:
1789 1789 end = start + istart[1]
1790 1790 else:
1791 1791 iend = index[endrev]
1792 1792 end = int(iend[0] >> 16) + iend[1]
1793 1793
1794 1794 if self._inline:
1795 1795 start += (startrev + 1) * self.index.entry_size
1796 1796 end += (endrev + 1) * self.index.entry_size
1797 1797 length = end - start
1798 1798
1799 1799 return start, self._getsegment(start, length, df=df)
1800 1800
1801 1801 def _chunk(self, rev, df=None):
1802 1802 """Obtain a single decompressed chunk for a revision.
1803 1803
1804 1804 Accepts an integer revision and an optional already-open file handle
1805 1805 to be used for reading. If used, the seek position of the file will not
1806 1806 be preserved.
1807 1807
1808 1808 Returns a str holding uncompressed data for the requested revision.
1809 1809 """
1810 1810 compression_mode = self.index[rev][10]
1811 1811 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1812 1812 if compression_mode == COMP_MODE_PLAIN:
1813 1813 return data
1814 1814 elif compression_mode == COMP_MODE_DEFAULT:
1815 1815 return self._decompressor(data)
1816 1816 elif compression_mode == COMP_MODE_INLINE:
1817 1817 return self.decompress(data)
1818 1818 else:
1819 1819 msg = 'unknown compression mode %d'
1820 1820 msg %= compression_mode
1821 1821 raise error.RevlogError(msg)
1822 1822
1823 1823 def _chunks(self, revs, df=None, targetsize=None):
1824 1824 """Obtain decompressed chunks for the specified revisions.
1825 1825
1826 1826 Accepts an iterable of numeric revisions that are assumed to be in
1827 1827 ascending order. Also accepts an optional already-open file handle
1828 1828 to be used for reading. If used, the seek position of the file will
1829 1829 not be preserved.
1830 1830
1831 1831 This function is similar to calling ``self._chunk()`` multiple times,
1832 1832 but is faster.
1833 1833
1834 1834 Returns a list with decompressed data for each requested revision.
1835 1835 """
1836 1836 if not revs:
1837 1837 return []
1838 1838 start = self.start
1839 1839 length = self.length
1840 1840 inline = self._inline
1841 1841 iosize = self.index.entry_size
1842 1842 buffer = util.buffer
1843 1843
1844 1844 l = []
1845 1845 ladd = l.append
1846 1846
1847 1847 if not self._withsparseread:
1848 1848 slicedchunks = (revs,)
1849 1849 else:
1850 1850 slicedchunks = deltautil.slicechunk(
1851 1851 self, revs, targetsize=targetsize
1852 1852 )
1853 1853
1854 1854 for revschunk in slicedchunks:
1855 1855 firstrev = revschunk[0]
1856 1856 # Skip trailing revisions with empty diff
1857 1857 for lastrev in revschunk[::-1]:
1858 1858 if length(lastrev) != 0:
1859 1859 break
1860 1860
1861 1861 try:
1862 1862 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1863 1863 except OverflowError:
1864 1864 # issue4215 - we can't cache a run of chunks greater than
1865 1865 # 2G on Windows
1866 1866 return [self._chunk(rev, df=df) for rev in revschunk]
1867 1867
1868 1868 decomp = self.decompress
1869 1869 # self._decompressor might be None, but will not be used in that case
1870 1870 def_decomp = self._decompressor
1871 1871 for rev in revschunk:
1872 1872 chunkstart = start(rev)
1873 1873 if inline:
1874 1874 chunkstart += (rev + 1) * iosize
1875 1875 chunklength = length(rev)
1876 1876 comp_mode = self.index[rev][10]
1877 1877 c = buffer(data, chunkstart - offset, chunklength)
1878 1878 if comp_mode == COMP_MODE_PLAIN:
1879 1879 ladd(c)
1880 1880 elif comp_mode == COMP_MODE_INLINE:
1881 1881 ladd(decomp(c))
1882 1882 elif comp_mode == COMP_MODE_DEFAULT:
1883 1883 ladd(def_decomp(c))
1884 1884 else:
1885 1885 msg = 'unknown compression mode %d'
1886 1886 msg %= comp_mode
1887 1887 raise error.RevlogError(msg)
1888 1888
1889 1889 return l
1890 1890
1891 1891 def _chunkclear(self):
1892 1892 """Clear the raw chunk cache."""
1893 1893 self._chunkcache = (0, b'')
1894 1894
1895 1895 def deltaparent(self, rev):
1896 1896 """return deltaparent of the given revision"""
1897 1897 base = self.index[rev][3]
1898 1898 if base == rev:
1899 1899 return nullrev
1900 1900 elif self._generaldelta:
1901 1901 return base
1902 1902 else:
1903 1903 return rev - 1
1904 1904
1905 1905 def issnapshot(self, rev):
1906 1906 """tells whether rev is a snapshot"""
1907 1907 if not self._sparserevlog:
1908 1908 return self.deltaparent(rev) == nullrev
1909 1909 elif util.safehasattr(self.index, b'issnapshot'):
1910 1910 # directly assign the method to cache the testing and access
1911 1911 self.issnapshot = self.index.issnapshot
1912 1912 return self.issnapshot(rev)
1913 1913 if rev == nullrev:
1914 1914 return True
1915 1915 entry = self.index[rev]
1916 1916 base = entry[3]
1917 1917 if base == rev:
1918 1918 return True
1919 1919 if base == nullrev:
1920 1920 return True
1921 1921 p1 = entry[5]
1922 1922 p2 = entry[6]
1923 1923 if base == p1 or base == p2:
1924 1924 return False
1925 1925 return self.issnapshot(base)
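# Illustration (sparse-revlog, hypothetical revs): rev 8 stored as a
# delta against rev 4, itself a delta against full-text rev 0, is an
# intermediate snapshot -- unless rev 4 is one of rev 8's parents, in
# which case the delta is an ordinary one and rev 8 is not a snapshot.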
1926 1926
1927 1927 def snapshotdepth(self, rev):
1928 1928 """number of snapshot in the chain before this one"""
1929 1929 if not self.issnapshot(rev):
1930 1930 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1931 1931 return len(self._deltachain(rev)[0]) - 1
1932 1932
1933 1933 def revdiff(self, rev1, rev2):
1934 1934 """return or calculate a delta between two revisions
1935 1935
1936 1936 The delta calculated is in binary form and is intended to be written to
1937 1937 revlog data directly. So this function needs raw revision data.
1938 1938 """
1939 1939 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1940 1940 return bytes(self._chunk(rev2))
1941 1941
1942 1942 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1943 1943
1944 1944 def _processflags(self, text, flags, operation, raw=False):
1945 1945 """deprecated entry point to access flag processors"""
1946 1946 msg = b'_processflag(...) use the specialized variant'
1947 1947 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1948 1948 if raw:
1949 1949 return text, flagutil.processflagsraw(self, text, flags)
1950 1950 elif operation == b'read':
1951 1951 return flagutil.processflagsread(self, text, flags)
1952 1952 else: # write operation
1953 1953 return flagutil.processflagswrite(self, text, flags)
1954 1954
1955 1955 def revision(self, nodeorrev, _df=None, raw=False):
1956 1956 """return an uncompressed revision of a given node or revision
1957 1957 number.
1958 1958
1959 1959 _df - an existing file handle to read from. (internal-only)
1960 1960 raw - an optional argument specifying if the revision data is to be
1961 1961 treated as raw data when applying flag transforms. 'raw' should be set
1962 1962 to True when generating changegroups or in debug commands.
1963 1963 """
1964 1964 if raw:
1965 1965 msg = (
1966 1966 b'revlog.revision(..., raw=True) is deprecated, '
1967 1967 b'use revlog.rawdata(...)'
1968 1968 )
1969 1969 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1970 1970 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1971 1971
1972 1972 def sidedata(self, nodeorrev, _df=None):
1973 1973 """a map of extra data related to the changeset but not part of the hash
1974 1974
1975 1975 This function currently returns a dictionary. However, a more
1976 1976 advanced mapping object will likely be used in the future for more
1977 1977 efficient/lazy code.
1978 1978 """
1979 1979 return self._revisiondata(nodeorrev, _df)[1]
1980 1980
1981 1981 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1982 1982 # deal with <nodeorrev> argument type
1983 1983 if isinstance(nodeorrev, int):
1984 1984 rev = nodeorrev
1985 1985 node = self.node(rev)
1986 1986 else:
1987 1987 node = nodeorrev
1988 1988 rev = None
1989 1989
1990 1990 # fast path the special `nullid` rev
1991 1991 if node == self.nullid:
1992 1992 return b"", {}
1993 1993
1994 1994 # ``rawtext`` is the text as stored inside the revlog. Might be the
1995 1995 # revision or might need to be processed to retrieve the revision.
1996 1996 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1997 1997
1998 1998 if self.hassidedata:
1999 1999 if rev is None:
2000 2000 rev = self.rev(node)
2001 2001 sidedata = self._sidedata(rev)
2002 2002 else:
2003 2003 sidedata = {}
2004 2004
2005 2005 if raw and validated:
2006 2006 # if we don't need to process the raw text and the raw
2007 2007 # text is cached, we can exit early.
2008 2008 return rawtext, sidedata
2009 2009 if rev is None:
2010 2010 rev = self.rev(node)
2011 2011 # the revlog's flags for this revision
2012 2012 # (these usually alter its state or content)
2013 2013 flags = self.flags(rev)
2014 2014
2015 2015 if validated and flags == REVIDX_DEFAULT_FLAGS:
2016 2016 # no extra flags set, no flag processor runs, text = rawtext
2017 2017 return rawtext, sidedata
2018 2018
2019 2019 if raw:
2020 2020 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2021 2021 text = rawtext
2022 2022 else:
2023 2023 r = flagutil.processflagsread(self, rawtext, flags)
2024 2024 text, validatehash = r
2025 2025 if validatehash:
2026 2026 self.checkhash(text, node, rev=rev)
2027 2027 if not validated:
2028 2028 self._revisioncache = (node, rev, rawtext)
2029 2029
2030 2030 return text, sidedata
2031 2031
2032 2032 def _rawtext(self, node, rev, _df=None):
2033 2033 """return the possibly unvalidated rawtext for a revision
2034 2034
2035 2035 returns (rev, rawtext, validated)
2036 2036 """
2037 2037
2038 2038 # revision in the cache (could be useful to apply delta)
2039 2039 cachedrev = None
2040 2040 # An intermediate text to apply deltas to
2041 2041 basetext = None
2042 2042
2043 2043 # Check if we have the entry in cache
2044 2044 # The cache entry looks like (node, rev, rawtext)
2045 2045 if self._revisioncache:
2046 2046 if self._revisioncache[0] == node:
2047 2047 return (rev, self._revisioncache[2], True)
2048 2048 cachedrev = self._revisioncache[1]
2049 2049
2050 2050 if rev is None:
2051 2051 rev = self.rev(node)
2052 2052
2053 2053 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2054 2054 if stopped:
2055 2055 basetext = self._revisioncache[2]
2056 2056
2057 2057 # drop the cache to save memory; the caller is expected to
2058 2058 # update self._revisioncache after validating the text
2059 2059 self._revisioncache = None
2060 2060
2061 2061 targetsize = None
2062 2062 rawsize = self.index[rev][2]
2063 2063 if 0 <= rawsize:
2064 2064 targetsize = 4 * rawsize
2065 2065
2066 2066 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2067 2067 if basetext is None:
2068 2068 basetext = bytes(bins[0])
2069 2069 bins = bins[1:]
2070 2070
2071 2071 rawtext = mdiff.patches(basetext, bins)
2072 2072 del basetext # let us have a chance to free memory early
2073 2073 return (rev, rawtext, False)
2074 2074
2075 2075 def _sidedata(self, rev):
2076 2076 """Return the sidedata for a given revision number."""
2077 2077 index_entry = self.index[rev]
2078 2078 sidedata_offset = index_entry[8]
2079 2079 sidedata_size = index_entry[9]
2080 2080
2081 2081 if self._inline:
2082 2082 sidedata_offset += self.index.entry_size * (1 + rev)
2083 2083 if sidedata_size == 0:
2084 2084 return {}
2085 2085
2086 2086 comp_segment = self._getsegment(sidedata_offset, sidedata_size)
2087 2087 comp = self.index[rev][11]
2088 2088 if comp == COMP_MODE_PLAIN:
2089 2089 segment = comp_segment
2090 2090 elif comp == COMP_MODE_DEFAULT:
2091 2091 segment = self._decompressor(comp_segment)
2092 2092 elif comp == COMP_MODE_INLINE:
2093 2093 segment = self.decompress(comp_segment)
2094 2094 else:
2095 2095 msg = 'unknown compression mode %d'
2096 2096 msg %= comp
2097 2097 raise error.RevlogError(msg)
2098 2098
2099 2099 sidedata = sidedatautil.deserialize_sidedata(segment)
2100 2100 return sidedata
2101 2101
2102 2102 def rawdata(self, nodeorrev, _df=None):
2103 2103 """return an uncompressed raw data of a given node or revision number.
2104 2104
2105 2105 _df - an existing file handle to read from. (internal-only)
2106 2106 """
2107 2107 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2108 2108
2109 2109 def hash(self, text, p1, p2):
2110 2110 """Compute a node hash.
2111 2111
2112 2112 Available as a function so that subclasses can replace the hash
2113 2113 as needed.
2114 2114 """
2115 2115 return storageutil.hashrevisionsha1(text, p1, p2)
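# Note: storageutil.hashrevisionsha1 computes
# sha1(min(p1, p2) + max(p1, p2) + text), so the resulting node does not
# depend on parent order.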
2116 2116
2117 2117 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2118 2118 """Check node hash integrity.
2119 2119
2120 2120 Available as a function so that subclasses can extend hash mismatch
2121 2121 behaviors as needed.
2122 2122 """
2123 2123 try:
2124 2124 if p1 is None and p2 is None:
2125 2125 p1, p2 = self.parents(node)
2126 2126 if node != self.hash(text, p1, p2):
2127 2127 # Clear the revision cache on hash failure. The revision cache
2128 2128 # only stores the raw revision and clearing the cache does have
2129 2129 # the side-effect that we won't have a cache hit when the raw
2130 2130 # revision data is accessed. But this case should be rare and
2131 2131 # it is extra work to teach the cache about the hash
2132 2132 # verification state.
2133 2133 if self._revisioncache and self._revisioncache[0] == node:
2134 2134 self._revisioncache = None
2135 2135
2136 2136 revornode = rev
2137 2137 if revornode is None:
2138 2138 revornode = templatefilters.short(hex(node))
2139 2139 raise error.RevlogError(
2140 2140 _(b"integrity check failed on %s:%s")
2141 2141 % (self.display_id, pycompat.bytestr(revornode))
2142 2142 )
2143 2143 except error.RevlogError:
2144 2144 if self._censorable and storageutil.iscensoredtext(text):
2145 2145 raise error.CensoredNodeError(self.display_id, node, text)
2146 2146 raise
2147 2147
2148 2148 def _enforceinlinesize(self, tr):
2149 2149 """Check if the revlog is too big for inline and convert if so.
2150 2150
2151 2151 This should be called after revisions are added to the revlog. If the
2152 2152 revlog has grown too large to be an inline revlog, it will convert it
2153 2153 to use multiple index and data files.
2154 2154 """
2155 2155 tiprev = len(self) - 1
2156 2156 total_size = self.start(tiprev) + self.length(tiprev)
2157 2157 if not self._inline or total_size < _maxinline:
2158 2158 return
2159 2159
2160 2160 troffset = tr.findoffset(self._indexfile)
2161 2161 if troffset is None:
2162 2162 raise error.RevlogError(
2163 2163 _(b"%s not found in the transaction") % self._indexfile
2164 2164 )
2165 2165 trindex = 0
2166 2166 tr.add(self._datafile, 0)
2167 2167
2168 2168 existing_handles = False
2169 2169 if self._writinghandles is not None:
2170 2170 existing_handles = True
2171 2171 fp = self._writinghandles[0]
2172 2172 fp.flush()
2173 2173 fp.close()
2174 2174 # We can't use the cached file handle after close(). So prevent
2175 2175 # its usage.
2176 2176 self._writinghandles = None
2177 2177
2178 2178 new_dfh = self._datafp(b'w+')
2179 2179 new_dfh.truncate(0) # drop any potentially existing data
2180 2180 try:
2181 2181 with self._indexfp() as read_ifh:
2182 2182 for r in self:
2183 2183 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2184 2184 if troffset <= self.start(r) + r * self.index.entry_size:
2185 2185 trindex = r
2186 2186 new_dfh.flush()
2187 2187
2188 2188 with self.__index_new_fp() as fp:
2189 2189 self._format_flags &= ~FLAG_INLINE_DATA
2190 2190 self._inline = False
2191 2191 for i in self:
2192 2192 e = self.index.entry_binary(i)
2193 2193 if i == 0 and self._docket is None:
2194 2194 header = self._format_flags | self._format_version
2195 2195 header = self.index.pack_header(header)
2196 2196 e = header + e
2197 2197 fp.write(e)
2198 2198 if self._docket is not None:
2199 2199 self._docket.index_end = fp.tell()
2200 2200
2201 2201 # There is a small transactional race here. If the rename of
2202 2202 # the index fails, we should remove the datafile. It is more
2203 2203 # important to ensure that the data file is not truncated
2204 2204 # when the index is replaced as otherwise data is lost.
2205 2205 tr.replace(self._datafile, self.start(trindex))
2206 2206
2207 2207 # the temp file replaces the real index when we exit the context
2208 2208 # manager
2209 2209
2210 2210 tr.replace(self._indexfile, trindex * self.index.entry_size)
2211 2211 nodemaputil.setup_persistent_nodemap(tr, self)
2212 2212 self._chunkclear()
2213 2213
2214 2214 if existing_handles:
2215 2215 # switched from inline to conventional; reopen the index
2216 2216 ifh = self.__index_write_fp()
2217 2217 self._writinghandles = (ifh, new_dfh)
2218 2218 new_dfh = None
2219 2219 finally:
2220 2220 if new_dfh is not None:
2221 2221 new_dfh.close()
2222 2222
2223 2223 def _nodeduplicatecallback(self, transaction, node):
2224 2224 """called when trying to add a node already stored."""
2225 2225
2226 2226 @contextlib.contextmanager
2227 2227 def _writing(self, transaction):
2228 2228 if self._trypending:
2229 2229 msg = b'try to write in a `trypending` revlog: %s'
2230 2230 msg %= self.display_id
2231 2231 raise error.ProgrammingError(msg)
2232 2232 if self._writinghandles is not None:
2233 2233 yield
2234 2234 else:
2235 ifh = dfh = None
2252 2236 try:
2237 r = len(self)
2238 # opening the data file.
2239 dsize = 0
2240 if r:
2241 dsize = self.end(r - 1)
2242 dfh = None
2243 if not self._inline:
2244 try:
2245 dfh = self._datafp(b"r+")
2246 if self._docket is None:
2247 dfh.seek(0, os.SEEK_END)
2248 else:
2249 dfh.seek(self._docket.data_end, os.SEEK_SET)
2250 except IOError as inst:
2251 if inst.errno != errno.ENOENT:
2252 raise
2253 dfh = self._datafp(b"w+")
2254 transaction.add(self._datafile, dsize)
2255
2256 # opening the index file.
2253 2257 isize = r * self.index.entry_size
2254 2258 ifh = self.__index_write_fp()
2255 2259 if self._inline:
2256 2260 transaction.add(self._indexfile, dsize + isize)
2257 2261 else:
2258 2262 transaction.add(self._indexfile, isize)
2263 # exposing all file handles for writing.
2264 self._writinghandles = (ifh, dfh)
2265 yield
2266 if self._docket is not None:
2267 self._write_docket(transaction)
2268 finally:
2269 self._writinghandles = None
2270 if ifh is not None:
2268 2271 ifh.close()
2270 2272 if dfh is not None:
2271 2273 dfh.close()
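# Note on the flattened structure above: a single try/finally now guards
# opening both files, so a failure while opening either one still closes
# whichever handles were already opened.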
2272 2274
2273 2275 def _write_docket(self, transaction):
2274 2276 """write the current docket on disk
2275 2277
2276 2278 Exists as a method to help the changelog implement transaction logic
2277 2279 
2278 2280 We could also imagine using the same transaction logic for all revlogs
2279 2281 since dockets are cheap."""
2280 2282 self._docket.write(transaction)
2281 2283
2282 2284 def addrevision(
2283 2285 self,
2284 2286 text,
2285 2287 transaction,
2286 2288 link,
2287 2289 p1,
2288 2290 p2,
2289 2291 cachedelta=None,
2290 2292 node=None,
2291 2293 flags=REVIDX_DEFAULT_FLAGS,
2292 2294 deltacomputer=None,
2293 2295 sidedata=None,
2294 2296 ):
2295 2297 """add a revision to the log
2296 2298
2297 2299 text - the revision data to add
2298 2300 transaction - the transaction object used for rollback
2299 2301 link - the linkrev data to add
2300 2302 p1, p2 - the parent nodeids of the revision
2301 2303 cachedelta - an optional precomputed delta
2302 2304 node - nodeid of revision; typically node is not specified, and it is
2303 2305 computed by default as hash(text, p1, p2); however, subclasses might
2304 2306 use a different hashing method (and override checkhash() in that case)
2305 2307 flags - the known flags to set on the revision
2306 2308 deltacomputer - an optional deltacomputer instance shared between
2307 2309 multiple calls
2308 2310 """
2309 2311 if link == nullrev:
2310 2312 raise error.RevlogError(
2311 2313 _(b"attempted to add linkrev -1 to %s") % self.display_id
2312 2314 )
2313 2315
2314 2316 if sidedata is None:
2315 2317 sidedata = {}
2316 2318 elif sidedata and not self.hassidedata:
2317 2319 raise error.ProgrammingError(
2318 2320 _(b"trying to add sidedata to a revlog who don't support them")
2319 2321 )
2320 2322
2321 2323 if flags:
2322 2324 node = node or self.hash(text, p1, p2)
2323 2325
2324 2326 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2325 2327
2326 2328 # If the flag processor modifies the revision data, ignore any provided
2327 2329 # cachedelta.
2328 2330 if rawtext != text:
2329 2331 cachedelta = None
2330 2332
2331 2333 if len(rawtext) > _maxentrysize:
2332 2334 raise error.RevlogError(
2333 2335 _(
2334 2336 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2335 2337 )
2336 2338 % (self.display_id, len(rawtext))
2337 2339 )
2338 2340
2339 2341 node = node or self.hash(rawtext, p1, p2)
2340 2342 rev = self.index.get_rev(node)
2341 2343 if rev is not None:
2342 2344 return rev
2343 2345
2344 2346 if validatehash:
2345 2347 self.checkhash(rawtext, node, p1=p1, p2=p2)
2346 2348
2347 2349 return self.addrawrevision(
2348 2350 rawtext,
2349 2351 transaction,
2350 2352 link,
2351 2353 p1,
2352 2354 p2,
2353 2355 node,
2354 2356 flags,
2355 2357 cachedelta=cachedelta,
2356 2358 deltacomputer=deltacomputer,
2357 2359 sidedata=sidedata,
2358 2360 )
2359 2361
2360 2362 def addrawrevision(
2361 2363 self,
2362 2364 rawtext,
2363 2365 transaction,
2364 2366 link,
2365 2367 p1,
2366 2368 p2,
2367 2369 node,
2368 2370 flags,
2369 2371 cachedelta=None,
2370 2372 deltacomputer=None,
2371 2373 sidedata=None,
2372 2374 ):
2373 2375 """add a raw revision with known flags, node and parents
2374 2376 useful when reusing a revision not stored in this revlog (ex: received
2375 2377 over the wire, or read from an external bundle).
2376 2378 """
2377 2379 with self._writing(transaction):
2378 2380 return self._addrevision(
2379 2381 node,
2380 2382 rawtext,
2381 2383 transaction,
2382 2384 link,
2383 2385 p1,
2384 2386 p2,
2385 2387 flags,
2386 2388 cachedelta,
2387 2389 deltacomputer=deltacomputer,
2388 2390 sidedata=sidedata,
2389 2391 )
2390 2392
2391 2393 def compress(self, data):
2392 2394 """Generate a possibly-compressed representation of data."""
2393 2395 if not data:
2394 2396 return b'', data
2395 2397
2396 2398 compressed = self._compressor.compress(data)
2397 2399
2398 2400 if compressed:
2399 2401 # The revlog compressor added the header in the returned data.
2400 2402 return b'', compressed
2401 2403
2402 2404 if data[0:1] == b'\0':
2403 2405 return b'', data
2404 2406 return b'u', data
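# Summary of the (header, data) pairs compress() can return (a sketch;
# decompress() below routes on the first byte of the payload):
#   (b'', <engine output>)  compressed; the engine embeds its own marker
#   (b'', data)             raw data that already starts with b'\0'
#   (b'u', data)            raw data explicitly marked uncompressed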
2405 2407
2406 2408 def decompress(self, data):
2407 2409 """Decompress a revlog chunk.
2408 2410
2409 2411 The chunk is expected to begin with a header identifying the
2410 2412 format type so it can be routed to an appropriate decompressor.
2411 2413 """
2412 2414 if not data:
2413 2415 return data
2414 2416
2415 2417 # Revlogs are read much more frequently than they are written and many
2416 2418 # chunks only take microseconds to decompress, so performance is
2417 2419 # important here.
2418 2420 #
2419 2421 # We can make a few assumptions about revlogs:
2420 2422 #
2421 2423 # 1) the majority of chunks will be compressed (as opposed to inline
2422 2424 # raw data).
2423 2425 # 2) decompressing *any* data will likely be at least 10x slower than
2424 2426 # returning raw inline data.
2425 2427 # 3) we want to prioritize common and officially supported compression
2426 2428 # engines
2427 2429 #
2428 2430 # It follows that we want to optimize for "decompress compressed data
2429 2431 # when encoded with common and officially supported compression engines"
2430 2432 # case over "raw data" and "data encoded by less common or non-official
2431 2433 # compression engines." That is why we have the inline lookup first
2432 2434 # followed by the compengines lookup.
2433 2435 #
2434 2436 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2435 2437 # compressed chunks. And this matters for changelog and manifest reads.
2436 2438 t = data[0:1]
2437 2439
2438 2440 if t == b'x':
2439 2441 try:
2440 2442 return _zlibdecompress(data)
2441 2443 except zlib.error as e:
2442 2444 raise error.RevlogError(
2443 2445 _(b'revlog decompress error: %s')
2444 2446 % stringutil.forcebytestr(e)
2445 2447 )
2446 2448 # '\0' is more common than 'u' so it goes first.
2447 2449 elif t == b'\0':
2448 2450 return data
2449 2451 elif t == b'u':
2450 2452 return util.buffer(data, 1)
2451 2453
2452 2454 compressor = self._get_decompressor(t)
2453 2455
2454 2456 return compressor.decompress(data)
2455 2457
2456 2458 def _addrevision(
2457 2459 self,
2458 2460 node,
2459 2461 rawtext,
2460 2462 transaction,
2461 2463 link,
2462 2464 p1,
2463 2465 p2,
2464 2466 flags,
2465 2467 cachedelta,
2466 2468 alwayscache=False,
2467 2469 deltacomputer=None,
2468 2470 sidedata=None,
2469 2471 ):
2470 2472 """internal function to add revisions to the log
2471 2473
2472 2474 see addrevision for argument descriptions.
2473 2475
2474 2476 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2475 2477
2476 2478 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2477 2479 be used.
2478 2480
2479 2481 invariants:
2480 2482 - rawtext is optional (can be None); if not set, cachedelta must be set.
2481 2483 if both are set, they must correspond to each other.
2482 2484 """
2483 2485 if node == self.nullid:
2484 2486 raise error.RevlogError(
2485 2487 _(b"%s: attempt to add null revision") % self.display_id
2486 2488 )
2487 2489 if (
2488 2490 node == self.nodeconstants.wdirid
2489 2491 or node in self.nodeconstants.wdirfilenodeids
2490 2492 ):
2491 2493 raise error.RevlogError(
2492 2494 _(b"%s: attempt to add wdir revision") % self.display_id
2493 2495 )
2494 2496 if self._writinghandles is None:
2495 2497 msg = b'adding revision outside `revlog._writing` context'
2496 2498 raise error.ProgrammingError(msg)
2497 2499
2498 2500 if self._inline:
2499 2501 fh = self._writinghandles[0]
2500 2502 else:
2501 2503 fh = self._writinghandles[1]
2502 2504
2503 2505 btext = [rawtext]
2504 2506
2505 2507 curr = len(self)
2506 2508 prev = curr - 1
2507 2509
2508 2510 offset = self._get_data_offset(prev)
2509 2511
2510 2512 if self._concurrencychecker:
2511 2513 ifh, dfh = self._writinghandles
2512 2514 if self._inline:
2513 2515 # offset is "as if" it were in the .d file, so we need to add on
2514 2516 # the size of the entry metadata.
2515 2517 self._concurrencychecker(
2516 2518 ifh, self._indexfile, offset + curr * self.index.entry_size
2517 2519 )
2518 2520 else:
2519 2521 # Entries in the .i are a consistent size.
2520 2522 self._concurrencychecker(
2521 2523 ifh, self._indexfile, curr * self.index.entry_size
2522 2524 )
2523 2525 self._concurrencychecker(dfh, self._datafile, offset)
2524 2526
2525 2527 p1r, p2r = self.rev(p1), self.rev(p2)
2526 2528
2527 2529 # full versions are inserted when the needed deltas
2528 2530 # become comparable to the uncompressed text
2529 2531 if rawtext is None:
2530 2532 # need rawtext size, before changed by flag processors, which is
2531 2533 # the non-raw size. use revlog explicitly to avoid filelog's extra
2532 2534 # logic that might remove metadata size.
2533 2535 textlen = mdiff.patchedsize(
2534 2536 revlog.size(self, cachedelta[0]), cachedelta[1]
2535 2537 )
2536 2538 else:
2537 2539 textlen = len(rawtext)
2538 2540
2539 2541 if deltacomputer is None:
2540 2542 deltacomputer = deltautil.deltacomputer(self)
2541 2543
2542 2544 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2543 2545
2544 2546 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2545 2547
2546 2548 compression_mode = COMP_MODE_INLINE
2547 2549 if self._docket is not None:
2548 2550 h, d = deltainfo.data
2549 2551 if not h and not d:
2550 2552 # no data to store at all... declare them uncompressed
2551 2553 compression_mode = COMP_MODE_PLAIN
2552 2554 elif not h:
2553 2555 t = d[0:1]
2554 2556 if t == b'\0':
2555 2557 compression_mode = COMP_MODE_PLAIN
2556 2558 elif t == self._docket.default_compression_header:
2557 2559 compression_mode = COMP_MODE_DEFAULT
2558 2560 elif h == b'u':
2559 2561 # we have a more efficient way to declare uncompressed
2560 2562 h = b''
2561 2563 compression_mode = COMP_MODE_PLAIN
2562 2564 deltainfo = deltautil.drop_u_compression(deltainfo)
2563 2565
2564 2566 sidedata_compression_mode = COMP_MODE_INLINE
2565 2567 if sidedata and self.hassidedata:
2566 2568 sidedata_compression_mode = COMP_MODE_PLAIN
2567 2569 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2568 2570 sidedata_offset = offset + deltainfo.deltalen
2569 2571 h, comp_sidedata = self.compress(serialized_sidedata)
2570 2572 if (
2571 2573 h != b'u'
2572 2574 and comp_sidedata[0:1] != b'\0'
2573 2575 and len(comp_sidedata) < len(serialized_sidedata)
2574 2576 ):
2575 2577 assert not h
2576 2578 if (
2577 2579 comp_sidedata[0:1]
2578 2580 == self._docket.default_compression_header
2579 2581 ):
2580 2582 sidedata_compression_mode = COMP_MODE_DEFAULT
2581 2583 serialized_sidedata = comp_sidedata
2582 2584 else:
2583 2585 sidedata_compression_mode = COMP_MODE_INLINE
2584 2586 serialized_sidedata = comp_sidedata
2585 2587 else:
2586 2588 serialized_sidedata = b""
2587 2589 # Don't store the offset if the sidedata is empty; that way
2588 2590 # we can easily detect empty sidedata, and it will be no
2589 2591 # different from the ones we manually add.
2590 2592 sidedata_offset = 0
2591 2593
2592 2594 e = (
2593 2595 offset_type(offset, flags),
2594 2596 deltainfo.deltalen,
2595 2597 textlen,
2596 2598 deltainfo.base,
2597 2599 link,
2598 2600 p1r,
2599 2601 p2r,
2600 2602 node,
2601 2603 sidedata_offset,
2602 2604 len(serialized_sidedata),
2603 2605 compression_mode,
2604 2606 sidedata_compression_mode,
2605 2607 )
2606 2608
2607 2609 self.index.append(e)
2608 2610 entry = self.index.entry_binary(curr)
2609 2611 if curr == 0 and self._docket is None:
2610 2612 header = self._format_flags | self._format_version
2611 2613 header = self.index.pack_header(header)
2612 2614 entry = header + entry
2613 2615 self._writeentry(
2614 2616 transaction,
2615 2617 entry,
2616 2618 deltainfo.data,
2617 2619 link,
2618 2620 offset,
2619 2621 serialized_sidedata,
2620 2622 )
2621 2623
2622 2624 rawtext = btext[0]
2623 2625
2624 2626 if alwayscache and rawtext is None:
2625 2627 rawtext = deltacomputer.buildtext(revinfo, fh)
2626 2628
2627 2629 if type(rawtext) == bytes: # only accept immutable objects
2628 2630 self._revisioncache = (node, curr, rawtext)
2629 2631 self._chainbasecache[curr] = deltainfo.chainbase
2630 2632 return curr
2631 2633
2632 2634 def _get_data_offset(self, prev):
2633 2635 """Returns the current offset in the (in-transaction) data file.
2634 2636 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2635 2637 file to store that information: since sidedata can be rewritten to the
2636 2638 end of the data file within a transaction, you can have cases where, for
2637 2639 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2638 2640 to `n - 1`'s sidedata being written after `n`'s data.
2639 2641
2640 2642 TODO cache this in a docket file before getting out of experimental."""
2641 2643 if self._docket is None:
2642 2644 return self.end(prev)
2643 2645 else:
2644 2646 return self._docket.data_end
2645 2647
2646 2648 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2647 2649 # Files opened in a+ mode have inconsistent behavior on various
2648 2650 # platforms. Windows requires that a file positioning call be made
2649 2651 # when the file handle transitions between reads and writes. See
2650 2652 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2651 2653 # platforms, Python or the platform itself can be buggy. Some versions
2652 2654 # of Solaris have been observed to not append at the end of the file
2653 2655 # if the file was seeked to before the end. See issue4943 for more.
2654 2656 #
2655 2657 # We work around this issue by inserting a seek() before writing.
2656 2658 # Note: This is likely not necessary on Python 3. However, because
2657 2659 # the file handle is reused for reads and may be seeked there, we need
2658 2660 # to be careful before changing this.
2659 2661 if self._writinghandles is None:
2660 2662 msg = b'adding revision outside `revlog._writing` context'
2661 2663 raise error.ProgrammingError(msg)
2662 2664 ifh, dfh = self._writinghandles
2663 2665 if self._docket is None:
2664 2666 ifh.seek(0, os.SEEK_END)
2665 2667 else:
2666 2668 ifh.seek(self._docket.index_end, os.SEEK_SET)
2667 2669 if dfh:
2668 2670 if self._docket is None:
2669 2671 dfh.seek(0, os.SEEK_END)
2670 2672 else:
2671 2673 dfh.seek(self._docket.data_end, os.SEEK_SET)
2672 2674
2673 2675 curr = len(self) - 1
2674 2676 if not self._inline:
2675 2677 transaction.add(self._datafile, offset)
2676 2678 transaction.add(self._indexfile, curr * len(entry))
2677 2679 if data[0]:
2678 2680 dfh.write(data[0])
2679 2681 dfh.write(data[1])
2680 2682 if sidedata:
2681 2683 dfh.write(sidedata)
2682 2684 ifh.write(entry)
2683 2685 else:
2684 2686 offset += curr * self.index.entry_size
2685 2687 transaction.add(self._indexfile, offset)
2686 2688 ifh.write(entry)
2687 2689 ifh.write(data[0])
2688 2690 ifh.write(data[1])
2689 2691 if sidedata:
2690 2692 ifh.write(sidedata)
2691 2693 self._enforceinlinesize(transaction)
2692 2694 if self._docket is not None:
2693 2695 self._docket.index_end = self._writinghandles[0].tell()
2694 2696 self._docket.data_end = self._writinghandles[1].tell()
2695 2697
2696 2698 nodemaputil.setup_persistent_nodemap(transaction, self)
2697 2699
2698 2700 def addgroup(
2699 2701 self,
2700 2702 deltas,
2701 2703 linkmapper,
2702 2704 transaction,
2703 2705 alwayscache=False,
2704 2706 addrevisioncb=None,
2705 2707 duplicaterevisioncb=None,
2706 2708 ):
2707 2709 """
2708 2710 add a delta group
2709 2711
2710 2712 given a set of deltas, add them to the revision log. the
2711 2713 first delta is against its parent, which should be in our
2712 2714 log, the rest are against the previous delta.
2713 2715
2714 2716 If ``addrevisioncb`` is defined, it will be called with arguments of
2715 2717 this revlog and the node that was added.
2716 2718 """
2717 2719
2718 2720 if self._adding_group:
2719 2721 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2720 2722
2721 2723 self._adding_group = True
2722 2724 empty = True
2723 2725 try:
2724 2726 with self._writing(transaction):
2725 2727 deltacomputer = deltautil.deltacomputer(self)
2726 2728 # loop through our set of deltas
2727 2729 for data in deltas:
2728 2730 (
2729 2731 node,
2730 2732 p1,
2731 2733 p2,
2732 2734 linknode,
2733 2735 deltabase,
2734 2736 delta,
2735 2737 flags,
2736 2738 sidedata,
2737 2739 ) = data
2738 2740 link = linkmapper(linknode)
2739 2741 flags = flags or REVIDX_DEFAULT_FLAGS
2740 2742
2741 2743 rev = self.index.get_rev(node)
2742 2744 if rev is not None:
2743 2745 # this can happen if two branches make the same change
2744 2746 self._nodeduplicatecallback(transaction, rev)
2745 2747 if duplicaterevisioncb:
2746 2748 duplicaterevisioncb(self, rev)
2747 2749 empty = False
2748 2750 continue
2749 2751
2750 2752 for p in (p1, p2):
2751 2753 if not self.index.has_node(p):
2752 2754 raise error.LookupError(
2753 2755 p, self.radix, _(b'unknown parent')
2754 2756 )
2755 2757
2756 2758 if not self.index.has_node(deltabase):
2757 2759 raise error.LookupError(
2758 2760 deltabase, self.display_id, _(b'unknown delta base')
2759 2761 )
2760 2762
2761 2763 baserev = self.rev(deltabase)
2762 2764
2763 2765 if baserev != nullrev and self.iscensored(baserev):
2764 2766 # if base is censored, delta must be full replacement in a
2765 2767 # single patch operation
2766 2768 hlen = struct.calcsize(b">lll")
2767 2769 oldlen = self.rawsize(baserev)
2768 2770 newlen = len(delta) - hlen
2769 2771 if delta[:hlen] != mdiff.replacediffheader(
2770 2772 oldlen, newlen
2771 2773 ):
2772 2774 raise error.CensoredBaseError(
2773 2775 self.display_id, self.node(baserev)
2774 2776 )
2775 2777
2776 2778 if not flags and self._peek_iscensored(baserev, delta):
2777 2779 flags |= REVIDX_ISCENSORED
2778 2780
2779 2781 # We assume consumers of addrevisioncb will want to retrieve
2780 2782 # the added revision, which will require a call to
2781 2783 # revision(). revision() will fast path if there is a cache
2782 2784 # hit. So, we tell _addrevision() to always cache in this case.
2783 2785 # We're only using addgroup() in the context of changegroup
2784 2786 # generation so the revision data can always be handled as raw
2785 2787 # by the flagprocessor.
2786 2788 rev = self._addrevision(
2787 2789 node,
2788 2790 None,
2789 2791 transaction,
2790 2792 link,
2791 2793 p1,
2792 2794 p2,
2793 2795 flags,
2794 2796 (baserev, delta),
2795 2797 alwayscache=alwayscache,
2796 2798 deltacomputer=deltacomputer,
2797 2799 sidedata=sidedata,
2798 2800 )
2799 2801
2800 2802 if addrevisioncb:
2801 2803 addrevisioncb(self, rev)
2802 2804 empty = False
2803 2805 finally:
2804 2806 self._adding_group = False
2805 2807 return not empty
2806 2808
2807 2809 def iscensored(self, rev):
2808 2810 """Check if a file revision is censored."""
2809 2811 if not self._censorable:
2810 2812 return False
2811 2813
2812 2814 return self.flags(rev) & REVIDX_ISCENSORED
2813 2815
2814 2816 def _peek_iscensored(self, baserev, delta):
2815 2817 """Quickly check if a delta produces a censored revision."""
2816 2818 if not self._censorable:
2817 2819 return False
2818 2820
2819 2821 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2820 2822
2821 2823 def getstrippoint(self, minlink):
2822 2824 """find the minimum rev that must be stripped to strip the linkrev
2823 2825
2824 2826 Returns a tuple containing the minimum rev and a set of all revs that
2825 2827 have linkrevs that will be broken by this strip.
2826 2828 """
2827 2829 return storageutil.resolvestripinfo(
2828 2830 minlink,
2829 2831 len(self) - 1,
2830 2832 self.headrevs(),
2831 2833 self.linkrev,
2832 2834 self.parentrevs,
2833 2835 )
2834 2836
2835 2837 def strip(self, minlink, transaction):
2836 2838 """truncate the revlog on the first revision with a linkrev >= minlink
2837 2839
2838 2840 This function is called when we're stripping revision minlink and
2839 2841 its descendants from the repository.
2840 2842
2841 2843 We have to remove all revisions with linkrev >= minlink, because
2842 2844 the equivalent changelog revisions will be renumbered after the
2843 2845 strip.
2844 2846
2845 2847 So we truncate the revlog on the first of these revisions, and
2846 2848 trust that the caller has saved the revisions that shouldn't be
2847 2849 removed and that it'll re-add them after this truncation.
2848 2850 """
2849 2851 if len(self) == 0:
2850 2852 return
2851 2853
2852 2854 rev, _ = self.getstrippoint(minlink)
2853 2855 if rev == len(self):
2854 2856 return
2855 2857
2856 2858 # first truncate the files on disk
2857 2859 data_end = self.start(rev)
2858 2860 if not self._inline:
2859 2861 transaction.add(self._datafile, data_end)
2860 2862 end = rev * self.index.entry_size
2861 2863 else:
2862 2864 end = data_end + (rev * self.index.entry_size)
2863 2865
2864 2866 transaction.add(self._indexfile, end)
2865 2867 if self._docket is not None:
2866 2868 # XXX we could leverage the docket while stripping. However it is
2867 2869 # not powerful enough at the time of this comment
2868 2870 self._docket.index_end = end
2869 2871 self._docket.data_end = data_end
2870 2872 self._docket.write(transaction, stripping=True)
2871 2873
2872 2874 # then reset internal state in memory to forget those revisions
2873 2875 self._revisioncache = None
2874 2876 self._chaininfocache = util.lrucachedict(500)
2875 2877 self._chunkclear()
2876 2878
2877 2879 del self.index[rev:-1]
2878 2880
2879 2881 def checksize(self):
2880 2882 """Check size of index and data files
2881 2883
2882 2884 return a (dd, di) tuple.
2883 2885 - dd: extra bytes for the "data" file
2884 2886 - di: extra bytes for the "index" file
2885 2887
2886 2888 A healthy revlog will return (0, 0).
2887 2889 """
2888 2890 expected = 0
2889 2891 if len(self):
2890 2892 expected = max(0, self.end(len(self) - 1))
2891 2893
2892 2894 try:
2893 2895 with self._datafp() as f:
2894 2896 f.seek(0, io.SEEK_END)
2895 2897 actual = f.tell()
2896 2898 dd = actual - expected
2897 2899 except IOError as inst:
2898 2900 if inst.errno != errno.ENOENT:
2899 2901 raise
2900 2902 dd = 0
2901 2903
2902 2904 try:
2903 2905 f = self.opener(self._indexfile)
2904 2906 f.seek(0, io.SEEK_END)
2905 2907 actual = f.tell()
2906 2908 f.close()
2907 2909 s = self.index.entry_size
2908 2910 i = max(0, actual // s)
2909 2911 di = actual - (i * s)
2910 2912 if self._inline:
2911 2913 databytes = 0
2912 2914 for r in self:
2913 2915 databytes += max(0, self.length(r))
2914 2916 dd = 0
2915 2917 di = actual - len(self) * s - databytes
2916 2918 except IOError as inst:
2917 2919 if inst.errno != errno.ENOENT:
2918 2920 raise
2919 2921 di = 0
2920 2922
2921 2923 return (dd, di)
2922 2924
2923 2925 def files(self):
2924 2926 res = [self._indexfile]
2925 2927 if not self._inline:
2926 2928 res.append(self._datafile)
2927 2929 return res
2928 2930
2929 2931 def emitrevisions(
2930 2932 self,
2931 2933 nodes,
2932 2934 nodesorder=None,
2933 2935 revisiondata=False,
2934 2936 assumehaveparentrevisions=False,
2935 2937 deltamode=repository.CG_DELTAMODE_STD,
2936 2938 sidedata_helpers=None,
2937 2939 ):
2938 2940 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2939 2941 raise error.ProgrammingError(
2940 2942 b'unhandled value for nodesorder: %s' % nodesorder
2941 2943 )
2942 2944
2943 2945 if nodesorder is None and not self._generaldelta:
2944 2946 nodesorder = b'storage'
2945 2947
2946 2948 if (
2947 2949 not self._storedeltachains
2948 2950 and deltamode != repository.CG_DELTAMODE_PREV
2949 2951 ):
2950 2952 deltamode = repository.CG_DELTAMODE_FULL
2951 2953
2952 2954 return storageutil.emitrevisions(
2953 2955 self,
2954 2956 nodes,
2955 2957 nodesorder,
2956 2958 revlogrevisiondelta,
2957 2959 deltaparentfn=self.deltaparent,
2958 2960 candeltafn=self.candelta,
2959 2961 rawsizefn=self.rawsize,
2960 2962 revdifffn=self.revdiff,
2961 2963 flagsfn=self.flags,
2962 2964 deltamode=deltamode,
2963 2965 revisiondata=revisiondata,
2964 2966 assumehaveparentrevisions=assumehaveparentrevisions,
2965 2967 sidedata_helpers=sidedata_helpers,
2966 2968 )
2967 2969
2968 2970 DELTAREUSEALWAYS = b'always'
2969 2971 DELTAREUSESAMEREVS = b'samerevs'
2970 2972 DELTAREUSENEVER = b'never'
2971 2973
2972 2974 DELTAREUSEFULLADD = b'fulladd'
2973 2975
2974 2976 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2975 2977
2976 2978 def clone(
2977 2979 self,
2978 2980 tr,
2979 2981 destrevlog,
2980 2982 addrevisioncb=None,
2981 2983 deltareuse=DELTAREUSESAMEREVS,
2982 2984 forcedeltabothparents=None,
2983 2985 sidedata_helpers=None,
2984 2986 ):
2985 2987 """Copy this revlog to another, possibly with format changes.
2986 2988
2987 2989 The destination revlog will contain the same revisions and nodes.
2988 2990 However, it may not be bit-for-bit identical due to e.g. delta encoding
2989 2991 differences.
2990 2992
2991 2993 The ``deltareuse`` argument controls how deltas from the existing revlog
2992 2994 are preserved in the destination revlog. The argument can have the
2993 2995 following values:
2994 2996
2995 2997 DELTAREUSEALWAYS
2996 2998 Deltas will always be reused (if possible), even if the destination
2997 2999 revlog would not select the same revisions for the delta. This is the
2998 3000 fastest mode of operation.
2999 3001 DELTAREUSESAMEREVS
3000 3002 Deltas will be reused if the destination revlog would pick the same
3001 3003 revisions for the delta. This mode strikes a balance between speed
3002 3004 and optimization.
3003 3005 DELTAREUSENEVER
3004 3006 Deltas will never be reused. This is the slowest mode of execution.
3005 3007 This mode can be used to recompute deltas (e.g. if the diff/delta
3006 3008 algorithm changes).
3007 3009 DELTAREUSEFULLADD
3008 3010 Revisions will be re-added as if they were new content. This is
3009 3011 slower than DELTAREUSEALWAYS but allows more mechanisms to kick
3010 3012 in, e.g. large file detection and handling.
3011 3013
3012 3014 Delta computation can be slow, so the choice of delta reuse policy can
3013 3015 significantly affect run time.
3014 3016
3015 3017 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3016 3018 two extremes. Deltas will be reused if they are appropriate. But if the
3017 3019 delta could choose a better revision, it will do so. This means if you
3018 3020 are converting a non-generaldelta revlog to a generaldelta revlog,
3019 3021 deltas will be recomputed if the delta's parent isn't a parent of the
3020 3022 revision.
3021 3023
3022 3024 In addition to the delta policy, the ``forcedeltabothparents``
3023 3025 argument controls whether to force computing deltas against both parents
3024 3026 for merges. If None, the destination revlog's existing setting is kept.
3025 3027
3026 3028 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3027 3029 `sidedata_helpers`.
3028 3030 """
3029 3031 if deltareuse not in self.DELTAREUSEALL:
3030 3032 raise ValueError(
3031 3033 _(b'value for deltareuse invalid: %s') % deltareuse
3032 3034 )
3033 3035
3034 3036 if len(destrevlog):
3035 3037 raise ValueError(_(b'destination revlog is not empty'))
3036 3038
3037 3039 if getattr(self, 'filteredrevs', None):
3038 3040 raise ValueError(_(b'source revlog has filtered revisions'))
3039 3041 if getattr(destrevlog, 'filteredrevs', None):
3040 3042 raise ValueError(_(b'destination revlog has filtered revisions'))
3041 3043
3042 3044 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3043 3045 # if possible.
3044 3046 oldlazydelta = destrevlog._lazydelta
3045 3047 oldlazydeltabase = destrevlog._lazydeltabase
3046 3048 oldamd = destrevlog._deltabothparents
3047 3049
3048 3050 try:
3049 3051 if deltareuse == self.DELTAREUSEALWAYS:
3050 3052 destrevlog._lazydeltabase = True
3051 3053 destrevlog._lazydelta = True
3052 3054 elif deltareuse == self.DELTAREUSESAMEREVS:
3053 3055 destrevlog._lazydeltabase = False
3054 3056 destrevlog._lazydelta = True
3055 3057 elif deltareuse == self.DELTAREUSENEVER:
3056 3058 destrevlog._lazydeltabase = False
3057 3059 destrevlog._lazydelta = False
3058 3060
3059 3061 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3060 3062
3061 3063 self._clone(
3062 3064 tr,
3063 3065 destrevlog,
3064 3066 addrevisioncb,
3065 3067 deltareuse,
3066 3068 forcedeltabothparents,
3067 3069 sidedata_helpers,
3068 3070 )
3069 3071
3070 3072 finally:
3071 3073 destrevlog._lazydelta = oldlazydelta
3072 3074 destrevlog._lazydeltabase = oldlazydeltabase
3073 3075 destrevlog._deltabothparents = oldamd
3074 3076
3075 3077 def _clone(
3076 3078 self,
3077 3079 tr,
3078 3080 destrevlog,
3079 3081 addrevisioncb,
3080 3082 deltareuse,
3081 3083 forcedeltabothparents,
3082 3084 sidedata_helpers,
3083 3085 ):
3084 3086 """perform the core duty of `revlog.clone` after parameter processing"""
3085 3087 deltacomputer = deltautil.deltacomputer(destrevlog)
3086 3088 index = self.index
3087 3089 for rev in self:
3088 3090 entry = index[rev]
3089 3091
3090 3092 # Some classes override linkrev to take filtered revs into
3091 3093 # account. Use raw entry from index.
3092 3094 flags = entry[0] & 0xFFFF
3093 3095 linkrev = entry[4]
3094 3096 p1 = index[entry[5]][7]
3095 3097 p2 = index[entry[6]][7]
3096 3098 node = entry[7]
3097 3099
3098 3100 # (Possibly) reuse the delta from the revlog if allowed and
3099 3101 # the revlog chunk is a delta.
3100 3102 cachedelta = None
3101 3103 rawtext = None
3102 3104 if deltareuse == self.DELTAREUSEFULLADD:
3103 3105 text, sidedata = self._revisiondata(rev)
3104 3106
3105 3107 if sidedata_helpers is not None:
3106 3108 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3107 3109 self, sidedata_helpers, sidedata, rev
3108 3110 )
3109 3111 flags = flags | new_flags[0] & ~new_flags[1]
3110 3112
3111 3113 destrevlog.addrevision(
3112 3114 text,
3113 3115 tr,
3114 3116 linkrev,
3115 3117 p1,
3116 3118 p2,
3117 3119 cachedelta=cachedelta,
3118 3120 node=node,
3119 3121 flags=flags,
3120 3122 deltacomputer=deltacomputer,
3121 3123 sidedata=sidedata,
3122 3124 )
3123 3125 else:
3124 3126 if destrevlog._lazydelta:
3125 3127 dp = self.deltaparent(rev)
3126 3128 if dp != nullrev:
3127 3129 cachedelta = (dp, bytes(self._chunk(rev)))
3128 3130
3129 3131 sidedata = None
3130 3132 if not cachedelta:
3131 3133 rawtext, sidedata = self._revisiondata(rev)
3132 3134 if sidedata is None:
3133 3135 sidedata = self.sidedata(rev)
3134 3136
3135 3137 if sidedata_helpers is not None:
3136 3138 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3137 3139 self, sidedata_helpers, sidedata, rev
3138 3140 )
3139 3141 flags = flags | new_flags[0] & ~new_flags[1]
3140 3142
                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
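        # The temporary revlog lives next to the original files (postfix
        # b'tmpcensored') and is swapped in via rename once every revision
        # has been copied; the transaction backups taken below make the
        # swap recoverable.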
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
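        # checksize() compares the lengths implied by the index against the
        # on-disk file sizes; ``dd`` and ``di`` are the byte discrepancies
        # for the data file and the index file respectively.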
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                        | common | rename | meta  | ext
            #  ------------------------------------------------------
            #  flags()               | 0      | 0      | 0     | not 0
            #  renamed()             | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'  | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #               | common | rename | meta  | ext
            #  -------------------------------------------------
            #  rawsize()    | L1     | L1     | L1    | L1
            #  size()       | L1     | L2-LM  | L1(*) | L1 (?)
            #  len(rawtext) | L2     | L2     | L2    | L2
            #  len(text)    | L2     | L2     | L2    | L3
            #  len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM: length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

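            # The hash check is delegated to _verify_revision below; the
            # length check (L1 == L2) is performed inline right after it.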
            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
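        # ``helpers`` is expected to be a 3-tuple of
        # ``(repo, computers, removers)``; with no computers and no removers
        # there is nothing to rewrite.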
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh = self._writinghandles
            if self._docket is not None:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
            else:
                dfh.seek(0, os.SEEK_END)

            current_offset = dfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
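                # Try to compress the serialized sidedata; keep the
                # compressed form only when it is unambiguous (not the b'u'
                # uncompressed marker, does not start with the reserved NUL
                # byte) and actually smaller than the plain serialization.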
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                dfh.seek(current_offset, os.SEEK_SET)
                dfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
                if self._docket is not None:
                    self._docket.data_end = dfh.tell()

            # rewrite the new index entries
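            # Index entries are fixed width, so seeking to
            # ``startrev * entry_size`` lands on the first slot that needs
            # updating; each entry is then re-packed in place.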
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)