revlog: avoid raising no-arg RevlogError for internal flow control...
Martin von Zweigbergk
r48074:93a0abe0 default
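The change below reworks `_partialmatch` so that an ambiguous-prefix condition is recorded in a local `ambiguous` boolean and `AmbiguousPrefixLookupError` is raised in a single place after the `try`/`except` block, instead of raising a bare, no-argument `error.RevlogError` that the enclosing `except error.RevlogError:` clause then had to re-interpret. A minimal sketch of the pattern (hypothetical `lookup_prefix` and `prefix_could_be_wdir` helpers, not the code below):

# Before: a bare exception is raised purely as internal flow control.
def lookup_prefix(index, prefix):
    try:
        node = index.partialmatch(prefix)  # may raise RevlogError on collision
        if node is not None and prefix_could_be_wdir(prefix):
            raise error.RevlogError  # no-arg raise, caught just below
        return node
    except error.RevlogError:
        raise error.AmbiguousPrefixLookupError(
            prefix, b'some-revlog', b'ambiguous identifier'
        )

# After: a flag records the ambiguity; the user-facing error is raised once.
def lookup_prefix(index, prefix):
    ambiguous = False
    try:
        node = index.partialmatch(prefix)
        if node is not None and prefix_could_be_wdir(prefix):
            ambiguous = True
        else:
            return node
    except error.RevlogError:
        ambiguous = True
    if ambiguous:
        raise error.AmbiguousPrefixLookupError(
            prefix, b'some-revlog', b'ambiguous identifier'
        )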
@@ -1,3461 +1,3466 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 CHANGELOGV2,
39 39 COMP_MODE_DEFAULT,
40 40 COMP_MODE_INLINE,
41 41 COMP_MODE_PLAIN,
42 42 FEATURES_BY_VERSION,
43 43 FLAG_GENERALDELTA,
44 44 FLAG_INLINE_DATA,
45 45 INDEX_HEADER,
46 46 KIND_CHANGELOG,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 SUPPORTED_FLAGS,
56 56 )
57 57 from .revlogutils.flagutil import (
58 58 REVIDX_DEFAULT_FLAGS,
59 59 REVIDX_ELLIPSIS,
60 60 REVIDX_EXTSTORED,
61 61 REVIDX_FLAGS_ORDER,
62 62 REVIDX_HASCOPIESINFO,
63 63 REVIDX_ISCENSORED,
64 64 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 65 )
66 66 from .thirdparty import attr
67 67 from . import (
68 68 ancestor,
69 69 dagop,
70 70 error,
71 71 mdiff,
72 72 policy,
73 73 pycompat,
74 74 templatefilters,
75 75 util,
76 76 )
77 77 from .interfaces import (
78 78 repository,
79 79 util as interfaceutil,
80 80 )
81 81 from .revlogutils import (
82 82 deltas as deltautil,
83 83 docket as docketutil,
84 84 flagutil,
85 85 nodemap as nodemaputil,
86 86 revlogv0,
87 87 sidedata as sidedatautil,
88 88 )
89 89 from .utils import (
90 90 storageutil,
91 91 stringutil,
92 92 )
93 93
94 94 # blanket usage of all the names to prevent pyflakes warnings
95 95 # We need these names available in the module for extensions.
96 96
97 97 REVLOGV0
98 98 REVLOGV1
99 99 REVLOGV2
100 100 FLAG_INLINE_DATA
101 101 FLAG_GENERALDELTA
102 102 REVLOG_DEFAULT_FLAGS
103 103 REVLOG_DEFAULT_FORMAT
104 104 REVLOG_DEFAULT_VERSION
105 105 REVLOGV1_FLAGS
106 106 REVLOGV2_FLAGS
107 107 REVIDX_ISCENSORED
108 108 REVIDX_ELLIPSIS
109 109 REVIDX_HASCOPIESINFO
110 110 REVIDX_EXTSTORED
111 111 REVIDX_DEFAULT_FLAGS
112 112 REVIDX_FLAGS_ORDER
113 113 REVIDX_RAWTEXT_CHANGING_FLAGS
114 114
115 115 parsers = policy.importmod('parsers')
116 116 rustancestor = policy.importrust('ancestor')
117 117 rustdagop = policy.importrust('dagop')
118 118 rustrevlog = policy.importrust('revlog')
119 119
120 120 # Aliased for performance.
121 121 _zlibdecompress = zlib.decompress
122 122
123 123 # max size of revlog with inline data
124 124 _maxinline = 131072
125 125 _chunksize = 1048576
126 126
127 127 # Flag processors for REVIDX_ELLIPSIS.
128 128 def ellipsisreadprocessor(rl, text):
129 129 return text, False
130 130
131 131
132 132 def ellipsiswriteprocessor(rl, text):
133 133 return text, False
134 134
135 135
136 136 def ellipsisrawprocessor(rl, text):
137 137 return False
138 138
139 139
140 140 ellipsisprocessor = (
141 141 ellipsisreadprocessor,
142 142 ellipsiswriteprocessor,
143 143 ellipsisrawprocessor,
144 144 )
145 145
146 146
147 147 def offset_type(offset, type):
148 148 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
149 149 raise ValueError(b'unknown revlog index flags')
150 150 return int(int(offset) << 16 | type)
151 151
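# Editor's sketch (not part of the original file): round-tripping the packed
# offset/flags integer that offset_type() above produces for index field [0].
def _offset_type_example():
    field = offset_type(4096, REVIDX_ISCENSORED)
    assert field >> 16 == 4096  # "offset = field >> 16"
    assert field & 0xFFFF == REVIDX_ISCENSORED  # "flags = field & 0xFFFF"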
152 152
153 153 def _verify_revision(rl, skipflags, state, node):
154 154 """Verify the integrity of the given revlog ``node`` while providing a hook
155 155 point for extensions to influence the operation."""
156 156 if skipflags:
157 157 state[b'skipread'].add(node)
158 158 else:
159 159 # Side-effect: read content and verify hash.
160 160 rl.revision(node)
161 161
162 162
163 163 # True if a fast implementation for persistent-nodemap is available
164 164 #
165 165 # We also consider that we have a "fast" implementation in "pure" python because
166 166 # people using pure python don't really have performance considerations (and a
167 167 # wheelbarrow of other slowness sources)
168 168 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
169 169 parsers, 'BaseIndexObject'
170 170 )
171 171
172 172
173 173 @attr.s(slots=True, frozen=True)
174 174 class _revisioninfo(object):
175 175 """Information about a revision that allows building its fulltext
176 176 node: expected hash of the revision
177 177 p1, p2: parent revs of the revision
178 178 btext: built text cache consisting of a one-element list
179 179 cachedelta: (baserev, uncompressed_delta) or None
180 180 flags: flags associated to the revision storage
181 181
182 182 One of btext[0] or cachedelta must be set.
183 183 """
184 184
185 185 node = attr.ib()
186 186 p1 = attr.ib()
187 187 p2 = attr.ib()
188 188 btext = attr.ib()
189 189 textlen = attr.ib()
190 190 cachedelta = attr.ib()
191 191 flags = attr.ib()
192 192
193 193
194 194 @interfaceutil.implementer(repository.irevisiondelta)
195 195 @attr.s(slots=True)
196 196 class revlogrevisiondelta(object):
197 197 node = attr.ib()
198 198 p1node = attr.ib()
199 199 p2node = attr.ib()
200 200 basenode = attr.ib()
201 201 flags = attr.ib()
202 202 baserevisionsize = attr.ib()
203 203 revision = attr.ib()
204 204 delta = attr.ib()
205 205 sidedata = attr.ib()
206 206 protocol_flags = attr.ib()
207 207 linknode = attr.ib(default=None)
208 208
209 209
210 210 @interfaceutil.implementer(repository.iverifyproblem)
211 211 @attr.s(frozen=True)
212 212 class revlogproblem(object):
213 213 warning = attr.ib(default=None)
214 214 error = attr.ib(default=None)
215 215 node = attr.ib(default=None)
216 216
217 217
218 218 def parse_index_v1(data, inline):
219 219 # call the C implementation to parse the index data
220 220 index, cache = parsers.parse_index2(data, inline)
221 221 return index, cache
222 222
223 223
224 224 def parse_index_v2(data, inline):
225 225 # call the C implementation to parse the index data
226 226 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
227 227 return index, cache
228 228
229 229
230 230 def parse_index_cl_v2(data, inline):
231 231 # call the C implementation to parse the index data
232 232 assert not inline
233 233 from .pure.parsers import parse_index_cl_v2
234 234
235 235 index, cache = parse_index_cl_v2(data)
236 236 return index, cache
237 237
238 238
239 239 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
240 240
241 241 def parse_index_v1_nodemap(data, inline):
242 242 index, cache = parsers.parse_index_devel_nodemap(data, inline)
243 243 return index, cache
244 244
245 245
246 246 else:
247 247 parse_index_v1_nodemap = None
248 248
249 249
250 250 def parse_index_v1_mixed(data, inline):
251 251 index, cache = parse_index_v1(data, inline)
252 252 return rustrevlog.MixedIndex(index), cache
253 253
254 254
255 255 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
256 256 # signed integer)
257 257 _maxentrysize = 0x7FFFFFFF
258 258
259 259
260 260 class revlog(object):
261 261 """
262 262 the underlying revision storage object
263 263
264 264 A revlog consists of two parts, an index and the revision data.
265 265
266 266 The index is a file with a fixed record size containing
267 267 information on each revision, including its nodeid (hash), the
268 268 nodeids of its parents, the position and offset of its data within
269 269 the data file, and the revision it's based on. Finally, each entry
270 270 contains a linkrev entry that can serve as a pointer to external
271 271 data.
272 272
273 273 The revision data itself is a linear collection of data chunks.
274 274 Each chunk represents a revision and is usually represented as a
275 275 delta against the previous chunk. To bound lookup time, runs of
276 276 deltas are limited to about 2 times the length of the original
277 277 version data. This makes retrieval of a version proportional to
278 278 its size, or O(1) relative to the number of revisions.
279 279
280 280 Both pieces of the revlog are written to in an append-only
281 281 fashion, which means we never need to rewrite a file to insert or
282 282 remove data, and can use some simple techniques to avoid the need
283 283 for locking while reading.
284 284
285 285 If checkambig, indexfile is opened with checkambig=True at
286 286 writing, to avoid file stat ambiguity.
287 287
288 288 If mmaplargeindex is True, and an mmapindexthreshold is set, the
289 289 index will be mmapped rather than read if it is larger than the
290 290 configured threshold.
291 291
292 292 If censorable is True, the revlog can have censored revisions.
293 293
294 294 If `upperboundcomp` is not None, this is the expected maximal gain from
295 295 compression for the data content.
296 296
297 297 `concurrencychecker` is an optional function that receives 3 arguments: a
298 298 file handle, a filename, and an expected position. It should check whether
299 299 the current position in the file handle is valid, and log/warn/fail (by
300 300 raising).
301 301
302 302
303 303 Internal details
304 304 ----------------
305 305
306 306 A large part of the revlog logic deals with revisions' "index entries", tuple
307 307 objects that contain the same "items" whatever the revlog version.
308 308 Different versions will have different ways of storing these items (sometimes
309 309 not having them at all), but the tuple will always be the same. New fields
310 310 are usually added at the end to avoid breaking existing code that relies
311 311 on the existing order. The fields are defined as follows:
312 312
313 313 [0] offset:
314 314 The byte index of the start of revision data chunk.
315 315 That value is shifted up by 16 bits. Use "offset = field >> 16" to
316 316 retrieve it.
317 317
318 318 flags:
319 319 A flag field that carries special information or changes the behavior
320 320 of the revision. (see `REVIDX_*` constants for details)
321 321 The flag field only occupies the lower 16 bits of this field,
322 322 use "flags = field & 0xFFFF" to retrieve the value.
323 323
324 324 [1] compressed length:
325 325 The size, in bytes, of the chunk on disk
326 326
327 327 [2] uncompressed length:
328 328 The size, in bytes, of the full revision once reconstructed.
329 329
330 330 [3] base rev:
331 331 Either the base of the revision delta chain (without general
332 332 delta), or the base of the delta (stored in the data chunk)
333 333 with general delta.
334 334
335 335 [4] link rev:
336 336 Changelog revision number of the changeset introducing this
337 337 revision.
338 338
339 339 [5] parent 1 rev:
340 340 Revision number of the first parent
341 341
342 342 [6] parent 2 rev:
343 343 Revision number of the second parent
344 344
345 345 [7] node id:
346 346 The node id of the current revision
347 347
348 348 [8] sidedata offset:
349 349 The byte index of the start of the revision's side-data chunk.
350 350
351 351 [9] sidedata chunk length:
352 352 The size, in bytes, of the revision's side-data chunk.
353 353
354 354 [10] data compression mode:
355 355 two bits that detail the way the data chunk is compressed on disk.
356 356 (see "COMP_MODE_*" constants for details). For revlog version 0 and
357 357 1 this will always be COMP_MODE_INLINE.
358 358
359 359 [11] side-data compression mode:
360 360 two bits that detail the way the sidedata chunk is compressed on disk.
361 361 (see "COMP_MODE_*" constants for details)
362 362 """
363 363
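# Editor's sketch: decoding the index-entry fields documented above,
# assuming a populated revlog instance ``rl`` and a revision ``rev``:
#
#     e = rl.index[rev]
#     offset = e[0] >> 16  # [0] byte offset of the data chunk
#     flags = e[0] & 0xFFFF  # [0] low 16 bits carry the REVIDX_* flags
#     comp_len, raw_len = e[1], e[2]  # [1]/[2] on-disk and full-text sizes
#     base, linkrev = e[3], e[4]  # [3]/[4] delta base and changelog rev
#     p1, p2, node = e[5], e[6], e[7]  # [5]-[7] parent revs and node id
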
364 364 _flagserrorclass = error.RevlogError
365 365
366 366 def __init__(
367 367 self,
368 368 opener,
369 369 target,
370 370 radix,
371 371 postfix=None, # only exist for `tmpcensored` now
372 372 checkambig=False,
373 373 mmaplargeindex=False,
374 374 censorable=False,
375 375 upperboundcomp=None,
376 376 persistentnodemap=False,
377 377 concurrencychecker=None,
378 378 trypending=False,
379 379 ):
380 380 """
381 381 create a revlog object
382 382
383 383 opener is a function that abstracts the file opening operation
384 384 and can be used to implement COW semantics or the like.
385 385
386 386 `target`: a (KIND, ID) tuple that identifies the content stored in
387 387 this revlog. It helps the rest of the code to understand what the revlog
388 388 is about without having to resort to heuristics and index filename
389 389 analysis. Note that this must reliably be set by normal code, but
390 390 test, debug, or performance measurement code might not set this to an
391 391 accurate value.
392 392 """
393 393 self.upperboundcomp = upperboundcomp
394 394
395 395 self.radix = radix
396 396
397 397 self._docket_file = None
398 398 self._indexfile = None
399 399 self._datafile = None
400 400 self._nodemap_file = None
401 401 self.postfix = postfix
402 402 self._trypending = trypending
403 403 self.opener = opener
404 404 if persistentnodemap:
405 405 self._nodemap_file = nodemaputil.get_nodemap_file(self)
406 406
407 407 assert target[0] in ALL_KINDS
408 408 assert len(target) == 2
409 409 self.target = target
410 410 # When True, indexfile is opened with checkambig=True at writing, to
411 411 # avoid file stat ambiguity.
412 412 self._checkambig = checkambig
413 413 self._mmaplargeindex = mmaplargeindex
414 414 self._censorable = censorable
415 415 # 3-tuple of (node, rev, text) for a raw revision.
416 416 self._revisioncache = None
417 417 # Maps rev to chain base rev.
418 418 self._chainbasecache = util.lrucachedict(100)
419 419 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
420 420 self._chunkcache = (0, b'')
421 421 # How much data to read and cache into the raw revlog data cache.
422 422 self._chunkcachesize = 65536
423 423 self._maxchainlen = None
424 424 self._deltabothparents = True
425 425 self.index = None
426 426 self._docket = None
427 427 self._nodemap_docket = None
428 428 # Mapping of partial identifiers to full nodes.
429 429 self._pcache = {}
430 430 # Mapping of revision integer to full node.
431 431 self._compengine = b'zlib'
432 432 self._compengineopts = {}
433 433 self._maxdeltachainspan = -1
434 434 self._withsparseread = False
435 435 self._sparserevlog = False
436 436 self.hassidedata = False
437 437 self._srdensitythreshold = 0.50
438 438 self._srmingapsize = 262144
439 439
440 440 # Make copy of flag processors so each revlog instance can support
441 441 # custom flags.
442 442 self._flagprocessors = dict(flagutil.flagprocessors)
443 443
444 444 # 2-tuple of file handles being used for active writing.
445 445 self._writinghandles = None
446 446 # prevent nesting of addgroup
447 447 self._adding_group = None
448 448
449 449 self._loadindex()
450 450
451 451 self._concurrencychecker = concurrencychecker
452 452
453 453 def _init_opts(self):
454 454 """process options (from above/config) to setup associated default revlog mode
455 455
456 456 These values might be affected when actually reading on disk information.
457 457
458 458 The relevant values are returned for use in _loadindex().
459 459
460 460 * newversionflags:
461 461 version header to use if we need to create a new revlog
462 462
463 463 * mmapindexthreshold:
464 464 minimal index size at which to start using mmap
465 465
466 466 * force_nodemap:
467 467 force the usage of a "development" version of the nodemap code
468 468 """
469 469 mmapindexthreshold = None
470 470 opts = self.opener.options
471 471
472 472 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
473 473 new_header = CHANGELOGV2
474 474 elif b'revlogv2' in opts:
475 475 new_header = REVLOGV2
476 476 elif b'revlogv1' in opts:
477 477 new_header = REVLOGV1 | FLAG_INLINE_DATA
478 478 if b'generaldelta' in opts:
479 479 new_header |= FLAG_GENERALDELTA
480 480 elif b'revlogv0' in self.opener.options:
481 481 new_header = REVLOGV0
482 482 else:
483 483 new_header = REVLOG_DEFAULT_VERSION
484 484
485 485 if b'chunkcachesize' in opts:
486 486 self._chunkcachesize = opts[b'chunkcachesize']
487 487 if b'maxchainlen' in opts:
488 488 self._maxchainlen = opts[b'maxchainlen']
489 489 if b'deltabothparents' in opts:
490 490 self._deltabothparents = opts[b'deltabothparents']
491 491 self._lazydelta = bool(opts.get(b'lazydelta', True))
492 492 self._lazydeltabase = False
493 493 if self._lazydelta:
494 494 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
495 495 if b'compengine' in opts:
496 496 self._compengine = opts[b'compengine']
497 497 if b'zlib.level' in opts:
498 498 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
499 499 if b'zstd.level' in opts:
500 500 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
501 501 if b'maxdeltachainspan' in opts:
502 502 self._maxdeltachainspan = opts[b'maxdeltachainspan']
503 503 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
504 504 mmapindexthreshold = opts[b'mmapindexthreshold']
505 505 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
506 506 withsparseread = bool(opts.get(b'with-sparse-read', False))
507 507 # sparse-revlog forces sparse-read
508 508 self._withsparseread = self._sparserevlog or withsparseread
509 509 if b'sparse-read-density-threshold' in opts:
510 510 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
511 511 if b'sparse-read-min-gap-size' in opts:
512 512 self._srmingapsize = opts[b'sparse-read-min-gap-size']
513 513 if opts.get(b'enableellipsis'):
514 514 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
515 515
516 516 # revlog v0 doesn't have flag processors
517 517 for flag, processor in pycompat.iteritems(
518 518 opts.get(b'flagprocessors', {})
519 519 ):
520 520 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
521 521
522 522 if self._chunkcachesize <= 0:
523 523 raise error.RevlogError(
524 524 _(b'revlog chunk cache size %r is not greater than 0')
525 525 % self._chunkcachesize
526 526 )
527 527 elif self._chunkcachesize & (self._chunkcachesize - 1):
528 528 raise error.RevlogError(
529 529 _(b'revlog chunk cache size %r is not a power of 2')
530 530 % self._chunkcachesize
531 531 )
532 532 force_nodemap = opts.get(b'devel-force-nodemap', False)
533 533 return new_header, mmapindexthreshold, force_nodemap
534 534
535 535 def _get_data(self, filepath, mmap_threshold, size=None):
536 536 """return a file content with or without mmap
537 537
538 538 If the file is missing return the empty string"""
539 539 try:
540 540 with self.opener(filepath) as fp:
541 541 if mmap_threshold is not None:
542 542 file_size = self.opener.fstat(fp).st_size
543 543 if file_size >= mmap_threshold:
544 544 if size is not None:
545 545 # avoid potential mmap crash
546 546 size = min(file_size, size)
547 547 # TODO: should .close() to release resources without
548 548 # relying on Python GC
549 549 if size is None:
550 550 return util.buffer(util.mmapread(fp))
551 551 else:
552 552 return util.buffer(util.mmapread(fp, size))
553 553 if size is None:
554 554 return fp.read()
555 555 else:
556 556 return fp.read(size)
557 557 except IOError as inst:
558 558 if inst.errno != errno.ENOENT:
559 559 raise
560 560 return b''
561 561
562 562 def _loadindex(self):
563 563
564 564 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
565 565
566 566 if self.postfix is not None:
567 567 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
568 568 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
569 569 entry_point = b'%s.i.a' % self.radix
570 570 else:
571 571 entry_point = b'%s.i' % self.radix
572 572
573 573 entry_data = b''
574 574 self._initempty = True
575 575 entry_data = self._get_data(entry_point, mmapindexthreshold)
576 576 if len(entry_data) > 0:
577 577 header = INDEX_HEADER.unpack(entry_data[:4])[0]
578 578 self._initempty = False
579 579 else:
580 580 header = new_header
581 581
582 582 self._format_flags = header & ~0xFFFF
583 583 self._format_version = header & 0xFFFF
584 584
585 585 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
586 586 if supported_flags is None:
587 587 msg = _(b'unknown version (%d) in revlog %s')
588 588 msg %= (self._format_version, self.display_id)
589 589 raise error.RevlogError(msg)
590 590 elif self._format_flags & ~supported_flags:
591 591 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
592 592 display_flag = self._format_flags >> 16
593 593 msg %= (display_flag, self._format_version, self.display_id)
594 594 raise error.RevlogError(msg)
595 595
596 596 features = FEATURES_BY_VERSION[self._format_version]
597 597 self._inline = features[b'inline'](self._format_flags)
598 598 self._generaldelta = features[b'generaldelta'](self._format_flags)
599 599 self.hassidedata = features[b'sidedata']
600 600
601 601 if not features[b'docket']:
602 602 self._indexfile = entry_point
603 603 index_data = entry_data
604 604 else:
605 605 self._docket_file = entry_point
606 606 if self._initempty:
607 607 self._docket = docketutil.default_docket(self, header)
608 608 else:
609 609 self._docket = docketutil.parse_docket(
610 610 self, entry_data, use_pending=self._trypending
611 611 )
612 612 self._indexfile = self._docket.index_filepath()
613 613 index_data = b''
614 614 index_size = self._docket.index_end
615 615 if index_size > 0:
616 616 index_data = self._get_data(
617 617 self._indexfile, mmapindexthreshold, size=index_size
618 618 )
619 619 if len(index_data) < index_size:
620 620 msg = _(b'too few index data for %s: got %d, expected %d')
621 621 msg %= (self.display_id, len(index_data), index_size)
622 622 raise error.RevlogError(msg)
623 623
624 624 self._inline = False
625 625 # generaldelta implied by version 2 revlogs.
626 626 self._generaldelta = True
627 627 # the logic for persistent nodemap will be dealt with within the
628 628 # main docket, so disable it for now.
629 629 self._nodemap_file = None
630 630
631 631 if self.postfix is None:
632 632 self._datafile = b'%s.d' % self.radix
633 633 else:
634 634 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
635 635
636 636 self.nodeconstants = sha1nodeconstants
637 637 self.nullid = self.nodeconstants.nullid
638 638
639 639 # sparse-revlog can't be on without general-delta (issue6056)
640 640 if not self._generaldelta:
641 641 self._sparserevlog = False
642 642
643 643 self._storedeltachains = True
644 644
645 645 devel_nodemap = (
646 646 self._nodemap_file
647 647 and force_nodemap
648 648 and parse_index_v1_nodemap is not None
649 649 )
650 650
651 651 use_rust_index = False
652 652 if rustrevlog is not None:
653 653 if self._nodemap_file is not None:
654 654 use_rust_index = True
655 655 else:
656 656 use_rust_index = self.opener.options.get(b'rust.index')
657 657
658 658 self._parse_index = parse_index_v1
659 659 if self._format_version == REVLOGV0:
660 660 self._parse_index = revlogv0.parse_index_v0
661 661 elif self._format_version == REVLOGV2:
662 662 self._parse_index = parse_index_v2
663 663 elif self._format_version == CHANGELOGV2:
664 664 self._parse_index = parse_index_cl_v2
665 665 elif devel_nodemap:
666 666 self._parse_index = parse_index_v1_nodemap
667 667 elif use_rust_index:
668 668 self._parse_index = parse_index_v1_mixed
669 669 try:
670 670 d = self._parse_index(index_data, self._inline)
671 671 index, _chunkcache = d
672 672 use_nodemap = (
673 673 not self._inline
674 674 and self._nodemap_file is not None
675 675 and util.safehasattr(index, 'update_nodemap_data')
676 676 )
677 677 if use_nodemap:
678 678 nodemap_data = nodemaputil.persisted_data(self)
679 679 if nodemap_data is not None:
680 680 docket = nodemap_data[0]
681 681 if (
682 682 len(d[0]) > docket.tip_rev
683 683 and d[0][docket.tip_rev][7] == docket.tip_node
684 684 ):
685 685 # no changelog tampering
686 686 self._nodemap_docket = docket
687 687 index.update_nodemap_data(*nodemap_data)
688 688 except (ValueError, IndexError):
689 689 raise error.RevlogError(
690 690 _(b"index %s is corrupted") % self.display_id
691 691 )
692 692 self.index, self._chunkcache = d
693 693 if not self._chunkcache:
694 694 self._chunkclear()
695 695 # revnum -> (chain-length, sum-delta-length)
696 696 self._chaininfocache = util.lrucachedict(500)
697 697 # revlog header -> revlog compressor
698 698 self._decompressors = {}
699 699
700 700 @util.propertycache
701 701 def revlog_kind(self):
702 702 return self.target[0]
703 703
704 704 @util.propertycache
705 705 def display_id(self):
706 706 """The public facing "ID" of the revlog that we use in message"""
707 707 # Maybe we should build a user facing representation of
708 708 # revlog.target instead of using `self.radix`
709 709 return self.radix
710 710
711 711 def _get_decompressor(self, t):
712 712 try:
713 713 compressor = self._decompressors[t]
714 714 except KeyError:
715 715 try:
716 716 engine = util.compengines.forrevlogheader(t)
717 717 compressor = engine.revlogcompressor(self._compengineopts)
718 718 self._decompressors[t] = compressor
719 719 except KeyError:
720 720 raise error.RevlogError(
721 721 _(b'unknown compression type %s') % binascii.hexlify(t)
722 722 )
723 723 return compressor
724 724
725 725 @util.propertycache
726 726 def _compressor(self):
727 727 engine = util.compengines[self._compengine]
728 728 return engine.revlogcompressor(self._compengineopts)
729 729
730 730 @util.propertycache
731 731 def _decompressor(self):
732 732 """the default decompressor"""
733 733 if self._docket is None:
734 734 return None
735 735 t = self._docket.default_compression_header
736 736 c = self._get_decompressor(t)
737 737 return c.decompress
738 738
739 739 def _indexfp(self):
740 740 """file object for the revlog's index file"""
741 741 return self.opener(self._indexfile, mode=b"r")
742 742
743 743 def __index_write_fp(self):
744 744 # You should not use this directly; use `_writing` instead
745 745 try:
746 746 f = self.opener(
747 747 self._indexfile, mode=b"r+", checkambig=self._checkambig
748 748 )
749 749 if self._docket is None:
750 750 f.seek(0, os.SEEK_END)
751 751 else:
752 752 f.seek(self._docket.index_end, os.SEEK_SET)
753 753 return f
754 754 except IOError as inst:
755 755 if inst.errno != errno.ENOENT:
756 756 raise
757 757 return self.opener(
758 758 self._indexfile, mode=b"w+", checkambig=self._checkambig
759 759 )
760 760
761 761 def __index_new_fp(self):
762 762 # You should not use this unless you are upgrading from an inline revlog
763 763 return self.opener(
764 764 self._indexfile,
765 765 mode=b"w",
766 766 checkambig=self._checkambig,
767 767 atomictemp=True,
768 768 )
769 769
770 770 def _datafp(self, mode=b'r'):
771 771 """file object for the revlog's data file"""
772 772 return self.opener(self._datafile, mode=mode)
773 773
774 774 @contextlib.contextmanager
775 775 def _datareadfp(self, existingfp=None):
776 776 """file object suitable to read data"""
777 777 # Use explicit file handle, if given.
778 778 if existingfp is not None:
779 779 yield existingfp
780 780
781 781 # Use a file handle being actively used for writes, if available.
782 782 # There is some danger to doing this because reads will seek the
783 783 # file. However, _writeentry() performs a SEEK_END before all writes,
784 784 # so we should be safe.
785 785 elif self._writinghandles:
786 786 if self._inline:
787 787 yield self._writinghandles[0]
788 788 else:
789 789 yield self._writinghandles[1]
790 790
791 791 # Otherwise open a new file handle.
792 792 else:
793 793 if self._inline:
794 794 func = self._indexfp
795 795 else:
796 796 func = self._datafp
797 797 with func() as fp:
798 798 yield fp
799 799
800 800 def tiprev(self):
801 801 return len(self.index) - 1
802 802
803 803 def tip(self):
804 804 return self.node(self.tiprev())
805 805
806 806 def __contains__(self, rev):
807 807 return 0 <= rev < len(self)
808 808
809 809 def __len__(self):
810 810 return len(self.index)
811 811
812 812 def __iter__(self):
813 813 return iter(pycompat.xrange(len(self)))
814 814
815 815 def revs(self, start=0, stop=None):
816 816 """iterate over all rev in this revlog (from start to stop)"""
817 817 return storageutil.iterrevs(len(self), start=start, stop=stop)
818 818
819 819 @property
820 820 def nodemap(self):
821 821 msg = (
822 822 b"revlog.nodemap is deprecated, "
823 823 b"use revlog.index.[has_node|rev|get_rev]"
824 824 )
825 825 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
826 826 return self.index.nodemap
827 827
828 828 @property
829 829 def _nodecache(self):
830 830 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
831 831 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
832 832 return self.index.nodemap
833 833
834 834 def hasnode(self, node):
835 835 try:
836 836 self.rev(node)
837 837 return True
838 838 except KeyError:
839 839 return False
840 840
841 841 def candelta(self, baserev, rev):
842 842 """whether two revisions (baserev, rev) can be delta-ed or not"""
843 843 # Disable delta if either rev requires a content-changing flag
844 844 # processor (ex. LFS). This is because such flag processor can alter
845 845 # the rawtext content that the delta will be based on, and two clients
846 846 # could have the same revlog node with different flags (i.e. different
847 847 # rawtext contents) and the delta could be incompatible.
848 848 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
849 849 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
850 850 ):
851 851 return False
852 852 return True
853 853
854 854 def update_caches(self, transaction):
855 855 if self._nodemap_file is not None:
856 856 if transaction is None:
857 857 nodemaputil.update_persistent_nodemap(self)
858 858 else:
859 859 nodemaputil.setup_persistent_nodemap(transaction, self)
860 860
861 861 def clearcaches(self):
862 862 self._revisioncache = None
863 863 self._chainbasecache.clear()
864 864 self._chunkcache = (0, b'')
865 865 self._pcache = {}
866 866 self._nodemap_docket = None
867 867 self.index.clearcaches()
868 868 # The python code is the one responsible for validating the docket; we
869 869 # end up having to refresh it here.
870 870 use_nodemap = (
871 871 not self._inline
872 872 and self._nodemap_file is not None
873 873 and util.safehasattr(self.index, 'update_nodemap_data')
874 874 )
875 875 if use_nodemap:
876 876 nodemap_data = nodemaputil.persisted_data(self)
877 877 if nodemap_data is not None:
878 878 self._nodemap_docket = nodemap_data[0]
879 879 self.index.update_nodemap_data(*nodemap_data)
880 880
881 881 def rev(self, node):
882 882 try:
883 883 return self.index.rev(node)
884 884 except TypeError:
885 885 raise
886 886 except error.RevlogError:
887 887 # parsers.c radix tree lookup failed
888 888 if (
889 889 node == self.nodeconstants.wdirid
890 890 or node in self.nodeconstants.wdirfilenodeids
891 891 ):
892 892 raise error.WdirUnsupported
893 893 raise error.LookupError(node, self.display_id, _(b'no node'))
894 894
895 895 # Accessors for index entries.
896 896
897 897 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
898 898 # are flags.
899 899 def start(self, rev):
900 900 return int(self.index[rev][0] >> 16)
901 901
902 902 def flags(self, rev):
903 903 return self.index[rev][0] & 0xFFFF
904 904
905 905 def length(self, rev):
906 906 return self.index[rev][1]
907 907
908 908 def sidedata_length(self, rev):
909 909 if not self.hassidedata:
910 910 return 0
911 911 return self.index[rev][9]
912 912
913 913 def rawsize(self, rev):
914 914 """return the length of the uncompressed text for a given revision"""
915 915 l = self.index[rev][2]
916 916 if l >= 0:
917 917 return l
918 918
919 919 t = self.rawdata(rev)
920 920 return len(t)
921 921
922 922 def size(self, rev):
923 923 """length of non-raw text (processed by a "read" flag processor)"""
924 924 # fast path: if no "read" flag processor could change the content,
925 925 # size is rawsize. note: ELLIPSIS is known to not change the content.
926 926 flags = self.flags(rev)
927 927 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
928 928 return self.rawsize(rev)
929 929
930 930 return len(self.revision(rev, raw=False))
931 931
932 932 def chainbase(self, rev):
933 933 base = self._chainbasecache.get(rev)
934 934 if base is not None:
935 935 return base
936 936
937 937 index = self.index
938 938 iterrev = rev
939 939 base = index[iterrev][3]
940 940 while base != iterrev:
941 941 iterrev = base
942 942 base = index[iterrev][3]
943 943
944 944 self._chainbasecache[rev] = base
945 945 return base
946 946
947 947 def linkrev(self, rev):
948 948 return self.index[rev][4]
949 949
950 950 def parentrevs(self, rev):
951 951 try:
952 952 entry = self.index[rev]
953 953 except IndexError:
954 954 if rev == wdirrev:
955 955 raise error.WdirUnsupported
956 956 raise
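# If p1 is null, swap the pair so the non-null parent is returned first.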
957 957 if entry[5] == nullrev:
958 958 return entry[6], entry[5]
959 959 else:
960 960 return entry[5], entry[6]
961 961
962 962 # fast parentrevs(rev) where rev isn't filtered
963 963 _uncheckedparentrevs = parentrevs
964 964
965 965 def node(self, rev):
966 966 try:
967 967 return self.index[rev][7]
968 968 except IndexError:
969 969 if rev == wdirrev:
970 970 raise error.WdirUnsupported
971 971 raise
972 972
973 973 # Derived from index values.
974 974
975 975 def end(self, rev):
976 976 return self.start(rev) + self.length(rev)
977 977
978 978 def parents(self, node):
979 979 i = self.index
980 980 d = i[self.rev(node)]
981 981 # inline node() to avoid function call overhead
982 982 if d[5] == self.nullid:
983 983 return i[d[6]][7], i[d[5]][7]
984 984 else:
985 985 return i[d[5]][7], i[d[6]][7]
986 986
987 987 def chainlen(self, rev):
988 988 return self._chaininfo(rev)[0]
989 989
990 990 def _chaininfo(self, rev):
991 991 chaininfocache = self._chaininfocache
992 992 if rev in chaininfocache:
993 993 return chaininfocache[rev]
994 994 index = self.index
995 995 generaldelta = self._generaldelta
996 996 iterrev = rev
997 997 e = index[iterrev]
998 998 clen = 0
999 999 compresseddeltalen = 0
1000 1000 while iterrev != e[3]:
1001 1001 clen += 1
1002 1002 compresseddeltalen += e[1]
1003 1003 if generaldelta:
1004 1004 iterrev = e[3]
1005 1005 else:
1006 1006 iterrev -= 1
1007 1007 if iterrev in chaininfocache:
1008 1008 t = chaininfocache[iterrev]
1009 1009 clen += t[0]
1010 1010 compresseddeltalen += t[1]
1011 1011 break
1012 1012 e = index[iterrev]
1013 1013 else:
1014 1014 # Add text length of base since decompressing that also takes
1015 1015 # work. For cache hits the length is already included.
1016 1016 compresseddeltalen += e[1]
1017 1017 r = (clen, compresseddeltalen)
1018 1018 chaininfocache[rev] = r
1019 1019 return r
1020 1020
1021 1021 def _deltachain(self, rev, stoprev=None):
1022 1022 """Obtain the delta chain for a revision.
1023 1023
1024 1024 ``stoprev`` specifies a revision to stop at. If not specified, we
1025 1025 stop at the base of the chain.
1026 1026
1027 1027 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1028 1028 revs in ascending order and ``stopped`` is a bool indicating whether
1029 1029 ``stoprev`` was hit.
1030 1030 """
1031 1031 # Try C implementation.
1032 1032 try:
1033 1033 return self.index.deltachain(rev, stoprev, self._generaldelta)
1034 1034 except AttributeError:
1035 1035 pass
1036 1036
1037 1037 chain = []
1038 1038
1039 1039 # Alias to prevent attribute lookup in tight loop.
1040 1040 index = self.index
1041 1041 generaldelta = self._generaldelta
1042 1042
1043 1043 iterrev = rev
1044 1044 e = index[iterrev]
1045 1045 while iterrev != e[3] and iterrev != stoprev:
1046 1046 chain.append(iterrev)
1047 1047 if generaldelta:
1048 1048 iterrev = e[3]
1049 1049 else:
1050 1050 iterrev -= 1
1051 1051 e = index[iterrev]
1052 1052
1053 1053 if iterrev == stoprev:
1054 1054 stopped = True
1055 1055 else:
1056 1056 chain.append(iterrev)
1057 1057 stopped = False
1058 1058
1059 1059 chain.reverse()
1060 1060 return chain, stopped
1061 1061
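# Editor's sketch: typical use of the delta chain walk above, assuming a
# populated revlog instance ``rl``:
#
#     chain, stopped = rl._deltachain(rev)
#     # ``chain`` lists the revs from the chain base up to ``rev`` in
#     # ascending order; ``stopped`` is True only if ``stoprev`` was hit.
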
1062 1062 def ancestors(self, revs, stoprev=0, inclusive=False):
1063 1063 """Generate the ancestors of 'revs' in reverse revision order.
1064 1064 Does not generate revs lower than stoprev.
1065 1065
1066 1066 See the documentation for ancestor.lazyancestors for more details."""
1067 1067
1068 1068 # first, make sure start revisions aren't filtered
1069 1069 revs = list(revs)
1070 1070 checkrev = self.node
1071 1071 for r in revs:
1072 1072 checkrev(r)
1073 1073 # and we're sure ancestors aren't filtered as well
1074 1074
1075 1075 if rustancestor is not None and self.index.rust_ext_compat:
1076 1076 lazyancestors = rustancestor.LazyAncestors
1077 1077 arg = self.index
1078 1078 else:
1079 1079 lazyancestors = ancestor.lazyancestors
1080 1080 arg = self._uncheckedparentrevs
1081 1081 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1082 1082
1083 1083 def descendants(self, revs):
1084 1084 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1085 1085
1086 1086 def findcommonmissing(self, common=None, heads=None):
1087 1087 """Return a tuple of the ancestors of common and the ancestors of heads
1088 1088 that are not ancestors of common. In revset terminology, we return the
1089 1089 tuple:
1090 1090
1091 1091 ::common, (::heads) - (::common)
1092 1092
1093 1093 The list is sorted by revision number, meaning it is
1094 1094 topologically sorted.
1095 1095
1096 1096 'heads' and 'common' are both lists of node IDs. If heads is
1097 1097 not supplied, uses all of the revlog's heads. If common is not
1098 1098 supplied, uses nullid."""
1099 1099 if common is None:
1100 1100 common = [self.nullid]
1101 1101 if heads is None:
1102 1102 heads = self.heads()
1103 1103
1104 1104 common = [self.rev(n) for n in common]
1105 1105 heads = [self.rev(n) for n in heads]
1106 1106
1107 1107 # we want the ancestors, but inclusive
1108 1108 class lazyset(object):
1109 1109 def __init__(self, lazyvalues):
1110 1110 self.addedvalues = set()
1111 1111 self.lazyvalues = lazyvalues
1112 1112
1113 1113 def __contains__(self, value):
1114 1114 return value in self.addedvalues or value in self.lazyvalues
1115 1115
1116 1116 def __iter__(self):
1117 1117 added = self.addedvalues
1118 1118 for r in added:
1119 1119 yield r
1120 1120 for r in self.lazyvalues:
1121 1121 if not r in added:
1122 1122 yield r
1123 1123
1124 1124 def add(self, value):
1125 1125 self.addedvalues.add(value)
1126 1126
1127 1127 def update(self, values):
1128 1128 self.addedvalues.update(values)
1129 1129
1130 1130 has = lazyset(self.ancestors(common))
1131 1131 has.add(nullrev)
1132 1132 has.update(common)
1133 1133
1134 1134 # take all ancestors from heads that aren't in has
1135 1135 missing = set()
1136 1136 visit = collections.deque(r for r in heads if r not in has)
1137 1137 while visit:
1138 1138 r = visit.popleft()
1139 1139 if r in missing:
1140 1140 continue
1141 1141 else:
1142 1142 missing.add(r)
1143 1143 for p in self.parentrevs(r):
1144 1144 if p not in has:
1145 1145 visit.append(p)
1146 1146 missing = list(missing)
1147 1147 missing.sort()
1148 1148 return has, [self.node(miss) for miss in missing]
1149 1149
1150 1150 def incrementalmissingrevs(self, common=None):
1151 1151 """Return an object that can be used to incrementally compute the
1152 1152 revision numbers of the ancestors of arbitrary sets that are not
1153 1153 ancestors of common. This is an ancestor.incrementalmissingancestors
1154 1154 object.
1155 1155
1156 1156 'common' is a list of revision numbers. If common is not supplied, uses
1157 1157 nullrev.
1158 1158 """
1159 1159 if common is None:
1160 1160 common = [nullrev]
1161 1161
1162 1162 if rustancestor is not None and self.index.rust_ext_compat:
1163 1163 return rustancestor.MissingAncestors(self.index, common)
1164 1164 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1165 1165
1166 1166 def findmissingrevs(self, common=None, heads=None):
1167 1167 """Return the revision numbers of the ancestors of heads that
1168 1168 are not ancestors of common.
1169 1169
1170 1170 More specifically, return a list of revision numbers corresponding to
1171 1171 nodes N such that every N satisfies the following constraints:
1172 1172
1173 1173 1. N is an ancestor of some node in 'heads'
1174 1174 2. N is not an ancestor of any node in 'common'
1175 1175
1176 1176 The list is sorted by revision number, meaning it is
1177 1177 topologically sorted.
1178 1178
1179 1179 'heads' and 'common' are both lists of revision numbers. If heads is
1180 1180 not supplied, uses all of the revlog's heads. If common is not
1181 1181 supplied, uses nullid."""
1182 1182 if common is None:
1183 1183 common = [nullrev]
1184 1184 if heads is None:
1185 1185 heads = self.headrevs()
1186 1186
1187 1187 inc = self.incrementalmissingrevs(common=common)
1188 1188 return inc.missingancestors(heads)
1189 1189
1190 1190 def findmissing(self, common=None, heads=None):
1191 1191 """Return the ancestors of heads that are not ancestors of common.
1192 1192
1193 1193 More specifically, return a list of nodes N such that every N
1194 1194 satisfies the following constraints:
1195 1195
1196 1196 1. N is an ancestor of some node in 'heads'
1197 1197 2. N is not an ancestor of any node in 'common'
1198 1198
1199 1199 The list is sorted by revision number, meaning it is
1200 1200 topologically sorted.
1201 1201
1202 1202 'heads' and 'common' are both lists of node IDs. If heads is
1203 1203 not supplied, uses all of the revlog's heads. If common is not
1204 1204 supplied, uses nullid."""
1205 1205 if common is None:
1206 1206 common = [self.nullid]
1207 1207 if heads is None:
1208 1208 heads = self.heads()
1209 1209
1210 1210 common = [self.rev(n) for n in common]
1211 1211 heads = [self.rev(n) for n in heads]
1212 1212
1213 1213 inc = self.incrementalmissingrevs(common=common)
1214 1214 return [self.node(r) for r in inc.missingancestors(heads)]
1215 1215
1216 1216 def nodesbetween(self, roots=None, heads=None):
1217 1217 """Return a topological path from 'roots' to 'heads'.
1218 1218
1219 1219 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1220 1220 topologically sorted list of all nodes N that satisfy both of
1221 1221 these constraints:
1222 1222
1223 1223 1. N is a descendant of some node in 'roots'
1224 1224 2. N is an ancestor of some node in 'heads'
1225 1225
1226 1226 Every node is considered to be both a descendant and an ancestor
1227 1227 of itself, so every reachable node in 'roots' and 'heads' will be
1228 1228 included in 'nodes'.
1229 1229
1230 1230 'outroots' is the list of reachable nodes in 'roots', i.e., the
1231 1231 subset of 'roots' that is returned in 'nodes'. Likewise,
1232 1232 'outheads' is the subset of 'heads' that is also in 'nodes'.
1233 1233
1234 1234 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1235 1235 unspecified, uses nullid as the only root. If 'heads' is
1236 1236 unspecified, uses list of all of the revlog's heads."""
1237 1237 nonodes = ([], [], [])
1238 1238 if roots is not None:
1239 1239 roots = list(roots)
1240 1240 if not roots:
1241 1241 return nonodes
1242 1242 lowestrev = min([self.rev(n) for n in roots])
1243 1243 else:
1244 1244 roots = [self.nullid] # Everybody's a descendant of nullid
1245 1245 lowestrev = nullrev
1246 1246 if (lowestrev == nullrev) and (heads is None):
1247 1247 # We want _all_ the nodes!
1248 1248 return (
1249 1249 [self.node(r) for r in self],
1250 1250 [self.nullid],
1251 1251 list(self.heads()),
1252 1252 )
1253 1253 if heads is None:
1254 1254 # All nodes are ancestors, so the latest ancestor is the last
1255 1255 # node.
1256 1256 highestrev = len(self) - 1
1257 1257 # Set ancestors to None to signal that every node is an ancestor.
1258 1258 ancestors = None
1259 1259 # Set heads to an empty dictionary for later discovery of heads
1260 1260 heads = {}
1261 1261 else:
1262 1262 heads = list(heads)
1263 1263 if not heads:
1264 1264 return nonodes
1265 1265 ancestors = set()
1266 1266 # Turn heads into a dictionary so we can remove 'fake' heads.
1267 1267 # Also, later we will be using it to filter out the heads we can't
1268 1268 # find from roots.
1269 1269 heads = dict.fromkeys(heads, False)
1270 1270 # Start at the top and keep marking parents until we're done.
1271 1271 nodestotag = set(heads)
1272 1272 # Remember where the top was so we can use it as a limit later.
1273 1273 highestrev = max([self.rev(n) for n in nodestotag])
1274 1274 while nodestotag:
1275 1275 # grab a node to tag
1276 1276 n = nodestotag.pop()
1277 1277 # Never tag nullid
1278 1278 if n == self.nullid:
1279 1279 continue
1280 1280 # A node's revision number represents its place in a
1281 1281 # topologically sorted list of nodes.
1282 1282 r = self.rev(n)
1283 1283 if r >= lowestrev:
1284 1284 if n not in ancestors:
1285 1285 # If we are possibly a descendant of one of the roots
1286 1286 # and we haven't already been marked as an ancestor
1287 1287 ancestors.add(n) # Mark as ancestor
1288 1288 # Add non-nullid parents to list of nodes to tag.
1289 1289 nodestotag.update(
1290 1290 [p for p in self.parents(n) if p != self.nullid]
1291 1291 )
1292 1292 elif n in heads: # We've seen it before, is it a fake head?
1293 1293 # So it is, real heads should not be the ancestors of
1294 1294 # any other heads.
1295 1295 heads.pop(n)
1296 1296 if not ancestors:
1297 1297 return nonodes
1298 1298 # Now that we have our set of ancestors, we want to remove any
1299 1299 # roots that are not ancestors.
1300 1300
1301 1301 # If one of the roots was nullid, everything is included anyway.
1302 1302 if lowestrev > nullrev:
1303 1303 # But, since we weren't, let's recompute the lowest rev to not
1304 1304 # include roots that aren't ancestors.
1305 1305
1306 1306 # Filter out roots that aren't ancestors of heads
1307 1307 roots = [root for root in roots if root in ancestors]
1308 1308 # Recompute the lowest revision
1309 1309 if roots:
1310 1310 lowestrev = min([self.rev(root) for root in roots])
1311 1311 else:
1312 1312 # No more roots? Return empty list
1313 1313 return nonodes
1314 1314 else:
1315 1315 # We are descending from nullid, and don't need to care about
1316 1316 # any other roots.
1317 1317 lowestrev = nullrev
1318 1318 roots = [self.nullid]
1319 1319 # Transform our roots list into a set.
1320 1320 descendants = set(roots)
1321 1321 # Also, keep the original roots so we can filter out roots that aren't
1322 1322 # 'real' roots (i.e. are descended from other roots).
1323 1323 roots = descendants.copy()
1324 1324 # Our topologically sorted list of output nodes.
1325 1325 orderedout = []
1326 1326 # Don't start at nullid since we don't want nullid in our output list,
1327 1327 # and if nullid shows up in descendants, empty parents will look like
1328 1328 # they're descendants.
1329 1329 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1330 1330 n = self.node(r)
1331 1331 isdescendant = False
1332 1332 if lowestrev == nullrev: # Everybody is a descendant of nullid
1333 1333 isdescendant = True
1334 1334 elif n in descendants:
1335 1335 # n is already a descendant
1336 1336 isdescendant = True
1337 1337 # This check only needs to be done here because all the roots
1338 1338 # will start being marked as descendants before the loop.
1339 1339 if n in roots:
1340 1340 # If n was a root, check if it's a 'real' root.
1341 1341 p = tuple(self.parents(n))
1342 1342 # If any of its parents are descendants, it's not a root.
1343 1343 if (p[0] in descendants) or (p[1] in descendants):
1344 1344 roots.remove(n)
1345 1345 else:
1346 1346 p = tuple(self.parents(n))
1347 1347 # A node is a descendant if either of its parents is a
1348 1348 # descendant. (We seeded the descendants set with the roots
1349 1349 # up there, remember?)
1350 1350 if (p[0] in descendants) or (p[1] in descendants):
1351 1351 descendants.add(n)
1352 1352 isdescendant = True
1353 1353 if isdescendant and ((ancestors is None) or (n in ancestors)):
1354 1354 # Only include nodes that are both descendants and ancestors.
1355 1355 orderedout.append(n)
1356 1356 if (ancestors is not None) and (n in heads):
1357 1357 # We're trying to figure out which heads are reachable
1358 1358 # from roots.
1359 1359 # Mark this head as having been reached
1360 1360 heads[n] = True
1361 1361 elif ancestors is None:
1362 1362 # Otherwise, we're trying to discover the heads.
1363 1363 # Assume this is a head because if it isn't, the next step
1364 1364 # will eventually remove it.
1365 1365 heads[n] = True
1366 1366 # But, obviously its parents aren't.
1367 1367 for p in self.parents(n):
1368 1368 heads.pop(p, None)
1369 1369 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1370 1370 roots = list(roots)
1371 1371 assert orderedout
1372 1372 assert roots
1373 1373 assert heads
1374 1374 return (orderedout, roots, heads)
1375 1375
1376 1376 def headrevs(self, revs=None):
1377 1377 if revs is None:
1378 1378 try:
1379 1379 return self.index.headrevs()
1380 1380 except AttributeError:
1381 1381 return self._headrevs()
1382 1382 if rustdagop is not None and self.index.rust_ext_compat:
1383 1383 return rustdagop.headrevs(self.index, revs)
1384 1384 return dagop.headrevs(revs, self._uncheckedparentrevs)
1385 1385
1386 1386 def computephases(self, roots):
1387 1387 return self.index.computephasesmapsets(roots)
1388 1388
1389 1389 def _headrevs(self):
1390 1390 count = len(self)
1391 1391 if not count:
1392 1392 return [nullrev]
1393 1393 # we won't iterate over filtered revs, so nobody is a head at the start
1394 1394 ishead = [0] * (count + 1)
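# note: the extra slot means parent writes for nullrev (-1) land in
# ishead[-1], which is never reported as a head, instead of a real rev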
1395 1395 index = self.index
1396 1396 for r in self:
1397 1397 ishead[r] = 1 # I may be a head
1398 1398 e = index[r]
1399 1399 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1400 1400 return [r for r, val in enumerate(ishead) if val]
1401 1401
1402 1402 def heads(self, start=None, stop=None):
1403 1403 """return the list of all nodes that have no children
1404 1404
1405 1405 if start is specified, only heads that are descendants of
1406 1406 start will be returned
1407 1407 if stop is specified, it will consider all the revs from stop
1408 1408 as if they had no children
1409 1409 """
1410 1410 if start is None and stop is None:
1411 1411 if not len(self):
1412 1412 return [self.nullid]
1413 1413 return [self.node(r) for r in self.headrevs()]
1414 1414
1415 1415 if start is None:
1416 1416 start = nullrev
1417 1417 else:
1418 1418 start = self.rev(start)
1419 1419
1420 1420 stoprevs = {self.rev(n) for n in stop or []}
1421 1421
1422 1422 revs = dagop.headrevssubset(
1423 1423 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1424 1424 )
1425 1425
1426 1426 return [self.node(rev) for rev in revs]
1427 1427
1428 1428 def children(self, node):
1429 1429 """find the children of a given node"""
1430 1430 c = []
1431 1431 p = self.rev(node)
1432 1432 for r in self.revs(start=p + 1):
1433 1433 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1434 1434 if prevs:
1435 1435 for pr in prevs:
1436 1436 if pr == p:
1437 1437 c.append(self.node(r))
1438 1438 elif p == nullrev:
1439 1439 c.append(self.node(r))
1440 1440 return c
1441 1441
1442 1442 def commonancestorsheads(self, a, b):
1443 1443 """calculate all the heads of the common ancestors of nodes a and b"""
1444 1444 a, b = self.rev(a), self.rev(b)
1445 1445 ancs = self._commonancestorsheads(a, b)
1446 1446 return pycompat.maplist(self.node, ancs)
1447 1447
1448 1448 def _commonancestorsheads(self, *revs):
1449 1449 """calculate all the heads of the common ancestors of revs"""
1450 1450 try:
1451 1451 ancs = self.index.commonancestorsheads(*revs)
1452 1452 except (AttributeError, OverflowError): # C implementation failed
1453 1453 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1454 1454 return ancs
1455 1455
1456 1456 def isancestor(self, a, b):
1457 1457 """return True if node a is an ancestor of node b
1458 1458
1459 1459 A revision is considered an ancestor of itself."""
1460 1460 a, b = self.rev(a), self.rev(b)
1461 1461 return self.isancestorrev(a, b)
1462 1462
1463 1463 def isancestorrev(self, a, b):
1464 1464 """return True if revision a is an ancestor of revision b
1465 1465
1466 1466 A revision is considered an ancestor of itself.
1467 1467
1468 1468 The implementation of this is trivial but the use of
1469 1469 reachableroots is not."""
1470 1470 if a == nullrev:
1471 1471 return True
1472 1472 elif a == b:
1473 1473 return True
1474 1474 elif a > b:
1475 1475 return False
1476 1476 return bool(self.reachableroots(a, [b], [a], includepath=False))
1477 1477
1478 1478 def reachableroots(self, minroot, heads, roots, includepath=False):
1479 1479 """return (heads(::(<roots> and <roots>::<heads>)))
1480 1480
1481 1481 If includepath is True, return (<roots>::<heads>)."""
1482 1482 try:
1483 1483 return self.index.reachableroots2(
1484 1484 minroot, heads, roots, includepath
1485 1485 )
1486 1486 except AttributeError:
1487 1487 return dagop._reachablerootspure(
1488 1488 self.parentrevs, minroot, roots, heads, includepath
1489 1489 )
1490 1490
1491 1491 def ancestor(self, a, b):
1492 1492 """calculate the "best" common ancestor of nodes a and b"""
1493 1493
1494 1494 a, b = self.rev(a), self.rev(b)
1495 1495 try:
1496 1496 ancs = self.index.ancestors(a, b)
1497 1497 except (AttributeError, OverflowError):
1498 1498 ancs = ancestor.ancestors(self.parentrevs, a, b)
1499 1499 if ancs:
1500 1500 # choose a consistent winner when there's a tie
1501 1501 return min(map(self.node, ancs))
1502 1502 return self.nullid
1503 1503
1504 1504 def _match(self, id):
1505 1505 if isinstance(id, int):
1506 1506 # rev
1507 1507 return self.node(id)
1508 1508 if len(id) == self.nodeconstants.nodelen:
1509 1509 # possibly a binary node
1510 1510 # odds of a binary node being all hex in ASCII are 1 in 10**25
1511 1511 try:
1512 1512 node = id
1513 1513 self.rev(node) # quick search the index
1514 1514 return node
1515 1515 except error.LookupError:
1516 1516 pass # may be partial hex id
1517 1517 try:
1518 1518 # str(rev)
1519 1519 rev = int(id)
1520 1520 if b"%d" % rev != id:
1521 1521 raise ValueError
1522 1522 if rev < 0:
1523 1523 rev = len(self) + rev
1524 1524 if rev < 0 or rev >= len(self):
1525 1525 raise ValueError
1526 1526 return self.node(rev)
1527 1527 except (ValueError, OverflowError):
1528 1528 pass
1529 1529 if len(id) == 2 * self.nodeconstants.nodelen:
1530 1530 try:
1531 1531 # a full hex nodeid?
1532 1532 node = bin(id)
1533 1533 self.rev(node)
1534 1534 return node
1535 1535 except (TypeError, error.LookupError):
1536 1536 pass
1537 1537
1538 1538 def _partialmatch(self, id):
1539 1539 # we don't care about wdirfilenodeids as they should always be full hashes
1540 1540 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1541 ambiguous = False
1541 1542 try:
1542 1543 partial = self.index.partialmatch(id)
1543 1544 if partial and self.hasnode(partial):
1544 1545 if maybewdir:
1545 1546 # single 'ff...' match in radix tree, ambiguous with wdir
1546 raise error.RevlogError
1547 return partial
1548 if maybewdir:
1547 ambiguous = True
1548 else:
1549 return partial
1550 elif maybewdir:
1549 1551 # no 'ff...' match in radix tree, wdir identified
1550 1552 raise error.WdirUnsupported
1551 return None
1553 else:
1554 return None
1552 1555 except error.RevlogError:
1553 1556 # parsers.c radix tree lookup gave multiple matches
1554 1557 # fast path: for unfiltered changelog, radix tree is accurate
1555 1558 if not getattr(self, 'filteredrevs', None):
1556 raise error.AmbiguousPrefixLookupError(
1557 id, self.display_id, _(b'ambiguous identifier')
1558 )
1559 ambiguous = True
1559 1560 # fall through to slow path that filters hidden revisions
1560 1561 except (AttributeError, ValueError):
1561 1562 # we are pure python, or key was too short to search radix tree
1562 1563 pass
1564 if ambiguous:
1565 raise error.AmbiguousPrefixLookupError(
1566 id, self.display_id, _(b'ambiguous identifier')
1567 )
1563 1568
1564 1569 if id in self._pcache:
1565 1570 return self._pcache[id]
1566 1571
1567 1572 if len(id) <= 40:
1568 1573 try:
1569 1574 # hex(node)[:...]
1570 1575 l = len(id) // 2 # grab an even number of digits
1571 1576 prefix = bin(id[: l * 2])
1572 1577 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1573 1578 nl = [
1574 1579 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1575 1580 ]
1576 1581 if self.nodeconstants.nullhex.startswith(id):
1577 1582 nl.append(self.nullid)
1578 1583 if len(nl) > 0:
1579 1584 if len(nl) == 1 and not maybewdir:
1580 1585 self._pcache[id] = nl[0]
1581 1586 return nl[0]
1582 1587 raise error.AmbiguousPrefixLookupError(
1583 1588 id, self.display_id, _(b'ambiguous identifier')
1584 1589 )
1585 1590 if maybewdir:
1586 1591 raise error.WdirUnsupported
1587 1592 return None
1588 1593 except TypeError:
1589 1594 pass
1590 1595
1591 1596 def lookup(self, id):
1592 1597 """locate a node based on:
1593 1598 - revision number or str(revision number)
1594 1599 - nodeid or subset of hex nodeid
1595 1600 """
1596 1601 n = self._match(id)
1597 1602 if n is not None:
1598 1603 return n
1599 1604 n = self._partialmatch(id)
1600 1605 if n:
1601 1606 return n
1602 1607
1603 1608 raise error.LookupError(id, self.display_id, _(b'no match found'))
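
# A minimal usage sketch of the lookup contract above (hypothetical
# caller; ``rl`` stands for an existing revlog instance and is not
# defined in this module):
#
#   rl.lookup(0)                      # integer revision number
#   rl.lookup(b'0')                   # str(revision number) as bytes
#   rl.lookup(rl.node(0))             # full binary nodeid
#   rl.lookup(hex(rl.node(0))[:6])    # unambiguous hex prefix
#
# anything that matches nothing raises error.LookupError, while an
# ambiguous prefix raises error.AmbiguousPrefixLookupError.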
1604 1609
1605 1610 def shortest(self, node, minlength=1):
1606 1611 """Find the shortest unambiguous prefix that matches node."""
1607 1612
1608 1613 def isvalid(prefix):
1609 1614 try:
1610 1615 matchednode = self._partialmatch(prefix)
1611 1616 except error.AmbiguousPrefixLookupError:
1612 1617 return False
1613 1618 except error.WdirUnsupported:
1614 1619 # single 'ff...' match
1615 1620 return True
1616 1621 if matchednode is None:
1617 1622 raise error.LookupError(node, self.display_id, _(b'no node'))
1618 1623 return True
1619 1624
1620 1625 def maybewdir(prefix):
1621 1626 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1622 1627
1623 1628 hexnode = hex(node)
1624 1629
1625 1630 def disambiguate(hexnode, minlength):
1626 1631 """Disambiguate against wdirid."""
1627 1632 for length in range(minlength, len(hexnode) + 1):
1628 1633 prefix = hexnode[:length]
1629 1634 if not maybewdir(prefix):
1630 1635 return prefix
1631 1636
1632 1637 if not getattr(self, 'filteredrevs', None):
1633 1638 try:
1634 1639 length = max(self.index.shortest(node), minlength)
1635 1640 return disambiguate(hexnode, length)
1636 1641 except error.RevlogError:
1637 1642 if node != self.nodeconstants.wdirid:
1638 1643 raise error.LookupError(
1639 1644 node, self.display_id, _(b'no node')
1640 1645 )
1641 1646 except AttributeError:
1642 1647 # Fall through to pure code
1643 1648 pass
1644 1649
1645 1650 if node == self.nodeconstants.wdirid:
1646 1651 for length in range(minlength, len(hexnode) + 1):
1647 1652 prefix = hexnode[:length]
1648 1653 if isvalid(prefix):
1649 1654 return prefix
1650 1655
1651 1656 for length in range(minlength, len(hexnode) + 1):
1652 1657 prefix = hexnode[:length]
1653 1658 if isvalid(prefix):
1654 1659 return disambiguate(hexnode, length)
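
# Sketch of the intended contract (illustrative values only):
#
#   prefix = rl.shortest(node)     # e.g. b'a1b2' if 4 hex digits suffice
#   assert rl.lookup(prefix) == node
#
# a larger ``minlength`` only raises the starting length of the search.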
1655 1660
1656 1661 def cmp(self, node, text):
1657 1662 """compare text with a given file revision
1658 1663
1659 1664 returns True if text is different from what is stored.
1660 1665 """
1661 1666 p1, p2 = self.parents(node)
1662 1667 return storageutil.hashrevisionsha1(text, p1, p2) != node
1663 1668
1664 1669 def _cachesegment(self, offset, data):
1665 1670 """Add a segment to the revlog cache.
1666 1671
1667 1672 Accepts an absolute offset and the data that is at that location.
1668 1673 """
1669 1674 o, d = self._chunkcache
1670 1675 # try to add to existing cache
1671 1676 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1672 1677 self._chunkcache = o, d + data
1673 1678 else:
1674 1679 self._chunkcache = offset, data
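
# Illustration of the append-vs-replace rule above (hypothetical
# offsets):
#
#   cache == (100, b'abc')                          # bytes 100..102
#   _cachesegment(103, b'de') -> (100, b'abcde')    # contiguous: extend
#   _cachesegment(200, b'xy') -> (200, b'xy')       # gap: start over
#
# extension is also skipped once the combined size would reach
# _chunksize (1 MiB), which keeps the cache bounded.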
1675 1680
1676 1681 def _readsegment(self, offset, length, df=None):
1677 1682 """Load a segment of raw data from the revlog.
1678 1683
1679 1684 Accepts an absolute offset, length to read, and an optional existing
1680 1685 file handle to read from.
1681 1686
1682 1687 If an existing file handle is passed, it will be seeked and the
1683 1688 original seek position will NOT be restored.
1684 1689
1685 1690 Returns a str or buffer of raw byte data.
1686 1691
1687 1692 Raises if the requested number of bytes could not be read.
1688 1693 """
1689 1694 # Cache data both forward and backward around the requested
1690 1695 # data, in a fixed size window. This helps speed up operations
1691 1696 # involving reading the revlog backwards.
1692 1697 cachesize = self._chunkcachesize
1693 1698 realoffset = offset & ~(cachesize - 1)
1694 1699 reallength = (
1695 1700 (offset + length + cachesize) & ~(cachesize - 1)
1696 1701 ) - realoffset
1697 1702 with self._datareadfp(df) as df:
1698 1703 df.seek(realoffset)
1699 1704 d = df.read(reallength)
1700 1705
1701 1706 self._cachesegment(realoffset, d)
1702 1707 if offset != realoffset or reallength != length:
1703 1708 startoffset = offset - realoffset
1704 1709 if len(d) - startoffset < length:
1705 1710 raise error.RevlogError(
1706 1711 _(
1707 1712 b'partial read of revlog %s; expected %d bytes from '
1708 1713 b'offset %d, got %d'
1709 1714 )
1710 1715 % (
1711 1716 self._indexfile if self._inline else self._datafile,
1712 1717 length,
1713 1718 offset,
1714 1719 len(d) - startoffset,
1715 1720 )
1716 1721 )
1717 1722
1718 1723 return util.buffer(d, startoffset, length)
1719 1724
1720 1725 if len(d) < length:
1721 1726 raise error.RevlogError(
1722 1727 _(
1723 1728 b'partial read of revlog %s; expected %d bytes from offset '
1724 1729 b'%d, got %d'
1725 1730 )
1726 1731 % (
1727 1732 self._indexfile if self._inline else self._datafile,
1728 1733 length,
1729 1734 offset,
1730 1735 len(d),
1731 1736 )
1732 1737 )
1733 1738
1734 1739 return d
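
# Worked example of the window arithmetic above, assuming a chunk cache
# size of 65536 (the actual value is configurable):
#
#   offset, length = 70000, 100
#   realoffset  = 70000 & ~65535                             -> 65536
#   reallength  = ((70000 + 100 + 65536) & ~65535) - 65536   -> 65536
#
# so one aligned 64k window is read and cached, and the requested 100
# bytes are returned as a zero-copy buffer into it.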
1735 1740
1736 1741 def _getsegment(self, offset, length, df=None):
1737 1742 """Obtain a segment of raw data from the revlog.
1738 1743
1739 1744 Accepts an absolute offset, length of bytes to obtain, and an
1740 1745 optional file handle to the already-opened revlog. If the file
1741 1746 handle is used, its original seek position will not be preserved.
1742 1747
1743 1748 Requests for data may be returned from a cache.
1744 1749
1745 1750 Returns a str or a buffer instance of raw byte data.
1746 1751 """
1747 1752 o, d = self._chunkcache
1748 1753 l = len(d)
1749 1754
1750 1755 # is it in the cache?
1751 1756 cachestart = offset - o
1752 1757 cacheend = cachestart + length
1753 1758 if cachestart >= 0 and cacheend <= l:
1754 1759 if cachestart == 0 and cacheend == l:
1755 1760 return d # avoid a copy
1756 1761 return util.buffer(d, cachestart, cacheend - cachestart)
1757 1762
1758 1763 return self._readsegment(offset, length, df=df)
1759 1764
1760 1765 def _getsegmentforrevs(self, startrev, endrev, df=None):
1761 1766 """Obtain a segment of raw data corresponding to a range of revisions.
1762 1767
1763 1768 Accepts the start and end revisions and an optional already-open
1764 1769 file handle to be used for reading. If the file handle is used, its
1765 1770 seek position will not be preserved.
1766 1771
1767 1772 Requests for data may be satisfied by a cache.
1768 1773
1769 1774 Returns a 2-tuple of (offset, data) for the requested range of
1770 1775 revisions. Offset is the integer offset from the beginning of the
1771 1776 revlog and data is a str or buffer of the raw byte data.
1772 1777
1773 1778 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1774 1779 to determine where each revision's data begins and ends.
1775 1780 """
1776 1781 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1777 1782 # (functions are expensive).
1778 1783 index = self.index
1779 1784 istart = index[startrev]
1780 1785 start = int(istart[0] >> 16)
1781 1786 if startrev == endrev:
1782 1787 end = start + istart[1]
1783 1788 else:
1784 1789 iend = index[endrev]
1785 1790 end = int(iend[0] >> 16) + iend[1]
1786 1791
1787 1792 if self._inline:
1788 1793 start += (startrev + 1) * self.index.entry_size
1789 1794 end += (endrev + 1) * self.index.entry_size
1790 1795 length = end - start
1791 1796
1792 1797 return start, self._getsegment(start, length, df=df)
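
# Sketch of the inline adjustment above, assuming a 64-byte index entry
# (revlog v1). An inline revlog interleaves index entries and data:
#
#   [entry 0][data 0][entry 1][data 1]...
#
# so the data of revision r starts at start(r) + (r + 1) * 64, because
# r + 1 index entries precede it in the combined file.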
1793 1798
1794 1799 def _chunk(self, rev, df=None):
1795 1800 """Obtain a single decompressed chunk for a revision.
1796 1801
1797 1802 Accepts an integer revision and an optional already-open file handle
1798 1803 to be used for reading. If used, the seek position of the file will not
1799 1804 be preserved.
1800 1805
1801 1806 Returns a str holding uncompressed data for the requested revision.
1802 1807 """
1803 1808 compression_mode = self.index[rev][10]
1804 1809 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1805 1810 if compression_mode == COMP_MODE_PLAIN:
1806 1811 return data
1807 1812 elif compression_mode == COMP_MODE_DEFAULT:
1808 1813 return self._decompressor(data)
1809 1814 elif compression_mode == COMP_MODE_INLINE:
1810 1815 return self.decompress(data)
1811 1816 else:
1812 1817 msg = 'unknown compression mode %d'
1813 1818 msg %= compression_mode
1814 1819 raise error.RevlogError(msg)
1815 1820
1816 1821 def _chunks(self, revs, df=None, targetsize=None):
1817 1822 """Obtain decompressed chunks for the specified revisions.
1818 1823
1819 1824 Accepts an iterable of numeric revisions that are assumed to be in
1820 1825 ascending order. Also accepts an optional already-open file handle
1821 1826 to be used for reading. If used, the seek position of the file will
1822 1827 not be preserved.
1823 1828
1824 1829 This function is similar to calling ``self._chunk()`` multiple times,
1825 1830 but is faster.
1826 1831
1827 1832 Returns a list with decompressed data for each requested revision.
1828 1833 """
1829 1834 if not revs:
1830 1835 return []
1831 1836 start = self.start
1832 1837 length = self.length
1833 1838 inline = self._inline
1834 1839 iosize = self.index.entry_size
1835 1840 buffer = util.buffer
1836 1841
1837 1842 l = []
1838 1843 ladd = l.append
1839 1844
1840 1845 if not self._withsparseread:
1841 1846 slicedchunks = (revs,)
1842 1847 else:
1843 1848 slicedchunks = deltautil.slicechunk(
1844 1849 self, revs, targetsize=targetsize
1845 1850 )
1846 1851
1847 1852 for revschunk in slicedchunks:
1848 1853 firstrev = revschunk[0]
1849 1854 # Skip trailing revisions with empty diff
1850 1855 for lastrev in revschunk[::-1]:
1851 1856 if length(lastrev) != 0:
1852 1857 break
1853 1858
1854 1859 try:
1855 1860 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1856 1861 except OverflowError:
1857 1862 # issue4215 - we can't cache a run of chunks greater than
1858 1863 # 2G on Windows
1859 1864 return [self._chunk(rev, df=df) for rev in revschunk]
1860 1865
1861 1866 decomp = self.decompress
1862 1867 # self._decompressor might be None, but will not be used in that case
1863 1868 def_decomp = self._decompressor
1864 1869 for rev in revschunk:
1865 1870 chunkstart = start(rev)
1866 1871 if inline:
1867 1872 chunkstart += (rev + 1) * iosize
1868 1873 chunklength = length(rev)
1869 1874 comp_mode = self.index[rev][10]
1870 1875 c = buffer(data, chunkstart - offset, chunklength)
1871 1876 if comp_mode == COMP_MODE_PLAIN:
1872 1877 ladd(c)
1873 1878 elif comp_mode == COMP_MODE_INLINE:
1874 1879 ladd(decomp(c))
1875 1880 elif comp_mode == COMP_MODE_DEFAULT:
1876 1881 ladd(def_decomp(c))
1877 1882 else:
1878 1883 msg = 'unknown compression mode %d'
1879 1884 msg %= comp_mode
1880 1885 raise error.RevlogError(msg)
1881 1886
1882 1887 return l
1883 1888
1884 1889 def _chunkclear(self):
1885 1890 """Clear the raw chunk cache."""
1886 1891 self._chunkcache = (0, b'')
1887 1892
1888 1893 def deltaparent(self, rev):
1889 1894 """return deltaparent of the given revision"""
1890 1895 base = self.index[rev][3]
1891 1896 if base == rev:
1892 1897 return nullrev
1893 1898 elif self._generaldelta:
1894 1899 return base
1895 1900 else:
1896 1901 return rev - 1
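
# The three cases above, summarized (illustrative):
#
#   base == rev         -> nullrev   (full text, no delta parent)
#   generaldelta on     -> base      (delta parent stored explicitly)
#   generaldelta off    -> rev - 1   (deltas always chain to the
#                                     previous revision)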
1897 1902
1898 1903 def issnapshot(self, rev):
1899 1904 """tells whether rev is a snapshot"""
1900 1905 if not self._sparserevlog:
1901 1906 return self.deltaparent(rev) == nullrev
1902 1907 elif util.safehasattr(self.index, b'issnapshot'):
1903 1908 # directly assign the method to cache the testing and access
1904 1909 self.issnapshot = self.index.issnapshot
1905 1910 return self.issnapshot(rev)
1906 1911 if rev == nullrev:
1907 1912 return True
1908 1913 entry = self.index[rev]
1909 1914 base = entry[3]
1910 1915 if base == rev:
1911 1916 return True
1912 1917 if base == nullrev:
1913 1918 return True
1914 1919 p1 = entry[5]
1915 1920 p2 = entry[6]
1916 1921 if base == p1 or base == p2:
1917 1922 return False
1918 1923 return self.issnapshot(base)
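
# Illustration of the recursion above (pure-python path): a revision is
# an intermediate snapshot when its base is itself a snapshot rather
# than one of its parents, e.g.
#
#   rev 0 (full) <- rev 3 (snapshot against 0) <- rev 5 (delta against 3)
#
# issnapshot(3) is True when 0 is not a parent of 3, and issnapshot(5)
# is False when 3 is one of 5's parents.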
1919 1924
1920 1925 def snapshotdepth(self, rev):
1921 1926 """number of snapshot in the chain before this one"""
1922 1927 if not self.issnapshot(rev):
1923 1928 raise error.ProgrammingError(b'revision %d not a snapshot')
1924 1929 return len(self._deltachain(rev)[0]) - 1
1925 1930
1926 1931 def revdiff(self, rev1, rev2):
1927 1932 """return or calculate a delta between two revisions
1928 1933
1929 1934 The delta calculated is in binary form and is intended to be written to
1930 1935 revlog data directly. So this function needs raw revision data.
1931 1936 """
1932 1937 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1933 1938 return bytes(self._chunk(rev2))
1934 1939
1935 1940 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1936 1941
1937 1942 def _processflags(self, text, flags, operation, raw=False):
1938 1943 """deprecated entry point to access flag processors"""
1939 1944 msg = b'_processflag(...) use the specialized variant'
1940 1945 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1941 1946 if raw:
1942 1947 return text, flagutil.processflagsraw(self, text, flags)
1943 1948 elif operation == b'read':
1944 1949 return flagutil.processflagsread(self, text, flags)
1945 1950 else: # write operation
1946 1951 return flagutil.processflagswrite(self, text, flags)
1947 1952
1948 1953 def revision(self, nodeorrev, _df=None, raw=False):
1949 1954 """return an uncompressed revision of a given node or revision
1950 1955 number.
1951 1956
1952 1957 _df - an existing file handle to read from. (internal-only)
1953 1958 raw - an optional argument specifying if the revision data is to be
1954 1959 treated as raw data when applying flag transforms. 'raw' should be set
1955 1960 to True when generating changegroups or in debug commands.
1956 1961 """
1957 1962 if raw:
1958 1963 msg = (
1959 1964 b'revlog.revision(..., raw=True) is deprecated, '
1960 1965 b'use revlog.rawdata(...)'
1961 1966 )
1962 1967 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1963 1968 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
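
# Typical read-side usage (sketch; ``rl`` is an existing instance):
#
#   text = rl.revision(node)   # flag processors applied
#   raw  = rl.rawdata(node)    # exact stored bytes, hash-checkable
#
# both accept either a binary nodeid or an integer revision number.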
1964 1969
1965 1970 def sidedata(self, nodeorrev, _df=None):
1966 1971 """a map of extra data related to the changeset but not part of the hash
1967 1972
1968 1973 This function currently returns a dictionary. However, a more advanced
1969 1974 mapping object will likely be used in the future for more
1970 1975 efficient/lazy code.
1971 1976 """
1972 1977 return self._revisiondata(nodeorrev, _df)[1]
1973 1978
1974 1979 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1975 1980 # deal with <nodeorrev> argument type
1976 1981 if isinstance(nodeorrev, int):
1977 1982 rev = nodeorrev
1978 1983 node = self.node(rev)
1979 1984 else:
1980 1985 node = nodeorrev
1981 1986 rev = None
1982 1987
1983 1988 # fast path the special `nullid` rev
1984 1989 if node == self.nullid:
1985 1990 return b"", {}
1986 1991
1987 1992 # ``rawtext`` is the text as stored inside the revlog. Might be the
1988 1993 # revision or might need to be processed to retrieve the revision.
1989 1994 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1990 1995
1991 1996 if self.hassidedata:
1992 1997 if rev is None:
1993 1998 rev = self.rev(node)
1994 1999 sidedata = self._sidedata(rev)
1995 2000 else:
1996 2001 sidedata = {}
1997 2002
1998 2003 if raw and validated:
1999 2004 # if we don't want to process the raw text and the raw
2000 2005 # text is cached, we can exit early.
2001 2006 return rawtext, sidedata
2002 2007 if rev is None:
2003 2008 rev = self.rev(node)
2004 2009 # the revlog's flags for this revision
2005 2010 # (they usually alter its state or content)
2006 2011 flags = self.flags(rev)
2007 2012
2008 2013 if validated and flags == REVIDX_DEFAULT_FLAGS:
2009 2014 # no extra flags set, no flag processor runs, text = rawtext
2010 2015 return rawtext, sidedata
2011 2016
2012 2017 if raw:
2013 2018 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2014 2019 text = rawtext
2015 2020 else:
2016 2021 r = flagutil.processflagsread(self, rawtext, flags)
2017 2022 text, validatehash = r
2018 2023 if validatehash:
2019 2024 self.checkhash(text, node, rev=rev)
2020 2025 if not validated:
2021 2026 self._revisioncache = (node, rev, rawtext)
2022 2027
2023 2028 return text, sidedata
2024 2029
2025 2030 def _rawtext(self, node, rev, _df=None):
2026 2031 """return the possibly unvalidated rawtext for a revision
2027 2032
2028 2033 returns (rev, rawtext, validated)
2029 2034 """
2030 2035
2031 2036 # revision in the cache (could be useful to apply delta)
2032 2037 cachedrev = None
2033 2038 # An intermediate text to apply deltas to
2034 2039 basetext = None
2035 2040
2036 2041 # Check if we have the entry in cache
2037 2042 # The cache entry looks like (node, rev, rawtext)
2038 2043 if self._revisioncache:
2039 2044 if self._revisioncache[0] == node:
2040 2045 return (rev, self._revisioncache[2], True)
2041 2046 cachedrev = self._revisioncache[1]
2042 2047
2043 2048 if rev is None:
2044 2049 rev = self.rev(node)
2045 2050
2046 2051 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2047 2052 if stopped:
2048 2053 basetext = self._revisioncache[2]
2049 2054
2050 2055 # drop cache to save memory; the caller is expected to
2051 2056 # update self._revisioncache after validating the text
2052 2057 self._revisioncache = None
2053 2058
2054 2059 targetsize = None
2055 2060 rawsize = self.index[rev][2]
2056 2061 if 0 <= rawsize:
2057 2062 targetsize = 4 * rawsize
2058 2063
2059 2064 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2060 2065 if basetext is None:
2061 2066 basetext = bytes(bins[0])
2062 2067 bins = bins[1:]
2063 2068
2064 2069 rawtext = mdiff.patches(basetext, bins)
2065 2070 del basetext # let us have a chance to free memory early
2066 2071 return (rev, rawtext, False)
2067 2072
2068 2073 def _sidedata(self, rev):
2069 2074 """Return the sidedata for a given revision number."""
2070 2075 index_entry = self.index[rev]
2071 2076 sidedata_offset = index_entry[8]
2072 2077 sidedata_size = index_entry[9]
2073 2078
2074 2079 if self._inline:
2075 2080 sidedata_offset += self.index.entry_size * (1 + rev)
2076 2081 if sidedata_size == 0:
2077 2082 return {}
2078 2083
2079 2084 comp_segment = self._getsegment(sidedata_offset, sidedata_size)
2080 2085 comp = self.index[rev][11]
2081 2086 if comp == COMP_MODE_PLAIN:
2082 2087 segment = comp_segment
2083 2088 elif comp == COMP_MODE_DEFAULT:
2084 2089 segment = self._decompressor(comp_segment)
2085 2090 elif comp == COMP_MODE_INLINE:
2086 2091 segment = self.decompress(comp_segment)
2087 2092 else:
2088 2093 msg = 'unknown compression mode %d'
2089 2094 msg %= comp
2090 2095 raise error.RevlogError(msg)
2091 2096
2092 2097 sidedata = sidedatautil.deserialize_sidedata(segment)
2093 2098 return sidedata
2094 2099
2095 2100 def rawdata(self, nodeorrev, _df=None):
2096 2101 """return an uncompressed raw data of a given node or revision number.
2097 2102
2098 2103 _df - an existing file handle to read from. (internal-only)
2099 2104 """
2100 2105 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2101 2106
2102 2107 def hash(self, text, p1, p2):
2103 2108 """Compute a node hash.
2104 2109
2105 2110 Available as a function so that subclasses can replace the hash
2106 2111 as needed.
2107 2112 """
2108 2113 return storageutil.hashrevisionsha1(text, p1, p2)
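
# For reference, storageutil.hashrevisionsha1 computes (as far as this
# sketch is concerned) the classic Mercurial node:
#
#   sha1(min(p1, p2) + max(p1, p2) + text)
#
# the parents are sorted first, so the hash is symmetric in p1/p2.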
2109 2114
2110 2115 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2111 2116 """Check node hash integrity.
2112 2117
2113 2118 Available as a function so that subclasses can extend hash mismatch
2114 2119 behaviors as needed.
2115 2120 """
2116 2121 try:
2117 2122 if p1 is None and p2 is None:
2118 2123 p1, p2 = self.parents(node)
2119 2124 if node != self.hash(text, p1, p2):
2120 2125 # Clear the revision cache on hash failure. The revision cache
2121 2126 # only stores the raw revision and clearing the cache does have
2122 2127 # the side-effect that we won't have a cache hit when the raw
2123 2128 # revision data is accessed. But this case should be rare and
2124 2129 # it is extra work to teach the cache about the hash
2125 2130 # verification state.
2126 2131 if self._revisioncache and self._revisioncache[0] == node:
2127 2132 self._revisioncache = None
2128 2133
2129 2134 revornode = rev
2130 2135 if revornode is None:
2131 2136 revornode = templatefilters.short(hex(node))
2132 2137 raise error.RevlogError(
2133 2138 _(b"integrity check failed on %s:%s")
2134 2139 % (self.display_id, pycompat.bytestr(revornode))
2135 2140 )
2136 2141 except error.RevlogError:
2137 2142 if self._censorable and storageutil.iscensoredtext(text):
2138 2143 raise error.CensoredNodeError(self.display_id, node, text)
2139 2144 raise
2140 2145
2141 2146 def _enforceinlinesize(self, tr):
2142 2147 """Check if the revlog is too big for inline and convert if so.
2143 2148
2144 2149 This should be called after revisions are added to the revlog. If the
2145 2150 revlog has grown too large to be an inline revlog, it will convert it
2146 2151 to use multiple index and data files.
2147 2152 """
2148 2153 tiprev = len(self) - 1
2149 2154 total_size = self.start(tiprev) + self.length(tiprev)
2150 2155 if not self._inline or total_size < _maxinline:
2151 2156 return
2152 2157
2153 2158 troffset = tr.findoffset(self._indexfile)
2154 2159 if troffset is None:
2155 2160 raise error.RevlogError(
2156 2161 _(b"%s not found in the transaction") % self._indexfile
2157 2162 )
2158 2163 trindex = 0
2159 2164 tr.add(self._datafile, 0)
2160 2165
2161 2166 existing_handles = False
2162 2167 if self._writinghandles is not None:
2163 2168 existing_handles = True
2164 2169 fp = self._writinghandles[0]
2165 2170 fp.flush()
2166 2171 fp.close()
2167 2172 # We can't use the cached file handle after close(). So prevent
2168 2173 # its usage.
2169 2174 self._writinghandles = None
2170 2175
2171 2176 new_dfh = self._datafp(b'w+')
2172 2177 new_dfh.truncate(0) # drop any potentially existing data
2173 2178 try:
2174 2179 with self._indexfp() as read_ifh:
2175 2180 for r in self:
2176 2181 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2177 2182 if troffset <= self.start(r) + r * self.index.entry_size:
2178 2183 trindex = r
2179 2184 new_dfh.flush()
2180 2185
2181 2186 with self.__index_new_fp() as fp:
2182 2187 self._format_flags &= ~FLAG_INLINE_DATA
2183 2188 self._inline = False
2184 2189 for i in self:
2185 2190 e = self.index.entry_binary(i)
2186 2191 if i == 0 and self._docket is None:
2187 2192 header = self._format_flags | self._format_version
2188 2193 header = self.index.pack_header(header)
2189 2194 e = header + e
2190 2195 fp.write(e)
2191 2196 if self._docket is not None:
2192 2197 self._docket.index_end = fp.tell()
2193 2198
2194 2199 # There is a small transactional race here. If the rename of
2195 2200 # the index fails, we should remove the datafile. It is more
2196 2201 # important to ensure that the data file is not truncated
2197 2202 # when the index is replaced as otherwise data is lost.
2198 2203 tr.replace(self._datafile, self.start(trindex))
2199 2204
2200 2205 # the temp file replaces the real index when we exit the context
2201 2206 # manager
2202 2207
2203 2208 tr.replace(self._indexfile, trindex * self.index.entry_size)
2204 2209 nodemaputil.setup_persistent_nodemap(tr, self)
2205 2210 self._chunkclear()
2206 2211
2207 2212 if existing_handles:
2208 2213 # switched from inline to conventional; reopen the index
2209 2214 ifh = self.__index_write_fp()
2210 2215 self._writinghandles = (ifh, new_dfh)
2211 2216 new_dfh = None
2212 2217 finally:
2213 2218 if new_dfh is not None:
2214 2219 new_dfh.close()
2215 2220
2216 2221 def _nodeduplicatecallback(self, transaction, node):
2217 2222 """called when trying to add a node already stored."""
2218 2223
2219 2224 @contextlib.contextmanager
2220 2225 def _writing(self, transaction):
2221 2226 if self._trypending:
2222 2227 msg = b'try to write in a `trypending` revlog: %s'
2223 2228 msg %= self.display_id
2224 2229 raise error.ProgrammingError(msg)
2225 2230 if self._writinghandles is not None:
2226 2231 yield
2227 2232 else:
2228 2233 r = len(self)
2229 2234 dsize = 0
2230 2235 if r:
2231 2236 dsize = self.end(r - 1)
2232 2237 dfh = None
2233 2238 if not self._inline:
2234 2239 try:
2235 2240 dfh = self._datafp(b"r+")
2236 2241 if self._docket is None:
2237 2242 dfh.seek(0, os.SEEK_END)
2238 2243 else:
2239 2244 dfh.seek(self._docket.data_end, os.SEEK_SET)
2240 2245 except IOError as inst:
2241 2246 if inst.errno != errno.ENOENT:
2242 2247 raise
2243 2248 dfh = self._datafp(b"w+")
2244 2249 transaction.add(self._datafile, dsize)
2245 2250 try:
2246 2251 isize = r * self.index.entry_size
2247 2252 ifh = self.__index_write_fp()
2248 2253 if self._inline:
2249 2254 transaction.add(self._indexfile, dsize + isize)
2250 2255 else:
2251 2256 transaction.add(self._indexfile, isize)
2252 2257 try:
2253 2258 self._writinghandles = (ifh, dfh)
2254 2259 try:
2255 2260 yield
2256 2261 if self._docket is not None:
2257 2262 self._write_docket(transaction)
2258 2263 finally:
2259 2264 self._writinghandles = None
2260 2265 finally:
2261 2266 ifh.close()
2262 2267 finally:
2263 2268 if dfh is not None:
2264 2269 dfh.close()
2265 2270
2266 2271 def _write_docket(self, transaction):
2267 2272 """write the current docket on disk
2268 2273
2269 2274 Exists as a method so that the changelog can implement its own transaction logic
2270 2275 
2271 2276 We could also imagine using the same transaction logic for all revlogs
2272 2277 since dockets are cheap."""
2273 2278 self._docket.write(transaction)
2274 2279
2275 2280 def addrevision(
2276 2281 self,
2277 2282 text,
2278 2283 transaction,
2279 2284 link,
2280 2285 p1,
2281 2286 p2,
2282 2287 cachedelta=None,
2283 2288 node=None,
2284 2289 flags=REVIDX_DEFAULT_FLAGS,
2285 2290 deltacomputer=None,
2286 2291 sidedata=None,
2287 2292 ):
2288 2293 """add a revision to the log
2289 2294
2290 2295 text - the revision data to add
2291 2296 transaction - the transaction object used for rollback
2292 2297 link - the linkrev data to add
2293 2298 p1, p2 - the parent nodeids of the revision
2294 2299 cachedelta - an optional precomputed delta
2295 2300 node - nodeid of revision; typically node is not specified, and it is
2296 2301 computed by default as hash(text, p1, p2), however subclasses might
2297 2302 use a different hashing method (and override checkhash() in that case)
2298 2303 flags - the known flags to set on the revision
2299 2304 deltacomputer - an optional deltacomputer instance shared between
2300 2305 multiple calls
2301 2306 """
2302 2307 if link == nullrev:
2303 2308 raise error.RevlogError(
2304 2309 _(b"attempted to add linkrev -1 to %s") % self.display_id
2305 2310 )
2306 2311
2307 2312 if sidedata is None:
2308 2313 sidedata = {}
2309 2314 elif sidedata and not self.hassidedata:
2310 2315 raise error.ProgrammingError(
2311 2316 _(b"trying to add sidedata to a revlog who don't support them")
2312 2317 )
2313 2318
2314 2319 if flags:
2315 2320 node = node or self.hash(text, p1, p2)
2316 2321
2317 2322 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2318 2323
2319 2324 # If the flag processor modifies the revision data, ignore any provided
2320 2325 # cachedelta.
2321 2326 if rawtext != text:
2322 2327 cachedelta = None
2323 2328
2324 2329 if len(rawtext) > _maxentrysize:
2325 2330 raise error.RevlogError(
2326 2331 _(
2327 2332 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2328 2333 )
2329 2334 % (self.display_id, len(rawtext))
2330 2335 )
2331 2336
2332 2337 node = node or self.hash(rawtext, p1, p2)
2333 2338 rev = self.index.get_rev(node)
2334 2339 if rev is not None:
2335 2340 return rev
2336 2341
2337 2342 if validatehash:
2338 2343 self.checkhash(rawtext, node, p1=p1, p2=p2)
2339 2344
2340 2345 return self.addrawrevision(
2341 2346 rawtext,
2342 2347 transaction,
2343 2348 link,
2344 2349 p1,
2345 2350 p2,
2346 2351 node,
2347 2352 flags,
2348 2353 cachedelta=cachedelta,
2349 2354 deltacomputer=deltacomputer,
2350 2355 sidedata=sidedata,
2351 2356 )
2352 2357
2353 2358 def addrawrevision(
2354 2359 self,
2355 2360 rawtext,
2356 2361 transaction,
2357 2362 link,
2358 2363 p1,
2359 2364 p2,
2360 2365 node,
2361 2366 flags,
2362 2367 cachedelta=None,
2363 2368 deltacomputer=None,
2364 2369 sidedata=None,
2365 2370 ):
2366 2371 """add a raw revision with known flags, node and parents
2367 2372 useful when reusing a revision not stored in this revlog (e.g. received
2368 2373 over the wire, or read from an external bundle).
2369 2374 """
2370 2375 with self._writing(transaction):
2371 2376 return self._addrevision(
2372 2377 node,
2373 2378 rawtext,
2374 2379 transaction,
2375 2380 link,
2376 2381 p1,
2377 2382 p2,
2378 2383 flags,
2379 2384 cachedelta,
2380 2385 deltacomputer=deltacomputer,
2381 2386 sidedata=sidedata,
2382 2387 )
2383 2388
2384 2389 def compress(self, data):
2385 2390 """Generate a possibly-compressed representation of data."""
2386 2391 if not data:
2387 2392 return b'', data
2388 2393
2389 2394 compressed = self._compressor.compress(data)
2390 2395
2391 2396 if compressed:
2392 2397 # The revlog compressor added the header in the returned data.
2393 2398 return b'', compressed
2394 2399
2395 2400 if data[0:1] == b'\0':
2396 2401 return b'', data
2397 2402 return b'u', data
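
# Summary of the (header, data) pairs returned above:
#
#   (b'', b'')           empty input stays empty
#   (b'', <compressed>)  engine output carries its own header
#   (b'', <raw>)         raw data starting with b'\0' is self-marking
#   (b'u', <raw>)        anything else gets an explicit 'u' tag
#
# decompress() below dispatches on exactly these leading bytes.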
2398 2403
2399 2404 def decompress(self, data):
2400 2405 """Decompress a revlog chunk.
2401 2406
2402 2407 The chunk is expected to begin with a header identifying the
2403 2408 format type so it can be routed to an appropriate decompressor.
2404 2409 """
2405 2410 if not data:
2406 2411 return data
2407 2412
2408 2413 # Revlogs are read much more frequently than they are written and many
2409 2414 # chunks only take microseconds to decompress, so performance is
2410 2415 # important here.
2411 2416 #
2412 2417 # We can make a few assumptions about revlogs:
2413 2418 #
2414 2419 # 1) the majority of chunks will be compressed (as opposed to inline
2415 2420 # raw data).
2416 2421 # 2) decompressing *any* data will likely be at least 10x slower than
2417 2422 # returning raw inline data.
2418 2423 # 3) we want to prioritize common and officially supported compression
2419 2424 # engines
2420 2425 #
2421 2426 # It follows that we want to optimize for "decompress compressed data
2422 2427 # when encoded with common and officially supported compression engines"
2423 2428 # case over "raw data" and "data encoded by less common or non-official
2424 2429 # compression engines." That is why we have the inline lookup first
2425 2430 # followed by the compengines lookup.
2426 2431 #
2427 2432 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2428 2433 # compressed chunks. And this matters for changelog and manifest reads.
2429 2434 t = data[0:1]
2430 2435
2431 2436 if t == b'x':
2432 2437 try:
2433 2438 return _zlibdecompress(data)
2434 2439 except zlib.error as e:
2435 2440 raise error.RevlogError(
2436 2441 _(b'revlog decompress error: %s')
2437 2442 % stringutil.forcebytestr(e)
2438 2443 )
2439 2444 # '\0' is more common than 'u' so it goes first.
2440 2445 elif t == b'\0':
2441 2446 return data
2442 2447 elif t == b'u':
2443 2448 return util.buffer(data, 1)
2444 2449
2445 2450 compressor = self._get_decompressor(t)
2446 2451
2447 2452 return compressor.decompress(data)
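
# Routing table implied above, keyed on the first byte of the chunk:
#
#   b'x'   zlib stream            -> _zlibdecompress(data)
#   b'\0'  stored raw             -> data unchanged
#   b'u'   explicit uncompressed  -> util.buffer(data, 1) (tag stripped)
#   other  engine-specific header -> self._get_decompressor(t)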
2448 2453
2449 2454 def _addrevision(
2450 2455 self,
2451 2456 node,
2452 2457 rawtext,
2453 2458 transaction,
2454 2459 link,
2455 2460 p1,
2456 2461 p2,
2457 2462 flags,
2458 2463 cachedelta,
2459 2464 alwayscache=False,
2460 2465 deltacomputer=None,
2461 2466 sidedata=None,
2462 2467 ):
2463 2468 """internal function to add revisions to the log
2464 2469
2465 2470 see addrevision for argument descriptions.
2466 2471
2467 2472 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2468 2473
2469 2474 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2470 2475 be used.
2471 2476
2472 2477 invariants:
2473 2478 - rawtext is optional (can be None); if not set, cachedelta must be set.
2474 2479 if both are set, they must correspond to each other.
2475 2480 """
2476 2481 if node == self.nullid:
2477 2482 raise error.RevlogError(
2478 2483 _(b"%s: attempt to add null revision") % self.display_id
2479 2484 )
2480 2485 if (
2481 2486 node == self.nodeconstants.wdirid
2482 2487 or node in self.nodeconstants.wdirfilenodeids
2483 2488 ):
2484 2489 raise error.RevlogError(
2485 2490 _(b"%s: attempt to add wdir revision") % self.display_id
2486 2491 )
2487 2492 if self._writinghandles is None:
2488 2493 msg = b'adding revision outside `revlog._writing` context'
2489 2494 raise error.ProgrammingError(msg)
2490 2495
2491 2496 if self._inline:
2492 2497 fh = self._writinghandles[0]
2493 2498 else:
2494 2499 fh = self._writinghandles[1]
2495 2500
2496 2501 btext = [rawtext]
2497 2502
2498 2503 curr = len(self)
2499 2504 prev = curr - 1
2500 2505
2501 2506 offset = self._get_data_offset(prev)
2502 2507
2503 2508 if self._concurrencychecker:
2504 2509 ifh, dfh = self._writinghandles
2505 2510 if self._inline:
2506 2511 # offset is "as if" it were in the .d file, so we need to add on
2507 2512 # the size of the entry metadata.
2508 2513 self._concurrencychecker(
2509 2514 ifh, self._indexfile, offset + curr * self.index.entry_size
2510 2515 )
2511 2516 else:
2512 2517 # Entries in the .i are a consistent size.
2513 2518 self._concurrencychecker(
2514 2519 ifh, self._indexfile, curr * self.index.entry_size
2515 2520 )
2516 2521 self._concurrencychecker(dfh, self._datafile, offset)
2517 2522
2518 2523 p1r, p2r = self.rev(p1), self.rev(p2)
2519 2524
2520 2525 # full versions are inserted when the needed deltas
2521 2526 # become comparable to the uncompressed text
2522 2527 if rawtext is None:
2523 2528 # need the rawtext size before it is changed by flag processors; this
2524 2529 # is the non-raw size. use revlog explicitly to avoid filelog's extra
2525 2530 # logic that might remove metadata size.
2526 2531 textlen = mdiff.patchedsize(
2527 2532 revlog.size(self, cachedelta[0]), cachedelta[1]
2528 2533 )
2529 2534 else:
2530 2535 textlen = len(rawtext)
2531 2536
2532 2537 if deltacomputer is None:
2533 2538 deltacomputer = deltautil.deltacomputer(self)
2534 2539
2535 2540 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2536 2541
2537 2542 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2538 2543
2539 2544 compression_mode = COMP_MODE_INLINE
2540 2545 if self._docket is not None:
2541 2546 h, d = deltainfo.data
2542 2547 if not h and not d:
2543 2548 # no data to store at all... declare it uncompressed
2544 2549 compression_mode = COMP_MODE_PLAIN
2545 2550 elif not h:
2546 2551 t = d[0:1]
2547 2552 if t == b'\0':
2548 2553 compression_mode = COMP_MODE_PLAIN
2549 2554 elif t == self._docket.default_compression_header:
2550 2555 compression_mode = COMP_MODE_DEFAULT
2551 2556 elif h == b'u':
2552 2557 # we have a more efficient way to declare uncompressed
2553 2558 h = b''
2554 2559 compression_mode = COMP_MODE_PLAIN
2555 2560 deltainfo = deltautil.drop_u_compression(deltainfo)
2556 2561
2557 2562 sidedata_compression_mode = COMP_MODE_INLINE
2558 2563 if sidedata and self.hassidedata:
2559 2564 sidedata_compression_mode = COMP_MODE_PLAIN
2560 2565 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2561 2566 sidedata_offset = offset + deltainfo.deltalen
2562 2567 h, comp_sidedata = self.compress(serialized_sidedata)
2563 2568 if (
2564 2569 h != b'u'
2565 2570 and comp_sidedata[0:1] != b'\0'
2566 2571 and len(comp_sidedata) < len(serialized_sidedata)
2567 2572 ):
2568 2573 assert not h
2569 2574 if (
2570 2575 comp_sidedata[0:1]
2571 2576 == self._docket.default_compression_header
2572 2577 ):
2573 2578 sidedata_compression_mode = COMP_MODE_DEFAULT
2574 2579 serialized_sidedata = comp_sidedata
2575 2580 else:
2576 2581 sidedata_compression_mode = COMP_MODE_INLINE
2577 2582 serialized_sidedata = comp_sidedata
2578 2583 else:
2579 2584 serialized_sidedata = b""
2580 2585 # Don't store the offset if the sidedata is empty; that way
2581 2586 # we can easily detect empty sidedata, and it will be no different
2582 2587 # from sidedata we add manually.
2583 2588 sidedata_offset = 0
2584 2589
2585 2590 e = (
2586 2591 offset_type(offset, flags),
2587 2592 deltainfo.deltalen,
2588 2593 textlen,
2589 2594 deltainfo.base,
2590 2595 link,
2591 2596 p1r,
2592 2597 p2r,
2593 2598 node,
2594 2599 sidedata_offset,
2595 2600 len(serialized_sidedata),
2596 2601 compression_mode,
2597 2602 sidedata_compression_mode,
2598 2603 )
2599 2604
2600 2605 self.index.append(e)
2601 2606 entry = self.index.entry_binary(curr)
2602 2607 if curr == 0 and self._docket is None:
2603 2608 header = self._format_flags | self._format_version
2604 2609 header = self.index.pack_header(header)
2605 2610 entry = header + entry
2606 2611 self._writeentry(
2607 2612 transaction,
2608 2613 entry,
2609 2614 deltainfo.data,
2610 2615 link,
2611 2616 offset,
2612 2617 serialized_sidedata,
2613 2618 )
2614 2619
2615 2620 rawtext = btext[0]
2616 2621
2617 2622 if alwayscache and rawtext is None:
2618 2623 rawtext = deltacomputer.buildtext(revinfo, fh)
2619 2624
2620 2625 if type(rawtext) == bytes: # only accept immutable objects
2621 2626 self._revisioncache = (node, curr, rawtext)
2622 2627 self._chainbasecache[curr] = deltainfo.chainbase
2623 2628 return curr
2624 2629
2625 2630 def _get_data_offset(self, prev):
2626 2631 """Returns the current offset in the (in-transaction) data file.
2627 2632 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2628 2633 file to store that information: since sidedata can be rewritten to the
2629 2634 end of the data file within a transaction, you can have cases where, for
2630 2635 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2631 2636 to `n - 1`'s sidedata being written after `n`'s data.
2632 2637
2633 2638 TODO cache this in a docket file before getting out of experimental."""
2634 2639 if self._docket is None:
2635 2640 return self.end(prev)
2636 2641 else:
2637 2642 return self._docket.data_end
2638 2643
2639 2644 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2640 2645 # Files opened in a+ mode have inconsistent behavior on various
2641 2646 # platforms. Windows requires that a file positioning call be made
2642 2647 # when the file handle transitions between reads and writes. See
2643 2648 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2644 2649 # platforms, Python or the platform itself can be buggy. Some versions
2645 2650 # of Solaris have been observed to not append at the end of the file
2646 2651 # if the file was seeked to before the end. See issue4943 for more.
2647 2652 #
2648 2653 # We work around this issue by inserting a seek() before writing.
2649 2654 # Note: This is likely not necessary on Python 3. However, because
2650 2655 # the file handle is reused for reads and may be seeked there, we need
2651 2656 # to be careful before changing this.
2652 2657 if self._writinghandles is None:
2653 2658 msg = b'adding revision outside `revlog._writing` context'
2654 2659 raise error.ProgrammingError(msg)
2655 2660 ifh, dfh = self._writinghandles
2656 2661 if self._docket is None:
2657 2662 ifh.seek(0, os.SEEK_END)
2658 2663 else:
2659 2664 ifh.seek(self._docket.index_end, os.SEEK_SET)
2660 2665 if dfh:
2661 2666 if self._docket is None:
2662 2667 dfh.seek(0, os.SEEK_END)
2663 2668 else:
2664 2669 dfh.seek(self._docket.data_end, os.SEEK_SET)
2665 2670
2666 2671 curr = len(self) - 1
2667 2672 if not self._inline:
2668 2673 transaction.add(self._datafile, offset)
2669 2674 transaction.add(self._indexfile, curr * len(entry))
2670 2675 if data[0]:
2671 2676 dfh.write(data[0])
2672 2677 dfh.write(data[1])
2673 2678 if sidedata:
2674 2679 dfh.write(sidedata)
2675 2680 ifh.write(entry)
2676 2681 else:
2677 2682 offset += curr * self.index.entry_size
2678 2683 transaction.add(self._indexfile, offset)
2679 2684 ifh.write(entry)
2680 2685 ifh.write(data[0])
2681 2686 ifh.write(data[1])
2682 2687 if sidedata:
2683 2688 ifh.write(sidedata)
2684 2689 self._enforceinlinesize(transaction)
2685 2690 if self._docket is not None:
2686 2691 self._docket.index_end = self._writinghandles[0].tell()
2687 2692 self._docket.data_end = self._writinghandles[1].tell()
2688 2693
2689 2694 nodemaputil.setup_persistent_nodemap(transaction, self)
2690 2695
2691 2696 def addgroup(
2692 2697 self,
2693 2698 deltas,
2694 2699 linkmapper,
2695 2700 transaction,
2696 2701 alwayscache=False,
2697 2702 addrevisioncb=None,
2698 2703 duplicaterevisioncb=None,
2699 2704 ):
2700 2705 """
2701 2706 add a delta group
2702 2707
2703 2708 given a set of deltas, add them to the revision log. the
2704 2709 first delta is against its parent, which should be in our
2705 2710 log; the rest are against the previous delta.
2706 2711
2707 2712 If ``addrevisioncb`` is defined, it will be called with arguments of
2708 2713 this revlog and the node that was added.
2709 2714 """
2710 2715
2711 2716 if self._adding_group:
2712 2717 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2713 2718
2714 2719 self._adding_group = True
2715 2720 empty = True
2716 2721 try:
2717 2722 with self._writing(transaction):
2718 2723 deltacomputer = deltautil.deltacomputer(self)
2719 2724 # loop through our set of deltas
2720 2725 for data in deltas:
2721 2726 (
2722 2727 node,
2723 2728 p1,
2724 2729 p2,
2725 2730 linknode,
2726 2731 deltabase,
2727 2732 delta,
2728 2733 flags,
2729 2734 sidedata,
2730 2735 ) = data
2731 2736 link = linkmapper(linknode)
2732 2737 flags = flags or REVIDX_DEFAULT_FLAGS
2733 2738
2734 2739 rev = self.index.get_rev(node)
2735 2740 if rev is not None:
2736 2741 # this can happen if two branches make the same change
2737 2742 self._nodeduplicatecallback(transaction, rev)
2738 2743 if duplicaterevisioncb:
2739 2744 duplicaterevisioncb(self, rev)
2740 2745 empty = False
2741 2746 continue
2742 2747
2743 2748 for p in (p1, p2):
2744 2749 if not self.index.has_node(p):
2745 2750 raise error.LookupError(
2746 2751 p, self.radix, _(b'unknown parent')
2747 2752 )
2748 2753
2749 2754 if not self.index.has_node(deltabase):
2750 2755 raise error.LookupError(
2751 2756 deltabase, self.display_id, _(b'unknown delta base')
2752 2757 )
2753 2758
2754 2759 baserev = self.rev(deltabase)
2755 2760
2756 2761 if baserev != nullrev and self.iscensored(baserev):
2757 2762 # if base is censored, delta must be full replacement in a
2758 2763 # single patch operation
2759 2764 hlen = struct.calcsize(b">lll")
2760 2765 oldlen = self.rawsize(baserev)
2761 2766 newlen = len(delta) - hlen
2762 2767 if delta[:hlen] != mdiff.replacediffheader(
2763 2768 oldlen, newlen
2764 2769 ):
2765 2770 raise error.CensoredBaseError(
2766 2771 self.display_id, self.node(baserev)
2767 2772 )
2768 2773
2769 2774 if not flags and self._peek_iscensored(baserev, delta):
2770 2775 flags |= REVIDX_ISCENSORED
2771 2776
2772 2777 # We assume consumers of addrevisioncb will want to retrieve
2773 2778 # the added revision, which will require a call to
2774 2779 # revision(). revision() will fast path if there is a cache
2775 2780 # hit. So, we tell _addrevision() to always cache in this case.
2776 2781 # We're only using addgroup() in the context of changegroup
2777 2782 # generation so the revision data can always be handled as raw
2778 2783 # by the flagprocessor.
2779 2784 rev = self._addrevision(
2780 2785 node,
2781 2786 None,
2782 2787 transaction,
2783 2788 link,
2784 2789 p1,
2785 2790 p2,
2786 2791 flags,
2787 2792 (baserev, delta),
2788 2793 alwayscache=alwayscache,
2789 2794 deltacomputer=deltacomputer,
2790 2795 sidedata=sidedata,
2791 2796 )
2792 2797
2793 2798 if addrevisioncb:
2794 2799 addrevisioncb(self, rev)
2795 2800 empty = False
2796 2801 finally:
2797 2802 self._adding_group = False
2798 2803 return not empty
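
# Shape of each item in ``deltas`` as unpacked above, for reference:
#
#   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
#
# a producer yields fully-resolved nodes plus a binary delta against
# deltabase, which must already exist in the revlog or appear earlier
# in the same group.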
2799 2804
2800 2805 def iscensored(self, rev):
2801 2806 """Check if a file revision is censored."""
2802 2807 if not self._censorable:
2803 2808 return False
2804 2809
2805 2810 return self.flags(rev) & REVIDX_ISCENSORED
2806 2811
2807 2812 def _peek_iscensored(self, baserev, delta):
2808 2813 """Quickly check if a delta produces a censored revision."""
2809 2814 if not self._censorable:
2810 2815 return False
2811 2816
2812 2817 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2813 2818
2814 2819 def getstrippoint(self, minlink):
2815 2820 """find the minimum rev that must be stripped to strip the linkrev
2816 2821
2817 2822 Returns a tuple containing the minimum rev and a set of all revs that
2818 2823 have linkrevs that will be broken by this strip.
2819 2824 """
2820 2825 return storageutil.resolvestripinfo(
2821 2826 minlink,
2822 2827 len(self) - 1,
2823 2828 self.headrevs(),
2824 2829 self.linkrev,
2825 2830 self.parentrevs,
2826 2831 )
2827 2832
2828 2833 def strip(self, minlink, transaction):
2829 2834 """truncate the revlog on the first revision with a linkrev >= minlink
2830 2835
2831 2836 This function is called when we're stripping revision minlink and
2832 2837 its descendants from the repository.
2833 2838
2834 2839 We have to remove all revisions with linkrev >= minlink, because
2835 2840 the equivalent changelog revisions will be renumbered after the
2836 2841 strip.
2837 2842
2838 2843 So we truncate the revlog on the first of these revisions, and
2839 2844 trust that the caller has saved the revisions that shouldn't be
2840 2845 removed and that it'll re-add them after this truncation.
2841 2846 """
2842 2847 if len(self) == 0:
2843 2848 return
2844 2849
2845 2850 rev, _ = self.getstrippoint(minlink)
2846 2851 if rev == len(self):
2847 2852 return
2848 2853
2849 2854 # first truncate the files on disk
2850 2855 data_end = self.start(rev)
2851 2856 if not self._inline:
2852 2857 transaction.add(self._datafile, data_end)
2853 2858 end = rev * self.index.entry_size
2854 2859 else:
2855 2860 end = data_end + (rev * self.index.entry_size)
2856 2861
2857 2862 transaction.add(self._indexfile, end)
2858 2863 if self._docket is not None:
2859 2864 # XXX we could leverage the docket while stripping. However it is
2860 2865 # not powerful enough at the time of this comment
2861 2866 self._docket.index_end = end
2862 2867 self._docket.data_end = data_end
2863 2868 self._docket.write(transaction, stripping=True)
2864 2869
2865 2870 # then reset internal state in memory to forget those revisions
2866 2871 self._revisioncache = None
2867 2872 self._chaininfocache = util.lrucachedict(500)
2868 2873 self._chunkclear()
2869 2874
2870 2875 del self.index[rev:-1]
2871 2876
2872 2877 def checksize(self):
2873 2878 """Check size of index and data files
2874 2879
2875 2880 return a (dd, di) tuple.
2876 2881 - dd: extra bytes for the "data" file
2877 2882 - di: extra bytes for the "index" file
2878 2883
2879 2884 A healthy revlog will return (0, 0).
2880 2885 """
2881 2886 expected = 0
2882 2887 if len(self):
2883 2888 expected = max(0, self.end(len(self) - 1))
2884 2889
2885 2890 try:
2886 2891 with self._datafp() as f:
2887 2892 f.seek(0, io.SEEK_END)
2888 2893 actual = f.tell()
2889 2894 dd = actual - expected
2890 2895 except IOError as inst:
2891 2896 if inst.errno != errno.ENOENT:
2892 2897 raise
2893 2898 dd = 0
2894 2899
2895 2900 try:
2896 2901 f = self.opener(self._indexfile)
2897 2902 f.seek(0, io.SEEK_END)
2898 2903 actual = f.tell()
2899 2904 f.close()
2900 2905 s = self.index.entry_size
2901 2906 i = max(0, actual // s)
2902 2907 di = actual - (i * s)
2903 2908 if self._inline:
2904 2909 databytes = 0
2905 2910 for r in self:
2906 2911 databytes += max(0, self.length(r))
2907 2912 dd = 0
2908 2913 di = actual - len(self) * s - databytes
2909 2914 except IOError as inst:
2910 2915 if inst.errno != errno.ENOENT:
2911 2916 raise
2912 2917 di = 0
2913 2918
2914 2919 return (dd, di)
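
# Reading the (dd, di) tuple in practice (illustrative values):
#
#   (0, 0)      healthy revlog
#   (0, 12)     12 trailing bytes in the index, e.g. a partial entry
#               left behind by an interrupted write
#   (-4096, 0)  the data file is shorter than the index claims
#
# verifyintegrity() below turns nonzero values into reported problems.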
2915 2920
2916 2921 def files(self):
2917 2922 res = [self._indexfile]
2918 2923 if not self._inline:
2919 2924 res.append(self._datafile)
2920 2925 return res
2921 2926
2922 2927 def emitrevisions(
2923 2928 self,
2924 2929 nodes,
2925 2930 nodesorder=None,
2926 2931 revisiondata=False,
2927 2932 assumehaveparentrevisions=False,
2928 2933 deltamode=repository.CG_DELTAMODE_STD,
2929 2934 sidedata_helpers=None,
2930 2935 ):
2931 2936 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2932 2937 raise error.ProgrammingError(
2933 2938 b'unhandled value for nodesorder: %s' % nodesorder
2934 2939 )
2935 2940
2936 2941 if nodesorder is None and not self._generaldelta:
2937 2942 nodesorder = b'storage'
2938 2943
2939 2944 if (
2940 2945 not self._storedeltachains
2941 2946 and deltamode != repository.CG_DELTAMODE_PREV
2942 2947 ):
2943 2948 deltamode = repository.CG_DELTAMODE_FULL
2944 2949
2945 2950 return storageutil.emitrevisions(
2946 2951 self,
2947 2952 nodes,
2948 2953 nodesorder,
2949 2954 revlogrevisiondelta,
2950 2955 deltaparentfn=self.deltaparent,
2951 2956 candeltafn=self.candelta,
2952 2957 rawsizefn=self.rawsize,
2953 2958 revdifffn=self.revdiff,
2954 2959 flagsfn=self.flags,
2955 2960 deltamode=deltamode,
2956 2961 revisiondata=revisiondata,
2957 2962 assumehaveparentrevisions=assumehaveparentrevisions,
2958 2963 sidedata_helpers=sidedata_helpers,
2959 2964 )
2960 2965
2961 2966 DELTAREUSEALWAYS = b'always'
2962 2967 DELTAREUSESAMEREVS = b'samerevs'
2963 2968 DELTAREUSENEVER = b'never'
2964 2969
2965 2970 DELTAREUSEFULLADD = b'fulladd'
2966 2971
2967 2972 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2968 2973
2969 2974 def clone(
2970 2975 self,
2971 2976 tr,
2972 2977 destrevlog,
2973 2978 addrevisioncb=None,
2974 2979 deltareuse=DELTAREUSESAMEREVS,
2975 2980 forcedeltabothparents=None,
2976 2981 sidedata_helpers=None,
2977 2982 ):
2978 2983 """Copy this revlog to another, possibly with format changes.
2979 2984
2980 2985 The destination revlog will contain the same revisions and nodes.
2981 2986 However, it may not be bit-for-bit identical due to e.g. delta encoding
2982 2987 differences.
2983 2988
2984 2989 The ``deltareuse`` argument controls how deltas from the existing revlog
2985 2990 are preserved in the destination revlog. The argument can have the
2986 2991 following values:
2987 2992
2988 2993 DELTAREUSEALWAYS
2989 2994 Deltas will always be reused (if possible), even if the destination
2990 2995 revlog would not select the same revisions for the delta. This is the
2991 2996 fastest mode of operation.
2992 2997 DELTAREUSESAMEREVS
2993 2998 Deltas will be reused if the destination revlog would pick the same
2994 2999 revisions for the delta. This mode strikes a balance between speed
2995 3000 and optimization.
2996 3001 DELTAREUSENEVER
2997 3002 Deltas will never be reused. This is the slowest mode of execution.
2998 3003 This mode can be used to recompute deltas (e.g. if the diff/delta
2999 3004 algorithm changes).
3000 3005 DELTAREUSEFULLADD
3001 3006 Revisions will be re-added as if they were new content. This is
3002 3007 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3003 3008 e.g. large file detection and handling.
3004 3009
3005 3010 Delta computation can be slow, so the choice of delta reuse policy can
3006 3011 significantly affect run time.
3007 3012
3008 3013 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3009 3014 two extremes. Deltas will be reused if they are appropriate. But if the
3010 3015 delta could choose a better revision, it will do so. This means if you
3011 3016 are converting a non-generaldelta revlog to a generaldelta revlog,
3012 3017 deltas will be recomputed if the delta's parent isn't a parent of the
3013 3018 revision.
3014 3019
3015 3020 In addition to the delta policy, the ``forcedeltabothparents``
3016 3021 argument controls whether to force compute deltas against both parents
3017 3022 for merges. If unset, the destination revlog's current setting is kept.
3018 3023
3019 3024 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3020 3025 `sidedata_helpers`.
3021 3026 """
3022 3027 if deltareuse not in self.DELTAREUSEALL:
3023 3028 raise ValueError(
3024 3029 _(b'value for deltareuse invalid: %s') % deltareuse
3025 3030 )
3026 3031
3027 3032 if len(destrevlog):
3028 3033 raise ValueError(_(b'destination revlog is not empty'))
3029 3034
3030 3035 if getattr(self, 'filteredrevs', None):
3031 3036 raise ValueError(_(b'source revlog has filtered revisions'))
3032 3037 if getattr(destrevlog, 'filteredrevs', None):
3033 3038 raise ValueError(_(b'destination revlog has filtered revisions'))
3034 3039
3035 3040 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3036 3041 # if possible.
3037 3042 oldlazydelta = destrevlog._lazydelta
3038 3043 oldlazydeltabase = destrevlog._lazydeltabase
3039 3044 oldamd = destrevlog._deltabothparents
3040 3045
3041 3046 try:
3042 3047 if deltareuse == self.DELTAREUSEALWAYS:
3043 3048 destrevlog._lazydeltabase = True
3044 3049 destrevlog._lazydelta = True
3045 3050 elif deltareuse == self.DELTAREUSESAMEREVS:
3046 3051 destrevlog._lazydeltabase = False
3047 3052 destrevlog._lazydelta = True
3048 3053 elif deltareuse == self.DELTAREUSENEVER:
3049 3054 destrevlog._lazydeltabase = False
3050 3055 destrevlog._lazydelta = False
3051 3056
3052 3057 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3053 3058
3054 3059 self._clone(
3055 3060 tr,
3056 3061 destrevlog,
3057 3062 addrevisioncb,
3058 3063 deltareuse,
3059 3064 forcedeltabothparents,
3060 3065 sidedata_helpers,
3061 3066 )
3062 3067
3063 3068 finally:
3064 3069 destrevlog._lazydelta = oldlazydelta
3065 3070 destrevlog._lazydeltabase = oldlazydeltabase
3066 3071 destrevlog._deltabothparents = oldamd
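
# A minimal cloning sketch (hypothetical setup; ``repo``, ``src`` and
# ``dest`` are assumed to exist and are not defined here):
#
#   with repo.transaction(b'rewrite') as tr:
#       src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
#
# DELTAREUSENEVER recomputes every delta, which is how format upgrades
# re-optimize storage at the cost of CPU time.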
3067 3072
3068 3073 def _clone(
3069 3074 self,
3070 3075 tr,
3071 3076 destrevlog,
3072 3077 addrevisioncb,
3073 3078 deltareuse,
3074 3079 forcedeltabothparents,
3075 3080 sidedata_helpers,
3076 3081 ):
3077 3082 """perform the core duty of `revlog.clone` after parameter processing"""
3078 3083 deltacomputer = deltautil.deltacomputer(destrevlog)
3079 3084 index = self.index
3080 3085 for rev in self:
3081 3086 entry = index[rev]
3082 3087
3083 3088 # Some classes override linkrev to take filtered revs into
3084 3089 # account. Use raw entry from index.
3085 3090 flags = entry[0] & 0xFFFF
3086 3091 linkrev = entry[4]
3087 3092 p1 = index[entry[5]][7]
3088 3093 p2 = index[entry[6]][7]
3089 3094 node = entry[7]
3090 3095
3091 3096 # (Possibly) reuse the delta from the revlog if allowed and
3092 3097 # the revlog chunk is a delta.
3093 3098 cachedelta = None
3094 3099 rawtext = None
3095 3100 if deltareuse == self.DELTAREUSEFULLADD:
3096 3101 text, sidedata = self._revisiondata(rev)
3097 3102
3098 3103 if sidedata_helpers is not None:
3099 3104 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3100 3105 self, sidedata_helpers, sidedata, rev
3101 3106 )
3102 3107 flags = flags | new_flags[0] & ~new_flags[1]
3103 3108
3104 3109 destrevlog.addrevision(
3105 3110 text,
3106 3111 tr,
3107 3112 linkrev,
3108 3113 p1,
3109 3114 p2,
3110 3115 cachedelta=cachedelta,
3111 3116 node=node,
3112 3117 flags=flags,
3113 3118 deltacomputer=deltacomputer,
3114 3119 sidedata=sidedata,
3115 3120 )
3116 3121 else:
3117 3122 if destrevlog._lazydelta:
3118 3123 dp = self.deltaparent(rev)
3119 3124 if dp != nullrev:
3120 3125 cachedelta = (dp, bytes(self._chunk(rev)))
3121 3126
3122 3127 sidedata = None
3123 3128 if not cachedelta:
3124 3129 rawtext, sidedata = self._revisiondata(rev)
3125 3130 if sidedata is None:
3126 3131 sidedata = self.sidedata(rev)
3127 3132
3128 3133 if sidedata_helpers is not None:
3129 3134 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3130 3135 self, sidedata_helpers, sidedata, rev
3131 3136 )
3132 3137 flags = flags | new_flags[0] & ~new_flags[1]
3133 3138
3134 3139 with destrevlog._writing(tr):
3135 3140 destrevlog._addrevision(
3136 3141 node,
3137 3142 rawtext,
3138 3143 tr,
3139 3144 linkrev,
3140 3145 p1,
3141 3146 p2,
3142 3147 flags,
3143 3148 cachedelta,
3144 3149 deltacomputer=deltacomputer,
3145 3150 sidedata=sidedata,
3146 3151 )
3147 3152
3148 3153 if addrevisioncb:
3149 3154 addrevisioncb(self, rev, node)
3150 3155
3151 3156 def censorrevision(self, tr, censornode, tombstone=b''):
3152 3157 if self._format_version == REVLOGV0:
3153 3158 raise error.RevlogError(
3154 3159 _(b'cannot censor with version %d revlogs')
3155 3160 % self._format_version
3156 3161 )
3157 3162
3158 3163 censorrev = self.rev(censornode)
3159 3164 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3160 3165
3161 3166 if len(tombstone) > self.rawsize(censorrev):
3162 3167 raise error.Abort(
3163 3168 _(b'censor tombstone must be no longer than censored data')
3164 3169 )
3165 3170
3166 3171 # Rewriting the revlog in place is hard. Our strategy for censoring is
3167 3172 # to create a new revlog, copy all revisions to it, then replace the
3168 3173 # revlogs on transaction close.
3169 3174 #
3170 3175 # This is a bit dangerous. We could easily have a mismatch of state.
3171 3176 newrl = revlog(
3172 3177 self.opener,
3173 3178 target=self.target,
3174 3179 radix=self.radix,
3175 3180 postfix=b'tmpcensored',
3176 3181 censorable=True,
3177 3182 )
3178 3183 newrl._format_version = self._format_version
3179 3184 newrl._format_flags = self._format_flags
3180 3185 newrl._generaldelta = self._generaldelta
3181 3186 newrl._parse_index = self._parse_index
3182 3187
3183 3188 for rev in self.revs():
3184 3189 node = self.node(rev)
3185 3190 p1, p2 = self.parents(node)
3186 3191
3187 3192 if rev == censorrev:
3188 3193 newrl.addrawrevision(
3189 3194 tombstone,
3190 3195 tr,
3191 3196 self.linkrev(censorrev),
3192 3197 p1,
3193 3198 p2,
3194 3199 censornode,
3195 3200 REVIDX_ISCENSORED,
3196 3201 )
3197 3202
3198 3203 if newrl.deltaparent(rev) != nullrev:
3199 3204 raise error.Abort(
3200 3205 _(
3201 3206 b'censored revision stored as delta; '
3202 3207 b'cannot censor'
3203 3208 ),
3204 3209 hint=_(
3205 3210 b'censoring of revlogs is not '
3206 3211 b'fully implemented; please report '
3207 3212 b'this bug'
3208 3213 ),
3209 3214 )
3210 3215 continue
3211 3216
3212 3217 if self.iscensored(rev):
3213 3218 if self.deltaparent(rev) != nullrev:
3214 3219 raise error.Abort(
3215 3220 _(
3216 3221 b'cannot censor due to censored '
3217 3222 b'revision having delta stored'
3218 3223 )
3219 3224 )
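                # This revision was already censored and is stored as a
                # full snapshot, so its raw chunk (the existing tombstone)
                # can be copied over verbatim.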
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM: length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks that need to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
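        # Each boolean argument opts into one key of the returned dict,
        # so callers only pay for the statistics they actually ask for.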
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
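        # Regenerate sidedata for revisions startrev..endrev (both
        # inclusive): run the sidedata helpers, append the new payloads to
        # the data file, and patch the matching index entries. A hedged
        # usage sketch (the transaction name is illustrative; `helpers` is
        # the same structure consumed by run_sidedata_helpers below):
        #
        #     with repo.transaction(b'rewrite-sidedata') as tr:
        #         rl.rewrite_sidedata(tr, helpers, 0, len(rl) - 1)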
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh = self._writinghandles
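            # Position the data file for appending: with a docket the
            # logical end of the data is tracked explicitly; otherwise the
            # physical end of the file is used.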
            if self._docket is not None:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
            else:
                dfh.seek(0, os.SEEK_END)

            current_offset = dfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
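                # Pick a storage mode for the payload: start from PLAIN
                # (stored as-is); if compression actually shrinks it, keep
                # the compressed form, tagged DEFAULT when it carries the
                # docket's default compression header, INLINE otherwise.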
3406 3411
3407 3412 sidedata_compression_mode = COMP_MODE_INLINE
3408 3413 if serialized_sidedata and self.hassidedata:
3409 3414 sidedata_compression_mode = COMP_MODE_PLAIN
3410 3415 h, comp_sidedata = self.compress(serialized_sidedata)
3411 3416 if (
3412 3417 h != b'u'
3413 3418 and comp_sidedata[0] != b'\0'
3414 3419 and len(comp_sidedata) < len(serialized_sidedata)
3415 3420 ):
3416 3421 assert not h
3417 3422 if (
3418 3423 comp_sidedata[0]
3419 3424 == self._docket.default_compression_header
3420 3425 ):
3421 3426 sidedata_compression_mode = COMP_MODE_DEFAULT
3422 3427 serialized_sidedata = comp_sidedata
3423 3428 else:
3424 3429 sidedata_compression_mode = COMP_MODE_INLINE
3425 3430 serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursor
                # around, so reposition it before writing
                dfh.seek(current_offset, os.SEEK_SET)
                dfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            if self._docket is not None:
                self._docket.data_end = dfh.tell()

            # rewrite the new index entries
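            # index entries are fixed-size, so the entry for startrev
            # starts at byte offset startrev * entry_size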
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)