revlog: use an intermediate variable for `data` in `_chunk`...
marmoute
r48026:e340b556 default
@@ -1,3327 +1,3328 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 COMP_MODE_INLINE,
39 39 FEATURES_BY_VERSION,
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 INDEX_HEADER,
43 43 REVLOGV0,
44 44 REVLOGV1,
45 45 REVLOGV1_FLAGS,
46 46 REVLOGV2,
47 47 REVLOGV2_FLAGS,
48 48 REVLOG_DEFAULT_FLAGS,
49 49 REVLOG_DEFAULT_FORMAT,
50 50 REVLOG_DEFAULT_VERSION,
51 51 SUPPORTED_FLAGS,
52 52 )
53 53 from .revlogutils.flagutil import (
54 54 REVIDX_DEFAULT_FLAGS,
55 55 REVIDX_ELLIPSIS,
56 56 REVIDX_EXTSTORED,
57 57 REVIDX_FLAGS_ORDER,
58 58 REVIDX_HASCOPIESINFO,
59 59 REVIDX_ISCENSORED,
60 60 REVIDX_RAWTEXT_CHANGING_FLAGS,
61 61 )
62 62 from .thirdparty import attr
63 63 from . import (
64 64 ancestor,
65 65 dagop,
66 66 error,
67 67 mdiff,
68 68 policy,
69 69 pycompat,
70 70 templatefilters,
71 71 util,
72 72 )
73 73 from .interfaces import (
74 74 repository,
75 75 util as interfaceutil,
76 76 )
77 77 from .revlogutils import (
78 78 deltas as deltautil,
79 79 docket as docketutil,
80 80 flagutil,
81 81 nodemap as nodemaputil,
82 82 revlogv0,
83 83 sidedata as sidedatautil,
84 84 )
85 85 from .utils import (
86 86 storageutil,
87 87 stringutil,
88 88 )
89 89
90 90 # blanket usage of all the names to silence pyflakes complaints
91 91 # We need these names available in the module for extensions.
92 92
93 93 REVLOGV0
94 94 REVLOGV1
95 95 REVLOGV2
96 96 FLAG_INLINE_DATA
97 97 FLAG_GENERALDELTA
98 98 REVLOG_DEFAULT_FLAGS
99 99 REVLOG_DEFAULT_FORMAT
100 100 REVLOG_DEFAULT_VERSION
101 101 REVLOGV1_FLAGS
102 102 REVLOGV2_FLAGS
103 103 REVIDX_ISCENSORED
104 104 REVIDX_ELLIPSIS
105 105 REVIDX_HASCOPIESINFO
106 106 REVIDX_EXTSTORED
107 107 REVIDX_DEFAULT_FLAGS
108 108 REVIDX_FLAGS_ORDER
109 109 REVIDX_RAWTEXT_CHANGING_FLAGS
110 110
111 111 parsers = policy.importmod('parsers')
112 112 rustancestor = policy.importrust('ancestor')
113 113 rustdagop = policy.importrust('dagop')
114 114 rustrevlog = policy.importrust('revlog')
115 115
116 116 # Aliased for performance.
117 117 _zlibdecompress = zlib.decompress
118 118
119 119 # max size of revlog with inline data
120 120 _maxinline = 131072
121 121 _chunksize = 1048576
122 122
123 123 # Flag processors for REVIDX_ELLIPSIS.
124 124 def ellipsisreadprocessor(rl, text):
125 125 return text, False
126 126
127 127
128 128 def ellipsiswriteprocessor(rl, text):
129 129 return text, False
130 130
131 131
132 132 def ellipsisrawprocessor(rl, text):
133 133 return False
134 134
135 135
136 136 ellipsisprocessor = (
137 137 ellipsisreadprocessor,
138 138 ellipsiswriteprocessor,
139 139 ellipsisrawprocessor,
140 140 )
141 141
142 142
143 143 def offset_type(offset, type):
144 144 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
145 145 raise ValueError(b'unknown revlog index flags')
146 146 return int(int(offset) << 16 | type)
147 147
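# Example (illustrative): round-tripping the packed index[0] field.
# `offset_type` stores the data offset in the high bits and the
# REVIDX_* flags in the low 16 bits; the formulas in the revlog class
# docstring below recover them:
#
#     field = offset_type(1024, REVIDX_ISCENSORED)
#     assert field >> 16 == 1024                   # data offset
#     assert field & 0xFFFF == REVIDX_ISCENSORED   # flags
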
148 148
149 149 def _verify_revision(rl, skipflags, state, node):
150 150 """Verify the integrity of the given revlog ``node`` while providing a hook
151 151 point for extensions to influence the operation."""
152 152 if skipflags:
153 153 state[b'skipread'].add(node)
154 154 else:
155 155 # Side-effect: read content and verify hash.
156 156 rl.revision(node)
157 157
158 158
159 159 # True if a fast implementation for persistent-nodemap is available
160 160 #
161 161 # We also consider that we have a "fast" implementation in "pure" python,
162 162 # because people using pure don't really have performance considerations
163 163 # (and a wheelbarrow of other slowness sources)
164 164 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
165 165 parsers, 'BaseIndexObject'
166 166 )
167 167
168 168
169 169 @attr.s(slots=True, frozen=True)
170 170 class _revisioninfo(object):
171 171 """Information about a revision that allows building its fulltext
172 172 node: expected hash of the revision
173 173 p1, p2: parent revs of the revision
174 174 btext: built text cache consisting of a one-element list
175 175 cachedelta: (baserev, uncompressed_delta) or None
176 176 flags: flags associated to the revision storage
177 177
178 178 One of btext[0] or cachedelta must be set.
179 179 """
180 180
181 181 node = attr.ib()
182 182 p1 = attr.ib()
183 183 p2 = attr.ib()
184 184 btext = attr.ib()
185 185 textlen = attr.ib()
186 186 cachedelta = attr.ib()
187 187 flags = attr.ib()
188 188
189 189
190 190 @interfaceutil.implementer(repository.irevisiondelta)
191 191 @attr.s(slots=True)
192 192 class revlogrevisiondelta(object):
193 193 node = attr.ib()
194 194 p1node = attr.ib()
195 195 p2node = attr.ib()
196 196 basenode = attr.ib()
197 197 flags = attr.ib()
198 198 baserevisionsize = attr.ib()
199 199 revision = attr.ib()
200 200 delta = attr.ib()
201 201 sidedata = attr.ib()
202 202 protocol_flags = attr.ib()
203 203 linknode = attr.ib(default=None)
204 204
205 205
206 206 @interfaceutil.implementer(repository.iverifyproblem)
207 207 @attr.s(frozen=True)
208 208 class revlogproblem(object):
209 209 warning = attr.ib(default=None)
210 210 error = attr.ib(default=None)
211 211 node = attr.ib(default=None)
212 212
213 213
214 214 def parse_index_v1(data, inline):
215 215 # call the C implementation to parse the index data
216 216 index, cache = parsers.parse_index2(data, inline)
217 217 return index, cache
218 218
219 219
220 220 def parse_index_v2(data, inline):
221 221 # call the C implementation to parse the index data
222 222 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
223 223 return index, cache
224 224
225 225
226 226 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
227 227
228 228 def parse_index_v1_nodemap(data, inline):
229 229 index, cache = parsers.parse_index_devel_nodemap(data, inline)
230 230 return index, cache
231 231
232 232
233 233 else:
234 234 parse_index_v1_nodemap = None
235 235
236 236
237 237 def parse_index_v1_mixed(data, inline):
238 238 index, cache = parse_index_v1(data, inline)
239 239 return rustrevlog.MixedIndex(index), cache
240 240
241 241
242 242 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
243 243 # signed integer)
244 244 _maxentrysize = 0x7FFFFFFF
245 245
246 246
247 247 class revlog(object):
248 248 """
249 249 the underlying revision storage object
250 250
251 251 A revlog consists of two parts, an index and the revision data.
252 252
253 253 The index is a file with a fixed record size containing
254 254 information on each revision, including its nodeid (hash), the
255 255 nodeids of its parents, the position and offset of its data within
256 256 the data file, and the revision it's based on. Finally, each entry
257 257 contains a linkrev entry that can serve as a pointer to external
258 258 data.
259 259
260 260 The revision data itself is a linear collection of data chunks.
261 261 Each chunk represents a revision and is usually represented as a
262 262 delta against the previous chunk. To bound lookup time, runs of
263 263 deltas are limited to about 2 times the length of the original
264 264 version data. This makes retrieval of a version proportional to
265 265 its size, or O(1) relative to the number of revisions.
266 266
267 267 Both pieces of the revlog are written to in an append-only
268 268 fashion, which means we never need to rewrite a file to insert or
269 269 remove data, and can use some simple techniques to avoid the need
270 270 for locking while reading.
271 271
272 272 If checkambig, indexfile is opened with checkambig=True at
273 273 writing, to avoid file stat ambiguity.
274 274
275 275 If mmaplargeindex is True, and an mmapindexthreshold is set, the
276 276 index will be mmapped rather than read if it is larger than the
277 277 configured threshold.
278 278
279 279 If censorable is True, the revlog can have censored revisions.
280 280
281 281 If `upperboundcomp` is not None, this is the expected maximal gain from
282 282 compression for the data content.
283 283
284 284 `concurrencychecker` is an optional function that receives 3 arguments: a
285 285 file handle, a filename, and an expected position. It should check whether
286 286 the current position in the file handle is valid, and log/warn/fail (by
287 287 raising).
288 288
289 289
290 290 Internal details
291 291 ----------------
292 292
293 293 A large part of the revlog logic deals with revisions' "index entries", tuple
294 294 objects that contain the same "items" whatever the revlog version.
295 295 Different versions will have different ways of storing these items (sometimes
296 296 not having them at all), but the tuple will always be the same. New fields
297 297 are usually added at the end to avoid breaking existing code that relies
298 298 on the existing order. The fields are defined as follows:
299 299
300 300 [0] offset:
301 301 The byte index of the start of revision data chunk.
302 302 That value is shifted up by 16 bits. Use "offset = field >> 16" to
303 303 retrieve it.
304 304
305 305 flags:
306 306 A flag field that carries special information or changes the behavior
307 307 of the revision. (see `REVIDX_*` constants for details)
308 308 The flag field only occupies the first 16 bits of this field,
309 309 use "flags = field & 0xFFFF" to retrieve the value.
310 310
311 311 [1] compressed length:
312 312 The size, in bytes, of the chunk on disk
313 313
314 314 [2] uncompressed length:
315 315 The size, in bytes, of the full revision once reconstructed.
316 316
317 317 [3] base rev:
318 318 Either the base of the revision delta chain (without general
319 319 delta), or the base of the delta (stored in the data chunk)
320 320 with general delta.
321 321
322 322 [4] link rev:
323 323 Changelog revision number of the changeset introducing this
324 324 revision.
325 325
326 326 [5] parent 1 rev:
327 327 Revision number of the first parent
328 328
329 329 [6] parent 2 rev:
330 330 Revision number of the second parent
331 331
332 332 [7] node id:
333 333 The node id of the current revision
334 334
335 335 [8] sidedata offset:
336 336 The byte index of the start of the revision's side-data chunk.
337 337
338 338 [9] sidedata chunk length:
339 339 The size, in bytes, of the revision's side-data chunk.
340 340
341 341 [10] data compression mode:
342 342 two bits that detail the way the data chunk is compressed on disk.
343 343 (see "COMP_MODE_*" constants for details). For revlog version 0 and
344 344 1 this will always be COMP_MODE_INLINE.
345 345
346 346 """
347 347
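# Example (illustrative, `rl` being a hypothetical revlog instance):
# the accessor methods defined later map onto the entry tuple that the
# docstring above documents:
#
#     e = rl.index[rev]
#     assert rl.start(rev) == e[0] >> 16     # [0] offset (high bits)
#     assert rl.flags(rev) == e[0] & 0xFFFF  # [0] flags (low 16 bits)
#     assert rl.length(rev) == e[1]          # [1] compressed length
#     assert rl.linkrev(rev) == e[4]         # [4] link rev
#     assert rl.node(rev) == e[7]            # [7] node id
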
348 348 _flagserrorclass = error.RevlogError
349 349
350 350 def __init__(
351 351 self,
352 352 opener,
353 353 target,
354 354 radix,
355 355 postfix=None, # only exists for `tmpcensored` now
356 356 checkambig=False,
357 357 mmaplargeindex=False,
358 358 censorable=False,
359 359 upperboundcomp=None,
360 360 persistentnodemap=False,
361 361 concurrencychecker=None,
362 362 trypending=False,
363 363 ):
364 364 """
365 365 create a revlog object
366 366
367 367 opener is a function that abstracts the file opening operation
368 368 and can be used to implement COW semantics or the like.
369 369
370 370 `target`: a (KIND, ID) tuple that identifies the content stored in
371 371 this revlog. It helps the rest of the code to understand what the revlog
372 372 is about without having to resort to heuristics and index filename
373 373 analysis. Note that this must reliably be set by normal code, but
374 374 test, debug, or performance measurement code might not set it to an
375 375 accurate value.
376 376 """
377 377 self.upperboundcomp = upperboundcomp
378 378
379 379 self.radix = radix
380 380
381 381 self._docket_file = None
382 382 self._indexfile = None
383 383 self._datafile = None
384 384 self._nodemap_file = None
385 385 self.postfix = postfix
386 386 self._trypending = trypending
387 387 self.opener = opener
388 388 if persistentnodemap:
389 389 self._nodemap_file = nodemaputil.get_nodemap_file(self)
390 390
391 391 assert target[0] in ALL_KINDS
392 392 assert len(target) == 2
393 393 self.target = target
394 394 # When True, indexfile is opened with checkambig=True at writing, to
395 395 # avoid file stat ambiguity.
396 396 self._checkambig = checkambig
397 397 self._mmaplargeindex = mmaplargeindex
398 398 self._censorable = censorable
399 399 # 3-tuple of (node, rev, text) for a raw revision.
400 400 self._revisioncache = None
401 401 # Maps rev to chain base rev.
402 402 self._chainbasecache = util.lrucachedict(100)
403 403 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
404 404 self._chunkcache = (0, b'')
405 405 # How much data to read and cache into the raw revlog data cache.
406 406 self._chunkcachesize = 65536
407 407 self._maxchainlen = None
408 408 self._deltabothparents = True
409 409 self.index = None
410 410 self._docket = None
411 411 self._nodemap_docket = None
412 412 # Mapping of partial identifiers to full nodes.
413 413 self._pcache = {}
414 414 # Mapping of revision integer to full node.
415 415 self._compengine = b'zlib'
416 416 self._compengineopts = {}
417 417 self._maxdeltachainspan = -1
418 418 self._withsparseread = False
419 419 self._sparserevlog = False
420 420 self.hassidedata = False
421 421 self._srdensitythreshold = 0.50
422 422 self._srmingapsize = 262144
423 423
424 424 # Make copy of flag processors so each revlog instance can support
425 425 # custom flags.
426 426 self._flagprocessors = dict(flagutil.flagprocessors)
427 427
428 428 # 2-tuple of file handles being used for active writing.
429 429 self._writinghandles = None
430 430 # prevent nesting of addgroup
431 431 self._adding_group = None
432 432
433 433 self._loadindex()
434 434
435 435 self._concurrencychecker = concurrencychecker
436 436
437 437 def _init_opts(self):
438 438 """process options (from above/config) to setup associated default revlog mode
439 439
440 440 These values might be affected when actually reading on disk information.
441 441
442 442 The relevant values are returned for use in _loadindex().
443 443
444 444 * newversionflags:
445 445 version header to use if we need to create a new revlog
446 446
447 447 * mmapindexthreshold:
448 448 minimal index size at which to start using mmap
449 449
450 450 * force_nodemap:
451 451 force the usage of a "development" version of the nodemap code
452 452 """
453 453 mmapindexthreshold = None
454 454 opts = self.opener.options
455 455
456 456 if b'revlogv2' in opts:
457 457 new_header = REVLOGV2 | FLAG_INLINE_DATA
458 458 elif b'revlogv1' in opts:
459 459 new_header = REVLOGV1 | FLAG_INLINE_DATA
460 460 if b'generaldelta' in opts:
461 461 new_header |= FLAG_GENERALDELTA
462 462 elif b'revlogv0' in self.opener.options:
463 463 new_header = REVLOGV0
464 464 else:
465 465 new_header = REVLOG_DEFAULT_VERSION
466 466
467 467 if b'chunkcachesize' in opts:
468 468 self._chunkcachesize = opts[b'chunkcachesize']
469 469 if b'maxchainlen' in opts:
470 470 self._maxchainlen = opts[b'maxchainlen']
471 471 if b'deltabothparents' in opts:
472 472 self._deltabothparents = opts[b'deltabothparents']
473 473 self._lazydelta = bool(opts.get(b'lazydelta', True))
474 474 self._lazydeltabase = False
475 475 if self._lazydelta:
476 476 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
477 477 if b'compengine' in opts:
478 478 self._compengine = opts[b'compengine']
479 479 if b'zlib.level' in opts:
480 480 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
481 481 if b'zstd.level' in opts:
482 482 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
483 483 if b'maxdeltachainspan' in opts:
484 484 self._maxdeltachainspan = opts[b'maxdeltachainspan']
485 485 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
486 486 mmapindexthreshold = opts[b'mmapindexthreshold']
487 487 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
488 488 withsparseread = bool(opts.get(b'with-sparse-read', False))
489 489 # sparse-revlog forces sparse-read
490 490 self._withsparseread = self._sparserevlog or withsparseread
491 491 if b'sparse-read-density-threshold' in opts:
492 492 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
493 493 if b'sparse-read-min-gap-size' in opts:
494 494 self._srmingapsize = opts[b'sparse-read-min-gap-size']
495 495 if opts.get(b'enableellipsis'):
496 496 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
497 497
498 498 # revlog v0 doesn't have flag processors
499 499 for flag, processor in pycompat.iteritems(
500 500 opts.get(b'flagprocessors', {})
501 501 ):
502 502 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
503 503
504 504 if self._chunkcachesize <= 0:
505 505 raise error.RevlogError(
506 506 _(b'revlog chunk cache size %r is not greater than 0')
507 507 % self._chunkcachesize
508 508 )
509 509 elif self._chunkcachesize & (self._chunkcachesize - 1):
510 510 raise error.RevlogError(
511 511 _(b'revlog chunk cache size %r is not a power of 2')
512 512 % self._chunkcachesize
513 513 )
514 514 force_nodemap = opts.get(b'devel-force-nodemap', False)
515 515 return new_header, mmapindexthreshold, force_nodemap
516 516
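# Note (illustrative): the `size & (size - 1)` test above is the usual
# power-of-two check; subtracting one flips the lowest set bit and all
# bits below it, so the AND is zero only for powers of two:
#
#     assert 65536 & (65536 - 1) == 0   # accepted (the default size)
#     assert 65537 & (65537 - 1) != 0   # rejected
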
517 517 def _get_data(self, filepath, mmap_threshold, size=None):
518 518 """return a file content with or without mmap
519 519
520 520 If the file is missing return the empty string"""
521 521 try:
522 522 with self.opener(filepath) as fp:
523 523 if mmap_threshold is not None:
524 524 file_size = self.opener.fstat(fp).st_size
525 525 if file_size >= mmap_threshold:
526 526 if size is not None:
527 527 # avoid potential mmap crash
528 528 size = min(file_size, size)
529 529 # TODO: should .close() to release resources without
530 530 # relying on Python GC
531 531 if size is None:
532 532 return util.buffer(util.mmapread(fp))
533 533 else:
534 534 return util.buffer(util.mmapread(fp, size))
535 535 if size is None:
536 536 return fp.read()
537 537 else:
538 538 return fp.read(size)
539 539 except IOError as inst:
540 540 if inst.errno != errno.ENOENT:
541 541 raise
542 542 return b''
543 543
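# Example (illustrative): with mmap_threshold=65536, an index file of
# 1 MiB is mmapped and wrapped in util.buffer(), a 4 KiB one is simply
# read(), and a missing file yields b'' instead of raising ENOENT:
#
#     data = rl._get_data(b'data/foo.i', 65536)  # hypothetical path
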
544 544 def _loadindex(self):
545 545
546 546 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
547 547
548 548 if self.postfix is not None:
549 549 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
550 550 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
551 551 entry_point = b'%s.i.a' % self.radix
552 552 else:
553 553 entry_point = b'%s.i' % self.radix
554 554
555 555 entry_data = b''
556 556 self._initempty = True
557 557 entry_data = self._get_data(entry_point, mmapindexthreshold)
558 558 if len(entry_data) > 0:
559 559 header = INDEX_HEADER.unpack(entry_data[:4])[0]
560 560 self._initempty = False
561 561 else:
562 562 header = new_header
563 563
564 564 self._format_flags = header & ~0xFFFF
565 565 self._format_version = header & 0xFFFF
566 566
567 567 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
568 568 if supported_flags is None:
569 569 msg = _(b'unknown version (%d) in revlog %s')
570 570 msg %= (self._format_version, self.display_id)
571 571 raise error.RevlogError(msg)
572 572 elif self._format_flags & ~supported_flags:
573 573 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
574 574 display_flag = self._format_flags >> 16
575 575 msg %= (display_flag, self._format_version, self.display_id)
576 576 raise error.RevlogError(msg)
577 577
578 578 features = FEATURES_BY_VERSION[self._format_version]
579 579 self._inline = features[b'inline'](self._format_flags)
580 580 self._generaldelta = features[b'generaldelta'](self._format_flags)
581 581 self.hassidedata = features[b'sidedata']
582 582
583 583 if not features[b'docket']:
584 584 self._indexfile = entry_point
585 585 index_data = entry_data
586 586 else:
587 587 self._docket_file = entry_point
588 588 if self._initempty:
589 589 self._docket = docketutil.default_docket(self, header)
590 590 else:
591 591 self._docket = docketutil.parse_docket(
592 592 self, entry_data, use_pending=self._trypending
593 593 )
594 594 self._indexfile = self._docket.index_filepath()
595 595 index_data = b''
596 596 index_size = self._docket.index_end
597 597 if index_size > 0:
598 598 index_data = self._get_data(
599 599 self._indexfile, mmapindexthreshold, size=index_size
600 600 )
601 601 if len(index_data) < index_size:
602 602 msg = _(b'too few index data for %s: got %d, expected %d')
603 603 msg %= (self.display_id, len(index_data), index_size)
604 604 raise error.RevlogError(msg)
605 605
606 606 self._inline = False
607 607 # generaldelta is implied by version 2 revlogs.
608 608 self._generaldelta = True
609 609 # the logic for persistent nodemap will be dealt with within the
610 610 # main docket, so disable it for now.
611 611 self._nodemap_file = None
612 612
613 613 if self.postfix is None:
614 614 self._datafile = b'%s.d' % self.radix
615 615 else:
616 616 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
617 617
618 618 self.nodeconstants = sha1nodeconstants
619 619 self.nullid = self.nodeconstants.nullid
620 620
621 621 # sparse-revlog can't be on without general-delta (issue6056)
622 622 if not self._generaldelta:
623 623 self._sparserevlog = False
624 624
625 625 self._storedeltachains = True
626 626
627 627 devel_nodemap = (
628 628 self._nodemap_file
629 629 and force_nodemap
630 630 and parse_index_v1_nodemap is not None
631 631 )
632 632
633 633 use_rust_index = False
634 634 if rustrevlog is not None:
635 635 if self._nodemap_file is not None:
636 636 use_rust_index = True
637 637 else:
638 638 use_rust_index = self.opener.options.get(b'rust.index')
639 639
640 640 self._parse_index = parse_index_v1
641 641 if self._format_version == REVLOGV0:
642 642 self._parse_index = revlogv0.parse_index_v0
643 643 elif self._format_version == REVLOGV2:
644 644 self._parse_index = parse_index_v2
645 645 elif devel_nodemap:
646 646 self._parse_index = parse_index_v1_nodemap
647 647 elif use_rust_index:
648 648 self._parse_index = parse_index_v1_mixed
649 649 try:
650 650 d = self._parse_index(index_data, self._inline)
651 651 index, _chunkcache = d
652 652 use_nodemap = (
653 653 not self._inline
654 654 and self._nodemap_file is not None
655 655 and util.safehasattr(index, 'update_nodemap_data')
656 656 )
657 657 if use_nodemap:
658 658 nodemap_data = nodemaputil.persisted_data(self)
659 659 if nodemap_data is not None:
660 660 docket = nodemap_data[0]
661 661 if (
662 662 len(d[0]) > docket.tip_rev
663 663 and d[0][docket.tip_rev][7] == docket.tip_node
664 664 ):
665 665 # no changelog tampering
666 666 self._nodemap_docket = docket
667 667 index.update_nodemap_data(*nodemap_data)
668 668 except (ValueError, IndexError):
669 669 raise error.RevlogError(
670 670 _(b"index %s is corrupted") % self.display_id
671 671 )
672 672 self.index, self._chunkcache = d
673 673 if not self._chunkcache:
674 674 self._chunkclear()
675 675 # revnum -> (chain-length, sum-delta-length)
676 676 self._chaininfocache = util.lrucachedict(500)
677 677 # revlog header -> revlog compressor
678 678 self._decompressors = {}
679 679
680 680 @util.propertycache
681 681 def revlog_kind(self):
682 682 return self.target[0]
683 683
684 684 @util.propertycache
685 685 def display_id(self):
686 686 """The public facing "ID" of the revlog that we use in message"""
687 687 # Maybe we should build a user-facing representation of
688 688 # revlog.target instead of using `self.radix`
689 689 return self.radix
690 690
691 691 @util.propertycache
692 692 def _compressor(self):
693 693 engine = util.compengines[self._compengine]
694 694 return engine.revlogcompressor(self._compengineopts)
695 695
696 696 def _indexfp(self):
697 697 """file object for the revlog's index file"""
698 698 return self.opener(self._indexfile, mode=b"r")
699 699
700 700 def __index_write_fp(self):
701 701 # You should not use this directly; use `_writing` instead
702 702 try:
703 703 f = self.opener(
704 704 self._indexfile, mode=b"r+", checkambig=self._checkambig
705 705 )
706 706 if self._docket is None:
707 707 f.seek(0, os.SEEK_END)
708 708 else:
709 709 f.seek(self._docket.index_end, os.SEEK_SET)
710 710 return f
711 711 except IOError as inst:
712 712 if inst.errno != errno.ENOENT:
713 713 raise
714 714 return self.opener(
715 715 self._indexfile, mode=b"w+", checkambig=self._checkambig
716 716 )
717 717
718 718 def __index_new_fp(self):
719 719 # You should not use this unless you are upgrading from an inline revlog
720 720 return self.opener(
721 721 self._indexfile,
722 722 mode=b"w",
723 723 checkambig=self._checkambig,
724 724 atomictemp=True,
725 725 )
726 726
727 727 def _datafp(self, mode=b'r'):
728 728 """file object for the revlog's data file"""
729 729 return self.opener(self._datafile, mode=mode)
730 730
731 731 @contextlib.contextmanager
732 732 def _datareadfp(self, existingfp=None):
733 733 """file object suitable to read data"""
734 734 # Use explicit file handle, if given.
735 735 if existingfp is not None:
736 736 yield existingfp
737 737
738 738 # Use a file handle being actively used for writes, if available.
739 739 # There is some danger to doing this because reads will seek the
740 740 # file. However, _writeentry() performs a SEEK_END before all writes,
741 741 # so we should be safe.
742 742 elif self._writinghandles:
743 743 if self._inline:
744 744 yield self._writinghandles[0]
745 745 else:
746 746 yield self._writinghandles[1]
747 747
748 748 # Otherwise open a new file handle.
749 749 else:
750 750 if self._inline:
751 751 func = self._indexfp
752 752 else:
753 753 func = self._datafp
754 754 with func() as fp:
755 755 yield fp
756 756
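# Example (illustrative): callers use this as a context manager, so an
# already-open handle can be threaded through the chunk helpers below:
#
#     with rl._datareadfp() as df:
#         segment = rl._readsegment(0, 128, df=df)  # hypothetical read
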
757 757 def tiprev(self):
758 758 return len(self.index) - 1
759 759
760 760 def tip(self):
761 761 return self.node(self.tiprev())
762 762
763 763 def __contains__(self, rev):
764 764 return 0 <= rev < len(self)
765 765
766 766 def __len__(self):
767 767 return len(self.index)
768 768
769 769 def __iter__(self):
770 770 return iter(pycompat.xrange(len(self)))
771 771
772 772 def revs(self, start=0, stop=None):
773 773 """iterate over all rev in this revlog (from start to stop)"""
774 774 return storageutil.iterrevs(len(self), start=start, stop=stop)
775 775
776 776 @property
777 777 def nodemap(self):
778 778 msg = (
779 779 b"revlog.nodemap is deprecated, "
780 780 b"use revlog.index.[has_node|rev|get_rev]"
781 781 )
782 782 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
783 783 return self.index.nodemap
784 784
785 785 @property
786 786 def _nodecache(self):
787 787 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
788 788 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
789 789 return self.index.nodemap
790 790
791 791 def hasnode(self, node):
792 792 try:
793 793 self.rev(node)
794 794 return True
795 795 except KeyError:
796 796 return False
797 797
798 798 def candelta(self, baserev, rev):
799 799 """whether two revisions (baserev, rev) can be delta-ed or not"""
800 800 # Disable delta if either rev requires a content-changing flag
801 801 # processor (ex. LFS). This is because such a flag processor can alter
802 802 # the rawtext content that the delta will be based on, and two clients
803 803 # could have the same revlog node with different flags (i.e. different
804 804 # rawtext contents) and the delta could be incompatible.
805 805 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
806 806 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
807 807 ):
808 808 return False
809 809 return True
810 810
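# Example (illustrative): for an LFS-tracked revision, flags(rev) has
# REVIDX_EXTSTORED set, one of the REVIDX_RAWTEXT_CHANGING_FLAGS, so
# candelta() returns False and a full text is stored rather than a
# delta whose base rawtext could differ between clients.
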
811 811 def update_caches(self, transaction):
812 812 if self._nodemap_file is not None:
813 813 if transaction is None:
814 814 nodemaputil.update_persistent_nodemap(self)
815 815 else:
816 816 nodemaputil.setup_persistent_nodemap(transaction, self)
817 817
818 818 def clearcaches(self):
819 819 self._revisioncache = None
820 820 self._chainbasecache.clear()
821 821 self._chunkcache = (0, b'')
822 822 self._pcache = {}
823 823 self._nodemap_docket = None
824 824 self.index.clearcaches()
825 825 # The python code is the one responsible for validating the docket, so we
826 826 # end up having to refresh it here.
827 827 use_nodemap = (
828 828 not self._inline
829 829 and self._nodemap_file is not None
830 830 and util.safehasattr(self.index, 'update_nodemap_data')
831 831 )
832 832 if use_nodemap:
833 833 nodemap_data = nodemaputil.persisted_data(self)
834 834 if nodemap_data is not None:
835 835 self._nodemap_docket = nodemap_data[0]
836 836 self.index.update_nodemap_data(*nodemap_data)
837 837
838 838 def rev(self, node):
839 839 try:
840 840 return self.index.rev(node)
841 841 except TypeError:
842 842 raise
843 843 except error.RevlogError:
844 844 # parsers.c radix tree lookup failed
845 845 if (
846 846 node == self.nodeconstants.wdirid
847 847 or node in self.nodeconstants.wdirfilenodeids
848 848 ):
849 849 raise error.WdirUnsupported
850 850 raise error.LookupError(node, self.display_id, _(b'no node'))
851 851
852 852 # Accessors for index entries.
853 853
854 854 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
855 855 # are flags.
856 856 def start(self, rev):
857 857 return int(self.index[rev][0] >> 16)
858 858
859 859 def flags(self, rev):
860 860 return self.index[rev][0] & 0xFFFF
861 861
862 862 def length(self, rev):
863 863 return self.index[rev][1]
864 864
865 865 def sidedata_length(self, rev):
866 866 if not self.hassidedata:
867 867 return 0
868 868 return self.index[rev][9]
869 869
870 870 def rawsize(self, rev):
871 871 """return the length of the uncompressed text for a given revision"""
872 872 l = self.index[rev][2]
873 873 if l >= 0:
874 874 return l
875 875
876 876 t = self.rawdata(rev)
877 877 return len(t)
878 878
879 879 def size(self, rev):
880 880 """length of non-raw text (processed by a "read" flag processor)"""
881 881 # fast path: if no "read" flag processor could change the content,
882 882 # size is rawsize. note: ELLIPSIS is known to not change the content.
883 883 flags = self.flags(rev)
884 884 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
885 885 return self.rawsize(rev)
886 886
887 887 return len(self.revision(rev, raw=False))
888 888
889 889 def chainbase(self, rev):
890 890 base = self._chainbasecache.get(rev)
891 891 if base is not None:
892 892 return base
893 893
894 894 index = self.index
895 895 iterrev = rev
896 896 base = index[iterrev][3]
897 897 while base != iterrev:
898 898 iterrev = base
899 899 base = index[iterrev][3]
900 900
901 901 self._chainbasecache[rev] = base
902 902 return base
903 903
904 904 def linkrev(self, rev):
905 905 return self.index[rev][4]
906 906
907 907 def parentrevs(self, rev):
908 908 try:
909 909 entry = self.index[rev]
910 910 except IndexError:
911 911 if rev == wdirrev:
912 912 raise error.WdirUnsupported
913 913 raise
914 914 if entry[5] == nullrev:
915 915 return entry[6], entry[5]
916 916 else:
917 917 return entry[5], entry[6]
918 918
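# Note (illustrative): the swap above normalizes entries where p1 is
# null but p2 is not, so callers of parentrevs() always see the
# non-null parent first, e.g. (2, nullrev) instead of (nullrev, 2).
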
919 919 # fast parentrevs(rev) where rev isn't filtered
920 920 _uncheckedparentrevs = parentrevs
921 921
922 922 def node(self, rev):
923 923 try:
924 924 return self.index[rev][7]
925 925 except IndexError:
926 926 if rev == wdirrev:
927 927 raise error.WdirUnsupported
928 928 raise
929 929
930 930 # Derived from index values.
931 931
932 932 def end(self, rev):
933 933 return self.start(rev) + self.length(rev)
934 934
935 935 def parents(self, node):
936 936 i = self.index
937 937 d = i[self.rev(node)]
938 938 # inline node() to avoid function call overhead
939 939 if d[5] == self.nullid:
940 940 return i[d[6]][7], i[d[5]][7]
941 941 else:
942 942 return i[d[5]][7], i[d[6]][7]
943 943
944 944 def chainlen(self, rev):
945 945 return self._chaininfo(rev)[0]
946 946
947 947 def _chaininfo(self, rev):
948 948 chaininfocache = self._chaininfocache
949 949 if rev in chaininfocache:
950 950 return chaininfocache[rev]
951 951 index = self.index
952 952 generaldelta = self._generaldelta
953 953 iterrev = rev
954 954 e = index[iterrev]
955 955 clen = 0
956 956 compresseddeltalen = 0
957 957 while iterrev != e[3]:
958 958 clen += 1
959 959 compresseddeltalen += e[1]
960 960 if generaldelta:
961 961 iterrev = e[3]
962 962 else:
963 963 iterrev -= 1
964 964 if iterrev in chaininfocache:
965 965 t = chaininfocache[iterrev]
966 966 clen += t[0]
967 967 compresseddeltalen += t[1]
968 968 break
969 969 e = index[iterrev]
970 970 else:
971 971 # Add text length of base since decompressing that also takes
972 972 # work. For cache hits the length is already included.
973 973 compresseddeltalen += e[1]
974 974 r = (clen, compresseddeltalen)
975 975 chaininfocache[rev] = r
976 976 return r
977 977
978 978 def _deltachain(self, rev, stoprev=None):
979 979 """Obtain the delta chain for a revision.
980 980
981 981 ``stoprev`` specifies a revision to stop at. If not specified, we
982 982 stop at the base of the chain.
983 983
984 984 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
985 985 revs in ascending order and ``stopped`` is a bool indicating whether
986 986 ``stoprev`` was hit.
987 987 """
988 988 # Try C implementation.
989 989 try:
990 990 return self.index.deltachain(rev, stoprev, self._generaldelta)
991 991 except AttributeError:
992 992 pass
993 993
994 994 chain = []
995 995
996 996 # Alias to prevent attribute lookup in tight loop.
997 997 index = self.index
998 998 generaldelta = self._generaldelta
999 999
1000 1000 iterrev = rev
1001 1001 e = index[iterrev]
1002 1002 while iterrev != e[3] and iterrev != stoprev:
1003 1003 chain.append(iterrev)
1004 1004 if generaldelta:
1005 1005 iterrev = e[3]
1006 1006 else:
1007 1007 iterrev -= 1
1008 1008 e = index[iterrev]
1009 1009
1010 1010 if iterrev == stoprev:
1011 1011 stopped = True
1012 1012 else:
1013 1013 chain.append(iterrev)
1014 1014 stopped = False
1015 1015
1016 1016 chain.reverse()
1017 1017 return chain, stopped
1018 1018
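# Example (illustrative): in a generaldelta revlog where rev 5 deltas
# against rev 3 and rev 3 against rev 1 (a full snapshot):
#
#     rl._deltachain(5)             # -> ([1, 3, 5], False)
#     rl._deltachain(5, stoprev=3)  # -> ([5], True): walk hit stoprev
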
1019 1019 def ancestors(self, revs, stoprev=0, inclusive=False):
1020 1020 """Generate the ancestors of 'revs' in reverse revision order.
1021 1021 Does not generate revs lower than stoprev.
1022 1022
1023 1023 See the documentation for ancestor.lazyancestors for more details."""
1024 1024
1025 1025 # first, make sure start revisions aren't filtered
1026 1026 revs = list(revs)
1027 1027 checkrev = self.node
1028 1028 for r in revs:
1029 1029 checkrev(r)
1030 1030 # and we're sure ancestors aren't filtered as well
1031 1031
1032 1032 if rustancestor is not None:
1033 1033 lazyancestors = rustancestor.LazyAncestors
1034 1034 arg = self.index
1035 1035 else:
1036 1036 lazyancestors = ancestor.lazyancestors
1037 1037 arg = self._uncheckedparentrevs
1038 1038 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1039 1039
1040 1040 def descendants(self, revs):
1041 1041 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1042 1042
1043 1043 def findcommonmissing(self, common=None, heads=None):
1044 1044 """Return a tuple of the ancestors of common and the ancestors of heads
1045 1045 that are not ancestors of common. In revset terminology, we return the
1046 1046 tuple:
1047 1047
1048 1048 ::common, (::heads) - (::common)
1049 1049
1050 1050 The list is sorted by revision number, meaning it is
1051 1051 topologically sorted.
1052 1052
1053 1053 'heads' and 'common' are both lists of node IDs. If heads is
1054 1054 not supplied, uses all of the revlog's heads. If common is not
1055 1055 supplied, uses nullid."""
1056 1056 if common is None:
1057 1057 common = [self.nullid]
1058 1058 if heads is None:
1059 1059 heads = self.heads()
1060 1060
1061 1061 common = [self.rev(n) for n in common]
1062 1062 heads = [self.rev(n) for n in heads]
1063 1063
1064 1064 # we want the ancestors, but inclusive
1065 1065 class lazyset(object):
1066 1066 def __init__(self, lazyvalues):
1067 1067 self.addedvalues = set()
1068 1068 self.lazyvalues = lazyvalues
1069 1069
1070 1070 def __contains__(self, value):
1071 1071 return value in self.addedvalues or value in self.lazyvalues
1072 1072
1073 1073 def __iter__(self):
1074 1074 added = self.addedvalues
1075 1075 for r in added:
1076 1076 yield r
1077 1077 for r in self.lazyvalues:
1078 1078 if r not in added:
1079 1079 yield r
1080 1080
1081 1081 def add(self, value):
1082 1082 self.addedvalues.add(value)
1083 1083
1084 1084 def update(self, values):
1085 1085 self.addedvalues.update(values)
1086 1086
1087 1087 has = lazyset(self.ancestors(common))
1088 1088 has.add(nullrev)
1089 1089 has.update(common)
1090 1090
1091 1091 # take all ancestors from heads that aren't in has
1092 1092 missing = set()
1093 1093 visit = collections.deque(r for r in heads if r not in has)
1094 1094 while visit:
1095 1095 r = visit.popleft()
1096 1096 if r in missing:
1097 1097 continue
1098 1098 else:
1099 1099 missing.add(r)
1100 1100 for p in self.parentrevs(r):
1101 1101 if p not in has:
1102 1102 visit.append(p)
1103 1103 missing = list(missing)
1104 1104 missing.sort()
1105 1105 return has, [self.node(miss) for miss in missing]
1106 1106
1107 1107 def incrementalmissingrevs(self, common=None):
1108 1108 """Return an object that can be used to incrementally compute the
1109 1109 revision numbers of the ancestors of arbitrary sets that are not
1110 1110 ancestors of common. This is an ancestor.incrementalmissingancestors
1111 1111 object.
1112 1112
1113 1113 'common' is a list of revision numbers. If common is not supplied, uses
1114 1114 nullrev.
1115 1115 """
1116 1116 if common is None:
1117 1117 common = [nullrev]
1118 1118
1119 1119 if rustancestor is not None:
1120 1120 return rustancestor.MissingAncestors(self.index, common)
1121 1121 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1122 1122
1123 1123 def findmissingrevs(self, common=None, heads=None):
1124 1124 """Return the revision numbers of the ancestors of heads that
1125 1125 are not ancestors of common.
1126 1126
1127 1127 More specifically, return a list of revision numbers corresponding to
1128 1128 nodes N such that every N satisfies the following constraints:
1129 1129
1130 1130 1. N is an ancestor of some node in 'heads'
1131 1131 2. N is not an ancestor of any node in 'common'
1132 1132
1133 1133 The list is sorted by revision number, meaning it is
1134 1134 topologically sorted.
1135 1135
1136 1136 'heads' and 'common' are both lists of revision numbers. If heads is
1137 1137 not supplied, uses all of the revlog's heads. If common is not
1138 1138 supplied, uses nullid."""
1139 1139 if common is None:
1140 1140 common = [nullrev]
1141 1141 if heads is None:
1142 1142 heads = self.headrevs()
1143 1143
1144 1144 inc = self.incrementalmissingrevs(common=common)
1145 1145 return inc.missingancestors(heads)
1146 1146
1147 1147 def findmissing(self, common=None, heads=None):
1148 1148 """Return the ancestors of heads that are not ancestors of common.
1149 1149
1150 1150 More specifically, return a list of nodes N such that every N
1151 1151 satisfies the following constraints:
1152 1152
1153 1153 1. N is an ancestor of some node in 'heads'
1154 1154 2. N is not an ancestor of any node in 'common'
1155 1155
1156 1156 The list is sorted by revision number, meaning it is
1157 1157 topologically sorted.
1158 1158
1159 1159 'heads' and 'common' are both lists of node IDs. If heads is
1160 1160 not supplied, uses all of the revlog's heads. If common is not
1161 1161 supplied, uses nullid."""
1162 1162 if common is None:
1163 1163 common = [self.nullid]
1164 1164 if heads is None:
1165 1165 heads = self.heads()
1166 1166
1167 1167 common = [self.rev(n) for n in common]
1168 1168 heads = [self.rev(n) for n in heads]
1169 1169
1170 1170 inc = self.incrementalmissingrevs(common=common)
1171 1171 return [self.node(r) for r in inc.missingancestors(heads)]
1172 1172
1173 1173 def nodesbetween(self, roots=None, heads=None):
1174 1174 """Return a topological path from 'roots' to 'heads'.
1175 1175
1176 1176 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1177 1177 topologically sorted list of all nodes N that satisfy both of
1178 1178 these constraints:
1179 1179
1180 1180 1. N is a descendant of some node in 'roots'
1181 1181 2. N is an ancestor of some node in 'heads'
1182 1182
1183 1183 Every node is considered to be both a descendant and an ancestor
1184 1184 of itself, so every reachable node in 'roots' and 'heads' will be
1185 1185 included in 'nodes'.
1186 1186
1187 1187 'outroots' is the list of reachable nodes in 'roots', i.e., the
1188 1188 subset of 'roots' that is returned in 'nodes'. Likewise,
1189 1189 'outheads' is the subset of 'heads' that is also in 'nodes'.
1190 1190
1191 1191 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1192 1192 unspecified, uses nullid as the only root. If 'heads' is
1193 1193 unspecified, uses list of all of the revlog's heads."""
1194 1194 nonodes = ([], [], [])
1195 1195 if roots is not None:
1196 1196 roots = list(roots)
1197 1197 if not roots:
1198 1198 return nonodes
1199 1199 lowestrev = min([self.rev(n) for n in roots])
1200 1200 else:
1201 1201 roots = [self.nullid] # Everybody's a descendant of nullid
1202 1202 lowestrev = nullrev
1203 1203 if (lowestrev == nullrev) and (heads is None):
1204 1204 # We want _all_ the nodes!
1205 1205 return (
1206 1206 [self.node(r) for r in self],
1207 1207 [self.nullid],
1208 1208 list(self.heads()),
1209 1209 )
1210 1210 if heads is None:
1211 1211 # All nodes are ancestors, so the latest ancestor is the last
1212 1212 # node.
1213 1213 highestrev = len(self) - 1
1214 1214 # Set ancestors to None to signal that every node is an ancestor.
1215 1215 ancestors = None
1216 1216 # Set heads to an empty dictionary for later discovery of heads
1217 1217 heads = {}
1218 1218 else:
1219 1219 heads = list(heads)
1220 1220 if not heads:
1221 1221 return nonodes
1222 1222 ancestors = set()
1223 1223 # Turn heads into a dictionary so we can remove 'fake' heads.
1224 1224 # Also, later we will be using it to filter out the heads we can't
1225 1225 # find from roots.
1226 1226 heads = dict.fromkeys(heads, False)
1227 1227 # Start at the top and keep marking parents until we're done.
1228 1228 nodestotag = set(heads)
1229 1229 # Remember where the top was so we can use it as a limit later.
1230 1230 highestrev = max([self.rev(n) for n in nodestotag])
1231 1231 while nodestotag:
1232 1232 # grab a node to tag
1233 1233 n = nodestotag.pop()
1234 1234 # Never tag nullid
1235 1235 if n == self.nullid:
1236 1236 continue
1237 1237 # A node's revision number represents its place in a
1238 1238 # topologically sorted list of nodes.
1239 1239 r = self.rev(n)
1240 1240 if r >= lowestrev:
1241 1241 if n not in ancestors:
1242 1242 # If we are possibly a descendant of one of the roots
1243 1243 # and we haven't already been marked as an ancestor
1244 1244 ancestors.add(n) # Mark as ancestor
1245 1245 # Add non-nullid parents to list of nodes to tag.
1246 1246 nodestotag.update(
1247 1247 [p for p in self.parents(n) if p != self.nullid]
1248 1248 )
1249 1249 elif n in heads: # We've seen it before, is it a fake head?
1250 1250 # So it is, real heads should not be the ancestors of
1251 1251 # any other heads.
1252 1252 heads.pop(n)
1253 1253 if not ancestors:
1254 1254 return nonodes
1255 1255 # Now that we have our set of ancestors, we want to remove any
1256 1256 # roots that are not ancestors.
1257 1257
1258 1258 # If one of the roots was nullid, everything is included anyway.
1259 1259 if lowestrev > nullrev:
1260 1260 # But, since we weren't, let's recompute the lowest rev to not
1261 1261 # include roots that aren't ancestors.
1262 1262
1263 1263 # Filter out roots that aren't ancestors of heads
1264 1264 roots = [root for root in roots if root in ancestors]
1265 1265 # Recompute the lowest revision
1266 1266 if roots:
1267 1267 lowestrev = min([self.rev(root) for root in roots])
1268 1268 else:
1269 1269 # No more roots? Return empty list
1270 1270 return nonodes
1271 1271 else:
1272 1272 # We are descending from nullid, and don't need to care about
1273 1273 # any other roots.
1274 1274 lowestrev = nullrev
1275 1275 roots = [self.nullid]
1276 1276 # Transform our roots list into a set.
1277 1277 descendants = set(roots)
1278 1278 # Also, keep the original roots so we can filter out roots that aren't
1279 1279 # 'real' roots (i.e. are descended from other roots).
1280 1280 roots = descendants.copy()
1281 1281 # Our topologically sorted list of output nodes.
1282 1282 orderedout = []
1283 1283 # Don't start at nullid since we don't want nullid in our output list,
1284 1284 # and if nullid shows up in descendants, empty parents will look like
1285 1285 # they're descendants.
1286 1286 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1287 1287 n = self.node(r)
1288 1288 isdescendant = False
1289 1289 if lowestrev == nullrev: # Everybody is a descendant of nullid
1290 1290 isdescendant = True
1291 1291 elif n in descendants:
1292 1292 # n is already a descendant
1293 1293 isdescendant = True
1294 1294 # This check only needs to be done here because all the roots
1295 1295 # will start being marked as descendants before the loop.
1296 1296 if n in roots:
1297 1297 # If n was a root, check if it's a 'real' root.
1298 1298 p = tuple(self.parents(n))
1299 1299 # If any of its parents are descendants, it's not a root.
1300 1300 if (p[0] in descendants) or (p[1] in descendants):
1301 1301 roots.remove(n)
1302 1302 else:
1303 1303 p = tuple(self.parents(n))
1304 1304 # A node is a descendant if either of its parents is a
1305 1305 # descendant. (We seeded the descendants set with the roots
1306 1306 # up there, remember?)
1307 1307 if (p[0] in descendants) or (p[1] in descendants):
1308 1308 descendants.add(n)
1309 1309 isdescendant = True
1310 1310 if isdescendant and ((ancestors is None) or (n in ancestors)):
1311 1311 # Only include nodes that are both descendants and ancestors.
1312 1312 orderedout.append(n)
1313 1313 if (ancestors is not None) and (n in heads):
1314 1314 # We're trying to figure out which heads are reachable
1315 1315 # from roots.
1316 1316 # Mark this head as having been reached
1317 1317 heads[n] = True
1318 1318 elif ancestors is None:
1319 1319 # Otherwise, we're trying to discover the heads.
1320 1320 # Assume this is a head because if it isn't, the next step
1321 1321 # will eventually remove it.
1322 1322 heads[n] = True
1323 1323 # But, obviously its parents aren't.
1324 1324 for p in self.parents(n):
1325 1325 heads.pop(p, None)
1326 1326 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1327 1327 roots = list(roots)
1328 1328 assert orderedout
1329 1329 assert roots
1330 1330 assert heads
1331 1331 return (orderedout, roots, heads)
1332 1332
1333 1333 def headrevs(self, revs=None):
1334 1334 if revs is None:
1335 1335 try:
1336 1336 return self.index.headrevs()
1337 1337 except AttributeError:
1338 1338 return self._headrevs()
1339 1339 if rustdagop is not None:
1340 1340 return rustdagop.headrevs(self.index, revs)
1341 1341 return dagop.headrevs(revs, self._uncheckedparentrevs)
1342 1342
1343 1343 def computephases(self, roots):
1344 1344 return self.index.computephasesmapsets(roots)
1345 1345
1346 1346 def _headrevs(self):
1347 1347 count = len(self)
1348 1348 if not count:
1349 1349 return [nullrev]
1350 1350 # we won't iterate over filtered revs, so nobody is a head at the start
1351 1351 ishead = [0] * (count + 1)
1352 1352 index = self.index
1353 1353 for r in self:
1354 1354 ishead[r] = 1 # I may be a head
1355 1355 e = index[r]
1356 1356 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1357 1357 return [r for r, val in enumerate(ishead) if val]
1358 1358
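# Example (illustrative): in a linear history 0 -> 1 -> 2, every rev
# sets its own ishead bit and clears its parents', so only rev 2
# survives and _headrevs() returns [2]; an empty revlog yields
# [nullrev].
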
1359 1359 def heads(self, start=None, stop=None):
1360 1360 """return the list of all nodes that have no children
1361 1361
1362 1362 if start is specified, only heads that are descendants of
1363 1363 start will be returned
1364 1364 if stop is specified, it will consider all the revs from stop
1365 1365 as if they had no children
1366 1366 """
1367 1367 if start is None and stop is None:
1368 1368 if not len(self):
1369 1369 return [self.nullid]
1370 1370 return [self.node(r) for r in self.headrevs()]
1371 1371
1372 1372 if start is None:
1373 1373 start = nullrev
1374 1374 else:
1375 1375 start = self.rev(start)
1376 1376
1377 1377 stoprevs = {self.rev(n) for n in stop or []}
1378 1378
1379 1379 revs = dagop.headrevssubset(
1380 1380 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1381 1381 )
1382 1382
1383 1383 return [self.node(rev) for rev in revs]
1384 1384
1385 1385 def children(self, node):
1386 1386 """find the children of a given node"""
1387 1387 c = []
1388 1388 p = self.rev(node)
1389 1389 for r in self.revs(start=p + 1):
1390 1390 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1391 1391 if prevs:
1392 1392 for pr in prevs:
1393 1393 if pr == p:
1394 1394 c.append(self.node(r))
1395 1395 elif p == nullrev:
1396 1396 c.append(self.node(r))
1397 1397 return c
1398 1398
1399 1399 def commonancestorsheads(self, a, b):
1400 1400 """calculate all the heads of the common ancestors of nodes a and b"""
1401 1401 a, b = self.rev(a), self.rev(b)
1402 1402 ancs = self._commonancestorsheads(a, b)
1403 1403 return pycompat.maplist(self.node, ancs)
1404 1404
1405 1405 def _commonancestorsheads(self, *revs):
1406 1406 """calculate all the heads of the common ancestors of revs"""
1407 1407 try:
1408 1408 ancs = self.index.commonancestorsheads(*revs)
1409 1409 except (AttributeError, OverflowError): # C implementation failed
1410 1410 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1411 1411 return ancs
1412 1412
1413 1413 def isancestor(self, a, b):
1414 1414 """return True if node a is an ancestor of node b
1415 1415
1416 1416 A revision is considered an ancestor of itself."""
1417 1417 a, b = self.rev(a), self.rev(b)
1418 1418 return self.isancestorrev(a, b)
1419 1419
1420 1420 def isancestorrev(self, a, b):
1421 1421 """return True if revision a is an ancestor of revision b
1422 1422
1423 1423 A revision is considered an ancestor of itself.
1424 1424
1425 1425 The implementation of this is trivial but the use of
1426 1426 reachableroots is not."""
1427 1427 if a == nullrev:
1428 1428 return True
1429 1429 elif a == b:
1430 1430 return True
1431 1431 elif a > b:
1432 1432 return False
1433 1433 return bool(self.reachableroots(a, [b], [a], includepath=False))
1434 1434
1435 1435 def reachableroots(self, minroot, heads, roots, includepath=False):
1436 1436 """return (heads(::(<roots> and <roots>::<heads>)))
1437 1437
1438 1438 If includepath is True, return (<roots>::<heads>)."""
1439 1439 try:
1440 1440 return self.index.reachableroots2(
1441 1441 minroot, heads, roots, includepath
1442 1442 )
1443 1443 except AttributeError:
1444 1444 return dagop._reachablerootspure(
1445 1445 self.parentrevs, minroot, roots, heads, includepath
1446 1446 )
1447 1447
1448 1448 def ancestor(self, a, b):
1449 1449 """calculate the "best" common ancestor of nodes a and b"""
1450 1450
1451 1451 a, b = self.rev(a), self.rev(b)
1452 1452 try:
1453 1453 ancs = self.index.ancestors(a, b)
1454 1454 except (AttributeError, OverflowError):
1455 1455 ancs = ancestor.ancestors(self.parentrevs, a, b)
1456 1456 if ancs:
1457 1457 # choose a consistent winner when there's a tie
1458 1458 return min(map(self.node, ancs))
1459 1459 return self.nullid
1460 1460
1461 1461 def _match(self, id):
1462 1462 if isinstance(id, int):
1463 1463 # rev
1464 1464 return self.node(id)
1465 1465 if len(id) == self.nodeconstants.nodelen:
1466 1466 # possibly a binary node
1467 1467 # odds of a binary node being all hex in ASCII are 1 in 10**25
1468 1468 try:
1469 1469 node = id
1470 1470 self.rev(node) # quick search the index
1471 1471 return node
1472 1472 except error.LookupError:
1473 1473 pass # may be partial hex id
1474 1474 try:
1475 1475 # str(rev)
1476 1476 rev = int(id)
1477 1477 if b"%d" % rev != id:
1478 1478 raise ValueError
1479 1479 if rev < 0:
1480 1480 rev = len(self) + rev
1481 1481 if rev < 0 or rev >= len(self):
1482 1482 raise ValueError
1483 1483 return self.node(rev)
1484 1484 except (ValueError, OverflowError):
1485 1485 pass
1486 1486 if len(id) == 2 * self.nodeconstants.nodelen:
1487 1487 try:
1488 1488 # a full hex nodeid?
1489 1489 node = bin(id)
1490 1490 self.rev(node)
1491 1491 return node
1492 1492 except (TypeError, error.LookupError):
1493 1493 pass
1494 1494
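# Example (illustrative, `rl` a hypothetical revlog): _match() accepts
# several identifier forms:
#
#     rl._match(0)        # rev number -> node of rev 0
#     rl._match(b'0')     # str(rev) branch -> same node
#     rl._match(b'-1')    # negative revs count from the end -> tip
#     rl._match(hex(n))   # full 40-hex string -> binary node n
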
1495 1495 def _partialmatch(self, id):
1496 1496 # we don't care about wdirfilenodeids as they should always be full hashes
1497 1497 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1498 1498 try:
1499 1499 partial = self.index.partialmatch(id)
1500 1500 if partial and self.hasnode(partial):
1501 1501 if maybewdir:
1502 1502 # single 'ff...' match in radix tree, ambiguous with wdir
1503 1503 raise error.RevlogError
1504 1504 return partial
1505 1505 if maybewdir:
1506 1506 # no 'ff...' match in radix tree, wdir identified
1507 1507 raise error.WdirUnsupported
1508 1508 return None
1509 1509 except error.RevlogError:
1510 1510 # parsers.c radix tree lookup gave multiple matches
1511 1511 # fast path: for unfiltered changelog, radix tree is accurate
1512 1512 if not getattr(self, 'filteredrevs', None):
1513 1513 raise error.AmbiguousPrefixLookupError(
1514 1514 id, self.display_id, _(b'ambiguous identifier')
1515 1515 )
1516 1516 # fall through to slow path that filters hidden revisions
1517 1517 except (AttributeError, ValueError):
1518 1518 # we are pure python, or key was too short to search radix tree
1519 1519 pass
1520 1520
1521 1521 if id in self._pcache:
1522 1522 return self._pcache[id]
1523 1523
1524 1524 if len(id) <= 40:
1525 1525 try:
1526 1526 # hex(node)[:...]
1527 1527 l = len(id) // 2 # grab an even number of digits
1528 1528 prefix = bin(id[: l * 2])
1529 1529 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1530 1530 nl = [
1531 1531 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1532 1532 ]
1533 1533 if self.nodeconstants.nullhex.startswith(id):
1534 1534 nl.append(self.nullid)
1535 1535 if len(nl) > 0:
1536 1536 if len(nl) == 1 and not maybewdir:
1537 1537 self._pcache[id] = nl[0]
1538 1538 return nl[0]
1539 1539 raise error.AmbiguousPrefixLookupError(
1540 1540 id, self.display_id, _(b'ambiguous identifier')
1541 1541 )
1542 1542 if maybewdir:
1543 1543 raise error.WdirUnsupported
1544 1544 return None
1545 1545 except TypeError:
1546 1546 pass
1547 1547
1548 1548 def lookup(self, id):
1549 1549 """locate a node based on:
1550 1550 - revision number or str(revision number)
1551 1551 - nodeid or subset of hex nodeid
1552 1552 """
1553 1553 n = self._match(id)
1554 1554 if n is not None:
1555 1555 return n
1556 1556 n = self._partialmatch(id)
1557 1557 if n:
1558 1558 return n
1559 1559
1560 1560 raise error.LookupError(id, self.display_id, _(b'no match found'))
1561 1561
1562 1562 def shortest(self, node, minlength=1):
1563 1563 """Find the shortest unambiguous prefix that matches node."""
1564 1564
1565 1565 def isvalid(prefix):
1566 1566 try:
1567 1567 matchednode = self._partialmatch(prefix)
1568 1568 except error.AmbiguousPrefixLookupError:
1569 1569 return False
1570 1570 except error.WdirUnsupported:
1571 1571 # single 'ff...' match
1572 1572 return True
1573 1573 if matchednode is None:
1574 1574 raise error.LookupError(node, self.display_id, _(b'no node'))
1575 1575 return True
1576 1576
1577 1577 def maybewdir(prefix):
1578 1578 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1579 1579
1580 1580 hexnode = hex(node)
1581 1581
1582 1582 def disambiguate(hexnode, minlength):
1583 1583 """Disambiguate against wdirid."""
1584 1584 for length in range(minlength, len(hexnode) + 1):
1585 1585 prefix = hexnode[:length]
1586 1586 if not maybewdir(prefix):
1587 1587 return prefix
1588 1588
1589 1589 if not getattr(self, 'filteredrevs', None):
1590 1590 try:
1591 1591 length = max(self.index.shortest(node), minlength)
1592 1592 return disambiguate(hexnode, length)
1593 1593 except error.RevlogError:
1594 1594 if node != self.nodeconstants.wdirid:
1595 1595 raise error.LookupError(
1596 1596 node, self.display_id, _(b'no node')
1597 1597 )
1598 1598 except AttributeError:
1599 1599 # Fall through to pure code
1600 1600 pass
1601 1601
1602 1602 if node == self.nodeconstants.wdirid:
1603 1603 for length in range(minlength, len(hexnode) + 1):
1604 1604 prefix = hexnode[:length]
1605 1605 if isvalid(prefix):
1606 1606 return prefix
1607 1607
1608 1608 for length in range(minlength, len(hexnode) + 1):
1609 1609 prefix = hexnode[:length]
1610 1610 if isvalid(prefix):
1611 1611 return disambiguate(hexnode, length)
1612 1612
1613 1613 def cmp(self, node, text):
1614 1614 """compare text with a given file revision
1615 1615
1616 1616 returns True if text is different from what is stored.
1617 1617 """
1618 1618 p1, p2 = self.parents(node)
1619 1619 return storageutil.hashrevisionsha1(text, p1, p2) != node
1620 1620
1621 1621 def _cachesegment(self, offset, data):
1622 1622 """Add a segment to the revlog cache.
1623 1623
1624 1624 Accepts an absolute offset and the data that is at that location.
1625 1625 """
1626 1626 o, d = self._chunkcache
1627 1627 # try to add to existing cache
1628 1628 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1629 1629 self._chunkcache = o, d + data
1630 1630 else:
1631 1631 self._chunkcache = offset, data
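# (illustrative, hypothetical sizes) if the cache holds (o=0, 32KiB of
# data) and a segment arrives at offset 32768, the two are contiguous
# and under _chunksize, so the cache grows to (0, 48KiB); a segment at
# a non-adjacent offset simply replaces the cached window instead.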
1632 1632
1633 1633 def _readsegment(self, offset, length, df=None):
1634 1634 """Load a segment of raw data from the revlog.
1635 1635
1636 1636 Accepts an absolute offset, length to read, and an optional existing
1637 1637 file handle to read from.
1638 1638
1639 1639 If an existing file handle is passed, it will be seeked and the
1640 1640 original seek position will NOT be restored.
1641 1641
1642 1642 Returns a str or buffer of raw byte data.
1643 1643
1644 1644 Raises if the requested number of bytes could not be read.
1645 1645 """
1646 1646 # Cache data both forward and backward around the requested
1647 1647 # data, in a fixed size window. This helps speed up operations
1648 1648 # involving reading the revlog backwards.
1649 1649 cachesize = self._chunkcachesize
1650 1650 realoffset = offset & ~(cachesize - 1)
1651 1651 reallength = (
1652 1652 (offset + length + cachesize) & ~(cachesize - 1)
1653 1653 ) - realoffset
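# (illustrative, hypothetical values) with cachesize = 65536,
# offset = 70000 and length = 100:
#   realoffset = 70000 & ~65535 -> 65536
#   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 -> 65536
# i.e. we read the aligned window [65536, 131072), which covers the
# requested [70000, 70100) range with slack on both sides.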
1654 1654 with self._datareadfp(df) as df:
1655 1655 df.seek(realoffset)
1656 1656 d = df.read(reallength)
1657 1657
1658 1658 self._cachesegment(realoffset, d)
1659 1659 if offset != realoffset or reallength != length:
1660 1660 startoffset = offset - realoffset
1661 1661 if len(d) - startoffset < length:
1662 1662 raise error.RevlogError(
1663 1663 _(
1664 1664 b'partial read of revlog %s; expected %d bytes from '
1665 1665 b'offset %d, got %d'
1666 1666 )
1667 1667 % (
1668 1668 self._indexfile if self._inline else self._datafile,
1669 1669 length,
1670 1670 offset,
1671 1671 len(d) - startoffset,
1672 1672 )
1673 1673 )
1674 1674
1675 1675 return util.buffer(d, startoffset, length)
1676 1676
1677 1677 if len(d) < length:
1678 1678 raise error.RevlogError(
1679 1679 _(
1680 1680 b'partial read of revlog %s; expected %d bytes from offset '
1681 1681 b'%d, got %d'
1682 1682 )
1683 1683 % (
1684 1684 self._indexfile if self._inline else self._datafile,
1685 1685 length,
1686 1686 offset,
1687 1687 len(d),
1688 1688 )
1689 1689 )
1690 1690
1691 1691 return d
1692 1692
1693 1693 def _getsegment(self, offset, length, df=None):
1694 1694 """Obtain a segment of raw data from the revlog.
1695 1695
1696 1696 Accepts an absolute offset, length of bytes to obtain, and an
1697 1697 optional file handle to the already-opened revlog. If the file
1698 1698 handle is used, its original seek position will not be preserved.
1699 1699
1700 1700 Requests for data may be returned from a cache.
1701 1701
1702 1702 Returns a str or a buffer instance of raw byte data.
1703 1703 """
1704 1704 o, d = self._chunkcache
1705 1705 l = len(d)
1706 1706
1707 1707 # is it in the cache?
1708 1708 cachestart = offset - o
1709 1709 cacheend = cachestart + length
1710 1710 if cachestart >= 0 and cacheend <= l:
1711 1711 if cachestart == 0 and cacheend == l:
1712 1712 return d # avoid a copy
1713 1713 return util.buffer(d, cachestart, cacheend - cachestart)
1714 1714
1715 1715 return self._readsegment(offset, length, df=df)
1716 1716
1717 1717 def _getsegmentforrevs(self, startrev, endrev, df=None):
1718 1718 """Obtain a segment of raw data corresponding to a range of revisions.
1719 1719
1720 1720 Accepts the start and end revisions and an optional already-open
1721 1721 file handle to be used for reading. If the file handle is used, its
1722 1722 seek position will not be preserved.
1723 1723
1724 1724 Requests for data may be satisfied by a cache.
1725 1725
1726 1726 Returns a 2-tuple of (offset, data) for the requested range of
1727 1727 revisions. Offset is the integer offset from the beginning of the
1728 1728 revlog and data is a str or buffer of the raw byte data.
1729 1729
1730 1730 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1731 1731 to determine where each revision's data begins and ends.
1732 1732 """
1733 1733 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1734 1734 # (functions are expensive).
1735 1735 index = self.index
1736 1736 istart = index[startrev]
1737 1737 start = int(istart[0] >> 16)
1738 1738 if startrev == endrev:
1739 1739 end = start + istart[1]
1740 1740 else:
1741 1741 iend = index[endrev]
1742 1742 end = int(iend[0] >> 16) + iend[1]
1743 1743
1744 1744 if self._inline:
1745 1745 start += (startrev + 1) * self.index.entry_size
1746 1746 end += (endrev + 1) * self.index.entry_size
1747 1747 length = end - start
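# (illustrative) in an inline revlog the index entries and data chunks
# are interleaved in the .i file, so with entry_size = 64 the physical
# start of rev 3's chunk is its logical data offset plus 4 * 64 bytes
# of index entries (entries 0..3 precede it).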
1748 1748
1749 1749 return start, self._getsegment(start, length, df=df)
1750 1750
1751 1751 def _chunk(self, rev, df=None):
1752 1752 """Obtain a single decompressed chunk for a revision.
1753 1753
1754 1754 Accepts an integer revision and an optional already-open file handle
1755 1755 to be used for reading. If used, the seek position of the file will not
1756 1756 be preserved.
1757 1757
1758 1758 Returns a str holding uncompressed data for the requested revision.
1759 1759 """
1760 1760 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1761 1761
1762 1762 def _chunks(self, revs, df=None, targetsize=None):
1763 1763 """Obtain decompressed chunks for the specified revisions.
1764 1764
1765 1765 Accepts an iterable of numeric revisions that are assumed to be in
1766 1766 ascending order. Also accepts an optional already-open file handle
1767 1767 to be used for reading. If used, the seek position of the file will
1768 1768 not be preserved.
1769 1769
1770 1770 This function is similar to calling ``self._chunk()`` multiple times,
1771 1771 but is faster.
1772 1772
1773 1773 Returns a list with decompressed data for each requested revision.
1774 1774 """
1775 1775 if not revs:
1776 1776 return []
1777 1777 start = self.start
1778 1778 length = self.length
1779 1779 inline = self._inline
1780 1780 iosize = self.index.entry_size
1781 1781 buffer = util.buffer
1782 1782
1783 1783 l = []
1784 1784 ladd = l.append
1785 1785
1786 1786 if not self._withsparseread:
1787 1787 slicedchunks = (revs,)
1788 1788 else:
1789 1789 slicedchunks = deltautil.slicechunk(
1790 1790 self, revs, targetsize=targetsize
1791 1791 )
1792 1792
1793 1793 for revschunk in slicedchunks:
1794 1794 firstrev = revschunk[0]
1795 1795 # Skip trailing revisions with empty diff
1796 1796 for lastrev in revschunk[::-1]:
1797 1797 if length(lastrev) != 0:
1798 1798 break
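# (illustrative) with revschunk = [5, 6, 7] where rev 7 stores an
# empty delta, the read window is trimmed to end at rev 6; if every
# revision in the chunk is empty, lastrev falls back to the first one.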
1799 1799
1800 1800 try:
1801 1801 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1802 1802 except OverflowError:
1803 1803 # issue4215 - we can't cache a run of chunks greater than
1804 1804 # 2G on Windows
1805 1805 return [self._chunk(rev, df=df) for rev in revschunk]
1806 1806
1807 1807 decomp = self.decompress
1808 1808 for rev in revschunk:
1809 1809 chunkstart = start(rev)
1810 1810 if inline:
1811 1811 chunkstart += (rev + 1) * iosize
1812 1812 chunklength = length(rev)
1813 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1813 c = buffer(data, chunkstart - offset, chunklength)
1814 ladd(decomp(c))
1814 1815
1815 1816 return l
1816 1817
1817 1818 def _chunkclear(self):
1818 1819 """Clear the raw chunk cache."""
1819 1820 self._chunkcache = (0, b'')
1820 1821
1821 1822 def deltaparent(self, rev):
1822 1823 """return deltaparent of the given revision"""
1823 1824 base = self.index[rev][3]
1824 1825 if base == rev:
1825 1826 return nullrev
1826 1827 elif self._generaldelta:
1827 1828 return base
1828 1829 else:
1829 1830 return rev - 1
1830 1831
1831 1832 def issnapshot(self, rev):
1832 1833 """tells whether rev is a snapshot"""
1833 1834 if not self._sparserevlog:
1834 1835 return self.deltaparent(rev) == nullrev
1835 1836 elif util.safehasattr(self.index, b'issnapshot'):
1836 1837 # directly assign the method to cache the testing and access
1837 1838 self.issnapshot = self.index.issnapshot
1838 1839 return self.issnapshot(rev)
1839 1840 if rev == nullrev:
1840 1841 return True
1841 1842 entry = self.index[rev]
1842 1843 base = entry[3]
1843 1844 if base == rev:
1844 1845 return True
1845 1846 if base == nullrev:
1846 1847 return True
1847 1848 p1 = entry[5]
1848 1849 p2 = entry[6]
1849 1850 if base == p1 or base == p2:
1850 1851 return False
1851 1852 return self.issnapshot(base)
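# (illustrative) a rev stored as a delta against one of its parents is
# a plain delta (False above); a rev whose delta base is some other,
# unrelated rev is an intermediate snapshot only if that base is itself
# part of a snapshot chain, hence the recursion on `base`.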
1852 1853
1853 1854 def snapshotdepth(self, rev):
1854 1855 """number of snapshot in the chain before this one"""
1855 1856 if not self.issnapshot(rev):
1856 1857 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1857 1858 return len(self._deltachain(rev)[0]) - 1
1858 1859
1859 1860 def revdiff(self, rev1, rev2):
1860 1861 """return or calculate a delta between two revisions
1861 1862
1862 1863 The delta calculated is in binary form and is intended to be written to
1863 1864 revlog data directly. So this function needs raw revision data.
1864 1865 """
1865 1866 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1866 1867 return bytes(self._chunk(rev2))
1867 1868
1868 1869 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1869 1870
1870 1871 def _processflags(self, text, flags, operation, raw=False):
1871 1872 """deprecated entry point to access flag processors"""
1872 1873 msg = b'_processflag(...) use the specialized variant'
1873 1874 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1874 1875 if raw:
1875 1876 return text, flagutil.processflagsraw(self, text, flags)
1876 1877 elif operation == b'read':
1877 1878 return flagutil.processflagsread(self, text, flags)
1878 1879 else: # write operation
1879 1880 return flagutil.processflagswrite(self, text, flags)
1880 1881
1881 1882 def revision(self, nodeorrev, _df=None, raw=False):
1882 1883 """return an uncompressed revision of a given node or revision
1883 1884 number.
1884 1885
1885 1886 _df - an existing file handle to read from. (internal-only)
1886 1887 raw - an optional argument specifying if the revision data is to be
1887 1888 treated as raw data when applying flag transforms. 'raw' should be set
1888 1889 to True when generating changegroups or in debug commands.
1889 1890 """
1890 1891 if raw:
1891 1892 msg = (
1892 1893 b'revlog.revision(..., raw=True) is deprecated, '
1893 1894 b'use revlog.rawdata(...)'
1894 1895 )
1895 1896 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1896 1897 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1897 1898
1898 1899 def sidedata(self, nodeorrev, _df=None):
1899 1900 """a map of extra data related to the changeset but not part of the hash
1900 1901
1901 1902 This function currently returns a dictionary. However, a more advanced
1902 1903 mapping object will likely be used in the future for more
1903 1904 efficient/lazy code.
1904 1905 """
1905 1906 return self._revisiondata(nodeorrev, _df)[1]
1906 1907
1907 1908 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1908 1909 # deal with <nodeorrev> argument type
1909 1910 if isinstance(nodeorrev, int):
1910 1911 rev = nodeorrev
1911 1912 node = self.node(rev)
1912 1913 else:
1913 1914 node = nodeorrev
1914 1915 rev = None
1915 1916
1916 1917 # fast path the special `nullid` rev
1917 1918 if node == self.nullid:
1918 1919 return b"", {}
1919 1920
1920 1921 # ``rawtext`` is the text as stored inside the revlog. Might be the
1921 1922 # revision or might need to be processed to retrieve the revision.
1922 1923 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1923 1924
1924 1925 if self.hassidedata:
1925 1926 if rev is None:
1926 1927 rev = self.rev(node)
1927 1928 sidedata = self._sidedata(rev)
1928 1929 else:
1929 1930 sidedata = {}
1930 1931
1931 1932 if raw and validated:
1932 1933 # if we don't want to process the raw text and the raw
1933 1934 # text is cached, we can exit early.
1934 1935 return rawtext, sidedata
1935 1936 if rev is None:
1936 1937 rev = self.rev(node)
1938 1939 # the revlog's flags for this revision
1939 1940 # (usually alter its state or content)
1939 1940 flags = self.flags(rev)
1940 1941
1941 1942 if validated and flags == REVIDX_DEFAULT_FLAGS:
1942 1943 # no extra flags set, no flag processor runs, text = rawtext
1943 1944 return rawtext, sidedata
1944 1945
1945 1946 if raw:
1946 1947 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1947 1948 text = rawtext
1948 1949 else:
1949 1950 r = flagutil.processflagsread(self, rawtext, flags)
1950 1951 text, validatehash = r
1951 1952 if validatehash:
1952 1953 self.checkhash(text, node, rev=rev)
1953 1954 if not validated:
1954 1955 self._revisioncache = (node, rev, rawtext)
1955 1956
1956 1957 return text, sidedata
1957 1958
1958 1959 def _rawtext(self, node, rev, _df=None):
1959 1960 """return the possibly unvalidated rawtext for a revision
1960 1961
1961 1962 returns (rev, rawtext, validated)
1962 1963 """
1963 1964
1964 1965 # revision in the cache (could be useful to apply delta)
1965 1966 cachedrev = None
1966 1967 # An intermediate text to apply deltas to
1967 1968 basetext = None
1968 1969
1969 1970 # Check if we have the entry in cache
1970 1971 # The cache entry looks like (node, rev, rawtext)
1971 1972 if self._revisioncache:
1972 1973 if self._revisioncache[0] == node:
1973 1974 return (rev, self._revisioncache[2], True)
1974 1975 cachedrev = self._revisioncache[1]
1975 1976
1976 1977 if rev is None:
1977 1978 rev = self.rev(node)
1978 1979
1979 1980 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1980 1981 if stopped:
1981 1982 basetext = self._revisioncache[2]
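# (illustrative) if rev 7 is a delta against rev 5 and rev 5's text is
# in the revision cache, the chain stops early: the cached text serves
# as basetext and only the chunks after the stop point are read below.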
1982 1983
1983 1984 # drop cache to save memory, the caller is expected to
1984 1985 # update self._revisioncache after validating the text
1985 1986 self._revisioncache = None
1986 1987
1987 1988 targetsize = None
1988 1989 rawsize = self.index[rev][2]
1989 1990 if 0 <= rawsize:
1990 1991 targetsize = 4 * rawsize
1991 1992
1992 1993 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1993 1994 if basetext is None:
1994 1995 basetext = bytes(bins[0])
1995 1996 bins = bins[1:]
1996 1997
1997 1998 rawtext = mdiff.patches(basetext, bins)
1998 1999 del basetext # let us have a chance to free memory early
1999 2000 return (rev, rawtext, False)
2000 2001
2001 2002 def _sidedata(self, rev):
2002 2003 """Return the sidedata for a given revision number."""
2003 2004 index_entry = self.index[rev]
2004 2005 sidedata_offset = index_entry[8]
2005 2006 sidedata_size = index_entry[9]
2006 2007
2007 2008 if self._inline:
2008 2009 sidedata_offset += self.index.entry_size * (1 + rev)
2009 2010 if sidedata_size == 0:
2010 2011 return {}
2011 2012
2012 2013 segment = self._getsegment(sidedata_offset, sidedata_size)
2013 2014 sidedata = sidedatautil.deserialize_sidedata(segment)
2014 2015 return sidedata
2015 2016
2016 2017 def rawdata(self, nodeorrev, _df=None):
2017 2018 """return an uncompressed raw data of a given node or revision number.
2018 2019
2019 2020 _df - an existing file handle to read from. (internal-only)
2020 2021 """
2021 2022 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2022 2023
2023 2024 def hash(self, text, p1, p2):
2024 2025 """Compute a node hash.
2025 2026
2026 2027 Available as a function so that subclasses can replace the hash
2027 2028 as needed.
2028 2029 """
2029 2030 return storageutil.hashrevisionsha1(text, p1, p2)
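# (illustrative) the sha1 covers the parents as well as the text: the
# two parent nodes are concatenated in sorted order and prepended to
# the raw text before hashing, which is what lets cmp() above detect
# changes without reading the stored revision back.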
2030 2031
2031 2032 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2032 2033 """Check node hash integrity.
2033 2034
2034 2035 Available as a function so that subclasses can extend hash mismatch
2035 2036 behaviors as needed.
2036 2037 """
2037 2038 try:
2038 2039 if p1 is None and p2 is None:
2039 2040 p1, p2 = self.parents(node)
2040 2041 if node != self.hash(text, p1, p2):
2041 2042 # Clear the revision cache on hash failure. The revision cache
2042 2043 # only stores the raw revision and clearing the cache does have
2043 2044 # the side-effect that we won't have a cache hit when the raw
2044 2045 # revision data is accessed. But this case should be rare and
2045 2046 # it is extra work to teach the cache about the hash
2046 2047 # verification state.
2047 2048 if self._revisioncache and self._revisioncache[0] == node:
2048 2049 self._revisioncache = None
2049 2050
2050 2051 revornode = rev
2051 2052 if revornode is None:
2052 2053 revornode = templatefilters.short(hex(node))
2053 2054 raise error.RevlogError(
2054 2055 _(b"integrity check failed on %s:%s")
2055 2056 % (self.display_id, pycompat.bytestr(revornode))
2056 2057 )
2057 2058 except error.RevlogError:
2058 2059 if self._censorable and storageutil.iscensoredtext(text):
2059 2060 raise error.CensoredNodeError(self.display_id, node, text)
2060 2061 raise
2061 2062
2062 2063 def _enforceinlinesize(self, tr):
2063 2064 """Check if the revlog is too big for inline and convert if so.
2064 2065
2065 2066 This should be called after revisions are added to the revlog. If the
2066 2067 revlog has grown too large to be an inline revlog, it will convert it
2067 2068 to use multiple index and data files.
2068 2069 """
2069 2070 tiprev = len(self) - 1
2070 2071 total_size = self.start(tiprev) + self.length(tiprev)
2071 2072 if not self._inline or total_size < _maxinline:
2072 2073 return
2073 2074
2074 2075 troffset = tr.findoffset(self._indexfile)
2075 2076 if troffset is None:
2076 2077 raise error.RevlogError(
2077 2078 _(b"%s not found in the transaction") % self._indexfile
2078 2079 )
2079 2080 trindex = 0
2080 2081 tr.add(self._datafile, 0)
2081 2082
2082 2083 existing_handles = False
2083 2084 if self._writinghandles is not None:
2084 2085 existing_handles = True
2085 2086 fp = self._writinghandles[0]
2086 2087 fp.flush()
2087 2088 fp.close()
2088 2089 # We can't use the cached file handle after close(). So prevent
2089 2090 # its usage.
2090 2091 self._writinghandles = None
2091 2092
2092 2093 new_dfh = self._datafp(b'w+')
2093 2094 new_dfh.truncate(0) # drop any potentially existing data
2094 2095 try:
2095 2096 with self._indexfp() as read_ifh:
2096 2097 for r in self:
2097 2098 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2098 2099 if troffset <= self.start(r):
2099 2100 trindex = r
2100 2101 new_dfh.flush()
2101 2102
2102 2103 with self.__index_new_fp() as fp:
2103 2104 self._format_flags &= ~FLAG_INLINE_DATA
2104 2105 self._inline = False
2105 2106 for i in self:
2106 2107 e = self.index.entry_binary(i)
2107 2108 if i == 0 and self._docket is None:
2108 2109 header = self._format_flags | self._format_version
2109 2110 header = self.index.pack_header(header)
2110 2111 e = header + e
2111 2112 fp.write(e)
2112 2113 if self._docket is not None:
2113 2114 self._docket.index_end = fp.tell()
2114 2115 # the temp file replaces the real index when we exit the context
2115 2116 # manager
2116 2117
2117 2118 tr.replace(self._indexfile, trindex * self.index.entry_size)
2118 2119 nodemaputil.setup_persistent_nodemap(tr, self)
2119 2120 self._chunkclear()
2120 2121
2121 2122 if existing_handles:
2122 2123 # switched from inline to conventional; reopen the index
2123 2124 ifh = self.__index_write_fp()
2124 2125 self._writinghandles = (ifh, new_dfh)
2125 2126 new_dfh = None
2126 2127 finally:
2127 2128 if new_dfh is not None:
2128 2129 new_dfh.close()
2129 2130
2130 2131 def _nodeduplicatecallback(self, transaction, node):
2131 2132 """called when trying to add a node already stored."""
2132 2133
2133 2134 @contextlib.contextmanager
2134 2135 def _writing(self, transaction):
2135 2136 if self._trypending:
2136 2137 msg = b'try to write in a `trypending` revlog: %s'
2137 2138 msg %= self.display_id
2138 2139 raise error.ProgrammingError(msg)
2139 2140 if self._writinghandles is not None:
2140 2141 yield
2141 2142 else:
2142 2143 r = len(self)
2143 2144 dsize = 0
2144 2145 if r:
2145 2146 dsize = self.end(r - 1)
2146 2147 dfh = None
2147 2148 if not self._inline:
2148 2149 try:
2149 2150 dfh = self._datafp(b"r+")
2150 2151 if self._docket is None:
2151 2152 dfh.seek(0, os.SEEK_END)
2152 2153 else:
2153 2154 dfh.seek(self._docket.data_end, os.SEEK_SET)
2154 2155 except IOError as inst:
2155 2156 if inst.errno != errno.ENOENT:
2156 2157 raise
2157 2158 dfh = self._datafp(b"w+")
2158 2159 transaction.add(self._datafile, dsize)
2159 2160 try:
2160 2161 isize = r * self.index.entry_size
2161 2162 ifh = self.__index_write_fp()
2162 2163 if self._inline:
2163 2164 transaction.add(self._indexfile, dsize + isize)
2164 2165 else:
2165 2166 transaction.add(self._indexfile, isize)
2166 2167 try:
2167 2168 self._writinghandles = (ifh, dfh)
2168 2169 try:
2169 2170 yield
2170 2171 if self._docket is not None:
2171 2172 self._write_docket(transaction)
2172 2173 finally:
2173 2174 self._writinghandles = None
2174 2175 finally:
2175 2176 ifh.close()
2176 2177 finally:
2177 2178 if dfh is not None:
2178 2179 dfh.close()
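# (illustrative usage, hypothetical `rl` revlog and `tr` transaction)
# callers wrap a batch of writes so index/data handles are opened once:
#
#     with rl._writing(tr):
#         rl._addrevision(node, rawtext, tr, link, p1, p2, flags, None)
#
# addrawrevision() and addgroup() below do exactly this.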
2179 2180
2180 2181 def _write_docket(self, transaction):
2181 2182 """write the current docket on disk
2182 2183
2183 2184 Exists as a method to help the changelog implement transaction logic
2184 2185
2185 2186 We could also imagine using the same transaction logic for all revlogs
2186 2187 since dockets are cheap."""
2187 2188 self._docket.write(transaction)
2188 2189
2189 2190 def addrevision(
2190 2191 self,
2191 2192 text,
2192 2193 transaction,
2193 2194 link,
2194 2195 p1,
2195 2196 p2,
2196 2197 cachedelta=None,
2197 2198 node=None,
2198 2199 flags=REVIDX_DEFAULT_FLAGS,
2199 2200 deltacomputer=None,
2200 2201 sidedata=None,
2201 2202 ):
2202 2203 """add a revision to the log
2203 2204
2204 2205 text - the revision data to add
2205 2206 transaction - the transaction object used for rollback
2206 2207 link - the linkrev data to add
2207 2208 p1, p2 - the parent nodeids of the revision
2208 2209 cachedelta - an optional precomputed delta
2209 2210 node - nodeid of revision; typically node is not specified, and it is
2210 2211 computed by default as hash(text, p1, p2), however subclasses might
2211 2212 use a different hashing method (and override checkhash() in that case)
2212 2213 flags - the known flags to set on the revision
2213 2214 deltacomputer - an optional deltacomputer instance shared between
2214 2215 multiple calls
2215 2216 """
2216 2217 if link == nullrev:
2217 2218 raise error.RevlogError(
2218 2219 _(b"attempted to add linkrev -1 to %s") % self.display_id
2219 2220 )
2220 2221
2221 2222 if sidedata is None:
2222 2223 sidedata = {}
2223 2224 elif sidedata and not self.hassidedata:
2224 2225 raise error.ProgrammingError(
2225 2226 _(b"trying to add sidedata to a revlog who don't support them")
2226 2227 )
2227 2228
2228 2229 if flags:
2229 2230 node = node or self.hash(text, p1, p2)
2230 2231
2231 2232 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2232 2233
2233 2234 # If the flag processor modifies the revision data, ignore any provided
2234 2235 # cachedelta.
2235 2236 if rawtext != text:
2236 2237 cachedelta = None
2237 2238
2238 2239 if len(rawtext) > _maxentrysize:
2239 2240 raise error.RevlogError(
2240 2241 _(
2241 2242 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2242 2243 )
2243 2244 % (self.display_id, len(rawtext))
2244 2245 )
2245 2246
2246 2247 node = node or self.hash(rawtext, p1, p2)
2247 2248 rev = self.index.get_rev(node)
2248 2249 if rev is not None:
2249 2250 return rev
2250 2251
2251 2252 if validatehash:
2252 2253 self.checkhash(rawtext, node, p1=p1, p2=p2)
2253 2254
2254 2255 return self.addrawrevision(
2255 2256 rawtext,
2256 2257 transaction,
2257 2258 link,
2258 2259 p1,
2259 2260 p2,
2260 2261 node,
2261 2262 flags,
2262 2263 cachedelta=cachedelta,
2263 2264 deltacomputer=deltacomputer,
2264 2265 sidedata=sidedata,
2265 2266 )
2266 2267
2267 2268 def addrawrevision(
2268 2269 self,
2269 2270 rawtext,
2270 2271 transaction,
2271 2272 link,
2272 2273 p1,
2273 2274 p2,
2274 2275 node,
2275 2276 flags,
2276 2277 cachedelta=None,
2277 2278 deltacomputer=None,
2278 2279 sidedata=None,
2279 2280 ):
2280 2281 """add a raw revision with known flags, node and parents
2281 2282 useful when reusing a revision not stored in this revlog (e.g. received
2282 2283 over the wire, or read from an external bundle).
2283 2284 """
2284 2285 with self._writing(transaction):
2285 2286 return self._addrevision(
2286 2287 node,
2287 2288 rawtext,
2288 2289 transaction,
2289 2290 link,
2290 2291 p1,
2291 2292 p2,
2292 2293 flags,
2293 2294 cachedelta,
2294 2295 deltacomputer=deltacomputer,
2295 2296 sidedata=sidedata,
2296 2297 )
2297 2298
2298 2299 def compress(self, data):
2299 2300 """Generate a possibly-compressed representation of data."""
2300 2301 if not data:
2301 2302 return b'', data
2302 2303
2303 2304 compressed = self._compressor.compress(data)
2304 2305
2305 2306 if compressed:
2306 2307 # The revlog compressor added the header in the returned data.
2307 2308 return b'', compressed
2308 2309
2309 2310 if data[0:1] == b'\0':
2310 2311 return b'', data
2311 2312 return b'u', data
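# (illustrative) the returned header byte is what decompress() routes
# on: compress(b'plain text') may come back as (b'u', b'plain text')
# when compression would not help, while a zlib-compressed chunk is
# returned as (b'', b'x\x9c...') with the marker embedded in the data.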
2312 2313
2313 2314 def decompress(self, data):
2314 2315 """Decompress a revlog chunk.
2315 2316
2316 2317 The chunk is expected to begin with a header identifying the
2317 2318 format type so it can be routed to an appropriate decompressor.
2318 2319 """
2319 2320 if not data:
2320 2321 return data
2321 2322
2322 2323 # Revlogs are read much more frequently than they are written and many
2323 2324 # chunks only take microseconds to decompress, so performance is
2324 2325 # important here.
2325 2326 #
2326 2327 # We can make a few assumptions about revlogs:
2327 2328 #
2328 2329 # 1) the majority of chunks will be compressed (as opposed to inline
2329 2330 # raw data).
2330 2331 # 2) decompressing *any* data will likely be at least 10x slower than
2331 2332 # returning raw inline data.
2332 2333 # 3) we want to prioritize common and officially supported compression
2333 2334 # engines
2334 2335 #
2335 2336 # It follows that we want to optimize for "decompress compressed data
2336 2337 # when encoded with common and officially supported compression engines"
2337 2338 # case over "raw data" and "data encoded by less common or non-official
2338 2339 # compression engines." That is why we have the inline lookup first
2339 2340 # followed by the compengines lookup.
2340 2341 #
2341 2342 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2342 2343 # compressed chunks. And this matters for changelog and manifest reads.
2343 2344 t = data[0:1]
2344 2345
2345 2346 if t == b'x':
2346 2347 try:
2347 2348 return _zlibdecompress(data)
2348 2349 except zlib.error as e:
2349 2350 raise error.RevlogError(
2350 2351 _(b'revlog decompress error: %s')
2351 2352 % stringutil.forcebytestr(e)
2352 2353 )
2353 2354 # '\0' is more common than 'u' so it goes first.
2354 2355 elif t == b'\0':
2355 2356 return data
2356 2357 elif t == b'u':
2357 2358 return util.buffer(data, 1)
2358 2359
2359 2360 try:
2360 2361 compressor = self._decompressors[t]
2361 2362 except KeyError:
2362 2363 try:
2363 2364 engine = util.compengines.forrevlogheader(t)
2364 2365 compressor = engine.revlogcompressor(self._compengineopts)
2365 2366 self._decompressors[t] = compressor
2366 2367 except KeyError:
2367 2368 raise error.RevlogError(
2368 2369 _(b'unknown compression type %s') % binascii.hexlify(t)
2369 2370 )
2370 2371
2371 2372 return compressor.decompress(data)
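# (illustrative) header routing summary: b'x' -> zlib, b'\0' -> raw
# chunk that happens to start with NUL, b'u' -> stored verbatim behind
# a one-byte marker, anything else -> resolved through the registered
# compression engines (e.g. zstd).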
2372 2373
2373 2374 def _addrevision(
2374 2375 self,
2375 2376 node,
2376 2377 rawtext,
2377 2378 transaction,
2378 2379 link,
2379 2380 p1,
2380 2381 p2,
2381 2382 flags,
2382 2383 cachedelta,
2383 2384 alwayscache=False,
2384 2385 deltacomputer=None,
2385 2386 sidedata=None,
2386 2387 ):
2387 2388 """internal function to add revisions to the log
2388 2389
2389 2390 see addrevision for argument descriptions.
2390 2391
2391 2392 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2392 2393
2393 2394 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2394 2395 be used.
2395 2396
2396 2397 invariants:
2397 2398 - rawtext is optional (can be None); if not set, cachedelta must be set.
2398 2399 if both are set, they must correspond to each other.
2399 2400 """
2400 2401 if node == self.nullid:
2401 2402 raise error.RevlogError(
2402 2403 _(b"%s: attempt to add null revision") % self.display_id
2403 2404 )
2404 2405 if (
2405 2406 node == self.nodeconstants.wdirid
2406 2407 or node in self.nodeconstants.wdirfilenodeids
2407 2408 ):
2408 2409 raise error.RevlogError(
2409 2410 _(b"%s: attempt to add wdir revision") % self.display_id
2410 2411 )
2411 2412 if self._writinghandles is None:
2412 2413 msg = b'adding revision outside `revlog._writing` context'
2413 2414 raise error.ProgrammingError(msg)
2414 2415
2415 2416 if self._inline:
2416 2417 fh = self._writinghandles[0]
2417 2418 else:
2418 2419 fh = self._writinghandles[1]
2419 2420
2420 2421 btext = [rawtext]
2421 2422
2422 2423 curr = len(self)
2423 2424 prev = curr - 1
2424 2425
2425 2426 offset = self._get_data_offset(prev)
2426 2427
2427 2428 if self._concurrencychecker:
2428 2429 ifh, dfh = self._writinghandles
2429 2430 if self._inline:
2430 2431 # offset is "as if" it were in the .d file, so we need to add on
2431 2432 # the size of the entry metadata.
2432 2433 self._concurrencychecker(
2433 2434 ifh, self._indexfile, offset + curr * self.index.entry_size
2434 2435 )
2435 2436 else:
2436 2437 # Entries in the .i are a consistent size.
2437 2438 self._concurrencychecker(
2438 2439 ifh, self._indexfile, curr * self.index.entry_size
2439 2440 )
2440 2441 self._concurrencychecker(dfh, self._datafile, offset)
2441 2442
2442 2443 p1r, p2r = self.rev(p1), self.rev(p2)
2443 2444
2444 2445 # full versions are inserted when the needed deltas
2445 2446 # become comparable to the uncompressed text
2446 2447 if rawtext is None:
2447 2448 # need the rawtext size before it was changed by flag processors,
2448 2449 # which is the non-raw size. use revlog explicitly to avoid filelog's
2449 2450 # extra logic that might remove metadata size.
2450 2451 textlen = mdiff.patchedsize(
2451 2452 revlog.size(self, cachedelta[0]), cachedelta[1]
2452 2453 )
2453 2454 else:
2454 2455 textlen = len(rawtext)
2455 2456
2456 2457 if deltacomputer is None:
2457 2458 deltacomputer = deltautil.deltacomputer(self)
2458 2459
2459 2460 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2460 2461
2461 2462 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2462 2463
2463 2464 if sidedata and self.hassidedata:
2464 2465 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2465 2466 sidedata_offset = offset + deltainfo.deltalen
2466 2467 else:
2467 2468 serialized_sidedata = b""
2468 2469 # Don't store the offset if the sidedata is empty, that way
2469 2470 # we can easily detect empty sidedata and it will be no different
2470 2471 # from the ones we add manually.
2471 2472 sidedata_offset = 0
2472 2473
2473 2474 e = (
2474 2475 offset_type(offset, flags),
2475 2476 deltainfo.deltalen,
2476 2477 textlen,
2477 2478 deltainfo.base,
2478 2479 link,
2479 2480 p1r,
2480 2481 p2r,
2481 2482 node,
2482 2483 sidedata_offset,
2483 2484 len(serialized_sidedata),
2484 2485 COMP_MODE_INLINE,
2485 2486 )
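# (field guide for the tuple above) packed offset+flags, compressed
# length, rawtext length, delta-base rev, linkrev, p1 rev, p2 rev,
# node, sidedata offset, sidedata length, and the compression mode
# (COMP_MODE_INLINE, presumably: the chunk embeds its own marker).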
2486 2487
2487 2488 self.index.append(e)
2488 2489 entry = self.index.entry_binary(curr)
2489 2490 if curr == 0 and self._docket is None:
2490 2491 header = self._format_flags | self._format_version
2491 2492 header = self.index.pack_header(header)
2492 2493 entry = header + entry
2493 2494 self._writeentry(
2494 2495 transaction,
2495 2496 entry,
2496 2497 deltainfo.data,
2497 2498 link,
2498 2499 offset,
2499 2500 serialized_sidedata,
2500 2501 )
2501 2502
2502 2503 rawtext = btext[0]
2503 2504
2504 2505 if alwayscache and rawtext is None:
2505 2506 rawtext = deltacomputer.buildtext(revinfo, fh)
2506 2507
2507 2508 if type(rawtext) == bytes: # only accept immutable objects
2508 2509 self._revisioncache = (node, curr, rawtext)
2509 2510 self._chainbasecache[curr] = deltainfo.chainbase
2510 2511 return curr
2511 2512
2512 2513 def _get_data_offset(self, prev):
2513 2514 """Returns the current offset in the (in-transaction) data file.
2514 2515 Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
2515 2516 file to store that information: since sidedata can be rewritten to the
2516 2517 end of the data file within a transaction, you can have cases where, for
2517 2518 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2518 2519 to `n - 1`'s sidedata being written after `n`'s data.
2519 2520
2520 2521 TODO cache this in a docket file before getting out of experimental."""
2521 2522 if self._docket is None:
2522 2523 return self.end(prev)
2523 2524 else:
2524 2525 return self._docket.data_end
2525 2526
2526 2527 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2527 2528 # Files opened in a+ mode have inconsistent behavior on various
2528 2529 # platforms. Windows requires that a file positioning call be made
2529 2530 # when the file handle transitions between reads and writes. See
2530 2531 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2531 2532 # platforms, Python or the platform itself can be buggy. Some versions
2532 2533 # of Solaris have been observed to not append at the end of the file
2533 2534 # if the file was seeked to before the end. See issue4943 for more.
2534 2535 #
2535 2536 # We work around this issue by inserting a seek() before writing.
2536 2537 # Note: This is likely not necessary on Python 3. However, because
2537 2538 # the file handle is reused for reads and may be seeked there, we need
2538 2539 # to be careful before changing this.
2539 2540 if self._writinghandles is None:
2540 2541 msg = b'adding revision outside `revlog._writing` context'
2541 2542 raise error.ProgrammingError(msg)
2542 2543 ifh, dfh = self._writinghandles
2543 2544 if self._docket is None:
2544 2545 ifh.seek(0, os.SEEK_END)
2545 2546 else:
2546 2547 ifh.seek(self._docket.index_end, os.SEEK_SET)
2547 2548 if dfh:
2548 2549 if self._docket is None:
2549 2550 dfh.seek(0, os.SEEK_END)
2550 2551 else:
2551 2552 dfh.seek(self._docket.data_end, os.SEEK_SET)
2552 2553
2553 2554 curr = len(self) - 1
2554 2555 if not self._inline:
2555 2556 transaction.add(self._datafile, offset)
2556 2557 transaction.add(self._indexfile, curr * len(entry))
2557 2558 if data[0]:
2558 2559 dfh.write(data[0])
2559 2560 dfh.write(data[1])
2560 2561 if sidedata:
2561 2562 dfh.write(sidedata)
2562 2563 ifh.write(entry)
2563 2564 else:
2564 2565 offset += curr * self.index.entry_size
2565 2566 transaction.add(self._indexfile, offset)
2566 2567 ifh.write(entry)
2567 2568 ifh.write(data[0])
2568 2569 ifh.write(data[1])
2569 2570 if sidedata:
2570 2571 ifh.write(sidedata)
2571 2572 self._enforceinlinesize(transaction)
2572 2573 if self._docket is not None:
2573 2574 self._docket.index_end = self._writinghandles[0].tell()
2574 2575 self._docket.data_end = self._writinghandles[1].tell()
2575 2576
2576 2577 nodemaputil.setup_persistent_nodemap(transaction, self)
2577 2578
2578 2579 def addgroup(
2579 2580 self,
2580 2581 deltas,
2581 2582 linkmapper,
2582 2583 transaction,
2583 2584 alwayscache=False,
2584 2585 addrevisioncb=None,
2585 2586 duplicaterevisioncb=None,
2586 2587 ):
2587 2588 """
2588 2589 add a delta group
2589 2590
2590 2591 given a set of deltas, add them to the revision log. the
2591 2592 first delta is against its parent, which should be in our
2592 2593 log, the rest are against the previous delta.
2593 2594
2594 2595 If ``addrevisioncb`` is defined, it will be called with arguments of
2595 2596 this revlog and the revision that was added.
2596 2597 """
2597 2598
2598 2599 if self._adding_group:
2599 2600 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2600 2601
2601 2602 self._adding_group = True
2602 2603 empty = True
2603 2604 try:
2604 2605 with self._writing(transaction):
2605 2606 deltacomputer = deltautil.deltacomputer(self)
2606 2607 # loop through our set of deltas
2607 2608 for data in deltas:
2608 2609 (
2609 2610 node,
2610 2611 p1,
2611 2612 p2,
2612 2613 linknode,
2613 2614 deltabase,
2614 2615 delta,
2615 2616 flags,
2616 2617 sidedata,
2617 2618 ) = data
2618 2619 link = linkmapper(linknode)
2619 2620 flags = flags or REVIDX_DEFAULT_FLAGS
2620 2621
2621 2622 rev = self.index.get_rev(node)
2622 2623 if rev is not None:
2623 2624 # this can happen if two branches make the same change
2624 2625 self._nodeduplicatecallback(transaction, rev)
2625 2626 if duplicaterevisioncb:
2626 2627 duplicaterevisioncb(self, rev)
2627 2628 empty = False
2628 2629 continue
2629 2630
2630 2631 for p in (p1, p2):
2631 2632 if not self.index.has_node(p):
2632 2633 raise error.LookupError(
2633 2634 p, self.radix, _(b'unknown parent')
2634 2635 )
2635 2636
2636 2637 if not self.index.has_node(deltabase):
2637 2638 raise error.LookupError(
2638 2639 deltabase, self.display_id, _(b'unknown delta base')
2639 2640 )
2640 2641
2641 2642 baserev = self.rev(deltabase)
2642 2643
2643 2644 if baserev != nullrev and self.iscensored(baserev):
2644 2645 # if base is censored, delta must be full replacement in a
2645 2646 # single patch operation
2646 2647 hlen = struct.calcsize(b">lll")
2647 2648 oldlen = self.rawsize(baserev)
2648 2649 newlen = len(delta) - hlen
2649 2650 if delta[:hlen] != mdiff.replacediffheader(
2650 2651 oldlen, newlen
2651 2652 ):
2652 2653 raise error.CensoredBaseError(
2653 2654 self.display_id, self.node(baserev)
2654 2655 )
2655 2656
2656 2657 if not flags and self._peek_iscensored(baserev, delta):
2657 2658 flags |= REVIDX_ISCENSORED
2658 2659
2659 2660 # We assume consumers of addrevisioncb will want to retrieve
2660 2661 # the added revision, which will require a call to
2661 2662 # revision(). revision() will fast path if there is a cache
2662 2663 # hit. So, we tell _addrevision() to always cache in this case.
2663 2664 # We're only using addgroup() in the context of changegroup
2664 2665 # generation so the revision data can always be handled as raw
2665 2666 # by the flagprocessor.
2666 2667 rev = self._addrevision(
2667 2668 node,
2668 2669 None,
2669 2670 transaction,
2670 2671 link,
2671 2672 p1,
2672 2673 p2,
2673 2674 flags,
2674 2675 (baserev, delta),
2675 2676 alwayscache=alwayscache,
2676 2677 deltacomputer=deltacomputer,
2677 2678 sidedata=sidedata,
2678 2679 )
2679 2680
2680 2681 if addrevisioncb:
2681 2682 addrevisioncb(self, rev)
2682 2683 empty = False
2683 2684 finally:
2684 2685 self._adding_group = False
2685 2686 return not empty
2686 2687
2687 2688 def iscensored(self, rev):
2688 2689 """Check if a file revision is censored."""
2689 2690 if not self._censorable:
2690 2691 return False
2691 2692
2692 2693 return self.flags(rev) & REVIDX_ISCENSORED
2693 2694
2694 2695 def _peek_iscensored(self, baserev, delta):
2695 2696 """Quickly check if a delta produces a censored revision."""
2696 2697 if not self._censorable:
2697 2698 return False
2698 2699
2699 2700 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2700 2701
2701 2702 def getstrippoint(self, minlink):
2702 2703 """find the minimum rev that must be stripped to strip the linkrev
2703 2704
2704 2705 Returns a tuple containing the minimum rev and a set of all revs that
2705 2706 have linkrevs that will be broken by this strip.
2706 2707 """
2707 2708 return storageutil.resolvestripinfo(
2708 2709 minlink,
2709 2710 len(self) - 1,
2710 2711 self.headrevs(),
2711 2712 self.linkrev,
2712 2713 self.parentrevs,
2713 2714 )
2714 2715
2715 2716 def strip(self, minlink, transaction):
2716 2717 """truncate the revlog on the first revision with a linkrev >= minlink
2717 2718
2718 2719 This function is called when we're stripping revision minlink and
2719 2720 its descendants from the repository.
2720 2721
2721 2722 We have to remove all revisions with linkrev >= minlink, because
2722 2723 the equivalent changelog revisions will be renumbered after the
2723 2724 strip.
2724 2725
2725 2726 So we truncate the revlog on the first of these revisions, and
2726 2727 trust that the caller has saved the revisions that shouldn't be
2727 2728 removed and that it'll re-add them after this truncation.
2728 2729 """
2729 2730 if len(self) == 0:
2730 2731 return
2731 2732
2732 2733 rev, _ = self.getstrippoint(minlink)
2733 2734 if rev == len(self):
2734 2735 return
2735 2736
2736 2737 # first truncate the files on disk
2737 2738 data_end = self.start(rev)
2738 2739 if not self._inline:
2739 2740 transaction.add(self._datafile, data_end)
2740 2741 end = rev * self.index.entry_size
2741 2742 else:
2742 2743 end = data_end + (rev * self.index.entry_size)
2743 2744
2744 2745 transaction.add(self._indexfile, end)
2745 2746 if self._docket is not None:
2746 2747 # XXX we could leverage the docket while stripping. However it is
2747 2748 # not powerful enough at the time of this comment
2748 2749 self._docket.index_end = end
2749 2750 self._docket.data_end = data_end
2750 2751 self._docket.write(transaction, stripping=True)
2751 2752
2752 2753 # then reset internal state in memory to forget those revisions
2753 2754 self._revisioncache = None
2754 2755 self._chaininfocache = util.lrucachedict(500)
2755 2756 self._chunkclear()
2756 2757
2757 2758 del self.index[rev:-1]
2758 2759
2759 2760 def checksize(self):
2760 2761 """Check size of index and data files
2761 2762
2762 2763 return a (dd, di) tuple.
2763 2764 - dd: extra bytes for the "data" file
2764 2765 - di: extra bytes for the "index" file
2765 2766
2766 2767 A healthy revlog will return (0, 0).
2767 2768 """
2768 2769 expected = 0
2769 2770 if len(self):
2770 2771 expected = max(0, self.end(len(self) - 1))
2771 2772
2772 2773 try:
2773 2774 with self._datafp() as f:
2774 2775 f.seek(0, io.SEEK_END)
2775 2776 actual = f.tell()
2776 2777 dd = actual - expected
2777 2778 except IOError as inst:
2778 2779 if inst.errno != errno.ENOENT:
2779 2780 raise
2780 2781 dd = 0
2781 2782
2782 2783 try:
2783 2784 f = self.opener(self._indexfile)
2784 2785 f.seek(0, io.SEEK_END)
2785 2786 actual = f.tell()
2786 2787 f.close()
2787 2788 s = self.index.entry_size
2788 2789 i = max(0, actual // s)
2789 2790 di = actual - (i * s)
2790 2791 if self._inline:
2791 2792 databytes = 0
2792 2793 for r in self:
2793 2794 databytes += max(0, self.length(r))
2794 2795 dd = 0
2795 2796 di = actual - len(self) * s - databytes
2796 2797 except IOError as inst:
2797 2798 if inst.errno != errno.ENOENT:
2798 2799 raise
2799 2800 di = 0
2800 2801
2801 2802 return (dd, di)
2802 2803
2803 2804 def files(self):
2804 2805 res = [self._indexfile]
2805 2806 if not self._inline:
2806 2807 res.append(self._datafile)
2807 2808 return res
2808 2809
2809 2810 def emitrevisions(
2810 2811 self,
2811 2812 nodes,
2812 2813 nodesorder=None,
2813 2814 revisiondata=False,
2814 2815 assumehaveparentrevisions=False,
2815 2816 deltamode=repository.CG_DELTAMODE_STD,
2816 2817 sidedata_helpers=None,
2817 2818 ):
2818 2819 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2819 2820 raise error.ProgrammingError(
2820 2821 b'unhandled value for nodesorder: %s' % nodesorder
2821 2822 )
2822 2823
2823 2824 if nodesorder is None and not self._generaldelta:
2824 2825 nodesorder = b'storage'
2825 2826
2826 2827 if (
2827 2828 not self._storedeltachains
2828 2829 and deltamode != repository.CG_DELTAMODE_PREV
2829 2830 ):
2830 2831 deltamode = repository.CG_DELTAMODE_FULL
2831 2832
2832 2833 return storageutil.emitrevisions(
2833 2834 self,
2834 2835 nodes,
2835 2836 nodesorder,
2836 2837 revlogrevisiondelta,
2837 2838 deltaparentfn=self.deltaparent,
2838 2839 candeltafn=self.candelta,
2839 2840 rawsizefn=self.rawsize,
2840 2841 revdifffn=self.revdiff,
2841 2842 flagsfn=self.flags,
2842 2843 deltamode=deltamode,
2843 2844 revisiondata=revisiondata,
2844 2845 assumehaveparentrevisions=assumehaveparentrevisions,
2845 2846 sidedata_helpers=sidedata_helpers,
2846 2847 )
2847 2848
2848 2849 DELTAREUSEALWAYS = b'always'
2849 2850 DELTAREUSESAMEREVS = b'samerevs'
2850 2851 DELTAREUSENEVER = b'never'
2851 2852
2852 2853 DELTAREUSEFULLADD = b'fulladd'
2853 2854
2854 2855 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2855 2856
2856 2857 def clone(
2857 2858 self,
2858 2859 tr,
2859 2860 destrevlog,
2860 2861 addrevisioncb=None,
2861 2862 deltareuse=DELTAREUSESAMEREVS,
2862 2863 forcedeltabothparents=None,
2863 2864 sidedata_helpers=None,
2864 2865 ):
2865 2866 """Copy this revlog to another, possibly with format changes.
2866 2867
2867 2868 The destination revlog will contain the same revisions and nodes.
2868 2869 However, it may not be bit-for-bit identical due to e.g. delta encoding
2869 2870 differences.
2870 2871
2871 2872 The ``deltareuse`` argument controls how deltas from the existing revlog
2872 2873 are preserved in the destination revlog. The argument can have the
2873 2874 following values:
2874 2875
2875 2876 DELTAREUSEALWAYS
2876 2877 Deltas will always be reused (if possible), even if the destination
2877 2878 revlog would not select the same revisions for the delta. This is the
2878 2879 fastest mode of operation.
2879 2880 DELTAREUSESAMEREVS
2880 2881 Deltas will be reused if the destination revlog would pick the same
2881 2882 revisions for the delta. This mode strikes a balance between speed
2882 2883 and optimization.
2883 2884 DELTAREUSENEVER
2884 2885 Deltas will never be reused. This is the slowest mode of execution.
2885 2886 This mode can be used to recompute deltas (e.g. if the diff/delta
2886 2887 algorithm changes).
2887 2888 DELTAREUSEFULLADD
2888 2889 Revisions will be re-added as if they were new content. This is
2889 2890 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2890 2891 e.g. large file detection and handling.
2891 2892
2892 2893 Delta computation can be slow, so the choice of delta reuse policy can
2893 2894 significantly affect run time.
2894 2895
2895 2896 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2896 2897 two extremes. Deltas will be reused if they are appropriate. But if the
2897 2898 delta could choose a better revision, it will do so. This means if you
2898 2899 are converting a non-generaldelta revlog to a generaldelta revlog,
2899 2900 deltas will be recomputed if the delta's parent isn't a parent of the
2900 2901 revision.
2901 2902
2902 2903 In addition to the delta policy, the ``forcedeltabothparents``
2903 2904 argument controls whether to force compute deltas against both parents
2904 2905 for merges. If None, the current default is used.
2905 2906
2906 2907 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2907 2908 `sidedata_helpers`.
2908 2909 """
2909 2910 if deltareuse not in self.DELTAREUSEALL:
2910 2911 raise ValueError(
2911 2912 _(b'value for deltareuse invalid: %s') % deltareuse
2912 2913 )
2913 2914
2914 2915 if len(destrevlog):
2915 2916 raise ValueError(_(b'destination revlog is not empty'))
2916 2917
2917 2918 if getattr(self, 'filteredrevs', None):
2918 2919 raise ValueError(_(b'source revlog has filtered revisions'))
2919 2920 if getattr(destrevlog, 'filteredrevs', None):
2920 2921 raise ValueError(_(b'destination revlog has filtered revisions'))
2921 2922
2922 2923 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2923 2924 # if possible.
2924 2925 oldlazydelta = destrevlog._lazydelta
2925 2926 oldlazydeltabase = destrevlog._lazydeltabase
2926 2927 oldamd = destrevlog._deltabothparents
2927 2928
2928 2929 try:
2929 2930 if deltareuse == self.DELTAREUSEALWAYS:
2930 2931 destrevlog._lazydeltabase = True
2931 2932 destrevlog._lazydelta = True
2932 2933 elif deltareuse == self.DELTAREUSESAMEREVS:
2933 2934 destrevlog._lazydeltabase = False
2934 2935 destrevlog._lazydelta = True
2935 2936 elif deltareuse == self.DELTAREUSENEVER:
2936 2937 destrevlog._lazydeltabase = False
2937 2938 destrevlog._lazydelta = False
2938 2939
2939 2940 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2940 2941
2941 2942 self._clone(
2942 2943 tr,
2943 2944 destrevlog,
2944 2945 addrevisioncb,
2945 2946 deltareuse,
2946 2947 forcedeltabothparents,
2947 2948 sidedata_helpers,
2948 2949 )
2949 2950
2950 2951 finally:
2951 2952 destrevlog._lazydelta = oldlazydelta
2952 2953 destrevlog._lazydeltabase = oldlazydeltabase
2953 2954 destrevlog._deltabothparents = oldamd
2954 2955
2955 2956 def _clone(
2956 2957 self,
2957 2958 tr,
2958 2959 destrevlog,
2959 2960 addrevisioncb,
2960 2961 deltareuse,
2961 2962 forcedeltabothparents,
2962 2963 sidedata_helpers,
2963 2964 ):
2964 2965 """perform the core duty of `revlog.clone` after parameter processing"""
2965 2966 deltacomputer = deltautil.deltacomputer(destrevlog)
2966 2967 index = self.index
2967 2968 for rev in self:
2968 2969 entry = index[rev]
2969 2970
2970 2971 # Some classes override linkrev to take filtered revs into
2971 2972 # account. Use raw entry from index.
2972 2973 flags = entry[0] & 0xFFFF
2973 2974 linkrev = entry[4]
2974 2975 p1 = index[entry[5]][7]
2975 2976 p2 = index[entry[6]][7]
2976 2977 node = entry[7]
2977 2978
2978 2979 # (Possibly) reuse the delta from the revlog if allowed and
2979 2980 # the revlog chunk is a delta.
2980 2981 cachedelta = None
2981 2982 rawtext = None
2982 2983 if deltareuse == self.DELTAREUSEFULLADD:
2983 2984 text, sidedata = self._revisiondata(rev)
2984 2985
2985 2986 if sidedata_helpers is not None:
2986 2987 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2987 2988 self, sidedata_helpers, sidedata, rev
2988 2989 )
2989 2990 flags = flags | new_flags[0] & ~new_flags[1]
2990 2991
2991 2992 destrevlog.addrevision(
2992 2993 text,
2993 2994 tr,
2994 2995 linkrev,
2995 2996 p1,
2996 2997 p2,
2997 2998 cachedelta=cachedelta,
2998 2999 node=node,
2999 3000 flags=flags,
3000 3001 deltacomputer=deltacomputer,
3001 3002 sidedata=sidedata,
3002 3003 )
3003 3004 else:
3004 3005 if destrevlog._lazydelta:
3005 3006 dp = self.deltaparent(rev)
3006 3007 if dp != nullrev:
3007 3008 cachedelta = (dp, bytes(self._chunk(rev)))
3008 3009
3009 3010 sidedata = None
3010 3011 if not cachedelta:
3011 3012 rawtext, sidedata = self._revisiondata(rev)
3012 3013 if sidedata is None:
3013 3014 sidedata = self.sidedata(rev)
3014 3015
3015 3016 if sidedata_helpers is not None:
3016 3017 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3017 3018 self, sidedata_helpers, sidedata, rev
3018 3019 )
3019 3020 flags = flags | new_flags[0] & ~new_flags[1]
3020 3021
3021 3022 with destrevlog._writing(tr):
3022 3023 destrevlog._addrevision(
3023 3024 node,
3024 3025 rawtext,
3025 3026 tr,
3026 3027 linkrev,
3027 3028 p1,
3028 3029 p2,
3029 3030 flags,
3030 3031 cachedelta,
3031 3032 deltacomputer=deltacomputer,
3032 3033 sidedata=sidedata,
3033 3034 )
3034 3035
3035 3036 if addrevisioncb:
3036 3037 addrevisioncb(self, rev, node)
3037 3038
3038 3039 def censorrevision(self, tr, censornode, tombstone=b''):
3039 3040 if self._format_version == REVLOGV0:
3040 3041 raise error.RevlogError(
3041 3042 _(b'cannot censor with version %d revlogs')
3042 3043 % self._format_version
3043 3044 )
3044 3045
3045 3046 censorrev = self.rev(censornode)
3046 3047 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3047 3048
3048 3049 if len(tombstone) > self.rawsize(censorrev):
3049 3050 raise error.Abort(
3050 3051 _(b'censor tombstone must be no longer than censored data')
3051 3052 )
3052 3053
3053 3054 # Rewriting the revlog in place is hard. Our strategy for censoring is
3054 3055 # to create a new revlog, copy all revisions to it, then replace the
3055 3056 # revlogs on transaction close.
3056 3057 #
3057 3058 # This is a bit dangerous. We could easily have a mismatch of state.
3058 3059 newrl = revlog(
3059 3060 self.opener,
3060 3061 target=self.target,
3061 3062 radix=self.radix,
3062 3063 postfix=b'tmpcensored',
3063 3064 censorable=True,
3064 3065 )
3065 3066 newrl._format_version = self._format_version
3066 3067 newrl._format_flags = self._format_flags
3067 3068 newrl._generaldelta = self._generaldelta
3068 3069 newrl._parse_index = self._parse_index
3069 3070
3070 3071 for rev in self.revs():
3071 3072 node = self.node(rev)
3072 3073 p1, p2 = self.parents(node)
3073 3074
3074 3075 if rev == censorrev:
3075 3076 newrl.addrawrevision(
3076 3077 tombstone,
3077 3078 tr,
3078 3079 self.linkrev(censorrev),
3079 3080 p1,
3080 3081 p2,
3081 3082 censornode,
3082 3083 REVIDX_ISCENSORED,
3083 3084 )
3084 3085
3085 3086 if newrl.deltaparent(rev) != nullrev:
3086 3087 raise error.Abort(
3087 3088 _(
3088 3089 b'censored revision stored as delta; '
3089 3090 b'cannot censor'
3090 3091 ),
3091 3092 hint=_(
3092 3093 b'censoring of revlogs is not '
3093 3094 b'fully implemented; please report '
3094 3095 b'this bug'
3095 3096 ),
3096 3097 )
3097 3098 continue
3098 3099
3099 3100 if self.iscensored(rev):
3100 3101 if self.deltaparent(rev) != nullrev:
3101 3102 raise error.Abort(
3102 3103 _(
3103 3104 b'cannot censor due to censored '
3104 3105 b'revision having delta stored'
3105 3106 )
3106 3107 )
3107 3108 rawtext = self._chunk(rev)
3108 3109 else:
3109 3110 rawtext = self.rawdata(rev)
3110 3111
3111 3112 newrl.addrawrevision(
3112 3113 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3113 3114 )
3114 3115
3115 3116 tr.addbackup(self._indexfile, location=b'store')
3116 3117 if not self._inline:
3117 3118 tr.addbackup(self._datafile, location=b'store')
3118 3119
3119 3120 self.opener.rename(newrl._indexfile, self._indexfile)
3120 3121 if not self._inline:
3121 3122 self.opener.rename(newrl._datafile, self._datafile)
3122 3123
3123 3124 self.clearcaches()
3124 3125 self._loadindex()
3125 3126
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #       header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # ----------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

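            # Worked example (added for clarity, not from the original
            # source): for a renamed file whose resolved content is
            # b'data\n', the stored rawtext might look like
            #
            #   b'\x01\ncopy: a.txt\ncopyrev: <40 hex chars>\n\x01\ndata\n'
            #
            # LM is the length of everything up to and including the second
            # b'\x01\n'. len(rawtext) == L2 includes that header, while
            # read() strips it, giving len(read()) == L2 - LM.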
            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected')
                        % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

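    # Illustrative usage sketch (hypothetical driver, not part of the
    # original module): a verifier walks the yielded problems after seeding
    # `state` with the keys this method reads (b'expectedversion',
    # b'erroroncensored', and optionally b'skipflags'):
    #
    #   state = {b'expectedversion': 1, b'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       if problem.error:
    #           handle(problem.error, problem.node)
    #
    # `rl` is a hypothetical revlog instance and `handle` a hypothetical
    # reporting function; the real callers live in the verify machinery.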
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

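    # Illustrative usage sketch (added for clarity, not part of the original
    # module): each keyword argument opts into one key of the returned dict,
    # so a caller only pays for the metrics it asks for. `rl` is a
    # hypothetical revlog instance:
    #
    #   info = rl.storageinfo(revisionscount=True, trackedsize=True)
    #   # => {b'revisionscount': ..., b'trackedsize': ...}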
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
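        # `helpers` is the sidedata helpers tuple handed to
        # sidedatautil.run_sidedata_helpers below; judging from the indexing
        # here, helpers[1] holds the sidedata computers (generators) and
        # helpers[2] the removers. (This description is inferred from the
        # surrounding code, not taken from the original source.)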
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh = self._writinghandles
            if self._docket is not None:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
            else:
                dfh.seek(0, os.SEEK_END)

            current_offset = dfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
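                # entry[8] and entry[9] appear to be the sidedata offset and
                # length recorded in the index entry (both 0 when the
                # revision has no sidedata). This comment is an inference
                # added for clarity, not from the original source.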
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in
                    # the revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                )

                # the sidedata computation might have moved the file cursors
                # around
                dfh.seek(current_offset, os.SEEK_SET)
                dfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            if self._docket is not None:
                self._docket.data_end = dfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
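    # Illustrative usage sketch (hypothetical driver, not part of the
    # original module): the rewrite covers an inclusive revision range, so a
    # whole-revlog pass looks roughly like:
    #
    #   with repo.transaction(b'rewrite-sidedata') as tr:
    #       rl.rewrite_sidedata(tr, helpers, 0, len(rl) - 1)
    #
    # where `helpers` comes from the sidedata helper machinery and `rl` is a
    # non-inline revlog with sidedata support.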