revlog: update data file record before index rename...
Joerg Sonnenberger
r48060:d92310d4 default draft
@@ -1,3454 +1,3461 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 CHANGELOGV2,
39 39 COMP_MODE_DEFAULT,
40 40 COMP_MODE_INLINE,
41 41 COMP_MODE_PLAIN,
42 42 FEATURES_BY_VERSION,
43 43 FLAG_GENERALDELTA,
44 44 FLAG_INLINE_DATA,
45 45 INDEX_HEADER,
46 46 KIND_CHANGELOG,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 SUPPORTED_FLAGS,
56 56 )
57 57 from .revlogutils.flagutil import (
58 58 REVIDX_DEFAULT_FLAGS,
59 59 REVIDX_ELLIPSIS,
60 60 REVIDX_EXTSTORED,
61 61 REVIDX_FLAGS_ORDER,
62 62 REVIDX_HASCOPIESINFO,
63 63 REVIDX_ISCENSORED,
64 64 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 65 )
66 66 from .thirdparty import attr
67 67 from . import (
68 68 ancestor,
69 69 dagop,
70 70 error,
71 71 mdiff,
72 72 policy,
73 73 pycompat,
74 74 templatefilters,
75 75 util,
76 76 )
77 77 from .interfaces import (
78 78 repository,
79 79 util as interfaceutil,
80 80 )
81 81 from .revlogutils import (
82 82 deltas as deltautil,
83 83 docket as docketutil,
84 84 flagutil,
85 85 nodemap as nodemaputil,
86 86 revlogv0,
87 87 sidedata as sidedatautil,
88 88 )
89 89 from .utils import (
90 90 storageutil,
91 91 stringutil,
92 92 )
93 93
94 94 # blanked usage of all the names to prevent pyflakes complaints
95 95 # We need these names available in the module for extensions.
96 96
97 97 REVLOGV0
98 98 REVLOGV1
99 99 REVLOGV2
100 100 FLAG_INLINE_DATA
101 101 FLAG_GENERALDELTA
102 102 REVLOG_DEFAULT_FLAGS
103 103 REVLOG_DEFAULT_FORMAT
104 104 REVLOG_DEFAULT_VERSION
105 105 REVLOGV1_FLAGS
106 106 REVLOGV2_FLAGS
107 107 REVIDX_ISCENSORED
108 108 REVIDX_ELLIPSIS
109 109 REVIDX_HASCOPIESINFO
110 110 REVIDX_EXTSTORED
111 111 REVIDX_DEFAULT_FLAGS
112 112 REVIDX_FLAGS_ORDER
113 113 REVIDX_RAWTEXT_CHANGING_FLAGS
114 114
115 115 parsers = policy.importmod('parsers')
116 116 rustancestor = policy.importrust('ancestor')
117 117 rustdagop = policy.importrust('dagop')
118 118 rustrevlog = policy.importrust('revlog')
119 119
120 120 # Aliased for performance.
121 121 _zlibdecompress = zlib.decompress
122 122
123 123 # max size of revlog with inline data
124 124 _maxinline = 131072
125 125 _chunksize = 1048576
126 126
127 127 # Flag processors for REVIDX_ELLIPSIS.
128 128 def ellipsisreadprocessor(rl, text):
129 129 return text, False
130 130
131 131
132 132 def ellipsiswriteprocessor(rl, text):
133 133 return text, False
134 134
135 135
136 136 def ellipsisrawprocessor(rl, text):
137 137 return False
138 138
139 139
140 140 ellipsisprocessor = (
141 141 ellipsisreadprocessor,
142 142 ellipsiswriteprocessor,
143 143 ellipsisrawprocessor,
144 144 )
145 145
146 146
147 147 def offset_type(offset, type):
148 148 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
149 149 raise ValueError(b'unknown revlog index flags')
150 150 return int(int(offset) << 16 | type)
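The packing convention here matches the decoding described in the index documentation further down. A minimal sketch of the round trip (the offset value is made up):

```python
# Illustrative only: pack an offset with no flags, then decode it using
# the documented "offset = field >> 16" / "flags = field & 0xFFFF".
field = offset_type(4096, 0)
assert field >> 16 == 4096    # byte offset of the data chunk
assert field & 0xFFFF == 0    # REVIDX_* flags (must be known flags)
```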
151 151
152 152
153 153 def _verify_revision(rl, skipflags, state, node):
154 154 """Verify the integrity of the given revlog ``node`` while providing a hook
155 155 point for extensions to influence the operation."""
156 156 if skipflags:
157 157 state[b'skipread'].add(node)
158 158 else:
159 159 # Side-effect: read content and verify hash.
160 160 rl.revision(node)
161 161
162 162
163 163 # True if a fast implementation for persistent-nodemap is available
164 164 #
165 165 # We also consider we have a "fast" implementation in "pure" python because
166 166 # people using pure don't really have performance considerations (and a
167 167 # wheelbarrow of other slowness sources)
168 168 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
169 169 parsers, 'BaseIndexObject'
170 170 )
171 171
172 172
173 173 @attr.s(slots=True, frozen=True)
174 174 class _revisioninfo(object):
175 175 """Information about a revision that allows building its fulltext
176 176 node: expected hash of the revision
177 177 p1, p2: parent revs of the revision
178 178 btext: built text cache consisting of a one-element list
179 179 cachedelta: (baserev, uncompressed_delta) or None
180 180 flags: flags associated with the revision storage
181 181
182 182 One of btext[0] or cachedelta must be set.
183 183 """
184 184
185 185 node = attr.ib()
186 186 p1 = attr.ib()
187 187 p2 = attr.ib()
188 188 btext = attr.ib()
189 189 textlen = attr.ib()
190 190 cachedelta = attr.ib()
191 191 flags = attr.ib()
192 192
193 193
194 194 @interfaceutil.implementer(repository.irevisiondelta)
195 195 @attr.s(slots=True)
196 196 class revlogrevisiondelta(object):
197 197 node = attr.ib()
198 198 p1node = attr.ib()
199 199 p2node = attr.ib()
200 200 basenode = attr.ib()
201 201 flags = attr.ib()
202 202 baserevisionsize = attr.ib()
203 203 revision = attr.ib()
204 204 delta = attr.ib()
205 205 sidedata = attr.ib()
206 206 protocol_flags = attr.ib()
207 207 linknode = attr.ib(default=None)
208 208
209 209
210 210 @interfaceutil.implementer(repository.iverifyproblem)
211 211 @attr.s(frozen=True)
212 212 class revlogproblem(object):
213 213 warning = attr.ib(default=None)
214 214 error = attr.ib(default=None)
215 215 node = attr.ib(default=None)
216 216
217 217
218 218 def parse_index_v1(data, inline):
219 219 # call the C implementation to parse the index data
220 220 index, cache = parsers.parse_index2(data, inline)
221 221 return index, cache
222 222
223 223
224 224 def parse_index_v2(data, inline):
225 225 # call the C implementation to parse the index data
226 226 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
227 227 return index, cache
228 228
229 229
230 230 def parse_index_cl_v2(data, inline):
231 231 # call the C implementation to parse the index data
232 232 assert not inline
233 233 from .pure.parsers import parse_index_cl_v2
234 234
235 235 index, cache = parse_index_cl_v2(data)
236 236 return index, cache
237 237
238 238
239 239 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
240 240
241 241 def parse_index_v1_nodemap(data, inline):
242 242 index, cache = parsers.parse_index_devel_nodemap(data, inline)
243 243 return index, cache
244 244
245 245
246 246 else:
247 247 parse_index_v1_nodemap = None
248 248
249 249
250 250 def parse_index_v1_mixed(data, inline):
251 251 index, cache = parse_index_v1(data, inline)
252 252 return rustrevlog.MixedIndex(index), cache
253 253
254 254
255 255 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
256 256 # signed integer)
257 257 _maxentrysize = 0x7FFFFFFF
258 258
259 259
260 260 class revlog(object):
261 261 """
262 262 the underlying revision storage object
263 263
264 264 A revlog consists of two parts, an index and the revision data.
265 265
266 266 The index is a file with a fixed record size containing
267 267 information on each revision, including its nodeid (hash), the
268 268 nodeids of its parents, the position and offset of its data within
269 269 the data file, and the revision it's based on. Finally, each entry
270 270 contains a linkrev entry that can serve as a pointer to external
271 271 data.
272 272
273 273 The revision data itself is a linear collection of data chunks.
274 274 Each chunk represents a revision and is usually represented as a
275 275 delta against the previous chunk. To bound lookup time, runs of
276 276 deltas are limited to about 2 times the length of the original
277 277 version data. This makes retrieval of a version proportional to
278 278 its size, or O(1) relative to the number of revisions.
279 279
280 280 Both pieces of the revlog are written to in an append-only
281 281 fashion, which means we never need to rewrite a file to insert or
282 282 remove data, and can use some simple techniques to avoid the need
283 283 for locking while reading.
284 284
285 285 If checkambig, indexfile is opened with checkambig=True at
286 286 writing, to avoid file stat ambiguity.
287 287
288 288 If mmaplargeindex is True, and an mmapindexthreshold is set, the
289 289 index will be mmapped rather than read if it is larger than the
290 290 configured threshold.
291 291
292 292 If censorable is True, the revlog can have censored revisions.
293 293
294 294 If `upperboundcomp` is not None, this is the expected maximal gain from
295 295 compression for the data content.
296 296
297 297 `concurrencychecker` is an optional function that receives 3 arguments: a
298 298 file handle, a filename, and an expected position. It should check whether
299 299 the current position in the file handle is valid, and log/warn/fail (by
300 300 raising).
301 301
302 302
303 303 Internal details
304 304 ----------------
305 305
306 306 A large part of the revlog logic deals with revisions' "index entries", tuple
307 307 objects that contain the same "items" whatever the revlog version.
308 308 Different versions will have different ways of storing these items (sometimes
309 309 not having them at all), but the tuple will always be the same. New fields
310 310 are usually added at the end to avoid breaking existing code that relies
311 311 on the existing order. The fields are defined as follows:
312 312
313 313 [0] offset:
314 314 The byte index of the start of revision data chunk.
315 315 That value is shifted up by 16 bits. Use "offset = field >> 16" to
316 316 retrieve it.
317 317
318 318 flags:
319 319 A flag field that carries special information or changes the behavior
320 320 of the revision. (see `REVIDX_*` constants for details)
321 321 The flag field only occupies the first 16 bits of this field,
322 322 use "flags = field & 0xFFFF" to retrieve the value.
323 323
324 324 [1] compressed length:
325 325 The size, in bytes, of the chunk on disk
326 326
327 327 [2] uncompressed length:
328 328 The size, in bytes, of the full revision once reconstructed.
329 329
330 330 [3] base rev:
331 331 Either the base of the revision delta chain (without general
332 332 delta), or the base of the delta (stored in the data chunk)
333 333 with general delta.
334 334
335 335 [4] link rev:
336 336 Changelog revision number of the changeset introducing this
337 337 revision.
338 338
339 339 [5] parent 1 rev:
340 340 Revision number of the first parent
341 341
342 342 [6] parent 2 rev:
343 343 Revision number of the second parent
344 344
345 345 [7] node id:
346 346 The node id of the current revision
347 347
348 348 [8] sidedata offset:
349 349 The byte index of the start of the revision's side-data chunk.
350 350
351 351 [9] sidedata chunk length:
352 352 The size, in bytes, of the revision's side-data chunk.
353 353
354 354 [10] data compression mode:
355 355 two bits that detail the way the data chunk is compressed on disk.
356 356 (see "COMP_MODE_*" constants for details). For revlog version 0 and
357 357 1 this will always be COMP_MODE_INLINE.
358 358
359 359 [11] side-data compression mode:
360 360 two bits that detail the way the sidedata chunk is compressed on disk.
361 361 (see "COMP_MODE_*" constants for details)
362 362 """
363 363
364 364 _flagserrorclass = error.RevlogError
365 365
366 366 def __init__(
367 367 self,
368 368 opener,
369 369 target,
370 370 radix,
371 371 postfix=None, # only exist for `tmpcensored` now
372 372 checkambig=False,
373 373 mmaplargeindex=False,
374 374 censorable=False,
375 375 upperboundcomp=None,
376 376 persistentnodemap=False,
377 377 concurrencychecker=None,
378 378 trypending=False,
379 379 ):
380 380 """
381 381 create a revlog object
382 382
383 383 opener is a function that abstracts the file opening operation
384 384 and can be used to implement COW semantics or the like.
385 385
386 386 `target`: a (KIND, ID) tuple that identifies the content stored in
387 387 this revlog. It helps the rest of the code to understand what the revlog
388 388 is about without having to resort to heuristics and index filename
389 389 analysis. Note that this must reliably be set by normal code, but
390 390 test, debug, or performance measurement code might not set this to an
391 391 accurate value.
392 392 """
393 393 self.upperboundcomp = upperboundcomp
394 394
395 395 self.radix = radix
396 396
397 397 self._docket_file = None
398 398 self._indexfile = None
399 399 self._datafile = None
400 400 self._nodemap_file = None
401 401 self.postfix = postfix
402 402 self._trypending = trypending
403 403 self.opener = opener
404 404 if persistentnodemap:
405 405 self._nodemap_file = nodemaputil.get_nodemap_file(self)
406 406
407 407 assert target[0] in ALL_KINDS
408 408 assert len(target) == 2
409 409 self.target = target
410 410 # When True, indexfile is opened with checkambig=True at writing, to
411 411 # avoid file stat ambiguity.
412 412 self._checkambig = checkambig
413 413 self._mmaplargeindex = mmaplargeindex
414 414 self._censorable = censorable
415 415 # 3-tuple of (node, rev, text) for a raw revision.
416 416 self._revisioncache = None
417 417 # Maps rev to chain base rev.
418 418 self._chainbasecache = util.lrucachedict(100)
419 419 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
420 420 self._chunkcache = (0, b'')
421 421 # How much data to read and cache into the raw revlog data cache.
422 422 self._chunkcachesize = 65536
423 423 self._maxchainlen = None
424 424 self._deltabothparents = True
425 425 self.index = None
426 426 self._docket = None
427 427 self._nodemap_docket = None
428 428 # Mapping of partial identifiers to full nodes.
429 429 self._pcache = {}
430 430 # Mapping of revision integer to full node.
431 431 self._compengine = b'zlib'
432 432 self._compengineopts = {}
433 433 self._maxdeltachainspan = -1
434 434 self._withsparseread = False
435 435 self._sparserevlog = False
436 436 self.hassidedata = False
437 437 self._srdensitythreshold = 0.50
438 438 self._srmingapsize = 262144
439 439
440 440 # Make copy of flag processors so each revlog instance can support
441 441 # custom flags.
442 442 self._flagprocessors = dict(flagutil.flagprocessors)
443 443
444 444 # 2-tuple of file handles being used for active writing.
445 445 self._writinghandles = None
446 446 # prevent nesting of addgroup
447 447 self._adding_group = None
448 448
449 449 self._loadindex()
450 450
451 451 self._concurrencychecker = concurrencychecker
452 452
453 453 def _init_opts(self):
454 454 """process options (from above/config) to setup associated default revlog mode
455 455
456 456 These values might be affected when actually reading on-disk information.
457 457
458 458 The relevant values are returned for use in _loadindex().
459 459
460 460 * newversionflags:
461 461 version header to use if we need to create a new revlog
462 462
463 463 * mmapindexthreshold:
464 464 minimal index size at which to start using mmap
465 465
466 466 * force_nodemap:
467 467 force the usage of a "development" version of the nodemap code
468 468 """
469 469 mmapindexthreshold = None
470 470 opts = self.opener.options
471 471
472 472 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
473 473 new_header = CHANGELOGV2
474 474 elif b'revlogv2' in opts:
475 475 new_header = REVLOGV2
476 476 elif b'revlogv1' in opts:
477 477 new_header = REVLOGV1 | FLAG_INLINE_DATA
478 478 if b'generaldelta' in opts:
479 479 new_header |= FLAG_GENERALDELTA
480 480 elif b'revlogv0' in self.opener.options:
481 481 new_header = REVLOGV0
482 482 else:
483 483 new_header = REVLOG_DEFAULT_VERSION
484 484
485 485 if b'chunkcachesize' in opts:
486 486 self._chunkcachesize = opts[b'chunkcachesize']
487 487 if b'maxchainlen' in opts:
488 488 self._maxchainlen = opts[b'maxchainlen']
489 489 if b'deltabothparents' in opts:
490 490 self._deltabothparents = opts[b'deltabothparents']
491 491 self._lazydelta = bool(opts.get(b'lazydelta', True))
492 492 self._lazydeltabase = False
493 493 if self._lazydelta:
494 494 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
495 495 if b'compengine' in opts:
496 496 self._compengine = opts[b'compengine']
497 497 if b'zlib.level' in opts:
498 498 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
499 499 if b'zstd.level' in opts:
500 500 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
501 501 if b'maxdeltachainspan' in opts:
502 502 self._maxdeltachainspan = opts[b'maxdeltachainspan']
503 503 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
504 504 mmapindexthreshold = opts[b'mmapindexthreshold']
505 505 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
506 506 withsparseread = bool(opts.get(b'with-sparse-read', False))
507 507 # sparse-revlog forces sparse-read
508 508 self._withsparseread = self._sparserevlog or withsparseread
509 509 if b'sparse-read-density-threshold' in opts:
510 510 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
511 511 if b'sparse-read-min-gap-size' in opts:
512 512 self._srmingapsize = opts[b'sparse-read-min-gap-size']
513 513 if opts.get(b'enableellipsis'):
514 514 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
515 515
516 516 # revlog v0 doesn't have flag processors
517 517 for flag, processor in pycompat.iteritems(
518 518 opts.get(b'flagprocessors', {})
519 519 ):
520 520 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
521 521
522 522 if self._chunkcachesize <= 0:
523 523 raise error.RevlogError(
524 524 _(b'revlog chunk cache size %r is not greater than 0')
525 525 % self._chunkcachesize
526 526 )
527 527 elif self._chunkcachesize & (self._chunkcachesize - 1):
528 528 raise error.RevlogError(
529 529 _(b'revlog chunk cache size %r is not a power of 2')
530 530 % self._chunkcachesize
531 531 )
532 532 force_nodemap = opts.get(b'devel-force-nodemap', False)
533 533 return new_header, mmapindexthreshold, force_nodemap
534 534
535 535 def _get_data(self, filepath, mmap_threshold, size=None):
536 536 """return a file content with or without mmap
537 537
538 538 If the file is missing return the empty string"""
539 539 try:
540 540 with self.opener(filepath) as fp:
541 541 if mmap_threshold is not None:
542 542 file_size = self.opener.fstat(fp).st_size
543 543 if file_size >= mmap_threshold:
544 544 if size is not None:
545 545 # avoid potential mmap crash
546 546 size = min(file_size, size)
547 547 # TODO: should .close() to release resources without
548 548 # relying on Python GC
549 549 if size is None:
550 550 return util.buffer(util.mmapread(fp))
551 551 else:
552 552 return util.buffer(util.mmapread(fp, size))
553 553 if size is None:
554 554 return fp.read()
555 555 else:
556 556 return fp.read(size)
557 557 except IOError as inst:
558 558 if inst.errno != errno.ENOENT:
559 559 raise
560 560 return b''
561 561
562 562 def _loadindex(self):
563 563
564 564 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
565 565
566 566 if self.postfix is not None:
567 567 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
568 568 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
569 569 entry_point = b'%s.i.a' % self.radix
570 570 else:
571 571 entry_point = b'%s.i' % self.radix
572 572
573 573 entry_data = b''
574 574 self._initempty = True
575 575 entry_data = self._get_data(entry_point, mmapindexthreshold)
576 576 if len(entry_data) > 0:
577 577 header = INDEX_HEADER.unpack(entry_data[:4])[0]
578 578 self._initempty = False
579 579 else:
580 580 header = new_header
581 581
582 582 self._format_flags = header & ~0xFFFF
583 583 self._format_version = header & 0xFFFF
584 584
585 585 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
586 586 if supported_flags is None:
587 587 msg = _(b'unknown version (%d) in revlog %s')
588 588 msg %= (self._format_version, self.display_id)
589 589 raise error.RevlogError(msg)
590 590 elif self._format_flags & ~supported_flags:
591 591 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
592 592 display_flag = self._format_flags >> 16
593 593 msg %= (display_flag, self._format_version, self.display_id)
594 594 raise error.RevlogError(msg)
595 595
596 596 features = FEATURES_BY_VERSION[self._format_version]
597 597 self._inline = features[b'inline'](self._format_flags)
598 598 self._generaldelta = features[b'generaldelta'](self._format_flags)
599 599 self.hassidedata = features[b'sidedata']
600 600
601 601 if not features[b'docket']:
602 602 self._indexfile = entry_point
603 603 index_data = entry_data
604 604 else:
605 605 self._docket_file = entry_point
606 606 if self._initempty:
607 607 self._docket = docketutil.default_docket(self, header)
608 608 else:
609 609 self._docket = docketutil.parse_docket(
610 610 self, entry_data, use_pending=self._trypending
611 611 )
612 612 self._indexfile = self._docket.index_filepath()
613 613 index_data = b''
614 614 index_size = self._docket.index_end
615 615 if index_size > 0:
616 616 index_data = self._get_data(
617 617 self._indexfile, mmapindexthreshold, size=index_size
618 618 )
619 619 if len(index_data) < index_size:
620 620 msg = _(b'too few index data for %s: got %d, expected %d')
621 621 msg %= (self.display_id, len(index_data), index_size)
622 622 raise error.RevlogError(msg)
623 623
624 624 self._inline = False
625 625 # generaldelta implied by version 2 revlogs.
626 626 self._generaldelta = True
627 627 # the logic for persistent nodemap will be dealt with within the
628 628 # main docket, so disable it for now.
629 629 self._nodemap_file = None
630 630
631 631 if self.postfix is None:
632 632 self._datafile = b'%s.d' % self.radix
633 633 else:
634 634 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
635 635
636 636 self.nodeconstants = sha1nodeconstants
637 637 self.nullid = self.nodeconstants.nullid
638 638
639 639 # sparse-revlog can't be on without general-delta (issue6056)
640 640 if not self._generaldelta:
641 641 self._sparserevlog = False
642 642
643 643 self._storedeltachains = True
644 644
645 645 devel_nodemap = (
646 646 self._nodemap_file
647 647 and force_nodemap
648 648 and parse_index_v1_nodemap is not None
649 649 )
650 650
651 651 use_rust_index = False
652 652 if rustrevlog is not None:
653 653 if self._nodemap_file is not None:
654 654 use_rust_index = True
655 655 else:
656 656 use_rust_index = self.opener.options.get(b'rust.index')
657 657
658 658 self._parse_index = parse_index_v1
659 659 if self._format_version == REVLOGV0:
660 660 self._parse_index = revlogv0.parse_index_v0
661 661 elif self._format_version == REVLOGV2:
662 662 self._parse_index = parse_index_v2
663 663 elif self._format_version == CHANGELOGV2:
664 664 self._parse_index = parse_index_cl_v2
665 665 elif devel_nodemap:
666 666 self._parse_index = parse_index_v1_nodemap
667 667 elif use_rust_index:
668 668 self._parse_index = parse_index_v1_mixed
669 669 try:
670 670 d = self._parse_index(index_data, self._inline)
671 671 index, _chunkcache = d
672 672 use_nodemap = (
673 673 not self._inline
674 674 and self._nodemap_file is not None
675 675 and util.safehasattr(index, 'update_nodemap_data')
676 676 )
677 677 if use_nodemap:
678 678 nodemap_data = nodemaputil.persisted_data(self)
679 679 if nodemap_data is not None:
680 680 docket = nodemap_data[0]
681 681 if (
682 682 len(d[0]) > docket.tip_rev
683 683 and d[0][docket.tip_rev][7] == docket.tip_node
684 684 ):
685 685 # no changelog tampering
686 686 self._nodemap_docket = docket
687 687 index.update_nodemap_data(*nodemap_data)
688 688 except (ValueError, IndexError):
689 689 raise error.RevlogError(
690 690 _(b"index %s is corrupted") % self.display_id
691 691 )
692 692 self.index, self._chunkcache = d
693 693 if not self._chunkcache:
694 694 self._chunkclear()
695 695 # revnum -> (chain-length, sum-delta-length)
696 696 self._chaininfocache = util.lrucachedict(500)
697 697 # revlog header -> revlog compressor
698 698 self._decompressors = {}
699 699
700 700 @util.propertycache
701 701 def revlog_kind(self):
702 702 return self.target[0]
703 703
704 704 @util.propertycache
705 705 def display_id(self):
706 706 """The public facing "ID" of the revlog that we use in message"""
707 707 # Maybe we should build a user facing representation of
708 708 # revlog.target instead of using `self.radix`
709 709 return self.radix
710 710
711 711 def _get_decompressor(self, t):
712 712 try:
713 713 compressor = self._decompressors[t]
714 714 except KeyError:
715 715 try:
716 716 engine = util.compengines.forrevlogheader(t)
717 717 compressor = engine.revlogcompressor(self._compengineopts)
718 718 self._decompressors[t] = compressor
719 719 except KeyError:
720 720 raise error.RevlogError(
721 721 _(b'unknown compression type %s') % binascii.hexlify(t)
722 722 )
723 723 return compressor
724 724
725 725 @util.propertycache
726 726 def _compressor(self):
727 727 engine = util.compengines[self._compengine]
728 728 return engine.revlogcompressor(self._compengineopts)
729 729
730 730 @util.propertycache
731 731 def _decompressor(self):
732 732 """the default decompressor"""
733 733 if self._docket is None:
734 734 return None
735 735 t = self._docket.default_compression_header
736 736 c = self._get_decompressor(t)
737 737 return c.decompress
738 738
739 739 def _indexfp(self):
740 740 """file object for the revlog's index file"""
741 741 return self.opener(self._indexfile, mode=b"r")
742 742
743 743 def __index_write_fp(self):
744 744 # You should not use this directly; use `_writing` instead
745 745 try:
746 746 f = self.opener(
747 747 self._indexfile, mode=b"r+", checkambig=self._checkambig
748 748 )
749 749 if self._docket is None:
750 750 f.seek(0, os.SEEK_END)
751 751 else:
752 752 f.seek(self._docket.index_end, os.SEEK_SET)
753 753 return f
754 754 except IOError as inst:
755 755 if inst.errno != errno.ENOENT:
756 756 raise
757 757 return self.opener(
758 758 self._indexfile, mode=b"w+", checkambig=self._checkambig
759 759 )
760 760
761 761 def __index_new_fp(self):
762 762 # You should not use this unless you are upgrading from inline revlog
763 763 return self.opener(
764 764 self._indexfile,
765 765 mode=b"w",
766 766 checkambig=self._checkambig,
767 767 atomictemp=True,
768 768 )
769 769
770 770 def _datafp(self, mode=b'r'):
771 771 """file object for the revlog's data file"""
772 772 return self.opener(self._datafile, mode=mode)
773 773
774 774 @contextlib.contextmanager
775 775 def _datareadfp(self, existingfp=None):
776 776 """file object suitable to read data"""
777 777 # Use explicit file handle, if given.
778 778 if existingfp is not None:
779 779 yield existingfp
780 780
781 781 # Use a file handle being actively used for writes, if available.
782 782 # There is some danger to doing this because reads will seek the
783 783 # file. However, _writeentry() performs a SEEK_END before all writes,
784 784 # so we should be safe.
785 785 elif self._writinghandles:
786 786 if self._inline:
787 787 yield self._writinghandles[0]
788 788 else:
789 789 yield self._writinghandles[1]
790 790
791 791 # Otherwise open a new file handle.
792 792 else:
793 793 if self._inline:
794 794 func = self._indexfp
795 795 else:
796 796 func = self._datafp
797 797 with func() as fp:
798 798 yield fp
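A hypothetical use of this internal context manager, assuming `rl` is a revlog instance (for inline revlogs the yielded handle is the index file):

```python
# Sketch: read a few raw bytes from wherever the revision data lives.
with rl._datareadfp() as fp:
    fp.seek(0)
    head = fp.read(4)
```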
799 799
800 800 def tiprev(self):
801 801 return len(self.index) - 1
802 802
803 803 def tip(self):
804 804 return self.node(self.tiprev())
805 805
806 806 def __contains__(self, rev):
807 807 return 0 <= rev < len(self)
808 808
809 809 def __len__(self):
810 810 return len(self.index)
811 811
812 812 def __iter__(self):
813 813 return iter(pycompat.xrange(len(self)))
814 814
815 815 def revs(self, start=0, stop=None):
816 816 """iterate over all rev in this revlog (from start to stop)"""
817 817 return storageutil.iterrevs(len(self), start=start, stop=stop)
818 818
819 819 @property
820 820 def nodemap(self):
821 821 msg = (
822 822 b"revlog.nodemap is deprecated, "
823 823 b"use revlog.index.[has_node|rev|get_rev]"
824 824 )
825 825 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
826 826 return self.index.nodemap
827 827
828 828 @property
829 829 def _nodecache(self):
830 830 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
831 831 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
832 832 return self.index.nodemap
833 833
834 834 def hasnode(self, node):
835 835 try:
836 836 self.rev(node)
837 837 return True
838 838 except KeyError:
839 839 return False
840 840
841 841 def candelta(self, baserev, rev):
842 842 """whether two revisions (baserev, rev) can be delta-ed or not"""
843 843 # Disable delta if either rev requires a content-changing flag
844 844 # processor (ex. LFS). This is because such a flag processor can alter
845 845 # the rawtext content that the delta will be based on, and two clients
846 846 # could have the same revlog node with different flags (i.e. different
847 847 # rawtext contents) and the delta could be incompatible.
848 848 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
849 849 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
850 850 ):
851 851 return False
852 852 return True
853 853
854 854 def update_caches(self, transaction):
855 855 if self._nodemap_file is not None:
856 856 if transaction is None:
857 857 nodemaputil.update_persistent_nodemap(self)
858 858 else:
859 859 nodemaputil.setup_persistent_nodemap(transaction, self)
860 860
861 861 def clearcaches(self):
862 862 self._revisioncache = None
863 863 self._chainbasecache.clear()
864 864 self._chunkcache = (0, b'')
865 865 self._pcache = {}
866 866 self._nodemap_docket = None
867 867 self.index.clearcaches()
868 868 # The python code is the one responsible for validating the docket, so
869 869 # we end up having to refresh it here.
870 870 use_nodemap = (
871 871 not self._inline
872 872 and self._nodemap_file is not None
873 873 and util.safehasattr(self.index, 'update_nodemap_data')
874 874 )
875 875 if use_nodemap:
876 876 nodemap_data = nodemaputil.persisted_data(self)
877 877 if nodemap_data is not None:
878 878 self._nodemap_docket = nodemap_data[0]
879 879 self.index.update_nodemap_data(*nodemap_data)
880 880
881 881 def rev(self, node):
882 882 try:
883 883 return self.index.rev(node)
884 884 except TypeError:
885 885 raise
886 886 except error.RevlogError:
887 887 # parsers.c radix tree lookup failed
888 888 if (
889 889 node == self.nodeconstants.wdirid
890 890 or node in self.nodeconstants.wdirfilenodeids
891 891 ):
892 892 raise error.WdirUnsupported
893 893 raise error.LookupError(node, self.display_id, _(b'no node'))
894 894
895 895 # Accessors for index entries.
896 896
897 897 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
898 898 # are flags.
899 899 def start(self, rev):
900 900 return int(self.index[rev][0] >> 16)
901 901
902 902 def flags(self, rev):
903 903 return self.index[rev][0] & 0xFFFF
904 904
905 905 def length(self, rev):
906 906 return self.index[rev][1]
907 907
908 908 def sidedata_length(self, rev):
909 909 if not self.hassidedata:
910 910 return 0
911 911 return self.index[rev][9]
912 912
913 913 def rawsize(self, rev):
914 914 """return the length of the uncompressed text for a given revision"""
915 915 l = self.index[rev][2]
916 916 if l >= 0:
917 917 return l
918 918
919 919 t = self.rawdata(rev)
920 920 return len(t)
921 921
922 922 def size(self, rev):
923 923 """length of non-raw text (processed by a "read" flag processor)"""
924 924 # fast path: if no "read" flag processor could change the content,
925 925 # size is rawsize. note: ELLIPSIS is known to not change the content.
926 926 flags = self.flags(rev)
927 927 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
928 928 return self.rawsize(rev)
929 929
930 930 return len(self.revision(rev, raw=False))
931 931
932 932 def chainbase(self, rev):
933 933 base = self._chainbasecache.get(rev)
934 934 if base is not None:
935 935 return base
936 936
937 937 index = self.index
938 938 iterrev = rev
939 939 base = index[iterrev][3]
940 940 while base != iterrev:
941 941 iterrev = base
942 942 base = index[iterrev][3]
943 943
944 944 self._chainbasecache[rev] = base
945 945 return base
946 946
947 947 def linkrev(self, rev):
948 948 return self.index[rev][4]
949 949
950 950 def parentrevs(self, rev):
951 951 try:
952 952 entry = self.index[rev]
953 953 except IndexError:
954 954 if rev == wdirrev:
955 955 raise error.WdirUnsupported
956 956 raise
957 957 if entry[5] == nullrev:
958 958 return entry[6], entry[5]
959 959 else:
960 960 return entry[5], entry[6]
961 961
962 962 # fast parentrevs(rev) where rev isn't filtered
963 963 _uncheckedparentrevs = parentrevs
964 964
965 965 def node(self, rev):
966 966 try:
967 967 return self.index[rev][7]
968 968 except IndexError:
969 969 if rev == wdirrev:
970 970 raise error.WdirUnsupported
971 971 raise
972 972
973 973 # Derived from index values.
974 974
975 975 def end(self, rev):
976 976 return self.start(rev) + self.length(rev)
977 977
978 978 def parents(self, node):
979 979 i = self.index
980 980 d = i[self.rev(node)]
981 981 # inline node() to avoid function call overhead
982 982 if d[5] == self.nullid:
983 983 return i[d[6]][7], i[d[5]][7]
984 984 else:
985 985 return i[d[5]][7], i[d[6]][7]
986 986
987 987 def chainlen(self, rev):
988 988 return self._chaininfo(rev)[0]
989 989
990 990 def _chaininfo(self, rev):
991 991 chaininfocache = self._chaininfocache
992 992 if rev in chaininfocache:
993 993 return chaininfocache[rev]
994 994 index = self.index
995 995 generaldelta = self._generaldelta
996 996 iterrev = rev
997 997 e = index[iterrev]
998 998 clen = 0
999 999 compresseddeltalen = 0
1000 1000 while iterrev != e[3]:
1001 1001 clen += 1
1002 1002 compresseddeltalen += e[1]
1003 1003 if generaldelta:
1004 1004 iterrev = e[3]
1005 1005 else:
1006 1006 iterrev -= 1
1007 1007 if iterrev in chaininfocache:
1008 1008 t = chaininfocache[iterrev]
1009 1009 clen += t[0]
1010 1010 compresseddeltalen += t[1]
1011 1011 break
1012 1012 e = index[iterrev]
1013 1013 else:
1014 1014 # Add text length of base since decompressing that also takes
1015 1015 # work. For cache hits the length is already included.
1016 1016 compresseddeltalen += e[1]
1017 1017 r = (clen, compresseddeltalen)
1018 1018 chaininfocache[rev] = r
1019 1019 return r
1020 1020
1021 1021 def _deltachain(self, rev, stoprev=None):
1022 1022 """Obtain the delta chain for a revision.
1023 1023
1024 1024 ``stoprev`` specifies a revision to stop at. If not specified, we
1025 1025 stop at the base of the chain.
1026 1026
1027 1027 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1028 1028 revs in ascending order and ``stopped`` is a bool indicating whether
1029 1029 ``stoprev`` was hit.
1030 1030 """
1031 1031 # Try C implementation.
1032 1032 try:
1033 1033 return self.index.deltachain(rev, stoprev, self._generaldelta)
1034 1034 except AttributeError:
1035 1035 pass
1036 1036
1037 1037 chain = []
1038 1038
1039 1039 # Alias to prevent attribute lookup in tight loop.
1040 1040 index = self.index
1041 1041 generaldelta = self._generaldelta
1042 1042
1043 1043 iterrev = rev
1044 1044 e = index[iterrev]
1045 1045 while iterrev != e[3] and iterrev != stoprev:
1046 1046 chain.append(iterrev)
1047 1047 if generaldelta:
1048 1048 iterrev = e[3]
1049 1049 else:
1050 1050 iterrev -= 1
1051 1051 e = index[iterrev]
1052 1052
1053 1053 if iterrev == stoprev:
1054 1054 stopped = True
1055 1055 else:
1056 1056 chain.append(iterrev)
1057 1057 stopped = False
1058 1058
1059 1059 chain.reverse()
1060 1060 return chain, stopped
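A hypothetical call, assuming `rl` is a revlog instance with at least six revisions:

```python
# Revs whose deltas must be applied to rebuild revision 5, stopping
# early if revision 2 is reached before the chain base.
chain, stopped = rl._deltachain(5, stoprev=2)
# `chain` is in ascending order; `stopped` is True iff stoprev was hit.
```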
1061 1061
1062 1062 def ancestors(self, revs, stoprev=0, inclusive=False):
1063 1063 """Generate the ancestors of 'revs' in reverse revision order.
1064 1064 Does not generate revs lower than stoprev.
1065 1065
1066 1066 See the documentation for ancestor.lazyancestors for more details."""
1067 1067
1068 1068 # first, make sure start revisions aren't filtered
1069 1069 revs = list(revs)
1070 1070 checkrev = self.node
1071 1071 for r in revs:
1072 1072 checkrev(r)
1073 1073 # and we're sure ancestors aren't filtered as well
1074 1074
1075 1075 if rustancestor is not None and self.index.rust_ext_compat:
1076 1076 lazyancestors = rustancestor.LazyAncestors
1077 1077 arg = self.index
1078 1078 else:
1079 1079 lazyancestors = ancestor.lazyancestors
1080 1080 arg = self._uncheckedparentrevs
1081 1081 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1082 1082
1083 1083 def descendants(self, revs):
1084 1084 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1085 1085
1086 1086 def findcommonmissing(self, common=None, heads=None):
1087 1087 """Return a tuple of the ancestors of common and the ancestors of heads
1088 1088 that are not ancestors of common. In revset terminology, we return the
1089 1089 tuple:
1090 1090
1091 1091 ::common, (::heads) - (::common)
1092 1092
1093 1093 The list is sorted by revision number, meaning it is
1094 1094 topologically sorted.
1095 1095
1096 1096 'heads' and 'common' are both lists of node IDs. If heads is
1097 1097 not supplied, uses all of the revlog's heads. If common is not
1098 1098 supplied, uses nullid."""
1099 1099 if common is None:
1100 1100 common = [self.nullid]
1101 1101 if heads is None:
1102 1102 heads = self.heads()
1103 1103
1104 1104 common = [self.rev(n) for n in common]
1105 1105 heads = [self.rev(n) for n in heads]
1106 1106
1107 1107 # we want the ancestors, but inclusive
1108 1108 class lazyset(object):
1109 1109 def __init__(self, lazyvalues):
1110 1110 self.addedvalues = set()
1111 1111 self.lazyvalues = lazyvalues
1112 1112
1113 1113 def __contains__(self, value):
1114 1114 return value in self.addedvalues or value in self.lazyvalues
1115 1115
1116 1116 def __iter__(self):
1117 1117 added = self.addedvalues
1118 1118 for r in added:
1119 1119 yield r
1120 1120 for r in self.lazyvalues:
1121 1121 if r not in added:
1122 1122 yield r
1123 1123
1124 1124 def add(self, value):
1125 1125 self.addedvalues.add(value)
1126 1126
1127 1127 def update(self, values):
1128 1128 self.addedvalues.update(values)
1129 1129
1130 1130 has = lazyset(self.ancestors(common))
1131 1131 has.add(nullrev)
1132 1132 has.update(common)
1133 1133
1134 1134 # take all ancestors from heads that aren't in has
1135 1135 missing = set()
1136 1136 visit = collections.deque(r for r in heads if r not in has)
1137 1137 while visit:
1138 1138 r = visit.popleft()
1139 1139 if r in missing:
1140 1140 continue
1141 1141 else:
1142 1142 missing.add(r)
1143 1143 for p in self.parentrevs(r):
1144 1144 if p not in has:
1145 1145 visit.append(p)
1146 1146 missing = list(missing)
1147 1147 missing.sort()
1148 1148 return has, [self.node(miss) for miss in missing]
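The revset identity `(::heads) - (::common)` can be checked on a toy history. The sketch below is self-contained and uses a hypothetical linear history 0 -> 1 -> 2 -> 3, not the real revlog API:

```python
NULLREV = -1

def parentrevs(rev):
    # toy linear history: each rev has a single parent, rev - 1
    return (rev - 1, NULLREV)

def missing(common, heads):
    # has = ::common (inclusive)
    has = set(common)
    stack = list(common)
    while stack:
        for p in parentrevs(stack.pop()):
            if p != NULLREV and p not in has:
                has.add(p)
                stack.append(p)
    # out = (::heads) - (::common)
    out = set()
    stack = [h for h in heads if h not in has]
    while stack:
        r = stack.pop()
        if r in out:
            continue
        out.add(r)
        for p in parentrevs(r):
            if p != NULLREV and p not in has and p not in out:
                stack.append(p)
    return sorted(out)

assert missing(common=[1], heads=[3]) == [2, 3]
```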
1149 1149
1150 1150 def incrementalmissingrevs(self, common=None):
1151 1151 """Return an object that can be used to incrementally compute the
1152 1152 revision numbers of the ancestors of arbitrary sets that are not
1153 1153 ancestors of common. This is an ancestor.incrementalmissingancestors
1154 1154 object.
1155 1155
1156 1156 'common' is a list of revision numbers. If common is not supplied, uses
1157 1157 nullrev.
1158 1158 """
1159 1159 if common is None:
1160 1160 common = [nullrev]
1161 1161
1162 1162 if rustancestor is not None and self.index.rust_ext_compat:
1163 1163 return rustancestor.MissingAncestors(self.index, common)
1164 1164 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1165 1165
1166 1166 def findmissingrevs(self, common=None, heads=None):
1167 1167 """Return the revision numbers of the ancestors of heads that
1168 1168 are not ancestors of common.
1169 1169
1170 1170 More specifically, return a list of revision numbers corresponding to
1171 1171 nodes N such that every N satisfies the following constraints:
1172 1172
1173 1173 1. N is an ancestor of some node in 'heads'
1174 1174 2. N is not an ancestor of any node in 'common'
1175 1175
1176 1176 The list is sorted by revision number, meaning it is
1177 1177 topologically sorted.
1178 1178
1179 1179 'heads' and 'common' are both lists of revision numbers. If heads is
1180 1180 not supplied, uses all of the revlog's heads. If common is not
1181 1181 supplied, uses nullid."""
1182 1182 if common is None:
1183 1183 common = [nullrev]
1184 1184 if heads is None:
1185 1185 heads = self.headrevs()
1186 1186
1187 1187 inc = self.incrementalmissingrevs(common=common)
1188 1188 return inc.missingancestors(heads)
1189 1189
1190 1190 def findmissing(self, common=None, heads=None):
1191 1191 """Return the ancestors of heads that are not ancestors of common.
1192 1192
1193 1193 More specifically, return a list of nodes N such that every N
1194 1194 satisfies the following constraints:
1195 1195
1196 1196 1. N is an ancestor of some node in 'heads'
1197 1197 2. N is not an ancestor of any node in 'common'
1198 1198
1199 1199 The list is sorted by revision number, meaning it is
1200 1200 topologically sorted.
1201 1201
1202 1202 'heads' and 'common' are both lists of node IDs. If heads is
1203 1203 not supplied, uses all of the revlog's heads. If common is not
1204 1204 supplied, uses nullid."""
1205 1205 if common is None:
1206 1206 common = [self.nullid]
1207 1207 if heads is None:
1208 1208 heads = self.heads()
1209 1209
1210 1210 common = [self.rev(n) for n in common]
1211 1211 heads = [self.rev(n) for n in heads]
1212 1212
1213 1213 inc = self.incrementalmissingrevs(common=common)
1214 1214 return [self.node(r) for r in inc.missingancestors(heads)]
1215 1215
1216 1216 def nodesbetween(self, roots=None, heads=None):
1217 1217 """Return a topological path from 'roots' to 'heads'.
1218 1218
1219 1219 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1220 1220 topologically sorted list of all nodes N that satisfy both of
1221 1221 these constraints:
1222 1222
1223 1223 1. N is a descendant of some node in 'roots'
1224 1224 2. N is an ancestor of some node in 'heads'
1225 1225
1226 1226 Every node is considered to be both a descendant and an ancestor
1227 1227 of itself, so every reachable node in 'roots' and 'heads' will be
1228 1228 included in 'nodes'.
1229 1229
1230 1230 'outroots' is the list of reachable nodes in 'roots', i.e., the
1231 1231 subset of 'roots' that is returned in 'nodes'. Likewise,
1232 1232 'outheads' is the subset of 'heads' that is also in 'nodes'.
1233 1233
1234 1234 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1235 1235 unspecified, uses nullid as the only root. If 'heads' is
1236 1236 unspecified, uses list of all of the revlog's heads."""
1237 1237 nonodes = ([], [], [])
1238 1238 if roots is not None:
1239 1239 roots = list(roots)
1240 1240 if not roots:
1241 1241 return nonodes
1242 1242 lowestrev = min([self.rev(n) for n in roots])
1243 1243 else:
1244 1244 roots = [self.nullid] # Everybody's a descendant of nullid
1245 1245 lowestrev = nullrev
1246 1246 if (lowestrev == nullrev) and (heads is None):
1247 1247 # We want _all_ the nodes!
1248 1248 return (
1249 1249 [self.node(r) for r in self],
1250 1250 [self.nullid],
1251 1251 list(self.heads()),
1252 1252 )
1253 1253 if heads is None:
1254 1254 # All nodes are ancestors, so the latest ancestor is the last
1255 1255 # node.
1256 1256 highestrev = len(self) - 1
1257 1257 # Set ancestors to None to signal that every node is an ancestor.
1258 1258 ancestors = None
1259 1259 # Set heads to an empty dictionary for later discovery of heads
1260 1260 heads = {}
1261 1261 else:
1262 1262 heads = list(heads)
1263 1263 if not heads:
1264 1264 return nonodes
1265 1265 ancestors = set()
1266 1266 # Turn heads into a dictionary so we can remove 'fake' heads.
1267 1267 # Also, later we will be using it to filter out the heads we can't
1268 1268 # find from roots.
1269 1269 heads = dict.fromkeys(heads, False)
1270 1270 # Start at the top and keep marking parents until we're done.
1271 1271 nodestotag = set(heads)
1272 1272 # Remember where the top was so we can use it as a limit later.
1273 1273 highestrev = max([self.rev(n) for n in nodestotag])
1274 1274 while nodestotag:
1275 1275 # grab a node to tag
1276 1276 n = nodestotag.pop()
1277 1277 # Never tag nullid
1278 1278 if n == self.nullid:
1279 1279 continue
1280 1280 # A node's revision number represents its place in a
1281 1281 # topologically sorted list of nodes.
1282 1282 r = self.rev(n)
1283 1283 if r >= lowestrev:
1284 1284 if n not in ancestors:
1285 1285 # If we are possibly a descendant of one of the roots
1286 1286 # and we haven't already been marked as an ancestor
1287 1287 ancestors.add(n) # Mark as ancestor
1288 1288 # Add non-nullid parents to list of nodes to tag.
1289 1289 nodestotag.update(
1290 1290 [p for p in self.parents(n) if p != self.nullid]
1291 1291 )
1292 1292 elif n in heads: # We've seen it before, is it a fake head?
1293 1293 # So it is, real heads should not be the ancestors of
1294 1294 # any other heads.
1295 1295 heads.pop(n)
1296 1296 if not ancestors:
1297 1297 return nonodes
1298 1298 # Now that we have our set of ancestors, we want to remove any
1299 1299 # roots that are not ancestors.
1300 1300
1301 1301 # If one of the roots was nullid, everything is included anyway.
1302 1302 if lowestrev > nullrev:
1303 1303 # But, since we weren't, let's recompute the lowest rev to not
1304 1304 # include roots that aren't ancestors.
1305 1305
1306 1306 # Filter out roots that aren't ancestors of heads
1307 1307 roots = [root for root in roots if root in ancestors]
1308 1308 # Recompute the lowest revision
1309 1309 if roots:
1310 1310 lowestrev = min([self.rev(root) for root in roots])
1311 1311 else:
1312 1312 # No more roots? Return empty list
1313 1313 return nonodes
1314 1314 else:
1315 1315 # We are descending from nullid, and don't need to care about
1316 1316 # any other roots.
1317 1317 lowestrev = nullrev
1318 1318 roots = [self.nullid]
1319 1319 # Transform our roots list into a set.
1320 1320 descendants = set(roots)
1321 1321 # Also, keep the original roots so we can filter out roots that aren't
1322 1322 # 'real' roots (i.e. are descended from other roots).
1323 1323 roots = descendants.copy()
1324 1324 # Our topologically sorted list of output nodes.
1325 1325 orderedout = []
1326 1326 # Don't start at nullid since we don't want nullid in our output list,
1327 1327 # and if nullid shows up in descendants, empty parents will look like
1328 1328 # they're descendants.
1329 1329 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1330 1330 n = self.node(r)
1331 1331 isdescendant = False
1332 1332 if lowestrev == nullrev: # Everybody is a descendant of nullid
1333 1333 isdescendant = True
1334 1334 elif n in descendants:
1335 1335 # n is already a descendant
1336 1336 isdescendant = True
1337 1337 # This check only needs to be done here because all the roots
1338 1338 # will start being marked as descendants before the loop.
1339 1339 if n in roots:
1340 1340 # If n was a root, check if it's a 'real' root.
1341 1341 p = tuple(self.parents(n))
1342 1342 # If any of its parents are descendants, it's not a root.
1343 1343 if (p[0] in descendants) or (p[1] in descendants):
1344 1344 roots.remove(n)
1345 1345 else:
1346 1346 p = tuple(self.parents(n))
1347 1347 # A node is a descendant if either of its parents are
1348 1348 # descendants. (We seeded the descendants list with the roots
1349 1349 # up there, remember?)
1350 1350 if (p[0] in descendants) or (p[1] in descendants):
1351 1351 descendants.add(n)
1352 1352 isdescendant = True
1353 1353 if isdescendant and ((ancestors is None) or (n in ancestors)):
1354 1354 # Only include nodes that are both descendants and ancestors.
1355 1355 orderedout.append(n)
1356 1356 if (ancestors is not None) and (n in heads):
1357 1357 # We're trying to figure out which heads are reachable
1358 1358 # from roots.
1359 1359 # Mark this head as having been reached
1360 1360 heads[n] = True
1361 1361 elif ancestors is None:
1362 1362 # Otherwise, we're trying to discover the heads.
1363 1363 # Assume this is a head because if it isn't, the next step
1364 1364 # will eventually remove it.
1365 1365 heads[n] = True
1366 1366 # But, obviously its parents aren't.
1367 1367 for p in self.parents(n):
1368 1368 heads.pop(p, None)
1369 1369 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1370 1370 roots = list(roots)
1371 1371 assert orderedout
1372 1372 assert roots
1373 1373 assert heads
1374 1374 return (orderedout, roots, heads)
1375 1375
1376 1376 def headrevs(self, revs=None):
1377 1377 if revs is None:
1378 1378 try:
1379 1379 return self.index.headrevs()
1380 1380 except AttributeError:
1381 1381 return self._headrevs()
1382 1382 if rustdagop is not None and self.index.rust_ext_compat:
1383 1383 return rustdagop.headrevs(self.index, revs)
1384 1384 return dagop.headrevs(revs, self._uncheckedparentrevs)
1385 1385
1386 1386 def computephases(self, roots):
1387 1387 return self.index.computephasesmapsets(roots)
1388 1388
1389 1389 def _headrevs(self):
1390 1390 count = len(self)
1391 1391 if not count:
1392 1392 return [nullrev]
1393 1393 # we won't iter over filtered revs so nobody is a head at start
1394 1394 ishead = [0] * (count + 1)
1395 1395 index = self.index
1396 1396 for r in self:
1397 1397 ishead[r] = 1 # I may be a head
1398 1398 e = index[r]
1399 1399 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1400 1400 return [r for r, val in enumerate(ishead) if val]
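The same mark-and-clear idea in a self-contained toy, with a hypothetical branching history 0 -> 1 -> 2 and 0 -> 3 (two heads, 2 and 3):

```python
NULLREV = -1
# rev -> (p1, p2) for the toy history
parents = {0: (NULLREV, NULLREV), 1: (0, NULLREV),
           2: (1, NULLREV), 3: (0, NULLREV)}

ishead = [1] * len(parents)          # everybody may be a head...
for r, (p1, p2) in parents.items():
    for p in (p1, p2):
        if p != NULLREV:
            ishead[p] = 0            # ...but parents are not
assert [r for r, v in enumerate(ishead) if v] == [2, 3]
```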
1401 1401
1402 1402 def heads(self, start=None, stop=None):
1403 1403 """return the list of all nodes that have no children
1404 1404
1405 1405 if start is specified, only heads that are descendants of
1406 1406 start will be returned
1407 1407 if stop is specified, it will consider all the revs from stop
1408 1408 as if they had no children
1409 1409 """
1410 1410 if start is None and stop is None:
1411 1411 if not len(self):
1412 1412 return [self.nullid]
1413 1413 return [self.node(r) for r in self.headrevs()]
1414 1414
1415 1415 if start is None:
1416 1416 start = nullrev
1417 1417 else:
1418 1418 start = self.rev(start)
1419 1419
1420 1420 stoprevs = {self.rev(n) for n in stop or []}
1421 1421
1422 1422 revs = dagop.headrevssubset(
1423 1423 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1424 1424 )
1425 1425
1426 1426 return [self.node(rev) for rev in revs]
1427 1427
1428 1428 def children(self, node):
1429 1429 """find the children of a given node"""
1430 1430 c = []
1431 1431 p = self.rev(node)
1432 1432 for r in self.revs(start=p + 1):
1433 1433 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1434 1434 if prevs:
1435 1435 for pr in prevs:
1436 1436 if pr == p:
1437 1437 c.append(self.node(r))
1438 1438 elif p == nullrev:
1439 1439 c.append(self.node(r))
1440 1440 return c
1441 1441
1442 1442 def commonancestorsheads(self, a, b):
1443 1443 """calculate all the heads of the common ancestors of nodes a and b"""
1444 1444 a, b = self.rev(a), self.rev(b)
1445 1445 ancs = self._commonancestorsheads(a, b)
1446 1446 return pycompat.maplist(self.node, ancs)
1447 1447
1448 1448 def _commonancestorsheads(self, *revs):
1449 1449 """calculate all the heads of the common ancestors of revs"""
1450 1450 try:
1451 1451 ancs = self.index.commonancestorsheads(*revs)
1452 1452 except (AttributeError, OverflowError): # C implementation failed
1453 1453 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1454 1454 return ancs
1455 1455
1456 1456 def isancestor(self, a, b):
1457 1457 """return True if node a is an ancestor of node b
1458 1458
1459 1459 A revision is considered an ancestor of itself."""
1460 1460 a, b = self.rev(a), self.rev(b)
1461 1461 return self.isancestorrev(a, b)
1462 1462
1463 1463 def isancestorrev(self, a, b):
1464 1464 """return True if revision a is an ancestor of revision b
1465 1465
1466 1466 A revision is considered an ancestor of itself.
1467 1467
1468 1468 The implementation of this is trivial but the use of
1469 1469 reachableroots is not."""
1470 1470 if a == nullrev:
1471 1471 return True
1472 1472 elif a == b:
1473 1473 return True
1474 1474 elif a > b:
1475 1475 return False
1476 1476 return bool(self.reachableroots(a, [b], [a], includepath=False))
1477 1477
1478 1478 def reachableroots(self, minroot, heads, roots, includepath=False):
1479 1479 """return (heads(::(<roots> and <roots>::<heads>)))
1480 1480
1481 1481 If includepath is True, return (<roots>::<heads>)."""
1482 1482 try:
1483 1483 return self.index.reachableroots2(
1484 1484 minroot, heads, roots, includepath
1485 1485 )
1486 1486 except AttributeError:
1487 1487 return dagop._reachablerootspure(
1488 1488 self.parentrevs, minroot, roots, heads, includepath
1489 1489 )
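A sketch of the set this computes for includepath=True, i.e. `<roots>::<heads>`: revisions that both descend from a root and are ancestors of a head. The toy below uses a hypothetical linear history 0 -> 1 -> 2 -> 3 and brute force, not the real algorithm:

```python
NULLREV = -1

def parentrevs(rev):
    return (rev - 1, NULLREV)  # toy linear history

def reachable(heads, roots):
    roots = set(roots)
    out = set()
    stack = list(heads)
    while stack:
        r = stack.pop()
        if r == NULLREV or r in out:
            continue
        # brute force: keep r if some root is among its ancestors
        a = r
        while a != NULLREV and a not in roots:
            a = parentrevs(a)[0]
        if a in roots:
            out.add(r)
            stack.extend(parentrevs(r))
    return sorted(out)

assert reachable(heads=[3], roots=[1]) == [1, 2, 3]
```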
1490 1490
1491 1491 def ancestor(self, a, b):
1492 1492 """calculate the "best" common ancestor of nodes a and b"""
1493 1493
1494 1494 a, b = self.rev(a), self.rev(b)
1495 1495 try:
1496 1496 ancs = self.index.ancestors(a, b)
1497 1497 except (AttributeError, OverflowError):
1498 1498 ancs = ancestor.ancestors(self.parentrevs, a, b)
1499 1499 if ancs:
1500 1500 # choose a consistent winner when there's a tie
1501 1501 return min(map(self.node, ancs))
1502 1502 return self.nullid
1503 1503
1504 1504 def _match(self, id):
1505 1505 if isinstance(id, int):
1506 1506 # rev
1507 1507 return self.node(id)
1508 1508 if len(id) == self.nodeconstants.nodelen:
1509 1509 # possibly a binary node
1510 1510 # odds of a binary node being all hex in ASCII are 1 in 10**25
1511 1511 try:
1512 1512 node = id
1513 1513 self.rev(node) # quick search the index
1514 1514 return node
1515 1515 except error.LookupError:
1516 1516 pass # may be partial hex id
1517 1517 try:
1518 1518 # str(rev)
1519 1519 rev = int(id)
1520 1520 if b"%d" % rev != id:
1521 1521 raise ValueError
1522 1522 if rev < 0:
1523 1523 rev = len(self) + rev
1524 1524 if rev < 0 or rev >= len(self):
1525 1525 raise ValueError
1526 1526 return self.node(rev)
1527 1527 except (ValueError, OverflowError):
1528 1528 pass
1529 1529 if len(id) == 2 * self.nodeconstants.nodelen:
1530 1530 try:
1531 1531 # a full hex nodeid?
1532 1532 node = bin(id)
1533 1533 self.rev(node)
1534 1534 return node
1535 1535 except (TypeError, error.LookupError):
1536 1536 pass
1537 1537
1538 1538 def _partialmatch(self, id):
1539 1539 # we don't care about wdirfilenodeids as they should always be full hashes
1540 1540 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1541 1541 try:
1542 1542 partial = self.index.partialmatch(id)
1543 1543 if partial and self.hasnode(partial):
1544 1544 if maybewdir:
1545 1545 # single 'ff...' match in radix tree, ambiguous with wdir
1546 1546 raise error.RevlogError
1547 1547 return partial
1548 1548 if maybewdir:
1549 1549 # no 'ff...' match in radix tree, wdir identified
1550 1550 raise error.WdirUnsupported
1551 1551 return None
1552 1552 except error.RevlogError:
1553 1553 # parsers.c radix tree lookup gave multiple matches
1554 1554 # fast path: for unfiltered changelog, radix tree is accurate
1555 1555 if not getattr(self, 'filteredrevs', None):
1556 1556 raise error.AmbiguousPrefixLookupError(
1557 1557 id, self.display_id, _(b'ambiguous identifier')
1558 1558 )
1559 1559 # fall through to slow path that filters hidden revisions
1560 1560 except (AttributeError, ValueError):
1561 1561 # we are pure python, or key was too short to search radix tree
1562 1562 pass
1563 1563
1564 1564 if id in self._pcache:
1565 1565 return self._pcache[id]
1566 1566
1567 1567 if len(id) <= 40:
1568 1568 try:
1569 1569 # hex(node)[:...]
1570 1570 l = len(id) // 2 # grab an even number of digits
1571 1571 prefix = bin(id[: l * 2])
1572 1572 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1573 1573 nl = [
1574 1574 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1575 1575 ]
1576 1576 if self.nodeconstants.nullhex.startswith(id):
1577 1577 nl.append(self.nullid)
1578 1578 if len(nl) > 0:
1579 1579 if len(nl) == 1 and not maybewdir:
1580 1580 self._pcache[id] = nl[0]
1581 1581 return nl[0]
1582 1582 raise error.AmbiguousPrefixLookupError(
1583 1583 id, self.display_id, _(b'ambiguous identifier')
1584 1584 )
1585 1585 if maybewdir:
1586 1586 raise error.WdirUnsupported
1587 1587 return None
1588 1588 except TypeError:
1589 1589 pass
1590 1590
1591 1591 def lookup(self, id):
1592 1592 """locate a node based on:
1593 1593 - revision number or str(revision number)
1594 1594 - nodeid or subset of hex nodeid
1595 1595 """
1596 1596 n = self._match(id)
1597 1597 if n is not None:
1598 1598 return n
1599 1599 n = self._partialmatch(id)
1600 1600 if n:
1601 1601 return n
1602 1602
1603 1603 raise error.LookupError(id, self.display_id, _(b'no match found'))
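# Usage sketch for lookup() (illustrative inputs; `rl` is assumed to be
# an open revlog, not a name defined here):
#
#   rl.lookup(b'0')        # -> node of revision 0
#   rl.lookup(hexprefix)   # -> unique node matching the hex prefix, or
#                          #    AmbiguousPrefixLookupError / LookupError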
1604 1604
1605 1605 def shortest(self, node, minlength=1):
1606 1606 """Find the shortest unambiguous prefix that matches node."""
1607 1607
1608 1608 def isvalid(prefix):
1609 1609 try:
1610 1610 matchednode = self._partialmatch(prefix)
1611 1611 except error.AmbiguousPrefixLookupError:
1612 1612 return False
1613 1613 except error.WdirUnsupported:
1614 1614 # single 'ff...' match
1615 1615 return True
1616 1616 if matchednode is None:
1617 1617 raise error.LookupError(node, self.display_id, _(b'no node'))
1618 1618 return True
1619 1619
1620 1620 def maybewdir(prefix):
1621 1621 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1622 1622
1623 1623 hexnode = hex(node)
1624 1624
1625 1625 def disambiguate(hexnode, minlength):
1626 1626 """Disambiguate against wdirid."""
1627 1627 for length in range(minlength, len(hexnode) + 1):
1628 1628 prefix = hexnode[:length]
1629 1629 if not maybewdir(prefix):
1630 1630 return prefix
1631 1631
1632 1632 if not getattr(self, 'filteredrevs', None):
1633 1633 try:
1634 1634 length = max(self.index.shortest(node), minlength)
1635 1635 return disambiguate(hexnode, length)
1636 1636 except error.RevlogError:
1637 1637 if node != self.nodeconstants.wdirid:
1638 1638 raise error.LookupError(
1639 1639 node, self.display_id, _(b'no node')
1640 1640 )
1641 1641 except AttributeError:
1642 1642 # Fall through to pure code
1643 1643 pass
1644 1644
1645 1645 if node == self.nodeconstants.wdirid:
1646 1646 for length in range(minlength, len(hexnode) + 1):
1647 1647 prefix = hexnode[:length]
1648 1648 if isvalid(prefix):
1649 1649 return prefix
1650 1650
1651 1651 for length in range(minlength, len(hexnode) + 1):
1652 1652 prefix = hexnode[:length]
1653 1653 if isvalid(prefix):
1654 1654 return disambiguate(hexnode, length)
1655 1655
1656 1656 def cmp(self, node, text):
1657 1657 """compare text with a given file revision
1658 1658
1659 1659 returns True if text is different from what is stored.
1660 1660 """
1661 1661 p1, p2 = self.parents(node)
1662 1662 return storageutil.hashrevisionsha1(text, p1, p2) != node
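# e.g. `rl.cmp(node, text)` returning False means `text` hashes to
# `node` given the stored parents, so callers can skip rewriting an
# unchanged revision (sketch; `rl` and `text` are illustrative names)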
1663 1663
1664 1664 def _cachesegment(self, offset, data):
1665 1665 """Add a segment to the revlog cache.
1666 1666
1667 1667 Accepts an absolute offset and the data that is at that location.
1668 1668 """
1669 1669 o, d = self._chunkcache
1670 1670 # try to add to existing cache
1671 1671 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1672 1672 self._chunkcache = o, d + data
1673 1673 else:
1674 1674 self._chunkcache = offset, data
1675 1675
1676 1676 def _readsegment(self, offset, length, df=None):
1677 1677 """Load a segment of raw data from the revlog.
1678 1678
1679 1679 Accepts an absolute offset, length to read, and an optional existing
1680 1680 file handle to read from.
1681 1681
1682 1682 If an existing file handle is passed, it will be seeked and the
1683 1683 original seek position will NOT be restored.
1684 1684
1685 1685 Returns a str or buffer of raw byte data.
1686 1686
1687 1687 Raises if the requested number of bytes could not be read.
1688 1688 """
1689 1689 # Cache data both forward and backward around the requested
1690 1690 # data, in a fixed size window. This helps speed up operations
1691 1691 # involving reading the revlog backwards.
1692 1692 cachesize = self._chunkcachesize
1693 1693 realoffset = offset & ~(cachesize - 1)
1694 1694 reallength = (
1695 1695 (offset + length + cachesize) & ~(cachesize - 1)
1696 1696 ) - realoffset
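# worked example of the window rounding above (illustrative numbers,
# assuming cachesize=65536): offset=70000, length=100 gives
# realoffset=65536 and reallength=65536, i.e. the whole aligned 64KiB
# block around the request is read and cached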
1697 1697 with self._datareadfp(df) as df:
1698 1698 df.seek(realoffset)
1699 1699 d = df.read(reallength)
1700 1700
1701 1701 self._cachesegment(realoffset, d)
1702 1702 if offset != realoffset or reallength != length:
1703 1703 startoffset = offset - realoffset
1704 1704 if len(d) - startoffset < length:
1705 1705 raise error.RevlogError(
1706 1706 _(
1707 1707 b'partial read of revlog %s; expected %d bytes from '
1708 1708 b'offset %d, got %d'
1709 1709 )
1710 1710 % (
1711 1711 self._indexfile if self._inline else self._datafile,
1712 1712 length,
1713 1713 offset,
1714 1714 len(d) - startoffset,
1715 1715 )
1716 1716 )
1717 1717
1718 1718 return util.buffer(d, startoffset, length)
1719 1719
1720 1720 if len(d) < length:
1721 1721 raise error.RevlogError(
1722 1722 _(
1723 1723 b'partial read of revlog %s; expected %d bytes from offset '
1724 1724 b'%d, got %d'
1725 1725 )
1726 1726 % (
1727 1727 self._indexfile if self._inline else self._datafile,
1728 1728 length,
1729 1729 offset,
1730 1730 len(d),
1731 1731 )
1732 1732 )
1733 1733
1734 1734 return d
1735 1735
1736 1736 def _getsegment(self, offset, length, df=None):
1737 1737 """Obtain a segment of raw data from the revlog.
1738 1738
1739 1739 Accepts an absolute offset, length of bytes to obtain, and an
1740 1740 optional file handle to the already-opened revlog. If the file
1741 1741 handle is used, its original seek position will not be preserved.
1742 1742
1743 1743 Requests for data may be returned from a cache.
1744 1744
1745 1745 Returns a str or a buffer instance of raw byte data.
1746 1746 """
1747 1747 o, d = self._chunkcache
1748 1748 l = len(d)
1749 1749
1750 1750 # is it in the cache?
1751 1751 cachestart = offset - o
1752 1752 cacheend = cachestart + length
1753 1753 if cachestart >= 0 and cacheend <= l:
1754 1754 if cachestart == 0 and cacheend == l:
1755 1755 return d # avoid a copy
1756 1756 return util.buffer(d, cachestart, cacheend - cachestart)
1757 1757
1758 1758 return self._readsegment(offset, length, df=df)
1759 1759
1760 1760 def _getsegmentforrevs(self, startrev, endrev, df=None):
1761 1761 """Obtain a segment of raw data corresponding to a range of revisions.
1762 1762
1763 1763 Accepts the start and end revisions and an optional already-open
1764 1764 file handle to be used for reading. If the file handle is used, its
1765 1765 seek position will not be preserved.
1766 1766
1767 1767 Requests for data may be satisfied by a cache.
1768 1768
1769 1769 Returns a 2-tuple of (offset, data) for the requested range of
1770 1770 revisions. Offset is the integer offset from the beginning of the
1771 1771 revlog and data is a str or buffer of the raw byte data.
1772 1772
1773 1773 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1774 1774 to determine where each revision's data begins and ends.
1775 1775 """
1776 1776 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1777 1777 # (functions are expensive).
1778 1778 index = self.index
1779 1779 istart = index[startrev]
1780 1780 start = int(istart[0] >> 16)
1781 1781 if startrev == endrev:
1782 1782 end = start + istart[1]
1783 1783 else:
1784 1784 iend = index[endrev]
1785 1785 end = int(iend[0] >> 16) + iend[1]
1786 1786
1787 1787 if self._inline:
1788 1788 start += (startrev + 1) * self.index.entry_size
1789 1789 end += (endrev + 1) * self.index.entry_size
1790 1790 length = end - start
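# sketch of the inline adjustment above: in an inline revlog the index
# entries are interleaved with the data, so rev r's data sits after
# (r + 1) index entries; e.g. with entry_size=64, rev 2's data starts
# 3 * 64 bytes beyond its flat data offset (illustrative size)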
1791 1791
1792 1792 return start, self._getsegment(start, length, df=df)
1793 1793
1794 1794 def _chunk(self, rev, df=None):
1795 1795 """Obtain a single decompressed chunk for a revision.
1796 1796
1797 1797 Accepts an integer revision and an optional already-open file handle
1798 1798 to be used for reading. If used, the seek position of the file will not
1799 1799 be preserved.
1800 1800
1801 1801 Returns a str holding uncompressed data for the requested revision.
1802 1802 """
1803 1803 compression_mode = self.index[rev][10]
1804 1804 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1805 1805 if compression_mode == COMP_MODE_PLAIN:
1806 1806 return data
1807 1807 elif compression_mode == COMP_MODE_DEFAULT:
1808 1808 return self._decompressor(data)
1809 1809 elif compression_mode == COMP_MODE_INLINE:
1810 1810 return self.decompress(data)
1811 1811 else:
1812 1812 msg = 'unknown compression mode %d'
1813 1813 msg %= compression_mode
1814 1814 raise error.RevlogError(msg)
1815 1815
1816 1816 def _chunks(self, revs, df=None, targetsize=None):
1817 1817 """Obtain decompressed chunks for the specified revisions.
1818 1818
1819 1819 Accepts an iterable of numeric revisions that are assumed to be in
1820 1820 ascending order. Also accepts an optional already-open file handle
1821 1821 to be used for reading. If used, the seek position of the file will
1822 1822 not be preserved.
1823 1823
1824 1824 This function is similar to calling ``self._chunk()`` multiple times,
1825 1825 but is faster.
1826 1826
1827 1827 Returns a list with decompressed data for each requested revision.
1828 1828 """
1829 1829 if not revs:
1830 1830 return []
1831 1831 start = self.start
1832 1832 length = self.length
1833 1833 inline = self._inline
1834 1834 iosize = self.index.entry_size
1835 1835 buffer = util.buffer
1836 1836
1837 1837 l = []
1838 1838 ladd = l.append
1839 1839
1840 1840 if not self._withsparseread:
1841 1841 slicedchunks = (revs,)
1842 1842 else:
1843 1843 slicedchunks = deltautil.slicechunk(
1844 1844 self, revs, targetsize=targetsize
1845 1845 )
1846 1846
1847 1847 for revschunk in slicedchunks:
1848 1848 firstrev = revschunk[0]
1849 1849 # Skip trailing revisions with empty diff
1850 1850 for lastrev in revschunk[::-1]:
1851 1851 if length(lastrev) != 0:
1852 1852 break
1853 1853
1854 1854 try:
1855 1855 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1856 1856 except OverflowError:
1857 1857 # issue4215 - we can't cache a run of chunks greater than
1858 1858 # 2G on Windows
1859 1859 return [self._chunk(rev, df=df) for rev in revschunk]
1860 1860
1861 1861 decomp = self.decompress
1862 1862 # self._decompressor might be None, but will not be used in that case
1863 1863 def_decomp = self._decompressor
1864 1864 for rev in revschunk:
1865 1865 chunkstart = start(rev)
1866 1866 if inline:
1867 1867 chunkstart += (rev + 1) * iosize
1868 1868 chunklength = length(rev)
1869 1869 comp_mode = self.index[rev][10]
1870 1870 c = buffer(data, chunkstart - offset, chunklength)
1871 1871 if comp_mode == COMP_MODE_PLAIN:
1872 1872 ladd(c)
1873 1873 elif comp_mode == COMP_MODE_INLINE:
1874 1874 ladd(decomp(c))
1875 1875 elif comp_mode == COMP_MODE_DEFAULT:
1876 1876 ladd(def_decomp(c))
1877 1877 else:
1878 1878 msg = 'unknown compression mode %d'
1879 1879 msg %= comp_mode
1880 1880 raise error.RevlogError(msg)
1881 1881
1882 1882 return l
1883 1883
1884 1884 def _chunkclear(self):
1885 1885 """Clear the raw chunk cache."""
1886 1886 self._chunkcache = (0, b'')
1887 1887
1888 1888 def deltaparent(self, rev):
1889 1889 """return deltaparent of the given revision"""
1890 1890 base = self.index[rev][3]
1891 1891 if base == rev:
1892 1892 return nullrev
1893 1893 elif self._generaldelta:
1894 1894 return base
1895 1895 else:
1896 1896 return rev - 1
1897 1897
1898 1898 def issnapshot(self, rev):
1899 1899 """tells whether rev is a snapshot"""
1900 1900 if not self._sparserevlog:
1901 1901 return self.deltaparent(rev) == nullrev
1902 1902 elif util.safehasattr(self.index, 'issnapshot'):
1903 1903 # directly assign the method to cache the testing and access
1904 1904 self.issnapshot = self.index.issnapshot
1905 1905 return self.issnapshot(rev)
1906 1906 if rev == nullrev:
1907 1907 return True
1908 1908 entry = self.index[rev]
1909 1909 base = entry[3]
1910 1910 if base == rev:
1911 1911 return True
1912 1912 if base == nullrev:
1913 1913 return True
1914 1914 p1 = entry[5]
1915 1915 p2 = entry[6]
1916 1916 if base == p1 or base == p2:
1917 1917 return False
1918 1918 return self.issnapshot(base)
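# summary of the sparse-revlog rule encoded above (descriptive only):
# a revision stored as full text (base == rev) or delta'd against null
# is a snapshot; a delta against one of its parents is not; anything
# else inherits the answer from its delta base (intermediate snapshot)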
1919 1919
1920 1920 def snapshotdepth(self, rev):
1921 1921 """number of snapshots in the chain before this one"""
1922 1922 if not self.issnapshot(rev):
1923 1923 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1924 1924 return len(self._deltachain(rev)[0]) - 1
1925 1925
1926 1926 def revdiff(self, rev1, rev2):
1927 1927 """return or calculate a delta between two revisions
1928 1928
1929 1929 The delta calculated is in binary form and is intended to be written to
1930 1930 revlog data directly. So this function needs raw revision data.
1931 1931 """
1932 1932 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1933 1933 return bytes(self._chunk(rev2))
1934 1934
1935 1935 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1936 1936
1937 1937 def _processflags(self, text, flags, operation, raw=False):
1938 1938 """deprecated entry point to access flag processors"""
1939 1939 msg = b'_processflag(...) use the specialized variant'
1940 1940 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1941 1941 if raw:
1942 1942 return text, flagutil.processflagsraw(self, text, flags)
1943 1943 elif operation == b'read':
1944 1944 return flagutil.processflagsread(self, text, flags)
1945 1945 else: # write operation
1946 1946 return flagutil.processflagswrite(self, text, flags)
1947 1947
1948 1948 def revision(self, nodeorrev, _df=None, raw=False):
1949 1949 """return an uncompressed revision of a given node or revision
1950 1950 number.
1951 1951
1952 1952 _df - an existing file handle to read from. (internal-only)
1953 1953 raw - an optional argument specifying if the revision data is to be
1954 1954 treated as raw data when applying flag transforms. 'raw' should be set
1955 1955 to True when generating changegroups or in debug commands.
1956 1956 """
1957 1957 if raw:
1958 1958 msg = (
1959 1959 b'revlog.revision(..., raw=True) is deprecated, '
1960 1960 b'use revlog.rawdata(...)'
1961 1961 )
1962 1962 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1963 1963 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1964 1964
1965 1965 def sidedata(self, nodeorrev, _df=None):
1966 1966 """a map of extra data related to the changeset but not part of the hash
1967 1967
1968 1968 This function currently returns a dictionary. However, a more
1969 1969 advanced mapping object will likely be used in the future for more
1970 1970 efficient/lazy code.
1971 1971 """
1972 1972 return self._revisiondata(nodeorrev, _df)[1]
1973 1973
1974 1974 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1975 1975 # deal with <nodeorrev> argument type
1976 1976 if isinstance(nodeorrev, int):
1977 1977 rev = nodeorrev
1978 1978 node = self.node(rev)
1979 1979 else:
1980 1980 node = nodeorrev
1981 1981 rev = None
1982 1982
1983 1983 # fast path the special `nullid` rev
1984 1984 if node == self.nullid:
1985 1985 return b"", {}
1986 1986
1987 1987 # ``rawtext`` is the text as stored inside the revlog. Might be the
1988 1988 # revision or might need to be processed to retrieve the revision.
1989 1989 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1990 1990
1991 1991 if self.hassidedata:
1992 1992 if rev is None:
1993 1993 rev = self.rev(node)
1994 1994 sidedata = self._sidedata(rev)
1995 1995 else:
1996 1996 sidedata = {}
1997 1997
1998 1998 if raw and validated:
1999 1999 # if we don't want to process the raw text and the raw
2000 2000 # text is cached, we can exit early.
2001 2001 return rawtext, sidedata
2002 2002 if rev is None:
2003 2003 rev = self.rev(node)
2004 2004 # the revlog's flags for this revision
2005 2005 # (they usually alter its state or content)
2006 2006 flags = self.flags(rev)
2007 2007
2008 2008 if validated and flags == REVIDX_DEFAULT_FLAGS:
2009 2009 # no extra flags set, no flag processor runs, text = rawtext
2010 2010 return rawtext, sidedata
2011 2011
2012 2012 if raw:
2013 2013 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2014 2014 text = rawtext
2015 2015 else:
2016 2016 r = flagutil.processflagsread(self, rawtext, flags)
2017 2017 text, validatehash = r
2018 2018 if validatehash:
2019 2019 self.checkhash(text, node, rev=rev)
2020 2020 if not validated:
2021 2021 self._revisioncache = (node, rev, rawtext)
2022 2022
2023 2023 return text, sidedata
2024 2024
2025 2025 def _rawtext(self, node, rev, _df=None):
2026 2026 """return the possibly unvalidated rawtext for a revision
2027 2027
2028 2028 returns (rev, rawtext, validated)
2029 2029 """
2030 2030
2031 2031 # revision in the cache (could be useful to apply delta)
2032 2032 cachedrev = None
2033 2033 # An intermediate text to apply deltas to
2034 2034 basetext = None
2035 2035
2036 2036 # Check if we have the entry in cache
2037 2037 # The cache entry looks like (node, rev, rawtext)
2038 2038 if self._revisioncache:
2039 2039 if self._revisioncache[0] == node:
2040 2040 return (rev, self._revisioncache[2], True)
2041 2041 cachedrev = self._revisioncache[1]
2042 2042
2043 2043 if rev is None:
2044 2044 rev = self.rev(node)
2045 2045
2046 2046 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2047 2047 if stopped:
2048 2048 basetext = self._revisioncache[2]
2049 2049
2050 2050 # drop cache to save memory, the caller is expected to
2051 2051 # update self._revisioncache after validating the text
2052 2052 self._revisioncache = None
2053 2053
2054 2054 targetsize = None
2055 2055 rawsize = self.index[rev][2]
2056 2056 if 0 <= rawsize:
2057 2057 targetsize = 4 * rawsize
2058 2058
2059 2059 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2060 2060 if basetext is None:
2061 2061 basetext = bytes(bins[0])
2062 2062 bins = bins[1:]
2063 2063
2064 2064 rawtext = mdiff.patches(basetext, bins)
2065 2065 del basetext # let us have a chance to free memory early
2066 2066 return (rev, rawtext, False)
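# reconstruction sketch: for a delta chain [base, d1, d2, rev] the
# code above computes, in effect,
#   rawtext = mdiff.patches(text(base), [chunk(d1), chunk(d2), chunk(rev)])
# with `text(base)` possibly taken from the revision cache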
2067 2067
2068 2068 def _sidedata(self, rev):
2069 2069 """Return the sidedata for a given revision number."""
2070 2070 index_entry = self.index[rev]
2071 2071 sidedata_offset = index_entry[8]
2072 2072 sidedata_size = index_entry[9]
2073 2073
2074 2074 if self._inline:
2075 2075 sidedata_offset += self.index.entry_size * (1 + rev)
2076 2076 if sidedata_size == 0:
2077 2077 return {}
2078 2078
2079 2079 comp_segment = self._getsegment(sidedata_offset, sidedata_size)
2080 2080 comp = self.index[rev][11]
2081 2081 if comp == COMP_MODE_PLAIN:
2082 2082 segment = comp_segment
2083 2083 elif comp == COMP_MODE_DEFAULT:
2084 2084 segment = self._decompressor(comp_segment)
2085 2085 elif comp == COMP_MODE_INLINE:
2086 2086 segment = self.decompress(comp_segment)
2087 2087 else:
2088 2088 msg = 'unknown compression mode %d'
2089 2089 msg %= comp
2090 2090 raise error.RevlogError(msg)
2091 2091
2092 2092 sidedata = sidedatautil.deserialize_sidedata(segment)
2093 2093 return sidedata
2094 2094
2095 2095 def rawdata(self, nodeorrev, _df=None):
2096 2096 """return the uncompressed raw data of a given node or revision number.
2097 2097
2098 2098 _df - an existing file handle to read from. (internal-only)
2099 2099 """
2100 2100 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2101 2101
2102 2102 def hash(self, text, p1, p2):
2103 2103 """Compute a node hash.
2104 2104
2105 2105 Available as a function so that subclasses can replace the hash
2106 2106 as needed.
2107 2107 """
2108 2108 return storageutil.hashrevisionsha1(text, p1, p2)
2109 2109
2110 2110 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2111 2111 """Check node hash integrity.
2112 2112
2113 2113 Available as a function so that subclasses can extend hash mismatch
2114 2114 behaviors as needed.
2115 2115 """
2116 2116 try:
2117 2117 if p1 is None and p2 is None:
2118 2118 p1, p2 = self.parents(node)
2119 2119 if node != self.hash(text, p1, p2):
2120 2120 # Clear the revision cache on hash failure. The revision cache
2121 2121 # only stores the raw revision and clearing the cache does have
2122 2122 # the side-effect that we won't have a cache hit when the raw
2123 2123 # revision data is accessed. But this case should be rare and
2124 2124 # it is extra work to teach the cache about the hash
2125 2125 # verification state.
2126 2126 if self._revisioncache and self._revisioncache[0] == node:
2127 2127 self._revisioncache = None
2128 2128
2129 2129 revornode = rev
2130 2130 if revornode is None:
2131 2131 revornode = templatefilters.short(hex(node))
2132 2132 raise error.RevlogError(
2133 2133 _(b"integrity check failed on %s:%s")
2134 2134 % (self.display_id, pycompat.bytestr(revornode))
2135 2135 )
2136 2136 except error.RevlogError:
2137 2137 if self._censorable and storageutil.iscensoredtext(text):
2138 2138 raise error.CensoredNodeError(self.display_id, node, text)
2139 2139 raise
2140 2140
2141 2141 def _enforceinlinesize(self, tr):
2142 2142 """Check if the revlog is too big for inline and convert if so.
2143 2143
2144 2144 This should be called after revisions are added to the revlog. If the
2145 2145 revlog has grown too large to be an inline revlog, it will convert it
2146 2146 to use multiple index and data files.
2147 2147 """
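# conversion outline (descriptive only): copy every revision's data
# chunk into a fresh .d file, rewrite the index without
# FLAG_INLINE_DATA, record both files in the transaction so a rollback
# can truncate them, then let the new index replace the old one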
2148 2148 tiprev = len(self) - 1
2149 2149 total_size = self.start(tiprev) + self.length(tiprev)
2150 2150 if not self._inline or total_size < _maxinline:
2151 2151 return
2152 2152
2153 2153 troffset = tr.findoffset(self._indexfile)
2154 2154 if troffset is None:
2155 2155 raise error.RevlogError(
2156 2156 _(b"%s not found in the transaction") % self._indexfile
2157 2157 )
2158 2158 trindex = 0
2159 2159 tr.add(self._datafile, 0)
2160 2160
2161 2161 existing_handles = False
2162 2162 if self._writinghandles is not None:
2163 2163 existing_handles = True
2164 2164 fp = self._writinghandles[0]
2165 2165 fp.flush()
2166 2166 fp.close()
2167 2167 # We can't use the cached file handle after close(). So prevent
2168 2168 # its usage.
2169 2169 self._writinghandles = None
2170 2170
2171 2171 new_dfh = self._datafp(b'w+')
2172 2172 new_dfh.truncate(0) # drop any potentially existing data
2173 2173 try:
2174 2174 with self._indexfp() as read_ifh:
2175 2175 for r in self:
2176 2176 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2177 2177 if troffset <= self.start(r) + r * self.index.entry_size:
2178 2178 trindex = r
2179 2179 new_dfh.flush()
2180 2180
2181 2181 with self.__index_new_fp() as fp:
2182 2182 self._format_flags &= ~FLAG_INLINE_DATA
2183 2183 self._inline = False
2184 2184 for i in self:
2185 2185 e = self.index.entry_binary(i)
2186 2186 if i == 0 and self._docket is None:
2187 2187 header = self._format_flags | self._format_version
2188 2188 header = self.index.pack_header(header)
2189 2189 e = header + e
2190 2190 fp.write(e)
2191 2191 if self._docket is not None:
2192 2192 self._docket.index_end = fp.tell()
2193
2194 # There is a small transactional race here. If the rename of
2195 # the index fails, we should remove the datafile. It is more
2196 # important to ensure that the data file is not truncated
2197 # when the index is replaced as otherwise data is lost.
2198 tr.replace(self._datafile, self.start(trindex))
2199
2193 2200 # the temp file replaces the real index when we exit the context
2194 2201 # manager
2195 2202
2196 2203 tr.replace(self._indexfile, trindex * self.index.entry_size)
2197 2204 nodemaputil.setup_persistent_nodemap(tr, self)
2198 2205 self._chunkclear()
2199 2206
2200 2207 if existing_handles:
2201 2208 # switched from inline to conventional; reopen the index
2202 2209 ifh = self.__index_write_fp()
2203 2210 self._writinghandles = (ifh, new_dfh)
2204 2211 new_dfh = None
2205 2212 finally:
2206 2213 if new_dfh is not None:
2207 2214 new_dfh.close()
2208 2215
2209 2216 def _nodeduplicatecallback(self, transaction, node):
2210 2217 """called when trying to add a node already stored."""
2211 2218
2212 2219 @contextlib.contextmanager
2213 2220 def _writing(self, transaction):
2214 2221 if self._trypending:
2215 2222 msg = b'trying to write in a `trypending` revlog: %s'
2216 2223 msg %= self.display_id
2217 2224 raise error.ProgrammingError(msg)
2218 2225 if self._writinghandles is not None:
2219 2226 yield
2220 2227 else:
2221 2228 r = len(self)
2222 2229 dsize = 0
2223 2230 if r:
2224 2231 dsize = self.end(r - 1)
2225 2232 dfh = None
2226 2233 if not self._inline:
2227 2234 try:
2228 2235 dfh = self._datafp(b"r+")
2229 2236 if self._docket is None:
2230 2237 dfh.seek(0, os.SEEK_END)
2231 2238 else:
2232 2239 dfh.seek(self._docket.data_end, os.SEEK_SET)
2233 2240 except IOError as inst:
2234 2241 if inst.errno != errno.ENOENT:
2235 2242 raise
2236 2243 dfh = self._datafp(b"w+")
2237 2244 transaction.add(self._datafile, dsize)
2238 2245 try:
2239 2246 isize = r * self.index.entry_size
2240 2247 ifh = self.__index_write_fp()
2241 2248 if self._inline:
2242 2249 transaction.add(self._indexfile, dsize + isize)
2243 2250 else:
2244 2251 transaction.add(self._indexfile, isize)
2245 2252 try:
2246 2253 self._writinghandles = (ifh, dfh)
2247 2254 try:
2248 2255 yield
2249 2256 if self._docket is not None:
2250 2257 self._write_docket(transaction)
2251 2258 finally:
2252 2259 self._writinghandles = None
2253 2260 finally:
2254 2261 ifh.close()
2255 2262 finally:
2256 2263 if dfh is not None:
2257 2264 dfh.close()
2258 2265
2259 2266 def _write_docket(self, transaction):
2260 2267 """write the current docket on disk
2261 2268
2262 2269 Exists as a method to help the changelog implement transaction logic
2263 2270
2264 2271 We could also imagine using the same transaction logic for all revlogs
2265 2272 since dockets are cheap."""
2266 2273 self._docket.write(transaction)
2267 2274
2268 2275 def addrevision(
2269 2276 self,
2270 2277 text,
2271 2278 transaction,
2272 2279 link,
2273 2280 p1,
2274 2281 p2,
2275 2282 cachedelta=None,
2276 2283 node=None,
2277 2284 flags=REVIDX_DEFAULT_FLAGS,
2278 2285 deltacomputer=None,
2279 2286 sidedata=None,
2280 2287 ):
2281 2288 """add a revision to the log
2282 2289
2283 2290 text - the revision data to add
2284 2291 transaction - the transaction object used for rollback
2285 2292 link - the linkrev data to add
2286 2293 p1, p2 - the parent nodeids of the revision
2287 2294 cachedelta - an optional precomputed delta
2288 2295 node - nodeid of revision; typically node is not specified, and it is
2289 2296 computed by default as hash(text, p1, p2); however, subclasses might
2290 2297 use a different hashing method (and override checkhash() in that case)
2291 2298 flags - the known flags to set on the revision
2292 2299 deltacomputer - an optional deltacomputer instance shared between
2293 2300 multiple calls
2294 2301 """
2295 2302 if link == nullrev:
2296 2303 raise error.RevlogError(
2297 2304 _(b"attempted to add linkrev -1 to %s") % self.display_id
2298 2305 )
2299 2306
2300 2307 if sidedata is None:
2301 2308 sidedata = {}
2302 2309 elif sidedata and not self.hassidedata:
2303 2310 raise error.ProgrammingError(
2304 2311 _(b"trying to add sidedata to a revlog that does not support them")
2305 2312 )
2306 2313
2307 2314 if flags:
2308 2315 node = node or self.hash(text, p1, p2)
2309 2316
2310 2317 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2311 2318
2312 2319 # If the flag processor modifies the revision data, ignore any provided
2313 2320 # cachedelta.
2314 2321 if rawtext != text:
2315 2322 cachedelta = None
2316 2323
2317 2324 if len(rawtext) > _maxentrysize:
2318 2325 raise error.RevlogError(
2319 2326 _(
2320 2327 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2321 2328 )
2322 2329 % (self.display_id, len(rawtext))
2323 2330 )
2324 2331
2325 2332 node = node or self.hash(rawtext, p1, p2)
2326 2333 rev = self.index.get_rev(node)
2327 2334 if rev is not None:
2328 2335 return rev
2329 2336
2330 2337 if validatehash:
2331 2338 self.checkhash(rawtext, node, p1=p1, p2=p2)
2332 2339
2333 2340 return self.addrawrevision(
2334 2341 rawtext,
2335 2342 transaction,
2336 2343 link,
2337 2344 p1,
2338 2345 p2,
2339 2346 node,
2340 2347 flags,
2341 2348 cachedelta=cachedelta,
2342 2349 deltacomputer=deltacomputer,
2343 2350 sidedata=sidedata,
2344 2351 )
2345 2352
2346 2353 def addrawrevision(
2347 2354 self,
2348 2355 rawtext,
2349 2356 transaction,
2350 2357 link,
2351 2358 p1,
2352 2359 p2,
2353 2360 node,
2354 2361 flags,
2355 2362 cachedelta=None,
2356 2363 deltacomputer=None,
2357 2364 sidedata=None,
2358 2365 ):
2359 2366 """add a raw revision with known flags, node and parents
2360 2367 useful when reusing a revision not stored in this revlog (e.g. received
2361 2368 over the wire, or read from an external bundle).
2362 2369 """
2363 2370 with self._writing(transaction):
2364 2371 return self._addrevision(
2365 2372 node,
2366 2373 rawtext,
2367 2374 transaction,
2368 2375 link,
2369 2376 p1,
2370 2377 p2,
2371 2378 flags,
2372 2379 cachedelta,
2373 2380 deltacomputer=deltacomputer,
2374 2381 sidedata=sidedata,
2375 2382 )
2376 2383
2377 2384 def compress(self, data):
2378 2385 """Generate a possibly-compressed representation of data."""
2379 2386 if not data:
2380 2387 return b'', data
2381 2388
2382 2389 compressed = self._compressor.compress(data)
2383 2390
2384 2391 if compressed:
2385 2392 # The revlog compressor added the header in the returned data.
2386 2393 return b'', compressed
2387 2394
2388 2395 if data[0:1] == b'\0':
2389 2396 return b'', data
2390 2397 return b'u', data
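# return-value sketch (illustrative bytes, assuming the default zlib
# compressor): compress(b'') -> (b'', b''); compressible text ->
# (b'', b'x\x9c...'); incompressible text not starting with NUL ->
# (b'u', text), where b'u' marks "stored uncompressed"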
2391 2398
2392 2399 def decompress(self, data):
2393 2400 """Decompress a revlog chunk.
2394 2401
2395 2402 The chunk is expected to begin with a header identifying the
2396 2403 format type so it can be routed to an appropriate decompressor.
2397 2404 """
2398 2405 if not data:
2399 2406 return data
2400 2407
2401 2408 # Revlogs are read much more frequently than they are written and many
2402 2409 # chunks only take microseconds to decompress, so performance is
2403 2410 # important here.
2404 2411 #
2405 2412 # We can make a few assumptions about revlogs:
2406 2413 #
2407 2414 # 1) the majority of chunks will be compressed (as opposed to inline
2408 2415 # raw data).
2409 2416 # 2) decompressing *any* data will likely be at least 10x slower than
2410 2417 # returning raw inline data.
2411 2418 # 3) we want to prioritize common and officially supported compression
2412 2419 # engines
2413 2420 #
2414 2421 # It follows that we want to optimize for "decompress compressed data
2415 2422 # when encoded with common and officially supported compression engines"
2416 2423 # case over "raw data" and "data encoded by less common or non-official
2417 2424 # compression engines." That is why we have the inline lookup first
2418 2425 # followed by the compengines lookup.
2419 2426 #
2420 2427 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2421 2428 # compressed chunks. And this matters for changelog and manifest reads.
2422 2429 t = data[0:1]
2423 2430
2424 2431 if t == b'x':
2425 2432 try:
2426 2433 return _zlibdecompress(data)
2427 2434 except zlib.error as e:
2428 2435 raise error.RevlogError(
2429 2436 _(b'revlog decompress error: %s')
2430 2437 % stringutil.forcebytestr(e)
2431 2438 )
2432 2439 # '\0' is more common than 'u' so it goes first.
2433 2440 elif t == b'\0':
2434 2441 return data
2435 2442 elif t == b'u':
2436 2443 return util.buffer(data, 1)
2437 2444
2438 2445 compressor = self._get_decompressor(t)
2439 2446
2440 2447 return compressor.decompress(data)
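# header-byte dispatch sketch (mirrors the branches above): b'x' ->
# zlib stream, b'\0' -> chunk stored as-is, b'u' -> uncompressed with a
# one-byte marker to strip, anything else -> resolved through
# _get_decompressor() (e.g. a zstd engine, if configured)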
2441 2448
2442 2449 def _addrevision(
2443 2450 self,
2444 2451 node,
2445 2452 rawtext,
2446 2453 transaction,
2447 2454 link,
2448 2455 p1,
2449 2456 p2,
2450 2457 flags,
2451 2458 cachedelta,
2452 2459 alwayscache=False,
2453 2460 deltacomputer=None,
2454 2461 sidedata=None,
2455 2462 ):
2456 2463 """internal function to add revisions to the log
2457 2464
2458 2465 see addrevision for argument descriptions.
2459 2466
2460 2467 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2461 2468
2462 2469 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2463 2470 be used.
2464 2471
2465 2472 invariants:
2466 2473 - rawtext is optional (can be None); if not set, cachedelta must be set.
2467 2474 if both are set, they must correspond to each other.
2468 2475 """
2469 2476 if node == self.nullid:
2470 2477 raise error.RevlogError(
2471 2478 _(b"%s: attempt to add null revision") % self.display_id
2472 2479 )
2473 2480 if (
2474 2481 node == self.nodeconstants.wdirid
2475 2482 or node in self.nodeconstants.wdirfilenodeids
2476 2483 ):
2477 2484 raise error.RevlogError(
2478 2485 _(b"%s: attempt to add wdir revision") % self.display_id
2479 2486 )
2480 2487 if self._writinghandles is None:
2481 2488 msg = b'adding revision outside `revlog._writing` context'
2482 2489 raise error.ProgrammingError(msg)
2483 2490
2484 2491 if self._inline:
2485 2492 fh = self._writinghandles[0]
2486 2493 else:
2487 2494 fh = self._writinghandles[1]
2488 2495
2489 2496 btext = [rawtext]
2490 2497
2491 2498 curr = len(self)
2492 2499 prev = curr - 1
2493 2500
2494 2501 offset = self._get_data_offset(prev)
2495 2502
2496 2503 if self._concurrencychecker:
2497 2504 ifh, dfh = self._writinghandles
2498 2505 if self._inline:
2499 2506 # offset is "as if" it were in the .d file, so we need to add on
2500 2507 # the size of the entry metadata.
2501 2508 self._concurrencychecker(
2502 2509 ifh, self._indexfile, offset + curr * self.index.entry_size
2503 2510 )
2504 2511 else:
2505 2512 # Entries in the .i are a consistent size.
2506 2513 self._concurrencychecker(
2507 2514 ifh, self._indexfile, curr * self.index.entry_size
2508 2515 )
2509 2516 self._concurrencychecker(dfh, self._datafile, offset)
2510 2517
2511 2518 p1r, p2r = self.rev(p1), self.rev(p2)
2512 2519
2513 2520 # full versions are inserted when the needed deltas
2514 2521 # become comparable to the uncompressed text
2515 2522 if rawtext is None:
2516 2523 # need the rawtext size before it was changed by flag processors,
2517 2524 # which is the non-raw size. use revlog explicitly to avoid
2518 2525 # filelog's extra logic that might remove metadata size.
2519 2526 textlen = mdiff.patchedsize(
2520 2527 revlog.size(self, cachedelta[0]), cachedelta[1]
2521 2528 )
2522 2529 else:
2523 2530 textlen = len(rawtext)
2524 2531
2525 2532 if deltacomputer is None:
2526 2533 deltacomputer = deltautil.deltacomputer(self)
2527 2534
2528 2535 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2529 2536
2530 2537 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2531 2538
2532 2539 compression_mode = COMP_MODE_INLINE
2533 2540 if self._docket is not None:
2534 2541 h, d = deltainfo.data
2535 2542 if not h and not d:
2536 2543 # no data to store at all... declare it uncompressed
2537 2544 compression_mode = COMP_MODE_PLAIN
2538 2545 elif not h:
2539 2546 t = d[0:1]
2540 2547 if t == b'\0':
2541 2548 compression_mode = COMP_MODE_PLAIN
2542 2549 elif t == self._docket.default_compression_header:
2543 2550 compression_mode = COMP_MODE_DEFAULT
2544 2551 elif h == b'u':
2545 2552 # we have a more efficient way to declare uncompressed
2546 2553 h = b''
2547 2554 compression_mode = COMP_MODE_PLAIN
2548 2555 deltainfo = deltautil.drop_u_compression(deltainfo)
2549 2556
2550 2557 sidedata_compression_mode = COMP_MODE_INLINE
2551 2558 if sidedata and self.hassidedata:
2552 2559 sidedata_compression_mode = COMP_MODE_PLAIN
2553 2560 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2554 2561 sidedata_offset = offset + deltainfo.deltalen
2555 2562 h, comp_sidedata = self.compress(serialized_sidedata)
2556 2563 if (
2557 2564 h != b'u'
2558 2565 and comp_sidedata[0:1] != b'\0'
2559 2566 and len(comp_sidedata) < len(serialized_sidedata)
2560 2567 ):
2561 2568 assert not h
2562 2569 if (
2563 2570 comp_sidedata[0:1]
2564 2571 == self._docket.default_compression_header
2565 2572 ):
2566 2573 sidedata_compression_mode = COMP_MODE_DEFAULT
2567 2574 serialized_sidedata = comp_sidedata
2568 2575 else:
2569 2576 sidedata_compression_mode = COMP_MODE_INLINE
2570 2577 serialized_sidedata = comp_sidedata
2571 2578 else:
2572 2579 serialized_sidedata = b""
2573 2580 # Don't store the offset if the sidedata is empty; that way
2574 2581 # we can easily detect empty sidedata, and it will be no different
2575 2582 # from sidedata we add manually.
2576 2583 sidedata_offset = 0
2577 2584
2578 2585 e = (
2579 2586 offset_type(offset, flags),
2580 2587 deltainfo.deltalen,
2581 2588 textlen,
2582 2589 deltainfo.base,
2583 2590 link,
2584 2591 p1r,
2585 2592 p2r,
2586 2593 node,
2587 2594 sidedata_offset,
2588 2595 len(serialized_sidedata),
2589 2596 compression_mode,
2590 2597 sidedata_compression_mode,
2591 2598 )
2592 2599
2593 2600 self.index.append(e)
2594 2601 entry = self.index.entry_binary(curr)
2595 2602 if curr == 0 and self._docket is None:
2596 2603 header = self._format_flags | self._format_version
2597 2604 header = self.index.pack_header(header)
2598 2605 entry = header + entry
2599 2606 self._writeentry(
2600 2607 transaction,
2601 2608 entry,
2602 2609 deltainfo.data,
2603 2610 link,
2604 2611 offset,
2605 2612 serialized_sidedata,
2606 2613 )
2607 2614
2608 2615 rawtext = btext[0]
2609 2616
2610 2617 if alwayscache and rawtext is None:
2611 2618 rawtext = deltacomputer.buildtext(revinfo, fh)
2612 2619
2613 2620 if type(rawtext) == bytes: # only accept immutable objects
2614 2621 self._revisioncache = (node, curr, rawtext)
2615 2622 self._chainbasecache[curr] = deltainfo.chainbase
2616 2623 return curr
2617 2624
2618 2625 def _get_data_offset(self, prev):
2619 2626 """Returns the current offset in the (in-transaction) data file.
2620 2627 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2621 2628 file to store that information: since sidedata can be rewritten to the
2622 2629 end of the data file within a transaction, you can have cases where, for
2623 2630 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2624 2631 to `n - 1`'s sidedata being written after `n`'s data.
2625 2632
2626 2633 TODO cache this in a docket file before getting out of experimental."""
2627 2634 if self._docket is None:
2628 2635 return self.end(prev)
2629 2636 else:
2630 2637 return self._docket.data_end
2631 2638
2632 2639 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2633 2640 # Files opened in a+ mode have inconsistent behavior on various
2634 2641 # platforms. Windows requires that a file positioning call be made
2635 2642 # when the file handle transitions between reads and writes. See
2636 2643 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2637 2644 # platforms, Python or the platform itself can be buggy. Some versions
2638 2645 # of Solaris have been observed to not append at the end of the file
2639 2646 # if the file was seeked to before the end. See issue4943 for more.
2640 2647 #
2641 2648 # We work around this issue by inserting a seek() before writing.
2642 2649 # Note: This is likely not necessary on Python 3. However, because
2643 2650 # the file handle is reused for reads and may be seeked there, we need
2644 2651 # to be careful before changing this.
2645 2652 if self._writinghandles is None:
2646 2653 msg = b'adding revision outside `revlog._writing` context'
2647 2654 raise error.ProgrammingError(msg)
2648 2655 ifh, dfh = self._writinghandles
2649 2656 if self._docket is None:
2650 2657 ifh.seek(0, os.SEEK_END)
2651 2658 else:
2652 2659 ifh.seek(self._docket.index_end, os.SEEK_SET)
2653 2660 if dfh:
2654 2661 if self._docket is None:
2655 2662 dfh.seek(0, os.SEEK_END)
2656 2663 else:
2657 2664 dfh.seek(self._docket.data_end, os.SEEK_SET)
2658 2665
2659 2666 curr = len(self) - 1
2660 2667 if not self._inline:
2661 2668 transaction.add(self._datafile, offset)
2662 2669 transaction.add(self._indexfile, curr * len(entry))
2663 2670 if data[0]:
2664 2671 dfh.write(data[0])
2665 2672 dfh.write(data[1])
2666 2673 if sidedata:
2667 2674 dfh.write(sidedata)
2668 2675 ifh.write(entry)
2669 2676 else:
2670 2677 offset += curr * self.index.entry_size
2671 2678 transaction.add(self._indexfile, offset)
2672 2679 ifh.write(entry)
2673 2680 ifh.write(data[0])
2674 2681 ifh.write(data[1])
2675 2682 if sidedata:
2676 2683 ifh.write(sidedata)
2677 2684 self._enforceinlinesize(transaction)
2678 2685 if self._docket is not None:
2679 2686 self._docket.index_end = self._writinghandles[0].tell()
2680 2687 self._docket.data_end = self._writinghandles[1].tell()
2681 2688
2682 2689 nodemaputil.setup_persistent_nodemap(transaction, self)
2683 2690
2684 2691 def addgroup(
2685 2692 self,
2686 2693 deltas,
2687 2694 linkmapper,
2688 2695 transaction,
2689 2696 alwayscache=False,
2690 2697 addrevisioncb=None,
2691 2698 duplicaterevisioncb=None,
2692 2699 ):
2693 2700 """
2694 2701 add a delta group
2695 2702
2696 2703 given a set of deltas, add them to the revision log. the
2697 2704 first delta is against its parent, which should be in our
2698 2705 log, the rest are against the previous delta.
2699 2706
2700 2707 If ``addrevisioncb`` is defined, it will be called with arguments of
2701 2708 this revlog and the node that was added.
2702 2709 """
2703 2710
2704 2711 if self._adding_group:
2705 2712 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2706 2713
2707 2714 self._adding_group = True
2708 2715 empty = True
2709 2716 try:
2710 2717 with self._writing(transaction):
2711 2718 deltacomputer = deltautil.deltacomputer(self)
2712 2719 # loop through our set of deltas
2713 2720 for data in deltas:
2714 2721 (
2715 2722 node,
2716 2723 p1,
2717 2724 p2,
2718 2725 linknode,
2719 2726 deltabase,
2720 2727 delta,
2721 2728 flags,
2722 2729 sidedata,
2723 2730 ) = data
2724 2731 link = linkmapper(linknode)
2725 2732 flags = flags or REVIDX_DEFAULT_FLAGS
2726 2733
2727 2734 rev = self.index.get_rev(node)
2728 2735 if rev is not None:
2729 2736 # this can happen if two branches make the same change
2730 2737 self._nodeduplicatecallback(transaction, rev)
2731 2738 if duplicaterevisioncb:
2732 2739 duplicaterevisioncb(self, rev)
2733 2740 empty = False
2734 2741 continue
2735 2742
2736 2743 for p in (p1, p2):
2737 2744 if not self.index.has_node(p):
2738 2745 raise error.LookupError(
2739 2746 p, self.radix, _(b'unknown parent')
2740 2747 )
2741 2748
2742 2749 if not self.index.has_node(deltabase):
2743 2750 raise error.LookupError(
2744 2751 deltabase, self.display_id, _(b'unknown delta base')
2745 2752 )
2746 2753
2747 2754 baserev = self.rev(deltabase)
2748 2755
2749 2756 if baserev != nullrev and self.iscensored(baserev):
2750 2757 # if base is censored, the delta must be a full replacement in a
2751 2758 # single patch operation
2752 2759 hlen = struct.calcsize(b">lll")
2753 2760 oldlen = self.rawsize(baserev)
2754 2761 newlen = len(delta) - hlen
2755 2762 if delta[:hlen] != mdiff.replacediffheader(
2756 2763 oldlen, newlen
2757 2764 ):
2758 2765 raise error.CensoredBaseError(
2759 2766 self.display_id, self.node(baserev)
2760 2767 )
2761 2768
2762 2769 if not flags and self._peek_iscensored(baserev, delta):
2763 2770 flags |= REVIDX_ISCENSORED
2764 2771
2765 2772 # We assume consumers of addrevisioncb will want to retrieve
2766 2773 # the added revision, which will require a call to
2767 2774 # revision(). revision() will fast path if there is a cache
2768 2775 # hit. So, we tell _addrevision() to always cache in this case.
2769 2776 # We're only using addgroup() in the context of changegroup
2770 2777 # generation so the revision data can always be handled as raw
2771 2778 # by the flagprocessor.
2772 2779 rev = self._addrevision(
2773 2780 node,
2774 2781 None,
2775 2782 transaction,
2776 2783 link,
2777 2784 p1,
2778 2785 p2,
2779 2786 flags,
2780 2787 (baserev, delta),
2781 2788 alwayscache=alwayscache,
2782 2789 deltacomputer=deltacomputer,
2783 2790 sidedata=sidedata,
2784 2791 )
2785 2792
2786 2793 if addrevisioncb:
2787 2794 addrevisioncb(self, rev)
2788 2795 empty = False
2789 2796 finally:
2790 2797 self._adding_group = False
2791 2798 return not empty
2792 2799
2793 2800 def iscensored(self, rev):
2794 2801 """Check if a file revision is censored."""
2795 2802 if not self._censorable:
2796 2803 return False
2797 2804
2798 2805 return self.flags(rev) & REVIDX_ISCENSORED
2799 2806
2800 2807 def _peek_iscensored(self, baserev, delta):
2801 2808 """Quickly check if a delta produces a censored revision."""
2802 2809 if not self._censorable:
2803 2810 return False
2804 2811
2805 2812 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2806 2813
2807 2814 def getstrippoint(self, minlink):
2808 2815 """find the minimum rev that must be stripped to strip the linkrev
2809 2816
2810 2817 Returns a tuple containing the minimum rev and a set of all revs that
2811 2818 have linkrevs that will be broken by this strip.
2812 2819 """
2813 2820 return storageutil.resolvestripinfo(
2814 2821 minlink,
2815 2822 len(self) - 1,
2816 2823 self.headrevs(),
2817 2824 self.linkrev,
2818 2825 self.parentrevs,
2819 2826 )
2820 2827
2821 2828 def strip(self, minlink, transaction):
2822 2829 """truncate the revlog on the first revision with a linkrev >= minlink
2823 2830
2824 2831 This function is called when we're stripping revision minlink and
2825 2832 its descendants from the repository.
2826 2833
2827 2834 We have to remove all revisions with linkrev >= minlink, because
2828 2835 the equivalent changelog revisions will be renumbered after the
2829 2836 strip.
2830 2837
2831 2838 So we truncate the revlog on the first of these revisions, and
2832 2839 trust that the caller has saved the revisions that shouldn't be
2833 2840 removed and that it'll re-add them after this truncation.
2834 2841 """
2835 2842 if len(self) == 0:
2836 2843 return
2837 2844
2838 2845 rev, _ = self.getstrippoint(minlink)
2839 2846 if rev == len(self):
2840 2847 return
2841 2848
2842 2849 # first truncate the files on disk
2843 2850 data_end = self.start(rev)
2844 2851 if not self._inline:
2845 2852 transaction.add(self._datafile, data_end)
2846 2853 end = rev * self.index.entry_size
2847 2854 else:
2848 2855 end = data_end + (rev * self.index.entry_size)
2849 2856
2850 2857 transaction.add(self._indexfile, end)
2851 2858 if self._docket is not None:
2852 2859 # XXX we could leverage the docket while stripping. However it is
2853 2860 # not powerful enough at the time of this comment
2854 2861 self._docket.index_end = end
2855 2862 self._docket.data_end = data_end
2856 2863 self._docket.write(transaction, stripping=True)
2857 2864
2858 2865 # then reset internal state in memory to forget those revisions
2859 2866 self._revisioncache = None
2860 2867 self._chaininfocache = util.lrucachedict(500)
2861 2868 self._chunkclear()
2862 2869
2863 2870 del self.index[rev:-1]
2864 2871
2865 2872 def checksize(self):
2866 2873 """Check size of index and data files
2867 2874
2868 2875 return a (dd, di) tuple.
2869 2876 - dd: extra bytes for the "data" file
2870 2877 - di: extra bytes for the "index" file
2871 2878
2872 2879 A healthy revlog will return (0, 0).
2873 2880 """
2874 2881 expected = 0
2875 2882 if len(self):
2876 2883 expected = max(0, self.end(len(self) - 1))
2877 2884
2878 2885 try:
2879 2886 with self._datafp() as f:
2880 2887 f.seek(0, io.SEEK_END)
2881 2888 actual = f.tell()
2882 2889 dd = actual - expected
2883 2890 except IOError as inst:
2884 2891 if inst.errno != errno.ENOENT:
2885 2892 raise
2886 2893 dd = 0
2887 2894
2888 2895 try:
2889 2896 f = self.opener(self._indexfile)
2890 2897 f.seek(0, io.SEEK_END)
2891 2898 actual = f.tell()
2892 2899 f.close()
2893 2900 s = self.index.entry_size
2894 2901 i = max(0, actual // s)
2895 2902 di = actual - (i * s)
2896 2903 if self._inline:
2897 2904 databytes = 0
2898 2905 for r in self:
2899 2906 databytes += max(0, self.length(r))
2900 2907 dd = 0
2901 2908 di = actual - len(self) * s - databytes
2902 2909 except IOError as inst:
2903 2910 if inst.errno != errno.ENOENT:
2904 2911 raise
2905 2912 di = 0
2906 2913
2907 2914 return (dd, di)
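# interpretation sketch: (0, 0) is healthy; dd > 0 means the data file
# has trailing bytes the index does not cover (e.g. an interrupted
# append); di > 0 usually means a truncated or oversized index, since
# a valid index is a whole number of fixed-size entries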
2908 2915
2909 2916 def files(self):
2910 2917 res = [self._indexfile]
2911 2918 if not self._inline:
2912 2919 res.append(self._datafile)
2913 2920 return res
2914 2921
2915 2922 def emitrevisions(
2916 2923 self,
2917 2924 nodes,
2918 2925 nodesorder=None,
2919 2926 revisiondata=False,
2920 2927 assumehaveparentrevisions=False,
2921 2928 deltamode=repository.CG_DELTAMODE_STD,
2922 2929 sidedata_helpers=None,
2923 2930 ):
2924 2931 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2925 2932 raise error.ProgrammingError(
2926 2933 b'unhandled value for nodesorder: %s' % nodesorder
2927 2934 )
2928 2935
2929 2936 if nodesorder is None and not self._generaldelta:
2930 2937 nodesorder = b'storage'
2931 2938
2932 2939 if (
2933 2940 not self._storedeltachains
2934 2941 and deltamode != repository.CG_DELTAMODE_PREV
2935 2942 ):
2936 2943 deltamode = repository.CG_DELTAMODE_FULL
2937 2944
2938 2945 return storageutil.emitrevisions(
2939 2946 self,
2940 2947 nodes,
2941 2948 nodesorder,
2942 2949 revlogrevisiondelta,
2943 2950 deltaparentfn=self.deltaparent,
2944 2951 candeltafn=self.candelta,
2945 2952 rawsizefn=self.rawsize,
2946 2953 revdifffn=self.revdiff,
2947 2954 flagsfn=self.flags,
2948 2955 deltamode=deltamode,
2949 2956 revisiondata=revisiondata,
2950 2957 assumehaveparentrevisions=assumehaveparentrevisions,
2951 2958 sidedata_helpers=sidedata_helpers,
2952 2959 )
2953 2960
2954 2961 DELTAREUSEALWAYS = b'always'
2955 2962 DELTAREUSESAMEREVS = b'samerevs'
2956 2963 DELTAREUSENEVER = b'never'
2957 2964
2958 2965 DELTAREUSEFULLADD = b'fulladd'
2959 2966
2960 2967 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2961 2968
2962 2969 def clone(
2963 2970 self,
2964 2971 tr,
2965 2972 destrevlog,
2966 2973 addrevisioncb=None,
2967 2974 deltareuse=DELTAREUSESAMEREVS,
2968 2975 forcedeltabothparents=None,
2969 2976 sidedata_helpers=None,
2970 2977 ):
2971 2978 """Copy this revlog to another, possibly with format changes.
2972 2979
2973 2980 The destination revlog will contain the same revisions and nodes.
2974 2981 However, it may not be bit-for-bit identical due to e.g. delta encoding
2975 2982 differences.
2976 2983
2977 2984 The ``deltareuse`` argument controls how deltas from the existing revlog
2978 2985 are preserved in the destination revlog. The argument can have the
2979 2986 following values:
2980 2987
2981 2988 DELTAREUSEALWAYS
2982 2989 Deltas will always be reused (if possible), even if the destination
2983 2990 revlog would not select the same revisions for the delta. This is the
2984 2991 fastest mode of operation.
2985 2992 DELTAREUSESAMEREVS
2986 2993 Deltas will be reused if the destination revlog would pick the same
2987 2994 revisions for the delta. This mode strikes a balance between speed
2988 2995 and optimization.
2989 2996 DELTAREUSENEVER
2990 2997 Deltas will never be reused. This is the slowest mode of execution.
2991 2998 This mode can be used to recompute deltas (e.g. if the diff/delta
2992 2999 algorithm changes).
2993 3000 DELTAREUSEFULLADD
2994 3001 Revisions will be re-added as if they were new content. This is
2995 3002 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2996 3003 e.g. large file detection and handling.
2997 3004
2998 3005 Delta computation can be slow, so the choice of delta reuse policy can
2999 3006 significantly affect run time.
3000 3007
3001 3008 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3002 3009 two extremes. Deltas will be reused if they are appropriate. But if the
3003 3010 delta could choose a better revision, it will do so. This means if you
3004 3011 are converting a non-generaldelta revlog to a generaldelta revlog,
3005 3012 deltas will be recomputed if the delta's parent isn't a parent of the
3006 3013 revision.
3007 3014
3008 3015 In addition to the delta policy, the ``forcedeltabothparents``
3009 3016 argument controls whether to force computing deltas against both parents
3010 3017 for merges. If None, the destination revlog's current setting is used.
3011 3018
3012 3019 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3013 3020 `sidedata_helpers`.
3014 3021 """
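# usage sketch (illustrative; `src`, `dst` and `tr` are assumed to be
# a source revlog, an empty destination revlog and an open transaction,
# none of which are defined here):
#
#   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)  # recompute deltas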
3015 3022 if deltareuse not in self.DELTAREUSEALL:
3016 3023 raise ValueError(
3017 3024 _(b'value for deltareuse invalid: %s') % deltareuse
3018 3025 )
3019 3026
3020 3027 if len(destrevlog):
3021 3028 raise ValueError(_(b'destination revlog is not empty'))
3022 3029
3023 3030 if getattr(self, 'filteredrevs', None):
3024 3031 raise ValueError(_(b'source revlog has filtered revisions'))
3025 3032 if getattr(destrevlog, 'filteredrevs', None):
3026 3033 raise ValueError(_(b'destination revlog has filtered revisions'))
3027 3034
3028 3035 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3029 3036 # if possible.
3030 3037 oldlazydelta = destrevlog._lazydelta
3031 3038 oldlazydeltabase = destrevlog._lazydeltabase
3032 3039 oldamd = destrevlog._deltabothparents
3033 3040
3034 3041 try:
3035 3042 if deltareuse == self.DELTAREUSEALWAYS:
3036 3043 destrevlog._lazydeltabase = True
3037 3044 destrevlog._lazydelta = True
3038 3045 elif deltareuse == self.DELTAREUSESAMEREVS:
3039 3046 destrevlog._lazydeltabase = False
3040 3047 destrevlog._lazydelta = True
3041 3048 elif deltareuse == self.DELTAREUSENEVER:
3042 3049 destrevlog._lazydeltabase = False
3043 3050 destrevlog._lazydelta = False
3044 3051
3045 3052 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3046 3053
3047 3054 self._clone(
3048 3055 tr,
3049 3056 destrevlog,
3050 3057 addrevisioncb,
3051 3058 deltareuse,
3052 3059 forcedeltabothparents,
3053 3060 sidedata_helpers,
3054 3061 )
3055 3062
3056 3063 finally:
3057 3064 destrevlog._lazydelta = oldlazydelta
3058 3065 destrevlog._lazydeltabase = oldlazydeltabase
3059 3066 destrevlog._deltabothparents = oldamd
3060 3067
3061 3068 def _clone(
3062 3069 self,
3063 3070 tr,
3064 3071 destrevlog,
3065 3072 addrevisioncb,
3066 3073 deltareuse,
3067 3074 forcedeltabothparents,
3068 3075 sidedata_helpers,
3069 3076 ):
3070 3077 """perform the core duty of `revlog.clone` after parameter processing"""
3071 3078 deltacomputer = deltautil.deltacomputer(destrevlog)
3072 3079 index = self.index
3073 3080 for rev in self:
3074 3081 entry = index[rev]
3075 3082
3076 3083 # Some classes override linkrev to take filtered revs into
3077 3084 # account. Use raw entry from index.
3078 3085 flags = entry[0] & 0xFFFF
3079 3086 linkrev = entry[4]
3080 3087 p1 = index[entry[5]][7]
3081 3088 p2 = index[entry[6]][7]
3082 3089 node = entry[7]
3083 3090
3084 3091 # (Possibly) reuse the delta from the revlog if allowed and
3085 3092 # the revlog chunk is a delta.
3086 3093 cachedelta = None
3087 3094 rawtext = None
3088 3095 if deltareuse == self.DELTAREUSEFULLADD:
3089 3096 text, sidedata = self._revisiondata(rev)
3090 3097
3091 3098 if sidedata_helpers is not None:
3092 3099 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3093 3100 self, sidedata_helpers, sidedata, rev
3094 3101 )
3095 3102 flags = flags | new_flags[0] & ~new_flags[1]
3096 3103
3097 3104 destrevlog.addrevision(
3098 3105 text,
3099 3106 tr,
3100 3107 linkrev,
3101 3108 p1,
3102 3109 p2,
3103 3110 cachedelta=cachedelta,
3104 3111 node=node,
3105 3112 flags=flags,
3106 3113 deltacomputer=deltacomputer,
3107 3114 sidedata=sidedata,
3108 3115 )
3109 3116 else:
3110 3117 if destrevlog._lazydelta:
3111 3118 dp = self.deltaparent(rev)
3112 3119 if dp != nullrev:
3113 3120 cachedelta = (dp, bytes(self._chunk(rev)))
3114 3121
3115 3122 sidedata = None
3116 3123 if not cachedelta:
3117 3124 rawtext, sidedata = self._revisiondata(rev)
3118 3125 if sidedata is None:
3119 3126 sidedata = self.sidedata(rev)
3120 3127
3121 3128 if sidedata_helpers is not None:
3122 3129 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3123 3130 self, sidedata_helpers, sidedata, rev
3124 3131 )
3125 3132 flags = flags | new_flags[0] & ~new_flags[1]
3126 3133
3127 3134 with destrevlog._writing(tr):
3128 3135 destrevlog._addrevision(
3129 3136 node,
3130 3137 rawtext,
3131 3138 tr,
3132 3139 linkrev,
3133 3140 p1,
3134 3141 p2,
3135 3142 flags,
3136 3143 cachedelta,
3137 3144 deltacomputer=deltacomputer,
3138 3145 sidedata=sidedata,
3139 3146 )
3140 3147
3141 3148 if addrevisioncb:
3142 3149 addrevisioncb(self, rev, node)
3143 3150
3144 3151 def censorrevision(self, tr, censornode, tombstone=b''):
3145 3152 if self._format_version == REVLOGV0:
3146 3153 raise error.RevlogError(
3147 3154 _(b'cannot censor with version %d revlogs')
3148 3155 % self._format_version
3149 3156 )
3150 3157
3151 3158 censorrev = self.rev(censornode)
3152 3159 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3153 3160
3154 3161 if len(tombstone) > self.rawsize(censorrev):
3155 3162 raise error.Abort(
3156 3163 _(b'censor tombstone must be no longer than censored data')
3157 3164 )
3158 3165
3159 3166 # Rewriting the revlog in place is hard. Our strategy for censoring is
3160 3167 # to create a new revlog, copy all revisions to it, then replace the
3161 3168 # revlogs on transaction close.
3162 3169 #
3163 3170 # This is a bit dangerous. We could easily have a mismatch of state.
3164 3171 newrl = revlog(
3165 3172 self.opener,
3166 3173 target=self.target,
3167 3174 radix=self.radix,
3168 3175 postfix=b'tmpcensored',
3169 3176 censorable=True,
3170 3177 )
3171 3178 newrl._format_version = self._format_version
3172 3179 newrl._format_flags = self._format_flags
3173 3180 newrl._generaldelta = self._generaldelta
3174 3181 newrl._parse_index = self._parse_index
3175 3182
3176 3183 for rev in self.revs():
3177 3184 node = self.node(rev)
3178 3185 p1, p2 = self.parents(node)
3179 3186
3180 3187 if rev == censorrev:
3181 3188 newrl.addrawrevision(
3182 3189 tombstone,
3183 3190 tr,
3184 3191 self.linkrev(censorrev),
3185 3192 p1,
3186 3193 p2,
3187 3194 censornode,
3188 3195 REVIDX_ISCENSORED,
3189 3196 )
3190 3197
3191 3198 if newrl.deltaparent(rev) != nullrev:
3192 3199 raise error.Abort(
3193 3200 _(
3194 3201 b'censored revision stored as delta; '
3195 3202 b'cannot censor'
3196 3203 ),
3197 3204 hint=_(
3198 3205 b'censoring of revlogs is not '
3199 3206 b'fully implemented; please report '
3200 3207 b'this bug'
3201 3208 ),
3202 3209 )
3203 3210 continue
3204 3211
3205 3212 if self.iscensored(rev):
3206 3213 if self.deltaparent(rev) != nullrev:
3207 3214 raise error.Abort(
3208 3215 _(
3209 3216 b'cannot censor due to censored '
3210 3217 b'revision having delta stored'
3211 3218 )
3212 3219 )
3213 3220 rawtext = self._chunk(rev)
3214 3221 else:
3215 3222 rawtext = self.rawdata(rev)
3216 3223
3217 3224 newrl.addrawrevision(
3218 3225 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3219 3226 )
3220 3227
3221 3228 tr.addbackup(self._indexfile, location=b'store')
3222 3229 if not self._inline:
3223 3230 tr.addbackup(self._datafile, location=b'store')
3224 3231
3225 3232 self.opener.rename(newrl._indexfile, self._indexfile)
3226 3233 if not self._inline:
3227 3234 self.opener.rename(newrl._datafile, self._datafile)
3228 3235
3229 3236 self.clearcaches()
3230 3237 self._loadindex()
3231 3238
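For context, the tombstone built by `storageutil.packmeta` above uses the standard filelog metadata framing: a \x01\n delimited header of "key: value" lines placed in front of the (here empty) data. A self-contained sketch of that framing, offered as an approximation rather than the exact implementation:

    def pack_meta(meta, text):
        # \x01\n delimited header of "key: value" lines, then the data
        header = b''.join(b'%s: %s\n' % (k, meta[k]) for k in sorted(meta))
        return b'\x01\n%s\x01\n%s' % (header, text)

    # the tombstone text below is illustrative
    tombstone = pack_meta({b'censored': b'removed for legal reasons'}, b'')
    assert tombstone.startswith(b'\x01\n')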
3232 3239 def verifyintegrity(self, state):
3233 3240 """Verifies the integrity of the revlog.
3234 3241
3235 3242 Yields ``revlogproblem`` instances describing problems that are
3236 3243 found.
3237 3244 """
3238 3245 dd, di = self.checksize()
3239 3246 if dd:
3240 3247 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3241 3248 if di:
3242 3249 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3243 3250
3244 3251 version = self._format_version
3245 3252
3246 3253 # The verifier tells us what version revlog we should be.
3247 3254 if version != state[b'expectedversion']:
3248 3255 yield revlogproblem(
3249 3256 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3250 3257 % (self.display_id, version, state[b'expectedversion'])
3251 3258 )
3252 3259
3253 3260 state[b'skipread'] = set()
3254 3261 state[b'safe_renamed'] = set()
3255 3262
3256 3263 for rev in self:
3257 3264 node = self.node(rev)
3258 3265
3259 3266 # Verify contents. 4 cases to care about:
3260 3267 #
3261 3268 # common: the most common case
3262 3269 # rename: with a rename
3263 3270 # meta: file content starts with b'\1\n', the metadata
3264 3271 # header defined in filelog.py, but without a rename
3265 3272 # ext: content stored externally
3266 3273 #
3267 3274 # More formally, their differences are shown below:
3268 3275 #
3269 3276 # | common | rename | meta | ext
3270 3277 # -------------------------------------------------------
3271 3278 # flags() | 0 | 0 | 0 | not 0
3272 3279 # renamed() | False | True | False | ?
3273 3280 # rawtext[0:2]=='\1\n'| False | True | True | ?
3274 3281 #
3275 3282 # "rawtext" means the raw text stored in revlog data, which
3276 3283 # could be retrieved by "rawdata(rev)". "text"
3277 3284 # mentioned below is "revision(rev)".
3278 3285 #
3279 3286 # There are 3 different lengths stored physically:
3280 3287 # 1. L1: rawsize, stored in revlog index
3281 3288 # 2. L2: len(rawtext), stored in revlog data
3282 3289 # 3. L3: len(text), stored in revlog data if flags==0, or
3283 3290 # possibly somewhere else if flags!=0
3284 3291 #
3285 3292 # L1 should be equal to L2. L3 could be different from them.
3286 3293 # "text" may or may not affect commit hash depending on flag
3287 3294 # processors (see flagutil.addflagprocessor).
3288 3295 #
3289 3296 # | common | rename | meta | ext
3290 3297 # -------------------------------------------------
3291 3298 # rawsize() | L1 | L1 | L1 | L1
3292 3299 # size() | L1 | L2-LM | L1(*) | L1 (?)
3293 3300 # len(rawtext) | L2 | L2 | L2 | L2
3294 3301 # len(text) | L2 | L2 | L2 | L3
3295 3302 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3296 3303 #
3297 3304 # LM: length of metadata, depending on rawtext
3298 3305 # (*): not ideal, see comment in filelog.size
3299 3306 # (?): could be "- len(meta)" if the resolved content has
3300 3307 # rename metadata
3301 3308 #
3302 3309 # Checks needed to be done:
3303 3310 # 1. length check: L1 == L2, in all cases.
3304 3311 # 2. hash check: depending on flag processor, we may need to
3305 3312 # use either "text" (external), or "rawtext" (in revlog).
3306 3313
3307 3314 try:
3308 3315 skipflags = state.get(b'skipflags', 0)
3309 3316 if skipflags:
3310 3317 skipflags &= self.flags(rev)
3311 3318
3312 3319 _verify_revision(self, skipflags, state, node)
3313 3320
3314 3321 l1 = self.rawsize(rev)
3315 3322 l2 = len(self.rawdata(node))
3316 3323
3317 3324 if l1 != l2:
3318 3325 yield revlogproblem(
3319 3326 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3320 3327 node=node,
3321 3328 )
3322 3329
3323 3330 except error.CensoredNodeError:
3324 3331 if state[b'erroroncensored']:
3325 3332 yield revlogproblem(
3326 3333 error=_(b'censored file data'), node=node
3327 3334 )
3328 3335 state[b'skipread'].add(node)
3329 3336 except Exception as e:
3330 3337 yield revlogproblem(
3331 3338 error=_(b'unpacking %s: %s')
3332 3339 % (short(node), stringutil.forcebytestr(e)),
3333 3340 node=node,
3334 3341 )
3335 3342 state[b'skipread'].add(node)
3336 3343
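The length relations from the table above can be made concrete with a small worked example for the rename/meta case; the copy metadata below is fabricated for illustration:

    meta = (b'\x01\n'
            b'copy: foo\n'
            b'copyrev: 0123456789abcdef0123456789abcdef01234567\n'
            b'\x01\n')
    data = b'file content\n'
    rawtext = meta + data        # what rawdata() returns; L2 = len(rawtext)
    LM = len(meta)               # length of the metadata framing
    assert len(rawtext) - LM == len(data)  # len(read()) == L2 - LM
    # the check performed above is L1 == L2: the rawsize recorded in the
    # index must equal the length of the rawtext stored in the data file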
3337 3344 def storageinfo(
3338 3345 self,
3339 3346 exclusivefiles=False,
3340 3347 sharedfiles=False,
3341 3348 revisionscount=False,
3342 3349 trackedsize=False,
3343 3350 storedsize=False,
3344 3351 ):
3345 3352 d = {}
3346 3353
3347 3354 if exclusivefiles:
3348 3355 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3349 3356 if not self._inline:
3350 3357 d[b'exclusivefiles'].append((self.opener, self._datafile))
3351 3358
3352 3359 if sharedfiles:
3353 3360 d[b'sharedfiles'] = []
3354 3361
3355 3362 if revisionscount:
3356 3363 d[b'revisionscount'] = len(self)
3357 3364
3358 3365 if trackedsize:
3359 3366 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3360 3367
3361 3368 if storedsize:
3362 3369 d[b'storedsize'] = sum(
3363 3370 self.opener.stat(path).st_size for path in self.files()
3364 3371 )
3365 3372
3366 3373 return d
3367 3374
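A caller requests only the fields it needs and reads them back from the returned dict, which is keyed by bytes. Hypothetical usage, where `rl` stands in for an existing revlog instance:

    info = rl.storageinfo(revisionscount=True, trackedsize=True)
    print(info[b'revisionscount'], info[b'trackedsize'])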
3368 3375 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3369 3376 if not self.hassidedata:
3370 3377 return
3371 3378 # revlog formats with sidedata support do not support inline data
3372 3379 assert not self._inline
3373 3380 if not helpers[1] and not helpers[2]:
3374 3381 # Nothing to generate or remove
3375 3382 return
3376 3383
3377 3384 new_entries = []
3378 3385 # append the new sidedata
3379 3386 with self._writing(transaction):
3380 3387 ifh, dfh = self._writinghandles
3381 3388 if self._docket is not None:
3382 3389 dfh.seek(self._docket.data_end, os.SEEK_SET)
3383 3390 else:
3384 3391 dfh.seek(0, os.SEEK_END)
3385 3392
3386 3393 current_offset = dfh.tell()
3387 3394 for rev in range(startrev, endrev + 1):
3388 3395 entry = self.index[rev]
3389 3396 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3390 3397 store=self,
3391 3398 sidedata_helpers=helpers,
3392 3399 sidedata={},
3393 3400 rev=rev,
3394 3401 )
3395 3402
3396 3403 serialized_sidedata = sidedatautil.serialize_sidedata(
3397 3404 new_sidedata
3398 3405 )
3399 3406
3400 3407 sidedata_compression_mode = COMP_MODE_INLINE
3401 3408 if serialized_sidedata and self.hassidedata:
3402 3409 sidedata_compression_mode = COMP_MODE_PLAIN
3403 3410 h, comp_sidedata = self.compress(serialized_sidedata)
3404 3411 if (
3405 3412 h != b'u'
3406 3413 and comp_sidedata[0] != b'\0'
3407 3414 and len(comp_sidedata) < len(serialized_sidedata)
3408 3415 ):
3409 3416 assert not h
3410 3417 if (
3411 3418 comp_sidedata[0]
3412 3419 == self._docket.default_compression_header
3413 3420 ):
3414 3421 sidedata_compression_mode = COMP_MODE_DEFAULT
3415 3422 serialized_sidedata = comp_sidedata
3416 3423 else:
3417 3424 sidedata_compression_mode = COMP_MODE_INLINE
3418 3425 serialized_sidedata = comp_sidedata
3419 3426 if entry[8] != 0 or entry[9] != 0:
3420 3427 # rewriting entries that already have sidedata is not
3421 3428 # supported yet, because it introduces garbage data in the
3422 3429 # revlog.
3423 3430 msg = b"rewriting existing sidedata is not supported yet"
3424 3431 raise error.Abort(msg)
3425 3432
3426 3433 # Apply (potential) flags to add and to remove after running
3427 3434 # the sidedata helpers
3428 3435 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3429 3436 entry_update = (
3430 3437 current_offset,
3431 3438 len(serialized_sidedata),
3432 3439 new_offset_flags,
3433 3440 sidedata_compression_mode,
3434 3441 )
3435 3442
3436 3443 # the sidedata computation might have moved the file cursors around
3437 3444 dfh.seek(current_offset, os.SEEK_SET)
3438 3445 dfh.write(serialized_sidedata)
3439 3446 new_entries.append(entry_update)
3440 3447 current_offset += len(serialized_sidedata)
3441 3448 if self._docket is not None:
3442 3449 self._docket.data_end = dfh.tell()
3443 3450
3444 3451 # rewrite the new index entries
3445 3452 ifh.seek(startrev * self.index.entry_size)
3446 3453 for i, e in enumerate(new_entries):
3447 3454 rev = startrev + i
3448 3455 self.index.replace_sidedata_info(rev, *e)
3449 3456 packed = self.index.entry_binary(rev)
3450 3457 if rev == 0 and self._docket is None:
3451 3458 header = self._format_flags | self._format_version
3452 3459 header = self.index.pack_header(header)
3453 3460 packed = header + packed
3454 3461 ifh.write(packed)
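One subtlety in the flag update `entry[0] | flags[0] & ~flags[1]` (the same pattern appears twice in `_clone`): Python's `&` binds tighter than `|`, so the removal mask filters only the freshly added flags and does not clear bits already set on the entry. A quick check with illustrative bit values, not real revlog flag constants:

    FLAG_A = 1 << 0  # illustrative flags, not revlog constants
    FLAG_B = 1 << 1

    existing, to_add, to_remove = FLAG_A, FLAG_B, FLAG_A
    # & binds tighter than |, so the mask applies to the added bits only
    assert existing | to_add & ~to_remove == FLAG_A | FLAG_B
    # clearing to_remove from the whole result would need parentheses:
    assert (existing | to_add) & ~to_remove == FLAG_B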
@@ -1,29 +1,86 b''
1 1 Test correctness of revlog inline -> non-inline transition
2 2 ----------------------------------------------------------
3 3
4 Helper extension to intercept renames.
5
6 $ cat > $TESTTMP/intercept_rename.py << EOF
7 > import os
8 > import sys
9 > from mercurial import extensions, util
10 >
11 > def extsetup(ui):
12 > def close(orig, *args, **kwargs):
13 > path = args[0]._atomictempfile__name
14 > if path.endswith(b'/.hg/store/data/file.i'):
15 > os._exit(80)
16 > return orig(*args, **kwargs)
17 > extensions.wrapfunction(util.atomictempfile, 'close', close)
18 > EOF
19
20
4 21 Test offset computation to correctly factor in the index entries themselves.
22 Also test that the recorded data file size is correct if the transaction is aborted
23 after the index has been replaced.
24
5 25 Test repo has one small, one moderate and one big change. The clone has
6 26 the small and moderate change and will transition to non-inline storage when
7 27 adding the big change.
8 28
9 29 $ hg init troffset-computation --config format.revlog-compression=none
10 30 $ cd troffset-computation
11 31 $ printf '% 20d' '1' > file
12 32 $ hg commit -Aqm_
13 33 $ printf '% 1024d' '1' > file
14 34 $ hg commit -Aqm_
15 35 $ dd if=/dev/zero of=file bs=1k count=128 > /dev/null 2>&1
16 36 $ hg commit -Aqm_
17 37 $ cd ..
18 38
19 39 $ hg clone -r 1 troffset-computation troffset-computation-copy --config format.revlog-compression=none -q
20 40 $ cd troffset-computation-copy
41
42 Reference size:
43
44 $ f -s .hg/store/data/file*
45 .hg/store/data/file.i: size=1174
46
21 47 $ cat > .hg/hgrc <<EOF
22 48 > [hooks]
23 49 > pretxnchangegroup = python:$TESTDIR/helper-killhook.py:killme
24 50 > EOF
25 51 $ hg pull ../troffset-computation
26 52 pulling from ../troffset-computation
27 53 [80]
28 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file | tail -1
54
55 The first file.i entry should match the size above.
56 The first file.d entry is the temporary record written during the split;
57 the second is the record after the split. The sum of the second file.d
58 entry and the second file.i entry should match the first file.i entry (1046 + 128 = 1174).
59
60 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
61 data/file.i 1174
62 data/file.d 0
63 data/file.d 1046
29 64 data/file.i 128
65 $ cd ..
66
67 Now retry the same, but intercept the rename of the index file and check that
68 the journal does not contain the new index size. This demonstrates the edge
69 case where the data file is left behind as garbage.
70
71 $ hg clone -r 1 troffset-computation troffset-computation-copy2 --config format.revlog-compression=none -q
72 $ cd troffset-computation-copy2
73 $ cat > .hg/hgrc <<EOF
74 > [extensions]
75 > intercept_rename = $TESTTMP/intercept_rename.py
76 > [hooks]
77 > pretxnchangegroup = python:$TESTDIR/helper-killhook.py:killme
78 > EOF
79 $ hg pull ../troffset-computation
80 pulling from ../troffset-computation
81 [80]
82 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
83 data/file.i 1174
84 data/file.d 0
85 data/file.d 1046
86 $ cd ..