revlog: unify flag processing when loading index...
marmoute
r48005:4d1c893b default
@@ -1,3209 +1,3196 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 FEATURES_BY_VERSION,
38 39 FLAG_GENERALDELTA,
39 40 FLAG_INLINE_DATA,
40 41 INDEX_HEADER,
41 42 REVLOGV0,
42 43 REVLOGV1,
43 44 REVLOGV1_FLAGS,
44 45 REVLOGV2,
45 46 REVLOGV2_FLAGS,
46 47 REVLOG_DEFAULT_FLAGS,
47 48 REVLOG_DEFAULT_FORMAT,
48 49 REVLOG_DEFAULT_VERSION,
49 50 SUPPORTED_FLAGS,
50 51 )
51 52 from .revlogutils.flagutil import (
52 53 REVIDX_DEFAULT_FLAGS,
53 54 REVIDX_ELLIPSIS,
54 55 REVIDX_EXTSTORED,
55 56 REVIDX_FLAGS_ORDER,
56 57 REVIDX_HASCOPIESINFO,
57 58 REVIDX_ISCENSORED,
58 59 REVIDX_RAWTEXT_CHANGING_FLAGS,
59 60 )
60 61 from .thirdparty import attr
61 62 from . import (
62 63 ancestor,
63 64 dagop,
64 65 error,
65 66 mdiff,
66 67 policy,
67 68 pycompat,
68 69 templatefilters,
69 70 util,
70 71 )
71 72 from .interfaces import (
72 73 repository,
73 74 util as interfaceutil,
74 75 )
75 76 from .revlogutils import (
76 77 deltas as deltautil,
77 78 flagutil,
78 79 nodemap as nodemaputil,
79 80 revlogv0,
80 81 sidedata as sidedatautil,
81 82 )
82 83 from .utils import (
83 84 storageutil,
84 85 stringutil,
85 86 )
86 87
87 88 # blanket usage of all the names to prevent pyflakes "unused import" warnings
88 89 # We need these names available in the module for extensions.
89 90
90 91 REVLOGV0
91 92 REVLOGV1
92 93 REVLOGV2
93 94 FLAG_INLINE_DATA
94 95 FLAG_GENERALDELTA
95 96 REVLOG_DEFAULT_FLAGS
96 97 REVLOG_DEFAULT_FORMAT
97 98 REVLOG_DEFAULT_VERSION
98 99 REVLOGV1_FLAGS
99 100 REVLOGV2_FLAGS
100 101 REVIDX_ISCENSORED
101 102 REVIDX_ELLIPSIS
102 103 REVIDX_HASCOPIESINFO
103 104 REVIDX_EXTSTORED
104 105 REVIDX_DEFAULT_FLAGS
105 106 REVIDX_FLAGS_ORDER
106 107 REVIDX_RAWTEXT_CHANGING_FLAGS
107 108
108 109 parsers = policy.importmod('parsers')
109 110 rustancestor = policy.importrust('ancestor')
110 111 rustdagop = policy.importrust('dagop')
111 112 rustrevlog = policy.importrust('revlog')
112 113
113 114 # Aliased for performance.
114 115 _zlibdecompress = zlib.decompress
115 116
116 117 # max size of revlog with inline data
117 118 _maxinline = 131072
118 119 _chunksize = 1048576
119 120
120 121 # Flag processors for REVIDX_ELLIPSIS.
121 122 def ellipsisreadprocessor(rl, text):
122 123 return text, False
123 124
124 125
125 126 def ellipsiswriteprocessor(rl, text):
126 127 return text, False
127 128
128 129
129 130 def ellipsisrawprocessor(rl, text):
130 131 return False
131 132
132 133
133 134 ellipsisprocessor = (
134 135 ellipsisreadprocessor,
135 136 ellipsiswriteprocessor,
136 137 ellipsisrawprocessor,
137 138 )
138 139
139 140
140 141 def offset_type(offset, type):
141 142 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
142 143 raise ValueError(b'unknown revlog index flags')
143 144 return int(int(offset) << 16 | type)
144 145
145 146
146 147 def _verify_revision(rl, skipflags, state, node):
147 148 """Verify the integrity of the given revlog ``node`` while providing a hook
148 149 point for extensions to influence the operation."""
149 150 if skipflags:
150 151 state[b'skipread'].add(node)
151 152 else:
152 153 # Side-effect: read content and verify hash.
153 154 rl.revision(node)
154 155
155 156
156 157 # True if a fast implementation for persistent-nodemap is available
157 158 #
158 159 # We also consider the "pure" python implementation "fast", because people
159 160 # using pure don't really have performance considerations (and face a
160 161 # wheelbarrow of other slowness sources)
161 162 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
162 163 parsers, 'BaseIndexObject'
163 164 )
164 165
165 166
166 167 @attr.s(slots=True, frozen=True)
167 168 class _revisioninfo(object):
168 169 """Information about a revision that allows building its fulltext
169 170 node: expected hash of the revision
170 171 p1, p2: parent revs of the revision
171 172 btext: built text cache consisting of a one-element list
172 173 cachedelta: (baserev, uncompressed_delta) or None
173 174 flags: flags associated to the revision storage
174 175
175 176 One of btext[0] or cachedelta must be set.
176 177 """
177 178
178 179 node = attr.ib()
179 180 p1 = attr.ib()
180 181 p2 = attr.ib()
181 182 btext = attr.ib()
182 183 textlen = attr.ib()
183 184 cachedelta = attr.ib()
184 185 flags = attr.ib()
185 186
186 187
187 188 @interfaceutil.implementer(repository.irevisiondelta)
188 189 @attr.s(slots=True)
189 190 class revlogrevisiondelta(object):
190 191 node = attr.ib()
191 192 p1node = attr.ib()
192 193 p2node = attr.ib()
193 194 basenode = attr.ib()
194 195 flags = attr.ib()
195 196 baserevisionsize = attr.ib()
196 197 revision = attr.ib()
197 198 delta = attr.ib()
198 199 sidedata = attr.ib()
199 200 protocol_flags = attr.ib()
200 201 linknode = attr.ib(default=None)
201 202
202 203
203 204 @interfaceutil.implementer(repository.iverifyproblem)
204 205 @attr.s(frozen=True)
205 206 class revlogproblem(object):
206 207 warning = attr.ib(default=None)
207 208 error = attr.ib(default=None)
208 209 node = attr.ib(default=None)
209 210
210 211
211 212 def parse_index_v1(data, inline):
212 213 # call the C implementation to parse the index data
213 214 index, cache = parsers.parse_index2(data, inline)
214 215 return index, cache
215 216
216 217
217 218 def parse_index_v2(data, inline):
218 219 # call the C implementation to parse the index data
219 220 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
220 221 return index, cache
221 222
222 223
223 224 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
224 225
225 226 def parse_index_v1_nodemap(data, inline):
226 227 index, cache = parsers.parse_index_devel_nodemap(data, inline)
227 228 return index, cache
228 229
229 230
230 231 else:
231 232 parse_index_v1_nodemap = None
232 233
233 234
234 235 def parse_index_v1_mixed(data, inline):
235 236 index, cache = parse_index_v1(data, inline)
236 237 return rustrevlog.MixedIndex(index), cache
237 238
238 239
239 240 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
240 241 # signed integer)
241 242 _maxentrysize = 0x7FFFFFFF
242 243
243 244
244 245 class revlog(object):
245 246 """
246 247 the underlying revision storage object
247 248
248 249 A revlog consists of two parts, an index and the revision data.
249 250
250 251 The index is a file with a fixed record size containing
251 252 information on each revision, including its nodeid (hash), the
252 253 nodeids of its parents, the position and offset of its data within
253 254 the data file, and the revision it's based on. Finally, each entry
254 255 contains a linkrev entry that can serve as a pointer to external
255 256 data.
256 257
257 258 The revision data itself is a linear collection of data chunks.
258 259 Each chunk represents a revision and is usually represented as a
259 260 delta against the previous chunk. To bound lookup time, runs of
260 261 deltas are limited to about 2 times the length of the original
261 262 version data. This makes retrieval of a version proportional to
262 263 its size, or O(1) relative to the number of revisions.
263 264
264 265 Both pieces of the revlog are written to in an append-only
265 266 fashion, which means we never need to rewrite a file to insert or
266 267 remove data, and can use some simple techniques to avoid the need
267 268 for locking while reading.
268 269
269 270 If checkambig, indexfile is opened with checkambig=True at
270 271 writing, to avoid file stat ambiguity.
271 272
272 273 If mmaplargeindex is True, and an mmapindexthreshold is set, the
273 274 index will be mmapped rather than read if it is larger than the
274 275 configured threshold.
275 276
276 277 If censorable is True, the revlog can have censored revisions.
277 278
278 279 If `upperboundcomp` is not None, this is the expected maximal gain from
279 280 compression for the data content.
280 281
281 282 `concurrencychecker` is an optional function that receives 3 arguments: a
282 283 file handle, a filename, and an expected position. It should check whether
283 284 the current position in the file handle is valid, and log/warn/fail (by
284 285 raising).
285 286 """
286 287
287 288 _flagserrorclass = error.RevlogError
288 289
289 290 def __init__(
290 291 self,
291 292 opener,
292 293 target,
293 294 radix,
294 295 postfix=None,
295 296 checkambig=False,
296 297 mmaplargeindex=False,
297 298 censorable=False,
298 299 upperboundcomp=None,
299 300 persistentnodemap=False,
300 301 concurrencychecker=None,
301 302 ):
302 303 """
303 304 create a revlog object
304 305
305 306 opener is a function that abstracts the file opening operation
306 307 and can be used to implement COW semantics or the like.
307 308
308 309 `target`: a (KIND, ID) tuple that identifies the content stored in
309 310 this revlog. It helps the rest of the code understand what the revlog
310 311 is about without having to resort to heuristics and index filename
311 312 analysis. Note that this must reliably be set by normal code, but that
312 313 test, debug, or performance measurement code might not set it to an
313 314 accurate value.
314 315 """
315 316 self.upperboundcomp = upperboundcomp
316 317
317 318 self.radix = radix
318 319
319 320 self._indexfile = None
320 321 self._datafile = None
321 322 self._nodemap_file = None
322 323 self.postfix = postfix
323 324 self.opener = opener
324 325 if persistentnodemap:
325 326 self._nodemap_file = nodemaputil.get_nodemap_file(self)
326 327
327 328 assert target[0] in ALL_KINDS
328 329 assert len(target) == 2
329 330 self.target = target
330 331 # When True, indexfile is opened with checkambig=True at writing, to
331 332 # avoid file stat ambiguity.
332 333 self._checkambig = checkambig
333 334 self._mmaplargeindex = mmaplargeindex
334 335 self._censorable = censorable
335 336 # 3-tuple of (node, rev, text) for a raw revision.
336 337 self._revisioncache = None
337 338 # Maps rev to chain base rev.
338 339 self._chainbasecache = util.lrucachedict(100)
339 340 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
340 341 self._chunkcache = (0, b'')
341 342 # How much data to read and cache into the raw revlog data cache.
342 343 self._chunkcachesize = 65536
343 344 self._maxchainlen = None
344 345 self._deltabothparents = True
345 346 self.index = None
346 347 self._nodemap_docket = None
347 348 # Mapping of partial identifiers to full nodes.
348 349 self._pcache = {}
349 350 # Mapping of revision integer to full node.
350 351 self._compengine = b'zlib'
351 352 self._compengineopts = {}
352 353 self._maxdeltachainspan = -1
353 354 self._withsparseread = False
354 355 self._sparserevlog = False
355 356 self.hassidedata = False
356 357 self._srdensitythreshold = 0.50
357 358 self._srmingapsize = 262144
358 359
359 360 # Make copy of flag processors so each revlog instance can support
360 361 # custom flags.
361 362 self._flagprocessors = dict(flagutil.flagprocessors)
362 363
363 364 # 2-tuple of file handles being used for active writing.
364 365 self._writinghandles = None
365 366 # prevent nesting of addgroup
366 367 self._adding_group = None
367 368
368 369 self._loadindex()
369 370
370 371 self._concurrencychecker = concurrencychecker
371 372
372 373 def _init_opts(self):
373 374 """process options (from above/config) to setup associated default revlog mode
374 375
375 376 These values might be affected when actually reading on disk information.
376 377
377 378 The relevant values are returned for use in _loadindex().
378 379
379 380 * newversionflags:
380 381 version header to use if we need to create a new revlog
381 382
382 383 * mmapindexthreshold:
383 384 minimal index size at which to start using mmap
384 385
385 386 * force_nodemap:
386 387 force the usage of a "development" version of the nodemap code
387 388 """
388 389 mmapindexthreshold = None
389 390 opts = self.opener.options
390 391
391 392 if b'revlogv2' in opts:
392 393 new_header = REVLOGV2 | FLAG_INLINE_DATA
393 394 elif b'revlogv1' in opts:
394 395 new_header = REVLOGV1 | FLAG_INLINE_DATA
395 396 if b'generaldelta' in opts:
396 397 new_header |= FLAG_GENERALDELTA
397 398 elif b'revlogv0' in self.opener.options:
398 399 new_header = REVLOGV0
399 400 else:
400 401 new_header = REVLOG_DEFAULT_VERSION
401 402
402 403 if b'chunkcachesize' in opts:
403 404 self._chunkcachesize = opts[b'chunkcachesize']
404 405 if b'maxchainlen' in opts:
405 406 self._maxchainlen = opts[b'maxchainlen']
406 407 if b'deltabothparents' in opts:
407 408 self._deltabothparents = opts[b'deltabothparents']
408 409 self._lazydelta = bool(opts.get(b'lazydelta', True))
409 410 self._lazydeltabase = False
410 411 if self._lazydelta:
411 412 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
412 413 if b'compengine' in opts:
413 414 self._compengine = opts[b'compengine']
414 415 if b'zlib.level' in opts:
415 416 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
416 417 if b'zstd.level' in opts:
417 418 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
418 419 if b'maxdeltachainspan' in opts:
419 420 self._maxdeltachainspan = opts[b'maxdeltachainspan']
420 421 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
421 422 mmapindexthreshold = opts[b'mmapindexthreshold']
422 423 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
423 424 withsparseread = bool(opts.get(b'with-sparse-read', False))
424 425 # sparse-revlog forces sparse-read
425 426 self._withsparseread = self._sparserevlog or withsparseread
426 427 if b'sparse-read-density-threshold' in opts:
427 428 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
428 429 if b'sparse-read-min-gap-size' in opts:
429 430 self._srmingapsize = opts[b'sparse-read-min-gap-size']
430 431 if opts.get(b'enableellipsis'):
431 432 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
432 433
433 434 # revlog v0 doesn't have flag processors
434 435 for flag, processor in pycompat.iteritems(
435 436 opts.get(b'flagprocessors', {})
436 437 ):
437 438 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
438 439
439 440 if self._chunkcachesize <= 0:
440 441 raise error.RevlogError(
441 442 _(b'revlog chunk cache size %r is not greater than 0')
442 443 % self._chunkcachesize
443 444 )
444 445 elif self._chunkcachesize & (self._chunkcachesize - 1):
445 446 raise error.RevlogError(
446 447 _(b'revlog chunk cache size %r is not a power of 2')
447 448 % self._chunkcachesize
448 449 )
449 450 force_nodemap = opts.get(b'devel-force-nodemap', False)
450 451 return new_header, mmapindexthreshold, force_nodemap
451 452
452 453 def _get_data(self, filepath, mmap_threshold):
453 454 """return a file content with or without mmap
454 455
455 456 If the file is missing, return the empty string"""
456 457 try:
457 458 with self.opener(filepath) as fp:
458 459 if mmap_threshold is not None:
459 460 file_size = self.opener.fstat(fp).st_size
460 461 if file_size >= mmap_threshold:
461 462 # TODO: should .close() to release resources without
462 463 # relying on Python GC
463 464 return util.buffer(util.mmapread(fp))
464 465 return fp.read()
465 466 except IOError as inst:
466 467 if inst.errno != errno.ENOENT:
467 468 raise
468 469 return b''
469 470
470 471 def _loadindex(self):
471 472
472 473 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
473 474
474 475 if self.postfix is None:
475 476 entry_point = b'%s.i' % self.radix
476 477 else:
477 478 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
478 479
479 480 entry_data = b''
480 481 self._initempty = True
481 482 entry_data = self._get_data(entry_point, mmapindexthreshold)
482 483 if len(entry_data) > 0:
483 484 header = INDEX_HEADER.unpack(entry_data[:4])[0]
484 485 self._initempty = False
485 486 else:
486 487 header = new_header
487 488
488 489 self._format_flags = header & ~0xFFFF
489 490 self._format_version = header & 0xFFFF
490 491
491 492 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
492 493 if supported_flags is None:
493 494 msg = _(b'unknown version (%d) in revlog %s')
494 495 msg %= (self._format_version, self.display_id)
495 496 raise error.RevlogError(msg)
496 497 elif self._format_flags & ~supported_flags:
497 498 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
498 499 display_flag = self._format_flags >> 16
499 500 msg %= (display_flag, self._format_version, self.display_id)
500 501 raise error.RevlogError(msg)
501 502
502 if self._format_version == REVLOGV0:
503 self._inline = False
504 self._generaldelta = False
505 elif self._format_version == REVLOGV1:
506 self._inline = self._format_flags & FLAG_INLINE_DATA
507 self._generaldelta = self._format_flags & FLAG_GENERALDELTA
508 elif self._format_version == REVLOGV2:
509 # There is a bug in the transaction handling when going from an
510 # inline revlog to a separate index and data file. Turn it off until
511 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
512 # See issue6485
513 self._inline = False
514 # generaldelta implied by version 2 revlogs.
515 self._generaldelta = True
516 # revlog-v2 has built in sidedata support
517 self.hassidedata = True
518 else:
519 assert False, 'unreachable'
503 features = FEATURES_BY_VERSION[self._format_version]
504 self._inline = features[b'inline'](self._format_flags)
505 self._generaldelta = features[b'generaldelta'](self._format_flags)
506 self.hassidedata = features[b'sidedata']
520 507
521 508 index_data = entry_data
522 509 self._indexfile = entry_point
523 510
524 511 if self.postfix is None or self.postfix == b'a':
525 512 self._datafile = b'%s.d' % self.radix
526 513 else:
527 514 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
528 515
529 516 self.nodeconstants = sha1nodeconstants
530 517 self.nullid = self.nodeconstants.nullid
531 518
532 519 # sparse-revlog can't be on without general-delta (issue6056)
533 520 if not self._generaldelta:
534 521 self._sparserevlog = False
535 522
536 523 self._storedeltachains = True
537 524
538 525 devel_nodemap = (
539 526 self._nodemap_file
540 527 and force_nodemap
541 528 and parse_index_v1_nodemap is not None
542 529 )
543 530
544 531 use_rust_index = False
545 532 if rustrevlog is not None:
546 533 if self._nodemap_file is not None:
547 534 use_rust_index = True
548 535 else:
549 536 use_rust_index = self.opener.options.get(b'rust.index')
550 537
551 538 self._parse_index = parse_index_v1
552 539 if self._format_version == REVLOGV0:
553 540 self._parse_index = revlogv0.parse_index_v0
554 541 elif self._format_version == REVLOGV2:
555 542 self._parse_index = parse_index_v2
556 543 elif devel_nodemap:
557 544 self._parse_index = parse_index_v1_nodemap
558 545 elif use_rust_index:
559 546 self._parse_index = parse_index_v1_mixed
560 547 try:
561 548 d = self._parse_index(index_data, self._inline)
562 549 index, _chunkcache = d
563 550 use_nodemap = (
564 551 not self._inline
565 552 and self._nodemap_file is not None
566 553 and util.safehasattr(index, 'update_nodemap_data')
567 554 )
568 555 if use_nodemap:
569 556 nodemap_data = nodemaputil.persisted_data(self)
570 557 if nodemap_data is not None:
571 558 docket = nodemap_data[0]
572 559 if (
573 560 len(d[0]) > docket.tip_rev
574 561 and d[0][docket.tip_rev][7] == docket.tip_node
575 562 ):
576 563 # no changelog tampering
577 564 self._nodemap_docket = docket
578 565 index.update_nodemap_data(*nodemap_data)
579 566 except (ValueError, IndexError):
580 567 raise error.RevlogError(
581 568 _(b"index %s is corrupted") % self.display_id
582 569 )
583 570 self.index, self._chunkcache = d
584 571 if not self._chunkcache:
585 572 self._chunkclear()
586 573 # revnum -> (chain-length, sum-delta-length)
587 574 self._chaininfocache = util.lrucachedict(500)
588 575 # revlog header -> revlog compressor
589 576 self._decompressors = {}
590 577
591 578 @util.propertycache
592 579 def revlog_kind(self):
593 580 return self.target[0]
594 581
595 582 @util.propertycache
596 583 def display_id(self):
597 584 """The public facing "ID" of the revlog that we use in message"""
598 585 # Maybe we should build a user facing representation of
599 586 # revlog.target instead of using `self.radix`
600 587 return self.radix
601 588
602 589 @util.propertycache
603 590 def _compressor(self):
604 591 engine = util.compengines[self._compengine]
605 592 return engine.revlogcompressor(self._compengineopts)
606 593
607 594 def _indexfp(self):
608 595 """file object for the revlog's index file"""
609 596 return self.opener(self._indexfile, mode=b"r")
610 597
611 598 def __index_write_fp(self):
612 599 # You should not use this directly; use `_writing` instead
613 600 try:
614 601 f = self.opener(
615 602 self._indexfile, mode=b"r+", checkambig=self._checkambig
616 603 )
617 604 f.seek(0, os.SEEK_END)
618 605 return f
619 606 except IOError as inst:
620 607 if inst.errno != errno.ENOENT:
621 608 raise
622 609 return self.opener(
623 610 self._indexfile, mode=b"w+", checkambig=self._checkambig
624 611 )
625 612
626 613 def __index_new_fp(self):
627 614 # You should not use this unless you are upgrading from an inline revlog
628 615 return self.opener(
629 616 self._indexfile,
630 617 mode=b"w",
631 618 checkambig=self._checkambig,
632 619 atomictemp=True,
633 620 )
634 621
635 622 def _datafp(self, mode=b'r'):
636 623 """file object for the revlog's data file"""
637 624 return self.opener(self._datafile, mode=mode)
638 625
639 626 @contextlib.contextmanager
640 627 def _datareadfp(self, existingfp=None):
641 628 """file object suitable to read data"""
642 629 # Use explicit file handle, if given.
643 630 if existingfp is not None:
644 631 yield existingfp
645 632
646 633 # Use a file handle being actively used for writes, if available.
647 634 # There is some danger to doing this because reads will seek the
648 635 # file. However, _writeentry() performs a SEEK_END before all writes,
649 636 # so we should be safe.
650 637 elif self._writinghandles:
651 638 if self._inline:
652 639 yield self._writinghandles[0]
653 640 else:
654 641 yield self._writinghandles[1]
655 642
656 643 # Otherwise open a new file handle.
657 644 else:
658 645 if self._inline:
659 646 func = self._indexfp
660 647 else:
661 648 func = self._datafp
662 649 with func() as fp:
663 650 yield fp
664 651
665 652 def tiprev(self):
666 653 return len(self.index) - 1
667 654
668 655 def tip(self):
669 656 return self.node(self.tiprev())
670 657
671 658 def __contains__(self, rev):
672 659 return 0 <= rev < len(self)
673 660
674 661 def __len__(self):
675 662 return len(self.index)
676 663
677 664 def __iter__(self):
678 665 return iter(pycompat.xrange(len(self)))
679 666
680 667 def revs(self, start=0, stop=None):
681 668 """iterate over all rev in this revlog (from start to stop)"""
682 669 return storageutil.iterrevs(len(self), start=start, stop=stop)
683 670
684 671 @property
685 672 def nodemap(self):
686 673 msg = (
687 674 b"revlog.nodemap is deprecated, "
688 675 b"use revlog.index.[has_node|rev|get_rev]"
689 676 )
690 677 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
691 678 return self.index.nodemap
692 679
693 680 @property
694 681 def _nodecache(self):
695 682 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
696 683 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
697 684 return self.index.nodemap
698 685
699 686 def hasnode(self, node):
700 687 try:
701 688 self.rev(node)
702 689 return True
703 690 except KeyError:
704 691 return False
705 692
706 693 def candelta(self, baserev, rev):
707 694 """whether two revisions (baserev, rev) can be delta-ed or not"""
708 695 # Disable delta if either rev requires a content-changing flag
709 696 # processor (ex. LFS). This is because such a flag processor can alter
710 697 # the rawtext content that the delta will be based on, and two clients
711 698 # could have the same revlog node with different flags (i.e. different
712 699 # rawtext contents) and the delta could be incompatible.
713 700 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
714 701 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
715 702 ):
716 703 return False
717 704 return True
718 705
719 706 def update_caches(self, transaction):
720 707 if self._nodemap_file is not None:
721 708 if transaction is None:
722 709 nodemaputil.update_persistent_nodemap(self)
723 710 else:
724 711 nodemaputil.setup_persistent_nodemap(transaction, self)
725 712
726 713 def clearcaches(self):
727 714 self._revisioncache = None
728 715 self._chainbasecache.clear()
729 716 self._chunkcache = (0, b'')
730 717 self._pcache = {}
731 718 self._nodemap_docket = None
732 719 self.index.clearcaches()
733 720 # The python code is the one responsible for validating the docket, so we
734 721 # end up having to refresh it here.
735 722 use_nodemap = (
736 723 not self._inline
737 724 and self._nodemap_file is not None
738 725 and util.safehasattr(self.index, 'update_nodemap_data')
739 726 )
740 727 if use_nodemap:
741 728 nodemap_data = nodemaputil.persisted_data(self)
742 729 if nodemap_data is not None:
743 730 self._nodemap_docket = nodemap_data[0]
744 731 self.index.update_nodemap_data(*nodemap_data)
745 732
746 733 def rev(self, node):
747 734 try:
748 735 return self.index.rev(node)
749 736 except TypeError:
750 737 raise
751 738 except error.RevlogError:
752 739 # parsers.c radix tree lookup failed
753 740 if (
754 741 node == self.nodeconstants.wdirid
755 742 or node in self.nodeconstants.wdirfilenodeids
756 743 ):
757 744 raise error.WdirUnsupported
758 745 raise error.LookupError(node, self.display_id, _(b'no node'))
759 746
760 747 # Accessors for index entries.
761 748
762 749 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
763 750 # are flags.
764 751 def start(self, rev):
765 752 return int(self.index[rev][0] >> 16)
766 753
767 754 def flags(self, rev):
768 755 return self.index[rev][0] & 0xFFFF
769 756
770 757 def length(self, rev):
771 758 return self.index[rev][1]
772 759
773 760 def sidedata_length(self, rev):
774 761 if not self.hassidedata:
775 762 return 0
776 763 return self.index[rev][9]
777 764
778 765 def rawsize(self, rev):
779 766 """return the length of the uncompressed text for a given revision"""
780 767 l = self.index[rev][2]
781 768 if l >= 0:
782 769 return l
783 770
784 771 t = self.rawdata(rev)
785 772 return len(t)
786 773
787 774 def size(self, rev):
788 775 """length of non-raw text (processed by a "read" flag processor)"""
789 776 # fast path: if no "read" flag processor could change the content,
790 777 # size is rawsize. note: ELLIPSIS is known to not change the content.
791 778 flags = self.flags(rev)
792 779 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
793 780 return self.rawsize(rev)
794 781
795 782 return len(self.revision(rev, raw=False))
796 783
797 784 def chainbase(self, rev):
798 785 base = self._chainbasecache.get(rev)
799 786 if base is not None:
800 787 return base
801 788
802 789 index = self.index
803 790 iterrev = rev
804 791 base = index[iterrev][3]
805 792 while base != iterrev:
806 793 iterrev = base
807 794 base = index[iterrev][3]
808 795
809 796 self._chainbasecache[rev] = base
810 797 return base
811 798
812 799 def linkrev(self, rev):
813 800 return self.index[rev][4]
814 801
815 802 def parentrevs(self, rev):
816 803 try:
817 804 entry = self.index[rev]
818 805 except IndexError:
819 806 if rev == wdirrev:
820 807 raise error.WdirUnsupported
821 808 raise
822 809 if entry[5] == nullrev:
823 810 return entry[6], entry[5]
824 811 else:
825 812 return entry[5], entry[6]
826 813
827 814 # fast parentrevs(rev) where rev isn't filtered
828 815 _uncheckedparentrevs = parentrevs
829 816
830 817 def node(self, rev):
831 818 try:
832 819 return self.index[rev][7]
833 820 except IndexError:
834 821 if rev == wdirrev:
835 822 raise error.WdirUnsupported
836 823 raise
837 824
838 825 # Derived from index values.
839 826
840 827 def end(self, rev):
841 828 return self.start(rev) + self.length(rev)
842 829
843 830 def parents(self, node):
844 831 i = self.index
845 832 d = i[self.rev(node)]
846 833 # inline node() to avoid function call overhead
847 834 if d[5] == nullrev:
848 835 return i[d[6]][7], i[d[5]][7]
849 836 else:
850 837 return i[d[5]][7], i[d[6]][7]
851 838
852 839 def chainlen(self, rev):
853 840 return self._chaininfo(rev)[0]
854 841
855 842 def _chaininfo(self, rev):
856 843 chaininfocache = self._chaininfocache
857 844 if rev in chaininfocache:
858 845 return chaininfocache[rev]
859 846 index = self.index
860 847 generaldelta = self._generaldelta
861 848 iterrev = rev
862 849 e = index[iterrev]
863 850 clen = 0
864 851 compresseddeltalen = 0
865 852 while iterrev != e[3]:
866 853 clen += 1
867 854 compresseddeltalen += e[1]
868 855 if generaldelta:
869 856 iterrev = e[3]
870 857 else:
871 858 iterrev -= 1
872 859 if iterrev in chaininfocache:
873 860 t = chaininfocache[iterrev]
874 861 clen += t[0]
875 862 compresseddeltalen += t[1]
876 863 break
877 864 e = index[iterrev]
878 865 else:
879 866 # Add text length of base since decompressing that also takes
880 867 # work. For cache hits the length is already included.
881 868 compresseddeltalen += e[1]
882 869 r = (clen, compresseddeltalen)
883 870 chaininfocache[rev] = r
884 871 return r
885 872
886 873 def _deltachain(self, rev, stoprev=None):
887 874 """Obtain the delta chain for a revision.
888 875
889 876 ``stoprev`` specifies a revision to stop at. If not specified, we
890 877 stop at the base of the chain.
891 878
892 879 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
893 880 revs in ascending order and ``stopped`` is a bool indicating whether
894 881 ``stoprev`` was hit.
895 882 """
896 883 # Try C implementation.
897 884 try:
898 885 return self.index.deltachain(rev, stoprev, self._generaldelta)
899 886 except AttributeError:
900 887 pass
901 888
902 889 chain = []
903 890
904 891 # Alias to prevent attribute lookup in tight loop.
905 892 index = self.index
906 893 generaldelta = self._generaldelta
907 894
908 895 iterrev = rev
909 896 e = index[iterrev]
910 897 while iterrev != e[3] and iterrev != stoprev:
911 898 chain.append(iterrev)
912 899 if generaldelta:
913 900 iterrev = e[3]
914 901 else:
915 902 iterrev -= 1
916 903 e = index[iterrev]
917 904
918 905 if iterrev == stoprev:
919 906 stopped = True
920 907 else:
921 908 chain.append(iterrev)
922 909 stopped = False
923 910
924 911 chain.reverse()
925 912 return chain, stopped
926 913
927 914 def ancestors(self, revs, stoprev=0, inclusive=False):
928 915 """Generate the ancestors of 'revs' in reverse revision order.
929 916 Does not generate revs lower than stoprev.
930 917
931 918 See the documentation for ancestor.lazyancestors for more details."""
932 919
933 920 # first, make sure start revisions aren't filtered
934 921 revs = list(revs)
935 922 checkrev = self.node
936 923 for r in revs:
937 924 checkrev(r)
938 925 # and we're sure ancestors aren't filtered as well
939 926
940 927 if rustancestor is not None:
941 928 lazyancestors = rustancestor.LazyAncestors
942 929 arg = self.index
943 930 else:
944 931 lazyancestors = ancestor.lazyancestors
945 932 arg = self._uncheckedparentrevs
946 933 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
947 934
948 935 def descendants(self, revs):
949 936 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
950 937
951 938 def findcommonmissing(self, common=None, heads=None):
952 939 """Return a tuple of the ancestors of common and the ancestors of heads
953 940 that are not ancestors of common. In revset terminology, we return the
954 941 tuple:
955 942
956 943 ::common, (::heads) - (::common)
957 944
958 945 The list is sorted by revision number, meaning it is
959 946 topologically sorted.
960 947
961 948 'heads' and 'common' are both lists of node IDs. If heads is
962 949 not supplied, uses all of the revlog's heads. If common is not
963 950 supplied, uses nullid."""
964 951 if common is None:
965 952 common = [self.nullid]
966 953 if heads is None:
967 954 heads = self.heads()
968 955
969 956 common = [self.rev(n) for n in common]
970 957 heads = [self.rev(n) for n in heads]
971 958
972 959 # we want the ancestors, but inclusive
973 960 class lazyset(object):
974 961 def __init__(self, lazyvalues):
975 962 self.addedvalues = set()
976 963 self.lazyvalues = lazyvalues
977 964
978 965 def __contains__(self, value):
979 966 return value in self.addedvalues or value in self.lazyvalues
980 967
981 968 def __iter__(self):
982 969 added = self.addedvalues
983 970 for r in added:
984 971 yield r
985 972 for r in self.lazyvalues:
986 973 if not r in added:
987 974 yield r
988 975
989 976 def add(self, value):
990 977 self.addedvalues.add(value)
991 978
992 979 def update(self, values):
993 980 self.addedvalues.update(values)
994 981
995 982 has = lazyset(self.ancestors(common))
996 983 has.add(nullrev)
997 984 has.update(common)
998 985
999 986 # take all ancestors from heads that aren't in has
1000 987 missing = set()
1001 988 visit = collections.deque(r for r in heads if r not in has)
1002 989 while visit:
1003 990 r = visit.popleft()
1004 991 if r in missing:
1005 992 continue
1006 993 else:
1007 994 missing.add(r)
1008 995 for p in self.parentrevs(r):
1009 996 if p not in has:
1010 997 visit.append(p)
1011 998 missing = list(missing)
1012 999 missing.sort()
1013 1000 return has, [self.node(miss) for miss in missing]
1014 1001
1015 1002 def incrementalmissingrevs(self, common=None):
1016 1003 """Return an object that can be used to incrementally compute the
1017 1004 revision numbers of the ancestors of arbitrary sets that are not
1018 1005 ancestors of common. This is an ancestor.incrementalmissingancestors
1019 1006 object.
1020 1007
1021 1008 'common' is a list of revision numbers. If common is not supplied, uses
1022 1009 nullrev.
1023 1010 """
1024 1011 if common is None:
1025 1012 common = [nullrev]
1026 1013
1027 1014 if rustancestor is not None:
1028 1015 return rustancestor.MissingAncestors(self.index, common)
1029 1016 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1030 1017
1031 1018 def findmissingrevs(self, common=None, heads=None):
1032 1019 """Return the revision numbers of the ancestors of heads that
1033 1020 are not ancestors of common.
1034 1021
1035 1022 More specifically, return a list of revision numbers corresponding to
1036 1023 nodes N such that every N satisfies the following constraints:
1037 1024
1038 1025 1. N is an ancestor of some node in 'heads'
1039 1026 2. N is not an ancestor of any node in 'common'
1040 1027
1041 1028 The list is sorted by revision number, meaning it is
1042 1029 topologically sorted.
1043 1030
1044 1031 'heads' and 'common' are both lists of revision numbers. If heads is
1045 1032 not supplied, uses all of the revlog's heads. If common is not
1046 1033 supplied, uses nullid."""
1047 1034 if common is None:
1048 1035 common = [nullrev]
1049 1036 if heads is None:
1050 1037 heads = self.headrevs()
1051 1038
1052 1039 inc = self.incrementalmissingrevs(common=common)
1053 1040 return inc.missingancestors(heads)
1054 1041
1055 1042 def findmissing(self, common=None, heads=None):
1056 1043 """Return the ancestors of heads that are not ancestors of common.
1057 1044
1058 1045 More specifically, return a list of nodes N such that every N
1059 1046 satisfies the following constraints:
1060 1047
1061 1048 1. N is an ancestor of some node in 'heads'
1062 1049 2. N is not an ancestor of any node in 'common'
1063 1050
1064 1051 The list is sorted by revision number, meaning it is
1065 1052 topologically sorted.
1066 1053
1067 1054 'heads' and 'common' are both lists of node IDs. If heads is
1068 1055 not supplied, uses all of the revlog's heads. If common is not
1069 1056 supplied, uses nullid."""
1070 1057 if common is None:
1071 1058 common = [self.nullid]
1072 1059 if heads is None:
1073 1060 heads = self.heads()
1074 1061
1075 1062 common = [self.rev(n) for n in common]
1076 1063 heads = [self.rev(n) for n in heads]
1077 1064
1078 1065 inc = self.incrementalmissingrevs(common=common)
1079 1066 return [self.node(r) for r in inc.missingancestors(heads)]
1080 1067
1081 1068 def nodesbetween(self, roots=None, heads=None):
1082 1069 """Return a topological path from 'roots' to 'heads'.
1083 1070
1084 1071 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1085 1072 topologically sorted list of all nodes N that satisfy both of
1086 1073 these constraints:
1087 1074
1088 1075 1. N is a descendant of some node in 'roots'
1089 1076 2. N is an ancestor of some node in 'heads'
1090 1077
1091 1078 Every node is considered to be both a descendant and an ancestor
1092 1079 of itself, so every reachable node in 'roots' and 'heads' will be
1093 1080 included in 'nodes'.
1094 1081
1095 1082 'outroots' is the list of reachable nodes in 'roots', i.e., the
1096 1083 subset of 'roots' that is returned in 'nodes'. Likewise,
1097 1084 'outheads' is the subset of 'heads' that is also in 'nodes'.
1098 1085
1099 1086 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1100 1087 unspecified, uses nullid as the only root. If 'heads' is
1101 1088 unspecified, uses list of all of the revlog's heads."""
1102 1089 nonodes = ([], [], [])
1103 1090 if roots is not None:
1104 1091 roots = list(roots)
1105 1092 if not roots:
1106 1093 return nonodes
1107 1094 lowestrev = min([self.rev(n) for n in roots])
1108 1095 else:
1109 1096 roots = [self.nullid] # Everybody's a descendant of nullid
1110 1097 lowestrev = nullrev
1111 1098 if (lowestrev == nullrev) and (heads is None):
1112 1099 # We want _all_ the nodes!
1113 1100 return (
1114 1101 [self.node(r) for r in self],
1115 1102 [self.nullid],
1116 1103 list(self.heads()),
1117 1104 )
1118 1105 if heads is None:
1119 1106 # All nodes are ancestors, so the latest ancestor is the last
1120 1107 # node.
1121 1108 highestrev = len(self) - 1
1122 1109 # Set ancestors to None to signal that every node is an ancestor.
1123 1110 ancestors = None
1124 1111 # Set heads to an empty dictionary for later discovery of heads
1125 1112 heads = {}
1126 1113 else:
1127 1114 heads = list(heads)
1128 1115 if not heads:
1129 1116 return nonodes
1130 1117 ancestors = set()
1131 1118 # Turn heads into a dictionary so we can remove 'fake' heads.
1132 1119 # Also, later we will be using it to filter out the heads we can't
1133 1120 # find from roots.
1134 1121 heads = dict.fromkeys(heads, False)
1135 1122 # Start at the top and keep marking parents until we're done.
1136 1123 nodestotag = set(heads)
1137 1124 # Remember where the top was so we can use it as a limit later.
1138 1125 highestrev = max([self.rev(n) for n in nodestotag])
1139 1126 while nodestotag:
1140 1127 # grab a node to tag
1141 1128 n = nodestotag.pop()
1142 1129 # Never tag nullid
1143 1130 if n == self.nullid:
1144 1131 continue
1145 1132 # A node's revision number represents its place in a
1146 1133 # topologically sorted list of nodes.
1147 1134 r = self.rev(n)
1148 1135 if r >= lowestrev:
1149 1136 if n not in ancestors:
1150 1137 # If we are possibly a descendant of one of the roots
1151 1138 # and we haven't already been marked as an ancestor
1152 1139 ancestors.add(n) # Mark as ancestor
1153 1140 # Add non-nullid parents to list of nodes to tag.
1154 1141 nodestotag.update(
1155 1142 [p for p in self.parents(n) if p != self.nullid]
1156 1143 )
1157 1144 elif n in heads: # We've seen it before, is it a fake head?
1158 1145 # So it is; real heads should not be the ancestors of
1159 1146 # any other heads.
1160 1147 heads.pop(n)
1161 1148 if not ancestors:
1162 1149 return nonodes
1163 1150 # Now that we have our set of ancestors, we want to remove any
1164 1151 # roots that are not ancestors.
1165 1152
1166 1153 # If one of the roots was nullid, everything is included anyway.
1167 1154 if lowestrev > nullrev:
1168 1155 # But, since we weren't, let's recompute the lowest rev to not
1169 1156 # include roots that aren't ancestors.
1170 1157
1171 1158 # Filter out roots that aren't ancestors of heads
1172 1159 roots = [root for root in roots if root in ancestors]
1173 1160 # Recompute the lowest revision
1174 1161 if roots:
1175 1162 lowestrev = min([self.rev(root) for root in roots])
1176 1163 else:
1177 1164 # No more roots? Return empty list
1178 1165 return nonodes
1179 1166 else:
1180 1167 # We are descending from nullid, and don't need to care about
1181 1168 # any other roots.
1182 1169 lowestrev = nullrev
1183 1170 roots = [self.nullid]
1184 1171 # Transform our roots list into a set.
1185 1172 descendants = set(roots)
1186 1173 # Also, keep the original roots so we can filter out roots that aren't
1187 1174 # 'real' roots (i.e. are descended from other roots).
1188 1175 roots = descendants.copy()
1189 1176 # Our topologically sorted list of output nodes.
1190 1177 orderedout = []
1191 1178 # Don't start at nullid since we don't want nullid in our output list,
1192 1179 # and if nullid shows up in descendants, empty parents will look like
1193 1180 # they're descendants.
1194 1181 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1195 1182 n = self.node(r)
1196 1183 isdescendant = False
1197 1184 if lowestrev == nullrev: # Everybody is a descendant of nullid
1198 1185 isdescendant = True
1199 1186 elif n in descendants:
1200 1187 # n is already a descendant
1201 1188 isdescendant = True
1202 1189 # This check only needs to be done here because all the roots
1203 1190 # will start being marked as descendants before the loop.
1204 1191 if n in roots:
1205 1192 # If n was a root, check if it's a 'real' root.
1206 1193 p = tuple(self.parents(n))
1207 1194 # If any of its parents are descendants, it's not a root.
1208 1195 if (p[0] in descendants) or (p[1] in descendants):
1209 1196 roots.remove(n)
1210 1197 else:
1211 1198 p = tuple(self.parents(n))
1212 1199 # A node is a descendant if either of its parents are
1213 1200 # descendants. (We seeded the descendants set with the roots
1214 1201 # up there, remember?)
1215 1202 if (p[0] in descendants) or (p[1] in descendants):
1216 1203 descendants.add(n)
1217 1204 isdescendant = True
1218 1205 if isdescendant and ((ancestors is None) or (n in ancestors)):
1219 1206 # Only include nodes that are both descendants and ancestors.
1220 1207 orderedout.append(n)
1221 1208 if (ancestors is not None) and (n in heads):
1222 1209 # We're trying to figure out which heads are reachable
1223 1210 # from roots.
1224 1211 # Mark this head as having been reached
1225 1212 heads[n] = True
1226 1213 elif ancestors is None:
1227 1214 # Otherwise, we're trying to discover the heads.
1228 1215 # Assume this is a head because if it isn't, the next step
1229 1216 # will eventually remove it.
1230 1217 heads[n] = True
1231 1218 # But, obviously its parents aren't.
1232 1219 for p in self.parents(n):
1233 1220 heads.pop(p, None)
1234 1221 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1235 1222 roots = list(roots)
1236 1223 assert orderedout
1237 1224 assert roots
1238 1225 assert heads
1239 1226 return (orderedout, roots, heads)
1240 1227
1241 1228 def headrevs(self, revs=None):
1242 1229 if revs is None:
1243 1230 try:
1244 1231 return self.index.headrevs()
1245 1232 except AttributeError:
1246 1233 return self._headrevs()
1247 1234 if rustdagop is not None:
1248 1235 return rustdagop.headrevs(self.index, revs)
1249 1236 return dagop.headrevs(revs, self._uncheckedparentrevs)
1250 1237
1251 1238 def computephases(self, roots):
1252 1239 return self.index.computephasesmapsets(roots)
1253 1240
1254 1241 def _headrevs(self):
1255 1242 count = len(self)
1256 1243 if not count:
1257 1244 return [nullrev]
1258 1245 # we won't iter over filtered revs so nobody is a head at start
1259 1246 ishead = [0] * (count + 1)
1260 1247 index = self.index
1261 1248 for r in self:
1262 1249 ishead[r] = 1 # I may be a head
1263 1250 e = index[r]
1264 1251 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1265 1252 return [r for r, val in enumerate(ishead) if val]
1266 1253
1267 1254 def heads(self, start=None, stop=None):
1268 1255 """return the list of all nodes that have no children
1269 1256
1270 1257 if start is specified, only heads that are descendants of
1271 1258 start will be returned
1272 1259 if stop is specified, it will consider all the revs from stop
1273 1260 as if they had no children
1274 1261 """
1275 1262 if start is None and stop is None:
1276 1263 if not len(self):
1277 1264 return [self.nullid]
1278 1265 return [self.node(r) for r in self.headrevs()]
1279 1266
1280 1267 if start is None:
1281 1268 start = nullrev
1282 1269 else:
1283 1270 start = self.rev(start)
1284 1271
1285 1272 stoprevs = {self.rev(n) for n in stop or []}
1286 1273
1287 1274 revs = dagop.headrevssubset(
1288 1275 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1289 1276 )
1290 1277
1291 1278 return [self.node(rev) for rev in revs]
1292 1279
1293 1280 def children(self, node):
1294 1281 """find the children of a given node"""
1295 1282 c = []
1296 1283 p = self.rev(node)
1297 1284 for r in self.revs(start=p + 1):
1298 1285 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1299 1286 if prevs:
1300 1287 for pr in prevs:
1301 1288 if pr == p:
1302 1289 c.append(self.node(r))
1303 1290 elif p == nullrev:
1304 1291 c.append(self.node(r))
1305 1292 return c
1306 1293
1307 1294 def commonancestorsheads(self, a, b):
1308 1295 """calculate all the heads of the common ancestors of nodes a and b"""
1309 1296 a, b = self.rev(a), self.rev(b)
1310 1297 ancs = self._commonancestorsheads(a, b)
1311 1298 return pycompat.maplist(self.node, ancs)
1312 1299
1313 1300 def _commonancestorsheads(self, *revs):
1314 1301 """calculate all the heads of the common ancestors of revs"""
1315 1302 try:
1316 1303 ancs = self.index.commonancestorsheads(*revs)
1317 1304 except (AttributeError, OverflowError): # C implementation failed
1318 1305 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1319 1306 return ancs
1320 1307
1321 1308 def isancestor(self, a, b):
1322 1309 """return True if node a is an ancestor of node b
1323 1310
1324 1311 A revision is considered an ancestor of itself."""
1325 1312 a, b = self.rev(a), self.rev(b)
1326 1313 return self.isancestorrev(a, b)
1327 1314
1328 1315 def isancestorrev(self, a, b):
1329 1316 """return True if revision a is an ancestor of revision b
1330 1317
1331 1318 A revision is considered an ancestor of itself.
1332 1319
1333 1320 The implementation of this is trivial but the use of
1334 1321 reachableroots is not."""
1335 1322 if a == nullrev:
1336 1323 return True
1337 1324 elif a == b:
1338 1325 return True
1339 1326 elif a > b:
1340 1327 return False
1341 1328 return bool(self.reachableroots(a, [b], [a], includepath=False))
1342 1329
1343 1330 def reachableroots(self, minroot, heads, roots, includepath=False):
1344 1331 """return (heads(::(<roots> and <roots>::<heads>)))
1345 1332
1346 1333 If includepath is True, return (<roots>::<heads>)."""
1347 1334 try:
1348 1335 return self.index.reachableroots2(
1349 1336 minroot, heads, roots, includepath
1350 1337 )
1351 1338 except AttributeError:
1352 1339 return dagop._reachablerootspure(
1353 1340 self.parentrevs, minroot, roots, heads, includepath
1354 1341 )
1355 1342
1356 1343 def ancestor(self, a, b):
1357 1344 """calculate the "best" common ancestor of nodes a and b"""
1358 1345
1359 1346 a, b = self.rev(a), self.rev(b)
1360 1347 try:
1361 1348 ancs = self.index.ancestors(a, b)
1362 1349 except (AttributeError, OverflowError):
1363 1350 ancs = ancestor.ancestors(self.parentrevs, a, b)
1364 1351 if ancs:
1365 1352 # choose a consistent winner when there's a tie
1366 1353 return min(map(self.node, ancs))
1367 1354 return self.nullid
1368 1355
1369 1356 def _match(self, id):
1370 1357 if isinstance(id, int):
1371 1358 # rev
1372 1359 return self.node(id)
1373 1360 if len(id) == self.nodeconstants.nodelen:
1374 1361 # possibly a binary node
1375 1362 # odds of a binary node being all hex in ASCII are 1 in 10**25
1376 1363 try:
1377 1364 node = id
1378 1365 self.rev(node) # quick search the index
1379 1366 return node
1380 1367 except error.LookupError:
1381 1368 pass # may be partial hex id
1382 1369 try:
1383 1370 # str(rev)
1384 1371 rev = int(id)
1385 1372 if b"%d" % rev != id:
1386 1373 raise ValueError
1387 1374 if rev < 0:
1388 1375 rev = len(self) + rev
1389 1376 if rev < 0 or rev >= len(self):
1390 1377 raise ValueError
1391 1378 return self.node(rev)
1392 1379 except (ValueError, OverflowError):
1393 1380 pass
1394 1381 if len(id) == 2 * self.nodeconstants.nodelen:
1395 1382 try:
1396 1383 # a full hex nodeid?
1397 1384 node = bin(id)
1398 1385 self.rev(node)
1399 1386 return node
1400 1387 except (TypeError, error.LookupError):
1401 1388 pass
1402 1389
1403 1390 def _partialmatch(self, id):
1404 1391 # we don't care about wdirfilenodeids as they should always be full hashes
1405 1392 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1406 1393 try:
1407 1394 partial = self.index.partialmatch(id)
1408 1395 if partial and self.hasnode(partial):
1409 1396 if maybewdir:
1410 1397 # single 'ff...' match in radix tree, ambiguous with wdir
1411 1398 raise error.RevlogError
1412 1399 return partial
1413 1400 if maybewdir:
1414 1401 # no 'ff...' match in radix tree, wdir identified
1415 1402 raise error.WdirUnsupported
1416 1403 return None
1417 1404 except error.RevlogError:
1418 1405 # parsers.c radix tree lookup gave multiple matches
1419 1406 # fast path: for unfiltered changelog, radix tree is accurate
1420 1407 if not getattr(self, 'filteredrevs', None):
1421 1408 raise error.AmbiguousPrefixLookupError(
1422 1409 id, self.display_id, _(b'ambiguous identifier')
1423 1410 )
1424 1411 # fall through to slow path that filters hidden revisions
1425 1412 except (AttributeError, ValueError):
1426 1413 # we are pure python, or key was too short to search radix tree
1427 1414 pass
1428 1415
1429 1416 if id in self._pcache:
1430 1417 return self._pcache[id]
1431 1418
1432 1419 if len(id) <= 40:
1433 1420 try:
1434 1421 # hex(node)[:...]
1435 1422 l = len(id) // 2 # grab an even number of digits
1436 1423 prefix = bin(id[: l * 2])
1437 1424 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1438 1425 nl = [
1439 1426 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1440 1427 ]
1441 1428 if self.nodeconstants.nullhex.startswith(id):
1442 1429 nl.append(self.nullid)
1443 1430 if len(nl) > 0:
1444 1431 if len(nl) == 1 and not maybewdir:
1445 1432 self._pcache[id] = nl[0]
1446 1433 return nl[0]
1447 1434 raise error.AmbiguousPrefixLookupError(
1448 1435 id, self.display_id, _(b'ambiguous identifier')
1449 1436 )
1450 1437 if maybewdir:
1451 1438 raise error.WdirUnsupported
1452 1439 return None
1453 1440 except TypeError:
1454 1441 pass
1455 1442
1456 1443 def lookup(self, id):
1457 1444 """locate a node based on:
1458 1445 - revision number or str(revision number)
1459 1446 - nodeid or subset of hex nodeid
1460 1447 """
1461 1448 n = self._match(id)
1462 1449 if n is not None:
1463 1450 return n
1464 1451 n = self._partialmatch(id)
1465 1452 if n:
1466 1453 return n
1467 1454
1468 1455 raise error.LookupError(id, self.display_id, _(b'no match found'))
1469 1456
1470 1457 def shortest(self, node, minlength=1):
1471 1458 """Find the shortest unambiguous prefix that matches node."""
1472 1459
1473 1460 def isvalid(prefix):
1474 1461 try:
1475 1462 matchednode = self._partialmatch(prefix)
1476 1463 except error.AmbiguousPrefixLookupError:
1477 1464 return False
1478 1465 except error.WdirUnsupported:
1479 1466 # single 'ff...' match
1480 1467 return True
1481 1468 if matchednode is None:
1482 1469 raise error.LookupError(node, self.display_id, _(b'no node'))
1483 1470 return True
1484 1471
1485 1472 def maybewdir(prefix):
1486 1473 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1487 1474
1488 1475 hexnode = hex(node)
1489 1476
1490 1477 def disambiguate(hexnode, minlength):
1491 1478 """Disambiguate against wdirid."""
1492 1479 for length in range(minlength, len(hexnode) + 1):
1493 1480 prefix = hexnode[:length]
1494 1481 if not maybewdir(prefix):
1495 1482 return prefix
1496 1483
1497 1484 if not getattr(self, 'filteredrevs', None):
1498 1485 try:
1499 1486 length = max(self.index.shortest(node), minlength)
1500 1487 return disambiguate(hexnode, length)
1501 1488 except error.RevlogError:
1502 1489 if node != self.nodeconstants.wdirid:
1503 1490 raise error.LookupError(
1504 1491 node, self.display_id, _(b'no node')
1505 1492 )
1506 1493 except AttributeError:
1507 1494 # Fall through to pure code
1508 1495 pass
1509 1496
1510 1497 if node == self.nodeconstants.wdirid:
1511 1498 for length in range(minlength, len(hexnode) + 1):
1512 1499 prefix = hexnode[:length]
1513 1500 if isvalid(prefix):
1514 1501 return prefix
1515 1502
1516 1503 for length in range(minlength, len(hexnode) + 1):
1517 1504 prefix = hexnode[:length]
1518 1505 if isvalid(prefix):
1519 1506 return disambiguate(hexnode, length)
1520 1507
1521 1508 def cmp(self, node, text):
1522 1509 """compare text with a given file revision
1523 1510
1524 1511 returns True if text is different from what is stored.
1525 1512 """
1526 1513 p1, p2 = self.parents(node)
1527 1514 return storageutil.hashrevisionsha1(text, p1, p2) != node
1528 1515
1529 1516 def _cachesegment(self, offset, data):
1530 1517 """Add a segment to the revlog cache.
1531 1518
1532 1519 Accepts an absolute offset and the data that is at that location.
1533 1520 """
1534 1521 o, d = self._chunkcache
1535 1522 # try to add to existing cache
1536 1523 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1537 1524 self._chunkcache = o, d + data
1538 1525 else:
1539 1526 self._chunkcache = offset, data
1540 1527
1541 1528 def _readsegment(self, offset, length, df=None):
1542 1529 """Load a segment of raw data from the revlog.
1543 1530
1544 1531 Accepts an absolute offset, length to read, and an optional existing
1545 1532 file handle to read from.
1546 1533
1547 1534 If an existing file handle is passed, it will be seeked and the
1548 1535 original seek position will NOT be restored.
1549 1536
1550 1537 Returns a str or buffer of raw byte data.
1551 1538
1552 1539 Raises if the requested number of bytes could not be read.
1553 1540 """
1554 1541 # Cache data both forward and backward around the requested
1555 1542 # data, in a fixed size window. This helps speed up operations
1556 1543 # involving reading the revlog backwards.
1557 1544 cachesize = self._chunkcachesize
1558 1545 realoffset = offset & ~(cachesize - 1)
1559 1546 reallength = (
1560 1547 (offset + length + cachesize) & ~(cachesize - 1)
1561 1548 ) - realoffset
1562 1549 with self._datareadfp(df) as df:
1563 1550 df.seek(realoffset)
1564 1551 d = df.read(reallength)
1565 1552
1566 1553 self._cachesegment(realoffset, d)
1567 1554 if offset != realoffset or reallength != length:
1568 1555 startoffset = offset - realoffset
1569 1556 if len(d) - startoffset < length:
1570 1557 raise error.RevlogError(
1571 1558 _(
1572 1559 b'partial read of revlog %s; expected %d bytes from '
1573 1560 b'offset %d, got %d'
1574 1561 )
1575 1562 % (
1576 1563 self._indexfile if self._inline else self._datafile,
1577 1564 length,
1578 1565 offset,
1579 1566 len(d) - startoffset,
1580 1567 )
1581 1568 )
1582 1569
1583 1570 return util.buffer(d, startoffset, length)
1584 1571
1585 1572 if len(d) < length:
1586 1573 raise error.RevlogError(
1587 1574 _(
1588 1575 b'partial read of revlog %s; expected %d bytes from offset '
1589 1576 b'%d, got %d'
1590 1577 )
1591 1578 % (
1592 1579 self._indexfile if self._inline else self._datafile,
1593 1580 length,
1594 1581 offset,
1595 1582 len(d),
1596 1583 )
1597 1584 )
1598 1585
1599 1586 return d
1600 1587
1601 1588 def _getsegment(self, offset, length, df=None):
1602 1589 """Obtain a segment of raw data from the revlog.
1603 1590
1604 1591 Accepts an absolute offset, length of bytes to obtain, and an
1605 1592 optional file handle to the already-opened revlog. If the file
1606 1593 handle is used, its original seek position will not be preserved.
1607 1594
1608 1595 Requests for data may be returned from a cache.
1609 1596
1610 1597 Returns a str or a buffer instance of raw byte data.
1611 1598 """
1612 1599 o, d = self._chunkcache
1613 1600 l = len(d)
1614 1601
1615 1602 # is it in the cache?
1616 1603 cachestart = offset - o
1617 1604 cacheend = cachestart + length
1618 1605 if cachestart >= 0 and cacheend <= l:
1619 1606 if cachestart == 0 and cacheend == l:
1620 1607 return d # avoid a copy
1621 1608 return util.buffer(d, cachestart, cacheend - cachestart)
1622 1609
1623 1610 return self._readsegment(offset, length, df=df)
1624 1611
1625 1612 def _getsegmentforrevs(self, startrev, endrev, df=None):
1626 1613 """Obtain a segment of raw data corresponding to a range of revisions.
1627 1614
1628 1615 Accepts the start and end revisions and an optional already-open
1629 1616 file handle to be used for reading. If the file handle is read, its
1630 1617 seek position will not be preserved.
1631 1618
1632 1619 Requests for data may be satisfied by a cache.
1633 1620
1634 1621 Returns a 2-tuple of (offset, data) for the requested range of
1635 1622 revisions. Offset is the integer offset from the beginning of the
1636 1623 revlog and data is a str or buffer of the raw byte data.
1637 1624
1638 1625 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1639 1626 to determine where each revision's data begins and ends.
1640 1627 """
1641 1628 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1642 1629 # (functions are expensive).
1643 1630 index = self.index
1644 1631 istart = index[startrev]
1645 1632 start = int(istart[0] >> 16)
1646 1633 if startrev == endrev:
1647 1634 end = start + istart[1]
1648 1635 else:
1649 1636 iend = index[endrev]
1650 1637 end = int(iend[0] >> 16) + iend[1]
1651 1638
1652 1639 if self._inline:
1653 1640 start += (startrev + 1) * self.index.entry_size
1654 1641 end += (endrev + 1) * self.index.entry_size
1655 1642 length = end - start
1656 1643
1657 1644 return start, self._getsegment(start, length, df=df)
1658 1645
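# Hedged sketch of the index-entry packing the inlined reads above assume:
# entry[0] stores the 48-bit data offset and the 16-bit flags in a single
# integer (see offset_type() used by _addrevision() below), and entry[1]
# is the compressed chunk length, which is why start/end derive as shown.
def _offset_type(offset, flags):
    return (offset << 16) | (flags & 0xFFFF)

packed = _offset_type(1024, 1)  # hypothetical offset and flag bit
assert packed >> 16 == 1024 and packed & 0xFFFF == 1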
1659 1646 def _chunk(self, rev, df=None):
1660 1647 """Obtain a single decompressed chunk for a revision.
1661 1648
1662 1649 Accepts an integer revision and an optional already-open file handle
1663 1650 to be used for reading. If used, the seek position of the file will not
1664 1651 be preserved.
1665 1652
1666 1653 Returns a str holding uncompressed data for the requested revision.
1667 1654 """
1668 1655 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1669 1656
1670 1657 def _chunks(self, revs, df=None, targetsize=None):
1671 1658 """Obtain decompressed chunks for the specified revisions.
1672 1659
1673 1660 Accepts an iterable of numeric revisions that are assumed to be in
1674 1661 ascending order. Also accepts an optional already-open file handle
1675 1662 to be used for reading. If used, the seek position of the file will
1676 1663 not be preserved.
1677 1664
1678 1665 This function is similar to calling ``self._chunk()`` multiple times,
1679 1666 but is faster.
1680 1667
1681 1668 Returns a list with decompressed data for each requested revision.
1682 1669 """
1683 1670 if not revs:
1684 1671 return []
1685 1672 start = self.start
1686 1673 length = self.length
1687 1674 inline = self._inline
1688 1675 iosize = self.index.entry_size
1689 1676 buffer = util.buffer
1690 1677
1691 1678 l = []
1692 1679 ladd = l.append
1693 1680
1694 1681 if not self._withsparseread:
1695 1682 slicedchunks = (revs,)
1696 1683 else:
1697 1684 slicedchunks = deltautil.slicechunk(
1698 1685 self, revs, targetsize=targetsize
1699 1686 )
1700 1687
1701 1688 for revschunk in slicedchunks:
1702 1689 firstrev = revschunk[0]
1703 1690 # Skip trailing revisions with empty diff
1704 1691 for lastrev in revschunk[::-1]:
1705 1692 if length(lastrev) != 0:
1706 1693 break
1707 1694
1708 1695 try:
1709 1696 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1710 1697 except OverflowError:
1711 1698 # issue4215 - we can't cache a run of chunks greater than
1712 1699 # 2G on Windows
1713 1700 return [self._chunk(rev, df=df) for rev in revschunk]
1714 1701
1715 1702 decomp = self.decompress
1716 1703 for rev in revschunk:
1717 1704 chunkstart = start(rev)
1718 1705 if inline:
1719 1706 chunkstart += (rev + 1) * iosize
1720 1707 chunklength = length(rev)
1721 1708 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1722 1709
1723 1710 return l
1724 1711
1725 1712 def _chunkclear(self):
1726 1713 """Clear the raw chunk cache."""
1727 1714 self._chunkcache = (0, b'')
1728 1715
1729 1716 def deltaparent(self, rev):
1730 1717 """return deltaparent of the given revision"""
1731 1718 base = self.index[rev][3]
1732 1719 if base == rev:
1733 1720 return nullrev
1734 1721 elif self._generaldelta:
1735 1722 return base
1736 1723 else:
1737 1724 return rev - 1
1738 1725
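# Sketch of the two delta-base conventions handled above: with
# generaldelta the index stores the delta base explicitly in entry[3];
# without it, a delta is implicitly against the previous revision, and a
# self-referential base marks a full snapshot with no delta parent.
def _deltaparent(index, rev, generaldelta, nullrev=-1):
    base = index[rev][3]
    if base == rev:
        return nullrev  # full text stored, no delta parent
    return base if generaldelta else rev - 1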
1739 1726 def issnapshot(self, rev):
1740 1727 """tells whether rev is a snapshot"""
1741 1728 if not self._sparserevlog:
1742 1729 return self.deltaparent(rev) == nullrev
1743 1730 elif util.safehasattr(self.index, b'issnapshot'):
1744 1731 # directly assign the method to cache the testing and access
1745 1732 self.issnapshot = self.index.issnapshot
1746 1733 return self.issnapshot(rev)
1747 1734 if rev == nullrev:
1748 1735 return True
1749 1736 entry = self.index[rev]
1750 1737 base = entry[3]
1751 1738 if base == rev:
1752 1739 return True
1753 1740 if base == nullrev:
1754 1741 return True
1755 1742 p1 = entry[5]
1756 1743 p2 = entry[6]
1757 1744 if base == p1 or base == p2:
1758 1745 return False
1759 1746 return self.issnapshot(base)
1760 1747
1761 1748 def snapshotdepth(self, rev):
1762 1749 """number of snapshot in the chain before this one"""
1763 1750 if not self.issnapshot(rev):
1764 1751 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1765 1752 return len(self._deltachain(rev)[0]) - 1
1766 1753
1767 1754 def revdiff(self, rev1, rev2):
1768 1755 """return or calculate a delta between two revisions
1769 1756
1770 1757 The delta calculated is in binary form and is intended to be written to
1771 1758 revlog data directly. So this function needs raw revision data.
1772 1759 """
1773 1760 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1774 1761 return bytes(self._chunk(rev2))
1775 1762
1776 1763 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1777 1764
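# Hedged sketch of the binary delta format revdiff() returns: a sequence
# of ">lll" (start, end, newlength) hunk headers, each followed by
# `newlength` bytes replacing old[start:end).  A full single-hunk
# replacement, in the spirit of mdiff.replacediffheader(), looks like:
import struct

def _fullreplacement(oldlen, newtext):
    return struct.pack(b'>lll', 0, oldlen, len(newtext)) + newtext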
1778 1765 def _processflags(self, text, flags, operation, raw=False):
1779 1766 """deprecated entry point to access flag processors"""
1780 1767 msg = b'_processflag(...) use the specialized variant'
1781 1768 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1782 1769 if raw:
1783 1770 return text, flagutil.processflagsraw(self, text, flags)
1784 1771 elif operation == b'read':
1785 1772 return flagutil.processflagsread(self, text, flags)
1786 1773 else: # write operation
1787 1774 return flagutil.processflagswrite(self, text, flags)
1788 1775
1789 1776 def revision(self, nodeorrev, _df=None, raw=False):
1790 1777 """return an uncompressed revision of a given node or revision
1791 1778 number.
1792 1779
1793 1780 _df - an existing file handle to read from. (internal-only)
1794 1781 raw - an optional argument specifying if the revision data is to be
1795 1782 treated as raw data when applying flag transforms. 'raw' should be set
1796 1783 to True when generating changegroups or in debug commands.
1797 1784 """
1798 1785 if raw:
1799 1786 msg = (
1800 1787 b'revlog.revision(..., raw=True) is deprecated, '
1801 1788 b'use revlog.rawdata(...)'
1802 1789 )
1803 1790 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1804 1791 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1805 1792
1806 1793 def sidedata(self, nodeorrev, _df=None):
1807 1794 """a map of extra data related to the changeset but not part of the hash
1808 1795
1809 1796 This function currently returns a dictionary. However, a more advanced
1810 1797 mapping object will likely be used in the future for more
1811 1798 efficient/lazy code.
1812 1799 """
1813 1800 return self._revisiondata(nodeorrev, _df)[1]
1814 1801
1815 1802 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1816 1803 # deal with <nodeorrev> argument type
1817 1804 if isinstance(nodeorrev, int):
1818 1805 rev = nodeorrev
1819 1806 node = self.node(rev)
1820 1807 else:
1821 1808 node = nodeorrev
1822 1809 rev = None
1823 1810
1824 1811 # fast path the special `nullid` rev
1825 1812 if node == self.nullid:
1826 1813 return b"", {}
1827 1814
1828 1815 # ``rawtext`` is the text as stored inside the revlog. Might be the
1829 1816 # revision or might need to be processed to retrieve the revision.
1830 1817 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1831 1818
1832 1819 if self.hassidedata:
1833 1820 if rev is None:
1834 1821 rev = self.rev(node)
1835 1822 sidedata = self._sidedata(rev)
1836 1823 else:
1837 1824 sidedata = {}
1838 1825
1839 1826 if raw and validated:
1840 1827 # if we don't want to process the raw text and the raw
1841 1828 # text is cached, we can exit early.
1842 1829 return rawtext, sidedata
1843 1830 if rev is None:
1844 1831 rev = self.rev(node)
1845 1832 # the revlog's flag for this revision
1846 1833 # (usually alter its state or content)
1847 1834 flags = self.flags(rev)
1848 1835
1849 1836 if validated and flags == REVIDX_DEFAULT_FLAGS:
1850 1837 # no extra flags set, no flag processor runs, text = rawtext
1851 1838 return rawtext, sidedata
1852 1839
1853 1840 if raw:
1854 1841 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1855 1842 text = rawtext
1856 1843 else:
1857 1844 r = flagutil.processflagsread(self, rawtext, flags)
1858 1845 text, validatehash = r
1859 1846 if validatehash:
1860 1847 self.checkhash(text, node, rev=rev)
1861 1848 if not validated:
1862 1849 self._revisioncache = (node, rev, rawtext)
1863 1850
1864 1851 return text, sidedata
1865 1852
1866 1853 def _rawtext(self, node, rev, _df=None):
1867 1854 """return the possibly unvalidated rawtext for a revision
1868 1855
1869 1856 returns (rev, rawtext, validated)
1870 1857 """
1871 1858
1872 1859 # revision in the cache (could be useful to apply delta)
1873 1860 cachedrev = None
1874 1861 # An intermediate text to apply deltas to
1875 1862 basetext = None
1876 1863
1877 1864 # Check if we have the entry in cache
1878 1865 # The cache entry looks like (node, rev, rawtext)
1879 1866 if self._revisioncache:
1880 1867 if self._revisioncache[0] == node:
1881 1868 return (rev, self._revisioncache[2], True)
1882 1869 cachedrev = self._revisioncache[1]
1883 1870
1884 1871 if rev is None:
1885 1872 rev = self.rev(node)
1886 1873
1887 1874 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1888 1875 if stopped:
1889 1876 basetext = self._revisioncache[2]
1890 1877
1891 1878 # drop cache to save memory, the caller is expected to
1892 1879 # update self._revisioncache after validating the text
1893 1880 self._revisioncache = None
1894 1881
1895 1882 targetsize = None
1896 1883 rawsize = self.index[rev][2]
1897 1884 if 0 <= rawsize:
1898 1885 targetsize = 4 * rawsize
1899 1886
1900 1887 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1901 1888 if basetext is None:
1902 1889 basetext = bytes(bins[0])
1903 1890 bins = bins[1:]
1904 1891
1905 1892 rawtext = mdiff.patches(basetext, bins)
1906 1893 del basetext # let us have a chance to free memory early
1907 1894 return (rev, rawtext, False)
1908 1895
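# Minimal illustration of what mdiff.patches(basetext, bins) does above:
# walk each binary delta's ">lll" hunks and splice the replacement bytes
# into the text built so far (pure-Python, unoptimized sketch).
import struct

def _patch(old, delta):
    out, last, pos = [], 0, 0
    while pos < len(delta):
        start, end, l = struct.unpack(b'>lll', delta[pos : pos + 12])
        out.append(old[last:start])  # unchanged bytes before the hunk
        out.append(delta[pos + 12 : pos + 12 + l])  # replacement bytes
        last, pos = end, pos + 12 + l
    out.append(old[last:])  # unchanged suffix
    return b''.join(out)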
1909 1896 def _sidedata(self, rev):
1910 1897 """Return the sidedata for a given revision number."""
1911 1898 index_entry = self.index[rev]
1912 1899 sidedata_offset = index_entry[8]
1913 1900 sidedata_size = index_entry[9]
1914 1901
1915 1902 if self._inline:
1916 1903 sidedata_offset += self.index.entry_size * (1 + rev)
1917 1904 if sidedata_size == 0:
1918 1905 return {}
1919 1906
1920 1907 segment = self._getsegment(sidedata_offset, sidedata_size)
1921 1908 sidedata = sidedatautil.deserialize_sidedata(segment)
1922 1909 return sidedata
1923 1910
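# Sketch of the inline-revlog address translation applied above (and in
# _getsegmentforrevs): in an inline revlog, index entries and data chunks
# are interleaved in the .i file, so a logical data offset for `rev` is
# shifted by the entry_size * (rev + 1) bytes of index that precede it.
def _physical(logical_offset, rev, entry_size=64):  # 64 == v1 entry size
    return logical_offset + entry_size * (rev + 1)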
1924 1911 def rawdata(self, nodeorrev, _df=None):
1925 1912 """return an uncompressed raw data of a given node or revision number.
1926 1913
1927 1914 _df - an existing file handle to read from. (internal-only)
1928 1915 """
1929 1916 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1930 1917
1931 1918 def hash(self, text, p1, p2):
1932 1919 """Compute a node hash.
1933 1920
1934 1921 Available as a function so that subclasses can replace the hash
1935 1922 as needed.
1936 1923 """
1937 1924 return storageutil.hashrevisionsha1(text, p1, p2)
1938 1925
1939 1926 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1940 1927 """Check node hash integrity.
1941 1928
1942 1929 Available as a function so that subclasses can extend hash mismatch
1943 1930 behaviors as needed.
1944 1931 """
1945 1932 try:
1946 1933 if p1 is None and p2 is None:
1947 1934 p1, p2 = self.parents(node)
1948 1935 if node != self.hash(text, p1, p2):
1949 1936 # Clear the revision cache on hash failure. The revision cache
1950 1937 # only stores the raw revision and clearing the cache does have
1951 1938 # the side-effect that we won't have a cache hit when the raw
1952 1939 # revision data is accessed. But this case should be rare and
1953 1940 # it is extra work to teach the cache about the hash
1954 1941 # verification state.
1955 1942 if self._revisioncache and self._revisioncache[0] == node:
1956 1943 self._revisioncache = None
1957 1944
1958 1945 revornode = rev
1959 1946 if revornode is None:
1960 1947 revornode = templatefilters.short(hex(node))
1961 1948 raise error.RevlogError(
1962 1949 _(b"integrity check failed on %s:%s")
1963 1950 % (self.display_id, pycompat.bytestr(revornode))
1964 1951 )
1965 1952 except error.RevlogError:
1966 1953 if self._censorable and storageutil.iscensoredtext(text):
1967 1954 raise error.CensoredNodeError(self.display_id, node, text)
1968 1955 raise
1969 1956
1970 1957 def _enforceinlinesize(self, tr):
1971 1958 """Check if the revlog is too big for inline and convert if so.
1972 1959
1973 1960 This should be called after revisions are added to the revlog. If the
1974 1961 revlog has grown too large to be an inline revlog, it will convert it
1975 1962 to use multiple index and data files.
1976 1963 """
1977 1964 tiprev = len(self) - 1
1978 1965 total_size = self.start(tiprev) + self.length(tiprev)
1979 1966 if not self._inline or total_size < _maxinline:
1980 1967 return
1981 1968
1982 1969 troffset = tr.findoffset(self._indexfile)
1983 1970 if troffset is None:
1984 1971 raise error.RevlogError(
1985 1972 _(b"%s not found in the transaction") % self._indexfile
1986 1973 )
1987 1974 trindex = 0
1988 1975 tr.add(self._datafile, 0)
1989 1976
1990 1977 existing_handles = False
1991 1978 if self._writinghandles is not None:
1992 1979 existing_handles = True
1993 1980 fp = self._writinghandles[0]
1994 1981 fp.flush()
1995 1982 fp.close()
1996 1983 # We can't use the cached file handle after close(). So prevent
1997 1984 # its usage.
1998 1985 self._writinghandles = None
1999 1986
2000 1987 new_dfh = self._datafp(b'w+')
2001 1988 new_dfh.truncate(0) # drop any potentially existing data
2002 1989 try:
2003 1990 with self._indexfp() as read_ifh:
2004 1991 for r in self:
2005 1992 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2006 1993 if troffset <= self.start(r):
2007 1994 trindex = r
2008 1995 new_dfh.flush()
2009 1996
2010 1997 with self.__index_new_fp() as fp:
2011 1998 self._format_flags &= ~FLAG_INLINE_DATA
2012 1999 self._inline = False
2013 2000 for i in self:
2014 2001 e = self.index.entry_binary(i)
2015 2002 if i == 0:
2016 2003 header = self._format_flags | self._format_version
2017 2004 header = self.index.pack_header(header)
2018 2005 e = header + e
2019 2006 fp.write(e)
2020 2007 # the temp file replaces the real index when we exit the context
2021 2008 # manager
2022 2009
2023 2010 tr.replace(self._indexfile, trindex * self.index.entry_size)
2024 2011 nodemaputil.setup_persistent_nodemap(tr, self)
2025 2012 self._chunkclear()
2026 2013
2027 2014 if existing_handles:
2028 2015 # switched from inline to conventional; reopen the index
2029 2016 ifh = self.__index_write_fp()
2030 2017 self._writinghandles = (ifh, new_dfh)
2031 2018 new_dfh = None
2032 2019 finally:
2033 2020 if new_dfh is not None:
2034 2021 new_dfh.close()
2035 2022
2036 2023 def _nodeduplicatecallback(self, transaction, node):
2037 2024 """called when trying to add a node already stored."""
2038 2025
2039 2026 @contextlib.contextmanager
2040 2027 def _writing(self, transaction):
2041 2028 if self._writinghandles is not None:
2042 2029 yield
2043 2030 else:
2044 2031 r = len(self)
2045 2032 dsize = 0
2046 2033 if r:
2047 2034 dsize = self.end(r - 1)
2048 2035 dfh = None
2049 2036 if not self._inline:
2050 2037 try:
2051 2038 dfh = self._datafp(b"r+")
2052 2039 dfh.seek(0, os.SEEK_END)
2053 2040 except IOError as inst:
2054 2041 if inst.errno != errno.ENOENT:
2055 2042 raise
2056 2043 dfh = self._datafp(b"w+")
2057 2044 transaction.add(self._datafile, dsize)
2058 2045 try:
2059 2046 isize = r * self.index.entry_size
2060 2047 ifh = self.__index_write_fp()
2061 2048 if self._inline:
2062 2049 transaction.add(self._indexfile, dsize + isize)
2063 2050 else:
2064 2051 transaction.add(self._indexfile, isize)
2065 2052 try:
2066 2053 self._writinghandles = (ifh, dfh)
2067 2054 try:
2068 2055 yield
2069 2056 finally:
2070 2057 self._writinghandles = None
2071 2058 finally:
2072 2059 ifh.close()
2073 2060 finally:
2074 2061 if dfh is not None:
2075 2062 dfh.close()
2076 2063
2077 2064 def addrevision(
2078 2065 self,
2079 2066 text,
2080 2067 transaction,
2081 2068 link,
2082 2069 p1,
2083 2070 p2,
2084 2071 cachedelta=None,
2085 2072 node=None,
2086 2073 flags=REVIDX_DEFAULT_FLAGS,
2087 2074 deltacomputer=None,
2088 2075 sidedata=None,
2089 2076 ):
2090 2077 """add a revision to the log
2091 2078
2092 2079 text - the revision data to add
2093 2080 transaction - the transaction object used for rollback
2094 2081 link - the linkrev data to add
2095 2082 p1, p2 - the parent nodeids of the revision
2096 2083 cachedelta - an optional precomputed delta
2097 2084 node - nodeid of revision; typically node is not specified, and it is
2098 2085 computed by default as hash(text, p1, p2), however subclasses might
2099 2086 use a different hashing method (and override checkhash() in that case)
2100 2087 flags - the known flags to set on the revision
2101 2088 deltacomputer - an optional deltacomputer instance shared between
2102 2089 multiple calls
2103 2090 """
2104 2091 if link == nullrev:
2105 2092 raise error.RevlogError(
2106 2093 _(b"attempted to add linkrev -1 to %s") % self.display_id
2107 2094 )
2108 2095
2109 2096 if sidedata is None:
2110 2097 sidedata = {}
2111 2098 elif sidedata and not self.hassidedata:
2112 2099 raise error.ProgrammingError(
2113 2100 _(b"trying to add sidedata to a revlog who don't support them")
2114 2101 )
2115 2102
2116 2103 if flags:
2117 2104 node = node or self.hash(text, p1, p2)
2118 2105
2119 2106 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2120 2107
2121 2108 # If the flag processor modifies the revision data, ignore any provided
2122 2109 # cachedelta.
2123 2110 if rawtext != text:
2124 2111 cachedelta = None
2125 2112
2126 2113 if len(rawtext) > _maxentrysize:
2127 2114 raise error.RevlogError(
2128 2115 _(
2129 2116 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2130 2117 )
2131 2118 % (self.display_id, len(rawtext))
2132 2119 )
2133 2120
2134 2121 node = node or self.hash(rawtext, p1, p2)
2135 2122 rev = self.index.get_rev(node)
2136 2123 if rev is not None:
2137 2124 return rev
2138 2125
2139 2126 if validatehash:
2140 2127 self.checkhash(rawtext, node, p1=p1, p2=p2)
2141 2128
2142 2129 return self.addrawrevision(
2143 2130 rawtext,
2144 2131 transaction,
2145 2132 link,
2146 2133 p1,
2147 2134 p2,
2148 2135 node,
2149 2136 flags,
2150 2137 cachedelta=cachedelta,
2151 2138 deltacomputer=deltacomputer,
2152 2139 sidedata=sidedata,
2153 2140 )
2154 2141
2155 2142 def addrawrevision(
2156 2143 self,
2157 2144 rawtext,
2158 2145 transaction,
2159 2146 link,
2160 2147 p1,
2161 2148 p2,
2162 2149 node,
2163 2150 flags,
2164 2151 cachedelta=None,
2165 2152 deltacomputer=None,
2166 2153 sidedata=None,
2167 2154 ):
2168 2155 """add a raw revision with known flags, node and parents
2169 2156 useful when reusing a revision not stored in this revlog (ex: received
2170 2157 over wire, or read from an external bundle).
2171 2158 """
2172 2159 with self._writing(transaction):
2173 2160 return self._addrevision(
2174 2161 node,
2175 2162 rawtext,
2176 2163 transaction,
2177 2164 link,
2178 2165 p1,
2179 2166 p2,
2180 2167 flags,
2181 2168 cachedelta,
2182 2169 deltacomputer=deltacomputer,
2183 2170 sidedata=sidedata,
2184 2171 )
2185 2172
2186 2173 def compress(self, data):
2187 2174 """Generate a possibly-compressed representation of data."""
2188 2175 if not data:
2189 2176 return b'', data
2190 2177
2191 2178 compressed = self._compressor.compress(data)
2192 2179
2193 2180 if compressed:
2194 2181 # The revlog compressor added the header in the returned data.
2195 2182 return b'', compressed
2196 2183
2197 2184 if data[0:1] == b'\0':
2198 2185 return b'', data
2199 2186 return b'u', data
2200 2187
2201 2188 def decompress(self, data):
2202 2189 """Decompress a revlog chunk.
2203 2190
2204 2191 The chunk is expected to begin with a header identifying the
2205 2192 format type so it can be routed to an appropriate decompressor.
2206 2193 """
2207 2194 if not data:
2208 2195 return data
2209 2196
2210 2197 # Revlogs are read much more frequently than they are written and many
2211 2198 # chunks only take microseconds to decompress, so performance is
2212 2199 # important here.
2213 2200 #
2214 2201 # We can make a few assumptions about revlogs:
2215 2202 #
2216 2203 # 1) the majority of chunks will be compressed (as opposed to inline
2217 2204 # raw data).
2218 2205 # 2) decompressing *any* data will likely be at least 10x slower than
2219 2206 # returning raw inline data.
2220 2207 # 3) we want to prioritize common and officially supported compression
2221 2208 # engines
2222 2209 #
2223 2210 # It follows that we want to optimize for "decompress compressed data
2224 2211 # when encoded with common and officially supported compression engines"
2225 2212 # case over "raw data" and "data encoded by less common or non-official
2226 2213 # compression engines." That is why we have the inline lookup first
2227 2214 # followed by the compengines lookup.
2228 2215 #
2229 2216 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2230 2217 # compressed chunks. And this matters for changelog and manifest reads.
2231 2218 t = data[0:1]
2232 2219
2233 2220 if t == b'x':
2234 2221 try:
2235 2222 return _zlibdecompress(data)
2236 2223 except zlib.error as e:
2237 2224 raise error.RevlogError(
2238 2225 _(b'revlog decompress error: %s')
2239 2226 % stringutil.forcebytestr(e)
2240 2227 )
2241 2228 # '\0' is more common than 'u' so it goes first.
2242 2229 elif t == b'\0':
2243 2230 return data
2244 2231 elif t == b'u':
2245 2232 return util.buffer(data, 1)
2246 2233
2247 2234 try:
2248 2235 compressor = self._decompressors[t]
2249 2236 except KeyError:
2250 2237 try:
2251 2238 engine = util.compengines.forrevlogheader(t)
2252 2239 compressor = engine.revlogcompressor(self._compengineopts)
2253 2240 self._decompressors[t] = compressor
2254 2241 except KeyError:
2255 2242 raise error.RevlogError(
2256 2243 _(b'unknown compression type %s') % binascii.hexlify(t)
2257 2244 )
2258 2245
2259 2246 return compressor.decompress(data)
2260 2247
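# Illustration of the one-byte chunk headers dispatched on above, using
# only stdlib zlib: zlib streams begin with 0x78 (b'x'), b'u' marks text
# stored uncompressed, and b'\0' marks data returned verbatim.
import zlib

chunk = zlib.compress(b'some revision text')
assert chunk[0:1] == b'x'  # routed to _zlibdecompress above
assert zlib.decompress(chunk) == b'some revision text'
assert (b'u' + b'raw text')[0:1] == b'u'  # the header byte is skipped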
2261 2248 def _addrevision(
2262 2249 self,
2263 2250 node,
2264 2251 rawtext,
2265 2252 transaction,
2266 2253 link,
2267 2254 p1,
2268 2255 p2,
2269 2256 flags,
2270 2257 cachedelta,
2271 2258 alwayscache=False,
2272 2259 deltacomputer=None,
2273 2260 sidedata=None,
2274 2261 ):
2275 2262 """internal function to add revisions to the log
2276 2263
2277 2264 see addrevision for argument descriptions.
2278 2265
2279 2266 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2280 2267
2281 2268 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2282 2269 be used.
2283 2270
2284 2271 invariants:
2285 2272 - rawtext is optional (can be None); if not set, cachedelta must be set.
2286 2273 if both are set, they must correspond to each other.
2287 2274 """
2288 2275 if node == self.nullid:
2289 2276 raise error.RevlogError(
2290 2277 _(b"%s: attempt to add null revision") % self.display_id
2291 2278 )
2292 2279 if (
2293 2280 node == self.nodeconstants.wdirid
2294 2281 or node in self.nodeconstants.wdirfilenodeids
2295 2282 ):
2296 2283 raise error.RevlogError(
2297 2284 _(b"%s: attempt to add wdir revision") % self.display_id
2298 2285 )
2299 2286 if self._writinghandles is None:
2300 2287 msg = b'adding revision outside `revlog._writing` context'
2301 2288 raise error.ProgrammingError(msg)
2302 2289
2303 2290 if self._inline:
2304 2291 fh = self._writinghandles[0]
2305 2292 else:
2306 2293 fh = self._writinghandles[1]
2307 2294
2308 2295 btext = [rawtext]
2309 2296
2310 2297 curr = len(self)
2311 2298 prev = curr - 1
2312 2299
2313 2300 offset = self._get_data_offset(prev)
2314 2301
2315 2302 if self._concurrencychecker:
2316 2303 ifh, dfh = self._writinghandles
2317 2304 if self._inline:
2318 2305 # offset is "as if" it were in the .d file, so we need to add on
2319 2306 # the size of the entry metadata.
2320 2307 self._concurrencychecker(
2321 2308 ifh, self._indexfile, offset + curr * self.index.entry_size
2322 2309 )
2323 2310 else:
2324 2311 # Entries in the .i are a consistent size.
2325 2312 self._concurrencychecker(
2326 2313 ifh, self._indexfile, curr * self.index.entry_size
2327 2314 )
2328 2315 self._concurrencychecker(dfh, self._datafile, offset)
2329 2316
2330 2317 p1r, p2r = self.rev(p1), self.rev(p2)
2331 2318
2332 2319 # full versions are inserted when the needed deltas
2333 2320 # become comparable to the uncompressed text
2334 2321 if rawtext is None:
2335 2322 # need rawtext size, before changed by flag processors, which is
2336 2323 # the non-raw size. use revlog explicitly to avoid filelog's extra
2337 2324 # logic that might remove metadata size.
2338 2325 textlen = mdiff.patchedsize(
2339 2326 revlog.size(self, cachedelta[0]), cachedelta[1]
2340 2327 )
2341 2328 else:
2342 2329 textlen = len(rawtext)
2343 2330
2344 2331 if deltacomputer is None:
2345 2332 deltacomputer = deltautil.deltacomputer(self)
2346 2333
2347 2334 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2348 2335
2349 2336 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2350 2337
2351 2338 if sidedata and self.hassidedata:
2352 2339 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2353 2340 sidedata_offset = offset + deltainfo.deltalen
2354 2341 else:
2355 2342 serialized_sidedata = b""
2356 2343 # Don't store the offset if the sidedata is empty; that way
2357 2344 # we can easily detect empty sidedata, and it will be no different
2358 2345 # from sidedata we add manually.
2359 2346 sidedata_offset = 0
2360 2347
2361 2348 e = (
2362 2349 offset_type(offset, flags),
2363 2350 deltainfo.deltalen,
2364 2351 textlen,
2365 2352 deltainfo.base,
2366 2353 link,
2367 2354 p1r,
2368 2355 p2r,
2369 2356 node,
2370 2357 sidedata_offset,
2371 2358 len(serialized_sidedata),
2372 2359 )
2373 2360
2374 2361 self.index.append(e)
2375 2362 entry = self.index.entry_binary(curr)
2376 2363 if curr == 0:
2377 2364 header = self._format_flags | self._format_version
2378 2365 header = self.index.pack_header(header)
2379 2366 entry = header + entry
2380 2367 self._writeentry(
2381 2368 transaction,
2382 2369 entry,
2383 2370 deltainfo.data,
2384 2371 link,
2385 2372 offset,
2386 2373 serialized_sidedata,
2387 2374 )
2388 2375
2389 2376 rawtext = btext[0]
2390 2377
2391 2378 if alwayscache and rawtext is None:
2392 2379 rawtext = deltacomputer.buildtext(revinfo, fh)
2393 2380
2394 2381 if type(rawtext) == bytes: # only accept immutable objects
2395 2382 self._revisioncache = (node, curr, rawtext)
2396 2383 self._chainbasecache[curr] = deltainfo.chainbase
2397 2384 return curr
2398 2385
2399 2386 def _get_data_offset(self, prev):
2400 2387 """Returns the current offset in the (in-transaction) data file.
2401 2388 Versions < 2 of the revlog can compute this in O(1); revlog v2 needs a docket
2402 2389 file to store that information: since sidedata can be rewritten to the
2403 2390 end of the data file within a transaction, you can have cases where, for
2404 2391 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2405 2392 to `n - 1`'s sidedata being written after `n`'s data.
2406 2393
2407 2394 TODO cache this in a docket file before getting out of experimental."""
2408 2395 if self._format_version != REVLOGV2:
2409 2396 return self.end(prev)
2410 2397
2411 2398 offset = 0
2412 2399 for rev, entry in enumerate(self.index):
2413 2400 sidedata_end = entry[8] + entry[9]
2414 2401 # Sidedata for a previous rev has potentially been written after
2415 2402 # this rev's end, so take the max.
2416 2403 offset = max(self.end(rev), offset, sidedata_end)
2417 2404 return offset
2418 2405
2419 2406 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2420 2407 # Files opened in a+ mode have inconsistent behavior on various
2421 2408 # platforms. Windows requires that a file positioning call be made
2422 2409 # when the file handle transitions between reads and writes. See
2423 2410 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2424 2411 # platforms, Python or the platform itself can be buggy. Some versions
2425 2412 # of Solaris have been observed to not append at the end of the file
2426 2413 # if the file was seeked to before the end. See issue4943 for more.
2427 2414 #
2428 2415 # We work around this issue by inserting a seek() before writing.
2429 2416 # Note: This is likely not necessary on Python 3. However, because
2430 2417 # the file handle is reused for reads and may be seeked there, we need
2431 2418 # to be careful before changing this.
2432 2419 if self._writinghandles is None:
2433 2420 msg = b'adding revision outside `revlog._writing` context'
2434 2421 raise error.ProgrammingError(msg)
2435 2422 ifh, dfh = self._writinghandles
2436 2423 ifh.seek(0, os.SEEK_END)
2437 2424 if dfh:
2438 2425 dfh.seek(0, os.SEEK_END)
2439 2426
2440 2427 curr = len(self) - 1
2441 2428 if not self._inline:
2442 2429 transaction.add(self._datafile, offset)
2443 2430 transaction.add(self._indexfile, curr * len(entry))
2444 2431 if data[0]:
2445 2432 dfh.write(data[0])
2446 2433 dfh.write(data[1])
2447 2434 if sidedata:
2448 2435 dfh.write(sidedata)
2449 2436 ifh.write(entry)
2450 2437 else:
2451 2438 offset += curr * self.index.entry_size
2452 2439 transaction.add(self._indexfile, offset)
2453 2440 ifh.write(entry)
2454 2441 ifh.write(data[0])
2455 2442 ifh.write(data[1])
2456 2443 if sidedata:
2457 2444 ifh.write(sidedata)
2458 2445 self._enforceinlinesize(transaction)
2459 2446 nodemaputil.setup_persistent_nodemap(transaction, self)
2460 2447
2461 2448 def addgroup(
2462 2449 self,
2463 2450 deltas,
2464 2451 linkmapper,
2465 2452 transaction,
2466 2453 alwayscache=False,
2467 2454 addrevisioncb=None,
2468 2455 duplicaterevisioncb=None,
2469 2456 ):
2470 2457 """
2471 2458 add a delta group
2472 2459
2473 2460 given a set of deltas, add them to the revision log. the
2474 2461 first delta is against its parent, which should be in our
2475 2462 log, the rest are against the previous delta.
2476 2463
2477 2464 If ``addrevisioncb`` is defined, it will be called with arguments of
2478 2465 this revlog and the node that was added.
2479 2466 """
2480 2467
2481 2468 if self._adding_group:
2482 2469 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2483 2470
2484 2471 self._adding_group = True
2485 2472 empty = True
2486 2473 try:
2487 2474 with self._writing(transaction):
2488 2475 deltacomputer = deltautil.deltacomputer(self)
2489 2476 # loop through our set of deltas
2490 2477 for data in deltas:
2491 2478 (
2492 2479 node,
2493 2480 p1,
2494 2481 p2,
2495 2482 linknode,
2496 2483 deltabase,
2497 2484 delta,
2498 2485 flags,
2499 2486 sidedata,
2500 2487 ) = data
2501 2488 link = linkmapper(linknode)
2502 2489 flags = flags or REVIDX_DEFAULT_FLAGS
2503 2490
2504 2491 rev = self.index.get_rev(node)
2505 2492 if rev is not None:
2506 2493 # this can happen if two branches make the same change
2507 2494 self._nodeduplicatecallback(transaction, rev)
2508 2495 if duplicaterevisioncb:
2509 2496 duplicaterevisioncb(self, rev)
2510 2497 empty = False
2511 2498 continue
2512 2499
2513 2500 for p in (p1, p2):
2514 2501 if not self.index.has_node(p):
2515 2502 raise error.LookupError(
2516 2503 p, self.radix, _(b'unknown parent')
2517 2504 )
2518 2505
2519 2506 if not self.index.has_node(deltabase):
2520 2507 raise error.LookupError(
2521 2508 deltabase, self.display_id, _(b'unknown delta base')
2522 2509 )
2523 2510
2524 2511 baserev = self.rev(deltabase)
2525 2512
2526 2513 if baserev != nullrev and self.iscensored(baserev):
2527 2514 # if base is censored, delta must be full replacement in a
2528 2515 # single patch operation
2529 2516 hlen = struct.calcsize(b">lll")
2530 2517 oldlen = self.rawsize(baserev)
2531 2518 newlen = len(delta) - hlen
2532 2519 if delta[:hlen] != mdiff.replacediffheader(
2533 2520 oldlen, newlen
2534 2521 ):
2535 2522 raise error.CensoredBaseError(
2536 2523 self.display_id, self.node(baserev)
2537 2524 )
2538 2525
2539 2526 if not flags and self._peek_iscensored(baserev, delta):
2540 2527 flags |= REVIDX_ISCENSORED
2541 2528
2542 2529 # We assume consumers of addrevisioncb will want to retrieve
2543 2530 # the added revision, which will require a call to
2544 2531 # revision(). revision() will fast path if there is a cache
2545 2532 # hit. So, we tell _addrevision() to always cache in this case.
2546 2533 # We're only using addgroup() in the context of changegroup
2547 2534 # generation so the revision data can always be handled as raw
2548 2535 # by the flagprocessor.
2549 2536 rev = self._addrevision(
2550 2537 node,
2551 2538 None,
2552 2539 transaction,
2553 2540 link,
2554 2541 p1,
2555 2542 p2,
2556 2543 flags,
2557 2544 (baserev, delta),
2558 2545 alwayscache=alwayscache,
2559 2546 deltacomputer=deltacomputer,
2560 2547 sidedata=sidedata,
2561 2548 )
2562 2549
2563 2550 if addrevisioncb:
2564 2551 addrevisioncb(self, rev)
2565 2552 empty = False
2566 2553 finally:
2567 2554 self._adding_group = False
2568 2555 return not empty
2569 2556
2570 2557 def iscensored(self, rev):
2571 2558 """Check if a file revision is censored."""
2572 2559 if not self._censorable:
2573 2560 return False
2574 2561
2575 2562 return self.flags(rev) & REVIDX_ISCENSORED
2576 2563
2577 2564 def _peek_iscensored(self, baserev, delta):
2578 2565 """Quickly check if a delta produces a censored revision."""
2579 2566 if not self._censorable:
2580 2567 return False
2581 2568
2582 2569 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2583 2570
2584 2571 def getstrippoint(self, minlink):
2585 2572 """find the minimum rev that must be stripped to strip the linkrev
2586 2573
2587 2574 Returns a tuple containing the minimum rev and a set of all revs that
2588 2575 have linkrevs that will be broken by this strip.
2589 2576 """
2590 2577 return storageutil.resolvestripinfo(
2591 2578 minlink,
2592 2579 len(self) - 1,
2593 2580 self.headrevs(),
2594 2581 self.linkrev,
2595 2582 self.parentrevs,
2596 2583 )
2597 2584
2598 2585 def strip(self, minlink, transaction):
2599 2586 """truncate the revlog on the first revision with a linkrev >= minlink
2600 2587
2601 2588 This function is called when we're stripping revision minlink and
2602 2589 its descendants from the repository.
2603 2590
2604 2591 We have to remove all revisions with linkrev >= minlink, because
2605 2592 the equivalent changelog revisions will be renumbered after the
2606 2593 strip.
2607 2594
2608 2595 So we truncate the revlog on the first of these revisions, and
2609 2596 trust that the caller has saved the revisions that shouldn't be
2610 2597 removed and that it'll re-add them after this truncation.
2611 2598 """
2612 2599 if len(self) == 0:
2613 2600 return
2614 2601
2615 2602 rev, _ = self.getstrippoint(minlink)
2616 2603 if rev == len(self):
2617 2604 return
2618 2605
2619 2606 # first truncate the files on disk
2620 2607 end = self.start(rev)
2621 2608 if not self._inline:
2622 2609 transaction.add(self._datafile, end)
2623 2610 end = rev * self.index.entry_size
2624 2611 else:
2625 2612 end += rev * self.index.entry_size
2626 2613
2627 2614 transaction.add(self._indexfile, end)
2628 2615
2629 2616 # then reset internal state in memory to forget those revisions
2630 2617 self._revisioncache = None
2631 2618 self._chaininfocache = util.lrucachedict(500)
2632 2619 self._chunkclear()
2633 2620
2634 2621 del self.index[rev:-1]
2635 2622
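# Hypothetical usage sketch of the strip flow above, with `rl` an open
# revlog and `tr` a transaction:
#   rev, broken = rl.getstrippoint(minlink)  # broken: revs losing linkrevs
#   rl.strip(minlink, tr)                    # truncate files from `rev` on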
2636 2623 def checksize(self):
2637 2624 """Check size of index and data files
2638 2625
2639 2626 return a (dd, di) tuple.
2640 2627 - dd: extra bytes for the "data" file
2641 2628 - di: extra bytes for the "index" file
2642 2629
2643 2630 A healthy revlog will return (0, 0).
2644 2631 """
2645 2632 expected = 0
2646 2633 if len(self):
2647 2634 expected = max(0, self.end(len(self) - 1))
2648 2635
2649 2636 try:
2650 2637 with self._datafp() as f:
2651 2638 f.seek(0, io.SEEK_END)
2652 2639 actual = f.tell()
2653 2640 dd = actual - expected
2654 2641 except IOError as inst:
2655 2642 if inst.errno != errno.ENOENT:
2656 2643 raise
2657 2644 dd = 0
2658 2645
2659 2646 try:
2660 2647 f = self.opener(self._indexfile)
2661 2648 f.seek(0, io.SEEK_END)
2662 2649 actual = f.tell()
2663 2650 f.close()
2664 2651 s = self.index.entry_size
2665 2652 i = max(0, actual // s)
2666 2653 di = actual - (i * s)
2667 2654 if self._inline:
2668 2655 databytes = 0
2669 2656 for r in self:
2670 2657 databytes += max(0, self.length(r))
2671 2658 dd = 0
2672 2659 di = actual - len(self) * s - databytes
2673 2660 except IOError as inst:
2674 2661 if inst.errno != errno.ENOENT:
2675 2662 raise
2676 2663 di = 0
2677 2664
2678 2665 return (dd, di)
2679 2666
2680 2667 def files(self):
2681 2668 res = [self._indexfile]
2682 2669 if not self._inline:
2683 2670 res.append(self._datafile)
2684 2671 return res
2685 2672
2686 2673 def emitrevisions(
2687 2674 self,
2688 2675 nodes,
2689 2676 nodesorder=None,
2690 2677 revisiondata=False,
2691 2678 assumehaveparentrevisions=False,
2692 2679 deltamode=repository.CG_DELTAMODE_STD,
2693 2680 sidedata_helpers=None,
2694 2681 ):
2695 2682 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2696 2683 raise error.ProgrammingError(
2697 2684 b'unhandled value for nodesorder: %s' % nodesorder
2698 2685 )
2699 2686
2700 2687 if nodesorder is None and not self._generaldelta:
2701 2688 nodesorder = b'storage'
2702 2689
2703 2690 if (
2704 2691 not self._storedeltachains
2705 2692 and deltamode != repository.CG_DELTAMODE_PREV
2706 2693 ):
2707 2694 deltamode = repository.CG_DELTAMODE_FULL
2708 2695
2709 2696 return storageutil.emitrevisions(
2710 2697 self,
2711 2698 nodes,
2712 2699 nodesorder,
2713 2700 revlogrevisiondelta,
2714 2701 deltaparentfn=self.deltaparent,
2715 2702 candeltafn=self.candelta,
2716 2703 rawsizefn=self.rawsize,
2717 2704 revdifffn=self.revdiff,
2718 2705 flagsfn=self.flags,
2719 2706 deltamode=deltamode,
2720 2707 revisiondata=revisiondata,
2721 2708 assumehaveparentrevisions=assumehaveparentrevisions,
2722 2709 sidedata_helpers=sidedata_helpers,
2723 2710 )
2724 2711
2725 2712 DELTAREUSEALWAYS = b'always'
2726 2713 DELTAREUSESAMEREVS = b'samerevs'
2727 2714 DELTAREUSENEVER = b'never'
2728 2715
2729 2716 DELTAREUSEFULLADD = b'fulladd'
2730 2717
2731 2718 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2732 2719
2733 2720 def clone(
2734 2721 self,
2735 2722 tr,
2736 2723 destrevlog,
2737 2724 addrevisioncb=None,
2738 2725 deltareuse=DELTAREUSESAMEREVS,
2739 2726 forcedeltabothparents=None,
2740 2727 sidedata_helpers=None,
2741 2728 ):
2742 2729 """Copy this revlog to another, possibly with format changes.
2743 2730
2744 2731 The destination revlog will contain the same revisions and nodes.
2745 2732 However, it may not be bit-for-bit identical due to e.g. delta encoding
2746 2733 differences.
2747 2734
2748 2735 The ``deltareuse`` argument controls how deltas from the existing revlog
2749 2736 are preserved in the destination revlog. The argument can have the
2750 2737 following values:
2751 2738
2752 2739 DELTAREUSEALWAYS
2753 2740 Deltas will always be reused (if possible), even if the destination
2754 2741 revlog would not select the same revisions for the delta. This is the
2755 2742 fastest mode of operation.
2756 2743 DELTAREUSESAMEREVS
2757 2744 Deltas will be reused if the destination revlog would pick the same
2758 2745 revisions for the delta. This mode strikes a balance between speed
2759 2746 and optimization.
2760 2747 DELTAREUSENEVER
2761 2748 Deltas will never be reused. This is the slowest mode of execution.
2762 2749 This mode can be used to recompute deltas (e.g. if the diff/delta
2763 2750 algorithm changes).
2764 2751 DELTAREUSEFULLADD
2765 2752 Revisions will be re-added as if they were new content. This is
2766 2753 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2767 2754 e.g. large file detection and handling.
2768 2755
2769 2756 Delta computation can be slow, so the choice of delta reuse policy can
2770 2757 significantly affect run time.
2771 2758
2772 2759 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2773 2760 two extremes. Deltas will be reused if they are appropriate. But if the
2774 2761 delta could choose a better revision, it will do so. This means if you
2775 2762 are converting a non-generaldelta revlog to a generaldelta revlog,
2776 2763 deltas will be recomputed if the delta's parent isn't a parent of the
2777 2764 revision.
2778 2765
2779 2766 In addition to the delta policy, the ``forcedeltabothparents``
2780 2767 argument controls whether to force compute deltas against both parents
2781 2768 for merges. If unset, the destination revlog's existing setting is kept.
2782 2769
2783 2770 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2784 2771 `sidedata_helpers`.
2785 2772 """
2786 2773 if deltareuse not in self.DELTAREUSEALL:
2787 2774 raise ValueError(
2788 2775 _(b'value for deltareuse invalid: %s') % deltareuse
2789 2776 )
2790 2777
2791 2778 if len(destrevlog):
2792 2779 raise ValueError(_(b'destination revlog is not empty'))
2793 2780
2794 2781 if getattr(self, 'filteredrevs', None):
2795 2782 raise ValueError(_(b'source revlog has filtered revisions'))
2796 2783 if getattr(destrevlog, 'filteredrevs', None):
2797 2784 raise ValueError(_(b'destination revlog has filtered revisions'))
2798 2785
2799 2786 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2800 2787 # if possible.
2801 2788 oldlazydelta = destrevlog._lazydelta
2802 2789 oldlazydeltabase = destrevlog._lazydeltabase
2803 2790 oldamd = destrevlog._deltabothparents
2804 2791
2805 2792 try:
2806 2793 if deltareuse == self.DELTAREUSEALWAYS:
2807 2794 destrevlog._lazydeltabase = True
2808 2795 destrevlog._lazydelta = True
2809 2796 elif deltareuse == self.DELTAREUSESAMEREVS:
2810 2797 destrevlog._lazydeltabase = False
2811 2798 destrevlog._lazydelta = True
2812 2799 elif deltareuse == self.DELTAREUSENEVER:
2813 2800 destrevlog._lazydeltabase = False
2814 2801 destrevlog._lazydelta = False
2815 2802
2816 2803 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2817 2804
2818 2805 self._clone(
2819 2806 tr,
2820 2807 destrevlog,
2821 2808 addrevisioncb,
2822 2809 deltareuse,
2823 2810 forcedeltabothparents,
2824 2811 sidedata_helpers,
2825 2812 )
2826 2813
2827 2814 finally:
2828 2815 destrevlog._lazydelta = oldlazydelta
2829 2816 destrevlog._lazydeltabase = oldlazydeltabase
2830 2817 destrevlog._deltabothparents = oldamd
2831 2818
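# Hypothetical usage sketch of clone(), e.g. while upgrading a repository
# format; `src` and `dst` are assumed revlog instances and `tr` an open
# transaction:
#   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)  # recompute deltas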
2832 2819 def _clone(
2833 2820 self,
2834 2821 tr,
2835 2822 destrevlog,
2836 2823 addrevisioncb,
2837 2824 deltareuse,
2838 2825 forcedeltabothparents,
2839 2826 sidedata_helpers,
2840 2827 ):
2841 2828 """perform the core duty of `revlog.clone` after parameter processing"""
2842 2829 deltacomputer = deltautil.deltacomputer(destrevlog)
2843 2830 index = self.index
2844 2831 for rev in self:
2845 2832 entry = index[rev]
2846 2833
2847 2834 # Some classes override linkrev to take filtered revs into
2848 2835 # account. Use raw entry from index.
2849 2836 flags = entry[0] & 0xFFFF
2850 2837 linkrev = entry[4]
2851 2838 p1 = index[entry[5]][7]
2852 2839 p2 = index[entry[6]][7]
2853 2840 node = entry[7]
2854 2841
2855 2842 # (Possibly) reuse the delta from the revlog if allowed and
2856 2843 # the revlog chunk is a delta.
2857 2844 cachedelta = None
2858 2845 rawtext = None
2859 2846 if deltareuse == self.DELTAREUSEFULLADD:
2860 2847 text, sidedata = self._revisiondata(rev)
2861 2848
2862 2849 if sidedata_helpers is not None:
2863 2850 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2864 2851 self, sidedata_helpers, sidedata, rev
2865 2852 )
2866 2853 flags = flags | new_flags[0] & ~new_flags[1]
2867 2854
2868 2855 destrevlog.addrevision(
2869 2856 text,
2870 2857 tr,
2871 2858 linkrev,
2872 2859 p1,
2873 2860 p2,
2874 2861 cachedelta=cachedelta,
2875 2862 node=node,
2876 2863 flags=flags,
2877 2864 deltacomputer=deltacomputer,
2878 2865 sidedata=sidedata,
2879 2866 )
2880 2867 else:
2881 2868 if destrevlog._lazydelta:
2882 2869 dp = self.deltaparent(rev)
2883 2870 if dp != nullrev:
2884 2871 cachedelta = (dp, bytes(self._chunk(rev)))
2885 2872
2886 2873 sidedata = None
2887 2874 if not cachedelta:
2888 2875 rawtext, sidedata = self._revisiondata(rev)
2889 2876 if sidedata is None:
2890 2877 sidedata = self.sidedata(rev)
2891 2878
2892 2879 if sidedata_helpers is not None:
2893 2880 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2894 2881 self, sidedata_helpers, sidedata, rev
2895 2882 )
2896 2883 flags = flags | new_flags[0] & ~new_flags[1]
2897 2884
2898 2885 with destrevlog._writing(tr):
2899 2886 destrevlog._addrevision(
2900 2887 node,
2901 2888 rawtext,
2902 2889 tr,
2903 2890 linkrev,
2904 2891 p1,
2905 2892 p2,
2906 2893 flags,
2907 2894 cachedelta,
2908 2895 deltacomputer=deltacomputer,
2909 2896 sidedata=sidedata,
2910 2897 )
2911 2898
2912 2899 if addrevisioncb:
2913 2900 addrevisioncb(self, rev, node)
2914 2901
2915 2902 def censorrevision(self, tr, censornode, tombstone=b''):
2916 2903 if self._format_version == REVLOGV0:
2917 2904 raise error.RevlogError(
2918 2905 _(b'cannot censor with version %d revlogs')
2919 2906 % self._format_version
2920 2907 )
2921 2908
2922 2909 censorrev = self.rev(censornode)
2923 2910 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2924 2911
2925 2912 if len(tombstone) > self.rawsize(censorrev):
2926 2913 raise error.Abort(
2927 2914 _(b'censor tombstone must be no longer than censored data')
2928 2915 )
2929 2916
2930 2917 # Rewriting the revlog in place is hard. Our strategy for censoring is
2931 2918 # to create a new revlog, copy all revisions to it, then replace the
2932 2919 # revlogs on transaction close.
2933 2920 #
2934 2921 # This is a bit dangerous. We could easily have a mismatch of state.
2935 2922 newrl = revlog(
2936 2923 self.opener,
2937 2924 target=self.target,
2938 2925 radix=self.radix,
2939 2926 postfix=b'tmpcensored',
2940 2927 censorable=True,
2941 2928 )
2942 2929 newrl._format_version = self._format_version
2943 2930 newrl._format_flags = self._format_flags
2944 2931 newrl._generaldelta = self._generaldelta
2945 2932 newrl._parse_index = self._parse_index
2946 2933
2947 2934 for rev in self.revs():
2948 2935 node = self.node(rev)
2949 2936 p1, p2 = self.parents(node)
2950 2937
2951 2938 if rev == censorrev:
2952 2939 newrl.addrawrevision(
2953 2940 tombstone,
2954 2941 tr,
2955 2942 self.linkrev(censorrev),
2956 2943 p1,
2957 2944 p2,
2958 2945 censornode,
2959 2946 REVIDX_ISCENSORED,
2960 2947 )
2961 2948
2962 2949 if newrl.deltaparent(rev) != nullrev:
2963 2950 raise error.Abort(
2964 2951 _(
2965 2952 b'censored revision stored as delta; '
2966 2953 b'cannot censor'
2967 2954 ),
2968 2955 hint=_(
2969 2956 b'censoring of revlogs is not '
2970 2957 b'fully implemented; please report '
2971 2958 b'this bug'
2972 2959 ),
2973 2960 )
2974 2961 continue
2975 2962
2976 2963 if self.iscensored(rev):
2977 2964 if self.deltaparent(rev) != nullrev:
2978 2965 raise error.Abort(
2979 2966 _(
2980 2967 b'cannot censor due to censored '
2981 2968 b'revision having delta stored'
2982 2969 )
2983 2970 )
2984 2971 rawtext = self._chunk(rev)
2985 2972 else:
2986 2973 rawtext = self.rawdata(rev)
2987 2974
2988 2975 newrl.addrawrevision(
2989 2976 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2990 2977 )
2991 2978
2992 2979 tr.addbackup(self._indexfile, location=b'store')
2993 2980 if not self._inline:
2994 2981 tr.addbackup(self._datafile, location=b'store')
2995 2982
2996 2983 self.opener.rename(newrl._indexfile, self._indexfile)
2997 2984 if not self._inline:
2998 2985 self.opener.rename(newrl._datafile, self._datafile)
2999 2986
3000 2987 self.clearcaches()
3001 2988 self._loadindex()
3002 2989
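# Sketch of the tombstone written above: storageutil.packmeta() is assumed
# to wrap the mapping in the filelog metadata envelope, so the censored
# rawtext looks like
#   b'\x01\ncensored: <tombstone>\n\x01\n'
# which storageutil.iscensoredtext() later recognizes in checkhash().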
3003 2990 def verifyintegrity(self, state):
3004 2991 """Verifies the integrity of the revlog.
3005 2992
3006 2993 Yields ``revlogproblem`` instances describing problems that are
3007 2994 found.
3008 2995 """
3009 2996 dd, di = self.checksize()
3010 2997 if dd:
3011 2998 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3012 2999 if di:
3013 3000 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3014 3001
3015 3002 version = self._format_version
3016 3003
3017 3004 # The verifier tells us what version revlog we should be.
3018 3005 if version != state[b'expectedversion']:
3019 3006 yield revlogproblem(
3020 3007 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3021 3008 % (self.display_id, version, state[b'expectedversion'])
3022 3009 )
3023 3010
3024 3011 state[b'skipread'] = set()
3025 3012 state[b'safe_renamed'] = set()
3026 3013
3027 3014 for rev in self:
3028 3015 node = self.node(rev)
3029 3016
3030 3017 # Verify contents. 4 cases to care about:
3031 3018 #
3032 3019 # common: the most common case
3033 3020 # rename: with a rename
3034 3021 # meta: file content starts with b'\1\n', the metadata
3035 3022 # header defined in filelog.py, but without a rename
3036 3023 # ext: content stored externally
3037 3024 #
3038 3025 # More formally, their differences are shown below:
3039 3026 #
3040 3027 # | common | rename | meta | ext
3041 3028 # -------------------------------------------------------
3042 3029 # flags() | 0 | 0 | 0 | not 0
3043 3030 # renamed() | False | True | False | ?
3044 3031 # rawtext[0:2]=='\1\n'| False | True | True | ?
3045 3032 #
3046 3033 # "rawtext" means the raw text stored in revlog data, which
3047 3034 # could be retrieved by "rawdata(rev)". "text"
3048 3035 # mentioned below is "revision(rev)".
3049 3036 #
3050 3037 # There are 3 different lengths stored physically:
3051 3038 # 1. L1: rawsize, stored in revlog index
3052 3039 # 2. L2: len(rawtext), stored in revlog data
3053 3040 # 3. L3: len(text), stored in revlog data if flags==0, or
3054 3041 # possibly somewhere else if flags!=0
3055 3042 #
3056 3043 # L1 should be equal to L2. L3 could be different from them.
3057 3044 # "text" may or may not affect commit hash depending on flag
3058 3045 # processors (see flagutil.addflagprocessor).
3059 3046 #
3060 3047 # | common | rename | meta | ext
3061 3048 # -------------------------------------------------
3062 3049 # rawsize() | L1 | L1 | L1 | L1
3063 3050 # size() | L1 | L2-LM | L1(*) | L1 (?)
3064 3051 # len(rawtext) | L2 | L2 | L2 | L2
3065 3052 # len(text) | L2 | L2 | L2 | L3
3066 3053 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3067 3054 #
3068 3055 # LM: length of metadata, depending on rawtext
3069 3056 # (*): not ideal, see comment in filelog.size
3070 3057 # (?): could be "- len(meta)" if the resolved content has
3071 3058 # rename metadata
3072 3059 #
3073 3060 # Checks needed to be done:
3074 3061 # 1. length check: L1 == L2, in all cases.
3075 3062 # 2. hash check: depending on flag processor, we may need to
3076 3063 # use either "text" (external), or "rawtext" (in revlog).
3077 3064
3078 3065 try:
3079 3066 skipflags = state.get(b'skipflags', 0)
3080 3067 if skipflags:
3081 3068 skipflags &= self.flags(rev)
3082 3069
3083 3070 _verify_revision(self, skipflags, state, node)
3084 3071
3085 3072 l1 = self.rawsize(rev)
3086 3073 l2 = len(self.rawdata(node))
3087 3074
3088 3075 if l1 != l2:
3089 3076 yield revlogproblem(
3090 3077 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3091 3078 node=node,
3092 3079 )
3093 3080
3094 3081 except error.CensoredNodeError:
3095 3082 if state[b'erroroncensored']:
3096 3083 yield revlogproblem(
3097 3084 error=_(b'censored file data'), node=node
3098 3085 )
3099 3086 state[b'skipread'].add(node)
3100 3087 except Exception as e:
3101 3088 yield revlogproblem(
3102 3089 error=_(b'unpacking %s: %s')
3103 3090 % (short(node), stringutil.forcebytestr(e)),
3104 3091 node=node,
3105 3092 )
3106 3093 state[b'skipread'].add(node)
3107 3094
3108 3095 def storageinfo(
3109 3096 self,
3110 3097 exclusivefiles=False,
3111 3098 sharedfiles=False,
3112 3099 revisionscount=False,
3113 3100 trackedsize=False,
3114 3101 storedsize=False,
3115 3102 ):
3116 3103 d = {}
3117 3104
3118 3105 if exclusivefiles:
3119 3106 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3120 3107 if not self._inline:
3121 3108 d[b'exclusivefiles'].append((self.opener, self._datafile))
3122 3109
3123 3110 if sharedfiles:
3124 3111 d[b'sharedfiles'] = []
3125 3112
3126 3113 if revisionscount:
3127 3114 d[b'revisionscount'] = len(self)
3128 3115
3129 3116 if trackedsize:
3130 3117 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3131 3118
3132 3119 if storedsize:
3133 3120 d[b'storedsize'] = sum(
3134 3121 self.opener.stat(path).st_size for path in self.files()
3135 3122 )
3136 3123
3137 3124 return d
3138 3125
3139 3126 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3140 3127 if not self.hassidedata:
3141 3128 return
3142 3129 # inline revlogs are not yet supported because they suffer from an issue
3143 3130 # when rewriting them (since it is not an append-only operation).
3144 3131 # See issue6485.
3145 3132 assert not self._inline
3146 3133 if not helpers[1] and not helpers[2]:
3147 3134 # Nothing to generate or remove
3148 3135 return
3149 3136
3150 3137 # the changelog implements a "delayed" writing mechanism that assumes
3151 3138 # all index data is written in append mode and is therefore incompatible
3152 3139 # with the seeked writes done in this method. The use of such "delayed"
3153 3140 # writing will soon be removed for revlog versions that support side
3154 3141 # data, so for now, we only keep this simple assert to highlight the
3155 3142 # situation.
3156 3143 delayed = getattr(self, '_delayed', False)
3157 3144 diverted = getattr(self, '_divert', False)
3158 3145 if delayed and not diverted:
3159 3146 msg = "cannot rewrite_sidedata of a delayed revlog"
3160 3147 raise error.ProgrammingError(msg)
3161 3148
3162 3149 new_entries = []
3163 3150 # append the new sidedata
3164 3151 with self._writing(transaction):
3165 3152 ifh, dfh = self._writinghandles
3166 3153 dfh.seek(0, os.SEEK_END)
3167 3154 current_offset = dfh.tell()
3168 3155 for rev in range(startrev, endrev + 1):
3169 3156 entry = self.index[rev]
3170 3157 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3171 3158 store=self,
3172 3159 sidedata_helpers=helpers,
3173 3160 sidedata={},
3174 3161 rev=rev,
3175 3162 )
3176 3163
3177 3164 serialized_sidedata = sidedatautil.serialize_sidedata(
3178 3165 new_sidedata
3179 3166 )
3180 3167 if entry[8] != 0 or entry[9] != 0:
3181 3168 # rewriting entries that already have sidedata is not
3182 3169 # supported yet, because it introduces garbage data in the
3183 3170 # revlog.
3184 3171 msg = b"rewriting existing sidedata is not supported yet"
3185 3172 raise error.Abort(msg)
3186 3173
3187 3174 # Apply (potential) flags to add and to remove after running
3188 3175 # the sidedata helpers
3189 3176 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3190 3177 entry = (new_offset_flags,) + entry[1:8]
3191 3178 entry += (current_offset, len(serialized_sidedata))
3192 3179
3193 3180 # the sidedata computation might have moved the file cursors around
3194 3181 dfh.seek(current_offset, os.SEEK_SET)
3195 3182 dfh.write(serialized_sidedata)
3196 3183 new_entries.append(entry)
3197 3184 current_offset += len(serialized_sidedata)
3198 3185
3199 3186 # rewrite the new index entries
3200 3187 ifh.seek(startrev * self.index.entry_size)
3201 3188 for i, e in enumerate(new_entries):
3202 3189 rev = startrev + i
3203 3190 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3204 3191 packed = self.index.entry_binary(rev)
3205 3192 if rev == 0:
3206 3193 header = self._format_flags | self._format_version
3207 3194 header = self.index.pack_header(header)
3208 3195 packed = header + packed
3209 3196 ifh.write(packed)
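One subtlety in the rewrite loop above: entry[0] packs the data offset in its upper 48 bits and the revision flags in its lower 16, and Python's & binds tighter than |, so the flag update groups as entry[0] | (flags[0] & ~flags[1]). A standalone sketch of that arithmetic, with invented values (REVIDX_SIDEDATA_DUMMY exists only for this illustration):

    # layout follows the v1/v2 index entry: 6 bytes offset, 2 bytes
    # flags, packed into one big-endian 64-bit integer
    REVIDX_SIDEDATA_DUMMY = 1 << 3  # hypothetical flag bit

    offset = 0x1234
    existing_flags = 0x0001
    offset_flags = (offset << 16) | existing_flags

    to_add, to_remove = REVIDX_SIDEDATA_DUMMY, 0
    # & binds tighter than |: offset_flags | (to_add & ~to_remove)
    new_offset_flags = offset_flags | to_add & ~to_remove

    assert new_offset_flags >> 16 == offset  # offset bits untouched
    assert new_offset_flags & 0xFFFF == existing_flags | to_add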
@@ -1,123 +1,153
1 1 # constants.py - constants used for revlog logic
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import struct
13 13
14 14 from ..interfaces import repository
15 15
16 16 ### Internal utility constants
17 17
18 18 KIND_CHANGELOG = 1001 # over 256 so it cannot collide with a single byte value
19 19 KIND_MANIFESTLOG = 1002
20 20 KIND_FILELOG = 1003
21 21 KIND_OTHER = 1004
22 22
23 23 ALL_KINDS = {
24 24 KIND_CHANGELOG,
25 25 KIND_MANIFESTLOG,
26 26 KIND_FILELOG,
27 27 KIND_OTHER,
28 28 }
29 29
30 30 ### main revlog header
31 31
32 32 INDEX_HEADER = struct.Struct(b">I")
33 33
34 34 ## revlog version
35 35 REVLOGV0 = 0
36 36 REVLOGV1 = 1
37 37 # Dummy value until file format is finalized.
38 38 REVLOGV2 = 0xDEAD
39 39
40 40 ## global revlog header flags
41 41 # Shared across v1 and v2.
42 42 FLAG_INLINE_DATA = 1 << 16
43 43 # Only used by v1, implied by v2.
44 44 FLAG_GENERALDELTA = 1 << 17
45 45 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
46 46 REVLOG_DEFAULT_FORMAT = REVLOGV1
47 47 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
48 48 REVLOGV0_FLAGS = 0
49 49 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
50 50 REVLOGV2_FLAGS = FLAG_INLINE_DATA
51 51
52 52 ### individual entry
53 53
54 54 ## index v0:
55 55 # 4 bytes: offset
56 56 # 4 bytes: compressed length
57 57 # 4 bytes: base rev
58 58 # 4 bytes: link rev
59 59 # 20 bytes: parent 1 nodeid
60 60 # 20 bytes: parent 2 nodeid
61 61 # 20 bytes: nodeid
62 62 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
63 63
64 64 ## index v1
65 65 # 6 bytes: offset
66 66 # 2 bytes: flags
67 67 # 4 bytes: compressed length
68 68 # 4 bytes: uncompressed length
69 69 # 4 bytes: base rev
70 70 # 4 bytes: link rev
71 71 # 4 bytes: parent 1 rev
72 72 # 4 bytes: parent 2 rev
73 73 # 32 bytes: nodeid
74 74 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
75 75 assert INDEX_ENTRY_V1.size == 32 * 2
76 76
77 77 # 6 bytes: offset
78 78 # 2 bytes: flags
79 79 # 4 bytes: compressed length
80 80 # 4 bytes: uncompressed length
81 81 # 4 bytes: base rev
82 82 # 4 bytes: link rev
83 83 # 4 bytes: parent 1 rev
84 84 # 4 bytes: parent 2 rev
85 85 # 32 bytes: nodeid
86 86 # 8 bytes: sidedata offset
87 87 # 4 bytes: sidedata compressed length
88 88 # 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
89 89 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQi20x")
90 90 assert INDEX_ENTRY_V2.size == 32 * 3
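These Struct definitions make the byte layout checkable: the leading Q packs the 6-byte offset and 2-byte flag field into one big-endian 64-bit integer, and the pad bytes (12x, 20x) round entries up to 64 and 96 bytes. A quick round-trip sketch for the v1 layout, with all field values invented:

    import struct

    INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")

    # invented sample metadata for one revision
    offset, flags = 4096, 0
    comp_len, uncomp_len = 120, 300
    base_rev, link_rev, p1, p2 = 7, 7, 6, -1
    node = b'\x11' * 20  # 20-byte nodeid; the 12x pad grows it to 32

    raw = INDEX_ENTRY_V1.pack(
        (offset << 16) | flags, comp_len, uncomp_len,
        base_rev, link_rev, p1, p2, node,
    )
    assert len(raw) == 64  # matches the 32 * 2 assertion above

    fields = INDEX_ENTRY_V1.unpack(raw)
    assert fields[0] >> 16 == offset and fields[0] & 0xFFFF == flags
    assert fields[7] == node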
91 91
92 92 # revlog index flags
93 93
94 94 # For historical reasons, revlog's internal flags were exposed via the
95 95 # wire protocol and are even exposed in parts of the storage APIs.
96 96
97 97 # revision has censor metadata, must be verified
98 98 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
99 99 # revision hash does not match data (narrowhg)
100 100 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
101 101 # revision data is stored externally
102 102 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
103 103 # revision changes files in a way that could affect copy tracing.
104 104 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
105 105 REVIDX_DEFAULT_FLAGS = 0
106 106 # stable order in which flags need to be processed and their processors applied
107 107 REVIDX_FLAGS_ORDER = [
108 108 REVIDX_ISCENSORED,
109 109 REVIDX_ELLIPSIS,
110 110 REVIDX_EXTSTORED,
111 111 REVIDX_HASCOPIESINFO,
112 112 ]
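Keeping REVIDX_FLAGS_ORDER as an explicit list makes processor application deterministic regardless of which bits happen to be set. A sketch of a read-side loop, assuming a processors registry that maps each flag bit to a (read, write, raw) triple as flagutil does (the names here are illustrative, not the real flagutil API):

    def run_read_processors(rl, text, flags, processors):
        # apply each registered read processor in the stable declared
        # order, accumulating whether the hash can still be validated
        validatehash = True
        for flag in REVIDX_FLAGS_ORDER:
            if not flags & flag:
                continue
            readfn = processors[flag][0]
            text, valid = readfn(rl, text)
            validatehash = validatehash and valid
        return text, validatehash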
113 113
114 114 # bitmask of flags that could cause rawtext content changes
115 115 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
116 116
117 117 SUPPORTED_FLAGS = {
118 118 REVLOGV0: REVLOGV0_FLAGS,
119 119 REVLOGV1: REVLOGV1_FLAGS,
120 120 REVLOGV2: REVLOGV2_FLAGS,
121 121 }
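SUPPORTED_FLAGS is what lets a loader reject a header carrying bits the declared version does not understand. A minimal validation sketch, assuming the 4-byte header was already read with INDEX_HEADER (the function name and error wording are invented):

    def check_header_flags(header):
        # sketch: split a revlog header into (version, flags), then
        # reject any flag bit the declared version does not support
        version = header & 0xFFFF
        flags = header & ~0xFFFF
        if version not in SUPPORTED_FLAGS:
            raise ValueError('unknown revlog version %d' % version)
        unknown = flags & ~SUPPORTED_FLAGS[version]
        if unknown:
            raise ValueError('unknown flags %#06x in revlog version %d'
                             % (unknown >> 16, version))
        return version, flags

    # e.g. a v1 header with inline data set:
    assert check_header_flags(REVLOGV1 | FLAG_INLINE_DATA) == (
        REVLOGV1, FLAG_INLINE_DATA)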
122 122
123 _no = lambda flags: False
124 _yes = lambda flags: True
125
126
127 def _from_flag(flag):
128 return lambda flags: bool(flags & flag)
129
130
131 FEATURES_BY_VERSION = {
132 REVLOGV0: {
133 b'inline': _no,
134 b'generaldelta': _no,
135 b'sidedata': False,
136 },
137 REVLOGV1: {
138 b'inline': _from_flag(FLAG_INLINE_DATA),
139 b'generaldelta': _from_flag(FLAG_GENERALDELTA),
140 b'sidedata': False,
141 },
142 REVLOGV2: {
143 # There is a bug in the transaction handling when going from an
144 # inline revlog to a separate index and data file. Turn it off until
145 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
146 # See issue6485
147 b'inline': _no,
148 b'generaldelta': _yes,
149 b'sidedata': True,
150 },
151 }
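This table is the heart of the change: instead of per-version if/else chains, the index loader can consult one dict. Note the asymmetry: b'inline' and b'generaldelta' map to predicates evaluated against the header flags, while b'sidedata' is a plain bool. A sketch of looking it up (resolve_features is a hypothetical helper, not code from this series):

    def resolve_features(version, flags):
        # turn the per-version table into concrete booleans for one
        # parsed revlog header
        features = FEATURES_BY_VERSION[version]
        return {
            b'inline': features[b'inline'](flags),
            b'generaldelta': features[b'generaldelta'](flags),
            b'sidedata': features[b'sidedata'],  # constant, not a predicate
        }

    assert resolve_features(REVLOGV1, FLAG_INLINE_DATA) == {
        b'inline': True, b'generaldelta': False, b'sidedata': False}
    assert resolve_features(REVLOGV2, 0) == {
        b'inline': False,  # forced off until issue6485 is fixed
        b'generaldelta': True, b'sidedata': True}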
152
123 153 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000