revlog: use "entry_point" phrasing for loading the revlog...
marmoute
r47946:21ef5f75 default
@@ -1,3180 +1,3179 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 INDEX_HEADER,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_HASCOPIESINFO,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 )
59 59 from .thirdparty import attr
60 60 from . import (
61 61 ancestor,
62 62 dagop,
63 63 error,
64 64 mdiff,
65 65 policy,
66 66 pycompat,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .interfaces import (
71 71 repository,
72 72 util as interfaceutil,
73 73 )
74 74 from .revlogutils import (
75 75 deltas as deltautil,
76 76 flagutil,
77 77 nodemap as nodemaputil,
78 78 revlogv0,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanket usage of all the names to prevent pyflakes "unused import" warnings
87 87 # We need these names available in the module for extensions.
88 88
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_HASCOPIESINFO
102 102 REVIDX_EXTSTORED
103 103 REVIDX_DEFAULT_FLAGS
104 104 REVIDX_FLAGS_ORDER
105 105 REVIDX_RAWTEXT_CHANGING_FLAGS
106 106
107 107 parsers = policy.importmod('parsers')
108 108 rustancestor = policy.importrust('ancestor')
109 109 rustdagop = policy.importrust('dagop')
110 110 rustrevlog = policy.importrust('revlog')
111 111
112 112 # Aliased for performance.
113 113 _zlibdecompress = zlib.decompress
114 114
115 115 # max size of revlog with inline data
116 116 _maxinline = 131072
117 117 _chunksize = 1048576
118 118
119 119 # Flag processors for REVIDX_ELLIPSIS.
120 120 def ellipsisreadprocessor(rl, text):
121 121 return text, False
122 122
123 123
124 124 def ellipsiswriteprocessor(rl, text):
125 125 return text, False
126 126
127 127
128 128 def ellipsisrawprocessor(rl, text):
129 129 return False
130 130
131 131
132 132 ellipsisprocessor = (
133 133 ellipsisreadprocessor,
134 134 ellipsiswriteprocessor,
135 135 ellipsisrawprocessor,
136 136 )
137 137
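The three functions above form the (read, write, raw) triple that flagutil expects for a flag processor: the read and write variants return the transformed text plus a boolean telling the revlog whether the result can be checked against the node hash, while the raw variant only reports hash compatibility. A minimal sketch of a custom triple, with illustrative names (real extensions register through flagutil.insertflagprocessor, as seen later in _init_opts):

def myreadprocessor(rl, text):
    # transform stored text into consumable text; the boolean says
    # whether the returned text can be checked against the node hash
    return text, True

def mywriteprocessor(rl, text):
    # transform incoming text into its stored form
    return text, True

def myrawprocessor(rl, text):
    # report whether the raw stored text is hash-checkable
    return True

myprocessor = (myreadprocessor, mywriteprocessor, myrawprocessor)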
138 138
139 139 def offset_type(offset, type):
140 140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 141 raise ValueError(b'unknown revlog index flags')
142 142 return int(int(offset) << 16 | type)
143 143
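offset_type() packs the byte offset into the high bits and the 16 flag bits into the low bits of the integer stored as the first field of every index entry; start() and flags() further down unpack it with >> 16 and & 0xFFFF. A quick illustration:

packed = offset_type(1024, 0)   # offset 1024, no REVIDX_* flags set
assert packed >> 16 == 1024     # what start(rev) recovers
assert packed & 0xFFFF == 0     # what flags(rev) recovers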
144 144
145 145 def _verify_revision(rl, skipflags, state, node):
146 146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 147 point for extensions to influence the operation."""
148 148 if skipflags:
149 149 state[b'skipread'].add(node)
150 150 else:
151 151 # Side-effect: read content and verify hash.
152 152 rl.revision(node)
153 153
154 154
155 155 # True if a fast implementation for persistent-nodemap is available
156 156 #
157 157 # We also consider the "pure" python implementation to be "fast", because
158 158 # people using it don't really have performance considerations (and a
159 159 # wheelbarrow of other slowness sources)
160 160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 161 parsers, 'BaseIndexObject'
162 162 )
163 163
164 164
165 165 @attr.s(slots=True, frozen=True)
166 166 class _revisioninfo(object):
167 167 """Information about a revision that allows building its fulltext
168 168 node: expected hash of the revision
169 169 p1, p2: parent revs of the revision
170 170 btext: built text cache consisting of a one-element list
171 171 cachedelta: (baserev, uncompressed_delta) or None
172 172 flags: flags associated with the revision storage
173 173
174 174 One of btext[0] or cachedelta must be set.
175 175 """
176 176
177 177 node = attr.ib()
178 178 p1 = attr.ib()
179 179 p2 = attr.ib()
180 180 btext = attr.ib()
181 181 textlen = attr.ib()
182 182 cachedelta = attr.ib()
183 183 flags = attr.ib()
184 184
185 185
186 186 @interfaceutil.implementer(repository.irevisiondelta)
187 187 @attr.s(slots=True)
188 188 class revlogrevisiondelta(object):
189 189 node = attr.ib()
190 190 p1node = attr.ib()
191 191 p2node = attr.ib()
192 192 basenode = attr.ib()
193 193 flags = attr.ib()
194 194 baserevisionsize = attr.ib()
195 195 revision = attr.ib()
196 196 delta = attr.ib()
197 197 sidedata = attr.ib()
198 198 protocol_flags = attr.ib()
199 199 linknode = attr.ib(default=None)
200 200
201 201
202 202 @interfaceutil.implementer(repository.iverifyproblem)
203 203 @attr.s(frozen=True)
204 204 class revlogproblem(object):
205 205 warning = attr.ib(default=None)
206 206 error = attr.ib(default=None)
207 207 node = attr.ib(default=None)
208 208
209 209
210 210 def parse_index_v1(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline)
213 213 return index, cache
214 214
215 215
216 216 def parse_index_v2(data, inline):
217 217 # call the C implementation to parse the index data
218 218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 219 return index, cache
220 220
221 221
222 222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223 223
224 224 def parse_index_v1_nodemap(data, inline):
225 225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 226 return index, cache
227 227
228 228
229 229 else:
230 230 parse_index_v1_nodemap = None
231 231
232 232
233 233 def parse_index_v1_mixed(data, inline):
234 234 index, cache = parse_index_v1(data, inline)
235 235 return rustrevlog.MixedIndex(index), cache
236 236
237 237
238 238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 239 # signed integer)
240 240 _maxentrysize = 0x7FFFFFFF
241 241
242 242
243 243 class revlog(object):
244 244 """
245 245 the underlying revision storage object
246 246
247 247 A revlog consists of two parts, an index and the revision data.
248 248
249 249 The index is a file with a fixed record size containing
250 250 information on each revision, including its nodeid (hash), the
251 251 nodeids of its parents, the position and offset of its data within
252 252 the data file, and the revision it's based on. Finally, each entry
253 253 contains a linkrev entry that can serve as a pointer to external
254 254 data.
255 255
256 256 The revision data itself is a linear collection of data chunks.
257 257 Each chunk represents a revision and is usually represented as a
258 258 delta against the previous chunk. To bound lookup time, runs of
259 259 deltas are limited to about 2 times the length of the original
260 260 version data. This makes retrieval of a version proportional to
261 261 its size, or O(1) relative to the number of revisions.
262 262
263 263 Both pieces of the revlog are written to in an append-only
264 264 fashion, which means we never need to rewrite a file to insert or
265 265 remove data, and can use some simple techniques to avoid the need
266 266 for locking while reading.
267 267
268 268 If checkambig, indexfile is opened with checkambig=True at
269 269 writing, to avoid file stat ambiguity.
270 270
271 271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 272 index will be mmapped rather than read if it is larger than the
273 273 configured threshold.
274 274
275 275 If censorable is True, the revlog can have censored revisions.
276 276
277 277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 278 compression for the data content.
279 279
280 280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 281 file handle, a filename, and an expected position. It should check whether
282 282 the current position in the file handle is valid, and log/warn/fail (by
283 283 raising).
284 284 """
285 285
286 286 _flagserrorclass = error.RevlogError
287 287
288 288 def __init__(
289 289 self,
290 290 opener,
291 291 target,
292 292 radix,
293 293 postfix=None,
294 294 checkambig=False,
295 295 mmaplargeindex=False,
296 296 censorable=False,
297 297 upperboundcomp=None,
298 298 persistentnodemap=False,
299 299 concurrencychecker=None,
300 300 ):
301 301 """
302 302 create a revlog object
303 303
304 304 opener is a function that abstracts the file opening operation
305 305 and can be used to implement COW semantics or the like.
306 306
307 307 `target`: a (KIND, ID) tuple that identifies the content stored in
308 308 this revlog. It helps the rest of the code understand what the revlog
309 309 is about without having to resort to heuristics and index filename
310 310 analysis. Note that this must be reliably set by normal code, but
311 311 that test, debug, or performance measurement code might not set it to
312 312 an accurate value.
313 313 """
314 314 self.upperboundcomp = upperboundcomp
315 315
316 316 self.radix = radix
317 317
318 318 self._indexfile = None
319 319 self._datafile = None
320 320 self._nodemap_file = None
321 321 self.postfix = postfix
322 322 self.opener = opener
323 323 if persistentnodemap:
324 324 self._nodemap_file = nodemaputil.get_nodemap_file(self)
325 325
326 326 assert target[0] in ALL_KINDS
327 327 assert len(target) == 2
328 328 self.target = target
329 329 # When True, indexfile is opened with checkambig=True at writing, to
330 330 # avoid file stat ambiguity.
331 331 self._checkambig = checkambig
332 332 self._mmaplargeindex = mmaplargeindex
333 333 self._censorable = censorable
334 334 # 3-tuple of (node, rev, text) for a raw revision.
335 335 self._revisioncache = None
336 336 # Maps rev to chain base rev.
337 337 self._chainbasecache = util.lrucachedict(100)
338 338 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
339 339 self._chunkcache = (0, b'')
340 340 # How much data to read and cache into the raw revlog data cache.
341 341 self._chunkcachesize = 65536
342 342 self._maxchainlen = None
343 343 self._deltabothparents = True
344 344 self.index = None
345 345 self._nodemap_docket = None
346 346 # Mapping of partial identifiers to full nodes.
347 347 self._pcache = {}
348 348 # Mapping of revision integer to full node.
349 349 self._compengine = b'zlib'
350 350 self._compengineopts = {}
351 351 self._maxdeltachainspan = -1
352 352 self._withsparseread = False
353 353 self._sparserevlog = False
354 354 self._srdensitythreshold = 0.50
355 355 self._srmingapsize = 262144
356 356
357 357 # Make copy of flag processors so each revlog instance can support
358 358 # custom flags.
359 359 self._flagprocessors = dict(flagutil.flagprocessors)
360 360
361 361 # 2-tuple of file handles being used for active writing.
362 362 self._writinghandles = None
363 363
364 364 self._loadindex()
365 365
366 366 self._concurrencychecker = concurrencychecker
367 367
368 368 def _init_opts(self):
369 369 """process options (from above/config) to setup associated default revlog mode
370 370
371 371 These values might be affected when actually reading on disk information.
372 372
373 373 The relevant values are returned for use in _loadindex().
374 374
375 375 * newversionflags:
376 376 version header to use if we need to create a new revlog
377 377
378 378 * mmapindexthreshold:
379 379 minimal index size at which to start using mmap
380 380
381 381 * force_nodemap:
382 382 force the usage of a "development" version of the nodemap code
383 383 """
384 384 mmapindexthreshold = None
385 385 opts = self.opener.options
386 386
387 387 if b'revlogv2' in opts:
388 388 new_header = REVLOGV2 | FLAG_INLINE_DATA
389 389 elif b'revlogv1' in opts:
390 390 new_header = REVLOGV1 | FLAG_INLINE_DATA
391 391 if b'generaldelta' in opts:
392 392 new_header |= FLAG_GENERALDELTA
393 393 elif b'revlogv0' in self.opener.options:
394 394 new_header = REVLOGV0
395 395 else:
396 396 new_header = REVLOG_DEFAULT_VERSION
397 397
398 398 if b'chunkcachesize' in opts:
399 399 self._chunkcachesize = opts[b'chunkcachesize']
400 400 if b'maxchainlen' in opts:
401 401 self._maxchainlen = opts[b'maxchainlen']
402 402 if b'deltabothparents' in opts:
403 403 self._deltabothparents = opts[b'deltabothparents']
404 404 self._lazydelta = bool(opts.get(b'lazydelta', True))
405 405 self._lazydeltabase = False
406 406 if self._lazydelta:
407 407 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
408 408 if b'compengine' in opts:
409 409 self._compengine = opts[b'compengine']
410 410 if b'zlib.level' in opts:
411 411 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
412 412 if b'zstd.level' in opts:
413 413 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
414 414 if b'maxdeltachainspan' in opts:
415 415 self._maxdeltachainspan = opts[b'maxdeltachainspan']
416 416 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
417 417 mmapindexthreshold = opts[b'mmapindexthreshold']
418 418 self.hassidedata = bool(opts.get(b'side-data', False))
419 419 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
420 420 withsparseread = bool(opts.get(b'with-sparse-read', False))
421 421 # sparse-revlog forces sparse-read
422 422 self._withsparseread = self._sparserevlog or withsparseread
423 423 if b'sparse-read-density-threshold' in opts:
424 424 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
425 425 if b'sparse-read-min-gap-size' in opts:
426 426 self._srmingapsize = opts[b'sparse-read-min-gap-size']
427 427 if opts.get(b'enableellipsis'):
428 428 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
429 429
430 430 # revlog v0 doesn't have flag processors
431 431 for flag, processor in pycompat.iteritems(
432 432 opts.get(b'flagprocessors', {})
433 433 ):
434 434 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
435 435
436 436 if self._chunkcachesize <= 0:
437 437 raise error.RevlogError(
438 438 _(b'revlog chunk cache size %r is not greater than 0')
439 439 % self._chunkcachesize
440 440 )
441 441 elif self._chunkcachesize & (self._chunkcachesize - 1):
442 442 raise error.RevlogError(
443 443 _(b'revlog chunk cache size %r is not a power of 2')
444 444 % self._chunkcachesize
445 445 )
446 446 force_nodemap = opts.get(b'devel-force-nodemap', False)
447 447 return new_header, mmapindexthreshold, force_nodemap
448 448
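The chunk cache size validation above relies on the classic bit trick for detecting powers of two: for x > 0, x & (x - 1) clears the lowest set bit, so the expression is zero exactly when a single bit is set. For example:

for ok in (1, 2, 4096, 65536):
    assert ok & (ok - 1) == 0       # powers of two pass
for bad in (3, 6, 65535):
    assert bad & (bad - 1) != 0     # everything else is rejected
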
449 449 def _get_data(self, filepath, mmap_threshold):
450 450 """return a file content with or without mmap
451 451
452 452 If the file is missing, return an empty string"""
453 453 try:
454 454 with self.opener(filepath) as fp:
455 455 if mmap_threshold is not None:
456 456 file_size = self.opener.fstat(fp).st_size
457 457 if file_size >= mmap_threshold:
458 458 # TODO: should .close() to release resources without
459 459 # relying on Python GC
460 460 return util.buffer(util.mmapread(fp))
461 461 return fp.read()
462 462 except IOError as inst:
463 463 if inst.errno != errno.ENOENT:
464 464 raise
465 465 return b''
466 466
467 467 def _loadindex(self):
468 468
469 469 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
470 470
471 471 if self.postfix is None:
472 index_file = b'%s.i' % self.radix
473 data_file = b'%s.d' % self.radix
474 elif self.postfix == b'a':
475 index_file = b'%s.i.a' % self.radix
476 data_file = b'%s.d' % self.radix
472 entry_point = b'%s.i' % self.radix
477 473 else:
478 index_file = b'%s.i.%s' % (self.radix, self.postfix)
479 data_file = b'%s.d.%s' % (self.radix, self.postfix)
480
481 self._indexfile = index_file
482 self._datafile = data_file
474 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
483 475
484 476 indexdata = b''
485 477 self._initempty = True
486 indexdata = self._get_data(self._indexfile, mmapindexthreshold)
478 indexdata = self._get_data(entry_point, mmapindexthreshold)
487 479 if len(indexdata) > 0:
488 480 header = INDEX_HEADER.unpack(indexdata[:4])[0]
489 481 self._initempty = False
490 482 else:
491 483 header = new_header
492 484
493 485 self._format_flags = header & ~0xFFFF
494 486 self._format_version = header & 0xFFFF
495 487
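# Worked example of the split above, using constants imported at the top
# of this module (the version number occupies the low 16 bits, while
# FLAG_INLINE_DATA and FLAG_GENERALDELTA live in the high bits):
#   header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
#   header & 0xFFFF   -> REVLOGV1, the format version
#   header & ~0xFFFF  -> FLAG_INLINE_DATA | FLAG_GENERALDELTA, the flags
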
496 488 if self._format_version == REVLOGV0:
497 489 if self._format_flags:
498 490 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
499 491 display_flag = self._format_flags >> 16
500 492 msg %= (display_flag, self._format_version, self.display_id)
501 493 raise error.RevlogError(msg)
502 494
503 495 self._inline = False
504 496 self._generaldelta = False
505 497
506 498 elif self._format_version == REVLOGV1:
507 499 if self._format_flags & ~REVLOGV1_FLAGS:
508 500 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
509 501 display_flag = self._format_flags >> 16
510 502 msg %= (display_flag, self._format_version, self.display_id)
511 503 raise error.RevlogError(msg)
512 504
513 505 self._inline = self._format_flags & FLAG_INLINE_DATA
514 506 self._generaldelta = self._format_flags & FLAG_GENERALDELTA
515 507
516 508 elif self._format_version == REVLOGV2:
517 509 if self._format_flags & ~REVLOGV2_FLAGS:
518 510 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
519 511 display_flag = self._format_flags >> 16
520 512 msg %= (display_flag, self._format_version, self.display_id)
521 513 raise error.RevlogError(msg)
522 514
523 515 # There is a bug in the transaction handling when going from an
524 516 # inline revlog to a separate index and data file. Turn it off until
525 517 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
526 518 # See issue6485
527 519 self._inline = False
528 520 # generaldelta implied by version 2 revlogs.
529 521 self._generaldelta = True
530 522
531 523 else:
532 524 msg = _(b'unknown version (%d) in revlog %s')
533 525 msg %= (self._format_version, self.display_id)
534 526 raise error.RevlogError(msg)
535 527
528 self._indexfile = entry_point
529
530 if self.postfix is None or self.postfix == b'a':
531 self._datafile = b'%s.d' % self.radix
532 else:
533 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
534
536 535 self.nodeconstants = sha1nodeconstants
537 536 self.nullid = self.nodeconstants.nullid
538 537
539 538 # sparse-revlog can't be on without general-delta (issue6056)
540 539 if not self._generaldelta:
541 540 self._sparserevlog = False
542 541
543 542 self._storedeltachains = True
544 543
545 544 devel_nodemap = (
546 545 self._nodemap_file
547 546 and force_nodemap
548 547 and parse_index_v1_nodemap is not None
549 548 )
550 549
551 550 use_rust_index = False
552 551 if rustrevlog is not None:
553 552 if self._nodemap_file is not None:
554 553 use_rust_index = True
555 554 else:
556 555 use_rust_index = self.opener.options.get(b'rust.index')
557 556
558 557 self._parse_index = parse_index_v1
559 558 if self._format_version == REVLOGV0:
560 559 self._parse_index = revlogv0.parse_index_v0
561 560 elif self._format_version == REVLOGV2:
562 561 self._parse_index = parse_index_v2
563 562 elif devel_nodemap:
564 563 self._parse_index = parse_index_v1_nodemap
565 564 elif use_rust_index:
566 565 self._parse_index = parse_index_v1_mixed
567 566 try:
568 567 d = self._parse_index(indexdata, self._inline)
569 568 index, _chunkcache = d
570 569 use_nodemap = (
571 570 not self._inline
572 571 and self._nodemap_file is not None
573 572 and util.safehasattr(index, 'update_nodemap_data')
574 573 )
575 574 if use_nodemap:
576 575 nodemap_data = nodemaputil.persisted_data(self)
577 576 if nodemap_data is not None:
578 577 docket = nodemap_data[0]
579 578 if (
580 579 len(d[0]) > docket.tip_rev
581 580 and d[0][docket.tip_rev][7] == docket.tip_node
582 581 ):
583 582 # no changelog tampering
584 583 self._nodemap_docket = docket
585 584 index.update_nodemap_data(*nodemap_data)
586 585 except (ValueError, IndexError):
587 586 raise error.RevlogError(
588 587 _(b"index %s is corrupted") % self.display_id
589 588 )
590 589 self.index, self._chunkcache = d
591 590 if not self._chunkcache:
592 591 self._chunkclear()
593 592 # revnum -> (chain-length, sum-delta-length)
594 593 self._chaininfocache = util.lrucachedict(500)
595 594 # revlog header -> revlog compressor
596 595 self._decompressors = {}
597 596
598 597 @util.propertycache
599 598 def revlog_kind(self):
600 599 return self.target[0]
601 600
602 601 @util.propertycache
603 602 def display_id(self):
604 603 """The public facing "ID" of the revlog that we use in message"""
605 604 # Maybe we should build a user facing representation of
606 605 # revlog.target instead of using `self.radix`
607 606 return self.radix
608 607
609 608 @util.propertycache
610 609 def _compressor(self):
611 610 engine = util.compengines[self._compengine]
612 611 return engine.revlogcompressor(self._compengineopts)
613 612
614 613 def _indexfp(self, mode=b'r'):
615 614 """file object for the revlog's index file"""
616 615 args = {'mode': mode}
617 616 if mode != b'r':
618 617 args['checkambig'] = self._checkambig
619 618 if mode == b'w':
620 619 args['atomictemp'] = True
621 620 return self.opener(self._indexfile, **args)
622 621
623 622 def _datafp(self, mode=b'r'):
624 623 """file object for the revlog's data file"""
625 624 return self.opener(self._datafile, mode=mode)
626 625
627 626 @contextlib.contextmanager
628 627 def _datareadfp(self, existingfp=None):
629 628 """file object suitable to read data"""
630 629 # Use explicit file handle, if given.
631 630 if existingfp is not None:
632 631 yield existingfp
633 632
634 633 # Use a file handle being actively used for writes, if available.
635 634 # There is some danger to doing this because reads will seek the
636 635 # file. However, _writeentry() performs a SEEK_END before all writes,
637 636 # so we should be safe.
638 637 elif self._writinghandles:
639 638 if self._inline:
640 639 yield self._writinghandles[0]
641 640 else:
642 641 yield self._writinghandles[1]
643 642
644 643 # Otherwise open a new file handle.
645 644 else:
646 645 if self._inline:
647 646 func = self._indexfp
648 647 else:
649 648 func = self._datafp
650 649 with func() as fp:
651 650 yield fp
652 651
653 652 def tiprev(self):
654 653 return len(self.index) - 1
655 654
656 655 def tip(self):
657 656 return self.node(self.tiprev())
658 657
659 658 def __contains__(self, rev):
660 659 return 0 <= rev < len(self)
661 660
662 661 def __len__(self):
663 662 return len(self.index)
664 663
665 664 def __iter__(self):
666 665 return iter(pycompat.xrange(len(self)))
667 666
668 667 def revs(self, start=0, stop=None):
669 668 """iterate over all rev in this revlog (from start to stop)"""
670 669 return storageutil.iterrevs(len(self), start=start, stop=stop)
671 670
672 671 @property
673 672 def nodemap(self):
674 673 msg = (
675 674 b"revlog.nodemap is deprecated, "
676 675 b"use revlog.index.[has_node|rev|get_rev]"
677 676 )
678 677 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
679 678 return self.index.nodemap
680 679
681 680 @property
682 681 def _nodecache(self):
683 682 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
684 683 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
685 684 return self.index.nodemap
686 685
687 686 def hasnode(self, node):
688 687 try:
689 688 self.rev(node)
690 689 return True
691 690 except KeyError:
692 691 return False
693 692
694 693 def candelta(self, baserev, rev):
695 694 """whether two revisions (baserev, rev) can be delta-ed or not"""
696 695 # Disable delta if either rev requires a content-changing flag
697 696 # processor (ex. LFS). This is because such flag processor can alter
698 697 # the rawtext content that the delta will be based on, and two clients
699 698 # could have the same revlog node with different flags (i.e. different
700 699 # rawtext contents) and the delta could be incompatible.
701 700 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
702 701 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
703 702 ):
704 703 return False
705 704 return True
706 705
707 706 def update_caches(self, transaction):
708 707 if self._nodemap_file is not None:
709 708 if transaction is None:
710 709 nodemaputil.update_persistent_nodemap(self)
711 710 else:
712 711 nodemaputil.setup_persistent_nodemap(transaction, self)
713 712
714 713 def clearcaches(self):
715 714 self._revisioncache = None
716 715 self._chainbasecache.clear()
717 716 self._chunkcache = (0, b'')
718 717 self._pcache = {}
719 718 self._nodemap_docket = None
720 719 self.index.clearcaches()
721 720 # The python code is the one responsible for validating the docket, so we
722 721 # end up having to refresh it here.
723 722 use_nodemap = (
724 723 not self._inline
725 724 and self._nodemap_file is not None
726 725 and util.safehasattr(self.index, 'update_nodemap_data')
727 726 )
728 727 if use_nodemap:
729 728 nodemap_data = nodemaputil.persisted_data(self)
730 729 if nodemap_data is not None:
731 730 self._nodemap_docket = nodemap_data[0]
732 731 self.index.update_nodemap_data(*nodemap_data)
733 732
734 733 def rev(self, node):
735 734 try:
736 735 return self.index.rev(node)
737 736 except TypeError:
738 737 raise
739 738 except error.RevlogError:
740 739 # parsers.c radix tree lookup failed
741 740 if (
742 741 node == self.nodeconstants.wdirid
743 742 or node in self.nodeconstants.wdirfilenodeids
744 743 ):
745 744 raise error.WdirUnsupported
746 745 raise error.LookupError(node, self.display_id, _(b'no node'))
747 746
748 747 # Accessors for index entries.
749 748
750 749 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
751 750 # are flags.
752 751 def start(self, rev):
753 752 return int(self.index[rev][0] >> 16)
754 753
755 754 def flags(self, rev):
756 755 return self.index[rev][0] & 0xFFFF
757 756
758 757 def length(self, rev):
759 758 return self.index[rev][1]
760 759
761 760 def sidedata_length(self, rev):
762 761 if not self.hassidedata:
763 762 return 0
764 763 return self.index[rev][9]
765 764
766 765 def rawsize(self, rev):
767 766 """return the length of the uncompressed text for a given revision"""
768 767 l = self.index[rev][2]
769 768 if l >= 0:
770 769 return l
771 770
772 771 t = self.rawdata(rev)
773 772 return len(t)
774 773
775 774 def size(self, rev):
776 775 """length of non-raw text (processed by a "read" flag processor)"""
777 776 # fast path: if no "read" flag processor could change the content,
778 777 # size is rawsize. note: ELLIPSIS is known to not change the content.
779 778 flags = self.flags(rev)
780 779 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
781 780 return self.rawsize(rev)
782 781
783 782 return len(self.revision(rev, raw=False))
784 783
785 784 def chainbase(self, rev):
786 785 base = self._chainbasecache.get(rev)
787 786 if base is not None:
788 787 return base
789 788
790 789 index = self.index
791 790 iterrev = rev
792 791 base = index[iterrev][3]
793 792 while base != iterrev:
794 793 iterrev = base
795 794 base = index[iterrev][3]
796 795
797 796 self._chainbasecache[rev] = base
798 797 return base
799 798
800 799 def linkrev(self, rev):
801 800 return self.index[rev][4]
802 801
803 802 def parentrevs(self, rev):
804 803 try:
805 804 entry = self.index[rev]
806 805 except IndexError:
807 806 if rev == wdirrev:
808 807 raise error.WdirUnsupported
809 808 raise
810 809 if entry[5] == nullrev:
811 810 return entry[6], entry[5]
812 811 else:
813 812 return entry[5], entry[6]
814 813
815 814 # fast parentrevs(rev) where rev isn't filtered
816 815 _uncheckedparentrevs = parentrevs
817 816
818 817 def node(self, rev):
819 818 try:
820 819 return self.index[rev][7]
821 820 except IndexError:
822 821 if rev == wdirrev:
823 822 raise error.WdirUnsupported
824 823 raise
825 824
826 825 # Derived from index values.
827 826
828 827 def end(self, rev):
829 828 return self.start(rev) + self.length(rev)
830 829
831 830 def parents(self, node):
832 831 i = self.index
833 832 d = i[self.rev(node)]
834 833 # inline node() to avoid function call overhead
835 834 if d[5] == self.nullid:
836 835 return i[d[6]][7], i[d[5]][7]
837 836 else:
838 837 return i[d[5]][7], i[d[6]][7]
839 838
840 839 def chainlen(self, rev):
841 840 return self._chaininfo(rev)[0]
842 841
843 842 def _chaininfo(self, rev):
844 843 chaininfocache = self._chaininfocache
845 844 if rev in chaininfocache:
846 845 return chaininfocache[rev]
847 846 index = self.index
848 847 generaldelta = self._generaldelta
849 848 iterrev = rev
850 849 e = index[iterrev]
851 850 clen = 0
852 851 compresseddeltalen = 0
853 852 while iterrev != e[3]:
854 853 clen += 1
855 854 compresseddeltalen += e[1]
856 855 if generaldelta:
857 856 iterrev = e[3]
858 857 else:
859 858 iterrev -= 1
860 859 if iterrev in chaininfocache:
861 860 t = chaininfocache[iterrev]
862 861 clen += t[0]
863 862 compresseddeltalen += t[1]
864 863 break
865 864 e = index[iterrev]
866 865 else:
867 866 # Add text length of base since decompressing that also takes
868 867 # work. For cache hits the length is already included.
869 868 compresseddeltalen += e[1]
870 869 r = (clen, compresseddeltalen)
871 870 chaininfocache[rev] = r
872 871 return r
873 872
874 873 def _deltachain(self, rev, stoprev=None):
875 874 """Obtain the delta chain for a revision.
876 875
877 876 ``stoprev`` specifies a revision to stop at. If not specified, we
878 877 stop at the base of the chain.
879 878
880 879 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
881 880 revs in ascending order and ``stopped`` is a bool indicating whether
882 881 ``stoprev`` was hit.
883 882 """
884 883 # Try C implementation.
885 884 try:
886 885 return self.index.deltachain(rev, stoprev, self._generaldelta)
887 886 except AttributeError:
888 887 pass
889 888
890 889 chain = []
891 890
892 891 # Alias to prevent attribute lookup in tight loop.
893 892 index = self.index
894 893 generaldelta = self._generaldelta
895 894
896 895 iterrev = rev
897 896 e = index[iterrev]
898 897 while iterrev != e[3] and iterrev != stoprev:
899 898 chain.append(iterrev)
900 899 if generaldelta:
901 900 iterrev = e[3]
902 901 else:
903 902 iterrev -= 1
904 903 e = index[iterrev]
905 904
906 905 if iterrev == stoprev:
907 906 stopped = True
908 907 else:
909 908 chain.append(iterrev)
910 909 stopped = False
911 910
912 911 chain.reverse()
913 912 return chain, stopped
914 913
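A usage sketch of the chain just computed (hedged: `rl` stands for any revlog instance and `rev` for any revision number; the names are illustrative, not API): the returned list gives, in ascending order, the revisions whose deltas must be applied on top of the chain base to rebuild the fulltext.

chain, stopped = rl._deltachain(rev)
assert not stopped            # no stoprev given, so we walked to the base
base = chain[0]               # the snapshot the chain is built on
# conceptually, rl.revision(rev) applies the deltas of chain[1:] on top
# of the fulltext of `base` to reconstruct revision `rev`
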
915 914 def ancestors(self, revs, stoprev=0, inclusive=False):
916 915 """Generate the ancestors of 'revs' in reverse revision order.
917 916 Does not generate revs lower than stoprev.
918 917
919 918 See the documentation for ancestor.lazyancestors for more details."""
920 919
921 920 # first, make sure start revisions aren't filtered
922 921 revs = list(revs)
923 922 checkrev = self.node
924 923 for r in revs:
925 924 checkrev(r)
926 925 # and we're sure ancestors aren't filtered as well
927 926
928 927 if rustancestor is not None:
929 928 lazyancestors = rustancestor.LazyAncestors
930 929 arg = self.index
931 930 else:
932 931 lazyancestors = ancestor.lazyancestors
933 932 arg = self._uncheckedparentrevs
934 933 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
935 934
936 935 def descendants(self, revs):
937 936 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
938 937
939 938 def findcommonmissing(self, common=None, heads=None):
940 939 """Return a tuple of the ancestors of common and the ancestors of heads
941 940 that are not ancestors of common. In revset terminology, we return the
942 941 tuple:
943 942
944 943 ::common, (::heads) - (::common)
945 944
946 945 The list is sorted by revision number, meaning it is
947 946 topologically sorted.
948 947
949 948 'heads' and 'common' are both lists of node IDs. If heads is
950 949 not supplied, uses all of the revlog's heads. If common is not
951 950 supplied, uses nullid."""
952 951 if common is None:
953 952 common = [self.nullid]
954 953 if heads is None:
955 954 heads = self.heads()
956 955
957 956 common = [self.rev(n) for n in common]
958 957 heads = [self.rev(n) for n in heads]
959 958
960 959 # we want the ancestors, but inclusive
961 960 class lazyset(object):
962 961 def __init__(self, lazyvalues):
963 962 self.addedvalues = set()
964 963 self.lazyvalues = lazyvalues
965 964
966 965 def __contains__(self, value):
967 966 return value in self.addedvalues or value in self.lazyvalues
968 967
969 968 def __iter__(self):
970 969 added = self.addedvalues
971 970 for r in added:
972 971 yield r
973 972 for r in self.lazyvalues:
974 973 if not r in added:
975 974 yield r
976 975
977 976 def add(self, value):
978 977 self.addedvalues.add(value)
979 978
980 979 def update(self, values):
981 980 self.addedvalues.update(values)
982 981
983 982 has = lazyset(self.ancestors(common))
984 983 has.add(nullrev)
985 984 has.update(common)
986 985
987 986 # take all ancestors from heads that aren't in has
988 987 missing = set()
989 988 visit = collections.deque(r for r in heads if r not in has)
990 989 while visit:
991 990 r = visit.popleft()
992 991 if r in missing:
993 992 continue
994 993 else:
995 994 missing.add(r)
996 995 for p in self.parentrevs(r):
997 996 if p not in has:
998 997 visit.append(p)
999 998 missing = list(missing)
1000 999 missing.sort()
1001 1000 return has, [self.node(miss) for miss in missing]
1002 1001
1003 1002 def incrementalmissingrevs(self, common=None):
1004 1003 """Return an object that can be used to incrementally compute the
1005 1004 revision numbers of the ancestors of arbitrary sets that are not
1006 1005 ancestors of common. This is an ancestor.incrementalmissingancestors
1007 1006 object.
1008 1007
1009 1008 'common' is a list of revision numbers. If common is not supplied, uses
1010 1009 nullrev.
1011 1010 """
1012 1011 if common is None:
1013 1012 common = [nullrev]
1014 1013
1015 1014 if rustancestor is not None:
1016 1015 return rustancestor.MissingAncestors(self.index, common)
1017 1016 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1018 1017
1019 1018 def findmissingrevs(self, common=None, heads=None):
1020 1019 """Return the revision numbers of the ancestors of heads that
1021 1020 are not ancestors of common.
1022 1021
1023 1022 More specifically, return a list of revision numbers corresponding to
1024 1023 nodes N such that every N satisfies the following constraints:
1025 1024
1026 1025 1. N is an ancestor of some node in 'heads'
1027 1026 2. N is not an ancestor of any node in 'common'
1028 1027
1029 1028 The list is sorted by revision number, meaning it is
1030 1029 topologically sorted.
1031 1030
1032 1031 'heads' and 'common' are both lists of revision numbers. If heads is
1033 1032 not supplied, uses all of the revlog's heads. If common is not
1034 1033 supplied, uses nullid."""
1035 1034 if common is None:
1036 1035 common = [nullrev]
1037 1036 if heads is None:
1038 1037 heads = self.headrevs()
1039 1038
1040 1039 inc = self.incrementalmissingrevs(common=common)
1041 1040 return inc.missingancestors(heads)
1042 1041
1043 1042 def findmissing(self, common=None, heads=None):
1044 1043 """Return the ancestors of heads that are not ancestors of common.
1045 1044
1046 1045 More specifically, return a list of nodes N such that every N
1047 1046 satisfies the following constraints:
1048 1047
1049 1048 1. N is an ancestor of some node in 'heads'
1050 1049 2. N is not an ancestor of any node in 'common'
1051 1050
1052 1051 The list is sorted by revision number, meaning it is
1053 1052 topologically sorted.
1054 1053
1055 1054 'heads' and 'common' are both lists of node IDs. If heads is
1056 1055 not supplied, uses all of the revlog's heads. If common is not
1057 1056 supplied, uses nullid."""
1058 1057 if common is None:
1059 1058 common = [self.nullid]
1060 1059 if heads is None:
1061 1060 heads = self.heads()
1062 1061
1063 1062 common = [self.rev(n) for n in common]
1064 1063 heads = [self.rev(n) for n in heads]
1065 1064
1066 1065 inc = self.incrementalmissingrevs(common=common)
1067 1066 return [self.node(r) for r in inc.missingancestors(heads)]
1068 1067
1069 1068 def nodesbetween(self, roots=None, heads=None):
1070 1069 """Return a topological path from 'roots' to 'heads'.
1071 1070
1072 1071 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1073 1072 topologically sorted list of all nodes N that satisfy both of
1074 1073 these constraints:
1075 1074
1076 1075 1. N is a descendant of some node in 'roots'
1077 1076 2. N is an ancestor of some node in 'heads'
1078 1077
1079 1078 Every node is considered to be both a descendant and an ancestor
1080 1079 of itself, so every reachable node in 'roots' and 'heads' will be
1081 1080 included in 'nodes'.
1082 1081
1083 1082 'outroots' is the list of reachable nodes in 'roots', i.e., the
1084 1083 subset of 'roots' that is returned in 'nodes'. Likewise,
1085 1084 'outheads' is the subset of 'heads' that is also in 'nodes'.
1086 1085
1087 1086 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1088 1087 unspecified, uses nullid as the only root. If 'heads' is
1089 1088 unspecified, uses the list of all of the revlog's heads."""
1090 1089 nonodes = ([], [], [])
1091 1090 if roots is not None:
1092 1091 roots = list(roots)
1093 1092 if not roots:
1094 1093 return nonodes
1095 1094 lowestrev = min([self.rev(n) for n in roots])
1096 1095 else:
1097 1096 roots = [self.nullid] # Everybody's a descendant of nullid
1098 1097 lowestrev = nullrev
1099 1098 if (lowestrev == nullrev) and (heads is None):
1100 1099 # We want _all_ the nodes!
1101 1100 return (
1102 1101 [self.node(r) for r in self],
1103 1102 [self.nullid],
1104 1103 list(self.heads()),
1105 1104 )
1106 1105 if heads is None:
1107 1106 # All nodes are ancestors, so the latest ancestor is the last
1108 1107 # node.
1109 1108 highestrev = len(self) - 1
1110 1109 # Set ancestors to None to signal that every node is an ancestor.
1111 1110 ancestors = None
1112 1111 # Set heads to an empty dictionary for later discovery of heads
1113 1112 heads = {}
1114 1113 else:
1115 1114 heads = list(heads)
1116 1115 if not heads:
1117 1116 return nonodes
1118 1117 ancestors = set()
1119 1118 # Turn heads into a dictionary so we can remove 'fake' heads.
1120 1119 # Also, later we will be using it to filter out the heads we can't
1121 1120 # find from roots.
1122 1121 heads = dict.fromkeys(heads, False)
1123 1122 # Start at the top and keep marking parents until we're done.
1124 1123 nodestotag = set(heads)
1125 1124 # Remember where the top was so we can use it as a limit later.
1126 1125 highestrev = max([self.rev(n) for n in nodestotag])
1127 1126 while nodestotag:
1128 1127 # grab a node to tag
1129 1128 n = nodestotag.pop()
1130 1129 # Never tag nullid
1131 1130 if n == self.nullid:
1132 1131 continue
1133 1132 # A node's revision number represents its place in a
1134 1133 # topologically sorted list of nodes.
1135 1134 r = self.rev(n)
1136 1135 if r >= lowestrev:
1137 1136 if n not in ancestors:
1138 1137 # If we are possibly a descendant of one of the roots
1139 1138 # and we haven't already been marked as an ancestor
1140 1139 ancestors.add(n) # Mark as ancestor
1141 1140 # Add non-nullid parents to list of nodes to tag.
1142 1141 nodestotag.update(
1143 1142 [p for p in self.parents(n) if p != self.nullid]
1144 1143 )
1145 1144 elif n in heads: # We've seen it before, is it a fake head?
1146 1145 # So it is, real heads should not be the ancestors of
1147 1146 # any other heads.
1148 1147 heads.pop(n)
1149 1148 if not ancestors:
1150 1149 return nonodes
1151 1150 # Now that we have our set of ancestors, we want to remove any
1152 1151 # roots that are not ancestors.
1153 1152
1154 1153 # If one of the roots was nullid, everything is included anyway.
1155 1154 if lowestrev > nullrev:
1156 1155 # But, since we weren't, let's recompute the lowest rev to not
1157 1156 # include roots that aren't ancestors.
1158 1157
1159 1158 # Filter out roots that aren't ancestors of heads
1160 1159 roots = [root for root in roots if root in ancestors]
1161 1160 # Recompute the lowest revision
1162 1161 if roots:
1163 1162 lowestrev = min([self.rev(root) for root in roots])
1164 1163 else:
1165 1164 # No more roots? Return empty list
1166 1165 return nonodes
1167 1166 else:
1168 1167 # We are descending from nullid, and don't need to care about
1169 1168 # any other roots.
1170 1169 lowestrev = nullrev
1171 1170 roots = [self.nullid]
1172 1171 # Transform our roots list into a set.
1173 1172 descendants = set(roots)
1174 1173 # Also, keep the original roots so we can filter out roots that aren't
1175 1174 # 'real' roots (i.e. are descended from other roots).
1176 1175 roots = descendants.copy()
1177 1176 # Our topologically sorted list of output nodes.
1178 1177 orderedout = []
1179 1178 # Don't start at nullid since we don't want nullid in our output list,
1180 1179 # and if nullid shows up in descendants, empty parents will look like
1181 1180 # they're descendants.
1182 1181 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1183 1182 n = self.node(r)
1184 1183 isdescendant = False
1185 1184 if lowestrev == nullrev: # Everybody is a descendant of nullid
1186 1185 isdescendant = True
1187 1186 elif n in descendants:
1188 1187 # n is already a descendant
1189 1188 isdescendant = True
1190 1189 # This check only needs to be done here because all the roots
1191 1190 # will start being marked as descendants before the loop.
1192 1191 if n in roots:
1193 1192 # If n was a root, check if it's a 'real' root.
1194 1193 p = tuple(self.parents(n))
1195 1194 # If any of its parents are descendants, it's not a root.
1196 1195 if (p[0] in descendants) or (p[1] in descendants):
1197 1196 roots.remove(n)
1198 1197 else:
1199 1198 p = tuple(self.parents(n))
1200 1199 # A node is a descendant if either of its parents are
1201 1200 # descendants. (We seeded the descendants set with the roots
1202 1201 # up there, remember?)
1203 1202 if (p[0] in descendants) or (p[1] in descendants):
1204 1203 descendants.add(n)
1205 1204 isdescendant = True
1206 1205 if isdescendant and ((ancestors is None) or (n in ancestors)):
1207 1206 # Only include nodes that are both descendants and ancestors.
1208 1207 orderedout.append(n)
1209 1208 if (ancestors is not None) and (n in heads):
1210 1209 # We're trying to figure out which heads are reachable
1211 1210 # from roots.
1212 1211 # Mark this head as having been reached
1213 1212 heads[n] = True
1214 1213 elif ancestors is None:
1215 1214 # Otherwise, we're trying to discover the heads.
1216 1215 # Assume this is a head because if it isn't, the next step
1217 1216 # will eventually remove it.
1218 1217 heads[n] = True
1219 1218 # But, obviously its parents aren't.
1220 1219 for p in self.parents(n):
1221 1220 heads.pop(p, None)
1222 1221 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1223 1222 roots = list(roots)
1224 1223 assert orderedout
1225 1224 assert roots
1226 1225 assert heads
1227 1226 return (orderedout, roots, heads)
1228 1227
1229 1228 def headrevs(self, revs=None):
1230 1229 if revs is None:
1231 1230 try:
1232 1231 return self.index.headrevs()
1233 1232 except AttributeError:
1234 1233 return self._headrevs()
1235 1234 if rustdagop is not None:
1236 1235 return rustdagop.headrevs(self.index, revs)
1237 1236 return dagop.headrevs(revs, self._uncheckedparentrevs)
1238 1237
1239 1238 def computephases(self, roots):
1240 1239 return self.index.computephasesmapsets(roots)
1241 1240
1242 1241 def _headrevs(self):
1243 1242 count = len(self)
1244 1243 if not count:
1245 1244 return [nullrev]
1246 1245 # we won't iterate over filtered revs, so nobody is a head at start
1247 1246 ishead = [0] * (count + 1)
1248 1247 index = self.index
1249 1248 for r in self:
1250 1249 ishead[r] = 1 # I may be a head
1251 1250 e = index[r]
1252 1251 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1253 1252 return [r for r, val in enumerate(ishead) if val]
1254 1253
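The pure-Python fallback above uses a simple marking scheme: every revision starts as a candidate head, and each revision then clears the mark on its parents, so only childless revisions stay marked. The same idea on a toy DAG (the parent tuples below are assumptions for the example):

parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
ishead = [1] * 4                  # everybody may be a head at first
for r in range(4):
    for p in parents[r]:
        if p >= 0:
            ishead[p] = 0         # a revision with a child is not a head
assert [r for r, v in enumerate(ishead) if v] == [2, 3]
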
1255 1254 def heads(self, start=None, stop=None):
1256 1255 """return the list of all nodes that have no children
1257 1256
1258 1257 if start is specified, only heads that are descendants of
1259 1258 start will be returned
1260 1259 if stop is specified, it will consider all the revs from stop
1261 1260 as if they had no children
1262 1261 """
1263 1262 if start is None and stop is None:
1264 1263 if not len(self):
1265 1264 return [self.nullid]
1266 1265 return [self.node(r) for r in self.headrevs()]
1267 1266
1268 1267 if start is None:
1269 1268 start = nullrev
1270 1269 else:
1271 1270 start = self.rev(start)
1272 1271
1273 1272 stoprevs = {self.rev(n) for n in stop or []}
1274 1273
1275 1274 revs = dagop.headrevssubset(
1276 1275 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1277 1276 )
1278 1277
1279 1278 return [self.node(rev) for rev in revs]
1280 1279
1281 1280 def children(self, node):
1282 1281 """find the children of a given node"""
1283 1282 c = []
1284 1283 p = self.rev(node)
1285 1284 for r in self.revs(start=p + 1):
1286 1285 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1287 1286 if prevs:
1288 1287 for pr in prevs:
1289 1288 if pr == p:
1290 1289 c.append(self.node(r))
1291 1290 elif p == nullrev:
1292 1291 c.append(self.node(r))
1293 1292 return c
1294 1293
1295 1294 def commonancestorsheads(self, a, b):
1296 1295 """calculate all the heads of the common ancestors of nodes a and b"""
1297 1296 a, b = self.rev(a), self.rev(b)
1298 1297 ancs = self._commonancestorsheads(a, b)
1299 1298 return pycompat.maplist(self.node, ancs)
1300 1299
1301 1300 def _commonancestorsheads(self, *revs):
1302 1301 """calculate all the heads of the common ancestors of revs"""
1303 1302 try:
1304 1303 ancs = self.index.commonancestorsheads(*revs)
1305 1304 except (AttributeError, OverflowError): # C implementation failed
1306 1305 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1307 1306 return ancs
1308 1307
1309 1308 def isancestor(self, a, b):
1310 1309 """return True if node a is an ancestor of node b
1311 1310
1312 1311 A revision is considered an ancestor of itself."""
1313 1312 a, b = self.rev(a), self.rev(b)
1314 1313 return self.isancestorrev(a, b)
1315 1314
1316 1315 def isancestorrev(self, a, b):
1317 1316 """return True if revision a is an ancestor of revision b
1318 1317
1319 1318 A revision is considered an ancestor of itself.
1320 1319
1321 1320 The implementation of this is trivial but the use of
1322 1321 reachableroots is not."""
1323 1322 if a == nullrev:
1324 1323 return True
1325 1324 elif a == b:
1326 1325 return True
1327 1326 elif a > b:
1328 1327 return False
1329 1328 return bool(self.reachableroots(a, [b], [a], includepath=False))
1330 1329
1331 1330 def reachableroots(self, minroot, heads, roots, includepath=False):
1332 1331 """return (heads(::(<roots> and <roots>::<heads>)))
1333 1332
1334 1333 If includepath is True, return (<roots>::<heads>)."""
1335 1334 try:
1336 1335 return self.index.reachableroots2(
1337 1336 minroot, heads, roots, includepath
1338 1337 )
1339 1338 except AttributeError:
1340 1339 return dagop._reachablerootspure(
1341 1340 self.parentrevs, minroot, roots, heads, includepath
1342 1341 )
1343 1342
1344 1343 def ancestor(self, a, b):
1345 1344 """calculate the "best" common ancestor of nodes a and b"""
1346 1345
1347 1346 a, b = self.rev(a), self.rev(b)
1348 1347 try:
1349 1348 ancs = self.index.ancestors(a, b)
1350 1349 except (AttributeError, OverflowError):
1351 1350 ancs = ancestor.ancestors(self.parentrevs, a, b)
1352 1351 if ancs:
1353 1352 # choose a consistent winner when there's a tie
1354 1353 return min(map(self.node, ancs))
1355 1354 return self.nullid
1356 1355
1357 1356 def _match(self, id):
1358 1357 if isinstance(id, int):
1359 1358 # rev
1360 1359 return self.node(id)
1361 1360 if len(id) == self.nodeconstants.nodelen:
1362 1361 # possibly a binary node
1363 1362 # odds of a binary node being all hex in ASCII are 1 in 10**25
1364 1363 try:
1365 1364 node = id
1366 1365 self.rev(node) # quick search the index
1367 1366 return node
1368 1367 except error.LookupError:
1369 1368 pass # may be partial hex id
1370 1369 try:
1371 1370 # str(rev)
1372 1371 rev = int(id)
1373 1372 if b"%d" % rev != id:
1374 1373 raise ValueError
1375 1374 if rev < 0:
1376 1375 rev = len(self) + rev
1377 1376 if rev < 0 or rev >= len(self):
1378 1377 raise ValueError
1379 1378 return self.node(rev)
1380 1379 except (ValueError, OverflowError):
1381 1380 pass
1382 1381 if len(id) == 2 * self.nodeconstants.nodelen:
1383 1382 try:
1384 1383 # a full hex nodeid?
1385 1384 node = bin(id)
1386 1385 self.rev(node)
1387 1386 return node
1388 1387 except (TypeError, error.LookupError):
1389 1388 pass
1390 1389
1391 1390 def _partialmatch(self, id):
1392 1391 # we don't care about wdirfilenodeids as they should always be full hashes
1393 1392 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1394 1393 try:
1395 1394 partial = self.index.partialmatch(id)
1396 1395 if partial and self.hasnode(partial):
1397 1396 if maybewdir:
1398 1397 # single 'ff...' match in radix tree, ambiguous with wdir
1399 1398 raise error.RevlogError
1400 1399 return partial
1401 1400 if maybewdir:
1402 1401 # no 'ff...' match in radix tree, wdir identified
1403 1402 raise error.WdirUnsupported
1404 1403 return None
1405 1404 except error.RevlogError:
1406 1405 # parsers.c radix tree lookup gave multiple matches
1407 1406 # fast path: for unfiltered changelog, radix tree is accurate
1408 1407 if not getattr(self, 'filteredrevs', None):
1409 1408 raise error.AmbiguousPrefixLookupError(
1410 1409 id, self.display_id, _(b'ambiguous identifier')
1411 1410 )
1412 1411 # fall through to slow path that filters hidden revisions
1413 1412 except (AttributeError, ValueError):
1414 1413 # we are pure python, or key was too short to search radix tree
1415 1414 pass
1416 1415
1417 1416 if id in self._pcache:
1418 1417 return self._pcache[id]
1419 1418
1420 1419 if len(id) <= 40:
1421 1420 try:
1422 1421 # hex(node)[:...]
1423 1422 l = len(id) // 2 # grab an even number of digits
1424 1423 prefix = bin(id[: l * 2])
1425 1424 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1426 1425 nl = [
1427 1426 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1428 1427 ]
1429 1428 if self.nodeconstants.nullhex.startswith(id):
1430 1429 nl.append(self.nullid)
1431 1430 if len(nl) > 0:
1432 1431 if len(nl) == 1 and not maybewdir:
1433 1432 self._pcache[id] = nl[0]
1434 1433 return nl[0]
1435 1434 raise error.AmbiguousPrefixLookupError(
1436 1435 id, self.display_id, _(b'ambiguous identifier')
1437 1436 )
1438 1437 if maybewdir:
1439 1438 raise error.WdirUnsupported
1440 1439 return None
1441 1440 except TypeError:
1442 1441 pass
1443 1442
1444 1443 def lookup(self, id):
1445 1444 """locate a node based on:
1446 1445 - revision number or str(revision number)
1447 1446 - nodeid or subset of hex nodeid
1448 1447 """
1449 1448 n = self._match(id)
1450 1449 if n is not None:
1451 1450 return n
1452 1451 n = self._partialmatch(id)
1453 1452 if n:
1454 1453 return n
1455 1454
1456 1455 raise error.LookupError(id, self.display_id, _(b'no match found'))
1457 1456
1458 1457 def shortest(self, node, minlength=1):
1459 1458 """Find the shortest unambiguous prefix that matches node."""
1460 1459
1461 1460 def isvalid(prefix):
1462 1461 try:
1463 1462 matchednode = self._partialmatch(prefix)
1464 1463 except error.AmbiguousPrefixLookupError:
1465 1464 return False
1466 1465 except error.WdirUnsupported:
1467 1466 # single 'ff...' match
1468 1467 return True
1469 1468 if matchednode is None:
1470 1469 raise error.LookupError(node, self.display_id, _(b'no node'))
1471 1470 return True
1472 1471
1473 1472 def maybewdir(prefix):
1474 1473 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1475 1474
1476 1475 hexnode = hex(node)
1477 1476
1478 1477 def disambiguate(hexnode, minlength):
1479 1478 """Disambiguate against wdirid."""
1480 1479 for length in range(minlength, len(hexnode) + 1):
1481 1480 prefix = hexnode[:length]
1482 1481 if not maybewdir(prefix):
1483 1482 return prefix
1484 1483
1485 1484 if not getattr(self, 'filteredrevs', None):
1486 1485 try:
1487 1486 length = max(self.index.shortest(node), minlength)
1488 1487 return disambiguate(hexnode, length)
1489 1488 except error.RevlogError:
1490 1489 if node != self.nodeconstants.wdirid:
1491 1490 raise error.LookupError(
1492 1491 node, self.display_id, _(b'no node')
1493 1492 )
1494 1493 except AttributeError:
1495 1494 # Fall through to pure code
1496 1495 pass
1497 1496
1498 1497 if node == self.nodeconstants.wdirid:
1499 1498 for length in range(minlength, len(hexnode) + 1):
1500 1499 prefix = hexnode[:length]
1501 1500 if isvalid(prefix):
1502 1501 return prefix
1503 1502
1504 1503 for length in range(minlength, len(hexnode) + 1):
1505 1504 prefix = hexnode[:length]
1506 1505 if isvalid(prefix):
1507 1506 return disambiguate(hexnode, length)
1508 1507
1509 1508 def cmp(self, node, text):
1510 1509 """compare text with a given file revision
1511 1510
1512 1511 returns True if text is different from what is stored.
1513 1512 """
1514 1513 p1, p2 = self.parents(node)
1515 1514 return storageutil.hashrevisionsha1(text, p1, p2) != node
1516 1515
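storageutil.hashrevisionsha1, used by cmp() above, implements Mercurial's standard node construction: the SHA-1 of the two parent nodes in sorted order followed by the revision text. A self-contained sketch of the same construction (a hypothetical helper, not the mercurial API):

import hashlib

def node_hash(text, p1, p2):
    # hash the sorted parents first, then the revision text
    s = hashlib.sha1(min(p1, p2) + max(p1, p2))
    s.update(text)
    return s.digest()
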
1517 1516 def _cachesegment(self, offset, data):
1518 1517 """Add a segment to the revlog cache.
1519 1518
1520 1519 Accepts an absolute offset and the data that is at that location.
1521 1520 """
1522 1521 o, d = self._chunkcache
1523 1522 # try to add to existing cache
1524 1523 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1525 1524 self._chunkcache = o, d + data
1526 1525 else:
1527 1526 self._chunkcache = offset, data
1528 1527
1529 1528 def _readsegment(self, offset, length, df=None):
1530 1529 """Load a segment of raw data from the revlog.
1531 1530
1532 1531 Accepts an absolute offset, length to read, and an optional existing
1533 1532 file handle to read from.
1534 1533
1535 1534 If an existing file handle is passed, it will be seeked and the
1536 1535 original seek position will NOT be restored.
1537 1536
1538 1537 Returns a str or buffer of raw byte data.
1539 1538
1540 1539 Raises if the requested number of bytes could not be read.
1541 1540 """
1542 1541 # Cache data both forward and backward around the requested
1543 1542 # data, in a fixed size window. This helps speed up operations
1544 1543 # involving reading the revlog backwards.
1545 1544 cachesize = self._chunkcachesize
1546 1545 realoffset = offset & ~(cachesize - 1)
1547 1546 reallength = (
1548 1547 (offset + length + cachesize) & ~(cachesize - 1)
1549 1548 ) - realoffset
1550 1549 with self._datareadfp(df) as df:
1551 1550 df.seek(realoffset)
1552 1551 d = df.read(reallength)
1553 1552
1554 1553 self._cachesegment(realoffset, d)
1555 1554 if offset != realoffset or reallength != length:
1556 1555 startoffset = offset - realoffset
1557 1556 if len(d) - startoffset < length:
1558 1557 raise error.RevlogError(
1559 1558 _(
1560 1559 b'partial read of revlog %s; expected %d bytes from '
1561 1560 b'offset %d, got %d'
1562 1561 )
1563 1562 % (
1564 1563 self._indexfile if self._inline else self._datafile,
1565 1564 length,
1566 1565 offset,
1567 1566 len(d) - startoffset,
1568 1567 )
1569 1568 )
1570 1569
1571 1570 return util.buffer(d, startoffset, length)
1572 1571
1573 1572 if len(d) < length:
1574 1573 raise error.RevlogError(
1575 1574 _(
1576 1575 b'partial read of revlog %s; expected %d bytes from offset '
1577 1576 b'%d, got %d'
1578 1577 )
1579 1578 % (
1580 1579 self._indexfile if self._inline else self._datafile,
1581 1580 length,
1582 1581 offset,
1583 1582 len(d),
1584 1583 )
1585 1584 )
1586 1585
1587 1586 return d
1588 1587
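Because _chunkcachesize is validated to be a power of two, offset & ~(cachesize - 1) rounds down to a multiple of the cache size, so the read above is widened to an aligned window around the requested range. With the default 64 KiB cache size:

cachesize = 65536
offset, length = 70000, 100                     # example request
realoffset = offset & ~(cachesize - 1)          # 65536, aligned start
reallength = (
    (offset + length + cachesize) & ~(cachesize - 1)
) - realoffset                                  # 65536, one full window
assert realoffset <= offset
assert realoffset + reallength >= offset + length
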
1589 1588 def _getsegment(self, offset, length, df=None):
1590 1589 """Obtain a segment of raw data from the revlog.
1591 1590
1592 1591 Accepts an absolute offset, length of bytes to obtain, and an
1593 1592 optional file handle to the already-opened revlog. If the file
1594 1593 handle is used, its original seek position will not be preserved.
1595 1594
1596 1595 Requests for data may be returned from a cache.
1597 1596
1598 1597 Returns a str or a buffer instance of raw byte data.
1599 1598 """
1600 1599 o, d = self._chunkcache
1601 1600 l = len(d)
1602 1601
1603 1602 # is it in the cache?
1604 1603 cachestart = offset - o
1605 1604 cacheend = cachestart + length
1606 1605 if cachestart >= 0 and cacheend <= l:
1607 1606 if cachestart == 0 and cacheend == l:
1608 1607 return d # avoid a copy
1609 1608 return util.buffer(d, cachestart, cacheend - cachestart)
1610 1609
1611 1610 return self._readsegment(offset, length, df=df)
1612 1611
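# Sketch (not part of this module) of the cache-hit test in _getsegment
# above: with a cache (o, d), a request [offset, offset + length) is
# served from memory iff it falls entirely inside [o, o + len(d)):
#
#   o, d = rl._chunkcache
#   hit = (offset - o) >= 0 and (offset - o) + length <= len(d)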
1613 1612 def _getsegmentforrevs(self, startrev, endrev, df=None):
1614 1613 """Obtain a segment of raw data corresponding to a range of revisions.
1615 1614
1616 1615 Accepts the start and end revisions and an optional already-open
1617 1616 file handle to be used for reading. If the file handle is used, its
1618 1617 seek position will not be preserved.
1619 1618
1620 1619 Requests for data may be satisfied by a cache.
1621 1620
1622 1621 Returns a 2-tuple of (offset, data) for the requested range of
1623 1622 revisions. Offset is the integer offset from the beginning of the
1624 1623 revlog and data is a str or buffer of the raw byte data.
1625 1624
1626 1625 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1627 1626 to determine where each revision's data begins and ends.
1628 1627 """
1629 1628 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1630 1629 # (functions are expensive).
1631 1630 index = self.index
1632 1631 istart = index[startrev]
1633 1632 start = int(istart[0] >> 16)
1634 1633 if startrev == endrev:
1635 1634 end = start + istart[1]
1636 1635 else:
1637 1636 iend = index[endrev]
1638 1637 end = int(iend[0] >> 16) + iend[1]
1639 1638
1640 1639 if self._inline:
1641 1640 start += (startrev + 1) * self.index.entry_size
1642 1641 end += (endrev + 1) * self.index.entry_size
1643 1642 length = end - start
1644 1643
1645 1644 return start, self._getsegment(start, length, df=df)
1646 1645
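# Usage sketch (illustrative, non-inline case): callers slice individual
# revisions out of the segment returned by _getsegmentforrevs above using
# start()/length() relative to the returned base offset; for inline
# revlogs, (rev + 1) * entry_size must be added, as _chunks below does:
#
#   offset, data = rl._getsegmentforrevs(firstrev, lastrev)
#   for rev in range(firstrev, lastrev + 1):
#       chunk = util.buffer(data, rl.start(rev) - offset, rl.length(rev))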
1647 1646 def _chunk(self, rev, df=None):
1648 1647 """Obtain a single decompressed chunk for a revision.
1649 1648
1650 1649 Accepts an integer revision and an optional already-open file handle
1651 1650 to be used for reading. If used, the seek position of the file will not
1652 1651 be preserved.
1653 1652
1654 1653 Returns a str holding uncompressed data for the requested revision.
1655 1654 """
1656 1655 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1657 1656
1658 1657 def _chunks(self, revs, df=None, targetsize=None):
1659 1658 """Obtain decompressed chunks for the specified revisions.
1660 1659
1661 1660 Accepts an iterable of numeric revisions that are assumed to be in
1662 1661 ascending order. Also accepts an optional already-open file handle
1663 1662 to be used for reading. If used, the seek position of the file will
1664 1663 not be preserved.
1665 1664
1666 1665 This function is similar to calling ``self._chunk()`` multiple times,
1667 1666 but is faster.
1668 1667
1669 1668 Returns a list with decompressed data for each requested revision.
1670 1669 """
1671 1670 if not revs:
1672 1671 return []
1673 1672 start = self.start
1674 1673 length = self.length
1675 1674 inline = self._inline
1676 1675 iosize = self.index.entry_size
1677 1676 buffer = util.buffer
1678 1677
1679 1678 l = []
1680 1679 ladd = l.append
1681 1680
1682 1681 if not self._withsparseread:
1683 1682 slicedchunks = (revs,)
1684 1683 else:
1685 1684 slicedchunks = deltautil.slicechunk(
1686 1685 self, revs, targetsize=targetsize
1687 1686 )
1688 1687
1689 1688 for revschunk in slicedchunks:
1690 1689 firstrev = revschunk[0]
1691 1690 # Skip trailing revisions with empty diff
1692 1691 for lastrev in revschunk[::-1]:
1693 1692 if length(lastrev) != 0:
1694 1693 break
1695 1694
1696 1695 try:
1697 1696 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1698 1697 except OverflowError:
1699 1698 # issue4215 - we can't cache a run of chunks greater than
1700 1699 # 2G on Windows
1701 1700 return [self._chunk(rev, df=df) for rev in revschunk]
1702 1701
1703 1702 decomp = self.decompress
1704 1703 for rev in revschunk:
1705 1704 chunkstart = start(rev)
1706 1705 if inline:
1707 1706 chunkstart += (rev + 1) * iosize
1708 1707 chunklength = length(rev)
1709 1708 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1710 1709
1711 1710 return l
1712 1711
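# Note (sketch): with sparse-read enabled, _chunks above asks
# deltautil.slicechunk to split the revision list so each slice reads a
# dense byte range, e.g. revs [1, 2, 3, 50, 51] might become two slices:
#
#   [[1, 2, 3], [50, 51]]   # two dense reads instead of one sparse read
#
# Each slice is then fetched with a single _getsegmentforrevs call and
# decompressed per revision.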
1713 1712 def _chunkclear(self):
1714 1713 """Clear the raw chunk cache."""
1715 1714 self._chunkcache = (0, b'')
1716 1715
1717 1716 def deltaparent(self, rev):
1718 1717 """return deltaparent of the given revision"""
1719 1718 base = self.index[rev][3]
1720 1719 if base == rev:
1721 1720 return nullrev
1722 1721 elif self._generaldelta:
1723 1722 return base
1724 1723 else:
1725 1724 return rev - 1
1726 1725
1727 1726 def issnapshot(self, rev):
1728 1727 """tells whether rev is a snapshot"""
1729 1728 if not self._sparserevlog:
1730 1729 return self.deltaparent(rev) == nullrev
1731 1730 elif util.safehasattr(self.index, b'issnapshot'):
1732 1731 # directly assign the method to cache the testing and access
1733 1732 self.issnapshot = self.index.issnapshot
1734 1733 return self.issnapshot(rev)
1735 1734 if rev == nullrev:
1736 1735 return True
1737 1736 entry = self.index[rev]
1738 1737 base = entry[3]
1739 1738 if base == rev:
1740 1739 return True
1741 1740 if base == nullrev:
1742 1741 return True
1743 1742 p1 = entry[5]
1744 1743 p2 = entry[6]
1745 1744 if base == p1 or base == p2:
1746 1745 return False
1747 1746 return self.issnapshot(base)
1748 1747
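# Worked sketch for deltaparent/issnapshot above (generaldelta layout,
# where entry[3] stores the delta base directly):
#
#   base = rl.index[rev][3]
#   base == rev       # self-referencing base: rev is a full snapshot
#   base in parents   # delta against a parent: a regular delta, not a
#                     # snapshot
#   otherwise         # sparse-revlog intermediate base: rev is a snapshot
#                     # only if the base chain is itself made of snapshots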
1749 1748 def snapshotdepth(self, rev):
1750 1749 """number of snapshots in the chain before this one"""
1751 1750 if not self.issnapshot(rev):
1752 1751 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1753 1752 return len(self._deltachain(rev)[0]) - 1
1754 1753
1755 1754 def revdiff(self, rev1, rev2):
1756 1755 """return or calculate a delta between two revisions
1757 1756
1758 1757 The delta calculated is in binary form and is intended to be written to
1759 1758 revlog data directly. So this function needs raw revision data.
1760 1759 """
1761 1760 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1762 1761 return bytes(self._chunk(rev2))
1763 1762
1764 1763 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1765 1764
1766 1765 def _processflags(self, text, flags, operation, raw=False):
1767 1766 """deprecated entry point to access flag processors"""
1768 1767 msg = b'_processflags(...) is deprecated, use the specialized variant'
1769 1768 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1770 1769 if raw:
1771 1770 return text, flagutil.processflagsraw(self, text, flags)
1772 1771 elif operation == b'read':
1773 1772 return flagutil.processflagsread(self, text, flags)
1774 1773 else: # write operation
1775 1774 return flagutil.processflagswrite(self, text, flags)
1776 1775
1777 1776 def revision(self, nodeorrev, _df=None, raw=False):
1778 1777 """return an uncompressed revision of a given node or revision
1779 1778 number.
1780 1779
1781 1780 _df - an existing file handle to read from. (internal-only)
1782 1781 raw - an optional argument specifying if the revision data is to be
1783 1782 treated as raw data when applying flag transforms. 'raw' should be set
1784 1783 to True when generating changegroups or in debug commands.
1785 1784 """
1786 1785 if raw:
1787 1786 msg = (
1788 1787 b'revlog.revision(..., raw=True) is deprecated, '
1789 1788 b'use revlog.rawdata(...)'
1790 1789 )
1791 1790 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1792 1791 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1793 1792
1794 1793 def sidedata(self, nodeorrev, _df=None):
1795 1794 """a map of extra data related to the changeset but not part of the hash
1796 1795
1797 1796 This function currently returns a dictionary. However, a more
1798 1797 advanced mapping object will likely be used in the future for more
1799 1798 efficient/lazy code.
1800 1799 """
1801 1800 return self._revisiondata(nodeorrev, _df)[1]
1802 1801
1803 1802 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1804 1803 # deal with <nodeorrev> argument type
1805 1804 if isinstance(nodeorrev, int):
1806 1805 rev = nodeorrev
1807 1806 node = self.node(rev)
1808 1807 else:
1809 1808 node = nodeorrev
1810 1809 rev = None
1811 1810
1812 1811 # fast path the special `nullid` rev
1813 1812 if node == self.nullid:
1814 1813 return b"", {}
1815 1814
1816 1815 # ``rawtext`` is the text as stored inside the revlog. Might be the
1817 1816 # revision or might need to be processed to retrieve the revision.
1818 1817 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1819 1818
1820 1819 if self.hassidedata:
1821 1820 if rev is None:
1822 1821 rev = self.rev(node)
1823 1822 sidedata = self._sidedata(rev)
1824 1823 else:
1825 1824 sidedata = {}
1826 1825
1827 1826 if raw and validated:
1828 1827 # if we don't want to process the raw text and that raw
1829 1828 # text is cached, we can exit early.
1830 1829 return rawtext, sidedata
1831 1830 if rev is None:
1832 1831 rev = self.rev(node)
1833 1832 # the revlog's flags for this revision
1834 1833 # (they usually alter its state or content)
1835 1834 flags = self.flags(rev)
1836 1835
1837 1836 if validated and flags == REVIDX_DEFAULT_FLAGS:
1838 1837 # no extra flags set, no flag processor runs, text = rawtext
1839 1838 return rawtext, sidedata
1840 1839
1841 1840 if raw:
1842 1841 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1843 1842 text = rawtext
1844 1843 else:
1845 1844 r = flagutil.processflagsread(self, rawtext, flags)
1846 1845 text, validatehash = r
1847 1846 if validatehash:
1848 1847 self.checkhash(text, node, rev=rev)
1849 1848 if not validated:
1850 1849 self._revisioncache = (node, rev, rawtext)
1851 1850
1852 1851 return text, sidedata
1853 1852
1854 1853 def _rawtext(self, node, rev, _df=None):
1855 1854 """return the possibly unvalidated rawtext for a revision
1856 1855
1857 1856 returns (rev, rawtext, validated)
1858 1857 """
1859 1858
1860 1859 # revision in the cache (could be useful to apply delta)
1861 1860 cachedrev = None
1862 1861 # An intermediate text to apply deltas to
1863 1862 basetext = None
1864 1863
1865 1864 # Check if we have the entry in cache
1866 1865 # The cache entry looks like (node, rev, rawtext)
1867 1866 if self._revisioncache:
1868 1867 if self._revisioncache[0] == node:
1869 1868 return (rev, self._revisioncache[2], True)
1870 1869 cachedrev = self._revisioncache[1]
1871 1870
1872 1871 if rev is None:
1873 1872 rev = self.rev(node)
1874 1873
1875 1874 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1876 1875 if stopped:
1877 1876 basetext = self._revisioncache[2]
1878 1877
1879 1878 # drop cache to save memory, the caller is expected to
1880 1879 # update self._revisioncache after validating the text
1881 1880 self._revisioncache = None
1882 1881
1883 1882 targetsize = None
1884 1883 rawsize = self.index[rev][2]
1885 1884 if 0 <= rawsize:
1886 1885 targetsize = 4 * rawsize
1887 1886
1888 1887 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1889 1888 if basetext is None:
1890 1889 basetext = bytes(bins[0])
1891 1890 bins = bins[1:]
1892 1891
1893 1892 rawtext = mdiff.patches(basetext, bins)
1894 1893 del basetext # let us have a chance to free memory early
1895 1894 return (rev, rawtext, False)
1896 1895
1897 1896 def _sidedata(self, rev):
1898 1897 """Return the sidedata for a given revision number."""
1899 1898 index_entry = self.index[rev]
1900 1899 sidedata_offset = index_entry[8]
1901 1900 sidedata_size = index_entry[9]
1902 1901
1903 1902 if self._inline:
1904 1903 sidedata_offset += self.index.entry_size * (1 + rev)
1905 1904 if sidedata_size == 0:
1906 1905 return {}
1907 1906
1908 1907 segment = self._getsegment(sidedata_offset, sidedata_size)
1909 1908 sidedata = sidedatautil.deserialize_sidedata(segment)
1910 1909 return sidedata
1911 1910
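# Layout sketch (index tuple positions as used in _sidedata above):
# entry[8] is the absolute sidedata offset and entry[9] its size; inline
# revlogs additionally shift the offset past the interleaved index
# entries:
#
#   entry = rl.index[rev]
#   offset, size = entry[8], entry[9]
#   if rl._inline:
#       offset += rl.index.entry_size * (1 + rev)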
1912 1911 def rawdata(self, nodeorrev, _df=None):
1913 1912 """return the uncompressed raw data of a given node or revision number.
1914 1913
1915 1914 _df - an existing file handle to read from. (internal-only)
1916 1915 """
1917 1916 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1918 1917
1919 1918 def hash(self, text, p1, p2):
1920 1919 """Compute a node hash.
1921 1920
1922 1921 Available as a function so that subclasses can replace the hash
1923 1922 as needed.
1924 1923 """
1925 1924 return storageutil.hashrevisionsha1(text, p1, p2)
1926 1925
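# For reference (a hedged sketch, not the canonical implementation):
# storageutil.hashrevisionsha1 computes the classic Mercurial node as
# SHA-1 over the two parent nodes in sorted order followed by the text,
# roughly:
#
#   import hashlib
#   def node_sha1(text, p1, p2):
#       a, b = sorted([p1, p2])
#       return hashlib.sha1(a + b + text).digest()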
1927 1926 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1928 1927 """Check node hash integrity.
1929 1928
1930 1929 Available as a function so that subclasses can extend hash mismatch
1931 1930 behaviors as needed.
1932 1931 """
1933 1932 try:
1934 1933 if p1 is None and p2 is None:
1935 1934 p1, p2 = self.parents(node)
1936 1935 if node != self.hash(text, p1, p2):
1937 1936 # Clear the revision cache on hash failure. The revision cache
1938 1937 # only stores the raw revision and clearing the cache does have
1939 1938 # the side-effect that we won't have a cache hit when the raw
1940 1939 # revision data is accessed. But this case should be rare and
1941 1940 # it is extra work to teach the cache about the hash
1942 1941 # verification state.
1943 1942 if self._revisioncache and self._revisioncache[0] == node:
1944 1943 self._revisioncache = None
1945 1944
1946 1945 revornode = rev
1947 1946 if revornode is None:
1948 1947 revornode = templatefilters.short(hex(node))
1949 1948 raise error.RevlogError(
1950 1949 _(b"integrity check failed on %s:%s")
1951 1950 % (self.display_id, pycompat.bytestr(revornode))
1952 1951 )
1953 1952 except error.RevlogError:
1954 1953 if self._censorable and storageutil.iscensoredtext(text):
1955 1954 raise error.CensoredNodeError(self.display_id, node, text)
1956 1955 raise
1957 1956
1958 1957 def _enforceinlinesize(self, tr, fp=None):
1959 1958 """Check if the revlog is too big for inline and convert if so.
1960 1959
1961 1960 This should be called after revisions are added to the revlog. If the
1962 1961 revlog has grown too large to be an inline revlog, it will convert it
1963 1962 to use multiple index and data files.
1964 1963 """
1965 1964 tiprev = len(self) - 1
1966 1965 total_size = self.start(tiprev) + self.length(tiprev)
1967 1966 if not self._inline or total_size < _maxinline:
1968 1967 return
1969 1968
1970 1969 troffset = tr.findoffset(self._indexfile)
1971 1970 if troffset is None:
1972 1971 raise error.RevlogError(
1973 1972 _(b"%s not found in the transaction") % self._indexfile
1974 1973 )
1975 1974 trindex = 0
1976 1975 tr.add(self._datafile, 0)
1977 1976
1978 1977 if fp:
1979 1978 fp.flush()
1980 1979 fp.close()
1981 1980 # We can't use the cached file handle after close(). So prevent
1982 1981 # its usage.
1983 1982 self._writinghandles = None
1984 1983
1985 1984 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1986 1985 for r in self:
1987 1986 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1988 1987 if troffset <= self.start(r):
1989 1988 trindex = r
1990 1989
1991 1990 with self._indexfp(b'w') as fp:
1992 1991 self._format_flags &= ~FLAG_INLINE_DATA
1993 1992 self._inline = False
1994 1993 for i in self:
1995 1994 e = self.index.entry_binary(i)
1996 1995 if i == 0:
1997 1996 header = self._format_flags | self._format_version
1998 1997 header = self.index.pack_header(header)
1999 1998 e = header + e
2000 1999 fp.write(e)
2001 2000
2002 2001 # the temp file replaces the real index when we exit the context
2003 2002 # manager
2004 2003
2005 2004 tr.replace(self._indexfile, trindex * self.index.entry_size)
2006 2005 nodemaputil.setup_persistent_nodemap(tr, self)
2007 2006 self._chunkclear()
2008 2007
2009 2008 def _nodeduplicatecallback(self, transaction, node):
2010 2009 """called when trying to add a node already stored."""
2011 2010
2012 2011 def addrevision(
2013 2012 self,
2014 2013 text,
2015 2014 transaction,
2016 2015 link,
2017 2016 p1,
2018 2017 p2,
2019 2018 cachedelta=None,
2020 2019 node=None,
2021 2020 flags=REVIDX_DEFAULT_FLAGS,
2022 2021 deltacomputer=None,
2023 2022 sidedata=None,
2024 2023 ):
2025 2024 """add a revision to the log
2026 2025
2027 2026 text - the revision data to add
2028 2027 transaction - the transaction object used for rollback
2029 2028 link - the linkrev data to add
2030 2029 p1, p2 - the parent nodeids of the revision
2031 2030 cachedelta - an optional precomputed delta
2032 2031 node - nodeid of revision; typically node is not specified, and it is
2033 2032 computed by default as hash(text, p1, p2); however, subclasses might
2034 2033 use a different hashing method (and override checkhash() in that case)
2035 2034 flags - the known flags to set on the revision
2036 2035 deltacomputer - an optional deltacomputer instance shared between
2037 2036 multiple calls
2038 2037 """
2039 2038 if link == nullrev:
2040 2039 raise error.RevlogError(
2041 2040 _(b"attempted to add linkrev -1 to %s") % self.display_id
2042 2041 )
2043 2042
2044 2043 if sidedata is None:
2045 2044 sidedata = {}
2046 2045 elif sidedata and not self.hassidedata:
2047 2046 raise error.ProgrammingError(
2048 2047 _(b"trying to add sidedata to a revlog that does not support them")
2049 2048 )
2050 2049
2051 2050 if flags:
2052 2051 node = node or self.hash(text, p1, p2)
2053 2052
2054 2053 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2055 2054
2056 2055 # If the flag processor modifies the revision data, ignore any provided
2057 2056 # cachedelta.
2058 2057 if rawtext != text:
2059 2058 cachedelta = None
2060 2059
2061 2060 if len(rawtext) > _maxentrysize:
2062 2061 raise error.RevlogError(
2063 2062 _(
2064 2063 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2065 2064 )
2066 2065 % (self.display_id, len(rawtext))
2067 2066 )
2068 2067
2069 2068 node = node or self.hash(rawtext, p1, p2)
2070 2069 rev = self.index.get_rev(node)
2071 2070 if rev is not None:
2072 2071 return rev
2073 2072
2074 2073 if validatehash:
2075 2074 self.checkhash(rawtext, node, p1=p1, p2=p2)
2076 2075
2077 2076 return self.addrawrevision(
2078 2077 rawtext,
2079 2078 transaction,
2080 2079 link,
2081 2080 p1,
2082 2081 p2,
2083 2082 node,
2084 2083 flags,
2085 2084 cachedelta=cachedelta,
2086 2085 deltacomputer=deltacomputer,
2087 2086 sidedata=sidedata,
2088 2087 )
2089 2088
2090 2089 def addrawrevision(
2091 2090 self,
2092 2091 rawtext,
2093 2092 transaction,
2094 2093 link,
2095 2094 p1,
2096 2095 p2,
2097 2096 node,
2098 2097 flags,
2099 2098 cachedelta=None,
2100 2099 deltacomputer=None,
2101 2100 sidedata=None,
2102 2101 ):
2103 2102 """add a raw revision with known flags, node and parents
2104 2103 useful when reusing a revision not stored in this revlog (e.g. received
2105 2104 over the wire, or read from an external bundle).
2106 2105 """
2107 2106 dfh = None
2108 2107 if not self._inline:
2109 2108 dfh = self._datafp(b"a+")
2110 2109 ifh = self._indexfp(b"a+")
2111 2110 try:
2112 2111 return self._addrevision(
2113 2112 node,
2114 2113 rawtext,
2115 2114 transaction,
2116 2115 link,
2117 2116 p1,
2118 2117 p2,
2119 2118 flags,
2120 2119 cachedelta,
2121 2120 ifh,
2122 2121 dfh,
2123 2122 deltacomputer=deltacomputer,
2124 2123 sidedata=sidedata,
2125 2124 )
2126 2125 finally:
2127 2126 if dfh:
2128 2127 dfh.close()
2129 2128 ifh.close()
2130 2129
2131 2130 def compress(self, data):
2132 2131 """Generate a possibly-compressed representation of data."""
2133 2132 if not data:
2134 2133 return b'', data
2135 2134
2136 2135 compressed = self._compressor.compress(data)
2137 2136
2138 2137 if compressed:
2139 2138 # The revlog compressor added the header in the returned data.
2140 2139 return b'', compressed
2141 2140
2142 2141 if data[0:1] == b'\0':
2143 2142 return b'', data
2144 2143 return b'u', data
2145 2144
2146 2145 def decompress(self, data):
2147 2146 """Decompress a revlog chunk.
2148 2147
2149 2148 The chunk is expected to begin with a header identifying the
2150 2149 format type so it can be routed to an appropriate decompressor.
2151 2150 """
2152 2151 if not data:
2153 2152 return data
2154 2153
2155 2154 # Revlogs are read much more frequently than they are written and many
2156 2155 # chunks only take microseconds to decompress, so performance is
2157 2156 # important here.
2158 2157 #
2159 2158 # We can make a few assumptions about revlogs:
2160 2159 #
2161 2160 # 1) the majority of chunks will be compressed (as opposed to inline
2162 2161 # raw data).
2163 2162 # 2) decompressing *any* data will likely be at least 10x slower than
2164 2163 # returning raw inline data.
2165 2164 # 3) we want to prioritize common and officially supported compression
2166 2165 # engines
2167 2166 #
2168 2167 # It follows that we want to optimize for "decompress compressed data
2169 2168 # when encoded with common and officially supported compression engines"
2170 2169 # case over "raw data" and "data encoded by less common or non-official
2171 2170 # compression engines." That is why we have the inline lookup first
2172 2171 # followed by the compengines lookup.
2173 2172 #
2174 2173 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2175 2174 # compressed chunks. And this matters for changelog and manifest reads.
2176 2175 t = data[0:1]
2177 2176
2178 2177 if t == b'x':
2179 2178 try:
2180 2179 return _zlibdecompress(data)
2181 2180 except zlib.error as e:
2182 2181 raise error.RevlogError(
2183 2182 _(b'revlog decompress error: %s')
2184 2183 % stringutil.forcebytestr(e)
2185 2184 )
2186 2185 # '\0' is more common than 'u' so it goes first.
2187 2186 elif t == b'\0':
2188 2187 return data
2189 2188 elif t == b'u':
2190 2189 return util.buffer(data, 1)
2191 2190
2192 2191 try:
2193 2192 compressor = self._decompressors[t]
2194 2193 except KeyError:
2195 2194 try:
2196 2195 engine = util.compengines.forrevlogheader(t)
2197 2196 compressor = engine.revlogcompressor(self._compengineopts)
2198 2197 self._decompressors[t] = compressor
2199 2198 except KeyError:
2200 2199 raise error.RevlogError(
2201 2200 _(b'unknown compression type %s') % binascii.hexlify(t)
2202 2201 )
2203 2202
2204 2203 return compressor.decompress(data)
2205 2204
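# Header-byte summary (sketch) for compress/decompress above:
#
#   b'u'  -> stored uncompressed; payload follows the marker
#   b'\0' -> stored as-is (first raw byte is NUL, so no marker is needed)
#   b'x'  -> zlib-compressed (zlib streams start with 0x78)
#   other -> looked up via util.compengines.forrevlogheader (e.g. zstd)
#
# Round trip, with `rl` an assumed revlog instance:
#
#   header, payload = rl.compress(b'some revision text')
#   assert bytes(rl.decompress(header + payload)) == b'some revision text'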
2206 2205 def _addrevision(
2207 2206 self,
2208 2207 node,
2209 2208 rawtext,
2210 2209 transaction,
2211 2210 link,
2212 2211 p1,
2213 2212 p2,
2214 2213 flags,
2215 2214 cachedelta,
2216 2215 ifh,
2217 2216 dfh,
2218 2217 alwayscache=False,
2219 2218 deltacomputer=None,
2220 2219 sidedata=None,
2221 2220 ):
2222 2221 """internal function to add revisions to the log
2223 2222
2224 2223 see addrevision for argument descriptions.
2225 2224
2226 2225 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2227 2226
2228 2227 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2229 2228 be used.
2230 2229
2231 2230 invariants:
2232 2231 - rawtext is optional (can be None); if not set, cachedelta must be set.
2233 2232 if both are set, they must correspond to each other.
2234 2233 """
2235 2234 if node == self.nullid:
2236 2235 raise error.RevlogError(
2237 2236 _(b"%s: attempt to add null revision") % self.display_id
2238 2237 )
2239 2238 if (
2240 2239 node == self.nodeconstants.wdirid
2241 2240 or node in self.nodeconstants.wdirfilenodeids
2242 2241 ):
2243 2242 raise error.RevlogError(
2244 2243 _(b"%s: attempt to add wdir revision") % self.display_id
2245 2244 )
2246 2245
2247 2246 if self._inline:
2248 2247 fh = ifh
2249 2248 else:
2250 2249 fh = dfh
2251 2250
2252 2251 btext = [rawtext]
2253 2252
2254 2253 curr = len(self)
2255 2254 prev = curr - 1
2256 2255
2257 2256 offset = self._get_data_offset(prev)
2258 2257
2259 2258 if self._concurrencychecker:
2260 2259 if self._inline:
2261 2260 # offset is "as if" it were in the .d file, so we need to add on
2262 2261 # the size of the entry metadata.
2263 2262 self._concurrencychecker(
2264 2263 ifh, self._indexfile, offset + curr * self.index.entry_size
2265 2264 )
2266 2265 else:
2267 2266 # Entries in the .i are a consistent size.
2268 2267 self._concurrencychecker(
2269 2268 ifh, self._indexfile, curr * self.index.entry_size
2270 2269 )
2271 2270 self._concurrencychecker(dfh, self._datafile, offset)
2272 2271
2273 2272 p1r, p2r = self.rev(p1), self.rev(p2)
2274 2273
2275 2274 # full versions are inserted when the needed deltas
2276 2275 # become comparable to the uncompressed text
2277 2276 if rawtext is None:
2278 2277 # need rawtext size, before changed by flag processors, which is
2279 2278 # need the rawtext size before it is changed by flag processors, which is
2280 2279 # logic that might remove metadata size.
2281 2280 textlen = mdiff.patchedsize(
2282 2281 revlog.size(self, cachedelta[0]), cachedelta[1]
2283 2282 )
2284 2283 else:
2285 2284 textlen = len(rawtext)
2286 2285
2287 2286 if deltacomputer is None:
2288 2287 deltacomputer = deltautil.deltacomputer(self)
2289 2288
2290 2289 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2291 2290
2292 2291 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2293 2292
2294 2293 if sidedata and self.hassidedata:
2295 2294 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2296 2295 sidedata_offset = offset + deltainfo.deltalen
2297 2296 else:
2298 2297 serialized_sidedata = b""
2299 2298 # Don't store the offset if the sidedata is empty, that way
2300 2299 # we can easily detect empty sidedata and they will be no different
2301 2300 # from ones we manually add.
2302 2301 sidedata_offset = 0
2303 2302
2304 2303 e = (
2305 2304 offset_type(offset, flags),
2306 2305 deltainfo.deltalen,
2307 2306 textlen,
2308 2307 deltainfo.base,
2309 2308 link,
2310 2309 p1r,
2311 2310 p2r,
2312 2311 node,
2313 2312 sidedata_offset,
2314 2313 len(serialized_sidedata),
2315 2314 )
2316 2315
2317 2316 self.index.append(e)
2318 2317 entry = self.index.entry_binary(curr)
2319 2318 if curr == 0:
2320 2319 header = self._format_flags | self._format_version
2321 2320 header = self.index.pack_header(header)
2322 2321 entry = header + entry
2323 2322 self._writeentry(
2324 2323 transaction,
2325 2324 ifh,
2326 2325 dfh,
2327 2326 entry,
2328 2327 deltainfo.data,
2329 2328 link,
2330 2329 offset,
2331 2330 serialized_sidedata,
2332 2331 )
2333 2332
2334 2333 rawtext = btext[0]
2335 2334
2336 2335 if alwayscache and rawtext is None:
2337 2336 rawtext = deltacomputer.buildtext(revinfo, fh)
2338 2337
2339 2338 if type(rawtext) == bytes: # only accept immutable objects
2340 2339 self._revisioncache = (node, curr, rawtext)
2341 2340 self._chainbasecache[curr] = deltainfo.chainbase
2342 2341 return curr
2343 2342
2344 2343 def _get_data_offset(self, prev):
2345 2344 """Returns the current offset in the (in-transaction) data file.
2346 2345 Versions < 2 of the revlog can get this in O(1), while revlog v2 needs a docket
2347 2346 file to store that information: since sidedata can be rewritten to the
2348 2347 end of the data file within a transaction, you can have cases where, for
2349 2348 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2350 2349 to `n - 1`'s sidedata being written after `n`'s data.
2351 2350
2352 2351 TODO cache this in a docket file before getting out of experimental."""
2353 2352 if self._format_version != REVLOGV2:
2354 2353 return self.end(prev)
2355 2354
2356 2355 offset = 0
2357 2356 for rev, entry in enumerate(self.index):
2358 2357 sidedata_end = entry[8] + entry[9]
2359 2358 # Sidedata for a previous rev has potentially been written after
2360 2359 # this rev's end, so take the max.
2361 2360 offset = max(self.end(rev), offset, sidedata_end)
2362 2361 return offset
2363 2362
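# Sketch of the v2 scan in _get_data_offset above: because sidedata can
# be rewritten at the end of the data file, the next write offset is the
# max over every revision of both its data end and its sidedata end
# (0 for an empty revlog):
#
#   offset = 0
#   for rev, entry in enumerate(rl.index):
#       offset = max(offset, rl.end(rev), entry[8] + entry[9])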
2364 2363 def _writeentry(
2365 2364 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2366 2365 ):
2367 2366 # Files opened in a+ mode have inconsistent behavior on various
2368 2367 # platforms. Windows requires that a file positioning call be made
2369 2368 # when the file handle transitions between reads and writes. See
2370 2369 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2371 2370 # platforms, Python or the platform itself can be buggy. Some versions
2372 2371 # of Solaris have been observed to not append at the end of the file
2373 2372 # if the file was seeked to before the end. See issue4943 for more.
2374 2373 #
2375 2374 # We work around this issue by inserting a seek() before writing.
2376 2375 # Note: This is likely not necessary on Python 3. However, because
2377 2376 # the file handle is reused for reads and may be seeked there, we need
2378 2377 # to be careful before changing this.
2379 2378 ifh.seek(0, os.SEEK_END)
2380 2379 if dfh:
2381 2380 dfh.seek(0, os.SEEK_END)
2382 2381
2383 2382 curr = len(self) - 1
2384 2383 if not self._inline:
2385 2384 transaction.add(self._datafile, offset)
2386 2385 transaction.add(self._indexfile, curr * len(entry))
2387 2386 if data[0]:
2388 2387 dfh.write(data[0])
2389 2388 dfh.write(data[1])
2390 2389 if sidedata:
2391 2390 dfh.write(sidedata)
2392 2391 ifh.write(entry)
2393 2392 else:
2394 2393 offset += curr * self.index.entry_size
2395 2394 transaction.add(self._indexfile, offset)
2396 2395 ifh.write(entry)
2397 2396 ifh.write(data[0])
2398 2397 ifh.write(data[1])
2399 2398 if sidedata:
2400 2399 ifh.write(sidedata)
2401 2400 self._enforceinlinesize(transaction, ifh)
2402 2401 nodemaputil.setup_persistent_nodemap(transaction, self)
2403 2402
2404 2403 def addgroup(
2405 2404 self,
2406 2405 deltas,
2407 2406 linkmapper,
2408 2407 transaction,
2409 2408 alwayscache=False,
2410 2409 addrevisioncb=None,
2411 2410 duplicaterevisioncb=None,
2412 2411 ):
2413 2412 """
2414 2413 add a delta group
2415 2414
2416 2415 given a set of deltas, add them to the revision log. the
2417 2416 first delta is against its parent, which should be in our
2418 2417 log, the rest are against the previous delta.
2419 2418
2420 2419 If ``addrevisioncb`` is defined, it will be called with arguments of
2421 2420 this revlog and the node that was added.
2422 2421 """
2423 2422
2424 2423 if self._writinghandles:
2425 2424 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2426 2425
2427 2426 r = len(self)
2428 2427 end = 0
2429 2428 if r:
2430 2429 end = self.end(r - 1)
2431 2430 ifh = self._indexfp(b"a+")
2432 2431 isize = r * self.index.entry_size
2433 2432 if self._inline:
2434 2433 transaction.add(self._indexfile, end + isize)
2435 2434 dfh = None
2436 2435 else:
2437 2436 transaction.add(self._indexfile, isize)
2438 2437 transaction.add(self._datafile, end)
2439 2438 dfh = self._datafp(b"a+")
2440 2439
2441 2440 self._writinghandles = (ifh, dfh)
2442 2441 empty = True
2443 2442
2444 2443 try:
2445 2444 deltacomputer = deltautil.deltacomputer(self)
2446 2445 # loop through our set of deltas
2447 2446 for data in deltas:
2448 2447 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2449 2448 link = linkmapper(linknode)
2450 2449 flags = flags or REVIDX_DEFAULT_FLAGS
2451 2450
2452 2451 rev = self.index.get_rev(node)
2453 2452 if rev is not None:
2454 2453 # this can happen if two branches make the same change
2455 2454 self._nodeduplicatecallback(transaction, rev)
2456 2455 if duplicaterevisioncb:
2457 2456 duplicaterevisioncb(self, rev)
2458 2457 empty = False
2459 2458 continue
2460 2459
2461 2460 for p in (p1, p2):
2462 2461 if not self.index.has_node(p):
2463 2462 raise error.LookupError(
2464 2463 p, self.radix, _(b'unknown parent')
2465 2464 )
2466 2465
2467 2466 if not self.index.has_node(deltabase):
2468 2467 raise error.LookupError(
2469 2468 deltabase, self.display_id, _(b'unknown delta base')
2470 2469 )
2471 2470
2472 2471 baserev = self.rev(deltabase)
2473 2472
2474 2473 if baserev != nullrev and self.iscensored(baserev):
2475 2474 # if base is censored, delta must be a full replacement in a
2476 2475 # single patch operation
2477 2476 hlen = struct.calcsize(b">lll")
2478 2477 oldlen = self.rawsize(baserev)
2479 2478 newlen = len(delta) - hlen
2480 2479 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2481 2480 raise error.CensoredBaseError(
2482 2481 self.display_id, self.node(baserev)
2483 2482 )
2484 2483
2485 2484 if not flags and self._peek_iscensored(baserev, delta):
2486 2485 flags |= REVIDX_ISCENSORED
2487 2486
2488 2487 # We assume consumers of addrevisioncb will want to retrieve
2489 2488 # the added revision, which will require a call to
2490 2489 # revision(). revision() will fast path if there is a cache
2491 2490 # hit. So, we tell _addrevision() to always cache in this case.
2492 2491 # We're only using addgroup() in the context of changegroup
2493 2492 # generation so the revision data can always be handled as raw
2494 2493 # by the flagprocessor.
2495 2494 rev = self._addrevision(
2496 2495 node,
2497 2496 None,
2498 2497 transaction,
2499 2498 link,
2500 2499 p1,
2501 2500 p2,
2502 2501 flags,
2503 2502 (baserev, delta),
2504 2503 ifh,
2505 2504 dfh,
2506 2505 alwayscache=alwayscache,
2507 2506 deltacomputer=deltacomputer,
2508 2507 sidedata=sidedata,
2509 2508 )
2510 2509
2511 2510 if addrevisioncb:
2512 2511 addrevisioncb(self, rev)
2513 2512 empty = False
2514 2513
2515 2514 if not dfh and not self._inline:
2516 2515 # addrevision switched from inline to conventional
2517 2516 # reopen the index
2518 2517 ifh.close()
2519 2518 dfh = self._datafp(b"a+")
2520 2519 ifh = self._indexfp(b"a+")
2521 2520 self._writinghandles = (ifh, dfh)
2522 2521 finally:
2523 2522 self._writinghandles = None
2524 2523
2525 2524 if dfh:
2526 2525 dfh.close()
2527 2526 ifh.close()
2528 2527 return not empty
2529 2528
2530 2529 def iscensored(self, rev):
2531 2530 """Check if a file revision is censored."""
2532 2531 if not self._censorable:
2533 2532 return False
2534 2533
2535 2534 return self.flags(rev) & REVIDX_ISCENSORED
2536 2535
2537 2536 def _peek_iscensored(self, baserev, delta):
2538 2537 """Quickly check if a delta produces a censored revision."""
2539 2538 if not self._censorable:
2540 2539 return False
2541 2540
2542 2541 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2543 2542
2544 2543 def getstrippoint(self, minlink):
2545 2544 """find the minimum rev that must be stripped to strip the linkrev
2546 2545
2547 2546 Returns a tuple containing the minimum rev and a set of all revs that
2548 2547 have linkrevs that will be broken by this strip.
2549 2548 """
2550 2549 return storageutil.resolvestripinfo(
2551 2550 minlink,
2552 2551 len(self) - 1,
2553 2552 self.headrevs(),
2554 2553 self.linkrev,
2555 2554 self.parentrevs,
2556 2555 )
2557 2556
2558 2557 def strip(self, minlink, transaction):
2559 2558 """truncate the revlog on the first revision with a linkrev >= minlink
2560 2559
2561 2560 This function is called when we're stripping revision minlink and
2562 2561 its descendants from the repository.
2563 2562
2564 2563 We have to remove all revisions with linkrev >= minlink, because
2565 2564 the equivalent changelog revisions will be renumbered after the
2566 2565 strip.
2567 2566
2568 2567 So we truncate the revlog on the first of these revisions, and
2569 2568 trust that the caller has saved the revisions that shouldn't be
2570 2569 removed and that it'll re-add them after this truncation.
2571 2570 """
2572 2571 if len(self) == 0:
2573 2572 return
2574 2573
2575 2574 rev, _ = self.getstrippoint(minlink)
2576 2575 if rev == len(self):
2577 2576 return
2578 2577
2579 2578 # first truncate the files on disk
2580 2579 end = self.start(rev)
2581 2580 if not self._inline:
2582 2581 transaction.add(self._datafile, end)
2583 2582 end = rev * self.index.entry_size
2584 2583 else:
2585 2584 end += rev * self.index.entry_size
2586 2585
2587 2586 transaction.add(self._indexfile, end)
2588 2587
2589 2588 # then reset internal state in memory to forget those revisions
2590 2589 self._revisioncache = None
2591 2590 self._chaininfocache = util.lrucachedict(500)
2592 2591 self._chunkclear()
2593 2592
2594 2593 del self.index[rev:-1]
2595 2594
2596 2595 def checksize(self):
2597 2596 """Check size of index and data files
2598 2597
2599 2598 return a (dd, di) tuple.
2600 2599 - dd: extra bytes for the "data" file
2601 2600 - di: extra bytes for the "index" file
2602 2601
2603 2602 A healthy revlog will return (0, 0).
2604 2603 """
2605 2604 expected = 0
2606 2605 if len(self):
2607 2606 expected = max(0, self.end(len(self) - 1))
2608 2607
2609 2608 try:
2610 2609 with self._datafp() as f:
2611 2610 f.seek(0, io.SEEK_END)
2612 2611 actual = f.tell()
2613 2612 dd = actual - expected
2614 2613 except IOError as inst:
2615 2614 if inst.errno != errno.ENOENT:
2616 2615 raise
2617 2616 dd = 0
2618 2617
2619 2618 try:
2620 2619 f = self.opener(self._indexfile)
2621 2620 f.seek(0, io.SEEK_END)
2622 2621 actual = f.tell()
2623 2622 f.close()
2624 2623 s = self.index.entry_size
2625 2624 i = max(0, actual // s)
2626 2625 di = actual - (i * s)
2627 2626 if self._inline:
2628 2627 databytes = 0
2629 2628 for r in self:
2630 2629 databytes += max(0, self.length(r))
2631 2630 dd = 0
2632 2631 di = actual - len(self) * s - databytes
2633 2632 except IOError as inst:
2634 2633 if inst.errno != errno.ENOENT:
2635 2634 raise
2636 2635 di = 0
2637 2636
2638 2637 return (dd, di)
2639 2638
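# Interpretation sketch for checksize above:
#
#   dd, di = rl.checksize()
#   # (0, 0)    -> sizes on disk match the index: healthy revlog
#   # dd > 0    -> the data file has dd trailing bytes the index knows
#   #              nothing about (e.g. an interrupted transaction)
#   # di != 0   -> the index file length is not a whole number of entries
#   #              or disagrees with the data actually present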
2640 2639 def files(self):
2641 2640 res = [self._indexfile]
2642 2641 if not self._inline:
2643 2642 res.append(self._datafile)
2644 2643 return res
2645 2644
2646 2645 def emitrevisions(
2647 2646 self,
2648 2647 nodes,
2649 2648 nodesorder=None,
2650 2649 revisiondata=False,
2651 2650 assumehaveparentrevisions=False,
2652 2651 deltamode=repository.CG_DELTAMODE_STD,
2653 2652 sidedata_helpers=None,
2654 2653 ):
2655 2654 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2656 2655 raise error.ProgrammingError(
2657 2656 b'unhandled value for nodesorder: %s' % nodesorder
2658 2657 )
2659 2658
2660 2659 if nodesorder is None and not self._generaldelta:
2661 2660 nodesorder = b'storage'
2662 2661
2663 2662 if (
2664 2663 not self._storedeltachains
2665 2664 and deltamode != repository.CG_DELTAMODE_PREV
2666 2665 ):
2667 2666 deltamode = repository.CG_DELTAMODE_FULL
2668 2667
2669 2668 return storageutil.emitrevisions(
2670 2669 self,
2671 2670 nodes,
2672 2671 nodesorder,
2673 2672 revlogrevisiondelta,
2674 2673 deltaparentfn=self.deltaparent,
2675 2674 candeltafn=self.candelta,
2676 2675 rawsizefn=self.rawsize,
2677 2676 revdifffn=self.revdiff,
2678 2677 flagsfn=self.flags,
2679 2678 deltamode=deltamode,
2680 2679 revisiondata=revisiondata,
2681 2680 assumehaveparentrevisions=assumehaveparentrevisions,
2682 2681 sidedata_helpers=sidedata_helpers,
2683 2682 )
2684 2683
2685 2684 DELTAREUSEALWAYS = b'always'
2686 2685 DELTAREUSESAMEREVS = b'samerevs'
2687 2686 DELTAREUSENEVER = b'never'
2688 2687
2689 2688 DELTAREUSEFULLADD = b'fulladd'
2690 2689
2691 2690 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2692 2691
2693 2692 def clone(
2694 2693 self,
2695 2694 tr,
2696 2695 destrevlog,
2697 2696 addrevisioncb=None,
2698 2697 deltareuse=DELTAREUSESAMEREVS,
2699 2698 forcedeltabothparents=None,
2700 2699 sidedata_helpers=None,
2701 2700 ):
2702 2701 """Copy this revlog to another, possibly with format changes.
2703 2702
2704 2703 The destination revlog will contain the same revisions and nodes.
2705 2704 However, it may not be bit-for-bit identical due to e.g. delta encoding
2706 2705 differences.
2707 2706
2708 2707 The ``deltareuse`` argument controls how deltas from the existing revlog
2709 2708 are preserved in the destination revlog. The argument can have the
2710 2709 following values:
2711 2710
2712 2711 DELTAREUSEALWAYS
2713 2712 Deltas will always be reused (if possible), even if the destination
2714 2713 revlog would not select the same revisions for the delta. This is the
2715 2714 fastest mode of operation.
2716 2715 DELTAREUSESAMEREVS
2717 2716 Deltas will be reused if the destination revlog would pick the same
2718 2717 revisions for the delta. This mode strikes a balance between speed
2719 2718 and optimization.
2720 2719 DELTAREUSENEVER
2721 2720 Deltas will never be reused. This is the slowest mode of execution.
2722 2721 This mode can be used to recompute deltas (e.g. if the diff/delta
2723 2722 algorithm changes).
2724 2723 DELTAREUSEFULLADD
2725 2724 Revisions will be re-added as if they were new content. This is
2726 2725 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2727 2726 e.g. large file detection and handling.
2728 2727
2729 2728 Delta computation can be slow, so the choice of delta reuse policy can
2730 2729 significantly affect run time.
2731 2730
2732 2731 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2733 2732 two extremes. Deltas will be reused if they are appropriate. But if the
2734 2733 delta could choose a better revision, it will do so. This means if you
2735 2734 are converting a non-generaldelta revlog to a generaldelta revlog,
2736 2735 deltas will be recomputed if the delta's parent isn't a parent of the
2737 2736 revision.
2738 2737
2739 2738 In addition to the delta policy, the ``forcedeltabothparents``
2740 2739 argument controls whether to force compute deltas against both parents
2741 2740 for merges. By default, the destination revlog's existing setting is used.
2742 2741
2743 2742 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2744 2743 `sidedata_helpers`.
2745 2744 """
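# Usage sketch (illustrative; `tr` is an open transaction and `dest` an
# empty revlog of the desired format):
#
#   src.clone(tr, dest, deltareuse=revlog.DELTAREUSESAMEREVS)
#
# DELTAREUSEALWAYS trades optimal delta selection for speed;
# DELTAREUSENEVER recomputes every delta and is the slowest choice.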
2746 2745 if deltareuse not in self.DELTAREUSEALL:
2747 2746 raise ValueError(
2748 2747 _(b'value for deltareuse invalid: %s') % deltareuse
2749 2748 )
2750 2749
2751 2750 if len(destrevlog):
2752 2751 raise ValueError(_(b'destination revlog is not empty'))
2753 2752
2754 2753 if getattr(self, 'filteredrevs', None):
2755 2754 raise ValueError(_(b'source revlog has filtered revisions'))
2756 2755 if getattr(destrevlog, 'filteredrevs', None):
2757 2756 raise ValueError(_(b'destination revlog has filtered revisions'))
2758 2757
2759 2758 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2760 2759 # if possible.
2761 2760 oldlazydelta = destrevlog._lazydelta
2762 2761 oldlazydeltabase = destrevlog._lazydeltabase
2763 2762 oldamd = destrevlog._deltabothparents
2764 2763
2765 2764 try:
2766 2765 if deltareuse == self.DELTAREUSEALWAYS:
2767 2766 destrevlog._lazydeltabase = True
2768 2767 destrevlog._lazydelta = True
2769 2768 elif deltareuse == self.DELTAREUSESAMEREVS:
2770 2769 destrevlog._lazydeltabase = False
2771 2770 destrevlog._lazydelta = True
2772 2771 elif deltareuse == self.DELTAREUSENEVER:
2773 2772 destrevlog._lazydeltabase = False
2774 2773 destrevlog._lazydelta = False
2775 2774
2776 2775 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2777 2776
2778 2777 self._clone(
2779 2778 tr,
2780 2779 destrevlog,
2781 2780 addrevisioncb,
2782 2781 deltareuse,
2783 2782 forcedeltabothparents,
2784 2783 sidedata_helpers,
2785 2784 )
2786 2785
2787 2786 finally:
2788 2787 destrevlog._lazydelta = oldlazydelta
2789 2788 destrevlog._lazydeltabase = oldlazydeltabase
2790 2789 destrevlog._deltabothparents = oldamd
2791 2790
2792 2791 def _clone(
2793 2792 self,
2794 2793 tr,
2795 2794 destrevlog,
2796 2795 addrevisioncb,
2797 2796 deltareuse,
2798 2797 forcedeltabothparents,
2799 2798 sidedata_helpers,
2800 2799 ):
2801 2800 """perform the core duty of `revlog.clone` after parameter processing"""
2802 2801 deltacomputer = deltautil.deltacomputer(destrevlog)
2803 2802 index = self.index
2804 2803 for rev in self:
2805 2804 entry = index[rev]
2806 2805
2807 2806 # Some classes override linkrev to take filtered revs into
2808 2807 # account. Use raw entry from index.
2809 2808 flags = entry[0] & 0xFFFF
2810 2809 linkrev = entry[4]
2811 2810 p1 = index[entry[5]][7]
2812 2811 p2 = index[entry[6]][7]
2813 2812 node = entry[7]
2814 2813
2815 2814 # (Possibly) reuse the delta from the revlog if allowed and
2816 2815 # the revlog chunk is a delta.
2817 2816 cachedelta = None
2818 2817 rawtext = None
2819 2818 if deltareuse == self.DELTAREUSEFULLADD:
2820 2819 text, sidedata = self._revisiondata(rev)
2821 2820
2822 2821 if sidedata_helpers is not None:
2823 2822 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2824 2823 self, sidedata_helpers, sidedata, rev
2825 2824 )
2826 2825 flags = flags | new_flags[0] & ~new_flags[1]
2827 2826
2828 2827 destrevlog.addrevision(
2829 2828 text,
2830 2829 tr,
2831 2830 linkrev,
2832 2831 p1,
2833 2832 p2,
2834 2833 cachedelta=cachedelta,
2835 2834 node=node,
2836 2835 flags=flags,
2837 2836 deltacomputer=deltacomputer,
2838 2837 sidedata=sidedata,
2839 2838 )
2840 2839 else:
2841 2840 if destrevlog._lazydelta:
2842 2841 dp = self.deltaparent(rev)
2843 2842 if dp != nullrev:
2844 2843 cachedelta = (dp, bytes(self._chunk(rev)))
2845 2844
2846 2845 sidedata = None
2847 2846 if not cachedelta:
2848 2847 rawtext, sidedata = self._revisiondata(rev)
2849 2848 if sidedata is None:
2850 2849 sidedata = self.sidedata(rev)
2851 2850
2852 2851 if sidedata_helpers is not None:
2853 2852 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2854 2853 self, sidedata_helpers, sidedata, rev
2855 2854 )
2856 2855 flags = flags | new_flags[0] & ~new_flags[1]
2857 2856
2858 2857 ifh = destrevlog.opener(
2859 2858 destrevlog._indexfile, b'a+', checkambig=False
2860 2859 )
2861 2860 dfh = None
2862 2861 if not destrevlog._inline:
2863 2862 dfh = destrevlog.opener(destrevlog._datafile, b'a+')
2864 2863 try:
2865 2864 destrevlog._addrevision(
2866 2865 node,
2867 2866 rawtext,
2868 2867 tr,
2869 2868 linkrev,
2870 2869 p1,
2871 2870 p2,
2872 2871 flags,
2873 2872 cachedelta,
2874 2873 ifh,
2875 2874 dfh,
2876 2875 deltacomputer=deltacomputer,
2877 2876 sidedata=sidedata,
2878 2877 )
2879 2878 finally:
2880 2879 if dfh:
2881 2880 dfh.close()
2882 2881 ifh.close()
2883 2882
2884 2883 if addrevisioncb:
2885 2884 addrevisioncb(self, rev, node)
2886 2885
2887 2886 def censorrevision(self, tr, censornode, tombstone=b''):
2888 2887 if self._format_version == REVLOGV0:
2889 2888 raise error.RevlogError(
2890 2889 _(b'cannot censor with version %d revlogs')
2891 2890 % self._format_version
2892 2891 )
2893 2892
2894 2893 censorrev = self.rev(censornode)
2895 2894 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2896 2895
2897 2896 if len(tombstone) > self.rawsize(censorrev):
2898 2897 raise error.Abort(
2899 2898 _(b'censor tombstone must be no longer than censored data')
2900 2899 )
2901 2900
2902 2901 # Rewriting the revlog in place is hard. Our strategy for censoring is
2903 2902 # to create a new revlog, copy all revisions to it, then replace the
2904 2903 # revlogs on transaction close.
2905 2904 #
2906 2905 # This is a bit dangerous. We could easily have a mismatch of state.
2907 2906 newrl = revlog(
2908 2907 self.opener,
2909 2908 target=self.target,
2910 2909 radix=self.radix,
2911 2910 postfix=b'tmpcensored',
2912 2911 censorable=True,
2913 2912 )
2914 2913 newrl._format_version = self._format_version
2915 2914 newrl._format_flags = self._format_flags
2916 2915 newrl._generaldelta = self._generaldelta
2917 2916 newrl._parse_index = self._parse_index
2918 2917
2919 2918 for rev in self.revs():
2920 2919 node = self.node(rev)
2921 2920 p1, p2 = self.parents(node)
2922 2921
2923 2922 if rev == censorrev:
2924 2923 newrl.addrawrevision(
2925 2924 tombstone,
2926 2925 tr,
2927 2926 self.linkrev(censorrev),
2928 2927 p1,
2929 2928 p2,
2930 2929 censornode,
2931 2930 REVIDX_ISCENSORED,
2932 2931 )
2933 2932
2934 2933 if newrl.deltaparent(rev) != nullrev:
2935 2934 raise error.Abort(
2936 2935 _(
2937 2936 b'censored revision stored as delta; '
2938 2937 b'cannot censor'
2939 2938 ),
2940 2939 hint=_(
2941 2940 b'censoring of revlogs is not '
2942 2941 b'fully implemented; please report '
2943 2942 b'this bug'
2944 2943 ),
2945 2944 )
2946 2945 continue
2947 2946
2948 2947 if self.iscensored(rev):
2949 2948 if self.deltaparent(rev) != nullrev:
2950 2949 raise error.Abort(
2951 2950 _(
2952 2951 b'cannot censor due to censored '
2953 2952 b'revision having delta stored'
2954 2953 )
2955 2954 )
2956 2955 rawtext = self._chunk(rev)
2957 2956 else:
2958 2957 rawtext = self.rawdata(rev)
2959 2958
2960 2959 newrl.addrawrevision(
2961 2960 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2962 2961 )
2963 2962
2964 2963 tr.addbackup(self._indexfile, location=b'store')
2965 2964 if not self._inline:
2966 2965 tr.addbackup(self._datafile, location=b'store')
2967 2966
2968 2967 self.opener.rename(newrl._indexfile, self._indexfile)
2969 2968 if not self._inline:
2970 2969 self.opener.rename(newrl._datafile, self._datafile)
2971 2970
2972 2971 self.clearcaches()
2973 2972 self._loadindex()
2974 2973
2975 2974 def verifyintegrity(self, state):
2976 2975 """Verifies the integrity of the revlog.
2977 2976
2978 2977 Yields ``revlogproblem`` instances describing problems that are
2979 2978 found.
2980 2979 """
2981 2980 dd, di = self.checksize()
2982 2981 if dd:
2983 2982 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2984 2983 if di:
2985 2984 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2986 2985
2987 2986 version = self._format_version
2988 2987
2989 2988 # The verifier tells us what version revlog we should be.
2990 2989 if version != state[b'expectedversion']:
2991 2990 yield revlogproblem(
2992 2991 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2993 2992 % (self.display_id, version, state[b'expectedversion'])
2994 2993 )
2995 2994
2996 2995 state[b'skipread'] = set()
2997 2996 state[b'safe_renamed'] = set()
2998 2997
2999 2998 for rev in self:
3000 2999 node = self.node(rev)
3001 3000
3002 3001 # Verify contents. 4 cases to care about:
3003 3002 #
3004 3003 # common: the most common case
3005 3004 # rename: with a rename
3006 3005 # meta: file content starts with b'\1\n', the metadata
3007 3006 # header defined in filelog.py, but without a rename
3008 3007 # ext: content stored externally
3009 3008 #
3010 3009 # More formally, their differences are shown below:
3011 3010 #
3012 3011 # | common | rename | meta | ext
3013 3012 # -------------------------------------------------------
3014 3013 # flags() | 0 | 0 | 0 | not 0
3015 3014 # renamed() | False | True | False | ?
3016 3015 # rawtext[0:2]=='\1\n'| False | True | True | ?
3017 3016 #
3018 3017 # "rawtext" means the raw text stored in revlog data, which
3019 3018 # could be retrieved by "rawdata(rev)". "text"
3020 3019 # mentioned below is "revision(rev)".
3021 3020 #
3022 3021 # There are 3 different lengths stored physically:
3023 3022 # 1. L1: rawsize, stored in revlog index
3024 3023 # 2. L2: len(rawtext), stored in revlog data
3025 3024 # 3. L3: len(text), stored in revlog data if flags==0, or
3026 3025 # possibly somewhere else if flags!=0
3027 3026 #
3028 3027 # L1 should be equal to L2. L3 could be different from them.
3029 3028 # "text" may or may not affect commit hash depending on flag
3030 3029 # processors (see flagutil.addflagprocessor).
3031 3030 #
3032 3031 # | common | rename | meta | ext
3033 3032 # -------------------------------------------------
3034 3033 # rawsize() | L1 | L1 | L1 | L1
3035 3034 # size() | L1 | L2-LM | L1(*) | L1 (?)
3036 3035 # len(rawtext) | L2 | L2 | L2 | L2
3037 3036 # len(text) | L2 | L2 | L2 | L3
3038 3037 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3039 3038 #
3040 3039 # LM: length of metadata, depending on rawtext
3041 3040 # (*): not ideal, see comment in filelog.size
3042 3041 # (?): could be "- len(meta)" if the resolved content has
3043 3042 # rename metadata
3044 3043 #
3045 3044 # Checks needed to be done:
3046 3045 # 1. length check: L1 == L2, in all cases.
3047 3046 # 2. hash check: depending on flag processor, we may need to
3048 3047 # use either "text" (external), or "rawtext" (in revlog).
3049 3048
3050 3049 try:
3051 3050 skipflags = state.get(b'skipflags', 0)
3052 3051 if skipflags:
3053 3052 skipflags &= self.flags(rev)
3054 3053
3055 3054 _verify_revision(self, skipflags, state, node)
3056 3055
3057 3056 l1 = self.rawsize(rev)
3058 3057 l2 = len(self.rawdata(node))
3059 3058
3060 3059 if l1 != l2:
3061 3060 yield revlogproblem(
3062 3061 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3063 3062 node=node,
3064 3063 )
3065 3064
3066 3065 except error.CensoredNodeError:
3067 3066 if state[b'erroroncensored']:
3068 3067 yield revlogproblem(
3069 3068 error=_(b'censored file data'), node=node
3070 3069 )
3071 3070 state[b'skipread'].add(node)
3072 3071 except Exception as e:
3073 3072 yield revlogproblem(
3074 3073 error=_(b'unpacking %s: %s')
3075 3074 % (short(node), stringutil.forcebytestr(e)),
3076 3075 node=node,
3077 3076 )
3078 3077 state[b'skipread'].add(node)
3079 3078
3080 3079 def storageinfo(
3081 3080 self,
3082 3081 exclusivefiles=False,
3083 3082 sharedfiles=False,
3084 3083 revisionscount=False,
3085 3084 trackedsize=False,
3086 3085 storedsize=False,
3087 3086 ):
3088 3087 d = {}
3089 3088
3090 3089 if exclusivefiles:
3091 3090 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3092 3091 if not self._inline:
3093 3092 d[b'exclusivefiles'].append((self.opener, self._datafile))
3094 3093
3095 3094 if sharedfiles:
3096 3095 d[b'sharedfiles'] = []
3097 3096
3098 3097 if revisionscount:
3099 3098 d[b'revisionscount'] = len(self)
3100 3099
3101 3100 if trackedsize:
3102 3101 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3103 3102
3104 3103 if storedsize:
3105 3104 d[b'storedsize'] = sum(
3106 3105 self.opener.stat(path).st_size for path in self.files()
3107 3106 )
3108 3107
3109 3108 return d
3110 3109
3111 3110 def rewrite_sidedata(self, helpers, startrev, endrev):
3112 3111 if not self.hassidedata:
3113 3112 return
3114 3113 # inline revlogs are not yet supported because they suffer from an issue
3115 3114 # when rewriting them (since it is not an append-only operation).
3116 3115 # See issue6485.
3117 3116 assert not self._inline
3118 3117 if not helpers[1] and not helpers[2]:
3119 3118 # Nothing to generate or remove
3120 3119 return
3121 3120
3122 3121 # the changelog implements a "delayed" writing mechanism that assumes
3123 3122 # all index data is written in append mode and is therefore incompatible
3124 3123 # with the seeked writes done in this method. The use of such "delayed"
3125 3124 # writing will soon be removed for revlog versions that support side
3126 3125 # data, so for now, we only keep this simple assert to highlight the
3127 3126 # situation.
3128 3127 delayed = getattr(self, '_delayed', False)
3129 3128 diverted = getattr(self, '_divert', False)
3130 3129 if delayed and not diverted:
3131 3130 msg = "cannot rewrite_sidedata of a delayed revlog"
3132 3131 raise error.ProgrammingError(msg)
3133 3132
3134 3133 new_entries = []
3135 3134 # append the new sidedata
3136 3135 with self._datafp(b'a+') as fp:
3137 3136 # Maybe this bug still exists, see revlog._writeentry
3138 3137 fp.seek(0, os.SEEK_END)
3139 3138 current_offset = fp.tell()
3140 3139 for rev in range(startrev, endrev + 1):
3141 3140 entry = self.index[rev]
3142 3141 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3143 3142 store=self,
3144 3143 sidedata_helpers=helpers,
3145 3144 sidedata={},
3146 3145 rev=rev,
3147 3146 )
3148 3147
3149 3148 serialized_sidedata = sidedatautil.serialize_sidedata(
3150 3149 new_sidedata
3151 3150 )
3152 3151 if entry[8] != 0 or entry[9] != 0:
3153 3152 # rewriting entries that already have sidedata is not
3154 3153 # supported yet, because it introduces garbage data in the
3155 3154 # revlog.
3156 3155 msg = b"Rewriting existing sidedata is not supported yet"
3157 3156 raise error.Abort(msg)
3158 3157
3159 3158 # Apply (potential) flags to add and to remove after running
3160 3159 # the sidedata helpers
3161 3160 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3162 3161 entry = (new_offset_flags,) + entry[1:8]
3163 3162 entry += (current_offset, len(serialized_sidedata))
3164 3163
3165 3164 fp.write(serialized_sidedata)
3166 3165 new_entries.append(entry)
3167 3166 current_offset += len(serialized_sidedata)
3168 3167
3169 3168 # rewrite the new index entries
3170 3169 with self._indexfp(b'r+') as fp:
3171 3170 fp.seek(startrev * self.index.entry_size)
3172 3171 for i, e in enumerate(new_entries):
3173 3172 rev = startrev + i
3174 3173 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3175 3174 packed = self.index.entry_binary(rev)
3176 3175 if rev == 0:
3177 3176 header = self._format_flags | self._format_version
3178 3177 header = self.index.pack_header(header)
3179 3178 packed = header + packed
3180 3179 fp.write(packed)