revlog: rename `indexdata` to entry_data...
marmoute
r47947:8b549ea4 default
@@ -1,3179 +1,3180 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 INDEX_HEADER,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_HASCOPIESINFO,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 )
59 59 from .thirdparty import attr
60 60 from . import (
61 61 ancestor,
62 62 dagop,
63 63 error,
64 64 mdiff,
65 65 policy,
66 66 pycompat,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .interfaces import (
71 71 repository,
72 72 util as interfaceutil,
73 73 )
74 74 from .revlogutils import (
75 75 deltas as deltautil,
76 76 flagutil,
77 77 nodemap as nodemaputil,
78 78 revlogv0,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanket usage of all the names to prevent pyflakes constraints
87 87 # We need these names available in the module for extensions.
88 88
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_HASCOPIESINFO
102 102 REVIDX_EXTSTORED
103 103 REVIDX_DEFAULT_FLAGS
104 104 REVIDX_FLAGS_ORDER
105 105 REVIDX_RAWTEXT_CHANGING_FLAGS
106 106
107 107 parsers = policy.importmod('parsers')
108 108 rustancestor = policy.importrust('ancestor')
109 109 rustdagop = policy.importrust('dagop')
110 110 rustrevlog = policy.importrust('revlog')
111 111
112 112 # Aliased for performance.
113 113 _zlibdecompress = zlib.decompress
114 114
115 115 # max size of revlog with inline data
116 116 _maxinline = 131072
117 117 _chunksize = 1048576
118 118
119 119 # Flag processors for REVIDX_ELLIPSIS.
120 120 def ellipsisreadprocessor(rl, text):
121 121 return text, False
122 122
123 123
124 124 def ellipsiswriteprocessor(rl, text):
125 125 return text, False
126 126
127 127
128 128 def ellipsisrawprocessor(rl, text):
129 129 return False
130 130
131 131
132 132 ellipsisprocessor = (
133 133 ellipsisreadprocessor,
134 134 ellipsiswriteprocessor,
135 135 ellipsisrawprocessor,
136 136 )
137 137
138 138
139 139 def offset_type(offset, type):
140 140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 141 raise ValueError(b'unknown revlog index flags')
142 142 return int(int(offset) << 16 | type)
143 143
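
The packing that `offset_type` performs is the inverse of what the `start()` and `flags()` accessors later unpack from the first index field. A minimal round-trip sketch (illustrative values only):

    # Pack a data offset and 16 bits of flags into one integer, then
    # recover both halves the way revlog.start() and revlog.flags() do.
    packed = offset_type(4096, 0)   # offset 4096, no flags set
    assert packed >> 16 == 4096     # the data offset
    assert packed & 0xFFFF == 0     # the flag bits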
144 144
145 145 def _verify_revision(rl, skipflags, state, node):
146 146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 147 point for extensions to influence the operation."""
148 148 if skipflags:
149 149 state[b'skipread'].add(node)
150 150 else:
151 151 # Side-effect: read content and verify hash.
152 152 rl.revision(node)
153 153
154 154
155 155 # True if a fast implementation for persistent-nodemap is available
156 156 #
157 157 # We also consider we have a "fast" implementation in "pure" python because
158 158 # people using pure don't really have performance considerations (and a
159 159 # wheelbarrow of other slowness sources)
160 160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 161 parsers, 'BaseIndexObject'
162 162 )
163 163
164 164
165 165 @attr.s(slots=True, frozen=True)
166 166 class _revisioninfo(object):
167 167 """Information about a revision that allows building its fulltext
168 168 node: expected hash of the revision
169 169 p1, p2: parent revs of the revision
170 170 btext: built text cache consisting of a one-element list
171 171 cachedelta: (baserev, uncompressed_delta) or None
172 172 flags: flags associated to the revision storage
173 173
174 174 One of btext[0] or cachedelta must be set.
175 175 """
176 176
177 177 node = attr.ib()
178 178 p1 = attr.ib()
179 179 p2 = attr.ib()
180 180 btext = attr.ib()
181 181 textlen = attr.ib()
182 182 cachedelta = attr.ib()
183 183 flags = attr.ib()
184 184
185 185
186 186 @interfaceutil.implementer(repository.irevisiondelta)
187 187 @attr.s(slots=True)
188 188 class revlogrevisiondelta(object):
189 189 node = attr.ib()
190 190 p1node = attr.ib()
191 191 p2node = attr.ib()
192 192 basenode = attr.ib()
193 193 flags = attr.ib()
194 194 baserevisionsize = attr.ib()
195 195 revision = attr.ib()
196 196 delta = attr.ib()
197 197 sidedata = attr.ib()
198 198 protocol_flags = attr.ib()
199 199 linknode = attr.ib(default=None)
200 200
201 201
202 202 @interfaceutil.implementer(repository.iverifyproblem)
203 203 @attr.s(frozen=True)
204 204 class revlogproblem(object):
205 205 warning = attr.ib(default=None)
206 206 error = attr.ib(default=None)
207 207 node = attr.ib(default=None)
208 208
209 209
210 210 def parse_index_v1(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline)
213 213 return index, cache
214 214
215 215
216 216 def parse_index_v2(data, inline):
217 217 # call the C implementation to parse the index data
218 218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 219 return index, cache
220 220
221 221
222 222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223 223
224 224 def parse_index_v1_nodemap(data, inline):
225 225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 226 return index, cache
227 227
228 228
229 229 else:
230 230 parse_index_v1_nodemap = None
231 231
232 232
233 233 def parse_index_v1_mixed(data, inline):
234 234 index, cache = parse_index_v1(data, inline)
235 235 return rustrevlog.MixedIndex(index), cache
236 236
237 237
238 238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 239 # signed integer)
240 240 _maxentrysize = 0x7FFFFFFF
241 241
242 242
243 243 class revlog(object):
244 244 """
245 245 the underlying revision storage object
246 246
247 247 A revlog consists of two parts, an index and the revision data.
248 248
249 249 The index is a file with a fixed record size containing
250 250 information on each revision, including its nodeid (hash), the
251 251 nodeids of its parents, the position and offset of its data within
252 252 the data file, and the revision it's based on. Finally, each entry
253 253 contains a linkrev entry that can serve as a pointer to external
254 254 data.
255 255
256 256 The revision data itself is a linear collection of data chunks.
257 257 Each chunk represents a revision and is usually represented as a
258 258 delta against the previous chunk. To bound lookup time, runs of
259 259 deltas are limited to about 2 times the length of the original
260 260 version data. This makes retrieval of a version proportional to
261 261 its size, or O(1) relative to the number of revisions.
262 262
263 263 Both pieces of the revlog are written to in an append-only
264 264 fashion, which means we never need to rewrite a file to insert or
265 265 remove data, and can use some simple techniques to avoid the need
266 266 for locking while reading.
267 267
268 268 If checkambig, indexfile is opened with checkambig=True at
269 269 writing, to avoid file stat ambiguity.
270 270
271 271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 272 index will be mmapped rather than read if it is larger than the
273 273 configured threshold.
274 274
275 275 If censorable is True, the revlog can have censored revisions.
276 276
277 277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 278 compression for the data content.
279 279
280 280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 281 file handle, a filename, and an expected position. It should check whether
282 282 the current position in the file handle is valid, and log/warn/fail (by
283 283 raising).
284 284 """
285 285
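The fixed-size record described in the docstring above can be pictured with a struct sketch. This is an assumption modelled on the v1 index layout (the authoritative definition lives in revlogutils.constants), not a definition taken from this changeset:

    import struct

    # Assumed v1-style entry: offset+flags (8 bytes), compressed length,
    # uncompressed length, delta base rev, link rev, p1 rev, p2 rev,
    # 20-byte nodeid, padded to a fixed 64-byte record.
    INDEX_ENTRY_V1_SKETCH = struct.Struct(b'>Qiiiiii20s12x')
    assert INDEX_ENTRY_V1_SKETCH.size == 64
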
286 286 _flagserrorclass = error.RevlogError
287 287
288 288 def __init__(
289 289 self,
290 290 opener,
291 291 target,
292 292 radix,
293 293 postfix=None,
294 294 checkambig=False,
295 295 mmaplargeindex=False,
296 296 censorable=False,
297 297 upperboundcomp=None,
298 298 persistentnodemap=False,
299 299 concurrencychecker=None,
300 300 ):
301 301 """
302 302 create a revlog object
303 303
304 304 opener is a function that abstracts the file opening operation
305 305 and can be used to implement COW semantics or the like.
306 306
307 307 `target`: a (KIND, ID) tuple that identifies the content stored in
308 308 this revlog. It helps the rest of the code understand what the revlog
309 309 is about without having to resort to heuristics and index filename
310 310 analysis. Note that this must reliably be set by normal code, but that
311 311 test, debug, or performance measurement code might not set it to an
312 312 accurate value.
313 313 """
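
A hedged illustration of the `target` contract (KIND_FILELOG is one of the ALL_KINDS constants from revlogutils.constants; the path is made up):

    from mercurial.revlogutils.constants import KIND_FILELOG

    # A filelog-backed revlog identifies itself by kind and ID, letting
    # callers branch on rl.target[0] instead of sniffing filenames.
    target = (KIND_FILELOG, b'path/to/file')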
314 314 self.upperboundcomp = upperboundcomp
315 315
316 316 self.radix = radix
317 317
318 318 self._indexfile = None
319 319 self._datafile = None
320 320 self._nodemap_file = None
321 321 self.postfix = postfix
322 322 self.opener = opener
323 323 if persistentnodemap:
324 324 self._nodemap_file = nodemaputil.get_nodemap_file(self)
325 325
326 326 assert target[0] in ALL_KINDS
327 327 assert len(target) == 2
328 328 self.target = target
329 329 # When True, indexfile is opened with checkambig=True at writing, to
330 330 # avoid file stat ambiguity.
331 331 self._checkambig = checkambig
332 332 self._mmaplargeindex = mmaplargeindex
333 333 self._censorable = censorable
334 334 # 3-tuple of (node, rev, text) for a raw revision.
335 335 self._revisioncache = None
336 336 # Maps rev to chain base rev.
337 337 self._chainbasecache = util.lrucachedict(100)
338 338 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
339 339 self._chunkcache = (0, b'')
340 340 # How much data to read and cache into the raw revlog data cache.
341 341 self._chunkcachesize = 65536
342 342 self._maxchainlen = None
343 343 self._deltabothparents = True
344 344 self.index = None
345 345 self._nodemap_docket = None
346 346 # Mapping of partial identifiers to full nodes.
347 347 self._pcache = {}
348 348 # Mapping of revision integer to full node.
349 349 self._compengine = b'zlib'
350 350 self._compengineopts = {}
351 351 self._maxdeltachainspan = -1
352 352 self._withsparseread = False
353 353 self._sparserevlog = False
354 354 self._srdensitythreshold = 0.50
355 355 self._srmingapsize = 262144
356 356
357 357 # Make copy of flag processors so each revlog instance can support
358 358 # custom flags.
359 359 self._flagprocessors = dict(flagutil.flagprocessors)
360 360
361 361 # 2-tuple of file handles being used for active writing.
362 362 self._writinghandles = None
363 363
364 364 self._loadindex()
365 365
366 366 self._concurrencychecker = concurrencychecker
367 367
368 368 def _init_opts(self):
369 369 """process options (from above/config) to set up the associated default revlog mode
370 370
371 371 These values might be affected when actually reading the on-disk information.
372 372
373 373 The relevant values are returned for use in _loadindex().
374 374
375 375 * newversionflags:
376 376 version header to use if we need to create a new revlog
377 377
378 378 * mmapindexthreshold:
379 379 minimal index size at which to start using mmap
380 380
381 381 * force_nodemap:
382 382 force the usage of a "development" version of the nodemap code
383 383 """
384 384 mmapindexthreshold = None
385 385 opts = self.opener.options
386 386
387 387 if b'revlogv2' in opts:
388 388 new_header = REVLOGV2 | FLAG_INLINE_DATA
389 389 elif b'revlogv1' in opts:
390 390 new_header = REVLOGV1 | FLAG_INLINE_DATA
391 391 if b'generaldelta' in opts:
392 392 new_header |= FLAG_GENERALDELTA
393 393 elif b'revlogv0' in self.opener.options:
394 394 new_header = REVLOGV0
395 395 else:
396 396 new_header = REVLOG_DEFAULT_VERSION
397 397
398 398 if b'chunkcachesize' in opts:
399 399 self._chunkcachesize = opts[b'chunkcachesize']
400 400 if b'maxchainlen' in opts:
401 401 self._maxchainlen = opts[b'maxchainlen']
402 402 if b'deltabothparents' in opts:
403 403 self._deltabothparents = opts[b'deltabothparents']
404 404 self._lazydelta = bool(opts.get(b'lazydelta', True))
405 405 self._lazydeltabase = False
406 406 if self._lazydelta:
407 407 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
408 408 if b'compengine' in opts:
409 409 self._compengine = opts[b'compengine']
410 410 if b'zlib.level' in opts:
411 411 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
412 412 if b'zstd.level' in opts:
413 413 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
414 414 if b'maxdeltachainspan' in opts:
415 415 self._maxdeltachainspan = opts[b'maxdeltachainspan']
416 416 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
417 417 mmapindexthreshold = opts[b'mmapindexthreshold']
418 418 self.hassidedata = bool(opts.get(b'side-data', False))
419 419 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
420 420 withsparseread = bool(opts.get(b'with-sparse-read', False))
421 421 # sparse-revlog forces sparse-read
422 422 self._withsparseread = self._sparserevlog or withsparseread
423 423 if b'sparse-read-density-threshold' in opts:
424 424 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
425 425 if b'sparse-read-min-gap-size' in opts:
426 426 self._srmingapsize = opts[b'sparse-read-min-gap-size']
427 427 if opts.get(b'enableellipsis'):
428 428 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
429 429
430 430 # revlog v0 doesn't have flag processors
431 431 for flag, processor in pycompat.iteritems(
432 432 opts.get(b'flagprocessors', {})
433 433 ):
434 434 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
435 435
436 436 if self._chunkcachesize <= 0:
437 437 raise error.RevlogError(
438 438 _(b'revlog chunk cache size %r is not greater than 0')
439 439 % self._chunkcachesize
440 440 )
441 441 elif self._chunkcachesize & (self._chunkcachesize - 1):
442 442 raise error.RevlogError(
443 443 _(b'revlog chunk cache size %r is not a power of 2')
444 444 % self._chunkcachesize
445 445 )
446 446 force_nodemap = opts.get(b'devel-force-nodemap', False)
447 447 return new_header, mmapindexthreshold, force_nodemap
448 448
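The second `chunkcachesize` check above uses the standard bit trick for powers of two: a positive integer n is a power of two exactly when n & (n - 1) == 0. For example:

    for n in (65536, 65535):
        is_pow2 = n > 0 and not (n & (n - 1))
        print(n, is_pow2)   # 65536 True, 65535 False
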
449 449 def _get_data(self, filepath, mmap_threshold):
450 450 """return the file content, with or without mmap
451 451
452 452 If the file is missing, return the empty string"""
453 453 try:
454 454 with self.opener(filepath) as fp:
455 455 if mmap_threshold is not None:
456 456 file_size = self.opener.fstat(fp).st_size
457 457 if file_size >= mmap_threshold:
458 458 # TODO: should .close() to release resources without
459 459 # relying on Python GC
460 460 return util.buffer(util.mmapread(fp))
461 461 return fp.read()
462 462 except IOError as inst:
463 463 if inst.errno != errno.ENOENT:
464 464 raise
465 465 return b''
466 466
467 467 def _loadindex(self):
468 468
469 469 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
470 470
471 471 if self.postfix is None:
472 472 entry_point = b'%s.i' % self.radix
473 473 else:
474 474 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
475 475
476 indexdata = b''
476 entry_data = b''
477 477 self._initempty = True
478 indexdata = self._get_data(entry_point, mmapindexthreshold)
479 if len(indexdata) > 0:
480 header = INDEX_HEADER.unpack(indexdata[:4])[0]
478 entry_data = self._get_data(entry_point, mmapindexthreshold)
479 if len(entry_data) > 0:
480 header = INDEX_HEADER.unpack(entry_data[:4])[0]
481 481 self._initempty = False
482 482 else:
483 483 header = new_header
484 484
485 485 self._format_flags = header & ~0xFFFF
486 486 self._format_version = header & 0xFFFF
487 487
488 488 if self._format_version == REVLOGV0:
489 489 if self._format_flags:
490 490 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
491 491 display_flag = self._format_flags >> 16
492 492 msg %= (display_flag, self._format_version, self.display_id)
493 493 raise error.RevlogError(msg)
494 494
495 495 self._inline = False
496 496 self._generaldelta = False
497 497
498 498 elif self._format_version == REVLOGV1:
499 499 if self._format_flags & ~REVLOGV1_FLAGS:
500 500 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
501 501 display_flag = self._format_flags >> 16
502 502 msg %= (display_flag, self._format_version, self.display_id)
503 503 raise error.RevlogError(msg)
504 504
505 505 self._inline = self._format_flags & FLAG_INLINE_DATA
506 506 self._generaldelta = self._format_flags & FLAG_GENERALDELTA
507 507
508 508 elif self._format_version == REVLOGV2:
509 509 if self._format_flags & ~REVLOGV2_FLAGS:
510 510 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
511 511 display_flag = self._format_flags >> 16
512 512 msg %= (display_flag, self._format_version, self.display_id)
513 513 raise error.RevlogError(msg)
514 514
515 515 # There is a bug in the transaction handling when going from an
516 516 # inline revlog to a separate index and data file. Turn it off until
517 517 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
518 518 # See issue6485
519 519 self._inline = False
520 520 # generaldelta implied by version 2 revlogs.
521 521 self._generaldelta = True
522 522
523 523 else:
524 524 msg = _(b'unknown version (%d) in revlog %s')
525 525 msg %= (self._format_version, self.display_id)
526 526 raise error.RevlogError(msg)
527 527
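To make the flag/version split above concrete (using the v1 constants, where REVLOGV1 == 1, FLAG_INLINE_DATA == 1 << 16 and FLAG_GENERALDELTA == 1 << 17): an inline, general-delta v1 revlog begins with the big-endian 32-bit header 0x00030001.

    header = 0x00030001                    # FLAG_GENERALDELTA | FLAG_INLINE_DATA | REVLOGV1
    assert header & ~0xFFFF == 0x00030000  # the format flags
    assert header & 0xFFFF == 0x0001       # the format version (REVLOGV1)
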
528 index_data = entry_data
528 529 self._indexfile = entry_point
529 530
530 531 if self.postfix is None or self.postfix == b'a':
531 532 self._datafile = b'%s.d' % self.radix
532 533 else:
533 534 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
534 535
535 536 self.nodeconstants = sha1nodeconstants
536 537 self.nullid = self.nodeconstants.nullid
537 538
538 539 # sparse-revlog can't be on without general-delta (issue6056)
539 540 if not self._generaldelta:
540 541 self._sparserevlog = False
541 542
542 543 self._storedeltachains = True
543 544
544 545 devel_nodemap = (
545 546 self._nodemap_file
546 547 and force_nodemap
547 548 and parse_index_v1_nodemap is not None
548 549 )
549 550
550 551 use_rust_index = False
551 552 if rustrevlog is not None:
552 553 if self._nodemap_file is not None:
553 554 use_rust_index = True
554 555 else:
555 556 use_rust_index = self.opener.options.get(b'rust.index')
556 557
557 558 self._parse_index = parse_index_v1
558 559 if self._format_version == REVLOGV0:
559 560 self._parse_index = revlogv0.parse_index_v0
560 561 elif self._format_version == REVLOGV2:
561 562 self._parse_index = parse_index_v2
562 563 elif devel_nodemap:
563 564 self._parse_index = parse_index_v1_nodemap
564 565 elif use_rust_index:
565 566 self._parse_index = parse_index_v1_mixed
566 567 try:
567 d = self._parse_index(indexdata, self._inline)
568 d = self._parse_index(index_data, self._inline)
568 569 index, _chunkcache = d
569 570 use_nodemap = (
570 571 not self._inline
571 572 and self._nodemap_file is not None
572 573 and util.safehasattr(index, 'update_nodemap_data')
573 574 )
574 575 if use_nodemap:
575 576 nodemap_data = nodemaputil.persisted_data(self)
576 577 if nodemap_data is not None:
577 578 docket = nodemap_data[0]
578 579 if (
579 580 len(d[0]) > docket.tip_rev
580 581 and d[0][docket.tip_rev][7] == docket.tip_node
581 582 ):
582 583 # no changelog tampering
583 584 self._nodemap_docket = docket
584 585 index.update_nodemap_data(*nodemap_data)
585 586 except (ValueError, IndexError):
586 587 raise error.RevlogError(
587 588 _(b"index %s is corrupted") % self.display_id
588 589 )
589 590 self.index, self._chunkcache = d
590 591 if not self._chunkcache:
591 592 self._chunkclear()
592 593 # revnum -> (chain-length, sum-delta-length)
593 594 self._chaininfocache = util.lrucachedict(500)
594 595 # revlog header -> revlog compressor
595 596 self._decompressors = {}
596 597
597 598 @util.propertycache
598 599 def revlog_kind(self):
599 600 return self.target[0]
600 601
601 602 @util.propertycache
602 603 def display_id(self):
603 604 """The public facing "ID" of the revlog that we use in messages"""
604 605 # Maybe we should build a user facing representation of
605 606 # revlog.target instead of using `self.radix`
606 607 return self.radix
607 608
608 609 @util.propertycache
609 610 def _compressor(self):
610 611 engine = util.compengines[self._compengine]
611 612 return engine.revlogcompressor(self._compengineopts)
612 613
613 614 def _indexfp(self, mode=b'r'):
614 615 """file object for the revlog's index file"""
615 616 args = {'mode': mode}
616 617 if mode != b'r':
617 618 args['checkambig'] = self._checkambig
618 619 if mode == b'w':
619 620 args['atomictemp'] = True
620 621 return self.opener(self._indexfile, **args)
621 622
622 623 def _datafp(self, mode=b'r'):
623 624 """file object for the revlog's data file"""
624 625 return self.opener(self._datafile, mode=mode)
625 626
626 627 @contextlib.contextmanager
627 628 def _datareadfp(self, existingfp=None):
628 629 """file object suitable to read data"""
629 630 # Use explicit file handle, if given.
630 631 if existingfp is not None:
631 632 yield existingfp
632 633
633 634 # Use a file handle being actively used for writes, if available.
634 635 # There is some danger in doing this because reads will seek the
635 636 # file. However, _writeentry() performs a SEEK_END before all writes,
636 637 # so we should be safe.
637 638 elif self._writinghandles:
638 639 if self._inline:
639 640 yield self._writinghandles[0]
640 641 else:
641 642 yield self._writinghandles[1]
642 643
643 644 # Otherwise open a new file handle.
644 645 else:
645 646 if self._inline:
646 647 func = self._indexfp
647 648 else:
648 649 func = self._datafp
649 650 with func() as fp:
650 651 yield fp
651 652
652 653 def tiprev(self):
653 654 return len(self.index) - 1
654 655
655 656 def tip(self):
656 657 return self.node(self.tiprev())
657 658
658 659 def __contains__(self, rev):
659 660 return 0 <= rev < len(self)
660 661
661 662 def __len__(self):
662 663 return len(self.index)
663 664
664 665 def __iter__(self):
665 666 return iter(pycompat.xrange(len(self)))
666 667
667 668 def revs(self, start=0, stop=None):
668 669 """iterate over all revs in this revlog (from start to stop)"""
669 670 return storageutil.iterrevs(len(self), start=start, stop=stop)
670 671
671 672 @property
672 673 def nodemap(self):
673 674 msg = (
674 675 b"revlog.nodemap is deprecated, "
675 676 b"use revlog.index.[has_node|rev|get_rev]"
676 677 )
677 678 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
678 679 return self.index.nodemap
679 680
680 681 @property
681 682 def _nodecache(self):
682 683 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
683 684 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
684 685 return self.index.nodemap
685 686
686 687 def hasnode(self, node):
687 688 try:
688 689 self.rev(node)
689 690 return True
690 691 except KeyError:
691 692 return False
692 693
693 694 def candelta(self, baserev, rev):
694 695 """whether two revisions (baserev, rev) can be delta-ed or not"""
695 696 # Disable delta if either rev requires a content-changing flag
696 697 # processor (ex. LFS). This is because such a flag processor can alter
697 698 # the rawtext content that the delta will be based on, and two clients
698 699 # could have the same revlog node with different flags (i.e. different
699 700 # rawtext contents) and the delta could be incompatible.
700 701 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
701 702 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
702 703 ):
703 704 return False
704 705 return True
705 706
706 707 def update_caches(self, transaction):
707 708 if self._nodemap_file is not None:
708 709 if transaction is None:
709 710 nodemaputil.update_persistent_nodemap(self)
710 711 else:
711 712 nodemaputil.setup_persistent_nodemap(transaction, self)
712 713
713 714 def clearcaches(self):
714 715 self._revisioncache = None
715 716 self._chainbasecache.clear()
716 717 self._chunkcache = (0, b'')
717 718 self._pcache = {}
718 719 self._nodemap_docket = None
719 720 self.index.clearcaches()
720 721 # The python code is the one responsible for validating the docket, we
721 722 # end up having to refresh it here.
722 723 use_nodemap = (
723 724 not self._inline
724 725 and self._nodemap_file is not None
725 726 and util.safehasattr(self.index, 'update_nodemap_data')
726 727 )
727 728 if use_nodemap:
728 729 nodemap_data = nodemaputil.persisted_data(self)
729 730 if nodemap_data is not None:
730 731 self._nodemap_docket = nodemap_data[0]
731 732 self.index.update_nodemap_data(*nodemap_data)
732 733
733 734 def rev(self, node):
734 735 try:
735 736 return self.index.rev(node)
736 737 except TypeError:
737 738 raise
738 739 except error.RevlogError:
739 740 # parsers.c radix tree lookup failed
740 741 if (
741 742 node == self.nodeconstants.wdirid
742 743 or node in self.nodeconstants.wdirfilenodeids
743 744 ):
744 745 raise error.WdirUnsupported
745 746 raise error.LookupError(node, self.display_id, _(b'no node'))
746 747
747 748 # Accessors for index entries.
748 749
749 750 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
750 751 # are flags.
751 752 def start(self, rev):
752 753 return int(self.index[rev][0] >> 16)
753 754
754 755 def flags(self, rev):
755 756 return self.index[rev][0] & 0xFFFF
756 757
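Both accessors read the packed first field produced by `offset_type` at the top of the module; a sketch against a hypothetical entry value:

    # entry[0] holds (offset << 16) | flags, so for a revision stored at
    # byte 4096 with no flags set:
    entry0 = (4096 << 16) | 0
    assert entry0 >> 16 == 4096    # what start(rev) returns
    assert entry0 & 0xFFFF == 0    # what flags(rev) returns
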
757 758 def length(self, rev):
758 759 return self.index[rev][1]
759 760
760 761 def sidedata_length(self, rev):
761 762 if not self.hassidedata:
762 763 return 0
763 764 return self.index[rev][9]
764 765
765 766 def rawsize(self, rev):
766 767 """return the length of the uncompressed text for a given revision"""
767 768 l = self.index[rev][2]
768 769 if l >= 0:
769 770 return l
770 771
771 772 t = self.rawdata(rev)
772 773 return len(t)
773 774
774 775 def size(self, rev):
775 776 """length of non-raw text (processed by a "read" flag processor)"""
776 777 # fast path: if no "read" flag processor could change the content,
777 778 # size is rawsize. note: ELLIPSIS is known to not change the content.
778 779 flags = self.flags(rev)
779 780 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
780 781 return self.rawsize(rev)
781 782
782 783 return len(self.revision(rev, raw=False))
783 784
784 785 def chainbase(self, rev):
785 786 base = self._chainbasecache.get(rev)
786 787 if base is not None:
787 788 return base
788 789
789 790 index = self.index
790 791 iterrev = rev
791 792 base = index[iterrev][3]
792 793 while base != iterrev:
793 794 iterrev = base
794 795 base = index[iterrev][3]
795 796
796 797 self._chainbasecache[rev] = base
797 798 return base
798 799
799 800 def linkrev(self, rev):
800 801 return self.index[rev][4]
801 802
802 803 def parentrevs(self, rev):
803 804 try:
804 805 entry = self.index[rev]
805 806 except IndexError:
806 807 if rev == wdirrev:
807 808 raise error.WdirUnsupported
808 809 raise
809 810 if entry[5] == nullrev:
810 811 return entry[6], entry[5]
811 812 else:
812 813 return entry[5], entry[6]
813 814
814 815 # fast parentrevs(rev) where rev isn't filtered
815 816 _uncheckedparentrevs = parentrevs
816 817
817 818 def node(self, rev):
818 819 try:
819 820 return self.index[rev][7]
820 821 except IndexError:
821 822 if rev == wdirrev:
822 823 raise error.WdirUnsupported
823 824 raise
824 825
825 826 # Derived from index values.
826 827
827 828 def end(self, rev):
828 829 return self.start(rev) + self.length(rev)
829 830
830 831 def parents(self, node):
831 832 i = self.index
832 833 d = i[self.rev(node)]
833 834 # inline node() to avoid function call overhead
834 835 if d[5] == self.nullid:
835 836 return i[d[6]][7], i[d[5]][7]
836 837 else:
837 838 return i[d[5]][7], i[d[6]][7]
838 839
839 840 def chainlen(self, rev):
840 841 return self._chaininfo(rev)[0]
841 842
842 843 def _chaininfo(self, rev):
843 844 chaininfocache = self._chaininfocache
844 845 if rev in chaininfocache:
845 846 return chaininfocache[rev]
846 847 index = self.index
847 848 generaldelta = self._generaldelta
848 849 iterrev = rev
849 850 e = index[iterrev]
850 851 clen = 0
851 852 compresseddeltalen = 0
852 853 while iterrev != e[3]:
853 854 clen += 1
854 855 compresseddeltalen += e[1]
855 856 if generaldelta:
856 857 iterrev = e[3]
857 858 else:
858 859 iterrev -= 1
859 860 if iterrev in chaininfocache:
860 861 t = chaininfocache[iterrev]
861 862 clen += t[0]
862 863 compresseddeltalen += t[1]
863 864 break
864 865 e = index[iterrev]
865 866 else:
866 867 # Add text length of base since decompressing that also takes
867 868 # work. For cache hits the length is already included.
868 869 compresseddeltalen += e[1]
869 870 r = (clen, compresseddeltalen)
870 871 chaininfocache[rev] = r
871 872 return r
872 873
873 874 def _deltachain(self, rev, stoprev=None):
874 875 """Obtain the delta chain for a revision.
875 876
876 877 ``stoprev`` specifies a revision to stop at. If not specified, we
877 878 stop at the base of the chain.
878 879
879 880 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
880 881 revs in ascending order and ``stopped`` is a bool indicating whether
881 882 ``stoprev`` was hit.
882 883 """
883 884 # Try C implementation.
884 885 try:
885 886 return self.index.deltachain(rev, stoprev, self._generaldelta)
886 887 except AttributeError:
887 888 pass
888 889
889 890 chain = []
890 891
891 892 # Alias to prevent attribute lookup in tight loop.
892 893 index = self.index
893 894 generaldelta = self._generaldelta
894 895
895 896 iterrev = rev
896 897 e = index[iterrev]
897 898 while iterrev != e[3] and iterrev != stoprev:
898 899 chain.append(iterrev)
899 900 if generaldelta:
900 901 iterrev = e[3]
901 902 else:
902 903 iterrev -= 1
903 904 e = index[iterrev]
904 905
905 906 if iterrev == stoprev:
906 907 stopped = True
907 908 else:
908 909 chain.append(iterrev)
909 910 stopped = False
910 911
911 912 chain.reverse()
912 913 return chain, stopped
913 914
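A usage sketch for `_deltachain` (hedged; `rl` stands for any revlog instance and `rev` for a valid revision): rebuilding a fulltext means starting from the chain base and applying each later delta in order.

    chain, stopped = rl._deltachain(rev)
    # `chain` lists revs in ascending order; unless `stopped` is True,
    # chain[0] is the base that stores a full text, and every later rev
    # contributes a delta applied on top of the previous result.
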
914 915 def ancestors(self, revs, stoprev=0, inclusive=False):
915 916 """Generate the ancestors of 'revs' in reverse revision order.
916 917 Does not generate revs lower than stoprev.
917 918
918 919 See the documentation for ancestor.lazyancestors for more details."""
919 920
920 921 # first, make sure start revisions aren't filtered
921 922 revs = list(revs)
922 923 checkrev = self.node
923 924 for r in revs:
924 925 checkrev(r)
925 926 # and we're sure ancestors aren't filtered as well
926 927
927 928 if rustancestor is not None:
928 929 lazyancestors = rustancestor.LazyAncestors
929 930 arg = self.index
930 931 else:
931 932 lazyancestors = ancestor.lazyancestors
932 933 arg = self._uncheckedparentrevs
933 934 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
934 935
935 936 def descendants(self, revs):
936 937 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
937 938
938 939 def findcommonmissing(self, common=None, heads=None):
939 940 """Return a tuple of the ancestors of common and the ancestors of heads
940 941 that are not ancestors of common. In revset terminology, we return the
941 942 tuple:
942 943
943 944 ::common, (::heads) - (::common)
944 945
945 946 The list is sorted by revision number, meaning it is
946 947 topologically sorted.
947 948
948 949 'heads' and 'common' are both lists of node IDs. If heads is
949 950 not supplied, uses all of the revlog's heads. If common is not
950 951 supplied, uses nullid."""
951 952 if common is None:
952 953 common = [self.nullid]
953 954 if heads is None:
954 955 heads = self.heads()
955 956
956 957 common = [self.rev(n) for n in common]
957 958 heads = [self.rev(n) for n in heads]
958 959
959 960 # we want the ancestors, but inclusive
960 961 class lazyset(object):
961 962 def __init__(self, lazyvalues):
962 963 self.addedvalues = set()
963 964 self.lazyvalues = lazyvalues
964 965
965 966 def __contains__(self, value):
966 967 return value in self.addedvalues or value in self.lazyvalues
967 968
968 969 def __iter__(self):
969 970 added = self.addedvalues
970 971 for r in added:
971 972 yield r
972 973 for r in self.lazyvalues:
973 974 if not r in added:
974 975 yield r
975 976
976 977 def add(self, value):
977 978 self.addedvalues.add(value)
978 979
979 980 def update(self, values):
980 981 self.addedvalues.update(values)
981 982
982 983 has = lazyset(self.ancestors(common))
983 984 has.add(nullrev)
984 985 has.update(common)
985 986
986 987 # take all ancestors from heads that aren't in has
987 988 missing = set()
988 989 visit = collections.deque(r for r in heads if r not in has)
989 990 while visit:
990 991 r = visit.popleft()
991 992 if r in missing:
992 993 continue
993 994 else:
994 995 missing.add(r)
995 996 for p in self.parentrevs(r):
996 997 if p not in has:
997 998 visit.append(p)
998 999 missing = list(missing)
999 1000 missing.sort()
1000 1001 return has, [self.node(miss) for miss in missing]
1001 1002
1002 1003 def incrementalmissingrevs(self, common=None):
1003 1004 """Return an object that can be used to incrementally compute the
1004 1005 revision numbers of the ancestors of arbitrary sets that are not
1005 1006 ancestors of common. This is an ancestor.incrementalmissingancestors
1006 1007 object.
1007 1008
1008 1009 'common' is a list of revision numbers. If common is not supplied, uses
1009 1010 nullrev.
1010 1011 """
1011 1012 if common is None:
1012 1013 common = [nullrev]
1013 1014
1014 1015 if rustancestor is not None:
1015 1016 return rustancestor.MissingAncestors(self.index, common)
1016 1017 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1017 1018
1018 1019 def findmissingrevs(self, common=None, heads=None):
1019 1020 """Return the revision numbers of the ancestors of heads that
1020 1021 are not ancestors of common.
1021 1022
1022 1023 More specifically, return a list of revision numbers corresponding to
1023 1024 nodes N such that every N satisfies the following constraints:
1024 1025
1025 1026 1. N is an ancestor of some node in 'heads'
1026 1027 2. N is not an ancestor of any node in 'common'
1027 1028
1028 1029 The list is sorted by revision number, meaning it is
1029 1030 topologically sorted.
1030 1031
1031 1032 'heads' and 'common' are both lists of revision numbers. If heads is
1032 1033 not supplied, uses all of the revlog's heads. If common is not
1033 1034 supplied, uses nullid."""
1034 1035 if common is None:
1035 1036 common = [nullrev]
1036 1037 if heads is None:
1037 1038 heads = self.headrevs()
1038 1039
1039 1040 inc = self.incrementalmissingrevs(common=common)
1040 1041 return inc.missingancestors(heads)
1041 1042
1042 1043 def findmissing(self, common=None, heads=None):
1043 1044 """Return the ancestors of heads that are not ancestors of common.
1044 1045
1045 1046 More specifically, return a list of nodes N such that every N
1046 1047 satisfies the following constraints:
1047 1048
1048 1049 1. N is an ancestor of some node in 'heads'
1049 1050 2. N is not an ancestor of any node in 'common'
1050 1051
1051 1052 The list is sorted by revision number, meaning it is
1052 1053 topologically sorted.
1053 1054
1054 1055 'heads' and 'common' are both lists of node IDs. If heads is
1055 1056 not supplied, uses all of the revlog's heads. If common is not
1056 1057 supplied, uses nullid."""
1057 1058 if common is None:
1058 1059 common = [self.nullid]
1059 1060 if heads is None:
1060 1061 heads = self.heads()
1061 1062
1062 1063 common = [self.rev(n) for n in common]
1063 1064 heads = [self.rev(n) for n in heads]
1064 1065
1065 1066 inc = self.incrementalmissingrevs(common=common)
1066 1067 return [self.node(r) for r in inc.missingancestors(heads)]
1067 1068
1068 1069 def nodesbetween(self, roots=None, heads=None):
1069 1070 """Return a topological path from 'roots' to 'heads'.
1070 1071
1071 1072 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1072 1073 topologically sorted list of all nodes N that satisfy both of
1073 1074 these constraints:
1074 1075
1075 1076 1. N is a descendant of some node in 'roots'
1076 1077 2. N is an ancestor of some node in 'heads'
1077 1078
1078 1079 Every node is considered to be both a descendant and an ancestor
1079 1080 of itself, so every reachable node in 'roots' and 'heads' will be
1080 1081 included in 'nodes'.
1081 1082
1082 1083 'outroots' is the list of reachable nodes in 'roots', i.e., the
1083 1084 subset of 'roots' that is returned in 'nodes'. Likewise,
1084 1085 'outheads' is the subset of 'heads' that is also in 'nodes'.
1085 1086
1086 1087 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1087 1088 unspecified, uses nullid as the only root. If 'heads' is
1088 1089 unspecified, uses list of all of the revlog's heads."""
1089 1090 nonodes = ([], [], [])
1090 1091 if roots is not None:
1091 1092 roots = list(roots)
1092 1093 if not roots:
1093 1094 return nonodes
1094 1095 lowestrev = min([self.rev(n) for n in roots])
1095 1096 else:
1096 1097 roots = [self.nullid] # Everybody's a descendant of nullid
1097 1098 lowestrev = nullrev
1098 1099 if (lowestrev == nullrev) and (heads is None):
1099 1100 # We want _all_ the nodes!
1100 1101 return (
1101 1102 [self.node(r) for r in self],
1102 1103 [self.nullid],
1103 1104 list(self.heads()),
1104 1105 )
1105 1106 if heads is None:
1106 1107 # All nodes are ancestors, so the latest ancestor is the last
1107 1108 # node.
1108 1109 highestrev = len(self) - 1
1109 1110 # Set ancestors to None to signal that every node is an ancestor.
1110 1111 ancestors = None
1111 1112 # Set heads to an empty dictionary for later discovery of heads
1112 1113 heads = {}
1113 1114 else:
1114 1115 heads = list(heads)
1115 1116 if not heads:
1116 1117 return nonodes
1117 1118 ancestors = set()
1118 1119 # Turn heads into a dictionary so we can remove 'fake' heads.
1119 1120 # Also, later we will be using it to filter out the heads we can't
1120 1121 # find from roots.
1121 1122 heads = dict.fromkeys(heads, False)
1122 1123 # Start at the top and keep marking parents until we're done.
1123 1124 nodestotag = set(heads)
1124 1125 # Remember where the top was so we can use it as a limit later.
1125 1126 highestrev = max([self.rev(n) for n in nodestotag])
1126 1127 while nodestotag:
1127 1128 # grab a node to tag
1128 1129 n = nodestotag.pop()
1129 1130 # Never tag nullid
1130 1131 if n == self.nullid:
1131 1132 continue
1132 1133 # A node's revision number represents its place in a
1133 1134 # topologically sorted list of nodes.
1134 1135 r = self.rev(n)
1135 1136 if r >= lowestrev:
1136 1137 if n not in ancestors:
1137 1138 # If we are possibly a descendant of one of the roots
1138 1139 # and we haven't already been marked as an ancestor
1139 1140 ancestors.add(n) # Mark as ancestor
1140 1141 # Add non-nullid parents to list of nodes to tag.
1141 1142 nodestotag.update(
1142 1143 [p for p in self.parents(n) if p != self.nullid]
1143 1144 )
1144 1145 elif n in heads: # We've seen it before, is it a fake head?
1145 1146 # So it is, real heads should not be the ancestors of
1146 1147 # any other heads.
1147 1148 heads.pop(n)
1148 1149 if not ancestors:
1149 1150 return nonodes
1150 1151 # Now that we have our set of ancestors, we want to remove any
1151 1152 # roots that are not ancestors.
1152 1153
1153 1154 # If one of the roots was nullid, everything is included anyway.
1154 1155 if lowestrev > nullrev:
1155 1156 # But, since we weren't, let's recompute the lowest rev to not
1156 1157 # include roots that aren't ancestors.
1157 1158
1158 1159 # Filter out roots that aren't ancestors of heads
1159 1160 roots = [root for root in roots if root in ancestors]
1160 1161 # Recompute the lowest revision
1161 1162 if roots:
1162 1163 lowestrev = min([self.rev(root) for root in roots])
1163 1164 else:
1164 1165 # No more roots? Return empty list
1165 1166 return nonodes
1166 1167 else:
1167 1168 # We are descending from nullid, and don't need to care about
1168 1169 # any other roots.
1169 1170 lowestrev = nullrev
1170 1171 roots = [self.nullid]
1171 1172 # Transform our roots list into a set.
1172 1173 descendants = set(roots)
1173 1174 # Also, keep the original roots so we can filter out roots that aren't
1174 1175 # 'real' roots (i.e. are descended from other roots).
1175 1176 roots = descendants.copy()
1176 1177 # Our topologically sorted list of output nodes.
1177 1178 orderedout = []
1178 1179 # Don't start at nullid since we don't want nullid in our output list,
1179 1180 # and if nullid shows up in descendants, empty parents will look like
1180 1181 # they're descendants.
1181 1182 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1182 1183 n = self.node(r)
1183 1184 isdescendant = False
1184 1185 if lowestrev == nullrev: # Everybody is a descendant of nullid
1185 1186 isdescendant = True
1186 1187 elif n in descendants:
1187 1188 # n is already a descendant
1188 1189 isdescendant = True
1189 1190 # This check only needs to be done here because all the roots
1190 1191 # will start being marked as descendants before the loop.
1191 1192 if n in roots:
1192 1193 # If n was a root, check if it's a 'real' root.
1193 1194 p = tuple(self.parents(n))
1194 1195 # If any of its parents are descendants, it's not a root.
1195 1196 if (p[0] in descendants) or (p[1] in descendants):
1196 1197 roots.remove(n)
1197 1198 else:
1198 1199 p = tuple(self.parents(n))
1199 1200 # A node is a descendant if either of its parents are
1200 1201 # descendants. (We seeded the descendants list with the roots
1201 1202 # up there, remember?)
1202 1203 if (p[0] in descendants) or (p[1] in descendants):
1203 1204 descendants.add(n)
1204 1205 isdescendant = True
1205 1206 if isdescendant and ((ancestors is None) or (n in ancestors)):
1206 1207 # Only include nodes that are both descendants and ancestors.
1207 1208 orderedout.append(n)
1208 1209 if (ancestors is not None) and (n in heads):
1209 1210 # We're trying to figure out which heads are reachable
1210 1211 # from roots.
1211 1212 # Mark this head as having been reached
1212 1213 heads[n] = True
1213 1214 elif ancestors is None:
1214 1215 # Otherwise, we're trying to discover the heads.
1215 1216 # Assume this is a head because if it isn't, the next step
1216 1217 # will eventually remove it.
1217 1218 heads[n] = True
1218 1219 # But, obviously its parents aren't.
1219 1220 for p in self.parents(n):
1220 1221 heads.pop(p, None)
1221 1222 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1222 1223 roots = list(roots)
1223 1224 assert orderedout
1224 1225 assert roots
1225 1226 assert heads
1226 1227 return (orderedout, roots, heads)
1227 1228
1228 1229 def headrevs(self, revs=None):
1229 1230 if revs is None:
1230 1231 try:
1231 1232 return self.index.headrevs()
1232 1233 except AttributeError:
1233 1234 return self._headrevs()
1234 1235 if rustdagop is not None:
1235 1236 return rustdagop.headrevs(self.index, revs)
1236 1237 return dagop.headrevs(revs, self._uncheckedparentrevs)
1237 1238
1238 1239 def computephases(self, roots):
1239 1240 return self.index.computephasesmapsets(roots)
1240 1241
1241 1242 def _headrevs(self):
1242 1243 count = len(self)
1243 1244 if not count:
1244 1245 return [nullrev]
1245 1246 # we won't iterate over filtered revs, so nobody is a head at the start
1246 1247 ishead = [0] * (count + 1)
1247 1248 index = self.index
1248 1249 for r in self:
1249 1250 ishead[r] = 1 # I may be a head
1250 1251 e = index[r]
1251 1252 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1252 1253 return [r for r, val in enumerate(ishead) if val]
1253 1254
1254 1255 def heads(self, start=None, stop=None):
1255 1256 """return the list of all nodes that have no children
1256 1257
1257 1258 if start is specified, only heads that are descendants of
1258 1259 start will be returned
1259 1260 if stop is specified, it will consider all the revs from stop
1260 1261 as if they had no children
1261 1262 """
1262 1263 if start is None and stop is None:
1263 1264 if not len(self):
1264 1265 return [self.nullid]
1265 1266 return [self.node(r) for r in self.headrevs()]
1266 1267
1267 1268 if start is None:
1268 1269 start = nullrev
1269 1270 else:
1270 1271 start = self.rev(start)
1271 1272
1272 1273 stoprevs = {self.rev(n) for n in stop or []}
1273 1274
1274 1275 revs = dagop.headrevssubset(
1275 1276 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1276 1277 )
1277 1278
1278 1279 return [self.node(rev) for rev in revs]
1279 1280
1280 1281 def children(self, node):
1281 1282 """find the children of a given node"""
1282 1283 c = []
1283 1284 p = self.rev(node)
1284 1285 for r in self.revs(start=p + 1):
1285 1286 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1286 1287 if prevs:
1287 1288 for pr in prevs:
1288 1289 if pr == p:
1289 1290 c.append(self.node(r))
1290 1291 elif p == nullrev:
1291 1292 c.append(self.node(r))
1292 1293 return c
1293 1294
1294 1295 def commonancestorsheads(self, a, b):
1295 1296 """calculate all the heads of the common ancestors of nodes a and b"""
1296 1297 a, b = self.rev(a), self.rev(b)
1297 1298 ancs = self._commonancestorsheads(a, b)
1298 1299 return pycompat.maplist(self.node, ancs)
1299 1300
1300 1301 def _commonancestorsheads(self, *revs):
1301 1302 """calculate all the heads of the common ancestors of revs"""
1302 1303 try:
1303 1304 ancs = self.index.commonancestorsheads(*revs)
1304 1305 except (AttributeError, OverflowError): # C implementation failed
1305 1306 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1306 1307 return ancs
1307 1308
1308 1309 def isancestor(self, a, b):
1309 1310 """return True if node a is an ancestor of node b
1310 1311
1311 1312 A revision is considered an ancestor of itself."""
1312 1313 a, b = self.rev(a), self.rev(b)
1313 1314 return self.isancestorrev(a, b)
1314 1315
1315 1316 def isancestorrev(self, a, b):
1316 1317 """return True if revision a is an ancestor of revision b
1317 1318
1318 1319 A revision is considered an ancestor of itself.
1319 1320
1320 1321 The implementation of this is trivial but the use of
1321 1322 reachableroots is not."""
1322 1323 if a == nullrev:
1323 1324 return True
1324 1325 elif a == b:
1325 1326 return True
1326 1327 elif a > b:
1327 1328 return False
1328 1329 return bool(self.reachableroots(a, [b], [a], includepath=False))
1329 1330
1330 1331 def reachableroots(self, minroot, heads, roots, includepath=False):
1331 1332 """return (heads(::(<roots> and <roots>::<heads>)))
1332 1333
1333 1334 If includepath is True, return (<roots>::<heads>)."""
1334 1335 try:
1335 1336 return self.index.reachableroots2(
1336 1337 minroot, heads, roots, includepath
1337 1338 )
1338 1339 except AttributeError:
1339 1340 return dagop._reachablerootspure(
1340 1341 self.parentrevs, minroot, roots, heads, includepath
1341 1342 )
1342 1343
1343 1344 def ancestor(self, a, b):
1344 1345 """calculate the "best" common ancestor of nodes a and b"""
1345 1346
1346 1347 a, b = self.rev(a), self.rev(b)
1347 1348 try:
1348 1349 ancs = self.index.ancestors(a, b)
1349 1350 except (AttributeError, OverflowError):
1350 1351 ancs = ancestor.ancestors(self.parentrevs, a, b)
1351 1352 if ancs:
1352 1353 # choose a consistent winner when there's a tie
1353 1354 return min(map(self.node, ancs))
1354 1355 return self.nullid
1355 1356
1356 1357 def _match(self, id):
1357 1358 if isinstance(id, int):
1358 1359 # rev
1359 1360 return self.node(id)
1360 1361 if len(id) == self.nodeconstants.nodelen:
1361 1362 # possibly a binary node
1362 1363 # odds of a binary node being all hex in ASCII are 1 in 10**25
1363 1364 try:
1364 1365 node = id
1365 1366 self.rev(node) # quick search the index
1366 1367 return node
1367 1368 except error.LookupError:
1368 1369 pass # may be partial hex id
1369 1370 try:
1370 1371 # str(rev)
1371 1372 rev = int(id)
1372 1373 if b"%d" % rev != id:
1373 1374 raise ValueError
1374 1375 if rev < 0:
1375 1376 rev = len(self) + rev
1376 1377 if rev < 0 or rev >= len(self):
1377 1378 raise ValueError
1378 1379 return self.node(rev)
1379 1380 except (ValueError, OverflowError):
1380 1381 pass
1381 1382 if len(id) == 2 * self.nodeconstants.nodelen:
1382 1383 try:
1383 1384 # a full hex nodeid?
1384 1385 node = bin(id)
1385 1386 self.rev(node)
1386 1387 return node
1387 1388 except (TypeError, error.LookupError):
1388 1389 pass
1389 1390
1390 1391 def _partialmatch(self, id):
1391 1392 # we don't care about wdirfilenodeids as they should always be full hashes
1392 1393 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1393 1394 try:
1394 1395 partial = self.index.partialmatch(id)
1395 1396 if partial and self.hasnode(partial):
1396 1397 if maybewdir:
1397 1398 # single 'ff...' match in radix tree, ambiguous with wdir
1398 1399 raise error.RevlogError
1399 1400 return partial
1400 1401 if maybewdir:
1401 1402 # no 'ff...' match in radix tree, wdir identified
1402 1403 raise error.WdirUnsupported
1403 1404 return None
1404 1405 except error.RevlogError:
1405 1406 # parsers.c radix tree lookup gave multiple matches
1406 1407 # fast path: for unfiltered changelog, radix tree is accurate
1407 1408 if not getattr(self, 'filteredrevs', None):
1408 1409 raise error.AmbiguousPrefixLookupError(
1409 1410 id, self.display_id, _(b'ambiguous identifier')
1410 1411 )
1411 1412 # fall through to slow path that filters hidden revisions
1412 1413 except (AttributeError, ValueError):
1413 1414 # we are pure python, or key was too short to search radix tree
1414 1415 pass
1415 1416
1416 1417 if id in self._pcache:
1417 1418 return self._pcache[id]
1418 1419
1419 1420 if len(id) <= 40:
1420 1421 try:
1421 1422 # hex(node)[:...]
1422 1423 l = len(id) // 2 # grab an even number of digits
1423 1424 prefix = bin(id[: l * 2])
1424 1425 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1425 1426 nl = [
1426 1427 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1427 1428 ]
1428 1429 if self.nodeconstants.nullhex.startswith(id):
1429 1430 nl.append(self.nullid)
1430 1431 if len(nl) > 0:
1431 1432 if len(nl) == 1 and not maybewdir:
1432 1433 self._pcache[id] = nl[0]
1433 1434 return nl[0]
1434 1435 raise error.AmbiguousPrefixLookupError(
1435 1436 id, self.display_id, _(b'ambiguous identifier')
1436 1437 )
1437 1438 if maybewdir:
1438 1439 raise error.WdirUnsupported
1439 1440 return None
1440 1441 except TypeError:
1441 1442 pass
1442 1443
1443 1444 def lookup(self, id):
1444 1445 """locate a node based on:
1445 1446 - revision number or str(revision number)
1446 1447 - nodeid or subset of hex nodeid
1447 1448 """
1448 1449 n = self._match(id)
1449 1450 if n is not None:
1450 1451 return n
1451 1452 n = self._partialmatch(id)
1452 1453 if n:
1453 1454 return n
1454 1455
1455 1456 raise error.LookupError(id, self.display_id, _(b'no match found'))
1456 1457
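An illustrative session for `lookup` (hedged; `rl` is any revlog and `hex` is mercurial's node.hex). All four spellings resolve to the same node, provided the prefix is unambiguous:

    node = rl.node(0)
    assert rl.lookup(0) == node               # revision number
    assert rl.lookup(b'0') == node            # str(revision number)
    assert rl.lookup(node) == node            # full binary nodeid
    assert rl.lookup(hex(node)[:6]) == node   # unambiguous hex prefix
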
1457 1458 def shortest(self, node, minlength=1):
1458 1459 """Find the shortest unambiguous prefix that matches node."""
1459 1460
1460 1461 def isvalid(prefix):
1461 1462 try:
1462 1463 matchednode = self._partialmatch(prefix)
1463 1464 except error.AmbiguousPrefixLookupError:
1464 1465 return False
1465 1466 except error.WdirUnsupported:
1466 1467 # single 'ff...' match
1467 1468 return True
1468 1469 if matchednode is None:
1469 1470 raise error.LookupError(node, self.display_id, _(b'no node'))
1470 1471 return True
1471 1472
1472 1473 def maybewdir(prefix):
1473 1474 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1474 1475
1475 1476 hexnode = hex(node)
1476 1477
1477 1478 def disambiguate(hexnode, minlength):
1478 1479 """Disambiguate against wdirid."""
1479 1480 for length in range(minlength, len(hexnode) + 1):
1480 1481 prefix = hexnode[:length]
1481 1482 if not maybewdir(prefix):
1482 1483 return prefix
1483 1484
1484 1485 if not getattr(self, 'filteredrevs', None):
1485 1486 try:
1486 1487 length = max(self.index.shortest(node), minlength)
1487 1488 return disambiguate(hexnode, length)
1488 1489 except error.RevlogError:
1489 1490 if node != self.nodeconstants.wdirid:
1490 1491 raise error.LookupError(
1491 1492 node, self.display_id, _(b'no node')
1492 1493 )
1493 1494 except AttributeError:
1494 1495 # Fall through to pure code
1495 1496 pass
1496 1497
1497 1498 if node == self.nodeconstants.wdirid:
1498 1499 for length in range(minlength, len(hexnode) + 1):
1499 1500 prefix = hexnode[:length]
1500 1501 if isvalid(prefix):
1501 1502 return prefix
1502 1503
1503 1504 for length in range(minlength, len(hexnode) + 1):
1504 1505 prefix = hexnode[:length]
1505 1506 if isvalid(prefix):
1506 1507 return disambiguate(hexnode, length)
1507 1508
1508 1509 def cmp(self, node, text):
1509 1510 """compare text with a given file revision
1510 1511
1511 1512 returns True if text is different from what is stored.
1512 1513 """
1513 1514 p1, p2 = self.parents(node)
1514 1515 return storageutil.hashrevisionsha1(text, p1, p2) != node
1515 1516
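`storageutil.hashrevisionsha1` implements the classic Mercurial node hash: SHA-1 over the two parent nodes in sorted order followed by the text. A self-contained sketch of that scheme (an assumption about the helper's internals, shown for clarity, not its actual code):

    import hashlib

    def node_hash_sketch(text, p1, p2):
        # Hash the parents in sorted order so the result is independent
        # of which parent is recorded as p1.
        a, b = sorted([p1, p2])
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()
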
1516 1517 def _cachesegment(self, offset, data):
1517 1518 """Add a segment to the revlog cache.
1518 1519
1519 1520 Accepts an absolute offset and the data that is at that location.
1520 1521 """
1521 1522 o, d = self._chunkcache
1522 1523 # try to add to existing cache
1523 1524 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1524 1525 self._chunkcache = o, d + data
1525 1526 else:
1526 1527 self._chunkcache = offset, data
1527 1528
1528 1529 def _readsegment(self, offset, length, df=None):
1529 1530 """Load a segment of raw data from the revlog.
1530 1531
1531 1532 Accepts an absolute offset, length to read, and an optional existing
1532 1533 file handle to read from.
1533 1534
1534 1535 If an existing file handle is passed, it will be seeked and the
1535 1536 original seek position will NOT be restored.
1536 1537
1537 1538 Returns a str or buffer of raw byte data.
1538 1539
1539 1540 Raises if the requested number of bytes could not be read.
1540 1541 """
1541 1542 # Cache data both forward and backward around the requested
1542 1543 # data, in a fixed size window. This helps speed up operations
1543 1544 # involving reading the revlog backwards.
1544 1545 cachesize = self._chunkcachesize
1545 1546 realoffset = offset & ~(cachesize - 1)
1546 1547 reallength = (
1547 1548 (offset + length + cachesize) & ~(cachesize - 1)
1548 1549 ) - realoffset
1549 1550 with self._datareadfp(df) as df:
1550 1551 df.seek(realoffset)
1551 1552 d = df.read(reallength)
1552 1553
1553 1554 self._cachesegment(realoffset, d)
1554 1555 if offset != realoffset or reallength != length:
1555 1556 startoffset = offset - realoffset
1556 1557 if len(d) - startoffset < length:
1557 1558 raise error.RevlogError(
1558 1559 _(
1559 1560 b'partial read of revlog %s; expected %d bytes from '
1560 1561 b'offset %d, got %d'
1561 1562 )
1562 1563 % (
1563 1564 self._indexfile if self._inline else self._datafile,
1564 1565 length,
1565 1566 offset,
1566 1567 len(d) - startoffset,
1567 1568 )
1568 1569 )
1569 1570
1570 1571 return util.buffer(d, startoffset, length)
1571 1572
1572 1573 if len(d) < length:
1573 1574 raise error.RevlogError(
1574 1575 _(
1575 1576 b'partial read of revlog %s; expected %d bytes from offset '
1576 1577 b'%d, got %d'
1577 1578 )
1578 1579 % (
1579 1580 self._indexfile if self._inline else self._datafile,
1580 1581 length,
1581 1582 offset,
1582 1583 len(d),
1583 1584 )
1584 1585 )
1585 1586
1586 1587 return d
1587 1588
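The window arithmetic above rounds every request out to `_chunkcachesize` boundaries, which the constructor guarantees is a power of two. With a 64 KiB cache size, a 100-byte read at offset 70,000 turns into one aligned 64 KiB read:

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)                  # 65536
    reallength = ((offset + length + cachesize)
                  & ~(cachesize - 1)) - realoffset          # 65536
    # The single aligned read serves the request and primes the cache
    # for neighbouring reads in either direction.
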
1588 1589 def _getsegment(self, offset, length, df=None):
1589 1590 """Obtain a segment of raw data from the revlog.
1590 1591
1591 1592 Accepts an absolute offset, length of bytes to obtain, and an
1592 1593 optional file handle to the already-opened revlog. If the file
1593 1594 handle is used, its original seek position will not be preserved.
1594 1595
1595 1596 Requests for data may be returned from a cache.
1596 1597
1597 1598 Returns a str or a buffer instance of raw byte data.
1598 1599 """
1599 1600 o, d = self._chunkcache
1600 1601 l = len(d)
1601 1602
1602 1603 # is it in the cache?
1603 1604 cachestart = offset - o
1604 1605 cacheend = cachestart + length
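        # e.g. if the cache holds 1024 bytes starting at offset 4096,
        # a request for (offset=4200, length=100) gives cachestart=104
        # and cacheend=204, both within the cached data, so the request
        # is served without touching the file.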
1605 1606 if cachestart >= 0 and cacheend <= l:
1606 1607 if cachestart == 0 and cacheend == l:
1607 1608 return d # avoid a copy
1608 1609 return util.buffer(d, cachestart, cacheend - cachestart)
1609 1610
1610 1611 return self._readsegment(offset, length, df=df)
1611 1612
1612 1613 def _getsegmentforrevs(self, startrev, endrev, df=None):
1613 1614 """Obtain a segment of raw data corresponding to a range of revisions.
1614 1615
1615 1616 Accepts the start and end revisions and an optional already-open
1616 1617         file handle to be used for reading. If the file handle is used, its
1617 1618 seek position will not be preserved.
1618 1619
1619 1620 Requests for data may be satisfied by a cache.
1620 1621
1621 1622 Returns a 2-tuple of (offset, data) for the requested range of
1622 1623 revisions. Offset is the integer offset from the beginning of the
1623 1624 revlog and data is a str or buffer of the raw byte data.
1624 1625
1625 1626 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1626 1627 to determine where each revision's data begins and ends.
1627 1628 """
1628 1629 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1629 1630 # (functions are expensive).
1630 1631 index = self.index
1631 1632 istart = index[startrev]
1632 1633 start = int(istart[0] >> 16)
1633 1634 if startrev == endrev:
1634 1635 end = start + istart[1]
1635 1636 else:
1636 1637 iend = index[endrev]
1637 1638 end = int(iend[0] >> 16) + iend[1]
1638 1639
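        # In an inline revlog, index entries and data chunks are
        # interleaved in a single file, so a revision's data is shifted
        # by the total size of the index entries written up to and
        # including its own.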
1639 1640 if self._inline:
1640 1641 start += (startrev + 1) * self.index.entry_size
1641 1642 end += (endrev + 1) * self.index.entry_size
1642 1643 length = end - start
1643 1644
1644 1645 return start, self._getsegment(start, length, df=df)
1645 1646
1646 1647 def _chunk(self, rev, df=None):
1647 1648 """Obtain a single decompressed chunk for a revision.
1648 1649
1649 1650 Accepts an integer revision and an optional already-open file handle
1650 1651 to be used for reading. If used, the seek position of the file will not
1651 1652 be preserved.
1652 1653
1653 1654 Returns a str holding uncompressed data for the requested revision.
1654 1655 """
1655 1656 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1656 1657
1657 1658 def _chunks(self, revs, df=None, targetsize=None):
1658 1659 """Obtain decompressed chunks for the specified revisions.
1659 1660
1660 1661 Accepts an iterable of numeric revisions that are assumed to be in
1661 1662 ascending order. Also accepts an optional already-open file handle
1662 1663 to be used for reading. If used, the seek position of the file will
1663 1664 not be preserved.
1664 1665
1665 1666 This function is similar to calling ``self._chunk()`` multiple times,
1666 1667 but is faster.
1667 1668
1668 1669 Returns a list with decompressed data for each requested revision.
1669 1670 """
1670 1671 if not revs:
1671 1672 return []
1672 1673 start = self.start
1673 1674 length = self.length
1674 1675 inline = self._inline
1675 1676 iosize = self.index.entry_size
1676 1677 buffer = util.buffer
1677 1678
1678 1679 l = []
1679 1680 ladd = l.append
1680 1681
1681 1682 if not self._withsparseread:
1682 1683 slicedchunks = (revs,)
1683 1684 else:
1684 1685 slicedchunks = deltautil.slicechunk(
1685 1686 self, revs, targetsize=targetsize
1686 1687 )
1687 1688
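        # Each slice below is fetched as one contiguous read and then
        # split per revision; sparse-read mode computes the slices so
        # that large runs of unneeded bytes between the requested
        # revisions are skipped rather than read.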
1688 1689 for revschunk in slicedchunks:
1689 1690 firstrev = revschunk[0]
1690 1691 # Skip trailing revisions with empty diff
1691 1692 for lastrev in revschunk[::-1]:
1692 1693 if length(lastrev) != 0:
1693 1694 break
1694 1695
1695 1696 try:
1696 1697 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1697 1698 except OverflowError:
1698 1699 # issue4215 - we can't cache a run of chunks greater than
1699 1700 # 2G on Windows
1700 1701 return [self._chunk(rev, df=df) for rev in revschunk]
1701 1702
1702 1703 decomp = self.decompress
1703 1704 for rev in revschunk:
1704 1705 chunkstart = start(rev)
1705 1706 if inline:
1706 1707 chunkstart += (rev + 1) * iosize
1707 1708 chunklength = length(rev)
1708 1709 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1709 1710
1710 1711 return l
1711 1712
1712 1713 def _chunkclear(self):
1713 1714 """Clear the raw chunk cache."""
1714 1715 self._chunkcache = (0, b'')
1715 1716
1716 1717 def deltaparent(self, rev):
1717 1718 """return deltaparent of the given revision"""
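        # entry[3] is the base of the delta chain. With general delta
        # it directly names the delta parent; in the older layout a
        # delta always applies against the previous revision, so the
        # field only reveals full snapshots (base == rev).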
1718 1719 base = self.index[rev][3]
1719 1720 if base == rev:
1720 1721 return nullrev
1721 1722 elif self._generaldelta:
1722 1723 return base
1723 1724 else:
1724 1725 return rev - 1
1725 1726
1726 1727 def issnapshot(self, rev):
1727 1728 """tells whether rev is a snapshot"""
1728 1729 if not self._sparserevlog:
1729 1730 return self.deltaparent(rev) == nullrev
1730 1731 elif util.safehasattr(self.index, b'issnapshot'):
1731 1732 # directly assign the method to cache the testing and access
1732 1733 self.issnapshot = self.index.issnapshot
1733 1734 return self.issnapshot(rev)
1734 1735 if rev == nullrev:
1735 1736 return True
1736 1737 entry = self.index[rev]
1737 1738 base = entry[3]
1738 1739 if base == rev:
1739 1740 return True
1740 1741 if base == nullrev:
1741 1742 return True
1742 1743 p1 = entry[5]
1743 1744 p2 = entry[6]
1744 1745 if base == p1 or base == p2:
1745 1746 return False
1746 1747 return self.issnapshot(base)
1747 1748
1748 1749 def snapshotdepth(self, rev):
1749 1750         """number of snapshots in the chain before this one"""
1750 1751         if not self.issnapshot(rev):
1751 1752             raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1752 1753 return len(self._deltachain(rev)[0]) - 1
1753 1754
1754 1755 def revdiff(self, rev1, rev2):
1755 1756 """return or calculate a delta between two revisions
1756 1757
1757 1758 The delta calculated is in binary form and is intended to be written to
1758 1759 revlog data directly. So this function needs raw revision data.
1759 1760 """
1760 1761 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1761 1762 return bytes(self._chunk(rev2))
1762 1763
1763 1764 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1764 1765
1765 1766 def _processflags(self, text, flags, operation, raw=False):
1766 1767 """deprecated entry point to access flag processors"""
1767 1768         msg = b'_processflags(...) use the specialized variant'
1768 1769 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1769 1770 if raw:
1770 1771 return text, flagutil.processflagsraw(self, text, flags)
1771 1772 elif operation == b'read':
1772 1773 return flagutil.processflagsread(self, text, flags)
1773 1774 else: # write operation
1774 1775 return flagutil.processflagswrite(self, text, flags)
1775 1776
1776 1777 def revision(self, nodeorrev, _df=None, raw=False):
1777 1778 """return an uncompressed revision of a given node or revision
1778 1779 number.
1779 1780
1780 1781 _df - an existing file handle to read from. (internal-only)
1781 1782 raw - an optional argument specifying if the revision data is to be
1782 1783 treated as raw data when applying flag transforms. 'raw' should be set
1783 1784 to True when generating changegroups or in debug commands.
1784 1785 """
1785 1786 if raw:
1786 1787 msg = (
1787 1788 b'revlog.revision(..., raw=True) is deprecated, '
1788 1789 b'use revlog.rawdata(...)'
1789 1790 )
1790 1791 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1791 1792 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1792 1793
1793 1794 def sidedata(self, nodeorrev, _df=None):
1794 1795 """a map of extra data related to the changeset but not part of the hash
1795 1796
1796 1797         This function currently returns a dictionary. However, a more
1797 1798         advanced mapping object will likely be used in the future for more
1798 1799         efficient/lazy code.
1799 1800 """
1800 1801 return self._revisiondata(nodeorrev, _df)[1]
1801 1802
1802 1803 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1803 1804 # deal with <nodeorrev> argument type
1804 1805 if isinstance(nodeorrev, int):
1805 1806 rev = nodeorrev
1806 1807 node = self.node(rev)
1807 1808 else:
1808 1809 node = nodeorrev
1809 1810 rev = None
1810 1811
1811 1812 # fast path the special `nullid` rev
1812 1813 if node == self.nullid:
1813 1814 return b"", {}
1814 1815
1815 1816 # ``rawtext`` is the text as stored inside the revlog. Might be the
1816 1817 # revision or might need to be processed to retrieve the revision.
1817 1818 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1818 1819
1819 1820 if self.hassidedata:
1820 1821 if rev is None:
1821 1822 rev = self.rev(node)
1822 1823 sidedata = self._sidedata(rev)
1823 1824 else:
1824 1825 sidedata = {}
1825 1826
1826 1827 if raw and validated:
1827 1828             # if we don't want to process the raw text and the raw
1828 1829             # text is already cached, we can exit early.
1829 1830 return rawtext, sidedata
1830 1831 if rev is None:
1831 1832 rev = self.rev(node)
1832 1833         # the revlog's flags for this revision
1833 1834         # (usually altering its state or content)
1834 1835 flags = self.flags(rev)
1835 1836
1836 1837 if validated and flags == REVIDX_DEFAULT_FLAGS:
1837 1838 # no extra flags set, no flag processor runs, text = rawtext
1838 1839 return rawtext, sidedata
1839 1840
1840 1841 if raw:
1841 1842 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1842 1843 text = rawtext
1843 1844 else:
1844 1845 r = flagutil.processflagsread(self, rawtext, flags)
1845 1846 text, validatehash = r
1846 1847 if validatehash:
1847 1848 self.checkhash(text, node, rev=rev)
1848 1849 if not validated:
1849 1850 self._revisioncache = (node, rev, rawtext)
1850 1851
1851 1852 return text, sidedata
1852 1853
1853 1854 def _rawtext(self, node, rev, _df=None):
1854 1855 """return the possibly unvalidated rawtext for a revision
1855 1856
1856 1857 returns (rev, rawtext, validated)
1857 1858 """
1858 1859
1859 1860 # revision in the cache (could be useful to apply delta)
1860 1861 cachedrev = None
1861 1862 # An intermediate text to apply deltas to
1862 1863 basetext = None
1863 1864
1864 1865 # Check if we have the entry in cache
1865 1866 # The cache entry looks like (node, rev, rawtext)
1866 1867 if self._revisioncache:
1867 1868 if self._revisioncache[0] == node:
1868 1869 return (rev, self._revisioncache[2], True)
1869 1870 cachedrev = self._revisioncache[1]
1870 1871
1871 1872 if rev is None:
1872 1873 rev = self.rev(node)
1873 1874
1874 1875 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1875 1876 if stopped:
1876 1877 basetext = self._revisioncache[2]
1877 1878
1878 1879 # drop cache to save memory, the caller is expected to
1879 1880 # update self._revisioncache after validating the text
1880 1881 self._revisioncache = None
1881 1882
1882 1883 targetsize = None
1883 1884 rawsize = self.index[rev][2]
1884 1885 if 0 <= rawsize:
1885 1886 targetsize = 4 * rawsize
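            # 4x the reconstructed size is a heuristic cap handed down
            # to the chunk slicer, bounding how much data a single
            # sliced read may pull in while rebuilding this revision.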
1886 1887
1887 1888 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1888 1889 if basetext is None:
1889 1890 basetext = bytes(bins[0])
1890 1891 bins = bins[1:]
1891 1892
1892 1893 rawtext = mdiff.patches(basetext, bins)
1893 1894 del basetext # let us have a chance to free memory early
1894 1895 return (rev, rawtext, False)
1895 1896
1896 1897 def _sidedata(self, rev):
1897 1898 """Return the sidedata for a given revision number."""
1898 1899 index_entry = self.index[rev]
1899 1900 sidedata_offset = index_entry[8]
1900 1901 sidedata_size = index_entry[9]
1901 1902
1902 1903 if self._inline:
1903 1904 sidedata_offset += self.index.entry_size * (1 + rev)
1904 1905 if sidedata_size == 0:
1905 1906 return {}
1906 1907
1907 1908 segment = self._getsegment(sidedata_offset, sidedata_size)
1908 1909 sidedata = sidedatautil.deserialize_sidedata(segment)
1909 1910 return sidedata
1910 1911
1911 1912 def rawdata(self, nodeorrev, _df=None):
1912 1913         """return the uncompressed raw data of a given node or revision number.
1913 1914
1914 1915 _df - an existing file handle to read from. (internal-only)
1915 1916 """
1916 1917 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1917 1918
1918 1919 def hash(self, text, p1, p2):
1919 1920 """Compute a node hash.
1920 1921
1921 1922 Available as a function so that subclasses can replace the hash
1922 1923 as needed.
1923 1924 """
1924 1925 return storageutil.hashrevisionsha1(text, p1, p2)
1925 1926
1926 1927 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1927 1928 """Check node hash integrity.
1928 1929
1929 1930 Available as a function so that subclasses can extend hash mismatch
1930 1931 behaviors as needed.
1931 1932 """
1932 1933 try:
1933 1934 if p1 is None and p2 is None:
1934 1935 p1, p2 = self.parents(node)
1935 1936 if node != self.hash(text, p1, p2):
1936 1937 # Clear the revision cache on hash failure. The revision cache
1937 1938 # only stores the raw revision and clearing the cache does have
1938 1939 # the side-effect that we won't have a cache hit when the raw
1939 1940 # revision data is accessed. But this case should be rare and
1940 1941 # it is extra work to teach the cache about the hash
1941 1942 # verification state.
1942 1943 if self._revisioncache and self._revisioncache[0] == node:
1943 1944 self._revisioncache = None
1944 1945
1945 1946 revornode = rev
1946 1947 if revornode is None:
1947 1948 revornode = templatefilters.short(hex(node))
1948 1949 raise error.RevlogError(
1949 1950 _(b"integrity check failed on %s:%s")
1950 1951 % (self.display_id, pycompat.bytestr(revornode))
1951 1952 )
1952 1953 except error.RevlogError:
1953 1954 if self._censorable and storageutil.iscensoredtext(text):
1954 1955 raise error.CensoredNodeError(self.display_id, node, text)
1955 1956 raise
1956 1957
1957 1958 def _enforceinlinesize(self, tr, fp=None):
1958 1959 """Check if the revlog is too big for inline and convert if so.
1959 1960
1960 1961 This should be called after revisions are added to the revlog. If the
1961 1962 revlog has grown too large to be an inline revlog, it will convert it
1962 1963 to use multiple index and data files.
1963 1964 """
1964 1965 tiprev = len(self) - 1
1965 1966 total_size = self.start(tiprev) + self.length(tiprev)
1966 1967 if not self._inline or total_size < _maxinline:
1967 1968 return
1968 1969
1969 1970 troffset = tr.findoffset(self._indexfile)
1970 1971 if troffset is None:
1971 1972 raise error.RevlogError(
1972 1973 _(b"%s not found in the transaction") % self._indexfile
1973 1974 )
1974 1975 trindex = 0
1975 1976 tr.add(self._datafile, 0)
1976 1977
1977 1978 if fp:
1978 1979 fp.flush()
1979 1980 fp.close()
1980 1981 # We can't use the cached file handle after close(). So prevent
1981 1982 # its usage.
1982 1983 self._writinghandles = None
1983 1984
1984 1985 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1985 1986 for r in self:
1986 1987 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1987 1988 if troffset <= self.start(r):
1988 1989 trindex = r
1989 1990
1990 1991 with self._indexfp(b'w') as fp:
1991 1992 self._format_flags &= ~FLAG_INLINE_DATA
1992 1993 self._inline = False
1993 1994 for i in self:
1994 1995 e = self.index.entry_binary(i)
1995 1996 if i == 0:
1996 1997 header = self._format_flags | self._format_version
1997 1998 header = self.index.pack_header(header)
1998 1999 e = header + e
1999 2000 fp.write(e)
2000 2001
2001 2002             # the temp file replaces the real index when we exit the context
2002 2003 # manager
2003 2004
2004 2005 tr.replace(self._indexfile, trindex * self.index.entry_size)
2005 2006 nodemaputil.setup_persistent_nodemap(tr, self)
2006 2007 self._chunkclear()
2007 2008
2008 2009 def _nodeduplicatecallback(self, transaction, node):
2009 2010 """called when trying to add a node already stored."""
2010 2011
2011 2012 def addrevision(
2012 2013 self,
2013 2014 text,
2014 2015 transaction,
2015 2016 link,
2016 2017 p1,
2017 2018 p2,
2018 2019 cachedelta=None,
2019 2020 node=None,
2020 2021 flags=REVIDX_DEFAULT_FLAGS,
2021 2022 deltacomputer=None,
2022 2023 sidedata=None,
2023 2024 ):
2024 2025 """add a revision to the log
2025 2026
2026 2027 text - the revision data to add
2027 2028 transaction - the transaction object used for rollback
2028 2029 link - the linkrev data to add
2029 2030 p1, p2 - the parent nodeids of the revision
2030 2031 cachedelta - an optional precomputed delta
2031 2032 node - nodeid of revision; typically node is not specified, and it is
2032 2033 computed by default as hash(text, p1, p2), however subclasses might
2033 2034             use a different hashing method (and override checkhash() in such a case)
2034 2035 flags - the known flags to set on the revision
2035 2036 deltacomputer - an optional deltacomputer instance shared between
2036 2037 multiple calls
2037 2038 """
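        # A minimal usage sketch (hypothetical values, assuming an open
        # revlog ``rl`` and an active transaction ``tr``):
        #
        #   rev = rl.addrevision(b'new file content', tr, linkrev,
        #                        p1node, p2node)
        #
        # The return value is the new revision number, or that of the
        # existing revision if the node was already stored.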
2038 2039 if link == nullrev:
2039 2040 raise error.RevlogError(
2040 2041 _(b"attempted to add linkrev -1 to %s") % self.display_id
2041 2042 )
2042 2043
2043 2044 if sidedata is None:
2044 2045 sidedata = {}
2045 2046 elif sidedata and not self.hassidedata:
2046 2047 raise error.ProgrammingError(
2047 2048                 _(b"trying to add sidedata to a revlog which does not support them")
2048 2049 )
2049 2050
2050 2051 if flags:
2051 2052 node = node or self.hash(text, p1, p2)
2052 2053
2053 2054 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2054 2055
2055 2056 # If the flag processor modifies the revision data, ignore any provided
2056 2057 # cachedelta.
2057 2058 if rawtext != text:
2058 2059 cachedelta = None
2059 2060
2060 2061 if len(rawtext) > _maxentrysize:
2061 2062 raise error.RevlogError(
2062 2063 _(
2063 2064 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2064 2065 )
2065 2066 % (self.display_id, len(rawtext))
2066 2067 )
2067 2068
2068 2069 node = node or self.hash(rawtext, p1, p2)
2069 2070 rev = self.index.get_rev(node)
2070 2071 if rev is not None:
2071 2072 return rev
2072 2073
2073 2074 if validatehash:
2074 2075 self.checkhash(rawtext, node, p1=p1, p2=p2)
2075 2076
2076 2077 return self.addrawrevision(
2077 2078 rawtext,
2078 2079 transaction,
2079 2080 link,
2080 2081 p1,
2081 2082 p2,
2082 2083 node,
2083 2084 flags,
2084 2085 cachedelta=cachedelta,
2085 2086 deltacomputer=deltacomputer,
2086 2087 sidedata=sidedata,
2087 2088 )
2088 2089
2089 2090 def addrawrevision(
2090 2091 self,
2091 2092 rawtext,
2092 2093 transaction,
2093 2094 link,
2094 2095 p1,
2095 2096 p2,
2096 2097 node,
2097 2098 flags,
2098 2099 cachedelta=None,
2099 2100 deltacomputer=None,
2100 2101 sidedata=None,
2101 2102 ):
2102 2103 """add a raw revision with known flags, node and parents
2103 2104         useful when reusing a revision not stored in this revlog (e.g. received
2104 2105         over the wire, or read from an external bundle).
2105 2106 """
2106 2107 dfh = None
2107 2108 if not self._inline:
2108 2109 dfh = self._datafp(b"a+")
2109 2110 ifh = self._indexfp(b"a+")
2110 2111 try:
2111 2112 return self._addrevision(
2112 2113 node,
2113 2114 rawtext,
2114 2115 transaction,
2115 2116 link,
2116 2117 p1,
2117 2118 p2,
2118 2119 flags,
2119 2120 cachedelta,
2120 2121 ifh,
2121 2122 dfh,
2122 2123 deltacomputer=deltacomputer,
2123 2124 sidedata=sidedata,
2124 2125 )
2125 2126 finally:
2126 2127 if dfh:
2127 2128 dfh.close()
2128 2129 ifh.close()
2129 2130
2130 2131 def compress(self, data):
2131 2132 """Generate a possibly-compressed representation of data."""
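        # Returns a (header, data) 2-tuple. An empty header means the
        # payload is self-describing (an engine header or a leading NUL
        # byte is embedded in the data); b'u' is the only out-of-band
        # marker, flagging a chunk stored uncompressed, typically
        # because compression would not have helped.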
2132 2133 if not data:
2133 2134 return b'', data
2134 2135
2135 2136 compressed = self._compressor.compress(data)
2136 2137
2137 2138 if compressed:
2138 2139 # The revlog compressor added the header in the returned data.
2139 2140 return b'', compressed
2140 2141
2141 2142 if data[0:1] == b'\0':
2142 2143 return b'', data
2143 2144 return b'u', data
2144 2145
2145 2146 def decompress(self, data):
2146 2147 """Decompress a revlog chunk.
2147 2148
2148 2149 The chunk is expected to begin with a header identifying the
2149 2150 format type so it can be routed to an appropriate decompressor.
2150 2151 """
2151 2152 if not data:
2152 2153 return data
2153 2154
2154 2155 # Revlogs are read much more frequently than they are written and many
2155 2156 # chunks only take microseconds to decompress, so performance is
2156 2157 # important here.
2157 2158 #
2158 2159 # We can make a few assumptions about revlogs:
2159 2160 #
2160 2161 # 1) the majority of chunks will be compressed (as opposed to inline
2161 2162 # raw data).
2162 2163         # 2) decompressing *any* data will likely be at least 10x slower than
2163 2164 # returning raw inline data.
2164 2165 # 3) we want to prioritize common and officially supported compression
2165 2166 # engines
2166 2167 #
2167 2168 # It follows that we want to optimize for "decompress compressed data
2168 2169 # when encoded with common and officially supported compression engines"
2169 2170 # case over "raw data" and "data encoded by less common or non-official
2170 2171 # compression engines." That is why we have the inline lookup first
2171 2172 # followed by the compengines lookup.
2172 2173 #
2173 2174 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2174 2175 # compressed chunks. And this matters for changelog and manifest reads.
2175 2176 t = data[0:1]
2176 2177
2177 2178 if t == b'x':
2178 2179 try:
2179 2180 return _zlibdecompress(data)
2180 2181 except zlib.error as e:
2181 2182 raise error.RevlogError(
2182 2183 _(b'revlog decompress error: %s')
2183 2184 % stringutil.forcebytestr(e)
2184 2185 )
2185 2186 # '\0' is more common than 'u' so it goes first.
2186 2187 elif t == b'\0':
2187 2188 return data
2188 2189 elif t == b'u':
2189 2190 return util.buffer(data, 1)
2190 2191
2191 2192 try:
2192 2193 compressor = self._decompressors[t]
2193 2194 except KeyError:
2194 2195 try:
2195 2196 engine = util.compengines.forrevlogheader(t)
2196 2197 compressor = engine.revlogcompressor(self._compengineopts)
2197 2198 self._decompressors[t] = compressor
2198 2199 except KeyError:
2199 2200 raise error.RevlogError(
2200 2201 _(b'unknown compression type %s') % binascii.hexlify(t)
2201 2202 )
2202 2203
2203 2204 return compressor.decompress(data)
2204 2205
2205 2206 def _addrevision(
2206 2207 self,
2207 2208 node,
2208 2209 rawtext,
2209 2210 transaction,
2210 2211 link,
2211 2212 p1,
2212 2213 p2,
2213 2214 flags,
2214 2215 cachedelta,
2215 2216 ifh,
2216 2217 dfh,
2217 2218 alwayscache=False,
2218 2219 deltacomputer=None,
2219 2220 sidedata=None,
2220 2221 ):
2221 2222 """internal function to add revisions to the log
2222 2223
2223 2224 see addrevision for argument descriptions.
2224 2225
2225 2226 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2226 2227
2227 2228 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2228 2229 be used.
2229 2230
2230 2231 invariants:
2231 2232 - rawtext is optional (can be None); if not set, cachedelta must be set.
2232 2233 if both are set, they must correspond to each other.
2233 2234 """
2234 2235 if node == self.nullid:
2235 2236 raise error.RevlogError(
2236 2237 _(b"%s: attempt to add null revision") % self.display_id
2237 2238 )
2238 2239 if (
2239 2240 node == self.nodeconstants.wdirid
2240 2241 or node in self.nodeconstants.wdirfilenodeids
2241 2242 ):
2242 2243 raise error.RevlogError(
2243 2244 _(b"%s: attempt to add wdir revision") % self.display_id
2244 2245 )
2245 2246
2246 2247 if self._inline:
2247 2248 fh = ifh
2248 2249 else:
2249 2250 fh = dfh
2250 2251
2251 2252 btext = [rawtext]
2252 2253
2253 2254 curr = len(self)
2254 2255 prev = curr - 1
2255 2256
2256 2257 offset = self._get_data_offset(prev)
2257 2258
2258 2259 if self._concurrencychecker:
2259 2260 if self._inline:
2260 2261 # offset is "as if" it were in the .d file, so we need to add on
2261 2262 # the size of the entry metadata.
2262 2263 self._concurrencychecker(
2263 2264 ifh, self._indexfile, offset + curr * self.index.entry_size
2264 2265 )
2265 2266 else:
2266 2267 # Entries in the .i are a consistent size.
2267 2268 self._concurrencychecker(
2268 2269 ifh, self._indexfile, curr * self.index.entry_size
2269 2270 )
2270 2271 self._concurrencychecker(dfh, self._datafile, offset)
2271 2272
2272 2273 p1r, p2r = self.rev(p1), self.rev(p2)
2273 2274
2274 2275 # full versions are inserted when the needed deltas
2275 2276 # become comparable to the uncompressed text
2276 2277 if rawtext is None:
2277 2278             # need the rawtext size before it is changed by flag processors,
2278 2279             # which is the non-raw size. use revlog explicitly to avoid
2279 2280             # filelog's extra logic that might remove metadata size.
2280 2281 textlen = mdiff.patchedsize(
2281 2282 revlog.size(self, cachedelta[0]), cachedelta[1]
2282 2283 )
2283 2284 else:
2284 2285 textlen = len(rawtext)
2285 2286
2286 2287 if deltacomputer is None:
2287 2288 deltacomputer = deltautil.deltacomputer(self)
2288 2289
2289 2290 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2290 2291
2291 2292 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2292 2293
2293 2294 if sidedata and self.hassidedata:
2294 2295 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2295 2296 sidedata_offset = offset + deltainfo.deltalen
2296 2297 else:
2297 2298 serialized_sidedata = b""
2298 2299             # Don't store the offset if the sidedata is empty, that way
2299 2300             # we can easily detect empty sidedata and it will be no different
2300 2301             # from entries we add manually.
2301 2302 sidedata_offset = 0
2302 2303
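        # The first index field packs the data offset and the revision
        # flags into a single integer (offset << 16 | flags); readers
        # split them back out with entry[0] >> 16 and entry[0] & 0xFFFF.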
2303 2304 e = (
2304 2305 offset_type(offset, flags),
2305 2306 deltainfo.deltalen,
2306 2307 textlen,
2307 2308 deltainfo.base,
2308 2309 link,
2309 2310 p1r,
2310 2311 p2r,
2311 2312 node,
2312 2313 sidedata_offset,
2313 2314 len(serialized_sidedata),
2314 2315 )
2315 2316
2316 2317 self.index.append(e)
2317 2318 entry = self.index.entry_binary(curr)
2318 2319 if curr == 0:
2319 2320 header = self._format_flags | self._format_version
2320 2321 header = self.index.pack_header(header)
2321 2322 entry = header + entry
2322 2323 self._writeentry(
2323 2324 transaction,
2324 2325 ifh,
2325 2326 dfh,
2326 2327 entry,
2327 2328 deltainfo.data,
2328 2329 link,
2329 2330 offset,
2330 2331 serialized_sidedata,
2331 2332 )
2332 2333
2333 2334 rawtext = btext[0]
2334 2335
2335 2336 if alwayscache and rawtext is None:
2336 2337 rawtext = deltacomputer.buildtext(revinfo, fh)
2337 2338
2338 2339 if type(rawtext) == bytes: # only accept immutable objects
2339 2340 self._revisioncache = (node, curr, rawtext)
2340 2341 self._chainbasecache[curr] = deltainfo.chainbase
2341 2342 return curr
2342 2343
2343 2344 def _get_data_offset(self, prev):
2344 2345 """Returns the current offset in the (in-transaction) data file.
2345 2346         Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2346 2347 file to store that information: since sidedata can be rewritten to the
2347 2348 end of the data file within a transaction, you can have cases where, for
2348 2349 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2349 2350 to `n - 1`'s sidedata being written after `n`'s data.
2350 2351
2351 2352 TODO cache this in a docket file before getting out of experimental."""
2352 2353 if self._format_version != REVLOGV2:
2353 2354 return self.end(prev)
2354 2355
2355 2356 offset = 0
2356 2357 for rev, entry in enumerate(self.index):
2357 2358 sidedata_end = entry[8] + entry[9]
2358 2359 # Sidedata for a previous rev has potentially been written after
2359 2360 # this rev's end, so take the max.
2360 2361 offset = max(self.end(rev), offset, sidedata_end)
2361 2362 return offset
2362 2363
2363 2364 def _writeentry(
2364 2365 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2365 2366 ):
2366 2367 # Files opened in a+ mode have inconsistent behavior on various
2367 2368 # platforms. Windows requires that a file positioning call be made
2368 2369 # when the file handle transitions between reads and writes. See
2369 2370 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2370 2371 # platforms, Python or the platform itself can be buggy. Some versions
2371 2372 # of Solaris have been observed to not append at the end of the file
2372 2373 # if the file was seeked to before the end. See issue4943 for more.
2373 2374 #
2374 2375 # We work around this issue by inserting a seek() before writing.
2375 2376 # Note: This is likely not necessary on Python 3. However, because
2376 2377 # the file handle is reused for reads and may be seeked there, we need
2377 2378 # to be careful before changing this.
2378 2379 ifh.seek(0, os.SEEK_END)
2379 2380 if dfh:
2380 2381 dfh.seek(0, os.SEEK_END)
2381 2382
2382 2383 curr = len(self) - 1
2383 2384 if not self._inline:
2384 2385 transaction.add(self._datafile, offset)
2385 2386 transaction.add(self._indexfile, curr * len(entry))
2386 2387 if data[0]:
2387 2388 dfh.write(data[0])
2388 2389 dfh.write(data[1])
2389 2390 if sidedata:
2390 2391 dfh.write(sidedata)
2391 2392 ifh.write(entry)
2392 2393 else:
2393 2394 offset += curr * self.index.entry_size
2394 2395 transaction.add(self._indexfile, offset)
2395 2396 ifh.write(entry)
2396 2397 ifh.write(data[0])
2397 2398 ifh.write(data[1])
2398 2399 if sidedata:
2399 2400 ifh.write(sidedata)
2400 2401 self._enforceinlinesize(transaction, ifh)
2401 2402 nodemaputil.setup_persistent_nodemap(transaction, self)
2402 2403
2403 2404 def addgroup(
2404 2405 self,
2405 2406 deltas,
2406 2407 linkmapper,
2407 2408 transaction,
2408 2409 alwayscache=False,
2409 2410 addrevisioncb=None,
2410 2411 duplicaterevisioncb=None,
2411 2412 ):
2412 2413 """
2413 2414 add a delta group
2414 2415
2415 2416 given a set of deltas, add them to the revision log. the
2416 2417 first delta is against its parent, which should be in our
2417 2418 log, the rest are against the previous delta.
2418 2419
2419 2420 If ``addrevisioncb`` is defined, it will be called with arguments of
2420 2421 this revlog and the node that was added.
2421 2422 """
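        # Each item in ``deltas`` is expected to unpack as an 8-tuple
        # (node, p1, p2, linknode, deltabase, delta, flags, sidedata),
        # matching the destructuring in the loop below.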
2422 2423
2423 2424 if self._writinghandles:
2424 2425 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2425 2426
2426 2427 r = len(self)
2427 2428 end = 0
2428 2429 if r:
2429 2430 end = self.end(r - 1)
2430 2431 ifh = self._indexfp(b"a+")
2431 2432 isize = r * self.index.entry_size
2432 2433 if self._inline:
2433 2434 transaction.add(self._indexfile, end + isize)
2434 2435 dfh = None
2435 2436 else:
2436 2437 transaction.add(self._indexfile, isize)
2437 2438 transaction.add(self._datafile, end)
2438 2439 dfh = self._datafp(b"a+")
2439 2440
2440 2441 self._writinghandles = (ifh, dfh)
2441 2442 empty = True
2442 2443
2443 2444 try:
2444 2445 deltacomputer = deltautil.deltacomputer(self)
2445 2446 # loop through our set of deltas
2446 2447 for data in deltas:
2447 2448 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2448 2449 link = linkmapper(linknode)
2449 2450 flags = flags or REVIDX_DEFAULT_FLAGS
2450 2451
2451 2452 rev = self.index.get_rev(node)
2452 2453 if rev is not None:
2453 2454 # this can happen if two branches make the same change
2454 2455 self._nodeduplicatecallback(transaction, rev)
2455 2456 if duplicaterevisioncb:
2456 2457 duplicaterevisioncb(self, rev)
2457 2458 empty = False
2458 2459 continue
2459 2460
2460 2461 for p in (p1, p2):
2461 2462 if not self.index.has_node(p):
2462 2463 raise error.LookupError(
2463 2464 p, self.radix, _(b'unknown parent')
2464 2465 )
2465 2466
2466 2467 if not self.index.has_node(deltabase):
2467 2468 raise error.LookupError(
2468 2469 deltabase, self.display_id, _(b'unknown delta base')
2469 2470 )
2470 2471
2471 2472 baserev = self.rev(deltabase)
2472 2473
2473 2474 if baserev != nullrev and self.iscensored(baserev):
2474 2475 # if base is censored, delta must be full replacement in a
2475 2476 # single patch operation
2476 2477 hlen = struct.calcsize(b">lll")
2477 2478 oldlen = self.rawsize(baserev)
2478 2479 newlen = len(delta) - hlen
2479 2480 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2480 2481 raise error.CensoredBaseError(
2481 2482 self.display_id, self.node(baserev)
2482 2483 )
2483 2484
2484 2485 if not flags and self._peek_iscensored(baserev, delta):
2485 2486 flags |= REVIDX_ISCENSORED
2486 2487
2487 2488 # We assume consumers of addrevisioncb will want to retrieve
2488 2489 # the added revision, which will require a call to
2489 2490 # revision(). revision() will fast path if there is a cache
2490 2491 # hit. So, we tell _addrevision() to always cache in this case.
2491 2492 # We're only using addgroup() in the context of changegroup
2492 2493 # generation so the revision data can always be handled as raw
2493 2494 # by the flagprocessor.
2494 2495 rev = self._addrevision(
2495 2496 node,
2496 2497 None,
2497 2498 transaction,
2498 2499 link,
2499 2500 p1,
2500 2501 p2,
2501 2502 flags,
2502 2503 (baserev, delta),
2503 2504 ifh,
2504 2505 dfh,
2505 2506 alwayscache=alwayscache,
2506 2507 deltacomputer=deltacomputer,
2507 2508 sidedata=sidedata,
2508 2509 )
2509 2510
2510 2511 if addrevisioncb:
2511 2512 addrevisioncb(self, rev)
2512 2513 empty = False
2513 2514
2514 2515 if not dfh and not self._inline:
2515 2516 # addrevision switched from inline to conventional
2516 2517 # reopen the index
2517 2518 ifh.close()
2518 2519 dfh = self._datafp(b"a+")
2519 2520 ifh = self._indexfp(b"a+")
2520 2521 self._writinghandles = (ifh, dfh)
2521 2522 finally:
2522 2523 self._writinghandles = None
2523 2524
2524 2525 if dfh:
2525 2526 dfh.close()
2526 2527 ifh.close()
2527 2528 return not empty
2528 2529
2529 2530 def iscensored(self, rev):
2530 2531 """Check if a file revision is censored."""
2531 2532 if not self._censorable:
2532 2533 return False
2533 2534
2534 2535 return self.flags(rev) & REVIDX_ISCENSORED
2535 2536
2536 2537 def _peek_iscensored(self, baserev, delta):
2537 2538 """Quickly check if a delta produces a censored revision."""
2538 2539 if not self._censorable:
2539 2540 return False
2540 2541
2541 2542 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2542 2543
2543 2544 def getstrippoint(self, minlink):
2544 2545 """find the minimum rev that must be stripped to strip the linkrev
2545 2546
2546 2547 Returns a tuple containing the minimum rev and a set of all revs that
2547 2548 have linkrevs that will be broken by this strip.
2548 2549 """
2549 2550 return storageutil.resolvestripinfo(
2550 2551 minlink,
2551 2552 len(self) - 1,
2552 2553 self.headrevs(),
2553 2554 self.linkrev,
2554 2555 self.parentrevs,
2555 2556 )
2556 2557
2557 2558 def strip(self, minlink, transaction):
2558 2559 """truncate the revlog on the first revision with a linkrev >= minlink
2559 2560
2560 2561 This function is called when we're stripping revision minlink and
2561 2562 its descendants from the repository.
2562 2563
2563 2564 We have to remove all revisions with linkrev >= minlink, because
2564 2565 the equivalent changelog revisions will be renumbered after the
2565 2566 strip.
2566 2567
2567 2568 So we truncate the revlog on the first of these revisions, and
2568 2569 trust that the caller has saved the revisions that shouldn't be
2569 2570 removed and that it'll re-add them after this truncation.
2570 2571 """
2571 2572 if len(self) == 0:
2572 2573 return
2573 2574
2574 2575 rev, _ = self.getstrippoint(minlink)
2575 2576 if rev == len(self):
2576 2577 return
2577 2578
2578 2579 # first truncate the files on disk
2579 2580 end = self.start(rev)
2580 2581 if not self._inline:
2581 2582 transaction.add(self._datafile, end)
2582 2583 end = rev * self.index.entry_size
2583 2584 else:
2584 2585 end += rev * self.index.entry_size
2585 2586
2586 2587 transaction.add(self._indexfile, end)
2587 2588
2588 2589 # then reset internal state in memory to forget those revisions
2589 2590 self._revisioncache = None
2590 2591 self._chaininfocache = util.lrucachedict(500)
2591 2592 self._chunkclear()
2592 2593
2593 2594 del self.index[rev:-1]
2594 2595
2595 2596 def checksize(self):
2596 2597 """Check size of index and data files
2597 2598
2598 2599 return a (dd, di) tuple.
2599 2600 - dd: extra bytes for the "data" file
2600 2601 - di: extra bytes for the "index" file
2601 2602
2602 2603 A healthy revlog will return (0, 0).
2603 2604 """
2604 2605 expected = 0
2605 2606 if len(self):
2606 2607 expected = max(0, self.end(len(self) - 1))
2607 2608
2608 2609 try:
2609 2610 with self._datafp() as f:
2610 2611 f.seek(0, io.SEEK_END)
2611 2612 actual = f.tell()
2612 2613 dd = actual - expected
2613 2614 except IOError as inst:
2614 2615 if inst.errno != errno.ENOENT:
2615 2616 raise
2616 2617 dd = 0
2617 2618
2618 2619 try:
2619 2620 f = self.opener(self._indexfile)
2620 2621 f.seek(0, io.SEEK_END)
2621 2622 actual = f.tell()
2622 2623 f.close()
2623 2624 s = self.index.entry_size
2624 2625 i = max(0, actual // s)
2625 2626 di = actual - (i * s)
2626 2627 if self._inline:
2627 2628 databytes = 0
2628 2629 for r in self:
2629 2630 databytes += max(0, self.length(r))
2630 2631 dd = 0
2631 2632 di = actual - len(self) * s - databytes
2632 2633 except IOError as inst:
2633 2634 if inst.errno != errno.ENOENT:
2634 2635 raise
2635 2636 di = 0
2636 2637
2637 2638 return (dd, di)
2638 2639
2639 2640 def files(self):
2640 2641 res = [self._indexfile]
2641 2642 if not self._inline:
2642 2643 res.append(self._datafile)
2643 2644 return res
2644 2645
2645 2646 def emitrevisions(
2646 2647 self,
2647 2648 nodes,
2648 2649 nodesorder=None,
2649 2650 revisiondata=False,
2650 2651 assumehaveparentrevisions=False,
2651 2652 deltamode=repository.CG_DELTAMODE_STD,
2652 2653 sidedata_helpers=None,
2653 2654 ):
2654 2655 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2655 2656 raise error.ProgrammingError(
2656 2657 b'unhandled value for nodesorder: %s' % nodesorder
2657 2658 )
2658 2659
2659 2660 if nodesorder is None and not self._generaldelta:
2660 2661 nodesorder = b'storage'
2661 2662
2662 2663 if (
2663 2664 not self._storedeltachains
2664 2665 and deltamode != repository.CG_DELTAMODE_PREV
2665 2666 ):
2666 2667 deltamode = repository.CG_DELTAMODE_FULL
2667 2668
2668 2669 return storageutil.emitrevisions(
2669 2670 self,
2670 2671 nodes,
2671 2672 nodesorder,
2672 2673 revlogrevisiondelta,
2673 2674 deltaparentfn=self.deltaparent,
2674 2675 candeltafn=self.candelta,
2675 2676 rawsizefn=self.rawsize,
2676 2677 revdifffn=self.revdiff,
2677 2678 flagsfn=self.flags,
2678 2679 deltamode=deltamode,
2679 2680 revisiondata=revisiondata,
2680 2681 assumehaveparentrevisions=assumehaveparentrevisions,
2681 2682 sidedata_helpers=sidedata_helpers,
2682 2683 )
2683 2684
2684 2685 DELTAREUSEALWAYS = b'always'
2685 2686 DELTAREUSESAMEREVS = b'samerevs'
2686 2687 DELTAREUSENEVER = b'never'
2687 2688
2688 2689 DELTAREUSEFULLADD = b'fulladd'
2689 2690
2690 2691 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2691 2692
2692 2693 def clone(
2693 2694 self,
2694 2695 tr,
2695 2696 destrevlog,
2696 2697 addrevisioncb=None,
2697 2698 deltareuse=DELTAREUSESAMEREVS,
2698 2699 forcedeltabothparents=None,
2699 2700 sidedata_helpers=None,
2700 2701 ):
2701 2702 """Copy this revlog to another, possibly with format changes.
2702 2703
2703 2704 The destination revlog will contain the same revisions and nodes.
2704 2705 However, it may not be bit-for-bit identical due to e.g. delta encoding
2705 2706 differences.
2706 2707
2707 2708         The ``deltareuse`` argument controls how deltas from the existing revlog
2708 2709 are preserved in the destination revlog. The argument can have the
2709 2710 following values:
2710 2711
2711 2712 DELTAREUSEALWAYS
2712 2713 Deltas will always be reused (if possible), even if the destination
2713 2714 revlog would not select the same revisions for the delta. This is the
2714 2715 fastest mode of operation.
2715 2716 DELTAREUSESAMEREVS
2716 2717 Deltas will be reused if the destination revlog would pick the same
2717 2718 revisions for the delta. This mode strikes a balance between speed
2718 2719 and optimization.
2719 2720 DELTAREUSENEVER
2720 2721 Deltas will never be reused. This is the slowest mode of execution.
2721 2722 This mode can be used to recompute deltas (e.g. if the diff/delta
2722 2723 algorithm changes).
2723 2724 DELTAREUSEFULLADD
2724 2725             Revisions will be re-added as if they were new content. This is
2725 2726             slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2726 2727             e.g. large file detection and handling.
2727 2728
2728 2729 Delta computation can be slow, so the choice of delta reuse policy can
2729 2730 significantly affect run time.
2730 2731
2731 2732 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2732 2733         two extremes. Deltas will be reused when appropriate, but recomputed
2733 2734         when a better delta base is available. This means if you
2734 2735 are converting a non-generaldelta revlog to a generaldelta revlog,
2735 2736 deltas will be recomputed if the delta's parent isn't a parent of the
2736 2737 revision.
2737 2738
2738 2739 In addition to the delta policy, the ``forcedeltabothparents``
2739 2740         argument controls whether to force computing deltas against both parents
2740 2741         for merges. When unset, the destination revlog's current setting is kept.
2741 2742
2742 2743 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2743 2744 `sidedata_helpers`.
2744 2745 """
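        # A minimal usage sketch (hypothetical names; ``src`` and
        # ``dst`` are revlogs and ``tr`` an open transaction):
        #
        #   src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)
        #
        # ``dst`` must be empty and neither revlog may have filtered
        # revisions, as enforced below.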
2745 2746 if deltareuse not in self.DELTAREUSEALL:
2746 2747 raise ValueError(
2747 2748 _(b'value for deltareuse invalid: %s') % deltareuse
2748 2749 )
2749 2750
2750 2751 if len(destrevlog):
2751 2752 raise ValueError(_(b'destination revlog is not empty'))
2752 2753
2753 2754 if getattr(self, 'filteredrevs', None):
2754 2755 raise ValueError(_(b'source revlog has filtered revisions'))
2755 2756 if getattr(destrevlog, 'filteredrevs', None):
2756 2757 raise ValueError(_(b'destination revlog has filtered revisions'))
2757 2758
2758 2759 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2759 2760 # if possible.
2760 2761 oldlazydelta = destrevlog._lazydelta
2761 2762 oldlazydeltabase = destrevlog._lazydeltabase
2762 2763 oldamd = destrevlog._deltabothparents
2763 2764
2764 2765 try:
2765 2766 if deltareuse == self.DELTAREUSEALWAYS:
2766 2767 destrevlog._lazydeltabase = True
2767 2768 destrevlog._lazydelta = True
2768 2769 elif deltareuse == self.DELTAREUSESAMEREVS:
2769 2770 destrevlog._lazydeltabase = False
2770 2771 destrevlog._lazydelta = True
2771 2772 elif deltareuse == self.DELTAREUSENEVER:
2772 2773 destrevlog._lazydeltabase = False
2773 2774 destrevlog._lazydelta = False
2774 2775
2775 2776 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2776 2777
2777 2778 self._clone(
2778 2779 tr,
2779 2780 destrevlog,
2780 2781 addrevisioncb,
2781 2782 deltareuse,
2782 2783 forcedeltabothparents,
2783 2784 sidedata_helpers,
2784 2785 )
2785 2786
2786 2787 finally:
2787 2788 destrevlog._lazydelta = oldlazydelta
2788 2789 destrevlog._lazydeltabase = oldlazydeltabase
2789 2790 destrevlog._deltabothparents = oldamd
2790 2791
2791 2792 def _clone(
2792 2793 self,
2793 2794 tr,
2794 2795 destrevlog,
2795 2796 addrevisioncb,
2796 2797 deltareuse,
2797 2798 forcedeltabothparents,
2798 2799 sidedata_helpers,
2799 2800 ):
2800 2801 """perform the core duty of `revlog.clone` after parameter processing"""
2801 2802 deltacomputer = deltautil.deltacomputer(destrevlog)
2802 2803 index = self.index
2803 2804 for rev in self:
2804 2805 entry = index[rev]
2805 2806
2806 2807 # Some classes override linkrev to take filtered revs into
2807 2808 # account. Use raw entry from index.
2808 2809 flags = entry[0] & 0xFFFF
2809 2810 linkrev = entry[4]
2810 2811 p1 = index[entry[5]][7]
2811 2812 p2 = index[entry[6]][7]
2812 2813 node = entry[7]
2813 2814
2814 2815 # (Possibly) reuse the delta from the revlog if allowed and
2815 2816 # the revlog chunk is a delta.
2816 2817 cachedelta = None
2817 2818 rawtext = None
2818 2819 if deltareuse == self.DELTAREUSEFULLADD:
2819 2820 text, sidedata = self._revisiondata(rev)
2820 2821
2821 2822 if sidedata_helpers is not None:
2822 2823 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2823 2824 self, sidedata_helpers, sidedata, rev
2824 2825 )
2825 2826 flags = flags | new_flags[0] & ~new_flags[1]
2826 2827
2827 2828 destrevlog.addrevision(
2828 2829 text,
2829 2830 tr,
2830 2831 linkrev,
2831 2832 p1,
2832 2833 p2,
2833 2834 cachedelta=cachedelta,
2834 2835 node=node,
2835 2836 flags=flags,
2836 2837 deltacomputer=deltacomputer,
2837 2838 sidedata=sidedata,
2838 2839 )
2839 2840 else:
2840 2841 if destrevlog._lazydelta:
2841 2842 dp = self.deltaparent(rev)
2842 2843 if dp != nullrev:
2843 2844 cachedelta = (dp, bytes(self._chunk(rev)))
2844 2845
2845 2846 sidedata = None
2846 2847 if not cachedelta:
2847 2848 rawtext, sidedata = self._revisiondata(rev)
2848 2849 if sidedata is None:
2849 2850 sidedata = self.sidedata(rev)
2850 2851
2851 2852 if sidedata_helpers is not None:
2852 2853 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2853 2854 self, sidedata_helpers, sidedata, rev
2854 2855 )
2855 2856 flags = flags | new_flags[0] & ~new_flags[1]
2856 2857
2857 2858 ifh = destrevlog.opener(
2858 2859 destrevlog._indexfile, b'a+', checkambig=False
2859 2860 )
2860 2861 dfh = None
2861 2862 if not destrevlog._inline:
2862 2863 dfh = destrevlog.opener(destrevlog._datafile, b'a+')
2863 2864 try:
2864 2865 destrevlog._addrevision(
2865 2866 node,
2866 2867 rawtext,
2867 2868 tr,
2868 2869 linkrev,
2869 2870 p1,
2870 2871 p2,
2871 2872 flags,
2872 2873 cachedelta,
2873 2874 ifh,
2874 2875 dfh,
2875 2876 deltacomputer=deltacomputer,
2876 2877 sidedata=sidedata,
2877 2878 )
2878 2879 finally:
2879 2880 if dfh:
2880 2881 dfh.close()
2881 2882 ifh.close()
2882 2883
2883 2884 if addrevisioncb:
2884 2885 addrevisioncb(self, rev, node)
2885 2886
2886 2887 def censorrevision(self, tr, censornode, tombstone=b''):
2887 2888 if self._format_version == REVLOGV0:
2888 2889 raise error.RevlogError(
2889 2890 _(b'cannot censor with version %d revlogs')
2890 2891 % self._format_version
2891 2892 )
2892 2893
2893 2894 censorrev = self.rev(censornode)
2894 2895 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2895 2896
2896 2897 if len(tombstone) > self.rawsize(censorrev):
2897 2898 raise error.Abort(
2898 2899 _(b'censor tombstone must be no longer than censored data')
2899 2900 )
2900 2901
2901 2902 # Rewriting the revlog in place is hard. Our strategy for censoring is
2902 2903 # to create a new revlog, copy all revisions to it, then replace the
2903 2904 # revlogs on transaction close.
2904 2905 #
2905 2906 # This is a bit dangerous. We could easily have a mismatch of state.
2906 2907 newrl = revlog(
2907 2908 self.opener,
2908 2909 target=self.target,
2909 2910 radix=self.radix,
2910 2911 postfix=b'tmpcensored',
2911 2912 censorable=True,
2912 2913 )
2913 2914 newrl._format_version = self._format_version
2914 2915 newrl._format_flags = self._format_flags
2915 2916 newrl._generaldelta = self._generaldelta
2916 2917 newrl._parse_index = self._parse_index
2917 2918
2918 2919 for rev in self.revs():
2919 2920 node = self.node(rev)
2920 2921 p1, p2 = self.parents(node)
2921 2922
2922 2923 if rev == censorrev:
2923 2924 newrl.addrawrevision(
2924 2925 tombstone,
2925 2926 tr,
2926 2927 self.linkrev(censorrev),
2927 2928 p1,
2928 2929 p2,
2929 2930 censornode,
2930 2931 REVIDX_ISCENSORED,
2931 2932 )
2932 2933
2933 2934 if newrl.deltaparent(rev) != nullrev:
2934 2935 raise error.Abort(
2935 2936 _(
2936 2937 b'censored revision stored as delta; '
2937 2938 b'cannot censor'
2938 2939 ),
2939 2940 hint=_(
2940 2941 b'censoring of revlogs is not '
2941 2942 b'fully implemented; please report '
2942 2943 b'this bug'
2943 2944 ),
2944 2945 )
2945 2946 continue
2946 2947
2947 2948 if self.iscensored(rev):
2948 2949 if self.deltaparent(rev) != nullrev:
2949 2950 raise error.Abort(
2950 2951 _(
2951 2952 b'cannot censor due to censored '
2952 2953 b'revision having delta stored'
2953 2954 )
2954 2955 )
2955 2956 rawtext = self._chunk(rev)
2956 2957 else:
2957 2958 rawtext = self.rawdata(rev)
2958 2959
2959 2960 newrl.addrawrevision(
2960 2961 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2961 2962 )
2962 2963
2963 2964 tr.addbackup(self._indexfile, location=b'store')
2964 2965 if not self._inline:
2965 2966 tr.addbackup(self._datafile, location=b'store')
2966 2967
2967 2968 self.opener.rename(newrl._indexfile, self._indexfile)
2968 2969 if not self._inline:
2969 2970 self.opener.rename(newrl._datafile, self._datafile)
2970 2971
2971 2972 self.clearcaches()
2972 2973 self._loadindex()
2973 2974
2974 2975 def verifyintegrity(self, state):
2975 2976 """Verifies the integrity of the revlog.
2976 2977
2977 2978 Yields ``revlogproblem`` instances describing problems that are
2978 2979 found.
2979 2980 """
2980 2981 dd, di = self.checksize()
2981 2982 if dd:
2982 2983 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2983 2984 if di:
2984 2985 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2985 2986
2986 2987 version = self._format_version
2987 2988
2988 2989 # The verifier tells us what version revlog we should be.
2989 2990 if version != state[b'expectedversion']:
2990 2991 yield revlogproblem(
2991 2992 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2992 2993 % (self.display_id, version, state[b'expectedversion'])
2993 2994 )
2994 2995
2995 2996 state[b'skipread'] = set()
2996 2997 state[b'safe_renamed'] = set()
2997 2998
2998 2999 for rev in self:
2999 3000 node = self.node(rev)
3000 3001
3001 3002 # Verify contents. 4 cases to care about:
3002 3003 #
3003 3004 # common: the most common case
3004 3005 # rename: with a rename
3005 3006 # meta: file content starts with b'\1\n', the metadata
3006 3007 # header defined in filelog.py, but without a rename
3007 3008 # ext: content stored externally
3008 3009 #
3009 3010 # More formally, their differences are shown below:
3010 3011 #
3011 3012 # | common | rename | meta | ext
3012 3013 # -------------------------------------------------------
3013 3014 # flags() | 0 | 0 | 0 | not 0
3014 3015 # renamed() | False | True | False | ?
3015 3016 # rawtext[0:2]=='\1\n'| False | True | True | ?
3016 3017 #
3017 3018 # "rawtext" means the raw text stored in revlog data, which
3018 3019 # could be retrieved by "rawdata(rev)". "text"
3019 3020 # mentioned below is "revision(rev)".
3020 3021 #
3021 3022 # There are 3 different lengths stored physically:
3022 3023 # 1. L1: rawsize, stored in revlog index
3023 3024 # 2. L2: len(rawtext), stored in revlog data
3024 3025 # 3. L3: len(text), stored in revlog data if flags==0, or
3025 3026 # possibly somewhere else if flags!=0
3026 3027 #
3027 3028 # L1 should be equal to L2. L3 could be different from them.
3028 3029 # "text" may or may not affect commit hash depending on flag
3029 3030 # processors (see flagutil.addflagprocessor).
3030 3031 #
3031 3032 # | common | rename | meta | ext
3032 3033 # -------------------------------------------------
3033 3034 # rawsize() | L1 | L1 | L1 | L1
3034 3035 # size() | L1 | L2-LM | L1(*) | L1 (?)
3035 3036 # len(rawtext) | L2 | L2 | L2 | L2
3036 3037 # len(text) | L2 | L2 | L2 | L3
3037 3038 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3038 3039 #
3039 3040 # LM: length of metadata, depending on rawtext
3040 3041 # (*): not ideal, see comment in filelog.size
3041 3042 # (?): could be "- len(meta)" if the resolved content has
3042 3043 # rename metadata
3043 3044 #
3044 3045 # Checks needed to be done:
3045 3046 # 1. length check: L1 == L2, in all cases.
3046 3047 # 2. hash check: depending on flag processor, we may need to
3047 3048 # use either "text" (external), or "rawtext" (in revlog).
3048 3049
3049 3050 try:
3050 3051 skipflags = state.get(b'skipflags', 0)
3051 3052 if skipflags:
3052 3053 skipflags &= self.flags(rev)
3053 3054
3054 3055 _verify_revision(self, skipflags, state, node)
3055 3056
3056 3057 l1 = self.rawsize(rev)
3057 3058 l2 = len(self.rawdata(node))
3058 3059
3059 3060 if l1 != l2:
3060 3061 yield revlogproblem(
3061 3062 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3062 3063 node=node,
3063 3064 )
3064 3065
3065 3066 except error.CensoredNodeError:
3066 3067 if state[b'erroroncensored']:
3067 3068 yield revlogproblem(
3068 3069 error=_(b'censored file data'), node=node
3069 3070 )
3070 3071 state[b'skipread'].add(node)
3071 3072 except Exception as e:
3072 3073 yield revlogproblem(
3073 3074 error=_(b'unpacking %s: %s')
3074 3075 % (short(node), stringutil.forcebytestr(e)),
3075 3076 node=node,
3076 3077 )
3077 3078 state[b'skipread'].add(node)
3078 3079
3079 3080 def storageinfo(
3080 3081 self,
3081 3082 exclusivefiles=False,
3082 3083 sharedfiles=False,
3083 3084 revisionscount=False,
3084 3085 trackedsize=False,
3085 3086 storedsize=False,
3086 3087 ):
3087 3088 d = {}
3088 3089
3089 3090 if exclusivefiles:
3090 3091 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3091 3092 if not self._inline:
3092 3093 d[b'exclusivefiles'].append((self.opener, self._datafile))
3093 3094
3094 3095 if sharedfiles:
3095 3096 d[b'sharedfiles'] = []
3096 3097
3097 3098 if revisionscount:
3098 3099 d[b'revisionscount'] = len(self)
3099 3100
3100 3101 if trackedsize:
3101 3102 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3102 3103
3103 3104 if storedsize:
3104 3105 d[b'storedsize'] = sum(
3105 3106 self.opener.stat(path).st_size for path in self.files()
3106 3107 )
3107 3108
3108 3109 return d
3109 3110
3110 3111 def rewrite_sidedata(self, helpers, startrev, endrev):
3111 3112 if not self.hassidedata:
3112 3113 return
3113 3114         # inline revlogs are not yet supported because they suffer from an
3114 3115         # issue when being rewritten (it is not an append-only operation).
3115 3116         # See issue6485.
3116 3117 assert not self._inline
3117 3118 if not helpers[1] and not helpers[2]:
3118 3119 # Nothing to generate or remove
3119 3120 return
3120 3121
3121 3122         # the changelog implements a "delayed" writing mechanism that assumes
3122 3123         # all index data is written in append mode and is therefore incompatible
3123 3124         # with the seeked writes done in this method. The use of such "delayed"
3124 3125         # writing will soon be removed for revlog versions that support side
3125 3126         # data, so for now, we only keep this simple assert to highlight the
3126 3127         # situation.
3127 3128 delayed = getattr(self, '_delayed', False)
3128 3129 diverted = getattr(self, '_divert', False)
3129 3130 if delayed and not diverted:
3130 3131 msg = "cannot rewrite_sidedata of a delayed revlog"
3131 3132 raise error.ProgrammingError(msg)
3132 3133
3133 3134 new_entries = []
3134 3135 # append the new sidedata
3135 3136 with self._datafp(b'a+') as fp:
3136 3137 # Maybe this bug still exists, see revlog._writeentry
3137 3138 fp.seek(0, os.SEEK_END)
3138 3139 current_offset = fp.tell()
3139 3140 for rev in range(startrev, endrev + 1):
3140 3141 entry = self.index[rev]
3141 3142 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3142 3143 store=self,
3143 3144 sidedata_helpers=helpers,
3144 3145 sidedata={},
3145 3146 rev=rev,
3146 3147 )
3147 3148
3148 3149 serialized_sidedata = sidedatautil.serialize_sidedata(
3149 3150 new_sidedata
3150 3151 )
3151 3152 if entry[8] != 0 or entry[9] != 0:
3152 3153 # rewriting entries that already have sidedata is not
3153 3154 # supported yet, because it introduces garbage data in the
3154 3155 # revlog.
3155 3156 msg = b"Rewriting existing sidedata is not supported yet"
3156 3157 raise error.Abort(msg)
3157 3158
3158 3159 # Apply (potential) flags to add and to remove after running
3159 3160 # the sidedata helpers
3160 3161 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3161 3162 entry = (new_offset_flags,) + entry[1:8]
3162 3163 entry += (current_offset, len(serialized_sidedata))
3163 3164
3164 3165 fp.write(serialized_sidedata)
3165 3166 new_entries.append(entry)
3166 3167 current_offset += len(serialized_sidedata)
3167 3168
3168 3169 # rewrite the new index entries
3169 3170 with self._indexfp(b'r+') as fp:
3170 3171 fp.seek(startrev * self.index.entry_size)
3171 3172 for i, e in enumerate(new_entries):
3172 3173 rev = startrev + i
3173 3174 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3174 3175 packed = self.index.entry_binary(rev)
3175 3176 if rev == 0:
3176 3177 header = self._format_flags | self._format_version
3177 3178 header = self.index.pack_header(header)
3178 3179 packed = header + packed
3179 3180 fp.write(packed)