revlog: directly use self._format_flags when loading index...
marmoute
r47945:58ef549a default
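For context, the revlog index header packs feature flags and the format version into a single 32-bit integer; this patch keeps the flag half on `self._format_flags` instead of a local alias. A minimal sketch of the split, assuming the big-endian layout used by revlog headers (the sample value is an illustrative assumption, resembling a v1 inline+generaldelta revlog):

    import struct

    INDEX_HEADER = struct.Struct(b">I")           # 4 bytes, matching the indexdata[:4] unpack below
    header = INDEX_HEADER.unpack(b'\x00\x03\x00\x01')[0]
    format_flags = header & ~0xFFFF               # high 16 bits: FLAG_INLINE_DATA, FLAG_GENERALDELTA, ...
    format_version = header & 0xFFFF              # low 16 bits: 1 == REVLOGV1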
@@ -1,3177 +1,3180 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 INDEX_HEADER,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_HASCOPIESINFO,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 )
59 59 from .thirdparty import attr
60 60 from . import (
61 61 ancestor,
62 62 dagop,
63 63 error,
64 64 mdiff,
65 65 policy,
66 66 pycompat,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .interfaces import (
71 71 repository,
72 72 util as interfaceutil,
73 73 )
74 74 from .revlogutils import (
75 75 deltas as deltautil,
76 76 flagutil,
77 77 nodemap as nodemaputil,
78 78 revlogv0,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanked usage of all the names to prevent pyflakes constraints
87 87 # We need these names available in the module for extensions.
88 88
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_HASCOPIESINFO
102 102 REVIDX_EXTSTORED
103 103 REVIDX_DEFAULT_FLAGS
104 104 REVIDX_FLAGS_ORDER
105 105 REVIDX_RAWTEXT_CHANGING_FLAGS
106 106
107 107 parsers = policy.importmod('parsers')
108 108 rustancestor = policy.importrust('ancestor')
109 109 rustdagop = policy.importrust('dagop')
110 110 rustrevlog = policy.importrust('revlog')
111 111
112 112 # Aliased for performance.
113 113 _zlibdecompress = zlib.decompress
114 114
115 115 # max size of revlog with inline data
116 116 _maxinline = 131072
117 117 _chunksize = 1048576
118 118
119 119 # Flag processors for REVIDX_ELLIPSIS.
120 120 def ellipsisreadprocessor(rl, text):
121 121 return text, False
122 122
123 123
124 124 def ellipsiswriteprocessor(rl, text):
125 125 return text, False
126 126
127 127
128 128 def ellipsisrawprocessor(rl, text):
129 129 return False
130 130
131 131
132 132 ellipsisprocessor = (
133 133 ellipsisreadprocessor,
134 134 ellipsiswriteprocessor,
135 135 ellipsisrawprocessor,
136 136 )
137 137
138 138
139 139 def offset_type(offset, type):
140 140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 141 raise ValueError(b'unknown revlog index flags')
142 142 return int(int(offset) << 16 | type)
143 143
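A hedged sketch of how this packed value round-trips; the ``start()`` and ``flags()`` accessors further down unpack it the same way (sample values are arbitrary):

    packed = offset_type(4096, 0)   # offset 4096, no flags
    offset = packed >> 16           # 4096, as in start()
    flags = packed & 0xFFFF         # 0, as in flags()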
144 144
145 145 def _verify_revision(rl, skipflags, state, node):
146 146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 147 point for extensions to influence the operation."""
148 148 if skipflags:
149 149 state[b'skipread'].add(node)
150 150 else:
151 151 # Side-effect: read content and verify hash.
152 152 rl.revision(node)
153 153
154 154
155 155 # True if a fast implementation for persistent-nodemap is available
156 156 #
157 157 # We also consider that we have a "fast" implementation in "pure" python because
158 158 # people using pure don't really have performance considerations (and a
159 159 # wheelbarrow of other slowness sources)
160 160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 161 parsers, 'BaseIndexObject'
162 162 )
163 163
164 164
165 165 @attr.s(slots=True, frozen=True)
166 166 class _revisioninfo(object):
167 167 """Information about a revision that allows building its fulltext
168 168 node: expected hash of the revision
169 169 p1, p2: parent revs of the revision
170 170 btext: built text cache consisting of a one-element list
171 171 cachedelta: (baserev, uncompressed_delta) or None
172 172 flags: flags associated with the revision storage
173 173
174 174 One of btext[0] or cachedelta must be set.
175 175 """
176 176
177 177 node = attr.ib()
178 178 p1 = attr.ib()
179 179 p2 = attr.ib()
180 180 btext = attr.ib()
181 181 textlen = attr.ib()
182 182 cachedelta = attr.ib()
183 183 flags = attr.ib()
184 184
185 185
186 186 @interfaceutil.implementer(repository.irevisiondelta)
187 187 @attr.s(slots=True)
188 188 class revlogrevisiondelta(object):
189 189 node = attr.ib()
190 190 p1node = attr.ib()
191 191 p2node = attr.ib()
192 192 basenode = attr.ib()
193 193 flags = attr.ib()
194 194 baserevisionsize = attr.ib()
195 195 revision = attr.ib()
196 196 delta = attr.ib()
197 197 sidedata = attr.ib()
198 198 protocol_flags = attr.ib()
199 199 linknode = attr.ib(default=None)
200 200
201 201
202 202 @interfaceutil.implementer(repository.iverifyproblem)
203 203 @attr.s(frozen=True)
204 204 class revlogproblem(object):
205 205 warning = attr.ib(default=None)
206 206 error = attr.ib(default=None)
207 207 node = attr.ib(default=None)
208 208
209 209
210 210 def parse_index_v1(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline)
213 213 return index, cache
214 214
215 215
216 216 def parse_index_v2(data, inline):
217 217 # call the C implementation to parse the index data
218 218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 219 return index, cache
220 220
221 221
222 222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223 223
224 224 def parse_index_v1_nodemap(data, inline):
225 225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 226 return index, cache
227 227
228 228
229 229 else:
230 230 parse_index_v1_nodemap = None
231 231
232 232
233 233 def parse_index_v1_mixed(data, inline):
234 234 index, cache = parse_index_v1(data, inline)
235 235 return rustrevlog.MixedIndex(index), cache
236 236
237 237
238 238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 239 # signed integer)
240 240 _maxentrysize = 0x7FFFFFFF
241 241
242 242
243 243 class revlog(object):
244 244 """
245 245 the underlying revision storage object
246 246
247 247 A revlog consists of two parts, an index and the revision data.
248 248
249 249 The index is a file with a fixed record size containing
250 250 information on each revision, including its nodeid (hash), the
251 251 nodeids of its parents, the position and offset of its data within
252 252 the data file, and the revision it's based on. Finally, each entry
253 253 contains a linkrev entry that can serve as a pointer to external
254 254 data.
255 255
256 256 The revision data itself is a linear collection of data chunks.
257 257 Each chunk represents a revision and is usually represented as a
258 258 delta against the previous chunk. To bound lookup time, runs of
259 259 deltas are limited to about 2 times the length of the original
260 260 version data. This makes retrieval of a version proportional to
261 261 its size, or O(1) relative to the number of revisions.
262 262
263 263 Both pieces of the revlog are written to in an append-only
264 264 fashion, which means we never need to rewrite a file to insert or
265 265 remove data, and can use some simple techniques to avoid the need
266 266 for locking while reading.
267 267
268 268 If checkambig, indexfile is opened with checkambig=True at
269 269 writing, to avoid file stat ambiguity.
270 270
271 271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 272 index will be mmapped rather than read if it is larger than the
273 273 configured threshold.
274 274
275 275 If censorable is True, the revlog can have censored revisions.
276 276
277 277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 278 compression for the data content.
279 279
280 280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 281 file handle, a filename, and an expected position. It should check whether
282 282 the current position in the file handle is valid, and log/warn/fail (by
283 283 raising).
284 284 """
285 285
286 286 _flagserrorclass = error.RevlogError
287 287
288 288 def __init__(
289 289 self,
290 290 opener,
291 291 target,
292 292 radix,
293 293 postfix=None,
294 294 checkambig=False,
295 295 mmaplargeindex=False,
296 296 censorable=False,
297 297 upperboundcomp=None,
298 298 persistentnodemap=False,
299 299 concurrencychecker=None,
300 300 ):
301 301 """
302 302 create a revlog object
303 303
304 304 opener is a function that abstracts the file opening operation
305 305 and can be used to implement COW semantics or the like.
306 306
307 307 `target`: a (KIND, ID) tuple that identifies the content stored in
308 308 this revlog. It helps the rest of the code to understand what the revlog
309 309 is about without having to resort to heuristics and index filename
310 310 analysis. Note that this must reliably be set by normal code, but
311 311 that test, debug, or performance measurement code might not set this to an
312 312 accurate value.
313 313 """
314 314 self.upperboundcomp = upperboundcomp
315 315
316 316 self.radix = radix
317 317
318 318 self._indexfile = None
319 319 self._datafile = None
320 320 self._nodemap_file = None
321 321 self.postfix = postfix
322 322 self.opener = opener
323 323 if persistentnodemap:
324 324 self._nodemap_file = nodemaputil.get_nodemap_file(self)
325 325
326 326 assert target[0] in ALL_KINDS
327 327 assert len(target) == 2
328 328 self.target = target
329 329 # When True, indexfile is opened with checkambig=True at writing, to
330 330 # avoid file stat ambiguity.
331 331 self._checkambig = checkambig
332 332 self._mmaplargeindex = mmaplargeindex
333 333 self._censorable = censorable
334 334 # 3-tuple of (node, rev, text) for a raw revision.
335 335 self._revisioncache = None
336 336 # Maps rev to chain base rev.
337 337 self._chainbasecache = util.lrucachedict(100)
338 338 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
339 339 self._chunkcache = (0, b'')
340 340 # How much data to read and cache into the raw revlog data cache.
341 341 self._chunkcachesize = 65536
342 342 self._maxchainlen = None
343 343 self._deltabothparents = True
344 344 self.index = None
345 345 self._nodemap_docket = None
346 346 # Mapping of partial identifiers to full nodes.
347 347 self._pcache = {}
348 348 # Mapping of revision integer to full node.
349 349 self._compengine = b'zlib'
350 350 self._compengineopts = {}
351 351 self._maxdeltachainspan = -1
352 352 self._withsparseread = False
353 353 self._sparserevlog = False
354 354 self._srdensitythreshold = 0.50
355 355 self._srmingapsize = 262144
356 356
357 357 # Make copy of flag processors so each revlog instance can support
358 358 # custom flags.
359 359 self._flagprocessors = dict(flagutil.flagprocessors)
360 360
361 361 # 2-tuple of file handles being used for active writing.
362 362 self._writinghandles = None
363 363
364 364 self._loadindex()
365 365
366 366 self._concurrencychecker = concurrencychecker
367 367
368 368 def _init_opts(self):
369 369 """process options (from above/config) to setup associated default revlog mode
370 370
371 371 These values might be affected when actually reading on-disk information.
372 372
373 373 The relevant values are returned for use in _loadindex().
374 374
375 375 * newversionflags:
376 376 version header to use if we need to create a new revlog
377 377
378 378 * mmapindexthreshold:
379 379 minimal index size at which to start using mmap
380 380
381 381 * force_nodemap:
382 382 force the usage of a "development" version of the nodemap code
383 383 """
384 384 mmapindexthreshold = None
385 385 opts = self.opener.options
386 386
387 387 if b'revlogv2' in opts:
388 388 new_header = REVLOGV2 | FLAG_INLINE_DATA
389 389 elif b'revlogv1' in opts:
390 390 new_header = REVLOGV1 | FLAG_INLINE_DATA
391 391 if b'generaldelta' in opts:
392 392 new_header |= FLAG_GENERALDELTA
393 393 elif b'revlogv0' in self.opener.options:
394 394 new_header = REVLOGV0
395 395 else:
396 396 new_header = REVLOG_DEFAULT_VERSION
397 397
398 398 if b'chunkcachesize' in opts:
399 399 self._chunkcachesize = opts[b'chunkcachesize']
400 400 if b'maxchainlen' in opts:
401 401 self._maxchainlen = opts[b'maxchainlen']
402 402 if b'deltabothparents' in opts:
403 403 self._deltabothparents = opts[b'deltabothparents']
404 404 self._lazydelta = bool(opts.get(b'lazydelta', True))
405 405 self._lazydeltabase = False
406 406 if self._lazydelta:
407 407 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
408 408 if b'compengine' in opts:
409 409 self._compengine = opts[b'compengine']
410 410 if b'zlib.level' in opts:
411 411 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
412 412 if b'zstd.level' in opts:
413 413 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
414 414 if b'maxdeltachainspan' in opts:
415 415 self._maxdeltachainspan = opts[b'maxdeltachainspan']
416 416 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
417 417 mmapindexthreshold = opts[b'mmapindexthreshold']
418 418 self.hassidedata = bool(opts.get(b'side-data', False))
419 419 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
420 420 withsparseread = bool(opts.get(b'with-sparse-read', False))
421 421 # sparse-revlog forces sparse-read
422 422 self._withsparseread = self._sparserevlog or withsparseread
423 423 if b'sparse-read-density-threshold' in opts:
424 424 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
425 425 if b'sparse-read-min-gap-size' in opts:
426 426 self._srmingapsize = opts[b'sparse-read-min-gap-size']
427 427 if opts.get(b'enableellipsis'):
428 428 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
429 429
430 430 # revlog v0 doesn't have flag processors
431 431 for flag, processor in pycompat.iteritems(
432 432 opts.get(b'flagprocessors', {})
433 433 ):
434 434 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
435 435
436 436 if self._chunkcachesize <= 0:
437 437 raise error.RevlogError(
438 438 _(b'revlog chunk cache size %r is not greater than 0')
439 439 % self._chunkcachesize
440 440 )
441 441 elif self._chunkcachesize & (self._chunkcachesize - 1):
442 442 raise error.RevlogError(
443 443 _(b'revlog chunk cache size %r is not a power of 2')
444 444 % self._chunkcachesize
445 445 )
446 446 force_nodemap = opts.get(b'devel-force-nodemap', False)
447 447 return new_header, mmapindexthreshold, force_nodemap
448 448
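The `_chunkcachesize & (_chunkcachesize - 1)` validation above is the usual power-of-two bit trick: a power of two has exactly one bit set, so clearing its lowest set bit yields zero. A quick illustration (values arbitrary):

    for size in (65536, 65535):
        print(size, size > 0 and not (size & (size - 1)))   # 65536 True, 65535 False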
449 449 def _get_data(self, filepath, mmap_threshold):
450 450 """return a file content with or without mmap
451 451
452 452 If the file is missing, return the empty string"""
453 453 try:
454 454 with self.opener(filepath) as fp:
455 455 if mmap_threshold is not None:
456 456 file_size = self.opener.fstat(fp).st_size
457 457 if file_size >= mmap_threshold:
458 458 # TODO: should .close() to release resources without
459 459 # relying on Python GC
460 460 return util.buffer(util.mmapread(fp))
461 461 return fp.read()
462 462 except IOError as inst:
463 463 if inst.errno != errno.ENOENT:
464 464 raise
465 465 return b''
466 466
467 467 def _loadindex(self):
468 468
469 469 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
470 470
471 471 if self.postfix is None:
472 472 index_file = b'%s.i' % self.radix
473 473 data_file = b'%s.d' % self.radix
474 474 elif self.postfix == b'a':
475 475 index_file = b'%s.i.a' % self.radix
476 476 data_file = b'%s.d' % self.radix
477 477 else:
478 478 index_file = b'%s.i.%s' % (self.radix, self.postfix)
479 479 data_file = b'%s.d.%s' % (self.radix, self.postfix)
480 480
481 481 self._indexfile = index_file
482 482 self._datafile = data_file
483 483
484 484 indexdata = b''
485 485 self._initempty = True
486 486 indexdata = self._get_data(self._indexfile, mmapindexthreshold)
487 487 if len(indexdata) > 0:
488 488 header = INDEX_HEADER.unpack(indexdata[:4])[0]
489 489 self._initempty = False
490 490 else:
491 491 header = new_header
492 492
493 flags = self._format_flags = header & ~0xFFFF
493 self._format_flags = header & ~0xFFFF
494 494 self._format_version = header & 0xFFFF
495 495
496 496 if self._format_version == REVLOGV0:
497 if flags:
497 if self._format_flags:
498 498 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
499 msg %= (flags >> 16, self._format_version, self.display_id)
499 display_flag = self._format_flags >> 16
500 msg %= (display_flag, self._format_version, self.display_id)
500 501 raise error.RevlogError(msg)
501 502
502 503 self._inline = False
503 504 self._generaldelta = False
504 505
505 506 elif self._format_version == REVLOGV1:
506 if flags & ~REVLOGV1_FLAGS:
507 if self._format_flags & ~REVLOGV1_FLAGS:
507 508 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
508 msg %= (flags >> 16, self._format_version, self.display_id)
509 display_flag = self._format_flags >> 16
510 msg %= (display_flag, self._format_version, self.display_id)
509 511 raise error.RevlogError(msg)
510 512
511 513 self._inline = self._format_flags & FLAG_INLINE_DATA
512 514 self._generaldelta = self._format_flags & FLAG_GENERALDELTA
513 515
514 516 elif self._format_version == REVLOGV2:
515 if flags & ~REVLOGV2_FLAGS:
517 if self._format_flags & ~REVLOGV2_FLAGS:
516 518 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
517 msg %= (flags >> 16, self._format_version, self.display_id)
519 display_flag = self._format_flags >> 16
520 msg %= (display_flag, self._format_version, self.display_id)
518 521 raise error.RevlogError(msg)
519 522
520 523 # There is a bug in the transaction handling when going from an
521 524 # inline revlog to a separate index and data file. Turn it off until
522 525 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
523 526 # See issue6485
524 527 self._inline = False
525 528 # generaldelta implied by version 2 revlogs.
526 529 self._generaldelta = True
527 530
528 531 else:
529 532 msg = _(b'unknown version (%d) in revlog %s')
530 533 msg %= (self._format_version, self.display_id)
531 534 raise error.RevlogError(msg)
532 535
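Each `flags & ~REVLOGVn_FLAGS` test above isolates any flag bit outside the supported set for that version; a non-zero result takes the unknown-flags error path. A sketch assuming the usual bit layout (1 << 16 and 1 << 17 mirror FLAG_INLINE_DATA and FLAG_GENERALDELTA; the unknown bit is hypothetical):

    FLAG_INLINE_DATA = 1 << 16
    FLAG_GENERALDELTA = 1 << 17
    REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA

    header_flags = FLAG_INLINE_DATA | (1 << 18)    # one supported bit, one unknown bit
    assert header_flags & ~REVLOGV1_FLAGS          # non-zero -> "unknown flags" error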
533 536 self.nodeconstants = sha1nodeconstants
534 537 self.nullid = self.nodeconstants.nullid
535 538
536 539 # sparse-revlog can't be on without general-delta (issue6056)
537 540 if not self._generaldelta:
538 541 self._sparserevlog = False
539 542
540 543 self._storedeltachains = True
541 544
542 545 devel_nodemap = (
543 546 self._nodemap_file
544 547 and force_nodemap
545 548 and parse_index_v1_nodemap is not None
546 549 )
547 550
548 551 use_rust_index = False
549 552 if rustrevlog is not None:
550 553 if self._nodemap_file is not None:
551 554 use_rust_index = True
552 555 else:
553 556 use_rust_index = self.opener.options.get(b'rust.index')
554 557
555 558 self._parse_index = parse_index_v1
556 559 if self._format_version == REVLOGV0:
557 560 self._parse_index = revlogv0.parse_index_v0
558 561 elif self._format_version == REVLOGV2:
559 562 self._parse_index = parse_index_v2
560 563 elif devel_nodemap:
561 564 self._parse_index = parse_index_v1_nodemap
562 565 elif use_rust_index:
563 566 self._parse_index = parse_index_v1_mixed
564 567 try:
565 568 d = self._parse_index(indexdata, self._inline)
566 569 index, _chunkcache = d
567 570 use_nodemap = (
568 571 not self._inline
569 572 and self._nodemap_file is not None
570 573 and util.safehasattr(index, 'update_nodemap_data')
571 574 )
572 575 if use_nodemap:
573 576 nodemap_data = nodemaputil.persisted_data(self)
574 577 if nodemap_data is not None:
575 578 docket = nodemap_data[0]
576 579 if (
577 580 len(d[0]) > docket.tip_rev
578 581 and d[0][docket.tip_rev][7] == docket.tip_node
579 582 ):
580 583 # no changelog tampering
581 584 self._nodemap_docket = docket
582 585 index.update_nodemap_data(*nodemap_data)
583 586 except (ValueError, IndexError):
584 587 raise error.RevlogError(
585 588 _(b"index %s is corrupted") % self.display_id
586 589 )
587 590 self.index, self._chunkcache = d
588 591 if not self._chunkcache:
589 592 self._chunkclear()
590 593 # revnum -> (chain-length, sum-delta-length)
591 594 self._chaininfocache = util.lrucachedict(500)
592 595 # revlog header -> revlog compressor
593 596 self._decompressors = {}
594 597
595 598 @util.propertycache
596 599 def revlog_kind(self):
597 600 return self.target[0]
598 601
599 602 @util.propertycache
600 603 def display_id(self):
601 604 """The public facing "ID" of the revlog that we use in message"""
602 605 # Maybe we should build a user facing representation of
603 606 # revlog.target instead of using `self.radix`
604 607 return self.radix
605 608
606 609 @util.propertycache
607 610 def _compressor(self):
608 611 engine = util.compengines[self._compengine]
609 612 return engine.revlogcompressor(self._compengineopts)
610 613
611 614 def _indexfp(self, mode=b'r'):
612 615 """file object for the revlog's index file"""
613 616 args = {'mode': mode}
614 617 if mode != b'r':
615 618 args['checkambig'] = self._checkambig
616 619 if mode == b'w':
617 620 args['atomictemp'] = True
618 621 return self.opener(self._indexfile, **args)
619 622
620 623 def _datafp(self, mode=b'r'):
621 624 """file object for the revlog's data file"""
622 625 return self.opener(self._datafile, mode=mode)
623 626
624 627 @contextlib.contextmanager
625 628 def _datareadfp(self, existingfp=None):
626 629 """file object suitable to read data"""
627 630 # Use explicit file handle, if given.
628 631 if existingfp is not None:
629 632 yield existingfp
630 633
631 634 # Use a file handle being actively used for writes, if available.
632 635 # There is some danger to doing this because reads will seek the
633 636 # file. However, _writeentry() performs a SEEK_END before all writes,
634 637 # so we should be safe.
635 638 elif self._writinghandles:
636 639 if self._inline:
637 640 yield self._writinghandles[0]
638 641 else:
639 642 yield self._writinghandles[1]
640 643
641 644 # Otherwise open a new file handle.
642 645 else:
643 646 if self._inline:
644 647 func = self._indexfp
645 648 else:
646 649 func = self._datafp
647 650 with func() as fp:
648 651 yield fp
649 652
650 653 def tiprev(self):
651 654 return len(self.index) - 1
652 655
653 656 def tip(self):
654 657 return self.node(self.tiprev())
655 658
656 659 def __contains__(self, rev):
657 660 return 0 <= rev < len(self)
658 661
659 662 def __len__(self):
660 663 return len(self.index)
661 664
662 665 def __iter__(self):
663 666 return iter(pycompat.xrange(len(self)))
664 667
665 668 def revs(self, start=0, stop=None):
666 669 """iterate over all rev in this revlog (from start to stop)"""
667 670 return storageutil.iterrevs(len(self), start=start, stop=stop)
668 671
669 672 @property
670 673 def nodemap(self):
671 674 msg = (
672 675 b"revlog.nodemap is deprecated, "
673 676 b"use revlog.index.[has_node|rev|get_rev]"
674 677 )
675 678 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
676 679 return self.index.nodemap
677 680
678 681 @property
679 682 def _nodecache(self):
680 683 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
681 684 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
682 685 return self.index.nodemap
683 686
684 687 def hasnode(self, node):
685 688 try:
686 689 self.rev(node)
687 690 return True
688 691 except KeyError:
689 692 return False
690 693
691 694 def candelta(self, baserev, rev):
692 695 """whether two revisions (baserev, rev) can be delta-ed or not"""
693 696 # Disable delta if either rev requires a content-changing flag
694 697 # processor (ex. LFS). This is because such flag processor can alter
695 698 # the rawtext content that the delta will be based on, and two clients
696 699 # could have the same revlog node with different flags (i.e. different
697 700 # rawtext contents) and the delta could be incompatible.
698 701 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
699 702 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
700 703 ):
701 704 return False
702 705 return True
703 706
704 707 def update_caches(self, transaction):
705 708 if self._nodemap_file is not None:
706 709 if transaction is None:
707 710 nodemaputil.update_persistent_nodemap(self)
708 711 else:
709 712 nodemaputil.setup_persistent_nodemap(transaction, self)
710 713
711 714 def clearcaches(self):
712 715 self._revisioncache = None
713 716 self._chainbasecache.clear()
714 717 self._chunkcache = (0, b'')
715 718 self._pcache = {}
716 719 self._nodemap_docket = None
717 720 self.index.clearcaches()
718 721 # The python code is the one responsible for validating the docket, so we
719 722 # end up having to refresh it here.
720 723 use_nodemap = (
721 724 not self._inline
722 725 and self._nodemap_file is not None
723 726 and util.safehasattr(self.index, 'update_nodemap_data')
724 727 )
725 728 if use_nodemap:
726 729 nodemap_data = nodemaputil.persisted_data(self)
727 730 if nodemap_data is not None:
728 731 self._nodemap_docket = nodemap_data[0]
729 732 self.index.update_nodemap_data(*nodemap_data)
730 733
731 734 def rev(self, node):
732 735 try:
733 736 return self.index.rev(node)
734 737 except TypeError:
735 738 raise
736 739 except error.RevlogError:
737 740 # parsers.c radix tree lookup failed
738 741 if (
739 742 node == self.nodeconstants.wdirid
740 743 or node in self.nodeconstants.wdirfilenodeids
741 744 ):
742 745 raise error.WdirUnsupported
743 746 raise error.LookupError(node, self.display_id, _(b'no node'))
744 747
745 748 # Accessors for index entries.
746 749
747 750 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
748 751 # are flags.
749 752 def start(self, rev):
750 753 return int(self.index[rev][0] >> 16)
751 754
752 755 def flags(self, rev):
753 756 return self.index[rev][0] & 0xFFFF
754 757
755 758 def length(self, rev):
756 759 return self.index[rev][1]
757 760
758 761 def sidedata_length(self, rev):
759 762 if not self.hassidedata:
760 763 return 0
761 764 return self.index[rev][9]
762 765
763 766 def rawsize(self, rev):
764 767 """return the length of the uncompressed text for a given revision"""
765 768 l = self.index[rev][2]
766 769 if l >= 0:
767 770 return l
768 771
769 772 t = self.rawdata(rev)
770 773 return len(t)
771 774
772 775 def size(self, rev):
773 776 """length of non-raw text (processed by a "read" flag processor)"""
774 777 # fast path: if no "read" flag processor could change the content,
775 778 # size is rawsize. note: ELLIPSIS is known to not change the content.
776 779 flags = self.flags(rev)
777 780 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
778 781 return self.rawsize(rev)
779 782
780 783 return len(self.revision(rev, raw=False))
781 784
782 785 def chainbase(self, rev):
783 786 base = self._chainbasecache.get(rev)
784 787 if base is not None:
785 788 return base
786 789
787 790 index = self.index
788 791 iterrev = rev
789 792 base = index[iterrev][3]
790 793 while base != iterrev:
791 794 iterrev = base
792 795 base = index[iterrev][3]
793 796
794 797 self._chainbasecache[rev] = base
795 798 return base
796 799
797 800 def linkrev(self, rev):
798 801 return self.index[rev][4]
799 802
800 803 def parentrevs(self, rev):
801 804 try:
802 805 entry = self.index[rev]
803 806 except IndexError:
804 807 if rev == wdirrev:
805 808 raise error.WdirUnsupported
806 809 raise
807 810 if entry[5] == nullrev:
808 811 return entry[6], entry[5]
809 812 else:
810 813 return entry[5], entry[6]
811 814
812 815 # fast parentrevs(rev) where rev isn't filtered
813 816 _uncheckedparentrevs = parentrevs
814 817
815 818 def node(self, rev):
816 819 try:
817 820 return self.index[rev][7]
818 821 except IndexError:
819 822 if rev == wdirrev:
820 823 raise error.WdirUnsupported
821 824 raise
822 825
823 826 # Derived from index values.
824 827
825 828 def end(self, rev):
826 829 return self.start(rev) + self.length(rev)
827 830
828 831 def parents(self, node):
829 832 i = self.index
830 833 d = i[self.rev(node)]
831 834 # inline node() to avoid function call overhead
832 835 if d[5] == self.nullid:
833 836 return i[d[6]][7], i[d[5]][7]
834 837 else:
835 838 return i[d[5]][7], i[d[6]][7]
836 839
837 840 def chainlen(self, rev):
838 841 return self._chaininfo(rev)[0]
839 842
840 843 def _chaininfo(self, rev):
841 844 chaininfocache = self._chaininfocache
842 845 if rev in chaininfocache:
843 846 return chaininfocache[rev]
844 847 index = self.index
845 848 generaldelta = self._generaldelta
846 849 iterrev = rev
847 850 e = index[iterrev]
848 851 clen = 0
849 852 compresseddeltalen = 0
850 853 while iterrev != e[3]:
851 854 clen += 1
852 855 compresseddeltalen += e[1]
853 856 if generaldelta:
854 857 iterrev = e[3]
855 858 else:
856 859 iterrev -= 1
857 860 if iterrev in chaininfocache:
858 861 t = chaininfocache[iterrev]
859 862 clen += t[0]
860 863 compresseddeltalen += t[1]
861 864 break
862 865 e = index[iterrev]
863 866 else:
864 867 # Add text length of base since decompressing that also takes
865 868 # work. For cache hits the length is already included.
866 869 compresseddeltalen += e[1]
867 870 r = (clen, compresseddeltalen)
868 871 chaininfocache[rev] = r
869 872 return r
870 873
871 874 def _deltachain(self, rev, stoprev=None):
872 875 """Obtain the delta chain for a revision.
873 876
874 877 ``stoprev`` specifies a revision to stop at. If not specified, we
875 878 stop at the base of the chain.
876 879
877 880 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
878 881 revs in ascending order and ``stopped`` is a bool indicating whether
879 882 ``stoprev`` was hit.
880 883 """
881 884 # Try C implementation.
882 885 try:
883 886 return self.index.deltachain(rev, stoprev, self._generaldelta)
884 887 except AttributeError:
885 888 pass
886 889
887 890 chain = []
888 891
889 892 # Alias to prevent attribute lookup in tight loop.
890 893 index = self.index
891 894 generaldelta = self._generaldelta
892 895
893 896 iterrev = rev
894 897 e = index[iterrev]
895 898 while iterrev != e[3] and iterrev != stoprev:
896 899 chain.append(iterrev)
897 900 if generaldelta:
898 901 iterrev = e[3]
899 902 else:
900 903 iterrev -= 1
901 904 e = index[iterrev]
902 905
903 906 if iterrev == stoprev:
904 907 stopped = True
905 908 else:
906 909 chain.append(iterrev)
907 910 stopped = False
908 911
909 912 chain.reverse()
910 913 return chain, stopped
911 914
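In the pure-Python fallback above, `e[3]` is a revision's delta base: with generaldelta it can point at any earlier revision, otherwise the base is implicitly `rev - 1`, and a revision that is its own base ends the chain. A toy generaldelta walk over a hypothetical rev -> base table:

    toy_base = {0: 0, 1: 0, 2: 1, 3: 1}   # rev 0 stores a full snapshot

    def toy_deltachain(rev):
        chain = []
        while toy_base[rev] != rev:
            chain.append(rev)
            rev = toy_base[rev]
        chain.append(rev)
        chain.reverse()
        return chain

    print(toy_deltachain(3))   # [0, 1, 3]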
912 915 def ancestors(self, revs, stoprev=0, inclusive=False):
913 916 """Generate the ancestors of 'revs' in reverse revision order.
914 917 Does not generate revs lower than stoprev.
915 918
916 919 See the documentation for ancestor.lazyancestors for more details."""
917 920
918 921 # first, make sure start revisions aren't filtered
919 922 revs = list(revs)
920 923 checkrev = self.node
921 924 for r in revs:
922 925 checkrev(r)
923 926 # and we're sure ancestors aren't filtered as well
924 927
925 928 if rustancestor is not None:
926 929 lazyancestors = rustancestor.LazyAncestors
927 930 arg = self.index
928 931 else:
929 932 lazyancestors = ancestor.lazyancestors
930 933 arg = self._uncheckedparentrevs
931 934 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
932 935
933 936 def descendants(self, revs):
934 937 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
935 938
936 939 def findcommonmissing(self, common=None, heads=None):
937 940 """Return a tuple of the ancestors of common and the ancestors of heads
938 941 that are not ancestors of common. In revset terminology, we return the
939 942 tuple:
940 943
941 944 ::common, (::heads) - (::common)
942 945
943 946 The list is sorted by revision number, meaning it is
944 947 topologically sorted.
945 948
946 949 'heads' and 'common' are both lists of node IDs. If heads is
947 950 not supplied, uses all of the revlog's heads. If common is not
948 951 supplied, uses nullid."""
949 952 if common is None:
950 953 common = [self.nullid]
951 954 if heads is None:
952 955 heads = self.heads()
953 956
954 957 common = [self.rev(n) for n in common]
955 958 heads = [self.rev(n) for n in heads]
956 959
957 960 # we want the ancestors, but inclusive
958 961 class lazyset(object):
959 962 def __init__(self, lazyvalues):
960 963 self.addedvalues = set()
961 964 self.lazyvalues = lazyvalues
962 965
963 966 def __contains__(self, value):
964 967 return value in self.addedvalues or value in self.lazyvalues
965 968
966 969 def __iter__(self):
967 970 added = self.addedvalues
968 971 for r in added:
969 972 yield r
970 973 for r in self.lazyvalues:
971 974 if not r in added:
972 975 yield r
973 976
974 977 def add(self, value):
975 978 self.addedvalues.add(value)
976 979
977 980 def update(self, values):
978 981 self.addedvalues.update(values)
979 982
980 983 has = lazyset(self.ancestors(common))
981 984 has.add(nullrev)
982 985 has.update(common)
983 986
984 987 # take all ancestors from heads that aren't in has
985 988 missing = set()
986 989 visit = collections.deque(r for r in heads if r not in has)
987 990 while visit:
988 991 r = visit.popleft()
989 992 if r in missing:
990 993 continue
991 994 else:
992 995 missing.add(r)
993 996 for p in self.parentrevs(r):
994 997 if p not in has:
995 998 visit.append(p)
996 999 missing = list(missing)
997 1000 missing.sort()
998 1001 return has, [self.node(miss) for miss in missing]
999 1002
1000 1003 def incrementalmissingrevs(self, common=None):
1001 1004 """Return an object that can be used to incrementally compute the
1002 1005 revision numbers of the ancestors of arbitrary sets that are not
1003 1006 ancestors of common. This is an ancestor.incrementalmissingancestors
1004 1007 object.
1005 1008
1006 1009 'common' is a list of revision numbers. If common is not supplied, uses
1007 1010 nullrev.
1008 1011 """
1009 1012 if common is None:
1010 1013 common = [nullrev]
1011 1014
1012 1015 if rustancestor is not None:
1013 1016 return rustancestor.MissingAncestors(self.index, common)
1014 1017 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1015 1018
1016 1019 def findmissingrevs(self, common=None, heads=None):
1017 1020 """Return the revision numbers of the ancestors of heads that
1018 1021 are not ancestors of common.
1019 1022
1020 1023 More specifically, return a list of revision numbers corresponding to
1021 1024 nodes N such that every N satisfies the following constraints:
1022 1025
1023 1026 1. N is an ancestor of some node in 'heads'
1024 1027 2. N is not an ancestor of any node in 'common'
1025 1028
1026 1029 The list is sorted by revision number, meaning it is
1027 1030 topologically sorted.
1028 1031
1029 1032 'heads' and 'common' are both lists of revision numbers. If heads is
1030 1033 not supplied, uses all of the revlog's heads. If common is not
1031 1034 supplied, uses nullid."""
1032 1035 if common is None:
1033 1036 common = [nullrev]
1034 1037 if heads is None:
1035 1038 heads = self.headrevs()
1036 1039
1037 1040 inc = self.incrementalmissingrevs(common=common)
1038 1041 return inc.missingancestors(heads)
1039 1042
1040 1043 def findmissing(self, common=None, heads=None):
1041 1044 """Return the ancestors of heads that are not ancestors of common.
1042 1045
1043 1046 More specifically, return a list of nodes N such that every N
1044 1047 satisfies the following constraints:
1045 1048
1046 1049 1. N is an ancestor of some node in 'heads'
1047 1050 2. N is not an ancestor of any node in 'common'
1048 1051
1049 1052 The list is sorted by revision number, meaning it is
1050 1053 topologically sorted.
1051 1054
1052 1055 'heads' and 'common' are both lists of node IDs. If heads is
1053 1056 not supplied, uses all of the revlog's heads. If common is not
1054 1057 supplied, uses nullid."""
1055 1058 if common is None:
1056 1059 common = [self.nullid]
1057 1060 if heads is None:
1058 1061 heads = self.heads()
1059 1062
1060 1063 common = [self.rev(n) for n in common]
1061 1064 heads = [self.rev(n) for n in heads]
1062 1065
1063 1066 inc = self.incrementalmissingrevs(common=common)
1064 1067 return [self.node(r) for r in inc.missingancestors(heads)]
1065 1068
1066 1069 def nodesbetween(self, roots=None, heads=None):
1067 1070 """Return a topological path from 'roots' to 'heads'.
1068 1071
1069 1072 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1070 1073 topologically sorted list of all nodes N that satisfy both of
1071 1074 these constraints:
1072 1075
1073 1076 1. N is a descendant of some node in 'roots'
1074 1077 2. N is an ancestor of some node in 'heads'
1075 1078
1076 1079 Every node is considered to be both a descendant and an ancestor
1077 1080 of itself, so every reachable node in 'roots' and 'heads' will be
1078 1081 included in 'nodes'.
1079 1082
1080 1083 'outroots' is the list of reachable nodes in 'roots', i.e., the
1081 1084 subset of 'roots' that is returned in 'nodes'. Likewise,
1082 1085 'outheads' is the subset of 'heads' that is also in 'nodes'.
1083 1086
1084 1087 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1085 1088 unspecified, uses nullid as the only root. If 'heads' is
1086 1089 unspecified, uses list of all of the revlog's heads."""
1087 1090 nonodes = ([], [], [])
1088 1091 if roots is not None:
1089 1092 roots = list(roots)
1090 1093 if not roots:
1091 1094 return nonodes
1092 1095 lowestrev = min([self.rev(n) for n in roots])
1093 1096 else:
1094 1097 roots = [self.nullid] # Everybody's a descendant of nullid
1095 1098 lowestrev = nullrev
1096 1099 if (lowestrev == nullrev) and (heads is None):
1097 1100 # We want _all_ the nodes!
1098 1101 return (
1099 1102 [self.node(r) for r in self],
1100 1103 [self.nullid],
1101 1104 list(self.heads()),
1102 1105 )
1103 1106 if heads is None:
1104 1107 # All nodes are ancestors, so the latest ancestor is the last
1105 1108 # node.
1106 1109 highestrev = len(self) - 1
1107 1110 # Set ancestors to None to signal that every node is an ancestor.
1108 1111 ancestors = None
1109 1112 # Set heads to an empty dictionary for later discovery of heads
1110 1113 heads = {}
1111 1114 else:
1112 1115 heads = list(heads)
1113 1116 if not heads:
1114 1117 return nonodes
1115 1118 ancestors = set()
1116 1119 # Turn heads into a dictionary so we can remove 'fake' heads.
1117 1120 # Also, later we will be using it to filter out the heads we can't
1118 1121 # find from roots.
1119 1122 heads = dict.fromkeys(heads, False)
1120 1123 # Start at the top and keep marking parents until we're done.
1121 1124 nodestotag = set(heads)
1122 1125 # Remember where the top was so we can use it as a limit later.
1123 1126 highestrev = max([self.rev(n) for n in nodestotag])
1124 1127 while nodestotag:
1125 1128 # grab a node to tag
1126 1129 n = nodestotag.pop()
1127 1130 # Never tag nullid
1128 1131 if n == self.nullid:
1129 1132 continue
1130 1133 # A node's revision number represents its place in a
1131 1134 # topologically sorted list of nodes.
1132 1135 r = self.rev(n)
1133 1136 if r >= lowestrev:
1134 1137 if n not in ancestors:
1135 1138 # If we are possibly a descendant of one of the roots
1136 1139 # and we haven't already been marked as an ancestor
1137 1140 ancestors.add(n) # Mark as ancestor
1138 1141 # Add non-nullid parents to list of nodes to tag.
1139 1142 nodestotag.update(
1140 1143 [p for p in self.parents(n) if p != self.nullid]
1141 1144 )
1142 1145 elif n in heads: # We've seen it before, is it a fake head?
1143 1146 # So it is, real heads should not be the ancestors of
1144 1147 # any other heads.
1145 1148 heads.pop(n)
1146 1149 if not ancestors:
1147 1150 return nonodes
1148 1151 # Now that we have our set of ancestors, we want to remove any
1149 1152 # roots that are not ancestors.
1150 1153
1151 1154 # If one of the roots was nullid, everything is included anyway.
1152 1155 if lowestrev > nullrev:
1153 1156 # But, since we weren't, let's recompute the lowest rev to not
1154 1157 # include roots that aren't ancestors.
1155 1158
1156 1159 # Filter out roots that aren't ancestors of heads
1157 1160 roots = [root for root in roots if root in ancestors]
1158 1161 # Recompute the lowest revision
1159 1162 if roots:
1160 1163 lowestrev = min([self.rev(root) for root in roots])
1161 1164 else:
1162 1165 # No more roots? Return empty list
1163 1166 return nonodes
1164 1167 else:
1165 1168 # We are descending from nullid, and don't need to care about
1166 1169 # any other roots.
1167 1170 lowestrev = nullrev
1168 1171 roots = [self.nullid]
1169 1172 # Transform our roots list into a set.
1170 1173 descendants = set(roots)
1171 1174 # Also, keep the original roots so we can filter out roots that aren't
1172 1175 # 'real' roots (i.e. are descended from other roots).
1173 1176 roots = descendants.copy()
1174 1177 # Our topologically sorted list of output nodes.
1175 1178 orderedout = []
1176 1179 # Don't start at nullid since we don't want nullid in our output list,
1177 1180 # and if nullid shows up in descendants, empty parents will look like
1178 1181 # they're descendants.
1179 1182 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1180 1183 n = self.node(r)
1181 1184 isdescendant = False
1182 1185 if lowestrev == nullrev: # Everybody is a descendant of nullid
1183 1186 isdescendant = True
1184 1187 elif n in descendants:
1185 1188 # n is already a descendant
1186 1189 isdescendant = True
1187 1190 # This check only needs to be done here because all the roots
1188 1191 # will start being marked as descendants before the loop.
1189 1192 if n in roots:
1190 1193 # If n was a root, check if it's a 'real' root.
1191 1194 p = tuple(self.parents(n))
1192 1195 # If any of its parents are descendants, it's not a root.
1193 1196 if (p[0] in descendants) or (p[1] in descendants):
1194 1197 roots.remove(n)
1195 1198 else:
1196 1199 p = tuple(self.parents(n))
1197 1200 # A node is a descendant if either of its parents is a
1198 1201 # descendant. (We seeded the descendants set with the roots
1199 1202 # up there, remember?)
1200 1203 if (p[0] in descendants) or (p[1] in descendants):
1201 1204 descendants.add(n)
1202 1205 isdescendant = True
1203 1206 if isdescendant and ((ancestors is None) or (n in ancestors)):
1204 1207 # Only include nodes that are both descendants and ancestors.
1205 1208 orderedout.append(n)
1206 1209 if (ancestors is not None) and (n in heads):
1207 1210 # We're trying to figure out which heads are reachable
1208 1211 # from roots.
1209 1212 # Mark this head as having been reached
1210 1213 heads[n] = True
1211 1214 elif ancestors is None:
1212 1215 # Otherwise, we're trying to discover the heads.
1213 1216 # Assume this is a head because if it isn't, the next step
1214 1217 # will eventually remove it.
1215 1218 heads[n] = True
1216 1219 # But, obviously its parents aren't.
1217 1220 for p in self.parents(n):
1218 1221 heads.pop(p, None)
1219 1222 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1220 1223 roots = list(roots)
1221 1224 assert orderedout
1222 1225 assert roots
1223 1226 assert heads
1224 1227 return (orderedout, roots, heads)
1225 1228
1226 1229 def headrevs(self, revs=None):
1227 1230 if revs is None:
1228 1231 try:
1229 1232 return self.index.headrevs()
1230 1233 except AttributeError:
1231 1234 return self._headrevs()
1232 1235 if rustdagop is not None:
1233 1236 return rustdagop.headrevs(self.index, revs)
1234 1237 return dagop.headrevs(revs, self._uncheckedparentrevs)
1235 1238
1236 1239 def computephases(self, roots):
1237 1240 return self.index.computephasesmapsets(roots)
1238 1241
1239 1242 def _headrevs(self):
1240 1243 count = len(self)
1241 1244 if not count:
1242 1245 return [nullrev]
1243 1246 # we won't iter over filtered revs so nobody is a head at start
1244 1247 ishead = [0] * (count + 1)
1245 1248 index = self.index
1246 1249 for r in self:
1247 1250 ishead[r] = 1 # I may be a head
1248 1251 e = index[r]
1249 1252 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1250 1253 return [r for r, val in enumerate(ishead) if val]
1251 1254
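`_headrevs` starts by assuming every revision is a head and then clears the flag for each revision's parents, so only childless revisions stay marked. A toy run over a hypothetical parent table:

    parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}   # rev -> (p1, p2)
    ishead = [1] * len(parents)
    for r, (p1, p2) in parents.items():
        for p in (p1, p2):
            if p >= 0:
                ishead[p] = 0
    print([r for r, v in enumerate(ishead) if v])   # [2, 3]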
1252 1255 def heads(self, start=None, stop=None):
1253 1256 """return the list of all nodes that have no children
1254 1257
1255 1258 if start is specified, only heads that are descendants of
1256 1259 start will be returned
1257 1260 if stop is specified, it will consider all the revs from stop
1258 1261 as if they had no children
1259 1262 """
1260 1263 if start is None and stop is None:
1261 1264 if not len(self):
1262 1265 return [self.nullid]
1263 1266 return [self.node(r) for r in self.headrevs()]
1264 1267
1265 1268 if start is None:
1266 1269 start = nullrev
1267 1270 else:
1268 1271 start = self.rev(start)
1269 1272
1270 1273 stoprevs = {self.rev(n) for n in stop or []}
1271 1274
1272 1275 revs = dagop.headrevssubset(
1273 1276 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1274 1277 )
1275 1278
1276 1279 return [self.node(rev) for rev in revs]
1277 1280
1278 1281 def children(self, node):
1279 1282 """find the children of a given node"""
1280 1283 c = []
1281 1284 p = self.rev(node)
1282 1285 for r in self.revs(start=p + 1):
1283 1286 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1284 1287 if prevs:
1285 1288 for pr in prevs:
1286 1289 if pr == p:
1287 1290 c.append(self.node(r))
1288 1291 elif p == nullrev:
1289 1292 c.append(self.node(r))
1290 1293 return c
1291 1294
1292 1295 def commonancestorsheads(self, a, b):
1293 1296 """calculate all the heads of the common ancestors of nodes a and b"""
1294 1297 a, b = self.rev(a), self.rev(b)
1295 1298 ancs = self._commonancestorsheads(a, b)
1296 1299 return pycompat.maplist(self.node, ancs)
1297 1300
1298 1301 def _commonancestorsheads(self, *revs):
1299 1302 """calculate all the heads of the common ancestors of revs"""
1300 1303 try:
1301 1304 ancs = self.index.commonancestorsheads(*revs)
1302 1305 except (AttributeError, OverflowError): # C implementation failed
1303 1306 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1304 1307 return ancs
1305 1308
1306 1309 def isancestor(self, a, b):
1307 1310 """return True if node a is an ancestor of node b
1308 1311
1309 1312 A revision is considered an ancestor of itself."""
1310 1313 a, b = self.rev(a), self.rev(b)
1311 1314 return self.isancestorrev(a, b)
1312 1315
1313 1316 def isancestorrev(self, a, b):
1314 1317 """return True if revision a is an ancestor of revision b
1315 1318
1316 1319 A revision is considered an ancestor of itself.
1317 1320
1318 1321 The implementation of this is trivial but the use of
1319 1322 reachableroots is not."""
1320 1323 if a == nullrev:
1321 1324 return True
1322 1325 elif a == b:
1323 1326 return True
1324 1327 elif a > b:
1325 1328 return False
1326 1329 return bool(self.reachableroots(a, [b], [a], includepath=False))
1327 1330
1328 1331 def reachableroots(self, minroot, heads, roots, includepath=False):
1329 1332 """return (heads(::(<roots> and <roots>::<heads>)))
1330 1333
1331 1334 If includepath is True, return (<roots>::<heads>)."""
1332 1335 try:
1333 1336 return self.index.reachableroots2(
1334 1337 minroot, heads, roots, includepath
1335 1338 )
1336 1339 except AttributeError:
1337 1340 return dagop._reachablerootspure(
1338 1341 self.parentrevs, minroot, roots, heads, includepath
1339 1342 )
1340 1343
1341 1344 def ancestor(self, a, b):
1342 1345 """calculate the "best" common ancestor of nodes a and b"""
1343 1346
1344 1347 a, b = self.rev(a), self.rev(b)
1345 1348 try:
1346 1349 ancs = self.index.ancestors(a, b)
1347 1350 except (AttributeError, OverflowError):
1348 1351 ancs = ancestor.ancestors(self.parentrevs, a, b)
1349 1352 if ancs:
1350 1353 # choose a consistent winner when there's a tie
1351 1354 return min(map(self.node, ancs))
1352 1355 return self.nullid
1353 1356
1354 1357 def _match(self, id):
1355 1358 if isinstance(id, int):
1356 1359 # rev
1357 1360 return self.node(id)
1358 1361 if len(id) == self.nodeconstants.nodelen:
1359 1362 # possibly a binary node
1360 1363 # odds of a binary node being all hex in ASCII are 1 in 10**25
1361 1364 try:
1362 1365 node = id
1363 1366 self.rev(node) # quick search the index
1364 1367 return node
1365 1368 except error.LookupError:
1366 1369 pass # may be partial hex id
1367 1370 try:
1368 1371 # str(rev)
1369 1372 rev = int(id)
1370 1373 if b"%d" % rev != id:
1371 1374 raise ValueError
1372 1375 if rev < 0:
1373 1376 rev = len(self) + rev
1374 1377 if rev < 0 or rev >= len(self):
1375 1378 raise ValueError
1376 1379 return self.node(rev)
1377 1380 except (ValueError, OverflowError):
1378 1381 pass
1379 1382 if len(id) == 2 * self.nodeconstants.nodelen:
1380 1383 try:
1381 1384 # a full hex nodeid?
1382 1385 node = bin(id)
1383 1386 self.rev(node)
1384 1387 return node
1385 1388 except (TypeError, error.LookupError):
1386 1389 pass
1387 1390
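`_match` (and `lookup()` below) accepts several identifier shapes; illustrative calls against a hypothetical revlog `rl` (a sketch, not exhaustive):

    rl.lookup(0)        # an int is treated as a revision number
    rl.lookup(b'0')     # str(rev) is parsed back into a revision number
    rl.lookup(node)     # a nodelen-sized binary nodeid is searched in the index
    rl.lookup(b'1f2e')  # short hex prefixes fall through to _partialmatch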
1388 1391 def _partialmatch(self, id):
1389 1392 # we don't care about wdirfilenodeids as they should always be full hashes
1390 1393 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1391 1394 try:
1392 1395 partial = self.index.partialmatch(id)
1393 1396 if partial and self.hasnode(partial):
1394 1397 if maybewdir:
1395 1398 # single 'ff...' match in radix tree, ambiguous with wdir
1396 1399 raise error.RevlogError
1397 1400 return partial
1398 1401 if maybewdir:
1399 1402 # no 'ff...' match in radix tree, wdir identified
1400 1403 raise error.WdirUnsupported
1401 1404 return None
1402 1405 except error.RevlogError:
1403 1406 # parsers.c radix tree lookup gave multiple matches
1404 1407 # fast path: for unfiltered changelog, radix tree is accurate
1405 1408 if not getattr(self, 'filteredrevs', None):
1406 1409 raise error.AmbiguousPrefixLookupError(
1407 1410 id, self.display_id, _(b'ambiguous identifier')
1408 1411 )
1409 1412 # fall through to slow path that filters hidden revisions
1410 1413 except (AttributeError, ValueError):
1411 1414 # we are pure python, or key was too short to search radix tree
1412 1415 pass
1413 1416
1414 1417 if id in self._pcache:
1415 1418 return self._pcache[id]
1416 1419
1417 1420 if len(id) <= 40:
1418 1421 try:
1419 1422 # hex(node)[:...]
1420 1423 l = len(id) // 2 # grab an even number of digits
1421 1424 prefix = bin(id[: l * 2])
1422 1425 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1423 1426 nl = [
1424 1427 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1425 1428 ]
1426 1429 if self.nodeconstants.nullhex.startswith(id):
1427 1430 nl.append(self.nullid)
1428 1431 if len(nl) > 0:
1429 1432 if len(nl) == 1 and not maybewdir:
1430 1433 self._pcache[id] = nl[0]
1431 1434 return nl[0]
1432 1435 raise error.AmbiguousPrefixLookupError(
1433 1436 id, self.display_id, _(b'ambiguous identifier')
1434 1437 )
1435 1438 if maybewdir:
1436 1439 raise error.WdirUnsupported
1437 1440 return None
1438 1441 except TypeError:
1439 1442 pass
1440 1443
1441 1444 def lookup(self, id):
1442 1445 """locate a node based on:
1443 1446 - revision number or str(revision number)
1444 1447 - nodeid or subset of hex nodeid
1445 1448 """
1446 1449 n = self._match(id)
1447 1450 if n is not None:
1448 1451 return n
1449 1452 n = self._partialmatch(id)
1450 1453 if n:
1451 1454 return n
1452 1455
1453 1456 raise error.LookupError(id, self.display_id, _(b'no match found'))
1454 1457
1455 1458 def shortest(self, node, minlength=1):
1456 1459 """Find the shortest unambiguous prefix that matches node."""
1457 1460
1458 1461 def isvalid(prefix):
1459 1462 try:
1460 1463 matchednode = self._partialmatch(prefix)
1461 1464 except error.AmbiguousPrefixLookupError:
1462 1465 return False
1463 1466 except error.WdirUnsupported:
1464 1467 # single 'ff...' match
1465 1468 return True
1466 1469 if matchednode is None:
1467 1470 raise error.LookupError(node, self.display_id, _(b'no node'))
1468 1471 return True
1469 1472
1470 1473 def maybewdir(prefix):
1471 1474 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1472 1475
1473 1476 hexnode = hex(node)
1474 1477
1475 1478 def disambiguate(hexnode, minlength):
1476 1479 """Disambiguate against wdirid."""
1477 1480 for length in range(minlength, len(hexnode) + 1):
1478 1481 prefix = hexnode[:length]
1479 1482 if not maybewdir(prefix):
1480 1483 return prefix
1481 1484
1482 1485 if not getattr(self, 'filteredrevs', None):
1483 1486 try:
1484 1487 length = max(self.index.shortest(node), minlength)
1485 1488 return disambiguate(hexnode, length)
1486 1489 except error.RevlogError:
1487 1490 if node != self.nodeconstants.wdirid:
1488 1491 raise error.LookupError(
1489 1492 node, self.display_id, _(b'no node')
1490 1493 )
1491 1494 except AttributeError:
1492 1495 # Fall through to pure code
1493 1496 pass
1494 1497
1495 1498 if node == self.nodeconstants.wdirid:
1496 1499 for length in range(minlength, len(hexnode) + 1):
1497 1500 prefix = hexnode[:length]
1498 1501 if isvalid(prefix):
1499 1502 return prefix
1500 1503
1501 1504 for length in range(minlength, len(hexnode) + 1):
1502 1505 prefix = hexnode[:length]
1503 1506 if isvalid(prefix):
1504 1507 return disambiguate(hexnode, length)
1505 1508
1506 1509 def cmp(self, node, text):
1507 1510 """compare text with a given file revision
1508 1511
1509 1512 returns True if text is different from what is stored.
1510 1513 """
1511 1514 p1, p2 = self.parents(node)
1512 1515 return storageutil.hashrevisionsha1(text, p1, p2) != node
1513 1516
1514 1517 def _cachesegment(self, offset, data):
1515 1518 """Add a segment to the revlog cache.
1516 1519
1517 1520 Accepts an absolute offset and the data that is at that location.
1518 1521 """
1519 1522 o, d = self._chunkcache
1520 1523 # try to add to existing cache
1521 1524 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1522 1525 self._chunkcache = o, d + data
1523 1526 else:
1524 1527 self._chunkcache = offset, data
1525 1528
1526 1529 def _readsegment(self, offset, length, df=None):
1527 1530 """Load a segment of raw data from the revlog.
1528 1531
1529 1532 Accepts an absolute offset, length to read, and an optional existing
1530 1533 file handle to read from.
1531 1534
1532 1535 If an existing file handle is passed, it will be seeked and the
1533 1536 original seek position will NOT be restored.
1534 1537
1535 1538 Returns a str or buffer of raw byte data.
1536 1539
1537 1540 Raises if the requested number of bytes could not be read.
1538 1541 """
1539 1542 # Cache data both forward and backward around the requested
1540 1543 # data, in a fixed size window. This helps speed up operations
1541 1544 # involving reading the revlog backwards.
1542 1545 cachesize = self._chunkcachesize
1543 1546 realoffset = offset & ~(cachesize - 1)
1544 1547 reallength = (
1545 1548 (offset + length + cachesize) & ~(cachesize - 1)
1546 1549 ) - realoffset
1547 1550 with self._datareadfp(df) as df:
1548 1551 df.seek(realoffset)
1549 1552 d = df.read(reallength)
1550 1553
1551 1554 self._cachesegment(realoffset, d)
1552 1555 if offset != realoffset or reallength != length:
1553 1556 startoffset = offset - realoffset
1554 1557 if len(d) - startoffset < length:
1555 1558 raise error.RevlogError(
1556 1559 _(
1557 1560 b'partial read of revlog %s; expected %d bytes from '
1558 1561 b'offset %d, got %d'
1559 1562 )
1560 1563 % (
1561 1564 self._indexfile if self._inline else self._datafile,
1562 1565 length,
1563 1566 offset,
1564 1567 len(d) - startoffset,
1565 1568 )
1566 1569 )
1567 1570
1568 1571 return util.buffer(d, startoffset, length)
1569 1572
1570 1573 if len(d) < length:
1571 1574 raise error.RevlogError(
1572 1575 _(
1573 1576 b'partial read of revlog %s; expected %d bytes from offset '
1574 1577 b'%d, got %d'
1575 1578 )
1576 1579 % (
1577 1580 self._indexfile if self._inline else self._datafile,
1578 1581 length,
1579 1582 offset,
1580 1583 len(d),
1581 1584 )
1582 1585 )
1583 1586
1584 1587 return d
1585 1588
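The windowing arithmetic above rounds the requested range out to `_chunkcachesize` boundaries; because the cache size is validated as a power of two, `offset & ~(cachesize - 1)` simply clears the low bits. Worked numbers (arbitrary):

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)    # 65536
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    print(realoffset, reallength)             # 65536 65536; window covers [65536, 131072)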
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

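    # Cache-hit sketch (hypothetical numbers): with _chunkcache == (4096, d)
    # and len(d) == 8192, a request for offset=5000, length=1000 gives
    # cachestart=904 and cacheend=1904, both within bounds, so a slice of
    # the cached bytes is returned without touching the file at all.
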
    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

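    # Why the inline adjustment above: in an inline revlog the index entries
    # and revision data are interleaved in a single file, roughly
    #
    #   [header+entry 0][data 0][entry 1][data 1]...[entry n][data n]
    #
    # so the logical data offset of rev r is shifted by (r + 1) index entries
    # to obtain the physical file offset. (Sketch only; the version header is
    # folded into entry 0 by pack_header.)
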
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

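    # Delta-base sketch: with generaldelta, index entry[3] names an arbitrary
    # base revision; without it, deltas are implicitly chained against
    # rev - 1. An entry whose recorded base equals the revision itself marks
    # a full snapshot, hence the nullrev return above.
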
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

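    # Sparse-revlog snapshot rule, informally: a revision whose delta base is
    # one of its parents is an ordinary delta; a revision based on any other
    # revision counts as a snapshot only if that base is itself a snapshot,
    # hence the recursion above. E.g. for a hypothetical rev 12 with parents
    # (10, 11) and delta base 8, rev 12 is a snapshot iff rev 8 is.
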
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

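    # Reconstruction sketch: for a hypothetical delta chain [2, 5, 9] ending
    # at rev 9, _chunks() returns the decompressed chunk of each rev; the
    # first is the full text of the snapshot (rev 2) and the rest are binary
    # patches, so mdiff.patches(basetext, bins) folds them into rev 9's
    # rawtext. When the revision cache already holds an intermediate rev,
    # _deltachain stops there and the cached text becomes the base instead.
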
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self._format_flags &= ~FLAG_INLINE_DATA
            self._inline = False
            for i in self:
                e = self.index.entry_binary(i)
                if i == 0:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    e = header + e
                fp.write(e)

            # the temp file replaces the real index when we exit the
            # context manager

        tr.replace(self._indexfile, trindex * self.index.entry_size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog which doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

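    # Header-byte sketch for compress()/decompress(): the first element of
    # the returned tuple is an out-of-band header. b'' means the payload is
    # already self-describing (the compressor tags its own output); b'u'
    # marks data stored uncompressed; and data whose first byte is b'\0' can
    # be stored as-is because a leading NUL unambiguously means "raw". For
    # example, compress(b'\0abc') returns (b'', b'\0abc') whenever the
    # compressor yields no saving.
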
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

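    # Dispatch sketch for the header byte above (hypothetical inputs):
    #
    #   decompress(b'x\x9c...')  -> zlib-inflated bytes
    #   decompress(b'\0abc')     -> b'\0abc', returned untouched
    #   decompress(b'uabc')      -> zero-copy buffer over b'abc'
    #
    # Any other leading byte is resolved through the registered compression
    # engines (e.g. the zstd engine registers its own revlog header byte).
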
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before it is changed by flag processors, which
            # is the non-raw size. use revlog explicitly to avoid filelog's
            # extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata and self.hassidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no
            # different from the ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

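    # Index entry layout sketch for the 10-tuple appended above:
    #
    #   e[0] offset (<< 16) | flags    e[5] p1 rev
    #   e[1] compressed length         e[6] p2 rev
    #   e[2] uncompressed length       e[7] node
    #   e[3] delta base rev            e[8] sidedata offset
    #   e[4] linkrev                   e[9] sidedata length
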
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), while revlog v2 needs
        a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._format_version != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

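    # Scan sketch for the v2 branch above (hypothetical layout): if rev 3's
    # data ends at byte 1000 but rev 2's sidedata was rewritten later and now
    # spans bytes 1000..1199 (entry[8] == 1000, entry[9] == 200), the next
    # write must start at byte 1200, which is exactly what the max() over all
    # entries computes.
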
    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self.index.entry_size
        if self._inline:
            transaction.add(self._indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self._indexfile, isize)
            transaction.add(self._datafile, end)
            dfh = self._datafp(b"a+")

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.radix, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.display_id, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.display_id, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, end)
            end = rev * self.index.entry_size
        else:
            end += rev * self.index.entry_size

        transaction.add(self._indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

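    # checksize() sketch (hypothetical sizes): for a healthy non-inline
    # revlog whose last revision ends at byte 500, a 500-byte .d file gives
    # dd == 0, and a .i file that is an exact multiple of entry_size gives
    # di == 0; any trailing partial entry or surplus data shows up as a
    # non-zero di or dd.
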
    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added as if they were new content. This is
           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
           e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When it is None, the destination revlog's existing setting
        is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                ifh = destrevlog.opener(
                    destrevlog._indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog._datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # inline revlogs are not yet supported because they suffer from an
        # issue when rewriting them (since it's not an append-only operation).
        # See issue6485.
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        # The changelog implements a "delayed" writing mechanism that assumes
        # all index data is written in append mode and is therefore
        # incompatible with the seeked writes done in this method. The use of
        # such "delayed" writing will soon be removed for revlog versions that
        # support side data, so for now, we only keep this simple assert to
        # highlight the situation.
        delayed = getattr(self, '_delayed', False)
        diverted = getattr(self, '_divert', False)
        if delayed and not diverted:
            msg = "cannot rewrite_sidedata of a delayed revlog"
            raise error.ProgrammingError(msg)

        new_entries = []
        # append the new sidedata
        with self._datafp(b'a+') as fp:
            # Maybe this bug still exists, see revlog._writeentry
            fp.seek(0, os.SEEK_END)
            current_offset = fp.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"Rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry = (new_offset_flags,) + entry[1:8]
                entry += (current_offset, len(serialized_sidedata))

                fp.write(serialized_sidedata)
                new_entries.append(entry)
                current_offset += len(serialized_sidedata)

        # rewrite the new index entries
        with self._indexfp(b'r+') as fp:
            fp.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
                packed = self.index.entry_binary(rev)
                if rev == 0:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                fp.write(packed)