revlog: fix capitalisation of an error...
marmoute
r48002:e51392ac default
@@ -1,3220 +1,3220 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 INDEX_HEADER,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_HASCOPIESINFO,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 )
59 59 from .thirdparty import attr
60 60 from . import (
61 61 ancestor,
62 62 dagop,
63 63 error,
64 64 mdiff,
65 65 policy,
66 66 pycompat,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .interfaces import (
71 71 repository,
72 72 util as interfaceutil,
73 73 )
74 74 from .revlogutils import (
75 75 deltas as deltautil,
76 76 flagutil,
77 77 nodemap as nodemaputil,
78 78 revlogv0,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanket usage of all the names to prevent pyflakes warnings
87 87 # We need these names available in the module for extensions.
88 88
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_HASCOPIESINFO
102 102 REVIDX_EXTSTORED
103 103 REVIDX_DEFAULT_FLAGS
104 104 REVIDX_FLAGS_ORDER
105 105 REVIDX_RAWTEXT_CHANGING_FLAGS
106 106
107 107 parsers = policy.importmod('parsers')
108 108 rustancestor = policy.importrust('ancestor')
109 109 rustdagop = policy.importrust('dagop')
110 110 rustrevlog = policy.importrust('revlog')
111 111
112 112 # Aliased for performance.
113 113 _zlibdecompress = zlib.decompress
114 114
115 115 # max size of revlog with inline data
116 116 _maxinline = 131072
117 117 _chunksize = 1048576
118 118
119 119 # Flag processors for REVIDX_ELLIPSIS.
120 120 def ellipsisreadprocessor(rl, text):
121 121 return text, False
122 122
123 123
124 124 def ellipsiswriteprocessor(rl, text):
125 125 return text, False
126 126
127 127
128 128 def ellipsisrawprocessor(rl, text):
129 129 return False
130 130
131 131
132 132 ellipsisprocessor = (
133 133 ellipsisreadprocessor,
134 134 ellipsiswriteprocessor,
135 135 ellipsisrawprocessor,
136 136 )
137 137
138 138
139 139 def offset_type(offset, type):
140 140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 141 raise ValueError(b'unknown revlog index flags')
142 142 return int(int(offset) << 16 | type)
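# Illustrative sketch (not part of the original file): offset_type() packs
# the data offset into the high bits and 16 bits of revision flags into the
# low bits; the start() and flags() accessors further down undo the packing.
packed = (4242 << 16) | 0x0001     # offset 4242, one flag bit set
assert packed >> 16 == 4242        # what start() computes
assert packed & 0xFFFF == 0x0001   # what flags() computes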
143 143
144 144
145 145 def _verify_revision(rl, skipflags, state, node):
146 146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 147 point for extensions to influence the operation."""
148 148 if skipflags:
149 149 state[b'skipread'].add(node)
150 150 else:
151 151 # Side-effect: read content and verify hash.
152 152 rl.revision(node)
153 153
154 154
155 155 # True if a fast implementation for persistent-nodemap is available
156 156 #
157 157 # We also consider the implementation in "pure" python to be "fast" because
158 158 # people using pure python don't really have performance considerations (and
159 159 # a wheelbarrow of other slowness sources)
160 160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 161 parsers, 'BaseIndexObject'
162 162 )
163 163
164 164
165 165 @attr.s(slots=True, frozen=True)
166 166 class _revisioninfo(object):
167 167 """Information about a revision that allows building its fulltext
168 168 node: expected hash of the revision
169 169 p1, p2: parent revs of the revision
170 170 btext: built text cache consisting of a one-element list
171 171 cachedelta: (baserev, uncompressed_delta) or None
172 172 flags: flags associated with the revision storage
173 173
174 174 One of btext[0] or cachedelta must be set.
175 175 """
176 176
177 177 node = attr.ib()
178 178 p1 = attr.ib()
179 179 p2 = attr.ib()
180 180 btext = attr.ib()
181 181 textlen = attr.ib()
182 182 cachedelta = attr.ib()
183 183 flags = attr.ib()
184 184
185 185
186 186 @interfaceutil.implementer(repository.irevisiondelta)
187 187 @attr.s(slots=True)
188 188 class revlogrevisiondelta(object):
189 189 node = attr.ib()
190 190 p1node = attr.ib()
191 191 p2node = attr.ib()
192 192 basenode = attr.ib()
193 193 flags = attr.ib()
194 194 baserevisionsize = attr.ib()
195 195 revision = attr.ib()
196 196 delta = attr.ib()
197 197 sidedata = attr.ib()
198 198 protocol_flags = attr.ib()
199 199 linknode = attr.ib(default=None)
200 200
201 201
202 202 @interfaceutil.implementer(repository.iverifyproblem)
203 203 @attr.s(frozen=True)
204 204 class revlogproblem(object):
205 205 warning = attr.ib(default=None)
206 206 error = attr.ib(default=None)
207 207 node = attr.ib(default=None)
208 208
209 209
210 210 def parse_index_v1(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline)
213 213 return index, cache
214 214
215 215
216 216 def parse_index_v2(data, inline):
217 217 # call the C implementation to parse the index data
218 218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 219 return index, cache
220 220
221 221
222 222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223 223
224 224 def parse_index_v1_nodemap(data, inline):
225 225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 226 return index, cache
227 227
228 228
229 229 else:
230 230 parse_index_v1_nodemap = None
231 231
232 232
233 233 def parse_index_v1_mixed(data, inline):
234 234 index, cache = parse_index_v1(data, inline)
235 235 return rustrevlog.MixedIndex(index), cache
236 236
237 237
238 238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 239 # signed integer)
240 240 _maxentrysize = 0x7FFFFFFF
241 241
242 242
243 243 class revlog(object):
244 244 """
245 245 the underlying revision storage object
246 246
247 247 A revlog consists of two parts, an index and the revision data.
248 248
249 249 The index is a file with a fixed record size containing
250 250 information on each revision, including its nodeid (hash), the
251 251 nodeids of its parents, the position and offset of its data within
252 252 the data file, and the revision it's based on. Finally, each entry
253 253 contains a linkrev entry that can serve as a pointer to external
254 254 data.
255 255
256 256 The revision data itself is a linear collection of data chunks.
257 257 Each chunk represents a revision and is usually represented as a
258 258 delta against the previous chunk. To bound lookup time, runs of
259 259 deltas are limited to about 2 times the length of the original
260 260 version data. This makes retrieval of a version proportional to
261 261 its size, or O(1) relative to the number of revisions.
262 262
263 263 Both pieces of the revlog are written to in an append-only
264 264 fashion, which means we never need to rewrite a file to insert or
265 265 remove data, and can use some simple techniques to avoid the need
266 266 for locking while reading.
267 267
268 268 If checkambig, indexfile is opened with checkambig=True at
269 269 writing, to avoid file stat ambiguity.
270 270
271 271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 272 index will be mmapped rather than read if it is larger than the
273 273 configured threshold.
274 274
275 275 If censorable is True, the revlog can have censored revisions.
276 276
277 277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 278 compression for the data content.
279 279
280 280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 281 file handle, a filename, and an expected position. It should check whether
282 282 the current position in the file handle is valid, and log/warn/fail (by
283 283 raising).
284 284 """
285 285
286 286 _flagserrorclass = error.RevlogError
287 287
288 288 def __init__(
289 289 self,
290 290 opener,
291 291 target,
292 292 radix,
293 293 postfix=None,
294 294 checkambig=False,
295 295 mmaplargeindex=False,
296 296 censorable=False,
297 297 upperboundcomp=None,
298 298 persistentnodemap=False,
299 299 concurrencychecker=None,
300 300 ):
301 301 """
302 302 create a revlog object
303 303
304 304 opener is a function that abstracts the file opening operation
305 305 and can be used to implement COW semantics or the like.
306 306
307 307 `target`: a (KIND, ID) tuple that identifies the content stored in
308 308 this revlog. It helps the rest of the code understand what the revlog
309 309 is about without having to resort to heuristics and index filename
310 310 analysis. Note that this must be reliably set by normal code, but
311 311 test, debug, or performance measurement code might not set this to an
312 312 accurate value.
313 313 """
314 314 self.upperboundcomp = upperboundcomp
315 315
316 316 self.radix = radix
317 317
318 318 self._indexfile = None
319 319 self._datafile = None
320 320 self._nodemap_file = None
321 321 self.postfix = postfix
322 322 self.opener = opener
323 323 if persistentnodemap:
324 324 self._nodemap_file = nodemaputil.get_nodemap_file(self)
325 325
326 326 assert target[0] in ALL_KINDS
327 327 assert len(target) == 2
328 328 self.target = target
329 329 # When True, indexfile is opened with checkambig=True at writing, to
330 330 # avoid file stat ambiguity.
331 331 self._checkambig = checkambig
332 332 self._mmaplargeindex = mmaplargeindex
333 333 self._censorable = censorable
334 334 # 3-tuple of (node, rev, text) for a raw revision.
335 335 self._revisioncache = None
336 336 # Maps rev to chain base rev.
337 337 self._chainbasecache = util.lrucachedict(100)
338 338 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
339 339 self._chunkcache = (0, b'')
340 340 # How much data to read and cache into the raw revlog data cache.
341 341 self._chunkcachesize = 65536
342 342 self._maxchainlen = None
343 343 self._deltabothparents = True
344 344 self.index = None
345 345 self._nodemap_docket = None
346 346 # Mapping of partial identifiers to full nodes.
347 347 self._pcache = {}
348 348 # Mapping of revision integer to full node.
349 349 self._compengine = b'zlib'
350 350 self._compengineopts = {}
351 351 self._maxdeltachainspan = -1
352 352 self._withsparseread = False
353 353 self._sparserevlog = False
354 354 self.hassidedata = False
355 355 self._srdensitythreshold = 0.50
356 356 self._srmingapsize = 262144
357 357
358 358 # Make copy of flag processors so each revlog instance can support
359 359 # custom flags.
360 360 self._flagprocessors = dict(flagutil.flagprocessors)
361 361
362 362 # 2-tuple of file handles being used for active writing.
363 363 self._writinghandles = None
364 364 # prevent nesting of addgroup
365 365 self._adding_group = None
366 366
367 367 self._loadindex()
368 368
369 369 self._concurrencychecker = concurrencychecker
370 370
371 371 def _init_opts(self):
372 372 """process options (from above/config) to setup associated default revlog mode
373 373
374 374 These values might be affected when actually reading on disk information.
375 375
376 376 The relevant values are returned for use in _loadindex().
377 377
378 378 * newversionflags:
379 379 version header to use if we need to create a new revlog
380 380
381 381 * mmapindexthreshold:
382 382 minimal index size at which to start using mmap
383 383
384 384 * force_nodemap:
385 385 force the usage of a "development" version of the nodemap code
386 386 """
387 387 mmapindexthreshold = None
388 388 opts = self.opener.options
389 389
390 390 if b'revlogv2' in opts:
391 391 new_header = REVLOGV2 | FLAG_INLINE_DATA
392 392 elif b'revlogv1' in opts:
393 393 new_header = REVLOGV1 | FLAG_INLINE_DATA
394 394 if b'generaldelta' in opts:
395 395 new_header |= FLAG_GENERALDELTA
396 396 elif b'revlogv0' in self.opener.options:
397 397 new_header = REVLOGV0
398 398 else:
399 399 new_header = REVLOG_DEFAULT_VERSION
400 400
401 401 if b'chunkcachesize' in opts:
402 402 self._chunkcachesize = opts[b'chunkcachesize']
403 403 if b'maxchainlen' in opts:
404 404 self._maxchainlen = opts[b'maxchainlen']
405 405 if b'deltabothparents' in opts:
406 406 self._deltabothparents = opts[b'deltabothparents']
407 407 self._lazydelta = bool(opts.get(b'lazydelta', True))
408 408 self._lazydeltabase = False
409 409 if self._lazydelta:
410 410 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
411 411 if b'compengine' in opts:
412 412 self._compengine = opts[b'compengine']
413 413 if b'zlib.level' in opts:
414 414 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
415 415 if b'zstd.level' in opts:
416 416 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
417 417 if b'maxdeltachainspan' in opts:
418 418 self._maxdeltachainspan = opts[b'maxdeltachainspan']
419 419 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
420 420 mmapindexthreshold = opts[b'mmapindexthreshold']
421 421 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
422 422 withsparseread = bool(opts.get(b'with-sparse-read', False))
423 423 # sparse-revlog forces sparse-read
424 424 self._withsparseread = self._sparserevlog or withsparseread
425 425 if b'sparse-read-density-threshold' in opts:
426 426 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
427 427 if b'sparse-read-min-gap-size' in opts:
428 428 self._srmingapsize = opts[b'sparse-read-min-gap-size']
429 429 if opts.get(b'enableellipsis'):
430 430 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
431 431
432 432 # revlog v0 doesn't have flag processors
433 433 for flag, processor in pycompat.iteritems(
434 434 opts.get(b'flagprocessors', {})
435 435 ):
436 436 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
437 437
438 438 if self._chunkcachesize <= 0:
439 439 raise error.RevlogError(
440 440 _(b'revlog chunk cache size %r is not greater than 0')
441 441 % self._chunkcachesize
442 442 )
443 443 elif self._chunkcachesize & (self._chunkcachesize - 1):
444 444 raise error.RevlogError(
445 445 _(b'revlog chunk cache size %r is not a power of 2')
446 446 % self._chunkcachesize
447 447 )
448 448 force_nodemap = opts.get(b'devel-force-nodemap', False)
449 449 return new_header, mmapindexthreshold, force_nodemap
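# Illustrative sketch (not part of the original file): the validation above
# uses `x & (x - 1)`, which clears the lowest set bit and is therefore zero
# exactly when x is a power of two (for x > 0).
for size in (1, 1024, 65536):
    assert size & (size - 1) == 0      # accepted cache sizes
for size in (3, 1000, 65535):
    assert size & (size - 1) != 0      # rejected: not a power of two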
450 450
451 451 def _get_data(self, filepath, mmap_threshold):
452 452 """return a file content with or without mmap
453 453
454 454 If the file is missing return the empty string"""
455 455 try:
456 456 with self.opener(filepath) as fp:
457 457 if mmap_threshold is not None:
458 458 file_size = self.opener.fstat(fp).st_size
459 459 if file_size >= mmap_threshold:
460 460 # TODO: should .close() to release resources without
461 461 # relying on Python GC
462 462 return util.buffer(util.mmapread(fp))
463 463 return fp.read()
464 464 except IOError as inst:
465 465 if inst.errno != errno.ENOENT:
466 466 raise
467 467 return b''
468 468
469 469 def _loadindex(self):
470 470
471 471 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
472 472
473 473 if self.postfix is None:
474 474 entry_point = b'%s.i' % self.radix
475 475 else:
476 476 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
477 477
478 478 entry_data = b''
479 479 self._initempty = True
480 480 entry_data = self._get_data(entry_point, mmapindexthreshold)
481 481 if len(entry_data) > 0:
482 482 header = INDEX_HEADER.unpack(entry_data[:4])[0]
483 483 self._initempty = False
484 484 else:
485 485 header = new_header
486 486
487 487 self._format_flags = header & ~0xFFFF
488 488 self._format_version = header & 0xFFFF
489 489
490 490 if self._format_version == REVLOGV0:
491 491 if self._format_flags:
492 492 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
493 493 display_flag = self._format_flags >> 16
494 494 msg %= (display_flag, self._format_version, self.display_id)
495 495 raise error.RevlogError(msg)
496 496
497 497 self._inline = False
498 498 self._generaldelta = False
499 499
500 500 elif self._format_version == REVLOGV1:
501 501 if self._format_flags & ~REVLOGV1_FLAGS:
502 502 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
503 503 display_flag = self._format_flags >> 16
504 504 msg %= (display_flag, self._format_version, self.display_id)
505 505 raise error.RevlogError(msg)
506 506
507 507 self._inline = self._format_flags & FLAG_INLINE_DATA
508 508 self._generaldelta = self._format_flags & FLAG_GENERALDELTA
509 509
510 510 elif self._format_version == REVLOGV2:
511 511 if self._format_flags & ~REVLOGV2_FLAGS:
512 512 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
513 513 display_flag = self._format_flags >> 16
514 514 msg %= (display_flag, self._format_version, self.display_id)
515 515 raise error.RevlogError(msg)
516 516
517 517 # There is a bug in the transaction handling when going from an
518 518 # inline revlog to a separate index and data file. Turn it off until
519 519 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
520 520 # See issue6485
521 521 self._inline = False
522 522 # generaldelta is implied by version 2 revlogs.
523 523 self._generaldelta = True
524 524 # revlog-v2 has built-in sidedata support
525 525 self.hassidedata = True
526 526
527 527 else:
528 528 msg = _(b'unknown version (%d) in revlog %s')
529 529 msg %= (self._format_version, self.display_id)
530 530 raise error.RevlogError(msg)
531 531
532 532 index_data = entry_data
533 533 self._indexfile = entry_point
534 534
535 535 if self.postfix is None or self.postfix == b'a':
536 536 self._datafile = b'%s.d' % self.radix
537 537 else:
538 538 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
539 539
540 540 self.nodeconstants = sha1nodeconstants
541 541 self.nullid = self.nodeconstants.nullid
542 542
543 543 # sparse-revlog can't be on without general-delta (issue6056)
544 544 if not self._generaldelta:
545 545 self._sparserevlog = False
546 546
547 547 self._storedeltachains = True
548 548
549 549 devel_nodemap = (
550 550 self._nodemap_file
551 551 and force_nodemap
552 552 and parse_index_v1_nodemap is not None
553 553 )
554 554
555 555 use_rust_index = False
556 556 if rustrevlog is not None:
557 557 if self._nodemap_file is not None:
558 558 use_rust_index = True
559 559 else:
560 560 use_rust_index = self.opener.options.get(b'rust.index')
561 561
562 562 self._parse_index = parse_index_v1
563 563 if self._format_version == REVLOGV0:
564 564 self._parse_index = revlogv0.parse_index_v0
565 565 elif self._format_version == REVLOGV2:
566 566 self._parse_index = parse_index_v2
567 567 elif devel_nodemap:
568 568 self._parse_index = parse_index_v1_nodemap
569 569 elif use_rust_index:
570 570 self._parse_index = parse_index_v1_mixed
571 571 try:
572 572 d = self._parse_index(index_data, self._inline)
573 573 index, _chunkcache = d
574 574 use_nodemap = (
575 575 not self._inline
576 576 and self._nodemap_file is not None
577 577 and util.safehasattr(index, 'update_nodemap_data')
578 578 )
579 579 if use_nodemap:
580 580 nodemap_data = nodemaputil.persisted_data(self)
581 581 if nodemap_data is not None:
582 582 docket = nodemap_data[0]
583 583 if (
584 584 len(d[0]) > docket.tip_rev
585 585 and d[0][docket.tip_rev][7] == docket.tip_node
586 586 ):
587 587 # no changelog tampering
588 588 self._nodemap_docket = docket
589 589 index.update_nodemap_data(*nodemap_data)
590 590 except (ValueError, IndexError):
591 591 raise error.RevlogError(
592 592 _(b"index %s is corrupted") % self.display_id
593 593 )
594 594 self.index, self._chunkcache = d
595 595 if not self._chunkcache:
596 596 self._chunkclear()
597 597 # revnum -> (chain-length, sum-delta-length)
598 598 self._chaininfocache = util.lrucachedict(500)
599 599 # revlog header -> revlog compressor
600 600 self._decompressors = {}
601 601
602 602 @util.propertycache
603 603 def revlog_kind(self):
604 604 return self.target[0]
605 605
606 606 @util.propertycache
607 607 def display_id(self):
608 608 """The public facing "ID" of the revlog that we use in message"""
609 609 # Maybe we should build a user facing representation of
610 610 # revlog.target instead of using `self.radix`
611 611 return self.radix
612 612
613 613 @util.propertycache
614 614 def _compressor(self):
615 615 engine = util.compengines[self._compengine]
616 616 return engine.revlogcompressor(self._compengineopts)
617 617
618 618 def _indexfp(self):
619 619 """file object for the revlog's index file"""
620 620 return self.opener(self._indexfile, mode=b"r")
621 621
622 622 def __index_write_fp(self):
623 623 # You should not use this directly; use `_writing` instead
624 624 try:
625 625 f = self.opener(
626 626 self._indexfile, mode=b"r+", checkambig=self._checkambig
627 627 )
628 628 f.seek(0, os.SEEK_END)
629 629 return f
630 630 except IOError as inst:
631 631 if inst.errno != errno.ENOENT:
632 632 raise
633 633 return self.opener(
634 634 self._indexfile, mode=b"w+", checkambig=self._checkambig
635 635 )
636 636
637 637 def __index_new_fp(self):
638 638 # You should not use this unless you are upgrading from an inline revlog
639 639 return self.opener(
640 640 self._indexfile,
641 641 mode=b"w",
642 642 checkambig=self._checkambig,
643 643 atomictemp=True,
644 644 )
645 645
646 646 def _datafp(self, mode=b'r'):
647 647 """file object for the revlog's data file"""
648 648 return self.opener(self._datafile, mode=mode)
649 649
650 650 @contextlib.contextmanager
651 651 def _datareadfp(self, existingfp=None):
652 652 """file object suitable to read data"""
653 653 # Use explicit file handle, if given.
654 654 if existingfp is not None:
655 655 yield existingfp
656 656
657 657 # Use a file handle being actively used for writes, if available.
658 658 # There is some danger in doing this because reads will seek the
659 659 # file. However, _writeentry() performs a SEEK_END before all writes,
660 660 # so we should be safe.
661 661 elif self._writinghandles:
662 662 if self._inline:
663 663 yield self._writinghandles[0]
664 664 else:
665 665 yield self._writinghandles[1]
666 666
667 667 # Otherwise open a new file handle.
668 668 else:
669 669 if self._inline:
670 670 func = self._indexfp
671 671 else:
672 672 func = self._datafp
673 673 with func() as fp:
674 674 yield fp
675 675
676 676 def tiprev(self):
677 677 return len(self.index) - 1
678 678
679 679 def tip(self):
680 680 return self.node(self.tiprev())
681 681
682 682 def __contains__(self, rev):
683 683 return 0 <= rev < len(self)
684 684
685 685 def __len__(self):
686 686 return len(self.index)
687 687
688 688 def __iter__(self):
689 689 return iter(pycompat.xrange(len(self)))
690 690
691 691 def revs(self, start=0, stop=None):
692 692 """iterate over all rev in this revlog (from start to stop)"""
693 693 return storageutil.iterrevs(len(self), start=start, stop=stop)
694 694
695 695 @property
696 696 def nodemap(self):
697 697 msg = (
698 698 b"revlog.nodemap is deprecated, "
699 699 b"use revlog.index.[has_node|rev|get_rev]"
700 700 )
701 701 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
702 702 return self.index.nodemap
703 703
704 704 @property
705 705 def _nodecache(self):
706 706 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
707 707 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
708 708 return self.index.nodemap
709 709
710 710 def hasnode(self, node):
711 711 try:
712 712 self.rev(node)
713 713 return True
714 714 except KeyError:
715 715 return False
716 716
717 717 def candelta(self, baserev, rev):
718 718 """whether two revisions (baserev, rev) can be delta-ed or not"""
719 719 # Disable delta if either rev requires a content-changing flag
720 720 # processor (ex. LFS). This is because such a flag processor can alter
721 721 # the rawtext content that the delta will be based on, and two clients
722 722 # could have the same revlog node with different flags (i.e. different
723 723 # rawtext contents), making the delta incompatible.
724 724 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
725 725 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
726 726 ):
727 727 return False
728 728 return True
729 729
730 730 def update_caches(self, transaction):
731 731 if self._nodemap_file is not None:
732 732 if transaction is None:
733 733 nodemaputil.update_persistent_nodemap(self)
734 734 else:
735 735 nodemaputil.setup_persistent_nodemap(transaction, self)
736 736
737 737 def clearcaches(self):
738 738 self._revisioncache = None
739 739 self._chainbasecache.clear()
740 740 self._chunkcache = (0, b'')
741 741 self._pcache = {}
742 742 self._nodemap_docket = None
743 743 self.index.clearcaches()
744 744 # The python code is the one responsible for validating the docket, so we
745 745 # end up having to refresh it here.
746 746 use_nodemap = (
747 747 not self._inline
748 748 and self._nodemap_file is not None
749 749 and util.safehasattr(self.index, 'update_nodemap_data')
750 750 )
751 751 if use_nodemap:
752 752 nodemap_data = nodemaputil.persisted_data(self)
753 753 if nodemap_data is not None:
754 754 self._nodemap_docket = nodemap_data[0]
755 755 self.index.update_nodemap_data(*nodemap_data)
756 756
757 757 def rev(self, node):
758 758 try:
759 759 return self.index.rev(node)
760 760 except TypeError:
761 761 raise
762 762 except error.RevlogError:
763 763 # parsers.c radix tree lookup failed
764 764 if (
765 765 node == self.nodeconstants.wdirid
766 766 or node in self.nodeconstants.wdirfilenodeids
767 767 ):
768 768 raise error.WdirUnsupported
769 769 raise error.LookupError(node, self.display_id, _(b'no node'))
770 770
771 771 # Accessors for index entries.
772 772
773 773 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
774 774 # are flags.
775 775 def start(self, rev):
776 776 return int(self.index[rev][0] >> 16)
777 777
778 778 def flags(self, rev):
779 779 return self.index[rev][0] & 0xFFFF
780 780
781 781 def length(self, rev):
782 782 return self.index[rev][1]
783 783
784 784 def sidedata_length(self, rev):
785 785 if not self.hassidedata:
786 786 return 0
787 787 return self.index[rev][9]
788 788
789 789 def rawsize(self, rev):
790 790 """return the length of the uncompressed text for a given revision"""
791 791 l = self.index[rev][2]
792 792 if l >= 0:
793 793 return l
794 794
795 795 t = self.rawdata(rev)
796 796 return len(t)
797 797
798 798 def size(self, rev):
799 799 """length of non-raw text (processed by a "read" flag processor)"""
800 800 # fast path: if no "read" flag processor could change the content,
801 801 # size is rawsize. note: ELLIPSIS is known to not change the content.
802 802 flags = self.flags(rev)
803 803 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
804 804 return self.rawsize(rev)
805 805
806 806 return len(self.revision(rev, raw=False))
807 807
808 808 def chainbase(self, rev):
809 809 base = self._chainbasecache.get(rev)
810 810 if base is not None:
811 811 return base
812 812
813 813 index = self.index
814 814 iterrev = rev
815 815 base = index[iterrev][3]
816 816 while base != iterrev:
817 817 iterrev = base
818 818 base = index[iterrev][3]
819 819
820 820 self._chainbasecache[rev] = base
821 821 return base
822 822
823 823 def linkrev(self, rev):
824 824 return self.index[rev][4]
825 825
826 826 def parentrevs(self, rev):
827 827 try:
828 828 entry = self.index[rev]
829 829 except IndexError:
830 830 if rev == wdirrev:
831 831 raise error.WdirUnsupported
832 832 raise
833 833 if entry[5] == nullrev:
834 834 return entry[6], entry[5]
835 835 else:
836 836 return entry[5], entry[6]
837 837
838 838 # fast parentrevs(rev) where rev isn't filtered
839 839 _uncheckedparentrevs = parentrevs
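# Illustrative sketch (not part of the original file): parentrevs() swaps
# the stored pair when the first parent is null, so a real parent always
# comes first.
nullrev = -1
p1, p2 = nullrev, 4                  # parents as stored in the index entry
if p1 == nullrev:
    p1, p2 = p2, p1
assert (p1, p2) == (4, nullrev)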
840 840
841 841 def node(self, rev):
842 842 try:
843 843 return self.index[rev][7]
844 844 except IndexError:
845 845 if rev == wdirrev:
846 846 raise error.WdirUnsupported
847 847 raise
848 848
849 849 # Derived from index values.
850 850
851 851 def end(self, rev):
852 852 return self.start(rev) + self.length(rev)
853 853
854 854 def parents(self, node):
855 855 i = self.index
856 856 d = i[self.rev(node)]
857 857 # inline node() to avoid function call overhead
858 858 if d[5] == nullrev:
859 859 return i[d[6]][7], i[d[5]][7]
860 860 else:
861 861 return i[d[5]][7], i[d[6]][7]
862 862
863 863 def chainlen(self, rev):
864 864 return self._chaininfo(rev)[0]
865 865
866 866 def _chaininfo(self, rev):
867 867 chaininfocache = self._chaininfocache
868 868 if rev in chaininfocache:
869 869 return chaininfocache[rev]
870 870 index = self.index
871 871 generaldelta = self._generaldelta
872 872 iterrev = rev
873 873 e = index[iterrev]
874 874 clen = 0
875 875 compresseddeltalen = 0
876 876 while iterrev != e[3]:
877 877 clen += 1
878 878 compresseddeltalen += e[1]
879 879 if generaldelta:
880 880 iterrev = e[3]
881 881 else:
882 882 iterrev -= 1
883 883 if iterrev in chaininfocache:
884 884 t = chaininfocache[iterrev]
885 885 clen += t[0]
886 886 compresseddeltalen += t[1]
887 887 break
888 888 e = index[iterrev]
889 889 else:
890 890 # Add text length of base since decompressing that also takes
891 891 # work. For cache hits the length is already included.
892 892 compresseddeltalen += e[1]
893 893 r = (clen, compresseddeltalen)
894 894 chaininfocache[rev] = r
895 895 return r
896 896
897 897 def _deltachain(self, rev, stoprev=None):
898 898 """Obtain the delta chain for a revision.
899 899
900 900 ``stoprev`` specifies a revision to stop at. If not specified, we
901 901 stop at the base of the chain.
902 902
903 903 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
904 904 revs in ascending order and ``stopped`` is a bool indicating whether
905 905 ``stoprev`` was hit.
906 906 """
907 907 # Try C implementation.
908 908 try:
909 909 return self.index.deltachain(rev, stoprev, self._generaldelta)
910 910 except AttributeError:
911 911 pass
912 912
913 913 chain = []
914 914
915 915 # Alias to prevent attribute lookup in tight loop.
916 916 index = self.index
917 917 generaldelta = self._generaldelta
918 918
919 919 iterrev = rev
920 920 e = index[iterrev]
921 921 while iterrev != e[3] and iterrev != stoprev:
922 922 chain.append(iterrev)
923 923 if generaldelta:
924 924 iterrev = e[3]
925 925 else:
926 926 iterrev -= 1
927 927 e = index[iterrev]
928 928
929 929 if iterrev == stoprev:
930 930 stopped = True
931 931 else:
932 932 chain.append(iterrev)
933 933 stopped = False
934 934
935 935 chain.reverse()
936 936 return chain, stopped
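# Illustrative sketch (hypothetical revlog `rl`): with generaldelta each
# entry names its own delta base, so the chain can skip around; without it
# the base is always the previous revision and the chain is contiguous.
#
#     chain, stopped = rl._deltachain(7)
#     # e.g. chain == [2, 5, 7]: rev 2 is the full-snapshot base, rev 5 a
#     # delta against 2, rev 7 a delta against 5; stopped is False since
#     # no stoprev was given.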
937 937
938 938 def ancestors(self, revs, stoprev=0, inclusive=False):
939 939 """Generate the ancestors of 'revs' in reverse revision order.
940 940 Does not generate revs lower than stoprev.
941 941
942 942 See the documentation for ancestor.lazyancestors for more details."""
943 943
944 944 # first, make sure start revisions aren't filtered
945 945 revs = list(revs)
946 946 checkrev = self.node
947 947 for r in revs:
948 948 checkrev(r)
949 949 # and we're sure ancestors aren't filtered as well
950 950
951 951 if rustancestor is not None:
952 952 lazyancestors = rustancestor.LazyAncestors
953 953 arg = self.index
954 954 else:
955 955 lazyancestors = ancestor.lazyancestors
956 956 arg = self._uncheckedparentrevs
957 957 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
958 958
959 959 def descendants(self, revs):
960 960 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
961 961
962 962 def findcommonmissing(self, common=None, heads=None):
963 963 """Return a tuple of the ancestors of common and the ancestors of heads
964 964 that are not ancestors of common. In revset terminology, we return the
965 965 tuple:
966 966
967 967 ::common, (::heads) - (::common)
968 968
969 969 The list is sorted by revision number, meaning it is
970 970 topologically sorted.
971 971
972 972 'heads' and 'common' are both lists of node IDs. If heads is
973 973 not supplied, uses all of the revlog's heads. If common is not
974 974 supplied, uses nullid."""
975 975 if common is None:
976 976 common = [self.nullid]
977 977 if heads is None:
978 978 heads = self.heads()
979 979
980 980 common = [self.rev(n) for n in common]
981 981 heads = [self.rev(n) for n in heads]
982 982
983 983 # we want the ancestors, but inclusive
984 984 class lazyset(object):
985 985 def __init__(self, lazyvalues):
986 986 self.addedvalues = set()
987 987 self.lazyvalues = lazyvalues
988 988
989 989 def __contains__(self, value):
990 990 return value in self.addedvalues or value in self.lazyvalues
991 991
992 992 def __iter__(self):
993 993 added = self.addedvalues
994 994 for r in added:
995 995 yield r
996 996 for r in self.lazyvalues:
997 997 if not r in added:
998 998 yield r
999 999
1000 1000 def add(self, value):
1001 1001 self.addedvalues.add(value)
1002 1002
1003 1003 def update(self, values):
1004 1004 self.addedvalues.update(values)
1005 1005
1006 1006 has = lazyset(self.ancestors(common))
1007 1007 has.add(nullrev)
1008 1008 has.update(common)
1009 1009
1010 1010 # take all ancestors from heads that aren't in has
1011 1011 missing = set()
1012 1012 visit = collections.deque(r for r in heads if r not in has)
1013 1013 while visit:
1014 1014 r = visit.popleft()
1015 1015 if r in missing:
1016 1016 continue
1017 1017 else:
1018 1018 missing.add(r)
1019 1019 for p in self.parentrevs(r):
1020 1020 if p not in has:
1021 1021 visit.append(p)
1022 1022 missing = list(missing)
1023 1023 missing.sort()
1024 1024 return has, [self.node(miss) for miss in missing]
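# Illustrative sketch: lazyset answers membership from the concrete
# addedvalues set first and only then consults the lazy ancestor iterable,
# so adding known revs (nullrev, the common set) never forces extra
# ancestor computation.
#
#     has = lazyset(self.ancestors(common))
#     has.add(nullrev)        # cheap: stored in addedvalues
#     # `r in has` checks addedvalues before touching the lazy values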
1025 1025
1026 1026 def incrementalmissingrevs(self, common=None):
1027 1027 """Return an object that can be used to incrementally compute the
1028 1028 revision numbers of the ancestors of arbitrary sets that are not
1029 1029 ancestors of common. This is an ancestor.incrementalmissingancestors
1030 1030 object.
1031 1031
1032 1032 'common' is a list of revision numbers. If common is not supplied, uses
1033 1033 nullrev.
1034 1034 """
1035 1035 if common is None:
1036 1036 common = [nullrev]
1037 1037
1038 1038 if rustancestor is not None:
1039 1039 return rustancestor.MissingAncestors(self.index, common)
1040 1040 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1041 1041
1042 1042 def findmissingrevs(self, common=None, heads=None):
1043 1043 """Return the revision numbers of the ancestors of heads that
1044 1044 are not ancestors of common.
1045 1045
1046 1046 More specifically, return a list of revision numbers corresponding to
1047 1047 nodes N such that every N satisfies the following constraints:
1048 1048
1049 1049 1. N is an ancestor of some node in 'heads'
1050 1050 2. N is not an ancestor of any node in 'common'
1051 1051
1052 1052 The list is sorted by revision number, meaning it is
1053 1053 topologically sorted.
1054 1054
1055 1055 'heads' and 'common' are both lists of revision numbers. If heads is
1056 1056 not supplied, uses all of the revlog's heads. If common is not
1057 1057 supplied, uses nullid."""
1058 1058 if common is None:
1059 1059 common = [nullrev]
1060 1060 if heads is None:
1061 1061 heads = self.headrevs()
1062 1062
1063 1063 inc = self.incrementalmissingrevs(common=common)
1064 1064 return inc.missingancestors(heads)
1065 1065
1066 1066 def findmissing(self, common=None, heads=None):
1067 1067 """Return the ancestors of heads that are not ancestors of common.
1068 1068
1069 1069 More specifically, return a list of nodes N such that every N
1070 1070 satisfies the following constraints:
1071 1071
1072 1072 1. N is an ancestor of some node in 'heads'
1073 1073 2. N is not an ancestor of any node in 'common'
1074 1074
1075 1075 The list is sorted by revision number, meaning it is
1076 1076 topologically sorted.
1077 1077
1078 1078 'heads' and 'common' are both lists of node IDs. If heads is
1079 1079 not supplied, uses all of the revlog's heads. If common is not
1080 1080 supplied, uses nullid."""
1081 1081 if common is None:
1082 1082 common = [self.nullid]
1083 1083 if heads is None:
1084 1084 heads = self.heads()
1085 1085
1086 1086 common = [self.rev(n) for n in common]
1087 1087 heads = [self.rev(n) for n in heads]
1088 1088
1089 1089 inc = self.incrementalmissingrevs(common=common)
1090 1090 return [self.node(r) for r in inc.missingancestors(heads)]
1091 1091
1092 1092 def nodesbetween(self, roots=None, heads=None):
1093 1093 """Return a topological path from 'roots' to 'heads'.
1094 1094
1095 1095 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1096 1096 topologically sorted list of all nodes N that satisfy both of
1097 1097 these constraints:
1098 1098
1099 1099 1. N is a descendant of some node in 'roots'
1100 1100 2. N is an ancestor of some node in 'heads'
1101 1101
1102 1102 Every node is considered to be both a descendant and an ancestor
1103 1103 of itself, so every reachable node in 'roots' and 'heads' will be
1104 1104 included in 'nodes'.
1105 1105
1106 1106 'outroots' is the list of reachable nodes in 'roots', i.e., the
1107 1107 subset of 'roots' that is returned in 'nodes'. Likewise,
1108 1108 'outheads' is the subset of 'heads' that is also in 'nodes'.
1109 1109
1110 1110 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1111 1111 unspecified, uses nullid as the only root. If 'heads' is
1112 1112 unspecified, uses list of all of the revlog's heads."""
1113 1113 nonodes = ([], [], [])
1114 1114 if roots is not None:
1115 1115 roots = list(roots)
1116 1116 if not roots:
1117 1117 return nonodes
1118 1118 lowestrev = min([self.rev(n) for n in roots])
1119 1119 else:
1120 1120 roots = [self.nullid] # Everybody's a descendant of nullid
1121 1121 lowestrev = nullrev
1122 1122 if (lowestrev == nullrev) and (heads is None):
1123 1123 # We want _all_ the nodes!
1124 1124 return (
1125 1125 [self.node(r) for r in self],
1126 1126 [self.nullid],
1127 1127 list(self.heads()),
1128 1128 )
1129 1129 if heads is None:
1130 1130 # All nodes are ancestors, so the latest ancestor is the last
1131 1131 # node.
1132 1132 highestrev = len(self) - 1
1133 1133 # Set ancestors to None to signal that every node is an ancestor.
1134 1134 ancestors = None
1135 1135 # Set heads to an empty dictionary for later discovery of heads
1136 1136 heads = {}
1137 1137 else:
1138 1138 heads = list(heads)
1139 1139 if not heads:
1140 1140 return nonodes
1141 1141 ancestors = set()
1142 1142 # Turn heads into a dictionary so we can remove 'fake' heads.
1143 1143 # Also, later we will be using it to filter out the heads we can't
1144 1144 # find from roots.
1145 1145 heads = dict.fromkeys(heads, False)
1146 1146 # Start at the top and keep marking parents until we're done.
1147 1147 nodestotag = set(heads)
1148 1148 # Remember where the top was so we can use it as a limit later.
1149 1149 highestrev = max([self.rev(n) for n in nodestotag])
1150 1150 while nodestotag:
1151 1151 # grab a node to tag
1152 1152 n = nodestotag.pop()
1153 1153 # Never tag nullid
1154 1154 if n == self.nullid:
1155 1155 continue
1156 1156 # A node's revision number represents its place in a
1157 1157 # topologically sorted list of nodes.
1158 1158 r = self.rev(n)
1159 1159 if r >= lowestrev:
1160 1160 if n not in ancestors:
1161 1161 # If we are possibly a descendant of one of the roots
1162 1162 # and we haven't already been marked as an ancestor
1163 1163 ancestors.add(n) # Mark as ancestor
1164 1164 # Add non-nullid parents to list of nodes to tag.
1165 1165 nodestotag.update(
1166 1166 [p for p in self.parents(n) if p != self.nullid]
1167 1167 )
1168 1168 elif n in heads: # We've seen it before, is it a fake head?
1169 1169 # So it is, real heads should not be the ancestors of
1170 1170 # any other heads.
1171 1171 heads.pop(n)
1172 1172 if not ancestors:
1173 1173 return nonodes
1174 1174 # Now that we have our set of ancestors, we want to remove any
1175 1175 # roots that are not ancestors.
1176 1176
1177 1177 # If one of the roots was nullid, everything is included anyway.
1178 1178 if lowestrev > nullrev:
1179 1179 # But, since we weren't, let's recompute the lowest rev to not
1180 1180 # include roots that aren't ancestors.
1181 1181
1182 1182 # Filter out roots that aren't ancestors of heads
1183 1183 roots = [root for root in roots if root in ancestors]
1184 1184 # Recompute the lowest revision
1185 1185 if roots:
1186 1186 lowestrev = min([self.rev(root) for root in roots])
1187 1187 else:
1188 1188 # No more roots? Return empty list
1189 1189 return nonodes
1190 1190 else:
1191 1191 # We are descending from nullid, and don't need to care about
1192 1192 # any other roots.
1193 1193 lowestrev = nullrev
1194 1194 roots = [self.nullid]
1195 1195 # Transform our roots list into a set.
1196 1196 descendants = set(roots)
1197 1197 # Also, keep the original roots so we can filter out roots that aren't
1198 1198 # 'real' roots (i.e. are descended from other roots).
1199 1199 roots = descendants.copy()
1200 1200 # Our topologically sorted list of output nodes.
1201 1201 orderedout = []
1202 1202 # Don't start at nullid since we don't want nullid in our output list,
1203 1203 # and if nullid shows up in descendants, empty parents will look like
1204 1204 # they're descendants.
1205 1205 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1206 1206 n = self.node(r)
1207 1207 isdescendant = False
1208 1208 if lowestrev == nullrev: # Everybody is a descendant of nullid
1209 1209 isdescendant = True
1210 1210 elif n in descendants:
1211 1211 # n is already a descendant
1212 1212 isdescendant = True
1213 1213 # This check only needs to be done here because all the roots
1214 1214 # will start being marked as descendants before the loop.
1215 1215 if n in roots:
1216 1216 # If n was a root, check if it's a 'real' root.
1217 1217 p = tuple(self.parents(n))
1218 1218 # If any of its parents are descendants, it's not a root.
1219 1219 if (p[0] in descendants) or (p[1] in descendants):
1220 1220 roots.remove(n)
1221 1221 else:
1222 1222 p = tuple(self.parents(n))
1223 1223 # A node is a descendant if either of its parents is a
1224 1224 # descendant. (We seeded the descendants set with the roots
1225 1225 # up there, remember?)
1226 1226 if (p[0] in descendants) or (p[1] in descendants):
1227 1227 descendants.add(n)
1228 1228 isdescendant = True
1229 1229 if isdescendant and ((ancestors is None) or (n in ancestors)):
1230 1230 # Only include nodes that are both descendants and ancestors.
1231 1231 orderedout.append(n)
1232 1232 if (ancestors is not None) and (n in heads):
1233 1233 # We're trying to figure out which heads are reachable
1234 1234 # from roots.
1235 1235 # Mark this head as having been reached
1236 1236 heads[n] = True
1237 1237 elif ancestors is None:
1238 1238 # Otherwise, we're trying to discover the heads.
1239 1239 # Assume this is a head because if it isn't, the next step
1240 1240 # will eventually remove it.
1241 1241 heads[n] = True
1242 1242 # But, obviously its parents aren't.
1243 1243 for p in self.parents(n):
1244 1244 heads.pop(p, None)
1245 1245 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1246 1246 roots = list(roots)
1247 1247 assert orderedout
1248 1248 assert roots
1249 1249 assert heads
1250 1250 return (orderedout, roots, heads)
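# Illustrative sketch (hypothetical nodes a and c on a hypothetical revlog
# `rl`): nodesbetween() returns the nodes lying on paths from roots to
# heads, plus the reachable subset of each argument.
#
#     nodes, outroots, outheads = rl.nodesbetween([a], [c])
#     # nodes is topologically sorted and includes a and c themselves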
1251 1251
1252 1252 def headrevs(self, revs=None):
1253 1253 if revs is None:
1254 1254 try:
1255 1255 return self.index.headrevs()
1256 1256 except AttributeError:
1257 1257 return self._headrevs()
1258 1258 if rustdagop is not None:
1259 1259 return rustdagop.headrevs(self.index, revs)
1260 1260 return dagop.headrevs(revs, self._uncheckedparentrevs)
1261 1261
1262 1262 def computephases(self, roots):
1263 1263 return self.index.computephasesmapsets(roots)
1264 1264
1265 1265 def _headrevs(self):
1266 1266 count = len(self)
1267 1267 if not count:
1268 1268 return [nullrev]
1269 1269 # we won't iterate over filtered revs, so nobody is a head at start
1270 1270 ishead = [0] * (count + 1)
1271 1271 index = self.index
1272 1272 for r in self:
1273 1273 ishead[r] = 1 # I may be a head
1274 1274 e = index[r]
1275 1275 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1276 1276 return [r for r, val in enumerate(ishead) if val]
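# Illustrative sketch (not part of the original file): the same flag-array
# idea on a tiny hand-rolled parent table; revs 1 and 2 are children of 0.
parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}
ishead = [0] * (len(parents) + 1)    # extra slot absorbs writes at rev -1
for r in range(len(parents)):
    ishead[r] = 1                    # I may be a head
    p1, p2 = parents[r]
    ishead[p1] = ishead[p2] = 0      # my parents are not
assert [r for r, v in enumerate(ishead[:-1]) if v] == [1, 2]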
1277 1277
1278 1278 def heads(self, start=None, stop=None):
1279 1279 """return the list of all nodes that have no children
1280 1280
1281 1281 if start is specified, only heads that are descendants of
1282 1282 start will be returned
1283 1283 if stop is specified, it will consider all the revs from stop
1284 1284 as if they had no children
1285 1285 """
1286 1286 if start is None and stop is None:
1287 1287 if not len(self):
1288 1288 return [self.nullid]
1289 1289 return [self.node(r) for r in self.headrevs()]
1290 1290
1291 1291 if start is None:
1292 1292 start = nullrev
1293 1293 else:
1294 1294 start = self.rev(start)
1295 1295
1296 1296 stoprevs = {self.rev(n) for n in stop or []}
1297 1297
1298 1298 revs = dagop.headrevssubset(
1299 1299 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1300 1300 )
1301 1301
1302 1302 return [self.node(rev) for rev in revs]
1303 1303
1304 1304 def children(self, node):
1305 1305 """find the children of a given node"""
1306 1306 c = []
1307 1307 p = self.rev(node)
1308 1308 for r in self.revs(start=p + 1):
1309 1309 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1310 1310 if prevs:
1311 1311 for pr in prevs:
1312 1312 if pr == p:
1313 1313 c.append(self.node(r))
1314 1314 elif p == nullrev:
1315 1315 c.append(self.node(r))
1316 1316 return c
1317 1317
1318 1318 def commonancestorsheads(self, a, b):
1319 1319 """calculate all the heads of the common ancestors of nodes a and b"""
1320 1320 a, b = self.rev(a), self.rev(b)
1321 1321 ancs = self._commonancestorsheads(a, b)
1322 1322 return pycompat.maplist(self.node, ancs)
1323 1323
1324 1324 def _commonancestorsheads(self, *revs):
1325 1325 """calculate all the heads of the common ancestors of revs"""
1326 1326 try:
1327 1327 ancs = self.index.commonancestorsheads(*revs)
1328 1328 except (AttributeError, OverflowError): # C implementation failed
1329 1329 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1330 1330 return ancs
1331 1331
1332 1332 def isancestor(self, a, b):
1333 1333 """return True if node a is an ancestor of node b
1334 1334
1335 1335 A revision is considered an ancestor of itself."""
1336 1336 a, b = self.rev(a), self.rev(b)
1337 1337 return self.isancestorrev(a, b)
1338 1338
1339 1339 def isancestorrev(self, a, b):
1340 1340 """return True if revision a is an ancestor of revision b
1341 1341
1342 1342 A revision is considered an ancestor of itself.
1343 1343
1344 1344 The implementation of this is trivial but the use of
1345 1345 reachableroots is not."""
1346 1346 if a == nullrev:
1347 1347 return True
1348 1348 elif a == b:
1349 1349 return True
1350 1350 elif a > b:
1351 1351 return False
1352 1352 return bool(self.reachableroots(a, [b], [a], includepath=False))
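# Illustrative sketch: the pre-checks above follow from revision numbering,
# since a parent always has a smaller rev number than its descendants:
#   a == nullrev -> True  (nullrev is an ancestor of everything)
#   a == b       -> True  (a revision is its own ancestor)
#   a > b        -> False (an ancestor cannot have the larger rev number)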
1353 1353
1354 1354 def reachableroots(self, minroot, heads, roots, includepath=False):
1355 1355 """return (heads(::(<roots> and <roots>::<heads>)))
1356 1356
1357 1357 If includepath is True, return (<roots>::<heads>)."""
1358 1358 try:
1359 1359 return self.index.reachableroots2(
1360 1360 minroot, heads, roots, includepath
1361 1361 )
1362 1362 except AttributeError:
1363 1363 return dagop._reachablerootspure(
1364 1364 self.parentrevs, minroot, roots, heads, includepath
1365 1365 )
1366 1366
1367 1367 def ancestor(self, a, b):
1368 1368 """calculate the "best" common ancestor of nodes a and b"""
1369 1369
1370 1370 a, b = self.rev(a), self.rev(b)
1371 1371 try:
1372 1372 ancs = self.index.ancestors(a, b)
1373 1373 except (AttributeError, OverflowError):
1374 1374 ancs = ancestor.ancestors(self.parentrevs, a, b)
1375 1375 if ancs:
1376 1376 # choose a consistent winner when there's a tie
1377 1377 return min(map(self.node, ancs))
1378 1378 return self.nullid
1379 1379
1380 1380 def _match(self, id):
1381 1381 if isinstance(id, int):
1382 1382 # rev
1383 1383 return self.node(id)
1384 1384 if len(id) == self.nodeconstants.nodelen:
1385 1385 # possibly a binary node
1386 1386 # odds of a binary node being all hex in ASCII are 1 in 10**25
1387 1387 try:
1388 1388 node = id
1389 1389 self.rev(node) # quick search the index
1390 1390 return node
1391 1391 except error.LookupError:
1392 1392 pass # may be partial hex id
1393 1393 try:
1394 1394 # str(rev)
1395 1395 rev = int(id)
1396 1396 if b"%d" % rev != id:
1397 1397 raise ValueError
1398 1398 if rev < 0:
1399 1399 rev = len(self) + rev
1400 1400 if rev < 0 or rev >= len(self):
1401 1401 raise ValueError
1402 1402 return self.node(rev)
1403 1403 except (ValueError, OverflowError):
1404 1404 pass
1405 1405 if len(id) == 2 * self.nodeconstants.nodelen:
1406 1406 try:
1407 1407 # a full hex nodeid?
1408 1408 node = bin(id)
1409 1409 self.rev(node)
1410 1410 return node
1411 1411 except (TypeError, error.LookupError):
1412 1412 pass
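# Illustrative sketch (hypothetical revlog `rl`): _match() tries, in order,
# an integer rev, a binary node of full length, a decimal rev string
# (negative values count back from the tip), and a full hex nodeid.
#
#     rl._match(0)         # -> rl.node(0)
#     rl._match(b'-1')     # -> tip node, via rev = len(rl) + (-1)
#     rl._match(b'a' * 40) # -> parsed with bin() and looked up, if present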
1413 1413
1414 1414 def _partialmatch(self, id):
1415 1415 # we don't care about wdirfilenodeids as they should always be full hashes
1416 1416 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1417 1417 try:
1418 1418 partial = self.index.partialmatch(id)
1419 1419 if partial and self.hasnode(partial):
1420 1420 if maybewdir:
1421 1421 # single 'ff...' match in radix tree, ambiguous with wdir
1422 1422 raise error.RevlogError
1423 1423 return partial
1424 1424 if maybewdir:
1425 1425 # no 'ff...' match in radix tree, wdir identified
1426 1426 raise error.WdirUnsupported
1427 1427 return None
1428 1428 except error.RevlogError:
1429 1429 # parsers.c radix tree lookup gave multiple matches
1430 1430 # fast path: for unfiltered changelog, radix tree is accurate
1431 1431 if not getattr(self, 'filteredrevs', None):
1432 1432 raise error.AmbiguousPrefixLookupError(
1433 1433 id, self.display_id, _(b'ambiguous identifier')
1434 1434 )
1435 1435 # fall through to slow path that filters hidden revisions
1436 1436 except (AttributeError, ValueError):
1437 1437 # we are pure python, or key was too short to search radix tree
1438 1438 pass
1439 1439
1440 1440 if id in self._pcache:
1441 1441 return self._pcache[id]
1442 1442
1443 1443 if len(id) <= 40:
1444 1444 try:
1445 1445 # hex(node)[:...]
1446 1446 l = len(id) // 2 # grab an even number of digits
1447 1447 prefix = bin(id[: l * 2])
1448 1448 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1449 1449 nl = [
1450 1450 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1451 1451 ]
1452 1452 if self.nodeconstants.nullhex.startswith(id):
1453 1453 nl.append(self.nullid)
1454 1454 if len(nl) > 0:
1455 1455 if len(nl) == 1 and not maybewdir:
1456 1456 self._pcache[id] = nl[0]
1457 1457 return nl[0]
1458 1458 raise error.AmbiguousPrefixLookupError(
1459 1459 id, self.display_id, _(b'ambiguous identifier')
1460 1460 )
1461 1461 if maybewdir:
1462 1462 raise error.WdirUnsupported
1463 1463 return None
1464 1464 except TypeError:
1465 1465 pass
1466 1466
1467 1467 def lookup(self, id):
1468 1468 """locate a node based on:
1469 1469 - revision number or str(revision number)
1470 1470 - nodeid or subset of hex nodeid
1471 1471 """
1472 1472 n = self._match(id)
1473 1473 if n is not None:
1474 1474 return n
1475 1475 n = self._partialmatch(id)
1476 1476 if n:
1477 1477 return n
1478 1478
1479 1479 raise error.LookupError(id, self.display_id, _(b'no match found'))
1480 1480
1481 1481 def shortest(self, node, minlength=1):
1482 1482 """Find the shortest unambiguous prefix that matches node."""
1483 1483
1484 1484 def isvalid(prefix):
1485 1485 try:
1486 1486 matchednode = self._partialmatch(prefix)
1487 1487 except error.AmbiguousPrefixLookupError:
1488 1488 return False
1489 1489 except error.WdirUnsupported:
1490 1490 # single 'ff...' match
1491 1491 return True
1492 1492 if matchednode is None:
1493 1493 raise error.LookupError(node, self.display_id, _(b'no node'))
1494 1494 return True
1495 1495
1496 1496 def maybewdir(prefix):
1497 1497 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1498 1498
1499 1499 hexnode = hex(node)
1500 1500
1501 1501 def disambiguate(hexnode, minlength):
1502 1502 """Disambiguate against wdirid."""
1503 1503 for length in range(minlength, len(hexnode) + 1):
1504 1504 prefix = hexnode[:length]
1505 1505 if not maybewdir(prefix):
1506 1506 return prefix
1507 1507
1508 1508 if not getattr(self, 'filteredrevs', None):
1509 1509 try:
1510 1510 length = max(self.index.shortest(node), minlength)
1511 1511 return disambiguate(hexnode, length)
1512 1512 except error.RevlogError:
1513 1513 if node != self.nodeconstants.wdirid:
1514 1514 raise error.LookupError(
1515 1515 node, self.display_id, _(b'no node')
1516 1516 )
1517 1517 except AttributeError:
1518 1518 # Fall through to pure code
1519 1519 pass
1520 1520
1521 1521 if node == self.nodeconstants.wdirid:
1522 1522 for length in range(minlength, len(hexnode) + 1):
1523 1523 prefix = hexnode[:length]
1524 1524 if isvalid(prefix):
1525 1525 return prefix
1526 1526
1527 1527 for length in range(minlength, len(hexnode) + 1):
1528 1528 prefix = hexnode[:length]
1529 1529 if isvalid(prefix):
1530 1530 return disambiguate(hexnode, length)
1531 1531
1532 1532 def cmp(self, node, text):
1533 1533 """compare text with a given file revision
1534 1534
1535 1535 returns True if text is different from what is stored.
1536 1536 """
1537 1537 p1, p2 = self.parents(node)
1538 1538 return storageutil.hashrevisionsha1(text, p1, p2) != node
1539 1539
1540 1540 def _cachesegment(self, offset, data):
1541 1541 """Add a segment to the revlog cache.
1542 1542
1543 1543 Accepts an absolute offset and the data that is at that location.
1544 1544 """
1545 1545 o, d = self._chunkcache
1546 1546 # try to add to existing cache
1547 1547 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1548 1548 self._chunkcache = o, d + data
1549 1549 else:
1550 1550 self._chunkcache = offset, data
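# Illustrative sketch (not part of the original file): a read that starts
# exactly where the cached segment ends extends it; anything else replaces
# the cache wholesale.
cache = (0, b'abc')                       # (offset, data)
offset, data = 3, b'def'                  # contiguous with the cache
if cache[0] + len(cache[1]) == offset:
    cache = (cache[0], cache[1] + data)   # extend
else:
    cache = (offset, data)                # replace
assert cache == (0, b'abcdef')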
1551 1551
1552 1552 def _readsegment(self, offset, length, df=None):
1553 1553 """Load a segment of raw data from the revlog.
1554 1554
1555 1555 Accepts an absolute offset, length to read, and an optional existing
1556 1556 file handle to read from.
1557 1557
1558 1558 If an existing file handle is passed, it will be seeked and the
1559 1559 original seek position will NOT be restored.
1560 1560
1561 1561 Returns a str or buffer of raw byte data.
1562 1562
1563 1563 Raises if the requested number of bytes could not be read.
1564 1564 """
1565 1565 # Cache data both forward and backward around the requested
1566 1566 # data, in a fixed size window. This helps speed up operations
1567 1567 # involving reading the revlog backwards.
1568 1568 cachesize = self._chunkcachesize
1569 1569 realoffset = offset & ~(cachesize - 1)
1570 1570 reallength = (
1571 1571 (offset + length + cachesize) & ~(cachesize - 1)
1572 1572 ) - realoffset
1573 1573 with self._datareadfp(df) as df:
1574 1574 df.seek(realoffset)
1575 1575 d = df.read(reallength)
1576 1576
1577 1577 self._cachesegment(realoffset, d)
1578 1578 if offset != realoffset or reallength != length:
1579 1579 startoffset = offset - realoffset
1580 1580 if len(d) - startoffset < length:
1581 1581 raise error.RevlogError(
1582 1582 _(
1583 1583 b'partial read of revlog %s; expected %d bytes from '
1584 1584 b'offset %d, got %d'
1585 1585 )
1586 1586 % (
1587 1587 self._indexfile if self._inline else self._datafile,
1588 1588 length,
1589 1589 offset,
1590 1590 len(d) - startoffset,
1591 1591 )
1592 1592 )
1593 1593
1594 1594 return util.buffer(d, startoffset, length)
1595 1595
1596 1596 if len(d) < length:
1597 1597 raise error.RevlogError(
1598 1598 _(
1599 1599 b'partial read of revlog %s; expected %d bytes from offset '
1600 1600 b'%d, got %d'
1601 1601 )
1602 1602 % (
1603 1603 self._indexfile if self._inline else self._datafile,
1604 1604 length,
1605 1605 offset,
1606 1606 len(d),
1607 1607 )
1608 1608 )
1609 1609
1610 1610 return d
1611 1611
1612 1612 def _getsegment(self, offset, length, df=None):
1613 1613 """Obtain a segment of raw data from the revlog.
1614 1614
1615 1615 Accepts an absolute offset, length of bytes to obtain, and an
1616 1616 optional file handle to the already-opened revlog. If the file
1617 1617         handle is used, its original seek position will not be preserved.
1618 1618
1619 1619 Requests for data may be returned from a cache.
1620 1620
1621 1621 Returns a str or a buffer instance of raw byte data.
1622 1622 """
1623 1623 o, d = self._chunkcache
1624 1624 l = len(d)
1625 1625
1626 1626 # is it in the cache?
1627 1627 cachestart = offset - o
1628 1628 cacheend = cachestart + length
1629 1629 if cachestart >= 0 and cacheend <= l:
1630 1630 if cachestart == 0 and cacheend == l:
1631 1631 return d # avoid a copy
1632 1632 return util.buffer(d, cachestart, cacheend - cachestart)
1633 1633
1634 1634 return self._readsegment(offset, length, df=df)
1635 1635
1636 1636 def _getsegmentforrevs(self, startrev, endrev, df=None):
1637 1637 """Obtain a segment of raw data corresponding to a range of revisions.
1638 1638
1639 1639 Accepts the start and end revisions and an optional already-open
1640 1640         file handle to be used for reading. If the file handle is used, its
1641 1641 seek position will not be preserved.
1642 1642
1643 1643 Requests for data may be satisfied by a cache.
1644 1644
1645 1645 Returns a 2-tuple of (offset, data) for the requested range of
1646 1646 revisions. Offset is the integer offset from the beginning of the
1647 1647 revlog and data is a str or buffer of the raw byte data.
1648 1648
1649 1649 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1650 1650 to determine where each revision's data begins and ends.
1651 1651 """
1652 1652 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1653 1653 # (functions are expensive).
1654 1654 index = self.index
1655 1655 istart = index[startrev]
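        # the first index field packs the data offset and the revision
        # flags into a single integer: the offset lives in the high bits
        # and the 16 flag bits in the low bits, hence the shifts below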
1656 1656 start = int(istart[0] >> 16)
1657 1657 if startrev == endrev:
1658 1658 end = start + istart[1]
1659 1659 else:
1660 1660 iend = index[endrev]
1661 1661 end = int(iend[0] >> 16) + iend[1]
1662 1662
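        # an inline revlog interleaves each revision's data with its index
        # entry in the same file, so every data offset must be shifted past
        # all index entries up to and including the revision's own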
1663 1663 if self._inline:
1664 1664 start += (startrev + 1) * self.index.entry_size
1665 1665 end += (endrev + 1) * self.index.entry_size
1666 1666 length = end - start
1667 1667
1668 1668 return start, self._getsegment(start, length, df=df)
1669 1669
1670 1670 def _chunk(self, rev, df=None):
1671 1671 """Obtain a single decompressed chunk for a revision.
1672 1672
1673 1673 Accepts an integer revision and an optional already-open file handle
1674 1674 to be used for reading. If used, the seek position of the file will not
1675 1675 be preserved.
1676 1676
1677 1677 Returns a str holding uncompressed data for the requested revision.
1678 1678 """
1679 1679 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1680 1680
1681 1681 def _chunks(self, revs, df=None, targetsize=None):
1682 1682 """Obtain decompressed chunks for the specified revisions.
1683 1683
1684 1684 Accepts an iterable of numeric revisions that are assumed to be in
1685 1685 ascending order. Also accepts an optional already-open file handle
1686 1686 to be used for reading. If used, the seek position of the file will
1687 1687 not be preserved.
1688 1688
1689 1689 This function is similar to calling ``self._chunk()`` multiple times,
1690 1690 but is faster.
1691 1691
1692 1692 Returns a list with decompressed data for each requested revision.
1693 1693 """
1694 1694 if not revs:
1695 1695 return []
1696 1696 start = self.start
1697 1697 length = self.length
1698 1698 inline = self._inline
1699 1699 iosize = self.index.entry_size
1700 1700 buffer = util.buffer
1701 1701
1702 1702 l = []
1703 1703 ladd = l.append
1704 1704
1705 1705 if not self._withsparseread:
1706 1706 slicedchunks = (revs,)
1707 1707 else:
1708 1708 slicedchunks = deltautil.slicechunk(
1709 1709 self, revs, targetsize=targetsize
1710 1710 )
1711 1711
1712 1712 for revschunk in slicedchunks:
1713 1713 firstrev = revschunk[0]
1714 1714 # Skip trailing revisions with empty diff
1715 1715 for lastrev in revschunk[::-1]:
1716 1716 if length(lastrev) != 0:
1717 1717 break
1718 1718
1719 1719 try:
1720 1720 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1721 1721 except OverflowError:
1722 1722 # issue4215 - we can't cache a run of chunks greater than
1723 1723 # 2G on Windows
1724 1724 return [self._chunk(rev, df=df) for rev in revschunk]
1725 1725
1726 1726 decomp = self.decompress
1727 1727 for rev in revschunk:
1728 1728 chunkstart = start(rev)
1729 1729 if inline:
1730 1730 chunkstart += (rev + 1) * iosize
1731 1731 chunklength = length(rev)
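                # slice this revision's compressed chunk out of the shared
                # segment without copying; ``offset`` is the file position
                # where the segment begins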
1732 1732 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1733 1733
1734 1734 return l
1735 1735
1736 1736 def _chunkclear(self):
1737 1737 """Clear the raw chunk cache."""
1738 1738 self._chunkcache = (0, b'')
1739 1739
1740 1740 def deltaparent(self, rev):
1741 1741 """return deltaparent of the given revision"""
1742 1742 base = self.index[rev][3]
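        # entry[3] is the stored delta base; by convention it points at the
        # revision itself when the revision is stored as a full snapshot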
1743 1743 if base == rev:
1744 1744 return nullrev
1745 1745 elif self._generaldelta:
1746 1746 return base
1747 1747 else:
1748 1748 return rev - 1
1749 1749
1750 1750 def issnapshot(self, rev):
1751 1751 """tells whether rev is a snapshot"""
1752 1752 if not self._sparserevlog:
1753 1753 return self.deltaparent(rev) == nullrev
1754 1754 elif util.safehasattr(self.index, b'issnapshot'):
1755 1755 # directly assign the method to cache the testing and access
1756 1756 self.issnapshot = self.index.issnapshot
1757 1757 return self.issnapshot(rev)
1758 1758 if rev == nullrev:
1759 1759 return True
1760 1760 entry = self.index[rev]
1761 1761 base = entry[3]
1762 1762 if base == rev:
1763 1763 return True
1764 1764 if base == nullrev:
1765 1765 return True
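        # a revision stored as a delta against one of its parents is an
        # ordinary delta chain member; intermediate snapshots are deltas
        # against a base that is not a parent, and the recursive call below
        # checks that this base is itself a snapshot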
1766 1766 p1 = entry[5]
1767 1767 p2 = entry[6]
1768 1768 if base == p1 or base == p2:
1769 1769 return False
1770 1770 return self.issnapshot(base)
1771 1771
1772 1772 def snapshotdepth(self, rev):
1773 1773 """number of snapshot in the chain before this one"""
1774 1774 if not self.issnapshot(rev):
1775 1775             raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1776 1776 return len(self._deltachain(rev)[0]) - 1
1777 1777
1778 1778 def revdiff(self, rev1, rev2):
1779 1779 """return or calculate a delta between two revisions
1780 1780
1781 1781 The delta calculated is in binary form and is intended to be written to
1782 1782 revlog data directly. So this function needs raw revision data.
1783 1783 """
1784 1784 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1785 1785 return bytes(self._chunk(rev2))
1786 1786
1787 1787 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1788 1788
1789 1789 def _processflags(self, text, flags, operation, raw=False):
1790 1790 """deprecated entry point to access flag processors"""
1791 1791         msg = b'_processflags(...) is deprecated, use the specialized variant'
1792 1792 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1793 1793 if raw:
1794 1794 return text, flagutil.processflagsraw(self, text, flags)
1795 1795 elif operation == b'read':
1796 1796 return flagutil.processflagsread(self, text, flags)
1797 1797 else: # write operation
1798 1798 return flagutil.processflagswrite(self, text, flags)
1799 1799
1800 1800 def revision(self, nodeorrev, _df=None, raw=False):
1801 1801 """return an uncompressed revision of a given node or revision
1802 1802 number.
1803 1803
1804 1804 _df - an existing file handle to read from. (internal-only)
1805 1805 raw - an optional argument specifying if the revision data is to be
1806 1806 treated as raw data when applying flag transforms. 'raw' should be set
1807 1807 to True when generating changegroups or in debug commands.
1808 1808 """
1809 1809 if raw:
1810 1810 msg = (
1811 1811 b'revlog.revision(..., raw=True) is deprecated, '
1812 1812 b'use revlog.rawdata(...)'
1813 1813 )
1814 1814 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1815 1815 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1816 1816
1817 1817 def sidedata(self, nodeorrev, _df=None):
1818 1818 """a map of extra data related to the changeset but not part of the hash
1819 1819
1820 1820         This function currently returns a dictionary. However, a more
1821 1821         advanced mapping object will likely be used in the future for
1822 1822         more efficient/lazy code.
1823 1823 """
1824 1824 return self._revisiondata(nodeorrev, _df)[1]
1825 1825
1826 1826 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1827 1827 # deal with <nodeorrev> argument type
1828 1828 if isinstance(nodeorrev, int):
1829 1829 rev = nodeorrev
1830 1830 node = self.node(rev)
1831 1831 else:
1832 1832 node = nodeorrev
1833 1833 rev = None
1834 1834
1835 1835 # fast path the special `nullid` rev
1836 1836 if node == self.nullid:
1837 1837 return b"", {}
1838 1838
1839 1839 # ``rawtext`` is the text as stored inside the revlog. Might be the
1840 1840 # revision or might need to be processed to retrieve the revision.
1841 1841 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1842 1842
1843 1843 if self.hassidedata:
1844 1844 if rev is None:
1845 1845 rev = self.rev(node)
1846 1846 sidedata = self._sidedata(rev)
1847 1847 else:
1848 1848 sidedata = {}
1849 1849
1850 1850 if raw and validated:
1851 1851 # if we don't want to process the raw text and that raw
1852 1852 # text is cached, we can exit early.
1853 1853 return rawtext, sidedata
1854 1854 if rev is None:
1855 1855 rev = self.rev(node)
1856 1856 # the revlog's flag for this revision
1857 1857 # (usually alter its state or content)
1858 1858 flags = self.flags(rev)
1859 1859
1860 1860 if validated and flags == REVIDX_DEFAULT_FLAGS:
1861 1861 # no extra flags set, no flag processor runs, text = rawtext
1862 1862 return rawtext, sidedata
1863 1863
1864 1864 if raw:
1865 1865 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1866 1866 text = rawtext
1867 1867 else:
1868 1868 r = flagutil.processflagsread(self, rawtext, flags)
1869 1869 text, validatehash = r
1870 1870 if validatehash:
1871 1871 self.checkhash(text, node, rev=rev)
1872 1872 if not validated:
1873 1873 self._revisioncache = (node, rev, rawtext)
1874 1874
1875 1875 return text, sidedata
1876 1876
1877 1877 def _rawtext(self, node, rev, _df=None):
1878 1878 """return the possibly unvalidated rawtext for a revision
1879 1879
1880 1880 returns (rev, rawtext, validated)
1881 1881 """
1882 1882
1883 1883 # revision in the cache (could be useful to apply delta)
1884 1884 cachedrev = None
1885 1885 # An intermediate text to apply deltas to
1886 1886 basetext = None
1887 1887
1888 1888 # Check if we have the entry in cache
1889 1889 # The cache entry looks like (node, rev, rawtext)
1890 1890 if self._revisioncache:
1891 1891 if self._revisioncache[0] == node:
1892 1892 return (rev, self._revisioncache[2], True)
1893 1893 cachedrev = self._revisioncache[1]
1894 1894
1895 1895 if rev is None:
1896 1896 rev = self.rev(node)
1897 1897
1898 1898 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
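        # if the chain walk stopped at the cached revision, the cached
        # rawtext is the base onto which the remaining deltas are applied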
1899 1899 if stopped:
1900 1900 basetext = self._revisioncache[2]
1901 1901
1902 1902 # drop cache to save memory, the caller is expected to
1903 1903 # update self._revisioncache after validating the text
1904 1904 self._revisioncache = None
1905 1905
1906 1906 targetsize = None
1907 1907 rawsize = self.index[rev][2]
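        # bound how much chunk data a single sparse read may pull in; 4x
        # the expected fulltext size appears to be a heuristic trade-off
        # between read amplification and issuing many small reads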
1908 1908 if 0 <= rawsize:
1909 1909 targetsize = 4 * rawsize
1910 1910
1911 1911 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1912 1912 if basetext is None:
1913 1913 basetext = bytes(bins[0])
1914 1914 bins = bins[1:]
1915 1915
1916 1916 rawtext = mdiff.patches(basetext, bins)
1917 1917 del basetext # let us have a chance to free memory early
1918 1918 return (rev, rawtext, False)
1919 1919
1920 1920 def _sidedata(self, rev):
1921 1921 """Return the sidedata for a given revision number."""
1922 1922 index_entry = self.index[rev]
1923 1923 sidedata_offset = index_entry[8]
1924 1924 sidedata_size = index_entry[9]
1925 1925
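        # as with revision data, inline revlogs interleave index entries
        # and data, so the stored sidedata offset must be shifted
        # accordingly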
1926 1926 if self._inline:
1927 1927 sidedata_offset += self.index.entry_size * (1 + rev)
1928 1928 if sidedata_size == 0:
1929 1929 return {}
1930 1930
1931 1931 segment = self._getsegment(sidedata_offset, sidedata_size)
1932 1932 sidedata = sidedatautil.deserialize_sidedata(segment)
1933 1933 return sidedata
1934 1934
1935 1935 def rawdata(self, nodeorrev, _df=None):
1936 1936 """return an uncompressed raw data of a given node or revision number.
1937 1937
1938 1938 _df - an existing file handle to read from. (internal-only)
1939 1939 """
1940 1940 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1941 1941
1942 1942 def hash(self, text, p1, p2):
1943 1943 """Compute a node hash.
1944 1944
1945 1945 Available as a function so that subclasses can replace the hash
1946 1946 as needed.
1947 1947 """
1948 1948 return storageutil.hashrevisionsha1(text, p1, p2)
1949 1949
1950 1950 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1951 1951 """Check node hash integrity.
1952 1952
1953 1953 Available as a function so that subclasses can extend hash mismatch
1954 1954 behaviors as needed.
1955 1955 """
1956 1956 try:
1957 1957 if p1 is None and p2 is None:
1958 1958 p1, p2 = self.parents(node)
1959 1959 if node != self.hash(text, p1, p2):
1960 1960 # Clear the revision cache on hash failure. The revision cache
1961 1961 # only stores the raw revision and clearing the cache does have
1962 1962 # the side-effect that we won't have a cache hit when the raw
1963 1963 # revision data is accessed. But this case should be rare and
1964 1964 # it is extra work to teach the cache about the hash
1965 1965 # verification state.
1966 1966 if self._revisioncache and self._revisioncache[0] == node:
1967 1967 self._revisioncache = None
1968 1968
1969 1969 revornode = rev
1970 1970 if revornode is None:
1971 1971 revornode = templatefilters.short(hex(node))
1972 1972 raise error.RevlogError(
1973 1973 _(b"integrity check failed on %s:%s")
1974 1974 % (self.display_id, pycompat.bytestr(revornode))
1975 1975 )
1976 1976 except error.RevlogError:
1977 1977 if self._censorable and storageutil.iscensoredtext(text):
1978 1978 raise error.CensoredNodeError(self.display_id, node, text)
1979 1979 raise
1980 1980
1981 1981 def _enforceinlinesize(self, tr):
1982 1982 """Check if the revlog is too big for inline and convert if so.
1983 1983
1984 1984 This should be called after revisions are added to the revlog. If the
1985 1985 revlog has grown too large to be an inline revlog, it will convert it
1986 1986 to use multiple index and data files.
1987 1987 """
1988 1988 tiprev = len(self) - 1
1989 1989 total_size = self.start(tiprev) + self.length(tiprev)
1990 1990 if not self._inline or total_size < _maxinline:
1991 1991 return
1992 1992
1993 1993 troffset = tr.findoffset(self._indexfile)
1994 1994 if troffset is None:
1995 1995 raise error.RevlogError(
1996 1996 _(b"%s not found in the transaction") % self._indexfile
1997 1997 )
1998 1998 trindex = 0
1999 1999 tr.add(self._datafile, 0)
2000 2000
2001 2001 existing_handles = False
2002 2002 if self._writinghandles is not None:
2003 2003 existing_handles = True
2004 2004 fp = self._writinghandles[0]
2005 2005 fp.flush()
2006 2006 fp.close()
2007 2007 # We can't use the cached file handle after close(). So prevent
2008 2008 # its usage.
2009 2009 self._writinghandles = None
2010 2010
2011 2011 new_dfh = self._datafp(b'w+')
2012 2012 new_dfh.truncate(0) # drop any potentially existing data
2013 2013 try:
2014 2014 with self._indexfp() as read_ifh:
2015 2015 for r in self:
2016 2016 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2017 2017 if troffset <= self.start(r):
2018 2018 trindex = r
2019 2019 new_dfh.flush()
2020 2020
2021 2021 with self.__index_new_fp() as fp:
2022 2022 self._format_flags &= ~FLAG_INLINE_DATA
2023 2023 self._inline = False
2024 2024 for i in self:
2025 2025 e = self.index.entry_binary(i)
2026 2026 if i == 0:
2027 2027 header = self._format_flags | self._format_version
2028 2028 header = self.index.pack_header(header)
2029 2029 e = header + e
2030 2030 fp.write(e)
2031 2031                 # the temp file replaces the real index when we exit the
2032 2032                 # context manager
2033 2033
2034 2034 tr.replace(self._indexfile, trindex * self.index.entry_size)
2035 2035 nodemaputil.setup_persistent_nodemap(tr, self)
2036 2036 self._chunkclear()
2037 2037
2038 2038 if existing_handles:
2039 2039                 # switched from inline to conventional; reopen the index
2040 2040 ifh = self.__index_write_fp()
2041 2041 self._writinghandles = (ifh, new_dfh)
2042 2042 new_dfh = None
2043 2043 finally:
2044 2044 if new_dfh is not None:
2045 2045 new_dfh.close()
2046 2046
2047 2047 def _nodeduplicatecallback(self, transaction, node):
2048 2048 """called when trying to add a node already stored."""
2049 2049
2050 2050 @contextlib.contextmanager
2051 2051 def _writing(self, transaction):
2052 2052 if self._writinghandles is not None:
2053 2053 yield
2054 2054 else:
2055 2055 r = len(self)
2056 2056 dsize = 0
2057 2057 if r:
2058 2058 dsize = self.end(r - 1)
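            # dsize is the expected current end of the data file; it is
            # registered with the transaction below so that a rollback can
            # truncate any partially written append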
2059 2059 dfh = None
2060 2060 if not self._inline:
2061 2061 try:
2062 2062 dfh = self._datafp(b"r+")
2063 2063 dfh.seek(0, os.SEEK_END)
2064 2064 except IOError as inst:
2065 2065 if inst.errno != errno.ENOENT:
2066 2066 raise
2067 2067 dfh = self._datafp(b"w+")
2068 2068 transaction.add(self._datafile, dsize)
2069 2069 try:
2070 2070 isize = r * self.index.entry_size
2071 2071 ifh = self.__index_write_fp()
2072 2072 if self._inline:
2073 2073 transaction.add(self._indexfile, dsize + isize)
2074 2074 else:
2075 2075 transaction.add(self._indexfile, isize)
2076 2076 try:
2077 2077 self._writinghandles = (ifh, dfh)
2078 2078 try:
2079 2079 yield
2080 2080 finally:
2081 2081 self._writinghandles = None
2082 2082 finally:
2083 2083 ifh.close()
2084 2084 finally:
2085 2085 if dfh is not None:
2086 2086 dfh.close()
2087 2087
2088 2088 def addrevision(
2089 2089 self,
2090 2090 text,
2091 2091 transaction,
2092 2092 link,
2093 2093 p1,
2094 2094 p2,
2095 2095 cachedelta=None,
2096 2096 node=None,
2097 2097 flags=REVIDX_DEFAULT_FLAGS,
2098 2098 deltacomputer=None,
2099 2099 sidedata=None,
2100 2100 ):
2101 2101 """add a revision to the log
2102 2102
2103 2103 text - the revision data to add
2104 2104 transaction - the transaction object used for rollback
2105 2105 link - the linkrev data to add
2106 2106 p1, p2 - the parent nodeids of the revision
2107 2107 cachedelta - an optional precomputed delta
2108 2108 node - nodeid of revision; typically node is not specified, and it is
2109 2109 computed by default as hash(text, p1, p2), however subclasses might
2110 2110 use different hashing method (and override checkhash() in such case)
2111 2111 flags - the known flags to set on the revision
2112 2112 deltacomputer - an optional deltacomputer instance shared between
2113 2113 multiple calls
2114 2114 """
2115 2115 if link == nullrev:
2116 2116 raise error.RevlogError(
2117 2117 _(b"attempted to add linkrev -1 to %s") % self.display_id
2118 2118 )
2119 2119
2120 2120 if sidedata is None:
2121 2121 sidedata = {}
2122 2122 elif sidedata and not self.hassidedata:
2123 2123 raise error.ProgrammingError(
2124 2124 _(b"trying to add sidedata to a revlog who don't support them")
2125 2125 )
2126 2126
2127 2127 if flags:
2128 2128 node = node or self.hash(text, p1, p2)
2129 2129
2130 2130 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2131 2131
2132 2132 # If the flag processor modifies the revision data, ignore any provided
2133 2133 # cachedelta.
2134 2134 if rawtext != text:
2135 2135 cachedelta = None
2136 2136
2137 2137 if len(rawtext) > _maxentrysize:
2138 2138 raise error.RevlogError(
2139 2139 _(
2140 2140 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2141 2141 )
2142 2142 % (self.display_id, len(rawtext))
2143 2143 )
2144 2144
2145 2145 node = node or self.hash(rawtext, p1, p2)
2146 2146 rev = self.index.get_rev(node)
2147 2147 if rev is not None:
2148 2148 return rev
2149 2149
2150 2150 if validatehash:
2151 2151 self.checkhash(rawtext, node, p1=p1, p2=p2)
2152 2152
2153 2153 return self.addrawrevision(
2154 2154 rawtext,
2155 2155 transaction,
2156 2156 link,
2157 2157 p1,
2158 2158 p2,
2159 2159 node,
2160 2160 flags,
2161 2161 cachedelta=cachedelta,
2162 2162 deltacomputer=deltacomputer,
2163 2163 sidedata=sidedata,
2164 2164 )
2165 2165
2166 2166 def addrawrevision(
2167 2167 self,
2168 2168 rawtext,
2169 2169 transaction,
2170 2170 link,
2171 2171 p1,
2172 2172 p2,
2173 2173 node,
2174 2174 flags,
2175 2175 cachedelta=None,
2176 2176 deltacomputer=None,
2177 2177 sidedata=None,
2178 2178 ):
2179 2179 """add a raw revision with known flags, node and parents
2180 2180         useful when reusing a revision not stored in this revlog (e.g. received
2181 2181         over the wire, or read from an external bundle).
2182 2182 """
2183 2183 with self._writing(transaction):
2184 2184 return self._addrevision(
2185 2185 node,
2186 2186 rawtext,
2187 2187 transaction,
2188 2188 link,
2189 2189 p1,
2190 2190 p2,
2191 2191 flags,
2192 2192 cachedelta,
2193 2193 deltacomputer=deltacomputer,
2194 2194 sidedata=sidedata,
2195 2195 )
2196 2196
2197 2197 def compress(self, data):
2198 2198 """Generate a possibly-compressed representation of data."""
2199 2199 if not data:
2200 2200 return b'', data
2201 2201
2202 2202 compressed = self._compressor.compress(data)
2203 2203
2204 2204 if compressed:
2205 2205 # The revlog compressor added the header in the returned data.
2206 2206 return b'', compressed
2207 2207
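        # the data did not compress to any advantage. A chunk whose first
        # byte is NUL needs no marker (decompress treats a leading NUL as
        # raw data); anything else gets an explicit b'u' "uncompressed"
        # header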
2208 2208 if data[0:1] == b'\0':
2209 2209 return b'', data
2210 2210 return b'u', data
2211 2211
2212 2212 def decompress(self, data):
2213 2213 """Decompress a revlog chunk.
2214 2214
2215 2215 The chunk is expected to begin with a header identifying the
2216 2216 format type so it can be routed to an appropriate decompressor.
2217 2217 """
2218 2218 if not data:
2219 2219 return data
2220 2220
2221 2221 # Revlogs are read much more frequently than they are written and many
2222 2222 # chunks only take microseconds to decompress, so performance is
2223 2223 # important here.
2224 2224 #
2225 2225 # We can make a few assumptions about revlogs:
2226 2226 #
2227 2227 # 1) the majority of chunks will be compressed (as opposed to inline
2228 2228 # raw data).
2229 2229         # 2) decompressing *any* data will likely be at least 10x slower than
2230 2230 # returning raw inline data.
2231 2231 # 3) we want to prioritize common and officially supported compression
2232 2232 # engines
2233 2233 #
2234 2234 # It follows that we want to optimize for "decompress compressed data
2235 2235 # when encoded with common and officially supported compression engines"
2236 2236 # case over "raw data" and "data encoded by less common or non-official
2237 2237 # compression engines." That is why we have the inline lookup first
2238 2238 # followed by the compengines lookup.
2239 2239 #
2240 2240 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2241 2241 # compressed chunks. And this matters for changelog and manifest reads.
2242 2242 t = data[0:1]
2243 2243
2244 2244 if t == b'x':
2245 2245 try:
2246 2246 return _zlibdecompress(data)
2247 2247 except zlib.error as e:
2248 2248 raise error.RevlogError(
2249 2249 _(b'revlog decompress error: %s')
2250 2250 % stringutil.forcebytestr(e)
2251 2251 )
2252 2252 # '\0' is more common than 'u' so it goes first.
2253 2253 elif t == b'\0':
2254 2254 return data
2255 2255 elif t == b'u':
2256 2256 return util.buffer(data, 1)
2257 2257
2258 2258 try:
2259 2259 compressor = self._decompressors[t]
2260 2260 except KeyError:
2261 2261 try:
2262 2262 engine = util.compengines.forrevlogheader(t)
2263 2263 compressor = engine.revlogcompressor(self._compengineopts)
2264 2264 self._decompressors[t] = compressor
2265 2265 except KeyError:
2266 2266 raise error.RevlogError(
2267 2267 _(b'unknown compression type %s') % binascii.hexlify(t)
2268 2268 )
2269 2269
2270 2270 return compressor.decompress(data)
2271 2271
2272 2272 def _addrevision(
2273 2273 self,
2274 2274 node,
2275 2275 rawtext,
2276 2276 transaction,
2277 2277 link,
2278 2278 p1,
2279 2279 p2,
2280 2280 flags,
2281 2281 cachedelta,
2282 2282 alwayscache=False,
2283 2283 deltacomputer=None,
2284 2284 sidedata=None,
2285 2285 ):
2286 2286 """internal function to add revisions to the log
2287 2287
2288 2288 see addrevision for argument descriptions.
2289 2289
2290 2290 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2291 2291
2292 2292 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2293 2293 be used.
2294 2294
2295 2295 invariants:
2296 2296 - rawtext is optional (can be None); if not set, cachedelta must be set.
2297 2297 if both are set, they must correspond to each other.
2298 2298 """
2299 2299 if node == self.nullid:
2300 2300 raise error.RevlogError(
2301 2301 _(b"%s: attempt to add null revision") % self.display_id
2302 2302 )
2303 2303 if (
2304 2304 node == self.nodeconstants.wdirid
2305 2305 or node in self.nodeconstants.wdirfilenodeids
2306 2306 ):
2307 2307 raise error.RevlogError(
2308 2308 _(b"%s: attempt to add wdir revision") % self.display_id
2309 2309 )
2310 2310 if self._writinghandles is None:
2311 2311 msg = b'adding revision outside `revlog._writing` context'
2312 2312 raise error.ProgrammingError(msg)
2313 2313
2314 2314 if self._inline:
2315 2315 fh = self._writinghandles[0]
2316 2316 else:
2317 2317 fh = self._writinghandles[1]
2318 2318
2319 2319 btext = [rawtext]
2320 2320
2321 2321 curr = len(self)
2322 2322 prev = curr - 1
2323 2323
2324 2324 offset = self._get_data_offset(prev)
2325 2325
2326 2326 if self._concurrencychecker:
2327 2327 ifh, dfh = self._writinghandles
2328 2328 if self._inline:
2329 2329 # offset is "as if" it were in the .d file, so we need to add on
2330 2330 # the size of the entry metadata.
2331 2331 self._concurrencychecker(
2332 2332 ifh, self._indexfile, offset + curr * self.index.entry_size
2333 2333 )
2334 2334 else:
2335 2335 # Entries in the .i are a consistent size.
2336 2336 self._concurrencychecker(
2337 2337 ifh, self._indexfile, curr * self.index.entry_size
2338 2338 )
2339 2339 self._concurrencychecker(dfh, self._datafile, offset)
2340 2340
2341 2341 p1r, p2r = self.rev(p1), self.rev(p2)
2342 2342
2343 2343 # full versions are inserted when the needed deltas
2344 2344 # become comparable to the uncompressed text
2345 2345 if rawtext is None:
2346 2346             # need the rawtext size before it was changed by flag processors,
2347 2347             # i.e. the non-raw size. use revlog explicitly to avoid filelog's
2348 2348             # extra logic that might remove metadata size.
2349 2349 textlen = mdiff.patchedsize(
2350 2350 revlog.size(self, cachedelta[0]), cachedelta[1]
2351 2351 )
2352 2352 else:
2353 2353 textlen = len(rawtext)
2354 2354
2355 2355 if deltacomputer is None:
2356 2356 deltacomputer = deltautil.deltacomputer(self)
2357 2357
2358 2358 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2359 2359
2360 2360 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2361 2361
2362 2362 if sidedata and self.hassidedata:
2363 2363 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2364 2364 sidedata_offset = offset + deltainfo.deltalen
2365 2365 else:
2366 2366 serialized_sidedata = b""
2367 2367             # Don't store the offset if the sidedata is empty; that way
2368 2368             # we can easily detect empty sidedata, and it will be no different
2369 2369             # from sidedata we add manually.
2370 2370 sidedata_offset = 0
2371 2371
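        # the new index entry, in field order: packed offset+flags, length
        # of the stored (delta) data, raw text length, delta base, linkrev,
        # both parent revs, node, and the sidedata offset/length pair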
2372 2372 e = (
2373 2373 offset_type(offset, flags),
2374 2374 deltainfo.deltalen,
2375 2375 textlen,
2376 2376 deltainfo.base,
2377 2377 link,
2378 2378 p1r,
2379 2379 p2r,
2380 2380 node,
2381 2381 sidedata_offset,
2382 2382 len(serialized_sidedata),
2383 2383 )
2384 2384
2385 2385 self.index.append(e)
2386 2386 entry = self.index.entry_binary(curr)
2387 2387 if curr == 0:
2388 2388 header = self._format_flags | self._format_version
2389 2389 header = self.index.pack_header(header)
2390 2390 entry = header + entry
2391 2391 self._writeentry(
2392 2392 transaction,
2393 2393 entry,
2394 2394 deltainfo.data,
2395 2395 link,
2396 2396 offset,
2397 2397 serialized_sidedata,
2398 2398 )
2399 2399
2400 2400 rawtext = btext[0]
2401 2401
2402 2402 if alwayscache and rawtext is None:
2403 2403 rawtext = deltacomputer.buildtext(revinfo, fh)
2404 2404
2405 2405 if type(rawtext) == bytes: # only accept immutable objects
2406 2406 self._revisioncache = (node, curr, rawtext)
2407 2407 self._chainbasecache[curr] = deltainfo.chainbase
2408 2408 return curr
2409 2409
2410 2410 def _get_data_offset(self, prev):
2411 2411 """Returns the current offset in the (in-transaction) data file.
2412 2412         Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2413 2413 file to store that information: since sidedata can be rewritten to the
2414 2414 end of the data file within a transaction, you can have cases where, for
2415 2415 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2416 2416 to `n - 1`'s sidedata being written after `n`'s data.
2417 2417
2418 2418 TODO cache this in a docket file before getting out of experimental."""
2419 2419 if self._format_version != REVLOGV2:
2420 2420 return self.end(prev)
2421 2421
2422 2422 offset = 0
2423 2423 for rev, entry in enumerate(self.index):
2424 2424 sidedata_end = entry[8] + entry[9]
2425 2425 # Sidedata for a previous rev has potentially been written after
2426 2426 # this rev's end, so take the max.
2427 2427 offset = max(self.end(rev), offset, sidedata_end)
2428 2428 return offset
2429 2429
2430 2430 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2431 2431 # Files opened in a+ mode have inconsistent behavior on various
2432 2432 # platforms. Windows requires that a file positioning call be made
2433 2433 # when the file handle transitions between reads and writes. See
2434 2434 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2435 2435 # platforms, Python or the platform itself can be buggy. Some versions
2436 2436 # of Solaris have been observed to not append at the end of the file
2437 2437 # if the file was seeked to before the end. See issue4943 for more.
2438 2438 #
2439 2439 # We work around this issue by inserting a seek() before writing.
2440 2440 # Note: This is likely not necessary on Python 3. However, because
2441 2441 # the file handle is reused for reads and may be seeked there, we need
2442 2442 # to be careful before changing this.
2443 2443 if self._writinghandles is None:
2444 2444 msg = b'adding revision outside `revlog._writing` context'
2445 2445 raise error.ProgrammingError(msg)
2446 2446 ifh, dfh = self._writinghandles
2447 2447 ifh.seek(0, os.SEEK_END)
2448 2448 if dfh:
2449 2449 dfh.seek(0, os.SEEK_END)
2450 2450
2451 2451 curr = len(self) - 1
2452 2452 if not self._inline:
2453 2453 transaction.add(self._datafile, offset)
2454 2454 transaction.add(self._indexfile, curr * len(entry))
2455 2455 if data[0]:
2456 2456 dfh.write(data[0])
2457 2457 dfh.write(data[1])
2458 2458 if sidedata:
2459 2459 dfh.write(sidedata)
2460 2460 ifh.write(entry)
2461 2461 else:
2462 2462 offset += curr * self.index.entry_size
2463 2463 transaction.add(self._indexfile, offset)
2464 2464 ifh.write(entry)
2465 2465 ifh.write(data[0])
2466 2466 ifh.write(data[1])
2467 2467 if sidedata:
2468 2468 ifh.write(sidedata)
2469 2469 self._enforceinlinesize(transaction)
2470 2470 nodemaputil.setup_persistent_nodemap(transaction, self)
2471 2471
2472 2472 def addgroup(
2473 2473 self,
2474 2474 deltas,
2475 2475 linkmapper,
2476 2476 transaction,
2477 2477 alwayscache=False,
2478 2478 addrevisioncb=None,
2479 2479 duplicaterevisioncb=None,
2480 2480 ):
2481 2481 """
2482 2482 add a delta group
2483 2483
2484 2484 given a set of deltas, add them to the revision log. the
2485 2485 first delta is against its parent, which should be in our
2486 2486 log, the rest are against the previous delta.
2487 2487
2488 2488 If ``addrevisioncb`` is defined, it will be called with arguments of
2489 2489 this revlog and the node that was added.
2490 2490 """
2491 2491
2492 2492 if self._adding_group:
2493 2493 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2494 2494
2495 2495 self._adding_group = True
2496 2496 empty = True
2497 2497 try:
2498 2498 with self._writing(transaction):
2499 2499 deltacomputer = deltautil.deltacomputer(self)
2500 2500 # loop through our set of deltas
2501 2501 for data in deltas:
2502 2502 (
2503 2503 node,
2504 2504 p1,
2505 2505 p2,
2506 2506 linknode,
2507 2507 deltabase,
2508 2508 delta,
2509 2509 flags,
2510 2510 sidedata,
2511 2511 ) = data
2512 2512 link = linkmapper(linknode)
2513 2513 flags = flags or REVIDX_DEFAULT_FLAGS
2514 2514
2515 2515 rev = self.index.get_rev(node)
2516 2516 if rev is not None:
2517 2517 # this can happen if two branches make the same change
2518 2518 self._nodeduplicatecallback(transaction, rev)
2519 2519 if duplicaterevisioncb:
2520 2520 duplicaterevisioncb(self, rev)
2521 2521 empty = False
2522 2522 continue
2523 2523
2524 2524 for p in (p1, p2):
2525 2525 if not self.index.has_node(p):
2526 2526 raise error.LookupError(
2527 2527 p, self.radix, _(b'unknown parent')
2528 2528 )
2529 2529
2530 2530 if not self.index.has_node(deltabase):
2531 2531 raise error.LookupError(
2532 2532 deltabase, self.display_id, _(b'unknown delta base')
2533 2533 )
2534 2534
2535 2535 baserev = self.rev(deltabase)
2536 2536
2537 2537 if baserev != nullrev and self.iscensored(baserev):
2538 2538 # if base is censored, delta must be full replacement in a
2539 2539 # single patch operation
2540 2540 hlen = struct.calcsize(b">lll")
2541 2541 oldlen = self.rawsize(baserev)
2542 2542 newlen = len(delta) - hlen
2543 2543 if delta[:hlen] != mdiff.replacediffheader(
2544 2544 oldlen, newlen
2545 2545 ):
2546 2546 raise error.CensoredBaseError(
2547 2547 self.display_id, self.node(baserev)
2548 2548 )
2549 2549
2550 2550 if not flags and self._peek_iscensored(baserev, delta):
2551 2551 flags |= REVIDX_ISCENSORED
2552 2552
2553 2553 # We assume consumers of addrevisioncb will want to retrieve
2554 2554 # the added revision, which will require a call to
2555 2555 # revision(). revision() will fast path if there is a cache
2556 2556 # hit. So, we tell _addrevision() to always cache in this case.
2557 2557 # We're only using addgroup() in the context of changegroup
2558 2558 # generation so the revision data can always be handled as raw
2559 2559 # by the flagprocessor.
2560 2560 rev = self._addrevision(
2561 2561 node,
2562 2562 None,
2563 2563 transaction,
2564 2564 link,
2565 2565 p1,
2566 2566 p2,
2567 2567 flags,
2568 2568 (baserev, delta),
2569 2569 alwayscache=alwayscache,
2570 2570 deltacomputer=deltacomputer,
2571 2571 sidedata=sidedata,
2572 2572 )
2573 2573
2574 2574 if addrevisioncb:
2575 2575 addrevisioncb(self, rev)
2576 2576 empty = False
2577 2577 finally:
2578 2578 self._adding_group = False
2579 2579 return not empty
2580 2580
2581 2581 def iscensored(self, rev):
2582 2582 """Check if a file revision is censored."""
2583 2583 if not self._censorable:
2584 2584 return False
2585 2585
2586 2586 return self.flags(rev) & REVIDX_ISCENSORED
2587 2587
2588 2588 def _peek_iscensored(self, baserev, delta):
2589 2589 """Quickly check if a delta produces a censored revision."""
2590 2590 if not self._censorable:
2591 2591 return False
2592 2592
2593 2593 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2594 2594
2595 2595 def getstrippoint(self, minlink):
2596 2596 """find the minimum rev that must be stripped to strip the linkrev
2597 2597
2598 2598 Returns a tuple containing the minimum rev and a set of all revs that
2599 2599 have linkrevs that will be broken by this strip.
2600 2600 """
2601 2601 return storageutil.resolvestripinfo(
2602 2602 minlink,
2603 2603 len(self) - 1,
2604 2604 self.headrevs(),
2605 2605 self.linkrev,
2606 2606 self.parentrevs,
2607 2607 )
2608 2608
2609 2609 def strip(self, minlink, transaction):
2610 2610 """truncate the revlog on the first revision with a linkrev >= minlink
2611 2611
2612 2612 This function is called when we're stripping revision minlink and
2613 2613 its descendants from the repository.
2614 2614
2615 2615 We have to remove all revisions with linkrev >= minlink, because
2616 2616 the equivalent changelog revisions will be renumbered after the
2617 2617 strip.
2618 2618
2619 2619 So we truncate the revlog on the first of these revisions, and
2620 2620 trust that the caller has saved the revisions that shouldn't be
2621 2621 removed and that it'll re-add them after this truncation.
2622 2622 """
2623 2623 if len(self) == 0:
2624 2624 return
2625 2625
2626 2626 rev, _ = self.getstrippoint(minlink)
2627 2627 if rev == len(self):
2628 2628 return
2629 2629
2630 2630 # first truncate the files on disk
2631 2631 end = self.start(rev)
2632 2632 if not self._inline:
2633 2633 transaction.add(self._datafile, end)
2634 2634 end = rev * self.index.entry_size
2635 2635 else:
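            # for inline revlogs the truncation point must also account for
            # the index entries interleaved with the data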
2636 2636 end += rev * self.index.entry_size
2637 2637
2638 2638 transaction.add(self._indexfile, end)
2639 2639
2640 2640 # then reset internal state in memory to forget those revisions
2641 2641 self._revisioncache = None
2642 2642 self._chaininfocache = util.lrucachedict(500)
2643 2643 self._chunkclear()
2644 2644
2645 2645 del self.index[rev:-1]
2646 2646
2647 2647 def checksize(self):
2648 2648 """Check size of index and data files
2649 2649
2650 2650 return a (dd, di) tuple.
2651 2651 - dd: extra bytes for the "data" file
2652 2652 - di: extra bytes for the "index" file
2653 2653
2654 2654 A healthy revlog will return (0, 0).
2655 2655 """
2656 2656 expected = 0
2657 2657 if len(self):
2658 2658 expected = max(0, self.end(len(self) - 1))
2659 2659
2660 2660 try:
2661 2661 with self._datafp() as f:
2662 2662 f.seek(0, io.SEEK_END)
2663 2663 actual = f.tell()
2664 2664 dd = actual - expected
2665 2665 except IOError as inst:
2666 2666 if inst.errno != errno.ENOENT:
2667 2667 raise
2668 2668 dd = 0
2669 2669
2670 2670 try:
2671 2671 f = self.opener(self._indexfile)
2672 2672 f.seek(0, io.SEEK_END)
2673 2673 actual = f.tell()
2674 2674 f.close()
2675 2675 s = self.index.entry_size
2676 2676 i = max(0, actual // s)
2677 2677 di = actual - (i * s)
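            # for a non-inline revlog, any remainder is a truncated
            # trailing index entry; the inline branch below recomputes di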
2678 2678 if self._inline:
2679 2679 databytes = 0
2680 2680 for r in self:
2681 2681 databytes += max(0, self.length(r))
2682 2682 dd = 0
2683 2683 di = actual - len(self) * s - databytes
2684 2684 except IOError as inst:
2685 2685 if inst.errno != errno.ENOENT:
2686 2686 raise
2687 2687 di = 0
2688 2688
2689 2689 return (dd, di)
2690 2690
2691 2691 def files(self):
2692 2692 res = [self._indexfile]
2693 2693 if not self._inline:
2694 2694 res.append(self._datafile)
2695 2695 return res
2696 2696
2697 2697 def emitrevisions(
2698 2698 self,
2699 2699 nodes,
2700 2700 nodesorder=None,
2701 2701 revisiondata=False,
2702 2702 assumehaveparentrevisions=False,
2703 2703 deltamode=repository.CG_DELTAMODE_STD,
2704 2704 sidedata_helpers=None,
2705 2705 ):
2706 2706 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2707 2707 raise error.ProgrammingError(
2708 2708 b'unhandled value for nodesorder: %s' % nodesorder
2709 2709 )
2710 2710
2711 2711 if nodesorder is None and not self._generaldelta:
2712 2712 nodesorder = b'storage'
2713 2713
2714 2714 if (
2715 2715 not self._storedeltachains
2716 2716 and deltamode != repository.CG_DELTAMODE_PREV
2717 2717 ):
2718 2718 deltamode = repository.CG_DELTAMODE_FULL
2719 2719
2720 2720 return storageutil.emitrevisions(
2721 2721 self,
2722 2722 nodes,
2723 2723 nodesorder,
2724 2724 revlogrevisiondelta,
2725 2725 deltaparentfn=self.deltaparent,
2726 2726 candeltafn=self.candelta,
2727 2727 rawsizefn=self.rawsize,
2728 2728 revdifffn=self.revdiff,
2729 2729 flagsfn=self.flags,
2730 2730 deltamode=deltamode,
2731 2731 revisiondata=revisiondata,
2732 2732 assumehaveparentrevisions=assumehaveparentrevisions,
2733 2733 sidedata_helpers=sidedata_helpers,
2734 2734 )
2735 2735
2736 2736 DELTAREUSEALWAYS = b'always'
2737 2737 DELTAREUSESAMEREVS = b'samerevs'
2738 2738 DELTAREUSENEVER = b'never'
2739 2739
2740 2740 DELTAREUSEFULLADD = b'fulladd'
2741 2741
2742 2742 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2743 2743
2744 2744 def clone(
2745 2745 self,
2746 2746 tr,
2747 2747 destrevlog,
2748 2748 addrevisioncb=None,
2749 2749 deltareuse=DELTAREUSESAMEREVS,
2750 2750 forcedeltabothparents=None,
2751 2751 sidedata_helpers=None,
2752 2752 ):
2753 2753 """Copy this revlog to another, possibly with format changes.
2754 2754
2755 2755 The destination revlog will contain the same revisions and nodes.
2756 2756 However, it may not be bit-for-bit identical due to e.g. delta encoding
2757 2757 differences.
2758 2758
2759 2759         The ``deltareuse`` argument controls how deltas from the existing revlog
2760 2760 are preserved in the destination revlog. The argument can have the
2761 2761 following values:
2762 2762
2763 2763 DELTAREUSEALWAYS
2764 2764 Deltas will always be reused (if possible), even if the destination
2765 2765 revlog would not select the same revisions for the delta. This is the
2766 2766 fastest mode of operation.
2767 2767 DELTAREUSESAMEREVS
2768 2768 Deltas will be reused if the destination revlog would pick the same
2769 2769 revisions for the delta. This mode strikes a balance between speed
2770 2770 and optimization.
2771 2771 DELTAREUSENEVER
2772 2772 Deltas will never be reused. This is the slowest mode of execution.
2773 2773 This mode can be used to recompute deltas (e.g. if the diff/delta
2774 2774 algorithm changes).
2775 2775 DELTAREUSEFULLADD
2776 2776             Revisions will be re-added as if they were new content. This is
2777 2777             slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2778 2778             e.g. large file detection and handling.
2779 2779
2780 2780 Delta computation can be slow, so the choice of delta reuse policy can
2781 2781 significantly affect run time.
2782 2782
2783 2783 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2784 2784 two extremes. Deltas will be reused if they are appropriate. But if the
2785 2785 delta could choose a better revision, it will do so. This means if you
2786 2786 are converting a non-generaldelta revlog to a generaldelta revlog,
2787 2787 deltas will be recomputed if the delta's parent isn't a parent of the
2788 2788 revision.
2789 2789
2790 2790 In addition to the delta policy, the ``forcedeltabothparents``
2791 2791         argument controls whether to force computing deltas against both parents
2792 2792         for merges. When unset, the destination revlog's current setting is used.
2793 2793
2794 2794 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2795 2795 `sidedata_helpers`.
2796 2796 """
2797 2797 if deltareuse not in self.DELTAREUSEALL:
2798 2798 raise ValueError(
2799 2799 _(b'value for deltareuse invalid: %s') % deltareuse
2800 2800 )
2801 2801
2802 2802 if len(destrevlog):
2803 2803 raise ValueError(_(b'destination revlog is not empty'))
2804 2804
2805 2805 if getattr(self, 'filteredrevs', None):
2806 2806 raise ValueError(_(b'source revlog has filtered revisions'))
2807 2807 if getattr(destrevlog, 'filteredrevs', None):
2808 2808 raise ValueError(_(b'destination revlog has filtered revisions'))
2809 2809
2810 2810 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2811 2811 # if possible.
2812 2812 oldlazydelta = destrevlog._lazydelta
2813 2813 oldlazydeltabase = destrevlog._lazydeltabase
2814 2814 oldamd = destrevlog._deltabothparents
2815 2815
2816 2816 try:
2817 2817 if deltareuse == self.DELTAREUSEALWAYS:
2818 2818 destrevlog._lazydeltabase = True
2819 2819 destrevlog._lazydelta = True
2820 2820 elif deltareuse == self.DELTAREUSESAMEREVS:
2821 2821 destrevlog._lazydeltabase = False
2822 2822 destrevlog._lazydelta = True
2823 2823 elif deltareuse == self.DELTAREUSENEVER:
2824 2824 destrevlog._lazydeltabase = False
2825 2825 destrevlog._lazydelta = False
2826 2826
2827 2827 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2828 2828
2829 2829 self._clone(
2830 2830 tr,
2831 2831 destrevlog,
2832 2832 addrevisioncb,
2833 2833 deltareuse,
2834 2834 forcedeltabothparents,
2835 2835 sidedata_helpers,
2836 2836 )
2837 2837
2838 2838 finally:
2839 2839 destrevlog._lazydelta = oldlazydelta
2840 2840 destrevlog._lazydeltabase = oldlazydeltabase
2841 2841 destrevlog._deltabothparents = oldamd
2842 2842
2843 2843 def _clone(
2844 2844 self,
2845 2845 tr,
2846 2846 destrevlog,
2847 2847 addrevisioncb,
2848 2848 deltareuse,
2849 2849 forcedeltabothparents,
2850 2850 sidedata_helpers,
2851 2851 ):
2852 2852 """perform the core duty of `revlog.clone` after parameter processing"""
2853 2853 deltacomputer = deltautil.deltacomputer(destrevlog)
2854 2854 index = self.index
2855 2855 for rev in self:
2856 2856 entry = index[rev]
2857 2857
2858 2858 # Some classes override linkrev to take filtered revs into
2859 2859 # account. Use raw entry from index.
2860 2860 flags = entry[0] & 0xFFFF
2861 2861 linkrev = entry[4]
2862 2862 p1 = index[entry[5]][7]
2863 2863 p2 = index[entry[6]][7]
2864 2864 node = entry[7]
2865 2865
2866 2866 # (Possibly) reuse the delta from the revlog if allowed and
2867 2867 # the revlog chunk is a delta.
2868 2868 cachedelta = None
2869 2869 rawtext = None
2870 2870 if deltareuse == self.DELTAREUSEFULLADD:
2871 2871 text, sidedata = self._revisiondata(rev)
2872 2872
2873 2873 if sidedata_helpers is not None:
2874 2874 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2875 2875 self, sidedata_helpers, sidedata, rev
2876 2876 )
2877 2877 flags = flags | new_flags[0] & ~new_flags[1]
2878 2878
2879 2879 destrevlog.addrevision(
2880 2880 text,
2881 2881 tr,
2882 2882 linkrev,
2883 2883 p1,
2884 2884 p2,
2885 2885 cachedelta=cachedelta,
2886 2886 node=node,
2887 2887 flags=flags,
2888 2888 deltacomputer=deltacomputer,
2889 2889 sidedata=sidedata,
2890 2890 )
2891 2891 else:
2892 2892 if destrevlog._lazydelta:
2893 2893 dp = self.deltaparent(rev)
2894 2894 if dp != nullrev:
2895 2895 cachedelta = (dp, bytes(self._chunk(rev)))
2896 2896
2897 2897 sidedata = None
2898 2898 if not cachedelta:
2899 2899 rawtext, sidedata = self._revisiondata(rev)
2900 2900 if sidedata is None:
2901 2901 sidedata = self.sidedata(rev)
2902 2902
2903 2903 if sidedata_helpers is not None:
2904 2904 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2905 2905 self, sidedata_helpers, sidedata, rev
2906 2906 )
2907 2907 flags = flags | new_flags[0] & ~new_flags[1]
2908 2908
2909 2909 with destrevlog._writing(tr):
2910 2910 destrevlog._addrevision(
2911 2911 node,
2912 2912 rawtext,
2913 2913 tr,
2914 2914 linkrev,
2915 2915 p1,
2916 2916 p2,
2917 2917 flags,
2918 2918 cachedelta,
2919 2919 deltacomputer=deltacomputer,
2920 2920 sidedata=sidedata,
2921 2921 )
2922 2922
2923 2923 if addrevisioncb:
2924 2924 addrevisioncb(self, rev, node)
2925 2925
2926 2926 def censorrevision(self, tr, censornode, tombstone=b''):
2927 2927 if self._format_version == REVLOGV0:
2928 2928 raise error.RevlogError(
2929 2929 _(b'cannot censor with version %d revlogs')
2930 2930 % self._format_version
2931 2931 )
2932 2932
2933 2933 censorrev = self.rev(censornode)
2934 2934 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2935 2935
2936 2936 if len(tombstone) > self.rawsize(censorrev):
2937 2937 raise error.Abort(
2938 2938 _(b'censor tombstone must be no longer than censored data')
2939 2939 )
2940 2940
2941 2941 # Rewriting the revlog in place is hard. Our strategy for censoring is
2942 2942 # to create a new revlog, copy all revisions to it, then replace the
2943 2943 # revlogs on transaction close.
2944 2944 #
2945 2945 # This is a bit dangerous. We could easily have a mismatch of state.
2946 2946 newrl = revlog(
2947 2947 self.opener,
2948 2948 target=self.target,
2949 2949 radix=self.radix,
2950 2950 postfix=b'tmpcensored',
2951 2951 censorable=True,
2952 2952 )
2953 2953 newrl._format_version = self._format_version
2954 2954 newrl._format_flags = self._format_flags
2955 2955 newrl._generaldelta = self._generaldelta
2956 2956 newrl._parse_index = self._parse_index
2957 2957
2958 2958 for rev in self.revs():
2959 2959 node = self.node(rev)
2960 2960 p1, p2 = self.parents(node)
2961 2961
2962 2962 if rev == censorrev:
2963 2963 newrl.addrawrevision(
2964 2964 tombstone,
2965 2965 tr,
2966 2966 self.linkrev(censorrev),
2967 2967 p1,
2968 2968 p2,
2969 2969 censornode,
2970 2970 REVIDX_ISCENSORED,
2971 2971 )
2972 2972
2973 2973 if newrl.deltaparent(rev) != nullrev:
2974 2974 raise error.Abort(
2975 2975 _(
2976 2976 b'censored revision stored as delta; '
2977 2977 b'cannot censor'
2978 2978 ),
2979 2979 hint=_(
2980 2980 b'censoring of revlogs is not '
2981 2981 b'fully implemented; please report '
2982 2982 b'this bug'
2983 2983 ),
2984 2984 )
2985 2985 continue
2986 2986
2987 2987 if self.iscensored(rev):
2988 2988 if self.deltaparent(rev) != nullrev:
2989 2989 raise error.Abort(
2990 2990 _(
2991 2991 b'cannot censor due to censored '
2992 2992 b'revision having delta stored'
2993 2993 )
2994 2994 )
2995 2995 rawtext = self._chunk(rev)
2996 2996 else:
2997 2997 rawtext = self.rawdata(rev)
2998 2998
2999 2999 newrl.addrawrevision(
3000 3000 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3001 3001 )
3002 3002
3003 3003 tr.addbackup(self._indexfile, location=b'store')
3004 3004 if not self._inline:
3005 3005 tr.addbackup(self._datafile, location=b'store')
3006 3006
3007 3007 self.opener.rename(newrl._indexfile, self._indexfile)
3008 3008 if not self._inline:
3009 3009 self.opener.rename(newrl._datafile, self._datafile)
3010 3010
3011 3011 self.clearcaches()
3012 3012 self._loadindex()
3013 3013
3014 3014 def verifyintegrity(self, state):
3015 3015 """Verifies the integrity of the revlog.
3016 3016
3017 3017 Yields ``revlogproblem`` instances describing problems that are
3018 3018 found.
3019 3019 """
3020 3020 dd, di = self.checksize()
3021 3021 if dd:
3022 3022 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3023 3023 if di:
3024 3024 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3025 3025
3026 3026 version = self._format_version
3027 3027
3028 3028 # The verifier tells us what version revlog we should be.
3029 3029 if version != state[b'expectedversion']:
3030 3030 yield revlogproblem(
3031 3031 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3032 3032 % (self.display_id, version, state[b'expectedversion'])
3033 3033 )
3034 3034
3035 3035 state[b'skipread'] = set()
3036 3036 state[b'safe_renamed'] = set()
3037 3037
3038 3038 for rev in self:
3039 3039 node = self.node(rev)
3040 3040
3041 3041 # Verify contents. 4 cases to care about:
3042 3042 #
3043 3043 # common: the most common case
3044 3044 # rename: with a rename
3045 3045 # meta: file content starts with b'\1\n', the metadata
3046 3046 # header defined in filelog.py, but without a rename
3047 3047 # ext: content stored externally
3048 3048 #
3049 3049 # More formally, their differences are shown below:
3050 3050 #
3051 3051 # | common | rename | meta | ext
3052 3052 # -------------------------------------------------------
3053 3053 # flags() | 0 | 0 | 0 | not 0
3054 3054 # renamed() | False | True | False | ?
3055 3055 # rawtext[0:2]=='\1\n'| False | True | True | ?
3056 3056 #
3057 3057 # "rawtext" means the raw text stored in revlog data, which
3058 3058 # could be retrieved by "rawdata(rev)". "text"
3059 3059 # mentioned below is "revision(rev)".
3060 3060 #
3061 3061 # There are 3 different lengths stored physically:
3062 3062 # 1. L1: rawsize, stored in revlog index
3063 3063 # 2. L2: len(rawtext), stored in revlog data
3064 3064 # 3. L3: len(text), stored in revlog data if flags==0, or
3065 3065 # possibly somewhere else if flags!=0
3066 3066 #
3067 3067 # L1 should be equal to L2. L3 could be different from them.
3068 3068 # "text" may or may not affect commit hash depending on flag
3069 3069 # processors (see flagutil.addflagprocessor).
3070 3070 #
3071 3071 # | common | rename | meta | ext
3072 3072 # -------------------------------------------------
3073 3073 # rawsize() | L1 | L1 | L1 | L1
3074 3074 # size() | L1 | L2-LM | L1(*) | L1 (?)
3075 3075 # len(rawtext) | L2 | L2 | L2 | L2
3076 3076 # len(text) | L2 | L2 | L2 | L3
3077 3077 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3078 3078 #
3079 3079 # LM: length of metadata, depending on rawtext
3080 3080 # (*): not ideal, see comment in filelog.size
3081 3081 # (?): could be "- len(meta)" if the resolved content has
3082 3082 # rename metadata
3083 3083 #
3084 3084 # Checks needed to be done:
3085 3085 # 1. length check: L1 == L2, in all cases.
3086 3086 # 2. hash check: depending on flag processor, we may need to
3087 3087 # use either "text" (external), or "rawtext" (in revlog).
3088 3088
3089 3089 try:
3090 3090 skipflags = state.get(b'skipflags', 0)
3091 3091 if skipflags:
3092 3092 skipflags &= self.flags(rev)
3093 3093
3094 3094 _verify_revision(self, skipflags, state, node)
3095 3095
3096 3096 l1 = self.rawsize(rev)
3097 3097 l2 = len(self.rawdata(node))
3098 3098
3099 3099 if l1 != l2:
3100 3100 yield revlogproblem(
3101 3101 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3102 3102 node=node,
3103 3103 )
3104 3104
3105 3105 except error.CensoredNodeError:
3106 3106 if state[b'erroroncensored']:
3107 3107 yield revlogproblem(
3108 3108 error=_(b'censored file data'), node=node
3109 3109 )
3110 3110 state[b'skipread'].add(node)
3111 3111 except Exception as e:
3112 3112 yield revlogproblem(
3113 3113 error=_(b'unpacking %s: %s')
3114 3114 % (short(node), stringutil.forcebytestr(e)),
3115 3115 node=node,
3116 3116 )
3117 3117 state[b'skipread'].add(node)
3118 3118
3119 3119 def storageinfo(
3120 3120 self,
3121 3121 exclusivefiles=False,
3122 3122 sharedfiles=False,
3123 3123 revisionscount=False,
3124 3124 trackedsize=False,
3125 3125 storedsize=False,
3126 3126 ):
3127 3127 d = {}
3128 3128
3129 3129 if exclusivefiles:
3130 3130 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3131 3131 if not self._inline:
3132 3132 d[b'exclusivefiles'].append((self.opener, self._datafile))
3133 3133
3134 3134 if sharedfiles:
3135 3135 d[b'sharedfiles'] = []
3136 3136
3137 3137 if revisionscount:
3138 3138 d[b'revisionscount'] = len(self)
3139 3139
3140 3140 if trackedsize:
3141 3141 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3142 3142
3143 3143 if storedsize:
3144 3144 d[b'storedsize'] = sum(
3145 3145 self.opener.stat(path).st_size for path in self.files()
3146 3146 )
3147 3147
3148 3148 return d
3149 3149
3150 3150 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3151 3151 if not self.hassidedata:
3152 3152 return
3153 3153         # inline revlogs are not yet supported because they suffer from an issue
3154 3154         # when rewriting them (since it's not an append-only operation).
3155 3155 # See issue6485.
3156 3156 assert not self._inline
3157 3157 if not helpers[1] and not helpers[2]:
3158 3158 # Nothing to generate or remove
3159 3159 return
3160 3160
3161 3161         # the changelog implements a "delayed" writing mechanism that assumes
3162 3162         # all index data is written in append mode and is therefore incompatible
3163 3163         # with the seeked writes done in this method. The use of such "delayed"
3164 3164         # writing will soon be removed for revlog versions that support side
3165 3165         # data, so for now, we only keep this simple assert to highlight the
3166 3166         # situation.
3167 3167 delayed = getattr(self, '_delayed', False)
3168 3168 diverted = getattr(self, '_divert', False)
3169 3169 if delayed and not diverted:
3170 3170 msg = "cannot rewrite_sidedata of a delayed revlog"
3171 3171 raise error.ProgrammingError(msg)
3172 3172
3173 3173 new_entries = []
3174 3174 # append the new sidedata
3175 3175 with self._writing(transaction):
3176 3176 ifh, dfh = self._writinghandles
3177 3177 dfh.seek(0, os.SEEK_END)
3178 3178 current_offset = dfh.tell()
3179 3179 for rev in range(startrev, endrev + 1):
3180 3180 entry = self.index[rev]
3181 3181 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3182 3182 store=self,
3183 3183 sidedata_helpers=helpers,
3184 3184 sidedata={},
3185 3185 rev=rev,
3186 3186 )
3187 3187
3188 3188 serialized_sidedata = sidedatautil.serialize_sidedata(
3189 3189 new_sidedata
3190 3190 )
3191 3191 if entry[8] != 0 or entry[9] != 0:
3192 3192 # rewriting entries that already have sidedata is not
3193 3193 # supported yet, because it introduces garbage data in the
3194 3194 # revlog.
3195 msg = b"Rewriting existing sidedata is not supported yet"
3195 msg = b"rewriting existing sidedata is not supported yet"
3196 3196 raise error.Abort(msg)
3197 3197
3198 3198 # Apply (potential) flags to add and to remove after running
3199 3199 # the sidedata helpers
3200 3200 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3201 3201 entry = (new_offset_flags,) + entry[1:8]
3202 3202 entry += (current_offset, len(serialized_sidedata))
3203 3203
3204 3204                 # the sidedata computation might have moved the file cursors around
3205 3205 dfh.seek(current_offset, os.SEEK_SET)
3206 3206 dfh.write(serialized_sidedata)
3207 3207 new_entries.append(entry)
3208 3208 current_offset += len(serialized_sidedata)
3209 3209
3210 3210 # rewrite the new index entries
3211 3211 ifh.seek(startrev * self.index.entry_size)
3212 3212 for i, e in enumerate(new_entries):
3213 3213 rev = startrev + i
3214 3214 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3215 3215 packed = self.index.entry_binary(rev)
3216 3216 if rev == 0:
3217 3217 header = self._format_flags | self._format_version
3218 3218 header = self.index.pack_header(header)
3219 3219 packed = header + packed
3220 3220 ifh.write(packed)