revlog: fix a bug where transaction can be aborted partially...
Arseniy Alekseyev -
r49423:ccd9cb73 stable
@@ -1,3303 +1,3310 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15 from __future__ import absolute_import
16 16
17 17 import binascii
18 18 import collections
19 19 import contextlib
20 20 import errno
21 21 import io
22 22 import os
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullrev,
31 31 sha1nodeconstants,
32 32 short,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from .pycompat import getattr
37 37 from .revlogutils.constants import (
38 38 ALL_KINDS,
39 39 CHANGELOGV2,
40 40 COMP_MODE_DEFAULT,
41 41 COMP_MODE_INLINE,
42 42 COMP_MODE_PLAIN,
43 43 FEATURES_BY_VERSION,
44 44 FLAG_GENERALDELTA,
45 45 FLAG_INLINE_DATA,
46 46 INDEX_HEADER,
47 47 KIND_CHANGELOG,
48 48 REVLOGV0,
49 49 REVLOGV1,
50 50 REVLOGV1_FLAGS,
51 51 REVLOGV2,
52 52 REVLOGV2_FLAGS,
53 53 REVLOG_DEFAULT_FLAGS,
54 54 REVLOG_DEFAULT_FORMAT,
55 55 REVLOG_DEFAULT_VERSION,
56 56 SUPPORTED_FLAGS,
57 57 )
58 58 from .revlogutils.flagutil import (
59 59 REVIDX_DEFAULT_FLAGS,
60 60 REVIDX_ELLIPSIS,
61 61 REVIDX_EXTSTORED,
62 62 REVIDX_FLAGS_ORDER,
63 63 REVIDX_HASCOPIESINFO,
64 64 REVIDX_ISCENSORED,
65 65 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 66 )
67 67 from .thirdparty import attr
68 68 from . import (
69 69 ancestor,
70 70 dagop,
71 71 error,
72 72 mdiff,
73 73 policy,
74 74 pycompat,
75 75 revlogutils,
76 76 templatefilters,
77 77 util,
78 78 )
79 79 from .interfaces import (
80 80 repository,
81 81 util as interfaceutil,
82 82 )
83 83 from .revlogutils import (
84 84 deltas as deltautil,
85 85 docket as docketutil,
86 86 flagutil,
87 87 nodemap as nodemaputil,
88 88 randomaccessfile,
89 89 revlogv0,
90 90 rewrite,
91 91 sidedata as sidedatautil,
92 92 )
93 93 from .utils import (
94 94 storageutil,
95 95 stringutil,
96 96 )
97 97
98 98 # blanket usage of all the names to prevent pyflakes warnings
99 99 # We need these names available in the module for extensions.
100 100
101 101 REVLOGV0
102 102 REVLOGV1
103 103 REVLOGV2
104 104 FLAG_INLINE_DATA
105 105 FLAG_GENERALDELTA
106 106 REVLOG_DEFAULT_FLAGS
107 107 REVLOG_DEFAULT_FORMAT
108 108 REVLOG_DEFAULT_VERSION
109 109 REVLOGV1_FLAGS
110 110 REVLOGV2_FLAGS
111 111 REVIDX_ISCENSORED
112 112 REVIDX_ELLIPSIS
113 113 REVIDX_HASCOPIESINFO
114 114 REVIDX_EXTSTORED
115 115 REVIDX_DEFAULT_FLAGS
116 116 REVIDX_FLAGS_ORDER
117 117 REVIDX_RAWTEXT_CHANGING_FLAGS
118 118
119 119 parsers = policy.importmod('parsers')
120 120 rustancestor = policy.importrust('ancestor')
121 121 rustdagop = policy.importrust('dagop')
122 122 rustrevlog = policy.importrust('revlog')
123 123
124 124 # Aliased for performance.
125 125 _zlibdecompress = zlib.decompress
126 126
127 127 # max size of revlog with inline data
128 128 _maxinline = 131072
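# (131072 bytes = 128 KiB; once an inline revlog grows past this, its data
# is moved out into a separate ".d" file)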
129 129
130 130 # Flag processors for REVIDX_ELLIPSIS.
131 131 def ellipsisreadprocessor(rl, text):
132 132 return text, False
133 133
134 134
135 135 def ellipsiswriteprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsisrawprocessor(rl, text):
140 140 return False
141 141
142 142
143 143 ellipsisprocessor = (
144 144 ellipsisreadprocessor,
145 145 ellipsiswriteprocessor,
146 146 ellipsisrawprocessor,
147 147 )
148 148
149 149
150 150 def _verify_revision(rl, skipflags, state, node):
151 151 """Verify the integrity of the given revlog ``node`` while providing a hook
152 152 point for extensions to influence the operation."""
153 153 if skipflags:
154 154 state[b'skipread'].add(node)
155 155 else:
156 156 # Side-effect: read content and verify hash.
157 157 rl.revision(node)
158 158
159 159
160 160 # True if a fast implementation for persistent-nodemap is available
161 161 #
162 162 # We also consider the "pure" python implementation to be "fast" because
163 163 # people using pure python don't really have performance considerations
164 164 # (and a wheelbarrow of other slowness sources)
165 165 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
166 166 parsers, 'BaseIndexObject'
167 167 )
168 168
169 169
170 170 @interfaceutil.implementer(repository.irevisiondelta)
171 171 @attr.s(slots=True)
172 172 class revlogrevisiondelta(object):
173 173 node = attr.ib()
174 174 p1node = attr.ib()
175 175 p2node = attr.ib()
176 176 basenode = attr.ib()
177 177 flags = attr.ib()
178 178 baserevisionsize = attr.ib()
179 179 revision = attr.ib()
180 180 delta = attr.ib()
181 181 sidedata = attr.ib()
182 182 protocol_flags = attr.ib()
183 183 linknode = attr.ib(default=None)
184 184
185 185
186 186 @interfaceutil.implementer(repository.iverifyproblem)
187 187 @attr.s(frozen=True)
188 188 class revlogproblem(object):
189 189 warning = attr.ib(default=None)
190 190 error = attr.ib(default=None)
191 191 node = attr.ib(default=None)
192 192
193 193
194 194 def parse_index_v1(data, inline):
195 195 # call the C implementation to parse the index data
196 196 index, cache = parsers.parse_index2(data, inline)
197 197 return index, cache
198 198
199 199
200 200 def parse_index_v2(data, inline):
201 201 # call the C implementation to parse the index data
202 202 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
203 203 return index, cache
204 204
205 205
206 206 def parse_index_cl_v2(data, inline):
207 207 # call the C implementation to parse the index data
208 208 assert not inline
209 209 from .pure.parsers import parse_index_cl_v2
210 210
211 211 index, cache = parse_index_cl_v2(data)
212 212 return index, cache
213 213
214 214
215 215 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
216 216
217 217 def parse_index_v1_nodemap(data, inline):
218 218 index, cache = parsers.parse_index_devel_nodemap(data, inline)
219 219 return index, cache
220 220
221 221
222 222 else:
223 223 parse_index_v1_nodemap = None
224 224
225 225
226 226 def parse_index_v1_mixed(data, inline):
227 227 index, cache = parse_index_v1(data, inline)
228 228 return rustrevlog.MixedIndex(index), cache
229 229
230 230
231 231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
232 232 # signed integer)
233 233 _maxentrysize = 0x7FFFFFFF
234 234
235 235 FILE_TOO_SHORT_MSG = _(
236 236 b'cannot read from revlog %s;'
237 237 b' expected %d bytes from offset %d, data size is %d'
238 238 )
239 239
240 240
241 241 class revlog(object):
242 242 """
243 243 the underlying revision storage object
244 244
245 245 A revlog consists of two parts, an index and the revision data.
246 246
247 247 The index is a file with a fixed record size containing
248 248 information on each revision, including its nodeid (hash), the
249 249 nodeids of its parents, the position and offset of its data within
250 250 the data file, and the revision it's based on. Finally, each entry
251 251 contains a linkrev entry that can serve as a pointer to external
252 252 data.
253 253
254 254 The revision data itself is a linear collection of data chunks.
255 255 Each chunk represents a revision and is usually represented as a
256 256 delta against the previous chunk. To bound lookup time, runs of
257 257 deltas are limited to about 2 times the length of the original
258 258 version data. This makes retrieval of a version proportional to
259 259 its size, or O(1) relative to the number of revisions.
260 260
261 261 Both pieces of the revlog are written to in an append-only
262 262 fashion, which means we never need to rewrite a file to insert or
263 263 remove data, and can use some simple techniques to avoid the need
264 264 for locking while reading.
265 265
266 266 If checkambig, indexfile is opened with checkambig=True at
267 267 writing, to avoid file stat ambiguity.
268 268
269 269 If mmaplargeindex is True, and an mmapindexthreshold is set, the
270 270 index will be mmapped rather than read if it is larger than the
271 271 configured threshold.
272 272
273 273 If censorable is True, the revlog can have censored revisions.
274 274
275 275 If `upperboundcomp` is not None, this is the expected maximal gain from
276 276 compression for the data content.
277 277
278 278 `concurrencychecker` is an optional function that receives 3 arguments: a
279 279 file handle, a filename, and an expected position. It should check whether
280 280 the current position in the file handle is valid, and log/warn/fail (by
281 281 raising).
282 282
283 283 See mercurial/revlogutils/constants.py for details about the content of an
284 284 index entry.
285 285 """
286 286
287 287 _flagserrorclass = error.RevlogError
288 288
289 289 def __init__(
290 290 self,
291 291 opener,
292 292 target,
293 293 radix,
294 294 postfix=None, # only exists for `tmpcensored` now
295 295 checkambig=False,
296 296 mmaplargeindex=False,
297 297 censorable=False,
298 298 upperboundcomp=None,
299 299 persistentnodemap=False,
300 300 concurrencychecker=None,
301 301 trypending=False,
302 302 ):
303 303 """
304 304 create a revlog object
305 305
306 306 opener is a function that abstracts the file opening operation
307 307 and can be used to implement COW semantics or the like.
308 308
309 309 `target`: a (KIND, ID) tuple that identifies the content stored in
310 310 this revlog. It helps the rest of the code understand what the revlog
311 311 is about without having to resort to heuristics and index filename
312 312 analysis. Note that this must be reliably set by normal code, but
313 313 test, debug, or performance measurement code might not set it to an
314 314 accurate value.
315 315 """
316 316 self.upperboundcomp = upperboundcomp
317 317
318 318 self.radix = radix
319 319
320 320 self._docket_file = None
321 321 self._indexfile = None
322 322 self._datafile = None
323 323 self._sidedatafile = None
324 324 self._nodemap_file = None
325 325 self.postfix = postfix
326 326 self._trypending = trypending
327 327 self.opener = opener
328 328 if persistentnodemap:
329 329 self._nodemap_file = nodemaputil.get_nodemap_file(self)
330 330
331 331 assert target[0] in ALL_KINDS
332 332 assert len(target) == 2
333 333 self.target = target
334 334 # When True, indexfile is opened with checkambig=True at writing, to
335 335 # avoid file stat ambiguity.
336 336 self._checkambig = checkambig
337 337 self._mmaplargeindex = mmaplargeindex
338 338 self._censorable = censorable
339 339 # 3-tuple of (node, rev, text) for a raw revision.
340 340 self._revisioncache = None
341 341 # Maps rev to chain base rev.
342 342 self._chainbasecache = util.lrucachedict(100)
343 343 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
344 344 self._chunkcache = (0, b'')
345 345 # How much data to read and cache into the raw revlog data cache.
346 346 self._chunkcachesize = 65536
347 347 self._maxchainlen = None
348 348 self._deltabothparents = True
349 349 self.index = None
350 350 self._docket = None
351 351 self._nodemap_docket = None
352 352 # Mapping of partial identifiers to full nodes.
353 353 self._pcache = {}
355 355 self._compengine = b'zlib'
356 356 self._compengineopts = {}
357 357 self._maxdeltachainspan = -1
358 358 self._withsparseread = False
359 359 self._sparserevlog = False
360 360 self.hassidedata = False
361 361 self._srdensitythreshold = 0.50
362 362 self._srmingapsize = 262144
363 363
364 364 # Make copy of flag processors so each revlog instance can support
365 365 # custom flags.
366 366 self._flagprocessors = dict(flagutil.flagprocessors)
367 367
368 368 # 3-tuple of file handles being used for active writing.
369 369 self._writinghandles = None
370 370 # prevent nesting of addgroup
371 371 self._adding_group = None
372 372
373 373 self._loadindex()
374 374
375 375 self._concurrencychecker = concurrencychecker
376 376
377 377 def _init_opts(self):
378 378 """process options (from above/config) to setup associated default revlog mode
379 379
380 380 These values might be affected when actually reading on disk information.
381 381
382 382 The relevant values are returned for use in _loadindex().
383 383
384 384 * newversionflags:
385 385 version header to use if we need to create a new revlog
386 386
387 387 * mmapindexthreshold:
388 388 minimal index size at which to start using mmap
389 389
390 390 * force_nodemap:
391 391 force the usage of a "development" version of the nodemap code
392 392 """
393 393 mmapindexthreshold = None
394 394 opts = self.opener.options
395 395
396 396 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
397 397 new_header = CHANGELOGV2
398 398 elif b'revlogv2' in opts:
399 399 new_header = REVLOGV2
400 400 elif b'revlogv1' in opts:
401 401 new_header = REVLOGV1 | FLAG_INLINE_DATA
402 402 if b'generaldelta' in opts:
403 403 new_header |= FLAG_GENERALDELTA
404 404 elif b'revlogv0' in self.opener.options:
405 405 new_header = REVLOGV0
406 406 else:
407 407 new_header = REVLOG_DEFAULT_VERSION
408 408
409 409 if b'chunkcachesize' in opts:
410 410 self._chunkcachesize = opts[b'chunkcachesize']
411 411 if b'maxchainlen' in opts:
412 412 self._maxchainlen = opts[b'maxchainlen']
413 413 if b'deltabothparents' in opts:
414 414 self._deltabothparents = opts[b'deltabothparents']
415 415 self._lazydelta = bool(opts.get(b'lazydelta', True))
416 416 self._lazydeltabase = False
417 417 if self._lazydelta:
418 418 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
419 419 if b'compengine' in opts:
420 420 self._compengine = opts[b'compengine']
421 421 if b'zlib.level' in opts:
422 422 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
423 423 if b'zstd.level' in opts:
424 424 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
425 425 if b'maxdeltachainspan' in opts:
426 426 self._maxdeltachainspan = opts[b'maxdeltachainspan']
427 427 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
428 428 mmapindexthreshold = opts[b'mmapindexthreshold']
429 429 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
430 430 withsparseread = bool(opts.get(b'with-sparse-read', False))
431 431 # sparse-revlog forces sparse-read
432 432 self._withsparseread = self._sparserevlog or withsparseread
433 433 if b'sparse-read-density-threshold' in opts:
434 434 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
435 435 if b'sparse-read-min-gap-size' in opts:
436 436 self._srmingapsize = opts[b'sparse-read-min-gap-size']
437 437 if opts.get(b'enableellipsis'):
438 438 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
439 439
440 440 # revlog v0 doesn't have flag processors
441 441 for flag, processor in pycompat.iteritems(
442 442 opts.get(b'flagprocessors', {})
443 443 ):
444 444 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
445 445
446 446 if self._chunkcachesize <= 0:
447 447 raise error.RevlogError(
448 448 _(b'revlog chunk cache size %r is not greater than 0')
449 449 % self._chunkcachesize
450 450 )
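# A positive power of two has exactly one bit set, so x & (x - 1) is
# non-zero precisely when x is not a power of two (e.g. 8 & 7 == 0,
# while 6 & 5 == 4).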
451 451 elif self._chunkcachesize & (self._chunkcachesize - 1):
452 452 raise error.RevlogError(
453 453 _(b'revlog chunk cache size %r is not a power of 2')
454 454 % self._chunkcachesize
455 455 )
456 456 force_nodemap = opts.get(b'devel-force-nodemap', False)
457 457 return new_header, mmapindexthreshold, force_nodemap
458 458
459 459 def _get_data(self, filepath, mmap_threshold, size=None):
460 460 """return a file content with or without mmap
461 461
462 462 If the file is missing return the empty string"""
463 463 try:
464 464 with self.opener(filepath) as fp:
465 465 if mmap_threshold is not None:
466 466 file_size = self.opener.fstat(fp).st_size
467 467 if file_size >= mmap_threshold:
468 468 if size is not None:
469 469 # avoid potential mmap crash
470 470 size = min(file_size, size)
471 471 # TODO: should .close() to release resources without
472 472 # relying on Python GC
473 473 if size is None:
474 474 return util.buffer(util.mmapread(fp))
475 475 else:
476 476 return util.buffer(util.mmapread(fp, size))
477 477 if size is None:
478 478 return fp.read()
479 479 else:
480 480 return fp.read(size)
481 481 except IOError as inst:
482 482 if inst.errno != errno.ENOENT:
483 483 raise
484 484 return b''
485 485
486 486 def _loadindex(self, docket=None):
487 487
488 488 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
489 489
490 490 if self.postfix is not None:
491 491 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
492 492 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
493 493 entry_point = b'%s.i.a' % self.radix
494 494 else:
495 495 entry_point = b'%s.i' % self.radix
496 496
497 497 if docket is not None:
498 498 self._docket = docket
499 499 self._docket_file = entry_point
500 500 else:
501 501 entry_data = b''
502 502 self._initempty = True
503 503 entry_data = self._get_data(entry_point, mmapindexthreshold)
504 504 if len(entry_data) > 0:
505 505 header = INDEX_HEADER.unpack(entry_data[:4])[0]
506 506 self._initempty = False
507 507 else:
508 508 header = new_header
509 509
510 510 self._format_flags = header & ~0xFFFF
511 511 self._format_version = header & 0xFFFF
512 512
513 513 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
514 514 if supported_flags is None:
515 515 msg = _(b'unknown version (%d) in revlog %s')
516 516 msg %= (self._format_version, self.display_id)
517 517 raise error.RevlogError(msg)
518 518 elif self._format_flags & ~supported_flags:
519 519 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
520 520 display_flag = self._format_flags >> 16
521 521 msg %= (display_flag, self._format_version, self.display_id)
522 522 raise error.RevlogError(msg)
523 523
524 524 features = FEATURES_BY_VERSION[self._format_version]
525 525 self._inline = features[b'inline'](self._format_flags)
526 526 self._generaldelta = features[b'generaldelta'](self._format_flags)
527 527 self.hassidedata = features[b'sidedata']
528 528
529 529 if not features[b'docket']:
530 530 self._indexfile = entry_point
531 531 index_data = entry_data
532 532 else:
533 533 self._docket_file = entry_point
534 534 if self._initempty:
535 535 self._docket = docketutil.default_docket(self, header)
536 536 else:
537 537 self._docket = docketutil.parse_docket(
538 538 self, entry_data, use_pending=self._trypending
539 539 )
540 540
541 541 if self._docket is not None:
542 542 self._indexfile = self._docket.index_filepath()
543 543 index_data = b''
544 544 index_size = self._docket.index_end
545 545 if index_size > 0:
546 546 index_data = self._get_data(
547 547 self._indexfile, mmapindexthreshold, size=index_size
548 548 )
549 549 if len(index_data) < index_size:
550 550 msg = _(b'not enough index data for %s: got %d, expected %d')
551 551 msg %= (self.display_id, len(index_data), index_size)
552 552 raise error.RevlogError(msg)
553 553
554 554 self._inline = False
555 555 # generaldelta implied by version 2 revlogs.
556 556 self._generaldelta = True
557 557 # the logic for persistent nodemap will be dealt with within the
558 558 # main docket, so disable it for now.
559 559 self._nodemap_file = None
560 560
561 561 if self._docket is not None:
562 562 self._datafile = self._docket.data_filepath()
563 563 self._sidedatafile = self._docket.sidedata_filepath()
564 564 elif self.postfix is None:
565 565 self._datafile = b'%s.d' % self.radix
566 566 else:
567 567 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
568 568
569 569 self.nodeconstants = sha1nodeconstants
570 570 self.nullid = self.nodeconstants.nullid
571 571
572 572 # sparse-revlog can't be on without general-delta (issue6056)
573 573 if not self._generaldelta:
574 574 self._sparserevlog = False
575 575
576 576 self._storedeltachains = True
577 577
578 578 devel_nodemap = (
579 579 self._nodemap_file
580 580 and force_nodemap
581 581 and parse_index_v1_nodemap is not None
582 582 )
583 583
584 584 use_rust_index = False
585 585 if rustrevlog is not None:
586 586 if self._nodemap_file is not None:
587 587 use_rust_index = True
588 588 else:
589 589 use_rust_index = self.opener.options.get(b'rust.index')
590 590
591 591 self._parse_index = parse_index_v1
592 592 if self._format_version == REVLOGV0:
593 593 self._parse_index = revlogv0.parse_index_v0
594 594 elif self._format_version == REVLOGV2:
595 595 self._parse_index = parse_index_v2
596 596 elif self._format_version == CHANGELOGV2:
597 597 self._parse_index = parse_index_cl_v2
598 598 elif devel_nodemap:
599 599 self._parse_index = parse_index_v1_nodemap
600 600 elif use_rust_index:
601 601 self._parse_index = parse_index_v1_mixed
602 602 try:
603 603 d = self._parse_index(index_data, self._inline)
604 604 index, chunkcache = d
605 605 use_nodemap = (
606 606 not self._inline
607 607 and self._nodemap_file is not None
608 608 and util.safehasattr(index, 'update_nodemap_data')
609 609 )
610 610 if use_nodemap:
611 611 nodemap_data = nodemaputil.persisted_data(self)
612 612 if nodemap_data is not None:
613 613 docket = nodemap_data[0]
614 614 if (
615 615 len(d[0]) > docket.tip_rev
616 616 and d[0][docket.tip_rev][7] == docket.tip_node
617 617 ):
618 618 # no changelog tampering
619 619 self._nodemap_docket = docket
620 620 index.update_nodemap_data(*nodemap_data)
621 621 except (ValueError, IndexError):
622 622 raise error.RevlogError(
623 623 _(b"index %s is corrupted") % self.display_id
624 624 )
625 625 self.index = index
626 626 self._segmentfile = randomaccessfile.randomaccessfile(
627 627 self.opener,
628 628 (self._indexfile if self._inline else self._datafile),
629 629 self._chunkcachesize,
630 630 chunkcache,
631 631 )
632 632 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
633 633 self.opener,
634 634 self._sidedatafile,
635 635 self._chunkcachesize,
636 636 )
637 637 # revnum -> (chain-length, sum-delta-length)
638 638 self._chaininfocache = util.lrucachedict(500)
639 639 # revlog header -> revlog compressor
640 640 self._decompressors = {}
641 641
642 642 @util.propertycache
643 643 def revlog_kind(self):
644 644 return self.target[0]
645 645
646 646 @util.propertycache
647 647 def display_id(self):
648 648 """The public facing "ID" of the revlog that we use in message"""
649 649 # Maybe we should build a user facing representation of
650 650 # revlog.target instead of using `self.radix`
651 651 return self.radix
652 652
653 653 def _get_decompressor(self, t):
654 654 try:
655 655 compressor = self._decompressors[t]
656 656 except KeyError:
657 657 try:
658 658 engine = util.compengines.forrevlogheader(t)
659 659 compressor = engine.revlogcompressor(self._compengineopts)
660 660 self._decompressors[t] = compressor
661 661 except KeyError:
662 662 raise error.RevlogError(
663 663 _(b'unknown compression type %s') % binascii.hexlify(t)
664 664 )
665 665 return compressor
666 666
667 667 @util.propertycache
668 668 def _compressor(self):
669 669 engine = util.compengines[self._compengine]
670 670 return engine.revlogcompressor(self._compengineopts)
671 671
672 672 @util.propertycache
673 673 def _decompressor(self):
674 674 """the default decompressor"""
675 675 if self._docket is None:
676 676 return None
677 677 t = self._docket.default_compression_header
678 678 c = self._get_decompressor(t)
679 679 return c.decompress
680 680
681 681 def _indexfp(self):
682 682 """file object for the revlog's index file"""
683 683 return self.opener(self._indexfile, mode=b"r")
684 684
685 685 def __index_write_fp(self):
686 686 # You should not use this directly; use `_writing` instead
687 687 try:
688 688 f = self.opener(
689 689 self._indexfile, mode=b"r+", checkambig=self._checkambig
690 690 )
691 691 if self._docket is None:
692 692 f.seek(0, os.SEEK_END)
693 693 else:
694 694 f.seek(self._docket.index_end, os.SEEK_SET)
695 695 return f
696 696 except IOError as inst:
697 697 if inst.errno != errno.ENOENT:
698 698 raise
699 699 return self.opener(
700 700 self._indexfile, mode=b"w+", checkambig=self._checkambig
701 701 )
702 702
703 703 def __index_new_fp(self):
704 704 # You should not use this unless you are upgrading from an inline revlog
705 705 return self.opener(
706 706 self._indexfile,
707 707 mode=b"w",
708 708 checkambig=self._checkambig,
709 709 atomictemp=True,
710 710 )
711 711
712 712 def _datafp(self, mode=b'r'):
713 713 """file object for the revlog's data file"""
714 714 return self.opener(self._datafile, mode=mode)
715 715
716 716 @contextlib.contextmanager
717 717 def _sidedatareadfp(self):
718 718 """file object suitable to read sidedata"""
719 719 if self._writinghandles:
720 720 yield self._writinghandles[2]
721 721 else:
722 722 with self.opener(self._sidedatafile) as fp:
723 723 yield fp
724 724
725 725 def tiprev(self):
726 726 return len(self.index) - 1
727 727
728 728 def tip(self):
729 729 return self.node(self.tiprev())
730 730
731 731 def __contains__(self, rev):
732 732 return 0 <= rev < len(self)
733 733
734 734 def __len__(self):
735 735 return len(self.index)
736 736
737 737 def __iter__(self):
738 738 return iter(pycompat.xrange(len(self)))
739 739
740 740 def revs(self, start=0, stop=None):
741 741 """iterate over all rev in this revlog (from start to stop)"""
742 742 return storageutil.iterrevs(len(self), start=start, stop=stop)
743 743
744 744 @property
745 745 def nodemap(self):
746 746 msg = (
747 747 b"revlog.nodemap is deprecated, "
748 748 b"use revlog.index.[has_node|rev|get_rev]"
749 749 )
750 750 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
751 751 return self.index.nodemap
752 752
753 753 @property
754 754 def _nodecache(self):
755 755 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
756 756 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
757 757 return self.index.nodemap
758 758
759 759 def hasnode(self, node):
760 760 try:
761 761 self.rev(node)
762 762 return True
763 763 except KeyError:
764 764 return False
765 765
766 766 def candelta(self, baserev, rev):
767 767 """whether two revisions (baserev, rev) can be delta-ed or not"""
768 768 # Disable delta if either rev requires a content-changing flag
769 769 # processor (ex. LFS). This is because such a flag processor can alter
770 770 # the rawtext content that the delta will be based on, and two clients
771 771 # could have the same revlog node with different flags (i.e. different
772 772 # rawtext contents), making the delta incompatible.
773 773 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
774 774 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
775 775 ):
776 776 return False
777 777 return True
778 778
779 779 def update_caches(self, transaction):
780 780 if self._nodemap_file is not None:
781 781 if transaction is None:
782 782 nodemaputil.update_persistent_nodemap(self)
783 783 else:
784 784 nodemaputil.setup_persistent_nodemap(transaction, self)
785 785
786 786 def clearcaches(self):
787 787 self._revisioncache = None
788 788 self._chainbasecache.clear()
789 789 self._segmentfile.clear_cache()
790 790 self._segmentfile_sidedata.clear_cache()
791 791 self._pcache = {}
792 792 self._nodemap_docket = None
793 793 self.index.clearcaches()
794 794 # The python code is the one responsible for validating the docket, so
795 795 # we end up having to refresh it here.
796 796 use_nodemap = (
797 797 not self._inline
798 798 and self._nodemap_file is not None
799 799 and util.safehasattr(self.index, 'update_nodemap_data')
800 800 )
801 801 if use_nodemap:
802 802 nodemap_data = nodemaputil.persisted_data(self)
803 803 if nodemap_data is not None:
804 804 self._nodemap_docket = nodemap_data[0]
805 805 self.index.update_nodemap_data(*nodemap_data)
806 806
807 807 def rev(self, node):
808 808 try:
809 809 return self.index.rev(node)
810 810 except TypeError:
811 811 raise
812 812 except error.RevlogError:
813 813 # parsers.c radix tree lookup failed
814 814 if (
815 815 node == self.nodeconstants.wdirid
816 816 or node in self.nodeconstants.wdirfilenodeids
817 817 ):
818 818 raise error.WdirUnsupported
819 819 raise error.LookupError(node, self.display_id, _(b'no node'))
820 820
821 821 # Accessors for index entries.
822 822
823 823 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
824 824 # are flags.
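# That is, index[rev][0] == (offset << 16) | flags: start() shifts the
# flags away and flags() masks the offset away.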
825 825 def start(self, rev):
826 826 return int(self.index[rev][0] >> 16)
827 827
828 828 def sidedata_cut_off(self, rev):
829 829 sd_cut_off = self.index[rev][8]
830 830 if sd_cut_off != 0:
831 831 return sd_cut_off
832 832 # This is an annoying dance, because entries without sidedata
833 833 # currently use 0 as their offset (instead of previous-offset +
834 834 # previous-size).
835 835 #
836 836 # We should reconsider this "sidedata → 0 sidedata_offset" policy.
837 837 # In the meantime, we need this.
838 838 while 0 <= rev:
839 839 e = self.index[rev]
840 840 if e[9] != 0:
841 841 return e[8] + e[9]
842 842 rev -= 1
843 843 return 0
844 844
845 845 def flags(self, rev):
846 846 return self.index[rev][0] & 0xFFFF
847 847
848 848 def length(self, rev):
849 849 return self.index[rev][1]
850 850
851 851 def sidedata_length(self, rev):
852 852 if not self.hassidedata:
853 853 return 0
854 854 return self.index[rev][9]
855 855
856 856 def rawsize(self, rev):
857 857 """return the length of the uncompressed text for a given revision"""
858 858 l = self.index[rev][2]
859 859 if l >= 0:
860 860 return l
861 861
862 862 t = self.rawdata(rev)
863 863 return len(t)
864 864
865 865 def size(self, rev):
866 866 """length of non-raw text (processed by a "read" flag processor)"""
867 867 # fast path: if no "read" flag processor could change the content,
868 868 # size is rawsize. note: ELLIPSIS is known to not change the content.
869 869 flags = self.flags(rev)
870 870 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
871 871 return self.rawsize(rev)
872 872
873 873 return len(self.revision(rev, raw=False))
874 874
875 875 def chainbase(self, rev):
876 876 base = self._chainbasecache.get(rev)
877 877 if base is not None:
878 878 return base
879 879
880 880 index = self.index
881 881 iterrev = rev
882 882 base = index[iterrev][3]
883 883 while base != iterrev:
884 884 iterrev = base
885 885 base = index[iterrev][3]
886 886
887 887 self._chainbasecache[rev] = base
888 888 return base
889 889
890 890 def linkrev(self, rev):
891 891 return self.index[rev][4]
892 892
893 893 def parentrevs(self, rev):
894 894 try:
895 895 entry = self.index[rev]
896 896 except IndexError:
897 897 if rev == wdirrev:
898 898 raise error.WdirUnsupported
899 899 raise
900 900
901 901 return entry[5], entry[6]
902 902
903 903 # fast parentrevs(rev) where rev isn't filtered
904 904 _uncheckedparentrevs = parentrevs
905 905
906 906 def node(self, rev):
907 907 try:
908 908 return self.index[rev][7]
909 909 except IndexError:
910 910 if rev == wdirrev:
911 911 raise error.WdirUnsupported
912 912 raise
913 913
914 914 # Derived from index values.
915 915
916 916 def end(self, rev):
917 917 return self.start(rev) + self.length(rev)
918 918
919 919 def parents(self, node):
920 920 i = self.index
921 921 d = i[self.rev(node)]
922 922 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
923 923
924 924 def chainlen(self, rev):
925 925 return self._chaininfo(rev)[0]
926 926
927 927 def _chaininfo(self, rev):
928 928 chaininfocache = self._chaininfocache
929 929 if rev in chaininfocache:
930 930 return chaininfocache[rev]
931 931 index = self.index
932 932 generaldelta = self._generaldelta
933 933 iterrev = rev
934 934 e = index[iterrev]
935 935 clen = 0
936 936 compresseddeltalen = 0
937 937 while iterrev != e[3]:
938 938 clen += 1
939 939 compresseddeltalen += e[1]
940 940 if generaldelta:
941 941 iterrev = e[3]
942 942 else:
943 943 iterrev -= 1
944 944 if iterrev in chaininfocache:
945 945 t = chaininfocache[iterrev]
946 946 clen += t[0]
947 947 compresseddeltalen += t[1]
948 948 break
949 949 e = index[iterrev]
950 950 else:
951 951 # Add text length of base since decompressing that also takes
952 952 # work. For cache hits the length is already included.
953 953 compresseddeltalen += e[1]
954 954 r = (clen, compresseddeltalen)
955 955 chaininfocache[rev] = r
956 956 return r
957 957
958 958 def _deltachain(self, rev, stoprev=None):
959 959 """Obtain the delta chain for a revision.
960 960
961 961 ``stoprev`` specifies a revision to stop at. If not specified, we
962 962 stop at the base of the chain.
963 963
964 964 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
965 965 revs in ascending order and ``stopped`` is a bool indicating whether
966 966 ``stoprev`` was hit.
967 967 """
968 968 # Try C implementation.
969 969 try:
970 970 return self.index.deltachain(rev, stoprev, self._generaldelta)
971 971 except AttributeError:
972 972 pass
973 973
974 974 chain = []
975 975
976 976 # Alias to prevent attribute lookup in tight loop.
977 977 index = self.index
978 978 generaldelta = self._generaldelta
979 979
980 980 iterrev = rev
981 981 e = index[iterrev]
982 982 while iterrev != e[3] and iterrev != stoprev:
983 983 chain.append(iterrev)
984 984 if generaldelta:
985 985 iterrev = e[3]
986 986 else:
987 987 iterrev -= 1
988 988 e = index[iterrev]
989 989
990 990 if iterrev == stoprev:
991 991 stopped = True
992 992 else:
993 993 chain.append(iterrev)
994 994 stopped = False
995 995
996 996 chain.reverse()
997 997 return chain, stopped
998 998
999 999 def ancestors(self, revs, stoprev=0, inclusive=False):
1000 1000 """Generate the ancestors of 'revs' in reverse revision order.
1001 1001 Does not generate revs lower than stoprev.
1002 1002
1003 1003 See the documentation for ancestor.lazyancestors for more details."""
1004 1004
1005 1005 # first, make sure start revisions aren't filtered
1006 1006 revs = list(revs)
1007 1007 checkrev = self.node
1008 1008 for r in revs:
1009 1009 checkrev(r)
1010 1010 # and we're sure ancestors aren't filtered as well
1011 1011
1012 1012 if rustancestor is not None and self.index.rust_ext_compat:
1013 1013 lazyancestors = rustancestor.LazyAncestors
1014 1014 arg = self.index
1015 1015 else:
1016 1016 lazyancestors = ancestor.lazyancestors
1017 1017 arg = self._uncheckedparentrevs
1018 1018 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1019 1019
1020 1020 def descendants(self, revs):
1021 1021 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1022 1022
1023 1023 def findcommonmissing(self, common=None, heads=None):
1024 1024 """Return a tuple of the ancestors of common and the ancestors of heads
1025 1025 that are not ancestors of common. In revset terminology, we return the
1026 1026 tuple:
1027 1027
1028 1028 ::common, (::heads) - (::common)
1029 1029
1030 1030 The list is sorted by revision number, meaning it is
1031 1031 topologically sorted.
1032 1032
1033 1033 'heads' and 'common' are both lists of node IDs. If heads is
1034 1034 not supplied, uses all of the revlog's heads. If common is not
1035 1035 supplied, uses nullid."""
1036 1036 if common is None:
1037 1037 common = [self.nullid]
1038 1038 if heads is None:
1039 1039 heads = self.heads()
1040 1040
1041 1041 common = [self.rev(n) for n in common]
1042 1042 heads = [self.rev(n) for n in heads]
1043 1043
1044 1044 # we want the ancestors, but inclusive
1045 1045 class lazyset(object):
1046 1046 def __init__(self, lazyvalues):
1047 1047 self.addedvalues = set()
1048 1048 self.lazyvalues = lazyvalues
1049 1049
1050 1050 def __contains__(self, value):
1051 1051 return value in self.addedvalues or value in self.lazyvalues
1052 1052
1053 1053 def __iter__(self):
1054 1054 added = self.addedvalues
1055 1055 for r in added:
1056 1056 yield r
1057 1057 for r in self.lazyvalues:
1058 1058 if not r in added:
1059 1059 yield r
1060 1060
1061 1061 def add(self, value):
1062 1062 self.addedvalues.add(value)
1063 1063
1064 1064 def update(self, values):
1065 1065 self.addedvalues.update(values)
1066 1066
1067 1067 has = lazyset(self.ancestors(common))
1068 1068 has.add(nullrev)
1069 1069 has.update(common)
1070 1070
1071 1071 # take all ancestors from heads that aren't in has
1072 1072 missing = set()
1073 1073 visit = collections.deque(r for r in heads if r not in has)
1074 1074 while visit:
1075 1075 r = visit.popleft()
1076 1076 if r in missing:
1077 1077 continue
1078 1078 else:
1079 1079 missing.add(r)
1080 1080 for p in self.parentrevs(r):
1081 1081 if p not in has:
1082 1082 visit.append(p)
1083 1083 missing = list(missing)
1084 1084 missing.sort()
1085 1085 return has, [self.node(miss) for miss in missing]
1086 1086
1087 1087 def incrementalmissingrevs(self, common=None):
1088 1088 """Return an object that can be used to incrementally compute the
1089 1089 revision numbers of the ancestors of arbitrary sets that are not
1090 1090 ancestors of common. This is an ancestor.incrementalmissingancestors
1091 1091 object.
1092 1092
1093 1093 'common' is a list of revision numbers. If common is not supplied, uses
1094 1094 nullrev.
1095 1095 """
1096 1096 if common is None:
1097 1097 common = [nullrev]
1098 1098
1099 1099 if rustancestor is not None and self.index.rust_ext_compat:
1100 1100 return rustancestor.MissingAncestors(self.index, common)
1101 1101 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1102 1102
1103 1103 def findmissingrevs(self, common=None, heads=None):
1104 1104 """Return the revision numbers of the ancestors of heads that
1105 1105 are not ancestors of common.
1106 1106
1107 1107 More specifically, return a list of revision numbers corresponding to
1108 1108 nodes N such that every N satisfies the following constraints:
1109 1109
1110 1110 1. N is an ancestor of some node in 'heads'
1111 1111 2. N is not an ancestor of any node in 'common'
1112 1112
1113 1113 The list is sorted by revision number, meaning it is
1114 1114 topologically sorted.
1115 1115
1116 1116 'heads' and 'common' are both lists of revision numbers. If heads is
1117 1117 not supplied, uses all of the revlog's heads. If common is not
1118 1118 supplied, uses nullid."""
1119 1119 if common is None:
1120 1120 common = [nullrev]
1121 1121 if heads is None:
1122 1122 heads = self.headrevs()
1123 1123
1124 1124 inc = self.incrementalmissingrevs(common=common)
1125 1125 return inc.missingancestors(heads)
1126 1126
1127 1127 def findmissing(self, common=None, heads=None):
1128 1128 """Return the ancestors of heads that are not ancestors of common.
1129 1129
1130 1130 More specifically, return a list of nodes N such that every N
1131 1131 satisfies the following constraints:
1132 1132
1133 1133 1. N is an ancestor of some node in 'heads'
1134 1134 2. N is not an ancestor of any node in 'common'
1135 1135
1136 1136 The list is sorted by revision number, meaning it is
1137 1137 topologically sorted.
1138 1138
1139 1139 'heads' and 'common' are both lists of node IDs. If heads is
1140 1140 not supplied, uses all of the revlog's heads. If common is not
1141 1141 supplied, uses nullid."""
1142 1142 if common is None:
1143 1143 common = [self.nullid]
1144 1144 if heads is None:
1145 1145 heads = self.heads()
1146 1146
1147 1147 common = [self.rev(n) for n in common]
1148 1148 heads = [self.rev(n) for n in heads]
1149 1149
1150 1150 inc = self.incrementalmissingrevs(common=common)
1151 1151 return [self.node(r) for r in inc.missingancestors(heads)]
1152 1152
1153 1153 def nodesbetween(self, roots=None, heads=None):
1154 1154 """Return a topological path from 'roots' to 'heads'.
1155 1155
1156 1156 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1157 1157 topologically sorted list of all nodes N that satisfy both of
1158 1158 these constraints:
1159 1159
1160 1160 1. N is a descendant of some node in 'roots'
1161 1161 2. N is an ancestor of some node in 'heads'
1162 1162
1163 1163 Every node is considered to be both a descendant and an ancestor
1164 1164 of itself, so every reachable node in 'roots' and 'heads' will be
1165 1165 included in 'nodes'.
1166 1166
1167 1167 'outroots' is the list of reachable nodes in 'roots', i.e., the
1168 1168 subset of 'roots' that is returned in 'nodes'. Likewise,
1169 1169 'outheads' is the subset of 'heads' that is also in 'nodes'.
1170 1170
1171 1171 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1172 1172 unspecified, uses nullid as the only root. If 'heads' is
1173 1173 unspecified, uses list of all of the revlog's heads."""
1174 1174 nonodes = ([], [], [])
1175 1175 if roots is not None:
1176 1176 roots = list(roots)
1177 1177 if not roots:
1178 1178 return nonodes
1179 1179 lowestrev = min([self.rev(n) for n in roots])
1180 1180 else:
1181 1181 roots = [self.nullid] # Everybody's a descendant of nullid
1182 1182 lowestrev = nullrev
1183 1183 if (lowestrev == nullrev) and (heads is None):
1184 1184 # We want _all_ the nodes!
1185 1185 return (
1186 1186 [self.node(r) for r in self],
1187 1187 [self.nullid],
1188 1188 list(self.heads()),
1189 1189 )
1190 1190 if heads is None:
1191 1191 # All nodes are ancestors, so the latest ancestor is the last
1192 1192 # node.
1193 1193 highestrev = len(self) - 1
1194 1194 # Set ancestors to None to signal that every node is an ancestor.
1195 1195 ancestors = None
1196 1196 # Set heads to an empty dictionary for later discovery of heads
1197 1197 heads = {}
1198 1198 else:
1199 1199 heads = list(heads)
1200 1200 if not heads:
1201 1201 return nonodes
1202 1202 ancestors = set()
1203 1203 # Turn heads into a dictionary so we can remove 'fake' heads.
1204 1204 # Also, later we will be using it to filter out the heads we can't
1205 1205 # find from roots.
1206 1206 heads = dict.fromkeys(heads, False)
1207 1207 # Start at the top and keep marking parents until we're done.
1208 1208 nodestotag = set(heads)
1209 1209 # Remember where the top was so we can use it as a limit later.
1210 1210 highestrev = max([self.rev(n) for n in nodestotag])
1211 1211 while nodestotag:
1212 1212 # grab a node to tag
1213 1213 n = nodestotag.pop()
1214 1214 # Never tag nullid
1215 1215 if n == self.nullid:
1216 1216 continue
1217 1217 # A node's revision number represents its place in a
1218 1218 # topologically sorted list of nodes.
1219 1219 r = self.rev(n)
1220 1220 if r >= lowestrev:
1221 1221 if n not in ancestors:
1222 1222 # If we are possibly a descendant of one of the roots
1223 1223 # and we haven't already been marked as an ancestor
1224 1224 ancestors.add(n) # Mark as ancestor
1225 1225 # Add non-nullid parents to list of nodes to tag.
1226 1226 nodestotag.update(
1227 1227 [p for p in self.parents(n) if p != self.nullid]
1228 1228 )
1229 1229 elif n in heads: # We've seen it before, is it a fake head?
1230 1230 # So it is, real heads should not be the ancestors of
1231 1231 # any other heads.
1232 1232 heads.pop(n)
1233 1233 if not ancestors:
1234 1234 return nonodes
1235 1235 # Now that we have our set of ancestors, we want to remove any
1236 1236 # roots that are not ancestors.
1237 1237
1238 1238 # If one of the roots was nullid, everything is included anyway.
1239 1239 if lowestrev > nullrev:
1240 1240 # But, since we weren't, let's recompute the lowest rev to not
1241 1241 # include roots that aren't ancestors.
1242 1242
1243 1243 # Filter out roots that aren't ancestors of heads
1244 1244 roots = [root for root in roots if root in ancestors]
1245 1245 # Recompute the lowest revision
1246 1246 if roots:
1247 1247 lowestrev = min([self.rev(root) for root in roots])
1248 1248 else:
1249 1249 # No more roots? Return empty list
1250 1250 return nonodes
1251 1251 else:
1252 1252 # We are descending from nullid, and don't need to care about
1253 1253 # any other roots.
1254 1254 lowestrev = nullrev
1255 1255 roots = [self.nullid]
1256 1256 # Transform our roots list into a set.
1257 1257 descendants = set(roots)
1258 1258 # Also, keep the original roots so we can filter out roots that aren't
1259 1259 # 'real' roots (i.e. are descended from other roots).
1260 1260 roots = descendants.copy()
1261 1261 # Our topologically sorted list of output nodes.
1262 1262 orderedout = []
1263 1263 # Don't start at nullid since we don't want nullid in our output list,
1264 1264 # and if nullid shows up in descendants, empty parents will look like
1265 1265 # they're descendants.
1266 1266 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1267 1267 n = self.node(r)
1268 1268 isdescendant = False
1269 1269 if lowestrev == nullrev: # Everybody is a descendant of nullid
1270 1270 isdescendant = True
1271 1271 elif n in descendants:
1272 1272 # n is already a descendant
1273 1273 isdescendant = True
1274 1274 # This check only needs to be done here because all the roots
1275 1275 # were already marked as descendants before the loop.
1276 1276 if n in roots:
1277 1277 # If n was a root, check if it's a 'real' root.
1278 1278 p = tuple(self.parents(n))
1279 1279 # If any of its parents are descendants, it's not a root.
1280 1280 if (p[0] in descendants) or (p[1] in descendants):
1281 1281 roots.remove(n)
1282 1282 else:
1283 1283 p = tuple(self.parents(n))
1284 1284 # A node is a descendant if either of its parents are
1285 1285 # descendants. (We seeded the descendants set with the roots
1286 1286 # up there, remember?)
1287 1287 if (p[0] in descendants) or (p[1] in descendants):
1288 1288 descendants.add(n)
1289 1289 isdescendant = True
1290 1290 if isdescendant and ((ancestors is None) or (n in ancestors)):
1291 1291 # Only include nodes that are both descendants and ancestors.
1292 1292 orderedout.append(n)
1293 1293 if (ancestors is not None) and (n in heads):
1294 1294 # We're trying to figure out which heads are reachable
1295 1295 # from roots.
1296 1296 # Mark this head as having been reached
1297 1297 heads[n] = True
1298 1298 elif ancestors is None:
1299 1299 # Otherwise, we're trying to discover the heads.
1300 1300 # Assume this is a head because if it isn't, the next step
1301 1301 # will eventually remove it.
1302 1302 heads[n] = True
1303 1303 # But, obviously its parents aren't.
1304 1304 for p in self.parents(n):
1305 1305 heads.pop(p, None)
1306 1306 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1307 1307 roots = list(roots)
1308 1308 assert orderedout
1309 1309 assert roots
1310 1310 assert heads
1311 1311 return (orderedout, roots, heads)
1312 1312
1313 1313 def headrevs(self, revs=None):
1314 1314 if revs is None:
1315 1315 try:
1316 1316 return self.index.headrevs()
1317 1317 except AttributeError:
1318 1318 return self._headrevs()
1319 1319 if rustdagop is not None and self.index.rust_ext_compat:
1320 1320 return rustdagop.headrevs(self.index, revs)
1321 1321 return dagop.headrevs(revs, self._uncheckedparentrevs)
1322 1322
1323 1323 def computephases(self, roots):
1324 1324 return self.index.computephasesmapsets(roots)
1325 1325
1326 1326 def _headrevs(self):
1327 1327 count = len(self)
1328 1328 if not count:
1329 1329 return [nullrev]
1330 1330 # we won't iterate over filtered revs, so nobody is a head at the start
1331 1331 ishead = [0] * (count + 1)
1332 1332 index = self.index
1333 1333 for r in self:
1334 1334 ishead[r] = 1 # I may be a head
1335 1335 e = index[r]
1336 1336 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1337 1337 return [r for r, val in enumerate(ishead) if val]
1338 1338
1339 1339 def heads(self, start=None, stop=None):
1340 1340 """return the list of all nodes that have no children
1341 1341
1342 1342 if start is specified, only heads that are descendants of
1343 1343 start will be returned
1344 1344 if stop is specified, it will consider all the revs from stop
1345 1345 as if they had no children
1346 1346 """
1347 1347 if start is None and stop is None:
1348 1348 if not len(self):
1349 1349 return [self.nullid]
1350 1350 return [self.node(r) for r in self.headrevs()]
1351 1351
1352 1352 if start is None:
1353 1353 start = nullrev
1354 1354 else:
1355 1355 start = self.rev(start)
1356 1356
1357 1357 stoprevs = {self.rev(n) for n in stop or []}
1358 1358
1359 1359 revs = dagop.headrevssubset(
1360 1360 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1361 1361 )
1362 1362
1363 1363 return [self.node(rev) for rev in revs]
1364 1364
1365 1365 def children(self, node):
1366 1366 """find the children of a given node"""
1367 1367 c = []
1368 1368 p = self.rev(node)
1369 1369 for r in self.revs(start=p + 1):
1370 1370 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1371 1371 if prevs:
1372 1372 for pr in prevs:
1373 1373 if pr == p:
1374 1374 c.append(self.node(r))
1375 1375 elif p == nullrev:
1376 1376 c.append(self.node(r))
1377 1377 return c
1378 1378
1379 1379 def commonancestorsheads(self, a, b):
1380 1380 """calculate all the heads of the common ancestors of nodes a and b"""
1381 1381 a, b = self.rev(a), self.rev(b)
1382 1382 ancs = self._commonancestorsheads(a, b)
1383 1383 return pycompat.maplist(self.node, ancs)
1384 1384
1385 1385 def _commonancestorsheads(self, *revs):
1386 1386 """calculate all the heads of the common ancestors of revs"""
1387 1387 try:
1388 1388 ancs = self.index.commonancestorsheads(*revs)
1389 1389 except (AttributeError, OverflowError): # C implementation failed
1390 1390 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1391 1391 return ancs
1392 1392
1393 1393 def isancestor(self, a, b):
1394 1394 """return True if node a is an ancestor of node b
1395 1395
1396 1396 A revision is considered an ancestor of itself."""
1397 1397 a, b = self.rev(a), self.rev(b)
1398 1398 return self.isancestorrev(a, b)
1399 1399
1400 1400 def isancestorrev(self, a, b):
1401 1401 """return True if revision a is an ancestor of revision b
1402 1402
1403 1403 A revision is considered an ancestor of itself.
1404 1404
1405 1405 The implementation of this is trivial but the use of
1406 1406 reachableroots is not."""
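# Revision numbers respect topological order, so an ancestor can never
# have a higher revision number than its descendant; that is what makes
# the ``a > b`` early return below safe.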
1407 1407 if a == nullrev:
1408 1408 return True
1409 1409 elif a == b:
1410 1410 return True
1411 1411 elif a > b:
1412 1412 return False
1413 1413 return bool(self.reachableroots(a, [b], [a], includepath=False))
1414 1414
1415 1415 def reachableroots(self, minroot, heads, roots, includepath=False):
1416 1416 """return (heads(::(<roots> and <roots>::<heads>)))
1417 1417
1418 1418 If includepath is True, return (<roots>::<heads>)."""
1419 1419 try:
1420 1420 return self.index.reachableroots2(
1421 1421 minroot, heads, roots, includepath
1422 1422 )
1423 1423 except AttributeError:
1424 1424 return dagop._reachablerootspure(
1425 1425 self.parentrevs, minroot, roots, heads, includepath
1426 1426 )
1427 1427
1428 1428 def ancestor(self, a, b):
1429 1429 """calculate the "best" common ancestor of nodes a and b"""
1430 1430
1431 1431 a, b = self.rev(a), self.rev(b)
1432 1432 try:
1433 1433 ancs = self.index.ancestors(a, b)
1434 1434 except (AttributeError, OverflowError):
1435 1435 ancs = ancestor.ancestors(self.parentrevs, a, b)
1436 1436 if ancs:
1437 1437 # choose a consistent winner when there's a tie
1438 1438 return min(map(self.node, ancs))
1439 1439 return self.nullid
1440 1440
1441 1441 def _match(self, id):
1442 1442 if isinstance(id, int):
1443 1443 # rev
1444 1444 return self.node(id)
1445 1445 if len(id) == self.nodeconstants.nodelen:
1446 1446 # possibly a binary node
1447 1447 # odds of a binary node being all hex in ASCII are 1 in 10**25
1448 1448 try:
1449 1449 node = id
1450 1450 self.rev(node) # quick search the index
1451 1451 return node
1452 1452 except error.LookupError:
1453 1453 pass # may be partial hex id
1454 1454 try:
1455 1455 # str(rev)
1456 1456 rev = int(id)
1457 1457 if b"%d" % rev != id:
1458 1458 raise ValueError
1459 1459 if rev < 0:
1460 1460 rev = len(self) + rev
1461 1461 if rev < 0 or rev >= len(self):
1462 1462 raise ValueError
1463 1463 return self.node(rev)
1464 1464 except (ValueError, OverflowError):
1465 1465 pass
1466 1466 if len(id) == 2 * self.nodeconstants.nodelen:
1467 1467 try:
1468 1468 # a full hex nodeid?
1469 1469 node = bin(id)
1470 1470 self.rev(node)
1471 1471 return node
1472 1472 except (TypeError, error.LookupError):
1473 1473 pass
1474 1474
1475 1475 def _partialmatch(self, id):
1476 1476 # we don't care about wdirfilenodeids as they should always be full hashes
1477 1477 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1478 1478 ambiguous = False
1479 1479 try:
1480 1480 partial = self.index.partialmatch(id)
1481 1481 if partial and self.hasnode(partial):
1482 1482 if maybewdir:
1483 1483 # single 'ff...' match in radix tree, ambiguous with wdir
1484 1484 ambiguous = True
1485 1485 else:
1486 1486 return partial
1487 1487 elif maybewdir:
1488 1488 # no 'ff...' match in radix tree, wdir identified
1489 1489 raise error.WdirUnsupported
1490 1490 else:
1491 1491 return None
1492 1492 except error.RevlogError:
1493 1493 # parsers.c radix tree lookup gave multiple matches
1494 1494 # fast path: for unfiltered changelog, radix tree is accurate
1495 1495 if not getattr(self, 'filteredrevs', None):
1496 1496 ambiguous = True
1497 1497 # fall through to slow path that filters hidden revisions
1498 1498 except (AttributeError, ValueError):
1499 1499 # we are pure python, or key was too short to search radix tree
1500 1500 pass
1501 1501 if ambiguous:
1502 1502 raise error.AmbiguousPrefixLookupError(
1503 1503 id, self.display_id, _(b'ambiguous identifier')
1504 1504 )
1505 1505
1506 1506 if id in self._pcache:
1507 1507 return self._pcache[id]
1508 1508
1509 1509 if len(id) <= 40:
1510 1510 try:
1511 1511 # hex(node)[:...]
1512 1512 l = len(id) // 2 # grab an even number of digits
1513 1513 prefix = bin(id[: l * 2])
1514 1514 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1515 1515 nl = [
1516 1516 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1517 1517 ]
1518 1518 if self.nodeconstants.nullhex.startswith(id):
1519 1519 nl.append(self.nullid)
1520 1520 if len(nl) > 0:
1521 1521 if len(nl) == 1 and not maybewdir:
1522 1522 self._pcache[id] = nl[0]
1523 1523 return nl[0]
1524 1524 raise error.AmbiguousPrefixLookupError(
1525 1525 id, self.display_id, _(b'ambiguous identifier')
1526 1526 )
1527 1527 if maybewdir:
1528 1528 raise error.WdirUnsupported
1529 1529 return None
1530 1530 except TypeError:
1531 1531 pass
1532 1532
1533 1533 def lookup(self, id):
1534 1534 """locate a node based on:
1535 1535 - revision number or str(revision number)
1536 1536 - nodeid or subset of hex nodeid
1537 1537 """
1538 1538 n = self._match(id)
1539 1539 if n is not None:
1540 1540 return n
1541 1541 n = self._partialmatch(id)
1542 1542 if n:
1543 1543 return n
1544 1544
1545 1545 raise error.LookupError(id, self.display_id, _(b'no match found'))
1546 1546
1547 1547 def shortest(self, node, minlength=1):
1548 1548 """Find the shortest unambiguous prefix that matches node."""
1549 1549
1550 1550 def isvalid(prefix):
1551 1551 try:
1552 1552 matchednode = self._partialmatch(prefix)
1553 1553 except error.AmbiguousPrefixLookupError:
1554 1554 return False
1555 1555 except error.WdirUnsupported:
1556 1556 # single 'ff...' match
1557 1557 return True
1558 1558 if matchednode is None:
1559 1559 raise error.LookupError(node, self.display_id, _(b'no node'))
1560 1560 return True
1561 1561
1562 1562 def maybewdir(prefix):
1563 1563 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1564 1564
1565 1565 hexnode = hex(node)
1566 1566
1567 1567 def disambiguate(hexnode, minlength):
1568 1568 """Disambiguate against wdirid."""
1569 1569 for length in range(minlength, len(hexnode) + 1):
1570 1570 prefix = hexnode[:length]
1571 1571 if not maybewdir(prefix):
1572 1572 return prefix
1573 1573
1574 1574 if not getattr(self, 'filteredrevs', None):
1575 1575 try:
1576 1576 length = max(self.index.shortest(node), minlength)
1577 1577 return disambiguate(hexnode, length)
1578 1578 except error.RevlogError:
1579 1579 if node != self.nodeconstants.wdirid:
1580 1580 raise error.LookupError(
1581 1581 node, self.display_id, _(b'no node')
1582 1582 )
1583 1583 except AttributeError:
1584 1584 # Fall through to pure code
1585 1585 pass
1586 1586
1587 1587 if node == self.nodeconstants.wdirid:
1588 1588 for length in range(minlength, len(hexnode) + 1):
1589 1589 prefix = hexnode[:length]
1590 1590 if isvalid(prefix):
1591 1591 return prefix
1592 1592
1593 1593 for length in range(minlength, len(hexnode) + 1):
1594 1594 prefix = hexnode[:length]
1595 1595 if isvalid(prefix):
1596 1596 return disambiguate(hexnode, length)
1597 1597
1598 1598 def cmp(self, node, text):
1599 1599 """compare text with a given file revision
1600 1600
1601 1601 returns True if text is different than what is stored.
1602 1602 """
1603 1603 p1, p2 = self.parents(node)
1604 1604 return storageutil.hashrevisionsha1(text, p1, p2) != node
1605 1605
1606 1606 def _getsegmentforrevs(self, startrev, endrev, df=None):
1607 1607 """Obtain a segment of raw data corresponding to a range of revisions.
1608 1608
1609 1609 Accepts the start and end revisions and an optional already-open
1610 1610 file handle to be used for reading. If the file handle is read, its
1611 1611 seek position will not be preserved.
1612 1612
1613 1613 Requests for data may be satisfied by a cache.
1614 1614
1615 1615 Returns a 2-tuple of (offset, data) for the requested range of
1616 1616 revisions. Offset is the integer offset from the beginning of the
1617 1617 revlog and data is a str or buffer of the raw byte data.
1618 1618
1619 1619 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1620 1620 to determine where each revision's data begins and ends.
1621 1621 """
1622 1622 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1623 1623 # (functions are expensive).
1624 1624 index = self.index
1625 1625 istart = index[startrev]
1626 1626 start = int(istart[0] >> 16)
1627 1627 if startrev == endrev:
1628 1628 end = start + istart[1]
1629 1629 else:
1630 1630 iend = index[endrev]
1631 1631 end = int(iend[0] >> 16) + iend[1]
1632 1632
1633 1633 if self._inline:
1634 1634 start += (startrev + 1) * self.index.entry_size
1635 1635 end += (endrev + 1) * self.index.entry_size
1636 1636 length = end - start
1637 1637
1638 1638 return start, self._segmentfile.read_chunk(start, length, df)
1639 1639
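# How a caller is expected to slice the returned segment (a sketch; it
# ignores the inline-index adjustment that _chunks() below performs):
#
#   offset, data = rl._getsegmentforrevs(startrev, endrev)
#   for rev in range(startrev, endrev + 1):
#       begin = rl.start(rev) - offset
#       chunk = data[begin:begin + rl.length(rev)]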
1640 1640 def _chunk(self, rev, df=None):
1641 1641 """Obtain a single decompressed chunk for a revision.
1642 1642
1643 1643 Accepts an integer revision and an optional already-open file handle
1644 1644 to be used for reading. If used, the seek position of the file will not
1645 1645 be preserved.
1646 1646
1647 1647 Returns a str holding uncompressed data for the requested revision.
1648 1648 """
1649 1649 compression_mode = self.index[rev][10]
1650 1650 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1651 1651 if compression_mode == COMP_MODE_PLAIN:
1652 1652 return data
1653 1653 elif compression_mode == COMP_MODE_DEFAULT:
1654 1654 return self._decompressor(data)
1655 1655 elif compression_mode == COMP_MODE_INLINE:
1656 1656 return self.decompress(data)
1657 1657 else:
1658 1658 msg = b'unknown compression mode %d'
1659 1659 msg %= compression_mode
1660 1660 raise error.RevlogError(msg)
1661 1661
1662 1662 def _chunks(self, revs, df=None, targetsize=None):
1663 1663 """Obtain decompressed chunks for the specified revisions.
1664 1664
1665 1665 Accepts an iterable of numeric revisions that are assumed to be in
1666 1666 ascending order. Also accepts an optional already-open file handle
1667 1667 to be used for reading. If used, the seek position of the file will
1668 1668 not be preserved.
1669 1669
1670 1670 This function is similar to calling ``self._chunk()`` multiple times,
1671 1671 but is faster.
1672 1672
1673 1673 Returns a list with decompressed data for each requested revision.
1674 1674 """
1675 1675 if not revs:
1676 1676 return []
1677 1677 start = self.start
1678 1678 length = self.length
1679 1679 inline = self._inline
1680 1680 iosize = self.index.entry_size
1681 1681 buffer = util.buffer
1682 1682
1683 1683 l = []
1684 1684 ladd = l.append
1685 1685
1686 1686 if not self._withsparseread:
1687 1687 slicedchunks = (revs,)
1688 1688 else:
1689 1689 slicedchunks = deltautil.slicechunk(
1690 1690 self, revs, targetsize=targetsize
1691 1691 )
1692 1692
1693 1693 for revschunk in slicedchunks:
1694 1694 firstrev = revschunk[0]
1695 1695 # Skip trailing revisions with empty diff
1696 1696 for lastrev in revschunk[::-1]:
1697 1697 if length(lastrev) != 0:
1698 1698 break
1699 1699
1700 1700 try:
1701 1701 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1702 1702 except OverflowError:
1703 1703 # issue4215 - we can't cache a run of chunks greater than
1704 1704 # 2G on Windows
1705 1705 return [self._chunk(rev, df=df) for rev in revschunk]
1706 1706
1707 1707 decomp = self.decompress
1708 1708 # self._decompressor might be None, but will not be used in that case
1709 1709 def_decomp = self._decompressor
1710 1710 for rev in revschunk:
1711 1711 chunkstart = start(rev)
1712 1712 if inline:
1713 1713 chunkstart += (rev + 1) * iosize
1714 1714 chunklength = length(rev)
1715 1715 comp_mode = self.index[rev][10]
1716 1716 c = buffer(data, chunkstart - offset, chunklength)
1717 1717 if comp_mode == COMP_MODE_PLAIN:
1718 1718 ladd(c)
1719 1719 elif comp_mode == COMP_MODE_INLINE:
1720 1720 ladd(decomp(c))
1721 1721 elif comp_mode == COMP_MODE_DEFAULT:
1722 1722 ladd(def_decomp(c))
1723 1723 else:
1724 1724 msg = b'unknown compression mode %d'
1725 1725 msg %= comp_mode
1726 1726 raise error.RevlogError(msg)
1727 1727
1728 1728 return l
1729 1729
1730 1730 def deltaparent(self, rev):
1731 1731 """return deltaparent of the given revision"""
1732 1732 base = self.index[rev][3]
1733 1733 if base == rev:
1734 1734 return nullrev
1735 1735 elif self._generaldelta:
1736 1736 return base
1737 1737 else:
1738 1738 return rev - 1
1739 1739
1740 1740 def issnapshot(self, rev):
1741 1741 """tells whether rev is a snapshot"""
1742 1742 if not self._sparserevlog:
1743 1743 return self.deltaparent(rev) == nullrev
1744 1744 elif util.safehasattr(self.index, b'issnapshot'):
1745 1745 # directly assign the method to cache the testing and access
1746 1746 self.issnapshot = self.index.issnapshot
1747 1747 return self.issnapshot(rev)
1748 1748 if rev == nullrev:
1749 1749 return True
1750 1750 entry = self.index[rev]
1751 1751 base = entry[3]
1752 1752 if base == rev:
1753 1753 return True
1754 1754 if base == nullrev:
1755 1755 return True
1756 1756 p1 = entry[5]
1757 1757 p2 = entry[6]
1758 1758 if base == p1 or base == p2:
1759 1759 return False
1760 1760 return self.issnapshot(base)
1761 1761
1762 1762 def snapshotdepth(self, rev):
1763 1763 """number of snapshot in the chain before this one"""
1764 1764 if not self.issnapshot(rev):
1765 1765 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1766 1766 return len(self._deltachain(rev)[0]) - 1
1767 1767
1768 1768 def revdiff(self, rev1, rev2):
1769 1769 """return or calculate a delta between two revisions
1770 1770
1771 1771 The delta calculated is in binary form and is intended to be written to
1772 1772 revlog data directly. So this function needs raw revision data.
1773 1773 """
1774 1774 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1775 1775 return bytes(self._chunk(rev2))
1776 1776
1777 1777 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1778 1778
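# Property sketch for revdiff(): the returned delta patches rev1's raw
# text into rev2's raw text (``rl`` is an open revlog; both revisions are
# assumed to exist):
#
#   delta = rl.revdiff(rev1, rev2)
#   assert mdiff.patches(rl.rawdata(rev1), [delta]) == rl.rawdata(rev2)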
1779 1779 def _processflags(self, text, flags, operation, raw=False):
1780 1780 """deprecated entry point to access flag processors"""
1781 1781 msg = b'_processflag(...) use the specialized variant'
1782 1782 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1783 1783 if raw:
1784 1784 return text, flagutil.processflagsraw(self, text, flags)
1785 1785 elif operation == b'read':
1786 1786 return flagutil.processflagsread(self, text, flags)
1787 1787 else: # write operation
1788 1788 return flagutil.processflagswrite(self, text, flags)
1789 1789
1790 1790 def revision(self, nodeorrev, _df=None, raw=False):
1791 1791 """return an uncompressed revision of a given node or revision
1792 1792 number.
1793 1793
1794 1794 _df - an existing file handle to read from. (internal-only)
1795 1795 raw - an optional argument specifying if the revision data is to be
1796 1796 treated as raw data when applying flag transforms. 'raw' should be set
1797 1797 to True when generating changegroups or in debug commands.
1798 1798 """
1799 1799 if raw:
1800 1800 msg = (
1801 1801 b'revlog.revision(..., raw=True) is deprecated, '
1802 1802 b'use revlog.rawdata(...)'
1803 1803 )
1804 1804 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1805 1805 return self._revisiondata(nodeorrev, _df, raw=raw)
1806 1806
1807 1807 def sidedata(self, nodeorrev, _df=None):
1808 1808 """a map of extra data related to the changeset but not part of the hash
1809 1809
1810 1810 This function currently returns a dictionary. However, a more
1811 1811 advanced mapping object will likely be used in the future to make
1812 1812 the code more efficient/lazy.
1813 1813 """
1814 1814 # deal with <nodeorrev> argument type
1815 1815 if isinstance(nodeorrev, int):
1816 1816 rev = nodeorrev
1817 1817 else:
1818 1818 rev = self.rev(nodeorrev)
1819 1819 return self._sidedata(rev)
1820 1820
1821 1821 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1822 1822 # deal with <nodeorrev> argument type
1823 1823 if isinstance(nodeorrev, int):
1824 1824 rev = nodeorrev
1825 1825 node = self.node(rev)
1826 1826 else:
1827 1827 node = nodeorrev
1828 1828 rev = None
1829 1829
1830 1830 # fast path the special `nullid` rev
1831 1831 if node == self.nullid:
1832 1832 return b""
1833 1833
1834 1834 # ``rawtext`` is the text as stored inside the revlog. Might be the
1835 1835 # revision or might need to be processed to retrieve the revision.
1836 1836 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1837 1837
1838 1838 if raw and validated:
1839 1839 # if we don't want to process the raw text and the raw
1840 1840 # text is cached, we can exit early.
1841 1841 return rawtext
1842 1842 if rev is None:
1843 1843 rev = self.rev(node)
1844 1844 # the revlog's flags for this revision
1845 1845 # (they usually alter its state or content)
1846 1846 flags = self.flags(rev)
1847 1847
1848 1848 if validated and flags == REVIDX_DEFAULT_FLAGS:
1849 1849 # no extra flags set, no flag processor runs, text = rawtext
1850 1850 return rawtext
1851 1851
1852 1852 if raw:
1853 1853 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1854 1854 text = rawtext
1855 1855 else:
1856 1856 r = flagutil.processflagsread(self, rawtext, flags)
1857 1857 text, validatehash = r
1858 1858 if validatehash:
1859 1859 self.checkhash(text, node, rev=rev)
1860 1860 if not validated:
1861 1861 self._revisioncache = (node, rev, rawtext)
1862 1862
1863 1863 return text
1864 1864
1865 1865 def _rawtext(self, node, rev, _df=None):
1866 1866 """return the possibly unvalidated rawtext for a revision
1867 1867
1868 1868 returns (rev, rawtext, validated)
1869 1869 """
1870 1870
1871 1871 # revision in the cache (could be useful to apply delta)
1872 1872 cachedrev = None
1873 1873 # An intermediate text to apply deltas to
1874 1874 basetext = None
1875 1875
1876 1876 # Check if we have the entry in cache
1877 1877 # The cache entry looks like (node, rev, rawtext)
1878 1878 if self._revisioncache:
1879 1879 if self._revisioncache[0] == node:
1880 1880 return (rev, self._revisioncache[2], True)
1881 1881 cachedrev = self._revisioncache[1]
1882 1882
1883 1883 if rev is None:
1884 1884 rev = self.rev(node)
1885 1885
1886 1886 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1887 1887 if stopped:
1888 1888 basetext = self._revisioncache[2]
1889 1889
1890 1890 # drop cache to save memory, the caller is expected to
1891 1891 # update self._revisioncache after validating the text
1892 1892 self._revisioncache = None
1893 1893
1894 1894 targetsize = None
1895 1895 rawsize = self.index[rev][2]
1896 1896 if 0 <= rawsize:
1897 1897 targetsize = 4 * rawsize
1898 1898
1899 1899 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1900 1900 if basetext is None:
1901 1901 basetext = bytes(bins[0])
1902 1902 bins = bins[1:]
1903 1903
1904 1904 rawtext = mdiff.patches(basetext, bins)
1905 1905 del basetext # let us have a chance to free memory early
1906 1906 return (rev, rawtext, False)
1907 1907
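# The reconstruction above, in a nutshell (sketch of the same steps):
#
#   chain, _ = rl._deltachain(rev)   # [baserev, delta revs ..., rev]
#   bins = rl._chunks(chain)
#   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])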
1908 1908 def _sidedata(self, rev):
1909 1909 """Return the sidedata for a given revision number."""
1910 1910 index_entry = self.index[rev]
1911 1911 sidedata_offset = index_entry[8]
1912 1912 sidedata_size = index_entry[9]
1913 1913
1914 1914 if self._inline:
1915 1915 sidedata_offset += self.index.entry_size * (1 + rev)
1916 1916 if sidedata_size == 0:
1917 1917 return {}
1918 1918
1919 1919 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1920 1920 filename = self._sidedatafile
1921 1921 end = self._docket.sidedata_end
1922 1922 offset = sidedata_offset
1923 1923 length = sidedata_size
1924 1924 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1925 1925 raise error.RevlogError(m)
1926 1926
1927 1927 comp_segment = self._segmentfile_sidedata.read_chunk(
1928 1928 sidedata_offset, sidedata_size
1929 1929 )
1930 1930
1931 1931 comp = self.index[rev][11]
1932 1932 if comp == COMP_MODE_PLAIN:
1933 1933 segment = comp_segment
1934 1934 elif comp == COMP_MODE_DEFAULT:
1935 1935 segment = self._decompressor(comp_segment)
1936 1936 elif comp == COMP_MODE_INLINE:
1937 1937 segment = self.decompress(comp_segment)
1938 1938 else:
1939 1939 msg = b'unknown compression mode %d'
1940 1940 msg %= comp
1941 1941 raise error.RevlogError(msg)
1942 1942
1943 1943 sidedata = sidedatautil.deserialize_sidedata(segment)
1944 1944 return sidedata
1945 1945
1946 1946 def rawdata(self, nodeorrev, _df=None):
1947 1947 """return an uncompressed raw data of a given node or revision number.
1948 1948
1949 1949 _df - an existing file handle to read from. (internal-only)
1950 1950 """
1951 1951 return self._revisiondata(nodeorrev, _df, raw=True)
1952 1952
1953 1953 def hash(self, text, p1, p2):
1954 1954 """Compute a node hash.
1955 1955
1956 1956 Available as a function so that subclasses can replace the hash
1957 1957 as needed.
1958 1958 """
1959 1959 return storageutil.hashrevisionsha1(text, p1, p2)
1960 1960
1961 1961 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1962 1962 """Check node hash integrity.
1963 1963
1964 1964 Available as a function so that subclasses can extend hash mismatch
1965 1965 behaviors as needed.
1966 1966 """
1967 1967 try:
1968 1968 if p1 is None and p2 is None:
1969 1969 p1, p2 = self.parents(node)
1970 1970 if node != self.hash(text, p1, p2):
1971 1971 # Clear the revision cache on hash failure. The revision cache
1972 1972 # only stores the raw revision and clearing the cache does have
1973 1973 # the side-effect that we won't have a cache hit when the raw
1974 1974 # revision data is accessed. But this case should be rare and
1975 1975 # it is extra work to teach the cache about the hash
1976 1976 # verification state.
1977 1977 if self._revisioncache and self._revisioncache[0] == node:
1978 1978 self._revisioncache = None
1979 1979
1980 1980 revornode = rev
1981 1981 if revornode is None:
1982 1982 revornode = templatefilters.short(hex(node))
1983 1983 raise error.RevlogError(
1984 1984 _(b"integrity check failed on %s:%s")
1985 1985 % (self.display_id, pycompat.bytestr(revornode))
1986 1986 )
1987 1987 except error.RevlogError:
1988 1988 if self._censorable and storageutil.iscensoredtext(text):
1989 1989 raise error.CensoredNodeError(self.display_id, node, text)
1990 1990 raise
1991 1991
1992 1992 def _enforceinlinesize(self, tr):
1993 1993 """Check if the revlog is too big for inline and convert if so.
1994 1994
1995 1995 This should be called after revisions are added to the revlog. If the
1996 1996 revlog has grown too large to be an inline revlog, it will be converted
1997 1997 to use separate index and data files.
1998 1998 """
1999 1999 tiprev = len(self) - 1
2000 2000 total_size = self.start(tiprev) + self.length(tiprev)
2001 2001 if not self._inline or total_size < _maxinline:
2002 2002 return
2003 2003
2004 2004 troffset = tr.findoffset(self._indexfile)
2005 2005 if troffset is None:
2006 2006 raise error.RevlogError(
2007 2007 _(b"%s not found in the transaction") % self._indexfile
2008 2008 )
2009 trindex = 0
2009 trindex = None
2010 2010 tr.add(self._datafile, 0)
2011 2011
2012 2012 existing_handles = False
2013 2013 if self._writinghandles is not None:
2014 2014 existing_handles = True
2015 2015 fp = self._writinghandles[0]
2016 2016 fp.flush()
2017 2017 fp.close()
2018 2018 # We can't use the cached file handle after close(). So prevent
2019 2019 # its usage.
2020 2020 self._writinghandles = None
2021 2021 self._segmentfile.writing_handle = None
2022 2022 # No need to deal with the sidedata writing handle as it is only
2023 2023 # relevant for revlog-v2, which is never inline and therefore
2024 2024 # never reaches this code
2025 2025
2026 2026 new_dfh = self._datafp(b'w+')
2027 2027 new_dfh.truncate(0) # drop any potentially existing data
2028 2028 try:
2029 2029 with self._indexfp() as read_ifh:
2030 2030 for r in self:
2031 2031 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2032 if troffset <= self.start(r) + r * self.index.entry_size:
2032 if (
2033 trindex is None
2034 and troffset
2035 <= self.start(r) + r * self.index.entry_size
2036 ):
2033 2037 trindex = r
2034 2038 new_dfh.flush()
2035 2039
2040 if trindex is None:
2041 trindex = 0
2042
2036 2043 with self.__index_new_fp() as fp:
2037 2044 self._format_flags &= ~FLAG_INLINE_DATA
2038 2045 self._inline = False
2039 2046 for i in self:
2040 2047 e = self.index.entry_binary(i)
2041 2048 if i == 0 and self._docket is None:
2042 2049 header = self._format_flags | self._format_version
2043 2050 header = self.index.pack_header(header)
2044 2051 e = header + e
2045 2052 fp.write(e)
2046 2053 if self._docket is not None:
2047 2054 self._docket.index_end = fp.tell()
2048 2055
2049 2056 # There is a small transactional race here. If the rename of
2050 2057 # the index fails, we should remove the datafile. It is more
2051 2058 # important to ensure that the data file is not truncated
2052 2059 # when the index is replaced as otherwise data is lost.
2053 2060 tr.replace(self._datafile, self.start(trindex))
2054 2061
2055 2062 # the temp file replaces the real index when we exit the context
2056 2063 # manager
2057 2064
2058 2065 tr.replace(self._indexfile, trindex * self.index.entry_size)
2059 2066 nodemaputil.setup_persistent_nodemap(tr, self)
2060 2067 self._segmentfile = randomaccessfile.randomaccessfile(
2061 2068 self.opener,
2062 2069 self._datafile,
2063 2070 self._chunkcachesize,
2064 2071 )
2065 2072
2066 2073 if existing_handles:
2067 2074 # switched from inline to conventional; reopen the index
2068 2075 ifh = self.__index_write_fp()
2069 2076 self._writinghandles = (ifh, new_dfh, None)
2070 2077 self._segmentfile.writing_handle = new_dfh
2071 2078 new_dfh = None
2072 2079 # No need to deal with the sidedata writing handle as it is only
2073 2080 # relevant for revlog-v2, which is never inline and therefore
2074 2081 # never reaches this code
2075 2082 finally:
2076 2083 if new_dfh is not None:
2077 2084 new_dfh.close()
2078 2085
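# Trigger condition in short (a sketch): an inline revlog is split into
# separate index and data files once its total size reaches _maxinline:
#
#   tip = len(rl) - 1
#   if rl._inline and rl.start(tip) + rl.length(tip) >= _maxinline:
#       ...  # rewrite .i/.d within the transaction, as done above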
2079 2086 def _nodeduplicatecallback(self, transaction, node):
2080 2087 """called when trying to add a node already stored."""
2081 2088
2082 2089 @contextlib.contextmanager
2083 2090 def reading(self):
2084 2091 """Context manager that keeps data and sidedata files open for reading"""
2085 2092 with self._segmentfile.reading():
2086 2093 with self._segmentfile_sidedata.reading():
2087 2094 yield
2088 2095
2089 2096 @contextlib.contextmanager
2090 2097 def _writing(self, transaction):
2091 2098 if self._trypending:
2092 2099 msg = b'try to write in a `trypending` revlog: %s'
2093 2100 msg %= self.display_id
2094 2101 raise error.ProgrammingError(msg)
2095 2102 if self._writinghandles is not None:
2096 2103 yield
2097 2104 else:
2098 2105 ifh = dfh = sdfh = None
2099 2106 try:
2100 2107 r = len(self)
2101 2108 # opening the data file.
2102 2109 dsize = 0
2103 2110 if r:
2104 2111 dsize = self.end(r - 1)
2105 2112 dfh = None
2106 2113 if not self._inline:
2107 2114 try:
2108 2115 dfh = self._datafp(b"r+")
2109 2116 if self._docket is None:
2110 2117 dfh.seek(0, os.SEEK_END)
2111 2118 else:
2112 2119 dfh.seek(self._docket.data_end, os.SEEK_SET)
2113 2120 except IOError as inst:
2114 2121 if inst.errno != errno.ENOENT:
2115 2122 raise
2116 2123 dfh = self._datafp(b"w+")
2117 2124 transaction.add(self._datafile, dsize)
2118 2125 if self._sidedatafile is not None:
2119 2126 # revlog-v2 does not inline, help Pytype
2120 2127 assert dfh is not None
2121 2128 try:
2122 2129 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2123 2130 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2124 2131 except IOError as inst:
2125 2132 if inst.errno != errno.ENOENT:
2126 2133 raise
2127 2134 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2128 2135 transaction.add(
2129 2136 self._sidedatafile, self._docket.sidedata_end
2130 2137 )
2131 2138
2132 2139 # opening the index file.
2133 2140 isize = r * self.index.entry_size
2134 2141 ifh = self.__index_write_fp()
2135 2142 if self._inline:
2136 2143 transaction.add(self._indexfile, dsize + isize)
2137 2144 else:
2138 2145 transaction.add(self._indexfile, isize)
2139 2146 # exposing all file handle for writing.
2140 2147 self._writinghandles = (ifh, dfh, sdfh)
2141 2148 self._segmentfile.writing_handle = ifh if self._inline else dfh
2142 2149 self._segmentfile_sidedata.writing_handle = sdfh
2143 2150 yield
2144 2151 if self._docket is not None:
2145 2152 self._write_docket(transaction)
2146 2153 finally:
2147 2154 self._writinghandles = None
2148 2155 self._segmentfile.writing_handle = None
2149 2156 self._segmentfile_sidedata.writing_handle = None
2150 2157 if dfh is not None:
2151 2158 dfh.close()
2152 2159 if sdfh is not None:
2153 2160 sdfh.close()
2154 2161 # closing the index file last to avoid exposing a reference to
2155 2162 # potentially unflushed data content.
2156 2163 if ifh is not None:
2157 2164 ifh.close()
2158 2165
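# Typical usage (sketch): every mutation happens inside this context,
# mirroring what addrawrevision() below does:
#
#   with rl._writing(tr):
#       rl._addrevision(node, rawtext, tr, link, p1, p2, flags, None)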
2159 2166 def _write_docket(self, transaction):
2160 2167 """write the current docket on disk
2161 2168
2162 2169 Exists as a method to help the changelog implement its transaction logic
2163 2170
2164 2171 We could also imagine using the same transaction logic for all revlogs
2165 2172 since dockets are cheap."""
2166 2173 self._docket.write(transaction)
2167 2174
2168 2175 def addrevision(
2169 2176 self,
2170 2177 text,
2171 2178 transaction,
2172 2179 link,
2173 2180 p1,
2174 2181 p2,
2175 2182 cachedelta=None,
2176 2183 node=None,
2177 2184 flags=REVIDX_DEFAULT_FLAGS,
2178 2185 deltacomputer=None,
2179 2186 sidedata=None,
2180 2187 ):
2181 2188 """add a revision to the log
2182 2189
2183 2190 text - the revision data to add
2184 2191 transaction - the transaction object used for rollback
2185 2192 link - the linkrev data to add
2186 2193 p1, p2 - the parent nodeids of the revision
2187 2194 cachedelta - an optional precomputed delta
2188 2195 node - nodeid of revision; typically node is not specified, and it is
2189 2196 computed by default as hash(text, p1, p2); however, subclasses might
2190 2197 use a different hashing method (and override checkhash() in that case)
2191 2198 flags - the known flags to set on the revision
2192 2199 deltacomputer - an optional deltacomputer instance shared between
2193 2200 multiple calls
2194 2201 """
2195 2202 if link == nullrev:
2196 2203 raise error.RevlogError(
2197 2204 _(b"attempted to add linkrev -1 to %s") % self.display_id
2198 2205 )
2199 2206
2200 2207 if sidedata is None:
2201 2208 sidedata = {}
2202 2209 elif sidedata and not self.hassidedata:
2203 2210 raise error.ProgrammingError(
2204 2211 _(b"trying to add sidedata to a revlog who don't support them")
2205 2212 )
2206 2213
2207 2214 if flags:
2208 2215 node = node or self.hash(text, p1, p2)
2209 2216
2210 2217 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2211 2218
2212 2219 # If the flag processor modifies the revision data, ignore any provided
2213 2220 # cachedelta.
2214 2221 if rawtext != text:
2215 2222 cachedelta = None
2216 2223
2217 2224 if len(rawtext) > _maxentrysize:
2218 2225 raise error.RevlogError(
2219 2226 _(
2220 2227 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2221 2228 )
2222 2229 % (self.display_id, len(rawtext))
2223 2230 )
2224 2231
2225 2232 node = node or self.hash(rawtext, p1, p2)
2226 2233 rev = self.index.get_rev(node)
2227 2234 if rev is not None:
2228 2235 return rev
2229 2236
2230 2237 if validatehash:
2231 2238 self.checkhash(rawtext, node, p1=p1, p2=p2)
2232 2239
2233 2240 return self.addrawrevision(
2234 2241 rawtext,
2235 2242 transaction,
2236 2243 link,
2237 2244 p1,
2238 2245 p2,
2239 2246 node,
2240 2247 flags,
2241 2248 cachedelta=cachedelta,
2242 2249 deltacomputer=deltacomputer,
2243 2250 sidedata=sidedata,
2244 2251 )
2245 2252
2246 2253 def addrawrevision(
2247 2254 self,
2248 2255 rawtext,
2249 2256 transaction,
2250 2257 link,
2251 2258 p1,
2252 2259 p2,
2253 2260 node,
2254 2261 flags,
2255 2262 cachedelta=None,
2256 2263 deltacomputer=None,
2257 2264 sidedata=None,
2258 2265 ):
2259 2266 """add a raw revision with known flags, node and parents
2260 2267 useful when reusing a revision not stored in this revlog (e.g. received
2261 2268 over the wire, or read from an external bundle).
2262 2269 """
2263 2270 with self._writing(transaction):
2264 2271 return self._addrevision(
2265 2272 node,
2266 2273 rawtext,
2267 2274 transaction,
2268 2275 link,
2269 2276 p1,
2270 2277 p2,
2271 2278 flags,
2272 2279 cachedelta,
2273 2280 deltacomputer=deltacomputer,
2274 2281 sidedata=sidedata,
2275 2282 )
2276 2283
2277 2284 def compress(self, data):
2278 2285 """Generate a possibly-compressed representation of data."""
2279 2286 if not data:
2280 2287 return b'', data
2281 2288
2282 2289 compressed = self._compressor.compress(data)
2283 2290
2284 2291 if compressed:
2285 2292 # The revlog compressor added the header in the returned data.
2286 2293 return b'', compressed
2287 2294
2288 2295 if data[0:1] == b'\0':
2289 2296 return b'', data
2290 2297 return b'u', data
2291 2298
2292 2299 def decompress(self, data):
2293 2300 """Decompress a revlog chunk.
2294 2301
2295 2302 The chunk is expected to begin with a header identifying the
2296 2303 format type so it can be routed to an appropriate decompressor.
2297 2304 """
2298 2305 if not data:
2299 2306 return data
2300 2307
2301 2308 # Revlogs are read much more frequently than they are written and many
2302 2309 # chunks only take microseconds to decompress, so performance is
2303 2310 # important here.
2304 2311 #
2305 2312 # We can make a few assumptions about revlogs:
2306 2313 #
2307 2314 # 1) the majority of chunks will be compressed (as opposed to inline
2308 2315 # raw data).
2309 2316 # 2) decompressing *any* data will likely be at least 10x slower than
2310 2317 # returning raw inline data.
2311 2318 # 3) we want to prioritize common and officially supported compression
2312 2319 # engines
2313 2320 #
2314 2321 # It follows that we want to optimize the "decompress compressed data
2315 2322 # encoded with common and officially supported compression engines"
2316 2323 # case over "raw data" and "data encoded by less common or non-official
2317 2324 # compression engines." That is why we have the inline lookup first
2318 2325 # followed by the compengines lookup.
2319 2326 #
2320 2327 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2321 2328 # compressed chunks. And this matters for changelog and manifest reads.
2322 2329 t = data[0:1]
2323 2330
2324 2331 if t == b'x':
2325 2332 try:
2326 2333 return _zlibdecompress(data)
2327 2334 except zlib.error as e:
2328 2335 raise error.RevlogError(
2329 2336 _(b'revlog decompress error: %s')
2330 2337 % stringutil.forcebytestr(e)
2331 2338 )
2332 2339 # '\0' is more common than 'u' so it goes first.
2333 2340 elif t == b'\0':
2334 2341 return data
2335 2342 elif t == b'u':
2336 2343 return util.buffer(data, 1)
2337 2344
2338 2345 compressor = self._get_decompressor(t)
2339 2346
2340 2347 return compressor.decompress(data)
2341 2348
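# The one-byte header convention shared by compress()/decompress(),
# spelled out (a sketch; ``rl`` is an open revlog):
#
#   b'u'  -> stored uncompressed, marker stripped on read
#   b'\0' -> raw data that happens to start with NUL, returned as-is
#   b'x'  -> zlib stream (zlib's own magic byte)
#   other -> routed to the matching compression engine
#
#   >>> h, d = rl.compress(b'some text')
#   >>> bytes(rl.decompress(h + d)) == b'some text'
#   True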
2342 2349 def _addrevision(
2343 2350 self,
2344 2351 node,
2345 2352 rawtext,
2346 2353 transaction,
2347 2354 link,
2348 2355 p1,
2349 2356 p2,
2350 2357 flags,
2351 2358 cachedelta,
2352 2359 alwayscache=False,
2353 2360 deltacomputer=None,
2354 2361 sidedata=None,
2355 2362 ):
2356 2363 """internal function to add revisions to the log
2357 2364
2358 2365 see addrevision for argument descriptions.
2359 2366
2360 2367 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2361 2368
2362 2369 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2363 2370 be used.
2364 2371
2365 2372 invariants:
2366 2373 - rawtext is optional (can be None); if not set, cachedelta must be set.
2367 2374 if both are set, they must correspond to each other.
2368 2375 """
2369 2376 if node == self.nullid:
2370 2377 raise error.RevlogError(
2371 2378 _(b"%s: attempt to add null revision") % self.display_id
2372 2379 )
2373 2380 if (
2374 2381 node == self.nodeconstants.wdirid
2375 2382 or node in self.nodeconstants.wdirfilenodeids
2376 2383 ):
2377 2384 raise error.RevlogError(
2378 2385 _(b"%s: attempt to add wdir revision") % self.display_id
2379 2386 )
2380 2387 if self._writinghandles is None:
2381 2388 msg = b'adding revision outside `revlog._writing` context'
2382 2389 raise error.ProgrammingError(msg)
2383 2390
2384 2391 if self._inline:
2385 2392 fh = self._writinghandles[0]
2386 2393 else:
2387 2394 fh = self._writinghandles[1]
2388 2395
2389 2396 btext = [rawtext]
2390 2397
2391 2398 curr = len(self)
2392 2399 prev = curr - 1
2393 2400
2394 2401 offset = self._get_data_offset(prev)
2395 2402
2396 2403 if self._concurrencychecker:
2397 2404 ifh, dfh, sdfh = self._writinghandles
2398 2405 # XXX no checking for the sidedata file
2399 2406 if self._inline:
2400 2407 # offset is "as if" it were in the .d file, so we need to add on
2401 2408 # the size of the entry metadata.
2402 2409 self._concurrencychecker(
2403 2410 ifh, self._indexfile, offset + curr * self.index.entry_size
2404 2411 )
2405 2412 else:
2406 2413 # Entries in the .i are a consistent size.
2407 2414 self._concurrencychecker(
2408 2415 ifh, self._indexfile, curr * self.index.entry_size
2409 2416 )
2410 2417 self._concurrencychecker(dfh, self._datafile, offset)
2411 2418
2412 2419 p1r, p2r = self.rev(p1), self.rev(p2)
2413 2420
2414 2421 # full versions are inserted when the needed deltas
2415 2422 # become comparable to the uncompressed text
2416 2423 if rawtext is None:
2417 2424 # need rawtext size, before changed by flag processors, which is
2418 2425 # the non-raw size. use revlog explicitly to avoid filelog's extra
2419 2426 # logic that might remove metadata size.
2420 2427 textlen = mdiff.patchedsize(
2421 2428 revlog.size(self, cachedelta[0]), cachedelta[1]
2422 2429 )
2423 2430 else:
2424 2431 textlen = len(rawtext)
2425 2432
2426 2433 if deltacomputer is None:
2427 2434 deltacomputer = deltautil.deltacomputer(self)
2428 2435
2429 2436 revinfo = revlogutils.revisioninfo(
2430 2437 node,
2431 2438 p1,
2432 2439 p2,
2433 2440 btext,
2434 2441 textlen,
2435 2442 cachedelta,
2436 2443 flags,
2437 2444 )
2438 2445
2439 2446 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2440 2447
2441 2448 compression_mode = COMP_MODE_INLINE
2442 2449 if self._docket is not None:
2443 2450 default_comp = self._docket.default_compression_header
2444 2451 r = deltautil.delta_compression(default_comp, deltainfo)
2445 2452 compression_mode, deltainfo = r
2446 2453
2447 2454 sidedata_compression_mode = COMP_MODE_INLINE
2448 2455 if sidedata and self.hassidedata:
2449 2456 sidedata_compression_mode = COMP_MODE_PLAIN
2450 2457 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2451 2458 sidedata_offset = self._docket.sidedata_end
2452 2459 h, comp_sidedata = self.compress(serialized_sidedata)
2453 2460 if (
2454 2461 h != b'u'
2455 2462 and comp_sidedata[0:1] != b'\0'
2456 2463 and len(comp_sidedata) < len(serialized_sidedata)
2457 2464 ):
2458 2465 assert not h
2459 2466 if (
2460 2467 comp_sidedata[0:1]
2461 2468 == self._docket.default_compression_header
2462 2469 ):
2463 2470 sidedata_compression_mode = COMP_MODE_DEFAULT
2464 2471 serialized_sidedata = comp_sidedata
2465 2472 else:
2466 2473 sidedata_compression_mode = COMP_MODE_INLINE
2467 2474 serialized_sidedata = comp_sidedata
2468 2475 else:
2469 2476 serialized_sidedata = b""
2470 2477 # Don't store the offset if the sidedata is empty; that way
2471 2478 # we can easily detect empty sidedata, and it will be no different
2472 2479 # from sidedata we add manually.
2473 2480 sidedata_offset = 0
2474 2481
2475 2482 e = revlogutils.entry(
2476 2483 flags=flags,
2477 2484 data_offset=offset,
2478 2485 data_compressed_length=deltainfo.deltalen,
2479 2486 data_uncompressed_length=textlen,
2480 2487 data_compression_mode=compression_mode,
2481 2488 data_delta_base=deltainfo.base,
2482 2489 link_rev=link,
2483 2490 parent_rev_1=p1r,
2484 2491 parent_rev_2=p2r,
2485 2492 node_id=node,
2486 2493 sidedata_offset=sidedata_offset,
2487 2494 sidedata_compressed_length=len(serialized_sidedata),
2488 2495 sidedata_compression_mode=sidedata_compression_mode,
2489 2496 )
2490 2497
2491 2498 self.index.append(e)
2492 2499 entry = self.index.entry_binary(curr)
2493 2500 if curr == 0 and self._docket is None:
2494 2501 header = self._format_flags | self._format_version
2495 2502 header = self.index.pack_header(header)
2496 2503 entry = header + entry
2497 2504 self._writeentry(
2498 2505 transaction,
2499 2506 entry,
2500 2507 deltainfo.data,
2501 2508 link,
2502 2509 offset,
2503 2510 serialized_sidedata,
2504 2511 sidedata_offset,
2505 2512 )
2506 2513
2507 2514 rawtext = btext[0]
2508 2515
2509 2516 if alwayscache and rawtext is None:
2510 2517 rawtext = deltacomputer.buildtext(revinfo, fh)
2511 2518
2512 2519 if type(rawtext) == bytes: # only accept immutable objects
2513 2520 self._revisioncache = (node, curr, rawtext)
2514 2521 self._chainbasecache[curr] = deltainfo.chainbase
2515 2522 return curr
2516 2523
2517 2524 def _get_data_offset(self, prev):
2518 2525 """Returns the current offset in the (in-transaction) data file.
2519 2526 Versions < 2 of the revlog can get this in O(1), while revlog v2 needs a
2520 2527 docket file to store that information: since sidedata can be rewritten to the
2521 2528 end of the data file within a transaction, you can have cases where, for
2522 2529 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2523 2530 to `n - 1`'s sidedata being written after `n`'s data.
2524 2531
2525 2532 TODO cache this in a docket file before getting out of experimental."""
2526 2533 if self._docket is None:
2527 2534 return self.end(prev)
2528 2535 else:
2529 2536 return self._docket.data_end
2530 2537
2531 2538 def _writeentry(
2532 2539 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2533 2540 ):
2534 2541 # Files opened in a+ mode have inconsistent behavior on various
2535 2542 # platforms. Windows requires that a file positioning call be made
2536 2543 # when the file handle transitions between reads and writes. See
2537 2544 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2538 2545 # platforms, Python or the platform itself can be buggy. Some versions
2539 2546 # of Solaris have been observed to not append at the end of the file
2540 2547 # if the file was seeked to before the end. See issue4943 for more.
2541 2548 #
2542 2549 # We work around this issue by inserting a seek() before writing.
2543 2550 # Note: This is likely not necessary on Python 3. However, because
2544 2551 # the file handle is reused for reads and may be seeked there, we need
2545 2552 # to be careful before changing this.
2546 2553 if self._writinghandles is None:
2547 2554 msg = b'adding revision outside `revlog._writing` context'
2548 2555 raise error.ProgrammingError(msg)
2549 2556 ifh, dfh, sdfh = self._writinghandles
2550 2557 if self._docket is None:
2551 2558 ifh.seek(0, os.SEEK_END)
2552 2559 else:
2553 2560 ifh.seek(self._docket.index_end, os.SEEK_SET)
2554 2561 if dfh:
2555 2562 if self._docket is None:
2556 2563 dfh.seek(0, os.SEEK_END)
2557 2564 else:
2558 2565 dfh.seek(self._docket.data_end, os.SEEK_SET)
2559 2566 if sdfh:
2560 2567 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2561 2568
2562 2569 curr = len(self) - 1
2563 2570 if not self._inline:
2564 2571 transaction.add(self._datafile, offset)
2565 2572 if self._sidedatafile:
2566 2573 transaction.add(self._sidedatafile, sidedata_offset)
2567 2574 transaction.add(self._indexfile, curr * len(entry))
2568 2575 if data[0]:
2569 2576 dfh.write(data[0])
2570 2577 dfh.write(data[1])
2571 2578 if sidedata:
2572 2579 sdfh.write(sidedata)
2573 2580 ifh.write(entry)
2574 2581 else:
2575 2582 offset += curr * self.index.entry_size
2576 2583 transaction.add(self._indexfile, offset)
2577 2584 ifh.write(entry)
2578 2585 ifh.write(data[0])
2579 2586 ifh.write(data[1])
2580 2587 assert not sidedata
2581 2588 self._enforceinlinesize(transaction)
2582 2589 if self._docket is not None:
2583 2590 # revlog-v2 always has 3 writing handles, help Pytype
2584 2591 wh1 = self._writinghandles[0]
2585 2592 wh2 = self._writinghandles[1]
2586 2593 wh3 = self._writinghandles[2]
2587 2594 assert wh1 is not None
2588 2595 assert wh2 is not None
2589 2596 assert wh3 is not None
2590 2597 self._docket.index_end = wh1.tell()
2591 2598 self._docket.data_end = wh2.tell()
2592 2599 self._docket.sidedata_end = wh3.tell()
2593 2600
2594 2601 nodemaputil.setup_persistent_nodemap(transaction, self)
2595 2602
2596 2603 def addgroup(
2597 2604 self,
2598 2605 deltas,
2599 2606 linkmapper,
2600 2607 transaction,
2601 2608 alwayscache=False,
2602 2609 addrevisioncb=None,
2603 2610 duplicaterevisioncb=None,
2604 2611 ):
2605 2612 """
2606 2613 add a delta group
2607 2614
2608 2615 given a set of deltas, add them to the revision log. the
2609 2616 first delta is against its parent, which should be in our
2610 2617 log, the rest are against the previous delta.
2611 2618
2612 2619 If ``addrevisioncb`` is defined, it will be called with arguments of
2613 2620 this revlog and the node that was added.
2614 2621 """
2615 2622
2616 2623 if self._adding_group:
2617 2624 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2618 2625
2619 2626 self._adding_group = True
2620 2627 empty = True
2621 2628 try:
2622 2629 with self._writing(transaction):
2623 2630 deltacomputer = deltautil.deltacomputer(self)
2624 2631 # loop through our set of deltas
2625 2632 for data in deltas:
2626 2633 (
2627 2634 node,
2628 2635 p1,
2629 2636 p2,
2630 2637 linknode,
2631 2638 deltabase,
2632 2639 delta,
2633 2640 flags,
2634 2641 sidedata,
2635 2642 ) = data
2636 2643 link = linkmapper(linknode)
2637 2644 flags = flags or REVIDX_DEFAULT_FLAGS
2638 2645
2639 2646 rev = self.index.get_rev(node)
2640 2647 if rev is not None:
2641 2648 # this can happen if two branches make the same change
2642 2649 self._nodeduplicatecallback(transaction, rev)
2643 2650 if duplicaterevisioncb:
2644 2651 duplicaterevisioncb(self, rev)
2645 2652 empty = False
2646 2653 continue
2647 2654
2648 2655 for p in (p1, p2):
2649 2656 if not self.index.has_node(p):
2650 2657 raise error.LookupError(
2651 2658 p, self.radix, _(b'unknown parent')
2652 2659 )
2653 2660
2654 2661 if not self.index.has_node(deltabase):
2655 2662 raise error.LookupError(
2656 2663 deltabase, self.display_id, _(b'unknown delta base')
2657 2664 )
2658 2665
2659 2666 baserev = self.rev(deltabase)
2660 2667
2661 2668 if baserev != nullrev and self.iscensored(baserev):
2662 2669 # if base is censored, delta must be full replacement in a
2663 2670 # single patch operation
2664 2671 hlen = struct.calcsize(b">lll")
2665 2672 oldlen = self.rawsize(baserev)
2666 2673 newlen = len(delta) - hlen
2667 2674 if delta[:hlen] != mdiff.replacediffheader(
2668 2675 oldlen, newlen
2669 2676 ):
2670 2677 raise error.CensoredBaseError(
2671 2678 self.display_id, self.node(baserev)
2672 2679 )
2673 2680
2674 2681 if not flags and self._peek_iscensored(baserev, delta):
2675 2682 flags |= REVIDX_ISCENSORED
2676 2683
2677 2684 # We assume consumers of addrevisioncb will want to retrieve
2678 2685 # the added revision, which will require a call to
2679 2686 # revision(). revision() will fast path if there is a cache
2680 2687 # hit. So, we tell _addrevision() to always cache in this case.
2681 2688 # We're only using addgroup() in the context of changegroup
2682 2689 # generation so the revision data can always be handled as raw
2683 2690 # by the flagprocessor.
2684 2691 rev = self._addrevision(
2685 2692 node,
2686 2693 None,
2687 2694 transaction,
2688 2695 link,
2689 2696 p1,
2690 2697 p2,
2691 2698 flags,
2692 2699 (baserev, delta),
2693 2700 alwayscache=alwayscache,
2694 2701 deltacomputer=deltacomputer,
2695 2702 sidedata=sidedata,
2696 2703 )
2697 2704
2698 2705 if addrevisioncb:
2699 2706 addrevisioncb(self, rev)
2700 2707 empty = False
2701 2708 finally:
2702 2709 self._adding_group = False
2703 2710 return not empty
2704 2711
2705 2712 def iscensored(self, rev):
2706 2713 """Check if a file revision is censored."""
2707 2714 if not self._censorable:
2708 2715 return False
2709 2716
2710 2717 return self.flags(rev) & REVIDX_ISCENSORED
2711 2718
2712 2719 def _peek_iscensored(self, baserev, delta):
2713 2720 """Quickly check if a delta produces a censored revision."""
2714 2721 if not self._censorable:
2715 2722 return False
2716 2723
2717 2724 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2718 2725
2719 2726 def getstrippoint(self, minlink):
2720 2727 """find the minimum rev that must be stripped to strip the linkrev
2721 2728
2722 2729 Returns a tuple containing the minimum rev and a set of all revs that
2723 2730 have linkrevs that will be broken by this strip.
2724 2731 """
2725 2732 return storageutil.resolvestripinfo(
2726 2733 minlink,
2727 2734 len(self) - 1,
2728 2735 self.headrevs(),
2729 2736 self.linkrev,
2730 2737 self.parentrevs,
2731 2738 )
2732 2739
2733 2740 def strip(self, minlink, transaction):
2734 2741 """truncate the revlog on the first revision with a linkrev >= minlink
2735 2742
2736 2743 This function is called when we're stripping revision minlink and
2737 2744 its descendants from the repository.
2738 2745
2739 2746 We have to remove all revisions with linkrev >= minlink, because
2740 2747 the equivalent changelog revisions will be renumbered after the
2741 2748 strip.
2742 2749
2743 2750 So we truncate the revlog on the first of these revisions, and
2744 2751 trust that the caller has saved the revisions that shouldn't be
2745 2752 removed and that it'll re-add them after this truncation.
2746 2753 """
2747 2754 if len(self) == 0:
2748 2755 return
2749 2756
2750 2757 rev, _ = self.getstrippoint(minlink)
2751 2758 if rev == len(self):
2752 2759 return
2753 2760
2754 2761 # first truncate the files on disk
2755 2762 data_end = self.start(rev)
2756 2763 if not self._inline:
2757 2764 transaction.add(self._datafile, data_end)
2758 2765 end = rev * self.index.entry_size
2759 2766 else:
2760 2767 end = data_end + (rev * self.index.entry_size)
2761 2768
2762 2769 if self._sidedatafile:
2763 2770 sidedata_end = self.sidedata_cut_off(rev)
2764 2771 transaction.add(self._sidedatafile, sidedata_end)
2765 2772
2766 2773 transaction.add(self._indexfile, end)
2767 2774 if self._docket is not None:
2768 2775 # XXX we could leverage the docket while stripping. However, it is
2769 2776 # not powerful enough at the time of this comment
2770 2777 self._docket.index_end = end
2771 2778 self._docket.data_end = data_end
2772 2779 self._docket.sidedata_end = sidedata_end
2773 2780 self._docket.write(transaction, stripping=True)
2774 2781
2775 2782 # then reset internal state in memory to forget those revisions
2776 2783 self._revisioncache = None
2777 2784 self._chaininfocache = util.lrucachedict(500)
2778 2785 self._segmentfile.clear_cache()
2779 2786 self._segmentfile_sidedata.clear_cache()
2780 2787
2781 2788 del self.index[rev:-1]
2782 2789
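# Effect in short (sketch): after the call every remaining revision has a
# linkrev below minlink, and the index is truncated at the strip point:
#
#   rev, broken = rl.getstrippoint(minlink)
#   rl.strip(minlink, tr)
#   assert len(rl) == rev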
2783 2790 def checksize(self):
2784 2791 """Check size of index and data files
2785 2792
2786 2793 return a (dd, di) tuple.
2787 2794 - dd: extra bytes for the "data" file
2788 2795 - di: extra bytes for the "index" file
2789 2796
2790 2797 A healthy revlog will return (0, 0).
2791 2798 """
2792 2799 expected = 0
2793 2800 if len(self):
2794 2801 expected = max(0, self.end(len(self) - 1))
2795 2802
2796 2803 try:
2797 2804 with self._datafp() as f:
2798 2805 f.seek(0, io.SEEK_END)
2799 2806 actual = f.tell()
2800 2807 dd = actual - expected
2801 2808 except IOError as inst:
2802 2809 if inst.errno != errno.ENOENT:
2803 2810 raise
2804 2811 dd = 0
2805 2812
2806 2813 try:
2807 2814 f = self.opener(self._indexfile)
2808 2815 f.seek(0, io.SEEK_END)
2809 2816 actual = f.tell()
2810 2817 f.close()
2811 2818 s = self.index.entry_size
2812 2819 i = max(0, actual // s)
2813 2820 di = actual - (i * s)
2814 2821 if self._inline:
2815 2822 databytes = 0
2816 2823 for r in self:
2817 2824 databytes += max(0, self.length(r))
2818 2825 dd = 0
2819 2826 di = actual - len(self) * s - databytes
2820 2827 except IOError as inst:
2821 2828 if inst.errno != errno.ENOENT:
2822 2829 raise
2823 2830 di = 0
2824 2831
2825 2832 return (dd, di)
2826 2833
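# Interpreting the result (sketch): a healthy revlog reports no extra
# bytes; anything else points at trailing garbage, e.g. leftovers of an
# interrupted transaction:
#
#   dd, di = rl.checksize()
#   healthy = (dd, di) == (0, 0)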
2827 2834 def files(self):
2828 2835 res = [self._indexfile]
2829 2836 if self._docket_file is None:
2830 2837 if not self._inline:
2831 2838 res.append(self._datafile)
2832 2839 else:
2833 2840 res.append(self._docket_file)
2834 2841 res.extend(self._docket.old_index_filepaths(include_empty=False))
2835 2842 if self._docket.data_end:
2836 2843 res.append(self._datafile)
2837 2844 res.extend(self._docket.old_data_filepaths(include_empty=False))
2838 2845 if self._docket.sidedata_end:
2839 2846 res.append(self._sidedatafile)
2840 2847 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
2841 2848 return res
2842 2849
2843 2850 def emitrevisions(
2844 2851 self,
2845 2852 nodes,
2846 2853 nodesorder=None,
2847 2854 revisiondata=False,
2848 2855 assumehaveparentrevisions=False,
2849 2856 deltamode=repository.CG_DELTAMODE_STD,
2850 2857 sidedata_helpers=None,
2851 2858 ):
2852 2859 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2853 2860 raise error.ProgrammingError(
2854 2861 b'unhandled value for nodesorder: %s' % nodesorder
2855 2862 )
2856 2863
2857 2864 if nodesorder is None and not self._generaldelta:
2858 2865 nodesorder = b'storage'
2859 2866
2860 2867 if (
2861 2868 not self._storedeltachains
2862 2869 and deltamode != repository.CG_DELTAMODE_PREV
2863 2870 ):
2864 2871 deltamode = repository.CG_DELTAMODE_FULL
2865 2872
2866 2873 return storageutil.emitrevisions(
2867 2874 self,
2868 2875 nodes,
2869 2876 nodesorder,
2870 2877 revlogrevisiondelta,
2871 2878 deltaparentfn=self.deltaparent,
2872 2879 candeltafn=self.candelta,
2873 2880 rawsizefn=self.rawsize,
2874 2881 revdifffn=self.revdiff,
2875 2882 flagsfn=self.flags,
2876 2883 deltamode=deltamode,
2877 2884 revisiondata=revisiondata,
2878 2885 assumehaveparentrevisions=assumehaveparentrevisions,
2879 2886 sidedata_helpers=sidedata_helpers,
2880 2887 )
2881 2888
2882 2889 DELTAREUSEALWAYS = b'always'
2883 2890 DELTAREUSESAMEREVS = b'samerevs'
2884 2891 DELTAREUSENEVER = b'never'
2885 2892
2886 2893 DELTAREUSEFULLADD = b'fulladd'
2887 2894
2888 2895 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2889 2896
2890 2897 def clone(
2891 2898 self,
2892 2899 tr,
2893 2900 destrevlog,
2894 2901 addrevisioncb=None,
2895 2902 deltareuse=DELTAREUSESAMEREVS,
2896 2903 forcedeltabothparents=None,
2897 2904 sidedata_helpers=None,
2898 2905 ):
2899 2906 """Copy this revlog to another, possibly with format changes.
2900 2907
2901 2908 The destination revlog will contain the same revisions and nodes.
2902 2909 However, it may not be bit-for-bit identical due to e.g. delta encoding
2903 2910 differences.
2904 2911
2905 2912 The ``deltareuse`` argument controls how deltas from the existing revlog
2906 2913 are preserved in the destination revlog. The argument can have the
2907 2914 following values:
2908 2915
2909 2916 DELTAREUSEALWAYS
2910 2917 Deltas will always be reused (if possible), even if the destination
2911 2918 revlog would not select the same revisions for the delta. This is the
2912 2919 fastest mode of operation.
2913 2920 DELTAREUSESAMEREVS
2914 2921 Deltas will be reused if the destination revlog would pick the same
2915 2922 revisions for the delta. This mode strikes a balance between speed
2916 2923 and optimization.
2917 2924 DELTAREUSENEVER
2918 2925 Deltas will never be reused. This is the slowest mode of execution.
2919 2926 This mode can be used to recompute deltas (e.g. if the diff/delta
2920 2927 algorithm changes).
2921 2928 DELTAREUSEFULLADD
2922 2929 Revisions will be re-added as if they were new content. This is
2923 2930 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2924 2931 e.g. large file detection and handling.
2925 2932
2926 2933 Delta computation can be slow, so the choice of delta reuse policy can
2927 2934 significantly affect run time.
2928 2935
2929 2936 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2930 2937 two extremes. Deltas will be reused if they are appropriate. But if the
2931 2938 delta could choose a better revision, it will do so. This means if you
2932 2939 are converting a non-generaldelta revlog to a generaldelta revlog,
2933 2940 deltas will be recomputed if the delta's parent isn't a parent of the
2934 2941 revision.
2935 2942
2936 2943 In addition to the delta policy, the ``forcedeltabothparents``
2937 2944 argument controls whether to force computing deltas against both
2938 2945 parents for merges. By default, the destination's current setting is used.
2939 2946
2940 2947 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2941 2948 `sidedata_helpers`.
2942 2949 """
2943 2950 if deltareuse not in self.DELTAREUSEALL:
2944 2951 raise ValueError(
2945 2952 _(b'value for deltareuse invalid: %s') % deltareuse
2946 2953 )
2947 2954
2948 2955 if len(destrevlog):
2949 2956 raise ValueError(_(b'destination revlog is not empty'))
2950 2957
2951 2958 if getattr(self, 'filteredrevs', None):
2952 2959 raise ValueError(_(b'source revlog has filtered revisions'))
2953 2960 if getattr(destrevlog, 'filteredrevs', None):
2954 2961 raise ValueError(_(b'destination revlog has filtered revisions'))
2955 2962
2956 2963 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2957 2964 # if possible.
2958 2965 oldlazydelta = destrevlog._lazydelta
2959 2966 oldlazydeltabase = destrevlog._lazydeltabase
2960 2967 oldamd = destrevlog._deltabothparents
2961 2968
2962 2969 try:
2963 2970 if deltareuse == self.DELTAREUSEALWAYS:
2964 2971 destrevlog._lazydeltabase = True
2965 2972 destrevlog._lazydelta = True
2966 2973 elif deltareuse == self.DELTAREUSESAMEREVS:
2967 2974 destrevlog._lazydeltabase = False
2968 2975 destrevlog._lazydelta = True
2969 2976 elif deltareuse == self.DELTAREUSENEVER:
2970 2977 destrevlog._lazydeltabase = False
2971 2978 destrevlog._lazydelta = False
2972 2979
2973 2980 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2974 2981
2975 2982 self._clone(
2976 2983 tr,
2977 2984 destrevlog,
2978 2985 addrevisioncb,
2979 2986 deltareuse,
2980 2987 forcedeltabothparents,
2981 2988 sidedata_helpers,
2982 2989 )
2983 2990
2984 2991 finally:
2985 2992 destrevlog._lazydelta = oldlazydelta
2986 2993 destrevlog._lazydeltabase = oldlazydeltabase
2987 2994 destrevlog._deltabothparents = oldamd
2988 2995
2989 2996 def _clone(
2990 2997 self,
2991 2998 tr,
2992 2999 destrevlog,
2993 3000 addrevisioncb,
2994 3001 deltareuse,
2995 3002 forcedeltabothparents,
2996 3003 sidedata_helpers,
2997 3004 ):
2998 3005 """perform the core duty of `revlog.clone` after parameter processing"""
2999 3006 deltacomputer = deltautil.deltacomputer(destrevlog)
3000 3007 index = self.index
3001 3008 for rev in self:
3002 3009 entry = index[rev]
3003 3010
3004 3011 # Some classes override linkrev to take filtered revs into
3005 3012 # account. Use raw entry from index.
3006 3013 flags = entry[0] & 0xFFFF
3007 3014 linkrev = entry[4]
3008 3015 p1 = index[entry[5]][7]
3009 3016 p2 = index[entry[6]][7]
3010 3017 node = entry[7]
3011 3018
3012 3019 # (Possibly) reuse the delta from the revlog if allowed and
3013 3020 # the revlog chunk is a delta.
3014 3021 cachedelta = None
3015 3022 rawtext = None
3016 3023 if deltareuse == self.DELTAREUSEFULLADD:
3017 3024 text = self._revisiondata(rev)
3018 3025 sidedata = self.sidedata(rev)
3019 3026
3020 3027 if sidedata_helpers is not None:
3021 3028 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3022 3029 self, sidedata_helpers, sidedata, rev
3023 3030 )
3024 3031 flags = flags | new_flags[0] & ~new_flags[1]
3025 3032
3026 3033 destrevlog.addrevision(
3027 3034 text,
3028 3035 tr,
3029 3036 linkrev,
3030 3037 p1,
3031 3038 p2,
3032 3039 cachedelta=cachedelta,
3033 3040 node=node,
3034 3041 flags=flags,
3035 3042 deltacomputer=deltacomputer,
3036 3043 sidedata=sidedata,
3037 3044 )
3038 3045 else:
3039 3046 if destrevlog._lazydelta:
3040 3047 dp = self.deltaparent(rev)
3041 3048 if dp != nullrev:
3042 3049 cachedelta = (dp, bytes(self._chunk(rev)))
3043 3050
3044 3051 sidedata = None
3045 3052 if not cachedelta:
3046 3053 rawtext = self._revisiondata(rev)
3047 3054 sidedata = self.sidedata(rev)
3048 3055 if sidedata is None:
3049 3056 sidedata = self.sidedata(rev)
3050 3057
3051 3058 if sidedata_helpers is not None:
3052 3059 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3053 3060 self, sidedata_helpers, sidedata, rev
3054 3061 )
3055 3062 flags = flags | new_flags[0] & ~new_flags[1]
3056 3063
3057 3064 with destrevlog._writing(tr):
3058 3065 destrevlog._addrevision(
3059 3066 node,
3060 3067 rawtext,
3061 3068 tr,
3062 3069 linkrev,
3063 3070 p1,
3064 3071 p2,
3065 3072 flags,
3066 3073 cachedelta,
3067 3074 deltacomputer=deltacomputer,
3068 3075 sidedata=sidedata,
3069 3076 )
3070 3077
3071 3078 if addrevisioncb:
3072 3079 addrevisioncb(self, rev, node)
3073 3080
3074 3081 def censorrevision(self, tr, censornode, tombstone=b''):
3075 3082 if self._format_version == REVLOGV0:
3076 3083 raise error.RevlogError(
3077 3084 _(b'cannot censor with version %d revlogs')
3078 3085 % self._format_version
3079 3086 )
3080 3087 elif self._format_version == REVLOGV1:
3081 3088 rewrite.v1_censor(self, tr, censornode, tombstone)
3082 3089 else:
3083 3090 rewrite.v2_censor(self, tr, censornode, tombstone)
3084 3091
3085 3092 def verifyintegrity(self, state):
3086 3093 """Verifies the integrity of the revlog.
3087 3094
3088 3095 Yields ``revlogproblem`` instances describing problems that are
3089 3096 found.
3090 3097 """
3091 3098 dd, di = self.checksize()
3092 3099 if dd:
3093 3100 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3094 3101 if di:
3095 3102 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3096 3103
3097 3104 version = self._format_version
3098 3105
3099 3106 # The verifier tells us what version revlog we should be.
3100 3107 if version != state[b'expectedversion']:
3101 3108 yield revlogproblem(
3102 3109 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3103 3110 % (self.display_id, version, state[b'expectedversion'])
3104 3111 )
3105 3112
3106 3113 state[b'skipread'] = set()
3107 3114 state[b'safe_renamed'] = set()
3108 3115
3109 3116 for rev in self:
3110 3117 node = self.node(rev)
3111 3118
3112 3119 # Verify contents. 4 cases to care about:
3113 3120 #
3114 3121 # common: the most common case
3115 3122 # rename: with a rename
3116 3123 # meta: file content starts with b'\1\n', the metadata
3117 3124 # header defined in filelog.py, but without a rename
3118 3125 # ext: content stored externally
3119 3126 #
3120 3127 # More formally, their differences are shown below:
3121 3128 #
3122 3129 # | common | rename | meta | ext
3123 3130 # -------------------------------------------------------
3124 3131 # flags() | 0 | 0 | 0 | not 0
3125 3132 # renamed() | False | True | False | ?
3126 3133 # rawtext[0:2]=='\1\n'| False | True | True | ?
3127 3134 #
3128 3135 # "rawtext" means the raw text stored in revlog data, which
3129 3136 # could be retrieved by "rawdata(rev)". "text"
3130 3137 # mentioned below is "revision(rev)".
3131 3138 #
3132 3139 # There are 3 different lengths stored physically:
3133 3140 # 1. L1: rawsize, stored in revlog index
3134 3141 # 2. L2: len(rawtext), stored in revlog data
3135 3142 # 3. L3: len(text), stored in revlog data if flags==0, or
3136 3143 # possibly somewhere else if flags!=0
3137 3144 #
3138 3145 # L1 should be equal to L2. L3 could be different from them.
3139 3146 # "text" may or may not affect commit hash depending on flag
3140 3147 # processors (see flagutil.addflagprocessor).
3141 3148 #
3142 3149 # | common | rename | meta | ext
3143 3150 # -------------------------------------------------
3144 3151 # rawsize() | L1 | L1 | L1 | L1
3145 3152 # size() | L1 | L2-LM | L1(*) | L1 (?)
3146 3153 # len(rawtext) | L2 | L2 | L2 | L2
3147 3154 # len(text) | L2 | L2 | L2 | L3
3148 3155 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3149 3156 #
3150 3157 # LM: length of metadata, depending on rawtext
3151 3158 # (*): not ideal, see comment in filelog.size
3152 3159 # (?): could be "- len(meta)" if the resolved content has
3153 3160 # rename metadata
3154 3161 #
3155 3162 # Checks needed to be done:
3156 3163 # 1. length check: L1 == L2, in all cases.
3157 3164 # 2. hash check: depending on flag processor, we may need to
3158 3165 # use either "text" (external), or "rawtext" (in revlog).
3159 3166
3160 3167 try:
3161 3168 skipflags = state.get(b'skipflags', 0)
3162 3169 if skipflags:
3163 3170 skipflags &= self.flags(rev)
3164 3171
3165 3172 _verify_revision(self, skipflags, state, node)
3166 3173
3167 3174 l1 = self.rawsize(rev)
3168 3175 l2 = len(self.rawdata(node))
3169 3176
3170 3177 if l1 != l2:
3171 3178 yield revlogproblem(
3172 3179 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3173 3180 node=node,
3174 3181 )
3175 3182
3176 3183 except error.CensoredNodeError:
3177 3184 if state[b'erroroncensored']:
3178 3185 yield revlogproblem(
3179 3186 error=_(b'censored file data'), node=node
3180 3187 )
3181 3188 state[b'skipread'].add(node)
3182 3189 except Exception as e:
3183 3190 yield revlogproblem(
3184 3191 error=_(b'unpacking %s: %s')
3185 3192 % (short(node), stringutil.forcebytestr(e)),
3186 3193 node=node,
3187 3194 )
3188 3195 state[b'skipread'].add(node)
3189 3196
3190 3197 def storageinfo(
3191 3198 self,
3192 3199 exclusivefiles=False,
3193 3200 sharedfiles=False,
3194 3201 revisionscount=False,
3195 3202 trackedsize=False,
3196 3203 storedsize=False,
3197 3204 ):
3198 3205 d = {}
3199 3206
3200 3207 if exclusivefiles:
3201 3208 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3202 3209 if not self._inline:
3203 3210 d[b'exclusivefiles'].append((self.opener, self._datafile))
3204 3211
3205 3212 if sharedfiles:
3206 3213 d[b'sharedfiles'] = []
3207 3214
3208 3215 if revisionscount:
3209 3216 d[b'revisionscount'] = len(self)
3210 3217
3211 3218 if trackedsize:
3212 3219 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3213 3220
3214 3221 if storedsize:
3215 3222 d[b'storedsize'] = sum(
3216 3223 self.opener.stat(path).st_size for path in self.files()
3217 3224 )
3218 3225
3219 3226 return d
3220 3227
3221 3228 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3222 3229 if not self.hassidedata:
3223 3230 return
3224 3231 # revlog formats with sidedata support do not support inline
3225 3232 assert not self._inline
3226 3233 if not helpers[1] and not helpers[2]:
3227 3234 # Nothing to generate or remove
3228 3235 return
3229 3236
3230 3237 new_entries = []
3231 3238 # append the new sidedata
3232 3239 with self._writing(transaction):
3233 3240 ifh, dfh, sdfh = self._writinghandles
3234 3241 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3235 3242
3236 3243 current_offset = sdfh.tell()
3237 3244 for rev in range(startrev, endrev + 1):
3238 3245 entry = self.index[rev]
3239 3246 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3240 3247 store=self,
3241 3248 sidedata_helpers=helpers,
3242 3249 sidedata={},
3243 3250 rev=rev,
3244 3251 )
3245 3252
3246 3253 serialized_sidedata = sidedatautil.serialize_sidedata(
3247 3254 new_sidedata
3248 3255 )
3249 3256
3250 3257 sidedata_compression_mode = COMP_MODE_INLINE
3251 3258 if serialized_sidedata and self.hassidedata:
3252 3259 sidedata_compression_mode = COMP_MODE_PLAIN
3253 3260 h, comp_sidedata = self.compress(serialized_sidedata)
3254 3261 if (
3255 3262 h != b'u'
3256 3263 and comp_sidedata[0] != b'\0'
3257 3264 and len(comp_sidedata) < len(serialized_sidedata)
3258 3265 ):
3259 3266 assert not h
3260 3267 if (
3261 3268 comp_sidedata[0]
3262 3269 == self._docket.default_compression_header
3263 3270 ):
3264 3271 sidedata_compression_mode = COMP_MODE_DEFAULT
3265 3272 serialized_sidedata = comp_sidedata
3266 3273 else:
3267 3274 sidedata_compression_mode = COMP_MODE_INLINE
3268 3275 serialized_sidedata = comp_sidedata
3269 3276 if entry[8] != 0 or entry[9] != 0:
3270 3277 # rewriting entries that already have sidedata is not
3271 3278 # supported yet, because it introduces garbage data in the
3272 3279 # revlog.
3273 3280 msg = b"rewriting existing sidedata is not supported yet"
3274 3281 raise error.Abort(msg)
3275 3282
3276 3283 # Apply (potential) flags to add and to remove after running
3277 3284 # the sidedata helpers
3278 3285 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3279 3286 entry_update = (
3280 3287 current_offset,
3281 3288 len(serialized_sidedata),
3282 3289 new_offset_flags,
3283 3290 sidedata_compression_mode,
3284 3291 )
3285 3292
3286 3293 # the sidedata computation might have moved the file cursors around
3287 3294 sdfh.seek(current_offset, os.SEEK_SET)
3288 3295 sdfh.write(serialized_sidedata)
3289 3296 new_entries.append(entry_update)
3290 3297 current_offset += len(serialized_sidedata)
3291 3298 self._docket.sidedata_end = sdfh.tell()
3292 3299
3293 3300 # rewrite the new index entries
3294 3301 ifh.seek(startrev * self.index.entry_size)
3295 3302 for i, e in enumerate(new_entries):
3296 3303 rev = startrev + i
3297 3304 self.index.replace_sidedata_info(rev, *e)
3298 3305 packed = self.index.entry_binary(rev)
3299 3306 if rev == 0 and self._docket is None:
3300 3307 header = self._format_flags | self._format_version
3301 3308 header = self.index.pack_header(header)
3302 3309 packed = header + packed
3303 3310 ifh.write(packed)
@@ -1,194 +1,178 b''
1 1 Test correctness of revlog inline -> non-inline transition
2 2 ----------------------------------------------------------
3 3
4 4 Helper extension to intercept renames.
5 5
6 6 $ cat > $TESTTMP/intercept_rename.py << EOF
7 7 > import os
8 8 > import sys
9 9 > from mercurial import extensions, util
10 10 >
11 11 > def extsetup(ui):
12 12 > def close(orig, *args, **kwargs):
13 13 > path = util.normpath(args[0]._atomictempfile__name)
14 14 > if path.endswith(b'/.hg/store/data/file.i'):
15 15 > os._exit(80)
16 16 > return orig(*args, **kwargs)
17 17 > extensions.wrapfunction(util.atomictempfile, 'close', close)
18 18 > EOF
19 19
20 20 Test offset computation to correctly factor in the index entries themselves.
21 21 Also test that the new data file has the correct size if the transaction is aborted
22 22 after the index has been replaced.
23 23
24 24 The test repo has commits a, b, c, D, where D is large (grows the revlog enough that it
25 25 transitions to non-inline storage). The clone initially has commits a, b
26 26 and will transition to non-inline storage when adding c, D.
27 27
28 28 If the transaction adding c, D is rolled back, then we don't undo the revlog split,
29 29 but truncate the index and the data to remove both c and D.
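
The truncation-on-abort behaviour can be pictured with a minimal,
self-contained sketch (a hypothetical helper, not Mercurial's actual
transaction code): the journal records, for each touched store file, the size
to restore on abort, and recovery truncates the files back to those sizes:

    import os

    def rollback(store_root, journal_entries):
        # journal_entries: (relative_path, size) pairs, e.g.
        # [('data/file.i', 1174)]; truncating removes the appended
        # revisions without undoing the inline -> non-inline split.
        for path, size in journal_entries:
            with open(os.path.join(store_root, path), 'r+b') as fh:
                fh.truncate(size)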
30 30
31 31 $ hg init troffset-computation --config format.revlog-compression=none
32 32 $ cd troffset-computation
33 33 $ printf '%20d' '1' > file
34 34 $ hg commit -Aqma
35 35 $ printf '%1024d' '1' > file
36 36 $ hg commit -Aqmb
37 37 $ printf '%20d' '1' > file
38 38 $ hg commit -Aqmc
39 39 $ dd if=/dev/zero of=file bs=1k count=128 > /dev/null 2>&1
40 40 $ hg commit -AqmD
41 41
42 42 $ cd ..
43 43
44 44 $ hg clone -r 1 troffset-computation troffset-computation-copy --config format.revlog-compression=none -q
45 45 $ cd troffset-computation-copy
46 46
47 47 Reference size:
48 48
49 49 $ f -s .hg/store/data/file*
50 50 .hg/store/data/file.i: size=1174
51 51
52 52 $ cat > .hg/hgrc <<EOF
53 53 > [hooks]
54 54 > pretxnchangegroup = python:$TESTDIR/helper-killhook.py:killme
55 55 > EOF
56 56 #if chg
57 57 $ hg pull ../troffset-computation
58 58 pulling from ../troffset-computation
59 59 [255]
60 60 #else
61 61 $ hg pull ../troffset-computation
62 62 pulling from ../troffset-computation
63 63 [80]
64 64 #endif
65 65 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file | tail -1
66 data/file.i 192
66 data/file.i 128
67 67
68 68 The first file.i entry should match the "Reference size" above.
69 69 The first file.d entry is the temporary record during the split,
70 70 the second one is written after the split happened. The sum of the second file.d
71 71 and the second file.i entry should match the first file.i entry.
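
For the values in this run the relationship can be checked directly; assuming
revlogv1's 64-byte index entries, the two surviving revisions account for the
128-byte index:

    1046 (second file.d) + 128 (second file.i) = 1174 (first file.i)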
72 72
73 73 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
74 74 data/file.i 1174
75 75 data/file.d 0
76 data/file.d 1067
77 data/file.i 192
76 data/file.d 1046
77 data/file.i 128
78 78 $ hg recover
79 79 rolling back interrupted transaction
80 80 (verify step skipped, run `hg verify` to check your repository content)
81 81 $ f -s .hg/store/data/file*
82 .hg/store/data/file.d: size=1067
83 .hg/store/data/file.i: size=192
82 .hg/store/data/file.d: size=1046
83 .hg/store/data/file.i: size=128
84 84 $ hg tip
85 85 changeset: 1:cfa8d6e60429
86 86 tag: tip
87 87 user: test
88 88 date: Thu Jan 01 00:00:00 1970 +0000
89 89 summary: b
90 90
91 91 $ hg verify -q
92 92 warning: revlog 'data/file.d' not in fncache!
93 file@?: rev 2 points to nonexistent changeset 2
94 (expected )
95 file@?: fa1120531cc1 not in manifests
96 2 warnings encountered!
93 1 warnings encountered!
97 94 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
98 2 integrity errors encountered!
99 [1]
100 95 $ hg debugrebuildfncache --only-data
101 96 adding data/file.d
102 97 1 items added, 0 removed from fncache
103 98 $ hg verify -q
104 file@?: rev 2 points to nonexistent changeset 2
105 (expected )
106 file@?: fa1120531cc1 not in manifests
107 1 warnings encountered!
108 2 integrity errors encountered!
109 [1]
110 99 $ cd ..
111 100
112 101
113 102 Now retry the procedure but intercept the rename of the index and check that
114 103 the journal does not contain the new index size. This demonstrates the edge case
115 104 where the data file is left as garbage.
116 105
117 106 $ hg clone -r 1 troffset-computation troffset-computation-copy2 --config format.revlog-compression=none -q
118 107 $ cd troffset-computation-copy2
119 108 $ cat > .hg/hgrc <<EOF
120 109 > [extensions]
121 110 > intercept_rename = $TESTTMP/intercept_rename.py
122 111 > [hooks]
123 112 > pretxnchangegroup = python:$TESTDIR/helper-killhook.py:killme
124 113 > EOF
125 114 #if chg
126 115 $ hg pull ../troffset-computation
127 116 pulling from ../troffset-computation
128 117 [255]
129 118 #else
130 119 $ hg pull ../troffset-computation
131 120 pulling from ../troffset-computation
132 121 [80]
133 122 #endif
134 123 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
135 124 data/file.i 1174
136 125 data/file.d 0
137 data/file.d 1067
126 data/file.d 1046
138 127
139 128 $ hg recover
140 129 rolling back interrupted transaction
141 130 (verify step skipped, run `hg verify` to check your repository content)
142 131 $ f -s .hg/store/data/file*
143 .hg/store/data/file.d: size=1067
132 .hg/store/data/file.d: size=1046
144 133 .hg/store/data/file.i: size=1174
145 134 $ hg tip
146 135 changeset: 1:cfa8d6e60429
147 136 tag: tip
148 137 user: test
149 138 date: Thu Jan 01 00:00:00 1970 +0000
150 139 summary: b
151 140
152 141 $ hg verify -q
153 142 $ cd ..
154 143
155 144
156 145 Repeat the original test but let hg roll back the transaction.
157 146
158 147 $ hg clone -r 1 troffset-computation troffset-computation-copy-rb --config format.revlog-compression=none -q
159 148 $ cd troffset-computation-copy-rb
160 149 $ cat > .hg/hgrc <<EOF
161 150 > [hooks]
162 151 > pretxnchangegroup = false
163 152 > EOF
164 153 $ hg pull ../troffset-computation
165 154 pulling from ../troffset-computation
166 155 searching for changes
167 156 adding changesets
168 157 adding manifests
169 158 adding file changes
170 159 transaction abort!
171 160 rollback completed
172 161 abort: pretxnchangegroup hook exited with status 1
173 162 [40]
174 163 $ f -s .hg/store/data/file*
175 .hg/store/data/file.d: size=1067
176 .hg/store/data/file.i: size=192
164 .hg/store/data/file.d: size=1046
165 .hg/store/data/file.i: size=128
177 166 $ hg tip
178 167 changeset: 1:cfa8d6e60429
179 168 tag: tip
180 169 user: test
181 170 date: Thu Jan 01 00:00:00 1970 +0000
182 171 summary: b
183 172
184 173 $ hg verify -q
185 174 warning: revlog 'data/file.d' not in fncache!
186 file@?: rev 2 points to nonexistent changeset 2
187 (expected )
188 file@?: fa1120531cc1 not in manifests
189 2 warnings encountered!
175 1 warnings encountered!
190 176 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
191 2 integrity errors encountered!
192 [1]
193 177 $ cd ..
194 178