revlog: allow to pass an existing docket to `_loadindex()`...
marmoute
r48194:f7f082bc default
@@ -1,3388 +1,3394 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15 from __future__ import absolute_import
16 16
17 17 import binascii
18 18 import collections
19 19 import contextlib
20 20 import errno
21 21 import io
22 22 import os
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullrev,
31 31 sha1nodeconstants,
32 32 short,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from .pycompat import getattr
37 37 from .revlogutils.constants import (
38 38 ALL_KINDS,
39 39 CHANGELOGV2,
40 40 COMP_MODE_DEFAULT,
41 41 COMP_MODE_INLINE,
42 42 COMP_MODE_PLAIN,
43 43 FEATURES_BY_VERSION,
44 44 FLAG_GENERALDELTA,
45 45 FLAG_INLINE_DATA,
46 46 INDEX_HEADER,
47 47 KIND_CHANGELOG,
48 48 REVLOGV0,
49 49 REVLOGV1,
50 50 REVLOGV1_FLAGS,
51 51 REVLOGV2,
52 52 REVLOGV2_FLAGS,
53 53 REVLOG_DEFAULT_FLAGS,
54 54 REVLOG_DEFAULT_FORMAT,
55 55 REVLOG_DEFAULT_VERSION,
56 56 SUPPORTED_FLAGS,
57 57 )
58 58 from .revlogutils.flagutil import (
59 59 REVIDX_DEFAULT_FLAGS,
60 60 REVIDX_ELLIPSIS,
61 61 REVIDX_EXTSTORED,
62 62 REVIDX_FLAGS_ORDER,
63 63 REVIDX_HASCOPIESINFO,
64 64 REVIDX_ISCENSORED,
65 65 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 66 )
67 67 from .thirdparty import attr
68 68 from . import (
69 69 ancestor,
70 70 dagop,
71 71 error,
72 72 mdiff,
73 73 policy,
74 74 pycompat,
75 75 revlogutils,
76 76 templatefilters,
77 77 util,
78 78 )
79 79 from .interfaces import (
80 80 repository,
81 81 util as interfaceutil,
82 82 )
83 83 from .revlogutils import (
84 84 censor,
85 85 deltas as deltautil,
86 86 docket as docketutil,
87 87 flagutil,
88 88 nodemap as nodemaputil,
89 89 revlogv0,
90 90 sidedata as sidedatautil,
91 91 )
92 92 from .utils import (
93 93 storageutil,
94 94 stringutil,
95 95 )
96 96
97 97 # blanket usage of all the names to prevent pyflakes complaints
98 98 # We need these names available in the module for extensions.
99 99
100 100 REVLOGV0
101 101 REVLOGV1
102 102 REVLOGV2
103 103 FLAG_INLINE_DATA
104 104 FLAG_GENERALDELTA
105 105 REVLOG_DEFAULT_FLAGS
106 106 REVLOG_DEFAULT_FORMAT
107 107 REVLOG_DEFAULT_VERSION
108 108 REVLOGV1_FLAGS
109 109 REVLOGV2_FLAGS
110 110 REVIDX_ISCENSORED
111 111 REVIDX_ELLIPSIS
112 112 REVIDX_HASCOPIESINFO
113 113 REVIDX_EXTSTORED
114 114 REVIDX_DEFAULT_FLAGS
115 115 REVIDX_FLAGS_ORDER
116 116 REVIDX_RAWTEXT_CHANGING_FLAGS
117 117
118 118 parsers = policy.importmod('parsers')
119 119 rustancestor = policy.importrust('ancestor')
120 120 rustdagop = policy.importrust('dagop')
121 121 rustrevlog = policy.importrust('revlog')
122 122
123 123 # Aliased for performance.
124 124 _zlibdecompress = zlib.decompress
125 125
126 126 # max size of revlog with inline data
127 127 _maxinline = 131072
128 128 _chunksize = 1048576
129 129
130 130 # Flag processors for REVIDX_ELLIPSIS.
131 131 def ellipsisreadprocessor(rl, text):
132 132 return text, False
133 133
134 134
135 135 def ellipsiswriteprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsisrawprocessor(rl, text):
140 140 return False
141 141
142 142
143 143 ellipsisprocessor = (
144 144 ellipsisreadprocessor,
145 145 ellipsiswriteprocessor,
146 146 ellipsisrawprocessor,
147 147 )
148 148
149 149
150 150 def _verify_revision(rl, skipflags, state, node):
151 151 """Verify the integrity of the given revlog ``node`` while providing a hook
152 152 point for extensions to influence the operation."""
153 153 if skipflags:
154 154 state[b'skipread'].add(node)
155 155 else:
156 156 # Side-effect: read content and verify hash.
157 157 rl.revision(node)
158 158
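# A hedged illustration (not part of this changeset): extensions typically
# influence this hook point by wrapping it, e.g. with
# extensions.wrapfunction(); the wrapper name below is assumed for
# illustration only:
#
#     from mercurial import extensions, revlog as revlogmod
#
#     def wrapped_verify(orig, rl, skipflags, state, node):
#         # skip hash verification for externally stored (e.g. LFS) data
#         return orig(rl, skipflags | revlogmod.REVIDX_EXTSTORED, state, node)
#
#     extensions.wrapfunction(revlogmod, '_verify_revision', wrapped_verify)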
159 159
160 160 # True if a fast implementation for persistent-nodemap is available
161 161 #
162 162 # We also consider we have a "fast" implementation in "pure" python because
163 163 # people using pure don't really have performance considerations (and a
164 164 # wheelbarrow of other slowness sources)
165 165 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
166 166 parsers, 'BaseIndexObject'
167 167 )
168 168
169 169
170 170 @interfaceutil.implementer(repository.irevisiondelta)
171 171 @attr.s(slots=True)
172 172 class revlogrevisiondelta(object):
173 173 node = attr.ib()
174 174 p1node = attr.ib()
175 175 p2node = attr.ib()
176 176 basenode = attr.ib()
177 177 flags = attr.ib()
178 178 baserevisionsize = attr.ib()
179 179 revision = attr.ib()
180 180 delta = attr.ib()
181 181 sidedata = attr.ib()
182 182 protocol_flags = attr.ib()
183 183 linknode = attr.ib(default=None)
184 184
185 185
186 186 @interfaceutil.implementer(repository.iverifyproblem)
187 187 @attr.s(frozen=True)
188 188 class revlogproblem(object):
189 189 warning = attr.ib(default=None)
190 190 error = attr.ib(default=None)
191 191 node = attr.ib(default=None)
192 192
193 193
194 194 def parse_index_v1(data, inline):
195 195 # call the C implementation to parse the index data
196 196 index, cache = parsers.parse_index2(data, inline)
197 197 return index, cache
198 198
199 199
200 200 def parse_index_v2(data, inline):
201 201 # call the C implementation to parse the index data
202 202 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
203 203 return index, cache
204 204
205 205
206 206 def parse_index_cl_v2(data, inline):
207 207 # call the C implementation to parse the index data
208 208 assert not inline
209 209 from .pure.parsers import parse_index_cl_v2
210 210
211 211 index, cache = parse_index_cl_v2(data)
212 212 return index, cache
213 213
214 214
215 215 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
216 216
217 217 def parse_index_v1_nodemap(data, inline):
218 218 index, cache = parsers.parse_index_devel_nodemap(data, inline)
219 219 return index, cache
220 220
221 221
222 222 else:
223 223 parse_index_v1_nodemap = None
224 224
225 225
226 226 def parse_index_v1_mixed(data, inline):
227 227 index, cache = parse_index_v1(data, inline)
228 228 return rustrevlog.MixedIndex(index), cache
229 229
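# A hedged sketch of the shared contract (not part of this changeset): every
# parse_index_* helper takes the raw index bytes plus an `inline` flag and
# returns a 2-tuple of (index object, chunk cache), e.g.:
#
#     index, cache = parse_index_v1(index_data, inline=True)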
230 230
231 231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
232 232 # signed integer)
233 233 _maxentrysize = 0x7FFFFFFF
234 234
235 235 PARTIAL_READ_MSG = _(
236 236 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
237 237 )
238 238
239 239 FILE_TOO_SHORT_MSG = _(
240 240 b'cannot read from revlog %s;'
241 241 b' expected %d bytes from offset %d, data size is %d'
242 242 )
243 243
244 244
245 245 class revlog(object):
246 246 """
247 247 the underlying revision storage object
248 248
249 249 A revlog consists of two parts, an index and the revision data.
250 250
251 251 The index is a file with a fixed record size containing
252 252 information on each revision, including its nodeid (hash), the
253 253 nodeids of its parents, the position and offset of its data within
254 254 the data file, and the revision it's based on. Finally, each entry
255 255 contains a linkrev entry that can serve as a pointer to external
256 256 data.
257 257
258 258 The revision data itself is a linear collection of data chunks.
259 259 Each chunk represents a revision and is usually represented as a
260 260 delta against the previous chunk. To bound lookup time, runs of
261 261 deltas are limited to about 2 times the length of the original
262 262 version data. This makes retrieval of a version proportional to
263 263 its size, or O(1) relative to the number of revisions.
264 264
265 265 Both pieces of the revlog are written to in an append-only
266 266 fashion, which means we never need to rewrite a file to insert or
267 267 remove data, and can use some simple techniques to avoid the need
268 268 for locking while reading.
269 269
270 270 If checkambig, indexfile is opened with checkambig=True at
271 271 writing, to avoid file stat ambiguity.
272 272
273 273 If mmaplargeindex is True, and an mmapindexthreshold is set, the
274 274 index will be mmapped rather than read if it is larger than the
275 275 configured threshold.
276 276
277 277 If censorable is True, the revlog can have censored revisions.
278 278
279 279 If `upperboundcomp` is not None, this is the expected maximal gain from
280 280 compression for the data content.
281 281
282 282 `concurrencychecker` is an optional function that receives 3 arguments: a
283 283 file handle, a filename, and an expected position. It should check whether
284 284 the current position in the file handle is valid, and log/warn/fail (by
285 285 raising).
286 286
287 287 See mercurial/revlogutils/constants.py for details about the content of an
288 288 index entry.
289 289 """
290 290
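# A hedged construction sketch (not part of this changeset; `svfs` and the
# filelog path are assumed for illustration):
#
#     from mercurial.revlogutils.constants import KIND_FILELOG
#
#     rl = revlog(svfs, target=(KIND_FILELOG, b'foo'), radix=b'data/foo')
#     rl.tip()  # node of the most recent revision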
291 291 _flagserrorclass = error.RevlogError
292 292
293 293 def __init__(
294 294 self,
295 295 opener,
296 296 target,
297 297 radix,
298 298 postfix=None, # only exist for `tmpcensored` now
299 299 checkambig=False,
300 300 mmaplargeindex=False,
301 301 censorable=False,
302 302 upperboundcomp=None,
303 303 persistentnodemap=False,
304 304 concurrencychecker=None,
305 305 trypending=False,
306 306 ):
307 307 """
308 308 create a revlog object
309 309
310 310 opener is a function that abstracts the file opening operation
311 311 and can be used to implement COW semantics or the like.
312 312
313 313 `target`: a (KIND, ID) tuple that identifies the content stored in
314 314 this revlog. It helps the rest of the code understand what the revlog
315 315 is about without having to resort to heuristics and index filename
316 316 analysis. Note that this must reliably be set by normal code, but
317 317 test, debug, or performance measurement code might not set this to
318 318 an accurate value.
319 319 """
320 320 self.upperboundcomp = upperboundcomp
321 321
322 322 self.radix = radix
323 323
324 324 self._docket_file = None
325 325 self._indexfile = None
326 326 self._datafile = None
327 327 self._sidedatafile = None
328 328 self._nodemap_file = None
329 329 self.postfix = postfix
330 330 self._trypending = trypending
331 331 self.opener = opener
332 332 if persistentnodemap:
333 333 self._nodemap_file = nodemaputil.get_nodemap_file(self)
334 334
335 335 assert target[0] in ALL_KINDS
336 336 assert len(target) == 2
337 337 self.target = target
338 338 # When True, indexfile is opened with checkambig=True at writing, to
339 339 # avoid file stat ambiguity.
340 340 self._checkambig = checkambig
341 341 self._mmaplargeindex = mmaplargeindex
342 342 self._censorable = censorable
343 343 # 3-tuple of (node, rev, text) for a raw revision.
344 344 self._revisioncache = None
345 345 # Maps rev to chain base rev.
346 346 self._chainbasecache = util.lrucachedict(100)
347 347 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
348 348 self._chunkcache = (0, b'')
349 349 # How much data to read and cache into the raw revlog data cache.
350 350 self._chunkcachesize = 65536
351 351 self._maxchainlen = None
352 352 self._deltabothparents = True
353 353 self.index = None
354 354 self._docket = None
355 355 self._nodemap_docket = None
356 356 # Mapping of partial identifiers to full nodes.
357 357 self._pcache = {}
358 358 # Mapping of revision integer to full node.
359 359 self._compengine = b'zlib'
360 360 self._compengineopts = {}
361 361 self._maxdeltachainspan = -1
362 362 self._withsparseread = False
363 363 self._sparserevlog = False
364 364 self.hassidedata = False
365 365 self._srdensitythreshold = 0.50
366 366 self._srmingapsize = 262144
367 367
368 368 # Make copy of flag processors so each revlog instance can support
369 369 # custom flags.
370 370 self._flagprocessors = dict(flagutil.flagprocessors)
371 371
372 372 # 3-tuple of file handles being used for active writing.
373 373 self._writinghandles = None
374 374 # prevent nesting of addgroup
375 375 self._adding_group = None
376 376
377 377 self._loadindex()
378 378
379 379 self._concurrencychecker = concurrencychecker
380 380
381 381 def _init_opts(self):
382 382 """process options (from above/config) to setup associated default revlog mode
383 383
384 384 These values might be affected when actually reading on disk information.
385 385
386 386 The relevant values are returned for use in _loadindex().
387 387
388 388 * newversionflags:
389 389 version header to use if we need to create a new revlog
390 390
391 391 * mmapindexthreshold:
392 392 minimal index size at which to start using mmap
393 393
394 394 * force_nodemap:
395 395 force the usage of a "development" version of the nodemap code
396 396 """
397 397 mmapindexthreshold = None
398 398 opts = self.opener.options
399 399
400 400 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
401 401 new_header = CHANGELOGV2
402 402 elif b'revlogv2' in opts:
403 403 new_header = REVLOGV2
404 404 elif b'revlogv1' in opts:
405 405 new_header = REVLOGV1 | FLAG_INLINE_DATA
406 406 if b'generaldelta' in opts:
407 407 new_header |= FLAG_GENERALDELTA
408 408 elif b'revlogv0' in self.opener.options:
409 409 new_header = REVLOGV0
410 410 else:
411 411 new_header = REVLOG_DEFAULT_VERSION
412 412
413 413 if b'chunkcachesize' in opts:
414 414 self._chunkcachesize = opts[b'chunkcachesize']
415 415 if b'maxchainlen' in opts:
416 416 self._maxchainlen = opts[b'maxchainlen']
417 417 if b'deltabothparents' in opts:
418 418 self._deltabothparents = opts[b'deltabothparents']
419 419 self._lazydelta = bool(opts.get(b'lazydelta', True))
420 420 self._lazydeltabase = False
421 421 if self._lazydelta:
422 422 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
423 423 if b'compengine' in opts:
424 424 self._compengine = opts[b'compengine']
425 425 if b'zlib.level' in opts:
426 426 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
427 427 if b'zstd.level' in opts:
428 428 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
429 429 if b'maxdeltachainspan' in opts:
430 430 self._maxdeltachainspan = opts[b'maxdeltachainspan']
431 431 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
432 432 mmapindexthreshold = opts[b'mmapindexthreshold']
433 433 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
434 434 withsparseread = bool(opts.get(b'with-sparse-read', False))
435 435 # sparse-revlog forces sparse-read
436 436 self._withsparseread = self._sparserevlog or withsparseread
437 437 if b'sparse-read-density-threshold' in opts:
438 438 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
439 439 if b'sparse-read-min-gap-size' in opts:
440 440 self._srmingapsize = opts[b'sparse-read-min-gap-size']
441 441 if opts.get(b'enableellipsis'):
442 442 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
443 443
444 444 # revlog v0 doesn't have flag processors
445 445 for flag, processor in pycompat.iteritems(
446 446 opts.get(b'flagprocessors', {})
447 447 ):
448 448 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
449 449
450 450 if self._chunkcachesize <= 0:
451 451 raise error.RevlogError(
452 452 _(b'revlog chunk cache size %r is not greater than 0')
453 453 % self._chunkcachesize
454 454 )
455 455 elif self._chunkcachesize & (self._chunkcachesize - 1):
456 456 raise error.RevlogError(
457 457 _(b'revlog chunk cache size %r is not a power of 2')
458 458 % self._chunkcachesize
459 459 )
460 460 force_nodemap = opts.get(b'devel-force-nodemap', False)
461 461 return new_header, mmapindexthreshold, force_nodemap
462 462
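# A hedged worked example (not part of this changeset): the version header
# keeps format flags in the high 16 bits and the revlog version in the low
# 16 bits, matching the masking done in _loadindex():
#
#     header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
#     header & 0xFFFF   # -> 1, i.e. REVLOGV1
#     header & ~0xFFFF  # -> FLAG_INLINE_DATA | FLAG_GENERALDELTA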
463 463 def _get_data(self, filepath, mmap_threshold, size=None):
464 464 """return a file content with or without mmap
465 465
466 466 If the file is missing return the empty string"""
467 467 try:
468 468 with self.opener(filepath) as fp:
469 469 if mmap_threshold is not None:
470 470 file_size = self.opener.fstat(fp).st_size
471 471 if file_size >= mmap_threshold:
472 472 if size is not None:
473 473 # avoid potential mmap crash
474 474 size = min(file_size, size)
475 475 # TODO: should .close() to release resources without
476 476 # relying on Python GC
477 477 if size is None:
478 478 return util.buffer(util.mmapread(fp))
479 479 else:
480 480 return util.buffer(util.mmapread(fp, size))
481 481 if size is None:
482 482 return fp.read()
483 483 else:
484 484 return fp.read(size)
485 485 except IOError as inst:
486 486 if inst.errno != errno.ENOENT:
487 487 raise
488 488 return b''
489 489
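# A hedged usage sketch (not part of this changeset): _loadindex() below
# calls this helper so large indexes are mmapped once past the configured
# threshold:
#
#     entry_data = self._get_data(entry_point, mmapindexthreshold)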
490 def _loadindex(self):
490 def _loadindex(self, docket=None):
491 491
492 492 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
493 493
494 494 if self.postfix is not None:
495 495 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
496 496 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
497 497 entry_point = b'%s.i.a' % self.radix
498 498 else:
499 499 entry_point = b'%s.i' % self.radix
500 500
501 entry_data = b''
502 self._initempty = True
503 entry_data = self._get_data(entry_point, mmapindexthreshold)
504 if len(entry_data) > 0:
505 header = INDEX_HEADER.unpack(entry_data[:4])[0]
506 self._initempty = False
501 if docket is not None:
502 self._docket = docket
503 self._docket_file = entry_point
507 504 else:
508 header = new_header
509
510 self._format_flags = header & ~0xFFFF
511 self._format_version = header & 0xFFFF
512
513 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
514 if supported_flags is None:
515 msg = _(b'unknown version (%d) in revlog %s')
516 msg %= (self._format_version, self.display_id)
517 raise error.RevlogError(msg)
518 elif self._format_flags & ~supported_flags:
519 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
520 display_flag = self._format_flags >> 16
521 msg %= (display_flag, self._format_version, self.display_id)
522 raise error.RevlogError(msg)
523
524 features = FEATURES_BY_VERSION[self._format_version]
525 self._inline = features[b'inline'](self._format_flags)
526 self._generaldelta = features[b'generaldelta'](self._format_flags)
527 self.hassidedata = features[b'sidedata']
528
529 if not features[b'docket']:
530 self._indexfile = entry_point
531 index_data = entry_data
532 else:
533 self._docket_file = entry_point
534 if self._initempty:
535 self._docket = docketutil.default_docket(self, header)
505 entry_data = b''
506 self._initempty = True
507 entry_data = self._get_data(entry_point, mmapindexthreshold)
508 if len(entry_data) > 0:
509 header = INDEX_HEADER.unpack(entry_data[:4])[0]
510 self._initempty = False
536 511 else:
537 self._docket = docketutil.parse_docket(
538 self, entry_data, use_pending=self._trypending
539 )
512 header = new_header
513
514 self._format_flags = header & ~0xFFFF
515 self._format_version = header & 0xFFFF
516
517 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
518 if supported_flags is None:
519 msg = _(b'unknown version (%d) in revlog %s')
520 msg %= (self._format_version, self.display_id)
521 raise error.RevlogError(msg)
522 elif self._format_flags & ~supported_flags:
523 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
524 display_flag = self._format_flags >> 16
525 msg %= (display_flag, self._format_version, self.display_id)
526 raise error.RevlogError(msg)
527
528 features = FEATURES_BY_VERSION[self._format_version]
529 self._inline = features[b'inline'](self._format_flags)
530 self._generaldelta = features[b'generaldelta'](self._format_flags)
531 self.hassidedata = features[b'sidedata']
532
533 if not features[b'docket']:
534 self._indexfile = entry_point
535 index_data = entry_data
536 else:
537 self._docket_file = entry_point
538 if self._initempty:
539 self._docket = docketutil.default_docket(self, header)
540 else:
541 self._docket = docketutil.parse_docket(
542 self, entry_data, use_pending=self._trypending
543 )
544
545 if self._docket is not None:
540 546 self._indexfile = self._docket.index_filepath()
541 547 index_data = b''
542 548 index_size = self._docket.index_end
543 549 if index_size > 0:
544 550 index_data = self._get_data(
545 551 self._indexfile, mmapindexthreshold, size=index_size
546 552 )
547 553 if len(index_data) < index_size:
548 554 msg = _(b'too few index data for %s: got %d, expected %d')
549 555 msg %= (self.display_id, len(index_data), index_size)
550 556 raise error.RevlogError(msg)
551 557
552 558 self._inline = False
553 559 # generaldelta implied by version 2 revlogs.
554 560 self._generaldelta = True
555 561 # the logic for persistent nodemap will be dealt with within the
556 562 # main docket, so disable it for now.
557 563 self._nodemap_file = None
558 564
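# A hedged caller-side sketch (not part of this changeset): the new `docket`
# parameter lets code that already holds a parsed docket reload the index
# without re-reading and re-parsing the entry point from disk:
#
#     rl._loadindex(docket=existing_docket)  # `existing_docket` is assumed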
559 565 if self._docket is not None:
560 566 self._datafile = self._docket.data_filepath()
561 567 self._sidedatafile = self._docket.sidedata_filepath()
562 568 elif self.postfix is None:
563 569 self._datafile = b'%s.d' % self.radix
564 570 else:
565 571 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
566 572
567 573 self.nodeconstants = sha1nodeconstants
568 574 self.nullid = self.nodeconstants.nullid
569 575
570 576 # sparse-revlog can't be on without general-delta (issue6056)
571 577 if not self._generaldelta:
572 578 self._sparserevlog = False
573 579
574 580 self._storedeltachains = True
575 581
576 582 devel_nodemap = (
577 583 self._nodemap_file
578 584 and force_nodemap
579 585 and parse_index_v1_nodemap is not None
580 586 )
581 587
582 588 use_rust_index = False
583 589 if rustrevlog is not None:
584 590 if self._nodemap_file is not None:
585 591 use_rust_index = True
586 592 else:
587 593 use_rust_index = self.opener.options.get(b'rust.index')
588 594
589 595 self._parse_index = parse_index_v1
590 596 if self._format_version == REVLOGV0:
591 597 self._parse_index = revlogv0.parse_index_v0
592 598 elif self._format_version == REVLOGV2:
593 599 self._parse_index = parse_index_v2
594 600 elif self._format_version == CHANGELOGV2:
595 601 self._parse_index = parse_index_cl_v2
596 602 elif devel_nodemap:
597 603 self._parse_index = parse_index_v1_nodemap
598 604 elif use_rust_index:
599 605 self._parse_index = parse_index_v1_mixed
600 606 try:
601 607 d = self._parse_index(index_data, self._inline)
602 608 index, _chunkcache = d
603 609 use_nodemap = (
604 610 not self._inline
605 611 and self._nodemap_file is not None
606 612 and util.safehasattr(index, 'update_nodemap_data')
607 613 )
608 614 if use_nodemap:
609 615 nodemap_data = nodemaputil.persisted_data(self)
610 616 if nodemap_data is not None:
611 617 docket = nodemap_data[0]
612 618 if (
613 619 len(d[0]) > docket.tip_rev
614 620 and d[0][docket.tip_rev][7] == docket.tip_node
615 621 ):
616 622 # no changelog tampering
617 623 self._nodemap_docket = docket
618 624 index.update_nodemap_data(*nodemap_data)
619 625 except (ValueError, IndexError):
620 626 raise error.RevlogError(
621 627 _(b"index %s is corrupted") % self.display_id
622 628 )
623 629 self.index, self._chunkcache = d
624 630 if not self._chunkcache:
625 631 self._chunkclear()
626 632 # revnum -> (chain-length, sum-delta-length)
627 633 self._chaininfocache = util.lrucachedict(500)
628 634 # revlog header -> revlog compressor
629 635 self._decompressors = {}
630 636
631 637 @util.propertycache
632 638 def revlog_kind(self):
633 639 return self.target[0]
634 640
635 641 @util.propertycache
636 642 def display_id(self):
637 643 """The public facing "ID" of the revlog that we use in messages"""
638 644 # Maybe we should build a user facing representation of
639 645 # revlog.target instead of using `self.radix`
640 646 return self.radix
641 647
642 648 def _get_decompressor(self, t):
643 649 try:
644 650 compressor = self._decompressors[t]
645 651 except KeyError:
646 652 try:
647 653 engine = util.compengines.forrevlogheader(t)
648 654 compressor = engine.revlogcompressor(self._compengineopts)
649 655 self._decompressors[t] = compressor
650 656 except KeyError:
651 657 raise error.RevlogError(
652 658 _(b'unknown compression type %s') % binascii.hexlify(t)
653 659 )
654 660 return compressor
655 661
656 662 @util.propertycache
657 663 def _compressor(self):
658 664 engine = util.compengines[self._compengine]
659 665 return engine.revlogcompressor(self._compengineopts)
660 666
661 667 @util.propertycache
662 668 def _decompressor(self):
663 669 """the default decompressor"""
664 670 if self._docket is None:
665 671 return None
666 672 t = self._docket.default_compression_header
667 673 c = self._get_decompressor(t)
668 674 return c.decompress
669 675
670 676 def _indexfp(self):
671 677 """file object for the revlog's index file"""
672 678 return self.opener(self._indexfile, mode=b"r")
673 679
674 680 def __index_write_fp(self):
675 681 # You should not use this directly; use `_writing` instead
676 682 try:
677 683 f = self.opener(
678 684 self._indexfile, mode=b"r+", checkambig=self._checkambig
679 685 )
680 686 if self._docket is None:
681 687 f.seek(0, os.SEEK_END)
682 688 else:
683 689 f.seek(self._docket.index_end, os.SEEK_SET)
684 690 return f
685 691 except IOError as inst:
686 692 if inst.errno != errno.ENOENT:
687 693 raise
688 694 return self.opener(
689 695 self._indexfile, mode=b"w+", checkambig=self._checkambig
690 696 )
691 697
692 698 def __index_new_fp(self):
693 699 # You should not use this unless you are upgrading from an inline revlog
694 700 return self.opener(
695 701 self._indexfile,
696 702 mode=b"w",
697 703 checkambig=self._checkambig,
698 704 atomictemp=True,
699 705 )
700 706
701 707 def _datafp(self, mode=b'r'):
702 708 """file object for the revlog's data file"""
703 709 return self.opener(self._datafile, mode=mode)
704 710
705 711 @contextlib.contextmanager
706 712 def _datareadfp(self, existingfp=None):
707 713 """file object suitable to read data"""
708 714 # Use explicit file handle, if given.
709 715 if existingfp is not None:
710 716 yield existingfp
711 717
712 718 # Use a file handle being actively used for writes, if available.
713 719 # There is some danger to doing this because reads will seek the
714 720 # file. However, _writeentry() performs a SEEK_END before all writes,
715 721 # so we should be safe.
716 722 elif self._writinghandles:
717 723 if self._inline:
718 724 yield self._writinghandles[0]
719 725 else:
720 726 yield self._writinghandles[1]
721 727
722 728 # Otherwise open a new file handle.
723 729 else:
724 730 if self._inline:
725 731 func = self._indexfp
726 732 else:
727 733 func = self._datafp
728 734 with func() as fp:
729 735 yield fp
730 736
731 737 @contextlib.contextmanager
732 738 def _sidedatareadfp(self):
733 739 """file object suitable to read sidedata"""
734 740 if self._writinghandles:
735 741 yield self._writinghandles[2]
736 742 else:
737 743 with self.opener(self._sidedatafile) as fp:
738 744 yield fp
739 745
740 746 def tiprev(self):
741 747 return len(self.index) - 1
742 748
743 749 def tip(self):
744 750 return self.node(self.tiprev())
745 751
746 752 def __contains__(self, rev):
747 753 return 0 <= rev < len(self)
748 754
749 755 def __len__(self):
750 756 return len(self.index)
751 757
752 758 def __iter__(self):
753 759 return iter(pycompat.xrange(len(self)))
754 760
755 761 def revs(self, start=0, stop=None):
756 762 """iterate over all rev in this revlog (from start to stop)"""
757 763 return storageutil.iterrevs(len(self), start=start, stop=stop)
758 764
759 765 @property
760 766 def nodemap(self):
761 767 msg = (
762 768 b"revlog.nodemap is deprecated, "
763 769 b"use revlog.index.[has_node|rev|get_rev]"
764 770 )
765 771 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
766 772 return self.index.nodemap
767 773
768 774 @property
769 775 def _nodecache(self):
770 776 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
771 777 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
772 778 return self.index.nodemap
773 779
774 780 def hasnode(self, node):
775 781 try:
776 782 self.rev(node)
777 783 return True
778 784 except KeyError:
779 785 return False
780 786
781 787 def candelta(self, baserev, rev):
782 788 """whether two revisions (baserev, rev) can be delta-ed or not"""
783 789 # Disable delta if either rev requires a content-changing flag
784 790 # processor (ex. LFS). This is because such flag processor can alter
785 791 # the rawtext content that the delta will be based on, and two clients
786 792 # could have a same revlog node with different flags (i.e. different
787 793 # rawtext contents) and the delta could be incompatible.
788 794 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
789 795 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
790 796 ):
791 797 return False
792 798 return True
793 799
794 800 def update_caches(self, transaction):
795 801 if self._nodemap_file is not None:
796 802 if transaction is None:
797 803 nodemaputil.update_persistent_nodemap(self)
798 804 else:
799 805 nodemaputil.setup_persistent_nodemap(transaction, self)
800 806
801 807 def clearcaches(self):
802 808 self._revisioncache = None
803 809 self._chainbasecache.clear()
804 810 self._chunkcache = (0, b'')
805 811 self._pcache = {}
806 812 self._nodemap_docket = None
807 813 self.index.clearcaches()
808 814 # The python code is the one responsible for validating the docket, so we
809 815 # end up having to refresh it here.
810 816 use_nodemap = (
811 817 not self._inline
812 818 and self._nodemap_file is not None
813 819 and util.safehasattr(self.index, 'update_nodemap_data')
814 820 )
815 821 if use_nodemap:
816 822 nodemap_data = nodemaputil.persisted_data(self)
817 823 if nodemap_data is not None:
818 824 self._nodemap_docket = nodemap_data[0]
819 825 self.index.update_nodemap_data(*nodemap_data)
820 826
821 827 def rev(self, node):
822 828 try:
823 829 return self.index.rev(node)
824 830 except TypeError:
825 831 raise
826 832 except error.RevlogError:
827 833 # parsers.c radix tree lookup failed
828 834 if (
829 835 node == self.nodeconstants.wdirid
830 836 or node in self.nodeconstants.wdirfilenodeids
831 837 ):
832 838 raise error.WdirUnsupported
833 839 raise error.LookupError(node, self.display_id, _(b'no node'))
834 840
835 841 # Accessors for index entries.
836 842
837 843 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
838 844 # are flags.
839 845 def start(self, rev):
840 846 return int(self.index[rev][0] >> 16)
841 847
842 848 def sidedata_cut_off(self, rev):
843 849 sd_cut_off = self.index[rev][8]
844 850 if sd_cut_off != 0:
845 851 return sd_cut_off
846 852 # This is some annoying dance, because entries without sidedata
847 853 # currently use 0 as their offset (instead of previous-offset +
848 854 # previous-size)
849 855 #
850 856 # We should reconsider this sidedata → 0 sidedata_offset policy.
851 857 # In the meantime, we need this.
852 858 while 0 <= rev:
853 859 e = self.index[rev]
854 860 if e[9] != 0:
855 861 return e[8] + e[9]
856 862 rev -= 1
857 863 return 0
858 864
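# A hedged worked example (not part of this changeset): if rev 7 has no
# sidedata (entry[8] == entry[9] == 0) but rev 6 stored 20 bytes of sidedata
# at offset 100, sidedata_cut_off(7) scans back to rev 6 and returns
# 100 + 20 = 120.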
859 865 def flags(self, rev):
860 866 return self.index[rev][0] & 0xFFFF
861 867
862 868 def length(self, rev):
863 869 return self.index[rev][1]
864 870
865 871 def sidedata_length(self, rev):
866 872 if not self.hassidedata:
867 873 return 0
868 874 return self.index[rev][9]
869 875
870 876 def rawsize(self, rev):
871 877 """return the length of the uncompressed text for a given revision"""
872 878 l = self.index[rev][2]
873 879 if l >= 0:
874 880 return l
875 881
876 882 t = self.rawdata(rev)
877 883 return len(t)
878 884
879 885 def size(self, rev):
880 886 """length of non-raw text (processed by a "read" flag processor)"""
881 887 # fast path: if no "read" flag processor could change the content,
882 888 # size is rawsize. note: ELLIPSIS is known to not change the content.
883 889 flags = self.flags(rev)
884 890 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
885 891 return self.rawsize(rev)
886 892
887 893 return len(self.revision(rev, raw=False))
888 894
889 895 def chainbase(self, rev):
890 896 base = self._chainbasecache.get(rev)
891 897 if base is not None:
892 898 return base
893 899
894 900 index = self.index
895 901 iterrev = rev
896 902 base = index[iterrev][3]
897 903 while base != iterrev:
898 904 iterrev = base
899 905 base = index[iterrev][3]
900 906
901 907 self._chainbasecache[rev] = base
902 908 return base
903 909
904 910 def linkrev(self, rev):
905 911 return self.index[rev][4]
906 912
907 913 def parentrevs(self, rev):
908 914 try:
909 915 entry = self.index[rev]
910 916 except IndexError:
911 917 if rev == wdirrev:
912 918 raise error.WdirUnsupported
913 919 raise
914 920 if entry[5] == nullrev:
915 921 return entry[6], entry[5]
916 922 else:
917 923 return entry[5], entry[6]
918 924
919 925 # fast parentrevs(rev) where rev isn't filtered
920 926 _uncheckedparentrevs = parentrevs
921 927
922 928 def node(self, rev):
923 929 try:
924 930 return self.index[rev][7]
925 931 except IndexError:
926 932 if rev == wdirrev:
927 933 raise error.WdirUnsupported
928 934 raise
929 935
930 936 # Derived from index values.
931 937
932 938 def end(self, rev):
933 939 return self.start(rev) + self.length(rev)
934 940
935 941 def parents(self, node):
936 942 i = self.index
937 943 d = i[self.rev(node)]
938 944 # inline node() to avoid function call overhead
939 945 if d[5] == self.nullid:
940 946 return i[d[6]][7], i[d[5]][7]
941 947 else:
942 948 return i[d[5]][7], i[d[6]][7]
943 949
944 950 def chainlen(self, rev):
945 951 return self._chaininfo(rev)[0]
946 952
947 953 def _chaininfo(self, rev):
948 954 chaininfocache = self._chaininfocache
949 955 if rev in chaininfocache:
950 956 return chaininfocache[rev]
951 957 index = self.index
952 958 generaldelta = self._generaldelta
953 959 iterrev = rev
954 960 e = index[iterrev]
955 961 clen = 0
956 962 compresseddeltalen = 0
957 963 while iterrev != e[3]:
958 964 clen += 1
959 965 compresseddeltalen += e[1]
960 966 if generaldelta:
961 967 iterrev = e[3]
962 968 else:
963 969 iterrev -= 1
964 970 if iterrev in chaininfocache:
965 971 t = chaininfocache[iterrev]
966 972 clen += t[0]
967 973 compresseddeltalen += t[1]
968 974 break
969 975 e = index[iterrev]
970 976 else:
971 977 # Add text length of base since decompressing that also takes
972 978 # work. For cache hits the length is already included.
973 979 compresseddeltalen += e[1]
974 980 r = (clen, compresseddeltalen)
975 981 chaininfocache[rev] = r
976 982 return r
977 983
978 984 def _deltachain(self, rev, stoprev=None):
979 985 """Obtain the delta chain for a revision.
980 986
981 987 ``stoprev`` specifies a revision to stop at. If not specified, we
982 988 stop at the base of the chain.
983 989
984 990 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
985 991 revs in ascending order and ``stopped`` is a bool indicating whether
986 992 ``stoprev`` was hit.
987 993 """
988 994 # Try C implementation.
989 995 try:
990 996 return self.index.deltachain(rev, stoprev, self._generaldelta)
991 997 except AttributeError:
992 998 pass
993 999
994 1000 chain = []
995 1001
996 1002 # Alias to prevent attribute lookup in tight loop.
997 1003 index = self.index
998 1004 generaldelta = self._generaldelta
999 1005
1000 1006 iterrev = rev
1001 1007 e = index[iterrev]
1002 1008 while iterrev != e[3] and iterrev != stoprev:
1003 1009 chain.append(iterrev)
1004 1010 if generaldelta:
1005 1011 iterrev = e[3]
1006 1012 else:
1007 1013 iterrev -= 1
1008 1014 e = index[iterrev]
1009 1015
1010 1016 if iterrev == stoprev:
1011 1017 stopped = True
1012 1018 else:
1013 1019 chain.append(iterrev)
1014 1020 stopped = False
1015 1021
1016 1022 chain.reverse()
1017 1023 return chain, stopped
1018 1024
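# A hedged worked example (not part of this changeset): with general delta,
# if rev 5 deltas against rev 4 and rev 4 against its chain base rev 2:
#
#     rl._deltachain(5)             # -> ([2, 4, 5], False)
#     rl._deltachain(5, stoprev=4)  # -> ([5], True)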
1019 1025 def ancestors(self, revs, stoprev=0, inclusive=False):
1020 1026 """Generate the ancestors of 'revs' in reverse revision order.
1021 1027 Does not generate revs lower than stoprev.
1022 1028
1023 1029 See the documentation for ancestor.lazyancestors for more details."""
1024 1030
1025 1031 # first, make sure start revisions aren't filtered
1026 1032 revs = list(revs)
1027 1033 checkrev = self.node
1028 1034 for r in revs:
1029 1035 checkrev(r)
1030 1036 # and we're sure ancestors aren't filtered as well
1031 1037
1032 1038 if rustancestor is not None and self.index.rust_ext_compat:
1033 1039 lazyancestors = rustancestor.LazyAncestors
1034 1040 arg = self.index
1035 1041 else:
1036 1042 lazyancestors = ancestor.lazyancestors
1037 1043 arg = self._uncheckedparentrevs
1038 1044 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1039 1045
1040 1046 def descendants(self, revs):
1041 1047 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1042 1048
1043 1049 def findcommonmissing(self, common=None, heads=None):
1044 1050 """Return a tuple of the ancestors of common and the ancestors of heads
1045 1051 that are not ancestors of common. In revset terminology, we return the
1046 1052 tuple:
1047 1053
1048 1054 ::common, (::heads) - (::common)
1049 1055
1050 1056 The list is sorted by revision number, meaning it is
1051 1057 topologically sorted.
1052 1058
1053 1059 'heads' and 'common' are both lists of node IDs. If heads is
1054 1060 not supplied, uses all of the revlog's heads. If common is not
1055 1061 supplied, uses nullid."""
1056 1062 if common is None:
1057 1063 common = [self.nullid]
1058 1064 if heads is None:
1059 1065 heads = self.heads()
1060 1066
1061 1067 common = [self.rev(n) for n in common]
1062 1068 heads = [self.rev(n) for n in heads]
1063 1069
1064 1070 # we want the ancestors, but inclusive
1065 1071 class lazyset(object):
1066 1072 def __init__(self, lazyvalues):
1067 1073 self.addedvalues = set()
1068 1074 self.lazyvalues = lazyvalues
1069 1075
1070 1076 def __contains__(self, value):
1071 1077 return value in self.addedvalues or value in self.lazyvalues
1072 1078
1073 1079 def __iter__(self):
1074 1080 added = self.addedvalues
1075 1081 for r in added:
1076 1082 yield r
1077 1083 for r in self.lazyvalues:
1078 1084 if r not in added:
1079 1085 yield r
1080 1086
1081 1087 def add(self, value):
1082 1088 self.addedvalues.add(value)
1083 1089
1084 1090 def update(self, values):
1085 1091 self.addedvalues.update(values)
1086 1092
1087 1093 has = lazyset(self.ancestors(common))
1088 1094 has.add(nullrev)
1089 1095 has.update(common)
1090 1096
1091 1097 # take all ancestors from heads that aren't in has
1092 1098 missing = set()
1093 1099 visit = collections.deque(r for r in heads if r not in has)
1094 1100 while visit:
1095 1101 r = visit.popleft()
1096 1102 if r in missing:
1097 1103 continue
1098 1104 else:
1099 1105 missing.add(r)
1100 1106 for p in self.parentrevs(r):
1101 1107 if p not in has:
1102 1108 visit.append(p)
1103 1109 missing = list(missing)
1104 1110 missing.sort()
1105 1111 return has, [self.node(miss) for miss in missing]
1106 1112
1107 1113 def incrementalmissingrevs(self, common=None):
1108 1114 """Return an object that can be used to incrementally compute the
1109 1115 revision numbers of the ancestors of arbitrary sets that are not
1110 1116 ancestors of common. This is an ancestor.incrementalmissingancestors
1111 1117 object.
1112 1118
1113 1119 'common' is a list of revision numbers. If common is not supplied, uses
1114 1120 nullrev.
1115 1121 """
1116 1122 if common is None:
1117 1123 common = [nullrev]
1118 1124
1119 1125 if rustancestor is not None and self.index.rust_ext_compat:
1120 1126 return rustancestor.MissingAncestors(self.index, common)
1121 1127 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1122 1128
1123 1129 def findmissingrevs(self, common=None, heads=None):
1124 1130 """Return the revision numbers of the ancestors of heads that
1125 1131 are not ancestors of common.
1126 1132
1127 1133 More specifically, return a list of revision numbers corresponding to
1128 1134 nodes N such that every N satisfies the following constraints:
1129 1135
1130 1136 1. N is an ancestor of some node in 'heads'
1131 1137 2. N is not an ancestor of any node in 'common'
1132 1138
1133 1139 The list is sorted by revision number, meaning it is
1134 1140 topologically sorted.
1135 1141
1136 1142 'heads' and 'common' are both lists of revision numbers. If heads is
1137 1143 not supplied, uses all of the revlog's heads. If common is not
1138 1144 supplied, uses nullid."""
1139 1145 if common is None:
1140 1146 common = [nullrev]
1141 1147 if heads is None:
1142 1148 heads = self.headrevs()
1143 1149
1144 1150 inc = self.incrementalmissingrevs(common=common)
1145 1151 return inc.missingancestors(heads)
1146 1152
1147 1153 def findmissing(self, common=None, heads=None):
1148 1154 """Return the ancestors of heads that are not ancestors of common.
1149 1155
1150 1156 More specifically, return a list of nodes N such that every N
1151 1157 satisfies the following constraints:
1152 1158
1153 1159 1. N is an ancestor of some node in 'heads'
1154 1160 2. N is not an ancestor of any node in 'common'
1155 1161
1156 1162 The list is sorted by revision number, meaning it is
1157 1163 topologically sorted.
1158 1164
1159 1165 'heads' and 'common' are both lists of node IDs. If heads is
1160 1166 not supplied, uses all of the revlog's heads. If common is not
1161 1167 supplied, uses nullid."""
1162 1168 if common is None:
1163 1169 common = [self.nullid]
1164 1170 if heads is None:
1165 1171 heads = self.heads()
1166 1172
1167 1173 common = [self.rev(n) for n in common]
1168 1174 heads = [self.rev(n) for n in heads]
1169 1175
1170 1176 inc = self.incrementalmissingrevs(common=common)
1171 1177 return [self.node(r) for r in inc.missingancestors(heads)]
1172 1178
1173 1179 def nodesbetween(self, roots=None, heads=None):
1174 1180 """Return a topological path from 'roots' to 'heads'.
1175 1181
1176 1182 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1177 1183 topologically sorted list of all nodes N that satisfy both of
1178 1184 these constraints:
1179 1185
1180 1186 1. N is a descendant of some node in 'roots'
1181 1187 2. N is an ancestor of some node in 'heads'
1182 1188
1183 1189 Every node is considered to be both a descendant and an ancestor
1184 1190 of itself, so every reachable node in 'roots' and 'heads' will be
1185 1191 included in 'nodes'.
1186 1192
1187 1193 'outroots' is the list of reachable nodes in 'roots', i.e., the
1188 1194 subset of 'roots' that is returned in 'nodes'. Likewise,
1189 1195 'outheads' is the subset of 'heads' that is also in 'nodes'.
1190 1196
1191 1197 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1192 1198 unspecified, uses nullid as the only root. If 'heads' is
1193 1199 unspecified, uses list of all of the revlog's heads."""
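# A hedged worked example (not part of this changeset): in a history with
# revs 0 <- 1 <- 2 and a second branch 0 <- 3,
# nodesbetween(roots=[node(0)], heads=[node(2)]) returns the nodes of
# revs 0, 1, 2 along with outroots [node(0)] and outheads [node(2)].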
1194 1200 nonodes = ([], [], [])
1195 1201 if roots is not None:
1196 1202 roots = list(roots)
1197 1203 if not roots:
1198 1204 return nonodes
1199 1205 lowestrev = min([self.rev(n) for n in roots])
1200 1206 else:
1201 1207 roots = [self.nullid] # Everybody's a descendant of nullid
1202 1208 lowestrev = nullrev
1203 1209 if (lowestrev == nullrev) and (heads is None):
1204 1210 # We want _all_ the nodes!
1205 1211 return (
1206 1212 [self.node(r) for r in self],
1207 1213 [self.nullid],
1208 1214 list(self.heads()),
1209 1215 )
1210 1216 if heads is None:
1211 1217 # All nodes are ancestors, so the latest ancestor is the last
1212 1218 # node.
1213 1219 highestrev = len(self) - 1
1214 1220 # Set ancestors to None to signal that every node is an ancestor.
1215 1221 ancestors = None
1216 1222 # Set heads to an empty dictionary for later discovery of heads
1217 1223 heads = {}
1218 1224 else:
1219 1225 heads = list(heads)
1220 1226 if not heads:
1221 1227 return nonodes
1222 1228 ancestors = set()
1223 1229 # Turn heads into a dictionary so we can remove 'fake' heads.
1224 1230 # Also, later we will be using it to filter out the heads we can't
1225 1231 # find from roots.
1226 1232 heads = dict.fromkeys(heads, False)
1227 1233 # Start at the top and keep marking parents until we're done.
1228 1234 nodestotag = set(heads)
1229 1235 # Remember where the top was so we can use it as a limit later.
1230 1236 highestrev = max([self.rev(n) for n in nodestotag])
1231 1237 while nodestotag:
1232 1238 # grab a node to tag
1233 1239 n = nodestotag.pop()
1234 1240 # Never tag nullid
1235 1241 if n == self.nullid:
1236 1242 continue
1237 1243 # A node's revision number represents its place in a
1238 1244 # topologically sorted list of nodes.
1239 1245 r = self.rev(n)
1240 1246 if r >= lowestrev:
1241 1247 if n not in ancestors:
1242 1248 # If we are possibly a descendant of one of the roots
1243 1249 # and we haven't already been marked as an ancestor
1244 1250 ancestors.add(n) # Mark as ancestor
1245 1251 # Add non-nullid parents to list of nodes to tag.
1246 1252 nodestotag.update(
1247 1253 [p for p in self.parents(n) if p != self.nullid]
1248 1254 )
1249 1255 elif n in heads: # We've seen it before, is it a fake head?
1250 1256 # So it is, real heads should not be the ancestors of
1251 1257 # any other heads.
1252 1258 heads.pop(n)
1253 1259 if not ancestors:
1254 1260 return nonodes
1255 1261 # Now that we have our set of ancestors, we want to remove any
1256 1262 # roots that are not ancestors.
1257 1263
1258 1264 # If one of the roots was nullid, everything is included anyway.
1259 1265 if lowestrev > nullrev:
1260 1266 # But, since we weren't, let's recompute the lowest rev to not
1261 1267 # include roots that aren't ancestors.
1262 1268
1263 1269 # Filter out roots that aren't ancestors of heads
1264 1270 roots = [root for root in roots if root in ancestors]
1265 1271 # Recompute the lowest revision
1266 1272 if roots:
1267 1273 lowestrev = min([self.rev(root) for root in roots])
1268 1274 else:
1269 1275 # No more roots? Return empty list
1270 1276 return nonodes
1271 1277 else:
1272 1278 # We are descending from nullid, and don't need to care about
1273 1279 # any other roots.
1274 1280 lowestrev = nullrev
1275 1281 roots = [self.nullid]
1276 1282 # Transform our roots list into a set.
1277 1283 descendants = set(roots)
1278 1284 # Also, keep the original roots so we can filter out roots that aren't
1279 1285 # 'real' roots (i.e. are descended from other roots).
1280 1286 roots = descendants.copy()
1281 1287 # Our topologically sorted list of output nodes.
1282 1288 orderedout = []
1283 1289 # Don't start at nullid since we don't want nullid in our output list,
1284 1290 # and if nullid shows up in descendants, empty parents will look like
1285 1291 # they're descendants.
1286 1292 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1287 1293 n = self.node(r)
1288 1294 isdescendant = False
1289 1295 if lowestrev == nullrev: # Everybody is a descendant of nullid
1290 1296 isdescendant = True
1291 1297 elif n in descendants:
1292 1298 # n is already a descendant
1293 1299 isdescendant = True
1294 1300 # This check only needs to be done here because all the roots
1295 1301 # will start being marked as descendants before the loop.
1296 1302 if n in roots:
1297 1303 # If n was a root, check if it's a 'real' root.
1298 1304 p = tuple(self.parents(n))
1299 1305 # If any of its parents are descendants, it's not a root.
1300 1306 if (p[0] in descendants) or (p[1] in descendants):
1301 1307 roots.remove(n)
1302 1308 else:
1303 1309 p = tuple(self.parents(n))
1304 1310 # A node is a descendant if either of its parents are
1305 1311 # descendants. (We seeded the descendants set with the roots
1306 1312 # up there, remember?)
1307 1313 if (p[0] in descendants) or (p[1] in descendants):
1308 1314 descendants.add(n)
1309 1315 isdescendant = True
1310 1316 if isdescendant and ((ancestors is None) or (n in ancestors)):
1311 1317 # Only include nodes that are both descendants and ancestors.
1312 1318 orderedout.append(n)
1313 1319 if (ancestors is not None) and (n in heads):
1314 1320 # We're trying to figure out which heads are reachable
1315 1321 # from roots.
1316 1322 # Mark this head as having been reached
1317 1323 heads[n] = True
1318 1324 elif ancestors is None:
1319 1325 # Otherwise, we're trying to discover the heads.
1320 1326 # Assume this is a head because if it isn't, the next step
1321 1327 # will eventually remove it.
1322 1328 heads[n] = True
1323 1329 # But, obviously its parents aren't.
1324 1330 for p in self.parents(n):
1325 1331 heads.pop(p, None)
1326 1332 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1327 1333 roots = list(roots)
1328 1334 assert orderedout
1329 1335 assert roots
1330 1336 assert heads
1331 1337 return (orderedout, roots, heads)
1332 1338
1333 1339 def headrevs(self, revs=None):
1334 1340 if revs is None:
1335 1341 try:
1336 1342 return self.index.headrevs()
1337 1343 except AttributeError:
1338 1344 return self._headrevs()
1339 1345 if rustdagop is not None and self.index.rust_ext_compat:
1340 1346 return rustdagop.headrevs(self.index, revs)
1341 1347 return dagop.headrevs(revs, self._uncheckedparentrevs)
1342 1348
1343 1349 def computephases(self, roots):
1344 1350 return self.index.computephasesmapsets(roots)
1345 1351
1346 1352 def _headrevs(self):
1347 1353 count = len(self)
1348 1354 if not count:
1349 1355 return [nullrev]
1350 1356 # we won't iterate over filtered revs, so nobody is a head at start
1351 1357 ishead = [0] * (count + 1)
1352 1358 index = self.index
1353 1359 for r in self:
1354 1360 ishead[r] = 1 # I may be a head
1355 1361 e = index[r]
1356 1362 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1357 1363 return [r for r, val in enumerate(ishead) if val]
1358 1364
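# A hedged worked example (not part of this changeset): for a linear history
# 0 <- 1 <- 2, every rev is first marked as a potential head, then each
# entry clears its parents' bits, leaving headrevs == [2].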
1359 1365 def heads(self, start=None, stop=None):
1360 1366 """return the list of all nodes that have no children
1361 1367
1362 1368 if start is specified, only heads that are descendants of
1363 1369 start will be returned
1364 1370 if stop is specified, it will consider all the revs from stop
1365 1371 as if they had no children
1366 1372 """
1367 1373 if start is None and stop is None:
1368 1374 if not len(self):
1369 1375 return [self.nullid]
1370 1376 return [self.node(r) for r in self.headrevs()]
1371 1377
1372 1378 if start is None:
1373 1379 start = nullrev
1374 1380 else:
1375 1381 start = self.rev(start)
1376 1382
1377 1383 stoprevs = {self.rev(n) for n in stop or []}
1378 1384
1379 1385 revs = dagop.headrevssubset(
1380 1386 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1381 1387 )
1382 1388
1383 1389 return [self.node(rev) for rev in revs]
1384 1390
1385 1391 def children(self, node):
1386 1392 """find the children of a given node"""
1387 1393 c = []
1388 1394 p = self.rev(node)
1389 1395 for r in self.revs(start=p + 1):
1390 1396 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1391 1397 if prevs:
1392 1398 for pr in prevs:
1393 1399 if pr == p:
1394 1400 c.append(self.node(r))
1395 1401 elif p == nullrev:
1396 1402 c.append(self.node(r))
1397 1403 return c
1398 1404
1399 1405 def commonancestorsheads(self, a, b):
1400 1406 """calculate all the heads of the common ancestors of nodes a and b"""
1401 1407 a, b = self.rev(a), self.rev(b)
1402 1408 ancs = self._commonancestorsheads(a, b)
1403 1409 return pycompat.maplist(self.node, ancs)
1404 1410
1405 1411 def _commonancestorsheads(self, *revs):
1406 1412 """calculate all the heads of the common ancestors of revs"""
1407 1413 try:
1408 1414 ancs = self.index.commonancestorsheads(*revs)
1409 1415 except (AttributeError, OverflowError): # C implementation failed
1410 1416 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1411 1417 return ancs
1412 1418
1413 1419 def isancestor(self, a, b):
1414 1420 """return True if node a is an ancestor of node b
1415 1421
1416 1422 A revision is considered an ancestor of itself."""
1417 1423 a, b = self.rev(a), self.rev(b)
1418 1424 return self.isancestorrev(a, b)
1419 1425
1420 1426 def isancestorrev(self, a, b):
1421 1427 """return True if revision a is an ancestor of revision b
1422 1428
1423 1429 A revision is considered an ancestor of itself.
1424 1430
1425 1431 The implementation of this is trivial but the use of
1426 1432 reachableroots is not."""
1427 1433 if a == nullrev:
1428 1434 return True
1429 1435 elif a == b:
1430 1436 return True
1431 1437 elif a > b:
1432 1438 return False
1433 1439 return bool(self.reachableroots(a, [b], [a], includepath=False))
1434 1440
1435 1441 def reachableroots(self, minroot, heads, roots, includepath=False):
1436 1442 """return (heads(::(<roots> and <roots>::<heads>)))
1437 1443
1438 1444 If includepath is True, return (<roots>::<heads>)."""
1439 1445 try:
1440 1446 return self.index.reachableroots2(
1441 1447 minroot, heads, roots, includepath
1442 1448 )
1443 1449 except AttributeError:
1444 1450 return dagop._reachablerootspure(
1445 1451 self.parentrevs, minroot, roots, heads, includepath
1446 1452 )
1447 1453
1448 1454 def ancestor(self, a, b):
1449 1455 """calculate the "best" common ancestor of nodes a and b"""
1450 1456
1451 1457 a, b = self.rev(a), self.rev(b)
1452 1458 try:
1453 1459 ancs = self.index.ancestors(a, b)
1454 1460 except (AttributeError, OverflowError):
1455 1461 ancs = ancestor.ancestors(self.parentrevs, a, b)
1456 1462 if ancs:
1457 1463 # choose a consistent winner when there's a tie
1458 1464 return min(map(self.node, ancs))
1459 1465 return self.nullid
1460 1466
1461 1467 def _match(self, id):
1462 1468 if isinstance(id, int):
1463 1469 # rev
1464 1470 return self.node(id)
1465 1471 if len(id) == self.nodeconstants.nodelen:
1466 1472 # possibly a binary node
1467 1473 # odds of a binary node being all hex in ASCII are 1 in 10**25
1468 1474 try:
1469 1475 node = id
1470 1476 self.rev(node) # quick search the index
1471 1477 return node
1472 1478 except error.LookupError:
1473 1479 pass # may be partial hex id
1474 1480 try:
1475 1481 # str(rev)
1476 1482 rev = int(id)
1477 1483 if b"%d" % rev != id:
1478 1484 raise ValueError
1479 1485 if rev < 0:
1480 1486 rev = len(self) + rev
1481 1487 if rev < 0 or rev >= len(self):
1482 1488 raise ValueError
1483 1489 return self.node(rev)
1484 1490 except (ValueError, OverflowError):
1485 1491 pass
1486 1492 if len(id) == 2 * self.nodeconstants.nodelen:
1487 1493 try:
1488 1494 # a full hex nodeid?
1489 1495 node = bin(id)
1490 1496 self.rev(node)
1491 1497 return node
1492 1498 except (TypeError, error.LookupError):
1493 1499 pass
1494 1500
1495 1501 def _partialmatch(self, id):
1496 1502 # we don't care about wdirfilenodeids as they should always be full hashes
1497 1503 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1498 1504 ambiguous = False
1499 1505 try:
1500 1506 partial = self.index.partialmatch(id)
1501 1507 if partial and self.hasnode(partial):
1502 1508 if maybewdir:
1503 1509 # single 'ff...' match in radix tree, ambiguous with wdir
1504 1510 ambiguous = True
1505 1511 else:
1506 1512 return partial
1507 1513 elif maybewdir:
1508 1514 # no 'ff...' match in radix tree, wdir identified
1509 1515 raise error.WdirUnsupported
1510 1516 else:
1511 1517 return None
1512 1518 except error.RevlogError:
1513 1519 # parsers.c radix tree lookup gave multiple matches
1514 1520 # fast path: for unfiltered changelog, radix tree is accurate
1515 1521 if not getattr(self, 'filteredrevs', None):
1516 1522 ambiguous = True
1517 1523 # fall through to slow path that filters hidden revisions
1518 1524 except (AttributeError, ValueError):
1519 1525 # we are pure python, or key was too short to search radix tree
1520 1526 pass
1521 1527 if ambiguous:
1522 1528 raise error.AmbiguousPrefixLookupError(
1523 1529 id, self.display_id, _(b'ambiguous identifier')
1524 1530 )
1525 1531
1526 1532 if id in self._pcache:
1527 1533 return self._pcache[id]
1528 1534
1529 1535 if len(id) <= 40:
1530 1536 try:
1531 1537 # hex(node)[:...]
1532 1538 l = len(id) // 2 # grab an even number of digits
1533 1539 prefix = bin(id[: l * 2])
1534 1540 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1535 1541 nl = [
1536 1542 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1537 1543 ]
1538 1544 if self.nodeconstants.nullhex.startswith(id):
1539 1545 nl.append(self.nullid)
1540 1546 if len(nl) > 0:
1541 1547 if len(nl) == 1 and not maybewdir:
1542 1548 self._pcache[id] = nl[0]
1543 1549 return nl[0]
1544 1550 raise error.AmbiguousPrefixLookupError(
1545 1551 id, self.display_id, _(b'ambiguous identifier')
1546 1552 )
1547 1553 if maybewdir:
1548 1554 raise error.WdirUnsupported
1549 1555 return None
1550 1556 except TypeError:
1551 1557 pass
1552 1558
1553 1559 def lookup(self, id):
1554 1560 """locate a node based on:
1555 1561 - revision number or str(revision number)
1556 1562 - nodeid or subset of hex nodeid
1557 1563 """
1558 1564 n = self._match(id)
1559 1565 if n is not None:
1560 1566 return n
1561 1567 n = self._partialmatch(id)
1562 1568 if n:
1563 1569 return n
1564 1570
1565 1571 raise error.LookupError(id, self.display_id, _(b'no match found'))
1566 1572
1567 1573 def shortest(self, node, minlength=1):
1568 1574 """Find the shortest unambiguous prefix that matches node."""
1569 1575
1570 1576 def isvalid(prefix):
1571 1577 try:
1572 1578 matchednode = self._partialmatch(prefix)
1573 1579 except error.AmbiguousPrefixLookupError:
1574 1580 return False
1575 1581 except error.WdirUnsupported:
1576 1582 # single 'ff...' match
1577 1583 return True
1578 1584 if matchednode is None:
1579 1585 raise error.LookupError(node, self.display_id, _(b'no node'))
1580 1586 return True
1581 1587
1582 1588 def maybewdir(prefix):
1583 1589 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1584 1590
1585 1591 hexnode = hex(node)
1586 1592
1587 1593 def disambiguate(hexnode, minlength):
1588 1594 """Disambiguate against wdirid."""
1589 1595 for length in range(minlength, len(hexnode) + 1):
1590 1596 prefix = hexnode[:length]
1591 1597 if not maybewdir(prefix):
1592 1598 return prefix
1593 1599
1594 1600 if not getattr(self, 'filteredrevs', None):
1595 1601 try:
1596 1602 length = max(self.index.shortest(node), minlength)
1597 1603 return disambiguate(hexnode, length)
1598 1604 except error.RevlogError:
1599 1605 if node != self.nodeconstants.wdirid:
1600 1606 raise error.LookupError(
1601 1607 node, self.display_id, _(b'no node')
1602 1608 )
1603 1609 except AttributeError:
1604 1610 # Fall through to pure code
1605 1611 pass
1606 1612
1607 1613 if node == self.nodeconstants.wdirid:
1608 1614 for length in range(minlength, len(hexnode) + 1):
1609 1615 prefix = hexnode[:length]
1610 1616 if isvalid(prefix):
1611 1617 return prefix
1612 1618
1613 1619 for length in range(minlength, len(hexnode) + 1):
1614 1620 prefix = hexnode[:length]
1615 1621 if isvalid(prefix):
1616 1622 return disambiguate(hexnode, length)
1617 1623
1618 1624 def cmp(self, node, text):
1619 1625 """compare text with a given file revision
1620 1626
1621 1627 returns True if text is different from what is stored.
1622 1628 """
1623 1629 p1, p2 = self.parents(node)
1624 1630 return storageutil.hashrevisionsha1(text, p1, p2) != node
1625 1631
1626 1632 def _cachesegment(self, offset, data):
1627 1633 """Add a segment to the revlog cache.
1628 1634
1629 1635 Accepts an absolute offset and the data that is at that location.
1630 1636 """
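# Sketch of the behavior: a cache of (0, <64KiB of data>) followed by a
# segment at offset 65536 extends the cached window, while a non-adjacent
# offset (or a combined size >= _chunksize) replaces the cache instead.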
1631 1637 o, d = self._chunkcache
1632 1638 # try to add to existing cache
1633 1639 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1634 1640 self._chunkcache = o, d + data
1635 1641 else:
1636 1642 self._chunkcache = offset, data
1637 1643
1638 1644 def _readsegment(self, offset, length, df=None):
1639 1645 """Load a segment of raw data from the revlog.
1640 1646
1641 1647 Accepts an absolute offset, length to read, and an optional existing
1642 1648 file handle to read from.
1643 1649
1644 1650 If an existing file handle is passed, it will be seeked and the
1645 1651 original seek position will NOT be restored.
1646 1652
1647 1653 Returns a str or buffer of raw byte data.
1648 1654
1649 1655 Raises if the requested number of bytes could not be read.
1650 1656 """
1651 1657 # Cache data both forward and backward around the requested
1652 1658 # data, in a fixed size window. This helps speed up operations
1653 1659 # involving reading the revlog backwards.
1654 1660 cachesize = self._chunkcachesize
1655 1661 realoffset = offset & ~(cachesize - 1)
1656 1662 reallength = (
1657 1663 (offset + length + cachesize) & ~(cachesize - 1)
1658 1664 ) - realoffset
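# Worked example (hypothetical values): with cachesize = 65536,
# offset = 70000 and length = 100, realoffset = 65536 and
# reallength = 65536, so a single aligned 64KiB window is read and cached.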
1659 1665 with self._datareadfp(df) as df:
1660 1666 df.seek(realoffset)
1661 1667 d = df.read(reallength)
1662 1668
1663 1669 self._cachesegment(realoffset, d)
1664 1670 if offset != realoffset or reallength != length:
1665 1671 startoffset = offset - realoffset
1666 1672 if len(d) - startoffset < length:
1667 1673 filename = self._indexfile if self._inline else self._datafile
1668 1674 got = len(d) - startoffset
1669 1675 m = PARTIAL_READ_MSG % (filename, length, offset, got)
1670 1676 raise error.RevlogError(m)
1671 1677 return util.buffer(d, startoffset, length)
1672 1678
1673 1679 if len(d) < length:
1674 1680 filename = self._indexfile if self._inline else self._datafile
1675 1681 got = len(d)  # offset == realoffset in this branch, nothing skipped
1676 1682 m = PARTIAL_READ_MSG % (filename, length, offset, got)
1677 1683 raise error.RevlogError(m)
1678 1684
1679 1685 return d
1680 1686
1681 1687 def _getsegment(self, offset, length, df=None):
1682 1688 """Obtain a segment of raw data from the revlog.
1683 1689
1684 1690 Accepts an absolute offset, length of bytes to obtain, and an
1685 1691 optional file handle to the already-opened revlog. If the file
1686 1692 handle is used, it's original seek position will not be preserved.
1687 1693
1688 1694 Requests for data may be returned from a cache.
1689 1695
1690 1696 Returns a str or a buffer instance of raw byte data.
1691 1697 """
1692 1698 o, d = self._chunkcache
1693 1699 l = len(d)
1694 1700
1695 1701 # is it in the cache?
1696 1702 cachestart = offset - o
1697 1703 cacheend = cachestart + length
1698 1704 if cachestart >= 0 and cacheend <= l:
1699 1705 if cachestart == 0 and cacheend == l:
1700 1706 return d # avoid a copy
1701 1707 return util.buffer(d, cachestart, cacheend - cachestart)
1702 1708
1703 1709 return self._readsegment(offset, length, df=df)
1704 1710
1705 1711 def _getsegmentforrevs(self, startrev, endrev, df=None):
1706 1712 """Obtain a segment of raw data corresponding to a range of revisions.
1707 1713
1708 1714 Accepts the start and end revisions and an optional already-open
1709 1715 file handle to be used for reading. If the file handle is used, its
1710 1716 seek position will not be preserved.
1711 1717
1712 1718 Requests for data may be satisfied by a cache.
1713 1719
1714 1720 Returns a 2-tuple of (offset, data) for the requested range of
1715 1721 revisions. Offset is the integer offset from the beginning of the
1716 1722 revlog and data is a str or buffer of the raw byte data.
1717 1723
1718 1724 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1719 1725 to determine where each revision's data begins and ends.
1720 1726 """
1721 1727 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1722 1728 # (functions are expensive).
1723 1729 index = self.index
1724 1730 istart = index[startrev]
1725 1731 start = int(istart[0] >> 16)
1726 1732 if startrev == endrev:
1727 1733 end = start + istart[1]
1728 1734 else:
1729 1735 iend = index[endrev]
1730 1736 end = int(iend[0] >> 16) + iend[1]
1731 1737
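# In an inline revlog, each data chunk is preceded by its index entry in
# the same file, so rev r's data sits after (r + 1) index entries.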
1732 1738 if self._inline:
1733 1739 start += (startrev + 1) * self.index.entry_size
1734 1740 end += (endrev + 1) * self.index.entry_size
1735 1741 length = end - start
1736 1742
1737 1743 return start, self._getsegment(start, length, df=df)
1738 1744
1739 1745 def _chunk(self, rev, df=None):
1740 1746 """Obtain a single decompressed chunk for a revision.
1741 1747
1742 1748 Accepts an integer revision and an optional already-open file handle
1743 1749 to be used for reading. If used, the seek position of the file will not
1744 1750 be preserved.
1745 1751
1746 1752 Returns a str holding uncompressed data for the requested revision.
1747 1753 """
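# Slot 10 of the index entry records how the chunk was compressed:
# COMP_MODE_PLAIN (stored as-is), COMP_MODE_DEFAULT (the docket-level
# default compressor) or COMP_MODE_INLINE (self-identifying header byte).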
1748 1754 compression_mode = self.index[rev][10]
1749 1755 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1750 1756 if compression_mode == COMP_MODE_PLAIN:
1751 1757 return data
1752 1758 elif compression_mode == COMP_MODE_DEFAULT:
1753 1759 return self._decompressor(data)
1754 1760 elif compression_mode == COMP_MODE_INLINE:
1755 1761 return self.decompress(data)
1756 1762 else:
1757 1763 msg = 'unknown compression mode %d'
1758 1764 msg %= compression_mode
1759 1765 raise error.RevlogError(msg)
1760 1766
1761 1767 def _chunks(self, revs, df=None, targetsize=None):
1762 1768 """Obtain decompressed chunks for the specified revisions.
1763 1769
1764 1770 Accepts an iterable of numeric revisions that are assumed to be in
1765 1771 ascending order. Also accepts an optional already-open file handle
1766 1772 to be used for reading. If used, the seek position of the file will
1767 1773 not be preserved.
1768 1774
1769 1775 This function is similar to calling ``self._chunk()`` multiple times,
1770 1776 but is faster.
1771 1777
1772 1778 Returns a list with decompressed data for each requested revision.
1773 1779 """
1774 1780 if not revs:
1775 1781 return []
1776 1782 start = self.start
1777 1783 length = self.length
1778 1784 inline = self._inline
1779 1785 iosize = self.index.entry_size
1780 1786 buffer = util.buffer
1781 1787
1782 1788 l = []
1783 1789 ladd = l.append
1784 1790
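# With sparse-read enabled, the revisions are sliced into groups that are
# dense on disk, so each group can be fetched with a single contiguous
# read instead of one read per revision.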
1785 1791 if not self._withsparseread:
1786 1792 slicedchunks = (revs,)
1787 1793 else:
1788 1794 slicedchunks = deltautil.slicechunk(
1789 1795 self, revs, targetsize=targetsize
1790 1796 )
1791 1797
1792 1798 for revschunk in slicedchunks:
1793 1799 firstrev = revschunk[0]
1794 1800 # Skip trailing revisions with empty diff
1795 1801 for lastrev in revschunk[::-1]:
1796 1802 if length(lastrev) != 0:
1797 1803 break
1798 1804
1799 1805 try:
1800 1806 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1801 1807 except OverflowError:
1802 1808 # issue4215 - we can't cache a run of chunks greater than
1803 1809 # 2G on Windows
1804 1810 return [self._chunk(rev, df=df) for rev in revschunk]
1805 1811
1806 1812 decomp = self.decompress
1807 1813 # self._decompressor might be None, but will not be used in that case
1808 1814 def_decomp = self._decompressor
1809 1815 for rev in revschunk:
1810 1816 chunkstart = start(rev)
1811 1817 if inline:
1812 1818 chunkstart += (rev + 1) * iosize
1813 1819 chunklength = length(rev)
1814 1820 comp_mode = self.index[rev][10]
1815 1821 c = buffer(data, chunkstart - offset, chunklength)
1816 1822 if comp_mode == COMP_MODE_PLAIN:
1817 1823 ladd(c)
1818 1824 elif comp_mode == COMP_MODE_INLINE:
1819 1825 ladd(decomp(c))
1820 1826 elif comp_mode == COMP_MODE_DEFAULT:
1821 1827 ladd(def_decomp(c))
1822 1828 else:
1823 1829 msg = 'unknown compression mode %d'
1824 1830 msg %= comp_mode
1825 1831 raise error.RevlogError(msg)
1826 1832
1827 1833 return l
1828 1834
1829 1835 def _chunkclear(self):
1830 1836 """Clear the raw chunk cache."""
1831 1837 self._chunkcache = (0, b'')
1832 1838
1833 1839 def deltaparent(self, rev):
1834 1840 """return deltaparent of the given revision"""
1835 1841 base = self.index[rev][3]
1836 1842 if base == rev:
1837 1843 return nullrev
1838 1844 elif self._generaldelta:
1839 1845 return base
1840 1846 else:
1841 1847 return rev - 1
1842 1848
1843 1849 def issnapshot(self, rev):
1844 1850 """tells whether rev is a snapshot"""
1845 1851 if not self._sparserevlog:
1846 1852 return self.deltaparent(rev) == nullrev
1847 1853 elif util.safehasattr(self.index, b'issnapshot'):
1848 1854 # directly assign the method to cache the testing and access
1849 1855 self.issnapshot = self.index.issnapshot
1850 1856 return self.issnapshot(rev)
1851 1857 if rev == nullrev:
1852 1858 return True
1853 1859 entry = self.index[rev]
1854 1860 base = entry[3]
1855 1861 if base == rev:
1856 1862 return True
1857 1863 if base == nullrev:
1858 1864 return True
1859 1865 p1 = entry[5]
1860 1866 p2 = entry[6]
1861 1867 if base == p1 or base == p2:
1862 1868 return False
1863 1869 return self.issnapshot(base)
1864 1870
1865 1871 def snapshotdepth(self, rev):
1866 1872 """number of snapshot in the chain before this one"""
1867 1873 if not self.issnapshot(rev):
1868 1874 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1869 1875 return len(self._deltachain(rev)[0]) - 1
1870 1876
1871 1877 def revdiff(self, rev1, rev2):
1872 1878 """return or calculate a delta between two revisions
1873 1879
1874 1880 The delta calculated is in binary form and is intended to be written to
1875 1881 revlog data directly. So this function needs raw revision data.
1876 1882 """
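# Fast path: when rev2 is stored as a delta against rev1, the stored
# chunk already is the wanted binary delta.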
1877 1883 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1878 1884 return bytes(self._chunk(rev2))
1879 1885
1880 1886 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1881 1887
1882 1888 def _processflags(self, text, flags, operation, raw=False):
1883 1889 """deprecated entry point to access flag processors"""
1884 1890 msg = b'_processflag(...) use the specialized variant'
1885 1891 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1886 1892 if raw:
1887 1893 return text, flagutil.processflagsraw(self, text, flags)
1888 1894 elif operation == b'read':
1889 1895 return flagutil.processflagsread(self, text, flags)
1890 1896 else: # write operation
1891 1897 return flagutil.processflagswrite(self, text, flags)
1892 1898
1893 1899 def revision(self, nodeorrev, _df=None, raw=False):
1894 1900 """return an uncompressed revision of a given node or revision
1895 1901 number.
1896 1902
1897 1903 _df - an existing file handle to read from. (internal-only)
1898 1904 raw - an optional argument specifying if the revision data is to be
1899 1905 treated as raw data when applying flag transforms. 'raw' should be set
1900 1906 to True when generating changegroups or in debug commands.
1901 1907 """
1902 1908 if raw:
1903 1909 msg = (
1904 1910 b'revlog.revision(..., raw=True) is deprecated, '
1905 1911 b'use revlog.rawdata(...)'
1906 1912 )
1907 1913 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1908 1914 return self._revisiondata(nodeorrev, _df, raw=raw)
1909 1915
1910 1916 def sidedata(self, nodeorrev, _df=None):
1911 1917 """a map of extra data related to the changeset but not part of the hash
1912 1918
1913 1919 This function currently return a dictionary. However, more advanced
1914 1920 mapping object will likely be used in the future for a more
1915 1921 efficient/lazy code.
1916 1922 """
1917 1923 # deal with <nodeorrev> argument type
1918 1924 if isinstance(nodeorrev, int):
1919 1925 rev = nodeorrev
1920 1926 else:
1921 1927 rev = self.rev(nodeorrev)
1922 1928 return self._sidedata(rev)
1923 1929
1924 1930 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1925 1931 # deal with <nodeorrev> argument type
1926 1932 if isinstance(nodeorrev, int):
1927 1933 rev = nodeorrev
1928 1934 node = self.node(rev)
1929 1935 else:
1930 1936 node = nodeorrev
1931 1937 rev = None
1932 1938
1933 1939 # fast path the special `nullid` rev
1934 1940 if node == self.nullid:
1935 1941 return b""
1936 1942
1937 1943 # ``rawtext`` is the text as stored inside the revlog. Might be the
1938 1944 # revision or might need to be processed to retrieve the revision.
1939 1945 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1940 1946
1941 1947 if raw and validated:
1942 1948 # if we don't want to process the raw text and that raw
1943 1949 # text is cached, we can exit early.
1944 1950 return rawtext
1945 1951 if rev is None:
1946 1952 rev = self.rev(node)
1947 1953 # the revlog's flag for this revision
1948 1954 # (usually alter its state or content)
1949 1955 flags = self.flags(rev)
1950 1956
1951 1957 if validated and flags == REVIDX_DEFAULT_FLAGS:
1952 1958 # no extra flags set, no flag processor runs, text = rawtext
1953 1959 return rawtext
1954 1960
1955 1961 if raw:
1956 1962 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1957 1963 text = rawtext
1958 1964 else:
1959 1965 r = flagutil.processflagsread(self, rawtext, flags)
1960 1966 text, validatehash = r
1961 1967 if validatehash:
1962 1968 self.checkhash(text, node, rev=rev)
1963 1969 if not validated:
1964 1970 self._revisioncache = (node, rev, rawtext)
1965 1971
1966 1972 return text
1967 1973
1968 1974 def _rawtext(self, node, rev, _df=None):
1969 1975 """return the possibly unvalidated rawtext for a revision
1970 1976
1971 1977 returns (rev, rawtext, validated)
1972 1978 """
1973 1979
1974 1980 # revision in the cache (could be useful to apply delta)
1975 1981 cachedrev = None
1976 1982 # An intermediate text to apply deltas to
1977 1983 basetext = None
1978 1984
1979 1985 # Check if we have the entry in cache
1980 1986 # The cache entry looks like (node, rev, rawtext)
1981 1987 if self._revisioncache:
1982 1988 if self._revisioncache[0] == node:
1983 1989 return (rev, self._revisioncache[2], True)
1984 1990 cachedrev = self._revisioncache[1]
1985 1991
1986 1992 if rev is None:
1987 1993 rev = self.rev(node)
1988 1994
1989 1995 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1990 1996 if stopped:
1991 1997 basetext = self._revisioncache[2]
1992 1998
1993 1999 # drop cache to save memory, the caller is expected to
1994 2000 # update self._revisioncache after validating the text
1995 2001 self._revisioncache = None
1996 2002
1997 2003 targetsize = None
1998 2004 rawsize = self.index[rev][2]
1999 2005 if 0 <= rawsize:
2000 2006 targetsize = 4 * rawsize
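# Heuristic: cap each sliced read at roughly 4x the expected text size,
# bounding readahead while reconstructing from a delta chain.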
2001 2007
2002 2008 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2003 2009 if basetext is None:
2004 2010 basetext = bytes(bins[0])
2005 2011 bins = bins[1:]
2006 2012
2007 2013 rawtext = mdiff.patches(basetext, bins)
2008 2014 del basetext # let us have a chance to free memory early
2009 2015 return (rev, rawtext, False)
2010 2016
2011 2017 def _sidedata(self, rev):
2012 2018 """Return the sidedata for a given revision number."""
2013 2019 index_entry = self.index[rev]
2014 2020 sidedata_offset = index_entry[8]
2015 2021 sidedata_size = index_entry[9]
2016 2022
2017 2023 if self._inline:
2018 2024 sidedata_offset += self.index.entry_size * (1 + rev)
2019 2025 if sidedata_size == 0:
2020 2026 return {}
2021 2027
2022 2028 # XXX this needs caching, as we do for data
2023 2029 with self._sidedatareadfp() as sdf:
2024 2030 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2025 2031 filename = self._sidedatafile
2026 2032 end = self._docket.sidedata_end
2027 2033 offset = sidedata_offset
2028 2034 length = sidedata_size
2029 2035 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2030 2036 raise error.RevlogError(m)
2031 2037
2032 2038 sdf.seek(sidedata_offset, os.SEEK_SET)
2033 2039 comp_segment = sdf.read(sidedata_size)
2034 2040
2035 2041 if len(comp_segment) < sidedata_size:
2036 2042 filename = self._sidedatafile
2037 2043 length = sidedata_size
2038 2044 offset = sidedata_offset
2039 2045 got = len(comp_segment)
2040 2046 m = PARTIAL_READ_MSG % (filename, length, offset, got)
2041 2047 raise error.RevlogError(m)
2042 2048
2043 2049 comp = self.index[rev][11]
2044 2050 if comp == COMP_MODE_PLAIN:
2045 2051 segment = comp_segment
2046 2052 elif comp == COMP_MODE_DEFAULT:
2047 2053 segment = self._decompressor(comp_segment)
2048 2054 elif comp == COMP_MODE_INLINE:
2049 2055 segment = self.decompress(comp_segment)
2050 2056 else:
2051 2057 msg = 'unknown compression mode %d'
2052 2058 msg %= comp
2053 2059 raise error.RevlogError(msg)
2054 2060
2055 2061 sidedata = sidedatautil.deserialize_sidedata(segment)
2056 2062 return sidedata
2057 2063
2058 2064 def rawdata(self, nodeorrev, _df=None):
2059 2065 """return an uncompressed raw data of a given node or revision number.
2060 2066
2061 2067 _df - an existing file handle to read from. (internal-only)
2062 2068 """
2063 2069 return self._revisiondata(nodeorrev, _df, raw=True)
2064 2070
2065 2071 def hash(self, text, p1, p2):
2066 2072 """Compute a node hash.
2067 2073
2068 2074 Available as a function so that subclasses can replace the hash
2069 2075 as needed.
2070 2076 """
2071 2077 return storageutil.hashrevisionsha1(text, p1, p2)
2072 2078
2073 2079 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2074 2080 """Check node hash integrity.
2075 2081
2076 2082 Available as a function so that subclasses can extend hash mismatch
2077 2083 behaviors as needed.
2078 2084 """
2079 2085 try:
2080 2086 if p1 is None and p2 is None:
2081 2087 p1, p2 = self.parents(node)
2082 2088 if node != self.hash(text, p1, p2):
2083 2089 # Clear the revision cache on hash failure. The revision cache
2084 2090 # only stores the raw revision and clearing the cache does have
2085 2091 # the side-effect that we won't have a cache hit when the raw
2086 2092 # revision data is accessed. But this case should be rare and
2087 2093 # it is extra work to teach the cache about the hash
2088 2094 # verification state.
2089 2095 if self._revisioncache and self._revisioncache[0] == node:
2090 2096 self._revisioncache = None
2091 2097
2092 2098 revornode = rev
2093 2099 if revornode is None:
2094 2100 revornode = templatefilters.short(hex(node))
2095 2101 raise error.RevlogError(
2096 2102 _(b"integrity check failed on %s:%s")
2097 2103 % (self.display_id, pycompat.bytestr(revornode))
2098 2104 )
2099 2105 except error.RevlogError:
2100 2106 if self._censorable and storageutil.iscensoredtext(text):
2101 2107 raise error.CensoredNodeError(self.display_id, node, text)
2102 2108 raise
2103 2109
2104 2110 def _enforceinlinesize(self, tr):
2105 2111 """Check if the revlog is too big for inline and convert if so.
2106 2112
2107 2113 This should be called after revisions are added to the revlog. If the
2108 2114 revlog has grown too large to be an inline revlog, it will convert it
2109 2115 to use multiple index and data files.
2110 2116 """
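# Sketch of the conversion: copy every revision's chunk into a fresh data
# file, rewrite the index without the inline flag, and register both
# files with the transaction so a rollback can restore the old state.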
2111 2117 tiprev = len(self) - 1
2112 2118 total_size = self.start(tiprev) + self.length(tiprev)
2113 2119 if not self._inline or total_size < _maxinline:
2114 2120 return
2115 2121
2116 2122 troffset = tr.findoffset(self._indexfile)
2117 2123 if troffset is None:
2118 2124 raise error.RevlogError(
2119 2125 _(b"%s not found in the transaction") % self._indexfile
2120 2126 )
2121 2127 trindex = 0
2122 2128 tr.add(self._datafile, 0)
2123 2129
2124 2130 existing_handles = False
2125 2131 if self._writinghandles is not None:
2126 2132 existing_handles = True
2127 2133 fp = self._writinghandles[0]
2128 2134 fp.flush()
2129 2135 fp.close()
2130 2136 # We can't use the cached file handle after close(). So prevent
2131 2137 # its usage.
2132 2138 self._writinghandles = None
2133 2139
2134 2140 new_dfh = self._datafp(b'w+')
2135 2141 new_dfh.truncate(0) # drop any potentially existing data
2136 2142 try:
2137 2143 with self._indexfp() as read_ifh:
2138 2144 for r in self:
2139 2145 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2140 2146 if troffset <= self.start(r) + r * self.index.entry_size:
2141 2147 trindex = r
2142 2148 new_dfh.flush()
2143 2149
2144 2150 with self.__index_new_fp() as fp:
2145 2151 self._format_flags &= ~FLAG_INLINE_DATA
2146 2152 self._inline = False
2147 2153 for i in self:
2148 2154 e = self.index.entry_binary(i)
2149 2155 if i == 0 and self._docket is None:
2150 2156 header = self._format_flags | self._format_version
2151 2157 header = self.index.pack_header(header)
2152 2158 e = header + e
2153 2159 fp.write(e)
2154 2160 if self._docket is not None:
2155 2161 self._docket.index_end = fp.tell()
2156 2162
2157 2163 # There is a small transactional race here. If the rename of
2158 2164 # the index fails, we should remove the datafile. It is more
2159 2165 # important to ensure that the data file is not truncated
2160 2166 # when the index is replaced as otherwise data is lost.
2161 2167 tr.replace(self._datafile, self.start(trindex))
2162 2168
2163 2169 # the temp file replaces the real index when we exit the context
2164 2170 # manager
2165 2171
2166 2172 tr.replace(self._indexfile, trindex * self.index.entry_size)
2167 2173 nodemaputil.setup_persistent_nodemap(tr, self)
2168 2174 self._chunkclear()
2169 2175
2170 2176 if existing_handles:
2171 2177 # switched from inline to conventional; reopen the index
2172 2178 ifh = self.__index_write_fp()
2173 2179 self._writinghandles = (ifh, new_dfh, None)
2174 2180 new_dfh = None
2175 2181 finally:
2176 2182 if new_dfh is not None:
2177 2183 new_dfh.close()
2178 2184
2179 2185 def _nodeduplicatecallback(self, transaction, node):
2180 2186 """called when trying to add a node already stored."""
2181 2187
2182 2188 @contextlib.contextmanager
2183 2189 def _writing(self, transaction):
2184 2190 if self._trypending:
2185 2191 msg = b'trying to write in a `trypending` revlog: %s'
2186 2192 msg %= self.display_id
2187 2193 raise error.ProgrammingError(msg)
2188 2194 if self._writinghandles is not None:
2189 2195 yield
2190 2196 else:
2191 2197 ifh = dfh = sdfh = None
2192 2198 try:
2193 2199 r = len(self)
2194 2200 # opening the data file.
2195 2201 dsize = 0
2196 2202 if r:
2197 2203 dsize = self.end(r - 1)
2198 2204 dfh = None
2199 2205 if not self._inline:
2200 2206 try:
2201 2207 dfh = self._datafp(b"r+")
2202 2208 if self._docket is None:
2203 2209 dfh.seek(0, os.SEEK_END)
2204 2210 else:
2205 2211 dfh.seek(self._docket.data_end, os.SEEK_SET)
2206 2212 except IOError as inst:
2207 2213 if inst.errno != errno.ENOENT:
2208 2214 raise
2209 2215 dfh = self._datafp(b"w+")
2210 2216 transaction.add(self._datafile, dsize)
2211 2217 if self._sidedatafile is not None:
2212 2218 try:
2213 2219 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2214 2220 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2215 2221 except IOError as inst:
2216 2222 if inst.errno != errno.ENOENT:
2217 2223 raise
2218 2224 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2219 2225 transaction.add(
2220 2226 self._sidedatafile, self._docket.sidedata_end
2221 2227 )
2222 2228
2223 2229 # opening the index file.
2224 2230 isize = r * self.index.entry_size
2225 2231 ifh = self.__index_write_fp()
2226 2232 if self._inline:
2227 2233 transaction.add(self._indexfile, dsize + isize)
2228 2234 else:
2229 2235 transaction.add(self._indexfile, isize)
2230 2236 # expose all file handles for writing.
2231 2237 self._writinghandles = (ifh, dfh, sdfh)
2232 2238 yield
2233 2239 if self._docket is not None:
2234 2240 self._write_docket(transaction)
2235 2241 finally:
2236 2242 self._writinghandles = None
2237 2243 if dfh is not None:
2238 2244 dfh.close()
2239 2245 if sdfh is not None:
2240 2246 sdfh.close()
2241 2247 # closing the index file last to avoid exposing an index that
2242 2248 # refers to potentially unflushed data content.
2243 2249 if ifh is not None:
2244 2250 ifh.close()
2245 2251
2246 2252 def _write_docket(self, transaction):
2247 2253 """write the current docket on disk
2248 2254
2249 2255 Exists as a method to help the changelog implement its transaction logic
2250 2256 
2251 2257 We could also imagine using the same transaction logic for all revlogs
2252 2258 since dockets are cheap."""
2253 2259 self._docket.write(transaction)
2254 2260
2255 2261 def addrevision(
2256 2262 self,
2257 2263 text,
2258 2264 transaction,
2259 2265 link,
2260 2266 p1,
2261 2267 p2,
2262 2268 cachedelta=None,
2263 2269 node=None,
2264 2270 flags=REVIDX_DEFAULT_FLAGS,
2265 2271 deltacomputer=None,
2266 2272 sidedata=None,
2267 2273 ):
2268 2274 """add a revision to the log
2269 2275
2270 2276 text - the revision data to add
2271 2277 transaction - the transaction object used for rollback
2272 2278 link - the linkrev data to add
2273 2279 p1, p2 - the parent nodeids of the revision
2274 2280 cachedelta - an optional precomputed delta
2275 2281 node - nodeid of revision; typically node is not specified, and it is
2276 2282 computed by default as hash(text, p1, p2), however subclasses might
2277 2283 use a different hashing method (and override checkhash() in that case)
2278 2284 flags - the known flags to set on the revision
2279 2285 deltacomputer - an optional deltacomputer instance shared between
2280 2286 multiple calls
2281 2287 """
2282 2288 if link == nullrev:
2283 2289 raise error.RevlogError(
2284 2290 _(b"attempted to add linkrev -1 to %s") % self.display_id
2285 2291 )
2286 2292
2287 2293 if sidedata is None:
2288 2294 sidedata = {}
2289 2295 elif sidedata and not self.hassidedata:
2290 2296 raise error.ProgrammingError(
2291 2297 _(b"trying to add sidedata to a revlog who don't support them")
2292 2298 )
2293 2299
2294 2300 if flags:
2295 2301 node = node or self.hash(text, p1, p2)
2296 2302
2297 2303 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2298 2304
2299 2305 # If the flag processor modifies the revision data, ignore any provided
2300 2306 # cachedelta.
2301 2307 if rawtext != text:
2302 2308 cachedelta = None
2303 2309
2304 2310 if len(rawtext) > _maxentrysize:
2305 2311 raise error.RevlogError(
2306 2312 _(
2307 2313 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2308 2314 )
2309 2315 % (self.display_id, len(rawtext))
2310 2316 )
2311 2317
2312 2318 node = node or self.hash(rawtext, p1, p2)
2313 2319 rev = self.index.get_rev(node)
2314 2320 if rev is not None:
2315 2321 return rev
2316 2322
2317 2323 if validatehash:
2318 2324 self.checkhash(rawtext, node, p1=p1, p2=p2)
2319 2325
2320 2326 return self.addrawrevision(
2321 2327 rawtext,
2322 2328 transaction,
2323 2329 link,
2324 2330 p1,
2325 2331 p2,
2326 2332 node,
2327 2333 flags,
2328 2334 cachedelta=cachedelta,
2329 2335 deltacomputer=deltacomputer,
2330 2336 sidedata=sidedata,
2331 2337 )
2332 2338
2333 2339 def addrawrevision(
2334 2340 self,
2335 2341 rawtext,
2336 2342 transaction,
2337 2343 link,
2338 2344 p1,
2339 2345 p2,
2340 2346 node,
2341 2347 flags,
2342 2348 cachedelta=None,
2343 2349 deltacomputer=None,
2344 2350 sidedata=None,
2345 2351 ):
2346 2352 """add a raw revision with known flags, node and parents
2347 2353 useful when reusing a revision not stored in this revlog (ex: received
2348 2354 over wire, or read from an external bundle).
2349 2355 """
2350 2356 with self._writing(transaction):
2351 2357 return self._addrevision(
2352 2358 node,
2353 2359 rawtext,
2354 2360 transaction,
2355 2361 link,
2356 2362 p1,
2357 2363 p2,
2358 2364 flags,
2359 2365 cachedelta,
2360 2366 deltacomputer=deltacomputer,
2361 2367 sidedata=sidedata,
2362 2368 )
2363 2369
2364 2370 def compress(self, data):
2365 2371 """Generate a possibly-compressed representation of data."""
2366 2372 if not data:
2367 2373 return b'', data
2368 2374
2369 2375 compressed = self._compressor.compress(data)
2370 2376
2371 2377 if compressed:
2372 2378 # The revlog compressor added the header in the returned data.
2373 2379 return b'', compressed
2374 2380
2375 2381 if data[0:1] == b'\0':
2376 2382 return b'', data
2377 2383 return b'u', data
2378 2384
2379 2385 def decompress(self, data):
2380 2386 """Decompress a revlog chunk.
2381 2387
2382 2388 The chunk is expected to begin with a header identifying the
2383 2389 format type so it can be routed to an appropriate decompressor.
2384 2390 """
2385 2391 if not data:
2386 2392 return data
2387 2393
2388 2394 # Revlogs are read much more frequently than they are written and many
2389 2395 # chunks only take microseconds to decompress, so performance is
2390 2396 # important here.
2391 2397 #
2392 2398 # We can make a few assumptions about revlogs:
2393 2399 #
2394 2400 # 1) the majority of chunks will be compressed (as opposed to inline
2395 2401 # raw data).
2396 2402 # 2) decompressing *any* data will likely be at least 10x slower than
2397 2403 # returning raw inline data.
2398 2404 # 3) we want to prioritize common and officially supported compression
2399 2405 # engines
2400 2406 #
2401 2407 # It follows that we want to optimize for "decompress compressed data
2402 2408 # when encoded with common and officially supported compression engines"
2403 2409 # case over "raw data" and "data encoded by less common or non-official
2404 2410 # compression engines." That is why we have the inline lookup first
2405 2411 # followed by the compengines lookup.
2406 2412 #
2407 2413 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2408 2414 # compressed chunks. And this matters for changelog and manifest reads.
2409 2415 t = data[0:1]
2410 2416
2411 2417 if t == b'x':
2412 2418 try:
2413 2419 return _zlibdecompress(data)
2414 2420 except zlib.error as e:
2415 2421 raise error.RevlogError(
2416 2422 _(b'revlog decompress error: %s')
2417 2423 % stringutil.forcebytestr(e)
2418 2424 )
2419 2425 # '\0' is more common than 'u' so it goes first.
2420 2426 elif t == b'\0':
2421 2427 return data
2422 2428 elif t == b'u':
2423 2429 return util.buffer(data, 1)
2424 2430
2425 2431 compressor = self._get_decompressor(t)
2426 2432
2427 2433 return compressor.decompress(data)
2428 2434
2429 2435 def _addrevision(
2430 2436 self,
2431 2437 node,
2432 2438 rawtext,
2433 2439 transaction,
2434 2440 link,
2435 2441 p1,
2436 2442 p2,
2437 2443 flags,
2438 2444 cachedelta,
2439 2445 alwayscache=False,
2440 2446 deltacomputer=None,
2441 2447 sidedata=None,
2442 2448 ):
2443 2449 """internal function to add revisions to the log
2444 2450
2445 2451 see addrevision for argument descriptions.
2446 2452
2447 2453 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2448 2454
2449 2455 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2450 2456 be used.
2451 2457
2452 2458 invariants:
2453 2459 - rawtext is optional (can be None); if not set, cachedelta must be set.
2454 2460 If both are set, they must correspond to each other.
2455 2461 """
2456 2462 if node == self.nullid:
2457 2463 raise error.RevlogError(
2458 2464 _(b"%s: attempt to add null revision") % self.display_id
2459 2465 )
2460 2466 if (
2461 2467 node == self.nodeconstants.wdirid
2462 2468 or node in self.nodeconstants.wdirfilenodeids
2463 2469 ):
2464 2470 raise error.RevlogError(
2465 2471 _(b"%s: attempt to add wdir revision") % self.display_id
2466 2472 )
2467 2473 if self._writinghandles is None:
2468 2474 msg = b'adding revision outside `revlog._writing` context'
2469 2475 raise error.ProgrammingError(msg)
2470 2476
2471 2477 if self._inline:
2472 2478 fh = self._writinghandles[0]
2473 2479 else:
2474 2480 fh = self._writinghandles[1]
2475 2481
2476 2482 btext = [rawtext]
2477 2483
2478 2484 curr = len(self)
2479 2485 prev = curr - 1
2480 2486
2481 2487 offset = self._get_data_offset(prev)
2482 2488
2483 2489 if self._concurrencychecker:
2484 2490 ifh, dfh, sdfh = self._writinghandles
2485 2491 # XXX no checking for the sidedata file
2486 2492 if self._inline:
2487 2493 # offset is "as if" it were in the .d file, so we need to add on
2488 2494 # the size of the entry metadata.
2489 2495 self._concurrencychecker(
2490 2496 ifh, self._indexfile, offset + curr * self.index.entry_size
2491 2497 )
2492 2498 else:
2493 2499 # Entries in the .i are a consistent size.
2494 2500 self._concurrencychecker(
2495 2501 ifh, self._indexfile, curr * self.index.entry_size
2496 2502 )
2497 2503 self._concurrencychecker(dfh, self._datafile, offset)
2498 2504
2499 2505 p1r, p2r = self.rev(p1), self.rev(p2)
2500 2506
2501 2507 # full versions are inserted when the needed deltas
2502 2508 # become comparable to the uncompressed text
2503 2509 if rawtext is None:
2504 2510 # need rawtext size, before changed by flag processors, which is
2505 2511 # the non-raw size. use revlog explicitly to avoid filelog's extra
2506 2512 # logic that might remove metadata size.
2507 2513 textlen = mdiff.patchedsize(
2508 2514 revlog.size(self, cachedelta[0]), cachedelta[1]
2509 2515 )
2510 2516 else:
2511 2517 textlen = len(rawtext)
2512 2518
2513 2519 if deltacomputer is None:
2514 2520 deltacomputer = deltautil.deltacomputer(self)
2515 2521
2516 2522 revinfo = revlogutils.revisioninfo(
2517 2523 node,
2518 2524 p1,
2519 2525 p2,
2520 2526 btext,
2521 2527 textlen,
2522 2528 cachedelta,
2523 2529 flags,
2524 2530 )
2525 2531
2526 2532 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2527 2533
2528 2534 compression_mode = COMP_MODE_INLINE
2529 2535 if self._docket is not None:
2530 2536 h, d = deltainfo.data
2531 2537 if not h and not d:
2532 2538 # no data to store at all... declare it uncompressed
2533 2539 compression_mode = COMP_MODE_PLAIN
2534 2540 elif not h:
2535 2541 t = d[0:1]
2536 2542 if t == b'\0':
2537 2543 compression_mode = COMP_MODE_PLAIN
2538 2544 elif t == self._docket.default_compression_header:
2539 2545 compression_mode = COMP_MODE_DEFAULT
2540 2546 elif h == b'u':
2541 2547 # we have a more efficient way to declare uncompressed
2542 2548 h = b''
2543 2549 compression_mode = COMP_MODE_PLAIN
2544 2550 deltainfo = deltautil.drop_u_compression(deltainfo)
2545 2551
2546 2552 sidedata_compression_mode = COMP_MODE_INLINE
2547 2553 if sidedata and self.hassidedata:
2548 2554 sidedata_compression_mode = COMP_MODE_PLAIN
2549 2555 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2550 2556 sidedata_offset = self._docket.sidedata_end
2551 2557 h, comp_sidedata = self.compress(serialized_sidedata)
2552 2558 if (
2553 2559 h != b'u'
2554 2560 and comp_sidedata[0:1] != b'\0'
2555 2561 and len(comp_sidedata) < len(serialized_sidedata)
2556 2562 ):
2557 2563 assert not h
2558 2564 if (
2559 2565 comp_sidedata[0:1]
2560 2566 == self._docket.default_compression_header
2561 2567 ):
2562 2568 sidedata_compression_mode = COMP_MODE_DEFAULT
2563 2569 serialized_sidedata = comp_sidedata
2564 2570 else:
2565 2571 sidedata_compression_mode = COMP_MODE_INLINE
2566 2572 serialized_sidedata = comp_sidedata
2567 2573 else:
2568 2574 serialized_sidedata = b""
2569 2575 # Don't store the offset if the sidedata is empty; that way
2570 2576 # empty sidedata is easy to detect and is no different from
2571 2577 # sidedata we add manually.
2572 2578 sidedata_offset = 0
2573 2579
2574 2580 e = revlogutils.entry(
2575 2581 flags=flags,
2576 2582 data_offset=offset,
2577 2583 data_compressed_length=deltainfo.deltalen,
2578 2584 data_uncompressed_length=textlen,
2579 2585 data_compression_mode=compression_mode,
2580 2586 data_delta_base=deltainfo.base,
2581 2587 link_rev=link,
2582 2588 parent_rev_1=p1r,
2583 2589 parent_rev_2=p2r,
2584 2590 node_id=node,
2585 2591 sidedata_offset=sidedata_offset,
2586 2592 sidedata_compressed_length=len(serialized_sidedata),
2587 2593 sidedata_compression_mode=sidedata_compression_mode,
2588 2594 )
2589 2595
2590 2596 self.index.append(e)
2591 2597 entry = self.index.entry_binary(curr)
2592 2598 if curr == 0 and self._docket is None:
2593 2599 header = self._format_flags | self._format_version
2594 2600 header = self.index.pack_header(header)
2595 2601 entry = header + entry
2596 2602 self._writeentry(
2597 2603 transaction,
2598 2604 entry,
2599 2605 deltainfo.data,
2600 2606 link,
2601 2607 offset,
2602 2608 serialized_sidedata,
2603 2609 sidedata_offset,
2604 2610 )
2605 2611
2606 2612 rawtext = btext[0]
2607 2613
2608 2614 if alwayscache and rawtext is None:
2609 2615 rawtext = deltacomputer.buildtext(revinfo, fh)
2610 2616
2611 2617 if type(rawtext) == bytes: # only accept immutable objects
2612 2618 self._revisioncache = (node, curr, rawtext)
2613 2619 self._chainbasecache[curr] = deltainfo.chainbase
2614 2620 return curr
2615 2621
2616 2622 def _get_data_offset(self, prev):
2617 2623 """Returns the current offset in the (in-transaction) data file.
2618 2624 Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
2619 2625 file to store that information: since sidedata can be rewritten to the
2620 2626 end of the data file within a transaction, you can have cases where, for
2621 2627 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2622 2628 to `n - 1`'s sidedata being written after `n`'s data.
2623 2629
2624 2630 TODO cache this in a docket file before getting out of experimental."""
2625 2631 if self._docket is None:
2626 2632 return self.end(prev)
2627 2633 else:
2628 2634 return self._docket.data_end
2629 2635
2630 2636 def _writeentry(
2631 2637 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2632 2638 ):
2633 2639 # Files opened in a+ mode have inconsistent behavior on various
2634 2640 # platforms. Windows requires that a file positioning call be made
2635 2641 # when the file handle transitions between reads and writes. See
2636 2642 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2637 2643 # platforms, Python or the platform itself can be buggy. Some versions
2638 2644 # of Solaris have been observed to not append at the end of the file
2639 2645 # if the file was seeked to before the end. See issue4943 for more.
2640 2646 #
2641 2647 # We work around this issue by inserting a seek() before writing.
2642 2648 # Note: This is likely not necessary on Python 3. However, because
2643 2649 # the file handle is reused for reads and may be seeked there, we need
2644 2650 # to be careful before changing this.
2645 2651 if self._writinghandles is None:
2646 2652 msg = b'adding revision outside `revlog._writing` context'
2647 2653 raise error.ProgrammingError(msg)
2648 2654 ifh, dfh, sdfh = self._writinghandles
2649 2655 if self._docket is None:
2650 2656 ifh.seek(0, os.SEEK_END)
2651 2657 else:
2652 2658 ifh.seek(self._docket.index_end, os.SEEK_SET)
2653 2659 if dfh:
2654 2660 if self._docket is None:
2655 2661 dfh.seek(0, os.SEEK_END)
2656 2662 else:
2657 2663 dfh.seek(self._docket.data_end, os.SEEK_SET)
2658 2664 if sdfh:
2659 2665 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2660 2666
2661 2667 curr = len(self) - 1
2662 2668 if not self._inline:
2663 2669 transaction.add(self._datafile, offset)
2664 2670 if self._sidedatafile:
2665 2671 transaction.add(self._sidedatafile, sidedata_offset)
2666 2672 transaction.add(self._indexfile, curr * len(entry))
2667 2673 if data[0]:
2668 2674 dfh.write(data[0])
2669 2675 dfh.write(data[1])
2670 2676 if sidedata:
2671 2677 sdfh.write(sidedata)
2672 2678 ifh.write(entry)
2673 2679 else:
2674 2680 offset += curr * self.index.entry_size
2675 2681 transaction.add(self._indexfile, offset)
2676 2682 ifh.write(entry)
2677 2683 ifh.write(data[0])
2678 2684 ifh.write(data[1])
2679 2685 assert not sidedata
2680 2686 self._enforceinlinesize(transaction)
2681 2687 if self._docket is not None:
2682 2688 self._docket.index_end = self._writinghandles[0].tell()
2683 2689 self._docket.data_end = self._writinghandles[1].tell()
2684 2690 self._docket.sidedata_end = self._writinghandles[2].tell()
2685 2691
2686 2692 nodemaputil.setup_persistent_nodemap(transaction, self)
2687 2693
2688 2694 def addgroup(
2689 2695 self,
2690 2696 deltas,
2691 2697 linkmapper,
2692 2698 transaction,
2693 2699 alwayscache=False,
2694 2700 addrevisioncb=None,
2695 2701 duplicaterevisioncb=None,
2696 2702 ):
2697 2703 """
2698 2704 add a delta group
2699 2705
2700 2706 given a set of deltas, add them to the revision log. the
2701 2707 first delta is against its parent, which should be in our
2702 2708 log, the rest are against the previous delta.
2703 2709
2704 2710 If ``addrevisioncb`` is defined, it will be called with arguments of
2705 2711 this revlog and the revision number that was added.
2706 2712 """
2707 2713
2708 2714 if self._adding_group:
2709 2715 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2710 2716
2711 2717 self._adding_group = True
2712 2718 empty = True
2713 2719 try:
2714 2720 with self._writing(transaction):
2715 2721 deltacomputer = deltautil.deltacomputer(self)
2716 2722 # loop through our set of deltas
2717 2723 for data in deltas:
2718 2724 (
2719 2725 node,
2720 2726 p1,
2721 2727 p2,
2722 2728 linknode,
2723 2729 deltabase,
2724 2730 delta,
2725 2731 flags,
2726 2732 sidedata,
2727 2733 ) = data
2728 2734 link = linkmapper(linknode)
2729 2735 flags = flags or REVIDX_DEFAULT_FLAGS
2730 2736
2731 2737 rev = self.index.get_rev(node)
2732 2738 if rev is not None:
2733 2739 # this can happen if two branches make the same change
2734 2740 self._nodeduplicatecallback(transaction, rev)
2735 2741 if duplicaterevisioncb:
2736 2742 duplicaterevisioncb(self, rev)
2737 2743 empty = False
2738 2744 continue
2739 2745
2740 2746 for p in (p1, p2):
2741 2747 if not self.index.has_node(p):
2742 2748 raise error.LookupError(
2743 2749 p, self.radix, _(b'unknown parent')
2744 2750 )
2745 2751
2746 2752 if not self.index.has_node(deltabase):
2747 2753 raise error.LookupError(
2748 2754 deltabase, self.display_id, _(b'unknown delta base')
2749 2755 )
2750 2756
2751 2757 baserev = self.rev(deltabase)
2752 2758
2753 2759 if baserev != nullrev and self.iscensored(baserev):
2754 2760 # if base is censored, delta must be full replacement in a
2755 2761 # single patch operation
2756 2762 hlen = struct.calcsize(b">lll")
2757 2763 oldlen = self.rawsize(baserev)
2758 2764 newlen = len(delta) - hlen
2759 2765 if delta[:hlen] != mdiff.replacediffheader(
2760 2766 oldlen, newlen
2761 2767 ):
2762 2768 raise error.CensoredBaseError(
2763 2769 self.display_id, self.node(baserev)
2764 2770 )
2765 2771
2766 2772 if not flags and self._peek_iscensored(baserev, delta):
2767 2773 flags |= REVIDX_ISCENSORED
2768 2774
2769 2775 # We assume consumers of addrevisioncb will want to retrieve
2770 2776 # the added revision, which will require a call to
2771 2777 # revision(). revision() will fast path if there is a cache
2772 2778 # hit. So, we tell _addrevision() to always cache in this case.
2773 2779 # We're only using addgroup() in the context of changegroup
2774 2780 # generation so the revision data can always be handled as raw
2775 2781 # by the flagprocessor.
2776 2782 rev = self._addrevision(
2777 2783 node,
2778 2784 None,
2779 2785 transaction,
2780 2786 link,
2781 2787 p1,
2782 2788 p2,
2783 2789 flags,
2784 2790 (baserev, delta),
2785 2791 alwayscache=alwayscache,
2786 2792 deltacomputer=deltacomputer,
2787 2793 sidedata=sidedata,
2788 2794 )
2789 2795
2790 2796 if addrevisioncb:
2791 2797 addrevisioncb(self, rev)
2792 2798 empty = False
2793 2799 finally:
2794 2800 self._adding_group = False
2795 2801 return not empty
2796 2802
2797 2803 def iscensored(self, rev):
2798 2804 """Check if a file revision is censored."""
2799 2805 if not self._censorable:
2800 2806 return False
2801 2807
2802 2808 return self.flags(rev) & REVIDX_ISCENSORED
2803 2809
2804 2810 def _peek_iscensored(self, baserev, delta):
2805 2811 """Quickly check if a delta produces a censored revision."""
2806 2812 if not self._censorable:
2807 2813 return False
2808 2814
2809 2815 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2810 2816
2811 2817 def getstrippoint(self, minlink):
2812 2818 """find the minimum rev that must be stripped to strip the linkrev
2813 2819
2814 2820 Returns a tuple containing the minimum rev and a set of all revs that
2815 2821 have linkrevs that will be broken by this strip.
2816 2822 """
2817 2823 return storageutil.resolvestripinfo(
2818 2824 minlink,
2819 2825 len(self) - 1,
2820 2826 self.headrevs(),
2821 2827 self.linkrev,
2822 2828 self.parentrevs,
2823 2829 )
2824 2830
2825 2831 def strip(self, minlink, transaction):
2826 2832 """truncate the revlog on the first revision with a linkrev >= minlink
2827 2833
2828 2834 This function is called when we're stripping revision minlink and
2829 2835 its descendants from the repository.
2830 2836
2831 2837 We have to remove all revisions with linkrev >= minlink, because
2832 2838 the equivalent changelog revisions will be renumbered after the
2833 2839 strip.
2834 2840
2835 2841 So we truncate the revlog on the first of these revisions, and
2836 2842 trust that the caller has saved the revisions that shouldn't be
2837 2843 removed and that it'll re-add them after this truncation.
2838 2844 """
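# Illustrative scenario (hypothetical revisions): with minlink = 5, if
# revision 7 is the first one whose linkrev is >= 5, everything from rev 7
# onward is truncated; a later rev 8 with linkrev 4 must be re-added by
# the caller afterwards.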
2839 2845 if len(self) == 0:
2840 2846 return
2841 2847
2842 2848 rev, _ = self.getstrippoint(minlink)
2843 2849 if rev == len(self):
2844 2850 return
2845 2851
2846 2852 # first truncate the files on disk
2847 2853 data_end = self.start(rev)
2848 2854 if not self._inline:
2849 2855 transaction.add(self._datafile, data_end)
2850 2856 end = rev * self.index.entry_size
2851 2857 else:
2852 2858 end = data_end + (rev * self.index.entry_size)
2853 2859
2854 2860 if self._sidedatafile:
2855 2861 sidedata_end = self.sidedata_cut_off(rev)
2856 2862 transaction.add(self._sidedatafile, sidedata_end)
2857 2863
2858 2864 transaction.add(self._indexfile, end)
2859 2865 if self._docket is not None:
2860 2866 # XXX we could leverage the docket while stripping. However, it is
2861 2867 # not powerful enough at the time of this comment
2862 2868 self._docket.index_end = end
2863 2869 self._docket.data_end = data_end
2864 2870 self._docket.sidedata_end = sidedata_end
2865 2871 self._docket.write(transaction, stripping=True)
2866 2872
2867 2873 # then reset internal state in memory to forget those revisions
2868 2874 self._revisioncache = None
2869 2875 self._chaininfocache = util.lrucachedict(500)
2870 2876 self._chunkclear()
2871 2877
2872 2878 del self.index[rev:-1]
2873 2879
2874 2880 def checksize(self):
2875 2881 """Check size of index and data files
2876 2882
2877 2883 return a (dd, di) tuple.
2878 2884 - dd: extra bytes for the "data" file
2879 2885 - di: extra bytes for the "index" file
2880 2886
2881 2887 A healthy revlog will return (0, 0).
2882 2888 """
2883 2889 expected = 0
2884 2890 if len(self):
2885 2891 expected = max(0, self.end(len(self) - 1))
2886 2892
2887 2893 try:
2888 2894 with self._datafp() as f:
2889 2895 f.seek(0, io.SEEK_END)
2890 2896 actual = f.tell()
2891 2897 dd = actual - expected
2892 2898 except IOError as inst:
2893 2899 if inst.errno != errno.ENOENT:
2894 2900 raise
2895 2901 dd = 0
2896 2902
2897 2903 try:
2898 2904 f = self.opener(self._indexfile)
2899 2905 f.seek(0, io.SEEK_END)
2900 2906 actual = f.tell()
2901 2907 f.close()
2902 2908 s = self.index.entry_size
2903 2909 i = max(0, actual // s)
2904 2910 di = actual - (i * s)
2905 2911 if self._inline:
2906 2912 databytes = 0
2907 2913 for r in self:
2908 2914 databytes += max(0, self.length(r))
2909 2915 dd = 0
2910 2916 di = actual - len(self) * s - databytes
2911 2917 except IOError as inst:
2912 2918 if inst.errno != errno.ENOENT:
2913 2919 raise
2914 2920 di = 0
2915 2921
2916 2922 return (dd, di)
2917 2923
2918 2924 def files(self):
2919 2925 res = [self._indexfile]
2920 2926 if not self._inline:
2921 2927 res.append(self._datafile)
2922 2928 return res
2923 2929
2924 2930 def emitrevisions(
2925 2931 self,
2926 2932 nodes,
2927 2933 nodesorder=None,
2928 2934 revisiondata=False,
2929 2935 assumehaveparentrevisions=False,
2930 2936 deltamode=repository.CG_DELTAMODE_STD,
2931 2937 sidedata_helpers=None,
2932 2938 ):
2933 2939 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2934 2940 raise error.ProgrammingError(
2935 2941 b'unhandled value for nodesorder: %s' % nodesorder
2936 2942 )
2937 2943
2938 2944 if nodesorder is None and not self._generaldelta:
2939 2945 nodesorder = b'storage'
2940 2946
2941 2947 if (
2942 2948 not self._storedeltachains
2943 2949 and deltamode != repository.CG_DELTAMODE_PREV
2944 2950 ):
2945 2951 deltamode = repository.CG_DELTAMODE_FULL
2946 2952
2947 2953 return storageutil.emitrevisions(
2948 2954 self,
2949 2955 nodes,
2950 2956 nodesorder,
2951 2957 revlogrevisiondelta,
2952 2958 deltaparentfn=self.deltaparent,
2953 2959 candeltafn=self.candelta,
2954 2960 rawsizefn=self.rawsize,
2955 2961 revdifffn=self.revdiff,
2956 2962 flagsfn=self.flags,
2957 2963 deltamode=deltamode,
2958 2964 revisiondata=revisiondata,
2959 2965 assumehaveparentrevisions=assumehaveparentrevisions,
2960 2966 sidedata_helpers=sidedata_helpers,
2961 2967 )
2962 2968
2963 2969 DELTAREUSEALWAYS = b'always'
2964 2970 DELTAREUSESAMEREVS = b'samerevs'
2965 2971 DELTAREUSENEVER = b'never'
2966 2972
2967 2973 DELTAREUSEFULLADD = b'fulladd'
2968 2974
2969 2975 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2970 2976
2971 2977 def clone(
2972 2978 self,
2973 2979 tr,
2974 2980 destrevlog,
2975 2981 addrevisioncb=None,
2976 2982 deltareuse=DELTAREUSESAMEREVS,
2977 2983 forcedeltabothparents=None,
2978 2984 sidedata_helpers=None,
2979 2985 ):
2980 2986 """Copy this revlog to another, possibly with format changes.
2981 2987
2982 2988 The destination revlog will contain the same revisions and nodes.
2983 2989 However, it may not be bit-for-bit identical due to e.g. delta encoding
2984 2990 differences.
2985 2991
2986 2992 The ``deltareuse`` argument controls how deltas from the existing revlog
2987 2993 are preserved in the destination revlog. The argument can have the
2988 2994 following values:
2989 2995
2990 2996 DELTAREUSEALWAYS
2991 2997 Deltas will always be reused (if possible), even if the destination
2992 2998 revlog would not select the same revisions for the delta. This is the
2993 2999 fastest mode of operation.
2994 3000 DELTAREUSESAMEREVS
2995 3001 Deltas will be reused if the destination revlog would pick the same
2996 3002 revisions for the delta. This mode strikes a balance between speed
2997 3003 and optimization.
2998 3004 DELTAREUSENEVER
2999 3005 Deltas will never be reused. This is the slowest mode of execution.
3000 3006 This mode can be used to recompute deltas (e.g. if the diff/delta
3001 3007 algorithm changes).
3002 3008 DELTAREUSEFULLADD
3003 3009 Revisions will be re-added as if they were new content. This is
3004 3010 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3005 3011 e.g. large file detection and handling.
3006 3012
3007 3013 Delta computation can be slow, so the choice of delta reuse policy can
3008 3014 significantly affect run time.
3009 3015
3010 3016 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3011 3017 two extremes. Deltas will be reused if they are appropriate. But if the
3012 3018 delta could choose a better revision, it will do so. This means if you
3013 3019 are converting a non-generaldelta revlog to a generaldelta revlog,
3014 3020 deltas will be recomputed if the delta's parent isn't a parent of the
3015 3021 revision.
3016 3022
3017 3023 In addition to the delta policy, the ``forcedeltabothparents``
3018 3024 argument controls whether to force computing deltas against both parents
3019 3025 for merges. If unset, the destination revlog's current setting is used.
3020 3026
3021 3027 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3022 3028 `sidedata_helpers`.
3023 3029 """
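# Hypothetical usage, e.g. from an upgrade-style operation:
#   with repo.transaction(b'clone') as tr:
#       src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)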
3024 3030 if deltareuse not in self.DELTAREUSEALL:
3025 3031 raise ValueError(
3026 3032 _(b'value for deltareuse invalid: %s') % deltareuse
3027 3033 )
3028 3034
3029 3035 if len(destrevlog):
3030 3036 raise ValueError(_(b'destination revlog is not empty'))
3031 3037
3032 3038 if getattr(self, 'filteredrevs', None):
3033 3039 raise ValueError(_(b'source revlog has filtered revisions'))
3034 3040 if getattr(destrevlog, 'filteredrevs', None):
3035 3041 raise ValueError(_(b'destination revlog has filtered revisions'))
3036 3042
3037 3043 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3038 3044 # if possible.
3039 3045 oldlazydelta = destrevlog._lazydelta
3040 3046 oldlazydeltabase = destrevlog._lazydeltabase
3041 3047 oldamd = destrevlog._deltabothparents
3042 3048
3043 3049 try:
3044 3050 if deltareuse == self.DELTAREUSEALWAYS:
3045 3051 destrevlog._lazydeltabase = True
3046 3052 destrevlog._lazydelta = True
3047 3053 elif deltareuse == self.DELTAREUSESAMEREVS:
3048 3054 destrevlog._lazydeltabase = False
3049 3055 destrevlog._lazydelta = True
3050 3056 elif deltareuse == self.DELTAREUSENEVER:
3051 3057 destrevlog._lazydeltabase = False
3052 3058 destrevlog._lazydelta = False
3053 3059
3054 3060 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3055 3061
3056 3062 self._clone(
3057 3063 tr,
3058 3064 destrevlog,
3059 3065 addrevisioncb,
3060 3066 deltareuse,
3061 3067 forcedeltabothparents,
3062 3068 sidedata_helpers,
3063 3069 )
3064 3070
3065 3071 finally:
3066 3072 destrevlog._lazydelta = oldlazydelta
3067 3073 destrevlog._lazydeltabase = oldlazydeltabase
3068 3074 destrevlog._deltabothparents = oldamd
3069 3075
3070 3076 def _clone(
3071 3077 self,
3072 3078 tr,
3073 3079 destrevlog,
3074 3080 addrevisioncb,
3075 3081 deltareuse,
3076 3082 forcedeltabothparents,
3077 3083 sidedata_helpers,
3078 3084 ):
3079 3085 """perform the core duty of `revlog.clone` after parameter processing"""
3080 3086 deltacomputer = deltautil.deltacomputer(destrevlog)
3081 3087 index = self.index
3082 3088 for rev in self:
3083 3089 entry = index[rev]
3084 3090
3085 3091 # Some classes override linkrev to take filtered revs into
3086 3092 # account. Use raw entry from index.
3087 3093 flags = entry[0] & 0xFFFF
3088 3094 linkrev = entry[4]
3089 3095 p1 = index[entry[5]][7]
3090 3096 p2 = index[entry[6]][7]
3091 3097 node = entry[7]
3092 3098
3093 3099 # (Possibly) reuse the delta from the revlog if allowed and
3094 3100 # the revlog chunk is a delta.
3095 3101 cachedelta = None
3096 3102 rawtext = None
3097 3103 if deltareuse == self.DELTAREUSEFULLADD:
3098 3104 text = self._revisiondata(rev)
3099 3105 sidedata = self.sidedata(rev)
3100 3106
3101 3107 if sidedata_helpers is not None:
3102 3108 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3103 3109 self, sidedata_helpers, sidedata, rev
3104 3110 )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
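                        # pair the delta base revision with the raw on-disk
                        # chunk so the destination can store it verbatim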
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            censor.v1_censor(self, tr, censornode, tombstone)
        else:
            # revlog v2
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
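
        A hypothetical consumer sketch (``rl`` stands for a revlog instance;
        the ``state`` keys shown are the ones this method reads)::

            state = {
                b'expectedversion': rl._format_version,
                b'erroroncensored': True,
            }
            for problem in rl.verifyintegrity(state):
                # ``revlogproblem`` carries ``warning``/``error``/``node``
                print(problem.error or problem.warning)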
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                        | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()               | 0      | 0      | 0     | not 0
            #  renamed()             | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'  | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #               | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()     | L1     | L1     | L1    | L1
            # size()        | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext)  | L2     | L2     | L2    | L2
            # len(text)     | L2     | L2     | L2    | L3
            # len(read())   | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
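        """Return storage details as a dict, filling only the entries that
        were requested through the boolean arguments.

        Possible keys: ``b'exclusivefiles'``, ``b'sharedfiles'``,
        ``b'revisionscount'``, ``b'trackedsize'`` and ``b'storedsize'``.
        """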
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline storage
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
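            # the index, data and sidedata file handles, respectively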
            ifh, dfh, sdfh = self._writinghandles
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
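                    # compress() is expected to return a (header, data) pair,
                    # a b'u' header marking data kept uncompressed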
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )
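                # entry_update lines up with the arguments of
                # index.replace_sidedata_info() used below: sidedata offset,
                # sidedata length, updated offset/flags field and sidedata
                # compression mode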

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)