revlog: fix a typo closing the wrong file...
Matt Harbison
r48206:5e44936b default
@@ -1,3394 +1,3394 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15 from __future__ import absolute_import
16 16
17 17 import binascii
18 18 import collections
19 19 import contextlib
20 20 import errno
21 21 import io
22 22 import os
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullrev,
31 31 sha1nodeconstants,
32 32 short,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from .pycompat import getattr
37 37 from .revlogutils.constants import (
38 38 ALL_KINDS,
39 39 CHANGELOGV2,
40 40 COMP_MODE_DEFAULT,
41 41 COMP_MODE_INLINE,
42 42 COMP_MODE_PLAIN,
43 43 FEATURES_BY_VERSION,
44 44 FLAG_GENERALDELTA,
45 45 FLAG_INLINE_DATA,
46 46 INDEX_HEADER,
47 47 KIND_CHANGELOG,
48 48 REVLOGV0,
49 49 REVLOGV1,
50 50 REVLOGV1_FLAGS,
51 51 REVLOGV2,
52 52 REVLOGV2_FLAGS,
53 53 REVLOG_DEFAULT_FLAGS,
54 54 REVLOG_DEFAULT_FORMAT,
55 55 REVLOG_DEFAULT_VERSION,
56 56 SUPPORTED_FLAGS,
57 57 )
58 58 from .revlogutils.flagutil import (
59 59 REVIDX_DEFAULT_FLAGS,
60 60 REVIDX_ELLIPSIS,
61 61 REVIDX_EXTSTORED,
62 62 REVIDX_FLAGS_ORDER,
63 63 REVIDX_HASCOPIESINFO,
64 64 REVIDX_ISCENSORED,
65 65 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 66 )
67 67 from .thirdparty import attr
68 68 from . import (
69 69 ancestor,
70 70 dagop,
71 71 error,
72 72 mdiff,
73 73 policy,
74 74 pycompat,
75 75 revlogutils,
76 76 templatefilters,
77 77 util,
78 78 )
79 79 from .interfaces import (
80 80 repository,
81 81 util as interfaceutil,
82 82 )
83 83 from .revlogutils import (
84 84 censor,
85 85 deltas as deltautil,
86 86 docket as docketutil,
87 87 flagutil,
88 88 nodemap as nodemaputil,
89 89 revlogv0,
90 90 sidedata as sidedatautil,
91 91 )
92 92 from .utils import (
93 93 storageutil,
94 94 stringutil,
95 95 )
96 96
97 97 # blanket usage of all the names to prevent pyflakes constraints
98 98 # We need these names available in the module for extensions.
99 99
100 100 REVLOGV0
101 101 REVLOGV1
102 102 REVLOGV2
103 103 FLAG_INLINE_DATA
104 104 FLAG_GENERALDELTA
105 105 REVLOG_DEFAULT_FLAGS
106 106 REVLOG_DEFAULT_FORMAT
107 107 REVLOG_DEFAULT_VERSION
108 108 REVLOGV1_FLAGS
109 109 REVLOGV2_FLAGS
110 110 REVIDX_ISCENSORED
111 111 REVIDX_ELLIPSIS
112 112 REVIDX_HASCOPIESINFO
113 113 REVIDX_EXTSTORED
114 114 REVIDX_DEFAULT_FLAGS
115 115 REVIDX_FLAGS_ORDER
116 116 REVIDX_RAWTEXT_CHANGING_FLAGS
117 117
118 118 parsers = policy.importmod('parsers')
119 119 rustancestor = policy.importrust('ancestor')
120 120 rustdagop = policy.importrust('dagop')
121 121 rustrevlog = policy.importrust('revlog')
122 122
123 123 # Aliased for performance.
124 124 _zlibdecompress = zlib.decompress
125 125
126 126 # max size of revlog with inline data
127 127 _maxinline = 131072
128 128 _chunksize = 1048576
129 129
130 130 # Flag processors for REVIDX_ELLIPSIS.
131 131 def ellipsisreadprocessor(rl, text):
132 132 return text, False
133 133
134 134
135 135 def ellipsiswriteprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsisrawprocessor(rl, text):
140 140 return False
141 141
142 142
143 143 ellipsisprocessor = (
144 144 ellipsisreadprocessor,
145 145 ellipsiswriteprocessor,
146 146 ellipsisrawprocessor,
147 147 )
148 148
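# A hedged sketch (not part of the original file) of the flag-processor
# contract that the ellipsis tuple above follows: each registered processor
# is a (read, write, raw) triple, where read/write return (text, validatehash)
# and raw reports whether the stored rawtext can still be verified by hash.
# The `upper*` names below are hypothetical.

def upperreadprocessor(rl, text):
    # transform text when a revision is read out of storage
    return text.upper(), False

def upperwriteprocessor(rl, text):
    # transform text before it is written into storage
    return text.lower(), False

def upperrawprocessor(rl, text):
    # the transformed rawtext no longer matches the recorded hash
    return False

upperprocessor = (
    upperreadprocessor,
    upperwriteprocessor,
    upperrawprocessor,
)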
149 149
150 150 def _verify_revision(rl, skipflags, state, node):
151 151 """Verify the integrity of the given revlog ``node`` while providing a hook
152 152 point for extensions to influence the operation."""
153 153 if skipflags:
154 154 state[b'skipread'].add(node)
155 155 else:
156 156 # Side-effect: read content and verify hash.
157 157 rl.revision(node)
158 158
159 159
160 160 # True if a fast implementation for persistent-nodemap is available
161 161 #
162 162 # We also consider that we have a "fast" implementation in "pure" python,
163 163 # because people using pure don't really have performance considerations
164 164 # (and a wheelbarrow of other slowness sources)
165 165 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
166 166 parsers, 'BaseIndexObject'
167 167 )
168 168
169 169
170 170 @interfaceutil.implementer(repository.irevisiondelta)
171 171 @attr.s(slots=True)
172 172 class revlogrevisiondelta(object):
173 173 node = attr.ib()
174 174 p1node = attr.ib()
175 175 p2node = attr.ib()
176 176 basenode = attr.ib()
177 177 flags = attr.ib()
178 178 baserevisionsize = attr.ib()
179 179 revision = attr.ib()
180 180 delta = attr.ib()
181 181 sidedata = attr.ib()
182 182 protocol_flags = attr.ib()
183 183 linknode = attr.ib(default=None)
184 184
185 185
186 186 @interfaceutil.implementer(repository.iverifyproblem)
187 187 @attr.s(frozen=True)
188 188 class revlogproblem(object):
189 189 warning = attr.ib(default=None)
190 190 error = attr.ib(default=None)
191 191 node = attr.ib(default=None)
192 192
193 193
194 194 def parse_index_v1(data, inline):
195 195 # call the C implementation to parse the index data
196 196 index, cache = parsers.parse_index2(data, inline)
197 197 return index, cache
198 198
199 199
200 200 def parse_index_v2(data, inline):
201 201 # call the C implementation to parse the index data
202 202 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
203 203 return index, cache
204 204
205 205
206 206 def parse_index_cl_v2(data, inline):
207 207 # call the C implementation to parse the index data
208 208 assert not inline
209 209 from .pure.parsers import parse_index_cl_v2
210 210
211 211 index, cache = parse_index_cl_v2(data)
212 212 return index, cache
213 213
214 214
215 215 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
216 216
217 217 def parse_index_v1_nodemap(data, inline):
218 218 index, cache = parsers.parse_index_devel_nodemap(data, inline)
219 219 return index, cache
220 220
221 221
222 222 else:
223 223 parse_index_v1_nodemap = None
224 224
225 225
226 226 def parse_index_v1_mixed(data, inline):
227 227 index, cache = parse_index_v1(data, inline)
228 228 return rustrevlog.MixedIndex(index), cache
229 229
230 230
231 231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
232 232 # signed integer)
233 233 _maxentrysize = 0x7FFFFFFF
234 234
235 235 PARTIAL_READ_MSG = _(
236 236 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
237 237 )
238 238
239 239 FILE_TOO_SHORT_MSG = _(
240 240 b'cannot read from revlog %s;'
241 241 b' expected %d bytes from offset %d, data size is %d'
242 242 )
243 243
244 244
245 245 class revlog(object):
246 246 """
247 247 the underlying revision storage object
248 248
249 249 A revlog consists of two parts, an index and the revision data.
250 250
251 251 The index is a file with a fixed record size containing
252 252 information on each revision, including its nodeid (hash), the
253 253 nodeids of its parents, the position and offset of its data within
254 254 the data file, and the revision it's based on. Finally, each entry
255 255 contains a linkrev entry that can serve as a pointer to external
256 256 data.
257 257
258 258 The revision data itself is a linear collection of data chunks.
259 259 Each chunk represents a revision and is usually represented as a
260 260 delta against the previous chunk. To bound lookup time, runs of
261 261 deltas are limited to about 2 times the length of the original
262 262 version data. This makes retrieval of a version proportional to
263 263 its size, or O(1) relative to the number of revisions.
264 264
265 265 Both pieces of the revlog are written to in an append-only
266 266 fashion, which means we never need to rewrite a file to insert or
267 267 remove data, and can use some simple techniques to avoid the need
268 268 for locking while reading.
269 269
270 270 If checkambig, indexfile is opened with checkambig=True at
271 271 writing, to avoid file stat ambiguity.
272 272
273 273 If mmaplargeindex is True, and an mmapindexthreshold is set, the
274 274 index will be mmapped rather than read if it is larger than the
275 275 configured threshold.
276 276
277 277 If censorable is True, the revlog can have censored revisions.
278 278
279 279 If `upperboundcomp` is not None, this is the expected maximal gain from
280 280 compression for the data content.
281 281
282 282 `concurrencychecker` is an optional function that receives 3 arguments: a
283 283 file handle, a filename, and an expected position. It should check whether
284 284 the current position in the file handle is valid, and log/warn/fail (by
285 285 raising).
286 286
287 287 See mercurial/revlogutils/constants.py for details about the content of an
288 288 index entry.
289 289 """
290 290
291 291 _flagserrorclass = error.RevlogError
292 292
293 293 def __init__(
294 294 self,
295 295 opener,
296 296 target,
297 297 radix,
298 298 postfix=None, # only exists for `tmpcensored` now
299 299 checkambig=False,
300 300 mmaplargeindex=False,
301 301 censorable=False,
302 302 upperboundcomp=None,
303 303 persistentnodemap=False,
304 304 concurrencychecker=None,
305 305 trypending=False,
306 306 ):
307 307 """
308 308 create a revlog object
309 309
310 310 opener is a function that abstracts the file opening operation
311 311 and can be used to implement COW semantics or the like.
312 312
313 313 `target`: a (KIND, ID) tuple that identifies the content stored in
314 314 this revlog. It helps the rest of the code understand what the revlog
315 315 is about without having to resort to heuristics and index filename
316 316 analysis. Note that this must reliably be set by normal code, but
317 317 that test, debug, or performance measurement code might not set it to
318 318 an accurate value.
319 319 """
320 320 self.upperboundcomp = upperboundcomp
321 321
322 322 self.radix = radix
323 323
324 324 self._docket_file = None
325 325 self._indexfile = None
326 326 self._datafile = None
327 327 self._sidedatafile = None
328 328 self._nodemap_file = None
329 329 self.postfix = postfix
330 330 self._trypending = trypending
331 331 self.opener = opener
332 332 if persistentnodemap:
333 333 self._nodemap_file = nodemaputil.get_nodemap_file(self)
334 334
335 335 assert target[0] in ALL_KINDS
336 336 assert len(target) == 2
337 337 self.target = target
338 338 # When True, indexfile is opened with checkambig=True at writing, to
339 339 # avoid file stat ambiguity.
340 340 self._checkambig = checkambig
341 341 self._mmaplargeindex = mmaplargeindex
342 342 self._censorable = censorable
343 343 # 3-tuple of (node, rev, text) for a raw revision.
344 344 self._revisioncache = None
345 345 # Maps rev to chain base rev.
346 346 self._chainbasecache = util.lrucachedict(100)
347 347 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
348 348 self._chunkcache = (0, b'')
349 349 # How much data to read and cache into the raw revlog data cache.
350 350 self._chunkcachesize = 65536
351 351 self._maxchainlen = None
352 352 self._deltabothparents = True
353 353 self.index = None
354 354 self._docket = None
355 355 self._nodemap_docket = None
356 356 # Mapping of partial identifiers to full nodes.
357 357 self._pcache = {}
358 358 # Mapping of revision integer to full node.
359 359 self._compengine = b'zlib'
360 360 self._compengineopts = {}
361 361 self._maxdeltachainspan = -1
362 362 self._withsparseread = False
363 363 self._sparserevlog = False
364 364 self.hassidedata = False
365 365 self._srdensitythreshold = 0.50
366 366 self._srmingapsize = 262144
367 367
368 368 # Make copy of flag processors so each revlog instance can support
369 369 # custom flags.
370 370 self._flagprocessors = dict(flagutil.flagprocessors)
371 371
372 372 # 3-tuple of file handles being used for active writing.
373 373 self._writinghandles = None
374 374 # prevent nesting of addgroup
375 375 self._adding_group = None
376 376
377 377 self._loadindex()
378 378
379 379 self._concurrencychecker = concurrencychecker
380 380
381 381 def _init_opts(self):
382 382 """process options (from above/config) to setup associated default revlog mode
383 383
384 384 These values might be affected when actually reading on-disk information.
385 385
386 386 The relevant values are returned for use in _loadindex().
387 387
388 388 * newversionflags:
389 389 version header to use if we need to create a new revlog
390 390
391 391 * mmapindexthreshold:
392 392 minimal index size at which to start using mmap
393 393
394 394 * force_nodemap:
395 395 force the usage of a "development" version of the nodemap code
396 396 """
397 397 mmapindexthreshold = None
398 398 opts = self.opener.options
399 399
400 400 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
401 401 new_header = CHANGELOGV2
402 402 elif b'revlogv2' in opts:
403 403 new_header = REVLOGV2
404 404 elif b'revlogv1' in opts:
405 405 new_header = REVLOGV1 | FLAG_INLINE_DATA
406 406 if b'generaldelta' in opts:
407 407 new_header |= FLAG_GENERALDELTA
408 408 elif b'revlogv0' in self.opener.options:
409 409 new_header = REVLOGV0
410 410 else:
411 411 new_header = REVLOG_DEFAULT_VERSION
412 412
413 413 if b'chunkcachesize' in opts:
414 414 self._chunkcachesize = opts[b'chunkcachesize']
415 415 if b'maxchainlen' in opts:
416 416 self._maxchainlen = opts[b'maxchainlen']
417 417 if b'deltabothparents' in opts:
418 418 self._deltabothparents = opts[b'deltabothparents']
419 419 self._lazydelta = bool(opts.get(b'lazydelta', True))
420 420 self._lazydeltabase = False
421 421 if self._lazydelta:
422 422 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
423 423 if b'compengine' in opts:
424 424 self._compengine = opts[b'compengine']
425 425 if b'zlib.level' in opts:
426 426 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
427 427 if b'zstd.level' in opts:
428 428 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
429 429 if b'maxdeltachainspan' in opts:
430 430 self._maxdeltachainspan = opts[b'maxdeltachainspan']
431 431 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
432 432 mmapindexthreshold = opts[b'mmapindexthreshold']
433 433 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
434 434 withsparseread = bool(opts.get(b'with-sparse-read', False))
435 435 # sparse-revlog forces sparse-read
436 436 self._withsparseread = self._sparserevlog or withsparseread
437 437 if b'sparse-read-density-threshold' in opts:
438 438 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
439 439 if b'sparse-read-min-gap-size' in opts:
440 440 self._srmingapsize = opts[b'sparse-read-min-gap-size']
441 441 if opts.get(b'enableellipsis'):
442 442 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
443 443
444 444 # revlog v0 doesn't have flag processors
445 445 for flag, processor in pycompat.iteritems(
446 446 opts.get(b'flagprocessors', {})
447 447 ):
448 448 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
449 449
450 450 if self._chunkcachesize <= 0:
451 451 raise error.RevlogError(
452 452 _(b'revlog chunk cache size %r is not greater than 0')
453 453 % self._chunkcachesize
454 454 )
455 455 elif self._chunkcachesize & (self._chunkcachesize - 1):
456 456 raise error.RevlogError(
457 457 _(b'revlog chunk cache size %r is not a power of 2')
458 458 % self._chunkcachesize
459 459 )
460 460 force_nodemap = opts.get(b'devel-force-nodemap', False)
461 461 return new_header, mmapindexthreshold, force_nodemap
462 462
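# Illustrative sketch (not from the original file) of the power-of-two
# validation a few lines up: for a positive integer n, `n & (n - 1)` clears
# the lowest set bit, so the expression is 0 exactly when n has a single bit
# set, i.e. is a power of two.

def _is_power_of_two(n):
    return n > 0 and n & (n - 1) == 0

assert _is_power_of_two(65536)      # the default chunk cache size
assert not _is_power_of_two(65537)  # would raise RevlogError above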
463 463 def _get_data(self, filepath, mmap_threshold, size=None):
464 464 """return a file content with or without mmap
465 465
466 466 If the file is missing return the empty string"""
467 467 try:
468 468 with self.opener(filepath) as fp:
469 469 if mmap_threshold is not None:
470 470 file_size = self.opener.fstat(fp).st_size
471 471 if file_size >= mmap_threshold:
472 472 if size is not None:
473 473 # avoid potential mmap crash
474 474 size = min(file_size, size)
475 475 # TODO: should .close() to release resources without
476 476 # relying on Python GC
477 477 if size is None:
478 478 return util.buffer(util.mmapread(fp))
479 479 else:
480 480 return util.buffer(util.mmapread(fp, size))
481 481 if size is None:
482 482 return fp.read()
483 483 else:
484 484 return fp.read(size)
485 485 except IOError as inst:
486 486 if inst.errno != errno.ENOENT:
487 487 raise
488 488 return b''
489 489
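# Standalone sketch (stdlib only; `_read_maybe_mmap` is a hypothetical
# helper, not Mercurial API) of the size-gated mmap strategy _get_data()
# uses above: small files take a plain read(), while files at or above the
# threshold are mapped read-only to avoid copying them into memory.
import mmap as _mmap_mod
import os

def _read_maybe_mmap(path, threshold):
    with open(path, 'rb') as fp:
        size = os.fstat(fp.fileno()).st_size
        # a zero-length file cannot be mapped; a positive threshold avoids it
        if threshold is not None and size >= threshold:
            return _mmap_mod.mmap(fp.fileno(), size, access=_mmap_mod.ACCESS_READ)
        return fp.read()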
490 490 def _loadindex(self, docket=None):
491 491
492 492 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
493 493
494 494 if self.postfix is not None:
495 495 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
496 496 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
497 497 entry_point = b'%s.i.a' % self.radix
498 498 else:
499 499 entry_point = b'%s.i' % self.radix
500 500
501 501 if docket is not None:
502 502 self._docket = docket
503 503 self._docket_file = entry_point
504 504 else:
505 505 entry_data = b''
506 506 self._initempty = True
507 507 entry_data = self._get_data(entry_point, mmapindexthreshold)
508 508 if len(entry_data) > 0:
509 509 header = INDEX_HEADER.unpack(entry_data[:4])[0]
510 510 self._initempty = False
511 511 else:
512 512 header = new_header
513 513
514 514 self._format_flags = header & ~0xFFFF
515 515 self._format_version = header & 0xFFFF
516 516
517 517 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
518 518 if supported_flags is None:
519 519 msg = _(b'unknown version (%d) in revlog %s')
520 520 msg %= (self._format_version, self.display_id)
521 521 raise error.RevlogError(msg)
522 522 elif self._format_flags & ~supported_flags:
523 523 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
524 524 display_flag = self._format_flags >> 16
525 525 msg %= (display_flag, self._format_version, self.display_id)
526 526 raise error.RevlogError(msg)
527 527
528 528 features = FEATURES_BY_VERSION[self._format_version]
529 529 self._inline = features[b'inline'](self._format_flags)
530 530 self._generaldelta = features[b'generaldelta'](self._format_flags)
531 531 self.hassidedata = features[b'sidedata']
532 532
533 533 if not features[b'docket']:
534 534 self._indexfile = entry_point
535 535 index_data = entry_data
536 536 else:
537 537 self._docket_file = entry_point
538 538 if self._initempty:
539 539 self._docket = docketutil.default_docket(self, header)
540 540 else:
541 541 self._docket = docketutil.parse_docket(
542 542 self, entry_data, use_pending=self._trypending
543 543 )
544 544
545 545 if self._docket is not None:
546 546 self._indexfile = self._docket.index_filepath()
547 547 index_data = b''
548 548 index_size = self._docket.index_end
549 549 if index_size > 0:
550 550 index_data = self._get_data(
551 551 self._indexfile, mmapindexthreshold, size=index_size
552 552 )
553 553 if len(index_data) < index_size:
554 554 msg = _(b'not enough index data for %s: got %d, expected %d')
555 555 msg %= (self.display_id, len(index_data), index_size)
556 556 raise error.RevlogError(msg)
557 557
558 558 self._inline = False
559 559 # generaldelta implied by version 2 revlogs.
560 560 self._generaldelta = True
561 561 # the logic for persistent nodemap will be dealt with within the
562 562 # main docket, so disable it for now.
563 563 self._nodemap_file = None
564 564
565 565 if self._docket is not None:
566 566 self._datafile = self._docket.data_filepath()
567 567 self._sidedatafile = self._docket.sidedata_filepath()
568 568 elif self.postfix is None:
569 569 self._datafile = b'%s.d' % self.radix
570 570 else:
571 571 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
572 572
573 573 self.nodeconstants = sha1nodeconstants
574 574 self.nullid = self.nodeconstants.nullid
575 575
576 576 # sparse-revlog can't be on without general-delta (issue6056)
577 577 if not self._generaldelta:
578 578 self._sparserevlog = False
579 579
580 580 self._storedeltachains = True
581 581
582 582 devel_nodemap = (
583 583 self._nodemap_file
584 584 and force_nodemap
585 585 and parse_index_v1_nodemap is not None
586 586 )
587 587
588 588 use_rust_index = False
589 589 if rustrevlog is not None:
590 590 if self._nodemap_file is not None:
591 591 use_rust_index = True
592 592 else:
593 593 use_rust_index = self.opener.options.get(b'rust.index')
594 594
595 595 self._parse_index = parse_index_v1
596 596 if self._format_version == REVLOGV0:
597 597 self._parse_index = revlogv0.parse_index_v0
598 598 elif self._format_version == REVLOGV2:
599 599 self._parse_index = parse_index_v2
600 600 elif self._format_version == CHANGELOGV2:
601 601 self._parse_index = parse_index_cl_v2
602 602 elif devel_nodemap:
603 603 self._parse_index = parse_index_v1_nodemap
604 604 elif use_rust_index:
605 605 self._parse_index = parse_index_v1_mixed
606 606 try:
607 607 d = self._parse_index(index_data, self._inline)
608 608 index, _chunkcache = d
609 609 use_nodemap = (
610 610 not self._inline
611 611 and self._nodemap_file is not None
612 612 and util.safehasattr(index, 'update_nodemap_data')
613 613 )
614 614 if use_nodemap:
615 615 nodemap_data = nodemaputil.persisted_data(self)
616 616 if nodemap_data is not None:
617 617 docket = nodemap_data[0]
618 618 if (
619 619 len(d[0]) > docket.tip_rev
620 620 and d[0][docket.tip_rev][7] == docket.tip_node
621 621 ):
622 622 # no changelog tampering
623 623 self._nodemap_docket = docket
624 624 index.update_nodemap_data(*nodemap_data)
625 625 except (ValueError, IndexError):
626 626 raise error.RevlogError(
627 627 _(b"index %s is corrupted") % self.display_id
628 628 )
629 629 self.index, self._chunkcache = d
630 630 if not self._chunkcache:
631 631 self._chunkclear()
632 632 # revnum -> (chain-length, sum-delta-length)
633 633 self._chaininfocache = util.lrucachedict(500)
634 634 # revlog header -> revlog compressor
635 635 self._decompressors = {}
636 636
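# Sketch (not from the original file) of the header split performed in
# _loadindex() above: the first four bytes of the index pack the format
# flags into the high 16 bits and the revlog version into the low 16 bits.

def _split_header(header):
    return header & ~0xFFFF, header & 0xFFFF  # (format flags, version)

# e.g. a version-1 inline revlog with general delta:
_flags, _version = _split_header(REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA)
assert _version == REVLOGV1
assert _flags == FLAG_INLINE_DATA | FLAG_GENERALDELTA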
637 637 @util.propertycache
638 638 def revlog_kind(self):
639 639 return self.target[0]
640 640
641 641 @util.propertycache
642 642 def display_id(self):
643 643 """The public facing "ID" of the revlog that we use in message"""
644 644 # Maybe we should build a user facing representation of
645 645 # revlog.target instead of using `self.radix`
646 646 return self.radix
647 647
648 648 def _get_decompressor(self, t):
649 649 try:
650 650 compressor = self._decompressors[t]
651 651 except KeyError:
652 652 try:
653 653 engine = util.compengines.forrevlogheader(t)
654 654 compressor = engine.revlogcompressor(self._compengineopts)
655 655 self._decompressors[t] = compressor
656 656 except KeyError:
657 657 raise error.RevlogError(
658 658 _(b'unknown compression type %s') % binascii.hexlify(t)
659 659 )
660 660 return compressor
661 661
662 662 @util.propertycache
663 663 def _compressor(self):
664 664 engine = util.compengines[self._compengine]
665 665 return engine.revlogcompressor(self._compengineopts)
666 666
667 667 @util.propertycache
668 668 def _decompressor(self):
669 669 """the default decompressor"""
670 670 if self._docket is None:
671 671 return None
672 672 t = self._docket.default_compression_header
673 673 c = self._get_decompressor(t)
674 674 return c.decompress
675 675
676 676 def _indexfp(self):
677 677 """file object for the revlog's index file"""
678 678 return self.opener(self._indexfile, mode=b"r")
679 679
680 680 def __index_write_fp(self):
681 681 # You should not use this directly; use `_writing` instead
682 682 try:
683 683 f = self.opener(
684 684 self._indexfile, mode=b"r+", checkambig=self._checkambig
685 685 )
686 686 if self._docket is None:
687 687 f.seek(0, os.SEEK_END)
688 688 else:
689 689 f.seek(self._docket.index_end, os.SEEK_SET)
690 690 return f
691 691 except IOError as inst:
692 692 if inst.errno != errno.ENOENT:
693 693 raise
694 694 return self.opener(
695 695 self._indexfile, mode=b"w+", checkambig=self._checkambig
696 696 )
697 697
698 698 def __index_new_fp(self):
699 699 # You should not use this unless you are upgrading from inline revlog
700 700 return self.opener(
701 701 self._indexfile,
702 702 mode=b"w",
703 703 checkambig=self._checkambig,
704 704 atomictemp=True,
705 705 )
706 706
707 707 def _datafp(self, mode=b'r'):
708 708 """file object for the revlog's data file"""
709 709 return self.opener(self._datafile, mode=mode)
710 710
711 711 @contextlib.contextmanager
712 712 def _datareadfp(self, existingfp=None):
713 713 """file object suitable to read data"""
714 714 # Use explicit file handle, if given.
715 715 if existingfp is not None:
716 716 yield existingfp
717 717
718 718 # Use a file handle being actively used for writes, if available.
719 719 # There is some danger in doing this because reads will seek the
720 720 # file. However, _writeentry() performs a SEEK_END before all writes,
721 721 # so we should be safe.
722 722 elif self._writinghandles:
723 723 if self._inline:
724 724 yield self._writinghandles[0]
725 725 else:
726 726 yield self._writinghandles[1]
727 727
728 728 # Otherwise open a new file handle.
729 729 else:
730 730 if self._inline:
731 731 func = self._indexfp
732 732 else:
733 733 func = self._datafp
734 734 with func() as fp:
735 735 yield fp
736 736
737 737 @contextlib.contextmanager
738 738 def _sidedatareadfp(self):
739 739 """file object suitable to read sidedata"""
740 740 if self._writinghandles:
741 741 yield self._writinghandles[2]
742 742 else:
743 743 with self.opener(self._sidedatafile) as fp:
744 744 yield fp
745 745
746 746 def tiprev(self):
747 747 return len(self.index) - 1
748 748
749 749 def tip(self):
750 750 return self.node(self.tiprev())
751 751
752 752 def __contains__(self, rev):
753 753 return 0 <= rev < len(self)
754 754
755 755 def __len__(self):
756 756 return len(self.index)
757 757
758 758 def __iter__(self):
759 759 return iter(pycompat.xrange(len(self)))
760 760
761 761 def revs(self, start=0, stop=None):
762 762 """iterate over all rev in this revlog (from start to stop)"""
763 763 return storageutil.iterrevs(len(self), start=start, stop=stop)
764 764
765 765 @property
766 766 def nodemap(self):
767 767 msg = (
768 768 b"revlog.nodemap is deprecated, "
769 769 b"use revlog.index.[has_node|rev|get_rev]"
770 770 )
771 771 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
772 772 return self.index.nodemap
773 773
774 774 @property
775 775 def _nodecache(self):
776 776 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
777 777 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
778 778 return self.index.nodemap
779 779
780 780 def hasnode(self, node):
781 781 try:
782 782 self.rev(node)
783 783 return True
784 784 except KeyError:
785 785 return False
786 786
787 787 def candelta(self, baserev, rev):
788 788 """whether two revisions (baserev, rev) can be delta-ed or not"""
789 789 # Disable delta if either rev requires a content-changing flag
790 790 # processor (ex. LFS). This is because such flag processor can alter
791 791 # the rawtext content that the delta will be based on, and two clients
792 792 # could have a same revlog node with different flags (i.e. different
793 793 # rawtext contents) and the delta could be incompatible.
794 794 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
795 795 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
796 796 ):
797 797 return False
798 798 return True
799 799
800 800 def update_caches(self, transaction):
801 801 if self._nodemap_file is not None:
802 802 if transaction is None:
803 803 nodemaputil.update_persistent_nodemap(self)
804 804 else:
805 805 nodemaputil.setup_persistent_nodemap(transaction, self)
806 806
807 807 def clearcaches(self):
808 808 self._revisioncache = None
809 809 self._chainbasecache.clear()
810 810 self._chunkcache = (0, b'')
811 811 self._pcache = {}
812 812 self._nodemap_docket = None
813 813 self.index.clearcaches()
814 814 # The python code is the one responsible for validating the docket, we
815 815 # end up having to refresh it here.
816 816 use_nodemap = (
817 817 not self._inline
818 818 and self._nodemap_file is not None
819 819 and util.safehasattr(self.index, 'update_nodemap_data')
820 820 )
821 821 if use_nodemap:
822 822 nodemap_data = nodemaputil.persisted_data(self)
823 823 if nodemap_data is not None:
824 824 self._nodemap_docket = nodemap_data[0]
825 825 self.index.update_nodemap_data(*nodemap_data)
826 826
827 827 def rev(self, node):
828 828 try:
829 829 return self.index.rev(node)
830 830 except TypeError:
831 831 raise
832 832 except error.RevlogError:
833 833 # parsers.c radix tree lookup failed
834 834 if (
835 835 node == self.nodeconstants.wdirid
836 836 or node in self.nodeconstants.wdirfilenodeids
837 837 ):
838 838 raise error.WdirUnsupported
839 839 raise error.LookupError(node, self.display_id, _(b'no node'))
840 840
841 841 # Accessors for index entries.
842 842
843 843 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
844 844 # are flags.
845 845 def start(self, rev):
846 846 return int(self.index[rev][0] >> 16)
847 847
848 848 def sidedata_cut_off(self, rev):
849 849 sd_cut_off = self.index[rev][8]
850 850 if sd_cut_off != 0:
851 851 return sd_cut_off
852 852 # This is some annoying dance, because entries without sidedata
853 853 # currently use 0 as their offset (instead of previous-offset +
854 854 # previous-size).
855 855 #
856 856 # We should reconsider this "sidedata → 0 sidedata_offset" policy.
857 857 # In the meantime, we need this.
858 858 while 0 <= rev:
859 859 e = self.index[rev]
860 860 if e[9] != 0:
861 861 return e[8] + e[9]
862 862 rev -= 1
863 863 return 0
864 864
865 865 def flags(self, rev):
866 866 return self.index[rev][0] & 0xFFFF
867 867
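# Sketch (not from the original file) of the packed first index field that
# start() and flags() above decode: one 64-bit integer carrying the data
# offset in its high 48 bits and the 16-bit flag word in its low bits.

def _pack_offset_flags(offset, flags):
    assert 0 <= flags <= 0xFFFF
    return (offset << 16) | flags

_packed = _pack_offset_flags(1048576, REVIDX_ISCENSORED)
assert _packed >> 16 == 1048576               # what start() returns
assert _packed & 0xFFFF == REVIDX_ISCENSORED  # what flags() returns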
868 868 def length(self, rev):
869 869 return self.index[rev][1]
870 870
871 871 def sidedata_length(self, rev):
872 872 if not self.hassidedata:
873 873 return 0
874 874 return self.index[rev][9]
875 875
876 876 def rawsize(self, rev):
877 877 """return the length of the uncompressed text for a given revision"""
878 878 l = self.index[rev][2]
879 879 if l >= 0:
880 880 return l
881 881
882 882 t = self.rawdata(rev)
883 883 return len(t)
884 884
885 885 def size(self, rev):
886 886 """length of non-raw text (processed by a "read" flag processor)"""
887 887 # fast path: if no "read" flag processor could change the content,
888 888 # size is rawsize. note: ELLIPSIS is known to not change the content.
889 889 flags = self.flags(rev)
890 890 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
891 891 return self.rawsize(rev)
892 892
893 893 return len(self.revision(rev, raw=False))
894 894
895 895 def chainbase(self, rev):
896 896 base = self._chainbasecache.get(rev)
897 897 if base is not None:
898 898 return base
899 899
900 900 index = self.index
901 901 iterrev = rev
902 902 base = index[iterrev][3]
903 903 while base != iterrev:
904 904 iterrev = base
905 905 base = index[iterrev][3]
906 906
907 907 self._chainbasecache[rev] = base
908 908 return base
909 909
910 910 def linkrev(self, rev):
911 911 return self.index[rev][4]
912 912
913 913 def parentrevs(self, rev):
914 914 try:
915 915 entry = self.index[rev]
916 916 except IndexError:
917 917 if rev == wdirrev:
918 918 raise error.WdirUnsupported
919 919 raise
920 920 if entry[5] == nullrev:
921 921 return entry[6], entry[5]
922 922 else:
923 923 return entry[5], entry[6]
924 924
925 925 # fast parentrevs(rev) where rev isn't filtered
926 926 _uncheckedparentrevs = parentrevs
927 927
928 928 def node(self, rev):
929 929 try:
930 930 return self.index[rev][7]
931 931 except IndexError:
932 932 if rev == wdirrev:
933 933 raise error.WdirUnsupported
934 934 raise
935 935
936 936 # Derived from index values.
937 937
938 938 def end(self, rev):
939 939 return self.start(rev) + self.length(rev)
940 940
941 941 def parents(self, node):
942 942 i = self.index
943 943 d = i[self.rev(node)]
944 944 # inline node() to avoid function call overhead
945 945 if d[5] == self.nullid:
946 946 return i[d[6]][7], i[d[5]][7]
947 947 else:
948 948 return i[d[5]][7], i[d[6]][7]
949 949
950 950 def chainlen(self, rev):
951 951 return self._chaininfo(rev)[0]
952 952
953 953 def _chaininfo(self, rev):
954 954 chaininfocache = self._chaininfocache
955 955 if rev in chaininfocache:
956 956 return chaininfocache[rev]
957 957 index = self.index
958 958 generaldelta = self._generaldelta
959 959 iterrev = rev
960 960 e = index[iterrev]
961 961 clen = 0
962 962 compresseddeltalen = 0
963 963 while iterrev != e[3]:
964 964 clen += 1
965 965 compresseddeltalen += e[1]
966 966 if generaldelta:
967 967 iterrev = e[3]
968 968 else:
969 969 iterrev -= 1
970 970 if iterrev in chaininfocache:
971 971 t = chaininfocache[iterrev]
972 972 clen += t[0]
973 973 compresseddeltalen += t[1]
974 974 break
975 975 e = index[iterrev]
976 976 else:
977 977 # Add text length of base since decompressing that also takes
978 978 # work. For cache hits the length is already included.
979 979 compresseddeltalen += e[1]
980 980 r = (clen, compresseddeltalen)
981 981 chaininfocache[rev] = r
982 982 return r
983 983
984 984 def _deltachain(self, rev, stoprev=None):
985 985 """Obtain the delta chain for a revision.
986 986
987 987 ``stoprev`` specifies a revision to stop at. If not specified, we
988 988 stop at the base of the chain.
989 989
990 990 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
991 991 revs in ascending order and ``stopped`` is a bool indicating whether
992 992 ``stoprev`` was hit.
993 993 """
994 994 # Try C implementation.
995 995 try:
996 996 return self.index.deltachain(rev, stoprev, self._generaldelta)
997 997 except AttributeError:
998 998 pass
999 999
1000 1000 chain = []
1001 1001
1002 1002 # Alias to prevent attribute lookup in tight loop.
1003 1003 index = self.index
1004 1004 generaldelta = self._generaldelta
1005 1005
1006 1006 iterrev = rev
1007 1007 e = index[iterrev]
1008 1008 while iterrev != e[3] and iterrev != stoprev:
1009 1009 chain.append(iterrev)
1010 1010 if generaldelta:
1011 1011 iterrev = e[3]
1012 1012 else:
1013 1013 iterrev -= 1
1014 1014 e = index[iterrev]
1015 1015
1016 1016 if iterrev == stoprev:
1017 1017 stopped = True
1018 1018 else:
1019 1019 chain.append(iterrev)
1020 1020 stopped = False
1021 1021
1022 1022 chain.reverse()
1023 1023 return chain, stopped
1024 1024
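# Toy sketch (not from the original file) of the pure-python delta-chain
# walk above, reduced to the "delta base" column (index entry slot 3).
# `bases` maps rev -> base rev; a rev whose base is itself is a full
# snapshot, which is how the general-delta walk terminates.

def _toy_deltachain(bases, rev):
    chain = []
    while bases[rev] != rev:
        chain.append(rev)
        rev = bases[rev]  # generaldelta: jump directly to the base
    chain.append(rev)
    chain.reverse()
    return chain

# rev 0 is a snapshot, 1 deltas against 0, 2 against 1, 3 against 0:
assert _toy_deltachain({0: 0, 1: 0, 2: 1, 3: 0}, 2) == [0, 1, 2]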
1025 1025 def ancestors(self, revs, stoprev=0, inclusive=False):
1026 1026 """Generate the ancestors of 'revs' in reverse revision order.
1027 1027 Does not generate revs lower than stoprev.
1028 1028
1029 1029 See the documentation for ancestor.lazyancestors for more details."""
1030 1030
1031 1031 # first, make sure start revisions aren't filtered
1032 1032 revs = list(revs)
1033 1033 checkrev = self.node
1034 1034 for r in revs:
1035 1035 checkrev(r)
1036 1036 # and we're sure ancestors aren't filtered as well
1037 1037
1038 1038 if rustancestor is not None and self.index.rust_ext_compat:
1039 1039 lazyancestors = rustancestor.LazyAncestors
1040 1040 arg = self.index
1041 1041 else:
1042 1042 lazyancestors = ancestor.lazyancestors
1043 1043 arg = self._uncheckedparentrevs
1044 1044 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1045 1045
1046 1046 def descendants(self, revs):
1047 1047 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1048 1048
1049 1049 def findcommonmissing(self, common=None, heads=None):
1050 1050 """Return a tuple of the ancestors of common and the ancestors of heads
1051 1051 that are not ancestors of common. In revset terminology, we return the
1052 1052 tuple:
1053 1053
1054 1054 ::common, (::heads) - (::common)
1055 1055
1056 1056 The list is sorted by revision number, meaning it is
1057 1057 topologically sorted.
1058 1058
1059 1059 'heads' and 'common' are both lists of node IDs. If heads is
1060 1060 not supplied, uses all of the revlog's heads. If common is not
1061 1061 supplied, uses nullid."""
1062 1062 if common is None:
1063 1063 common = [self.nullid]
1064 1064 if heads is None:
1065 1065 heads = self.heads()
1066 1066
1067 1067 common = [self.rev(n) for n in common]
1068 1068 heads = [self.rev(n) for n in heads]
1069 1069
1070 1070 # we want the ancestors, but inclusive
1071 1071 class lazyset(object):
1072 1072 def __init__(self, lazyvalues):
1073 1073 self.addedvalues = set()
1074 1074 self.lazyvalues = lazyvalues
1075 1075
1076 1076 def __contains__(self, value):
1077 1077 return value in self.addedvalues or value in self.lazyvalues
1078 1078
1079 1079 def __iter__(self):
1080 1080 added = self.addedvalues
1081 1081 for r in added:
1082 1082 yield r
1083 1083 for r in self.lazyvalues:
1084 1084 if not r in added:
1085 1085 yield r
1086 1086
1087 1087 def add(self, value):
1088 1088 self.addedvalues.add(value)
1089 1089
1090 1090 def update(self, values):
1091 1091 self.addedvalues.update(values)
1092 1092
1093 1093 has = lazyset(self.ancestors(common))
1094 1094 has.add(nullrev)
1095 1095 has.update(common)
1096 1096
1097 1097 # take all ancestors from heads that aren't in has
1098 1098 missing = set()
1099 1099 visit = collections.deque(r for r in heads if r not in has)
1100 1100 while visit:
1101 1101 r = visit.popleft()
1102 1102 if r in missing:
1103 1103 continue
1104 1104 else:
1105 1105 missing.add(r)
1106 1106 for p in self.parentrevs(r):
1107 1107 if p not in has:
1108 1108 visit.append(p)
1109 1109 missing = list(missing)
1110 1110 missing.sort()
1111 1111 return has, [self.node(miss) for miss in missing]
1112 1112
1113 1113 def incrementalmissingrevs(self, common=None):
1114 1114 """Return an object that can be used to incrementally compute the
1115 1115 revision numbers of the ancestors of arbitrary sets that are not
1116 1116 ancestors of common. This is an ancestor.incrementalmissingancestors
1117 1117 object.
1118 1118
1119 1119 'common' is a list of revision numbers. If common is not supplied, uses
1120 1120 nullrev.
1121 1121 """
1122 1122 if common is None:
1123 1123 common = [nullrev]
1124 1124
1125 1125 if rustancestor is not None and self.index.rust_ext_compat:
1126 1126 return rustancestor.MissingAncestors(self.index, common)
1127 1127 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1128 1128
1129 1129 def findmissingrevs(self, common=None, heads=None):
1130 1130 """Return the revision numbers of the ancestors of heads that
1131 1131 are not ancestors of common.
1132 1132
1133 1133 More specifically, return a list of revision numbers corresponding to
1134 1134 nodes N such that every N satisfies the following constraints:
1135 1135
1136 1136 1. N is an ancestor of some node in 'heads'
1137 1137 2. N is not an ancestor of any node in 'common'
1138 1138
1139 1139 The list is sorted by revision number, meaning it is
1140 1140 topologically sorted.
1141 1141
1142 1142 'heads' and 'common' are both lists of revision numbers. If heads is
1143 1143 not supplied, uses all of the revlog's heads. If common is not
1144 1144 supplied, uses nullid."""
1145 1145 if common is None:
1146 1146 common = [nullrev]
1147 1147 if heads is None:
1148 1148 heads = self.headrevs()
1149 1149
1150 1150 inc = self.incrementalmissingrevs(common=common)
1151 1151 return inc.missingancestors(heads)
1152 1152
1153 1153 def findmissing(self, common=None, heads=None):
1154 1154 """Return the ancestors of heads that are not ancestors of common.
1155 1155
1156 1156 More specifically, return a list of nodes N such that every N
1157 1157 satisfies the following constraints:
1158 1158
1159 1159 1. N is an ancestor of some node in 'heads'
1160 1160 2. N is not an ancestor of any node in 'common'
1161 1161
1162 1162 The list is sorted by revision number, meaning it is
1163 1163 topologically sorted.
1164 1164
1165 1165 'heads' and 'common' are both lists of node IDs. If heads is
1166 1166 not supplied, uses all of the revlog's heads. If common is not
1167 1167 supplied, uses nullid."""
1168 1168 if common is None:
1169 1169 common = [self.nullid]
1170 1170 if heads is None:
1171 1171 heads = self.heads()
1172 1172
1173 1173 common = [self.rev(n) for n in common]
1174 1174 heads = [self.rev(n) for n in heads]
1175 1175
1176 1176 inc = self.incrementalmissingrevs(common=common)
1177 1177 return [self.node(r) for r in inc.missingancestors(heads)]
1178 1178
1179 1179 def nodesbetween(self, roots=None, heads=None):
1180 1180 """Return a topological path from 'roots' to 'heads'.
1181 1181
1182 1182 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1183 1183 topologically sorted list of all nodes N that satisfy both of
1184 1184 these constraints:
1185 1185
1186 1186 1. N is a descendant of some node in 'roots'
1187 1187 2. N is an ancestor of some node in 'heads'
1188 1188
1189 1189 Every node is considered to be both a descendant and an ancestor
1190 1190 of itself, so every reachable node in 'roots' and 'heads' will be
1191 1191 included in 'nodes'.
1192 1192
1193 1193 'outroots' is the list of reachable nodes in 'roots', i.e., the
1194 1194 subset of 'roots' that is returned in 'nodes'. Likewise,
1195 1195 'outheads' is the subset of 'heads' that is also in 'nodes'.
1196 1196
1197 1197 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1198 1198 unspecified, uses nullid as the only root. If 'heads' is
1199 1199 unspecified, uses list of all of the revlog's heads."""
1200 1200 nonodes = ([], [], [])
1201 1201 if roots is not None:
1202 1202 roots = list(roots)
1203 1203 if not roots:
1204 1204 return nonodes
1205 1205 lowestrev = min([self.rev(n) for n in roots])
1206 1206 else:
1207 1207 roots = [self.nullid] # Everybody's a descendant of nullid
1208 1208 lowestrev = nullrev
1209 1209 if (lowestrev == nullrev) and (heads is None):
1210 1210 # We want _all_ the nodes!
1211 1211 return (
1212 1212 [self.node(r) for r in self],
1213 1213 [self.nullid],
1214 1214 list(self.heads()),
1215 1215 )
1216 1216 if heads is None:
1217 1217 # All nodes are ancestors, so the latest ancestor is the last
1218 1218 # node.
1219 1219 highestrev = len(self) - 1
1220 1220 # Set ancestors to None to signal that every node is an ancestor.
1221 1221 ancestors = None
1222 1222 # Set heads to an empty dictionary for later discovery of heads
1223 1223 heads = {}
1224 1224 else:
1225 1225 heads = list(heads)
1226 1226 if not heads:
1227 1227 return nonodes
1228 1228 ancestors = set()
1229 1229 # Turn heads into a dictionary so we can remove 'fake' heads.
1230 1230 # Also, later we will be using it to filter out the heads we can't
1231 1231 # find from roots.
1232 1232 heads = dict.fromkeys(heads, False)
1233 1233 # Start at the top and keep marking parents until we're done.
1234 1234 nodestotag = set(heads)
1235 1235 # Remember where the top was so we can use it as a limit later.
1236 1236 highestrev = max([self.rev(n) for n in nodestotag])
1237 1237 while nodestotag:
1238 1238 # grab a node to tag
1239 1239 n = nodestotag.pop()
1240 1240 # Never tag nullid
1241 1241 if n == self.nullid:
1242 1242 continue
1243 1243 # A node's revision number represents its place in a
1244 1244 # topologically sorted list of nodes.
1245 1245 r = self.rev(n)
1246 1246 if r >= lowestrev:
1247 1247 if n not in ancestors:
1248 1248 # If we are possibly a descendant of one of the roots
1249 1249 # and we haven't already been marked as an ancestor
1250 1250 ancestors.add(n) # Mark as ancestor
1251 1251 # Add non-nullid parents to list of nodes to tag.
1252 1252 nodestotag.update(
1253 1253 [p for p in self.parents(n) if p != self.nullid]
1254 1254 )
1255 1255 elif n in heads: # We've seen it before, is it a fake head?
1256 1256 # So it is, real heads should not be the ancestors of
1257 1257 # any other heads.
1258 1258 heads.pop(n)
1259 1259 if not ancestors:
1260 1260 return nonodes
1261 1261 # Now that we have our set of ancestors, we want to remove any
1262 1262 # roots that are not ancestors.
1263 1263
1264 1264 # If one of the roots was nullid, everything is included anyway.
1265 1265 if lowestrev > nullrev:
1266 1266 # But, since we weren't, let's recompute the lowest rev to not
1267 1267 # include roots that aren't ancestors.
1268 1268
1269 1269 # Filter out roots that aren't ancestors of heads
1270 1270 roots = [root for root in roots if root in ancestors]
1271 1271 # Recompute the lowest revision
1272 1272 if roots:
1273 1273 lowestrev = min([self.rev(root) for root in roots])
1274 1274 else:
1275 1275 # No more roots? Return empty list
1276 1276 return nonodes
1277 1277 else:
1278 1278 # We are descending from nullid, and don't need to care about
1279 1279 # any other roots.
1280 1280 lowestrev = nullrev
1281 1281 roots = [self.nullid]
1282 1282 # Transform our roots list into a set.
1283 1283 descendants = set(roots)
1284 1284 # Also, keep the original roots so we can filter out roots that aren't
1285 1285 # 'real' roots (i.e. are descended from other roots).
1286 1286 roots = descendants.copy()
1287 1287 # Our topologically sorted list of output nodes.
1288 1288 orderedout = []
1289 1289 # Don't start at nullid since we don't want nullid in our output list,
1290 1290 # and if nullid shows up in descendants, empty parents will look like
1291 1291 # they're descendants.
1292 1292 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1293 1293 n = self.node(r)
1294 1294 isdescendant = False
1295 1295 if lowestrev == nullrev: # Everybody is a descendant of nullid
1296 1296 isdescendant = True
1297 1297 elif n in descendants:
1298 1298 # n is already a descendant
1299 1299 isdescendant = True
1300 1300 # This check only needs to be done here because all the roots
1301 1301 # will start being marked as descendants before the loop.
1302 1302 if n in roots:
1303 1303 # If n was a root, check if it's a 'real' root.
1304 1304 p = tuple(self.parents(n))
1305 1305 # If any of its parents are descendants, it's not a root.
1306 1306 if (p[0] in descendants) or (p[1] in descendants):
1307 1307 roots.remove(n)
1308 1308 else:
1309 1309 p = tuple(self.parents(n))
1310 1310 # A node is a descendant if either of its parents are
1311 1311 # descendants. (We seeded the descendants set with the roots
1312 1312 # up there, remember?)
1313 1313 if (p[0] in descendants) or (p[1] in descendants):
1314 1314 descendants.add(n)
1315 1315 isdescendant = True
1316 1316 if isdescendant and ((ancestors is None) or (n in ancestors)):
1317 1317 # Only include nodes that are both descendants and ancestors.
1318 1318 orderedout.append(n)
1319 1319 if (ancestors is not None) and (n in heads):
1320 1320 # We're trying to figure out which heads are reachable
1321 1321 # from roots.
1322 1322 # Mark this head as having been reached
1323 1323 heads[n] = True
1324 1324 elif ancestors is None:
1325 1325 # Otherwise, we're trying to discover the heads.
1326 1326 # Assume this is a head because if it isn't, the next step
1327 1327 # will eventually remove it.
1328 1328 heads[n] = True
1329 1329 # But, obviously its parents aren't.
1330 1330 for p in self.parents(n):
1331 1331 heads.pop(p, None)
1332 1332 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1333 1333 roots = list(roots)
1334 1334 assert orderedout
1335 1335 assert roots
1336 1336 assert heads
1337 1337 return (orderedout, roots, heads)
1338 1338
1339 1339 def headrevs(self, revs=None):
1340 1340 if revs is None:
1341 1341 try:
1342 1342 return self.index.headrevs()
1343 1343 except AttributeError:
1344 1344 return self._headrevs()
1345 1345 if rustdagop is not None and self.index.rust_ext_compat:
1346 1346 return rustdagop.headrevs(self.index, revs)
1347 1347 return dagop.headrevs(revs, self._uncheckedparentrevs)
1348 1348
1349 1349 def computephases(self, roots):
1350 1350 return self.index.computephasesmapsets(roots)
1351 1351
1352 1352 def _headrevs(self):
1353 1353 count = len(self)
1354 1354 if not count:
1355 1355 return [nullrev]
1356 1356 # we won't iterate over filtered revs, so nobody is a head at start
1357 1357 ishead = [0] * (count + 1)
1358 1358 index = self.index
1359 1359 for r in self:
1360 1360 ishead[r] = 1 # I may be a head
1361 1361 e = index[r]
1362 1362 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1363 1363 return [r for r, val in enumerate(ishead) if val]
1364 1364
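# Toy sketch (not from the original file) of the pure-python head
# computation above: assume every rev is a head, then clear the flag on
# every rev that appears as a parent.  Like the real code, the array is
# sized count + 1 so that nullrev (-1) harmlessly lands in the extra slot.

def _toy_headrevs(parents):
    ishead = [1] * (len(parents) + 1)
    for r, ps in parents.items():
        for p in ps:
            ishead[p] = 0  # my parents are not heads
    return [r for r, val in enumerate(ishead[:-1]) if val]

# linear history 0 <- 1 <- 2 plus a branch 1 <- 3: heads are 2 and 3
assert _toy_headrevs({0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}) == [2, 3]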
1365 1365 def heads(self, start=None, stop=None):
1366 1366 """return the list of all nodes that have no children
1367 1367
1368 1368 if start is specified, only heads that are descendants of
1369 1369 start will be returned
1370 1370 if stop is specified, it will consider all the revs from stop
1371 1371 as if they had no children
1372 1372 """
1373 1373 if start is None and stop is None:
1374 1374 if not len(self):
1375 1375 return [self.nullid]
1376 1376 return [self.node(r) for r in self.headrevs()]
1377 1377
1378 1378 if start is None:
1379 1379 start = nullrev
1380 1380 else:
1381 1381 start = self.rev(start)
1382 1382
1383 1383 stoprevs = {self.rev(n) for n in stop or []}
1384 1384
1385 1385 revs = dagop.headrevssubset(
1386 1386 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1387 1387 )
1388 1388
1389 1389 return [self.node(rev) for rev in revs]
1390 1390
1391 1391 def children(self, node):
1392 1392 """find the children of a given node"""
1393 1393 c = []
1394 1394 p = self.rev(node)
1395 1395 for r in self.revs(start=p + 1):
1396 1396 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1397 1397 if prevs:
1398 1398 for pr in prevs:
1399 1399 if pr == p:
1400 1400 c.append(self.node(r))
1401 1401 elif p == nullrev:
1402 1402 c.append(self.node(r))
1403 1403 return c
1404 1404
1405 1405 def commonancestorsheads(self, a, b):
1406 1406 """calculate all the heads of the common ancestors of nodes a and b"""
1407 1407 a, b = self.rev(a), self.rev(b)
1408 1408 ancs = self._commonancestorsheads(a, b)
1409 1409 return pycompat.maplist(self.node, ancs)
1410 1410
1411 1411 def _commonancestorsheads(self, *revs):
1412 1412 """calculate all the heads of the common ancestors of revs"""
1413 1413 try:
1414 1414 ancs = self.index.commonancestorsheads(*revs)
1415 1415 except (AttributeError, OverflowError): # C implementation failed
1416 1416 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1417 1417 return ancs
1418 1418
1419 1419 def isancestor(self, a, b):
1420 1420 """return True if node a is an ancestor of node b
1421 1421
1422 1422 A revision is considered an ancestor of itself."""
1423 1423 a, b = self.rev(a), self.rev(b)
1424 1424 return self.isancestorrev(a, b)
1425 1425
1426 1426 def isancestorrev(self, a, b):
1427 1427 """return True if revision a is an ancestor of revision b
1428 1428
1429 1429 A revision is considered an ancestor of itself.
1430 1430
1431 1431 The implementation of this is trivial but the use of
1432 1432 reachableroots is not."""
1433 1433 if a == nullrev:
1434 1434 return True
1435 1435 elif a == b:
1436 1436 return True
1437 1437 elif a > b:
1438 1438 return False
1439 1439 return bool(self.reachableroots(a, [b], [a], includepath=False))
1440 1440
1441 1441 def reachableroots(self, minroot, heads, roots, includepath=False):
1442 1442 """return (heads(::(<roots> and <roots>::<heads>)))
1443 1443
1444 1444 If includepath is True, return (<roots>::<heads>)."""
1445 1445 try:
1446 1446 return self.index.reachableroots2(
1447 1447 minroot, heads, roots, includepath
1448 1448 )
1449 1449 except AttributeError:
1450 1450 return dagop._reachablerootspure(
1451 1451 self.parentrevs, minroot, roots, heads, includepath
1452 1452 )
1453 1453
1454 1454 def ancestor(self, a, b):
1455 1455 """calculate the "best" common ancestor of nodes a and b"""
1456 1456
1457 1457 a, b = self.rev(a), self.rev(b)
1458 1458 try:
1459 1459 ancs = self.index.ancestors(a, b)
1460 1460 except (AttributeError, OverflowError):
1461 1461 ancs = ancestor.ancestors(self.parentrevs, a, b)
1462 1462 if ancs:
1463 1463 # choose a consistent winner when there's a tie
1464 1464 return min(map(self.node, ancs))
1465 1465 return self.nullid
1466 1466
1467 1467 def _match(self, id):
1468 1468 if isinstance(id, int):
1469 1469 # rev
1470 1470 return self.node(id)
1471 1471 if len(id) == self.nodeconstants.nodelen:
1472 1472 # possibly a binary node
1473 1473 # odds of a binary node being all hex in ASCII are 1 in 10**25
1474 1474 try:
1475 1475 node = id
1476 1476 self.rev(node) # quick search the index
1477 1477 return node
1478 1478 except error.LookupError:
1479 1479 pass # may be partial hex id
1480 1480 try:
1481 1481 # str(rev)
1482 1482 rev = int(id)
1483 1483 if b"%d" % rev != id:
1484 1484 raise ValueError
1485 1485 if rev < 0:
1486 1486 rev = len(self) + rev
1487 1487 if rev < 0 or rev >= len(self):
1488 1488 raise ValueError
1489 1489 return self.node(rev)
1490 1490 except (ValueError, OverflowError):
1491 1491 pass
1492 1492 if len(id) == 2 * self.nodeconstants.nodelen:
1493 1493 try:
1494 1494 # a full hex nodeid?
1495 1495 node = bin(id)
1496 1496 self.rev(node)
1497 1497 return node
1498 1498 except (TypeError, error.LookupError):
1499 1499 pass
1500 1500
1501 1501 def _partialmatch(self, id):
1502 1502 # we don't care about wdirfilenodeids as they should always be full hashes
1503 1503 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1504 1504 ambiguous = False
1505 1505 try:
1506 1506 partial = self.index.partialmatch(id)
1507 1507 if partial and self.hasnode(partial):
1508 1508 if maybewdir:
1509 1509 # single 'ff...' match in radix tree, ambiguous with wdir
1510 1510 ambiguous = True
1511 1511 else:
1512 1512 return partial
1513 1513 elif maybewdir:
1514 1514 # no 'ff...' match in radix tree, wdir identified
1515 1515 raise error.WdirUnsupported
1516 1516 else:
1517 1517 return None
1518 1518 except error.RevlogError:
1519 1519 # parsers.c radix tree lookup gave multiple matches
1520 1520 # fast path: for unfiltered changelog, radix tree is accurate
1521 1521 if not getattr(self, 'filteredrevs', None):
1522 1522 ambiguous = True
1523 1523 # fall through to slow path that filters hidden revisions
1524 1524 except (AttributeError, ValueError):
1525 1525 # we are pure python, or key was too short to search radix tree
1526 1526 pass
1527 1527 if ambiguous:
1528 1528 raise error.AmbiguousPrefixLookupError(
1529 1529 id, self.display_id, _(b'ambiguous identifier')
1530 1530 )
1531 1531
1532 1532 if id in self._pcache:
1533 1533 return self._pcache[id]
1534 1534
1535 1535 if len(id) <= 40:
1536 1536 try:
1537 1537 # hex(node)[:...]
1538 1538 l = len(id) // 2 # grab an even number of digits
1539 1539 prefix = bin(id[: l * 2])
1540 1540 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1541 1541 nl = [
1542 1542 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1543 1543 ]
1544 1544 if self.nodeconstants.nullhex.startswith(id):
1545 1545 nl.append(self.nullid)
1546 1546 if len(nl) > 0:
1547 1547 if len(nl) == 1 and not maybewdir:
1548 1548 self._pcache[id] = nl[0]
1549 1549 return nl[0]
1550 1550 raise error.AmbiguousPrefixLookupError(
1551 1551 id, self.display_id, _(b'ambiguous identifier')
1552 1552 )
1553 1553 if maybewdir:
1554 1554 raise error.WdirUnsupported
1555 1555 return None
1556 1556 except TypeError:
1557 1557 pass
1558 1558
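# Toy sketch (not from the original file) of the slow-path prefix lookup in
# _partialmatch() above, over plain hex strings instead of the index's radix
# tree; an ambiguous prefix raises, mirroring AmbiguousPrefixLookupError.

def _toy_partialmatch(hexnodes, prefix):
    matches = [n for n in hexnodes if n.startswith(prefix)]
    if len(matches) > 1:
        raise LookupError('ambiguous identifier: %s' % prefix)
    return matches[0] if matches else None

assert _toy_partialmatch(['abc123', 'abd456'], 'abc') == 'abc123'
assert _toy_partialmatch(['abc123', 'abd456'], 'ff') is None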
1559 1559 def lookup(self, id):
1560 1560 """locate a node based on:
1561 1561 - revision number or str(revision number)
1562 1562 - nodeid or subset of hex nodeid
1563 1563 """
1564 1564 n = self._match(id)
1565 1565 if n is not None:
1566 1566 return n
1567 1567 n = self._partialmatch(id)
1568 1568 if n:
1569 1569 return n
1570 1570
1571 1571 raise error.LookupError(id, self.display_id, _(b'no match found'))
1572 1572
1573 1573 def shortest(self, node, minlength=1):
1574 1574 """Find the shortest unambiguous prefix that matches node."""
1575 1575
1576 1576 def isvalid(prefix):
1577 1577 try:
1578 1578 matchednode = self._partialmatch(prefix)
1579 1579 except error.AmbiguousPrefixLookupError:
1580 1580 return False
1581 1581 except error.WdirUnsupported:
1582 1582 # single 'ff...' match
1583 1583 return True
1584 1584 if matchednode is None:
1585 1585 raise error.LookupError(node, self.display_id, _(b'no node'))
1586 1586 return True
1587 1587
1588 1588 def maybewdir(prefix):
1589 1589 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1590 1590
1591 1591 hexnode = hex(node)
1592 1592
1593 1593 def disambiguate(hexnode, minlength):
1594 1594 """Disambiguate against wdirid."""
1595 1595 for length in range(minlength, len(hexnode) + 1):
1596 1596 prefix = hexnode[:length]
1597 1597 if not maybewdir(prefix):
1598 1598 return prefix
1599 1599
1600 1600 if not getattr(self, 'filteredrevs', None):
1601 1601 try:
1602 1602 length = max(self.index.shortest(node), minlength)
1603 1603 return disambiguate(hexnode, length)
1604 1604 except error.RevlogError:
1605 1605 if node != self.nodeconstants.wdirid:
1606 1606 raise error.LookupError(
1607 1607 node, self.display_id, _(b'no node')
1608 1608 )
1609 1609 except AttributeError:
1610 1610 # Fall through to pure code
1611 1611 pass
1612 1612
1613 1613 if node == self.nodeconstants.wdirid:
1614 1614 for length in range(minlength, len(hexnode) + 1):
1615 1615 prefix = hexnode[:length]
1616 1616 if isvalid(prefix):
1617 1617 return prefix
1618 1618
1619 1619 for length in range(minlength, len(hexnode) + 1):
1620 1620 prefix = hexnode[:length]
1621 1621 if isvalid(prefix):
1622 1622 return disambiguate(hexnode, length)
1623 1623
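# Editor's sketch: ``shortest`` underlies template helpers such as
# ``{shortest(node)}``. Assuming ``rl`` is a revlog and ``node`` one of its
# entries:
#
#     prefix = rl.shortest(node, minlength=4)
#     assert rl.lookup(prefix) == node   # the prefix is unambiguous
#     # prefixes consisting entirely of 'f' characters are lengthened
#     # until they can no longer be confused with the virtual
#     # working-directory id.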
1624 1624 def cmp(self, node, text):
1625 1625 """compare text with a given file revision
1626 1626
1627 1627 returns True if text is different from what is stored.
1628 1628 """
1629 1629 p1, p2 = self.parents(node)
1630 1630 return storageutil.hashrevisionsha1(text, p1, p2) != node
1631 1631
1632 1632 def _cachesegment(self, offset, data):
1633 1633 """Add a segment to the revlog cache.
1634 1634
1635 1635 Accepts an absolute offset and the data that is at that location.
1636 1636 """
1637 1637 o, d = self._chunkcache
1638 1638 # try to add to existing cache
1639 1639 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1640 1640 self._chunkcache = o, d + data
1641 1641 else:
1642 1642 self._chunkcache = offset, data
1643 1643
1644 1644 def _readsegment(self, offset, length, df=None):
1645 1645 """Load a segment of raw data from the revlog.
1646 1646
1647 1647 Accepts an absolute offset, length to read, and an optional existing
1648 1648 file handle to read from.
1649 1649
1650 1650 If an existing file handle is passed, it will be seeked and the
1651 1651 original seek position will NOT be restored.
1652 1652
1653 1653 Returns a str or buffer of raw byte data.
1654 1654
1655 1655 Raises if the requested number of bytes could not be read.
1656 1656 """
1657 1657 # Cache data both forward and backward around the requested
1658 1658 # data, in a fixed size window. This helps speed up operations
1659 1659 # involving reading the revlog backwards.
1660 1660 cachesize = self._chunkcachesize
1661 1661 realoffset = offset & ~(cachesize - 1)
1662 1662 reallength = (
1663 1663 (offset + length + cachesize) & ~(cachesize - 1)
1664 1664 ) - realoffset
1665 1665 with self._datareadfp(df) as df:
1666 1666 df.seek(realoffset)
1667 1667 d = df.read(reallength)
1668 1668
1669 1669 self._cachesegment(realoffset, d)
1670 1670 if offset != realoffset or reallength != length:
1671 1671 startoffset = offset - realoffset
1672 1672 if len(d) - startoffset < length:
1673 1673 filename = self._indexfile if self._inline else self._datafile
1674 1674 got = len(d) - startoffset
1675 1675 m = PARTIAL_READ_MSG % (filename, length, offset, got)
1676 1676 raise error.RevlogError(m)
1677 1677 return util.buffer(d, startoffset, length)
1678 1678
1679 1679 if len(d) < length:
1680 1680 filename = self._indexfile if self._inline else self._datafile
1681 1681 got = len(d)
1682 1682 m = PARTIAL_READ_MSG % (filename, length, offset, got)
1683 1683 raise error.RevlogError(m)
1684 1684
1685 1685 return d
1686 1686
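# Editor's note (illustrative arithmetic, not part of the method):
# ``_chunkcachesize`` must be a power of two, so masking with
# ``~(cachesize - 1)`` rounds down to a multiple of the cache size.
# For example, with a 64KiB cache window:
#
#     cachesize = 65536
#     offset, length = 70000, 100
#     realoffset = offset & ~(cachesize - 1)            # -> 65536
#     reallength = ((offset + length + cachesize)
#                   & ~(cachesize - 1)) - realoffset    # -> 65536
#     # the aligned window [65536, 131072) covers [70000, 70100)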
1687 1687 def _getsegment(self, offset, length, df=None):
1688 1688 """Obtain a segment of raw data from the revlog.
1689 1689
1690 1690 Accepts an absolute offset, length of bytes to obtain, and an
1691 1691 optional file handle to the already-opened revlog. If the file
1692 1692 handle is used, its original seek position will not be preserved.
1693 1693
1694 1694 Requests for data may be returned from a cache.
1695 1695
1696 1696 Returns a str or a buffer instance of raw byte data.
1697 1697 """
1698 1698 o, d = self._chunkcache
1699 1699 l = len(d)
1700 1700
1701 1701 # is it in the cache?
1702 1702 cachestart = offset - o
1703 1703 cacheend = cachestart + length
1704 1704 if cachestart >= 0 and cacheend <= l:
1705 1705 if cachestart == 0 and cacheend == l:
1706 1706 return d # avoid a copy
1707 1707 return util.buffer(d, cachestart, cacheend - cachestart)
1708 1708
1709 1709 return self._readsegment(offset, length, df=df)
1710 1710
1711 1711 def _getsegmentforrevs(self, startrev, endrev, df=None):
1712 1712 """Obtain a segment of raw data corresponding to a range of revisions.
1713 1713
1714 1714 Accepts the start and end revisions and an optional already-open
1715 1715 file handle to be used for reading. If the file handle is used, its
1716 1716 seek position will not be preserved.
1717 1717
1718 1718 Requests for data may be satisfied by a cache.
1719 1719
1720 1720 Returns a 2-tuple of (offset, data) for the requested range of
1721 1721 revisions. Offset is the integer offset from the beginning of the
1722 1722 revlog and data is a str or buffer of the raw byte data.
1723 1723
1724 1724 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1725 1725 to determine where each revision's data begins and ends.
1726 1726 """
1727 1727 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1728 1728 # (functions are expensive).
1729 1729 index = self.index
1730 1730 istart = index[startrev]
1731 1731 start = int(istart[0] >> 16)
1732 1732 if startrev == endrev:
1733 1733 end = start + istart[1]
1734 1734 else:
1735 1735 iend = index[endrev]
1736 1736 end = int(iend[0] >> 16) + iend[1]
1737 1737
1738 1738 if self._inline:
1739 1739 start += (startrev + 1) * self.index.entry_size
1740 1740 end += (endrev + 1) * self.index.entry_size
1741 1741 length = end - start
1742 1742
1743 1743 return start, self._getsegment(start, length, df=df)
1744 1744
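# Editor's note: for inline revlogs, index entries and data chunks are
# interleaved in the ``.i`` file, so the physical position of a chunk must
# skip over the index entries that precede it. A sketch of the adjustment
# performed above, assuming 64-byte index entries:
#
#     entry_size = 64
#     data_start = 1000                           # offset among data bytes
#     physical = data_start + (startrev + 1) * entry_size
#     # i.e. one index entry per revision up to and including ``startrev``
#     # sits before this chunk on disk.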
1745 1745 def _chunk(self, rev, df=None):
1746 1746 """Obtain a single decompressed chunk for a revision.
1747 1747
1748 1748 Accepts an integer revision and an optional already-open file handle
1749 1749 to be used for reading. If used, the seek position of the file will not
1750 1750 be preserved.
1751 1751
1752 1752 Returns a str holding uncompressed data for the requested revision.
1753 1753 """
1754 1754 compression_mode = self.index[rev][10]
1755 1755 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1756 1756 if compression_mode == COMP_MODE_PLAIN:
1757 1757 return data
1758 1758 elif compression_mode == COMP_MODE_DEFAULT:
1759 1759 return self._decompressor(data)
1760 1760 elif compression_mode == COMP_MODE_INLINE:
1761 1761 return self.decompress(data)
1762 1762 else:
1763 1763 msg = 'unknown compression mode %d'
1764 1764 msg %= compression_mode
1765 1765 raise error.RevlogError(msg)
1766 1766
1767 1767 def _chunks(self, revs, df=None, targetsize=None):
1768 1768 """Obtain decompressed chunks for the specified revisions.
1769 1769
1770 1770 Accepts an iterable of numeric revisions that are assumed to be in
1771 1771 ascending order. Also accepts an optional already-open file handle
1772 1772 to be used for reading. If used, the seek position of the file will
1773 1773 not be preserved.
1774 1774
1775 1775 This function is similar to calling ``self._chunk()`` multiple times,
1776 1776 but is faster.
1777 1777
1778 1778 Returns a list with decompressed data for each requested revision.
1779 1779 """
1780 1780 if not revs:
1781 1781 return []
1782 1782 start = self.start
1783 1783 length = self.length
1784 1784 inline = self._inline
1785 1785 iosize = self.index.entry_size
1786 1786 buffer = util.buffer
1787 1787
1788 1788 l = []
1789 1789 ladd = l.append
1790 1790
1791 1791 if not self._withsparseread:
1792 1792 slicedchunks = (revs,)
1793 1793 else:
1794 1794 slicedchunks = deltautil.slicechunk(
1795 1795 self, revs, targetsize=targetsize
1796 1796 )
1797 1797
1798 1798 for revschunk in slicedchunks:
1799 1799 firstrev = revschunk[0]
1800 1800 # Skip trailing revisions with empty diff
1801 1801 for lastrev in revschunk[::-1]:
1802 1802 if length(lastrev) != 0:
1803 1803 break
1804 1804
1805 1805 try:
1806 1806 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1807 1807 except OverflowError:
1808 1808 # issue4215 - we can't cache a run of chunks greater than
1809 1809 # 2G on Windows
1810 1810 return [self._chunk(rev, df=df) for rev in revschunk]
1811 1811
1812 1812 decomp = self.decompress
1813 1813 # self._decompressor might be None, but will not be used in that case
1814 1814 def_decomp = self._decompressor
1815 1815 for rev in revschunk:
1816 1816 chunkstart = start(rev)
1817 1817 if inline:
1818 1818 chunkstart += (rev + 1) * iosize
1819 1819 chunklength = length(rev)
1820 1820 comp_mode = self.index[rev][10]
1821 1821 c = buffer(data, chunkstart - offset, chunklength)
1822 1822 if comp_mode == COMP_MODE_PLAIN:
1823 1823 ladd(c)
1824 1824 elif comp_mode == COMP_MODE_INLINE:
1825 1825 ladd(decomp(c))
1826 1826 elif comp_mode == COMP_MODE_DEFAULT:
1827 1827 ladd(def_decomp(c))
1828 1828 else:
1829 1829 msg = 'unknown compression mode %d'
1830 1830 msg %= comp_mode
1831 1831 raise error.RevlogError(msg)
1832 1832
1833 1833 return l
1834 1834
1835 1835 def _chunkclear(self):
1836 1836 """Clear the raw chunk cache."""
1837 1837 self._chunkcache = (0, b'')
1838 1838
1839 1839 def deltaparent(self, rev):
1840 1840 """return deltaparent of the given revision"""
1841 1841 base = self.index[rev][3]
1842 1842 if base == rev:
1843 1843 return nullrev
1844 1844 elif self._generaldelta:
1845 1845 return base
1846 1846 else:
1847 1847 return rev - 1
1848 1848
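# Editor's sketch of the two storage layouts handled above (``rl`` is an
# illustrative revlog, ``rev`` a non-base revision):
#
#     if rl._generaldelta:
#         # the base field names the delta parent directly, so chains
#         # can follow any topology
#         assert rl.deltaparent(rev) in (nullrev, rl.index[rev][3])
#     else:
#         # deltas are always against the previous revision in storage
#         # order, hence ``rev - 1``
#         assert rl.deltaparent(rev) in (nullrev, rev - 1)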
1849 1849 def issnapshot(self, rev):
1850 1850 """tells whether rev is a snapshot"""
1851 1851 if not self._sparserevlog:
1852 1852 return self.deltaparent(rev) == nullrev
1853 1853 elif util.safehasattr(self.index, b'issnapshot'):
1854 1854 # directly assign the method to cache the testing and access
1855 1855 self.issnapshot = self.index.issnapshot
1856 1856 return self.issnapshot(rev)
1857 1857 if rev == nullrev:
1858 1858 return True
1859 1859 entry = self.index[rev]
1860 1860 base = entry[3]
1861 1861 if base == rev:
1862 1862 return True
1863 1863 if base == nullrev:
1864 1864 return True
1865 1865 p1 = entry[5]
1866 1866 p2 = entry[6]
1867 1867 if base == p1 or base == p2:
1868 1868 return False
1869 1869 return self.issnapshot(base)
1870 1870
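# Editor's note: in a sparse revlog, a snapshot is a revision whose delta
# base is not one of its parents, chaining (recursively) onto another
# snapshot. A hypothetical chain:
#
#     rev 0: full text                      -> snapshot
#     rev 4: base=0, parents=(3, -1)        -> intermediate snapshot
#             (base is not a parent, and base is itself a snapshot)
#     rev 7: base=6, parents=(6, -1)        -> plain delta against p1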
1871 1871 def snapshotdepth(self, rev):
1872 1872 """number of snapshot in the chain before this one"""
1873 1873 if not self.issnapshot(rev):
1874 1874 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1875 1875 return len(self._deltachain(rev)[0]) - 1
1876 1876
1877 1877 def revdiff(self, rev1, rev2):
1878 1878 """return or calculate a delta between two revisions
1879 1879
1880 1880 The delta calculated is in binary form and is intended to be written to
1881 1881 revlog data directly. So this function needs raw revision data.
1882 1882 """
1883 1883 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1884 1884 return bytes(self._chunk(rev2))
1885 1885
1886 1886 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1887 1887
1888 1888 def _processflags(self, text, flags, operation, raw=False):
1889 1889 """deprecated entry point to access flag processors"""
1890 1890 msg = b'_processflag(...) use the specialized variant'
1891 1891 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1892 1892 if raw:
1893 1893 return text, flagutil.processflagsraw(self, text, flags)
1894 1894 elif operation == b'read':
1895 1895 return flagutil.processflagsread(self, text, flags)
1896 1896 else: # write operation
1897 1897 return flagutil.processflagswrite(self, text, flags)
1898 1898
1899 1899 def revision(self, nodeorrev, _df=None, raw=False):
1900 1900 """return an uncompressed revision of a given node or revision
1901 1901 number.
1902 1902
1903 1903 _df - an existing file handle to read from. (internal-only)
1904 1904 raw - an optional argument specifying if the revision data is to be
1905 1905 treated as raw data when applying flag transforms. 'raw' should be set
1906 1906 to True when generating changegroups or in debug commands.
1907 1907 """
1908 1908 if raw:
1909 1909 msg = (
1910 1910 b'revlog.revision(..., raw=True) is deprecated, '
1911 1911 b'use revlog.rawdata(...)'
1912 1912 )
1913 1913 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1914 1914 return self._revisiondata(nodeorrev, _df, raw=raw)
1915 1915
1916 1916 def sidedata(self, nodeorrev, _df=None):
1917 1917 """a map of extra data related to the changeset but not part of the hash
1918 1918
1919 1919 This function currently returns a dictionary. However, a more
1920 1920 advanced mapping object will likely be used in the future for
1921 1921 more efficient/lazy code.
1922 1922 """
1923 1923 # deal with <nodeorrev> argument type
1924 1924 if isinstance(nodeorrev, int):
1925 1925 rev = nodeorrev
1926 1926 else:
1927 1927 rev = self.rev(nodeorrev)
1928 1928 return self._sidedata(rev)
1929 1929
1930 1930 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1931 1931 # deal with <nodeorrev> argument type
1932 1932 if isinstance(nodeorrev, int):
1933 1933 rev = nodeorrev
1934 1934 node = self.node(rev)
1935 1935 else:
1936 1936 node = nodeorrev
1937 1937 rev = None
1938 1938
1939 1939 # fast path the special `nullid` rev
1940 1940 if node == self.nullid:
1941 1941 return b""
1942 1942
1943 1943 # ``rawtext`` is the text as stored inside the revlog. Might be the
1944 1944 # revision or might need to be processed to retrieve the revision.
1945 1945 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1946 1946
1947 1947 if raw and validated:
1948 1948 # if we don't want to process the raw text and that raw
1949 1949 # text is cached, we can exit early.
1950 1950 return rawtext
1951 1951 if rev is None:
1952 1952 rev = self.rev(node)
1953 1953 # the revlog's flags for this revision
1954 1954 # (usually altering its state or content)
1955 1955 flags = self.flags(rev)
1956 1956
1957 1957 if validated and flags == REVIDX_DEFAULT_FLAGS:
1958 1958 # no extra flags set, no flag processor runs, text = rawtext
1959 1959 return rawtext
1960 1960
1961 1961 if raw:
1962 1962 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1963 1963 text = rawtext
1964 1964 else:
1965 1965 r = flagutil.processflagsread(self, rawtext, flags)
1966 1966 text, validatehash = r
1967 1967 if validatehash:
1968 1968 self.checkhash(text, node, rev=rev)
1969 1969 if not validated:
1970 1970 self._revisioncache = (node, rev, rawtext)
1971 1971
1972 1972 return text
1973 1973
1974 1974 def _rawtext(self, node, rev, _df=None):
1975 1975 """return the possibly unvalidated rawtext for a revision
1976 1976
1977 1977 returns (rev, rawtext, validated)
1978 1978 """
1979 1979
1980 1980 # revision in the cache (could be useful to apply delta)
1981 1981 cachedrev = None
1982 1982 # An intermediate text to apply deltas to
1983 1983 basetext = None
1984 1984
1985 1985 # Check if we have the entry in cache
1986 1986 # The cache entry looks like (node, rev, rawtext)
1987 1987 if self._revisioncache:
1988 1988 if self._revisioncache[0] == node:
1989 1989 return (rev, self._revisioncache[2], True)
1990 1990 cachedrev = self._revisioncache[1]
1991 1991
1992 1992 if rev is None:
1993 1993 rev = self.rev(node)
1994 1994
1995 1995 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1996 1996 if stopped:
1997 1997 basetext = self._revisioncache[2]
1998 1998
1999 1999 # drop cache to save memory, the caller is expected to
2000 2000 # update self._revisioncache after validating the text
2001 2001 self._revisioncache = None
2002 2002
2003 2003 targetsize = None
2004 2004 rawsize = self.index[rev][2]
2005 2005 if 0 <= rawsize:
2006 2006 targetsize = 4 * rawsize
2007 2007
2008 2008 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2009 2009 if basetext is None:
2010 2010 basetext = bytes(bins[0])
2011 2011 bins = bins[1:]
2012 2012
2013 2013 rawtext = mdiff.patches(basetext, bins)
2014 2014 del basetext # let us have a chance to free memory early
2015 2015 return (rev, rawtext, False)
2016 2016
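# Editor's sketch of the reconstruction above: the raw text is the chain's
# base snapshot with every subsequent delta applied in order. Roughly,
# under the names used in this method:
#
#     chain, stopped = rl._deltachain(rev)   # [base, ..., rev]
#     bins = rl._chunks(chain)               # decompressed chunks
#     rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
#
# When the cached revision lies on the chain, ``stopped`` is true and the
# cached rawtext is used as the base instead of ``bins[0]``.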
2017 2017 def _sidedata(self, rev):
2018 2018 """Return the sidedata for a given revision number."""
2019 2019 index_entry = self.index[rev]
2020 2020 sidedata_offset = index_entry[8]
2021 2021 sidedata_size = index_entry[9]
2022 2022
2023 2023 if self._inline:
2024 2024 sidedata_offset += self.index.entry_size * (1 + rev)
2025 2025 if sidedata_size == 0:
2026 2026 return {}
2027 2027
2028 2028 # XXX this needs caching, as we do for data
2029 2029 with self._sidedatareadfp() as sdf:
2030 2030 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2031 2031 filename = self._sidedatafile
2032 2032 end = self._docket.sidedata_end
2033 2033 offset = sidedata_offset
2034 2034 length = sidedata_size
2035 2035 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2036 2036 raise error.RevlogError(m)
2037 2037
2038 2038 sdf.seek(sidedata_offset, os.SEEK_SET)
2039 2039 comp_segment = sdf.read(sidedata_size)
2040 2040
2041 2041 if len(comp_segment) < sidedata_size:
2042 2042 filename = self._sidedatafile
2043 2043 length = sidedata_size
2044 2044 offset = sidedata_offset
2045 2045 got = len(comp_segment)
2046 2046 m = PARTIAL_READ_MSG % (filename, length, offset, got)
2047 2047 raise error.RevlogError(m)
2048 2048
2049 2049 comp = self.index[rev][11]
2050 2050 if comp == COMP_MODE_PLAIN:
2051 2051 segment = comp_segment
2052 2052 elif comp == COMP_MODE_DEFAULT:
2053 2053 segment = self._decompressor(comp_segment)
2054 2054 elif comp == COMP_MODE_INLINE:
2055 2055 segment = self.decompress(comp_segment)
2056 2056 else:
2057 2057 msg = 'unknown compression mode %d'
2058 2058 msg %= comp
2059 2059 raise error.RevlogError(msg)
2060 2060
2061 2061 sidedata = sidedatautil.deserialize_sidedata(segment)
2062 2062 return sidedata
2063 2063
2064 2064 def rawdata(self, nodeorrev, _df=None):
2065 2065 """return an uncompressed raw data of a given node or revision number.
2066 2066
2067 2067 _df - an existing file handle to read from. (internal-only)
2068 2068 """
2069 2069 return self._revisiondata(nodeorrev, _df, raw=True)
2070 2070
2071 2071 def hash(self, text, p1, p2):
2072 2072 """Compute a node hash.
2073 2073
2074 2074 Available as a function so that subclasses can replace the hash
2075 2075 as needed.
2076 2076 """
2077 2077 return storageutil.hashrevisionsha1(text, p1, p2)
2078 2078
2079 2079 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2080 2080 """Check node hash integrity.
2081 2081
2082 2082 Available as a function so that subclasses can extend hash mismatch
2083 2083 behaviors as needed.
2084 2084 """
2085 2085 try:
2086 2086 if p1 is None and p2 is None:
2087 2087 p1, p2 = self.parents(node)
2088 2088 if node != self.hash(text, p1, p2):
2089 2089 # Clear the revision cache on hash failure. The revision cache
2090 2090 # only stores the raw revision and clearing the cache does have
2091 2091 # the side-effect that we won't have a cache hit when the raw
2092 2092 # revision data is accessed. But this case should be rare and
2093 2093 # it is extra work to teach the cache about the hash
2094 2094 # verification state.
2095 2095 if self._revisioncache and self._revisioncache[0] == node:
2096 2096 self._revisioncache = None
2097 2097
2098 2098 revornode = rev
2099 2099 if revornode is None:
2100 2100 revornode = templatefilters.short(hex(node))
2101 2101 raise error.RevlogError(
2102 2102 _(b"integrity check failed on %s:%s")
2103 2103 % (self.display_id, pycompat.bytestr(revornode))
2104 2104 )
2105 2105 except error.RevlogError:
2106 2106 if self._censorable and storageutil.iscensoredtext(text):
2107 2107 raise error.CensoredNodeError(self.display_id, node, text)
2108 2108 raise
2109 2109
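# Editor's note: ``storageutil.hashrevisionsha1`` computes the node as the
# SHA-1 of both parent nodes (in sorted order) followed by the text, which
# is why parents are resolved above before hashing. A minimal standalone
# equivalent:
#
#     import hashlib
#     def hashrevisionsha1(text, p1, p2):
#         s = hashlib.sha1(min(p1, p2))
#         s.update(max(p1, p2))
#         s.update(text)
#         return s.digest()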
2110 2110 def _enforceinlinesize(self, tr):
2111 2111 """Check if the revlog is too big for inline and convert if so.
2112 2112
2113 2113 This should be called after revisions are added to the revlog. If the
2114 2114 revlog has grown too large to be an inline revlog, it will convert it
2115 2115 to use multiple index and data files.
2116 2116 """
2117 2117 tiprev = len(self) - 1
2118 2118 total_size = self.start(tiprev) + self.length(tiprev)
2119 2119 if not self._inline or total_size < _maxinline:
2120 2120 return
2121 2121
2122 2122 troffset = tr.findoffset(self._indexfile)
2123 2123 if troffset is None:
2124 2124 raise error.RevlogError(
2125 2125 _(b"%s not found in the transaction") % self._indexfile
2126 2126 )
2127 2127 trindex = 0
2128 2128 tr.add(self._datafile, 0)
2129 2129
2130 2130 existing_handles = False
2131 2131 if self._writinghandles is not None:
2132 2132 existing_handles = True
2133 2133 fp = self._writinghandles[0]
2134 2134 fp.flush()
2135 2135 fp.close()
2136 2136 # We can't use the cached file handle after close(). So prevent
2137 2137 # its usage.
2138 2138 self._writinghandles = None
2139 2139
2140 2140 new_dfh = self._datafp(b'w+')
2141 2141 new_dfh.truncate(0) # drop any potentially existing data
2142 2142 try:
2143 2143 with self._indexfp() as read_ifh:
2144 2144 for r in self:
2145 2145 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2146 2146 if troffset <= self.start(r) + r * self.index.entry_size:
2147 2147 trindex = r
2148 2148 new_dfh.flush()
2149 2149
2150 2150 with self.__index_new_fp() as fp:
2151 2151 self._format_flags &= ~FLAG_INLINE_DATA
2152 2152 self._inline = False
2153 2153 for i in self:
2154 2154 e = self.index.entry_binary(i)
2155 2155 if i == 0 and self._docket is None:
2156 2156 header = self._format_flags | self._format_version
2157 2157 header = self.index.pack_header(header)
2158 2158 e = header + e
2159 2159 fp.write(e)
2160 2160 if self._docket is not None:
2161 2161 self._docket.index_end = fp.tell()
2162 2162
2163 2163 # There is a small transactional race here. If the rename of
2164 2164 # the index fails, we should remove the datafile. It is more
2165 2165 # important to ensure that the data file is not truncated
2166 2166 # when the index is replaced as otherwise data is lost.
2167 2167 tr.replace(self._datafile, self.start(trindex))
2168 2168
2169 2169 # the temp file replaces the real index when we exit the context
2170 2170 # manager
2171 2171
2172 2172 tr.replace(self._indexfile, trindex * self.index.entry_size)
2173 2173 nodemaputil.setup_persistent_nodemap(tr, self)
2174 2174 self._chunkclear()
2175 2175
2176 2176 if existing_handles:
2177 2177 # switched from inline to conventional; reopen the index
2178 2178 ifh = self.__index_write_fp()
2179 2179 self._writinghandles = (ifh, new_dfh, None)
2180 2180 new_dfh = None
2181 2181 finally:
2182 2182 if new_dfh is not None:
2183 2183 new_dfh.close()
2184 2184
2185 2185 def _nodeduplicatecallback(self, transaction, node):
2186 2186 """called when trying to add a node already stored."""
2187 2187
2188 2188 @contextlib.contextmanager
2189 2189 def _writing(self, transaction):
2190 2190 if self._trypending:
2191 2191 msg = b'try to write in a `trypending` revlog: %s'
2192 2192 msg %= self.display_id
2193 2193 raise error.ProgrammingError(msg)
2194 2194 if self._writinghandles is not None:
2195 2195 yield
2196 2196 else:
2197 2197 ifh = dfh = sdfh = None
2198 2198 try:
2199 2199 r = len(self)
2200 2200 # opening the data file.
2201 2201 dsize = 0
2202 2202 if r:
2203 2203 dsize = self.end(r - 1)
2204 2204 dfh = None
2205 2205 if not self._inline:
2206 2206 try:
2207 2207 dfh = self._datafp(b"r+")
2208 2208 if self._docket is None:
2209 2209 dfh.seek(0, os.SEEK_END)
2210 2210 else:
2211 2211 dfh.seek(self._docket.data_end, os.SEEK_SET)
2212 2212 except IOError as inst:
2213 2213 if inst.errno != errno.ENOENT:
2214 2214 raise
2215 2215 dfh = self._datafp(b"w+")
2216 2216 transaction.add(self._datafile, dsize)
2217 2217 if self._sidedatafile is not None:
2218 2218 try:
2219 2219 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2220 2220 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2221 2221 except IOError as inst:
2222 2222 if inst.errno != errno.ENOENT:
2223 2223 raise
2224 2224 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2225 2225 transaction.add(
2226 2226 self._sidedatafile, self._docket.sidedata_end
2227 2227 )
2228 2228
2229 2229 # opening the index file.
2230 2230 isize = r * self.index.entry_size
2231 2231 ifh = self.__index_write_fp()
2232 2232 if self._inline:
2233 2233 transaction.add(self._indexfile, dsize + isize)
2234 2234 else:
2235 2235 transaction.add(self._indexfile, isize)
2236 2236 # exposing all file handle for writing.
2237 2237 self._writinghandles = (ifh, dfh, sdfh)
2238 2238 yield
2239 2239 if self._docket is not None:
2240 2240 self._write_docket(transaction)
2241 2241 finally:
2242 2242 self._writinghandles = None
2243 2243 if dfh is not None:
2244 2244 dfh.close()
2245 2245 if sdfh is not None:
2246 dfh.close()
2246 sdfh.close()
2247 2247 # closing the index file last to avoid exposing references to
2248 2248 # potentially unflushed data content.
2249 2249 if ifh is not None:
2250 2250 ifh.close()
2251 2251
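# Editor's sketch of the intended usage (the context manager exposes the
# three write handles and commits the docket on exit):
#
#     with rl._writing(transaction):
#         rl._addrevision(node, rawtext, transaction, link,
#                         p1, p2, flags, cachedelta=None)
#
# Nested uses are cheap: when handles already exist, the manager simply
# yields without reopening anything.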
2252 2252 def _write_docket(self, transaction):
2253 2253 """write the current docket on disk
2254 2254
2255 2255 Exists as a method to help the changelog implement transaction logic.
2256 2256
2257 2257 We could also imagine using the same transaction logic for all revlogs,
2258 2258 since dockets are cheap.
2259 2259 self._docket.write(transaction)
2260 2260
2261 2261 def addrevision(
2262 2262 self,
2263 2263 text,
2264 2264 transaction,
2265 2265 link,
2266 2266 p1,
2267 2267 p2,
2268 2268 cachedelta=None,
2269 2269 node=None,
2270 2270 flags=REVIDX_DEFAULT_FLAGS,
2271 2271 deltacomputer=None,
2272 2272 sidedata=None,
2273 2273 ):
2274 2274 """add a revision to the log
2275 2275
2276 2276 text - the revision data to add
2277 2277 transaction - the transaction object used for rollback
2278 2278 link - the linkrev data to add
2279 2279 p1, p2 - the parent nodeids of the revision
2280 2280 cachedelta - an optional precomputed delta
2281 2281 node - nodeid of revision; typically node is not specified, and it is
2282 2282 computed by default as hash(text, p1, p2), however subclasses might
2283 2283 use different hashing method (and override checkhash() in such case)
2284 2284 flags - the known flags to set on the revision
2285 2285 deltacomputer - an optional deltacomputer instance shared between
2286 2286 multiple calls
2287 2287 """
2288 2288 if link == nullrev:
2289 2289 raise error.RevlogError(
2290 2290 _(b"attempted to add linkrev -1 to %s") % self.display_id
2291 2291 )
2292 2292
2293 2293 if sidedata is None:
2294 2294 sidedata = {}
2295 2295 elif sidedata and not self.hassidedata:
2296 2296 raise error.ProgrammingError(
2297 2297 _(b"trying to add sidedata to a revlog who don't support them")
2298 2298 )
2299 2299
2300 2300 if flags:
2301 2301 node = node or self.hash(text, p1, p2)
2302 2302
2303 2303 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2304 2304
2305 2305 # If the flag processor modifies the revision data, ignore any provided
2306 2306 # cachedelta.
2307 2307 if rawtext != text:
2308 2308 cachedelta = None
2309 2309
2310 2310 if len(rawtext) > _maxentrysize:
2311 2311 raise error.RevlogError(
2312 2312 _(
2313 2313 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2314 2314 )
2315 2315 % (self.display_id, len(rawtext))
2316 2316 )
2317 2317
2318 2318 node = node or self.hash(rawtext, p1, p2)
2319 2319 rev = self.index.get_rev(node)
2320 2320 if rev is not None:
2321 2321 return rev
2322 2322
2323 2323 if validatehash:
2324 2324 self.checkhash(rawtext, node, p1=p1, p2=p2)
2325 2325
2326 2326 return self.addrawrevision(
2327 2327 rawtext,
2328 2328 transaction,
2329 2329 link,
2330 2330 p1,
2331 2331 p2,
2332 2332 node,
2333 2333 flags,
2334 2334 cachedelta=cachedelta,
2335 2335 deltacomputer=deltacomputer,
2336 2336 sidedata=sidedata,
2337 2337 )
2338 2338
2339 2339 def addrawrevision(
2340 2340 self,
2341 2341 rawtext,
2342 2342 transaction,
2343 2343 link,
2344 2344 p1,
2345 2345 p2,
2346 2346 node,
2347 2347 flags,
2348 2348 cachedelta=None,
2349 2349 deltacomputer=None,
2350 2350 sidedata=None,
2351 2351 ):
2352 2352 """add a raw revision with known flags, node and parents
2353 2353 useful when reusing a revision not stored in this revlog (ex: received
2354 2354 over wire, or read from an external bundle).
2355 2355 """
2356 2356 with self._writing(transaction):
2357 2357 return self._addrevision(
2358 2358 node,
2359 2359 rawtext,
2360 2360 transaction,
2361 2361 link,
2362 2362 p1,
2363 2363 p2,
2364 2364 flags,
2365 2365 cachedelta,
2366 2366 deltacomputer=deltacomputer,
2367 2367 sidedata=sidedata,
2368 2368 )
2369 2369
2370 2370 def compress(self, data):
2371 2371 """Generate a possibly-compressed representation of data."""
2372 2372 if not data:
2373 2373 return b'', data
2374 2374
2375 2375 compressed = self._compressor.compress(data)
2376 2376
2377 2377 if compressed:
2378 2378 # The revlog compressor added the header in the returned data.
2379 2379 return b'', compressed
2380 2380
2381 2381 if data[0:1] == b'\0':
2382 2382 return b'', data
2383 2383 return b'u', data
2384 2384
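# Editor's note on the return convention: ``compress`` yields a
# (header, data) pair that is concatenated on disk. An illustration of the
# three cases above:
#
#     (b'', compressed)   # engine output carries its own header,
#                         # e.g. b'x' for zlib
#     (b'', data)         # raw data starting with b'\0' is stored as-is
#                         # and recognized by that first byte
#     (b'u', data)        # otherwise an explicit "uncompressed" marker
#                         # byte is prepended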
2385 2385 def decompress(self, data):
2386 2386 """Decompress a revlog chunk.
2387 2387
2388 2388 The chunk is expected to begin with a header identifying the
2389 2389 format type so it can be routed to an appropriate decompressor.
2390 2390 """
2391 2391 if not data:
2392 2392 return data
2393 2393
2394 2394 # Revlogs are read much more frequently than they are written and many
2395 2395 # chunks only take microseconds to decompress, so performance is
2396 2396 # important here.
2397 2397 #
2398 2398 # We can make a few assumptions about revlogs:
2399 2399 #
2400 2400 # 1) the majority of chunks will be compressed (as opposed to inline
2401 2401 # raw data).
2402 2402 # 2) decompressing *any* data will likely be at least 10x slower than
2403 2403 # returning raw inline data.
2404 2404 # 3) we want to prioritize common and officially supported compression
2405 2405 # engines
2406 2406 #
2407 2407 # It follows that we want to optimize for "decompress compressed data
2408 2408 # when encoded with common and officially supported compression engines"
2409 2409 # case over "raw data" and "data encoded by less common or non-official
2410 2410 # compression engines." That is why we have the inline lookup first
2411 2411 # followed by the compengines lookup.
2412 2412 #
2413 2413 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2414 2414 # compressed chunks. And this matters for changelog and manifest reads.
2415 2415 t = data[0:1]
2416 2416
2417 2417 if t == b'x':
2418 2418 try:
2419 2419 return _zlibdecompress(data)
2420 2420 except zlib.error as e:
2421 2421 raise error.RevlogError(
2422 2422 _(b'revlog decompress error: %s')
2423 2423 % stringutil.forcebytestr(e)
2424 2424 )
2425 2425 # '\0' is more common than 'u' so it goes first.
2426 2426 elif t == b'\0':
2427 2427 return data
2428 2428 elif t == b'u':
2429 2429 return util.buffer(data, 1)
2430 2430
2431 2431 compressor = self._get_decompressor(t)
2432 2432
2433 2433 return compressor.decompress(data)
2434 2434
2435 2435 def _addrevision(
2436 2436 self,
2437 2437 node,
2438 2438 rawtext,
2439 2439 transaction,
2440 2440 link,
2441 2441 p1,
2442 2442 p2,
2443 2443 flags,
2444 2444 cachedelta,
2445 2445 alwayscache=False,
2446 2446 deltacomputer=None,
2447 2447 sidedata=None,
2448 2448 ):
2449 2449 """internal function to add revisions to the log
2450 2450
2451 2451 see addrevision for argument descriptions.
2452 2452
2453 2453 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2454 2454
2455 2455 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2456 2456 be used.
2457 2457
2458 2458 invariants:
2459 2459 - rawtext is optional (can be None); if not set, cachedelta must be set.
2460 2460 if both are set, they must correspond to each other.
2461 2461 """
2462 2462 if node == self.nullid:
2463 2463 raise error.RevlogError(
2464 2464 _(b"%s: attempt to add null revision") % self.display_id
2465 2465 )
2466 2466 if (
2467 2467 node == self.nodeconstants.wdirid
2468 2468 or node in self.nodeconstants.wdirfilenodeids
2469 2469 ):
2470 2470 raise error.RevlogError(
2471 2471 _(b"%s: attempt to add wdir revision") % self.display_id
2472 2472 )
2473 2473 if self._writinghandles is None:
2474 2474 msg = b'adding revision outside `revlog._writing` context'
2475 2475 raise error.ProgrammingError(msg)
2476 2476
2477 2477 if self._inline:
2478 2478 fh = self._writinghandles[0]
2479 2479 else:
2480 2480 fh = self._writinghandles[1]
2481 2481
2482 2482 btext = [rawtext]
2483 2483
2484 2484 curr = len(self)
2485 2485 prev = curr - 1
2486 2486
2487 2487 offset = self._get_data_offset(prev)
2488 2488
2489 2489 if self._concurrencychecker:
2490 2490 ifh, dfh, sdfh = self._writinghandles
2491 2491 # XXX no checking for the sidedata file
2492 2492 if self._inline:
2493 2493 # offset is "as if" it were in the .d file, so we need to add on
2494 2494 # the size of the entry metadata.
2495 2495 self._concurrencychecker(
2496 2496 ifh, self._indexfile, offset + curr * self.index.entry_size
2497 2497 )
2498 2498 else:
2499 2499 # Entries in the .i are a consistent size.
2500 2500 self._concurrencychecker(
2501 2501 ifh, self._indexfile, curr * self.index.entry_size
2502 2502 )
2503 2503 self._concurrencychecker(dfh, self._datafile, offset)
2504 2504
2505 2505 p1r, p2r = self.rev(p1), self.rev(p2)
2506 2506
2507 2507 # full versions are inserted when the needed deltas
2508 2508 # become comparable to the uncompressed text
2509 2509 if rawtext is None:
2510 2510 # need rawtext size, before changed by flag processors, which is
2511 2511 # the non-raw size. use revlog explicitly to avoid filelog's extra
2512 2512 # logic that might remove metadata size.
2513 2513 textlen = mdiff.patchedsize(
2514 2514 revlog.size(self, cachedelta[0]), cachedelta[1]
2515 2515 )
2516 2516 else:
2517 2517 textlen = len(rawtext)
2518 2518
2519 2519 if deltacomputer is None:
2520 2520 deltacomputer = deltautil.deltacomputer(self)
2521 2521
2522 2522 revinfo = revlogutils.revisioninfo(
2523 2523 node,
2524 2524 p1,
2525 2525 p2,
2526 2526 btext,
2527 2527 textlen,
2528 2528 cachedelta,
2529 2529 flags,
2530 2530 )
2531 2531
2532 2532 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2533 2533
2534 2534 compression_mode = COMP_MODE_INLINE
2535 2535 if self._docket is not None:
2536 2536 h, d = deltainfo.data
2537 2537 if not h and not d:
2538 2538 # no data to store at all... declare them uncompressed
2539 2539 compression_mode = COMP_MODE_PLAIN
2540 2540 elif not h:
2541 2541 t = d[0:1]
2542 2542 if t == b'\0':
2543 2543 compression_mode = COMP_MODE_PLAIN
2544 2544 elif t == self._docket.default_compression_header:
2545 2545 compression_mode = COMP_MODE_DEFAULT
2546 2546 elif h == b'u':
2547 2547 # we have a more efficient way to declare uncompressed
2548 2548 h = b''
2549 2549 compression_mode = COMP_MODE_PLAIN
2550 2550 deltainfo = deltautil.drop_u_compression(deltainfo)
2551 2551
2552 2552 sidedata_compression_mode = COMP_MODE_INLINE
2553 2553 if sidedata and self.hassidedata:
2554 2554 sidedata_compression_mode = COMP_MODE_PLAIN
2555 2555 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2556 2556 sidedata_offset = self._docket.sidedata_end
2557 2557 h, comp_sidedata = self.compress(serialized_sidedata)
2558 2558 if (
2559 2559 h != b'u'
2560 2560 and comp_sidedata[0:1] != b'\0'
2561 2561 and len(comp_sidedata) < len(serialized_sidedata)
2562 2562 ):
2563 2563 assert not h
2564 2564 if (
2565 2565 comp_sidedata[0:1]
2566 2566 == self._docket.default_compression_header
2567 2567 ):
2568 2568 sidedata_compression_mode = COMP_MODE_DEFAULT
2569 2569 serialized_sidedata = comp_sidedata
2570 2570 else:
2571 2571 sidedata_compression_mode = COMP_MODE_INLINE
2572 2572 serialized_sidedata = comp_sidedata
2573 2573 else:
2574 2574 serialized_sidedata = b""
2575 2575 # Don't store the offset if the sidedata is empty, that way
2576 2576 # we can easily detect empty sidedata and they will be no different
2577 2577 # from ones we manually add.
2578 2578 sidedata_offset = 0
2579 2579
2580 2580 e = revlogutils.entry(
2581 2581 flags=flags,
2582 2582 data_offset=offset,
2583 2583 data_compressed_length=deltainfo.deltalen,
2584 2584 data_uncompressed_length=textlen,
2585 2585 data_compression_mode=compression_mode,
2586 2586 data_delta_base=deltainfo.base,
2587 2587 link_rev=link,
2588 2588 parent_rev_1=p1r,
2589 2589 parent_rev_2=p2r,
2590 2590 node_id=node,
2591 2591 sidedata_offset=sidedata_offset,
2592 2592 sidedata_compressed_length=len(serialized_sidedata),
2593 2593 sidedata_compression_mode=sidedata_compression_mode,
2594 2594 )
2595 2595
2596 2596 self.index.append(e)
2597 2597 entry = self.index.entry_binary(curr)
2598 2598 if curr == 0 and self._docket is None:
2599 2599 header = self._format_flags | self._format_version
2600 2600 header = self.index.pack_header(header)
2601 2601 entry = header + entry
2602 2602 self._writeentry(
2603 2603 transaction,
2604 2604 entry,
2605 2605 deltainfo.data,
2606 2606 link,
2607 2607 offset,
2608 2608 serialized_sidedata,
2609 2609 sidedata_offset,
2610 2610 )
2611 2611
2612 2612 rawtext = btext[0]
2613 2613
2614 2614 if alwayscache and rawtext is None:
2615 2615 rawtext = deltacomputer.buildtext(revinfo, fh)
2616 2616
2617 2617 if type(rawtext) == bytes: # only accept immutable objects
2618 2618 self._revisioncache = (node, curr, rawtext)
2619 2619 self._chainbasecache[curr] = deltainfo.chainbase
2620 2620 return curr
2621 2621
2622 2622 def _get_data_offset(self, prev):
2623 2623 """Returns the current offset in the (in-transaction) data file.
2624 2624 Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
2625 2625 file to store that information: since sidedata can be rewritten to the
2626 2626 end of the data file within a transaction, you can have cases where, for
2627 2627 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2628 2628 to `n - 1`'s sidedata being written after `n`'s data.
2629 2629
2630 2630 TODO cache this in a docket file before getting out of experimental."""
2631 2631 if self._docket is None:
2632 2632 return self.end(prev)
2633 2633 else:
2634 2634 return self._docket.data_end
2635 2635
2636 2636 def _writeentry(
2637 2637 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2638 2638 ):
2639 2639 # Files opened in a+ mode have inconsistent behavior on various
2640 2640 # platforms. Windows requires that a file positioning call be made
2641 2641 # when the file handle transitions between reads and writes. See
2642 2642 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2643 2643 # platforms, Python or the platform itself can be buggy. Some versions
2644 2644 # of Solaris have been observed to not append at the end of the file
2645 2645 # if the file was seeked to before the end. See issue4943 for more.
2646 2646 #
2647 2647 # We work around this issue by inserting a seek() before writing.
2648 2648 # Note: This is likely not necessary on Python 3. However, because
2649 2649 # the file handle is reused for reads and may be seeked there, we need
2650 2650 # to be careful before changing this.
2651 2651 if self._writinghandles is None:
2652 2652 msg = b'adding revision outside `revlog._writing` context'
2653 2653 raise error.ProgrammingError(msg)
2654 2654 ifh, dfh, sdfh = self._writinghandles
2655 2655 if self._docket is None:
2656 2656 ifh.seek(0, os.SEEK_END)
2657 2657 else:
2658 2658 ifh.seek(self._docket.index_end, os.SEEK_SET)
2659 2659 if dfh:
2660 2660 if self._docket is None:
2661 2661 dfh.seek(0, os.SEEK_END)
2662 2662 else:
2663 2663 dfh.seek(self._docket.data_end, os.SEEK_SET)
2664 2664 if sdfh:
2665 2665 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2666 2666
2667 2667 curr = len(self) - 1
2668 2668 if not self._inline:
2669 2669 transaction.add(self._datafile, offset)
2670 2670 if self._sidedatafile:
2671 2671 transaction.add(self._sidedatafile, sidedata_offset)
2672 2672 transaction.add(self._indexfile, curr * len(entry))
2673 2673 if data[0]:
2674 2674 dfh.write(data[0])
2675 2675 dfh.write(data[1])
2676 2676 if sidedata:
2677 2677 sdfh.write(sidedata)
2678 2678 ifh.write(entry)
2679 2679 else:
2680 2680 offset += curr * self.index.entry_size
2681 2681 transaction.add(self._indexfile, offset)
2682 2682 ifh.write(entry)
2683 2683 ifh.write(data[0])
2684 2684 ifh.write(data[1])
2685 2685 assert not sidedata
2686 2686 self._enforceinlinesize(transaction)
2687 2687 if self._docket is not None:
2688 2688 self._docket.index_end = self._writinghandles[0].tell()
2689 2689 self._docket.data_end = self._writinghandles[1].tell()
2690 2690 self._docket.sidedata_end = self._writinghandles[2].tell()
2691 2691
2692 2692 nodemaputil.setup_persistent_nodemap(transaction, self)
2693 2693
2694 2694 def addgroup(
2695 2695 self,
2696 2696 deltas,
2697 2697 linkmapper,
2698 2698 transaction,
2699 2699 alwayscache=False,
2700 2700 addrevisioncb=None,
2701 2701 duplicaterevisioncb=None,
2702 2702 ):
2703 2703 """
2704 2704 add a delta group
2705 2705
2706 2706 given a set of deltas, add them to the revision log. the
2707 2707 first delta is against its parent, which should be in our
2708 2708 log, the rest are against the previous delta.
2709 2709
2710 2710 If ``addrevisioncb`` is defined, it will be called with arguments of
2711 2711 this revlog and the revision that was added.
2712 2712 """
2713 2713
2714 2714 if self._adding_group:
2715 2715 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2716 2716
2717 2717 self._adding_group = True
2718 2718 empty = True
2719 2719 try:
2720 2720 with self._writing(transaction):
2721 2721 deltacomputer = deltautil.deltacomputer(self)
2722 2722 # loop through our set of deltas
2723 2723 for data in deltas:
2724 2724 (
2725 2725 node,
2726 2726 p1,
2727 2727 p2,
2728 2728 linknode,
2729 2729 deltabase,
2730 2730 delta,
2731 2731 flags,
2732 2732 sidedata,
2733 2733 ) = data
2734 2734 link = linkmapper(linknode)
2735 2735 flags = flags or REVIDX_DEFAULT_FLAGS
2736 2736
2737 2737 rev = self.index.get_rev(node)
2738 2738 if rev is not None:
2739 2739 # this can happen if two branches make the same change
2740 2740 self._nodeduplicatecallback(transaction, rev)
2741 2741 if duplicaterevisioncb:
2742 2742 duplicaterevisioncb(self, rev)
2743 2743 empty = False
2744 2744 continue
2745 2745
2746 2746 for p in (p1, p2):
2747 2747 if not self.index.has_node(p):
2748 2748 raise error.LookupError(
2749 2749 p, self.radix, _(b'unknown parent')
2750 2750 )
2751 2751
2752 2752 if not self.index.has_node(deltabase):
2753 2753 raise error.LookupError(
2754 2754 deltabase, self.display_id, _(b'unknown delta base')
2755 2755 )
2756 2756
2757 2757 baserev = self.rev(deltabase)
2758 2758
2759 2759 if baserev != nullrev and self.iscensored(baserev):
2760 2760 # if base is censored, delta must be full replacement in a
2761 2761 # single patch operation
2762 2762 hlen = struct.calcsize(b">lll")
2763 2763 oldlen = self.rawsize(baserev)
2764 2764 newlen = len(delta) - hlen
2765 2765 if delta[:hlen] != mdiff.replacediffheader(
2766 2766 oldlen, newlen
2767 2767 ):
2768 2768 raise error.CensoredBaseError(
2769 2769 self.display_id, self.node(baserev)
2770 2770 )
2771 2771
2772 2772 if not flags and self._peek_iscensored(baserev, delta):
2773 2773 flags |= REVIDX_ISCENSORED
2774 2774
2775 2775 # We assume consumers of addrevisioncb will want to retrieve
2776 2776 # the added revision, which will require a call to
2777 2777 # revision(). revision() will fast path if there is a cache
2778 2778 # hit. So, we tell _addrevision() to always cache in this case.
2779 2779 # We're only using addgroup() in the context of changegroup
2780 2780 # generation so the revision data can always be handled as raw
2781 2781 # by the flagprocessor.
2782 2782 rev = self._addrevision(
2783 2783 node,
2784 2784 None,
2785 2785 transaction,
2786 2786 link,
2787 2787 p1,
2788 2788 p2,
2789 2789 flags,
2790 2790 (baserev, delta),
2791 2791 alwayscache=alwayscache,
2792 2792 deltacomputer=deltacomputer,
2793 2793 sidedata=sidedata,
2794 2794 )
2795 2795
2796 2796 if addrevisioncb:
2797 2797 addrevisioncb(self, rev)
2798 2798 empty = False
2799 2799 finally:
2800 2800 self._adding_group = False
2801 2801 return not empty
2802 2802
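# Editor's sketch of the shape each element of ``deltas`` is expected to
# have, as unpacked above (names are illustrative):
#
#     data = (
#         node,       # binary node of the new revision
#         p1, p2,     # parent nodes
#         linknode,   # changelog node this revision links to
#         deltabase,  # node the delta applies against
#         delta,      # the binary delta itself
#         flags,      # revision flags, or 0 for the defaults
#         sidedata,   # sidedata mapping for the revision
#     )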
2803 2803 def iscensored(self, rev):
2804 2804 """Check if a file revision is censored."""
2805 2805 if not self._censorable:
2806 2806 return False
2807 2807
2808 2808 return self.flags(rev) & REVIDX_ISCENSORED
2809 2809
2810 2810 def _peek_iscensored(self, baserev, delta):
2811 2811 """Quickly check if a delta produces a censored revision."""
2812 2812 if not self._censorable:
2813 2813 return False
2814 2814
2815 2815 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2816 2816
2817 2817 def getstrippoint(self, minlink):
2818 2818 """find the minimum rev that must be stripped to strip the linkrev
2819 2819
2820 2820 Returns a tuple containing the minimum rev and a set of all revs that
2821 2821 have linkrevs that will be broken by this strip.
2822 2822 """
2823 2823 return storageutil.resolvestripinfo(
2824 2824 minlink,
2825 2825 len(self) - 1,
2826 2826 self.headrevs(),
2827 2827 self.linkrev,
2828 2828 self.parentrevs,
2829 2829 )
2830 2830
2831 2831 def strip(self, minlink, transaction):
2832 2832 """truncate the revlog on the first revision with a linkrev >= minlink
2833 2833
2834 2834 This function is called when we're stripping revision minlink and
2835 2835 its descendants from the repository.
2836 2836
2837 2837 We have to remove all revisions with linkrev >= minlink, because
2838 2838 the equivalent changelog revisions will be renumbered after the
2839 2839 strip.
2840 2840
2841 2841 So we truncate the revlog on the first of these revisions, and
2842 2842 trust that the caller has saved the revisions that shouldn't be
2843 2843 removed and that it'll re-add them after this truncation.
2844 2844 """
2845 2845 if len(self) == 0:
2846 2846 return
2847 2847
2848 2848 rev, _ = self.getstrippoint(minlink)
2849 2849 if rev == len(self):
2850 2850 return
2851 2851
2852 2852 # first truncate the files on disk
2853 2853 data_end = self.start(rev)
2854 2854 if not self._inline:
2855 2855 transaction.add(self._datafile, data_end)
2856 2856 end = rev * self.index.entry_size
2857 2857 else:
2858 2858 end = data_end + (rev * self.index.entry_size)
2859 2859
2860 2860 if self._sidedatafile:
2861 2861 sidedata_end = self.sidedata_cut_off(rev)
2862 2862 transaction.add(self._sidedatafile, sidedata_end)
2863 2863
2864 2864 transaction.add(self._indexfile, end)
2865 2865 if self._docket is not None:
2866 2866 # XXX we could leverage the docket while stripping. However it is
2867 2867 # not powerful enough at the time of this comment
2868 2868 self._docket.index_end = end
2869 2869 self._docket.data_end = data_end
2870 2870 self._docket.sidedata_end = sidedata_end
2871 2871 self._docket.write(transaction, stripping=True)
2872 2872
2873 2873 # then reset internal state in memory to forget those revisions
2874 2874 self._revisioncache = None
2875 2875 self._chaininfocache = util.lrucachedict(500)
2876 2876 self._chunkclear()
2877 2877
2878 2878 del self.index[rev:-1]
2879 2879
2880 2880 def checksize(self):
2881 2881 """Check size of index and data files
2882 2882
2883 2883 return a (dd, di) tuple.
2884 2884 - dd: extra bytes for the "data" file
2885 2885 - di: extra bytes for the "index" file
2886 2886
2887 2887 A healthy revlog will return (0, 0).
2888 2888 """
2889 2889 expected = 0
2890 2890 if len(self):
2891 2891 expected = max(0, self.end(len(self) - 1))
2892 2892
2893 2893 try:
2894 2894 with self._datafp() as f:
2895 2895 f.seek(0, io.SEEK_END)
2896 2896 actual = f.tell()
2897 2897 dd = actual - expected
2898 2898 except IOError as inst:
2899 2899 if inst.errno != errno.ENOENT:
2900 2900 raise
2901 2901 dd = 0
2902 2902
2903 2903 try:
2904 2904 f = self.opener(self._indexfile)
2905 2905 f.seek(0, io.SEEK_END)
2906 2906 actual = f.tell()
2907 2907 f.close()
2908 2908 s = self.index.entry_size
2909 2909 i = max(0, actual // s)
2910 2910 di = actual - (i * s)
2911 2911 if self._inline:
2912 2912 databytes = 0
2913 2913 for r in self:
2914 2914 databytes += max(0, self.length(r))
2915 2915 dd = 0
2916 2916 di = actual - len(self) * s - databytes
2917 2917 except IOError as inst:
2918 2918 if inst.errno != errno.ENOENT:
2919 2919 raise
2920 2920 di = 0
2921 2921
2922 2922 return (dd, di)
2923 2923
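# Editor's illustration of interpreting the result (a sketch, not an API
# guarantee beyond the docstring above):
#
#     dd, di = rl.checksize()
#     if dd or di:
#         # e.g. a crashed writer left a partially appended entry;
#         # ``hg verify`` reports these as data/index length problems
#         print('data file off by %d, index off by %d' % (dd, di))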
2924 2924 def files(self):
2925 2925 res = [self._indexfile]
2926 2926 if not self._inline:
2927 2927 res.append(self._datafile)
2928 2928 return res
2929 2929
2930 2930 def emitrevisions(
2931 2931 self,
2932 2932 nodes,
2933 2933 nodesorder=None,
2934 2934 revisiondata=False,
2935 2935 assumehaveparentrevisions=False,
2936 2936 deltamode=repository.CG_DELTAMODE_STD,
2937 2937 sidedata_helpers=None,
2938 2938 ):
2939 2939 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2940 2940 raise error.ProgrammingError(
2941 2941 b'unhandled value for nodesorder: %s' % nodesorder
2942 2942 )
2943 2943
2944 2944 if nodesorder is None and not self._generaldelta:
2945 2945 nodesorder = b'storage'
2946 2946
2947 2947 if (
2948 2948 not self._storedeltachains
2949 2949 and deltamode != repository.CG_DELTAMODE_PREV
2950 2950 ):
2951 2951 deltamode = repository.CG_DELTAMODE_FULL
2952 2952
2953 2953 return storageutil.emitrevisions(
2954 2954 self,
2955 2955 nodes,
2956 2956 nodesorder,
2957 2957 revlogrevisiondelta,
2958 2958 deltaparentfn=self.deltaparent,
2959 2959 candeltafn=self.candelta,
2960 2960 rawsizefn=self.rawsize,
2961 2961 revdifffn=self.revdiff,
2962 2962 flagsfn=self.flags,
2963 2963 deltamode=deltamode,
2964 2964 revisiondata=revisiondata,
2965 2965 assumehaveparentrevisions=assumehaveparentrevisions,
2966 2966 sidedata_helpers=sidedata_helpers,
2967 2967 )
2968 2968
2969 2969 DELTAREUSEALWAYS = b'always'
2970 2970 DELTAREUSESAMEREVS = b'samerevs'
2971 2971 DELTAREUSENEVER = b'never'
2972 2972
2973 2973 DELTAREUSEFULLADD = b'fulladd'
2974 2974
2975 2975 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2976 2976
2977 2977 def clone(
2978 2978 self,
2979 2979 tr,
2980 2980 destrevlog,
2981 2981 addrevisioncb=None,
2982 2982 deltareuse=DELTAREUSESAMEREVS,
2983 2983 forcedeltabothparents=None,
2984 2984 sidedata_helpers=None,
2985 2985 ):
2986 2986 """Copy this revlog to another, possibly with format changes.
2987 2987
2988 2988 The destination revlog will contain the same revisions and nodes.
2989 2989 However, it may not be bit-for-bit identical due to e.g. delta encoding
2990 2990 differences.
2991 2991
2992 2992 The ``deltareuse`` argument controls how deltas from the existing revlog
2993 2993 are preserved in the destination revlog. The argument can have the
2994 2994 following values:
2995 2995
2996 2996 DELTAREUSEALWAYS
2997 2997 Deltas will always be reused (if possible), even if the destination
2998 2998 revlog would not select the same revisions for the delta. This is the
2999 2999 fastest mode of operation.
3000 3000 DELTAREUSESAMEREVS
3001 3001 Deltas will be reused if the destination revlog would pick the same
3002 3002 revisions for the delta. This mode strikes a balance between speed
3003 3003 and optimization.
3004 3004 DELTAREUSENEVER
3005 3005 Deltas will never be reused. This is the slowest mode of execution.
3006 3006 This mode can be used to recompute deltas (e.g. if the diff/delta
3007 3007 algorithm changes).
3008 3008 DELTAREUSEFULLADD
3009 3009 Revisions will be re-added as if they were new content. This is
3010 3010 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3011 3011 e.g. large file detection and handling.
3012 3012
3013 3013 Delta computation can be slow, so the choice of delta reuse policy can
3014 3014 significantly affect run time.
3015 3015
3016 3016 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3017 3017 two extremes. Deltas will be reused if they are appropriate. But if the
3018 3018 delta could choose a better revision, it will do so. This means if you
3019 3019 are converting a non-generaldelta revlog to a generaldelta revlog,
3020 3020 deltas will be recomputed if the delta's parent isn't a parent of the
3021 3021 revision.
3022 3022
3023 3023 In addition to the delta policy, the ``forcedeltabothparents``
3024 3024 argument controls whether to force computing deltas against both parents
3025 3025 for merges. When None, the destination revlog's current setting is used.
3026 3026
3027 3027 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3028 3028 `sidedata_helpers`.
3029 3029 """
3030 3030 if deltareuse not in self.DELTAREUSEALL:
3031 3031 raise ValueError(
3032 3032 _(b'value for deltareuse invalid: %s') % deltareuse
3033 3033 )
3034 3034
3035 3035 if len(destrevlog):
3036 3036 raise ValueError(_(b'destination revlog is not empty'))
3037 3037
3038 3038 if getattr(self, 'filteredrevs', None):
3039 3039 raise ValueError(_(b'source revlog has filtered revisions'))
3040 3040 if getattr(destrevlog, 'filteredrevs', None):
3041 3041 raise ValueError(_(b'destination revlog has filtered revisions'))
3042 3042
3043 3043 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3044 3044 # if possible.
3045 3045 oldlazydelta = destrevlog._lazydelta
3046 3046 oldlazydeltabase = destrevlog._lazydeltabase
3047 3047 oldamd = destrevlog._deltabothparents
3048 3048
3049 3049 try:
3050 3050 if deltareuse == self.DELTAREUSEALWAYS:
3051 3051 destrevlog._lazydeltabase = True
3052 3052 destrevlog._lazydelta = True
3053 3053 elif deltareuse == self.DELTAREUSESAMEREVS:
3054 3054 destrevlog._lazydeltabase = False
3055 3055 destrevlog._lazydelta = True
3056 3056 elif deltareuse == self.DELTAREUSENEVER:
3057 3057 destrevlog._lazydeltabase = False
3058 3058 destrevlog._lazydelta = False
3059 3059
3060 3060 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3061 3061
3062 3062 self._clone(
3063 3063 tr,
3064 3064 destrevlog,
3065 3065 addrevisioncb,
3066 3066 deltareuse,
3067 3067 forcedeltabothparents,
3068 3068 sidedata_helpers,
3069 3069 )
3070 3070
3071 3071 finally:
3072 3072 destrevlog._lazydelta = oldlazydelta
3073 3073 destrevlog._lazydeltabase = oldlazydeltabase
3074 3074 destrevlog._deltabothparents = oldamd
3075 3075
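# Editor's sketch of a typical upgrade-style use, recomputing every delta
# while copying (``src``/``dst`` are illustrative revlogs and ``tr`` an
# open transaction):
#
#     src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
#
# ``DELTAREUSENEVER`` is the slow-but-thorough policy; repository upgrades
# pick among the policies documented above based on which format features
# changed.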
3076 3076 def _clone(
3077 3077 self,
3078 3078 tr,
3079 3079 destrevlog,
3080 3080 addrevisioncb,
3081 3081 deltareuse,
3082 3082 forcedeltabothparents,
3083 3083 sidedata_helpers,
3084 3084 ):
3085 3085 """perform the core duty of `revlog.clone` after parameter processing"""
3086 3086 deltacomputer = deltautil.deltacomputer(destrevlog)
3087 3087 index = self.index
3088 3088 for rev in self:
3089 3089 entry = index[rev]
3090 3090
3091 3091 # Some classes override linkrev to take filtered revs into
3092 3092 # account. Use raw entry from index.
3093 3093 flags = entry[0] & 0xFFFF
3094 3094 linkrev = entry[4]
3095 3095 p1 = index[entry[5]][7]
3096 3096 p2 = index[entry[6]][7]
3097 3097 node = entry[7]
3098 3098
3099 3099 # (Possibly) reuse the delta from the revlog if allowed and
3100 3100 # the revlog chunk is a delta.
3101 3101 cachedelta = None
3102 3102 rawtext = None
3103 3103 if deltareuse == self.DELTAREUSEFULLADD:
3104 3104 text = self._revisiondata(rev)
3105 3105 sidedata = self.sidedata(rev)
3106 3106
3107 3107 if sidedata_helpers is not None:
3108 3108 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3109 3109 self, sidedata_helpers, sidedata, rev
3110 3110 )
3111 3111 flags = flags | new_flags[0] & ~new_flags[1]
3112 3112
3113 3113 destrevlog.addrevision(
3114 3114 text,
3115 3115 tr,
3116 3116 linkrev,
3117 3117 p1,
3118 3118 p2,
3119 3119 cachedelta=cachedelta,
3120 3120 node=node,
3121 3121 flags=flags,
3122 3122 deltacomputer=deltacomputer,
3123 3123 sidedata=sidedata,
3124 3124 )
3125 3125 else:
3126 3126 if destrevlog._lazydelta:
3127 3127 dp = self.deltaparent(rev)
3128 3128 if dp != nullrev:
3129 3129 cachedelta = (dp, bytes(self._chunk(rev)))
3130 3130
3131 3131 sidedata = None
3132 3132 if not cachedelta:
3133 3133 rawtext = self._revisiondata(rev)
3134 3134 sidedata = self.sidedata(rev)
3135 3135 if sidedata is None:
3136 3136 sidedata = self.sidedata(rev)
3137 3137
3138 3138 if sidedata_helpers is not None:
3139 3139 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3140 3140 self, sidedata_helpers, sidedata, rev
3141 3141 )
3142 3142 flags = flags | new_flags[0] & ~new_flags[1]
3143 3143
3144 3144 with destrevlog._writing(tr):
3145 3145 destrevlog._addrevision(
3146 3146 node,
3147 3147 rawtext,
3148 3148 tr,
3149 3149 linkrev,
3150 3150 p1,
3151 3151 p2,
3152 3152 flags,
3153 3153 cachedelta,
3154 3154 deltacomputer=deltacomputer,
3155 3155 sidedata=sidedata,
3156 3156 )
3157 3157
3158 3158 if addrevisioncb:
3159 3159 addrevisioncb(self, rev, node)
3160 3160
    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            censor.v1_censor(self, tr, censornode, tombstone)
        else:
            # revlog v2
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).
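            #
            # Worked example (hypothetical rename revision): for
            #   rawtext = b'\x01\ncopy: a\ncopyrev: <40 hex digits>\x01\nbody'
            # the metadata between the two b'\x01\n' markers spans LM bytes,
            # so rawsize() == len(rawtext) == L2 while len(read()) is
            # L2 - LM, matching the "rename" column above.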

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
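        # Each truthy flag adds one key to the returned dict; an
        # illustrative call (values are made up):
        #   storageinfo(revisionscount=True, trackedsize=True)
        #   -> {b'revisionscount': 42, b'trackedsize': 123456}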
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
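        # `helpers` is assumed to be the (repo, computers, removers)
        # 3-tuple built by sidedatautil.get_sidedata_helpers, which is
        # how it is indexed below and in run_sidedata_helpers.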
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
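            # position the sidedata file handle at the end of its existing
            # data before appending; `current_offset` below builds on this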
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

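                # Choose how the sidedata chunk is stored: PLAIN keeps it
                # uncompressed, DEFAULT strips the compression header since
                # the chunk uses the docket's default engine, and INLINE
                # keeps the compression header inside the chunk itself.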
                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
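                # entry[8] and entry[9] hold the revision's current
                # sidedata offset and length in the index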
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)