rank: naive rank property computation and retrieval
marmoute
r49606:2e949ede default
@@ -1,3275 +1,3298 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15 from __future__ import absolute_import
16 16
17 17 import binascii
18 18 import collections
19 19 import contextlib
20 20 import errno
21 21 import io
22 22 import os
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullrev,
31 31 sha1nodeconstants,
32 32 short,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from .pycompat import getattr
37 37 from .revlogutils.constants import (
38 38 ALL_KINDS,
39 39 CHANGELOGV2,
40 40 COMP_MODE_DEFAULT,
41 41 COMP_MODE_INLINE,
42 42 COMP_MODE_PLAIN,
43 ENTRY_RANK,
43 44 FEATURES_BY_VERSION,
44 45 FLAG_GENERALDELTA,
45 46 FLAG_INLINE_DATA,
46 47 INDEX_HEADER,
47 48 KIND_CHANGELOG,
49 RANK_UNKNOWN,
48 50 REVLOGV0,
49 51 REVLOGV1,
50 52 REVLOGV1_FLAGS,
51 53 REVLOGV2,
52 54 REVLOGV2_FLAGS,
53 55 REVLOG_DEFAULT_FLAGS,
54 56 REVLOG_DEFAULT_FORMAT,
55 57 REVLOG_DEFAULT_VERSION,
56 58 SUPPORTED_FLAGS,
57 59 )
58 60 from .revlogutils.flagutil import (
59 61 REVIDX_DEFAULT_FLAGS,
60 62 REVIDX_ELLIPSIS,
61 63 REVIDX_EXTSTORED,
62 64 REVIDX_FLAGS_ORDER,
63 65 REVIDX_HASCOPIESINFO,
64 66 REVIDX_ISCENSORED,
65 67 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 68 )
67 69 from .thirdparty import attr
68 70 from . import (
69 71 ancestor,
70 72 dagop,
71 73 error,
72 74 mdiff,
73 75 policy,
74 76 pycompat,
75 77 revlogutils,
76 78 templatefilters,
77 79 util,
78 80 )
79 81 from .interfaces import (
80 82 repository,
81 83 util as interfaceutil,
82 84 )
83 85 from .revlogutils import (
84 86 deltas as deltautil,
85 87 docket as docketutil,
86 88 flagutil,
87 89 nodemap as nodemaputil,
88 90 randomaccessfile,
89 91 revlogv0,
90 92 rewrite,
91 93 sidedata as sidedatautil,
92 94 )
93 95 from .utils import (
94 96 storageutil,
95 97 stringutil,
96 98 )
97 99
98 100 # blanked usage of all the names to prevent pyflakes complaints
99 101 # We need these names available in the module for extensions.
100 102
101 103 REVLOGV0
102 104 REVLOGV1
103 105 REVLOGV2
104 106 FLAG_INLINE_DATA
105 107 FLAG_GENERALDELTA
106 108 REVLOG_DEFAULT_FLAGS
107 109 REVLOG_DEFAULT_FORMAT
108 110 REVLOG_DEFAULT_VERSION
109 111 REVLOGV1_FLAGS
110 112 REVLOGV2_FLAGS
111 113 REVIDX_ISCENSORED
112 114 REVIDX_ELLIPSIS
113 115 REVIDX_HASCOPIESINFO
114 116 REVIDX_EXTSTORED
115 117 REVIDX_DEFAULT_FLAGS
116 118 REVIDX_FLAGS_ORDER
117 119 REVIDX_RAWTEXT_CHANGING_FLAGS
118 120
119 121 parsers = policy.importmod('parsers')
120 122 rustancestor = policy.importrust('ancestor')
121 123 rustdagop = policy.importrust('dagop')
122 124 rustrevlog = policy.importrust('revlog')
123 125
124 126 # Aliased for performance.
125 127 _zlibdecompress = zlib.decompress
126 128
127 129 # max size of revlog with inline data
128 130 _maxinline = 131072
129 131
130 132 # Flag processors for REVIDX_ELLIPSIS.
131 133 def ellipsisreadprocessor(rl, text):
132 134 return text, False
133 135
134 136
135 137 def ellipsiswriteprocessor(rl, text):
136 138 return text, False
137 139
138 140
139 141 def ellipsisrawprocessor(rl, text):
140 142 return False
141 143
142 144
143 145 ellipsisprocessor = (
144 146 ellipsisreadprocessor,
145 147 ellipsiswriteprocessor,
146 148 ellipsisrawprocessor,
147 149 )
148 150
149 151
150 152 def _verify_revision(rl, skipflags, state, node):
151 153 """Verify the integrity of the given revlog ``node`` while providing a hook
152 154 point for extensions to influence the operation."""
153 155 if skipflags:
154 156 state[b'skipread'].add(node)
155 157 else:
156 158 # Side-effect: read content and verify hash.
157 159 rl.revision(node)
158 160
159 161
160 162 # True if a fast implementation for persistent-nodemap is available
161 163 #
162 164 # We also consider the implementation in "pure" python to be "fast" because
163 165 # people using pure don't really have performance considerations (and a
164 166 # wheelbarrow of other slowness sources)
165 167 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
166 168 parsers, 'BaseIndexObject'
167 169 )
168 170
169 171
170 172 @interfaceutil.implementer(repository.irevisiondelta)
171 173 @attr.s(slots=True)
172 174 class revlogrevisiondelta(object):
173 175 node = attr.ib()
174 176 p1node = attr.ib()
175 177 p2node = attr.ib()
176 178 basenode = attr.ib()
177 179 flags = attr.ib()
178 180 baserevisionsize = attr.ib()
179 181 revision = attr.ib()
180 182 delta = attr.ib()
181 183 sidedata = attr.ib()
182 184 protocol_flags = attr.ib()
183 185 linknode = attr.ib(default=None)
184 186
185 187
186 188 @interfaceutil.implementer(repository.iverifyproblem)
187 189 @attr.s(frozen=True)
188 190 class revlogproblem(object):
189 191 warning = attr.ib(default=None)
190 192 error = attr.ib(default=None)
191 193 node = attr.ib(default=None)
192 194
193 195
194 196 def parse_index_v1(data, inline):
195 197 # call the C implementation to parse the index data
196 198 index, cache = parsers.parse_index2(data, inline)
197 199 return index, cache
198 200
199 201
200 202 def parse_index_v2(data, inline):
201 203 # call the C implementation to parse the index data
202 204 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
203 205 return index, cache
204 206
205 207
206 208 def parse_index_cl_v2(data, inline):
207 209 # call the C implementation to parse the index data
208 210 assert not inline
209 211 from .pure.parsers import parse_index_cl_v2
210 212
211 213 index, cache = parse_index_cl_v2(data)
212 214 return index, cache
213 215
214 216
215 217 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
216 218
217 219 def parse_index_v1_nodemap(data, inline):
218 220 index, cache = parsers.parse_index_devel_nodemap(data, inline)
219 221 return index, cache
220 222
221 223
222 224 else:
223 225 parse_index_v1_nodemap = None
224 226
225 227
226 228 def parse_index_v1_mixed(data, inline):
227 229 index, cache = parse_index_v1(data, inline)
228 230 return rustrevlog.MixedIndex(index), cache
229 231
230 232
231 233 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
232 234 # signed integer)
233 235 _maxentrysize = 0x7FFFFFFF
234 236
235 237 FILE_TOO_SHORT_MSG = _(
236 238 b'cannot read from revlog %s;'
237 239 b' expected %d bytes from offset %d, data size is %d'
238 240 )
239 241
240 242
241 243 class revlog(object):
242 244 """
243 245 the underlying revision storage object
244 246
245 247 A revlog consists of two parts, an index and the revision data.
246 248
247 249 The index is a file with a fixed record size containing
248 250 information on each revision, including its nodeid (hash), the
249 251 nodeids of its parents, the position and offset of its data within
250 252 the data file, and the revision it's based on. Finally, each entry
251 253 contains a linkrev entry that can serve as a pointer to external
252 254 data.
253 255
254 256 The revision data itself is a linear collection of data chunks.
255 257 Each chunk represents a revision and is usually represented as a
256 258 delta against the previous chunk. To bound lookup time, runs of
257 259 deltas are limited to about 2 times the length of the original
258 260 version data. This makes retrieval of a version proportional to
259 261 its size, or O(1) relative to the number of revisions.
260 262
261 263 Both pieces of the revlog are written to in an append-only
262 264 fashion, which means we never need to rewrite a file to insert or
263 265 remove data, and can use some simple techniques to avoid the need
264 266 for locking while reading.
265 267
266 268 If checkambig, indexfile is opened with checkambig=True at
267 269 writing, to avoid file stat ambiguity.
268 270
269 271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
270 272 index will be mmapped rather than read if it is larger than the
271 273 configured threshold.
272 274
273 275 If censorable is True, the revlog can have censored revisions.
274 276
275 277 If `upperboundcomp` is not None, this is the expected maximal gain from
276 278 compression for the data content.
277 279
278 280 `concurrencychecker` is an optional function that receives 3 arguments: a
279 281 file handle, a filename, and an expected position. It should check whether
280 282 the current position in the file handle is valid, and log/warn/fail (by
281 283 raising).
282 284
283 285 See mercurial/revlogutils/constants.py for details about the content of an
284 286 index entry.
285 287 """
286 288
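The delta-chain model described in this docstring can be made concrete with a short sketch. This is an illustrative outline only, not part of this change; it assumes a revlog-like object `rl` exposing the `_deltachain` and `_chunk` helpers defined later in this file, and uses `mdiff.patches` from the imports above:

def schematic_revision(rl, rev):
    # walk back to the chain base, collecting revs in ascending order
    chain, _stopped = rl._deltachain(rev)
    # the base holds a full snapshot; every later chunk is a delta
    # against its predecessor in the chain
    basetext = rl._chunk(chain[0])
    deltas = [rl._chunk(r) for r in chain[1:]]
    return mdiff.patches(basetext, deltas)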
287 289 _flagserrorclass = error.RevlogError
288 290
289 291 def __init__(
290 292 self,
291 293 opener,
292 294 target,
293 295 radix,
294 296 postfix=None, # only exists for `tmpcensored` now
295 297 checkambig=False,
296 298 mmaplargeindex=False,
297 299 censorable=False,
298 300 upperboundcomp=None,
299 301 persistentnodemap=False,
300 302 concurrencychecker=None,
301 303 trypending=False,
302 304 ):
303 305 """
304 306 create a revlog object
305 307
306 308 opener is a function that abstracts the file opening operation
307 309 and can be used to implement COW semantics or the like.
308 310
309 311 `target`: a (KIND, ID) tuple that identifies the content stored in
310 312 this revlog. It helps the rest of the code understand what the revlog
311 313 is about without having to resort to heuristics and index filename
312 314 analysis. Note that this must reliably be set by normal code, but
313 315 that test, debug, or performance measurement code might not set this to
314 316 an accurate value.
315 317 """
316 318 self.upperboundcomp = upperboundcomp
317 319
318 320 self.radix = radix
319 321
320 322 self._docket_file = None
321 323 self._indexfile = None
322 324 self._datafile = None
323 325 self._sidedatafile = None
324 326 self._nodemap_file = None
325 327 self.postfix = postfix
326 328 self._trypending = trypending
327 329 self.opener = opener
328 330 if persistentnodemap:
329 331 self._nodemap_file = nodemaputil.get_nodemap_file(self)
330 332
331 333 assert target[0] in ALL_KINDS
332 334 assert len(target) == 2
333 335 self.target = target
334 336 # When True, indexfile is opened with checkambig=True at writing, to
335 337 # avoid file stat ambiguity.
336 338 self._checkambig = checkambig
337 339 self._mmaplargeindex = mmaplargeindex
338 340 self._censorable = censorable
339 341 # 3-tuple of (node, rev, text) for a raw revision.
340 342 self._revisioncache = None
341 343 # Maps rev to chain base rev.
342 344 self._chainbasecache = util.lrucachedict(100)
343 345 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
344 346 self._chunkcache = (0, b'')
345 347 # How much data to read and cache into the raw revlog data cache.
346 348 self._chunkcachesize = 65536
347 349 self._maxchainlen = None
348 350 self._deltabothparents = True
349 351 self.index = None
350 352 self._docket = None
351 353 self._nodemap_docket = None
352 354 # Mapping of partial identifiers to full nodes.
353 355 self._pcache = {}
354 356 # Mapping of revision integer to full node.
355 357 self._compengine = b'zlib'
356 358 self._compengineopts = {}
357 359 self._maxdeltachainspan = -1
358 360 self._withsparseread = False
359 361 self._sparserevlog = False
360 362 self.hassidedata = False
361 363 self._srdensitythreshold = 0.50
362 364 self._srmingapsize = 262144
363 365
364 366 # Make copy of flag processors so each revlog instance can support
365 367 # custom flags.
366 368 self._flagprocessors = dict(flagutil.flagprocessors)
367 369
368 370 # 3-tuple of file handles being used for active writing.
369 371 self._writinghandles = None
370 372 # prevent nesting of addgroup
371 373 self._adding_group = None
372 374
373 375 self._loadindex()
374 376
375 377 self._concurrencychecker = concurrencychecker
376 378
377 379 def _init_opts(self):
378 380 """process options (from above/config) to setup associated default revlog mode
379 381
380 382 These values might be affected when actually reading on disk information.
381 383
382 384 The relevant values are returned for use in _loadindex().
383 385
384 386 * newversionflags:
385 387 version header to use if we need to create a new revlog
386 388
387 389 * mmapindexthreshold:
388 390 minimal index size at which to start using mmap
389 391
390 392 * force_nodemap:
391 393 force the usage of a "development" version of the nodemap code
392 394 """
393 395 mmapindexthreshold = None
394 396 opts = self.opener.options
395 397
396 398 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
397 399 new_header = CHANGELOGV2
398 400 elif b'revlogv2' in opts:
399 401 new_header = REVLOGV2
400 402 elif b'revlogv1' in opts:
401 403 new_header = REVLOGV1 | FLAG_INLINE_DATA
402 404 if b'generaldelta' in opts:
403 405 new_header |= FLAG_GENERALDELTA
404 406 elif b'revlogv0' in self.opener.options:
405 407 new_header = REVLOGV0
406 408 else:
407 409 new_header = REVLOG_DEFAULT_VERSION
408 410
409 411 if b'chunkcachesize' in opts:
410 412 self._chunkcachesize = opts[b'chunkcachesize']
411 413 if b'maxchainlen' in opts:
412 414 self._maxchainlen = opts[b'maxchainlen']
413 415 if b'deltabothparents' in opts:
414 416 self._deltabothparents = opts[b'deltabothparents']
415 417 self._lazydelta = bool(opts.get(b'lazydelta', True))
416 418 self._lazydeltabase = False
417 419 if self._lazydelta:
418 420 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
419 421 if b'compengine' in opts:
420 422 self._compengine = opts[b'compengine']
421 423 if b'zlib.level' in opts:
422 424 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
423 425 if b'zstd.level' in opts:
424 426 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
425 427 if b'maxdeltachainspan' in opts:
426 428 self._maxdeltachainspan = opts[b'maxdeltachainspan']
427 429 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
428 430 mmapindexthreshold = opts[b'mmapindexthreshold']
429 431 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
430 432 withsparseread = bool(opts.get(b'with-sparse-read', False))
431 433 # sparse-revlog forces sparse-read
432 434 self._withsparseread = self._sparserevlog or withsparseread
433 435 if b'sparse-read-density-threshold' in opts:
434 436 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
435 437 if b'sparse-read-min-gap-size' in opts:
436 438 self._srmingapsize = opts[b'sparse-read-min-gap-size']
437 439 if opts.get(b'enableellipsis'):
438 440 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
439 441
440 442 # revlog v0 doesn't have flag processors
441 443 for flag, processor in pycompat.iteritems(
442 444 opts.get(b'flagprocessors', {})
443 445 ):
444 446 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
445 447
446 448 if self._chunkcachesize <= 0:
447 449 raise error.RevlogError(
448 450 _(b'revlog chunk cache size %r is not greater than 0')
449 451 % self._chunkcachesize
450 452 )
451 453 elif self._chunkcachesize & (self._chunkcachesize - 1):
452 454 raise error.RevlogError(
453 455 _(b'revlog chunk cache size %r is not a power of 2')
454 456 % self._chunkcachesize
455 457 )
456 458 force_nodemap = opts.get(b'devel-force-nodemap', False)
457 459 return new_header, mmapindexthreshold, force_nodemap
458 460
459 461 def _get_data(self, filepath, mmap_threshold, size=None):
460 462 """return a file content with or without mmap
461 463
462 464 If the file is missing return the empty string"""
463 465 try:
464 466 with self.opener(filepath) as fp:
465 467 if mmap_threshold is not None:
466 468 file_size = self.opener.fstat(fp).st_size
467 469 if file_size >= mmap_threshold:
468 470 if size is not None:
469 471 # avoid potential mmap crash
470 472 size = min(file_size, size)
471 473 # TODO: should call .close() to release resources without
472 474 # relying on Python GC
473 475 if size is None:
474 476 return util.buffer(util.mmapread(fp))
475 477 else:
476 478 return util.buffer(util.mmapread(fp, size))
477 479 if size is None:
478 480 return fp.read()
479 481 else:
480 482 return fp.read(size)
481 483 except IOError as inst:
482 484 if inst.errno != errno.ENOENT:
483 485 raise
484 486 return b''
485 487
486 488 def _loadindex(self, docket=None):
487 489
488 490 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
489 491
490 492 if self.postfix is not None:
491 493 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
492 494 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
493 495 entry_point = b'%s.i.a' % self.radix
494 496 else:
495 497 entry_point = b'%s.i' % self.radix
496 498
497 499 if docket is not None:
498 500 self._docket = docket
499 501 self._docket_file = entry_point
500 502 else:
501 503 entry_data = b''
502 504 self._initempty = True
503 505 entry_data = self._get_data(entry_point, mmapindexthreshold)
504 506 if len(entry_data) > 0:
505 507 header = INDEX_HEADER.unpack(entry_data[:4])[0]
506 508 self._initempty = False
507 509 else:
508 510 header = new_header
509 511
510 512 self._format_flags = header & ~0xFFFF
511 513 self._format_version = header & 0xFFFF
512 514
513 515 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
514 516 if supported_flags is None:
515 517 msg = _(b'unknown version (%d) in revlog %s')
516 518 msg %= (self._format_version, self.display_id)
517 519 raise error.RevlogError(msg)
518 520 elif self._format_flags & ~supported_flags:
519 521 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
520 522 display_flag = self._format_flags >> 16
521 523 msg %= (display_flag, self._format_version, self.display_id)
522 524 raise error.RevlogError(msg)
523 525
524 526 features = FEATURES_BY_VERSION[self._format_version]
525 527 self._inline = features[b'inline'](self._format_flags)
526 528 self._generaldelta = features[b'generaldelta'](self._format_flags)
527 529 self.hassidedata = features[b'sidedata']
528 530
529 531 if not features[b'docket']:
530 532 self._indexfile = entry_point
531 533 index_data = entry_data
532 534 else:
533 535 self._docket_file = entry_point
534 536 if self._initempty:
535 537 self._docket = docketutil.default_docket(self, header)
536 538 else:
537 539 self._docket = docketutil.parse_docket(
538 540 self, entry_data, use_pending=self._trypending
539 541 )
540 542
541 543 if self._docket is not None:
542 544 self._indexfile = self._docket.index_filepath()
543 545 index_data = b''
544 546 index_size = self._docket.index_end
545 547 if index_size > 0:
546 548 index_data = self._get_data(
547 549 self._indexfile, mmapindexthreshold, size=index_size
548 550 )
549 551 if len(index_data) < index_size:
550 552 msg = _(b'not enough index data for %s: got %d, expected %d')
551 553 msg %= (self.display_id, len(index_data), index_size)
552 554 raise error.RevlogError(msg)
553 555
554 556 self._inline = False
555 557 # generaldelta is implied by version 2 revlogs.
556 558 self._generaldelta = True
557 559 # the logic for persistent nodemap will be dealt with within the
558 560 # main docket, so disable it for now.
559 561 self._nodemap_file = None
560 562
561 563 if self._docket is not None:
562 564 self._datafile = self._docket.data_filepath()
563 565 self._sidedatafile = self._docket.sidedata_filepath()
564 566 elif self.postfix is None:
565 567 self._datafile = b'%s.d' % self.radix
566 568 else:
567 569 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
568 570
569 571 self.nodeconstants = sha1nodeconstants
570 572 self.nullid = self.nodeconstants.nullid
571 573
572 574 # sparse-revlog can't be on without general-delta (issue6056)
573 575 if not self._generaldelta:
574 576 self._sparserevlog = False
575 577
576 578 self._storedeltachains = True
577 579
578 580 devel_nodemap = (
579 581 self._nodemap_file
580 582 and force_nodemap
581 583 and parse_index_v1_nodemap is not None
582 584 )
583 585
584 586 use_rust_index = False
585 587 if rustrevlog is not None:
586 588 if self._nodemap_file is not None:
587 589 use_rust_index = True
588 590 else:
589 591 use_rust_index = self.opener.options.get(b'rust.index')
590 592
591 593 self._parse_index = parse_index_v1
592 594 if self._format_version == REVLOGV0:
593 595 self._parse_index = revlogv0.parse_index_v0
594 596 elif self._format_version == REVLOGV2:
595 597 self._parse_index = parse_index_v2
596 598 elif self._format_version == CHANGELOGV2:
597 599 self._parse_index = parse_index_cl_v2
598 600 elif devel_nodemap:
599 601 self._parse_index = parse_index_v1_nodemap
600 602 elif use_rust_index:
601 603 self._parse_index = parse_index_v1_mixed
602 604 try:
603 605 d = self._parse_index(index_data, self._inline)
604 606 index, chunkcache = d
605 607 use_nodemap = (
606 608 not self._inline
607 609 and self._nodemap_file is not None
608 610 and util.safehasattr(index, 'update_nodemap_data')
609 611 )
610 612 if use_nodemap:
611 613 nodemap_data = nodemaputil.persisted_data(self)
612 614 if nodemap_data is not None:
613 615 docket = nodemap_data[0]
614 616 if (
615 617 len(d[0]) > docket.tip_rev
616 618 and d[0][docket.tip_rev][7] == docket.tip_node
617 619 ):
618 620 # no changelog tampering
619 621 self._nodemap_docket = docket
620 622 index.update_nodemap_data(*nodemap_data)
621 623 except (ValueError, IndexError):
622 624 raise error.RevlogError(
623 625 _(b"index %s is corrupted") % self.display_id
624 626 )
625 627 self.index = index
626 628 self._segmentfile = randomaccessfile.randomaccessfile(
627 629 self.opener,
628 630 (self._indexfile if self._inline else self._datafile),
629 631 self._chunkcachesize,
630 632 chunkcache,
631 633 )
632 634 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
633 635 self.opener,
634 636 self._sidedatafile,
635 637 self._chunkcachesize,
636 638 )
637 639 # revnum -> (chain-length, sum-delta-length)
638 640 self._chaininfocache = util.lrucachedict(500)
639 641 # revlog header -> revlog compressor
640 642 self._decompressors = {}
641 643
642 644 @util.propertycache
643 645 def revlog_kind(self):
644 646 return self.target[0]
645 647
646 648 @util.propertycache
647 649 def display_id(self):
648 650 """The public facing "ID" of the revlog that we use in message"""
649 651 # Maybe we should build a user facing representation of
650 652 # revlog.target instead of using `self.radix`
651 653 return self.radix
652 654
653 655 def _get_decompressor(self, t):
654 656 try:
655 657 compressor = self._decompressors[t]
656 658 except KeyError:
657 659 try:
658 660 engine = util.compengines.forrevlogheader(t)
659 661 compressor = engine.revlogcompressor(self._compengineopts)
660 662 self._decompressors[t] = compressor
661 663 except KeyError:
662 664 raise error.RevlogError(
663 665 _(b'unknown compression type %s') % binascii.hexlify(t)
664 666 )
665 667 return compressor
666 668
667 669 @util.propertycache
668 670 def _compressor(self):
669 671 engine = util.compengines[self._compengine]
670 672 return engine.revlogcompressor(self._compengineopts)
671 673
672 674 @util.propertycache
673 675 def _decompressor(self):
674 676 """the default decompressor"""
675 677 if self._docket is None:
676 678 return None
677 679 t = self._docket.default_compression_header
678 680 c = self._get_decompressor(t)
679 681 return c.decompress
680 682
681 683 def _indexfp(self):
682 684 """file object for the revlog's index file"""
683 685 return self.opener(self._indexfile, mode=b"r")
684 686
685 687 def __index_write_fp(self):
686 688 # You should not use this directly; use `_writing` instead
687 689 try:
688 690 f = self.opener(
689 691 self._indexfile, mode=b"r+", checkambig=self._checkambig
690 692 )
691 693 if self._docket is None:
692 694 f.seek(0, os.SEEK_END)
693 695 else:
694 696 f.seek(self._docket.index_end, os.SEEK_SET)
695 697 return f
696 698 except IOError as inst:
697 699 if inst.errno != errno.ENOENT:
698 700 raise
699 701 return self.opener(
700 702 self._indexfile, mode=b"w+", checkambig=self._checkambig
701 703 )
702 704
703 705 def __index_new_fp(self):
704 706 # You should not use this unless you are upgrading from an inline revlog
705 707 return self.opener(
706 708 self._indexfile,
707 709 mode=b"w",
708 710 checkambig=self._checkambig,
709 711 atomictemp=True,
710 712 )
711 713
712 714 def _datafp(self, mode=b'r'):
713 715 """file object for the revlog's data file"""
714 716 return self.opener(self._datafile, mode=mode)
715 717
716 718 @contextlib.contextmanager
717 719 def _sidedatareadfp(self):
718 720 """file object suitable to read sidedata"""
719 721 if self._writinghandles:
720 722 yield self._writinghandles[2]
721 723 else:
722 724 with self.opener(self._sidedatafile) as fp:
723 725 yield fp
724 726
725 727 def tiprev(self):
726 728 return len(self.index) - 1
727 729
728 730 def tip(self):
729 731 return self.node(self.tiprev())
730 732
731 733 def __contains__(self, rev):
732 734 return 0 <= rev < len(self)
733 735
734 736 def __len__(self):
735 737 return len(self.index)
736 738
737 739 def __iter__(self):
738 740 return iter(pycompat.xrange(len(self)))
739 741
740 742 def revs(self, start=0, stop=None):
741 743 """iterate over all rev in this revlog (from start to stop)"""
742 744 return storageutil.iterrevs(len(self), start=start, stop=stop)
743 745
744 746 def hasnode(self, node):
745 747 try:
746 748 self.rev(node)
747 749 return True
748 750 except KeyError:
749 751 return False
750 752
751 753 def candelta(self, baserev, rev):
752 754 """whether two revisions (baserev, rev) can be delta-ed or not"""
753 755 # Disable delta if either rev requires a content-changing flag
754 756 # processor (ex. LFS). This is because such flag processor can alter
755 757 # the rawtext content that the delta will be based on, and two clients
756 758 # could have a same revlog node with different flags (i.e. different
757 759 # rawtext contents) and the delta could be incompatible.
758 760 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
759 761 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
760 762 ):
761 763 return False
762 764 return True
763 765
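As a hedged usage sketch (assuming a populated revlog-like `rl`, and using `mdiff.textdiff` from the imports above), a caller would gate delta generation on the check above:

def maybe_delta(rl, baserev, rev):
    # refuse to delta when a flag processor may alter the rawtext
    if not rl.candelta(baserev, rev):
        return None
    return mdiff.textdiff(rl.rawdata(baserev), rl.rawdata(rev))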
764 766 def update_caches(self, transaction):
765 767 if self._nodemap_file is not None:
766 768 if transaction is None:
767 769 nodemaputil.update_persistent_nodemap(self)
768 770 else:
769 771 nodemaputil.setup_persistent_nodemap(transaction, self)
770 772
771 773 def clearcaches(self):
772 774 self._revisioncache = None
773 775 self._chainbasecache.clear()
774 776 self._segmentfile.clear_cache()
775 777 self._segmentfile_sidedata.clear_cache()
776 778 self._pcache = {}
777 779 self._nodemap_docket = None
778 780 self.index.clearcaches()
779 781 # The python code is the one responsible for validating the docket, so
780 782 # we end up having to refresh it here.
781 783 use_nodemap = (
782 784 not self._inline
783 785 and self._nodemap_file is not None
784 786 and util.safehasattr(self.index, 'update_nodemap_data')
785 787 )
786 788 if use_nodemap:
787 789 nodemap_data = nodemaputil.persisted_data(self)
788 790 if nodemap_data is not None:
789 791 self._nodemap_docket = nodemap_data[0]
790 792 self.index.update_nodemap_data(*nodemap_data)
791 793
792 794 def rev(self, node):
793 795 try:
794 796 return self.index.rev(node)
795 797 except TypeError:
796 798 raise
797 799 except error.RevlogError:
798 800 # parsers.c radix tree lookup failed
799 801 if (
800 802 node == self.nodeconstants.wdirid
801 803 or node in self.nodeconstants.wdirfilenodeids
802 804 ):
803 805 raise error.WdirUnsupported
804 806 raise error.LookupError(node, self.display_id, _(b'no node'))
805 807
806 808 # Accessors for index entries.
807 809
808 810 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
809 811 # are flags.
810 812 def start(self, rev):
811 813 return int(self.index[rev][0] >> 16)
812 814
813 815 def sidedata_cut_off(self, rev):
814 816 sd_cut_off = self.index[rev][8]
815 817 if sd_cut_off != 0:
816 818 return sd_cut_off
817 819 # This is some annoying dance, because entries without sidedata
818 820 # currently use 0 as their offset. (instead of previous-offset +
819 821 # previous-size)
820 822 #
821 823 # We should reconsider this sidedata → 0 sidedata_offset policy.
822 824 # In the meantime, we need this.
823 825 while 0 <= rev:
824 826 e = self.index[rev]
825 827 if e[9] != 0:
826 828 return e[8] + e[9]
827 829 rev -= 1
828 830 return 0
829 831
830 832 def flags(self, rev):
831 833 return self.index[rev][0] & 0xFFFF
832 834
833 835 def length(self, rev):
834 836 return self.index[rev][1]
835 837
836 838 def sidedata_length(self, rev):
837 839 if not self.hassidedata:
838 840 return 0
839 841 return self.index[rev][9]
840 842
841 843 def rawsize(self, rev):
842 844 """return the length of the uncompressed text for a given revision"""
843 845 l = self.index[rev][2]
844 846 if l >= 0:
845 847 return l
846 848
847 849 t = self.rawdata(rev)
848 850 return len(t)
849 851
850 852 def size(self, rev):
851 853 """length of non-raw text (processed by a "read" flag processor)"""
852 854 # fast path: if no "read" flag processor could change the content,
853 855 # size is rawsize. note: ELLIPSIS is known to not change the content.
854 856 flags = self.flags(rev)
855 857 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
856 858 return self.rawsize(rev)
857 859
858 860 return len(self.revision(rev))
859 861
862 def fast_rank(self, rev):
863 """Return the rank of a revision if already known, or None otherwise.
864
865 The rank of a revision is the size of the sub-graph it defines as a
866 head. Equivalently, the rank of a revision `r` is the size of the set
867 `ancestors(r)`, `r` included.
868
869 This method returns the rank retrieved from the revlog in constant
870 time. It makes no attempt at computing unknown values for versions of
871 the revlog which do not persist the rank.
872 """
873 rank = self.index[rev][ENTRY_RANK]
874 if rank == RANK_UNKNOWN:
875 return None
876 return rank
877
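For contrast, a naive rank computation (which `fast_rank` deliberately does not attempt) would have to walk the whole sub-graph. A minimal sketch, assuming a revlog-like object `rl` and the `ancestors()` generator defined later in this file:

def naive_rank(rl, rev):
    # size of `ancestors(rev)` with `rev` itself included, i.e. the
    # sub-graph the revision defines as a head
    return sum(1 for _ in rl.ancestors([rev], inclusive=True))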
860 878 def chainbase(self, rev):
861 879 base = self._chainbasecache.get(rev)
862 880 if base is not None:
863 881 return base
864 882
865 883 index = self.index
866 884 iterrev = rev
867 885 base = index[iterrev][3]
868 886 while base != iterrev:
869 887 iterrev = base
870 888 base = index[iterrev][3]
871 889
872 890 self._chainbasecache[rev] = base
873 891 return base
874 892
875 893 def linkrev(self, rev):
876 894 return self.index[rev][4]
877 895
878 896 def parentrevs(self, rev):
879 897 try:
880 898 entry = self.index[rev]
881 899 except IndexError:
882 900 if rev == wdirrev:
883 901 raise error.WdirUnsupported
884 902 raise
885 903
886 904 return entry[5], entry[6]
887 905
888 906 # fast parentrevs(rev) where rev isn't filtered
889 907 _uncheckedparentrevs = parentrevs
890 908
891 909 def node(self, rev):
892 910 try:
893 911 return self.index[rev][7]
894 912 except IndexError:
895 913 if rev == wdirrev:
896 914 raise error.WdirUnsupported
897 915 raise
898 916
899 917 # Derived from index values.
900 918
901 919 def end(self, rev):
902 920 return self.start(rev) + self.length(rev)
903 921
904 922 def parents(self, node):
905 923 i = self.index
906 924 d = i[self.rev(node)]
907 925 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
908 926
909 927 def chainlen(self, rev):
910 928 return self._chaininfo(rev)[0]
911 929
912 930 def _chaininfo(self, rev):
913 931 chaininfocache = self._chaininfocache
914 932 if rev in chaininfocache:
915 933 return chaininfocache[rev]
916 934 index = self.index
917 935 generaldelta = self._generaldelta
918 936 iterrev = rev
919 937 e = index[iterrev]
920 938 clen = 0
921 939 compresseddeltalen = 0
922 940 while iterrev != e[3]:
923 941 clen += 1
924 942 compresseddeltalen += e[1]
925 943 if generaldelta:
926 944 iterrev = e[3]
927 945 else:
928 946 iterrev -= 1
929 947 if iterrev in chaininfocache:
930 948 t = chaininfocache[iterrev]
931 949 clen += t[0]
932 950 compresseddeltalen += t[1]
933 951 break
934 952 e = index[iterrev]
935 953 else:
936 954 # Add text length of base since decompressing that also takes
937 955 # work. For cache hits the length is already included.
938 956 compresseddeltalen += e[1]
939 957 r = (clen, compresseddeltalen)
940 958 chaininfocache[rev] = r
941 959 return r
942 960
943 961 def _deltachain(self, rev, stoprev=None):
944 962 """Obtain the delta chain for a revision.
945 963
946 964 ``stoprev`` specifies a revision to stop at. If not specified, we
947 965 stop at the base of the chain.
948 966
949 967 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
950 968 revs in ascending order and ``stopped`` is a bool indicating whether
951 969 ``stoprev`` was hit.
952 970 """
953 971 # Try C implementation.
954 972 try:
955 973 return self.index.deltachain(rev, stoprev, self._generaldelta)
956 974 except AttributeError:
957 975 pass
958 976
959 977 chain = []
960 978
961 979 # Alias to prevent attribute lookup in tight loop.
962 980 index = self.index
963 981 generaldelta = self._generaldelta
964 982
965 983 iterrev = rev
966 984 e = index[iterrev]
967 985 while iterrev != e[3] and iterrev != stoprev:
968 986 chain.append(iterrev)
969 987 if generaldelta:
970 988 iterrev = e[3]
971 989 else:
972 990 iterrev -= 1
973 991 e = index[iterrev]
974 992
975 993 if iterrev == stoprev:
976 994 stopped = True
977 995 else:
978 996 chain.append(iterrev)
979 997 stopped = False
980 998
981 999 chain.reverse()
982 1000 return chain, stopped
983 1001
984 1002 def ancestors(self, revs, stoprev=0, inclusive=False):
985 1003 """Generate the ancestors of 'revs' in reverse revision order.
986 1004 Does not generate revs lower than stoprev.
987 1005
988 1006 See the documentation for ancestor.lazyancestors for more details."""
989 1007
990 1008 # first, make sure start revisions aren't filtered
991 1009 revs = list(revs)
992 1010 checkrev = self.node
993 1011 for r in revs:
994 1012 checkrev(r)
995 1013 # and we're sure ancestors aren't filtered as well
996 1014
997 1015 if rustancestor is not None and self.index.rust_ext_compat:
998 1016 lazyancestors = rustancestor.LazyAncestors
999 1017 arg = self.index
1000 1018 else:
1001 1019 lazyancestors = ancestor.lazyancestors
1002 1020 arg = self._uncheckedparentrevs
1003 1021 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1004 1022
1005 1023 def descendants(self, revs):
1006 1024 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1007 1025
1008 1026 def findcommonmissing(self, common=None, heads=None):
1009 1027 """Return a tuple of the ancestors of common and the ancestors of heads
1010 1028 that are not ancestors of common. In revset terminology, we return the
1011 1029 tuple:
1012 1030
1013 1031 ::common, (::heads) - (::common)
1014 1032
1015 1033 The list is sorted by revision number, meaning it is
1016 1034 topologically sorted.
1017 1035
1018 1036 'heads' and 'common' are both lists of node IDs. If heads is
1019 1037 not supplied, uses all of the revlog's heads. If common is not
1020 1038 supplied, uses nullid."""
1021 1039 if common is None:
1022 1040 common = [self.nullid]
1023 1041 if heads is None:
1024 1042 heads = self.heads()
1025 1043
1026 1044 common = [self.rev(n) for n in common]
1027 1045 heads = [self.rev(n) for n in heads]
1028 1046
1029 1047 # we want the ancestors, but inclusive
1030 1048 class lazyset(object):
1031 1049 def __init__(self, lazyvalues):
1032 1050 self.addedvalues = set()
1033 1051 self.lazyvalues = lazyvalues
1034 1052
1035 1053 def __contains__(self, value):
1036 1054 return value in self.addedvalues or value in self.lazyvalues
1037 1055
1038 1056 def __iter__(self):
1039 1057 added = self.addedvalues
1040 1058 for r in added:
1041 1059 yield r
1042 1060 for r in self.lazyvalues:
1043 1061 if r not in added:
1044 1062 yield r
1045 1063
1046 1064 def add(self, value):
1047 1065 self.addedvalues.add(value)
1048 1066
1049 1067 def update(self, values):
1050 1068 self.addedvalues.update(values)
1051 1069
1052 1070 has = lazyset(self.ancestors(common))
1053 1071 has.add(nullrev)
1054 1072 has.update(common)
1055 1073
1056 1074 # take all ancestors from heads that aren't in has
1057 1075 missing = set()
1058 1076 visit = collections.deque(r for r in heads if r not in has)
1059 1077 while visit:
1060 1078 r = visit.popleft()
1061 1079 if r in missing:
1062 1080 continue
1063 1081 else:
1064 1082 missing.add(r)
1065 1083 for p in self.parentrevs(r):
1066 1084 if p not in has:
1067 1085 visit.append(p)
1068 1086 missing = list(missing)
1069 1087 missing.sort()
1070 1088 return has, [self.node(miss) for miss in missing]
1071 1089
1072 1090 def incrementalmissingrevs(self, common=None):
1073 1091 """Return an object that can be used to incrementally compute the
1074 1092 revision numbers of the ancestors of arbitrary sets that are not
1075 1093 ancestors of common. This is an ancestor.incrementalmissingancestors
1076 1094 object.
1077 1095
1078 1096 'common' is a list of revision numbers. If common is not supplied, uses
1079 1097 nullrev.
1080 1098 """
1081 1099 if common is None:
1082 1100 common = [nullrev]
1083 1101
1084 1102 if rustancestor is not None and self.index.rust_ext_compat:
1085 1103 return rustancestor.MissingAncestors(self.index, common)
1086 1104 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1087 1105
1088 1106 def findmissingrevs(self, common=None, heads=None):
1089 1107 """Return the revision numbers of the ancestors of heads that
1090 1108 are not ancestors of common.
1091 1109
1092 1110 More specifically, return a list of revision numbers corresponding to
1093 1111 nodes N such that every N satisfies the following constraints:
1094 1112
1095 1113 1. N is an ancestor of some node in 'heads'
1096 1114 2. N is not an ancestor of any node in 'common'
1097 1115
1098 1116 The list is sorted by revision number, meaning it is
1099 1117 topologically sorted.
1100 1118
1101 1119 'heads' and 'common' are both lists of revision numbers. If heads is
1102 1120 not supplied, uses all of the revlog's heads. If common is not
1103 1121 supplied, uses nullid."""
1104 1122 if common is None:
1105 1123 common = [nullrev]
1106 1124 if heads is None:
1107 1125 heads = self.headrevs()
1108 1126
1109 1127 inc = self.incrementalmissingrevs(common=common)
1110 1128 return inc.missingancestors(heads)
1111 1129
1112 1130 def findmissing(self, common=None, heads=None):
1113 1131 """Return the ancestors of heads that are not ancestors of common.
1114 1132
1115 1133 More specifically, return a list of nodes N such that every N
1116 1134 satisfies the following constraints:
1117 1135
1118 1136 1. N is an ancestor of some node in 'heads'
1119 1137 2. N is not an ancestor of any node in 'common'
1120 1138
1121 1139 The list is sorted by revision number, meaning it is
1122 1140 topologically sorted.
1123 1141
1124 1142 'heads' and 'common' are both lists of node IDs. If heads is
1125 1143 not supplied, uses all of the revlog's heads. If common is not
1126 1144 supplied, uses nullid."""
1127 1145 if common is None:
1128 1146 common = [self.nullid]
1129 1147 if heads is None:
1130 1148 heads = self.heads()
1131 1149
1132 1150 common = [self.rev(n) for n in common]
1133 1151 heads = [self.rev(n) for n in heads]
1134 1152
1135 1153 inc = self.incrementalmissingrevs(common=common)
1136 1154 return [self.node(r) for r in inc.missingancestors(heads)]
1137 1155
1138 1156 def nodesbetween(self, roots=None, heads=None):
1139 1157 """Return a topological path from 'roots' to 'heads'.
1140 1158
1141 1159 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1142 1160 topologically sorted list of all nodes N that satisfy both of
1143 1161 these constraints:
1144 1162
1145 1163 1. N is a descendant of some node in 'roots'
1146 1164 2. N is an ancestor of some node in 'heads'
1147 1165
1148 1166 Every node is considered to be both a descendant and an ancestor
1149 1167 of itself, so every reachable node in 'roots' and 'heads' will be
1150 1168 included in 'nodes'.
1151 1169
1152 1170 'outroots' is the list of reachable nodes in 'roots', i.e., the
1153 1171 subset of 'roots' that is returned in 'nodes'. Likewise,
1154 1172 'outheads' is the subset of 'heads' that is also in 'nodes'.
1155 1173
1156 1174 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1157 1175 unspecified, uses nullid as the only root. If 'heads' is
1158 1176 unspecified, uses list of all of the revlog's heads."""
1159 1177 nonodes = ([], [], [])
1160 1178 if roots is not None:
1161 1179 roots = list(roots)
1162 1180 if not roots:
1163 1181 return nonodes
1164 1182 lowestrev = min([self.rev(n) for n in roots])
1165 1183 else:
1166 1184 roots = [self.nullid] # Everybody's a descendant of nullid
1167 1185 lowestrev = nullrev
1168 1186 if (lowestrev == nullrev) and (heads is None):
1169 1187 # We want _all_ the nodes!
1170 1188 return (
1171 1189 [self.node(r) for r in self],
1172 1190 [self.nullid],
1173 1191 list(self.heads()),
1174 1192 )
1175 1193 if heads is None:
1176 1194 # All nodes are ancestors, so the latest ancestor is the last
1177 1195 # node.
1178 1196 highestrev = len(self) - 1
1179 1197 # Set ancestors to None to signal that every node is an ancestor.
1180 1198 ancestors = None
1181 1199 # Set heads to an empty dictionary for later discovery of heads
1182 1200 heads = {}
1183 1201 else:
1184 1202 heads = list(heads)
1185 1203 if not heads:
1186 1204 return nonodes
1187 1205 ancestors = set()
1188 1206 # Turn heads into a dictionary so we can remove 'fake' heads.
1189 1207 # Also, later we will be using it to filter out the heads we can't
1190 1208 # find from roots.
1191 1209 heads = dict.fromkeys(heads, False)
1192 1210 # Start at the top and keep marking parents until we're done.
1193 1211 nodestotag = set(heads)
1194 1212 # Remember where the top was so we can use it as a limit later.
1195 1213 highestrev = max([self.rev(n) for n in nodestotag])
1196 1214 while nodestotag:
1197 1215 # grab a node to tag
1198 1216 n = nodestotag.pop()
1199 1217 # Never tag nullid
1200 1218 if n == self.nullid:
1201 1219 continue
1202 1220 # A node's revision number represents its place in a
1203 1221 # topologically sorted list of nodes.
1204 1222 r = self.rev(n)
1205 1223 if r >= lowestrev:
1206 1224 if n not in ancestors:
1207 1225 # If we are possibly a descendant of one of the roots
1208 1226 # and we haven't already been marked as an ancestor
1209 1227 ancestors.add(n) # Mark as ancestor
1210 1228 # Add non-nullid parents to list of nodes to tag.
1211 1229 nodestotag.update(
1212 1230 [p for p in self.parents(n) if p != self.nullid]
1213 1231 )
1214 1232 elif n in heads: # We've seen it before, is it a fake head?
1215 1233 # So it is, real heads should not be the ancestors of
1216 1234 # any other heads.
1217 1235 heads.pop(n)
1218 1236 if not ancestors:
1219 1237 return nonodes
1220 1238 # Now that we have our set of ancestors, we want to remove any
1221 1239 # roots that are not ancestors.
1222 1240
1223 1241 # If one of the roots was nullid, everything is included anyway.
1224 1242 if lowestrev > nullrev:
1225 1243 # But, since we weren't, let's recompute the lowest rev to not
1226 1244 # include roots that aren't ancestors.
1227 1245
1228 1246 # Filter out roots that aren't ancestors of heads
1229 1247 roots = [root for root in roots if root in ancestors]
1230 1248 # Recompute the lowest revision
1231 1249 if roots:
1232 1250 lowestrev = min([self.rev(root) for root in roots])
1233 1251 else:
1234 1252 # No more roots? Return empty list
1235 1253 return nonodes
1236 1254 else:
1237 1255 # We are descending from nullid, and don't need to care about
1238 1256 # any other roots.
1239 1257 lowestrev = nullrev
1240 1258 roots = [self.nullid]
1241 1259 # Transform our roots list into a set.
1242 1260 descendants = set(roots)
1243 1261 # Also, keep the original roots so we can filter out roots that aren't
1244 1262 # 'real' roots (i.e. are descended from other roots).
1245 1263 roots = descendants.copy()
1246 1264 # Our topologically sorted list of output nodes.
1247 1265 orderedout = []
1248 1266 # Don't start at nullid since we don't want nullid in our output list,
1249 1267 # and if nullid shows up in descendants, empty parents will look like
1250 1268 # they're descendants.
1251 1269 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1252 1270 n = self.node(r)
1253 1271 isdescendant = False
1254 1272 if lowestrev == nullrev: # Everybody is a descendant of nullid
1255 1273 isdescendant = True
1256 1274 elif n in descendants:
1257 1275 # n is already a descendant
1258 1276 isdescendant = True
1259 1277 # This check only needs to be done here because all the roots
1260 1278 # will start being marked as descendants before the loop.
1261 1279 if n in roots:
1262 1280 # If n was a root, check if it's a 'real' root.
1263 1281 p = tuple(self.parents(n))
1264 1282 # If any of its parents are descendants, it's not a root.
1265 1283 if (p[0] in descendants) or (p[1] in descendants):
1266 1284 roots.remove(n)
1267 1285 else:
1268 1286 p = tuple(self.parents(n))
1269 1287 # A node is a descendant if either of its parents are
1270 1288 # descendants. (We seeded the descendants set with the roots
1271 1289 # up there, remember?)
1272 1290 if (p[0] in descendants) or (p[1] in descendants):
1273 1291 descendants.add(n)
1274 1292 isdescendant = True
1275 1293 if isdescendant and ((ancestors is None) or (n in ancestors)):
1276 1294 # Only include nodes that are both descendants and ancestors.
1277 1295 orderedout.append(n)
1278 1296 if (ancestors is not None) and (n in heads):
1279 1297 # We're trying to figure out which heads are reachable
1280 1298 # from roots.
1281 1299 # Mark this head as having been reached
1282 1300 heads[n] = True
1283 1301 elif ancestors is None:
1284 1302 # Otherwise, we're trying to discover the heads.
1285 1303 # Assume this is a head because if it isn't, the next step
1286 1304 # will eventually remove it.
1287 1305 heads[n] = True
1288 1306 # But, obviously its parents aren't.
1289 1307 for p in self.parents(n):
1290 1308 heads.pop(p, None)
1291 1309 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1292 1310 roots = list(roots)
1293 1311 assert orderedout
1294 1312 assert roots
1295 1313 assert heads
1296 1314 return (orderedout, roots, heads)
1297 1315
1298 1316 def headrevs(self, revs=None):
1299 1317 if revs is None:
1300 1318 try:
1301 1319 return self.index.headrevs()
1302 1320 except AttributeError:
1303 1321 return self._headrevs()
1304 1322 if rustdagop is not None and self.index.rust_ext_compat:
1305 1323 return rustdagop.headrevs(self.index, revs)
1306 1324 return dagop.headrevs(revs, self._uncheckedparentrevs)
1307 1325
1308 1326 def computephases(self, roots):
1309 1327 return self.index.computephasesmapsets(roots)
1310 1328
1311 1329 def _headrevs(self):
1312 1330 count = len(self)
1313 1331 if not count:
1314 1332 return [nullrev]
1315 1333 # we won't iterate over filtered revs, so nobody is a head at start
1316 1334 ishead = [0] * (count + 1)
1317 1335 index = self.index
1318 1336 for r in self:
1319 1337 ishead[r] = 1 # I may be a head
1320 1338 e = index[r]
1321 1339 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1322 1340 return [r for r, val in enumerate(ishead) if val]
1323 1341
1324 1342 def heads(self, start=None, stop=None):
1325 1343 """return the list of all nodes that have no children
1326 1344
1327 1345 if start is specified, only heads that are descendants of
1328 1346 start will be returned
1329 1347 if stop is specified, it will consider all the revs from stop
1330 1348 as if they had no children
1331 1349 """
1332 1350 if start is None and stop is None:
1333 1351 if not len(self):
1334 1352 return [self.nullid]
1335 1353 return [self.node(r) for r in self.headrevs()]
1336 1354
1337 1355 if start is None:
1338 1356 start = nullrev
1339 1357 else:
1340 1358 start = self.rev(start)
1341 1359
1342 1360 stoprevs = {self.rev(n) for n in stop or []}
1343 1361
1344 1362 revs = dagop.headrevssubset(
1345 1363 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1346 1364 )
1347 1365
1348 1366 return [self.node(rev) for rev in revs]
1349 1367
1350 1368 def children(self, node):
1351 1369 """find the children of a given node"""
1352 1370 c = []
1353 1371 p = self.rev(node)
1354 1372 for r in self.revs(start=p + 1):
1355 1373 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1356 1374 if prevs:
1357 1375 for pr in prevs:
1358 1376 if pr == p:
1359 1377 c.append(self.node(r))
1360 1378 elif p == nullrev:
1361 1379 c.append(self.node(r))
1362 1380 return c
1363 1381
1364 1382 def commonancestorsheads(self, a, b):
1365 1383 """calculate all the heads of the common ancestors of nodes a and b"""
1366 1384 a, b = self.rev(a), self.rev(b)
1367 1385 ancs = self._commonancestorsheads(a, b)
1368 1386 return pycompat.maplist(self.node, ancs)
1369 1387
1370 1388 def _commonancestorsheads(self, *revs):
1371 1389 """calculate all the heads of the common ancestors of revs"""
1372 1390 try:
1373 1391 ancs = self.index.commonancestorsheads(*revs)
1374 1392 except (AttributeError, OverflowError): # C implementation failed
1375 1393 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1376 1394 return ancs
1377 1395
1378 1396 def isancestor(self, a, b):
1379 1397 """return True if node a is an ancestor of node b
1380 1398
1381 1399 A revision is considered an ancestor of itself."""
1382 1400 a, b = self.rev(a), self.rev(b)
1383 1401 return self.isancestorrev(a, b)
1384 1402
1385 1403 def isancestorrev(self, a, b):
1386 1404 """return True if revision a is an ancestor of revision b
1387 1405
1388 1406 A revision is considered an ancestor of itself.
1389 1407
1390 1408 The implementation of this is trivial but the use of
1391 1409 reachableroots is not."""
1392 1410 if a == nullrev:
1393 1411 return True
1394 1412 elif a == b:
1395 1413 return True
1396 1414 elif a > b:
1397 1415 return False
1398 1416 return bool(self.reachableroots(a, [b], [a], includepath=False))
1399 1417
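The `reachableroots` call above asks whether, starting from head `b` and never descending below revision `a`, the walk can reach root `a`. A pure-python equivalent, shown only to illustrate the semantics (it assumes `a != nullrev`, which the early returns above already handle, and a revlog-like `rl` exposing `parentrevs`):

def isancestorrev_pure(rl, a, b):
    seen = {b}
    stack = [b]
    while stack:
        r = stack.pop()
        if r == a:
            return True
        for p in rl.parentrevs(r):
            # parents always have smaller revs than their children, so
            # anything below `a` can never lead back to `a`
            if p >= a and p not in seen:
                seen.add(p)
                stack.append(p)
    return False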
1400 1418 def reachableroots(self, minroot, heads, roots, includepath=False):
1401 1419 """return (heads(::(<roots> and <roots>::<heads>)))
1402 1420
1403 1421 If includepath is True, return (<roots>::<heads>)."""
1404 1422 try:
1405 1423 return self.index.reachableroots2(
1406 1424 minroot, heads, roots, includepath
1407 1425 )
1408 1426 except AttributeError:
1409 1427 return dagop._reachablerootspure(
1410 1428 self.parentrevs, minroot, roots, heads, includepath
1411 1429 )
1412 1430
1413 1431 def ancestor(self, a, b):
1414 1432 """calculate the "best" common ancestor of nodes a and b"""
1415 1433
1416 1434 a, b = self.rev(a), self.rev(b)
1417 1435 try:
1418 1436 ancs = self.index.ancestors(a, b)
1419 1437 except (AttributeError, OverflowError):
1420 1438 ancs = ancestor.ancestors(self.parentrevs, a, b)
1421 1439 if ancs:
1422 1440 # choose a consistent winner when there's a tie
1423 1441 return min(map(self.node, ancs))
1424 1442 return self.nullid
1425 1443
1426 1444 def _match(self, id):
1427 1445 if isinstance(id, int):
1428 1446 # rev
1429 1447 return self.node(id)
1430 1448 if len(id) == self.nodeconstants.nodelen:
1431 1449 # possibly a binary node
1432 1450 # odds of a binary node being all hex in ASCII are 1 in 10**25
1433 1451 try:
1434 1452 node = id
1435 1453 self.rev(node) # quick search the index
1436 1454 return node
1437 1455 except error.LookupError:
1438 1456 pass # may be partial hex id
1439 1457 try:
1440 1458 # str(rev)
1441 1459 rev = int(id)
1442 1460 if b"%d" % rev != id:
1443 1461 raise ValueError
1444 1462 if rev < 0:
1445 1463 rev = len(self) + rev
1446 1464 if rev < 0 or rev >= len(self):
1447 1465 raise ValueError
1448 1466 return self.node(rev)
1449 1467 except (ValueError, OverflowError):
1450 1468 pass
1451 1469 if len(id) == 2 * self.nodeconstants.nodelen:
1452 1470 try:
1453 1471 # a full hex nodeid?
1454 1472 node = bin(id)
1455 1473 self.rev(node)
1456 1474 return node
1457 1475 except (TypeError, error.LookupError):
1458 1476 pass
1459 1477
1460 1478 def _partialmatch(self, id):
1461 1479 # we don't care about wdirfilenodeids as they should always be full hashes
1462 1480 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1463 1481 ambiguous = False
1464 1482 try:
1465 1483 partial = self.index.partialmatch(id)
1466 1484 if partial and self.hasnode(partial):
1467 1485 if maybewdir:
1468 1486 # single 'ff...' match in radix tree, ambiguous with wdir
1469 1487 ambiguous = True
1470 1488 else:
1471 1489 return partial
1472 1490 elif maybewdir:
1473 1491 # no 'ff...' match in radix tree, wdir identified
1474 1492 raise error.WdirUnsupported
1475 1493 else:
1476 1494 return None
1477 1495 except error.RevlogError:
1478 1496 # parsers.c radix tree lookup gave multiple matches
1479 1497 # fast path: for unfiltered changelog, radix tree is accurate
1480 1498 if not getattr(self, 'filteredrevs', None):
1481 1499 ambiguous = True
1482 1500 # fall through to slow path that filters hidden revisions
1483 1501 except (AttributeError, ValueError):
1484 1502 # we are pure python, or key was too short to search radix tree
1485 1503 pass
1486 1504 if ambiguous:
1487 1505 raise error.AmbiguousPrefixLookupError(
1488 1506 id, self.display_id, _(b'ambiguous identifier')
1489 1507 )
1490 1508
1491 1509 if id in self._pcache:
1492 1510 return self._pcache[id]
1493 1511
1494 1512 if len(id) <= 40:
1495 1513 try:
1496 1514 # hex(node)[:...]
1497 1515 l = len(id) // 2 # grab an even number of digits
1498 1516 prefix = bin(id[: l * 2])
1499 1517 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1500 1518 nl = [
1501 1519 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1502 1520 ]
1503 1521 if self.nodeconstants.nullhex.startswith(id):
1504 1522 nl.append(self.nullid)
1505 1523 if len(nl) > 0:
1506 1524 if len(nl) == 1 and not maybewdir:
1507 1525 self._pcache[id] = nl[0]
1508 1526 return nl[0]
1509 1527 raise error.AmbiguousPrefixLookupError(
1510 1528 id, self.display_id, _(b'ambiguous identifier')
1511 1529 )
1512 1530 if maybewdir:
1513 1531 raise error.WdirUnsupported
1514 1532 return None
1515 1533 except TypeError:
1516 1534 pass
1517 1535
1518 1536 def lookup(self, id):
1519 1537 """locate a node based on:
1520 1538 - revision number or str(revision number)
1521 1539 - nodeid or subset of hex nodeid
1522 1540 """
1523 1541 n = self._match(id)
1524 1542 if n is not None:
1525 1543 return n
1526 1544 n = self._partialmatch(id)
1527 1545 if n:
1528 1546 return n
1529 1547
1530 1548 raise error.LookupError(id, self.display_id, _(b'no match found'))
1531 1549
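A hedged usage sketch of the resolution order above (revision number string, binary nodeid, full hex nodeid, then unambiguous prefix), assuming a populated revlog-like `rl` whose tip prefix is unambiguous:

def lookup_examples(rl):
    node = rl.tip()
    hexnode = hex(node)
    assert rl.lookup(b'%d' % rl.tiprev()) == node  # str(revision number)
    assert rl.lookup(node) == node                 # binary nodeid
    assert rl.lookup(hexnode) == node              # full hex nodeid
    assert rl.lookup(hexnode[:12]) == node         # unambiguous hex prefix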
1532 1550 def shortest(self, node, minlength=1):
1533 1551 """Find the shortest unambiguous prefix that matches node."""
1534 1552
1535 1553 def isvalid(prefix):
1536 1554 try:
1537 1555 matchednode = self._partialmatch(prefix)
1538 1556 except error.AmbiguousPrefixLookupError:
1539 1557 return False
1540 1558 except error.WdirUnsupported:
1541 1559 # single 'ff...' match
1542 1560 return True
1543 1561 if matchednode is None:
1544 1562 raise error.LookupError(node, self.display_id, _(b'no node'))
1545 1563 return True
1546 1564
1547 1565 def maybewdir(prefix):
1548 1566 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1549 1567
1550 1568 hexnode = hex(node)
1551 1569
1552 1570 def disambiguate(hexnode, minlength):
1553 1571 """Disambiguate against wdirid."""
1554 1572 for length in range(minlength, len(hexnode) + 1):
1555 1573 prefix = hexnode[:length]
1556 1574 if not maybewdir(prefix):
1557 1575 return prefix
1558 1576
1559 1577 if not getattr(self, 'filteredrevs', None):
1560 1578 try:
1561 1579 length = max(self.index.shortest(node), minlength)
1562 1580 return disambiguate(hexnode, length)
1563 1581 except error.RevlogError:
1564 1582 if node != self.nodeconstants.wdirid:
1565 1583 raise error.LookupError(
1566 1584 node, self.display_id, _(b'no node')
1567 1585 )
1568 1586 except AttributeError:
1569 1587 # Fall through to pure code
1570 1588 pass
1571 1589
1572 1590 if node == self.nodeconstants.wdirid:
1573 1591 for length in range(minlength, len(hexnode) + 1):
1574 1592 prefix = hexnode[:length]
1575 1593 if isvalid(prefix):
1576 1594 return prefix
1577 1595
1578 1596 for length in range(minlength, len(hexnode) + 1):
1579 1597 prefix = hexnode[:length]
1580 1598 if isvalid(prefix):
1581 1599 return disambiguate(hexnode, length)
1582 1600
1583 1601 def cmp(self, node, text):
1584 1602 """compare text with a given file revision
1585 1603
1586 1604 returns True if text is different from what is stored.
1587 1605 """
1588 1606 p1, p2 = self.parents(node)
1589 1607 return storageutil.hashrevisionsha1(text, p1, p2) != node
1590 1608
1591 1609 def _getsegmentforrevs(self, startrev, endrev, df=None):
1592 1610 """Obtain a segment of raw data corresponding to a range of revisions.
1593 1611
1594 1612 Accepts the start and end revisions and an optional already-open
1595 1613 file handle to be used for reading. If the file handle is read, its
1596 1614 seek position will not be preserved.
1597 1615
1598 1616 Requests for data may be satisfied by a cache.
1599 1617
1600 1618 Returns a 2-tuple of (offset, data) for the requested range of
1601 1619 revisions. Offset is the integer offset from the beginning of the
1602 1620 revlog and data is a str or buffer of the raw byte data.
1603 1621
1604 1622 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1605 1623 to determine where each revision's data begins and ends.
1606 1624 """
1607 1625 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1608 1626 # (functions are expensive).
1609 1627 index = self.index
1610 1628 istart = index[startrev]
1611 1629 start = int(istart[0] >> 16)
1612 1630 if startrev == endrev:
1613 1631 end = start + istart[1]
1614 1632 else:
1615 1633 iend = index[endrev]
1616 1634 end = int(iend[0] >> 16) + iend[1]
1617 1635
1618 1636 if self._inline:
1619 1637 start += (startrev + 1) * self.index.entry_size
1620 1638 end += (endrev + 1) * self.index.entry_size
1621 1639 length = end - start
1622 1640
1623 1641 return start, self._segmentfile.read_chunk(start, length, df)
1624 1642
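# A minimal sketch (not part of the module, hypothetical names) of the
# slicing described in the `_getsegmentforrevs` docstring above: callers
# carve per-revision chunks out of the returned (offset, data) pair using
# the revlog's start()/length() accessors. An entry_size of 64 matches the
# v1 index; inline revlogs interleave one index entry before each chunk.
def slice_segment_sketch(offset, data, revs, start, length,
                         inline=False, entry_size=64):
    chunks = []
    for rev in revs:
        chunkstart = start(rev)
        if inline:
            chunkstart += (rev + 1) * entry_size
        begin = chunkstart - offset
        chunks.append(data[begin:begin + length(rev)])
    return chunks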
1625 1643 def _chunk(self, rev, df=None):
1626 1644 """Obtain a single decompressed chunk for a revision.
1627 1645
1628 1646 Accepts an integer revision and an optional already-open file handle
1629 1647 to be used for reading. If used, the seek position of the file will not
1630 1648 be preserved.
1631 1649
1632 1650 Returns a str holding uncompressed data for the requested revision.
1633 1651 """
1634 1652 compression_mode = self.index[rev][10]
1635 1653 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1636 1654 if compression_mode == COMP_MODE_PLAIN:
1637 1655 return data
1638 1656 elif compression_mode == COMP_MODE_DEFAULT:
1639 1657 return self._decompressor(data)
1640 1658 elif compression_mode == COMP_MODE_INLINE:
1641 1659 return self.decompress(data)
1642 1660 else:
1643 1661 msg = b'unknown compression mode %d'
1644 1662 msg %= compression_mode
1645 1663 raise error.RevlogError(msg)
1646 1664
1647 1665 def _chunks(self, revs, df=None, targetsize=None):
1648 1666 """Obtain decompressed chunks for the specified revisions.
1649 1667
1650 1668 Accepts an iterable of numeric revisions that are assumed to be in
1651 1669 ascending order. Also accepts an optional already-open file handle
1652 1670 to be used for reading. If used, the seek position of the file will
1653 1671 not be preserved.
1654 1672
1655 1673 This function is similar to calling ``self._chunk()`` multiple times,
1656 1674 but is faster.
1657 1675
1658 1676 Returns a list with decompressed data for each requested revision.
1659 1677 """
1660 1678 if not revs:
1661 1679 return []
1662 1680 start = self.start
1663 1681 length = self.length
1664 1682 inline = self._inline
1665 1683 iosize = self.index.entry_size
1666 1684 buffer = util.buffer
1667 1685
1668 1686 l = []
1669 1687 ladd = l.append
1670 1688
1671 1689 if not self._withsparseread:
1672 1690 slicedchunks = (revs,)
1673 1691 else:
1674 1692 slicedchunks = deltautil.slicechunk(
1675 1693 self, revs, targetsize=targetsize
1676 1694 )
1677 1695
1678 1696 for revschunk in slicedchunks:
1679 1697 firstrev = revschunk[0]
1680 1698 # Skip trailing revisions with empty diff
1681 1699 for lastrev in revschunk[::-1]:
1682 1700 if length(lastrev) != 0:
1683 1701 break
1684 1702
1685 1703 try:
1686 1704 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1687 1705 except OverflowError:
1688 1706 # issue4215 - we can't cache a run of chunks greater than
1689 1707 # 2G on Windows
1690 1708 return [self._chunk(rev, df=df) for rev in revschunk]
1691 1709
1692 1710 decomp = self.decompress
1693 1711 # self._decompressor might be None, but will not be used in that case
1694 1712 def_decomp = self._decompressor
1695 1713 for rev in revschunk:
1696 1714 chunkstart = start(rev)
1697 1715 if inline:
1698 1716 chunkstart += (rev + 1) * iosize
1699 1717 chunklength = length(rev)
1700 1718 comp_mode = self.index[rev][10]
1701 1719 c = buffer(data, chunkstart - offset, chunklength)
1702 1720 if comp_mode == COMP_MODE_PLAIN:
1703 1721 ladd(c)
1704 1722 elif comp_mode == COMP_MODE_INLINE:
1705 1723 ladd(decomp(c))
1706 1724 elif comp_mode == COMP_MODE_DEFAULT:
1707 1725 ladd(def_decomp(c))
1708 1726 else:
1709 1727 msg = b'unknown compression mode %d'
1710 1728 msg %= comp_mode
1711 1729 raise error.RevlogError(msg)
1712 1730
1713 1731 return l
1714 1732
1715 1733 def deltaparent(self, rev):
1716 1734 """return deltaparent of the given revision"""
1717 1735 base = self.index[rev][3]
1718 1736 if base == rev:
1719 1737 return nullrev
1720 1738 elif self._generaldelta:
1721 1739 return base
1722 1740 else:
1723 1741 return rev - 1
1724 1742
1725 1743 def issnapshot(self, rev):
1726 1744 """tells whether rev is a snapshot"""
1727 1745 if not self._sparserevlog:
1728 1746 return self.deltaparent(rev) == nullrev
1729 1747 elif util.safehasattr(self.index, b'issnapshot'):
1730 1748 # directly assign the method to cache the testing and access
1731 1749 self.issnapshot = self.index.issnapshot
1732 1750 return self.issnapshot(rev)
1733 1751 if rev == nullrev:
1734 1752 return True
1735 1753 entry = self.index[rev]
1736 1754 base = entry[3]
1737 1755 if base == rev:
1738 1756 return True
1739 1757 if base == nullrev:
1740 1758 return True
1741 1759 p1 = entry[5]
1742 1760 p2 = entry[6]
1743 1761 if base == p1 or base == p2:
1744 1762 return False
1745 1763 return self.issnapshot(base)
1746 1764
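# A minimal sketch (not part of the module, hypothetical names) of the
# pure recursion behind `issnapshot` above, over a map of
# rev -> (deltabase, p1, p2) with -1 standing for nullrev: a revision is a
# snapshot when its delta chain never goes through one of its parents.
def issnapshot_sketch(entries, rev):
    if rev == -1:
        return True
    base, p1, p2 = entries[rev]
    if base == rev or base == -1:
        return True  # stored as full text, or as a delta against nothing
    if base in (p1, p2):
        return False  # a plain delta against one of the parents
    return issnapshot_sketch(entries, base)  # intermediate snapshot chain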
1747 1765 def snapshotdepth(self, rev):
1748 1766 """number of snapshot in the chain before this one"""
1749 1767 if not self.issnapshot(rev):
1750 1768 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1751 1769 return len(self._deltachain(rev)[0]) - 1
1752 1770
1753 1771 def revdiff(self, rev1, rev2):
1754 1772 """return or calculate a delta between two revisions
1755 1773
1756 1774 The delta calculated is in binary form and is intended to be written to
1757 1775 revlog data directly. So this function needs raw revision data.
1758 1776 """
1759 1777 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1760 1778 return bytes(self._chunk(rev2))
1761 1779
1762 1780 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1763 1781
1764 1782 def revision(self, nodeorrev, _df=None):
1765 1783 """return an uncompressed revision of a given node or revision
1766 1784 number.
1767 1785
1768 1786 _df - an existing file handle to read from. (internal-only)
1769 1787 """
1770 1788 return self._revisiondata(nodeorrev, _df)
1771 1789
1772 1790 def sidedata(self, nodeorrev, _df=None):
1773 1791 """a map of extra data related to the changeset but not part of the hash
1774 1792
1775 1793 This function currently returns a dictionary. However, a more advanced
1776 1794 mapping object will likely be used in the future for more
1777 1795 efficient/lazy code.
1778 1796 """
1779 1797 # deal with <nodeorrev> argument type
1780 1798 if isinstance(nodeorrev, int):
1781 1799 rev = nodeorrev
1782 1800 else:
1783 1801 rev = self.rev(nodeorrev)
1784 1802 return self._sidedata(rev)
1785 1803
1786 1804 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1787 1805 # deal with <nodeorrev> argument type
1788 1806 if isinstance(nodeorrev, int):
1789 1807 rev = nodeorrev
1790 1808 node = self.node(rev)
1791 1809 else:
1792 1810 node = nodeorrev
1793 1811 rev = None
1794 1812
1795 1813 # fast path the special `nullid` rev
1796 1814 if node == self.nullid:
1797 1815 return b""
1798 1816
1799 1817 # ``rawtext`` is the text as stored inside the revlog. Might be the
1800 1818 # revision or might need to be processed to retrieve the revision.
1801 1819 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1802 1820
1803 1821 if raw and validated:
1804 1822 # if we don't want to process the raw text and that raw
1805 1823 # text is cached, we can exit early.
1806 1824 return rawtext
1807 1825 if rev is None:
1808 1826 rev = self.rev(node)
1809 1827 # the revlog's flag for this revision
1810 1828 # (usually alter its state or content)
1811 1829 flags = self.flags(rev)
1812 1830
1813 1831 if validated and flags == REVIDX_DEFAULT_FLAGS:
1814 1832 # no extra flags set, no flag processor runs, text = rawtext
1815 1833 return rawtext
1816 1834
1817 1835 if raw:
1818 1836 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1819 1837 text = rawtext
1820 1838 else:
1821 1839 r = flagutil.processflagsread(self, rawtext, flags)
1822 1840 text, validatehash = r
1823 1841 if validatehash:
1824 1842 self.checkhash(text, node, rev=rev)
1825 1843 if not validated:
1826 1844 self._revisioncache = (node, rev, rawtext)
1827 1845
1828 1846 return text
1829 1847
1830 1848 def _rawtext(self, node, rev, _df=None):
1831 1849 """return the possibly unvalidated rawtext for a revision
1832 1850
1833 1851 returns (rev, rawtext, validated)
1834 1852 """
1835 1853
1836 1854 # revision in the cache (could be useful to apply delta)
1837 1855 cachedrev = None
1838 1856 # An intermediate text to apply deltas to
1839 1857 basetext = None
1840 1858
1841 1859 # Check if we have the entry in cache
1842 1860 # The cache entry looks like (node, rev, rawtext)
1843 1861 if self._revisioncache:
1844 1862 if self._revisioncache[0] == node:
1845 1863 return (rev, self._revisioncache[2], True)
1846 1864 cachedrev = self._revisioncache[1]
1847 1865
1848 1866 if rev is None:
1849 1867 rev = self.rev(node)
1850 1868
1851 1869 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1852 1870 if stopped:
1853 1871 basetext = self._revisioncache[2]
1854 1872
1855 1873 # drop cache to save memory, the caller is expected to
1856 1874 # update self._revisioncache after validating the text
1857 1875 self._revisioncache = None
1858 1876
1859 1877 targetsize = None
1860 1878 rawsize = self.index[rev][2]
1861 1879 if 0 <= rawsize:
1862 1880 targetsize = 4 * rawsize
1863 1881
1864 1882 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1865 1883 if basetext is None:
1866 1884 basetext = bytes(bins[0])
1867 1885 bins = bins[1:]
1868 1886
1869 1887 rawtext = mdiff.patches(basetext, bins)
1870 1888 del basetext # let us have a chance to free memory early
1871 1889 return (rev, rawtext, False)
1872 1890
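# A minimal sketch (not part of the module, hypothetical names) of the
# delta-chain folding done in `_rawtext` above. `apply_delta` stands in
# for mdiff.patches; a cached intermediate text lets the chain be cut
# short, mirroring the `stoprev=cachedrev` fast path.
def rebuild_text_sketch(chain_chunks, apply_delta, basetext=None):
    if basetext is None:
        # the chain always starts at a full snapshot
        basetext = bytes(chain_chunks[0])
        chain_chunks = chain_chunks[1:]
    text = basetext
    for delta in chain_chunks:
        text = apply_delta(text, delta)  # fold deltas oldest-first
    return text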
1873 1891 def _sidedata(self, rev):
1874 1892 """Return the sidedata for a given revision number."""
1875 1893 index_entry = self.index[rev]
1876 1894 sidedata_offset = index_entry[8]
1877 1895 sidedata_size = index_entry[9]
1878 1896
1879 1897 if self._inline:
1880 1898 sidedata_offset += self.index.entry_size * (1 + rev)
1881 1899 if sidedata_size == 0:
1882 1900 return {}
1883 1901
1884 1902 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1885 1903 filename = self._sidedatafile
1886 1904 end = self._docket.sidedata_end
1887 1905 offset = sidedata_offset
1888 1906 length = sidedata_size
1889 1907 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1890 1908 raise error.RevlogError(m)
1891 1909
1892 1910 comp_segment = self._segmentfile_sidedata.read_chunk(
1893 1911 sidedata_offset, sidedata_size
1894 1912 )
1895 1913
1896 1914 comp = self.index[rev][11]
1897 1915 if comp == COMP_MODE_PLAIN:
1898 1916 segment = comp_segment
1899 1917 elif comp == COMP_MODE_DEFAULT:
1900 1918 segment = self._decompressor(comp_segment)
1901 1919 elif comp == COMP_MODE_INLINE:
1902 1920 segment = self.decompress(comp_segment)
1903 1921 else:
1904 1922 msg = b'unknown compression mode %d'
1905 1923 msg %= comp
1906 1924 raise error.RevlogError(msg)
1907 1925
1908 1926 sidedata = sidedatautil.deserialize_sidedata(segment)
1909 1927 return sidedata
1910 1928
1911 1929 def rawdata(self, nodeorrev, _df=None):
1912 1930 """return an uncompressed raw data of a given node or revision number.
1913 1931
1914 1932 _df - an existing file handle to read from. (internal-only)
1915 1933 """
1916 1934 return self._revisiondata(nodeorrev, _df, raw=True)
1917 1935
1918 1936 def hash(self, text, p1, p2):
1919 1937 """Compute a node hash.
1920 1938
1921 1939 Available as a function so that subclasses can replace the hash
1922 1940 as needed.
1923 1941 """
1924 1942 return storageutil.hashrevisionsha1(text, p1, p2)
1925 1943
1926 1944 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1927 1945 """Check node hash integrity.
1928 1946
1929 1947 Available as a function so that subclasses can extend hash mismatch
1930 1948 behaviors as needed.
1931 1949 """
1932 1950 try:
1933 1951 if p1 is None and p2 is None:
1934 1952 p1, p2 = self.parents(node)
1935 1953 if node != self.hash(text, p1, p2):
1936 1954 # Clear the revision cache on hash failure. The revision cache
1937 1955 # only stores the raw revision and clearing the cache does have
1938 1956 # the side-effect that we won't have a cache hit when the raw
1939 1957 # revision data is accessed. But this case should be rare and
1940 1958 # it is extra work to teach the cache about the hash
1941 1959 # verification state.
1942 1960 if self._revisioncache and self._revisioncache[0] == node:
1943 1961 self._revisioncache = None
1944 1962
1945 1963 revornode = rev
1946 1964 if revornode is None:
1947 1965 revornode = templatefilters.short(hex(node))
1948 1966 raise error.RevlogError(
1949 1967 _(b"integrity check failed on %s:%s")
1950 1968 % (self.display_id, pycompat.bytestr(revornode))
1951 1969 )
1952 1970 except error.RevlogError:
1953 1971 if self._censorable and storageutil.iscensoredtext(text):
1954 1972 raise error.CensoredNodeError(self.display_id, node, text)
1955 1973 raise
1956 1974
1957 1975 def _enforceinlinesize(self, tr):
1958 1976 """Check if the revlog is too big for inline and convert if so.
1959 1977
1960 1978 This should be called after revisions are added to the revlog. If the
1961 1979 revlog has grown too large to be an inline revlog, it will convert it
1962 1980 to use multiple index and data files.
1963 1981 """
1964 1982 tiprev = len(self) - 1
1965 1983 total_size = self.start(tiprev) + self.length(tiprev)
1966 1984 if not self._inline or total_size < _maxinline:
1967 1985 return
1968 1986
1969 1987 troffset = tr.findoffset(self._indexfile)
1970 1988 if troffset is None:
1971 1989 raise error.RevlogError(
1972 1990 _(b"%s not found in the transaction") % self._indexfile
1973 1991 )
1974 1992 trindex = None
1975 1993 tr.add(self._datafile, 0)
1976 1994
1977 1995 existing_handles = False
1978 1996 if self._writinghandles is not None:
1979 1997 existing_handles = True
1980 1998 fp = self._writinghandles[0]
1981 1999 fp.flush()
1982 2000 fp.close()
1983 2001 # We can't use the cached file handle after close(). So prevent
1984 2002 # its usage.
1985 2003 self._writinghandles = None
1986 2004 self._segmentfile.writing_handle = None
1987 2005 # No need to deal with the sidedata writing handle as it is only
1988 2006 # relevant for revlog-v2, which is never inline and therefore never
1989 2007 # reaches this code
1990 2008
1991 2009 new_dfh = self._datafp(b'w+')
1992 2010 new_dfh.truncate(0) # drop any potentially existing data
1993 2011 try:
1994 2012 with self._indexfp() as read_ifh:
1995 2013 for r in self:
1996 2014 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
1997 2015 if (
1998 2016 trindex is None
1999 2017 and troffset
2000 2018 <= self.start(r) + r * self.index.entry_size
2001 2019 ):
2002 2020 trindex = r
2003 2021 new_dfh.flush()
2004 2022
2005 2023 if trindex is None:
2006 2024 trindex = 0
2007 2025
2008 2026 with self.__index_new_fp() as fp:
2009 2027 self._format_flags &= ~FLAG_INLINE_DATA
2010 2028 self._inline = False
2011 2029 for i in self:
2012 2030 e = self.index.entry_binary(i)
2013 2031 if i == 0 and self._docket is None:
2014 2032 header = self._format_flags | self._format_version
2015 2033 header = self.index.pack_header(header)
2016 2034 e = header + e
2017 2035 fp.write(e)
2018 2036 if self._docket is not None:
2019 2037 self._docket.index_end = fp.tell()
2020 2038
2021 2039 # There is a small transactional race here. If the rename of
2022 2040 # the index fails, we should remove the datafile. It is more
2023 2041 # important to ensure that the data file is not truncated
2024 2042 # when the index is replaced as otherwise data is lost.
2025 2043 tr.replace(self._datafile, self.start(trindex))
2026 2044
2027 2045 # the temp file replaces the real index when we exit the context
2028 2046 # manager
2029 2047
2030 2048 tr.replace(self._indexfile, trindex * self.index.entry_size)
2031 2049 nodemaputil.setup_persistent_nodemap(tr, self)
2032 2050 self._segmentfile = randomaccessfile.randomaccessfile(
2033 2051 self.opener,
2034 2052 self._datafile,
2035 2053 self._chunkcachesize,
2036 2054 )
2037 2055
2038 2056 if existing_handles:
2039 2057 # switched from inline to conventional reopen the index
2040 2058 ifh = self.__index_write_fp()
2041 2059 self._writinghandles = (ifh, new_dfh, None)
2042 2060 self._segmentfile.writing_handle = new_dfh
2043 2061 new_dfh = None
2044 2062 # No need to deal with the sidedata writing handle as it is only
2045 2063 # relevant for revlog-v2, which is never inline and therefore never
2046 2064 # reaches this code
2047 2065 finally:
2048 2066 if new_dfh is not None:
2049 2067 new_dfh.close()
2050 2068
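# A minimal sketch (not part of the module, hypothetical names) of the
# trigger condition checked by `_enforceinlinesize` above: an inline
# revlog is split into separate index and data files once the stored data
# crosses the module-level `_maxinline` threshold (128 KiB).
def needs_split_sketch(inline, start, length, tiprev, maxinline=131072):
    total_size = start(tiprev) + length(tiprev)
    return inline and total_size >= maxinline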
2051 2069 def _nodeduplicatecallback(self, transaction, node):
2052 2070 """called when trying to add a node already stored."""
2053 2071
2054 2072 @contextlib.contextmanager
2055 2073 def reading(self):
2056 2074 """Context manager that keeps data and sidedata files open for reading"""
2057 2075 with self._segmentfile.reading():
2058 2076 with self._segmentfile_sidedata.reading():
2059 2077 yield
2060 2078
2061 2079 @contextlib.contextmanager
2062 2080 def _writing(self, transaction):
2063 2081 if self._trypending:
2064 2082 msg = b'try to write in a `trypending` revlog: %s'
2065 2083 msg %= self.display_id
2066 2084 raise error.ProgrammingError(msg)
2067 2085 if self._writinghandles is not None:
2068 2086 yield
2069 2087 else:
2070 2088 ifh = dfh = sdfh = None
2071 2089 try:
2072 2090 r = len(self)
2073 2091 # opening the data file.
2074 2092 dsize = 0
2075 2093 if r:
2076 2094 dsize = self.end(r - 1)
2077 2095 dfh = None
2078 2096 if not self._inline:
2079 2097 try:
2080 2098 dfh = self._datafp(b"r+")
2081 2099 if self._docket is None:
2082 2100 dfh.seek(0, os.SEEK_END)
2083 2101 else:
2084 2102 dfh.seek(self._docket.data_end, os.SEEK_SET)
2085 2103 except IOError as inst:
2086 2104 if inst.errno != errno.ENOENT:
2087 2105 raise
2088 2106 dfh = self._datafp(b"w+")
2089 2107 transaction.add(self._datafile, dsize)
2090 2108 if self._sidedatafile is not None:
2091 2109 # revlog-v2 is never inline, help Pytype
2092 2110 assert dfh is not None
2093 2111 try:
2094 2112 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2095 2113 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2096 2114 except IOError as inst:
2097 2115 if inst.errno != errno.ENOENT:
2098 2116 raise
2099 2117 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2100 2118 transaction.add(
2101 2119 self._sidedatafile, self._docket.sidedata_end
2102 2120 )
2103 2121
2104 2122 # opening the index file.
2105 2123 isize = r * self.index.entry_size
2106 2124 ifh = self.__index_write_fp()
2107 2125 if self._inline:
2108 2126 transaction.add(self._indexfile, dsize + isize)
2109 2127 else:
2110 2128 transaction.add(self._indexfile, isize)
2111 2129 # exposing all file handle for writing.
2112 2130 self._writinghandles = (ifh, dfh, sdfh)
2113 2131 self._segmentfile.writing_handle = ifh if self._inline else dfh
2114 2132 self._segmentfile_sidedata.writing_handle = sdfh
2115 2133 yield
2116 2134 if self._docket is not None:
2117 2135 self._write_docket(transaction)
2118 2136 finally:
2119 2137 self._writinghandles = None
2120 2138 self._segmentfile.writing_handle = None
2121 2139 self._segmentfile_sidedata.writing_handle = None
2122 2140 if dfh is not None:
2123 2141 dfh.close()
2124 2142 if sdfh is not None:
2125 2143 sdfh.close()
2126 2144 # closing the index file last to avoid exposing references to
2127 2145 # potentially unflushed data content.
2128 2146 if ifh is not None:
2129 2147 ifh.close()
2130 2148
2131 2149 def _write_docket(self, transaction):
2132 2150 """write the current docket on disk
2133 2151
2134 2152 Exists as a method to help the changelog implement transaction logic
2135 2153
2136 2154 We could also imagine using the same transaction logic for all revlogs
2137 2155 since dockets are cheap."""
2138 2156 self._docket.write(transaction)
2139 2157
2140 2158 def addrevision(
2141 2159 self,
2142 2160 text,
2143 2161 transaction,
2144 2162 link,
2145 2163 p1,
2146 2164 p2,
2147 2165 cachedelta=None,
2148 2166 node=None,
2149 2167 flags=REVIDX_DEFAULT_FLAGS,
2150 2168 deltacomputer=None,
2151 2169 sidedata=None,
2152 2170 ):
2153 2171 """add a revision to the log
2154 2172
2155 2173 text - the revision data to add
2156 2174 transaction - the transaction object used for rollback
2157 2175 link - the linkrev data to add
2158 2176 p1, p2 - the parent nodeids of the revision
2159 2177 cachedelta - an optional precomputed delta
2160 2178 node - nodeid of revision; typically node is not specified, and it is
2161 2179 computed by default as hash(text, p1, p2); however, subclasses might
2162 2180 use a different hashing method (and override checkhash() in that case)
2163 2181 flags - the known flags to set on the revision
2164 2182 deltacomputer - an optional deltacomputer instance shared between
2165 2183 multiple calls
2166 2184 """
2167 2185 if link == nullrev:
2168 2186 raise error.RevlogError(
2169 2187 _(b"attempted to add linkrev -1 to %s") % self.display_id
2170 2188 )
2171 2189
2172 2190 if sidedata is None:
2173 2191 sidedata = {}
2174 2192 elif sidedata and not self.hassidedata:
2175 2193 raise error.ProgrammingError(
2176 2194 _(b"trying to add sidedata to a revlog who don't support them")
2177 2195 )
2178 2196
2179 2197 if flags:
2180 2198 node = node or self.hash(text, p1, p2)
2181 2199
2182 2200 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2183 2201
2184 2202 # If the flag processor modifies the revision data, ignore any provided
2185 2203 # cachedelta.
2186 2204 if rawtext != text:
2187 2205 cachedelta = None
2188 2206
2189 2207 if len(rawtext) > _maxentrysize:
2190 2208 raise error.RevlogError(
2191 2209 _(
2192 2210 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2193 2211 )
2194 2212 % (self.display_id, len(rawtext))
2195 2213 )
2196 2214
2197 2215 node = node or self.hash(rawtext, p1, p2)
2198 2216 rev = self.index.get_rev(node)
2199 2217 if rev is not None:
2200 2218 return rev
2201 2219
2202 2220 if validatehash:
2203 2221 self.checkhash(rawtext, node, p1=p1, p2=p2)
2204 2222
2205 2223 return self.addrawrevision(
2206 2224 rawtext,
2207 2225 transaction,
2208 2226 link,
2209 2227 p1,
2210 2228 p2,
2211 2229 node,
2212 2230 flags,
2213 2231 cachedelta=cachedelta,
2214 2232 deltacomputer=deltacomputer,
2215 2233 sidedata=sidedata,
2216 2234 )
2217 2235
2218 2236 def addrawrevision(
2219 2237 self,
2220 2238 rawtext,
2221 2239 transaction,
2222 2240 link,
2223 2241 p1,
2224 2242 p2,
2225 2243 node,
2226 2244 flags,
2227 2245 cachedelta=None,
2228 2246 deltacomputer=None,
2229 2247 sidedata=None,
2230 2248 ):
2231 2249 """add a raw revision with known flags, node and parents
2232 2250 useful when reusing a revision not stored in this revlog (e.g. received
2233 2251 over the wire, or read from an external bundle).
2234 2252 """
2235 2253 with self._writing(transaction):
2236 2254 return self._addrevision(
2237 2255 node,
2238 2256 rawtext,
2239 2257 transaction,
2240 2258 link,
2241 2259 p1,
2242 2260 p2,
2243 2261 flags,
2244 2262 cachedelta,
2245 2263 deltacomputer=deltacomputer,
2246 2264 sidedata=sidedata,
2247 2265 )
2248 2266
2249 2267 def compress(self, data):
2250 2268 """Generate a possibly-compressed representation of data."""
2251 2269 if not data:
2252 2270 return b'', data
2253 2271
2254 2272 compressed = self._compressor.compress(data)
2255 2273
2256 2274 if compressed:
2257 2275 # The revlog compressor added the header in the returned data.
2258 2276 return b'', compressed
2259 2277
2260 2278 if data[0:1] == b'\0':
2261 2279 return b'', data
2262 2280 return b'u', data
2263 2281
2264 2282 def decompress(self, data):
2265 2283 """Decompress a revlog chunk.
2266 2284
2267 2285 The chunk is expected to begin with a header identifying the
2268 2286 format type so it can be routed to an appropriate decompressor.
2269 2287 """
2270 2288 if not data:
2271 2289 return data
2272 2290
2273 2291 # Revlogs are read much more frequently than they are written and many
2274 2292 # chunks only take microseconds to decompress, so performance is
2275 2293 # important here.
2276 2294 #
2277 2295 # We can make a few assumptions about revlogs:
2278 2296 #
2279 2297 # 1) the majority of chunks will be compressed (as opposed to inline
2280 2298 # raw data).
2281 2299 # 2) decompressing *any* data will likely be at least 10x slower than
2282 2300 # returning raw inline data.
2283 2301 # 3) we want to prioritize common and officially supported compression
2284 2302 # engines
2285 2303 #
2286 2304 # It follows that we want to optimize for "decompress compressed data
2287 2305 # when encoded with common and officially supported compression engines"
2288 2306 # case over "raw data" and "data encoded by less common or non-official
2289 2307 # compression engines." That is why we have the inline lookup first
2290 2308 # followed by the compengines lookup.
2291 2309 #
2292 2310 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2293 2311 # compressed chunks. And this matters for changelog and manifest reads.
2294 2312 t = data[0:1]
2295 2313
2296 2314 if t == b'x':
2297 2315 try:
2298 2316 return _zlibdecompress(data)
2299 2317 except zlib.error as e:
2300 2318 raise error.RevlogError(
2301 2319 _(b'revlog decompress error: %s')
2302 2320 % stringutil.forcebytestr(e)
2303 2321 )
2304 2322 # '\0' is more common than 'u' so it goes first.
2305 2323 elif t == b'\0':
2306 2324 return data
2307 2325 elif t == b'u':
2308 2326 return util.buffer(data, 1)
2309 2327
2310 2328 compressor = self._get_decompressor(t)
2311 2329
2312 2330 return compressor.decompress(data)
2313 2331
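# A minimal sketch (not part of the module) of the one-byte header
# convention implemented by compress()/decompress() above, round-tripped
# with zlib only and collapsed into single byte strings for brevity; the
# real code keeps the b'u' header separate and routes other headers to
# pluggable engines via self._get_decompressor.
import zlib

def compress_sketch(data):
    if not data:
        return data
    comp = zlib.compress(data)  # zlib streams start with b'x'
    if len(comp) < len(data):
        return comp
    # incompressible: store raw, with a b'u' marker unless the data is
    # already self-identifying (starts with b'\0')
    return data if data[:1] == b'\0' else b'u' + data

def decompress_sketch(chunk):
    t = chunk[:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t == b'\0' or not chunk:
        return chunk
    if t == b'u':
        return chunk[1:]
    raise ValueError('unknown compression header %r' % t)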
2314 2332 def _addrevision(
2315 2333 self,
2316 2334 node,
2317 2335 rawtext,
2318 2336 transaction,
2319 2337 link,
2320 2338 p1,
2321 2339 p2,
2322 2340 flags,
2323 2341 cachedelta,
2324 2342 alwayscache=False,
2325 2343 deltacomputer=None,
2326 2344 sidedata=None,
2327 2345 ):
2328 2346 """internal function to add revisions to the log
2329 2347
2330 2348 see addrevision for argument descriptions.
2331 2349
2332 2350 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2333 2351
2334 2352 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2335 2353 be used.
2336 2354
2337 2355 invariants:
2338 2356 - rawtext is optional (can be None); if not set, cachedelta must be set.
2339 2357 if both are set, they must correspond to each other.
2340 2358 """
2341 2359 if node == self.nullid:
2342 2360 raise error.RevlogError(
2343 2361 _(b"%s: attempt to add null revision") % self.display_id
2344 2362 )
2345 2363 if (
2346 2364 node == self.nodeconstants.wdirid
2347 2365 or node in self.nodeconstants.wdirfilenodeids
2348 2366 ):
2349 2367 raise error.RevlogError(
2350 2368 _(b"%s: attempt to add wdir revision") % self.display_id
2351 2369 )
2352 2370 if self._writinghandles is None:
2353 2371 msg = b'adding revision outside `revlog._writing` context'
2354 2372 raise error.ProgrammingError(msg)
2355 2373
2356 2374 if self._inline:
2357 2375 fh = self._writinghandles[0]
2358 2376 else:
2359 2377 fh = self._writinghandles[1]
2360 2378
2361 2379 btext = [rawtext]
2362 2380
2363 2381 curr = len(self)
2364 2382 prev = curr - 1
2365 2383
2366 2384 offset = self._get_data_offset(prev)
2367 2385
2368 2386 if self._concurrencychecker:
2369 2387 ifh, dfh, sdfh = self._writinghandles
2370 2388 # XXX no checking for the sidedata file
2371 2389 if self._inline:
2372 2390 # offset is "as if" it were in the .d file, so we need to add on
2373 2391 # the size of the entry metadata.
2374 2392 self._concurrencychecker(
2375 2393 ifh, self._indexfile, offset + curr * self.index.entry_size
2376 2394 )
2377 2395 else:
2378 2396 # Entries in the .i are a consistent size.
2379 2397 self._concurrencychecker(
2380 2398 ifh, self._indexfile, curr * self.index.entry_size
2381 2399 )
2382 2400 self._concurrencychecker(dfh, self._datafile, offset)
2383 2401
2384 2402 p1r, p2r = self.rev(p1), self.rev(p2)
2385 2403
2386 2404 # full versions are inserted when the needed deltas
2387 2405 # become comparable to the uncompressed text
2388 2406 if rawtext is None:
2389 2407 # need the rawtext size before it is changed by flag processors,
2390 2408 # which is the non-raw size. Use revlog explicitly to avoid filelog's
2391 2409 # extra logic that might remove metadata size.
2392 2410 textlen = mdiff.patchedsize(
2393 2411 revlog.size(self, cachedelta[0]), cachedelta[1]
2394 2412 )
2395 2413 else:
2396 2414 textlen = len(rawtext)
2397 2415
2398 2416 if deltacomputer is None:
2399 2417 deltacomputer = deltautil.deltacomputer(self)
2400 2418
2401 2419 revinfo = revlogutils.revisioninfo(
2402 2420 node,
2403 2421 p1,
2404 2422 p2,
2405 2423 btext,
2406 2424 textlen,
2407 2425 cachedelta,
2408 2426 flags,
2409 2427 )
2410 2428
2411 2429 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2412 2430
2413 2431 compression_mode = COMP_MODE_INLINE
2414 2432 if self._docket is not None:
2415 2433 default_comp = self._docket.default_compression_header
2416 2434 r = deltautil.delta_compression(default_comp, deltainfo)
2417 2435 compression_mode, deltainfo = r
2418 2436
2419 2437 sidedata_compression_mode = COMP_MODE_INLINE
2420 2438 if sidedata and self.hassidedata:
2421 2439 sidedata_compression_mode = COMP_MODE_PLAIN
2422 2440 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2423 2441 sidedata_offset = self._docket.sidedata_end
2424 2442 h, comp_sidedata = self.compress(serialized_sidedata)
2425 2443 if (
2426 2444 h != b'u'
2427 2445 and comp_sidedata[0:1] != b'\0'
2428 2446 and len(comp_sidedata) < len(serialized_sidedata)
2429 2447 ):
2430 2448 assert not h
2431 2449 if (
2432 2450 comp_sidedata[0:1]
2433 2451 == self._docket.default_compression_header
2434 2452 ):
2435 2453 sidedata_compression_mode = COMP_MODE_DEFAULT
2436 2454 serialized_sidedata = comp_sidedata
2437 2455 else:
2438 2456 sidedata_compression_mode = COMP_MODE_INLINE
2439 2457 serialized_sidedata = comp_sidedata
2440 2458 else:
2441 2459 serialized_sidedata = b""
2442 2460 # Don't store the offset if the sidedata is empty; that way
2443 2461 # we can easily detect empty sidedata, and it will be no different
2444 2462 # from sidedata we add manually.
2445 2463 sidedata_offset = 0
2446 2464
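# The rank of a revision is the size of its ancestor set, the revision
# itself included. For a linear history 0 <- 1 <- 2, rev 2 has rank 3;
# for a merge, the union of both parents' ancestor sets is counted once,
# hence a single `ancestors([p1r, p2r], inclusive=True)` walk plus one
# for the new revision. This naive O(ancestors) computation is only paid
# for CHANGELOGV2 revlogs.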
2465 rank = RANK_UNKNOWN
2466 if self._format_version == CHANGELOGV2:
2467 rank = len(list(self.ancestors([p1r, p2r], inclusive=True))) + 1
2468
2447 2469 e = revlogutils.entry(
2448 2470 flags=flags,
2449 2471 data_offset=offset,
2450 2472 data_compressed_length=deltainfo.deltalen,
2451 2473 data_uncompressed_length=textlen,
2452 2474 data_compression_mode=compression_mode,
2453 2475 data_delta_base=deltainfo.base,
2454 2476 link_rev=link,
2455 2477 parent_rev_1=p1r,
2456 2478 parent_rev_2=p2r,
2457 2479 node_id=node,
2458 2480 sidedata_offset=sidedata_offset,
2459 2481 sidedata_compressed_length=len(serialized_sidedata),
2460 2482 sidedata_compression_mode=sidedata_compression_mode,
2483 rank=rank,
2461 2484 )
2462 2485
2463 2486 self.index.append(e)
2464 2487 entry = self.index.entry_binary(curr)
2465 2488 if curr == 0 and self._docket is None:
2466 2489 header = self._format_flags | self._format_version
2467 2490 header = self.index.pack_header(header)
2468 2491 entry = header + entry
2469 2492 self._writeentry(
2470 2493 transaction,
2471 2494 entry,
2472 2495 deltainfo.data,
2473 2496 link,
2474 2497 offset,
2475 2498 serialized_sidedata,
2476 2499 sidedata_offset,
2477 2500 )
2478 2501
2479 2502 rawtext = btext[0]
2480 2503
2481 2504 if alwayscache and rawtext is None:
2482 2505 rawtext = deltacomputer.buildtext(revinfo, fh)
2483 2506
2484 2507 if type(rawtext) == bytes: # only accept immutable objects
2485 2508 self._revisioncache = (node, curr, rawtext)
2486 2509 self._chainbasecache[curr] = deltainfo.chainbase
2487 2510 return curr
2488 2511
2489 2512 def _get_data_offset(self, prev):
2490 2513 """Returns the current offset in the (in-transaction) data file.
2491 2514 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2492 2515 file to store that information: since sidedata can be rewritten to the
2493 2516 end of the data file within a transaction, you can have cases where, for
2494 2517 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2495 2518 to `n - 1`'s sidedata being written after `n`'s data.
2496 2519
2497 2520 TODO cache this in a docket file before getting out of experimental."""
2498 2521 if self._docket is None:
2499 2522 return self.end(prev)
2500 2523 else:
2501 2524 return self._docket.data_end
2502 2525
2503 2526 def _writeentry(
2504 2527 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2505 2528 ):
2506 2529 # Files opened in a+ mode have inconsistent behavior on various
2507 2530 # platforms. Windows requires that a file positioning call be made
2508 2531 # when the file handle transitions between reads and writes. See
2509 2532 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2510 2533 # platforms, Python or the platform itself can be buggy. Some versions
2511 2534 # of Solaris have been observed to not append at the end of the file
2512 2535 # if the file was seeked to before the end. See issue4943 for more.
2513 2536 #
2514 2537 # We work around this issue by inserting a seek() before writing.
2515 2538 # Note: This is likely not necessary on Python 3. However, because
2516 2539 # the file handle is reused for reads and may be seeked there, we need
2517 2540 # to be careful before changing this.
2518 2541 if self._writinghandles is None:
2519 2542 msg = b'adding revision outside `revlog._writing` context'
2520 2543 raise error.ProgrammingError(msg)
2521 2544 ifh, dfh, sdfh = self._writinghandles
2522 2545 if self._docket is None:
2523 2546 ifh.seek(0, os.SEEK_END)
2524 2547 else:
2525 2548 ifh.seek(self._docket.index_end, os.SEEK_SET)
2526 2549 if dfh:
2527 2550 if self._docket is None:
2528 2551 dfh.seek(0, os.SEEK_END)
2529 2552 else:
2530 2553 dfh.seek(self._docket.data_end, os.SEEK_SET)
2531 2554 if sdfh:
2532 2555 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2533 2556
2534 2557 curr = len(self) - 1
2535 2558 if not self._inline:
2536 2559 transaction.add(self._datafile, offset)
2537 2560 if self._sidedatafile:
2538 2561 transaction.add(self._sidedatafile, sidedata_offset)
2539 2562 transaction.add(self._indexfile, curr * len(entry))
2540 2563 if data[0]:
2541 2564 dfh.write(data[0])
2542 2565 dfh.write(data[1])
2543 2566 if sidedata:
2544 2567 sdfh.write(sidedata)
2545 2568 ifh.write(entry)
2546 2569 else:
2547 2570 offset += curr * self.index.entry_size
2548 2571 transaction.add(self._indexfile, offset)
2549 2572 ifh.write(entry)
2550 2573 ifh.write(data[0])
2551 2574 ifh.write(data[1])
2552 2575 assert not sidedata
2553 2576 self._enforceinlinesize(transaction)
2554 2577 if self._docket is not None:
2555 2578 # revlog-v2 always has 3 writing handles, help Pytype
2556 2579 wh1 = self._writinghandles[0]
2557 2580 wh2 = self._writinghandles[1]
2558 2581 wh3 = self._writinghandles[2]
2559 2582 assert wh1 is not None
2560 2583 assert wh2 is not None
2561 2584 assert wh3 is not None
2562 2585 self._docket.index_end = wh1.tell()
2563 2586 self._docket.data_end = wh2.tell()
2564 2587 self._docket.sidedata_end = wh3.tell()
2565 2588
2566 2589 nodemaputil.setup_persistent_nodemap(transaction, self)
2567 2590
2568 2591 def addgroup(
2569 2592 self,
2570 2593 deltas,
2571 2594 linkmapper,
2572 2595 transaction,
2573 2596 alwayscache=False,
2574 2597 addrevisioncb=None,
2575 2598 duplicaterevisioncb=None,
2576 2599 ):
2577 2600 """
2578 2601 add a delta group
2579 2602
2580 2603 Given a set of deltas, add them to the revision log. The
2581 2604 first delta is against its parent, which should be in our
2582 2605 log; the rest are against the previous delta.
2583 2606
2584 2607 If ``addrevisioncb`` is defined, it will be called with arguments of
2585 2608 this revlog and the node that was added.
2586 2609 """
2587 2610
2588 2611 if self._adding_group:
2589 2612 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2590 2613
2591 2614 self._adding_group = True
2592 2615 empty = True
2593 2616 try:
2594 2617 with self._writing(transaction):
2595 2618 deltacomputer = deltautil.deltacomputer(self)
2596 2619 # loop through our set of deltas
2597 2620 for data in deltas:
2598 2621 (
2599 2622 node,
2600 2623 p1,
2601 2624 p2,
2602 2625 linknode,
2603 2626 deltabase,
2604 2627 delta,
2605 2628 flags,
2606 2629 sidedata,
2607 2630 ) = data
2608 2631 link = linkmapper(linknode)
2609 2632 flags = flags or REVIDX_DEFAULT_FLAGS
2610 2633
2611 2634 rev = self.index.get_rev(node)
2612 2635 if rev is not None:
2613 2636 # this can happen if two branches make the same change
2614 2637 self._nodeduplicatecallback(transaction, rev)
2615 2638 if duplicaterevisioncb:
2616 2639 duplicaterevisioncb(self, rev)
2617 2640 empty = False
2618 2641 continue
2619 2642
2620 2643 for p in (p1, p2):
2621 2644 if not self.index.has_node(p):
2622 2645 raise error.LookupError(
2623 2646 p, self.radix, _(b'unknown parent')
2624 2647 )
2625 2648
2626 2649 if not self.index.has_node(deltabase):
2627 2650 raise error.LookupError(
2628 2651 deltabase, self.display_id, _(b'unknown delta base')
2629 2652 )
2630 2653
2631 2654 baserev = self.rev(deltabase)
2632 2655
2633 2656 if baserev != nullrev and self.iscensored(baserev):
2634 2657 # if base is censored, delta must be full replacement in a
2635 2658 # single patch operation
2636 2659 hlen = struct.calcsize(b">lll")
2637 2660 oldlen = self.rawsize(baserev)
2638 2661 newlen = len(delta) - hlen
2639 2662 if delta[:hlen] != mdiff.replacediffheader(
2640 2663 oldlen, newlen
2641 2664 ):
2642 2665 raise error.CensoredBaseError(
2643 2666 self.display_id, self.node(baserev)
2644 2667 )
2645 2668
2646 2669 if not flags and self._peek_iscensored(baserev, delta):
2647 2670 flags |= REVIDX_ISCENSORED
2648 2671
2649 2672 # We assume consumers of addrevisioncb will want to retrieve
2650 2673 # the added revision, which will require a call to
2651 2674 # revision(). revision() will fast path if there is a cache
2652 2675 # hit. So, we tell _addrevision() to always cache in this case.
2653 2676 # We're only using addgroup() in the context of changegroup
2654 2677 # generation so the revision data can always be handled as raw
2655 2678 # by the flagprocessor.
2656 2679 rev = self._addrevision(
2657 2680 node,
2658 2681 None,
2659 2682 transaction,
2660 2683 link,
2661 2684 p1,
2662 2685 p2,
2663 2686 flags,
2664 2687 (baserev, delta),
2665 2688 alwayscache=alwayscache,
2666 2689 deltacomputer=deltacomputer,
2667 2690 sidedata=sidedata,
2668 2691 )
2669 2692
2670 2693 if addrevisioncb:
2671 2694 addrevisioncb(self, rev)
2672 2695 empty = False
2673 2696 finally:
2674 2697 self._adding_group = False
2675 2698 return not empty
2676 2699
2677 2700 def iscensored(self, rev):
2678 2701 """Check if a file revision is censored."""
2679 2702 if not self._censorable:
2680 2703 return False
2681 2704
2682 2705 return self.flags(rev) & REVIDX_ISCENSORED
2683 2706
2684 2707 def _peek_iscensored(self, baserev, delta):
2685 2708 """Quickly check if a delta produces a censored revision."""
2686 2709 if not self._censorable:
2687 2710 return False
2688 2711
2689 2712 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2690 2713
2691 2714 def getstrippoint(self, minlink):
2692 2715 """find the minimum rev that must be stripped to strip the linkrev
2693 2716
2694 2717 Returns a tuple containing the minimum rev and a set of all revs that
2695 2718 have linkrevs that will be broken by this strip.
2696 2719 """
2697 2720 return storageutil.resolvestripinfo(
2698 2721 minlink,
2699 2722 len(self) - 1,
2700 2723 self.headrevs(),
2701 2724 self.linkrev,
2702 2725 self.parentrevs,
2703 2726 )
2704 2727
2705 2728 def strip(self, minlink, transaction):
2706 2729 """truncate the revlog on the first revision with a linkrev >= minlink
2707 2730
2708 2731 This function is called when we're stripping revision minlink and
2709 2732 its descendants from the repository.
2710 2733
2711 2734 We have to remove all revisions with linkrev >= minlink, because
2712 2735 the equivalent changelog revisions will be renumbered after the
2713 2736 strip.
2714 2737
2715 2738 So we truncate the revlog on the first of these revisions, and
2716 2739 trust that the caller has saved the revisions that shouldn't be
2717 2740 removed and that it'll re-add them after this truncation.
2718 2741 """
2719 2742 if len(self) == 0:
2720 2743 return
2721 2744
2722 2745 rev, _ = self.getstrippoint(minlink)
2723 2746 if rev == len(self):
2724 2747 return
2725 2748
2726 2749 # first truncate the files on disk
2727 2750 data_end = self.start(rev)
2728 2751 if not self._inline:
2729 2752 transaction.add(self._datafile, data_end)
2730 2753 end = rev * self.index.entry_size
2731 2754 else:
2732 2755 end = data_end + (rev * self.index.entry_size)
2733 2756
2734 2757 if self._sidedatafile:
2735 2758 sidedata_end = self.sidedata_cut_off(rev)
2736 2759 transaction.add(self._sidedatafile, sidedata_end)
2737 2760
2738 2761 transaction.add(self._indexfile, end)
2739 2762 if self._docket is not None:
2740 2763 # XXX we could leverage the docket while stripping. However, it is
2741 2764 # not powerful enough at the time of this comment
2742 2765 self._docket.index_end = end
2743 2766 self._docket.data_end = data_end
2744 2767 self._docket.sidedata_end = sidedata_end
2745 2768 self._docket.write(transaction, stripping=True)
2746 2769
2747 2770 # then reset internal state in memory to forget those revisions
2748 2771 self._revisioncache = None
2749 2772 self._chaininfocache = util.lrucachedict(500)
2750 2773 self._segmentfile.clear_cache()
2751 2774 self._segmentfile_sidedata.clear_cache()
2752 2775
2753 2776 del self.index[rev:-1]
2754 2777
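# A minimal sketch (not part of the module, hypothetical names) of the
# truncation offsets computed by `strip` above. Split revlogs truncate
# the index at rev * entry_size and the data file at start(rev); inline
# revlogs keep both in one file, so the two contributions add up.
def strip_offsets_sketch(rev, start, entry_size, inline):
    data_end = start(rev)
    if not inline:
        return data_end, rev * entry_size  # (data file end, index file end)
    return None, data_end + rev * entry_size  # single inline file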
2755 2778 def checksize(self):
2756 2779 """Check size of index and data files
2757 2780
2758 2781 return a (dd, di) tuple.
2759 2782 - dd: extra bytes for the "data" file
2760 2783 - di: extra bytes for the "index" file
2761 2784
2762 2785 A healthy revlog will return (0, 0).
2763 2786 """
2764 2787 expected = 0
2765 2788 if len(self):
2766 2789 expected = max(0, self.end(len(self) - 1))
2767 2790
2768 2791 try:
2769 2792 with self._datafp() as f:
2770 2793 f.seek(0, io.SEEK_END)
2771 2794 actual = f.tell()
2772 2795 dd = actual - expected
2773 2796 except IOError as inst:
2774 2797 if inst.errno != errno.ENOENT:
2775 2798 raise
2776 2799 dd = 0
2777 2800
2778 2801 try:
2779 2802 f = self.opener(self._indexfile)
2780 2803 f.seek(0, io.SEEK_END)
2781 2804 actual = f.tell()
2782 2805 f.close()
2783 2806 s = self.index.entry_size
2784 2807 i = max(0, actual // s)
2785 2808 di = actual - (i * s)
2786 2809 if self._inline:
2787 2810 databytes = 0
2788 2811 for r in self:
2789 2812 databytes += max(0, self.length(r))
2790 2813 dd = 0
2791 2814 di = actual - len(self) * s - databytes
2792 2815 except IOError as inst:
2793 2816 if inst.errno != errno.ENOENT:
2794 2817 raise
2795 2818 di = 0
2796 2819
2797 2820 return (dd, di)
2798 2821
2799 2822 def files(self):
2800 2823 res = [self._indexfile]
2801 2824 if self._docket_file is None:
2802 2825 if not self._inline:
2803 2826 res.append(self._datafile)
2804 2827 else:
2805 2828 res.append(self._docket_file)
2806 2829 res.extend(self._docket.old_index_filepaths(include_empty=False))
2807 2830 if self._docket.data_end:
2808 2831 res.append(self._datafile)
2809 2832 res.extend(self._docket.old_data_filepaths(include_empty=False))
2810 2833 if self._docket.sidedata_end:
2811 2834 res.append(self._sidedatafile)
2812 2835 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
2813 2836 return res
2814 2837
2815 2838 def emitrevisions(
2816 2839 self,
2817 2840 nodes,
2818 2841 nodesorder=None,
2819 2842 revisiondata=False,
2820 2843 assumehaveparentrevisions=False,
2821 2844 deltamode=repository.CG_DELTAMODE_STD,
2822 2845 sidedata_helpers=None,
2823 2846 ):
2824 2847 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2825 2848 raise error.ProgrammingError(
2826 2849 b'unhandled value for nodesorder: %s' % nodesorder
2827 2850 )
2828 2851
2829 2852 if nodesorder is None and not self._generaldelta:
2830 2853 nodesorder = b'storage'
2831 2854
2832 2855 if (
2833 2856 not self._storedeltachains
2834 2857 and deltamode != repository.CG_DELTAMODE_PREV
2835 2858 ):
2836 2859 deltamode = repository.CG_DELTAMODE_FULL
2837 2860
2838 2861 return storageutil.emitrevisions(
2839 2862 self,
2840 2863 nodes,
2841 2864 nodesorder,
2842 2865 revlogrevisiondelta,
2843 2866 deltaparentfn=self.deltaparent,
2844 2867 candeltafn=self.candelta,
2845 2868 rawsizefn=self.rawsize,
2846 2869 revdifffn=self.revdiff,
2847 2870 flagsfn=self.flags,
2848 2871 deltamode=deltamode,
2849 2872 revisiondata=revisiondata,
2850 2873 assumehaveparentrevisions=assumehaveparentrevisions,
2851 2874 sidedata_helpers=sidedata_helpers,
2852 2875 )
2853 2876
2854 2877 DELTAREUSEALWAYS = b'always'
2855 2878 DELTAREUSESAMEREVS = b'samerevs'
2856 2879 DELTAREUSENEVER = b'never'
2857 2880
2858 2881 DELTAREUSEFULLADD = b'fulladd'
2859 2882
2860 2883 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2861 2884
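# A hypothetical usage sketch (not part of the module): cloning with a
# delta reuse policy, e.g. from an upgrade path, where `src`, `dst` and
# `tr` are an existing revlog, an empty destination revlog, and an open
# transaction:
#
#     src.clone(
#         tr,
#         dst,
#         deltareuse=src.DELTAREUSESAMEREVS,  # the balanced default
#         forcedeltabothparents=None,  # keep the current default
#     )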
2862 2885 def clone(
2863 2886 self,
2864 2887 tr,
2865 2888 destrevlog,
2866 2889 addrevisioncb=None,
2867 2890 deltareuse=DELTAREUSESAMEREVS,
2868 2891 forcedeltabothparents=None,
2869 2892 sidedata_helpers=None,
2870 2893 ):
2871 2894 """Copy this revlog to another, possibly with format changes.
2872 2895
2873 2896 The destination revlog will contain the same revisions and nodes.
2874 2897 However, it may not be bit-for-bit identical due to e.g. delta encoding
2875 2898 differences.
2876 2899
2877 2900 The ``deltareuse`` argument controls how deltas from the existing revlog
2878 2901 are preserved in the destination revlog. The argument can have the
2879 2902 following values:
2880 2903
2881 2904 DELTAREUSEALWAYS
2882 2905 Deltas will always be reused (if possible), even if the destination
2883 2906 revlog would not select the same revisions for the delta. This is the
2884 2907 fastest mode of operation.
2885 2908 DELTAREUSESAMEREVS
2886 2909 Deltas will be reused if the destination revlog would pick the same
2887 2910 revisions for the delta. This mode strikes a balance between speed
2888 2911 and optimization.
2889 2912 DELTAREUSENEVER
2890 2913 Deltas will never be reused. This is the slowest mode of execution.
2891 2914 This mode can be used to recompute deltas (e.g. if the diff/delta
2892 2915 algorithm changes).
2893 2916 DELTAREUSEFULLADD
2894 2917 Revisions will be re-added as if they were new content. This is
2895 2918 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2896 2919 e.g. large file detection and handling.
2897 2920
2898 2921 Delta computation can be slow, so the choice of delta reuse policy can
2899 2922 significantly affect run time.
2900 2923
2901 2924 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2902 2925 two extremes. Deltas will be reused if they are appropriate. But if the
2903 2926 delta could choose a better revision, it will do so. This means if you
2904 2927 are converting a non-generaldelta revlog to a generaldelta revlog,
2905 2928 deltas will be recomputed if the delta's parent isn't a parent of the
2906 2929 revision.
2907 2930
2908 2931 In addition to the delta policy, the ``forcedeltabothparents``
2909 2932 argument controls whether to force computing deltas against both parents
2910 2933 for merges. If None, the current default is used.
2911 2934
2912 2935 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2913 2936 `sidedata_helpers`.
2914 2937 """
2915 2938 if deltareuse not in self.DELTAREUSEALL:
2916 2939 raise ValueError(
2917 2940 _(b'value for deltareuse invalid: %s') % deltareuse
2918 2941 )
2919 2942
2920 2943 if len(destrevlog):
2921 2944 raise ValueError(_(b'destination revlog is not empty'))
2922 2945
2923 2946 if getattr(self, 'filteredrevs', None):
2924 2947 raise ValueError(_(b'source revlog has filtered revisions'))
2925 2948 if getattr(destrevlog, 'filteredrevs', None):
2926 2949 raise ValueError(_(b'destination revlog has filtered revisions'))
2927 2950
2928 2951 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2929 2952 # if possible.
2930 2953 oldlazydelta = destrevlog._lazydelta
2931 2954 oldlazydeltabase = destrevlog._lazydeltabase
2932 2955 oldamd = destrevlog._deltabothparents
2933 2956
2934 2957 try:
2935 2958 if deltareuse == self.DELTAREUSEALWAYS:
2936 2959 destrevlog._lazydeltabase = True
2937 2960 destrevlog._lazydelta = True
2938 2961 elif deltareuse == self.DELTAREUSESAMEREVS:
2939 2962 destrevlog._lazydeltabase = False
2940 2963 destrevlog._lazydelta = True
2941 2964 elif deltareuse == self.DELTAREUSENEVER:
2942 2965 destrevlog._lazydeltabase = False
2943 2966 destrevlog._lazydelta = False
2944 2967
2945 2968 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2946 2969
2947 2970 self._clone(
2948 2971 tr,
2949 2972 destrevlog,
2950 2973 addrevisioncb,
2951 2974 deltareuse,
2952 2975 forcedeltabothparents,
2953 2976 sidedata_helpers,
2954 2977 )
2955 2978
2956 2979 finally:
2957 2980 destrevlog._lazydelta = oldlazydelta
2958 2981 destrevlog._lazydeltabase = oldlazydeltabase
2959 2982 destrevlog._deltabothparents = oldamd
2960 2983
2961 2984 def _clone(
2962 2985 self,
2963 2986 tr,
2964 2987 destrevlog,
2965 2988 addrevisioncb,
2966 2989 deltareuse,
2967 2990 forcedeltabothparents,
2968 2991 sidedata_helpers,
2969 2992 ):
2970 2993 """perform the core duty of `revlog.clone` after parameter processing"""
2971 2994 deltacomputer = deltautil.deltacomputer(destrevlog)
2972 2995 index = self.index
2973 2996 for rev in self:
2974 2997 entry = index[rev]
2975 2998
2976 2999 # Some classes override linkrev to take filtered revs into
2977 3000 # account. Use raw entry from index.
2978 3001 flags = entry[0] & 0xFFFF
2979 3002 linkrev = entry[4]
2980 3003 p1 = index[entry[5]][7]
2981 3004 p2 = index[entry[6]][7]
2982 3005 node = entry[7]
2983 3006
2984 3007 # (Possibly) reuse the delta from the revlog if allowed and
2985 3008 # the revlog chunk is a delta.
2986 3009 cachedelta = None
2987 3010 rawtext = None
2988 3011 if deltareuse == self.DELTAREUSEFULLADD:
2989 3012 text = self._revisiondata(rev)
2990 3013 sidedata = self.sidedata(rev)
2991 3014
2992 3015 if sidedata_helpers is not None:
2993 3016 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2994 3017 self, sidedata_helpers, sidedata, rev
2995 3018 )
2996 3019 flags = flags | new_flags[0] & ~new_flags[1]
2997 3020
2998 3021 destrevlog.addrevision(
2999 3022 text,
3000 3023 tr,
3001 3024 linkrev,
3002 3025 p1,
3003 3026 p2,
3004 3027 cachedelta=cachedelta,
3005 3028 node=node,
3006 3029 flags=flags,
3007 3030 deltacomputer=deltacomputer,
3008 3031 sidedata=sidedata,
3009 3032 )
3010 3033 else:
3011 3034 if destrevlog._lazydelta:
3012 3035 dp = self.deltaparent(rev)
3013 3036 if dp != nullrev:
3014 3037 cachedelta = (dp, bytes(self._chunk(rev)))
3015 3038
3016 3039 sidedata = None
3017 3040 if not cachedelta:
3018 3041 rawtext = self._revisiondata(rev)
3019 3042 sidedata = self.sidedata(rev)
3020 3043 if sidedata is None:
3021 3044 sidedata = self.sidedata(rev)
3022 3045
3023 3046 if sidedata_helpers is not None:
3024 3047 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3025 3048 self, sidedata_helpers, sidedata, rev
3026 3049 )
3027 3050 flags = flags | new_flags[0] & ~new_flags[1]
3028 3051
3029 3052 with destrevlog._writing(tr):
3030 3053 destrevlog._addrevision(
3031 3054 node,
3032 3055 rawtext,
3033 3056 tr,
3034 3057 linkrev,
3035 3058 p1,
3036 3059 p2,
3037 3060 flags,
3038 3061 cachedelta,
3039 3062 deltacomputer=deltacomputer,
3040 3063 sidedata=sidedata,
3041 3064 )
3042 3065
3043 3066 if addrevisioncb:
3044 3067 addrevisioncb(self, rev, node)
3045 3068
3046 3069 def censorrevision(self, tr, censornode, tombstone=b''):
3047 3070 if self._format_version == REVLOGV0:
3048 3071 raise error.RevlogError(
3049 3072 _(b'cannot censor with version %d revlogs')
3050 3073 % self._format_version
3051 3074 )
3052 3075 elif self._format_version == REVLOGV1:
3053 3076 rewrite.v1_censor(self, tr, censornode, tombstone)
3054 3077 else:
3055 3078 rewrite.v2_censor(self, tr, censornode, tombstone)
3056 3079
3057 3080 def verifyintegrity(self, state):
3058 3081 """Verifies the integrity of the revlog.
3059 3082
3060 3083 Yields ``revlogproblem`` instances describing problems that are
3061 3084 found.
3062 3085 """
3063 3086 dd, di = self.checksize()
3064 3087 if dd:
3065 3088 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3066 3089 if di:
3067 3090 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3068 3091
3069 3092 version = self._format_version
3070 3093
3071 3094 # The verifier tells us what version revlog we should be.
3072 3095 if version != state[b'expectedversion']:
3073 3096 yield revlogproblem(
3074 3097 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3075 3098 % (self.display_id, version, state[b'expectedversion'])
3076 3099 )
3077 3100
3078 3101 state[b'skipread'] = set()
3079 3102 state[b'safe_renamed'] = set()
3080 3103
3081 3104 for rev in self:
3082 3105 node = self.node(rev)
3083 3106
3084 3107 # Verify contents. 4 cases to care about:
3085 3108 #
3086 3109 # common: the most common case
3087 3110 # rename: with a rename
3088 3111 # meta: file content starts with b'\1\n', the metadata
3089 3112 # header defined in filelog.py, but without a rename
3090 3113 # ext: content stored externally
3091 3114 #
3092 3115 # More formally, their differences are shown below:
3093 3116 #
3094 3117 # | common | rename | meta | ext
3095 3118 # -------------------------------------------------------
3096 3119 # flags() | 0 | 0 | 0 | not 0
3097 3120 # renamed() | False | True | False | ?
3098 3121 # rawtext[0:2]=='\1\n'| False | True | True | ?
3099 3122 #
3100 3123 # "rawtext" means the raw text stored in revlog data, which
3101 3124 # could be retrieved by "rawdata(rev)". "text"
3102 3125 # mentioned below is "revision(rev)".
3103 3126 #
3104 3127 # There are 3 different lengths stored physically:
3105 3128 # 1. L1: rawsize, stored in revlog index
3106 3129 # 2. L2: len(rawtext), stored in revlog data
3107 3130 # 3. L3: len(text), stored in revlog data if flags==0, or
3108 3131 # possibly somewhere else if flags!=0
3109 3132 #
3110 3133 # L1 should be equal to L2. L3 could be different from them.
3111 3134 # "text" may or may not affect commit hash depending on flag
3112 3135 # processors (see flagutil.addflagprocessor).
3113 3136 #
3114 3137 # | common | rename | meta | ext
3115 3138 # -------------------------------------------------
3116 3139 # rawsize() | L1 | L1 | L1 | L1
3117 3140 # size() | L1 | L2-LM | L1(*) | L1 (?)
3118 3141 # len(rawtext) | L2 | L2 | L2 | L2
3119 3142 # len(text) | L2 | L2 | L2 | L3
3120 3143 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3121 3144 #
3122 3145 # LM: length of metadata, depending on rawtext
3123 3146 # (*): not ideal, see comment in filelog.size
3124 3147 # (?): could be "- len(meta)" if the resolved content has
3125 3148 # rename metadata
3126 3149 #
3127 3150 # Checks needed to be done:
3128 3151 # 1. length check: L1 == L2, in all cases.
3129 3152 # 2. hash check: depending on flag processor, we may need to
3130 3153 # use either "text" (external), or "rawtext" (in revlog).
3131 3154
            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
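        """Return a dict of storage details for the requested items.

        Each boolean argument asks for one key in the returned dict:
        ``exclusivefiles`` (list of (vfs, path) pairs used only by this
        revlog), ``sharedfiles``, ``revisionscount``, ``trackedsize``
        (sum of the raw sizes of all revisions) and ``storedsize``
        (bytes actually used on disk by the revlog files).
        """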
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
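        """Rewrite the sidedata of revisions ``startrev`` through ``endrev``.

        ``helpers`` is the 3-tuple used by the sidedata machinery (see
        ``sidedatautil.run_sidedata_helpers``); its second and third items
        carry the sidedata computers and removers consulted below.
        """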
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline data
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
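            # index, data and sidedata file handles, in that order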
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

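                # Decide how the sidedata blob will be stored. Roughly:
                # COMP_MODE_PLAIN keeps it uncompressed, COMP_MODE_DEFAULT
                # means "compressed with the docket's default engine" (no
                # per-chunk header needed), and COMP_MODE_INLINE keeps the
                # compression header inline with the chunk itself.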
                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    # slice rather than index: indexing bytes yields an int
                    # on Python 3, which would break both comparisons below
                    if (
                        h != b'u'
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
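                # entry[8] and entry[9] are the existing sidedata offset and
                # length; non-zero values mean this revision already carries
                # sidedata.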
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it would leave garbage data in
                    # the revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers (parenthesized so that flags[1] is
                # also cleared from flags already present in the entry)
                new_offset_flags = (entry[0] | flags[0]) & ~flags[1]
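                # entry[0] is the packed "offset and flags" field: the
                # revision flags live in its low 16 bits, so the bit
                # fiddling above only ever touches flag bits.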
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )
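                # these four values feed index.replace_sidedata_info below:
                # sidedata offset, sidedata length, new offset/flags field
                # and the sidedata compression mode.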

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
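            # index entries are fixed-size, so the entry for ``startrev``
            # lives at byte offset startrev * entry_size in the index file.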
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
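                # docket-less revlog versions store the format header in the
                # first bytes of the first index entry (whose offset is
                # always zero), so the header has to be re-prepended when
                # rewriting rev 0.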
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)