revlog: introduce a plain compression mode...
marmoute
r48027:b876f0bf default
@@ -1,3328 +1,3360 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 COMP_MODE_INLINE,
39 COMP_MODE_PLAIN,
39 40 FEATURES_BY_VERSION,
40 41 FLAG_GENERALDELTA,
41 42 FLAG_INLINE_DATA,
42 43 INDEX_HEADER,
43 44 REVLOGV0,
44 45 REVLOGV1,
45 46 REVLOGV1_FLAGS,
46 47 REVLOGV2,
47 48 REVLOGV2_FLAGS,
48 49 REVLOG_DEFAULT_FLAGS,
49 50 REVLOG_DEFAULT_FORMAT,
50 51 REVLOG_DEFAULT_VERSION,
51 52 SUPPORTED_FLAGS,
52 53 )
53 54 from .revlogutils.flagutil import (
54 55 REVIDX_DEFAULT_FLAGS,
55 56 REVIDX_ELLIPSIS,
56 57 REVIDX_EXTSTORED,
57 58 REVIDX_FLAGS_ORDER,
58 59 REVIDX_HASCOPIESINFO,
59 60 REVIDX_ISCENSORED,
60 61 REVIDX_RAWTEXT_CHANGING_FLAGS,
61 62 )
62 63 from .thirdparty import attr
63 64 from . import (
64 65 ancestor,
65 66 dagop,
66 67 error,
67 68 mdiff,
68 69 policy,
69 70 pycompat,
70 71 templatefilters,
71 72 util,
72 73 )
73 74 from .interfaces import (
74 75 repository,
75 76 util as interfaceutil,
76 77 )
77 78 from .revlogutils import (
78 79 deltas as deltautil,
79 80 docket as docketutil,
80 81 flagutil,
81 82 nodemap as nodemaputil,
82 83 revlogv0,
83 84 sidedata as sidedatautil,
84 85 )
85 86 from .utils import (
86 87 storageutil,
87 88 stringutil,
88 89 )
89 90
90 91 # blanket usage of all the names to prevent pyflakes complaints
91 92 # We need these names available in the module for extensions.
92 93
93 94 REVLOGV0
94 95 REVLOGV1
95 96 REVLOGV2
96 97 FLAG_INLINE_DATA
97 98 FLAG_GENERALDELTA
98 99 REVLOG_DEFAULT_FLAGS
99 100 REVLOG_DEFAULT_FORMAT
100 101 REVLOG_DEFAULT_VERSION
101 102 REVLOGV1_FLAGS
102 103 REVLOGV2_FLAGS
103 104 REVIDX_ISCENSORED
104 105 REVIDX_ELLIPSIS
105 106 REVIDX_HASCOPIESINFO
106 107 REVIDX_EXTSTORED
107 108 REVIDX_DEFAULT_FLAGS
108 109 REVIDX_FLAGS_ORDER
109 110 REVIDX_RAWTEXT_CHANGING_FLAGS
110 111
111 112 parsers = policy.importmod('parsers')
112 113 rustancestor = policy.importrust('ancestor')
113 114 rustdagop = policy.importrust('dagop')
114 115 rustrevlog = policy.importrust('revlog')
115 116
116 117 # Aliased for performance.
117 118 _zlibdecompress = zlib.decompress
118 119
119 120 # max size of revlog with inline data
120 121 _maxinline = 131072
121 122 _chunksize = 1048576
122 123
123 124 # Flag processors for REVIDX_ELLIPSIS.
124 125 def ellipsisreadprocessor(rl, text):
125 126 return text, False
126 127
127 128
128 129 def ellipsiswriteprocessor(rl, text):
129 130 return text, False
130 131
131 132
132 133 def ellipsisrawprocessor(rl, text):
133 134 return False
134 135
135 136
136 137 ellipsisprocessor = (
137 138 ellipsisreadprocessor,
138 139 ellipsiswriteprocessor,
139 140 ellipsisrawprocessor,
140 141 )
141 142
142 143
143 144 def offset_type(offset, type):
144 145 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
145 146 raise ValueError(b'unknown revlog index flags')
146 147 return int(int(offset) << 16 | type)
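# Illustrative example, not part of the original module: offset_type()
# packs a byte offset and a 16-bit flag field into one integer, with the
# flags in the low 16 bits. Assuming REVIDX_ISCENSORED is one of the known
# flags imported above:
#
#     field = offset_type(1024, REVIDX_ISCENSORED)
#     assert field >> 16 == 1024                   # the byte offset
#     assert field & 0xFFFF == REVIDX_ISCENSORED   # the flags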
147 148
148 149
149 150 def _verify_revision(rl, skipflags, state, node):
150 151 """Verify the integrity of the given revlog ``node`` while providing a hook
151 152 point for extensions to influence the operation."""
152 153 if skipflags:
153 154 state[b'skipread'].add(node)
154 155 else:
155 156 # Side-effect: read content and verify hash.
156 157 rl.revision(node)
157 158
158 159
159 160 # True if a fast implementation for persistent-nodemap is available
160 161 #
161 162 # We also consider the "pure" python implementation to be "fast" because
162 163 # people using pure don't really have performance considerations (and a
163 164 # wheelbarrow of other slowness sources)
164 165 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
165 166 parsers, 'BaseIndexObject'
166 167 )
167 168
168 169
169 170 @attr.s(slots=True, frozen=True)
170 171 class _revisioninfo(object):
171 172 """Information about a revision that allows building its fulltext
172 173 node: expected hash of the revision
173 174 p1, p2: parent revs of the revision
174 175 btext: built text cache consisting of a one-element list
175 176 cachedelta: (baserev, uncompressed_delta) or None
176 177 flags: flags associated to the revision storage
177 178
178 179 One of btext[0] or cachedelta must be set.
179 180 """
180 181
181 182 node = attr.ib()
182 183 p1 = attr.ib()
183 184 p2 = attr.ib()
184 185 btext = attr.ib()
185 186 textlen = attr.ib()
186 187 cachedelta = attr.ib()
187 188 flags = attr.ib()
188 189
189 190
190 191 @interfaceutil.implementer(repository.irevisiondelta)
191 192 @attr.s(slots=True)
192 193 class revlogrevisiondelta(object):
193 194 node = attr.ib()
194 195 p1node = attr.ib()
195 196 p2node = attr.ib()
196 197 basenode = attr.ib()
197 198 flags = attr.ib()
198 199 baserevisionsize = attr.ib()
199 200 revision = attr.ib()
200 201 delta = attr.ib()
201 202 sidedata = attr.ib()
202 203 protocol_flags = attr.ib()
203 204 linknode = attr.ib(default=None)
204 205
205 206
206 207 @interfaceutil.implementer(repository.iverifyproblem)
207 208 @attr.s(frozen=True)
208 209 class revlogproblem(object):
209 210 warning = attr.ib(default=None)
210 211 error = attr.ib(default=None)
211 212 node = attr.ib(default=None)
212 213
213 214
214 215 def parse_index_v1(data, inline):
215 216 # call the C implementation to parse the index data
216 217 index, cache = parsers.parse_index2(data, inline)
217 218 return index, cache
218 219
219 220
220 221 def parse_index_v2(data, inline):
221 222 # call the C implementation to parse the index data
222 223 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
223 224 return index, cache
224 225
225 226
226 227 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
227 228
228 229 def parse_index_v1_nodemap(data, inline):
229 230 index, cache = parsers.parse_index_devel_nodemap(data, inline)
230 231 return index, cache
231 232
232 233
233 234 else:
234 235 parse_index_v1_nodemap = None
235 236
236 237
237 238 def parse_index_v1_mixed(data, inline):
238 239 index, cache = parse_index_v1(data, inline)
239 240 return rustrevlog.MixedIndex(index), cache
240 241
241 242
242 243 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
243 244 # signed integer)
244 245 _maxentrysize = 0x7FFFFFFF
245 246
246 247
247 248 class revlog(object):
248 249 """
249 250 the underlying revision storage object
250 251
251 252 A revlog consists of two parts, an index and the revision data.
252 253
253 254 The index is a file with a fixed record size containing
254 255 information on each revision, including its nodeid (hash), the
255 256 nodeids of its parents, the position and offset of its data within
256 257 the data file, and the revision it's based on. Finally, each entry
257 258 contains a linkrev entry that can serve as a pointer to external
258 259 data.
259 260
260 261 The revision data itself is a linear collection of data chunks.
261 262 Each chunk represents a revision and is usually represented as a
262 263 delta against the previous chunk. To bound lookup time, runs of
263 264 deltas are limited to about 2 times the length of the original
264 265 version data. This makes retrieval of a version proportional to
265 266 its size, or O(1) relative to the number of revisions.
266 267
267 268 Both pieces of the revlog are written to in an append-only
268 269 fashion, which means we never need to rewrite a file to insert or
269 270 remove data, and can use some simple techniques to avoid the need
270 271 for locking while reading.
271 272
272 273 If checkambig, indexfile is opened with checkambig=True at
273 274 writing, to avoid file stat ambiguity.
274 275
275 276 If mmaplargeindex is True, and an mmapindexthreshold is set, the
276 277 index will be mmapped rather than read if it is larger than the
277 278 configured threshold.
278 279
279 280 If censorable is True, the revlog can have censored revisions.
280 281
281 282 If `upperboundcomp` is not None, this is the expected maximal gain from
282 283 compression for the data content.
283 284
284 285 `concurrencychecker` is an optional function that receives 3 arguments: a
285 286 file handle, a filename, and an expected position. It should check whether
286 287 the current position in the file handle is valid, and log/warn/fail (by
287 288 raising).
288 289
289 290
290 291 Internal details
291 292 ----------------
292 293
293 294 A large part of the revlog logic deals with revisions' "index entries", tuple
294 295 objects that contain the same "items" regardless of the revlog version.
295 296 Different versions will have different ways of storing these items (sometimes
296 297 not having them at all), but the tuple will always be the same. New fields
297 298 are usually added at the end to avoid breaking existing code that relies
298 299 on the existing order. The fields are defined as follows:
299 300
300 301 [0] offset:
301 302 The byte index of the start of revision data chunk.
302 303 That value is shifted up by 16 bits. Use "offset = field >> 16" to
303 304 retrieve it.
304 305
305 306 flags:
306 307 A flag field that carries special information or changes the behavior
307 308 of the revision. (see `REVIDX_*` constants for details)
308 309 The flag field only occupies the low 16 bits of this field;
309 310 use "flags = field & 0xFFFF" to retrieve the value.
310 311
311 312 [1] compressed length:
312 313 The size, in bytes, of the chunk on disk
313 314
314 315 [2] uncompressed length:
315 316 The size, in bytes, of the full revision once reconstructed.
316 317
317 318 [3] base rev:
318 319 Either the base of the revision delta chain (without general
319 320 delta), or the base of the delta (stored in the data chunk)
320 321 with general delta.
321 322
322 323 [4] link rev:
323 324 Changelog revision number of the changeset introducing this
324 325 revision.
325 326
326 327 [5] parent 1 rev:
327 328 Revision number of the first parent
328 329
329 330 [6] parent 2 rev:
330 331 Revision number of the second parent
331 332
332 333 [7] node id:
333 334 The node id of the current revision
334 335
335 336 [8] sidedata offset:
336 337 The byte index of the start of the revision's side-data chunk.
337 338
338 339 [9] sidedata chunk length:
339 340 The size, in bytes, of the revision's side-data chunk.
340 341
341 342 [10] data compression mode:
342 343 two bits that detail the way the data chunk is compressed on disk.
343 344 (see "COMP_MODE_*" constants for details). For revlog version 0 and
344 345 1 this will always be COMP_MODE_INLINE.
345 346
346 347 """
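# Illustrative example, not part of the original module: reading the fields
# described in the docstring above from an index entry (rl stands for a
# revlog instance):
#
#     entry = rl.index[rev]
#     offset = entry[0] >> 16      # [0] position of the data chunk
#     flags = entry[0] & 0xFFFF    # [0] REVIDX_* flags
#     comp_len = entry[1]          # [1] compressed length on disk
#     uncomp_len = entry[2]        # [2] full revision length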
347 348
348 349 _flagserrorclass = error.RevlogError
349 350
350 351 def __init__(
351 352 self,
352 353 opener,
353 354 target,
354 355 radix,
355 356 postfix=None, # only exists for `tmpcensored` now
356 357 checkambig=False,
357 358 mmaplargeindex=False,
358 359 censorable=False,
359 360 upperboundcomp=None,
360 361 persistentnodemap=False,
361 362 concurrencychecker=None,
362 363 trypending=False,
363 364 ):
364 365 """
365 366 create a revlog object
366 367
367 368 opener is a function that abstracts the file opening operation
368 369 and can be used to implement COW semantics or the like.
369 370
370 371 `target`: a (KIND, ID) tuple that identifies the content stored in
371 372 this revlog. It helps the rest of the code to understand what the revlog
372 373 is about without having to resort to heuristics and index filename
373 374 analysis. Note that this must be reliably set by normal code, but
374 375 test, debug, or performance measurement code might not set this to an
375 376 accurate value.
376 377 """
377 378 self.upperboundcomp = upperboundcomp
378 379
379 380 self.radix = radix
380 381
381 382 self._docket_file = None
382 383 self._indexfile = None
383 384 self._datafile = None
384 385 self._nodemap_file = None
385 386 self.postfix = postfix
386 387 self._trypending = trypending
387 388 self.opener = opener
388 389 if persistentnodemap:
389 390 self._nodemap_file = nodemaputil.get_nodemap_file(self)
390 391
391 392 assert target[0] in ALL_KINDS
392 393 assert len(target) == 2
393 394 self.target = target
394 395 # When True, indexfile is opened with checkambig=True at writing, to
395 396 # avoid file stat ambiguity.
396 397 self._checkambig = checkambig
397 398 self._mmaplargeindex = mmaplargeindex
398 399 self._censorable = censorable
399 400 # 3-tuple of (node, rev, text) for a raw revision.
400 401 self._revisioncache = None
401 402 # Maps rev to chain base rev.
402 403 self._chainbasecache = util.lrucachedict(100)
403 404 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
404 405 self._chunkcache = (0, b'')
405 406 # How much data to read and cache into the raw revlog data cache.
406 407 self._chunkcachesize = 65536
407 408 self._maxchainlen = None
408 409 self._deltabothparents = True
409 410 self.index = None
410 411 self._docket = None
411 412 self._nodemap_docket = None
412 413 # Mapping of partial identifiers to full nodes.
413 414 self._pcache = {}
414 415 # Mapping of revision integer to full node.
415 416 self._compengine = b'zlib'
416 417 self._compengineopts = {}
417 418 self._maxdeltachainspan = -1
418 419 self._withsparseread = False
419 420 self._sparserevlog = False
420 421 self.hassidedata = False
421 422 self._srdensitythreshold = 0.50
422 423 self._srmingapsize = 262144
423 424
424 425 # Make copy of flag processors so each revlog instance can support
425 426 # custom flags.
426 427 self._flagprocessors = dict(flagutil.flagprocessors)
427 428
428 429 # 2-tuple of file handles being used for active writing.
429 430 self._writinghandles = None
430 431 # prevent nesting of addgroup
431 432 self._adding_group = None
432 433
433 434 self._loadindex()
434 435
435 436 self._concurrencychecker = concurrencychecker
436 437
437 438 def _init_opts(self):
438 439 """process options (from above/config) to setup associated default revlog mode
439 440
440 441 These values might be affected when actually reading on disk information.
441 442
442 443 The relevant values are returned for use in _loadindex().
443 444
444 445 * newversionflags:
445 446 version header to use if we need to create a new revlog
446 447
447 448 * mmapindexthreshold:
448 449 minimal index size at which to start using mmap
449 450
450 451 * force_nodemap:
451 452 force the usage of a "development" version of the nodemap code
452 453 """
453 454 mmapindexthreshold = None
454 455 opts = self.opener.options
455 456
456 457 if b'revlogv2' in opts:
457 458 new_header = REVLOGV2 | FLAG_INLINE_DATA
458 459 elif b'revlogv1' in opts:
459 460 new_header = REVLOGV1 | FLAG_INLINE_DATA
460 461 if b'generaldelta' in opts:
461 462 new_header |= FLAG_GENERALDELTA
462 463 elif b'revlogv0' in self.opener.options:
463 464 new_header = REVLOGV0
464 465 else:
465 466 new_header = REVLOG_DEFAULT_VERSION
466 467
467 468 if b'chunkcachesize' in opts:
468 469 self._chunkcachesize = opts[b'chunkcachesize']
469 470 if b'maxchainlen' in opts:
470 471 self._maxchainlen = opts[b'maxchainlen']
471 472 if b'deltabothparents' in opts:
472 473 self._deltabothparents = opts[b'deltabothparents']
473 474 self._lazydelta = bool(opts.get(b'lazydelta', True))
474 475 self._lazydeltabase = False
475 476 if self._lazydelta:
476 477 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
477 478 if b'compengine' in opts:
478 479 self._compengine = opts[b'compengine']
479 480 if b'zlib.level' in opts:
480 481 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
481 482 if b'zstd.level' in opts:
482 483 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
483 484 if b'maxdeltachainspan' in opts:
484 485 self._maxdeltachainspan = opts[b'maxdeltachainspan']
485 486 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
486 487 mmapindexthreshold = opts[b'mmapindexthreshold']
487 488 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
488 489 withsparseread = bool(opts.get(b'with-sparse-read', False))
489 490 # sparse-revlog forces sparse-read
490 491 self._withsparseread = self._sparserevlog or withsparseread
491 492 if b'sparse-read-density-threshold' in opts:
492 493 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
493 494 if b'sparse-read-min-gap-size' in opts:
494 495 self._srmingapsize = opts[b'sparse-read-min-gap-size']
495 496 if opts.get(b'enableellipsis'):
496 497 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
497 498
498 499 # revlog v0 doesn't have flag processors
499 500 for flag, processor in pycompat.iteritems(
500 501 opts.get(b'flagprocessors', {})
501 502 ):
502 503 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
503 504
504 505 if self._chunkcachesize <= 0:
505 506 raise error.RevlogError(
506 507 _(b'revlog chunk cache size %r is not greater than 0')
507 508 % self._chunkcachesize
508 509 )
509 510 elif self._chunkcachesize & (self._chunkcachesize - 1):
510 511 raise error.RevlogError(
511 512 _(b'revlog chunk cache size %r is not a power of 2')
512 513 % self._chunkcachesize
513 514 )
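# Illustrative note, not part of the original module: the check above
# relies on "x & (x - 1)" clearing the lowest set bit, which yields zero
# exactly when x is a power of two:
#
#     65536 & 65535 == 0    # power of two, accepted
#     65537 & 65536 != 0    # not a power of two, rejected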
514 515 force_nodemap = opts.get(b'devel-force-nodemap', False)
515 516 return new_header, mmapindexthreshold, force_nodemap
516 517
517 518 def _get_data(self, filepath, mmap_threshold, size=None):
518 519 """return a file content with or without mmap
519 520
520 521 If the file is missing return the empty string"""
521 522 try:
522 523 with self.opener(filepath) as fp:
523 524 if mmap_threshold is not None:
524 525 file_size = self.opener.fstat(fp).st_size
525 526 if file_size >= mmap_threshold:
526 527 if size is not None:
527 528 # avoid potential mmap crash
528 529 size = min(file_size, size)
529 530 # TODO: should .close() to release resources without
530 531 # relying on Python GC
531 532 if size is None:
532 533 return util.buffer(util.mmapread(fp))
533 534 else:
534 535 return util.buffer(util.mmapread(fp, size))
535 536 if size is None:
536 537 return fp.read()
537 538 else:
538 539 return fp.read(size)
539 540 except IOError as inst:
540 541 if inst.errno != errno.ENOENT:
541 542 raise
542 543 return b''
543 544
544 545 def _loadindex(self):
545 546
546 547 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
547 548
548 549 if self.postfix is not None:
549 550 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
550 551 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
551 552 entry_point = b'%s.i.a' % self.radix
552 553 else:
553 554 entry_point = b'%s.i' % self.radix
554 555
555 556 entry_data = b''
556 557 self._initempty = True
557 558 entry_data = self._get_data(entry_point, mmapindexthreshold)
558 559 if len(entry_data) > 0:
559 560 header = INDEX_HEADER.unpack(entry_data[:4])[0]
560 561 self._initempty = False
561 562 else:
562 563 header = new_header
563 564
564 565 self._format_flags = header & ~0xFFFF
565 566 self._format_version = header & 0xFFFF
566 567
567 568 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
568 569 if supported_flags is None:
569 570 msg = _(b'unknown version (%d) in revlog %s')
570 571 msg %= (self._format_version, self.display_id)
571 572 raise error.RevlogError(msg)
572 573 elif self._format_flags & ~supported_flags:
573 574 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
574 575 display_flag = self._format_flags >> 16
575 576 msg %= (display_flag, self._format_version, self.display_id)
576 577 raise error.RevlogError(msg)
577 578
578 579 features = FEATURES_BY_VERSION[self._format_version]
579 580 self._inline = features[b'inline'](self._format_flags)
580 581 self._generaldelta = features[b'generaldelta'](self._format_flags)
581 582 self.hassidedata = features[b'sidedata']
582 583
583 584 if not features[b'docket']:
584 585 self._indexfile = entry_point
585 586 index_data = entry_data
586 587 else:
587 588 self._docket_file = entry_point
588 589 if self._initempty:
589 590 self._docket = docketutil.default_docket(self, header)
590 591 else:
591 592 self._docket = docketutil.parse_docket(
592 593 self, entry_data, use_pending=self._trypending
593 594 )
594 595 self._indexfile = self._docket.index_filepath()
595 596 index_data = b''
596 597 index_size = self._docket.index_end
597 598 if index_size > 0:
598 599 index_data = self._get_data(
599 600 self._indexfile, mmapindexthreshold, size=index_size
600 601 )
601 602 if len(index_data) < index_size:
602 603 msg = _(b'too few index data for %s: got %d, expected %d')
603 604 msg %= (self.display_id, len(index_data), index_size)
604 605 raise error.RevlogError(msg)
605 606
606 607 self._inline = False
607 608 # generaldelta implied by version 2 revlogs.
608 609 self._generaldelta = True
609 610 # the logic for persistent nodemap will be dealt with within the
610 611 # main docket, so disable it for now.
611 612 self._nodemap_file = None
612 613
613 614 if self.postfix is None:
614 615 self._datafile = b'%s.d' % self.radix
615 616 else:
616 617 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
617 618
618 619 self.nodeconstants = sha1nodeconstants
619 620 self.nullid = self.nodeconstants.nullid
620 621
621 622 # sparse-revlog can't be on without general-delta (issue6056)
622 623 if not self._generaldelta:
623 624 self._sparserevlog = False
624 625
625 626 self._storedeltachains = True
626 627
627 628 devel_nodemap = (
628 629 self._nodemap_file
629 630 and force_nodemap
630 631 and parse_index_v1_nodemap is not None
631 632 )
632 633
633 634 use_rust_index = False
634 635 if rustrevlog is not None:
635 636 if self._nodemap_file is not None:
636 637 use_rust_index = True
637 638 else:
638 639 use_rust_index = self.opener.options.get(b'rust.index')
639 640
640 641 self._parse_index = parse_index_v1
641 642 if self._format_version == REVLOGV0:
642 643 self._parse_index = revlogv0.parse_index_v0
643 644 elif self._format_version == REVLOGV2:
644 645 self._parse_index = parse_index_v2
645 646 elif devel_nodemap:
646 647 self._parse_index = parse_index_v1_nodemap
647 648 elif use_rust_index:
648 649 self._parse_index = parse_index_v1_mixed
649 650 try:
650 651 d = self._parse_index(index_data, self._inline)
651 652 index, _chunkcache = d
652 653 use_nodemap = (
653 654 not self._inline
654 655 and self._nodemap_file is not None
655 656 and util.safehasattr(index, 'update_nodemap_data')
656 657 )
657 658 if use_nodemap:
658 659 nodemap_data = nodemaputil.persisted_data(self)
659 660 if nodemap_data is not None:
660 661 docket = nodemap_data[0]
661 662 if (
662 663 len(d[0]) > docket.tip_rev
663 664 and d[0][docket.tip_rev][7] == docket.tip_node
664 665 ):
665 666 # no changelog tampering
666 667 self._nodemap_docket = docket
667 668 index.update_nodemap_data(*nodemap_data)
668 669 except (ValueError, IndexError):
669 670 raise error.RevlogError(
670 671 _(b"index %s is corrupted") % self.display_id
671 672 )
672 673 self.index, self._chunkcache = d
673 674 if not self._chunkcache:
674 675 self._chunkclear()
675 676 # revnum -> (chain-length, sum-delta-length)
676 677 self._chaininfocache = util.lrucachedict(500)
677 678 # revlog header -> revlog compressor
678 679 self._decompressors = {}
679 680
680 681 @util.propertycache
681 682 def revlog_kind(self):
682 683 return self.target[0]
683 684
684 685 @util.propertycache
685 686 def display_id(self):
686 687 """The public facing "ID" of the revlog that we use in message"""
687 688 # Maybe we should build a user facing representation of
688 689 # revlog.target instead of using `self.radix`
689 690 return self.radix
690 691
691 692 @util.propertycache
692 693 def _compressor(self):
693 694 engine = util.compengines[self._compengine]
694 695 return engine.revlogcompressor(self._compengineopts)
695 696
696 697 def _indexfp(self):
697 698 """file object for the revlog's index file"""
698 699 return self.opener(self._indexfile, mode=b"r")
699 700
700 701 def __index_write_fp(self):
701 702 # You should not use this directly; use `_writing` instead
702 703 try:
703 704 f = self.opener(
704 705 self._indexfile, mode=b"r+", checkambig=self._checkambig
705 706 )
706 707 if self._docket is None:
707 708 f.seek(0, os.SEEK_END)
708 709 else:
709 710 f.seek(self._docket.index_end, os.SEEK_SET)
710 711 return f
711 712 except IOError as inst:
712 713 if inst.errno != errno.ENOENT:
713 714 raise
714 715 return self.opener(
715 716 self._indexfile, mode=b"w+", checkambig=self._checkambig
716 717 )
717 718
718 719 def __index_new_fp(self):
719 720 # You should not use this unless you are upgrading from an inline revlog
720 721 return self.opener(
721 722 self._indexfile,
722 723 mode=b"w",
723 724 checkambig=self._checkambig,
724 725 atomictemp=True,
725 726 )
726 727
727 728 def _datafp(self, mode=b'r'):
728 729 """file object for the revlog's data file"""
729 730 return self.opener(self._datafile, mode=mode)
730 731
731 732 @contextlib.contextmanager
732 733 def _datareadfp(self, existingfp=None):
733 734 """file object suitable to read data"""
734 735 # Use explicit file handle, if given.
735 736 if existingfp is not None:
736 737 yield existingfp
737 738
738 739 # Use a file handle being actively used for writes, if available.
739 740 # There is some danger to doing this because reads will seek the
740 741 # file. However, _writeentry() performs a SEEK_END before all writes,
741 742 # so we should be safe.
742 743 elif self._writinghandles:
743 744 if self._inline:
744 745 yield self._writinghandles[0]
745 746 else:
746 747 yield self._writinghandles[1]
747 748
748 749 # Otherwise open a new file handle.
749 750 else:
750 751 if self._inline:
751 752 func = self._indexfp
752 753 else:
753 754 func = self._datafp
754 755 with func() as fp:
755 756 yield fp
756 757
757 758 def tiprev(self):
758 759 return len(self.index) - 1
759 760
760 761 def tip(self):
761 762 return self.node(self.tiprev())
762 763
763 764 def __contains__(self, rev):
764 765 return 0 <= rev < len(self)
765 766
766 767 def __len__(self):
767 768 return len(self.index)
768 769
769 770 def __iter__(self):
770 771 return iter(pycompat.xrange(len(self)))
771 772
772 773 def revs(self, start=0, stop=None):
773 774 """iterate over all rev in this revlog (from start to stop)"""
774 775 return storageutil.iterrevs(len(self), start=start, stop=stop)
775 776
776 777 @property
777 778 def nodemap(self):
778 779 msg = (
779 780 b"revlog.nodemap is deprecated, "
780 781 b"use revlog.index.[has_node|rev|get_rev]"
781 782 )
782 783 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
783 784 return self.index.nodemap
784 785
785 786 @property
786 787 def _nodecache(self):
787 788 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
788 789 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
789 790 return self.index.nodemap
790 791
791 792 def hasnode(self, node):
792 793 try:
793 794 self.rev(node)
794 795 return True
795 796 except KeyError:
796 797 return False
797 798
798 799 def candelta(self, baserev, rev):
799 800 """whether two revisions (baserev, rev) can be delta-ed or not"""
800 801 # Disable delta if either rev requires a content-changing flag
801 802 # processor (ex. LFS). This is because such a flag processor can alter
802 803 # the rawtext content that the delta will be based on, and two clients
803 804 # could have the same revlog node with different flags (i.e. different
804 805 # rawtext contents) and the delta could be incompatible.
805 806 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
806 807 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
807 808 ):
808 809 return False
809 810 return True
810 811
811 812 def update_caches(self, transaction):
812 813 if self._nodemap_file is not None:
813 814 if transaction is None:
814 815 nodemaputil.update_persistent_nodemap(self)
815 816 else:
816 817 nodemaputil.setup_persistent_nodemap(transaction, self)
817 818
818 819 def clearcaches(self):
819 820 self._revisioncache = None
820 821 self._chainbasecache.clear()
821 822 self._chunkcache = (0, b'')
822 823 self._pcache = {}
823 824 self._nodemap_docket = None
824 825 self.index.clearcaches()
825 826 # The python code is the one responsible for validating the docket, so
826 827 # we end up having to refresh it here.
827 828 use_nodemap = (
828 829 not self._inline
829 830 and self._nodemap_file is not None
830 831 and util.safehasattr(self.index, 'update_nodemap_data')
831 832 )
832 833 if use_nodemap:
833 834 nodemap_data = nodemaputil.persisted_data(self)
834 835 if nodemap_data is not None:
835 836 self._nodemap_docket = nodemap_data[0]
836 837 self.index.update_nodemap_data(*nodemap_data)
837 838
838 839 def rev(self, node):
839 840 try:
840 841 return self.index.rev(node)
841 842 except TypeError:
842 843 raise
843 844 except error.RevlogError:
844 845 # parsers.c radix tree lookup failed
845 846 if (
846 847 node == self.nodeconstants.wdirid
847 848 or node in self.nodeconstants.wdirfilenodeids
848 849 ):
849 850 raise error.WdirUnsupported
850 851 raise error.LookupError(node, self.display_id, _(b'no node'))
851 852
852 853 # Accessors for index entries.
853 854
854 855 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
855 856 # are flags.
856 857 def start(self, rev):
857 858 return int(self.index[rev][0] >> 16)
858 859
859 860 def flags(self, rev):
860 861 return self.index[rev][0] & 0xFFFF
861 862
862 863 def length(self, rev):
863 864 return self.index[rev][1]
864 865
865 866 def sidedata_length(self, rev):
866 867 if not self.hassidedata:
867 868 return 0
868 869 return self.index[rev][9]
869 870
870 871 def rawsize(self, rev):
871 872 """return the length of the uncompressed text for a given revision"""
872 873 l = self.index[rev][2]
873 874 if l >= 0:
874 875 return l
875 876
876 877 t = self.rawdata(rev)
877 878 return len(t)
878 879
879 880 def size(self, rev):
880 881 """length of non-raw text (processed by a "read" flag processor)"""
881 882 # fast path: if no "read" flag processor could change the content,
882 883 # size is rawsize. note: ELLIPSIS is known to not change the content.
883 884 flags = self.flags(rev)
884 885 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
885 886 return self.rawsize(rev)
886 887
887 888 return len(self.revision(rev, raw=False))
888 889
889 890 def chainbase(self, rev):
890 891 base = self._chainbasecache.get(rev)
891 892 if base is not None:
892 893 return base
893 894
894 895 index = self.index
895 896 iterrev = rev
896 897 base = index[iterrev][3]
897 898 while base != iterrev:
898 899 iterrev = base
899 900 base = index[iterrev][3]
900 901
901 902 self._chainbasecache[rev] = base
902 903 return base
903 904
904 905 def linkrev(self, rev):
905 906 return self.index[rev][4]
906 907
907 908 def parentrevs(self, rev):
908 909 try:
909 910 entry = self.index[rev]
910 911 except IndexError:
911 912 if rev == wdirrev:
912 913 raise error.WdirUnsupported
913 914 raise
914 915 if entry[5] == nullrev:
915 916 return entry[6], entry[5]
916 917 else:
917 918 return entry[5], entry[6]
918 919
919 920 # fast parentrevs(rev) where rev isn't filtered
920 921 _uncheckedparentrevs = parentrevs
921 922
922 923 def node(self, rev):
923 924 try:
924 925 return self.index[rev][7]
925 926 except IndexError:
926 927 if rev == wdirrev:
927 928 raise error.WdirUnsupported
928 929 raise
929 930
930 931 # Derived from index values.
931 932
932 933 def end(self, rev):
933 934 return self.start(rev) + self.length(rev)
934 935
935 936 def parents(self, node):
936 937 i = self.index
937 938 d = i[self.rev(node)]
938 939 # inline node() to avoid function call overhead
939 940 if d[5] == self.nullid:
940 941 return i[d[6]][7], i[d[5]][7]
941 942 else:
942 943 return i[d[5]][7], i[d[6]][7]
943 944
944 945 def chainlen(self, rev):
945 946 return self._chaininfo(rev)[0]
946 947
947 948 def _chaininfo(self, rev):
948 949 chaininfocache = self._chaininfocache
949 950 if rev in chaininfocache:
950 951 return chaininfocache[rev]
951 952 index = self.index
952 953 generaldelta = self._generaldelta
953 954 iterrev = rev
954 955 e = index[iterrev]
955 956 clen = 0
956 957 compresseddeltalen = 0
957 958 while iterrev != e[3]:
958 959 clen += 1
959 960 compresseddeltalen += e[1]
960 961 if generaldelta:
961 962 iterrev = e[3]
962 963 else:
963 964 iterrev -= 1
964 965 if iterrev in chaininfocache:
965 966 t = chaininfocache[iterrev]
966 967 clen += t[0]
967 968 compresseddeltalen += t[1]
968 969 break
969 970 e = index[iterrev]
970 971 else:
971 972 # Add text length of base since decompressing that also takes
972 973 # work. For cache hits the length is already included.
973 974 compresseddeltalen += e[1]
974 975 r = (clen, compresseddeltalen)
975 976 chaininfocache[rev] = r
976 977 return r
977 978
978 979 def _deltachain(self, rev, stoprev=None):
979 980 """Obtain the delta chain for a revision.
980 981
981 982 ``stoprev`` specifies a revision to stop at. If not specified, we
982 983 stop at the base of the chain.
983 984
984 985 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
985 986 revs in ascending order and ``stopped`` is a bool indicating whether
986 987 ``stoprev`` was hit.
987 988 """
988 989 # Try C implementation.
989 990 try:
990 991 return self.index.deltachain(rev, stoprev, self._generaldelta)
991 992 except AttributeError:
992 993 pass
993 994
994 995 chain = []
995 996
996 997 # Alias to prevent attribute lookup in tight loop.
997 998 index = self.index
998 999 generaldelta = self._generaldelta
999 1000
1000 1001 iterrev = rev
1001 1002 e = index[iterrev]
1002 1003 while iterrev != e[3] and iterrev != stoprev:
1003 1004 chain.append(iterrev)
1004 1005 if generaldelta:
1005 1006 iterrev = e[3]
1006 1007 else:
1007 1008 iterrev -= 1
1008 1009 e = index[iterrev]
1009 1010
1010 1011 if iterrev == stoprev:
1011 1012 stopped = True
1012 1013 else:
1013 1014 chain.append(iterrev)
1014 1015 stopped = False
1015 1016
1016 1017 chain.reverse()
1017 1018 return chain, stopped
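# Illustrative usage, not part of the original module (rl stands for a
# revlog instance):
#
#     chain, stopped = rl._deltachain(rev)
#     # unless 'stopped' is True, chain[0] is the revision whose stored
#     # text the deltas of chain[1:] are applied to, in order, to
#     # reconstruct 'rev'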
1018 1019
1019 1020 def ancestors(self, revs, stoprev=0, inclusive=False):
1020 1021 """Generate the ancestors of 'revs' in reverse revision order.
1021 1022 Does not generate revs lower than stoprev.
1022 1023
1023 1024 See the documentation for ancestor.lazyancestors for more details."""
1024 1025
1025 1026 # first, make sure start revisions aren't filtered
1026 1027 revs = list(revs)
1027 1028 checkrev = self.node
1028 1029 for r in revs:
1029 1030 checkrev(r)
1030 1031 # and we're sure ancestors aren't filtered as well
1031 1032
1032 1033 if rustancestor is not None:
1033 1034 lazyancestors = rustancestor.LazyAncestors
1034 1035 arg = self.index
1035 1036 else:
1036 1037 lazyancestors = ancestor.lazyancestors
1037 1038 arg = self._uncheckedparentrevs
1038 1039 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1039 1040
1040 1041 def descendants(self, revs):
1041 1042 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1042 1043
1043 1044 def findcommonmissing(self, common=None, heads=None):
1044 1045 """Return a tuple of the ancestors of common and the ancestors of heads
1045 1046 that are not ancestors of common. In revset terminology, we return the
1046 1047 tuple:
1047 1048
1048 1049 ::common, (::heads) - (::common)
1049 1050
1050 1051 The list is sorted by revision number, meaning it is
1051 1052 topologically sorted.
1052 1053
1053 1054 'heads' and 'common' are both lists of node IDs. If heads is
1054 1055 not supplied, uses all of the revlog's heads. If common is not
1055 1056 supplied, uses nullid."""
1056 1057 if common is None:
1057 1058 common = [self.nullid]
1058 1059 if heads is None:
1059 1060 heads = self.heads()
1060 1061
1061 1062 common = [self.rev(n) for n in common]
1062 1063 heads = [self.rev(n) for n in heads]
1063 1064
1064 1065 # we want the ancestors, but inclusive
1065 1066 class lazyset(object):
1066 1067 def __init__(self, lazyvalues):
1067 1068 self.addedvalues = set()
1068 1069 self.lazyvalues = lazyvalues
1069 1070
1070 1071 def __contains__(self, value):
1071 1072 return value in self.addedvalues or value in self.lazyvalues
1072 1073
1073 1074 def __iter__(self):
1074 1075 added = self.addedvalues
1075 1076 for r in added:
1076 1077 yield r
1077 1078 for r in self.lazyvalues:
1078 1079 if not r in added:
1079 1080 yield r
1080 1081
1081 1082 def add(self, value):
1082 1083 self.addedvalues.add(value)
1083 1084
1084 1085 def update(self, values):
1085 1086 self.addedvalues.update(values)
1086 1087
1087 1088 has = lazyset(self.ancestors(common))
1088 1089 has.add(nullrev)
1089 1090 has.update(common)
1090 1091
1091 1092 # take all ancestors from heads that aren't in has
1092 1093 missing = set()
1093 1094 visit = collections.deque(r for r in heads if r not in has)
1094 1095 while visit:
1095 1096 r = visit.popleft()
1096 1097 if r in missing:
1097 1098 continue
1098 1099 else:
1099 1100 missing.add(r)
1100 1101 for p in self.parentrevs(r):
1101 1102 if p not in has:
1102 1103 visit.append(p)
1103 1104 missing = list(missing)
1104 1105 missing.sort()
1105 1106 return has, [self.node(miss) for miss in missing]
1106 1107
1107 1108 def incrementalmissingrevs(self, common=None):
1108 1109 """Return an object that can be used to incrementally compute the
1109 1110 revision numbers of the ancestors of arbitrary sets that are not
1110 1111 ancestors of common. This is an ancestor.incrementalmissingancestors
1111 1112 object.
1112 1113
1113 1114 'common' is a list of revision numbers. If common is not supplied, uses
1114 1115 nullrev.
1115 1116 """
1116 1117 if common is None:
1117 1118 common = [nullrev]
1118 1119
1119 1120 if rustancestor is not None:
1120 1121 return rustancestor.MissingAncestors(self.index, common)
1121 1122 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1122 1123
1123 1124 def findmissingrevs(self, common=None, heads=None):
1124 1125 """Return the revision numbers of the ancestors of heads that
1125 1126 are not ancestors of common.
1126 1127
1127 1128 More specifically, return a list of revision numbers corresponding to
1128 1129 nodes N such that every N satisfies the following constraints:
1129 1130
1130 1131 1. N is an ancestor of some node in 'heads'
1131 1132 2. N is not an ancestor of any node in 'common'
1132 1133
1133 1134 The list is sorted by revision number, meaning it is
1134 1135 topologically sorted.
1135 1136
1136 1137 'heads' and 'common' are both lists of revision numbers. If heads is
1137 1138 not supplied, uses all of the revlog's heads. If common is not
1138 1139 supplied, uses nullid."""
1139 1140 if common is None:
1140 1141 common = [nullrev]
1141 1142 if heads is None:
1142 1143 heads = self.headrevs()
1143 1144
1144 1145 inc = self.incrementalmissingrevs(common=common)
1145 1146 return inc.missingancestors(heads)
1146 1147
1147 1148 def findmissing(self, common=None, heads=None):
1148 1149 """Return the ancestors of heads that are not ancestors of common.
1149 1150
1150 1151 More specifically, return a list of nodes N such that every N
1151 1152 satisfies the following constraints:
1152 1153
1153 1154 1. N is an ancestor of some node in 'heads'
1154 1155 2. N is not an ancestor of any node in 'common'
1155 1156
1156 1157 The list is sorted by revision number, meaning it is
1157 1158 topologically sorted.
1158 1159
1159 1160 'heads' and 'common' are both lists of node IDs. If heads is
1160 1161 not supplied, uses all of the revlog's heads. If common is not
1161 1162 supplied, uses nullid."""
1162 1163 if common is None:
1163 1164 common = [self.nullid]
1164 1165 if heads is None:
1165 1166 heads = self.heads()
1166 1167
1167 1168 common = [self.rev(n) for n in common]
1168 1169 heads = [self.rev(n) for n in heads]
1169 1170
1170 1171 inc = self.incrementalmissingrevs(common=common)
1171 1172 return [self.node(r) for r in inc.missingancestors(heads)]
1172 1173
1173 1174 def nodesbetween(self, roots=None, heads=None):
1174 1175 """Return a topological path from 'roots' to 'heads'.
1175 1176
1176 1177 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1177 1178 topologically sorted list of all nodes N that satisfy both of
1178 1179 these constraints:
1179 1180
1180 1181 1. N is a descendant of some node in 'roots'
1181 1182 2. N is an ancestor of some node in 'heads'
1182 1183
1183 1184 Every node is considered to be both a descendant and an ancestor
1184 1185 of itself, so every reachable node in 'roots' and 'heads' will be
1185 1186 included in 'nodes'.
1186 1187
1187 1188 'outroots' is the list of reachable nodes in 'roots', i.e., the
1188 1189 subset of 'roots' that is returned in 'nodes'. Likewise,
1189 1190 'outheads' is the subset of 'heads' that is also in 'nodes'.
1190 1191
1191 1192 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1192 1193 unspecified, uses nullid as the only root. If 'heads' is
1193 1194 unspecified, uses list of all of the revlog's heads."""
1194 1195 nonodes = ([], [], [])
1195 1196 if roots is not None:
1196 1197 roots = list(roots)
1197 1198 if not roots:
1198 1199 return nonodes
1199 1200 lowestrev = min([self.rev(n) for n in roots])
1200 1201 else:
1201 1202 roots = [self.nullid] # Everybody's a descendant of nullid
1202 1203 lowestrev = nullrev
1203 1204 if (lowestrev == nullrev) and (heads is None):
1204 1205 # We want _all_ the nodes!
1205 1206 return (
1206 1207 [self.node(r) for r in self],
1207 1208 [self.nullid],
1208 1209 list(self.heads()),
1209 1210 )
1210 1211 if heads is None:
1211 1212 # All nodes are ancestors, so the latest ancestor is the last
1212 1213 # node.
1213 1214 highestrev = len(self) - 1
1214 1215 # Set ancestors to None to signal that every node is an ancestor.
1215 1216 ancestors = None
1216 1217 # Set heads to an empty dictionary for later discovery of heads
1217 1218 heads = {}
1218 1219 else:
1219 1220 heads = list(heads)
1220 1221 if not heads:
1221 1222 return nonodes
1222 1223 ancestors = set()
1223 1224 # Turn heads into a dictionary so we can remove 'fake' heads.
1224 1225 # Also, later we will be using it to filter out the heads we can't
1225 1226 # find from roots.
1226 1227 heads = dict.fromkeys(heads, False)
1227 1228 # Start at the top and keep marking parents until we're done.
1228 1229 nodestotag = set(heads)
1229 1230 # Remember where the top was so we can use it as a limit later.
1230 1231 highestrev = max([self.rev(n) for n in nodestotag])
1231 1232 while nodestotag:
1232 1233 # grab a node to tag
1233 1234 n = nodestotag.pop()
1234 1235 # Never tag nullid
1235 1236 if n == self.nullid:
1236 1237 continue
1237 1238 # A node's revision number represents its place in a
1238 1239 # topologically sorted list of nodes.
1239 1240 r = self.rev(n)
1240 1241 if r >= lowestrev:
1241 1242 if n not in ancestors:
1242 1243 # If we are possibly a descendant of one of the roots
1243 1244 # and we haven't already been marked as an ancestor
1244 1245 ancestors.add(n) # Mark as ancestor
1245 1246 # Add non-nullid parents to list of nodes to tag.
1246 1247 nodestotag.update(
1247 1248 [p for p in self.parents(n) if p != self.nullid]
1248 1249 )
1249 1250 elif n in heads: # We've seen it before, is it a fake head?
1250 1251 # So it is, real heads should not be the ancestors of
1251 1252 # any other heads.
1252 1253 heads.pop(n)
1253 1254 if not ancestors:
1254 1255 return nonodes
1255 1256 # Now that we have our set of ancestors, we want to remove any
1256 1257 # roots that are not ancestors.
1257 1258
1258 1259 # If one of the roots was nullid, everything is included anyway.
1259 1260 if lowestrev > nullrev:
1260 1261 # But, since we weren't, let's recompute the lowest rev to not
1261 1262 # include roots that aren't ancestors.
1262 1263
1263 1264 # Filter out roots that aren't ancestors of heads
1264 1265 roots = [root for root in roots if root in ancestors]
1265 1266 # Recompute the lowest revision
1266 1267 if roots:
1267 1268 lowestrev = min([self.rev(root) for root in roots])
1268 1269 else:
1269 1270 # No more roots? Return empty list
1270 1271 return nonodes
1271 1272 else:
1272 1273 # We are descending from nullid, and don't need to care about
1273 1274 # any other roots.
1274 1275 lowestrev = nullrev
1275 1276 roots = [self.nullid]
1276 1277 # Transform our roots list into a set.
1277 1278 descendants = set(roots)
1278 1279 # Also, keep the original roots so we can filter out roots that aren't
1279 1280 # 'real' roots (i.e. are descended from other roots).
1280 1281 roots = descendants.copy()
1281 1282 # Our topologically sorted list of output nodes.
1282 1283 orderedout = []
1283 1284 # Don't start at nullid since we don't want nullid in our output list,
1284 1285 # and if nullid shows up in descendants, empty parents will look like
1285 1286 # they're descendants.
1286 1287 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1287 1288 n = self.node(r)
1288 1289 isdescendant = False
1289 1290 if lowestrev == nullrev: # Everybody is a descendant of nullid
1290 1291 isdescendant = True
1291 1292 elif n in descendants:
1292 1293 # n is already a descendant
1293 1294 isdescendant = True
1294 1295 # This check only needs to be done here because all the roots
1295 1296 # will start being marked as descendants before the loop.
1296 1297 if n in roots:
1297 1298 # If n was a root, check if it's a 'real' root.
1298 1299 p = tuple(self.parents(n))
1299 1300 # If any of its parents are descendants, it's not a root.
1300 1301 if (p[0] in descendants) or (p[1] in descendants):
1301 1302 roots.remove(n)
1302 1303 else:
1303 1304 p = tuple(self.parents(n))
1304 1305 # A node is a descendant if either of its parents are
1305 1306 # descendants. (We seeded the descendants set with the roots
1306 1307 # up there, remember?)
1307 1308 if (p[0] in descendants) or (p[1] in descendants):
1308 1309 descendants.add(n)
1309 1310 isdescendant = True
1310 1311 if isdescendant and ((ancestors is None) or (n in ancestors)):
1311 1312 # Only include nodes that are both descendants and ancestors.
1312 1313 orderedout.append(n)
1313 1314 if (ancestors is not None) and (n in heads):
1314 1315 # We're trying to figure out which heads are reachable
1315 1316 # from roots.
1316 1317 # Mark this head as having been reached
1317 1318 heads[n] = True
1318 1319 elif ancestors is None:
1319 1320 # Otherwise, we're trying to discover the heads.
1320 1321 # Assume this is a head because if it isn't, the next step
1321 1322 # will eventually remove it.
1322 1323 heads[n] = True
1323 1324 # But, obviously its parents aren't.
1324 1325 for p in self.parents(n):
1325 1326 heads.pop(p, None)
1326 1327 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1327 1328 roots = list(roots)
1328 1329 assert orderedout
1329 1330 assert roots
1330 1331 assert heads
1331 1332 return (orderedout, roots, heads)
1332 1333
1333 1334 def headrevs(self, revs=None):
1334 1335 if revs is None:
1335 1336 try:
1336 1337 return self.index.headrevs()
1337 1338 except AttributeError:
1338 1339 return self._headrevs()
1339 1340 if rustdagop is not None:
1340 1341 return rustdagop.headrevs(self.index, revs)
1341 1342 return dagop.headrevs(revs, self._uncheckedparentrevs)
1342 1343
1343 1344 def computephases(self, roots):
1344 1345 return self.index.computephasesmapsets(roots)
1345 1346
1346 1347 def _headrevs(self):
1347 1348 count = len(self)
1348 1349 if not count:
1349 1350 return [nullrev]
1350 1351 # we won't iter over filtered revs so nobody is a head at start
1351 1352 ishead = [0] * (count + 1)
1352 1353 index = self.index
1353 1354 for r in self:
1354 1355 ishead[r] = 1 # I may be a head
1355 1356 e = index[r]
1356 1357 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1357 1358 return [r for r, val in enumerate(ishead) if val]
1358 1359
1359 1360 def heads(self, start=None, stop=None):
1360 1361 """return the list of all nodes that have no children
1361 1362
1362 1363 if start is specified, only heads that are descendants of
1363 1364 start will be returned
1364 1365 if stop is specified, it will consider all the revs from stop
1365 1366 as if they had no children
1366 1367 """
1367 1368 if start is None and stop is None:
1368 1369 if not len(self):
1369 1370 return [self.nullid]
1370 1371 return [self.node(r) for r in self.headrevs()]
1371 1372
1372 1373 if start is None:
1373 1374 start = nullrev
1374 1375 else:
1375 1376 start = self.rev(start)
1376 1377
1377 1378 stoprevs = {self.rev(n) for n in stop or []}
1378 1379
1379 1380 revs = dagop.headrevssubset(
1380 1381 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1381 1382 )
1382 1383
1383 1384 return [self.node(rev) for rev in revs]
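# Illustrative usage, not part of the original module (rl stands for a
# revlog instance, node for a known node id):
#
#     rl.heads()            # every node without children
#     rl.heads(start=node)  # only the heads descending from node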
1384 1385
1385 1386 def children(self, node):
1386 1387 """find the children of a given node"""
1387 1388 c = []
1388 1389 p = self.rev(node)
1389 1390 for r in self.revs(start=p + 1):
1390 1391 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1391 1392 if prevs:
1392 1393 for pr in prevs:
1393 1394 if pr == p:
1394 1395 c.append(self.node(r))
1395 1396 elif p == nullrev:
1396 1397 c.append(self.node(r))
1397 1398 return c
1398 1399
1399 1400 def commonancestorsheads(self, a, b):
1400 1401 """calculate all the heads of the common ancestors of nodes a and b"""
1401 1402 a, b = self.rev(a), self.rev(b)
1402 1403 ancs = self._commonancestorsheads(a, b)
1403 1404 return pycompat.maplist(self.node, ancs)
1404 1405
1405 1406 def _commonancestorsheads(self, *revs):
1406 1407 """calculate all the heads of the common ancestors of revs"""
1407 1408 try:
1408 1409 ancs = self.index.commonancestorsheads(*revs)
1409 1410 except (AttributeError, OverflowError): # C implementation failed
1410 1411 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1411 1412 return ancs
1412 1413
1413 1414 def isancestor(self, a, b):
1414 1415 """return True if node a is an ancestor of node b
1415 1416
1416 1417 A revision is considered an ancestor of itself."""
1417 1418 a, b = self.rev(a), self.rev(b)
1418 1419 return self.isancestorrev(a, b)
1419 1420
1420 1421 def isancestorrev(self, a, b):
1421 1422 """return True if revision a is an ancestor of revision b
1422 1423
1423 1424 A revision is considered an ancestor of itself.
1424 1425
1425 1426 The implementation of this is trivial but the use of
1426 1427 reachableroots is not."""
1427 1428 if a == nullrev:
1428 1429 return True
1429 1430 elif a == b:
1430 1431 return True
1431 1432 elif a > b:
1432 1433 return False
1433 1434 return bool(self.reachableroots(a, [b], [a], includepath=False))
1434 1435
1435 1436 def reachableroots(self, minroot, heads, roots, includepath=False):
1436 1437 """return (heads(::(<roots> and <roots>::<heads>)))
1437 1438
1438 1439 If includepath is True, return (<roots>::<heads>)."""
1439 1440 try:
1440 1441 return self.index.reachableroots2(
1441 1442 minroot, heads, roots, includepath
1442 1443 )
1443 1444 except AttributeError:
1444 1445 return dagop._reachablerootspure(
1445 1446 self.parentrevs, minroot, roots, heads, includepath
1446 1447 )
1447 1448
1448 1449 def ancestor(self, a, b):
1449 1450 """calculate the "best" common ancestor of nodes a and b"""
1450 1451
1451 1452 a, b = self.rev(a), self.rev(b)
1452 1453 try:
1453 1454 ancs = self.index.ancestors(a, b)
1454 1455 except (AttributeError, OverflowError):
1455 1456 ancs = ancestor.ancestors(self.parentrevs, a, b)
1456 1457 if ancs:
1457 1458 # choose a consistent winner when there's a tie
1458 1459 return min(map(self.node, ancs))
1459 1460 return self.nullid
1460 1461
1461 1462 def _match(self, id):
1462 1463 if isinstance(id, int):
1463 1464 # rev
1464 1465 return self.node(id)
1465 1466 if len(id) == self.nodeconstants.nodelen:
1466 1467 # possibly a binary node
1467 1468 # odds of a binary node being all hex in ASCII are 1 in 10**25
1468 1469 try:
1469 1470 node = id
1470 1471 self.rev(node) # quick search the index
1471 1472 return node
1472 1473 except error.LookupError:
1473 1474 pass # may be partial hex id
1474 1475 try:
1475 1476 # str(rev)
1476 1477 rev = int(id)
1477 1478 if b"%d" % rev != id:
1478 1479 raise ValueError
1479 1480 if rev < 0:
1480 1481 rev = len(self) + rev
1481 1482 if rev < 0 or rev >= len(self):
1482 1483 raise ValueError
1483 1484 return self.node(rev)
1484 1485 except (ValueError, OverflowError):
1485 1486 pass
1486 1487 if len(id) == 2 * self.nodeconstants.nodelen:
1487 1488 try:
1488 1489 # a full hex nodeid?
1489 1490 node = bin(id)
1490 1491 self.rev(node)
1491 1492 return node
1492 1493 except (TypeError, error.LookupError):
1493 1494 pass
1494 1495
1495 1496 def _partialmatch(self, id):
1496 1497 # we don't care about wdirfilenodeids as they should always be full hashes
1497 1498 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1498 1499 try:
1499 1500 partial = self.index.partialmatch(id)
1500 1501 if partial and self.hasnode(partial):
1501 1502 if maybewdir:
1502 1503 # single 'ff...' match in radix tree, ambiguous with wdir
1503 1504 raise error.RevlogError
1504 1505 return partial
1505 1506 if maybewdir:
1506 1507 # no 'ff...' match in radix tree, wdir identified
1507 1508 raise error.WdirUnsupported
1508 1509 return None
1509 1510 except error.RevlogError:
1510 1511 # parsers.c radix tree lookup gave multiple matches
1511 1512 # fast path: for unfiltered changelog, radix tree is accurate
1512 1513 if not getattr(self, 'filteredrevs', None):
1513 1514 raise error.AmbiguousPrefixLookupError(
1514 1515 id, self.display_id, _(b'ambiguous identifier')
1515 1516 )
1516 1517 # fall through to slow path that filters hidden revisions
1517 1518 except (AttributeError, ValueError):
1518 1519 # we are pure python, or key was too short to search radix tree
1519 1520 pass
1520 1521
1521 1522 if id in self._pcache:
1522 1523 return self._pcache[id]
1523 1524
1524 1525 if len(id) <= 40:
1525 1526 try:
1526 1527 # hex(node)[:...]
1527 1528 l = len(id) // 2 # grab an even number of digits
1528 1529 prefix = bin(id[: l * 2])
1529 1530 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1530 1531 nl = [
1531 1532 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1532 1533 ]
1533 1534 if self.nodeconstants.nullhex.startswith(id):
1534 1535 nl.append(self.nullid)
1535 1536 if len(nl) > 0:
1536 1537 if len(nl) == 1 and not maybewdir:
1537 1538 self._pcache[id] = nl[0]
1538 1539 return nl[0]
1539 1540 raise error.AmbiguousPrefixLookupError(
1540 1541 id, self.display_id, _(b'ambiguous identifier')
1541 1542 )
1542 1543 if maybewdir:
1543 1544 raise error.WdirUnsupported
1544 1545 return None
1545 1546 except TypeError:
1546 1547 pass
1547 1548
1548 1549 def lookup(self, id):
1549 1550 """locate a node based on:
1550 1551 - revision number or str(revision number)
1551 1552 - nodeid or subset of hex nodeid
1552 1553 """
1553 1554 n = self._match(id)
1554 1555 if n is not None:
1555 1556 return n
1556 1557 n = self._partialmatch(id)
1557 1558 if n:
1558 1559 return n
1559 1560
1560 1561 raise error.LookupError(id, self.display_id, _(b'no match found'))
1561 1562
1562 1563 def shortest(self, node, minlength=1):
1563 1564 """Find the shortest unambiguous prefix that matches node."""
1564 1565
1565 1566 def isvalid(prefix):
1566 1567 try:
1567 1568 matchednode = self._partialmatch(prefix)
1568 1569 except error.AmbiguousPrefixLookupError:
1569 1570 return False
1570 1571 except error.WdirUnsupported:
1571 1572 # single 'ff...' match
1572 1573 return True
1573 1574 if matchednode is None:
1574 1575 raise error.LookupError(node, self.display_id, _(b'no node'))
1575 1576 return True
1576 1577
1577 1578 def maybewdir(prefix):
1578 1579 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1579 1580
1580 1581 hexnode = hex(node)
1581 1582
1582 1583 def disambiguate(hexnode, minlength):
1583 1584 """Disambiguate against wdirid."""
1584 1585 for length in range(minlength, len(hexnode) + 1):
1585 1586 prefix = hexnode[:length]
1586 1587 if not maybewdir(prefix):
1587 1588 return prefix
1588 1589
1589 1590 if not getattr(self, 'filteredrevs', None):
1590 1591 try:
1591 1592 length = max(self.index.shortest(node), minlength)
1592 1593 return disambiguate(hexnode, length)
1593 1594 except error.RevlogError:
1594 1595 if node != self.nodeconstants.wdirid:
1595 1596 raise error.LookupError(
1596 1597 node, self.display_id, _(b'no node')
1597 1598 )
1598 1599 except AttributeError:
1599 1600 # Fall through to pure code
1600 1601 pass
1601 1602
1602 1603 if node == self.nodeconstants.wdirid:
1603 1604 for length in range(minlength, len(hexnode) + 1):
1604 1605 prefix = hexnode[:length]
1605 1606 if isvalid(prefix):
1606 1607 return prefix
1607 1608
1608 1609 for length in range(minlength, len(hexnode) + 1):
1609 1610 prefix = hexnode[:length]
1610 1611 if isvalid(prefix):
1611 1612 return disambiguate(hexnode, length)
1612 1613
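# Usage sketch (illustrative): `shortest` backs short-hash display such as
# the `{shortest(node)}` template filter. Assuming `rl` is a revlog and
# `node` one of its stored nodes:
#
#     prefix = rl.shortest(node, minlength=4)
#     assert rl.lookup(prefix) == node   # prefix resolves unambiguously
#
# Any longer prefix of the same node also remains a valid lookup.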
1613 1614 def cmp(self, node, text):
1614 1615 """compare text with a given file revision
1615 1616
1616 1617 returns True if text is different than what is stored.
1617 1618 """
1618 1619 p1, p2 = self.parents(node)
1619 1620 return storageutil.hashrevisionsha1(text, p1, p2) != node
1620 1621
1621 1622 def _cachesegment(self, offset, data):
1622 1623 """Add a segment to the revlog cache.
1623 1624
1624 1625 Accepts an absolute offset and the data that is at that location.
1625 1626 """
1626 1627 o, d = self._chunkcache
1627 1628 # try to add to existing cache
1628 1629 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1629 1630 self._chunkcache = o, d + data
1630 1631 else:
1631 1632 self._chunkcache = offset, data
1632 1633
1633 1634 def _readsegment(self, offset, length, df=None):
1634 1635 """Load a segment of raw data from the revlog.
1635 1636
1636 1637 Accepts an absolute offset, length to read, and an optional existing
1637 1638 file handle to read from.
1638 1639
1639 1640 If an existing file handle is passed, it will be seeked and the
1640 1641 original seek position will NOT be restored.
1641 1642
1642 1643 Returns a str or buffer of raw byte data.
1643 1644
1644 1645 Raises if the requested number of bytes could not be read.
1645 1646 """
1646 1647 # Cache data both forward and backward around the requested
1647 1648 # data, in a fixed size window. This helps speed up operations
1648 1649 # involving reading the revlog backwards.
1649 1650 cachesize = self._chunkcachesize
1650 1651 realoffset = offset & ~(cachesize - 1)
1651 1652 reallength = (
1652 1653 (offset + length + cachesize) & ~(cachesize - 1)
1653 1654 ) - realoffset
1654 1655 with self._datareadfp(df) as df:
1655 1656 df.seek(realoffset)
1656 1657 d = df.read(reallength)
1657 1658
1658 1659 self._cachesegment(realoffset, d)
1659 1660 if offset != realoffset or reallength != length:
1660 1661 startoffset = offset - realoffset
1661 1662 if len(d) - startoffset < length:
1662 1663 raise error.RevlogError(
1663 1664 _(
1664 1665 b'partial read of revlog %s; expected %d bytes from '
1665 1666 b'offset %d, got %d'
1666 1667 )
1667 1668 % (
1668 1669 self._indexfile if self._inline else self._datafile,
1669 1670 length,
1670 1671 offset,
1671 1672 len(d) - startoffset,
1672 1673 )
1673 1674 )
1674 1675
1675 1676 return util.buffer(d, startoffset, length)
1676 1677
1677 1678 if len(d) < length:
1678 1679 raise error.RevlogError(
1679 1680 _(
1680 1681 b'partial read of revlog %s; expected %d bytes from offset '
1681 1682 b'%d, got %d'
1682 1683 )
1683 1684 % (
1684 1685 self._indexfile if self._inline else self._datafile,
1685 1686 length,
1686 1687 offset,
1687 1688 len(d),
1688 1689 )
1689 1690 )
1690 1691
1691 1692 return d
1692 1693
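# Worked example (illustrative): assuming the default _chunkcachesize of
# 65536, the masking above aligns reads to 64KiB windows. For a request of
# 100 bytes at offset 70000:
#
#     cachesize = 65536
#     realoffset = 70000 & ~(cachesize - 1)                      # 65536
#     reallength = ((70000 + 100 + cachesize)
#                   & ~(cachesize - 1)) - realoffset             # 65536
#
# so one aligned 64KiB window (bytes 65536 to 131071) is read and cached
# around the 100 requested bytes.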
1693 1694 def _getsegment(self, offset, length, df=None):
1694 1695 """Obtain a segment of raw data from the revlog.
1695 1696
1696 1697 Accepts an absolute offset, length of bytes to obtain, and an
1697 1698 optional file handle to the already-opened revlog. If the file
1698 1699 handle is used, its original seek position will not be preserved.
1699 1700
1700 1701 Requests for data may be returned from a cache.
1701 1702
1702 1703 Returns a str or a buffer instance of raw byte data.
1703 1704 """
1704 1705 o, d = self._chunkcache
1705 1706 l = len(d)
1706 1707
1707 1708 # is it in the cache?
1708 1709 cachestart = offset - o
1709 1710 cacheend = cachestart + length
1710 1711 if cachestart >= 0 and cacheend <= l:
1711 1712 if cachestart == 0 and cacheend == l:
1712 1713 return d # avoid a copy
1713 1714 return util.buffer(d, cachestart, cacheend - cachestart)
1714 1715
1715 1716 return self._readsegment(offset, length, df=df)
1716 1717
1717 1718 def _getsegmentforrevs(self, startrev, endrev, df=None):
1718 1719 """Obtain a segment of raw data corresponding to a range of revisions.
1719 1720
1720 1721 Accepts the start and end revisions and an optional already-open
1721 1722 file handle to be used for reading. If the file handle is used, its
1722 1723 seek position will not be preserved.
1723 1724
1724 1725 Requests for data may be satisfied by a cache.
1725 1726
1726 1727 Returns a 2-tuple of (offset, data) for the requested range of
1727 1728 revisions. Offset is the integer offset from the beginning of the
1728 1729 revlog and data is a str or buffer of the raw byte data.
1729 1730
1730 1731 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1731 1732 to determine where each revision's data begins and ends.
1732 1733 """
1733 1734 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1734 1735 # (functions are expensive).
1735 1736 index = self.index
1736 1737 istart = index[startrev]
1737 1738 start = int(istart[0] >> 16)
1738 1739 if startrev == endrev:
1739 1740 end = start + istart[1]
1740 1741 else:
1741 1742 iend = index[endrev]
1742 1743 end = int(iend[0] >> 16) + iend[1]
1743 1744
1744 1745 if self._inline:
1745 1746 start += (startrev + 1) * self.index.entry_size
1746 1747 end += (endrev + 1) * self.index.entry_size
1747 1748 length = end - start
1748 1749
1749 1750 return start, self._getsegment(start, length, df=df)
1750 1751
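# Illustrative note: for inline revlogs, index entries and data chunks are
# interleaved in the same .i file, hence the adjustment above. With a
# hypothetical entry_size of 64 bytes, revision 3's chunk starts at:
#
#     physical_start = rl.start(3) + (3 + 1) * 64
#
# i.e. the logical data offset shifted past the four index entries
# (revisions 0 to 3) that precede it on disk.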
1751 1752 def _chunk(self, rev, df=None):
1752 1753 """Obtain a single decompressed chunk for a revision.
1753 1754
1754 1755 Accepts an integer revision and an optional already-open file handle
1755 1756 to be used for reading. If used, the seek position of the file will not
1756 1757 be preserved.
1757 1758
1758 1759 Returns a str holding uncompressed data for the requested revision.
1759 1760 """
1760 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1761 compression_mode = self.index[rev][10]
1762 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1763 if compression_mode == COMP_MODE_PLAIN:
1764 return data
1765 elif compression_mode == COMP_MODE_INLINE:
1766 return self.decompress(data)
1767 else:
1768 msg = b'unknown compression mode %d'
1769 msg %= compression_mode
1770 raise error.RevlogError(msg)
1761 1771
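# Minimal sketch (illustrative) of the dispatch just added above: the
# per-revision compression mode lives in index entry slot 10, and chunk
# handling reduces to:
#
#     def _resolve_chunk(rl, rev, segment):
#         mode = rl.index[rev][10]
#         if mode == COMP_MODE_PLAIN:      # stored verbatim, no header byte
#             return segment
#         if mode == COMP_MODE_INLINE:     # legacy: header byte inside data
#             return rl.decompress(segment)
#         raise error.RevlogError(b'unknown compression mode %d' % mode)
#
# COMP_MODE_PLAIN lets uncompressed chunks drop the one-byte b'u' header.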
1762 1772 def _chunks(self, revs, df=None, targetsize=None):
1763 1773 """Obtain decompressed chunks for the specified revisions.
1764 1774
1765 1775 Accepts an iterable of numeric revisions that are assumed to be in
1766 1776 ascending order. Also accepts an optional already-open file handle
1767 1777 to be used for reading. If used, the seek position of the file will
1768 1778 not be preserved.
1769 1779
1770 1780 This function is similar to calling ``self._chunk()`` multiple times,
1771 1781 but is faster.
1772 1782
1773 1783 Returns a list with decompressed data for each requested revision.
1774 1784 """
1775 1785 if not revs:
1776 1786 return []
1777 1787 start = self.start
1778 1788 length = self.length
1779 1789 inline = self._inline
1780 1790 iosize = self.index.entry_size
1781 1791 buffer = util.buffer
1782 1792
1783 1793 l = []
1784 1794 ladd = l.append
1785 1795
1786 1796 if not self._withsparseread:
1787 1797 slicedchunks = (revs,)
1788 1798 else:
1789 1799 slicedchunks = deltautil.slicechunk(
1790 1800 self, revs, targetsize=targetsize
1791 1801 )
1792 1802
1793 1803 for revschunk in slicedchunks:
1794 1804 firstrev = revschunk[0]
1795 1805 # Skip trailing revisions with empty diff
1796 1806 for lastrev in revschunk[::-1]:
1797 1807 if length(lastrev) != 0:
1798 1808 break
1799 1809
1800 1810 try:
1801 1811 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1802 1812 except OverflowError:
1803 1813 # issue4215 - we can't cache a run of chunks greater than
1804 1814 # 2G on Windows
1805 1815 return [self._chunk(rev, df=df) for rev in revschunk]
1806 1816
1807 1817 decomp = self.decompress
1808 1818 for rev in revschunk:
1809 1819 chunkstart = start(rev)
1810 1820 if inline:
1811 1821 chunkstart += (rev + 1) * iosize
1812 1822 chunklength = length(rev)
1823 comp_mode = self.index[rev][10]
1813 1824 c = buffer(data, chunkstart - offset, chunklength)
1814 ladd(decomp(c))
1825 if comp_mode == COMP_MODE_PLAIN:
1826 ladd(c)
1827 elif comp_mode == COMP_MODE_INLINE:
1828 ladd(decomp(c))
1829 else:
1830 msg = b'unknown compression mode %d'
1831 msg %= comp_mode
1832 raise error.RevlogError(msg)
1815 1833
1816 1834 return l
1817 1835
1818 1836 def _chunkclear(self):
1819 1837 """Clear the raw chunk cache."""
1820 1838 self._chunkcache = (0, b'')
1821 1839
1822 1840 def deltaparent(self, rev):
1823 1841 """return deltaparent of the given revision"""
1824 1842 base = self.index[rev][3]
1825 1843 if base == rev:
1826 1844 return nullrev
1827 1845 elif self._generaldelta:
1828 1846 return base
1829 1847 else:
1830 1848 return rev - 1
1831 1849
1832 1850 def issnapshot(self, rev):
1833 1851 """tells whether rev is a snapshot"""
1834 1852 if not self._sparserevlog:
1835 1853 return self.deltaparent(rev) == nullrev
1836 1854 elif util.safehasattr(self.index, b'issnapshot'):
1837 1855 # directly assign the method to cache the testing and access
1838 1856 self.issnapshot = self.index.issnapshot
1839 1857 return self.issnapshot(rev)
1840 1858 if rev == nullrev:
1841 1859 return True
1842 1860 entry = self.index[rev]
1843 1861 base = entry[3]
1844 1862 if base == rev:
1845 1863 return True
1846 1864 if base == nullrev:
1847 1865 return True
1848 1866 p1 = entry[5]
1849 1867 p2 = entry[6]
1850 1868 if base == p1 or base == p2:
1851 1869 return False
1852 1870 return self.issnapshot(base)
1853 1871
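# Illustrative restatement of the pure-python rule above: a revision is a
# snapshot when its delta chain reaches a full text (or null) without ever
# passing through one of its parents:
#
#     def _issnapshot(rl, rev):
#         entry = rl.index[rev]
#         base, p1, p2 = entry[3], entry[5], entry[6]
#         if base == rev or base == nullrev:
#             return True                  # full text, or delta against null
#         if base in (p1, p2):
#             return False                 # ordinary delta against a parent
#         return _issnapshot(rl, base)     # intermediate snapshot chain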
1854 1872 def snapshotdepth(self, rev):
1855 1873 """number of snapshots in the chain before this one"""
1856 1874 if not self.issnapshot(rev):
1857 1875 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1858 1876 return len(self._deltachain(rev)[0]) - 1
1859 1877
1860 1878 def revdiff(self, rev1, rev2):
1861 1879 """return or calculate a delta between two revisions
1862 1880
1863 1881 The delta calculated is in binary form and is intended to be written to
1864 1882 revlog data directly. So this function needs raw revision data.
1865 1883 """
1866 1884 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1867 1885 return bytes(self._chunk(rev2))
1868 1886
1869 1887 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1870 1888
1871 1889 def _processflags(self, text, flags, operation, raw=False):
1872 1890 """deprecated entry point to access flag processors"""
1873 1891 msg = b'_processflags(...) use the specialized variant'
1874 1892 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1875 1893 if raw:
1876 1894 return text, flagutil.processflagsraw(self, text, flags)
1877 1895 elif operation == b'read':
1878 1896 return flagutil.processflagsread(self, text, flags)
1879 1897 else: # write operation
1880 1898 return flagutil.processflagswrite(self, text, flags)
1881 1899
1882 1900 def revision(self, nodeorrev, _df=None, raw=False):
1883 1901 """return an uncompressed revision of a given node or revision
1884 1902 number.
1885 1903
1886 1904 _df - an existing file handle to read from. (internal-only)
1887 1905 raw - an optional argument specifying if the revision data is to be
1888 1906 treated as raw data when applying flag transforms. 'raw' should be set
1889 1907 to True when generating changegroups or in debug commands.
1890 1908 """
1891 1909 if raw:
1892 1910 msg = (
1893 1911 b'revlog.revision(..., raw=True) is deprecated, '
1894 1912 b'use revlog.rawdata(...)'
1895 1913 )
1896 1914 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1897 1915 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1898 1916
1899 1917 def sidedata(self, nodeorrev, _df=None):
1900 1918 """a map of extra data related to the changeset but not part of the hash
1901 1919
1902 1920 This function currently returns a dictionary. However, a more advanced
1903 1921 mapping object will likely be used in the future to allow more
1904 1922 efficient/lazy handling.
1905 1923 """
1906 1924 return self._revisiondata(nodeorrev, _df)[1]
1907 1925
1908 1926 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1909 1927 # deal with <nodeorrev> argument type
1910 1928 if isinstance(nodeorrev, int):
1911 1929 rev = nodeorrev
1912 1930 node = self.node(rev)
1913 1931 else:
1914 1932 node = nodeorrev
1915 1933 rev = None
1916 1934
1917 1935 # fast path the special `nullid` rev
1918 1936 if node == self.nullid:
1919 1937 return b"", {}
1920 1938
1921 1939 # ``rawtext`` is the text as stored inside the revlog. Might be the
1922 1940 # revision or might need to be processed to retrieve the revision.
1923 1941 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1924 1942
1925 1943 if self.hassidedata:
1926 1944 if rev is None:
1927 1945 rev = self.rev(node)
1928 1946 sidedata = self._sidedata(rev)
1929 1947 else:
1930 1948 sidedata = {}
1931 1949
1932 1950 if raw and validated:
1933 1951 # if we don't want to process the raw text and that raw
1934 1952 # text is cached, we can exit early.
1935 1953 return rawtext, sidedata
1936 1954 if rev is None:
1937 1955 rev = self.rev(node)
1938 1956 # the revlog's flag for this revision
1939 1957 # (usually alter its state or content)
1940 1958 flags = self.flags(rev)
1941 1959
1942 1960 if validated and flags == REVIDX_DEFAULT_FLAGS:
1943 1961 # no extra flags set, no flag processor runs, text = rawtext
1944 1962 return rawtext, sidedata
1945 1963
1946 1964 if raw:
1947 1965 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1948 1966 text = rawtext
1949 1967 else:
1950 1968 r = flagutil.processflagsread(self, rawtext, flags)
1951 1969 text, validatehash = r
1952 1970 if validatehash:
1953 1971 self.checkhash(text, node, rev=rev)
1954 1972 if not validated:
1955 1973 self._revisioncache = (node, rev, rawtext)
1956 1974
1957 1975 return text, sidedata
1958 1976
1959 1977 def _rawtext(self, node, rev, _df=None):
1960 1978 """return the possibly unvalidated rawtext for a revision
1961 1979
1962 1980 returns (rev, rawtext, validated)
1963 1981 """
1964 1982
1965 1983 # revision in the cache (could be useful to apply delta)
1966 1984 cachedrev = None
1967 1985 # An intermediate text to apply deltas to
1968 1986 basetext = None
1969 1987
1970 1988 # Check if we have the entry in cache
1971 1989 # The cache entry looks like (node, rev, rawtext)
1972 1990 if self._revisioncache:
1973 1991 if self._revisioncache[0] == node:
1974 1992 return (rev, self._revisioncache[2], True)
1975 1993 cachedrev = self._revisioncache[1]
1976 1994
1977 1995 if rev is None:
1978 1996 rev = self.rev(node)
1979 1997
1980 1998 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1981 1999 if stopped:
1982 2000 basetext = self._revisioncache[2]
1983 2001
1984 2002 # drop cache to save memory, the caller is expected to
1985 2003 # update self._revisioncache after validating the text
1986 2004 self._revisioncache = None
1987 2005
1988 2006 targetsize = None
1989 2007 rawsize = self.index[rev][2]
1990 2008 if 0 <= rawsize:
1991 2009 targetsize = 4 * rawsize
1992 2010
1993 2011 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1994 2012 if basetext is None:
1995 2013 basetext = bytes(bins[0])
1996 2014 bins = bins[1:]
1997 2015
1998 2016 rawtext = mdiff.patches(basetext, bins)
1999 2017 del basetext # let us have a chance to free memory early
2000 2018 return (rev, rawtext, False)
2001 2019
2002 2020 def _sidedata(self, rev):
2003 2021 """Return the sidedata for a given revision number."""
2004 2022 index_entry = self.index[rev]
2005 2023 sidedata_offset = index_entry[8]
2006 2024 sidedata_size = index_entry[9]
2007 2025
2008 2026 if self._inline:
2009 2027 sidedata_offset += self.index.entry_size * (1 + rev)
2010 2028 if sidedata_size == 0:
2011 2029 return {}
2012 2030
2013 2031 segment = self._getsegment(sidedata_offset, sidedata_size)
2014 2032 sidedata = sidedatautil.deserialize_sidedata(segment)
2015 2033 return sidedata
2016 2034
2017 2035 def rawdata(self, nodeorrev, _df=None):
2018 2036 """return the uncompressed raw data of a given node or revision number.
2019 2037
2020 2038 _df - an existing file handle to read from. (internal-only)
2021 2039 """
2022 2040 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2023 2041
2024 2042 def hash(self, text, p1, p2):
2025 2043 """Compute a node hash.
2026 2044
2027 2045 Available as a function so that subclasses can replace the hash
2028 2046 as needed.
2029 2047 """
2030 2048 return storageutil.hashrevisionsha1(text, p1, p2)
2031 2049
2032 2050 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2033 2051 """Check node hash integrity.
2034 2052
2035 2053 Available as a function so that subclasses can extend hash mismatch
2036 2054 behaviors as needed.
2037 2055 """
2038 2056 try:
2039 2057 if p1 is None and p2 is None:
2040 2058 p1, p2 = self.parents(node)
2041 2059 if node != self.hash(text, p1, p2):
2042 2060 # Clear the revision cache on hash failure. The revision cache
2043 2061 # only stores the raw revision and clearing the cache does have
2044 2062 # the side-effect that we won't have a cache hit when the raw
2045 2063 # revision data is accessed. But this case should be rare and
2046 2064 # it is extra work to teach the cache about the hash
2047 2065 # verification state.
2048 2066 if self._revisioncache and self._revisioncache[0] == node:
2049 2067 self._revisioncache = None
2050 2068
2051 2069 revornode = rev
2052 2070 if revornode is None:
2053 2071 revornode = templatefilters.short(hex(node))
2054 2072 raise error.RevlogError(
2055 2073 _(b"integrity check failed on %s:%s")
2056 2074 % (self.display_id, pycompat.bytestr(revornode))
2057 2075 )
2058 2076 except error.RevlogError:
2059 2077 if self._censorable and storageutil.iscensoredtext(text):
2060 2078 raise error.CensoredNodeError(self.display_id, node, text)
2061 2079 raise
2062 2080
2063 2081 def _enforceinlinesize(self, tr):
2064 2082 """Check if the revlog is too big for inline and convert if so.
2065 2083
2066 2084 This should be called after revisions are added to the revlog. If the
2067 2085 revlog has grown too large to be an inline revlog, it will convert it
2068 2086 to use multiple index and data files.
2069 2087 """
2070 2088 tiprev = len(self) - 1
2071 2089 total_size = self.start(tiprev) + self.length(tiprev)
2072 2090 if not self._inline or total_size < _maxinline:
2073 2091 return
2074 2092
2075 2093 troffset = tr.findoffset(self._indexfile)
2076 2094 if troffset is None:
2077 2095 raise error.RevlogError(
2078 2096 _(b"%s not found in the transaction") % self._indexfile
2079 2097 )
2080 2098 trindex = 0
2081 2099 tr.add(self._datafile, 0)
2082 2100
2083 2101 existing_handles = False
2084 2102 if self._writinghandles is not None:
2085 2103 existing_handles = True
2086 2104 fp = self._writinghandles[0]
2087 2105 fp.flush()
2088 2106 fp.close()
2089 2107 # We can't use the cached file handle after close(). So prevent
2090 2108 # its usage.
2091 2109 self._writinghandles = None
2092 2110
2093 2111 new_dfh = self._datafp(b'w+')
2094 2112 new_dfh.truncate(0) # drop any potentially existing data
2095 2113 try:
2096 2114 with self._indexfp() as read_ifh:
2097 2115 for r in self:
2098 2116 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2099 2117 if troffset <= self.start(r):
2100 2118 trindex = r
2101 2119 new_dfh.flush()
2102 2120
2103 2121 with self.__index_new_fp() as fp:
2104 2122 self._format_flags &= ~FLAG_INLINE_DATA
2105 2123 self._inline = False
2106 2124 for i in self:
2107 2125 e = self.index.entry_binary(i)
2108 2126 if i == 0 and self._docket is None:
2109 2127 header = self._format_flags | self._format_version
2110 2128 header = self.index.pack_header(header)
2111 2129 e = header + e
2112 2130 fp.write(e)
2113 2131 if self._docket is not None:
2114 2132 self._docket.index_end = fp.tell()
2115 2133 # the temp file replaces the real index when we exit the context
2116 2134 # manager
2117 2135
2118 2136 tr.replace(self._indexfile, trindex * self.index.entry_size)
2119 2137 nodemaputil.setup_persistent_nodemap(tr, self)
2120 2138 self._chunkclear()
2121 2139
2122 2140 if existing_handles:
2123 2141 # switched from inline to conventional; reopen the index
2124 2142 ifh = self.__index_write_fp()
2125 2143 self._writinghandles = (ifh, new_dfh)
2126 2144 new_dfh = None
2127 2145 finally:
2128 2146 if new_dfh is not None:
2129 2147 new_dfh.close()
2130 2148
2131 2149 def _nodeduplicatecallback(self, transaction, node):
2132 2150 """called when trying to add a node already stored."""
2133 2151
2134 2152 @contextlib.contextmanager
2135 2153 def _writing(self, transaction):
2136 2154 if self._trypending:
2137 2155 msg = b'try to write in a `trypending` revlog: %s'
2138 2156 msg %= self.display_id
2139 2157 raise error.ProgrammingError(msg)
2140 2158 if self._writinghandles is not None:
2141 2159 yield
2142 2160 else:
2143 2161 r = len(self)
2144 2162 dsize = 0
2145 2163 if r:
2146 2164 dsize = self.end(r - 1)
2147 2165 dfh = None
2148 2166 if not self._inline:
2149 2167 try:
2150 2168 dfh = self._datafp(b"r+")
2151 2169 if self._docket is None:
2152 2170 dfh.seek(0, os.SEEK_END)
2153 2171 else:
2154 2172 dfh.seek(self._docket.data_end, os.SEEK_SET)
2155 2173 except IOError as inst:
2156 2174 if inst.errno != errno.ENOENT:
2157 2175 raise
2158 2176 dfh = self._datafp(b"w+")
2159 2177 transaction.add(self._datafile, dsize)
2160 2178 try:
2161 2179 isize = r * self.index.entry_size
2162 2180 ifh = self.__index_write_fp()
2163 2181 if self._inline:
2164 2182 transaction.add(self._indexfile, dsize + isize)
2165 2183 else:
2166 2184 transaction.add(self._indexfile, isize)
2167 2185 try:
2168 2186 self._writinghandles = (ifh, dfh)
2169 2187 try:
2170 2188 yield
2171 2189 if self._docket is not None:
2172 2190 self._write_docket(transaction)
2173 2191 finally:
2174 2192 self._writinghandles = None
2175 2193 finally:
2176 2194 ifh.close()
2177 2195 finally:
2178 2196 if dfh is not None:
2179 2197 dfh.close()
2180 2198
2181 2199 def _write_docket(self, transaction):
2182 2200 """write the current docket on disk
2183 2201
2184 2202 Exists as a method to help the changelog implement transaction logic.
2185 2203
2186 2204 We could also imagine using the same transaction logic for all revlogs
2187 2205 since dockets are cheap."""
2188 2206 self._docket.write(transaction)
2189 2207
2190 2208 def addrevision(
2191 2209 self,
2192 2210 text,
2193 2211 transaction,
2194 2212 link,
2195 2213 p1,
2196 2214 p2,
2197 2215 cachedelta=None,
2198 2216 node=None,
2199 2217 flags=REVIDX_DEFAULT_FLAGS,
2200 2218 deltacomputer=None,
2201 2219 sidedata=None,
2202 2220 ):
2203 2221 """add a revision to the log
2204 2222
2205 2223 text - the revision data to add
2206 2224 transaction - the transaction object used for rollback
2207 2225 link - the linkrev data to add
2208 2226 p1, p2 - the parent nodeids of the revision
2209 2227 cachedelta - an optional precomputed delta
2210 2228 node - nodeid of revision; typically node is not specified, and it is
2211 2229 computed by default as hash(text, p1, p2), however subclasses might
2212 2230 use different hashing method (and override checkhash() in such case)
2213 2231 flags - the known flags to set on the revision
2214 2232 deltacomputer - an optional deltacomputer instance shared between
2215 2233 multiple calls
2216 2234 """
2217 2235 if link == nullrev:
2218 2236 raise error.RevlogError(
2219 2237 _(b"attempted to add linkrev -1 to %s") % self.display_id
2220 2238 )
2221 2239
2222 2240 if sidedata is None:
2223 2241 sidedata = {}
2224 2242 elif sidedata and not self.hassidedata:
2225 2243 raise error.ProgrammingError(
2226 2244 _(b"trying to add sidedata to a revlog that does not support them")
2227 2245 )
2228 2246
2229 2247 if flags:
2230 2248 node = node or self.hash(text, p1, p2)
2231 2249
2232 2250 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2233 2251
2234 2252 # If the flag processor modifies the revision data, ignore any provided
2235 2253 # cachedelta.
2236 2254 if rawtext != text:
2237 2255 cachedelta = None
2238 2256
2239 2257 if len(rawtext) > _maxentrysize:
2240 2258 raise error.RevlogError(
2241 2259 _(
2242 2260 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2243 2261 )
2244 2262 % (self.display_id, len(rawtext))
2245 2263 )
2246 2264
2247 2265 node = node or self.hash(rawtext, p1, p2)
2248 2266 rev = self.index.get_rev(node)
2249 2267 if rev is not None:
2250 2268 return rev
2251 2269
2252 2270 if validatehash:
2253 2271 self.checkhash(rawtext, node, p1=p1, p2=p2)
2254 2272
2255 2273 return self.addrawrevision(
2256 2274 rawtext,
2257 2275 transaction,
2258 2276 link,
2259 2277 p1,
2260 2278 p2,
2261 2279 node,
2262 2280 flags,
2263 2281 cachedelta=cachedelta,
2264 2282 deltacomputer=deltacomputer,
2265 2283 sidedata=sidedata,
2266 2284 )
2267 2285
2268 2286 def addrawrevision(
2269 2287 self,
2270 2288 rawtext,
2271 2289 transaction,
2272 2290 link,
2273 2291 p1,
2274 2292 p2,
2275 2293 node,
2276 2294 flags,
2277 2295 cachedelta=None,
2278 2296 deltacomputer=None,
2279 2297 sidedata=None,
2280 2298 ):
2281 2299 """add a raw revision with known flags, node and parents
2282 2300 useful when reusing a revision not stored in this revlog (e.g. received
2283 2301 over the wire, or read from an external bundle).
2284 2302 """
2285 2303 with self._writing(transaction):
2286 2304 return self._addrevision(
2287 2305 node,
2288 2306 rawtext,
2289 2307 transaction,
2290 2308 link,
2291 2309 p1,
2292 2310 p2,
2293 2311 flags,
2294 2312 cachedelta,
2295 2313 deltacomputer=deltacomputer,
2296 2314 sidedata=sidedata,
2297 2315 )
2298 2316
2299 2317 def compress(self, data):
2300 2318 """Generate a possibly-compressed representation of data."""
2301 2319 if not data:
2302 2320 return b'', data
2303 2321
2304 2322 compressed = self._compressor.compress(data)
2305 2323
2306 2324 if compressed:
2307 2325 # The revlog compressor added the header in the returned data.
2308 2326 return b'', compressed
2309 2327
2310 2328 if data[0:1] == b'\0':
2311 2329 return b'', data
2312 2330 return b'u', data
2313 2331
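# Illustrative summary of the return convention above, assuming a default
# zlib-backed revlog `rl`; the exact outcomes depend on the engine:
#
#     rl.compress(b'')          # -> (b'', b'')       empty stays empty
#     rl.compress(big_text)     # -> (b'', b'x...')   engine header included
#     rl.compress(b'\0raw')     # -> (b'', b'\0raw')  NUL-led data safe as-is
#     rl.compress(b'tiny')      # -> (b'u', b'tiny')  if compression loses
#
# Header and data are written back to back, so marking a chunk as
# uncompressed costs at most one byte.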
2314 2332 def decompress(self, data):
2315 2333 """Decompress a revlog chunk.
2316 2334
2317 2335 The chunk is expected to begin with a header identifying the
2318 2336 format type so it can be routed to an appropriate decompressor.
2319 2337 """
2320 2338 if not data:
2321 2339 return data
2322 2340
2323 2341 # Revlogs are read much more frequently than they are written and many
2324 2342 # chunks only take microseconds to decompress, so performance is
2325 2343 # important here.
2326 2344 #
2327 2345 # We can make a few assumptions about revlogs:
2328 2346 #
2329 2347 # 1) the majority of chunks will be compressed (as opposed to inline
2330 2348 # raw data).
2331 2349 # 2) decompressing *any* data will likely be at least 10x slower than
2332 2350 # returning raw inline data.
2333 2351 # 3) we want to prioritize common and officially supported compression
2334 2352 # engines
2335 2353 #
2336 2354 # It follows that we want to optimize for "decompress compressed data
2337 2355 # when encoded with common and officially supported compression engines"
2338 2356 # case over "raw data" and "data encoded by less common or non-official
2339 2357 # compression engines." That is why we have the inline lookup first
2340 2358 # followed by the compengines lookup.
2341 2359 #
2342 2360 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2343 2361 # compressed chunks. And this matters for changelog and manifest reads.
2344 2362 t = data[0:1]
2345 2363
2346 2364 if t == b'x':
2347 2365 try:
2348 2366 return _zlibdecompress(data)
2349 2367 except zlib.error as e:
2350 2368 raise error.RevlogError(
2351 2369 _(b'revlog decompress error: %s')
2352 2370 % stringutil.forcebytestr(e)
2353 2371 )
2354 2372 # '\0' is more common than 'u' so it goes first.
2355 2373 elif t == b'\0':
2356 2374 return data
2357 2375 elif t == b'u':
2358 2376 return util.buffer(data, 1)
2359 2377
2360 2378 try:
2361 2379 compressor = self._decompressors[t]
2362 2380 except KeyError:
2363 2381 try:
2364 2382 engine = util.compengines.forrevlogheader(t)
2365 2383 compressor = engine.revlogcompressor(self._compengineopts)
2366 2384 self._decompressors[t] = compressor
2367 2385 except KeyError:
2368 2386 raise error.RevlogError(
2369 2387 _(b'unknown compression type %s') % binascii.hexlify(t)
2370 2388 )
2371 2389
2372 2390 return compressor.decompress(data)
2373 2391
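# Illustrative dispatch table for the first byte handled above:
#
#     b'x'   -> zlib chunk, inflated via zlib.decompress
#     b'\0'  -> raw data that happens to start with NUL, returned as-is
#     b'u'   -> explicitly uncompressed, returned minus the header byte
#     other  -> resolved through util.compengines (zstd, for instance)
#
# The authoritative header-to-engine mapping lives in the compression
# engine registry, not here.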
2374 2392 def _addrevision(
2375 2393 self,
2376 2394 node,
2377 2395 rawtext,
2378 2396 transaction,
2379 2397 link,
2380 2398 p1,
2381 2399 p2,
2382 2400 flags,
2383 2401 cachedelta,
2384 2402 alwayscache=False,
2385 2403 deltacomputer=None,
2386 2404 sidedata=None,
2387 2405 ):
2388 2406 """internal function to add revisions to the log
2389 2407
2390 2408 see addrevision for argument descriptions.
2391 2409
2392 2410 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2393 2411
2394 2412 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2395 2413 be used.
2396 2414
2397 2415 invariants:
2398 2416 - rawtext is optional (can be None); if not set, cachedelta must be set.
2399 2417 if both are set, they must correspond to each other.
2400 2418 """
2401 2419 if node == self.nullid:
2402 2420 raise error.RevlogError(
2403 2421 _(b"%s: attempt to add null revision") % self.display_id
2404 2422 )
2405 2423 if (
2406 2424 node == self.nodeconstants.wdirid
2407 2425 or node in self.nodeconstants.wdirfilenodeids
2408 2426 ):
2409 2427 raise error.RevlogError(
2410 2428 _(b"%s: attempt to add wdir revision") % self.display_id
2411 2429 )
2412 2430 if self._writinghandles is None:
2413 2431 msg = b'adding revision outside `revlog._writing` context'
2414 2432 raise error.ProgrammingError(msg)
2415 2433
2416 2434 if self._inline:
2417 2435 fh = self._writinghandles[0]
2418 2436 else:
2419 2437 fh = self._writinghandles[1]
2420 2438
2421 2439 btext = [rawtext]
2422 2440
2423 2441 curr = len(self)
2424 2442 prev = curr - 1
2425 2443
2426 2444 offset = self._get_data_offset(prev)
2427 2445
2428 2446 if self._concurrencychecker:
2429 2447 ifh, dfh = self._writinghandles
2430 2448 if self._inline:
2431 2449 # offset is "as if" it were in the .d file, so we need to add on
2432 2450 # the size of the entry metadata.
2433 2451 self._concurrencychecker(
2434 2452 ifh, self._indexfile, offset + curr * self.index.entry_size
2435 2453 )
2436 2454 else:
2437 2455 # Entries in the .i are a consistent size.
2438 2456 self._concurrencychecker(
2439 2457 ifh, self._indexfile, curr * self.index.entry_size
2440 2458 )
2441 2459 self._concurrencychecker(dfh, self._datafile, offset)
2442 2460
2443 2461 p1r, p2r = self.rev(p1), self.rev(p2)
2444 2462
2445 2463 # full versions are inserted when the needed deltas
2446 2464 # become comparable to the uncompressed text
2447 2465 if rawtext is None:
2448 2466 # need rawtext size, before changed by flag processors, which is
2449 2467 # the non-raw size. use revlog explicitly to avoid filelog's extra
2450 2468 # logic that might remove metadata size.
2451 2469 textlen = mdiff.patchedsize(
2452 2470 revlog.size(self, cachedelta[0]), cachedelta[1]
2453 2471 )
2454 2472 else:
2455 2473 textlen = len(rawtext)
2456 2474
2457 2475 if deltacomputer is None:
2458 2476 deltacomputer = deltautil.deltacomputer(self)
2459 2477
2460 2478 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2461 2479
2462 2480 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2463 2481
2482 compression_mode = COMP_MODE_INLINE
2483 if self._docket is not None:
2484 h, d = deltainfo.data
2485 if not h and not d:
2486 # no data to store at all... declare it uncompressed
2487 compression_mode = COMP_MODE_PLAIN
2488 elif not h and d[0:1] == b'\0':
2489 compression_mode = COMP_MODE_PLAIN
2490 elif h == b'u':
2491 # we have a more efficient way to declare uncompressed
2492 h = b''
2493 compression_mode = COMP_MODE_PLAIN
2494 deltainfo = deltautil.drop_u_compression(deltainfo)
2495
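# Editor's note (illustrative): the three branches above cover every way a
# delta can already be "plain": no data at all, zlib-safe data starting
# with b'\0', or data carrying the legacy b'u' marker (whose header is
# dropped). In each case the index records COMP_MODE_PLAIN so readers skip
# decompress() entirely; revlogs without a docket keep COMP_MODE_INLINE
# and the on-disk header as before.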
2464 2496 if sidedata and self.hassidedata:
2465 2497 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2466 2498 sidedata_offset = offset + deltainfo.deltalen
2467 2499 else:
2468 2500 serialized_sidedata = b""
2469 2501 # Don't store the offset if the sidedata is empty, that way
2470 2502 # we can easily detect empty sidedata and they will be no different
2471 2503 # than ones we manually add.
2472 2504 sidedata_offset = 0
2473 2505
2474 2506 e = (
2475 2507 offset_type(offset, flags),
2476 2508 deltainfo.deltalen,
2477 2509 textlen,
2478 2510 deltainfo.base,
2479 2511 link,
2480 2512 p1r,
2481 2513 p2r,
2482 2514 node,
2483 2515 sidedata_offset,
2484 2516 len(serialized_sidedata),
2485 COMP_MODE_INLINE,
2517 compression_mode,
2486 2518 )
2487 2519
2488 2520 self.index.append(e)
2489 2521 entry = self.index.entry_binary(curr)
2490 2522 if curr == 0 and self._docket is None:
2491 2523 header = self._format_flags | self._format_version
2492 2524 header = self.index.pack_header(header)
2493 2525 entry = header + entry
2494 2526 self._writeentry(
2495 2527 transaction,
2496 2528 entry,
2497 2529 deltainfo.data,
2498 2530 link,
2499 2531 offset,
2500 2532 serialized_sidedata,
2501 2533 )
2502 2534
2503 2535 rawtext = btext[0]
2504 2536
2505 2537 if alwayscache and rawtext is None:
2506 2538 rawtext = deltacomputer.buildtext(revinfo, fh)
2507 2539
2508 2540 if type(rawtext) == bytes: # only accept immutable objects
2509 2541 self._revisioncache = (node, curr, rawtext)
2510 2542 self._chainbasecache[curr] = deltainfo.chainbase
2511 2543 return curr
2512 2544
2513 2545 def _get_data_offset(self, prev):
2514 2546 """Returns the current offset in the (in-transaction) data file.
2515 2547 Versions < 2 of the revlog can get this O(1), revlog v2 needs a docket
2516 2548 file to store that information: since sidedata can be rewritten to the
2517 2549 end of the data file within a transaction, you can have cases where, for
2518 2550 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2519 2551 to `n - 1`'s sidedata being written after `n`'s data.
2520 2552
2521 2553 TODO cache this in a docket file before getting out of experimental."""
2522 2554 if self._docket is None:
2523 2555 return self.end(prev)
2524 2556 else:
2525 2557 return self._docket.data_end
2526 2558
2527 2559 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2528 2560 # Files opened in a+ mode have inconsistent behavior on various
2529 2561 # platforms. Windows requires that a file positioning call be made
2530 2562 # when the file handle transitions between reads and writes. See
2531 2563 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2532 2564 # platforms, Python or the platform itself can be buggy. Some versions
2533 2565 # of Solaris have been observed to not append at the end of the file
2534 2566 # if the file was seeked to before the end. See issue4943 for more.
2535 2567 #
2536 2568 # We work around this issue by inserting a seek() before writing.
2537 2569 # Note: This is likely not necessary on Python 3. However, because
2538 2570 # the file handle is reused for reads and may be seeked there, we need
2539 2571 # to be careful before changing this.
2540 2572 if self._writinghandles is None:
2541 2573 msg = b'adding revision outside `revlog._writing` context'
2542 2574 raise error.ProgrammingError(msg)
2543 2575 ifh, dfh = self._writinghandles
2544 2576 if self._docket is None:
2545 2577 ifh.seek(0, os.SEEK_END)
2546 2578 else:
2547 2579 ifh.seek(self._docket.index_end, os.SEEK_SET)
2548 2580 if dfh:
2549 2581 if self._docket is None:
2550 2582 dfh.seek(0, os.SEEK_END)
2551 2583 else:
2552 2584 dfh.seek(self._docket.data_end, os.SEEK_SET)
2553 2585
2554 2586 curr = len(self) - 1
2555 2587 if not self._inline:
2556 2588 transaction.add(self._datafile, offset)
2557 2589 transaction.add(self._indexfile, curr * len(entry))
2558 2590 if data[0]:
2559 2591 dfh.write(data[0])
2560 2592 dfh.write(data[1])
2561 2593 if sidedata:
2562 2594 dfh.write(sidedata)
2563 2595 ifh.write(entry)
2564 2596 else:
2565 2597 offset += curr * self.index.entry_size
2566 2598 transaction.add(self._indexfile, offset)
2567 2599 ifh.write(entry)
2568 2600 ifh.write(data[0])
2569 2601 ifh.write(data[1])
2570 2602 if sidedata:
2571 2603 ifh.write(sidedata)
2572 2604 self._enforceinlinesize(transaction)
2573 2605 if self._docket is not None:
2574 2606 self._docket.index_end = self._writinghandles[0].tell()
2575 2607 self._docket.data_end = self._writinghandles[1].tell()
2576 2608
2577 2609 nodemaputil.setup_persistent_nodemap(transaction, self)
2578 2610
2579 2611 def addgroup(
2580 2612 self,
2581 2613 deltas,
2582 2614 linkmapper,
2583 2615 transaction,
2584 2616 alwayscache=False,
2585 2617 addrevisioncb=None,
2586 2618 duplicaterevisioncb=None,
2587 2619 ):
2588 2620 """
2589 2621 add a delta group
2590 2622
2591 2623 given a set of deltas, add them to the revision log. the
2592 2624 first delta is against its parent, which should be in our
2593 2625 log, the rest are against the previous delta.
2594 2626
2595 2627 If ``addrevisioncb`` is defined, it will be called with arguments of
2596 2628 this revlog and the revision that was added.
2597 2629 """
2598 2630
2599 2631 if self._adding_group:
2600 2632 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2601 2633
2602 2634 self._adding_group = True
2603 2635 empty = True
2604 2636 try:
2605 2637 with self._writing(transaction):
2606 2638 deltacomputer = deltautil.deltacomputer(self)
2607 2639 # loop through our set of deltas
2608 2640 for data in deltas:
2609 2641 (
2610 2642 node,
2611 2643 p1,
2612 2644 p2,
2613 2645 linknode,
2614 2646 deltabase,
2615 2647 delta,
2616 2648 flags,
2617 2649 sidedata,
2618 2650 ) = data
2619 2651 link = linkmapper(linknode)
2620 2652 flags = flags or REVIDX_DEFAULT_FLAGS
2621 2653
2622 2654 rev = self.index.get_rev(node)
2623 2655 if rev is not None:
2624 2656 # this can happen if two branches make the same change
2625 2657 self._nodeduplicatecallback(transaction, rev)
2626 2658 if duplicaterevisioncb:
2627 2659 duplicaterevisioncb(self, rev)
2628 2660 empty = False
2629 2661 continue
2630 2662
2631 2663 for p in (p1, p2):
2632 2664 if not self.index.has_node(p):
2633 2665 raise error.LookupError(
2634 2666 p, self.radix, _(b'unknown parent')
2635 2667 )
2636 2668
2637 2669 if not self.index.has_node(deltabase):
2638 2670 raise error.LookupError(
2639 2671 deltabase, self.display_id, _(b'unknown delta base')
2640 2672 )
2641 2673
2642 2674 baserev = self.rev(deltabase)
2643 2675
2644 2676 if baserev != nullrev and self.iscensored(baserev):
2645 2677 # if base is censored, delta must be full replacement in a
2646 2678 # single patch operation
2647 2679 hlen = struct.calcsize(b">lll")
2648 2680 oldlen = self.rawsize(baserev)
2649 2681 newlen = len(delta) - hlen
2650 2682 if delta[:hlen] != mdiff.replacediffheader(
2651 2683 oldlen, newlen
2652 2684 ):
2653 2685 raise error.CensoredBaseError(
2654 2686 self.display_id, self.node(baserev)
2655 2687 )
2656 2688
2657 2689 if not flags and self._peek_iscensored(baserev, delta):
2658 2690 flags |= REVIDX_ISCENSORED
2659 2691
2660 2692 # We assume consumers of addrevisioncb will want to retrieve
2661 2693 # the added revision, which will require a call to
2662 2694 # revision(). revision() will fast path if there is a cache
2663 2695 # hit. So, we tell _addrevision() to always cache in this case.
2664 2696 # We're only using addgroup() in the context of changegroup
2665 2697 # generation so the revision data can always be handled as raw
2666 2698 # by the flagprocessor.
2667 2699 rev = self._addrevision(
2668 2700 node,
2669 2701 None,
2670 2702 transaction,
2671 2703 link,
2672 2704 p1,
2673 2705 p2,
2674 2706 flags,
2675 2707 (baserev, delta),
2676 2708 alwayscache=alwayscache,
2677 2709 deltacomputer=deltacomputer,
2678 2710 sidedata=sidedata,
2679 2711 )
2680 2712
2681 2713 if addrevisioncb:
2682 2714 addrevisioncb(self, rev)
2683 2715 empty = False
2684 2716 finally:
2685 2717 self._adding_group = False
2686 2718 return not empty
2687 2719
2688 2720 def iscensored(self, rev):
2689 2721 """Check if a file revision is censored."""
2690 2722 if not self._censorable:
2691 2723 return False
2692 2724
2693 2725 return self.flags(rev) & REVIDX_ISCENSORED
2694 2726
2695 2727 def _peek_iscensored(self, baserev, delta):
2696 2728 """Quickly check if a delta produces a censored revision."""
2697 2729 if not self._censorable:
2698 2730 return False
2699 2731
2700 2732 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2701 2733
2702 2734 def getstrippoint(self, minlink):
2703 2735 """find the minimum rev that must be stripped to strip the linkrev
2704 2736
2705 2737 Returns a tuple containing the minimum rev and a set of all revs that
2706 2738 have linkrevs that will be broken by this strip.
2707 2739 """
2708 2740 return storageutil.resolvestripinfo(
2709 2741 minlink,
2710 2742 len(self) - 1,
2711 2743 self.headrevs(),
2712 2744 self.linkrev,
2713 2745 self.parentrevs,
2714 2746 )
2715 2747
2716 2748 def strip(self, minlink, transaction):
2717 2749 """truncate the revlog on the first revision with a linkrev >= minlink
2718 2750
2719 2751 This function is called when we're stripping revision minlink and
2720 2752 its descendants from the repository.
2721 2753
2722 2754 We have to remove all revisions with linkrev >= minlink, because
2723 2755 the equivalent changelog revisions will be renumbered after the
2724 2756 strip.
2725 2757
2726 2758 So we truncate the revlog on the first of these revisions, and
2727 2759 trust that the caller has saved the revisions that shouldn't be
2728 2760 removed and that it'll re-add them after this truncation.
2729 2761 """
2730 2762 if len(self) == 0:
2731 2763 return
2732 2764
2733 2765 rev, _ = self.getstrippoint(minlink)
2734 2766 if rev == len(self):
2735 2767 return
2736 2768
2737 2769 # first truncate the files on disk
2738 2770 data_end = self.start(rev)
2739 2771 if not self._inline:
2740 2772 transaction.add(self._datafile, data_end)
2741 2773 end = rev * self.index.entry_size
2742 2774 else:
2743 2775 end = data_end + (rev * self.index.entry_size)
2744 2776
2745 2777 transaction.add(self._indexfile, end)
2746 2778 if self._docket is not None:
2747 2779 # XXX we could leverage the docket while stripping. However, it is
2748 2780 # not powerful enough at the time of this comment
2749 2781 self._docket.index_end = end
2750 2782 self._docket.data_end = data_end
2751 2783 self._docket.write(transaction, stripping=True)
2752 2784
2753 2785 # then reset internal state in memory to forget those revisions
2754 2786 self._revisioncache = None
2755 2787 self._chaininfocache = util.lrucachedict(500)
2756 2788 self._chunkclear()
2757 2789
2758 2790 del self.index[rev:-1]
2759 2791
2760 2792 def checksize(self):
2761 2793 """Check size of index and data files
2762 2794
2763 2795 return a (dd, di) tuple.
2764 2796 - dd: extra bytes for the "data" file
2765 2797 - di: extra bytes for the "index" file
2766 2798
2767 2799 A healthy revlog will return (0, 0).
2768 2800 """
2769 2801 expected = 0
2770 2802 if len(self):
2771 2803 expected = max(0, self.end(len(self) - 1))
2772 2804
2773 2805 try:
2774 2806 with self._datafp() as f:
2775 2807 f.seek(0, io.SEEK_END)
2776 2808 actual = f.tell()
2777 2809 dd = actual - expected
2778 2810 except IOError as inst:
2779 2811 if inst.errno != errno.ENOENT:
2780 2812 raise
2781 2813 dd = 0
2782 2814
2783 2815 try:
2784 2816 f = self.opener(self._indexfile)
2785 2817 f.seek(0, io.SEEK_END)
2786 2818 actual = f.tell()
2787 2819 f.close()
2788 2820 s = self.index.entry_size
2789 2821 i = max(0, actual // s)
2790 2822 di = actual - (i * s)
2791 2823 if self._inline:
2792 2824 databytes = 0
2793 2825 for r in self:
2794 2826 databytes += max(0, self.length(r))
2795 2827 dd = 0
2796 2828 di = actual - len(self) * s - databytes
2797 2829 except IOError as inst:
2798 2830 if inst.errno != errno.ENOENT:
2799 2831 raise
2800 2832 di = 0
2801 2833
2802 2834 return (dd, di)
2803 2835
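# Illustrative reading of the tuple above:
#
#     dd, di = rl.checksize()
#     # (0, 0)   -> index and data sizes match expectations
#     # dd > 0   -> the data file holds bytes not covered by the index
#     #             (e.g. after an interrupted write)
#     # di != 0  -> the index is not a whole number of entries, or, for
#     #             inline revlogs, disagrees with the stored chunk lengths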
2804 2836 def files(self):
2805 2837 res = [self._indexfile]
2806 2838 if not self._inline:
2807 2839 res.append(self._datafile)
2808 2840 return res
2809 2841
2810 2842 def emitrevisions(
2811 2843 self,
2812 2844 nodes,
2813 2845 nodesorder=None,
2814 2846 revisiondata=False,
2815 2847 assumehaveparentrevisions=False,
2816 2848 deltamode=repository.CG_DELTAMODE_STD,
2817 2849 sidedata_helpers=None,
2818 2850 ):
2819 2851 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2820 2852 raise error.ProgrammingError(
2821 2853 b'unhandled value for nodesorder: %s' % nodesorder
2822 2854 )
2823 2855
2824 2856 if nodesorder is None and not self._generaldelta:
2825 2857 nodesorder = b'storage'
2826 2858
2827 2859 if (
2828 2860 not self._storedeltachains
2829 2861 and deltamode != repository.CG_DELTAMODE_PREV
2830 2862 ):
2831 2863 deltamode = repository.CG_DELTAMODE_FULL
2832 2864
2833 2865 return storageutil.emitrevisions(
2834 2866 self,
2835 2867 nodes,
2836 2868 nodesorder,
2837 2869 revlogrevisiondelta,
2838 2870 deltaparentfn=self.deltaparent,
2839 2871 candeltafn=self.candelta,
2840 2872 rawsizefn=self.rawsize,
2841 2873 revdifffn=self.revdiff,
2842 2874 flagsfn=self.flags,
2843 2875 deltamode=deltamode,
2844 2876 revisiondata=revisiondata,
2845 2877 assumehaveparentrevisions=assumehaveparentrevisions,
2846 2878 sidedata_helpers=sidedata_helpers,
2847 2879 )
2848 2880
2849 2881 DELTAREUSEALWAYS = b'always'
2850 2882 DELTAREUSESAMEREVS = b'samerevs'
2851 2883 DELTAREUSENEVER = b'never'
2852 2884
2853 2885 DELTAREUSEFULLADD = b'fulladd'
2854 2886
2855 2887 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2856 2888
2857 2889 def clone(
2858 2890 self,
2859 2891 tr,
2860 2892 destrevlog,
2861 2893 addrevisioncb=None,
2862 2894 deltareuse=DELTAREUSESAMEREVS,
2863 2895 forcedeltabothparents=None,
2864 2896 sidedata_helpers=None,
2865 2897 ):
2866 2898 """Copy this revlog to another, possibly with format changes.
2867 2899
2868 2900 The destination revlog will contain the same revisions and nodes.
2869 2901 However, it may not be bit-for-bit identical due to e.g. delta encoding
2870 2902 differences.
2871 2903
2872 2904 The ``deltareuse`` argument control how deltas from the existing revlog
2873 2905 are preserved in the destination revlog. The argument can have the
2874 2906 following values:
2875 2907
2876 2908 DELTAREUSEALWAYS
2877 2909 Deltas will always be reused (if possible), even if the destination
2878 2910 revlog would not select the same revisions for the delta. This is the
2879 2911 fastest mode of operation.
2880 2912 DELTAREUSESAMEREVS
2881 2913 Deltas will be reused if the destination revlog would pick the same
2882 2914 revisions for the delta. This mode strikes a balance between speed
2883 2915 and optimization.
2884 2916 DELTAREUSENEVER
2885 2917 Deltas will never be reused. This is the slowest mode of execution.
2886 2918 This mode can be used to recompute deltas (e.g. if the diff/delta
2887 2919 algorithm changes).
2888 2920 DELTAREUSEFULLADD
2889 2921 Revisions will be re-added as if they were new content. This is
2890 2922 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2891 2923 e.g. large file detection and handling.
2892 2924
2893 2925 Delta computation can be slow, so the choice of delta reuse policy can
2894 2926 significantly affect run time.
2895 2927
2896 2928 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2897 2929 two extremes. Deltas will be reused if they are appropriate. But if the
2898 2930 delta could choose a better revision, it will do so. This means if you
2899 2931 are converting a non-generaldelta revlog to a generaldelta revlog,
2900 2932 deltas will be recomputed if the delta's parent isn't a parent of the
2901 2933 revision.
2902 2934
2903 2935 In addition to the delta policy, the ``forcedeltabothparents``
2904 2936 argument controls whether to force compute deltas against both parents
2905 2937 for merges. If unset, the destination revlog's current setting is used.
2906 2938
2907 2939 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2908 2940 `sidedata_helpers`.
2909 2941 """
2910 2942 if deltareuse not in self.DELTAREUSEALL:
2911 2943 raise ValueError(
2912 2944 _(b'value for deltareuse invalid: %s') % deltareuse
2913 2945 )
2914 2946
2915 2947 if len(destrevlog):
2916 2948 raise ValueError(_(b'destination revlog is not empty'))
2917 2949
2918 2950 if getattr(self, 'filteredrevs', None):
2919 2951 raise ValueError(_(b'source revlog has filtered revisions'))
2920 2952 if getattr(destrevlog, 'filteredrevs', None):
2921 2953 raise ValueError(_(b'destination revlog has filtered revisions'))
2922 2954
2923 2955 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2924 2956 # if possible.
2925 2957 oldlazydelta = destrevlog._lazydelta
2926 2958 oldlazydeltabase = destrevlog._lazydeltabase
2927 2959 oldamd = destrevlog._deltabothparents
2928 2960
2929 2961 try:
2930 2962 if deltareuse == self.DELTAREUSEALWAYS:
2931 2963 destrevlog._lazydeltabase = True
2932 2964 destrevlog._lazydelta = True
2933 2965 elif deltareuse == self.DELTAREUSESAMEREVS:
2934 2966 destrevlog._lazydeltabase = False
2935 2967 destrevlog._lazydelta = True
2936 2968 elif deltareuse == self.DELTAREUSENEVER:
2937 2969 destrevlog._lazydeltabase = False
2938 2970 destrevlog._lazydelta = False
2939 2971
2940 2972 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2941 2973
2942 2974 self._clone(
2943 2975 tr,
2944 2976 destrevlog,
2945 2977 addrevisioncb,
2946 2978 deltareuse,
2947 2979 forcedeltabothparents,
2948 2980 sidedata_helpers,
2949 2981 )
2950 2982
2951 2983 finally:
2952 2984 destrevlog._lazydelta = oldlazydelta
2953 2985 destrevlog._lazydeltabase = oldlazydeltabase
2954 2986 destrevlog._deltabothparents = oldamd
2955 2987
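# Usage sketch (illustrative): a format upgrade typically copies a revlog
# with the balanced policy, assuming `tr` is an open transaction and
# `dest` an empty destination revlog:
#
#     src.clone(tr, dest, deltareuse=src.DELTAREUSESAMEREVS)
#
# DELTAREUSEALWAYS trades delta quality for speed, DELTAREUSENEVER
# recomputes every delta, and DELTAREUSEFULLADD re-runs the full
# addrevision() path (allowing e.g. large-file handling to kick in).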
2956 2988 def _clone(
2957 2989 self,
2958 2990 tr,
2959 2991 destrevlog,
2960 2992 addrevisioncb,
2961 2993 deltareuse,
2962 2994 forcedeltabothparents,
2963 2995 sidedata_helpers,
2964 2996 ):
2965 2997 """perform the core duty of `revlog.clone` after parameter processing"""
2966 2998 deltacomputer = deltautil.deltacomputer(destrevlog)
2967 2999 index = self.index
2968 3000 for rev in self:
2969 3001 entry = index[rev]
2970 3002
2971 3003 # Some classes override linkrev to take filtered revs into
2972 3004 # account. Use raw entry from index.
2973 3005 flags = entry[0] & 0xFFFF
2974 3006 linkrev = entry[4]
2975 3007 p1 = index[entry[5]][7]
2976 3008 p2 = index[entry[6]][7]
2977 3009 node = entry[7]
2978 3010
2979 3011 # (Possibly) reuse the delta from the revlog if allowed and
2980 3012 # the revlog chunk is a delta.
2981 3013 cachedelta = None
2982 3014 rawtext = None
2983 3015 if deltareuse == self.DELTAREUSEFULLADD:
2984 3016 text, sidedata = self._revisiondata(rev)
2985 3017
2986 3018 if sidedata_helpers is not None:
2987 3019 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2988 3020 self, sidedata_helpers, sidedata, rev
2989 3021 )
2990 3022 flags = flags | new_flags[0] & ~new_flags[1]
2991 3023
2992 3024 destrevlog.addrevision(
2993 3025 text,
2994 3026 tr,
2995 3027 linkrev,
2996 3028 p1,
2997 3029 p2,
2998 3030 cachedelta=cachedelta,
2999 3031 node=node,
3000 3032 flags=flags,
3001 3033 deltacomputer=deltacomputer,
3002 3034 sidedata=sidedata,
3003 3035 )
3004 3036 else:
3005 3037 if destrevlog._lazydelta:
3006 3038 dp = self.deltaparent(rev)
3007 3039 if dp != nullrev:
3008 3040 cachedelta = (dp, bytes(self._chunk(rev)))
3009 3041
3010 3042 sidedata = None
3011 3043 if not cachedelta:
3012 3044 rawtext, sidedata = self._revisiondata(rev)
3013 3045 if sidedata is None:
3014 3046 sidedata = self.sidedata(rev)
3015 3047
3016 3048 if sidedata_helpers is not None:
3017 3049 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3018 3050 self, sidedata_helpers, sidedata, rev
3019 3051 )
3020 3052 flags = flags | new_flags[0] & ~new_flags[1]
3021 3053
3022 3054 with destrevlog._writing(tr):
3023 3055 destrevlog._addrevision(
3024 3056 node,
3025 3057 rawtext,
3026 3058 tr,
3027 3059 linkrev,
3028 3060 p1,
3029 3061 p2,
3030 3062 flags,
3031 3063 cachedelta,
3032 3064 deltacomputer=deltacomputer,
3033 3065 sidedata=sidedata,
3034 3066 )
3035 3067
3036 3068 if addrevisioncb:
3037 3069 addrevisioncb(self, rev, node)
3038 3070
3039 3071 def censorrevision(self, tr, censornode, tombstone=b''):
3040 3072 if self._format_version == REVLOGV0:
3041 3073 raise error.RevlogError(
3042 3074 _(b'cannot censor with version %d revlogs')
3043 3075 % self._format_version
3044 3076 )
3045 3077
3046 3078 censorrev = self.rev(censornode)
3047 3079 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3048 3080
3049 3081 if len(tombstone) > self.rawsize(censorrev):
3050 3082 raise error.Abort(
3051 3083 _(b'censor tombstone must be no longer than censored data')
3052 3084 )
3053 3085
3054 3086 # Rewriting the revlog in place is hard. Our strategy for censoring is
3055 3087 # to create a new revlog, copy all revisions to it, then replace the
3056 3088 # revlogs on transaction close.
3057 3089 #
3058 3090 # This is a bit dangerous. We could easily have a mismatch of state.
3059 3091 newrl = revlog(
3060 3092 self.opener,
3061 3093 target=self.target,
3062 3094 radix=self.radix,
3063 3095 postfix=b'tmpcensored',
3064 3096 censorable=True,
3065 3097 )
3066 3098 newrl._format_version = self._format_version
3067 3099 newrl._format_flags = self._format_flags
3068 3100 newrl._generaldelta = self._generaldelta
3069 3101 newrl._parse_index = self._parse_index
3070 3102
3071 3103 for rev in self.revs():
3072 3104 node = self.node(rev)
3073 3105 p1, p2 = self.parents(node)
3074 3106
3075 3107 if rev == censorrev:
3076 3108 newrl.addrawrevision(
3077 3109 tombstone,
3078 3110 tr,
3079 3111 self.linkrev(censorrev),
3080 3112 p1,
3081 3113 p2,
3082 3114 censornode,
3083 3115 REVIDX_ISCENSORED,
3084 3116 )
3085 3117
3086 3118 if newrl.deltaparent(rev) != nullrev:
3087 3119 raise error.Abort(
3088 3120 _(
3089 3121 b'censored revision stored as delta; '
3090 3122 b'cannot censor'
3091 3123 ),
3092 3124 hint=_(
3093 3125 b'censoring of revlogs is not '
3094 3126 b'fully implemented; please report '
3095 3127 b'this bug'
3096 3128 ),
3097 3129 )
3098 3130 continue
3099 3131
3100 3132 if self.iscensored(rev):
3101 3133 if self.deltaparent(rev) != nullrev:
3102 3134 raise error.Abort(
3103 3135 _(
3104 3136 b'cannot censor due to censored '
3105 3137 b'revision having delta stored'
3106 3138 )
3107 3139 )
3108 3140 rawtext = self._chunk(rev)
3109 3141 else:
3110 3142 rawtext = self.rawdata(rev)
3111 3143
3112 3144 newrl.addrawrevision(
3113 3145 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3114 3146 )
3115 3147
3116 3148 tr.addbackup(self._indexfile, location=b'store')
3117 3149 if not self._inline:
3118 3150 tr.addbackup(self._datafile, location=b'store')
3119 3151
3120 3152 self.opener.rename(newrl._indexfile, self._indexfile)
3121 3153 if not self._inline:
3122 3154 self.opener.rename(newrl._datafile, self._datafile)
3123 3155
3124 3156 self.clearcaches()
3125 3157 self._loadindex()
3126 3158
3127 3159 def verifyintegrity(self, state):
3128 3160 """Verifies the integrity of the revlog.
3129 3161
3130 3162 Yields ``revlogproblem`` instances describing problems that are
3131 3163 found.
3132 3164 """
3133 3165 dd, di = self.checksize()
3134 3166 if dd:
3135 3167 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3136 3168 if di:
3137 3169 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3138 3170
3139 3171 version = self._format_version
3140 3172
3141 3173 # The verifier tells us what version revlog we should be.
3142 3174 if version != state[b'expectedversion']:
3143 3175 yield revlogproblem(
3144 3176 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3145 3177 % (self.display_id, version, state[b'expectedversion'])
3146 3178 )
3147 3179
3148 3180 state[b'skipread'] = set()
3149 3181 state[b'safe_renamed'] = set()
3150 3182
3151 3183 for rev in self:
3152 3184 node = self.node(rev)
3153 3185
3154 3186 # Verify contents. 4 cases to care about:
3155 3187 #
3156 3188 # common: the most common case
3157 3189 # rename: with a rename
3158 3190 # meta: file content starts with b'\1\n', the metadata
3159 3191 # header defined in filelog.py, but without a rename
3160 3192 # ext: content stored externally
3161 3193 #
3162 3194 # More formally, their differences are shown below:
3163 3195 #
3164 3196 # | common | rename | meta | ext
3165 3197 # -------------------------------------------------------
3166 3198 # flags() | 0 | 0 | 0 | not 0
3167 3199 # renamed() | False | True | False | ?
3168 3200 # rawtext[0:2]=='\1\n'| False | True | True | ?
3169 3201 #
3170 3202 # "rawtext" means the raw text stored in revlog data, which
3171 3203 # could be retrieved by "rawdata(rev)". "text"
3172 3204 # mentioned below is "revision(rev)".
3173 3205 #
3174 3206 # There are 3 different lengths stored physically:
3175 3207 # 1. L1: rawsize, stored in revlog index
3176 3208 # 2. L2: len(rawtext), stored in revlog data
3177 3209 # 3. L3: len(text), stored in revlog data if flags==0, or
3178 3210 # possibly somewhere else if flags!=0
3179 3211 #
3180 3212 # L1 should be equal to L2. L3 could be different from them.
3181 3213 # "text" may or may not affect commit hash depending on flag
3182 3214 # processors (see flagutil.addflagprocessor).
3183 3215 #
3184 3216 # | common | rename | meta | ext
3185 3217 # -------------------------------------------------
3186 3218 # rawsize() | L1 | L1 | L1 | L1
3187 3219 # size() | L1 | L2-LM | L1(*) | L1 (?)
3188 3220 # len(rawtext) | L2 | L2 | L2 | L2
3189 3221 # len(text) | L2 | L2 | L2 | L3
3190 3222 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3191 3223 #
3192 3224 # LM: length of metadata, depending on rawtext
3193 3225 # (*): not ideal, see comment in filelog.size
3194 3226 # (?): could be "- len(meta)" if the resolved content has
3195 3227 # rename metadata
3196 3228 #
3197 3229 # Checks needed to be done:
3198 3230 # 1. length check: L1 == L2, in all cases.
3199 3231 # 2. hash check: depending on flag processor, we may need to
3200 3232 # use either "text" (external), or "rawtext" (in revlog).
3201 3233
3202 3234 try:
3203 3235 skipflags = state.get(b'skipflags', 0)
3204 3236 if skipflags:
3205 3237 skipflags &= self.flags(rev)
3206 3238
3207 3239 _verify_revision(self, skipflags, state, node)
3208 3240
3209 3241 l1 = self.rawsize(rev)
3210 3242 l2 = len(self.rawdata(node))
3211 3243
3212 3244 if l1 != l2:
3213 3245 yield revlogproblem(
3214 3246 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3215 3247 node=node,
3216 3248 )
3217 3249
3218 3250 except error.CensoredNodeError:
3219 3251 if state[b'erroroncensored']:
3220 3252 yield revlogproblem(
3221 3253 error=_(b'censored file data'), node=node
3222 3254 )
3223 3255 state[b'skipread'].add(node)
3224 3256 except Exception as e:
3225 3257 yield revlogproblem(
3226 3258 error=_(b'unpacking %s: %s')
3227 3259 % (short(node), stringutil.forcebytestr(e)),
3228 3260 node=node,
3229 3261 )
3230 3262 state[b'skipread'].add(node)
3231 3263
3232 3264 def storageinfo(
3233 3265 self,
3234 3266 exclusivefiles=False,
3235 3267 sharedfiles=False,
3236 3268 revisionscount=False,
3237 3269 trackedsize=False,
3238 3270 storedsize=False,
3239 3271 ):
3240 3272 d = {}
3241 3273
3242 3274 if exclusivefiles:
3243 3275 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3244 3276 if not self._inline:
3245 3277 d[b'exclusivefiles'].append((self.opener, self._datafile))
3246 3278
3247 3279 if sharedfiles:
3248 3280 d[b'sharedfiles'] = []
3249 3281
3250 3282 if revisionscount:
3251 3283 d[b'revisionscount'] = len(self)
3252 3284
3253 3285 if trackedsize:
3254 3286 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3255 3287
3256 3288 if storedsize:
3257 3289 d[b'storedsize'] = sum(
3258 3290 self.opener.stat(path).st_size for path in self.files()
3259 3291 )
3260 3292
3261 3293 return d
3262 3294
3263 3295 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3264 3296 if not self.hassidedata:
3265 3297 return
3266 3298 # revlog formats with sidedata support do not support inline
3267 3299 assert not self._inline
3268 3300 if not helpers[1] and not helpers[2]:
3269 3301 # Nothing to generate or remove
3270 3302 return
3271 3303
3272 3304 new_entries = []
3273 3305 # append the new sidedata
3274 3306 with self._writing(transaction):
3275 3307 ifh, dfh = self._writinghandles
3276 3308 if self._docket is not None:
3277 3309 dfh.seek(self._docket.data_end, os.SEEK_SET)
3278 3310 else:
3279 3311 dfh.seek(0, os.SEEK_END)
3280 3312
3281 3313 current_offset = dfh.tell()
3282 3314 for rev in range(startrev, endrev + 1):
3283 3315 entry = self.index[rev]
3284 3316 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3285 3317 store=self,
3286 3318 sidedata_helpers=helpers,
3287 3319 sidedata={},
3288 3320 rev=rev,
3289 3321 )
3290 3322
3291 3323 serialized_sidedata = sidedatautil.serialize_sidedata(
3292 3324 new_sidedata
3293 3325 )
3294 3326 if entry[8] != 0 or entry[9] != 0:
3295 3327 # rewriting entries that already have sidedata is not
3296 3328 # supported yet, because it introduces garbage data in the
3297 3329 # revlog.
3298 3330 msg = b"rewriting existing sidedata is not supported yet"
3299 3331 raise error.Abort(msg)
3300 3332
3301 3333 # Apply (potential) flags to add and to remove after running
3302 3334 # the sidedata helpers
3303 3335 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3304 3336 entry_update = (
3305 3337 current_offset,
3306 3338 len(serialized_sidedata),
3307 3339 new_offset_flags,
3308 3340 )
3309 3341
3310 3342 # the sidedata computation might have moved the file cursors around
3311 3343 dfh.seek(current_offset, os.SEEK_SET)
3312 3344 dfh.write(serialized_sidedata)
3313 3345 new_entries.append(entry_update)
3314 3346 current_offset += len(serialized_sidedata)
3315 3347 if self._docket is not None:
3316 3348 self._docket.data_end = dfh.tell()
3317 3349
3318 3350 # rewrite the new index entries
3319 3351 ifh.seek(startrev * self.index.entry_size)
3320 3352 for i, e in enumerate(new_entries):
3321 3353 rev = startrev + i
3322 3354 self.index.replace_sidedata_info(rev, *e)
3323 3355 packed = self.index.entry_binary(rev)
3324 3356 if rev == 0 and self._docket is None:
3325 3357 header = self._format_flags | self._format_version
3326 3358 header = self.index.pack_header(header)
3327 3359 packed = header + packed
3328 3360 ifh.write(packed)
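Editorial note on the flag arithmetic above: the sidedata helpers return a pair of flag sets (to add, to remove), and Python's `&` binds tighter than `|`, so the removal mask is applied to the bits being added, not to the bits already present in the packed offset+flags field. A minimal, self-contained sketch (illustrative values only, not part of this change):

    entry_offset_flags = 0b0101  # stand-in for entry[0]
    flags_to_add = 0b0011        # stand-in for flags[0]
    flags_to_remove = 0b0001     # stand-in for flags[1]
    new_offset_flags = entry_offset_flags | flags_to_add & ~flags_to_remove
    assert new_offset_flags == entry_offset_flags | (flags_to_add & ~flags_to_remove)
    assert new_offset_flags == 0b0111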
@@ -1,165 +1,169 b''
1 1 # revlogdeltas.py - constant used for revlog logic.
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import struct
13 13
14 14 from ..interfaces import repository
15 15
16 16 ### Internal utility constants
17 17
18 18 KIND_CHANGELOG = 1001 # over 256 so it is not comparable with a byte value
19 19 KIND_MANIFESTLOG = 1002
20 20 KIND_FILELOG = 1003
21 21 KIND_OTHER = 1004
22 22
23 23 ALL_KINDS = {
24 24 KIND_CHANGELOG,
25 25 KIND_MANIFESTLOG,
26 26 KIND_FILELOG,
27 27 KIND_OTHER,
28 28 }
29 29
30 30 ### main revlog header
31 31
32 32 INDEX_HEADER = struct.Struct(b">I")
33 33
34 34 ## revlog version
35 35 REVLOGV0 = 0
36 36 REVLOGV1 = 1
37 37 # Dummy value until file format is finalized.
38 38 REVLOGV2 = 0xDEAD
39 39
40 40 ## global revlog header flags
41 41 # Shared across v1 and v2.
42 42 FLAG_INLINE_DATA = 1 << 16
43 43 # Only used by v1, implied by v2.
44 44 FLAG_GENERALDELTA = 1 << 17
45 45 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
46 46 REVLOG_DEFAULT_FORMAT = REVLOGV1
47 47 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
48 48 REVLOGV0_FLAGS = 0
49 49 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
50 50 REVLOGV2_FLAGS = FLAG_INLINE_DATA
51 51
52 52 ### individual entry
53 53
54 54 ## index v0:
55 55 # 4 bytes: offset
56 56 # 4 bytes: compressed length
57 57 # 4 bytes: base rev
58 58 # 4 bytes: link rev
59 59 # 20 bytes: parent 1 nodeid
60 60 # 20 bytes: parent 2 nodeid
61 61 # 20 bytes: nodeid
62 62 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
63 63
64 64 ## index v1
65 65 # 6 bytes: offset
66 66 # 2 bytes: flags
67 67 # 4 bytes: compressed length
68 68 # 4 bytes: uncompressed length
69 69 # 4 bytes: base rev
70 70 # 4 bytes: link rev
71 71 # 4 bytes: parent 1 rev
72 72 # 4 bytes: parent 2 rev
73 73 # 32 bytes: nodeid
74 74 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
75 75 assert INDEX_ENTRY_V1.size == 32 * 2
76 76
77 77 # 6 bytes: offset
78 78 # 2 bytes: flags
79 79 # 4 bytes: compressed length
80 80 # 4 bytes: uncompressed length
81 81 # 4 bytes: base rev
82 82 # 4 bytes: link rev
83 83 # 4 bytes: parent 1 rev
84 84 # 4 bytes: parent 2 rev
85 85 # 32 bytes: nodeid
86 86 # 8 bytes: sidedata offset
87 87 # 4 bytes: sidedata compressed length
88 88 # 1 byte: compression mode (2 lower bits are data_compression_mode)
89 89 # 19 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
90 90 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
91 91 assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size
92 92
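To make the v2 entry layout concrete, here is an editorial sketch (not part of this change) packing one entry with the new compression-mode byte; every value is illustrative:

    import struct

    INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
    COMP_MODE_PLAIN = 0  # defined with the other compression mode constants below

    entry = INDEX_ENTRY_V2.pack(
        (0 << 16) | 0,    # 6 bytes of offset and 2 bytes of flags in one Q
        11,               # compressed length
        11,               # uncompressed length
        0,                # base rev
        0,                # link rev
        -1,               # parent 1 rev (nullrev)
        -1,               # parent 2 rev (nullrev)
        b'\x00' * 20,     # nodeid (padded to 32 bytes by the 12x)
        0,                # sidedata offset
        0,                # sidedata compressed length
        COMP_MODE_PLAIN,  # compression mode byte (2 lower bits used)
    )
    assert len(entry) == 96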
93 93 # revlog index flags
94 94
95 95 # For historical reasons, revlog's internal flags were exposed via the
96 96 # wire protocol and are even exposed in parts of the storage APIs.
97 97
98 98 # revision has censor metadata, must be verified
99 99 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
100 100 # revision hash does not match data (narrowhg)
101 101 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
102 102 # revision data is stored externally
103 103 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
104 104 # revision changes files in a way that could affect copy tracing.
105 105 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
106 106 REVIDX_DEFAULT_FLAGS = 0
107 107 # stable order in which flags need to be processed and their processors applied
108 108 REVIDX_FLAGS_ORDER = [
109 109 REVIDX_ISCENSORED,
110 110 REVIDX_ELLIPSIS,
111 111 REVIDX_EXTSTORED,
112 112 REVIDX_HASCOPIESINFO,
113 113 ]
114 114
115 115 # bitmask of flags that could cause rawdata content change
116 116 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
117 117
118 118 ## chunk compression mode constants:
119 119 # These constants are used in revlog version >=2 to denote the compression used
120 120 # for a chunk.
121 121
122 # The chunk uses no compression: the data stored on disk can be used
123 # directly as the chunk value, without any header information prefixed.
124 COMP_MODE_PLAIN = 0
125
122 126 # The chunk uses a compression mode stored "inline" at the start of the chunk
123 127 # itself. This is the mode always used for revlog versions "0" and "1".
124 128 COMP_MODE_INLINE = 2
125 129
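An editorial sketch of how a reader could dispatch on these modes (the `inline_decompress` helper is hypothetical; revlog versions 0 and 1 always use the inline mode, so only version 2 can store plain chunks):

    def decompress_chunk(data, comp_mode):
        if comp_mode == COMP_MODE_PLAIN:
            # the on-disk bytes are the chunk value itself, no header byte
            return data
        if comp_mode == COMP_MODE_INLINE:
            # the first byte of `data` names the compressor (b'u', b'x', ...)
            return inline_decompress(data)  # hypothetical helper
        raise ValueError('unknown compression mode %d' % comp_mode)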
126 130 SUPPORTED_FLAGS = {
127 131 REVLOGV0: REVLOGV0_FLAGS,
128 132 REVLOGV1: REVLOGV1_FLAGS,
129 133 REVLOGV2: REVLOGV2_FLAGS,
130 134 }
131 135
132 136 _no = lambda flags: False
133 137 _yes = lambda flags: True
134 138
135 139
136 140 def _from_flag(flag):
137 141 return lambda flags: bool(flags & flag)
138 142
139 143
140 144 FEATURES_BY_VERSION = {
141 145 REVLOGV0: {
142 146 b'inline': _no,
143 147 b'generaldelta': _no,
144 148 b'sidedata': False,
145 149 b'docket': False,
146 150 },
147 151 REVLOGV1: {
148 152 b'inline': _from_flag(FLAG_INLINE_DATA),
149 153 b'generaldelta': _from_flag(FLAG_GENERALDELTA),
150 154 b'sidedata': False,
151 155 b'docket': False,
152 156 },
153 157 REVLOGV2: {
154 158 # The point of an inline revlog is to reduce the number of files used in
155 159 # the store. Using a docket defeats this purpose, so we need other
156 160 # means to reduce the number of files for revlogv2.
157 161 b'inline': _no,
158 162 b'generaldelta': _yes,
159 163 b'sidedata': True,
160 164 b'docket': True,
161 165 },
162 166 }
163 167
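Editorial usage sketch for the feature table above: each entry is either a constant or a predicate over the header flags, so callers can query capabilities uniformly:

    has_inline = FEATURES_BY_VERSION[REVLOGV1][b'inline']
    assert has_inline(FLAG_INLINE_DATA)
    assert not has_inline(FLAG_GENERALDELTA)
    assert FEATURES_BY_VERSION[REVLOGV2][b'sidedata'] is True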
164 168
165 169 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
@@ -1,1088 +1,1106 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import collections
13 13 import struct
14 14
15 15 # import stuff from node for others to import from revlog
16 16 from ..node import nullrev
17 17 from ..i18n import _
18 18 from ..pycompat import getattr
19 19
20 20 from .constants import (
21 21 REVIDX_ISCENSORED,
22 22 REVIDX_RAWTEXT_CHANGING_FLAGS,
23 23 )
24 24
25 25 from ..thirdparty import attr
26 26
27 27 from .. import (
28 28 error,
29 29 mdiff,
30 30 util,
31 31 )
32 32
33 33 from . import flagutil
34 34
35 35 # maximum <delta-chain-data>/<revision-text-length> ratio
36 36 LIMIT_DELTA2TEXT = 2
37 37
38 38
39 39 class _testrevlog(object):
40 40 """minimalist fake revlog to use in doctests"""
41 41
42 42 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
43 43 """data is an list of revision payload boundaries"""
44 44 self._data = data
45 45 self._srdensitythreshold = density
46 46 self._srmingapsize = mingap
47 47 self._snapshot = set(snapshot)
48 48 self.index = None
49 49
50 50 def start(self, rev):
51 51 if rev == nullrev:
52 52 return 0
53 53 if rev == 0:
54 54 return 0
55 55 return self._data[rev - 1]
56 56
57 57 def end(self, rev):
58 58 if rev == nullrev:
59 59 return 0
60 60 return self._data[rev]
61 61
62 62 def length(self, rev):
63 63 return self.end(rev) - self.start(rev)
64 64
65 65 def __len__(self):
66 66 return len(self._data)
67 67
68 68 def issnapshot(self, rev):
69 69 if rev == nullrev:
70 70 return True
71 71 return rev in self._snapshot
72 72
73 73
74 74 def slicechunk(revlog, revs, targetsize=None):
75 75 """slice revs to reduce the amount of unrelated data to be read from disk.
76 76
77 77 ``revs`` is sliced into groups that should be read in one go.
78 78 Assume that revs are sorted.
79 79
80 80 The initial chunk is sliced until the overall density (payload/chunks-span
81 81 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
82 82 `revlog._srmingapsize` is skipped.
83 83
84 84 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
85 85 For consistency with other slicing choices, this limit won't go lower than
86 86 `revlog._srmingapsize`.
87 87
88 88 If an individual revision's chunk is larger than this limit, it will still
89 89 be yielded individually.
90 90
91 91 >>> data = [
92 92 ... 5, #00 (5)
93 93 ... 10, #01 (5)
94 94 ... 12, #02 (2)
95 95 ... 12, #03 (empty)
96 96 ... 27, #04 (15)
97 97 ... 31, #05 (4)
98 98 ... 31, #06 (empty)
99 99 ... 42, #07 (11)
100 100 ... 47, #08 (5)
101 101 ... 47, #09 (empty)
102 102 ... 48, #10 (1)
103 103 ... 51, #11 (3)
104 104 ... 74, #12 (23)
105 105 ... 85, #13 (11)
106 106 ... 86, #14 (1)
107 107 ... 91, #15 (5)
108 108 ... ]
109 109 >>> revlog = _testrevlog(data, snapshot=range(16))
110 110
111 111 >>> list(slicechunk(revlog, list(range(16))))
112 112 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
113 113 >>> list(slicechunk(revlog, [0, 15]))
114 114 [[0], [15]]
115 115 >>> list(slicechunk(revlog, [0, 11, 15]))
116 116 [[0], [11], [15]]
117 117 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
118 118 [[0], [11, 13, 15]]
119 119 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
120 120 [[1, 2], [5, 8, 10, 11], [14]]
121 121
122 122 Slicing with a maximum chunk size
123 123 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
124 124 [[0], [11], [13], [15]]
125 125 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
126 126 [[0], [11], [13, 15]]
127 127
128 128 Slicing involving nullrev
129 129 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
130 130 [[-1, 0], [11], [13, 15]]
131 131 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
132 132 [[-1], [13], [15]]
133 133 """
134 134 if targetsize is not None:
135 135 targetsize = max(targetsize, revlog._srmingapsize)
136 136 # targetsize should not be specified when evaluating delta candidates:
137 137 # * targetsize is used to ensure we stay within specification when reading,
138 138 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
139 139 if densityslicing is None:
140 140 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
141 141 for chunk in densityslicing(
142 142 revs, revlog._srdensitythreshold, revlog._srmingapsize
143 143 ):
144 144 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
145 145 yield subchunk
146 146
147 147
148 148 def _slicechunktosize(revlog, revs, targetsize=None):
149 149 """slice revs to match the target size
150 150
151 151 This is intended to be used on chunks that density slicing selected, but that
152 152 are still too large compared to the read guarantee of revlog. This might
153 153 happen when the "minimal gap size" interrupted the slicing, or when chains are
154 154 built in a way that creates large blocks next to each other.
155 155
156 156 >>> data = [
157 157 ... 3, #0 (3)
158 158 ... 5, #1 (2)
159 159 ... 6, #2 (1)
160 160 ... 8, #3 (2)
161 161 ... 8, #4 (empty)
162 162 ... 11, #5 (3)
163 163 ... 12, #6 (1)
164 164 ... 13, #7 (1)
165 165 ... 14, #8 (1)
166 166 ... ]
167 167
168 168 == All snapshots cases ==
169 169 >>> revlog = _testrevlog(data, snapshot=range(9))
170 170
171 171 Cases where chunk is already small enough
172 172 >>> list(_slicechunktosize(revlog, [0], 3))
173 173 [[0]]
174 174 >>> list(_slicechunktosize(revlog, [6, 7], 3))
175 175 [[6, 7]]
176 176 >>> list(_slicechunktosize(revlog, [0], None))
177 177 [[0]]
178 178 >>> list(_slicechunktosize(revlog, [6, 7], None))
179 179 [[6, 7]]
180 180
181 181 cases where we need actual slicing
182 182 >>> list(_slicechunktosize(revlog, [0, 1], 3))
183 183 [[0], [1]]
184 184 >>> list(_slicechunktosize(revlog, [1, 3], 3))
185 185 [[1], [3]]
186 186 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
187 187 [[1, 2], [3]]
188 188 >>> list(_slicechunktosize(revlog, [3, 5], 3))
189 189 [[3], [5]]
190 190 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
191 191 [[3], [5]]
192 192 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
193 193 [[5], [6, 7, 8]]
194 194 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
195 195 [[0], [1, 2], [3], [5], [6, 7, 8]]
196 196
197 197 Case with too large individual chunk (must return valid chunk)
198 198 >>> list(_slicechunktosize(revlog, [0, 1], 2))
199 199 [[0], [1]]
200 200 >>> list(_slicechunktosize(revlog, [1, 3], 1))
201 201 [[1], [3]]
202 202 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
203 203 [[3], [5]]
204 204
205 205 == No Snapshot cases ==
206 206 >>> revlog = _testrevlog(data)
207 207
208 208 Cases where chunk is already small enough
209 209 >>> list(_slicechunktosize(revlog, [0], 3))
210 210 [[0]]
211 211 >>> list(_slicechunktosize(revlog, [6, 7], 3))
212 212 [[6, 7]]
213 213 >>> list(_slicechunktosize(revlog, [0], None))
214 214 [[0]]
215 215 >>> list(_slicechunktosize(revlog, [6, 7], None))
216 216 [[6, 7]]
217 217
218 218 cases where we need actual slicing
219 219 >>> list(_slicechunktosize(revlog, [0, 1], 3))
220 220 [[0], [1]]
221 221 >>> list(_slicechunktosize(revlog, [1, 3], 3))
222 222 [[1], [3]]
223 223 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
224 224 [[1], [2, 3]]
225 225 >>> list(_slicechunktosize(revlog, [3, 5], 3))
226 226 [[3], [5]]
227 227 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
228 228 [[3], [4, 5]]
229 229 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
230 230 [[5], [6, 7, 8]]
231 231 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
232 232 [[0], [1, 2], [3], [5], [6, 7, 8]]
233 233
234 234 Case with too large individual chunk (must return valid chunk)
235 235 >>> list(_slicechunktosize(revlog, [0, 1], 2))
236 236 [[0], [1]]
237 237 >>> list(_slicechunktosize(revlog, [1, 3], 1))
238 238 [[1], [3]]
239 239 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
240 240 [[3], [5]]
241 241
242 242 == mixed case ==
243 243 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
244 244 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
245 245 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
246 246 """
247 247 assert targetsize is None or 0 <= targetsize
248 248 startdata = revlog.start(revs[0])
249 249 enddata = revlog.end(revs[-1])
250 250 fullspan = enddata - startdata
251 251 if targetsize is None or fullspan <= targetsize:
252 252 yield revs
253 253 return
254 254
255 255 startrevidx = 0
256 256 endrevidx = 1
257 257 iterrevs = enumerate(revs)
258 258 next(iterrevs) # skip first rev.
259 259 # first step: get snapshots out of the way
260 260 for idx, r in iterrevs:
261 261 span = revlog.end(r) - startdata
262 262 snapshot = revlog.issnapshot(r)
263 263 if span <= targetsize and snapshot:
264 264 endrevidx = idx + 1
265 265 else:
266 266 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
267 267 if chunk:
268 268 yield chunk
269 269 startrevidx = idx
270 270 startdata = revlog.start(r)
271 271 endrevidx = idx + 1
272 272 if not snapshot:
273 273 break
274 274
275 275 # for the others, we use binary slicing to quickly converge toward valid
276 276 # chunks (otherwise, we might end up looking for start/end of many
277 277 # revisions). This logic is not looking for the perfect slicing point, it
278 278 # focuses on quickly converging toward valid chunks.
279 279 nbitem = len(revs)
280 280 while (enddata - startdata) > targetsize:
281 281 endrevidx = nbitem
282 282 if nbitem - startrevidx <= 1:
283 283 break # protect against individual chunk larger than limit
284 284 localenddata = revlog.end(revs[endrevidx - 1])
285 285 span = localenddata - startdata
286 286 while span > targetsize:
287 287 if endrevidx - startrevidx <= 1:
288 288 break # protect against individual chunk larger than limit
289 289 endrevidx -= (endrevidx - startrevidx) // 2
290 290 localenddata = revlog.end(revs[endrevidx - 1])
291 291 span = localenddata - startdata
292 292 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
293 293 if chunk:
294 294 yield chunk
295 295 startrevidx = endrevidx
296 296 startdata = revlog.start(revs[startrevidx])
297 297
298 298 chunk = _trimchunk(revlog, revs, startrevidx)
299 299 if chunk:
300 300 yield chunk
301 301
302 302
303 303 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
304 304 """slice revs to reduce the amount of unrelated data to be read from disk.
305 305
306 306 ``revs`` is sliced into groups that should be read in one go.
307 307 Assume that revs are sorted.
308 308
309 309 The initial chunk is sliced until the overall density (payload/chunks-span
310 310 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
311 311 skipped.
312 312
313 313 >>> revlog = _testrevlog([
314 314 ... 5, #00 (5)
315 315 ... 10, #01 (5)
316 316 ... 12, #02 (2)
317 317 ... 12, #03 (empty)
318 318 ... 27, #04 (15)
319 319 ... 31, #05 (4)
320 320 ... 31, #06 (empty)
321 321 ... 42, #07 (11)
322 322 ... 47, #08 (5)
323 323 ... 47, #09 (empty)
324 324 ... 48, #10 (1)
325 325 ... 51, #11 (3)
326 326 ... 74, #12 (23)
327 327 ... 85, #13 (11)
328 328 ... 86, #14 (1)
329 329 ... 91, #15 (5)
330 330 ... ])
331 331
332 332 >>> list(_slicechunktodensity(revlog, list(range(16))))
333 333 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
334 334 >>> list(_slicechunktodensity(revlog, [0, 15]))
335 335 [[0], [15]]
336 336 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
337 337 [[0], [11], [15]]
338 338 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
339 339 [[0], [11, 13, 15]]
340 340 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
341 341 [[1, 2], [5, 8, 10, 11], [14]]
342 342 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
343 343 ... mingapsize=20))
344 344 [[1, 2, 3, 5, 8, 10, 11], [14]]
345 345 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
346 346 ... targetdensity=0.95))
347 347 [[1, 2], [5], [8, 10, 11], [14]]
348 348 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
349 349 ... targetdensity=0.95, mingapsize=12))
350 350 [[1, 2], [5, 8, 10, 11], [14]]
351 351 """
352 352 start = revlog.start
353 353 length = revlog.length
354 354
355 355 if len(revs) <= 1:
356 356 yield revs
357 357 return
358 358
359 359 deltachainspan = segmentspan(revlog, revs)
360 360
361 361 if deltachainspan < mingapsize:
362 362 yield revs
363 363 return
364 364
365 365 readdata = deltachainspan
366 366 chainpayload = sum(length(r) for r in revs)
367 367
368 368 if deltachainspan:
369 369 density = chainpayload / float(deltachainspan)
370 370 else:
371 371 density = 1.0
372 372
373 373 if density >= targetdensity:
374 374 yield revs
375 375 return
376 376
377 377 # Collect the gaps in a list; they are sorted below so the largest can be popped first
378 378 gaps = []
379 379 prevend = None
380 380 for i, rev in enumerate(revs):
381 381 revstart = start(rev)
382 382 revlen = length(rev)
383 383
384 384 # Skip empty revisions to form larger holes
385 385 if revlen == 0:
386 386 continue
387 387
388 388 if prevend is not None:
389 389 gapsize = revstart - prevend
390 390 # only consider holes that are large enough
391 391 if gapsize > mingapsize:
392 392 gaps.append((gapsize, i))
393 393
394 394 prevend = revstart + revlen
395 395 # sort the gaps so we can pop them from largest to smallest
396 396 gaps.sort()
397 397
398 398 # Collect the indices of the largest holes until the density is acceptable
399 399 selected = []
400 400 while gaps and density < targetdensity:
401 401 gapsize, gapidx = gaps.pop()
402 402
403 403 selected.append(gapidx)
404 404
405 405 # removing this gap means its bytes no longer need to be read,
406 406 # shrinking the effective read span
407 407 readdata -= gapsize
408 408 if readdata > 0:
409 409 density = chainpayload / float(readdata)
410 410 else:
411 411 density = 1.0
412 412 selected.sort()
413 413
414 414 # Cut the revs at collected indices
415 415 previdx = 0
416 416 for idx in selected:
417 417
418 418 chunk = _trimchunk(revlog, revs, previdx, idx)
419 419 if chunk:
420 420 yield chunk
421 421
422 422 previdx = idx
423 423
424 424 chunk = _trimchunk(revlog, revs, previdx)
425 425 if chunk:
426 426 yield chunk
427 427
428 428
429 429 def _trimchunk(revlog, revs, startidx, endidx=None):
430 430 """returns revs[startidx:endidx] without empty trailing revs
431 431
432 432 Doctest Setup
433 433 >>> revlog = _testrevlog([
434 434 ... 5, #0
435 435 ... 10, #1
436 436 ... 12, #2
437 437 ... 12, #3 (empty)
438 438 ... 17, #4
439 439 ... 21, #5
440 440 ... 21, #6 (empty)
441 441 ... ])
442 442
443 443 Contiguous cases:
444 444 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
445 445 [0, 1, 2, 3, 4, 5]
446 446 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
447 447 [0, 1, 2, 3, 4]
448 448 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
449 449 [0, 1, 2]
450 450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
451 451 [2]
452 452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
453 453 [3, 4, 5]
454 454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
455 455 [3, 4]
456 456
457 457 Discontiguous cases:
458 458 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
459 459 [1, 3, 5]
460 460 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
461 461 [1]
462 462 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
463 463 [3, 5]
464 464 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
465 465 [3, 5]
466 466 """
467 467 length = revlog.length
468 468
469 469 if endidx is None:
470 470 endidx = len(revs)
471 471
472 472 # If we have a non-empty delta candidate, there is nothing to trim
473 473 if revs[endidx - 1] < len(revlog):
474 474 # Trim empty revs at the end, except the very first revision of a chain
475 475 while (
476 476 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
477 477 ):
478 478 endidx -= 1
479 479
480 480 return revs[startidx:endidx]
481 481
482 482
483 483 def segmentspan(revlog, revs):
484 484 """Get the byte span of a segment of revisions
485 485
486 486 revs is a sorted array of revision numbers
487 487
488 488 >>> revlog = _testrevlog([
489 489 ... 5, #0
490 490 ... 10, #1
491 491 ... 12, #2
492 492 ... 12, #3 (empty)
493 493 ... 17, #4
494 494 ... ])
495 495
496 496 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
497 497 17
498 498 >>> segmentspan(revlog, [0, 4])
499 499 17
500 500 >>> segmentspan(revlog, [3, 4])
501 501 5
502 502 >>> segmentspan(revlog, [1, 2, 3,])
503 503 7
504 504 >>> segmentspan(revlog, [1, 3])
505 505 7
506 506 """
507 507 if not revs:
508 508 return 0
509 509 end = revlog.end(revs[-1])
510 510 return end - revlog.start(revs[0])
511 511
512 512
513 513 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
514 514 """build full text from a (base, delta) pair and other metadata"""
515 515 # special case deltas which replace entire base; no need to decode
516 516 # base revision. this neatly avoids censored bases, which throw when
517 517 # they're decoded.
518 518 hlen = struct.calcsize(b">lll")
519 519 if delta[:hlen] == mdiff.replacediffheader(
520 520 revlog.rawsize(baserev), len(delta) - hlen
521 521 ):
522 522 fulltext = delta[hlen:]
523 523 else:
524 524 # deltabase is rawtext before changed by flag processors, which is
525 525 # equivalent to non-raw text
526 526 basetext = revlog.revision(baserev, _df=fh, raw=False)
527 527 fulltext = mdiff.patch(basetext, delta)
528 528
529 529 try:
530 530 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
531 531 if validatehash:
532 532 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
533 533 if flags & REVIDX_ISCENSORED:
534 534 raise error.StorageError(
535 535 _(b'node %s is not censored') % expectednode
536 536 )
537 537 except error.CensoredNodeError:
538 538 # must pass the censored index flag to add censored revisions
539 539 if not flags & REVIDX_ISCENSORED:
540 540 raise
541 541 return fulltext
542 542
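The fast path at the top of `_textfromdelta` relies on the shape of a full-replacement delta: a single hunk whose 12-byte `>lll` header spans the whole base, followed by the new fulltext verbatim. An editorial sketch of that shape (the header built here mirrors what `mdiff.replacediffheader` produces):

    import struct

    base = b'old base text'
    new = b'entirely new text'
    header = struct.pack(b'>lll', 0, len(base), len(new))
    delta = header + new

    hlen = struct.calcsize(b'>lll')
    assert delta[hlen:] == new  # fulltext recovered without decoding `base`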
543 543
544 544 @attr.s(slots=True, frozen=True)
545 545 class _deltainfo(object):
546 546 distance = attr.ib()
547 547 deltalen = attr.ib()
548 548 data = attr.ib()
549 549 base = attr.ib()
550 550 chainbase = attr.ib()
551 551 chainlen = attr.ib()
552 552 compresseddeltalen = attr.ib()
553 553 snapshotdepth = attr.ib()
554 554
555 555
556 def drop_u_compression(delta):
557 """turn into a "u" (no-compression) into no-compression without header
558
559 This is useful for revlog format that has better compression method.
560 """
561 assert delta.data[0] == b'u', delta.data[0]
562 return _deltainfo(
563 delta.distance,
564 delta.deltalen - 1,
565 (b'', delta.data[1]),
566 delta.base,
567 delta.chainbase,
568 delta.chainlen,
569 delta.compresseddeltalen,
570 delta.snapshotdepth,
571 )
572
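An editorial illustration of `drop_u_compression` (the field values are made up): a delta that `compress()` returned with the legacy `b'u'` header loses that one-byte header, and the recorded length shrinks by one:

    di = _deltainfo(
        distance=10,
        deltalen=6,             # len(b'u') + len(b'hello')
        data=(b'u', b'hello'),  # (header, payload) as returned by compress()
        base=3,
        chainbase=0,
        chainlen=2,
        compresseddeltalen=6,
        snapshotdepth=None,
    )
    plain = drop_u_compression(di)
    assert plain.data == (b'', b'hello')
    assert plain.deltalen == 5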
573
556 574 def isgooddeltainfo(revlog, deltainfo, revinfo):
557 575 """Returns True if the given delta is good. Good means that it is within
558 576 the disk span, disk size, and chain length bounds that we know to be
559 577 performant."""
560 578 if deltainfo is None:
561 579 return False
562 580
563 581 # - 'deltainfo.distance' is the distance from the base revision --
564 582 # bounding it limits the amount of I/O we need to do.
565 583 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
566 584 # deltas we need to apply -- bounding it limits the amount of CPU
567 585 # we consume.
568 586
569 587 textlen = revinfo.textlen
570 588 defaultmax = textlen * 4
571 589 maxdist = revlog._maxdeltachainspan
572 590 if not maxdist:
573 591 maxdist = deltainfo.distance # ensure the conditional pass
574 592 maxdist = max(maxdist, defaultmax)
575 593
576 594 # Bad delta from read span:
577 595 #
578 596 # If the span of data read is larger than the maximum allowed.
579 597 #
580 598 # In the sparse-revlog case, we rely on the associated "sparse reading"
581 599 # to avoid issue related to the span of data. In theory, it would be
582 600 # possible to build pathological revlog where delta pattern would lead
583 601 # to too many reads. However, they do not happen in practice at all. So
584 602 # we skip the span check entirely.
585 603 if not revlog._sparserevlog and maxdist < deltainfo.distance:
586 604 return False
587 605
588 606 # Bad delta from new delta size:
589 607 #
590 608 # If the delta size is larger than the target text, storing the
591 609 # delta will be inefficient.
592 610 if textlen < deltainfo.deltalen:
593 611 return False
594 612
595 613 # Bad delta from cumulated payload size:
596 614 #
597 615 # If the sum of delta get larger than K * target text length.
598 616 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
599 617 return False
600 618
601 619 # Bad delta from chain length:
602 620 #
603 621 # If the number of delta in the chain gets too high.
604 622 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
605 623 return False
606 624
607 625 # bad delta from intermediate snapshot size limit
608 626 #
609 627 # If an intermediate snapshot size is higher than the limit. The
610 628 # limit exist to prevent endless chain of intermediate delta to be
611 629 # created.
612 630 if (
613 631 deltainfo.snapshotdepth is not None
614 632 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
615 633 ):
616 634 return False
617 635
618 636 # bad delta if new intermediate snapshot is larger than the previous
619 637 # snapshot
620 638 if (
621 639 deltainfo.snapshotdepth
622 640 and revlog.length(deltainfo.base) < deltainfo.deltalen
623 641 ):
624 642 return False
625 643
626 644 return True
627 645
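A compact editorial restatement of the two size bounds above, with illustrative numbers (this helper exists only for the example):

    def _passes_size_bounds(textlen, deltalen, compresseddeltalen):
        # a delta larger than the target text is inefficient storage
        if textlen < deltalen:
            return False
        # cumulated chain payload beyond LIMIT_DELTA2TEXT * textlen is too much
        if textlen * LIMIT_DELTA2TEXT < compresseddeltalen:
            return False
        return True

    assert _passes_size_bounds(1000, 400, 1500)
    assert not _passes_size_bounds(1000, 1200, 1200)  # delta bigger than text
    assert not _passes_size_bounds(1000, 400, 2500)   # chain payload too big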
628 646
629 647 # If a revision's full text is that much bigger than a base candidate full
630 648 # text's, it is very unlikely that it will produce a valid delta. We no longer
631 649 # consider these candidates.
632 650 LIMIT_BASE2TEXT = 500
633 651
634 652
635 653 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
636 654 """Provides group of revision to be tested as delta base
637 655
637 655 This top-level function focuses on emitting groups with unique and worthwhile
638 656 content. See _rawgroups for details about the group order.
640 658 """
641 659 # should we try to build a delta?
642 660 if not (len(revlog) and revlog._storedeltachains):
643 661 yield None
644 662 return
645 663
646 664 deltalength = revlog.length
647 665 deltaparent = revlog.deltaparent
648 666 sparse = revlog._sparserevlog
649 667 good = None
650 668
651 669 deltas_limit = textlen * LIMIT_DELTA2TEXT
652 670
653 671 tested = {nullrev}
654 672 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
655 673 while True:
656 674 temptative = candidates.send(good)
657 675 if temptative is None:
658 676 break
659 677 group = []
660 678 for rev in temptative:
661 679 # skip over empty deltas (no need to include them in a chain)
662 680 while revlog._generaldelta and not (
663 681 rev == nullrev or rev in tested or deltalength(rev)
664 682 ):
665 683 tested.add(rev)
666 684 rev = deltaparent(rev)
667 685 # no need to try a delta against nullrev, this will be done as a
668 686 # last resort.
669 687 if rev == nullrev:
670 688 continue
671 689 # filter out revisions we tested already
672 690 if rev in tested:
673 691 continue
674 692 tested.add(rev)
675 693 # filter out delta bases that will never produce a good delta
676 694 if deltas_limit < revlog.length(rev):
677 695 continue
678 696 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
679 697 continue
680 698 # no delta for rawtext-changing revs (see "candelta" for why)
681 699 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
682 700 continue
683 701 # If we reach here, we are about to build and test a delta.
684 702 # The delta building process will compute the chaininfo in all
685 703 # case, since that computation is cached, it is fine to access it
686 704 # here too.
687 705 chainlen, chainsize = revlog._chaininfo(rev)
688 706 # if chain will be too long, skip base
689 707 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
690 708 continue
691 709 # if the chain already has too much data, skip this base
692 710 if deltas_limit < chainsize:
693 711 continue
694 712 if sparse and revlog.upperboundcomp is not None:
695 713 maxcomp = revlog.upperboundcomp
696 714 basenotsnap = (p1, p2, nullrev)
697 715 if rev not in basenotsnap and revlog.issnapshot(rev):
698 716 snapshotdepth = revlog.snapshotdepth(rev)
699 717 # If text is significantly larger than the base, we can
700 718 # expect the resulting delta to be proportional to the size
701 719 # difference
702 720 revsize = revlog.rawsize(rev)
703 721 rawsizedistance = max(textlen - revsize, 0)
704 722 # use an estimate of the compression upper bound.
705 723 lowestrealisticdeltalen = rawsizedistance // maxcomp
706 724
707 725 # check the absolute constraint on the delta size
708 726 snapshotlimit = textlen >> snapshotdepth
709 727 if snapshotlimit < lowestrealisticdeltalen:
710 728 # delta lower bound is larger than accepted upper bound
711 729 continue
712 730
713 731 # check the relative constraint on the delta size
714 732 revlength = revlog.length(rev)
715 733 if revlength < lowestrealisticdeltalen:
716 734 # delta probable lower bound is larger than target base
717 735 continue
718 736
719 737 group.append(rev)
720 738 if group:
721 739 # XXX: in the sparse revlog case, group can become large,
722 740 # impacting performance. Some bounding or slicing mechanism
723 741 # would help to reduce this impact.
724 742 good = yield tuple(group)
725 743 yield None
726 744
727 745
728 746 def _findsnapshots(revlog, cache, start_rev):
729 747 """find snapshot from start_rev to tip"""
730 748 if util.safehasattr(revlog.index, b'findsnapshots'):
731 749 revlog.index.findsnapshots(cache, start_rev)
732 750 else:
733 751 deltaparent = revlog.deltaparent
734 752 issnapshot = revlog.issnapshot
735 753 for rev in revlog.revs(start_rev):
736 754 if issnapshot(rev):
737 755 cache[deltaparent(rev)].append(rev)
738 756
739 757
740 758 def _refinedgroups(revlog, p1, p2, cachedelta):
741 759 good = None
742 760 # First we try to reuse the delta contained in the bundle.
743 761 # (or from the source revlog)
744 762 #
745 763 # This logic only applies to general delta repositories and can be disabled
746 764 # through configuration. Disabling source-delta reuse is useful when
747 765 # we want to make sure we recompute "optimal" deltas.
748 766 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
749 767 # Assume what we received from the server is a good choice;
750 768 # the delta builder will reuse the cache
751 769 good = yield (cachedelta[0],)
752 770 if good is not None:
753 771 yield None
754 772 return
755 773 snapshots = collections.defaultdict(list)
756 774 for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
757 775 good = yield candidates
758 776 if good is not None:
759 777 break
760 778
761 779 # If sparse revlog is enabled, we can try to refine the available deltas
762 780 if not revlog._sparserevlog:
763 781 yield None
764 782 return
765 783
766 784 # if we have a refinable value, try to refine it
767 785 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
768 786 # refine snapshot down
769 787 previous = None
770 788 while previous != good:
771 789 previous = good
772 790 base = revlog.deltaparent(good)
773 791 if base == nullrev:
774 792 break
775 793 good = yield (base,)
776 794 # refine snapshot up
777 795 if not snapshots:
778 796 _findsnapshots(revlog, snapshots, good + 1)
779 797 previous = None
780 798 while good != previous:
781 799 previous = good
782 800 children = tuple(sorted(c for c in snapshots[good]))
783 801 good = yield children
784 802
785 803 # we have found nothing
786 804 yield None
787 805
788 806
789 807 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
790 808 """Provides group of revision to be tested as delta base
791 809
792 810 This lower-level function focuses on emitting theoretically interesting deltas
793 811 without looking at any practical details.
794 812
795 813 The group order aims at providing fast or small candidates first.
796 814 """
797 815 gdelta = revlog._generaldelta
798 816 # gate sparse behind general-delta because of issue6056
799 817 sparse = gdelta and revlog._sparserevlog
800 818 curr = len(revlog)
801 819 prev = curr - 1
802 820 deltachain = lambda rev: revlog._deltachain(rev)[0]
803 821
804 822 if gdelta:
805 823 # exclude already lazy tested base if any
806 824 parents = [p for p in (p1, p2) if p != nullrev]
807 825
808 826 if not revlog._deltabothparents and len(parents) == 2:
809 827 parents.sort()
810 828 # To minimize the chance of having to build a fulltext,
811 829 # pick first whichever parent is closest to us (max rev)
812 830 yield (parents[1],)
813 831 # then the other one (min rev) if the first did not fit
814 832 yield (parents[0],)
815 833 elif len(parents) > 0:
816 834 # Test all parents (1 or 2), and keep the best candidate
817 835 yield parents
818 836
819 837 if sparse and parents:
820 838 if snapshots is None:
821 839 # map: base-rev: snapshot-rev
822 840 snapshots = collections.defaultdict(list)
823 841 # See if we can use an existing snapshot in the parent chains to use as
824 842 # a base for a new intermediate-snapshot
825 843 #
826 844 # search for snapshot in parents delta chain
827 845 # map: snapshot-level: snapshot-rev
828 846 parents_snaps = collections.defaultdict(set)
829 847 candidate_chains = [deltachain(p) for p in parents]
830 848 for chain in candidate_chains:
831 849 for idx, s in enumerate(chain):
832 850 if not revlog.issnapshot(s):
833 851 break
834 852 parents_snaps[idx].add(s)
835 853 snapfloor = min(parents_snaps[0]) + 1
836 854 _findsnapshots(revlog, snapshots, snapfloor)
837 855 # search for the highest "unrelated" revision
838 856 #
839 857 # Adding snapshots used by an "unrelated" revision increases the odds that
840 858 # we reuse an independent, yet better, snapshot chain.
841 859 #
842 860 # XXX instead of building a set of revisions, we could lazily enumerate
843 861 # over the chains. That would be more efficient, however we stick to
844 862 # simple code for now.
845 863 all_revs = set()
846 864 for chain in candidate_chains:
847 865 all_revs.update(chain)
848 866 other = None
849 867 for r in revlog.revs(prev, snapfloor):
850 868 if r not in all_revs:
851 869 other = r
852 870 break
853 871 if other is not None:
854 872 # To avoid unfair competition, we won't use unrelated intermediate
855 873 # snapshots that are deeper than the ones from the parent delta
856 874 # chain.
857 875 max_depth = max(parents_snaps.keys())
858 876 chain = deltachain(other)
859 877 for idx, s in enumerate(chain):
860 878 if s < snapfloor:
861 879 continue
862 880 if max_depth < idx:
863 881 break
864 882 if not revlog.issnapshot(s):
865 883 break
866 884 parents_snaps[idx].add(s)
867 885 # Test them as possible intermediate snapshot base
868 886 # We test them from highest to lowest level; high-level ones are more
869 887 # likely to result in small deltas
870 888 floor = None
871 889 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
872 890 siblings = set()
873 891 for s in snaps:
874 892 siblings.update(snapshots[s])
875 893 # Before considering making a new intermediate snapshot, we check
876 894 # whether an existing snapshot, a child of the base we consider,
877 895 # would be suitable.
878 896 #
879 897 # It gives a chance to reuse a delta chain "unrelated" to the
880 898 # current revision instead of starting our own. Without such
881 899 # re-use, topological branches would keep reopening new chains,
882 900 # creating more and more snapshots as the repository grows.
883 901
884 902 if floor is not None:
885 903 # We only do this for siblings created after the one in our
886 904 # parent's delta chain. Those created before have less chance
887 905 # of being a valid base, since our ancestors had to create a new
888 906 # snapshot.
889 907 siblings = [r for r in siblings if floor < r]
890 908 yield tuple(sorted(siblings))
891 909 # then test the base from our parent's delta chain.
892 910 yield tuple(sorted(snaps))
893 911 floor = min(snaps)
894 912 # No suitable base found in the parent chain; search whether any full
895 913 # snapshots emitted since parent's base would be a suitable base for an
896 914 # intermediate snapshot.
897 915 #
898 916 # It gives a chance to reuse a delta chain unrelated to the current
899 917 # revision instead of starting our own. Without such re-use,
900 918 # topological branches would keep reopening new full chains, creating
901 919 # more and more snapshots as the repository grows.
902 920 yield tuple(snapshots[nullrev])
903 921
904 922 if not sparse:
905 923 # other approaches failed; try against prev to hopefully save us a
906 924 # fulltext.
907 925 yield (prev,)
908 926
909 927
910 928 class deltacomputer(object):
911 929 def __init__(self, revlog):
912 930 self.revlog = revlog
913 931
914 932 def buildtext(self, revinfo, fh):
915 933 """Builds a fulltext version of a revision
916 934
917 935 revinfo: _revisioninfo instance that contains all needed info
918 936 fh: file handle to either the .i or the .d revlog file,
919 937 depending on whether it is inlined or not
920 938 """
921 939 btext = revinfo.btext
922 940 if btext[0] is not None:
923 941 return btext[0]
924 942
925 943 revlog = self.revlog
926 944 cachedelta = revinfo.cachedelta
927 945 baserev = cachedelta[0]
928 946 delta = cachedelta[1]
929 947
930 948 fulltext = btext[0] = _textfromdelta(
931 949 fh,
932 950 revlog,
933 951 baserev,
934 952 delta,
935 953 revinfo.p1,
936 954 revinfo.p2,
937 955 revinfo.flags,
938 956 revinfo.node,
939 957 )
940 958 return fulltext
941 959
942 960 def _builddeltadiff(self, base, revinfo, fh):
943 961 revlog = self.revlog
944 962 t = self.buildtext(revinfo, fh)
945 963 if revlog.iscensored(base):
946 964 # deltas based on a censored revision must replace the
947 965 # full content in one patch, so delta works everywhere
948 966 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
949 967 delta = header + t
950 968 else:
951 969 ptext = revlog.rawdata(base, _df=fh)
952 970 delta = mdiff.textdiff(ptext, t)
953 971
954 972 return delta
955 973
956 974 def _builddeltainfo(self, revinfo, base, fh):
957 975 # can we use the cached delta?
958 976 revlog = self.revlog
959 977 chainbase = revlog.chainbase(base)
960 978 if revlog._generaldelta:
961 979 deltabase = base
962 980 else:
963 981 deltabase = chainbase
964 982 snapshotdepth = None
965 983 if revlog._sparserevlog and deltabase == nullrev:
966 984 snapshotdepth = 0
967 985 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
968 986 # A delta chain should always be one full snapshot,
969 987 # zero or more semi-snapshots, and zero or more deltas
970 988 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
971 989 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
972 990 snapshotdepth = len(revlog._deltachain(deltabase)[0])
973 991 delta = None
974 992 if revinfo.cachedelta:
975 993 cachebase, cachediff = revinfo.cachedelta
976 994 # check if the diff still applies
977 995 currentbase = cachebase
978 996 while (
979 997 currentbase != nullrev
980 998 and currentbase != base
981 999 and self.revlog.length(currentbase) == 0
982 1000 ):
983 1001 currentbase = self.revlog.deltaparent(currentbase)
984 1002 if self.revlog._lazydelta and currentbase == base:
985 1003 delta = revinfo.cachedelta[1]
986 1004 if delta is None:
987 1005 delta = self._builddeltadiff(base, revinfo, fh)
988 1006 # snapshotdepth needs to be neither None nor a 0-level snapshot
989 1007 if revlog.upperboundcomp is not None and snapshotdepth:
990 1008 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
991 1009 snapshotlimit = revinfo.textlen >> snapshotdepth
992 1010 if snapshotlimit < lowestrealisticdeltalen:
993 1011 return None
994 1012 if revlog.length(base) < lowestrealisticdeltalen:
995 1013 return None
996 1014 header, data = revlog.compress(delta)
997 1015 deltalen = len(header) + len(data)
998 1016 offset = revlog.end(len(revlog) - 1)
999 1017 dist = deltalen + offset - revlog.start(chainbase)
1000 1018 chainlen, compresseddeltalen = revlog._chaininfo(base)
1001 1019 chainlen += 1
1002 1020 compresseddeltalen += deltalen
1003 1021
1004 1022 return _deltainfo(
1005 1023 dist,
1006 1024 deltalen,
1007 1025 (header, data),
1008 1026 deltabase,
1009 1027 chainbase,
1010 1028 chainlen,
1011 1029 compresseddeltalen,
1012 1030 snapshotdepth,
1013 1031 )
1014 1032
1015 1033 def _fullsnapshotinfo(self, fh, revinfo):
1016 1034 curr = len(self.revlog)
1017 1035 rawtext = self.buildtext(revinfo, fh)
1018 1036 data = self.revlog.compress(rawtext)
1019 1037 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1020 1038 deltabase = chainbase = curr
1021 1039 snapshotdepth = 0
1022 1040 chainlen = 1
1023 1041
1024 1042 return _deltainfo(
1025 1043 dist,
1026 1044 deltalen,
1027 1045 data,
1028 1046 deltabase,
1029 1047 chainbase,
1030 1048 chainlen,
1031 1049 compresseddeltalen,
1032 1050 snapshotdepth,
1033 1051 )
1034 1052
1035 1053 def finddeltainfo(self, revinfo, fh):
1036 1054 """Find an acceptable delta against a candidate revision
1037 1055
1038 1056 revinfo: information about the revision (instance of _revisioninfo)
1039 1057 fh: file handle to either the .i or the .d revlog file,
1040 1058 depending on whether it is inlined or not
1041 1059
1042 1060 Returns the first acceptable candidate revision, as ordered by
1043 1061 _candidategroups
1044 1062
1045 1063 If no suitable deltabase is found, we return delta info for a full
1046 1064 snapshot.
1047 1065 """
1048 1066 if not revinfo.textlen:
1049 1067 return self._fullsnapshotinfo(fh, revinfo)
1050 1068
1051 1069 # no delta for flag processor revision (see "candelta" for why)
1052 1070 # not calling candelta since only one revision needs test, also to
1053 1071 # avoid overhead fetching flags again.
1054 1072 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1055 1073 return self._fullsnapshotinfo(fh, revinfo)
1056 1074
1057 1075 cachedelta = revinfo.cachedelta
1058 1076 p1 = revinfo.p1
1059 1077 p2 = revinfo.p2
1060 1078 revlog = self.revlog
1061 1079
1062 1080 deltainfo = None
1063 1081 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1064 1082 groups = _candidategroups(
1065 1083 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1066 1084 )
1067 1085 candidaterevs = next(groups)
1068 1086 while candidaterevs is not None:
1069 1087 nominateddeltas = []
1070 1088 if deltainfo is not None:
1071 1089 # if we already found a good delta,
1072 1090 # challenge it against refined candidates
1073 1091 nominateddeltas.append(deltainfo)
1074 1092 for candidaterev in candidaterevs:
1075 1093 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1076 1094 if candidatedelta is not None:
1077 1095 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1078 1096 nominateddeltas.append(candidatedelta)
1079 1097 if nominateddeltas:
1080 1098 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1081 1099 if deltainfo is not None:
1082 1100 candidaterevs = groups.send(deltainfo.base)
1083 1101 else:
1084 1102 candidaterevs = next(groups)
1085 1103
1086 1104 if deltainfo is None:
1087 1105 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1088 1106 return deltainfo