revlog: unify checks for supported flag...
marmoute
r48004:0e9105bf default
@@ -1,3220 +1,3209 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 INDEX_HEADER,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 SUPPORTED_FLAGS,
49 50 )
50 51 from .revlogutils.flagutil import (
51 52 REVIDX_DEFAULT_FLAGS,
52 53 REVIDX_ELLIPSIS,
53 54 REVIDX_EXTSTORED,
54 55 REVIDX_FLAGS_ORDER,
55 56 REVIDX_HASCOPIESINFO,
56 57 REVIDX_ISCENSORED,
57 58 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 59 )
59 60 from .thirdparty import attr
60 61 from . import (
61 62 ancestor,
62 63 dagop,
63 64 error,
64 65 mdiff,
65 66 policy,
66 67 pycompat,
67 68 templatefilters,
68 69 util,
69 70 )
70 71 from .interfaces import (
71 72 repository,
72 73 util as interfaceutil,
73 74 )
74 75 from .revlogutils import (
75 76 deltas as deltautil,
76 77 flagutil,
77 78 nodemap as nodemaputil,
78 79 revlogv0,
79 80 sidedata as sidedatautil,
80 81 )
81 82 from .utils import (
82 83 storageutil,
83 84 stringutil,
84 85 )
85 86
86 87 # blanked usage of all the names to prevent pyflakes complaints
87 88 # We need these names available in the module for extensions.
88 89
89 90 REVLOGV0
90 91 REVLOGV1
91 92 REVLOGV2
92 93 FLAG_INLINE_DATA
93 94 FLAG_GENERALDELTA
94 95 REVLOG_DEFAULT_FLAGS
95 96 REVLOG_DEFAULT_FORMAT
96 97 REVLOG_DEFAULT_VERSION
97 98 REVLOGV1_FLAGS
98 99 REVLOGV2_FLAGS
99 100 REVIDX_ISCENSORED
100 101 REVIDX_ELLIPSIS
101 102 REVIDX_HASCOPIESINFO
102 103 REVIDX_EXTSTORED
103 104 REVIDX_DEFAULT_FLAGS
104 105 REVIDX_FLAGS_ORDER
105 106 REVIDX_RAWTEXT_CHANGING_FLAGS
106 107
107 108 parsers = policy.importmod('parsers')
108 109 rustancestor = policy.importrust('ancestor')
109 110 rustdagop = policy.importrust('dagop')
110 111 rustrevlog = policy.importrust('revlog')
111 112
112 113 # Aliased for performance.
113 114 _zlibdecompress = zlib.decompress
114 115
115 116 # max size of revlog with inline data
116 117 _maxinline = 131072
117 118 _chunksize = 1048576
118 119
119 120 # Flag processors for REVIDX_ELLIPSIS.
120 121 def ellipsisreadprocessor(rl, text):
121 122 return text, False
122 123
123 124
124 125 def ellipsiswriteprocessor(rl, text):
125 126 return text, False
126 127
127 128
128 129 def ellipsisrawprocessor(rl, text):
129 130 return False
130 131
131 132
132 133 ellipsisprocessor = (
133 134 ellipsisreadprocessor,
134 135 ellipsiswriteprocessor,
135 136 ellipsisrawprocessor,
136 137 )
137 138
138 139
139 140 def offset_type(offset, type):
140 141 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 142 raise ValueError(b'unknown revlog index flags')
142 143 return int(int(offset) << 16 | type)
143 144
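The first field of each index entry packs the data offset and the per-revision flags into one integer: the offset lives in the high bits and the low 16 bits hold the flags (see the `start()` and `flags()` accessors further down). A minimal standalone sketch of the round trip, with a stand-in mask for `flagutil.REVIDX_KNOWN_FLAGS`:

KNOWN_FLAGS = 0b111  # stand-in; the real mask lives in flagutil

def pack_offset_type(offset, type):
    # refuse flags outside the known mask, as offset_type() above does
    if (type & ~KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(offset) << 16 | type

packed = pack_offset_type(1024, 0b001)
assert packed >> 16 == 1024      # start(): offset in the high bits
assert packed & 0xFFFF == 0b001  # flags(): low 16 bits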
144 145
145 146 def _verify_revision(rl, skipflags, state, node):
146 147 """Verify the integrity of the given revlog ``node`` while providing a hook
147 148 point for extensions to influence the operation."""
148 149 if skipflags:
149 150 state[b'skipread'].add(node)
150 151 else:
151 152 # Side-effect: read content and verify hash.
152 153 rl.revision(node)
153 154
154 155
155 156 # True if a fast implementation for persistent-nodemap is available
156 157 #
157 158 # We also consider the "pure" python implementation "fast" because
158 159 # people using it don't really have performance considerations (and a
159 160 # wheelbarrow of other slowness sources)
160 161 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 162 parsers, 'BaseIndexObject'
162 163 )
163 164
164 165
165 166 @attr.s(slots=True, frozen=True)
166 167 class _revisioninfo(object):
167 168 """Information about a revision that allows building its fulltext
168 169 node: expected hash of the revision
169 170 p1, p2: parent revs of the revision
170 171 btext: built text cache consisting of a one-element list
171 172 cachedelta: (baserev, uncompressed_delta) or None
172 173 flags: flags associated with the revision storage
173 174
174 175 One of btext[0] or cachedelta must be set.
175 176 """
176 177
177 178 node = attr.ib()
178 179 p1 = attr.ib()
179 180 p2 = attr.ib()
180 181 btext = attr.ib()
181 182 textlen = attr.ib()
182 183 cachedelta = attr.ib()
183 184 flags = attr.ib()
184 185
185 186
186 187 @interfaceutil.implementer(repository.irevisiondelta)
187 188 @attr.s(slots=True)
188 189 class revlogrevisiondelta(object):
189 190 node = attr.ib()
190 191 p1node = attr.ib()
191 192 p2node = attr.ib()
192 193 basenode = attr.ib()
193 194 flags = attr.ib()
194 195 baserevisionsize = attr.ib()
195 196 revision = attr.ib()
196 197 delta = attr.ib()
197 198 sidedata = attr.ib()
198 199 protocol_flags = attr.ib()
199 200 linknode = attr.ib(default=None)
200 201
201 202
202 203 @interfaceutil.implementer(repository.iverifyproblem)
203 204 @attr.s(frozen=True)
204 205 class revlogproblem(object):
205 206 warning = attr.ib(default=None)
206 207 error = attr.ib(default=None)
207 208 node = attr.ib(default=None)
208 209
209 210
210 211 def parse_index_v1(data, inline):
211 212 # call the C implementation to parse the index data
212 213 index, cache = parsers.parse_index2(data, inline)
213 214 return index, cache
214 215
215 216
216 217 def parse_index_v2(data, inline):
217 218 # call the C implementation to parse the index data
218 219 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 220 return index, cache
220 221
221 222
222 223 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223 224
224 225 def parse_index_v1_nodemap(data, inline):
225 226 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 227 return index, cache
227 228
228 229
229 230 else:
230 231 parse_index_v1_nodemap = None
231 232
232 233
233 234 def parse_index_v1_mixed(data, inline):
234 235 index, cache = parse_index_v1(data, inline)
235 236 return rustrevlog.MixedIndex(index), cache
236 237
237 238
238 239 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 240 # signed integer)
240 241 _maxentrysize = 0x7FFFFFFF
241 242
242 243
243 244 class revlog(object):
244 245 """
245 246 the underlying revision storage object
246 247
247 248 A revlog consists of two parts, an index and the revision data.
248 249
249 250 The index is a file with a fixed record size containing
250 251 information on each revision, including its nodeid (hash), the
251 252 nodeids of its parents, the position and offset of its data within
252 253 the data file, and the revision it's based on. Finally, each entry
253 254 contains a linkrev entry that can serve as a pointer to external
254 255 data.
255 256
256 257 The revision data itself is a linear collection of data chunks.
257 258 Each chunk represents a revision and is usually represented as a
258 259 delta against the previous chunk. To bound lookup time, runs of
259 260 deltas are limited to about 2 times the length of the original
260 261 version data. This makes retrieval of a version proportional to
261 262 its size, or O(1) relative to the number of revisions.
262 263
263 264 Both pieces of the revlog are written to in an append-only
264 265 fashion, which means we never need to rewrite a file to insert or
265 266 remove data, and can use some simple techniques to avoid the need
266 267 for locking while reading.
267 268
268 269 If checkambig, indexfile is opened with checkambig=True at
269 270 writing, to avoid file stat ambiguity.
270 271
271 272 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 273 index will be mmapped rather than read if it is larger than the
273 274 configured threshold.
274 275
275 276 If censorable is True, the revlog can have censored revisions.
276 277
277 278 If `upperboundcomp` is not None, this is the expected maximal gain from
278 279 compression for the data content.
279 280
280 281 `concurrencychecker` is an optional function that receives 3 arguments: a
281 282 file handle, a filename, and an expected position. It should check whether
282 283 the current position in the file handle is valid, and log/warn/fail (by
283 284 raising).
284 285 """
285 286
286 287 _flagserrorclass = error.RevlogError
287 288
288 289 def __init__(
289 290 self,
290 291 opener,
291 292 target,
292 293 radix,
293 294 postfix=None,
294 295 checkambig=False,
295 296 mmaplargeindex=False,
296 297 censorable=False,
297 298 upperboundcomp=None,
298 299 persistentnodemap=False,
299 300 concurrencychecker=None,
300 301 ):
301 302 """
302 303 create a revlog object
303 304
304 305 opener is a function that abstracts the file opening operation
305 306 and can be used to implement COW semantics or the like.
306 307
307 308 `target`: a (KIND, ID) tuple that identifies the content stored in
308 309 this revlog. It helps the rest of the code understand what the revlog
309 310 is about without having to resort to heuristics and index filename
310 311 analysis. Note that this must reliably be set by normal code, but
311 312 test, debug, or performance measurement code might not set it to an
312 313 accurate value.
313 314 """
314 315 self.upperboundcomp = upperboundcomp
315 316
316 317 self.radix = radix
317 318
318 319 self._indexfile = None
319 320 self._datafile = None
320 321 self._nodemap_file = None
321 322 self.postfix = postfix
322 323 self.opener = opener
323 324 if persistentnodemap:
324 325 self._nodemap_file = nodemaputil.get_nodemap_file(self)
325 326
326 327 assert target[0] in ALL_KINDS
327 328 assert len(target) == 2
328 329 self.target = target
329 330 # When True, indexfile is opened with checkambig=True at writing, to
330 331 # avoid file stat ambiguity.
331 332 self._checkambig = checkambig
332 333 self._mmaplargeindex = mmaplargeindex
333 334 self._censorable = censorable
334 335 # 3-tuple of (node, rev, text) for a raw revision.
335 336 self._revisioncache = None
336 337 # Maps rev to chain base rev.
337 338 self._chainbasecache = util.lrucachedict(100)
338 339 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
339 340 self._chunkcache = (0, b'')
340 341 # How much data to read and cache into the raw revlog data cache.
341 342 self._chunkcachesize = 65536
342 343 self._maxchainlen = None
343 344 self._deltabothparents = True
344 345 self.index = None
345 346 self._nodemap_docket = None
346 347 # Mapping of partial identifiers to full nodes.
347 348 self._pcache = {}
348 349 # Mapping of revision integer to full node.
349 350 self._compengine = b'zlib'
350 351 self._compengineopts = {}
351 352 self._maxdeltachainspan = -1
352 353 self._withsparseread = False
353 354 self._sparserevlog = False
354 355 self.hassidedata = False
355 356 self._srdensitythreshold = 0.50
356 357 self._srmingapsize = 262144
357 358
358 359 # Make copy of flag processors so each revlog instance can support
359 360 # custom flags.
360 361 self._flagprocessors = dict(flagutil.flagprocessors)
361 362
362 363 # 2-tuple of file handles being used for active writing.
363 364 self._writinghandles = None
364 365 # prevent nesting of addgroup
365 366 self._adding_group = None
366 367
367 368 self._loadindex()
368 369
369 370 self._concurrencychecker = concurrencychecker
370 371
371 372 def _init_opts(self):
372 373 """process options (from above/config) to setup associated default revlog mode
373 374
374 375 These values might be affected when actually reading on-disk information.
375 376
376 377 The relevant values are returned for use in _loadindex().
377 378
378 379 * new_header:
379 380 version header to use if we need to create a new revlog
380 381
381 382 * mmapindexthreshold:
382 383 minimal index size for start to use mmap
383 384
384 385 * force_nodemap:
385 386 force the usage of a "development" version of the nodemap code
386 387 """
387 388 mmapindexthreshold = None
388 389 opts = self.opener.options
389 390
390 391 if b'revlogv2' in opts:
391 392 new_header = REVLOGV2 | FLAG_INLINE_DATA
392 393 elif b'revlogv1' in opts:
393 394 new_header = REVLOGV1 | FLAG_INLINE_DATA
394 395 if b'generaldelta' in opts:
395 396 new_header |= FLAG_GENERALDELTA
396 397 elif b'revlogv0' in self.opener.options:
397 398 new_header = REVLOGV0
398 399 else:
399 400 new_header = REVLOG_DEFAULT_VERSION
400 401
401 402 if b'chunkcachesize' in opts:
402 403 self._chunkcachesize = opts[b'chunkcachesize']
403 404 if b'maxchainlen' in opts:
404 405 self._maxchainlen = opts[b'maxchainlen']
405 406 if b'deltabothparents' in opts:
406 407 self._deltabothparents = opts[b'deltabothparents']
407 408 self._lazydelta = bool(opts.get(b'lazydelta', True))
408 409 self._lazydeltabase = False
409 410 if self._lazydelta:
410 411 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
411 412 if b'compengine' in opts:
412 413 self._compengine = opts[b'compengine']
413 414 if b'zlib.level' in opts:
414 415 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
415 416 if b'zstd.level' in opts:
416 417 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
417 418 if b'maxdeltachainspan' in opts:
418 419 self._maxdeltachainspan = opts[b'maxdeltachainspan']
419 420 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
420 421 mmapindexthreshold = opts[b'mmapindexthreshold']
421 422 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
422 423 withsparseread = bool(opts.get(b'with-sparse-read', False))
423 424 # sparse-revlog forces sparse-read
424 425 self._withsparseread = self._sparserevlog or withsparseread
425 426 if b'sparse-read-density-threshold' in opts:
426 427 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
427 428 if b'sparse-read-min-gap-size' in opts:
428 429 self._srmingapsize = opts[b'sparse-read-min-gap-size']
429 430 if opts.get(b'enableellipsis'):
430 431 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
431 432
432 433 # revlog v0 doesn't have flag processors
433 434 for flag, processor in pycompat.iteritems(
434 435 opts.get(b'flagprocessors', {})
435 436 ):
436 437 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
437 438
438 439 if self._chunkcachesize <= 0:
439 440 raise error.RevlogError(
440 441 _(b'revlog chunk cache size %r is not greater than 0')
441 442 % self._chunkcachesize
442 443 )
443 444 elif self._chunkcachesize & (self._chunkcachesize - 1):
444 445 raise error.RevlogError(
445 446 _(b'revlog chunk cache size %r is not a power of 2')
446 447 % self._chunkcachesize
447 448 )
448 449 force_nodemap = opts.get(b'devel-force-nodemap', False)
449 450 return new_header, mmapindexthreshold, force_nodemap
450 451
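The `size & (size - 1)` test used above is the standard power-of-two check: a power of two has a single set bit, subtracting one sets every lower bit instead, and the AND of the two is zero. A quick standalone illustration:

def is_power_of_two(n):
    # a power of two has exactly one bit set, so n & (n - 1) clears it
    return n > 0 and (n & (n - 1)) == 0

assert is_power_of_two(65536)      # the default _chunkcachesize
assert not is_power_of_two(65535)
assert not is_power_of_two(0)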
451 452 def _get_data(self, filepath, mmap_threshold):
452 453 """return a file content with or without mmap
453 454
454 455 If the file is missing return the empty string"""
455 456 try:
456 457 with self.opener(filepath) as fp:
457 458 if mmap_threshold is not None:
458 459 file_size = self.opener.fstat(fp).st_size
459 460 if file_size >= mmap_threshold:
460 461 # TODO: should .close() to release resources without
461 462 # relying on Python GC
462 463 return util.buffer(util.mmapread(fp))
463 464 return fp.read()
464 465 except IOError as inst:
465 466 if inst.errno != errno.ENOENT:
466 467 raise
467 468 return b''
468 469
469 470 def _loadindex(self):
470 471
471 472 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
472 473
473 474 if self.postfix is None:
474 475 entry_point = b'%s.i' % self.radix
475 476 else:
476 477 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
477 478
478 479 entry_data = b''
479 480 self._initempty = True
480 481 entry_data = self._get_data(entry_point, mmapindexthreshold)
481 482 if len(entry_data) > 0:
482 483 header = INDEX_HEADER.unpack(entry_data[:4])[0]
483 484 self._initempty = False
484 485 else:
485 486 header = new_header
486 487
487 488 self._format_flags = header & ~0xFFFF
488 489 self._format_version = header & 0xFFFF
489 490
491 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
492 if supported_flags is None:
493 msg = _(b'unknown version (%d) in revlog %s')
494 msg %= (self._format_version, self.display_id)
495 raise error.RevlogError(msg)
496 elif self._format_flags & ~supported_flags:
497 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
498 display_flag = self._format_flags >> 16
499 msg %= (display_flag, self._format_version, self.display_id)
500 raise error.RevlogError(msg)
501
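This is the heart of the change: the header word mixes the format version (low 16 bits) with feature flags (the rest), and the flags are now validated once against a per-version SUPPORTED_FLAGS table instead of separately inside each version branch below. A hedged sketch of the same split and check (constant values are illustrative; the real ones live in revlogutils.constants):

REVLOGV1 = 1
FLAG_INLINE_DATA = 1 << 16   # illustrative values
FLAG_GENERALDELTA = 1 << 17
SUPPORTED_FLAGS = {REVLOGV1: FLAG_INLINE_DATA | FLAG_GENERALDELTA}

def check_header(header):
    format_flags = header & ~0xFFFF
    format_version = header & 0xFFFF
    supported = SUPPORTED_FLAGS.get(format_version)
    if supported is None:
        raise ValueError('unknown version (%d)' % format_version)
    if format_flags & ~supported:
        raise ValueError('unknown flags (%#04x)' % (format_flags >> 16))
    return format_version, format_flags

assert check_header(REVLOGV1 | FLAG_INLINE_DATA) == (1, FLAG_INLINE_DATA)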
490 502 if self._format_version == REVLOGV0:
491 if self._format_flags:
492 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
493 display_flag = self._format_flags >> 16
494 msg %= (display_flag, self._format_version, self.display_id)
495 raise error.RevlogError(msg)
496
497 503 self._inline = False
498 504 self._generaldelta = False
499
500 505 elif self._format_version == REVLOGV1:
501 if self._format_flags & ~REVLOGV1_FLAGS:
502 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
503 display_flag = self._format_flags >> 16
504 msg %= (display_flag, self._format_version, self.display_id)
505 raise error.RevlogError(msg)
506
507 506 self._inline = self._format_flags & FLAG_INLINE_DATA
508 507 self._generaldelta = self._format_flags & FLAG_GENERALDELTA
509
510 508 elif self._format_version == REVLOGV2:
511 if self._format_flags & ~REVLOGV2_FLAGS:
512 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
513 display_flag = self._format_flags >> 16
514 msg %= (display_flag, self._format_version, self.display_id)
515 raise error.RevlogError(msg)
516
517 509 # There is a bug in the transaction handling when going from an
518 510 # inline revlog to a separate index and data file. Turn it off until
519 511 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
520 512 # See issue6485
521 513 self._inline = False
522 514 # generaldelta implied by version 2 revlogs.
523 515 self._generaldelta = True
524 516 # revlog-v2 has built in sidedata support
525 517 self.hassidedata = True
526
527 518 else:
528 msg = _(b'unknown version (%d) in revlog %s')
529 msg %= (self._format_version, self.display_id)
530 raise error.RevlogError(msg)
519 assert False, 'unreachable'
531 520
532 521 index_data = entry_data
533 522 self._indexfile = entry_point
534 523
535 524 if self.postfix is None or self.postfix == b'a':
536 525 self._datafile = b'%s.d' % self.radix
537 526 else:
538 527 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
539 528
540 529 self.nodeconstants = sha1nodeconstants
541 530 self.nullid = self.nodeconstants.nullid
542 531
543 532 # sparse-revlog can't be on without general-delta (issue6056)
544 533 if not self._generaldelta:
545 534 self._sparserevlog = False
546 535
547 536 self._storedeltachains = True
548 537
549 538 devel_nodemap = (
550 539 self._nodemap_file
551 540 and force_nodemap
552 541 and parse_index_v1_nodemap is not None
553 542 )
554 543
555 544 use_rust_index = False
556 545 if rustrevlog is not None:
557 546 if self._nodemap_file is not None:
558 547 use_rust_index = True
559 548 else:
560 549 use_rust_index = self.opener.options.get(b'rust.index')
561 550
562 551 self._parse_index = parse_index_v1
563 552 if self._format_version == REVLOGV0:
564 553 self._parse_index = revlogv0.parse_index_v0
565 554 elif self._format_version == REVLOGV2:
566 555 self._parse_index = parse_index_v2
567 556 elif devel_nodemap:
568 557 self._parse_index = parse_index_v1_nodemap
569 558 elif use_rust_index:
570 559 self._parse_index = parse_index_v1_mixed
571 560 try:
572 561 d = self._parse_index(index_data, self._inline)
573 562 index, _chunkcache = d
574 563 use_nodemap = (
575 564 not self._inline
576 565 and self._nodemap_file is not None
577 566 and util.safehasattr(index, 'update_nodemap_data')
578 567 )
579 568 if use_nodemap:
580 569 nodemap_data = nodemaputil.persisted_data(self)
581 570 if nodemap_data is not None:
582 571 docket = nodemap_data[0]
583 572 if (
584 573 len(d[0]) > docket.tip_rev
585 574 and d[0][docket.tip_rev][7] == docket.tip_node
586 575 ):
587 576 # no changelog tampering
588 577 self._nodemap_docket = docket
589 578 index.update_nodemap_data(*nodemap_data)
590 579 except (ValueError, IndexError):
591 580 raise error.RevlogError(
592 581 _(b"index %s is corrupted") % self.display_id
593 582 )
594 583 self.index, self._chunkcache = d
595 584 if not self._chunkcache:
596 585 self._chunkclear()
597 586 # revnum -> (chain-length, sum-delta-length)
598 587 self._chaininfocache = util.lrucachedict(500)
599 588 # revlog header -> revlog compressor
600 589 self._decompressors = {}
601 590
602 591 @util.propertycache
603 592 def revlog_kind(self):
604 593 return self.target[0]
605 594
606 595 @util.propertycache
607 596 def display_id(self):
608 597 """The public facing "ID" of the revlog that we use in message"""
609 598 # Maybe we should build a user facing representation of
610 599 # revlog.target instead of using `self.radix`
611 600 return self.radix
612 601
613 602 @util.propertycache
614 603 def _compressor(self):
615 604 engine = util.compengines[self._compengine]
616 605 return engine.revlogcompressor(self._compengineopts)
617 606
618 607 def _indexfp(self):
619 608 """file object for the revlog's index file"""
620 609 return self.opener(self._indexfile, mode=b"r")
621 610
622 611 def __index_write_fp(self):
623 612 # You should not use this directly; use `_writing` instead
624 613 try:
625 614 f = self.opener(
626 615 self._indexfile, mode=b"r+", checkambig=self._checkambig
627 616 )
628 617 f.seek(0, os.SEEK_END)
629 618 return f
630 619 except IOError as inst:
631 620 if inst.errno != errno.ENOENT:
632 621 raise
633 622 return self.opener(
634 623 self._indexfile, mode=b"w+", checkambig=self._checkambig
635 624 )
636 625
637 626 def __index_new_fp(self):
638 627 # You should not use this unless you are upgrading from an inline revlog
639 628 return self.opener(
640 629 self._indexfile,
641 630 mode=b"w",
642 631 checkambig=self._checkambig,
643 632 atomictemp=True,
644 633 )
645 634
646 635 def _datafp(self, mode=b'r'):
647 636 """file object for the revlog's data file"""
648 637 return self.opener(self._datafile, mode=mode)
649 638
650 639 @contextlib.contextmanager
651 640 def _datareadfp(self, existingfp=None):
652 641 """file object suitable to read data"""
653 642 # Use explicit file handle, if given.
654 643 if existingfp is not None:
655 644 yield existingfp
656 645
657 646 # Use a file handle being actively used for writes, if available.
658 647 # There is some danger in doing this because reads will seek the
659 648 # file. However, _writeentry() performs a SEEK_END before all writes,
660 649 # so we should be safe.
661 650 elif self._writinghandles:
662 651 if self._inline:
663 652 yield self._writinghandles[0]
664 653 else:
665 654 yield self._writinghandles[1]
666 655
667 656 # Otherwise open a new file handle.
668 657 else:
669 658 if self._inline:
670 659 func = self._indexfp
671 660 else:
672 661 func = self._datafp
673 662 with func() as fp:
674 663 yield fp
675 664
676 665 def tiprev(self):
677 666 return len(self.index) - 1
678 667
679 668 def tip(self):
680 669 return self.node(self.tiprev())
681 670
682 671 def __contains__(self, rev):
683 672 return 0 <= rev < len(self)
684 673
685 674 def __len__(self):
686 675 return len(self.index)
687 676
688 677 def __iter__(self):
689 678 return iter(pycompat.xrange(len(self)))
690 679
691 680 def revs(self, start=0, stop=None):
692 681 """iterate over all rev in this revlog (from start to stop)"""
693 682 return storageutil.iterrevs(len(self), start=start, stop=stop)
694 683
695 684 @property
696 685 def nodemap(self):
697 686 msg = (
698 687 b"revlog.nodemap is deprecated, "
699 688 b"use revlog.index.[has_node|rev|get_rev]"
700 689 )
701 690 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
702 691 return self.index.nodemap
703 692
704 693 @property
705 694 def _nodecache(self):
706 695 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
707 696 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
708 697 return self.index.nodemap
709 698
710 699 def hasnode(self, node):
711 700 try:
712 701 self.rev(node)
713 702 return True
714 703 except KeyError:
715 704 return False
716 705
717 706 def candelta(self, baserev, rev):
718 707 """whether two revisions (baserev, rev) can be delta-ed or not"""
719 708 # Disable delta if either rev requires a content-changing flag
720 709 # processor (ex. LFS). This is because such a flag processor can alter
721 710 # the rawtext content that the delta will be based on, and two clients
722 711 # could have the same revlog node with different flags (i.e. different
723 712 # rawtext contents) and the delta could be incompatible.
724 713 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
725 714 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
726 715 ):
727 716 return False
728 717 return True
729 718
730 719 def update_caches(self, transaction):
731 720 if self._nodemap_file is not None:
732 721 if transaction is None:
733 722 nodemaputil.update_persistent_nodemap(self)
734 723 else:
735 724 nodemaputil.setup_persistent_nodemap(transaction, self)
736 725
737 726 def clearcaches(self):
738 727 self._revisioncache = None
739 728 self._chainbasecache.clear()
740 729 self._chunkcache = (0, b'')
741 730 self._pcache = {}
742 731 self._nodemap_docket = None
743 732 self.index.clearcaches()
744 733 # The python code is the one responsible for validating the docket, so we
745 734 # end up having to refresh it here.
746 735 use_nodemap = (
747 736 not self._inline
748 737 and self._nodemap_file is not None
749 738 and util.safehasattr(self.index, 'update_nodemap_data')
750 739 )
751 740 if use_nodemap:
752 741 nodemap_data = nodemaputil.persisted_data(self)
753 742 if nodemap_data is not None:
754 743 self._nodemap_docket = nodemap_data[0]
755 744 self.index.update_nodemap_data(*nodemap_data)
756 745
757 746 def rev(self, node):
758 747 try:
759 748 return self.index.rev(node)
760 749 except TypeError:
761 750 raise
762 751 except error.RevlogError:
763 752 # parsers.c radix tree lookup failed
764 753 if (
765 754 node == self.nodeconstants.wdirid
766 755 or node in self.nodeconstants.wdirfilenodeids
767 756 ):
768 757 raise error.WdirUnsupported
769 758 raise error.LookupError(node, self.display_id, _(b'no node'))
770 759
771 760 # Accessors for index entries.
772 761
773 762 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
774 763 # are flags.
775 764 def start(self, rev):
776 765 return int(self.index[rev][0] >> 16)
777 766
778 767 def flags(self, rev):
779 768 return self.index[rev][0] & 0xFFFF
780 769
781 770 def length(self, rev):
782 771 return self.index[rev][1]
783 772
784 773 def sidedata_length(self, rev):
785 774 if not self.hassidedata:
786 775 return 0
787 776 return self.index[rev][9]
788 777
789 778 def rawsize(self, rev):
790 779 """return the length of the uncompressed text for a given revision"""
791 780 l = self.index[rev][2]
792 781 if l >= 0:
793 782 return l
794 783
795 784 t = self.rawdata(rev)
796 785 return len(t)
797 786
798 787 def size(self, rev):
799 788 """length of non-raw text (processed by a "read" flag processor)"""
800 789 # fast path: if no "read" flag processor could change the content,
801 790 # size is rawsize. note: ELLIPSIS is known to not change the content.
802 791 flags = self.flags(rev)
803 792 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
804 793 return self.rawsize(rev)
805 794
806 795 return len(self.revision(rev, raw=False))
807 796
808 797 def chainbase(self, rev):
809 798 base = self._chainbasecache.get(rev)
810 799 if base is not None:
811 800 return base
812 801
813 802 index = self.index
814 803 iterrev = rev
815 804 base = index[iterrev][3]
816 805 while base != iterrev:
817 806 iterrev = base
818 807 base = index[iterrev][3]
819 808
820 809 self._chainbasecache[rev] = base
821 810 return base
822 811
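`chainbase` follows the fourth index field (the delta base of each revision) until it reaches a revision that is its own base, i.e. a full snapshot. The same walk over a toy index where each entry holds only its base rev:

# toy index: rev 0 is a snapshot, revs 1-3 are deltas on their predecessor
index = [(0,), (0,), (1,), (2,)]

def chainbase(rev):
    base = index[rev][0]
    while base != rev:
        rev = base
        base = index[rev][0]
    return base

assert chainbase(3) == 0  # 3 -> 2 -> 1 -> 0, the snapshot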
823 812 def linkrev(self, rev):
824 813 return self.index[rev][4]
825 814
826 815 def parentrevs(self, rev):
827 816 try:
828 817 entry = self.index[rev]
829 818 except IndexError:
830 819 if rev == wdirrev:
831 820 raise error.WdirUnsupported
832 821 raise
833 822 if entry[5] == nullrev:
834 823 return entry[6], entry[5]
835 824 else:
836 825 return entry[5], entry[6]
837 826
838 827 # fast parentrevs(rev) where rev isn't filtered
839 828 _uncheckedparentrevs = parentrevs
840 829
841 830 def node(self, rev):
842 831 try:
843 832 return self.index[rev][7]
844 833 except IndexError:
845 834 if rev == wdirrev:
846 835 raise error.WdirUnsupported
847 836 raise
848 837
849 838 # Derived from index values.
850 839
851 840 def end(self, rev):
852 841 return self.start(rev) + self.length(rev)
853 842
854 843 def parents(self, node):
855 844 i = self.index
856 845 d = i[self.rev(node)]
857 846 # inline node() to avoid function call overhead
858 847 if d[5] == self.nullid:
859 848 return i[d[6]][7], i[d[5]][7]
860 849 else:
861 850 return i[d[5]][7], i[d[6]][7]
862 851
863 852 def chainlen(self, rev):
864 853 return self._chaininfo(rev)[0]
865 854
866 855 def _chaininfo(self, rev):
867 856 chaininfocache = self._chaininfocache
868 857 if rev in chaininfocache:
869 858 return chaininfocache[rev]
870 859 index = self.index
871 860 generaldelta = self._generaldelta
872 861 iterrev = rev
873 862 e = index[iterrev]
874 863 clen = 0
875 864 compresseddeltalen = 0
876 865 while iterrev != e[3]:
877 866 clen += 1
878 867 compresseddeltalen += e[1]
879 868 if generaldelta:
880 869 iterrev = e[3]
881 870 else:
882 871 iterrev -= 1
883 872 if iterrev in chaininfocache:
884 873 t = chaininfocache[iterrev]
885 874 clen += t[0]
886 875 compresseddeltalen += t[1]
887 876 break
888 877 e = index[iterrev]
889 878 else:
890 879 # Add text length of base since decompressing that also takes
891 880 # work. For cache hits the length is already included.
892 881 compresseddeltalen += e[1]
893 882 r = (clen, compresseddeltalen)
894 883 chaininfocache[rev] = r
895 884 return r
896 885
897 886 def _deltachain(self, rev, stoprev=None):
898 887 """Obtain the delta chain for a revision.
899 888
900 889 ``stoprev`` specifies a revision to stop at. If not specified, we
901 890 stop at the base of the chain.
902 891
903 892 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
904 893 revs in ascending order and ``stopped`` is a bool indicating whether
905 894 ``stoprev`` was hit.
906 895 """
907 896 # Try C implementation.
908 897 try:
909 898 return self.index.deltachain(rev, stoprev, self._generaldelta)
910 899 except AttributeError:
911 900 pass
912 901
913 902 chain = []
914 903
915 904 # Alias to prevent attribute lookup in tight loop.
916 905 index = self.index
917 906 generaldelta = self._generaldelta
918 907
919 908 iterrev = rev
920 909 e = index[iterrev]
921 910 while iterrev != e[3] and iterrev != stoprev:
922 911 chain.append(iterrev)
923 912 if generaldelta:
924 913 iterrev = e[3]
925 914 else:
926 915 iterrev -= 1
927 916 e = index[iterrev]
928 917
929 918 if iterrev == stoprev:
930 919 stopped = True
931 920 else:
932 921 chain.append(iterrev)
933 922 stopped = False
934 923
935 924 chain.reverse()
936 925 return chain, stopped
937 926
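The pure-Python fallback above collects every revision needed to rebuild `rev`, hopping through base pointers under general delta and simply decrementing otherwise. A simplified version of the same loop for the `stoprev=None` case, reusing the toy index shape from the `chainbase` sketch:

def deltachain(index, rev, generaldelta):
    # entry = (base_rev,); collect the chain oldest-first
    chain = []
    e = index[rev]
    while rev != e[0]:
        chain.append(rev)
        rev = e[0] if generaldelta else rev - 1
        e = index[rev]
    chain.append(rev)  # include the base snapshot (the not-stopped case)
    chain.reverse()
    return chain

index = [(0,), (0,), (1,), (2,)]
assert deltachain(index, 3, generaldelta=True) == [0, 1, 2, 3]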
938 927 def ancestors(self, revs, stoprev=0, inclusive=False):
939 928 """Generate the ancestors of 'revs' in reverse revision order.
940 929 Does not generate revs lower than stoprev.
941 930
942 931 See the documentation for ancestor.lazyancestors for more details."""
943 932
944 933 # first, make sure start revisions aren't filtered
945 934 revs = list(revs)
946 935 checkrev = self.node
947 936 for r in revs:
948 937 checkrev(r)
949 938 # and we're sure ancestors aren't filtered as well
950 939
951 940 if rustancestor is not None:
952 941 lazyancestors = rustancestor.LazyAncestors
953 942 arg = self.index
954 943 else:
955 944 lazyancestors = ancestor.lazyancestors
956 945 arg = self._uncheckedparentrevs
957 946 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
958 947
959 948 def descendants(self, revs):
960 949 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
961 950
962 951 def findcommonmissing(self, common=None, heads=None):
963 952 """Return a tuple of the ancestors of common and the ancestors of heads
964 953 that are not ancestors of common. In revset terminology, we return the
965 954 tuple:
966 955
967 956 ::common, (::heads) - (::common)
968 957
969 958 The list is sorted by revision number, meaning it is
970 959 topologically sorted.
971 960
972 961 'heads' and 'common' are both lists of node IDs. If heads is
973 962 not supplied, uses all of the revlog's heads. If common is not
974 963 supplied, uses nullid."""
975 964 if common is None:
976 965 common = [self.nullid]
977 966 if heads is None:
978 967 heads = self.heads()
979 968
980 969 common = [self.rev(n) for n in common]
981 970 heads = [self.rev(n) for n in heads]
982 971
983 972 # we want the ancestors, but inclusive
984 973 class lazyset(object):
985 974 def __init__(self, lazyvalues):
986 975 self.addedvalues = set()
987 976 self.lazyvalues = lazyvalues
988 977
989 978 def __contains__(self, value):
990 979 return value in self.addedvalues or value in self.lazyvalues
991 980
992 981 def __iter__(self):
993 982 added = self.addedvalues
994 983 for r in added:
995 984 yield r
996 985 for r in self.lazyvalues:
997 986 if r not in added:
998 987 yield r
999 988
1000 989 def add(self, value):
1001 990 self.addedvalues.add(value)
1002 991
1003 992 def update(self, values):
1004 993 self.addedvalues.update(values)
1005 994
1006 995 has = lazyset(self.ancestors(common))
1007 996 has.add(nullrev)
1008 997 has.update(common)
1009 998
1010 999 # take all ancestors from heads that aren't in has
1011 1000 missing = set()
1012 1001 visit = collections.deque(r for r in heads if r not in has)
1013 1002 while visit:
1014 1003 r = visit.popleft()
1015 1004 if r in missing:
1016 1005 continue
1017 1006 else:
1018 1007 missing.add(r)
1019 1008 for p in self.parentrevs(r):
1020 1009 if p not in has:
1021 1010 visit.append(p)
1022 1011 missing = list(missing)
1023 1012 missing.sort()
1024 1013 return has, [self.node(miss) for miss in missing]
1025 1014
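The second half of `findcommonmissing` is a plain BFS from the heads that refuses to cross into anything already reachable from `common`. The same idea standalone, on a hypothetical parent map rather than the revlog API:

import collections

def missing_ancestors(parents, heads, has):
    # walk ancestors of `heads`, never entering `has` (::common)
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in parents[r]:
            if p not in has:
                visit.append(p)
    return sorted(missing)

# linear history 0 <- 1 <- 2 <- 3, with ::common = {0, 1}
parents = {0: [], 1: [0], 2: [1], 3: [2]}
assert missing_ancestors(parents, heads=[3], has={0, 1}) == [2, 3]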
1026 1015 def incrementalmissingrevs(self, common=None):
1027 1016 """Return an object that can be used to incrementally compute the
1028 1017 revision numbers of the ancestors of arbitrary sets that are not
1029 1018 ancestors of common. This is an ancestor.incrementalmissingancestors
1030 1019 object.
1031 1020
1032 1021 'common' is a list of revision numbers. If common is not supplied, uses
1033 1022 nullrev.
1034 1023 """
1035 1024 if common is None:
1036 1025 common = [nullrev]
1037 1026
1038 1027 if rustancestor is not None:
1039 1028 return rustancestor.MissingAncestors(self.index, common)
1040 1029 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1041 1030
1042 1031 def findmissingrevs(self, common=None, heads=None):
1043 1032 """Return the revision numbers of the ancestors of heads that
1044 1033 are not ancestors of common.
1045 1034
1046 1035 More specifically, return a list of revision numbers corresponding to
1047 1036 nodes N such that every N satisfies the following constraints:
1048 1037
1049 1038 1. N is an ancestor of some node in 'heads'
1050 1039 2. N is not an ancestor of any node in 'common'
1051 1040
1052 1041 The list is sorted by revision number, meaning it is
1053 1042 topologically sorted.
1054 1043
1055 1044 'heads' and 'common' are both lists of revision numbers. If heads is
1056 1045 not supplied, uses all of the revlog's heads. If common is not
1057 1046 supplied, uses nullid."""
1058 1047 if common is None:
1059 1048 common = [nullrev]
1060 1049 if heads is None:
1061 1050 heads = self.headrevs()
1062 1051
1063 1052 inc = self.incrementalmissingrevs(common=common)
1064 1053 return inc.missingancestors(heads)
1065 1054
1066 1055 def findmissing(self, common=None, heads=None):
1067 1056 """Return the ancestors of heads that are not ancestors of common.
1068 1057
1069 1058 More specifically, return a list of nodes N such that every N
1070 1059 satisfies the following constraints:
1071 1060
1072 1061 1. N is an ancestor of some node in 'heads'
1073 1062 2. N is not an ancestor of any node in 'common'
1074 1063
1075 1064 The list is sorted by revision number, meaning it is
1076 1065 topologically sorted.
1077 1066
1078 1067 'heads' and 'common' are both lists of node IDs. If heads is
1079 1068 not supplied, uses all of the revlog's heads. If common is not
1080 1069 supplied, uses nullid."""
1081 1070 if common is None:
1082 1071 common = [self.nullid]
1083 1072 if heads is None:
1084 1073 heads = self.heads()
1085 1074
1086 1075 common = [self.rev(n) for n in common]
1087 1076 heads = [self.rev(n) for n in heads]
1088 1077
1089 1078 inc = self.incrementalmissingrevs(common=common)
1090 1079 return [self.node(r) for r in inc.missingancestors(heads)]
1091 1080
1092 1081 def nodesbetween(self, roots=None, heads=None):
1093 1082 """Return a topological path from 'roots' to 'heads'.
1094 1083
1095 1084 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1096 1085 topologically sorted list of all nodes N that satisfy both of
1097 1086 these constraints:
1098 1087
1099 1088 1. N is a descendant of some node in 'roots'
1100 1089 2. N is an ancestor of some node in 'heads'
1101 1090
1102 1091 Every node is considered to be both a descendant and an ancestor
1103 1092 of itself, so every reachable node in 'roots' and 'heads' will be
1104 1093 included in 'nodes'.
1105 1094
1106 1095 'outroots' is the list of reachable nodes in 'roots', i.e., the
1107 1096 subset of 'roots' that is returned in 'nodes'. Likewise,
1108 1097 'outheads' is the subset of 'heads' that is also in 'nodes'.
1109 1098
1110 1099 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1111 1100 unspecified, uses nullid as the only root. If 'heads' is
1112 1101 unspecified, uses list of all of the revlog's heads."""
1113 1102 nonodes = ([], [], [])
1114 1103 if roots is not None:
1115 1104 roots = list(roots)
1116 1105 if not roots:
1117 1106 return nonodes
1118 1107 lowestrev = min([self.rev(n) for n in roots])
1119 1108 else:
1120 1109 roots = [self.nullid] # Everybody's a descendant of nullid
1121 1110 lowestrev = nullrev
1122 1111 if (lowestrev == nullrev) and (heads is None):
1123 1112 # We want _all_ the nodes!
1124 1113 return (
1125 1114 [self.node(r) for r in self],
1126 1115 [self.nullid],
1127 1116 list(self.heads()),
1128 1117 )
1129 1118 if heads is None:
1130 1119 # All nodes are ancestors, so the latest ancestor is the last
1131 1120 # node.
1132 1121 highestrev = len(self) - 1
1133 1122 # Set ancestors to None to signal that every node is an ancestor.
1134 1123 ancestors = None
1135 1124 # Set heads to an empty dictionary for later discovery of heads
1136 1125 heads = {}
1137 1126 else:
1138 1127 heads = list(heads)
1139 1128 if not heads:
1140 1129 return nonodes
1141 1130 ancestors = set()
1142 1131 # Turn heads into a dictionary so we can remove 'fake' heads.
1143 1132 # Also, later we will be using it to filter out the heads we can't
1144 1133 # find from roots.
1145 1134 heads = dict.fromkeys(heads, False)
1146 1135 # Start at the top and keep marking parents until we're done.
1147 1136 nodestotag = set(heads)
1148 1137 # Remember where the top was so we can use it as a limit later.
1149 1138 highestrev = max([self.rev(n) for n in nodestotag])
1150 1139 while nodestotag:
1151 1140 # grab a node to tag
1152 1141 n = nodestotag.pop()
1153 1142 # Never tag nullid
1154 1143 if n == self.nullid:
1155 1144 continue
1156 1145 # A node's revision number represents its place in a
1157 1146 # topologically sorted list of nodes.
1158 1147 r = self.rev(n)
1159 1148 if r >= lowestrev:
1160 1149 if n not in ancestors:
1161 1150 # If we are possibly a descendant of one of the roots
1162 1151 # and we haven't already been marked as an ancestor
1163 1152 ancestors.add(n) # Mark as ancestor
1164 1153 # Add non-nullid parents to list of nodes to tag.
1165 1154 nodestotag.update(
1166 1155 [p for p in self.parents(n) if p != self.nullid]
1167 1156 )
1168 1157 elif n in heads: # We've seen it before, is it a fake head?
1169 1158 # So it is, real heads should not be the ancestors of
1170 1159 # any other heads.
1171 1160 heads.pop(n)
1172 1161 if not ancestors:
1173 1162 return nonodes
1174 1163 # Now that we have our set of ancestors, we want to remove any
1175 1164 # roots that are not ancestors.
1176 1165
1177 1166 # If one of the roots was nullid, everything is included anyway.
1178 1167 if lowestrev > nullrev:
1179 1168 # But, since we weren't, let's recompute the lowest rev to not
1180 1169 # include roots that aren't ancestors.
1181 1170
1182 1171 # Filter out roots that aren't ancestors of heads
1183 1172 roots = [root for root in roots if root in ancestors]
1184 1173 # Recompute the lowest revision
1185 1174 if roots:
1186 1175 lowestrev = min([self.rev(root) for root in roots])
1187 1176 else:
1188 1177 # No more roots? Return empty list
1189 1178 return nonodes
1190 1179 else:
1191 1180 # We are descending from nullid, and don't need to care about
1192 1181 # any other roots.
1193 1182 lowestrev = nullrev
1194 1183 roots = [self.nullid]
1195 1184 # Transform our roots list into a set.
1196 1185 descendants = set(roots)
1197 1186 # Also, keep the original roots so we can filter out roots that aren't
1198 1187 # 'real' roots (i.e. are descended from other roots).
1199 1188 roots = descendants.copy()
1200 1189 # Our topologically sorted list of output nodes.
1201 1190 orderedout = []
1202 1191 # Don't start at nullid since we don't want nullid in our output list,
1203 1192 # and if nullid shows up in descendants, empty parents will look like
1204 1193 # they're descendants.
1205 1194 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1206 1195 n = self.node(r)
1207 1196 isdescendant = False
1208 1197 if lowestrev == nullrev: # Everybody is a descendant of nullid
1209 1198 isdescendant = True
1210 1199 elif n in descendants:
1211 1200 # n is already a descendant
1212 1201 isdescendant = True
1213 1202 # This check only needs to be done here because all the roots
1214 1203 # will start being marked as descendants before the loop.
1215 1204 if n in roots:
1216 1205 # If n was a root, check if it's a 'real' root.
1217 1206 p = tuple(self.parents(n))
1218 1207 # If any of its parents are descendants, it's not a root.
1219 1208 if (p[0] in descendants) or (p[1] in descendants):
1220 1209 roots.remove(n)
1221 1210 else:
1222 1211 p = tuple(self.parents(n))
1223 1212 # A node is a descendant if either of its parents are
1224 1213 # descendants. (We seeded the descendants set with the roots
1225 1214 # up there, remember?)
1226 1215 if (p[0] in descendants) or (p[1] in descendants):
1227 1216 descendants.add(n)
1228 1217 isdescendant = True
1229 1218 if isdescendant and ((ancestors is None) or (n in ancestors)):
1230 1219 # Only include nodes that are both descendants and ancestors.
1231 1220 orderedout.append(n)
1232 1221 if (ancestors is not None) and (n in heads):
1233 1222 # We're trying to figure out which heads are reachable
1234 1223 # from roots.
1235 1224 # Mark this head as having been reached
1236 1225 heads[n] = True
1237 1226 elif ancestors is None:
1238 1227 # Otherwise, we're trying to discover the heads.
1239 1228 # Assume this is a head because if it isn't, the next step
1240 1229 # will eventually remove it.
1241 1230 heads[n] = True
1242 1231 # But, obviously its parents aren't.
1243 1232 for p in self.parents(n):
1244 1233 heads.pop(p, None)
1245 1234 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1246 1235 roots = list(roots)
1247 1236 assert orderedout
1248 1237 assert roots
1249 1238 assert heads
1250 1239 return (orderedout, roots, heads)
1251 1240
1252 1241 def headrevs(self, revs=None):
1253 1242 if revs is None:
1254 1243 try:
1255 1244 return self.index.headrevs()
1256 1245 except AttributeError:
1257 1246 return self._headrevs()
1258 1247 if rustdagop is not None:
1259 1248 return rustdagop.headrevs(self.index, revs)
1260 1249 return dagop.headrevs(revs, self._uncheckedparentrevs)
1261 1250
1262 1251 def computephases(self, roots):
1263 1252 return self.index.computephasesmapsets(roots)
1264 1253
1265 1254 def _headrevs(self):
1266 1255 count = len(self)
1267 1256 if not count:
1268 1257 return [nullrev]
1269 1258 # we won't iterate over filtered revs, so nobody is a head at start
1270 1259 ishead = [0] * (count + 1)
1271 1260 index = self.index
1272 1261 for r in self:
1273 1262 ishead[r] = 1 # I may be a head
1274 1263 e = index[r]
1275 1264 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1276 1265 return [r for r, val in enumerate(ishead) if val]
1277 1266
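`_headrevs` starts by assuming every unfiltered revision is a head and then clears the mark on each revision's parents; whatever stays marked has no children. The same trick on a small parent map (with -1 standing in for nullrev):

def headrevs(parents):
    # parents maps rev -> (p1, p2), -1 meaning null
    ishead = [1] * len(parents)
    for r in sorted(parents):
        p1, p2 = parents[r]
        if p1 >= 0:
            ishead[p1] = 0
        if p2 >= 0:
            ishead[p2] = 0
    return [r for r, val in enumerate(ishead) if val]

# two branches off rev 0: heads are 1 and 2
parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}
assert headrevs(parents) == [1, 2]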
1278 1267 def heads(self, start=None, stop=None):
1279 1268 """return the list of all nodes that have no children
1280 1269
1281 1270 if start is specified, only heads that are descendants of
1282 1271 start will be returned
1283 1272 if stop is specified, it will consider all the revs from stop
1284 1273 as if they had no children
1285 1274 """
1286 1275 if start is None and stop is None:
1287 1276 if not len(self):
1288 1277 return [self.nullid]
1289 1278 return [self.node(r) for r in self.headrevs()]
1290 1279
1291 1280 if start is None:
1292 1281 start = nullrev
1293 1282 else:
1294 1283 start = self.rev(start)
1295 1284
1296 1285 stoprevs = {self.rev(n) for n in stop or []}
1297 1286
1298 1287 revs = dagop.headrevssubset(
1299 1288 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1300 1289 )
1301 1290
1302 1291 return [self.node(rev) for rev in revs]
1303 1292
1304 1293 def children(self, node):
1305 1294 """find the children of a given node"""
1306 1295 c = []
1307 1296 p = self.rev(node)
1308 1297 for r in self.revs(start=p + 1):
1309 1298 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1310 1299 if prevs:
1311 1300 for pr in prevs:
1312 1301 if pr == p:
1313 1302 c.append(self.node(r))
1314 1303 elif p == nullrev:
1315 1304 c.append(self.node(r))
1316 1305 return c
1317 1306
1318 1307 def commonancestorsheads(self, a, b):
1319 1308 """calculate all the heads of the common ancestors of nodes a and b"""
1320 1309 a, b = self.rev(a), self.rev(b)
1321 1310 ancs = self._commonancestorsheads(a, b)
1322 1311 return pycompat.maplist(self.node, ancs)
1323 1312
1324 1313 def _commonancestorsheads(self, *revs):
1325 1314 """calculate all the heads of the common ancestors of revs"""
1326 1315 try:
1327 1316 ancs = self.index.commonancestorsheads(*revs)
1328 1317 except (AttributeError, OverflowError): # C implementation failed
1329 1318 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1330 1319 return ancs
1331 1320
1332 1321 def isancestor(self, a, b):
1333 1322 """return True if node a is an ancestor of node b
1334 1323
1335 1324 A revision is considered an ancestor of itself."""
1336 1325 a, b = self.rev(a), self.rev(b)
1337 1326 return self.isancestorrev(a, b)
1338 1327
1339 1328 def isancestorrev(self, a, b):
1340 1329 """return True if revision a is an ancestor of revision b
1341 1330
1342 1331 A revision is considered an ancestor of itself.
1343 1332
1344 1333 The implementation of this is trivial but the use of
1345 1334 reachableroots is not."""
1346 1335 if a == nullrev:
1347 1336 return True
1348 1337 elif a == b:
1349 1338 return True
1350 1339 elif a > b:
1351 1340 return False
1352 1341 return bool(self.reachableroots(a, [b], [a], includepath=False))
1353 1342
1354 1343 def reachableroots(self, minroot, heads, roots, includepath=False):
1355 1344 """return (heads(::(<roots> and <roots>::<heads>)))
1356 1345
1357 1346 If includepath is True, return (<roots>::<heads>)."""
1358 1347 try:
1359 1348 return self.index.reachableroots2(
1360 1349 minroot, heads, roots, includepath
1361 1350 )
1362 1351 except AttributeError:
1363 1352 return dagop._reachablerootspure(
1364 1353 self.parentrevs, minroot, roots, heads, includepath
1365 1354 )
1366 1355
1367 1356 def ancestor(self, a, b):
1368 1357 """calculate the "best" common ancestor of nodes a and b"""
1369 1358
1370 1359 a, b = self.rev(a), self.rev(b)
1371 1360 try:
1372 1361 ancs = self.index.ancestors(a, b)
1373 1362 except (AttributeError, OverflowError):
1374 1363 ancs = ancestor.ancestors(self.parentrevs, a, b)
1375 1364 if ancs:
1376 1365 # choose a consistent winner when there's a tie
1377 1366 return min(map(self.node, ancs))
1378 1367 return self.nullid
1379 1368
1380 1369 def _match(self, id):
1381 1370 if isinstance(id, int):
1382 1371 # rev
1383 1372 return self.node(id)
1384 1373 if len(id) == self.nodeconstants.nodelen:
1385 1374 # possibly a binary node
1386 1375 # odds of a binary node being all hex in ASCII are 1 in 10**25
1387 1376 try:
1388 1377 node = id
1389 1378 self.rev(node) # quick search the index
1390 1379 return node
1391 1380 except error.LookupError:
1392 1381 pass # may be partial hex id
1393 1382 try:
1394 1383 # str(rev)
1395 1384 rev = int(id)
1396 1385 if b"%d" % rev != id:
1397 1386 raise ValueError
1398 1387 if rev < 0:
1399 1388 rev = len(self) + rev
1400 1389 if rev < 0 or rev >= len(self):
1401 1390 raise ValueError
1402 1391 return self.node(rev)
1403 1392 except (ValueError, OverflowError):
1404 1393 pass
1405 1394 if len(id) == 2 * self.nodeconstants.nodelen:
1406 1395 try:
1407 1396 # a full hex nodeid?
1408 1397 node = bin(id)
1409 1398 self.rev(node)
1410 1399 return node
1411 1400 except (TypeError, error.LookupError):
1412 1401 pass
1413 1402
1414 1403 def _partialmatch(self, id):
1415 1404 # we don't care about wdirfilenodeids as they should always be full hashes
1416 1405 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1417 1406 try:
1418 1407 partial = self.index.partialmatch(id)
1419 1408 if partial and self.hasnode(partial):
1420 1409 if maybewdir:
1421 1410 # single 'ff...' match in radix tree, ambiguous with wdir
1422 1411 raise error.RevlogError
1423 1412 return partial
1424 1413 if maybewdir:
1425 1414 # no 'ff...' match in radix tree, wdir identified
1426 1415 raise error.WdirUnsupported
1427 1416 return None
1428 1417 except error.RevlogError:
1429 1418 # parsers.c radix tree lookup gave multiple matches
1430 1419 # fast path: for unfiltered changelog, radix tree is accurate
1431 1420 if not getattr(self, 'filteredrevs', None):
1432 1421 raise error.AmbiguousPrefixLookupError(
1433 1422 id, self.display_id, _(b'ambiguous identifier')
1434 1423 )
1435 1424 # fall through to slow path that filters hidden revisions
1436 1425 except (AttributeError, ValueError):
1437 1426 # we are pure python, or key was too short to search radix tree
1438 1427 pass
1439 1428
1440 1429 if id in self._pcache:
1441 1430 return self._pcache[id]
1442 1431
1443 1432 if len(id) <= 40:
1444 1433 try:
1445 1434 # hex(node)[:...]
1446 1435 l = len(id) // 2 # grab an even number of digits
1447 1436 prefix = bin(id[: l * 2])
1448 1437 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1449 1438 nl = [
1450 1439 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1451 1440 ]
1452 1441 if self.nodeconstants.nullhex.startswith(id):
1453 1442 nl.append(self.nullid)
1454 1443 if len(nl) > 0:
1455 1444 if len(nl) == 1 and not maybewdir:
1456 1445 self._pcache[id] = nl[0]
1457 1446 return nl[0]
1458 1447 raise error.AmbiguousPrefixLookupError(
1459 1448 id, self.display_id, _(b'ambiguous identifier')
1460 1449 )
1461 1450 if maybewdir:
1462 1451 raise error.WdirUnsupported
1463 1452 return None
1464 1453 except TypeError:
1465 1454 pass
1466 1455
1467 1456 def lookup(self, id):
1468 1457 """locate a node based on:
1469 1458 - revision number or str(revision number)
1470 1459 - nodeid or subset of hex nodeid
1471 1460 """
1472 1461 n = self._match(id)
1473 1462 if n is not None:
1474 1463 return n
1475 1464 n = self._partialmatch(id)
1476 1465 if n:
1477 1466 return n
1478 1467
1479 1468 raise error.LookupError(id, self.display_id, _(b'no match found'))
1480 1469
1481 1470 def shortest(self, node, minlength=1):
1482 1471 """Find the shortest unambiguous prefix that matches node."""
1483 1472
1484 1473 def isvalid(prefix):
1485 1474 try:
1486 1475 matchednode = self._partialmatch(prefix)
1487 1476 except error.AmbiguousPrefixLookupError:
1488 1477 return False
1489 1478 except error.WdirUnsupported:
1490 1479 # single 'ff...' match
1491 1480 return True
1492 1481 if matchednode is None:
1493 1482 raise error.LookupError(node, self.display_id, _(b'no node'))
1494 1483 return True
1495 1484
1496 1485 def maybewdir(prefix):
1497 1486 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1498 1487
1499 1488 hexnode = hex(node)
1500 1489
1501 1490 def disambiguate(hexnode, minlength):
1502 1491 """Disambiguate against wdirid."""
1503 1492 for length in range(minlength, len(hexnode) + 1):
1504 1493 prefix = hexnode[:length]
1505 1494 if not maybewdir(prefix):
1506 1495 return prefix
1507 1496
1508 1497 if not getattr(self, 'filteredrevs', None):
1509 1498 try:
1510 1499 length = max(self.index.shortest(node), minlength)
1511 1500 return disambiguate(hexnode, length)
1512 1501 except error.RevlogError:
1513 1502 if node != self.nodeconstants.wdirid:
1514 1503 raise error.LookupError(
1515 1504 node, self.display_id, _(b'no node')
1516 1505 )
1517 1506 except AttributeError:
1518 1507 # Fall through to pure code
1519 1508 pass
1520 1509
1521 1510 if node == self.nodeconstants.wdirid:
1522 1511 for length in range(minlength, len(hexnode) + 1):
1523 1512 prefix = hexnode[:length]
1524 1513 if isvalid(prefix):
1525 1514 return prefix
1526 1515
1527 1516 for length in range(minlength, len(hexnode) + 1):
1528 1517 prefix = hexnode[:length]
1529 1518 if isvalid(prefix):
1530 1519 return disambiguate(hexnode, length)
1531 1520
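Setting the wdir special case aside, `shortest` just grows the prefix until it is unambiguous. A simplified sketch against a hypothetical flat list of hex node ids instead of the index's radix tree:

def shortest_unique_prefix(hexnode, all_hexnodes, minlength=1):
    # grow the prefix until exactly one known node matches it
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if sum(n.startswith(prefix) for n in all_hexnodes) == 1:
            return prefix
    return hexnode

nodes = ['abc123', 'abd456', '123abc']
assert shortest_unique_prefix('abc123', nodes) == 'abc'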
1532 1521 def cmp(self, node, text):
1533 1522 """compare text with a given file revision
1534 1523
1535 1524 returns True if text is different than what is stored.
1536 1525 """
1537 1526 p1, p2 = self.parents(node)
1538 1527 return storageutil.hashrevisionsha1(text, p1, p2) != node
1539 1528
1540 1529 def _cachesegment(self, offset, data):
1541 1530 """Add a segment to the revlog cache.
1542 1531
1543 1532 Accepts an absolute offset and the data that is at that location.
1544 1533 """
1545 1534 o, d = self._chunkcache
1546 1535 # try to add to existing cache
1547 1536 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1548 1537 self._chunkcache = o, d + data
1549 1538 else:
1550 1539 self._chunkcache = offset, data
1551 1540
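The cache only grows when the new segment lands exactly at the end of what is already cached and the combined size stays under `_chunksize`; any gap or oversized append simply replaces the cache. Standalone:

_chunksize = 1048576

def cachesegment(cache, offset, data):
    # (o, d) is the cached window; extend only on a contiguous, small append
    o, d = cache
    if o + len(d) == offset and len(d) + len(data) < _chunksize:
        return o, d + data
    return offset, data

cache = (0, b'abcd')
assert cachesegment(cache, 4, b'ef') == (0, b'abcdef')  # contiguous: extend
assert cachesegment(cache, 100, b'ef') == (100, b'ef')  # gap: replace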
1552 1541 def _readsegment(self, offset, length, df=None):
1553 1542 """Load a segment of raw data from the revlog.
1554 1543
1555 1544 Accepts an absolute offset, length to read, and an optional existing
1556 1545 file handle to read from.
1557 1546
1558 1547 If an existing file handle is passed, it will be seeked and the
1559 1548 original seek position will NOT be restored.
1560 1549
1561 1550 Returns a str or buffer of raw byte data.
1562 1551
1563 1552 Raises if the requested number of bytes could not be read.
1564 1553 """
1565 1554 # Cache data both forward and backward around the requested
1566 1555 # data, in a fixed size window. This helps speed up operations
1567 1556 # involving reading the revlog backwards.
1568 1557 cachesize = self._chunkcachesize
1569 1558 realoffset = offset & ~(cachesize - 1)
1570 1559 reallength = (
1571 1560 (offset + length + cachesize) & ~(cachesize - 1)
1572 1561 ) - realoffset
1573 1562 with self._datareadfp(df) as df:
1574 1563 df.seek(realoffset)
1575 1564 d = df.read(reallength)
1576 1565
1577 1566 self._cachesegment(realoffset, d)
1578 1567 if offset != realoffset or reallength != length:
1579 1568 startoffset = offset - realoffset
1580 1569 if len(d) - startoffset < length:
1581 1570 raise error.RevlogError(
1582 1571 _(
1583 1572 b'partial read of revlog %s; expected %d bytes from '
1584 1573 b'offset %d, got %d'
1585 1574 )
1586 1575 % (
1587 1576 self._indexfile if self._inline else self._datafile,
1588 1577 length,
1589 1578 offset,
1590 1579 len(d) - startoffset,
1591 1580 )
1592 1581 )
1593 1582
1594 1583 return util.buffer(d, startoffset, length)
1595 1584
1596 1585 if len(d) < length:
1597 1586 raise error.RevlogError(
1598 1587 _(
1599 1588 b'partial read of revlog %s; expected %d bytes from offset '
1600 1589 b'%d, got %d'
1601 1590 )
1602 1591 % (
1603 1592 self._indexfile if self._inline else self._datafile,
1604 1593 length,
1605 1594 offset,
1606 1595 len(d),
1607 1596 )
1608 1597 )
1609 1598
1610 1599 return d
1611 1600
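    # A self-contained sketch of the cache-window alignment computed above,
    # using hypothetical values; ``cachesize`` is assumed to be a power of
    # two (as the revlog configuration enforces for the chunk cache size).
    def _alignment_sketch():
        cachesize = 65536
        offset, length = 70000, 100
        realoffset = offset & ~(cachesize - 1)  # rounds down to 65536
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset  # rounds up past the request: 65536
        # the aligned window [65536, 131072) covers the requested
        # [70000, 70100) with slack on both sides for the chunk cache
        assert realoffset <= offset
        assert offset + length <= realoffset + reallength
        return realoffset, reallength
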
1612 1601 def _getsegment(self, offset, length, df=None):
1613 1602 """Obtain a segment of raw data from the revlog.
1614 1603
1615 1604 Accepts an absolute offset, length of bytes to obtain, and an
1616 1605 optional file handle to the already-opened revlog. If the file
1617 1606 handle is used, its original seek position will not be preserved.
1618 1607
1619 1608 Requests for data may be returned from a cache.
1620 1609
1621 1610 Returns a str or a buffer instance of raw byte data.
1622 1611 """
1623 1612 o, d = self._chunkcache
1624 1613 l = len(d)
1625 1614
1626 1615 # is it in the cache?
1627 1616 cachestart = offset - o
1628 1617 cacheend = cachestart + length
1629 1618 if cachestart >= 0 and cacheend <= l:
1630 1619 if cachestart == 0 and cacheend == l:
1631 1620 return d # avoid a copy
1632 1621 return util.buffer(d, cachestart, cacheend - cachestart)
1633 1622
1634 1623 return self._readsegment(offset, length, df=df)
1635 1624
1636 1625 def _getsegmentforrevs(self, startrev, endrev, df=None):
1637 1626 """Obtain a segment of raw data corresponding to a range of revisions.
1638 1627
1639 1628 Accepts the start and end revisions and an optional already-open
1640 1629 file handle to be used for reading. If the file handle is used, its
1641 1630 seek position will not be preserved.
1642 1631
1643 1632 Requests for data may be satisfied by a cache.
1644 1633
1645 1634 Returns a 2-tuple of (offset, data) for the requested range of
1646 1635 revisions. Offset is the integer offset from the beginning of the
1647 1636 revlog and data is a str or buffer of the raw byte data.
1648 1637
1649 1638 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1650 1639 to determine where each revision's data begins and ends.
1651 1640 """
1652 1641 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1653 1642 # (functions are expensive).
1654 1643 index = self.index
1655 1644 istart = index[startrev]
1656 1645 start = int(istart[0] >> 16)
1657 1646 if startrev == endrev:
1658 1647 end = start + istart[1]
1659 1648 else:
1660 1649 iend = index[endrev]
1661 1650 end = int(iend[0] >> 16) + iend[1]
1662 1651
1663 1652 if self._inline:
1664 1653 start += (startrev + 1) * self.index.entry_size
1665 1654 end += (endrev + 1) * self.index.entry_size
1666 1655 length = end - start
1667 1656
1668 1657 return start, self._getsegment(start, length, df=df)
1669 1658
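    # Hedged usage sketch (a hypothetical helper, not part of the revlog
    # API): carving a single revision's chunk out of the segment returned
    # above, assuming a non-inline revlog ``rl``, mirroring ``_chunks`` below.
    def _chunk_from_segment_sketch(rl, rev, firstrev, lastrev):
        offset, data = rl._getsegmentforrevs(firstrev, lastrev)
        chunkstart = rl.start(rev)  # absolute offset of this rev's data
        return util.buffer(data, chunkstart - offset, rl.length(rev))
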
1670 1659 def _chunk(self, rev, df=None):
1671 1660 """Obtain a single decompressed chunk for a revision.
1672 1661
1673 1662 Accepts an integer revision and an optional already-open file handle
1674 1663 to be used for reading. If used, the seek position of the file will not
1675 1664 be preserved.
1676 1665
1677 1666 Returns a str holding uncompressed data for the requested revision.
1678 1667 """
1679 1668 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1680 1669
1681 1670 def _chunks(self, revs, df=None, targetsize=None):
1682 1671 """Obtain decompressed chunks for the specified revisions.
1683 1672
1684 1673 Accepts an iterable of numeric revisions that are assumed to be in
1685 1674 ascending order. Also accepts an optional already-open file handle
1686 1675 to be used for reading. If used, the seek position of the file will
1687 1676 not be preserved.
1688 1677
1689 1678 This function is similar to calling ``self._chunk()`` multiple times,
1690 1679 but is faster.
1691 1680
1692 1681 Returns a list with decompressed data for each requested revision.
1693 1682 """
1694 1683 if not revs:
1695 1684 return []
1696 1685 start = self.start
1697 1686 length = self.length
1698 1687 inline = self._inline
1699 1688 iosize = self.index.entry_size
1700 1689 buffer = util.buffer
1701 1690
1702 1691 l = []
1703 1692 ladd = l.append
1704 1693
1705 1694 if not self._withsparseread:
1706 1695 slicedchunks = (revs,)
1707 1696 else:
1708 1697 slicedchunks = deltautil.slicechunk(
1709 1698 self, revs, targetsize=targetsize
1710 1699 )
1711 1700
1712 1701 for revschunk in slicedchunks:
1713 1702 firstrev = revschunk[0]
1714 1703 # Skip trailing revisions with empty diff
1715 1704 for lastrev in revschunk[::-1]:
1716 1705 if length(lastrev) != 0:
1717 1706 break
1718 1707
1719 1708 try:
1720 1709 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1721 1710 except OverflowError:
1722 1711 # issue4215 - we can't cache a run of chunks greater than
1723 1712 # 2G on Windows
1724 1713 return [self._chunk(rev, df=df) for rev in revschunk]
1725 1714
1726 1715 decomp = self.decompress
1727 1716 for rev in revschunk:
1728 1717 chunkstart = start(rev)
1729 1718 if inline:
1730 1719 chunkstart += (rev + 1) * iosize
1731 1720 chunklength = length(rev)
1732 1721 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1733 1722
1734 1723 return l
1735 1724
1736 1725 def _chunkclear(self):
1737 1726 """Clear the raw chunk cache."""
1738 1727 self._chunkcache = (0, b'')
1739 1728
1740 1729 def deltaparent(self, rev):
1741 1730 """return deltaparent of the given revision"""
1742 1731 base = self.index[rev][3]
1743 1732 if base == rev:
1744 1733 return nullrev
1745 1734 elif self._generaldelta:
1746 1735 return base
1747 1736 else:
1748 1737 return rev - 1
1749 1738
1750 1739 def issnapshot(self, rev):
1751 1740 """tells whether rev is a snapshot"""
1752 1741 if not self._sparserevlog:
1753 1742 return self.deltaparent(rev) == nullrev
1754 1743 elif util.safehasattr(self.index, b'issnapshot'):
1755 1744 # directly assign the method to cache the testing and access
1756 1745 self.issnapshot = self.index.issnapshot
1757 1746 return self.issnapshot(rev)
1758 1747 if rev == nullrev:
1759 1748 return True
1760 1749 entry = self.index[rev]
1761 1750 base = entry[3]
1762 1751 if base == rev:
1763 1752 return True
1764 1753 if base == nullrev:
1765 1754 return True
1766 1755 p1 = entry[5]
1767 1756 p2 = entry[6]
1768 1757 if base == p1 or base == p2:
1769 1758 return False
1770 1759 return self.issnapshot(base)
1771 1760
1772 1761 def snapshotdepth(self, rev):
1773 1762 """number of snapshots in the chain before this one"""
1774 1763 if not self.issnapshot(rev):
1775 1764 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1776 1765 return len(self._deltachain(rev)[0]) - 1
1777 1766
1778 1767 def revdiff(self, rev1, rev2):
1779 1768 """return or calculate a delta between two revisions
1780 1769
1781 1770 The delta calculated is in binary form and is intended to be written to
1782 1771 revlog data directly. So this function needs raw revision data.
1783 1772 """
1784 1773 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1785 1774 return bytes(self._chunk(rev2))
1786 1775
1787 1776 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1788 1777
1789 1778 def _processflags(self, text, flags, operation, raw=False):
1790 1779 """deprecated entry point to access flag processors"""
1791 1780 msg = b'_processflag(...) use the specialized variant'
1792 1781 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1793 1782 if raw:
1794 1783 return text, flagutil.processflagsraw(self, text, flags)
1795 1784 elif operation == b'read':
1796 1785 return flagutil.processflagsread(self, text, flags)
1797 1786 else: # write operation
1798 1787 return flagutil.processflagswrite(self, text, flags)
1799 1788
1800 1789 def revision(self, nodeorrev, _df=None, raw=False):
1801 1790 """return an uncompressed revision of a given node or revision
1802 1791 number.
1803 1792
1804 1793 _df - an existing file handle to read from. (internal-only)
1805 1794 raw - an optional argument specifying if the revision data is to be
1806 1795 treated as raw data when applying flag transforms. 'raw' should be set
1807 1796 to True when generating changegroups or in debug commands.
1808 1797 """
1809 1798 if raw:
1810 1799 msg = (
1811 1800 b'revlog.revision(..., raw=True) is deprecated, '
1812 1801 b'use revlog.rawdata(...)'
1813 1802 )
1814 1803 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1815 1804 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1816 1805
1817 1806 def sidedata(self, nodeorrev, _df=None):
1818 1807 """a map of extra data related to the changeset but not part of the hash
1819 1808
1820 1809 This function currently returns a dictionary. However, a more
1821 1810 advanced mapping object will likely be used in the future for more
1822 1811 efficient/lazy code.
1823 1812 """
1824 1813 return self._revisiondata(nodeorrev, _df)[1]
1825 1814
1826 1815 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1827 1816 # deal with <nodeorrev> argument type
1828 1817 if isinstance(nodeorrev, int):
1829 1818 rev = nodeorrev
1830 1819 node = self.node(rev)
1831 1820 else:
1832 1821 node = nodeorrev
1833 1822 rev = None
1834 1823
1835 1824 # fast path the special `nullid` rev
1836 1825 if node == self.nullid:
1837 1826 return b"", {}
1838 1827
1839 1828 # ``rawtext`` is the text as stored inside the revlog. Might be the
1840 1829 # revision or might need to be processed to retrieve the revision.
1841 1830 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1842 1831
1843 1832 if self.hassidedata:
1844 1833 if rev is None:
1845 1834 rev = self.rev(node)
1846 1835 sidedata = self._sidedata(rev)
1847 1836 else:
1848 1837 sidedata = {}
1849 1838
1850 1839 if raw and validated:
1851 1840 # if we don't want to process the raw text and that raw
1852 1841 # text is cached, we can exit early.
1853 1842 return rawtext, sidedata
1854 1843 if rev is None:
1855 1844 rev = self.rev(node)
1856 1845 # the revlog's flags for this revision
1857 1846 # (they usually alter its state or content)
1858 1847 flags = self.flags(rev)
1859 1848
1860 1849 if validated and flags == REVIDX_DEFAULT_FLAGS:
1861 1850 # no extra flags set, no flag processor runs, text = rawtext
1862 1851 return rawtext, sidedata
1863 1852
1864 1853 if raw:
1865 1854 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1866 1855 text = rawtext
1867 1856 else:
1868 1857 r = flagutil.processflagsread(self, rawtext, flags)
1869 1858 text, validatehash = r
1870 1859 if validatehash:
1871 1860 self.checkhash(text, node, rev=rev)
1872 1861 if not validated:
1873 1862 self._revisioncache = (node, rev, rawtext)
1874 1863
1875 1864 return text, sidedata
1876 1865
1877 1866 def _rawtext(self, node, rev, _df=None):
1878 1867 """return the possibly unvalidated rawtext for a revision
1879 1868
1880 1869 returns (rev, rawtext, validated)
1881 1870 """
1882 1871
1883 1872 # revision in the cache (could be useful to apply delta)
1884 1873 cachedrev = None
1885 1874 # An intermediate text to apply deltas to
1886 1875 basetext = None
1887 1876
1888 1877 # Check if we have the entry in cache
1889 1878 # The cache entry looks like (node, rev, rawtext)
1890 1879 if self._revisioncache:
1891 1880 if self._revisioncache[0] == node:
1892 1881 return (rev, self._revisioncache[2], True)
1893 1882 cachedrev = self._revisioncache[1]
1894 1883
1895 1884 if rev is None:
1896 1885 rev = self.rev(node)
1897 1886
1898 1887 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1899 1888 if stopped:
1900 1889 basetext = self._revisioncache[2]
1901 1890
1902 1891 # drop cache to save memory, the caller is expected to
1903 1892 # update self._revisioncache after validating the text
1904 1893 self._revisioncache = None
1905 1894
1906 1895 targetsize = None
1907 1896 rawsize = self.index[rev][2]
1908 1897 if 0 <= rawsize:
1909 1898 targetsize = 4 * rawsize
1910 1899
1911 1900 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1912 1901 if basetext is None:
1913 1902 basetext = bytes(bins[0])
1914 1903 bins = bins[1:]
1915 1904
1916 1905 rawtext = mdiff.patches(basetext, bins)
1917 1906 del basetext # let us have a chance to free memory early
1918 1907 return (rev, rawtext, False)
1919 1908
1920 1909 def _sidedata(self, rev):
1921 1910 """Return the sidedata for a given revision number."""
1922 1911 index_entry = self.index[rev]
1923 1912 sidedata_offset = index_entry[8]
1924 1913 sidedata_size = index_entry[9]
1925 1914
1926 1915 if self._inline:
1927 1916 sidedata_offset += self.index.entry_size * (1 + rev)
1928 1917 if sidedata_size == 0:
1929 1918 return {}
1930 1919
1931 1920 segment = self._getsegment(sidedata_offset, sidedata_size)
1932 1921 sidedata = sidedatautil.deserialize_sidedata(segment)
1933 1922 return sidedata
1934 1923
1935 1924 def rawdata(self, nodeorrev, _df=None):
1936 1925 """return an uncompressed raw data of a given node or revision number.
1937 1926
1938 1927 _df - an existing file handle to read from. (internal-only)
1939 1928 """
1940 1929 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1941 1930
1942 1931 def hash(self, text, p1, p2):
1943 1932 """Compute a node hash.
1944 1933
1945 1934 Available as a function so that subclasses can replace the hash
1946 1935 as needed.
1947 1936 """
1948 1937 return storageutil.hashrevisionsha1(text, p1, p2)
1949 1938
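    # A self-contained sketch of the classic node hash computed by
    # ``storageutil.hashrevisionsha1``: SHA-1 over the two parent nodes in
    # sorted order, followed by the text. The toy helper below uses plain
    # ``hashlib`` for illustration, not Mercurial's own hash wrappers.
    def _node_hash_sketch(text, p1, p2):
        import hashlib

        a, b = sorted([p1, p2])
        return hashlib.sha1(a + b + text).digest()
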
1950 1939 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1951 1940 """Check node hash integrity.
1952 1941
1953 1942 Available as a function so that subclasses can extend hash mismatch
1954 1943 behaviors as needed.
1955 1944 """
1956 1945 try:
1957 1946 if p1 is None and p2 is None:
1958 1947 p1, p2 = self.parents(node)
1959 1948 if node != self.hash(text, p1, p2):
1960 1949 # Clear the revision cache on hash failure. The revision cache
1961 1950 # only stores the raw revision and clearing the cache does have
1962 1951 # the side-effect that we won't have a cache hit when the raw
1963 1952 # revision data is accessed. But this case should be rare and
1964 1953 # it is extra work to teach the cache about the hash
1965 1954 # verification state.
1966 1955 if self._revisioncache and self._revisioncache[0] == node:
1967 1956 self._revisioncache = None
1968 1957
1969 1958 revornode = rev
1970 1959 if revornode is None:
1971 1960 revornode = templatefilters.short(hex(node))
1972 1961 raise error.RevlogError(
1973 1962 _(b"integrity check failed on %s:%s")
1974 1963 % (self.display_id, pycompat.bytestr(revornode))
1975 1964 )
1976 1965 except error.RevlogError:
1977 1966 if self._censorable and storageutil.iscensoredtext(text):
1978 1967 raise error.CensoredNodeError(self.display_id, node, text)
1979 1968 raise
1980 1969
1981 1970 def _enforceinlinesize(self, tr):
1982 1971 """Check if the revlog is too big for inline and convert if so.
1983 1972
1984 1973 This should be called after revisions are added to the revlog. If the
1985 1974 revlog has grown too large to be an inline revlog, it will convert it
1986 1975 to use multiple index and data files.
1987 1976 """
1988 1977 tiprev = len(self) - 1
1989 1978 total_size = self.start(tiprev) + self.length(tiprev)
1990 1979 if not self._inline or total_size < _maxinline:
1991 1980 return
1992 1981
1993 1982 troffset = tr.findoffset(self._indexfile)
1994 1983 if troffset is None:
1995 1984 raise error.RevlogError(
1996 1985 _(b"%s not found in the transaction") % self._indexfile
1997 1986 )
1998 1987 trindex = 0
1999 1988 tr.add(self._datafile, 0)
2000 1989
2001 1990 existing_handles = False
2002 1991 if self._writinghandles is not None:
2003 1992 existing_handles = True
2004 1993 fp = self._writinghandles[0]
2005 1994 fp.flush()
2006 1995 fp.close()
2007 1996 # We can't use the cached file handle after close(). So prevent
2008 1997 # its usage.
2009 1998 self._writinghandles = None
2010 1999
2011 2000 new_dfh = self._datafp(b'w+')
2012 2001 new_dfh.truncate(0) # drop any potentially existing data
2013 2002 try:
2014 2003 with self._indexfp() as read_ifh:
2015 2004 for r in self:
2016 2005 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2017 2006 if troffset <= self.start(r):
2018 2007 trindex = r
2019 2008 new_dfh.flush()
2020 2009
2021 2010 with self.__index_new_fp() as fp:
2022 2011 self._format_flags &= ~FLAG_INLINE_DATA
2023 2012 self._inline = False
2024 2013 for i in self:
2025 2014 e = self.index.entry_binary(i)
2026 2015 if i == 0:
2027 2016 header = self._format_flags | self._format_version
2028 2017 header = self.index.pack_header(header)
2029 2018 e = header + e
2030 2019 fp.write(e)
2031 2020 # the temp file replaces the real index when we exit the context
2032 2021 # manager
2033 2022
2034 2023 tr.replace(self._indexfile, trindex * self.index.entry_size)
2035 2024 nodemaputil.setup_persistent_nodemap(tr, self)
2036 2025 self._chunkclear()
2037 2026
2038 2027 if existing_handles:
2039 2028 # switched from inline to conventional; reopen the index
2040 2029 ifh = self.__index_write_fp()
2041 2030 self._writinghandles = (ifh, new_dfh)
2042 2031 new_dfh = None
2043 2032 finally:
2044 2033 if new_dfh is not None:
2045 2034 new_dfh.close()
2046 2035
2047 2036 def _nodeduplicatecallback(self, transaction, node):
2048 2037 """called when trying to add a node already stored."""
2049 2038
2050 2039 @contextlib.contextmanager
2051 2040 def _writing(self, transaction):
2052 2041 if self._writinghandles is not None:
2053 2042 yield
2054 2043 else:
2055 2044 r = len(self)
2056 2045 dsize = 0
2057 2046 if r:
2058 2047 dsize = self.end(r - 1)
2059 2048 dfh = None
2060 2049 if not self._inline:
2061 2050 try:
2062 2051 dfh = self._datafp(b"r+")
2063 2052 dfh.seek(0, os.SEEK_END)
2064 2053 except IOError as inst:
2065 2054 if inst.errno != errno.ENOENT:
2066 2055 raise
2067 2056 dfh = self._datafp(b"w+")
2068 2057 transaction.add(self._datafile, dsize)
2069 2058 try:
2070 2059 isize = r * self.index.entry_size
2071 2060 ifh = self.__index_write_fp()
2072 2061 if self._inline:
2073 2062 transaction.add(self._indexfile, dsize + isize)
2074 2063 else:
2075 2064 transaction.add(self._indexfile, isize)
2076 2065 try:
2077 2066 self._writinghandles = (ifh, dfh)
2078 2067 try:
2079 2068 yield
2080 2069 finally:
2081 2070 self._writinghandles = None
2082 2071 finally:
2083 2072 ifh.close()
2084 2073 finally:
2085 2074 if dfh is not None:
2086 2075 dfh.close()
2087 2076
2088 2077 def addrevision(
2089 2078 self,
2090 2079 text,
2091 2080 transaction,
2092 2081 link,
2093 2082 p1,
2094 2083 p2,
2095 2084 cachedelta=None,
2096 2085 node=None,
2097 2086 flags=REVIDX_DEFAULT_FLAGS,
2098 2087 deltacomputer=None,
2099 2088 sidedata=None,
2100 2089 ):
2101 2090 """add a revision to the log
2102 2091
2103 2092 text - the revision data to add
2104 2093 transaction - the transaction object used for rollback
2105 2094 link - the linkrev data to add
2106 2095 p1, p2 - the parent nodeids of the revision
2107 2096 cachedelta - an optional precomputed delta
2108 2097 node - nodeid of revision; typically node is not specified, and it is
2109 2098 computed by default as hash(text, p1, p2); however, subclasses might
2110 2099 use a different hashing method (and override checkhash() in that case)
2111 2100 flags - the known flags to set on the revision
2112 2101 deltacomputer - an optional deltacomputer instance shared between
2113 2102 multiple calls
2114 2103 """
2115 2104 if link == nullrev:
2116 2105 raise error.RevlogError(
2117 2106 _(b"attempted to add linkrev -1 to %s") % self.display_id
2118 2107 )
2119 2108
2120 2109 if sidedata is None:
2121 2110 sidedata = {}
2122 2111 elif sidedata and not self.hassidedata:
2123 2112 raise error.ProgrammingError(
2124 2113 _(b"trying to add sidedata to a revlog that does not support them")
2125 2114 )
2126 2115
2127 2116 if flags:
2128 2117 node = node or self.hash(text, p1, p2)
2129 2118
2130 2119 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2131 2120
2132 2121 # If the flag processor modifies the revision data, ignore any provided
2133 2122 # cachedelta.
2134 2123 if rawtext != text:
2135 2124 cachedelta = None
2136 2125
2137 2126 if len(rawtext) > _maxentrysize:
2138 2127 raise error.RevlogError(
2139 2128 _(
2140 2129 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2141 2130 )
2142 2131 % (self.display_id, len(rawtext))
2143 2132 )
2144 2133
2145 2134 node = node or self.hash(rawtext, p1, p2)
2146 2135 rev = self.index.get_rev(node)
2147 2136 if rev is not None:
2148 2137 return rev
2149 2138
2150 2139 if validatehash:
2151 2140 self.checkhash(rawtext, node, p1=p1, p2=p2)
2152 2141
2153 2142 return self.addrawrevision(
2154 2143 rawtext,
2155 2144 transaction,
2156 2145 link,
2157 2146 p1,
2158 2147 p2,
2159 2148 node,
2160 2149 flags,
2161 2150 cachedelta=cachedelta,
2162 2151 deltacomputer=deltacomputer,
2163 2152 sidedata=sidedata,
2164 2153 )
2165 2154
2166 2155 def addrawrevision(
2167 2156 self,
2168 2157 rawtext,
2169 2158 transaction,
2170 2159 link,
2171 2160 p1,
2172 2161 p2,
2173 2162 node,
2174 2163 flags,
2175 2164 cachedelta=None,
2176 2165 deltacomputer=None,
2177 2166 sidedata=None,
2178 2167 ):
2179 2168 """add a raw revision with known flags, node and parents
2180 2169 useful when reusing a revision not stored in this revlog (e.g. received
2181 2170 over the wire, or read from an external bundle).
2182 2171 """
2183 2172 with self._writing(transaction):
2184 2173 return self._addrevision(
2185 2174 node,
2186 2175 rawtext,
2187 2176 transaction,
2188 2177 link,
2189 2178 p1,
2190 2179 p2,
2191 2180 flags,
2192 2181 cachedelta,
2193 2182 deltacomputer=deltacomputer,
2194 2183 sidedata=sidedata,
2195 2184 )
2196 2185
2197 2186 def compress(self, data):
2198 2187 """Generate a possibly-compressed representation of data."""
2199 2188 if not data:
2200 2189 return b'', data
2201 2190
2202 2191 compressed = self._compressor.compress(data)
2203 2192
2204 2193 if compressed:
2205 2194 # The revlog compressor added the header in the returned data.
2206 2195 return b'', compressed
2207 2196
2208 2197 if data[0:1] == b'\0':
2209 2198 return b'', data
2210 2199 return b'u', data
2211 2200
2212 2201 def decompress(self, data):
2213 2202 """Decompress a revlog chunk.
2214 2203
2215 2204 The chunk is expected to begin with a header identifying the
2216 2205 format type so it can be routed to an appropriate decompressor.
2217 2206 """
2218 2207 if not data:
2219 2208 return data
2220 2209
2221 2210 # Revlogs are read much more frequently than they are written and many
2222 2211 # chunks only take microseconds to decompress, so performance is
2223 2212 # important here.
2224 2213 #
2225 2214 # We can make a few assumptions about revlogs:
2226 2215 #
2227 2216 # 1) the majority of chunks will be compressed (as opposed to inline
2228 2217 # raw data).
2229 2218 # 2) decompressing *any* data will likely be at least 10x slower than
2230 2219 # returning raw inline data.
2231 2220 # 3) we want to prioritize common and officially supported compression
2232 2221 # engines
2233 2222 #
2234 2223 # It follows that we want to optimize for "decompress compressed data
2235 2224 # when encoded with common and officially supported compression engines"
2236 2225 # case over "raw data" and "data encoded by less common or non-official
2237 2226 # compression engines." That is why we have the inline lookup first
2238 2227 # followed by the compengines lookup.
2239 2228 #
2240 2229 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2241 2230 # compressed chunks. And this matters for changelog and manifest reads.
2242 2231 t = data[0:1]
2243 2232
2244 2233 if t == b'x':
2245 2234 try:
2246 2235 return _zlibdecompress(data)
2247 2236 except zlib.error as e:
2248 2237 raise error.RevlogError(
2249 2238 _(b'revlog decompress error: %s')
2250 2239 % stringutil.forcebytestr(e)
2251 2240 )
2252 2241 # '\0' is more common than 'u' so it goes first.
2253 2242 elif t == b'\0':
2254 2243 return data
2255 2244 elif t == b'u':
2256 2245 return util.buffer(data, 1)
2257 2246
2258 2247 try:
2259 2248 compressor = self._decompressors[t]
2260 2249 except KeyError:
2261 2250 try:
2262 2251 engine = util.compengines.forrevlogheader(t)
2263 2252 compressor = engine.revlogcompressor(self._compengineopts)
2264 2253 self._decompressors[t] = compressor
2265 2254 except KeyError:
2266 2255 raise error.RevlogError(
2267 2256 _(b'unknown compression type %s') % binascii.hexlify(t)
2268 2257 )
2269 2258
2270 2259 return compressor.decompress(data)
2271 2260
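    # A self-contained sketch of the header-byte convention handled by
    # compress() and decompress() above: zlib output begins with 'x' (so it
    # needs no extra marker), uncompressible data sits behind a literal 'u',
    # and data starting with '\0' is stored as-is.
    def _header_byte_sketch():
        import zlib

        raw = b'\x01some uncompressible-looking revision text'
        z = zlib.compress(b'x' * 1000)
        assert z[0:1] == b'x'  # the zlib header doubles as the type byte
        assert zlib.decompress(z) == b'x' * 1000
        stored = b'u' + raw  # the 'u' fallback used by compress() above
        assert stored[1:] == raw
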
2272 2261 def _addrevision(
2273 2262 self,
2274 2263 node,
2275 2264 rawtext,
2276 2265 transaction,
2277 2266 link,
2278 2267 p1,
2279 2268 p2,
2280 2269 flags,
2281 2270 cachedelta,
2282 2271 alwayscache=False,
2283 2272 deltacomputer=None,
2284 2273 sidedata=None,
2285 2274 ):
2286 2275 """internal function to add revisions to the log
2287 2276
2288 2277 see addrevision for argument descriptions.
2289 2278
2290 2279 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2291 2280
2292 2281 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2293 2282 be used.
2294 2283
2295 2284 invariants:
2296 2285 - rawtext is optional (can be None); if not set, cachedelta must be set.
2297 2286 if both are set, they must correspond to each other.
2298 2287 """
2299 2288 if node == self.nullid:
2300 2289 raise error.RevlogError(
2301 2290 _(b"%s: attempt to add null revision") % self.display_id
2302 2291 )
2303 2292 if (
2304 2293 node == self.nodeconstants.wdirid
2305 2294 or node in self.nodeconstants.wdirfilenodeids
2306 2295 ):
2307 2296 raise error.RevlogError(
2308 2297 _(b"%s: attempt to add wdir revision") % self.display_id
2309 2298 )
2310 2299 if self._writinghandles is None:
2311 2300 msg = b'adding revision outside `revlog._writing` context'
2312 2301 raise error.ProgrammingError(msg)
2313 2302
2314 2303 if self._inline:
2315 2304 fh = self._writinghandles[0]
2316 2305 else:
2317 2306 fh = self._writinghandles[1]
2318 2307
2319 2308 btext = [rawtext]
2320 2309
2321 2310 curr = len(self)
2322 2311 prev = curr - 1
2323 2312
2324 2313 offset = self._get_data_offset(prev)
2325 2314
2326 2315 if self._concurrencychecker:
2327 2316 ifh, dfh = self._writinghandles
2328 2317 if self._inline:
2329 2318 # offset is "as if" it were in the .d file, so we need to add on
2330 2319 # the size of the entry metadata.
2331 2320 self._concurrencychecker(
2332 2321 ifh, self._indexfile, offset + curr * self.index.entry_size
2333 2322 )
2334 2323 else:
2335 2324 # Entries in the .i are a consistent size.
2336 2325 self._concurrencychecker(
2337 2326 ifh, self._indexfile, curr * self.index.entry_size
2338 2327 )
2339 2328 self._concurrencychecker(dfh, self._datafile, offset)
2340 2329
2341 2330 p1r, p2r = self.rev(p1), self.rev(p2)
2342 2331
2343 2332 # full versions are inserted when the needed deltas
2344 2333 # become comparable to the uncompressed text
2345 2334 if rawtext is None:
2346 2335 # need the rawtext size before it is changed by flag processors, which
2347 2336 # is the non-raw size. use revlog explicitly to avoid filelog's extra
2348 2337 # logic that might remove metadata size.
2349 2338 textlen = mdiff.patchedsize(
2350 2339 revlog.size(self, cachedelta[0]), cachedelta[1]
2351 2340 )
2352 2341 else:
2353 2342 textlen = len(rawtext)
2354 2343
2355 2344 if deltacomputer is None:
2356 2345 deltacomputer = deltautil.deltacomputer(self)
2357 2346
2358 2347 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2359 2348
2360 2349 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2361 2350
2362 2351 if sidedata and self.hassidedata:
2363 2352 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2364 2353 sidedata_offset = offset + deltainfo.deltalen
2365 2354 else:
2366 2355 serialized_sidedata = b""
2367 2356 # Don't store the offset if the sidedata is empty; that way
2368 2357 # we can easily detect empty sidedata, and it will be no different
2369 2358 # from sidedata we add manually.
2370 2359 sidedata_offset = 0
2371 2360
2372 2361 e = (
2373 2362 offset_type(offset, flags),
2374 2363 deltainfo.deltalen,
2375 2364 textlen,
2376 2365 deltainfo.base,
2377 2366 link,
2378 2367 p1r,
2379 2368 p2r,
2380 2369 node,
2381 2370 sidedata_offset,
2382 2371 len(serialized_sidedata),
2383 2372 )
2384 2373
2385 2374 self.index.append(e)
2386 2375 entry = self.index.entry_binary(curr)
2387 2376 if curr == 0:
2388 2377 header = self._format_flags | self._format_version
2389 2378 header = self.index.pack_header(header)
2390 2379 entry = header + entry
2391 2380 self._writeentry(
2392 2381 transaction,
2393 2382 entry,
2394 2383 deltainfo.data,
2395 2384 link,
2396 2385 offset,
2397 2386 serialized_sidedata,
2398 2387 )
2399 2388
2400 2389 rawtext = btext[0]
2401 2390
2402 2391 if alwayscache and rawtext is None:
2403 2392 rawtext = deltacomputer.buildtext(revinfo, fh)
2404 2393
2405 2394 if type(rawtext) == bytes: # only accept immutable objects
2406 2395 self._revisioncache = (node, curr, rawtext)
2407 2396 self._chainbasecache[curr] = deltainfo.chainbase
2408 2397 return curr
2409 2398
2410 2399 def _get_data_offset(self, prev):
2411 2400 """Returns the current offset in the (in-transaction) data file.
2412 2401 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2413 2402 file to store that information: since sidedata can be rewritten to the
2414 2403 end of the data file within a transaction, you can have cases where, for
2415 2404 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2416 2405 to `n - 1`'s sidedata being written after `n`'s data.
2417 2406
2418 2407 TODO cache this in a docket file before getting out of experimental."""
2419 2408 if self._format_version != REVLOGV2:
2420 2409 return self.end(prev)
2421 2410
2422 2411 offset = 0
2423 2412 for rev, entry in enumerate(self.index):
2424 2413 sidedata_end = entry[8] + entry[9]
2425 2414 # Sidedata for a previous rev has potentially been written after
2426 2415 # this rev's end, so take the max.
2427 2416 offset = max(self.end(rev), offset, sidedata_end)
2428 2417 return offset
2429 2418
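    # A toy, self-contained mirror of the scan above (hypothetical numbers):
    # with rev 0's data ending at 100 but its sidedata rewritten to
    # [180, 220), and rev 1's data ending at 180 with no sidedata, the next
    # safe write offset is max(100, 220, 180) == 220, not simply the end of
    # the last revision's data.
    def _v2_offset_sketch():
        # (data_end, sidedata_offset, sidedata_size) per rev, toy values
        toy = [(100, 180, 40), (180, 0, 0)]
        offset = 0
        for data_end, sd_off, sd_size in toy:
            offset = max(offset, data_end, sd_off + sd_size)
        return offset  # 220
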
2430 2419 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2431 2420 # Files opened in a+ mode have inconsistent behavior on various
2432 2421 # platforms. Windows requires that a file positioning call be made
2433 2422 # when the file handle transitions between reads and writes. See
2434 2423 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2435 2424 # platforms, Python or the platform itself can be buggy. Some versions
2436 2425 # of Solaris have been observed to not append at the end of the file
2437 2426 # if the file was seeked to before the end. See issue4943 for more.
2438 2427 #
2439 2428 # We work around this issue by inserting a seek() before writing.
2440 2429 # Note: This is likely not necessary on Python 3. However, because
2441 2430 # the file handle is reused for reads and may be seeked there, we need
2442 2431 # to be careful before changing this.
2443 2432 if self._writinghandles is None:
2444 2433 msg = b'adding revision outside `revlog._writing` context'
2445 2434 raise error.ProgrammingError(msg)
2446 2435 ifh, dfh = self._writinghandles
2447 2436 ifh.seek(0, os.SEEK_END)
2448 2437 if dfh:
2449 2438 dfh.seek(0, os.SEEK_END)
2450 2439
2451 2440 curr = len(self) - 1
2452 2441 if not self._inline:
2453 2442 transaction.add(self._datafile, offset)
2454 2443 transaction.add(self._indexfile, curr * len(entry))
2455 2444 if data[0]:
2456 2445 dfh.write(data[0])
2457 2446 dfh.write(data[1])
2458 2447 if sidedata:
2459 2448 dfh.write(sidedata)
2460 2449 ifh.write(entry)
2461 2450 else:
2462 2451 offset += curr * self.index.entry_size
2463 2452 transaction.add(self._indexfile, offset)
2464 2453 ifh.write(entry)
2465 2454 ifh.write(data[0])
2466 2455 ifh.write(data[1])
2467 2456 if sidedata:
2468 2457 ifh.write(sidedata)
2469 2458 self._enforceinlinesize(transaction)
2470 2459 nodemaputil.setup_persistent_nodemap(transaction, self)
2471 2460
2472 2461 def addgroup(
2473 2462 self,
2474 2463 deltas,
2475 2464 linkmapper,
2476 2465 transaction,
2477 2466 alwayscache=False,
2478 2467 addrevisioncb=None,
2479 2468 duplicaterevisioncb=None,
2480 2469 ):
2481 2470 """
2482 2471 add a delta group
2483 2472
2484 2473 given a set of deltas, add them to the revision log. the
2485 2474 first delta is against its parent, which should be in our
2486 2475 log, the rest are against the previous delta.
2487 2476
2488 2477 If ``addrevisioncb`` is defined, it will be called with arguments of
2489 2478 this revlog and the node that was added.
2490 2479 """
2491 2480
2492 2481 if self._adding_group:
2493 2482 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2494 2483
2495 2484 self._adding_group = True
2496 2485 empty = True
2497 2486 try:
2498 2487 with self._writing(transaction):
2499 2488 deltacomputer = deltautil.deltacomputer(self)
2500 2489 # loop through our set of deltas
2501 2490 for data in deltas:
2502 2491 (
2503 2492 node,
2504 2493 p1,
2505 2494 p2,
2506 2495 linknode,
2507 2496 deltabase,
2508 2497 delta,
2509 2498 flags,
2510 2499 sidedata,
2511 2500 ) = data
2512 2501 link = linkmapper(linknode)
2513 2502 flags = flags or REVIDX_DEFAULT_FLAGS
2514 2503
2515 2504 rev = self.index.get_rev(node)
2516 2505 if rev is not None:
2517 2506 # this can happen if two branches make the same change
2518 2507 self._nodeduplicatecallback(transaction, rev)
2519 2508 if duplicaterevisioncb:
2520 2509 duplicaterevisioncb(self, rev)
2521 2510 empty = False
2522 2511 continue
2523 2512
2524 2513 for p in (p1, p2):
2525 2514 if not self.index.has_node(p):
2526 2515 raise error.LookupError(
2527 2516 p, self.radix, _(b'unknown parent')
2528 2517 )
2529 2518
2530 2519 if not self.index.has_node(deltabase):
2531 2520 raise error.LookupError(
2532 2521 deltabase, self.display_id, _(b'unknown delta base')
2533 2522 )
2534 2523
2535 2524 baserev = self.rev(deltabase)
2536 2525
2537 2526 if baserev != nullrev and self.iscensored(baserev):
2538 2527 # if base is censored, delta must be full replacement in a
2539 2528 # single patch operation
2540 2529 hlen = struct.calcsize(b">lll")
2541 2530 oldlen = self.rawsize(baserev)
2542 2531 newlen = len(delta) - hlen
2543 2532 if delta[:hlen] != mdiff.replacediffheader(
2544 2533 oldlen, newlen
2545 2534 ):
2546 2535 raise error.CensoredBaseError(
2547 2536 self.display_id, self.node(baserev)
2548 2537 )
2549 2538
2550 2539 if not flags and self._peek_iscensored(baserev, delta):
2551 2540 flags |= REVIDX_ISCENSORED
2552 2541
2553 2542 # We assume consumers of addrevisioncb will want to retrieve
2554 2543 # the added revision, which will require a call to
2555 2544 # revision(). revision() will fast path if there is a cache
2556 2545 # hit. So, we tell _addrevision() to always cache in this case.
2557 2546 # We're only using addgroup() in the context of changegroup
2558 2547 # generation so the revision data can always be handled as raw
2559 2548 # by the flagprocessor.
2560 2549 rev = self._addrevision(
2561 2550 node,
2562 2551 None,
2563 2552 transaction,
2564 2553 link,
2565 2554 p1,
2566 2555 p2,
2567 2556 flags,
2568 2557 (baserev, delta),
2569 2558 alwayscache=alwayscache,
2570 2559 deltacomputer=deltacomputer,
2571 2560 sidedata=sidedata,
2572 2561 )
2573 2562
2574 2563 if addrevisioncb:
2575 2564 addrevisioncb(self, rev)
2576 2565 empty = False
2577 2566 finally:
2578 2567 self._adding_group = False
2579 2568 return not empty
2580 2569
2581 2570 def iscensored(self, rev):
2582 2571 """Check if a file revision is censored."""
2583 2572 if not self._censorable:
2584 2573 return False
2585 2574
2586 2575 return self.flags(rev) & REVIDX_ISCENSORED
2587 2576
2588 2577 def _peek_iscensored(self, baserev, delta):
2589 2578 """Quickly check if a delta produces a censored revision."""
2590 2579 if not self._censorable:
2591 2580 return False
2592 2581
2593 2582 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2594 2583
2595 2584 def getstrippoint(self, minlink):
2596 2585 """find the minimum rev that must be stripped to strip the linkrev
2597 2586
2598 2587 Returns a tuple containing the minimum rev and a set of all revs that
2599 2588 have linkrevs that will be broken by this strip.
2600 2589 """
2601 2590 return storageutil.resolvestripinfo(
2602 2591 minlink,
2603 2592 len(self) - 1,
2604 2593 self.headrevs(),
2605 2594 self.linkrev,
2606 2595 self.parentrevs,
2607 2596 )
2608 2597
2609 2598 def strip(self, minlink, transaction):
2610 2599 """truncate the revlog on the first revision with a linkrev >= minlink
2611 2600
2612 2601 This function is called when we're stripping revision minlink and
2613 2602 its descendants from the repository.
2614 2603
2615 2604 We have to remove all revisions with linkrev >= minlink, because
2616 2605 the equivalent changelog revisions will be renumbered after the
2617 2606 strip.
2618 2607
2619 2608 So we truncate the revlog on the first of these revisions, and
2620 2609 trust that the caller has saved the revisions that shouldn't be
2621 2610 removed and that it'll re-add them after this truncation.
2622 2611 """
2623 2612 if len(self) == 0:
2624 2613 return
2625 2614
2626 2615 rev, _ = self.getstrippoint(minlink)
2627 2616 if rev == len(self):
2628 2617 return
2629 2618
2630 2619 # first truncate the files on disk
2631 2620 end = self.start(rev)
2632 2621 if not self._inline:
2633 2622 transaction.add(self._datafile, end)
2634 2623 end = rev * self.index.entry_size
2635 2624 else:
2636 2625 end += rev * self.index.entry_size
2637 2626
2638 2627 transaction.add(self._indexfile, end)
2639 2628
2640 2629 # then reset internal state in memory to forget those revisions
2641 2630 self._revisioncache = None
2642 2631 self._chaininfocache = util.lrucachedict(500)
2643 2632 self._chunkclear()
2644 2633
2645 2634 del self.index[rev:-1]
2646 2635
2647 2636 def checksize(self):
2648 2637 """Check size of index and data files
2649 2638
2650 2639 return a (dd, di) tuple.
2651 2640 - dd: extra bytes for the "data" file
2652 2641 - di: extra bytes for the "index" file
2653 2642
2654 2643 A healthy revlog will return (0, 0).
2655 2644 """
2656 2645 expected = 0
2657 2646 if len(self):
2658 2647 expected = max(0, self.end(len(self) - 1))
2659 2648
2660 2649 try:
2661 2650 with self._datafp() as f:
2662 2651 f.seek(0, io.SEEK_END)
2663 2652 actual = f.tell()
2664 2653 dd = actual - expected
2665 2654 except IOError as inst:
2666 2655 if inst.errno != errno.ENOENT:
2667 2656 raise
2668 2657 dd = 0
2669 2658
2670 2659 try:
2671 2660 f = self.opener(self._indexfile)
2672 2661 f.seek(0, io.SEEK_END)
2673 2662 actual = f.tell()
2674 2663 f.close()
2675 2664 s = self.index.entry_size
2676 2665 i = max(0, actual // s)
2677 2666 di = actual - (i * s)
2678 2667 if self._inline:
2679 2668 databytes = 0
2680 2669 for r in self:
2681 2670 databytes += max(0, self.length(r))
2682 2671 dd = 0
2683 2672 di = actual - len(self) * s - databytes
2684 2673 except IOError as inst:
2685 2674 if inst.errno != errno.ENOENT:
2686 2675 raise
2687 2676 di = 0
2688 2677
2689 2678 return (dd, di)
2690 2679
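    # Hedged usage sketch of the (dd, di) pair documented above (``rl`` is
    # assumed to be an existing revlog instance; the message is illustrative).
    def _checksize_sketch(rl):
        dd, di = rl.checksize()
        if (dd, di) != (0, 0):
            # dd/di are the stray byte counts for the data/index files
            raise error.RevlogError(
                b'revlog damaged: %d extra data bytes, %d extra index bytes'
                % (dd, di)
            )
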
2691 2680 def files(self):
2692 2681 res = [self._indexfile]
2693 2682 if not self._inline:
2694 2683 res.append(self._datafile)
2695 2684 return res
2696 2685
2697 2686 def emitrevisions(
2698 2687 self,
2699 2688 nodes,
2700 2689 nodesorder=None,
2701 2690 revisiondata=False,
2702 2691 assumehaveparentrevisions=False,
2703 2692 deltamode=repository.CG_DELTAMODE_STD,
2704 2693 sidedata_helpers=None,
2705 2694 ):
2706 2695 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2707 2696 raise error.ProgrammingError(
2708 2697 b'unhandled value for nodesorder: %s' % nodesorder
2709 2698 )
2710 2699
2711 2700 if nodesorder is None and not self._generaldelta:
2712 2701 nodesorder = b'storage'
2713 2702
2714 2703 if (
2715 2704 not self._storedeltachains
2716 2705 and deltamode != repository.CG_DELTAMODE_PREV
2717 2706 ):
2718 2707 deltamode = repository.CG_DELTAMODE_FULL
2719 2708
2720 2709 return storageutil.emitrevisions(
2721 2710 self,
2722 2711 nodes,
2723 2712 nodesorder,
2724 2713 revlogrevisiondelta,
2725 2714 deltaparentfn=self.deltaparent,
2726 2715 candeltafn=self.candelta,
2727 2716 rawsizefn=self.rawsize,
2728 2717 revdifffn=self.revdiff,
2729 2718 flagsfn=self.flags,
2730 2719 deltamode=deltamode,
2731 2720 revisiondata=revisiondata,
2732 2721 assumehaveparentrevisions=assumehaveparentrevisions,
2733 2722 sidedata_helpers=sidedata_helpers,
2734 2723 )
2735 2724
2736 2725 DELTAREUSEALWAYS = b'always'
2737 2726 DELTAREUSESAMEREVS = b'samerevs'
2738 2727 DELTAREUSENEVER = b'never'
2739 2728
2740 2729 DELTAREUSEFULLADD = b'fulladd'
2741 2730
2742 2731 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2743 2732
2744 2733 def clone(
2745 2734 self,
2746 2735 tr,
2747 2736 destrevlog,
2748 2737 addrevisioncb=None,
2749 2738 deltareuse=DELTAREUSESAMEREVS,
2750 2739 forcedeltabothparents=None,
2751 2740 sidedata_helpers=None,
2752 2741 ):
2753 2742 """Copy this revlog to another, possibly with format changes.
2754 2743
2755 2744 The destination revlog will contain the same revisions and nodes.
2756 2745 However, it may not be bit-for-bit identical due to e.g. delta encoding
2757 2746 differences.
2758 2747
2759 2748 The ``deltareuse`` argument controls how deltas from the existing revlog
2760 2749 are preserved in the destination revlog. The argument can have the
2761 2750 following values:
2762 2751
2763 2752 DELTAREUSEALWAYS
2764 2753 Deltas will always be reused (if possible), even if the destination
2765 2754 revlog would not select the same revisions for the delta. This is the
2766 2755 fastest mode of operation.
2767 2756 DELTAREUSESAMEREVS
2768 2757 Deltas will be reused if the destination revlog would pick the same
2769 2758 revisions for the delta. This mode strikes a balance between speed
2770 2759 and optimization.
2771 2760 DELTAREUSENEVER
2772 2761 Deltas will never be reused. This is the slowest mode of execution.
2773 2762 This mode can be used to recompute deltas (e.g. if the diff/delta
2774 2763 algorithm changes).
2775 2764 DELTAREUSEFULLADD
2776 2765 Revisions will be re-added as if they were new content. This is
2777 2766 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2778 2767 e.g. large file detection and handling.
2779 2768
2780 2769 Delta computation can be slow, so the choice of delta reuse policy can
2781 2770 significantly affect run time.
2782 2771
2783 2772 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2784 2773 two extremes. Deltas will be reused if they are appropriate. But if the
2785 2774 delta could choose a better revision, it will do so. This means if you
2786 2775 are converting a non-generaldelta revlog to a generaldelta revlog,
2787 2776 deltas will be recomputed if the delta's parent isn't a parent of the
2788 2777 revision.
2789 2778
2790 2779 In addition to the delta policy, the ``forcedeltabothparents``
2791 2780 argument controls whether to force computing deltas against both parents
2792 2781 for merges. If not set, the current default is used.
2793 2782
2794 2783 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2795 2784 `sidedata_helpers`.
2796 2785 """
2797 2786 if deltareuse not in self.DELTAREUSEALL:
2798 2787 raise ValueError(
2799 2788 _(b'value for deltareuse invalid: %s') % deltareuse
2800 2789 )
2801 2790
2802 2791 if len(destrevlog):
2803 2792 raise ValueError(_(b'destination revlog is not empty'))
2804 2793
2805 2794 if getattr(self, 'filteredrevs', None):
2806 2795 raise ValueError(_(b'source revlog has filtered revisions'))
2807 2796 if getattr(destrevlog, 'filteredrevs', None):
2808 2797 raise ValueError(_(b'destination revlog has filtered revisions'))
2809 2798
2810 2799 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2811 2800 # if possible.
2812 2801 oldlazydelta = destrevlog._lazydelta
2813 2802 oldlazydeltabase = destrevlog._lazydeltabase
2814 2803 oldamd = destrevlog._deltabothparents
2815 2804
2816 2805 try:
2817 2806 if deltareuse == self.DELTAREUSEALWAYS:
2818 2807 destrevlog._lazydeltabase = True
2819 2808 destrevlog._lazydelta = True
2820 2809 elif deltareuse == self.DELTAREUSESAMEREVS:
2821 2810 destrevlog._lazydeltabase = False
2822 2811 destrevlog._lazydelta = True
2823 2812 elif deltareuse == self.DELTAREUSENEVER:
2824 2813 destrevlog._lazydeltabase = False
2825 2814 destrevlog._lazydelta = False
2826 2815
2827 2816 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2828 2817
2829 2818 self._clone(
2830 2819 tr,
2831 2820 destrevlog,
2832 2821 addrevisioncb,
2833 2822 deltareuse,
2834 2823 forcedeltabothparents,
2835 2824 sidedata_helpers,
2836 2825 )
2837 2826
2838 2827 finally:
2839 2828 destrevlog._lazydelta = oldlazydelta
2840 2829 destrevlog._lazydeltabase = oldlazydeltabase
2841 2830 destrevlog._deltabothparents = oldamd
2842 2831
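    # Hedged usage sketch (``src``, ``dst`` and ``tr`` are assumed to be an
    # existing revlog, an empty destination revlog and an open transaction):
    # recompute every delta while copying, the slowest but most thorough mode.
    def _clone_sketch(src, dst, tr):
        src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
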
2843 2832 def _clone(
2844 2833 self,
2845 2834 tr,
2846 2835 destrevlog,
2847 2836 addrevisioncb,
2848 2837 deltareuse,
2849 2838 forcedeltabothparents,
2850 2839 sidedata_helpers,
2851 2840 ):
2852 2841 """perform the core duty of `revlog.clone` after parameter processing"""
2853 2842 deltacomputer = deltautil.deltacomputer(destrevlog)
2854 2843 index = self.index
2855 2844 for rev in self:
2856 2845 entry = index[rev]
2857 2846
2858 2847 # Some classes override linkrev to take filtered revs into
2859 2848 # account. Use raw entry from index.
2860 2849 flags = entry[0] & 0xFFFF
2861 2850 linkrev = entry[4]
2862 2851 p1 = index[entry[5]][7]
2863 2852 p2 = index[entry[6]][7]
2864 2853 node = entry[7]
2865 2854
2866 2855 # (Possibly) reuse the delta from the revlog if allowed and
2867 2856 # the revlog chunk is a delta.
2868 2857 cachedelta = None
2869 2858 rawtext = None
2870 2859 if deltareuse == self.DELTAREUSEFULLADD:
2871 2860 text, sidedata = self._revisiondata(rev)
2872 2861
2873 2862 if sidedata_helpers is not None:
2874 2863 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2875 2864 self, sidedata_helpers, sidedata, rev
2876 2865 )
2877 2866 flags = flags | new_flags[0] & ~new_flags[1]
2878 2867
2879 2868 destrevlog.addrevision(
2880 2869 text,
2881 2870 tr,
2882 2871 linkrev,
2883 2872 p1,
2884 2873 p2,
2885 2874 cachedelta=cachedelta,
2886 2875 node=node,
2887 2876 flags=flags,
2888 2877 deltacomputer=deltacomputer,
2889 2878 sidedata=sidedata,
2890 2879 )
2891 2880 else:
2892 2881 if destrevlog._lazydelta:
2893 2882 dp = self.deltaparent(rev)
2894 2883 if dp != nullrev:
2895 2884 cachedelta = (dp, bytes(self._chunk(rev)))
2896 2885
2897 2886 sidedata = None
2898 2887 if not cachedelta:
2899 2888 rawtext, sidedata = self._revisiondata(rev)
2900 2889 if sidedata is None:
2901 2890 sidedata = self.sidedata(rev)
2902 2891
2903 2892 if sidedata_helpers is not None:
2904 2893 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2905 2894 self, sidedata_helpers, sidedata, rev
2906 2895 )
2907 2896 flags = flags | new_flags[0] & ~new_flags[1]
2908 2897
2909 2898 with destrevlog._writing(tr):
2910 2899 destrevlog._addrevision(
2911 2900 node,
2912 2901 rawtext,
2913 2902 tr,
2914 2903 linkrev,
2915 2904 p1,
2916 2905 p2,
2917 2906 flags,
2918 2907 cachedelta,
2919 2908 deltacomputer=deltacomputer,
2920 2909 sidedata=sidedata,
2921 2910 )
2922 2911
2923 2912 if addrevisioncb:
2924 2913 addrevisioncb(self, rev, node)
2925 2914
2926 2915 def censorrevision(self, tr, censornode, tombstone=b''):
2927 2916 if self._format_version == REVLOGV0:
2928 2917 raise error.RevlogError(
2929 2918 _(b'cannot censor with version %d revlogs')
2930 2919 % self._format_version
2931 2920 )
2932 2921
2933 2922 censorrev = self.rev(censornode)
2934 2923 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2935 2924
2936 2925 if len(tombstone) > self.rawsize(censorrev):
2937 2926 raise error.Abort(
2938 2927 _(b'censor tombstone must be no longer than censored data')
2939 2928 )
2940 2929
2941 2930 # Rewriting the revlog in place is hard. Our strategy for censoring is
2942 2931 # to create a new revlog, copy all revisions to it, then replace the
2943 2932 # revlogs on transaction close.
2944 2933 #
2945 2934 # This is a bit dangerous. We could easily have a mismatch of state.
2946 2935 newrl = revlog(
2947 2936 self.opener,
2948 2937 target=self.target,
2949 2938 radix=self.radix,
2950 2939 postfix=b'tmpcensored',
2951 2940 censorable=True,
2952 2941 )
2953 2942 newrl._format_version = self._format_version
2954 2943 newrl._format_flags = self._format_flags
2955 2944 newrl._generaldelta = self._generaldelta
2956 2945 newrl._parse_index = self._parse_index
2957 2946
2958 2947 for rev in self.revs():
2959 2948 node = self.node(rev)
2960 2949 p1, p2 = self.parents(node)
2961 2950
2962 2951 if rev == censorrev:
2963 2952 newrl.addrawrevision(
2964 2953 tombstone,
2965 2954 tr,
2966 2955 self.linkrev(censorrev),
2967 2956 p1,
2968 2957 p2,
2969 2958 censornode,
2970 2959 REVIDX_ISCENSORED,
2971 2960 )
2972 2961
2973 2962 if newrl.deltaparent(rev) != nullrev:
2974 2963 raise error.Abort(
2975 2964 _(
2976 2965 b'censored revision stored as delta; '
2977 2966 b'cannot censor'
2978 2967 ),
2979 2968 hint=_(
2980 2969 b'censoring of revlogs is not '
2981 2970 b'fully implemented; please report '
2982 2971 b'this bug'
2983 2972 ),
2984 2973 )
2985 2974 continue
2986 2975
2987 2976 if self.iscensored(rev):
2988 2977 if self.deltaparent(rev) != nullrev:
2989 2978 raise error.Abort(
2990 2979 _(
2991 2980 b'cannot censor due to censored '
2992 2981 b'revision having delta stored'
2993 2982 )
2994 2983 )
2995 2984 rawtext = self._chunk(rev)
2996 2985 else:
2997 2986 rawtext = self.rawdata(rev)
2998 2987
2999 2988 newrl.addrawrevision(
3000 2989 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3001 2990 )
3002 2991
3003 2992 tr.addbackup(self._indexfile, location=b'store')
3004 2993 if not self._inline:
3005 2994 tr.addbackup(self._datafile, location=b'store')
3006 2995
3007 2996 self.opener.rename(newrl._indexfile, self._indexfile)
3008 2997 if not self._inline:
3009 2998 self.opener.rename(newrl._datafile, self._datafile)
3010 2999
3011 3000 self.clearcaches()
3012 3001 self._loadindex()
3013 3002
3014 3003 def verifyintegrity(self, state):
3015 3004 """Verifies the integrity of the revlog.
3016 3005
3017 3006 Yields ``revlogproblem`` instances describing problems that are
3018 3007 found.
3019 3008 """
3020 3009 dd, di = self.checksize()
3021 3010 if dd:
3022 3011 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3023 3012 if di:
3024 3013 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3025 3014
3026 3015 version = self._format_version
3027 3016
3028 3017 # The verifier tells us what version revlog we should be.
3029 3018 if version != state[b'expectedversion']:
3030 3019 yield revlogproblem(
3031 3020 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3032 3021 % (self.display_id, version, state[b'expectedversion'])
3033 3022 )
3034 3023
3035 3024 state[b'skipread'] = set()
3036 3025 state[b'safe_renamed'] = set()
3037 3026
3038 3027 for rev in self:
3039 3028 node = self.node(rev)
3040 3029
3041 3030 # Verify contents. 4 cases to care about:
3042 3031 #
3043 3032 # common: the most common case
3044 3033 # rename: with a rename
3045 3034 # meta: file content starts with b'\1\n', the metadata
3046 3035 # header defined in filelog.py, but without a rename
3047 3036 # ext: content stored externally
3048 3037 #
3049 3038 # More formally, their differences are shown below:
3050 3039 #
3051 3040 # | common | rename | meta | ext
3052 3041 # -------------------------------------------------------
3053 3042 # flags() | 0 | 0 | 0 | not 0
3054 3043 # renamed() | False | True | False | ?
3055 3044 # rawtext[0:2]=='\1\n'| False | True | True | ?
3056 3045 #
3057 3046 # "rawtext" means the raw text stored in revlog data, which
3058 3047 # could be retrieved by "rawdata(rev)". "text"
3059 3048 # mentioned below is "revision(rev)".
3060 3049 #
3061 3050 # There are 3 different lengths stored physically:
3062 3051 # 1. L1: rawsize, stored in revlog index
3063 3052 # 2. L2: len(rawtext), stored in revlog data
3064 3053 # 3. L3: len(text), stored in revlog data if flags==0, or
3065 3054 # possibly somewhere else if flags!=0
3066 3055 #
3067 3056 # L1 should be equal to L2. L3 could be different from them.
3068 3057 # "text" may or may not affect commit hash depending on flag
3069 3058 # processors (see flagutil.addflagprocessor).
3070 3059 #
3071 3060 # | common | rename | meta | ext
3072 3061 # -------------------------------------------------
3073 3062 # rawsize() | L1 | L1 | L1 | L1
3074 3063 # size() | L1 | L2-LM | L1(*) | L1 (?)
3075 3064 # len(rawtext) | L2 | L2 | L2 | L2
3076 3065 # len(text) | L2 | L2 | L2 | L3
3077 3066 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3078 3067 #
3079 3068 # LM: length of metadata, depending on rawtext
3080 3069 # (*): not ideal, see comment in filelog.size
3081 3070 # (?): could be "- len(meta)" if the resolved content has
3082 3071 # rename metadata
3083 3072 #
3084 3073 # Checks needed to be done:
3085 3074 # 1. length check: L1 == L2, in all cases.
3086 3075 # 2. hash check: depending on flag processor, we may need to
3087 3076 # use either "text" (external), or "rawtext" (in revlog).
3088 3077
3089 3078 try:
3090 3079 skipflags = state.get(b'skipflags', 0)
3091 3080 if skipflags:
3092 3081 skipflags &= self.flags(rev)
3093 3082
3094 3083 _verify_revision(self, skipflags, state, node)
3095 3084
3096 3085 l1 = self.rawsize(rev)
3097 3086 l2 = len(self.rawdata(node))
3098 3087
3099 3088 if l1 != l2:
3100 3089 yield revlogproblem(
3101 3090 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3102 3091 node=node,
3103 3092 )
3104 3093
3105 3094 except error.CensoredNodeError:
3106 3095 if state[b'erroroncensored']:
3107 3096 yield revlogproblem(
3108 3097 error=_(b'censored file data'), node=node
3109 3098 )
3110 3099 state[b'skipread'].add(node)
3111 3100 except Exception as e:
3112 3101 yield revlogproblem(
3113 3102 error=_(b'unpacking %s: %s')
3114 3103 % (short(node), stringutil.forcebytestr(e)),
3115 3104 node=node,
3116 3105 )
3117 3106 state[b'skipread'].add(node)
3118 3107
3119 3108 def storageinfo(
3120 3109 self,
3121 3110 exclusivefiles=False,
3122 3111 sharedfiles=False,
3123 3112 revisionscount=False,
3124 3113 trackedsize=False,
3125 3114 storedsize=False,
3126 3115 ):
3127 3116 d = {}
3128 3117
3129 3118 if exclusivefiles:
3130 3119 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3131 3120 if not self._inline:
3132 3121 d[b'exclusivefiles'].append((self.opener, self._datafile))
3133 3122
3134 3123 if sharedfiles:
3135 3124 d[b'sharedfiles'] = []
3136 3125
3137 3126 if revisionscount:
3138 3127 d[b'revisionscount'] = len(self)
3139 3128
3140 3129 if trackedsize:
3141 3130 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3142 3131
3143 3132 if storedsize:
3144 3133 d[b'storedsize'] = sum(
3145 3134 self.opener.stat(path).st_size for path in self.files()
3146 3135 )
3147 3136
3148 3137 return d
3149 3138
3150 3139 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3151 3140 if not self.hassidedata:
3152 3141 return
3153 3142 # inline revlogs are not yet supported because they suffer from an issue
3154 3143 # when being rewritten (since it's not an append-only operation).
3155 3144 # See issue6485.
3156 3145 assert not self._inline
3157 3146 if not helpers[1] and not helpers[2]:
3158 3147 # Nothing to generate or remove
3159 3148 return
3160 3149
3161 3150 # the changelog implements a "delayed" writing mechanism that assumes
3162 3151 # all index data is written in append mode and is therefore incompatible
3163 3152 # with the seeked writes done in this method. The use of such "delayed"
3164 3153 # writing will soon be removed for revlog versions that support side
3165 3154 # data, so for now we only keep this simple assert to highlight the
3166 3155 # situation.
3167 3156 delayed = getattr(self, '_delayed', False)
3168 3157 diverted = getattr(self, '_divert', False)
3169 3158 if delayed and not diverted:
3170 3159 msg = "cannot rewrite_sidedata of a delayed revlog"
3171 3160 raise error.ProgrammingError(msg)
3172 3161
3173 3162 new_entries = []
3174 3163 # append the new sidedata
3175 3164 with self._writing(transaction):
3176 3165 ifh, dfh = self._writinghandles
3177 3166 dfh.seek(0, os.SEEK_END)
3178 3167 current_offset = dfh.tell()
3179 3168 for rev in range(startrev, endrev + 1):
3180 3169 entry = self.index[rev]
3181 3170 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3182 3171 store=self,
3183 3172 sidedata_helpers=helpers,
3184 3173 sidedata={},
3185 3174 rev=rev,
3186 3175 )
3187 3176
3188 3177 serialized_sidedata = sidedatautil.serialize_sidedata(
3189 3178 new_sidedata
3190 3179 )
3191 3180 if entry[8] != 0 or entry[9] != 0:
3192 3181 # rewriting entries that already have sidedata is not
3193 3182 # supported yet, because it introduces garbage data in the
3194 3183 # revlog.
3195 3184 msg = b"rewriting existing sidedata is not supported yet"
3196 3185 raise error.Abort(msg)
3197 3186
3198 3187 # Apply (potential) flags to add and to remove after running
3199 3188 # the sidedata helpers
3200 3189 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3201 3190 entry = (new_offset_flags,) + entry[1:8]
3202 3191 entry += (current_offset, len(serialized_sidedata))
3203 3192
3204 3193 # the sidedata computation might have moved the file cursors around
3205 3194 dfh.seek(current_offset, os.SEEK_SET)
3206 3195 dfh.write(serialized_sidedata)
3207 3196 new_entries.append(entry)
3208 3197 current_offset += len(serialized_sidedata)
3209 3198
3210 3199 # rewrite the new index entries
3211 3200 ifh.seek(startrev * self.index.entry_size)
3212 3201 for i, e in enumerate(new_entries):
3213 3202 rev = startrev + i
3214 3203 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3215 3204 packed = self.index.entry_binary(rev)
3216 3205 if rev == 0:
3217 3206 header = self._format_flags | self._format_version
3218 3207 header = self.index.pack_header(header)
3219 3208 packed = header + packed
3220 3209 ifh.write(packed)
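A note on the flag update above: new_offset_flags = entry[0] | flags[0] & ~flags[1]. Because & binds tighter than | in Python, the removal mask flags[1] filters only the newly added flags[0]; bits already present in entry[0] are never cleared. A self-contained illustration with made-up flag values:

    # FLAG_A / FLAG_B are illustrative bits, not the real REVIDX_* values;
    # in a real entry the offset occupies the upper bits of the same word.
    FLAG_A = 1 << 0
    FLAG_B = 1 << 1

    existing = (42 << 16) | FLAG_A  # entry[0]: offset 42, FLAG_A already set
    to_add = FLAG_A | FLAG_B        # flags[0]: flags the helpers want set
    to_remove = FLAG_A              # flags[1]: flags the helpers want cleared

    # & binds tighter than |, so to_remove only filters to_add:
    new_offset_flags = existing | to_add & ~to_remove
    assert new_offset_flags == (42 << 16) | FLAG_A | FLAG_B

FLAG_A survives here because it was already set in entry[0]; the removal mask never touches pre-existing bits.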
@@ -1,116 +1,123 b''
1 1 # revlogdeltas.py - constants used for revlog logic
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import struct
13 13
14 14 from ..interfaces import repository
15 15
16 16 ### Internal utility constants
17 17
18 18 KIND_CHANGELOG = 1001 # over 256 so it is not comparable with a single byte value
19 19 KIND_MANIFESTLOG = 1002
20 20 KIND_FILELOG = 1003
21 21 KIND_OTHER = 1004
22 22
23 23 ALL_KINDS = {
24 24 KIND_CHANGELOG,
25 25 KIND_MANIFESTLOG,
26 26 KIND_FILELOG,
27 27 KIND_OTHER,
28 28 }
29 29
30 30 ### main revlog header
31 31
32 32 INDEX_HEADER = struct.Struct(b">I")
33 33
34 34 ## revlog version
35 35 REVLOGV0 = 0
36 36 REVLOGV1 = 1
37 37 # Dummy value until file format is finalized.
38 38 REVLOGV2 = 0xDEAD
39 39
40 40 ## global revlog header flags
41 41 # Shared across v1 and v2.
42 42 FLAG_INLINE_DATA = 1 << 16
43 43 # Only used by v1, implied by v2.
44 44 FLAG_GENERALDELTA = 1 << 17
45 45 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
46 46 REVLOG_DEFAULT_FORMAT = REVLOGV1
47 47 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
48 REVLOGV0_FLAGS = 0
48 49 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
49 50 REVLOGV2_FLAGS = FLAG_INLINE_DATA
50 51
51 52 ### individual entry
52 53
53 54 ## index v0:
54 55 # 4 bytes: offset
55 56 # 4 bytes: compressed length
56 57 # 4 bytes: base rev
57 58 # 4 bytes: link rev
58 59 # 20 bytes: parent 1 nodeid
59 60 # 20 bytes: parent 2 nodeid
60 61 # 20 bytes: nodeid
61 62 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
62 63
63 64 ## index v1
64 65 # 6 bytes: offset
65 66 # 2 bytes: flags
66 67 # 4 bytes: compressed length
67 68 # 4 bytes: uncompressed length
68 69 # 4 bytes: base rev
69 70 # 4 bytes: link rev
70 71 # 4 bytes: parent 1 rev
71 72 # 4 bytes: parent 2 rev
72 73 # 32 bytes: nodeid
73 74 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
74 75 assert INDEX_ENTRY_V1.size == 32 * 2
75 76
76 77 # 6 bytes: offset
77 78 # 2 bytes: flags
78 79 # 4 bytes: compressed length
79 80 # 4 bytes: uncompressed length
80 81 # 4 bytes: base rev
81 82 # 4 bytes: link rev
82 83 # 4 bytes: parent 1 rev
83 84 # 4 bytes: parent 2 rev
84 85 # 32 bytes: nodeid
85 86 # 8 bytes: sidedata offset
86 87 # 4 bytes: sidedata compressed length
87 88 # 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
88 89 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQi20x")
89 90 assert INDEX_ENTRY_V2.size == 32 * 3
90 91
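Both entry layouts pack the 6-byte offset and 2-byte flags into one big-endian 64-bit word; v2 then appends the sidedata offset and length plus padding. A round-trip sketch for the v1 layout, with arbitrary field values:

    import struct

    INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")

    offset_flags = (1024 << 16) | 0  # offset in the upper 48 bits, flags below
    raw = INDEX_ENTRY_V1.pack(
        offset_flags,  # offset + flags
        100,           # compressed length
        120,           # uncompressed length
        0,             # base rev
        0,             # link rev
        -1,            # parent 1 rev (-1 is the null revision)
        -1,            # parent 2 rev
        b'\x00' * 20,  # nodeid; the trailing 12x pads the entry to 64 bytes
    )
    assert len(raw) == 64
    fields = INDEX_ENTRY_V1.unpack(raw)
    assert fields[0] >> 16 == 1024     # recover the offset
    assert fields[0] & 0xFFFF == 0     # recover the flags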
91 92 # revlog index flags
92 93
93 94 # For historical reasons, revlog's internal flags were exposed via the
94 95 # wire protocol and are even exposed in parts of the storage APIs.
95 96
96 97 # revision has censor metadata, must be verified
97 98 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
98 99 # revision hash does not match data (narrowhg)
99 100 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
100 101 # revision data is stored externally
101 102 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
102 103 # revision changes files in a way that could affect copy tracing.
103 104 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
104 105 REVIDX_DEFAULT_FLAGS = 0
105 106 # stable order in which flags need to be processed and their processors applied
106 107 REVIDX_FLAGS_ORDER = [
107 108 REVIDX_ISCENSORED,
108 109 REVIDX_ELLIPSIS,
109 110 REVIDX_EXTSTORED,
110 111 REVIDX_HASCOPIESINFO,
111 112 ]
112 113
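REVIDX_FLAGS_ORDER pins a stable iteration order for flag processors, so every reader and writer applies the same transformations in the same sequence. A hedged sketch of the iteration idea; the processor table and helper name below are hypothetical, and the real dispatch in revlogutils.flagutil may walk the order forwards or in reverse depending on the operation:

    # Hypothetical: processors maps a REVIDX_* flag to callable(text) -> text.
    # Walking a fixed order keeps the transformations deterministic.
    def apply_in_order(flags, text, processors, flags_order):
        for flag in flags_order:
            if flags & flag and flag in processors:
                text = processors[flag](text)
        return text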
113 114 # bitmask for flags that could cause rawdata content change
114 115 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
115 116
117 SUPPORTED_FLAGS = {
118 REVLOGV0: REVLOGV0_FLAGS,
119 REVLOGV1: REVLOGV1_FLAGS,
120 REVLOGV2: REVLOGV2_FLAGS,
121 }
122
116 123 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
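This SUPPORTED_FLAGS table is what allows the unified check in revlog.py (the point of this changeset) to validate a header with one lookup and one mask instead of per-version branches. A sketch of that shape of check; check_header is a made-up name, and the constants are mirrored from the definitions above so the snippet is self-contained:

    # Mirrored from the definitions above; check_header is illustrative only.
    FLAG_INLINE_DATA = 1 << 16
    FLAG_GENERALDELTA = 1 << 17
    REVLOGV0, REVLOGV1, REVLOGV2 = 0, 1, 0xDEAD
    SUPPORTED_FLAGS = {
        REVLOGV0: 0,                                     # REVLOGV0_FLAGS
        REVLOGV1: FLAG_INLINE_DATA | FLAG_GENERALDELTA,  # REVLOGV1_FLAGS
        REVLOGV2: FLAG_INLINE_DATA,                      # REVLOGV2_FLAGS
    }

    def check_header(version, flags):
        if version not in SUPPORTED_FLAGS:
            raise ValueError('unknown revlog version %d' % version)
        unknown = flags & ~SUPPORTED_FLAGS[version]
        if unknown:
            raise ValueError('unknown flags 0x%x in revlog header' % unknown)

    check_header(REVLOGV1, FLAG_INLINE_DATA | FLAG_GENERALDELTA)  # passes
    check_header(REVLOGV0, 0)                                     # passes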